1 /*-------------------------------------------------------------------------
2  *
3  * tzparser.c
4  *	  Functions for parsing timezone offset files
5  *
6  * Note: this code is invoked from the check_hook for the GUC variable
7  * timezone_abbreviations.  Therefore, it should report problems using
8  * GUC_check_errmsg() and related functions, and try to avoid throwing
9  * elog(ERROR).  This is not completely bulletproof at present --- in
10  * particular out-of-memory will throw an error.  Could probably fix with
11  * PG_TRY if necessary.
12  *
13  *
14  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  *	  src/backend/utils/misc/tzparser.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
32 
33 
34 #define WHITESPACE " \t\n\r"
35 
36 static bool validateTzEntry(tzEntry *tzentry);
37 static bool splitTzLine(const char *filename, int lineno,
38 						char *line, tzEntry *tzentry);
39 static int	addToArray(tzEntry **base, int *arraysize, int n,
40 					   tzEntry *entry, bool override);
41 static int	ParseTzFile(const char *filename, int depth,
42 						tzEntry **base, int *arraysize, int n);
43 
44 
45 /*
46  * Apply additional validation checks to a tzEntry
47  *
48  * Returns true if OK, else false
49  */
50 static bool
validateTzEntry(tzEntry * tzentry)51 validateTzEntry(tzEntry *tzentry)
52 {
53 	unsigned char *p;
54 
55 	/*
56 	 * Check restrictions imposed by datetktbl storage format (see datetime.c)
57 	 */
58 	if (strlen(tzentry->abbrev) > TOKMAXLEN)
59 	{
60 		GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
61 						 tzentry->abbrev, TOKMAXLEN,
62 						 tzentry->filename, tzentry->lineno);
63 		return false;
64 	}
65 
66 	/*
67 	 * Sanity-check the offset: shouldn't exceed 14 hours
68 	 */
69 	if (tzentry->offset > 14 * 60 * 60 ||
70 		tzentry->offset < -14 * 60 * 60)
71 	{
72 		GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
73 						 tzentry->offset,
74 						 tzentry->filename, tzentry->lineno);
75 		return false;
76 	}
77 
78 	/*
79 	 * Convert abbrev to lowercase (must match datetime.c's conversion)
80 	 */
81 	for (p = (unsigned char *) tzentry->abbrev; *p; p++)
82 		*p = pg_tolower(*p);
83 
84 	return true;
85 }
86 
87 /*
88  * Attempt to parse the line as a timezone abbrev spec
89  *
90  * Valid formats are:
91  *	name  zone
92  *	name  offset  dst
93  *
94  * Returns true if OK, else false; data is stored in *tzentry
95  */
96 static bool
splitTzLine(const char * filename,int lineno,char * line,tzEntry * tzentry)97 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
98 {
99 	char	   *abbrev;
100 	char	   *offset;
101 	char	   *offset_endptr;
102 	char	   *remain;
103 	char	   *is_dst;
104 
105 	tzentry->lineno = lineno;
106 	tzentry->filename = filename;
107 
108 	abbrev = strtok(line, WHITESPACE);
109 	if (!abbrev)
110 	{
111 		GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
112 						 filename, lineno);
113 		return false;
114 	}
115 	tzentry->abbrev = pstrdup(abbrev);
116 
117 	offset = strtok(NULL, WHITESPACE);
118 	if (!offset)
119 	{
120 		GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
121 						 filename, lineno);
122 		return false;
123 	}
124 
125 	/* We assume zone names don't begin with a digit or sign */
126 	if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
127 	{
128 		tzentry->zone = NULL;
129 		tzentry->offset = strtol(offset, &offset_endptr, 10);
130 		if (offset_endptr == offset || *offset_endptr != '\0')
131 		{
132 			GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
133 							 filename, lineno);
134 			return false;
135 		}
136 
137 		is_dst = strtok(NULL, WHITESPACE);
138 		if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
139 		{
140 			tzentry->is_dst = true;
141 			remain = strtok(NULL, WHITESPACE);
142 		}
143 		else
144 		{
145 			/* there was no 'D' dst specifier */
146 			tzentry->is_dst = false;
147 			remain = is_dst;
148 		}
149 	}
150 	else
151 	{
152 		/*
153 		 * Assume entry is a zone name.  We do not try to validate it by
154 		 * looking up the zone, because that would force loading of a lot of
155 		 * zones that probably will never be used in the current session.
156 		 */
157 		tzentry->zone = pstrdup(offset);
158 		tzentry->offset = 0;
159 		tzentry->is_dst = false;
160 		remain = strtok(NULL, WHITESPACE);
161 	}
162 
163 	if (!remain)				/* no more non-whitespace chars */
164 		return true;
165 
166 	if (remain[0] != '#')		/* must be a comment */
167 	{
168 		GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
169 						 filename, lineno);
170 		return false;
171 	}
172 	return true;
173 }
174 
175 /*
176  * Insert entry into sorted array
177  *
178  * *base: base address of array (changeable if must enlarge array)
179  * *arraysize: allocated length of array (changeable if must enlarge array)
180  * n: current number of valid elements in array
181  * entry: new data to insert
182  * override: true if OK to override
183  *
184  * Returns the new array length (new value for n), or -1 if error
185  */
186 static int
addToArray(tzEntry ** base,int * arraysize,int n,tzEntry * entry,bool override)187 addToArray(tzEntry **base, int *arraysize, int n,
188 		   tzEntry *entry, bool override)
189 {
190 	tzEntry    *arrayptr;
191 	int			low;
192 	int			high;
193 
194 	/*
195 	 * Search the array for a duplicate; as a useful side effect, the array is
196 	 * maintained in sorted order.  We use strcmp() to ensure we match the
197 	 * sort order datetime.c expects.
198 	 */
199 	arrayptr = *base;
200 	low = 0;
201 	high = n - 1;
202 	while (low <= high)
203 	{
204 		int			mid = (low + high) >> 1;
205 		tzEntry    *midptr = arrayptr + mid;
206 		int			cmp;
207 
208 		cmp = strcmp(entry->abbrev, midptr->abbrev);
209 		if (cmp < 0)
210 			high = mid - 1;
211 		else if (cmp > 0)
212 			low = mid + 1;
213 		else
214 		{
215 			/*
216 			 * Found a duplicate entry; complain unless it's the same.
217 			 */
218 			if ((midptr->zone == NULL && entry->zone == NULL &&
219 				 midptr->offset == entry->offset &&
220 				 midptr->is_dst == entry->is_dst) ||
221 				(midptr->zone != NULL && entry->zone != NULL &&
222 				 strcmp(midptr->zone, entry->zone) == 0))
223 			{
224 				/* return unchanged array */
225 				return n;
226 			}
227 			if (override)
228 			{
229 				/* same abbrev but something is different, override */
230 				midptr->zone = entry->zone;
231 				midptr->offset = entry->offset;
232 				midptr->is_dst = entry->is_dst;
233 				return n;
234 			}
235 			/* same abbrev but something is different, complain */
236 			GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
237 							 entry->abbrev);
238 			GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
239 								midptr->filename, midptr->lineno,
240 								entry->filename, entry->lineno);
241 			return -1;
242 		}
243 	}
244 
245 	/*
246 	 * No match, insert at position "low".
247 	 */
248 	if (n >= *arraysize)
249 	{
250 		*arraysize *= 2;
251 		*base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
252 	}
253 
254 	arrayptr = *base + low;
255 
256 	memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
257 
258 	memcpy(arrayptr, entry, sizeof(tzEntry));
259 
260 	return n + 1;
261 }
262 
263 /*
264  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
265  *
266  * filename: user-specified file name (does not include path)
267  * depth: current recursion depth
268  * *base: array for results (changeable if must enlarge array)
269  * *arraysize: allocated length of array (changeable if must enlarge array)
270  * n: current number of valid elements in array
271  *
272  * Returns the new array length (new value for n), or -1 if error
273  */
274 static int
ParseTzFile(const char * filename,int depth,tzEntry ** base,int * arraysize,int n)275 ParseTzFile(const char *filename, int depth,
276 			tzEntry **base, int *arraysize, int n)
277 {
278 	char		share_path[MAXPGPATH];
279 	char		file_path[MAXPGPATH];
280 	FILE	   *tzFile;
281 	char		tzbuf[1024];
282 	char	   *line;
283 	tzEntry		tzentry;
284 	int			lineno = 0;
285 	bool		override = false;
286 	const char *p;
287 
288 	/*
289 	 * We enforce that the filename is all alpha characters.  This may be
290 	 * overly restrictive, but we don't want to allow access to anything
291 	 * outside the timezonesets directory, so for instance '/' *must* be
292 	 * rejected.
293 	 */
294 	for (p = filename; *p; p++)
295 	{
296 		if (!isalpha((unsigned char) *p))
297 		{
298 			/* at level 0, just use guc.c's regular "invalid value" message */
299 			if (depth > 0)
300 				GUC_check_errmsg("invalid time zone file name \"%s\"",
301 								 filename);
302 			return -1;
303 		}
304 	}
305 
306 	/*
307 	 * The maximal recursion depth is a pretty arbitrary setting. It is hard
308 	 * to imagine that someone needs more than 3 levels so stick with this
309 	 * conservative setting until someone complains.
310 	 */
311 	if (depth > 3)
312 	{
313 		GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
314 						 filename);
315 		return -1;
316 	}
317 
318 	get_share_path(my_exec_path, share_path);
319 	snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
320 			 share_path, filename);
321 	tzFile = AllocateFile(file_path, "r");
322 	if (!tzFile)
323 	{
324 		/*
325 		 * Check to see if the problem is not the filename but the directory.
326 		 * This is worth troubling over because if the installation share/
327 		 * directory is missing or unreadable, this is likely to be the first
328 		 * place we notice a problem during postmaster startup.
329 		 */
330 		int			save_errno = errno;
331 		DIR		   *tzdir;
332 
333 		snprintf(file_path, sizeof(file_path), "%s/timezonesets",
334 				 share_path);
335 		tzdir = AllocateDir(file_path);
336 		if (tzdir == NULL)
337 		{
338 			GUC_check_errmsg("could not open directory \"%s\": %m",
339 							 file_path);
340 			GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
341 							  my_exec_path);
342 			return -1;
343 		}
344 		FreeDir(tzdir);
345 		errno = save_errno;
346 
347 		/*
348 		 * otherwise, if file doesn't exist and it's level 0, guc.c's
349 		 * complaint is enough
350 		 */
351 		if (errno != ENOENT || depth > 0)
352 			GUC_check_errmsg("could not read time zone file \"%s\": %m",
353 							 filename);
354 
355 		return -1;
356 	}
357 
358 	while (!feof(tzFile))
359 	{
360 		lineno++;
361 		if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
362 		{
363 			if (ferror(tzFile))
364 			{
365 				GUC_check_errmsg("could not read time zone file \"%s\": %m",
366 								 filename);
367 				return -1;
368 			}
369 			/* else we're at EOF after all */
370 			break;
371 		}
372 		if (strlen(tzbuf) == sizeof(tzbuf) - 1)
373 		{
374 			/* the line is too long for tzbuf */
375 			GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
376 							 filename, lineno);
377 			return -1;
378 		}
379 
380 		/* skip over whitespace */
381 		line = tzbuf;
382 		while (*line && isspace((unsigned char) *line))
383 			line++;
384 
385 		if (*line == '\0')		/* empty line */
386 			continue;
387 		if (*line == '#')		/* comment line */
388 			continue;
389 
390 		if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
391 		{
392 			/* pstrdup so we can use filename in result data structure */
393 			char	   *includeFile = pstrdup(line + strlen("@INCLUDE"));
394 
395 			includeFile = strtok(includeFile, WHITESPACE);
396 			if (!includeFile || !*includeFile)
397 			{
398 				GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
399 								 filename, lineno);
400 				return -1;
401 			}
402 			n = ParseTzFile(includeFile, depth + 1,
403 							base, arraysize, n);
404 			if (n < 0)
405 				return -1;
406 			continue;
407 		}
408 
409 		if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
410 		{
411 			override = true;
412 			continue;
413 		}
414 
415 		if (!splitTzLine(filename, lineno, line, &tzentry))
416 			return -1;
417 		if (!validateTzEntry(&tzentry))
418 			return -1;
419 		n = addToArray(base, arraysize, n, &tzentry, override);
420 		if (n < 0)
421 			return -1;
422 	}
423 
424 	FreeFile(tzFile);
425 
426 	return n;
427 }
428 
429 /*
430  * load_tzoffsets --- read and parse the specified timezone offset file
431  *
432  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
433  * malloc'd not palloc'd.  On failure, return NULL, using GUC_check_errmsg
434  * and friends to give details of the problem.
435  */
436 TimeZoneAbbrevTable *
load_tzoffsets(const char * filename)437 load_tzoffsets(const char *filename)
438 {
439 	TimeZoneAbbrevTable *result = NULL;
440 	MemoryContext tmpContext;
441 	MemoryContext oldContext;
442 	tzEntry    *array;
443 	int			arraysize;
444 	int			n;
445 
446 	/*
447 	 * Create a temp memory context to work in.  This makes it easy to clean
448 	 * up afterwards.
449 	 */
450 	tmpContext = AllocSetContextCreate(CurrentMemoryContext,
451 									   "TZParserMemory",
452 									   ALLOCSET_SMALL_SIZES);
453 	oldContext = MemoryContextSwitchTo(tmpContext);
454 
455 	/* Initialize array at a reasonable size */
456 	arraysize = 128;
457 	array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
458 
459 	/* Parse the file(s) */
460 	n = ParseTzFile(filename, 0, &array, &arraysize, 0);
461 
462 	/* If no errors so far, let datetime.c allocate memory & convert format */
463 	if (n >= 0)
464 	{
465 		result = ConvertTimeZoneAbbrevs(array, n);
466 		if (!result)
467 			GUC_check_errmsg("out of memory");
468 	}
469 
470 	/* Clean up */
471 	MemoryContextSwitchTo(oldContext);
472 	MemoryContextDelete(tmpContext);
473 
474 	return result;
475 }
476