1 /*-------------------------------------------------------------------------
2  *
3  * tzparser.c
4  *	  Functions for parsing timezone offset files
5  *
6  * Note: this code is invoked from the check_hook for the GUC variable
7  * timezone_abbreviations.  Therefore, it should report problems using
8  * GUC_check_errmsg() and related functions, and try to avoid throwing
9  * elog(ERROR).  This is not completely bulletproof at present --- in
10  * particular out-of-memory will throw an error.  Could probably fix with
11  * PG_TRY if necessary.
12  *
13  *
14  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  *	  src/backend/utils/misc/tzparser.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 #include "postgres.h"
24 
25 #include <ctype.h>
26 
27 #include "miscadmin.h"
28 #include "storage/fd.h"
29 #include "utils/guc.h"
30 #include "utils/memutils.h"
31 #include "utils/tzparser.h"
32 
33 
/* Delimiter characters handed to strtok() when splitting a line into tokens */
#define WHITESPACE " \t\n\r"

/* Forward declarations of the file-local helper routines defined below */
static bool validateTzEntry(tzEntry *tzentry);
static bool splitTzLine(const char *filename, int lineno,
			char *line, tzEntry *tzentry);
static int addToArray(tzEntry **base, int *arraysize, int n,
		   tzEntry *entry, bool override);
static int ParseTzFile(const char *filename, int depth,
			tzEntry **base, int *arraysize, int n);
43 
44 
45 /*
46  * Apply additional validation checks to a tzEntry
47  *
48  * Returns TRUE if OK, else false
49  */
50 static bool
validateTzEntry(tzEntry * tzentry)51 validateTzEntry(tzEntry *tzentry)
52 {
53 	unsigned char *p;
54 
55 	/*
56 	 * Check restrictions imposed by datetkntbl storage format (see
57 	 * datetime.c)
58 	 */
59 	if (strlen(tzentry->abbrev) > TOKMAXLEN)
60 	{
61 		GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62 						 tzentry->abbrev, TOKMAXLEN,
63 						 tzentry->filename, tzentry->lineno);
64 		return false;
65 	}
66 
67 	/*
68 	 * Sanity-check the offset: shouldn't exceed 14 hours
69 	 */
70 	if (tzentry->offset > 14 * 60 * 60 ||
71 		tzentry->offset < -14 * 60 * 60)
72 	{
73 		GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74 						 tzentry->offset,
75 						 tzentry->filename, tzentry->lineno);
76 		return false;
77 	}
78 
79 	/*
80 	 * Convert abbrev to lowercase (must match datetime.c's conversion)
81 	 */
82 	for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83 		*p = pg_tolower(*p);
84 
85 	return true;
86 }
87 
88 /*
89  * Attempt to parse the line as a timezone abbrev spec
90  *
91  * Valid formats are:
92  *	name  zone
93  *	name  offset  dst
94  *
95  * Returns TRUE if OK, else false; data is stored in *tzentry
96  */
97 static bool
splitTzLine(const char * filename,int lineno,char * line,tzEntry * tzentry)98 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 {
100 	char	   *abbrev;
101 	char	   *offset;
102 	char	   *offset_endptr;
103 	char	   *remain;
104 	char	   *is_dst;
105 
106 	tzentry->lineno = lineno;
107 	tzentry->filename = filename;
108 
109 	abbrev = strtok(line, WHITESPACE);
110 	if (!abbrev)
111 	{
112 		GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
113 						 filename, lineno);
114 		return false;
115 	}
116 	tzentry->abbrev = pstrdup(abbrev);
117 
118 	offset = strtok(NULL, WHITESPACE);
119 	if (!offset)
120 	{
121 		GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
122 						 filename, lineno);
123 		return false;
124 	}
125 
126 	/* We assume zone names don't begin with a digit or sign */
127 	if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
128 	{
129 		tzentry->zone = NULL;
130 		tzentry->offset = strtol(offset, &offset_endptr, 10);
131 		if (offset_endptr == offset || *offset_endptr != '\0')
132 		{
133 			GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
134 							 filename, lineno);
135 			return false;
136 		}
137 
138 		is_dst = strtok(NULL, WHITESPACE);
139 		if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
140 		{
141 			tzentry->is_dst = true;
142 			remain = strtok(NULL, WHITESPACE);
143 		}
144 		else
145 		{
146 			/* there was no 'D' dst specifier */
147 			tzentry->is_dst = false;
148 			remain = is_dst;
149 		}
150 	}
151 	else
152 	{
153 		/*
154 		 * Assume entry is a zone name.  We do not try to validate it by
155 		 * looking up the zone, because that would force loading of a lot of
156 		 * zones that probably will never be used in the current session.
157 		 */
158 		tzentry->zone = pstrdup(offset);
159 		tzentry->offset = 0;
160 		tzentry->is_dst = false;
161 		remain = strtok(NULL, WHITESPACE);
162 	}
163 
164 	if (!remain)				/* no more non-whitespace chars */
165 		return true;
166 
167 	if (remain[0] != '#')		/* must be a comment */
168 	{
169 		GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
170 						 filename, lineno);
171 		return false;
172 	}
173 	return true;
174 }
175 
176 /*
177  * Insert entry into sorted array
178  *
179  * *base: base address of array (changeable if must enlarge array)
180  * *arraysize: allocated length of array (changeable if must enlarge array)
181  * n: current number of valid elements in array
182  * entry: new data to insert
183  * override: TRUE if OK to override
184  *
185  * Returns the new array length (new value for n), or -1 if error
186  */
187 static int
addToArray(tzEntry ** base,int * arraysize,int n,tzEntry * entry,bool override)188 addToArray(tzEntry **base, int *arraysize, int n,
189 		   tzEntry *entry, bool override)
190 {
191 	tzEntry    *arrayptr;
192 	int			low;
193 	int			high;
194 
195 	/*
196 	 * Search the array for a duplicate; as a useful side effect, the array is
197 	 * maintained in sorted order.  We use strcmp() to ensure we match the
198 	 * sort order datetime.c expects.
199 	 */
200 	arrayptr = *base;
201 	low = 0;
202 	high = n - 1;
203 	while (low <= high)
204 	{
205 		int			mid = (low + high) >> 1;
206 		tzEntry    *midptr = arrayptr + mid;
207 		int			cmp;
208 
209 		cmp = strcmp(entry->abbrev, midptr->abbrev);
210 		if (cmp < 0)
211 			high = mid - 1;
212 		else if (cmp > 0)
213 			low = mid + 1;
214 		else
215 		{
216 			/*
217 			 * Found a duplicate entry; complain unless it's the same.
218 			 */
219 			if ((midptr->zone == NULL && entry->zone == NULL &&
220 				 midptr->offset == entry->offset &&
221 				 midptr->is_dst == entry->is_dst) ||
222 				(midptr->zone != NULL && entry->zone != NULL &&
223 				 strcmp(midptr->zone, entry->zone) == 0))
224 			{
225 				/* return unchanged array */
226 				return n;
227 			}
228 			if (override)
229 			{
230 				/* same abbrev but something is different, override */
231 				midptr->zone = entry->zone;
232 				midptr->offset = entry->offset;
233 				midptr->is_dst = entry->is_dst;
234 				return n;
235 			}
236 			/* same abbrev but something is different, complain */
237 			GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
238 							 entry->abbrev);
239 			GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
240 								midptr->filename, midptr->lineno,
241 								entry->filename, entry->lineno);
242 			return -1;
243 		}
244 	}
245 
246 	/*
247 	 * No match, insert at position "low".
248 	 */
249 	if (n >= *arraysize)
250 	{
251 		*arraysize *= 2;
252 		*base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
253 	}
254 
255 	arrayptr = *base + low;
256 
257 	memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
258 
259 	memcpy(arrayptr, entry, sizeof(tzEntry));
260 
261 	return n + 1;
262 }
263 
264 /*
265  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
266  *
267  * filename: user-specified file name (does not include path)
268  * depth: current recursion depth
269  * *base: array for results (changeable if must enlarge array)
270  * *arraysize: allocated length of array (changeable if must enlarge array)
271  * n: current number of valid elements in array
272  *
273  * Returns the new array length (new value for n), or -1 if error
274  */
275 static int
ParseTzFile(const char * filename,int depth,tzEntry ** base,int * arraysize,int n)276 ParseTzFile(const char *filename, int depth,
277 			tzEntry **base, int *arraysize, int n)
278 {
279 	char		share_path[MAXPGPATH];
280 	char		file_path[MAXPGPATH];
281 	FILE	   *tzFile;
282 	char		tzbuf[1024];
283 	char	   *line;
284 	tzEntry		tzentry;
285 	int			lineno = 0;
286 	bool		override = false;
287 	const char *p;
288 
289 	/*
290 	 * We enforce that the filename is all alpha characters.  This may be
291 	 * overly restrictive, but we don't want to allow access to anything
292 	 * outside the timezonesets directory, so for instance '/' *must* be
293 	 * rejected.
294 	 */
295 	for (p = filename; *p; p++)
296 	{
297 		if (!isalpha((unsigned char) *p))
298 		{
299 			/* at level 0, just use guc.c's regular "invalid value" message */
300 			if (depth > 0)
301 				GUC_check_errmsg("invalid time zone file name \"%s\"",
302 								 filename);
303 			return -1;
304 		}
305 	}
306 
307 	/*
308 	 * The maximal recursion depth is a pretty arbitrary setting. It is hard
309 	 * to imagine that someone needs more than 3 levels so stick with this
310 	 * conservative setting until someone complains.
311 	 */
312 	if (depth > 3)
313 	{
314 		GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
315 						 filename);
316 		return -1;
317 	}
318 
319 	get_share_path(my_exec_path, share_path);
320 	snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
321 			 share_path, filename);
322 	tzFile = AllocateFile(file_path, "r");
323 	if (!tzFile)
324 	{
325 		/*
326 		 * Check to see if the problem is not the filename but the directory.
327 		 * This is worth troubling over because if the installation share/
328 		 * directory is missing or unreadable, this is likely to be the first
329 		 * place we notice a problem during postmaster startup.
330 		 */
331 		int			save_errno = errno;
332 		DIR		   *tzdir;
333 
334 		snprintf(file_path, sizeof(file_path), "%s/timezonesets",
335 				 share_path);
336 		tzdir = AllocateDir(file_path);
337 		if (tzdir == NULL)
338 		{
339 			GUC_check_errmsg("could not open directory \"%s\": %m",
340 							 file_path);
341 			GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
342 							  my_exec_path);
343 			return -1;
344 		}
345 		FreeDir(tzdir);
346 		errno = save_errno;
347 
348 		/*
349 		 * otherwise, if file doesn't exist and it's level 0, guc.c's
350 		 * complaint is enough
351 		 */
352 		if (errno != ENOENT || depth > 0)
353 			GUC_check_errmsg("could not read time zone file \"%s\": %m",
354 							 filename);
355 
356 		return -1;
357 	}
358 
359 	while (!feof(tzFile))
360 	{
361 		lineno++;
362 		if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
363 		{
364 			if (ferror(tzFile))
365 			{
366 				GUC_check_errmsg("could not read time zone file \"%s\": %m",
367 								 filename);
368 				return -1;
369 			}
370 			/* else we're at EOF after all */
371 			break;
372 		}
373 		if (strlen(tzbuf) == sizeof(tzbuf) - 1)
374 		{
375 			/* the line is too long for tzbuf */
376 			GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377 							 filename, lineno);
378 			return -1;
379 		}
380 
381 		/* skip over whitespace */
382 		line = tzbuf;
383 		while (*line && isspace((unsigned char) *line))
384 			line++;
385 
386 		if (*line == '\0')		/* empty line */
387 			continue;
388 		if (*line == '#')		/* comment line */
389 			continue;
390 
391 		if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
392 		{
393 			/* pstrdup so we can use filename in result data structure */
394 			char	   *includeFile = pstrdup(line + strlen("@INCLUDE"));
395 
396 			includeFile = strtok(includeFile, WHITESPACE);
397 			if (!includeFile || !*includeFile)
398 			{
399 				GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
400 								 filename, lineno);
401 				return -1;
402 			}
403 			n = ParseTzFile(includeFile, depth + 1,
404 							base, arraysize, n);
405 			if (n < 0)
406 				return -1;
407 			continue;
408 		}
409 
410 		if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
411 		{
412 			override = true;
413 			continue;
414 		}
415 
416 		if (!splitTzLine(filename, lineno, line, &tzentry))
417 			return -1;
418 		if (!validateTzEntry(&tzentry))
419 			return -1;
420 		n = addToArray(base, arraysize, n, &tzentry, override);
421 		if (n < 0)
422 			return -1;
423 	}
424 
425 	FreeFile(tzFile);
426 
427 	return n;
428 }
429 
430 /*
431  * load_tzoffsets --- read and parse the specified timezone offset file
432  *
433  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
434  * malloc'd not palloc'd.  On failure, return NULL, using GUC_check_errmsg
435  * and friends to give details of the problem.
436  */
437 TimeZoneAbbrevTable *
load_tzoffsets(const char * filename)438 load_tzoffsets(const char *filename)
439 {
440 	TimeZoneAbbrevTable *result = NULL;
441 	MemoryContext tmpContext;
442 	MemoryContext oldContext;
443 	tzEntry    *array;
444 	int			arraysize;
445 	int			n;
446 
447 	/*
448 	 * Create a temp memory context to work in.  This makes it easy to clean
449 	 * up afterwards.
450 	 */
451 	tmpContext = AllocSetContextCreate(CurrentMemoryContext,
452 									   "TZParserMemory",
453 									   ALLOCSET_SMALL_SIZES);
454 	oldContext = MemoryContextSwitchTo(tmpContext);
455 
456 	/* Initialize array at a reasonable size */
457 	arraysize = 128;
458 	array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
459 
460 	/* Parse the file(s) */
461 	n = ParseTzFile(filename, 0, &array, &arraysize, 0);
462 
463 	/* If no errors so far, let datetime.c allocate memory & convert format */
464 	if (n >= 0)
465 	{
466 		result = ConvertTimeZoneAbbrevs(array, n);
467 		if (!result)
468 			GUC_check_errmsg("out of memory");
469 	}
470 
471 	/* Clean up */
472 	MemoryContextSwitchTo(oldContext);
473 	MemoryContextDelete(tmpContext);
474 
475 	return result;
476 }
477