1 /*-------------------------------------------------------------------------
2  *
3  * genfile.c
4  *		Functions for direct access to files
5  *
6  *
7  * Copyright (c) 2004-2017, PostgreSQL Global Development Group
8  *
9  * Author: Andreas Pflug <pgadmin@pse-consulting.de>
10  *
11  * IDENTIFICATION
12  *	  src/backend/utils/adt/genfile.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include <sys/file.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <dirent.h>
22 
23 #include "access/htup_details.h"
24 #include "access/xlog_internal.h"
25 #include "catalog/pg_type.h"
26 #include "funcapi.h"
27 #include "mb/pg_wchar.h"
28 #include "miscadmin.h"
29 #include "postmaster/syslogger.h"
30 #include "storage/fd.h"
31 #include "utils/builtins.h"
32 #include "utils/memutils.h"
33 #include "utils/timestamp.h"
34 
35 
36 /*
37  * Convert a "text" filename argument to C string, and check it's allowable.
38  *
39  * Filename may be absolute or relative to the DataDir, but we only allow
40  * absolute paths that match DataDir or Log_directory.
41  */
42 static char *
convert_and_check_filename(text * arg)43 convert_and_check_filename(text *arg)
44 {
45 	char	   *filename;
46 
47 	filename = text_to_cstring(arg);
48 	canonicalize_path(filename);	/* filename can change length here */
49 
50 	if (is_absolute_path(filename))
51 	{
52 		/* Disallow '/a/b/data/..' */
53 		if (path_contains_parent_reference(filename))
54 			ereport(ERROR,
55 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
56 					 (errmsg("reference to parent directory (\"..\") not allowed"))));
57 
58 		/*
59 		 * Allow absolute paths if within DataDir or Log_directory, even
60 		 * though Log_directory might be outside DataDir.
61 		 */
62 		if (!path_is_prefix_of_path(DataDir, filename) &&
63 			(!is_absolute_path(Log_directory) ||
64 			 !path_is_prefix_of_path(Log_directory, filename)))
65 			ereport(ERROR,
66 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
67 					 (errmsg("absolute path not allowed"))));
68 	}
69 	else if (!path_is_relative_and_below_cwd(filename))
70 		ereport(ERROR,
71 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
72 				 (errmsg("path must be in or below the current directory"))));
73 
74 	return filename;
75 }
76 
77 
78 /*
79  * Read a section of a file, returning it as bytea
80  *
81  * Caller is responsible for all permissions checking.
82  *
83  * We read the whole of the file when bytes_to_read is negative.
84  */
85 static bytea *
read_binary_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)86 read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
87 				 bool missing_ok)
88 {
89 	bytea	   *buf;
90 	size_t		nbytes;
91 	FILE	   *file;
92 
93 	if (bytes_to_read < 0)
94 	{
95 		if (seek_offset < 0)
96 			bytes_to_read = -seek_offset;
97 		else
98 		{
99 			struct stat fst;
100 
101 			if (stat(filename, &fst) < 0)
102 			{
103 				if (missing_ok && errno == ENOENT)
104 					return NULL;
105 				else
106 					ereport(ERROR,
107 							(errcode_for_file_access(),
108 							 errmsg("could not stat file \"%s\": %m", filename)));
109 			}
110 
111 			bytes_to_read = fst.st_size - seek_offset;
112 		}
113 	}
114 
115 	/* not sure why anyone thought that int64 length was a good idea */
116 	if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
117 		ereport(ERROR,
118 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
119 				 errmsg("requested length too large")));
120 
121 	if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
122 	{
123 		if (missing_ok && errno == ENOENT)
124 			return NULL;
125 		else
126 			ereport(ERROR,
127 					(errcode_for_file_access(),
128 					 errmsg("could not open file \"%s\" for reading: %m",
129 							filename)));
130 	}
131 
132 	if (fseeko(file, (off_t) seek_offset,
133 			   (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
134 		ereport(ERROR,
135 				(errcode_for_file_access(),
136 				 errmsg("could not seek in file \"%s\": %m", filename)));
137 
138 	buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
139 
140 	nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
141 
142 	if (ferror(file))
143 		ereport(ERROR,
144 				(errcode_for_file_access(),
145 				 errmsg("could not read file \"%s\": %m", filename)));
146 
147 	SET_VARSIZE(buf, nbytes + VARHDRSZ);
148 
149 	FreeFile(file);
150 
151 	return buf;
152 }
153 
154 /*
155  * Similar to read_binary_file, but we verify that the contents are valid
156  * in the database encoding.
157  */
158 static text *
read_text_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)159 read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
160 			   bool missing_ok)
161 {
162 	bytea	   *buf;
163 
164 	buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
165 
166 	if (buf != NULL)
167 	{
168 		/* Make sure the input is valid */
169 		pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
170 
171 		/* OK, we can cast it to text safely */
172 		return (text *) buf;
173 	}
174 	else
175 		return NULL;
176 }
177 
178 /*
179  * Read a section of a file, returning it as text
180  */
181 Datum
pg_read_file(PG_FUNCTION_ARGS)182 pg_read_file(PG_FUNCTION_ARGS)
183 {
184 	text	   *filename_t = PG_GETARG_TEXT_PP(0);
185 	int64		seek_offset = 0;
186 	int64		bytes_to_read = -1;
187 	bool		missing_ok = false;
188 	char	   *filename;
189 	text	   *result;
190 
191 	if (!superuser())
192 		ereport(ERROR,
193 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
194 				 (errmsg("must be superuser to read files"))));
195 
196 	/* handle optional arguments */
197 	if (PG_NARGS() >= 3)
198 	{
199 		seek_offset = PG_GETARG_INT64(1);
200 		bytes_to_read = PG_GETARG_INT64(2);
201 
202 		if (bytes_to_read < 0)
203 			ereport(ERROR,
204 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
205 					 errmsg("requested length cannot be negative")));
206 	}
207 	if (PG_NARGS() >= 4)
208 		missing_ok = PG_GETARG_BOOL(3);
209 
210 	filename = convert_and_check_filename(filename_t);
211 
212 	result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
213 	if (result)
214 		PG_RETURN_TEXT_P(result);
215 	else
216 		PG_RETURN_NULL();
217 }
218 
219 /*
220  * Read a section of a file, returning it as bytea
221  */
222 Datum
pg_read_binary_file(PG_FUNCTION_ARGS)223 pg_read_binary_file(PG_FUNCTION_ARGS)
224 {
225 	text	   *filename_t = PG_GETARG_TEXT_PP(0);
226 	int64		seek_offset = 0;
227 	int64		bytes_to_read = -1;
228 	bool		missing_ok = false;
229 	char	   *filename;
230 	bytea	   *result;
231 
232 	if (!superuser())
233 		ereport(ERROR,
234 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
235 				 (errmsg("must be superuser to read files"))));
236 
237 	/* handle optional arguments */
238 	if (PG_NARGS() >= 3)
239 	{
240 		seek_offset = PG_GETARG_INT64(1);
241 		bytes_to_read = PG_GETARG_INT64(2);
242 
243 		if (bytes_to_read < 0)
244 			ereport(ERROR,
245 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
246 					 errmsg("requested length cannot be negative")));
247 	}
248 	if (PG_NARGS() >= 4)
249 		missing_ok = PG_GETARG_BOOL(3);
250 
251 	filename = convert_and_check_filename(filename_t);
252 
253 	result = read_binary_file(filename, seek_offset,
254 							  bytes_to_read, missing_ok);
255 	if (result)
256 		PG_RETURN_BYTEA_P(result);
257 	else
258 		PG_RETURN_NULL();
259 }
260 
261 
/*
 * Wrapper functions for the 1 and 3 argument variants of pg_read_file()
 * and pg_read_binary_file().
 *
 * These are necessary to pass the sanity check in opr_sanity, which checks
 * that all built-in functions that share the implementing C function take
 * the same number of arguments.
 */
270 Datum
pg_read_file_off_len(PG_FUNCTION_ARGS)271 pg_read_file_off_len(PG_FUNCTION_ARGS)
272 {
273 	return pg_read_file(fcinfo);
274 }
275 
276 Datum
pg_read_file_all(PG_FUNCTION_ARGS)277 pg_read_file_all(PG_FUNCTION_ARGS)
278 {
279 	return pg_read_file(fcinfo);
280 }
281 
282 Datum
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)283 pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
284 {
285 	return pg_read_binary_file(fcinfo);
286 }
287 
288 Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)289 pg_read_binary_file_all(PG_FUNCTION_ARGS)
290 {
291 	return pg_read_binary_file(fcinfo);
292 }
293 
294 /*
295  * stat a file
296  */
297 Datum
pg_stat_file(PG_FUNCTION_ARGS)298 pg_stat_file(PG_FUNCTION_ARGS)
299 {
300 	text	   *filename_t = PG_GETARG_TEXT_PP(0);
301 	char	   *filename;
302 	struct stat fst;
303 	Datum		values[6];
304 	bool		isnull[6];
305 	HeapTuple	tuple;
306 	TupleDesc	tupdesc;
307 	bool		missing_ok = false;
308 
309 	if (!superuser())
310 		ereport(ERROR,
311 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
312 				 (errmsg("must be superuser to get file information"))));
313 
314 	/* check the optional argument */
315 	if (PG_NARGS() == 2)
316 		missing_ok = PG_GETARG_BOOL(1);
317 
318 	filename = convert_and_check_filename(filename_t);
319 
320 	if (stat(filename, &fst) < 0)
321 	{
322 		if (missing_ok && errno == ENOENT)
323 			PG_RETURN_NULL();
324 		else
325 			ereport(ERROR,
326 					(errcode_for_file_access(),
327 					 errmsg("could not stat file \"%s\": %m", filename)));
328 	}
329 
330 	/*
331 	 * This record type had better match the output parameters declared for me
332 	 * in pg_proc.h.
333 	 */
334 	tupdesc = CreateTemplateTupleDesc(6, false);
335 	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
336 					   "size", INT8OID, -1, 0);
337 	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
338 					   "access", TIMESTAMPTZOID, -1, 0);
339 	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
340 					   "modification", TIMESTAMPTZOID, -1, 0);
341 	TupleDescInitEntry(tupdesc, (AttrNumber) 4,
342 					   "change", TIMESTAMPTZOID, -1, 0);
343 	TupleDescInitEntry(tupdesc, (AttrNumber) 5,
344 					   "creation", TIMESTAMPTZOID, -1, 0);
345 	TupleDescInitEntry(tupdesc, (AttrNumber) 6,
346 					   "isdir", BOOLOID, -1, 0);
347 	BlessTupleDesc(tupdesc);
348 
349 	memset(isnull, false, sizeof(isnull));
350 
351 	values[0] = Int64GetDatum((int64) fst.st_size);
352 	values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
353 	values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
354 	/* Unix has file status change time, while Win32 has creation time */
355 #if !defined(WIN32) && !defined(__CYGWIN__)
356 	values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
357 	isnull[4] = true;
358 #else
359 	isnull[3] = true;
360 	values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
361 #endif
362 	values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
363 
364 	tuple = heap_form_tuple(tupdesc, values, isnull);
365 
366 	pfree(filename);
367 
368 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
369 }
370 
371 /*
372  * stat a file (1 argument version)
373  *
374  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
375  * which checks that all built-in functions that share the implementing C
376  * function take the same number of arguments
377  */
378 Datum
pg_stat_file_1arg(PG_FUNCTION_ARGS)379 pg_stat_file_1arg(PG_FUNCTION_ARGS)
380 {
381 	return pg_stat_file(fcinfo);
382 }
383 
384 /*
385  * List a directory (returns the filenames only)
386  */
387 Datum
pg_ls_dir(PG_FUNCTION_ARGS)388 pg_ls_dir(PG_FUNCTION_ARGS)
389 {
390 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
391 	char	   *location;
392 	bool		missing_ok = false;
393 	bool		include_dot_dirs = false;
394 	bool		randomAccess;
395 	TupleDesc	tupdesc;
396 	Tuplestorestate *tupstore;
397 	DIR		   *dirdesc;
398 	struct dirent *de;
399 	MemoryContext oldcontext;
400 
401 	if (!superuser())
402 		ereport(ERROR,
403 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
404 				 (errmsg("must be superuser to get directory listings"))));
405 
406 	location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
407 
408 	/* check the optional arguments */
409 	if (PG_NARGS() == 3)
410 	{
411 		if (!PG_ARGISNULL(1))
412 			missing_ok = PG_GETARG_BOOL(1);
413 		if (!PG_ARGISNULL(2))
414 			include_dot_dirs = PG_GETARG_BOOL(2);
415 	}
416 
417 	/* check to see if caller supports us returning a tuplestore */
418 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
419 		ereport(ERROR,
420 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
421 				 errmsg("set-valued function called in context that cannot accept a set")));
422 	if (!(rsinfo->allowedModes & SFRM_Materialize))
423 		ereport(ERROR,
424 				(errcode(ERRCODE_SYNTAX_ERROR),
425 				 errmsg("materialize mode required, but it is not allowed in this context")));
426 
427 	/* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
428 	oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
429 
430 	tupdesc = CreateTemplateTupleDesc(1, false);
431 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_ls_dir", TEXTOID, -1, 0);
432 
433 	randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
434 	tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
435 	rsinfo->returnMode = SFRM_Materialize;
436 	rsinfo->setResult = tupstore;
437 	rsinfo->setDesc = tupdesc;
438 
439 	MemoryContextSwitchTo(oldcontext);
440 
441 	dirdesc = AllocateDir(location);
442 	if (!dirdesc)
443 	{
444 		/* Return empty tuplestore if appropriate */
445 		if (missing_ok && errno == ENOENT)
446 			return (Datum) 0;
447 		/* Otherwise, we can let ReadDir() throw the error */
448 	}
449 
450 	while ((de = ReadDir(dirdesc, location)) != NULL)
451 	{
452 		Datum		values[1];
453 		bool		nulls[1];
454 
455 		if (!include_dot_dirs &&
456 			(strcmp(de->d_name, ".") == 0 ||
457 			 strcmp(de->d_name, "..") == 0))
458 			continue;
459 
460 		values[0] = CStringGetTextDatum(de->d_name);
461 		nulls[0] = false;
462 
463 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
464 	}
465 
466 	FreeDir(dirdesc);
467 	return (Datum) 0;
468 }
469 
470 /*
471  * List a directory (1 argument version)
472  *
473  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
474  * which checks that all built-in functions that share the implementing C
475  * function take the same number of arguments.
476  */
477 Datum
pg_ls_dir_1arg(PG_FUNCTION_ARGS)478 pg_ls_dir_1arg(PG_FUNCTION_ARGS)
479 {
480 	return pg_ls_dir(fcinfo);
481 }
482 
483 /*
484  * Generic function to return a directory listing of files.
485  */
486 static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo,const char * dir)487 pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir)
488 {
489 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
490 	bool		randomAccess;
491 	TupleDesc	tupdesc;
492 	Tuplestorestate *tupstore;
493 	DIR		   *dirdesc;
494 	struct dirent *de;
495 	MemoryContext oldcontext;
496 
497 	/* check to see if caller supports us returning a tuplestore */
498 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
499 		ereport(ERROR,
500 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
501 				 errmsg("set-valued function called in context that cannot accept a set")));
502 	if (!(rsinfo->allowedModes & SFRM_Materialize))
503 		ereport(ERROR,
504 				(errcode(ERRCODE_SYNTAX_ERROR),
505 				 errmsg("materialize mode required, but it is not allowed in this context")));
506 
507 	/* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
508 	oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
509 
510 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
511 		elog(ERROR, "return type must be a row type");
512 
513 	randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
514 	tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
515 	rsinfo->returnMode = SFRM_Materialize;
516 	rsinfo->setResult = tupstore;
517 	rsinfo->setDesc = tupdesc;
518 
519 	MemoryContextSwitchTo(oldcontext);
520 
521 	/*
522 	 * Now walk the directory.  Note that we must do this within a single SRF
523 	 * call, not leave the directory open across multiple calls, since we
524 	 * can't count on the SRF being run to completion.
525 	 */
526 	dirdesc = AllocateDir(dir);
527 	while ((de = ReadDir(dirdesc, dir)) != NULL)
528 	{
529 		Datum		values[3];
530 		bool		nulls[3];
531 		char		path[MAXPGPATH * 2];
532 		struct stat attrib;
533 
534 		/* Skip hidden files */
535 		if (de->d_name[0] == '.')
536 			continue;
537 
538 		/* Get the file info */
539 		snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
540 		if (stat(path, &attrib) < 0)
541 		{
542 			/* Ignore concurrently-deleted files, else complain */
543 			if (errno == ENOENT)
544 				continue;
545 			ereport(ERROR,
546 					(errcode_for_file_access(),
547 					 errmsg("could not stat file \"%s\": %m", path)));
548 		}
549 
550 		/* Ignore anything but regular files */
551 		if (!S_ISREG(attrib.st_mode))
552 			continue;
553 
554 		values[0] = CStringGetTextDatum(de->d_name);
555 		values[1] = Int64GetDatum((int64) attrib.st_size);
556 		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
557 		memset(nulls, 0, sizeof(nulls));
558 
559 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
560 	}
561 
562 	FreeDir(dirdesc);
563 	return (Datum) 0;
564 }
565 
566 /* Function to return the list of files in the log directory */
567 Datum
pg_ls_logdir(PG_FUNCTION_ARGS)568 pg_ls_logdir(PG_FUNCTION_ARGS)
569 {
570 	return pg_ls_dir_files(fcinfo, Log_directory);
571 }
572 
573 /* Function to return the list of files in the WAL directory */
574 Datum
pg_ls_waldir(PG_FUNCTION_ARGS)575 pg_ls_waldir(PG_FUNCTION_ARGS)
576 {
577 	return pg_ls_dir_files(fcinfo, XLOGDIR);
578 }
579