1 /*-------------------------------------------------------------------------
2 *
3 * genfile.c
4 * Functions for direct access to files
5 *
6 *
7 * Copyright (c) 2004-2018, PostgreSQL Global Development Group
8 *
9 * Author: Andreas Pflug <pgadmin@pse-consulting.de>
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/genfile.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "postgres.h"
17
18 #include <sys/file.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <dirent.h>
22
23 #include "access/htup_details.h"
24 #include "access/xlog_internal.h"
25 #include "catalog/pg_authid.h"
26 #include "catalog/pg_type.h"
27 #include "funcapi.h"
28 #include "mb/pg_wchar.h"
29 #include "miscadmin.h"
30 #include "postmaster/syslogger.h"
31 #include "storage/fd.h"
32 #include "utils/builtins.h"
33 #include "utils/memutils.h"
34 #include "utils/timestamp.h"
35
36
37 /*
38 * Convert a "text" filename argument to C string, and check it's allowable.
39 *
40 * Filename may be absolute or relative to the DataDir, but we only allow
41 * absolute paths that match DataDir or Log_directory.
42 *
43 * This does a privilege check against the 'pg_read_server_files' role, so
44 * this function is really only appropriate for callers who are only checking
45 * 'read' access. Do not use this function if you are looking for a check
46 * for 'write' or 'program' access without updating it to access the type
47 * of check as an argument and checking the appropriate role membership.
48 */
49 static char *
convert_and_check_filename(text * arg)50 convert_and_check_filename(text *arg)
51 {
52 char *filename;
53
54 filename = text_to_cstring(arg);
55 canonicalize_path(filename); /* filename can change length here */
56
57 /*
58 * Members of the 'pg_read_server_files' role are allowed to access any
59 * files on the server as the PG user, so no need to do any further checks
60 * here.
61 */
62 if (is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_SERVER_FILES))
63 return filename;
64
65 /* User isn't a member of the default role, so check if it's allowable */
66 if (is_absolute_path(filename))
67 {
68 /* Disallow '/a/b/data/..' */
69 if (path_contains_parent_reference(filename))
70 ereport(ERROR,
71 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
72 (errmsg("reference to parent directory (\"..\") not allowed"))));
73
74 /*
75 * Allow absolute paths if within DataDir or Log_directory, even
76 * though Log_directory might be outside DataDir.
77 */
78 if (!path_is_prefix_of_path(DataDir, filename) &&
79 (!is_absolute_path(Log_directory) ||
80 !path_is_prefix_of_path(Log_directory, filename)))
81 ereport(ERROR,
82 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
83 (errmsg("absolute path not allowed"))));
84 }
85 else if (!path_is_relative_and_below_cwd(filename))
86 ereport(ERROR,
87 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
88 (errmsg("path must be in or below the current directory"))));
89
90 return filename;
91 }
92
93
94 /*
95 * Read a section of a file, returning it as bytea
96 *
97 * Caller is responsible for all permissions checking.
98 *
99 * We read the whole of the file when bytes_to_read is negative.
100 */
101 static bytea *
read_binary_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)102 read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
103 bool missing_ok)
104 {
105 bytea *buf;
106 size_t nbytes = 0;
107 FILE *file;
108
109 /* clamp request size to what we can actually deliver */
110 if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
111 ereport(ERROR,
112 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
113 errmsg("requested length too large")));
114
115 if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
116 {
117 if (missing_ok && errno == ENOENT)
118 return NULL;
119 else
120 ereport(ERROR,
121 (errcode_for_file_access(),
122 errmsg("could not open file \"%s\" for reading: %m",
123 filename)));
124 }
125
126 if (fseeko(file, (off_t) seek_offset,
127 (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
128 ereport(ERROR,
129 (errcode_for_file_access(),
130 errmsg("could not seek in file \"%s\": %m", filename)));
131
132 if (bytes_to_read >= 0)
133 {
134 /* If passed explicit read size just do it */
135 buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
136
137 nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
138 }
139 else
140 {
141 /* Negative read size, read rest of file */
142 StringInfoData sbuf;
143
144 initStringInfo(&sbuf);
145 /* Leave room in the buffer for the varlena length word */
146 sbuf.len += VARHDRSZ;
147 Assert(sbuf.len < sbuf.maxlen);
148
149 while (!(feof(file) || ferror(file)))
150 {
151 size_t rbytes;
152
153 /* Minimum amount to read at a time */
154 #define MIN_READ_SIZE 4096
155
156 /*
157 * If not at end of file, and sbuf.len is equal to
158 * MaxAllocSize - 1, then either the file is too large, or
159 * there is nothing left to read. Attempt to read one more
160 * byte to see if the end of file has been reached. If not,
161 * the file is too large; we'd rather give the error message
162 * for that ourselves.
163 */
164 if (sbuf.len == MaxAllocSize - 1)
165 {
166 char rbuf[1];
167
168 if (fread(rbuf, 1, 1, file) != 0 || !feof(file))
169 ereport(ERROR,
170 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
171 errmsg("file length too large")));
172 else
173 break;
174 }
175
176 /* OK, ensure that we can read at least MIN_READ_SIZE */
177 enlargeStringInfo(&sbuf, MIN_READ_SIZE);
178
179 /*
180 * stringinfo.c likes to allocate in powers of 2, so it's likely
181 * that much more space is available than we asked for. Use all
182 * of it, rather than making more fread calls than necessary.
183 */
184 rbytes = fread(sbuf.data + sbuf.len, 1,
185 (size_t) (sbuf.maxlen - sbuf.len - 1), file);
186 sbuf.len += rbytes;
187 nbytes += rbytes;
188 }
189
190 /* Now we can commandeer the stringinfo's buffer as the result */
191 buf = (bytea *) sbuf.data;
192 }
193
194 if (ferror(file))
195 ereport(ERROR,
196 (errcode_for_file_access(),
197 errmsg("could not read file \"%s\": %m", filename)));
198
199 SET_VARSIZE(buf, nbytes + VARHDRSZ);
200
201 FreeFile(file);
202
203 return buf;
204 }
205
206 /*
207 * Similar to read_binary_file, but we verify that the contents are valid
208 * in the database encoding.
209 */
210 static text *
read_text_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)211 read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
212 bool missing_ok)
213 {
214 bytea *buf;
215
216 buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
217
218 if (buf != NULL)
219 {
220 /* Make sure the input is valid */
221 pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
222
223 /* OK, we can cast it to text safely */
224 return (text *) buf;
225 }
226 else
227 return NULL;
228 }
229
230 /*
231 * Read a section of a file, returning it as text
232 *
233 * This function is kept to support adminpack 1.0.
234 */
235 Datum
pg_read_file(PG_FUNCTION_ARGS)236 pg_read_file(PG_FUNCTION_ARGS)
237 {
238 text *filename_t = PG_GETARG_TEXT_PP(0);
239 int64 seek_offset = 0;
240 int64 bytes_to_read = -1;
241 bool missing_ok = false;
242 char *filename;
243 text *result;
244
245 if (!superuser())
246 ereport(ERROR,
247 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
248 (errmsg("must be superuser to read files with adminpack 1.0"),
249 errhint("Consider using pg_read_file(), which is part of core, instead."))));
250
251 /* handle optional arguments */
252 if (PG_NARGS() >= 3)
253 {
254 seek_offset = PG_GETARG_INT64(1);
255 bytes_to_read = PG_GETARG_INT64(2);
256
257 if (bytes_to_read < 0)
258 ereport(ERROR,
259 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
260 errmsg("requested length cannot be negative")));
261 }
262 if (PG_NARGS() >= 4)
263 missing_ok = PG_GETARG_BOOL(3);
264
265 filename = convert_and_check_filename(filename_t);
266
267 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
268 if (result)
269 PG_RETURN_TEXT_P(result);
270 else
271 PG_RETURN_NULL();
272 }
273
274 /*
275 * Read a section of a file, returning it as text
276 *
277 * No superuser check done here- instead privileges are handled by the
278 * GRANT system.
279 */
280 Datum
pg_read_file_v2(PG_FUNCTION_ARGS)281 pg_read_file_v2(PG_FUNCTION_ARGS)
282 {
283 text *filename_t = PG_GETARG_TEXT_PP(0);
284 int64 seek_offset = 0;
285 int64 bytes_to_read = -1;
286 bool missing_ok = false;
287 char *filename;
288 text *result;
289
290 /* handle optional arguments */
291 if (PG_NARGS() >= 3)
292 {
293 seek_offset = PG_GETARG_INT64(1);
294 bytes_to_read = PG_GETARG_INT64(2);
295
296 if (bytes_to_read < 0)
297 ereport(ERROR,
298 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
299 errmsg("requested length cannot be negative")));
300 }
301 if (PG_NARGS() >= 4)
302 missing_ok = PG_GETARG_BOOL(3);
303
304 filename = convert_and_check_filename(filename_t);
305
306 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
307 if (result)
308 PG_RETURN_TEXT_P(result);
309 else
310 PG_RETURN_NULL();
311 }
312
313 /*
314 * Read a section of a file, returning it as bytea
315 */
316 Datum
pg_read_binary_file(PG_FUNCTION_ARGS)317 pg_read_binary_file(PG_FUNCTION_ARGS)
318 {
319 text *filename_t = PG_GETARG_TEXT_PP(0);
320 int64 seek_offset = 0;
321 int64 bytes_to_read = -1;
322 bool missing_ok = false;
323 char *filename;
324 bytea *result;
325
326 /* handle optional arguments */
327 if (PG_NARGS() >= 3)
328 {
329 seek_offset = PG_GETARG_INT64(1);
330 bytes_to_read = PG_GETARG_INT64(2);
331
332 if (bytes_to_read < 0)
333 ereport(ERROR,
334 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
335 errmsg("requested length cannot be negative")));
336 }
337 if (PG_NARGS() >= 4)
338 missing_ok = PG_GETARG_BOOL(3);
339
340 filename = convert_and_check_filename(filename_t);
341
342 result = read_binary_file(filename, seek_offset,
343 bytes_to_read, missing_ok);
344 if (result)
345 PG_RETURN_BYTEA_P(result);
346 else
347 PG_RETURN_NULL();
348 }
349
350
351 /*
352 * Wrapper functions for the 1 and 3 argument variants of pg_read_file_v2()
353 * and pg_binary_read_file().
354 *
355 * These are necessary to pass the sanity check in opr_sanity, which checks
356 * that all built-in functions that share the implementing C function take
357 * the same number of arguments.
358 */
359 Datum
pg_read_file_off_len(PG_FUNCTION_ARGS)360 pg_read_file_off_len(PG_FUNCTION_ARGS)
361 {
362 return pg_read_file_v2(fcinfo);
363 }
364
365 Datum
pg_read_file_all(PG_FUNCTION_ARGS)366 pg_read_file_all(PG_FUNCTION_ARGS)
367 {
368 return pg_read_file_v2(fcinfo);
369 }
370
371 Datum
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)372 pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
373 {
374 return pg_read_binary_file(fcinfo);
375 }
376
377 Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)378 pg_read_binary_file_all(PG_FUNCTION_ARGS)
379 {
380 return pg_read_binary_file(fcinfo);
381 }
382
383 /*
384 * stat a file
385 */
386 Datum
pg_stat_file(PG_FUNCTION_ARGS)387 pg_stat_file(PG_FUNCTION_ARGS)
388 {
389 text *filename_t = PG_GETARG_TEXT_PP(0);
390 char *filename;
391 struct stat fst;
392 Datum values[6];
393 bool isnull[6];
394 HeapTuple tuple;
395 TupleDesc tupdesc;
396 bool missing_ok = false;
397
398 /* check the optional argument */
399 if (PG_NARGS() == 2)
400 missing_ok = PG_GETARG_BOOL(1);
401
402 filename = convert_and_check_filename(filename_t);
403
404 if (stat(filename, &fst) < 0)
405 {
406 if (missing_ok && errno == ENOENT)
407 PG_RETURN_NULL();
408 else
409 ereport(ERROR,
410 (errcode_for_file_access(),
411 errmsg("could not stat file \"%s\": %m", filename)));
412 }
413
414 /*
415 * This record type had better match the output parameters declared for me
416 * in pg_proc.h.
417 */
418 tupdesc = CreateTemplateTupleDesc(6, false);
419 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
420 "size", INT8OID, -1, 0);
421 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
422 "access", TIMESTAMPTZOID, -1, 0);
423 TupleDescInitEntry(tupdesc, (AttrNumber) 3,
424 "modification", TIMESTAMPTZOID, -1, 0);
425 TupleDescInitEntry(tupdesc, (AttrNumber) 4,
426 "change", TIMESTAMPTZOID, -1, 0);
427 TupleDescInitEntry(tupdesc, (AttrNumber) 5,
428 "creation", TIMESTAMPTZOID, -1, 0);
429 TupleDescInitEntry(tupdesc, (AttrNumber) 6,
430 "isdir", BOOLOID, -1, 0);
431 BlessTupleDesc(tupdesc);
432
433 memset(isnull, false, sizeof(isnull));
434
435 values[0] = Int64GetDatum((int64) fst.st_size);
436 values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
437 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
438 /* Unix has file status change time, while Win32 has creation time */
439 #if !defined(WIN32) && !defined(__CYGWIN__)
440 values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
441 isnull[4] = true;
442 #else
443 isnull[3] = true;
444 values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
445 #endif
446 values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
447
448 tuple = heap_form_tuple(tupdesc, values, isnull);
449
450 pfree(filename);
451
452 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
453 }
454
455 /*
456 * stat a file (1 argument version)
457 *
458 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
459 * which checks that all built-in functions that share the implementing C
460 * function take the same number of arguments
461 */
462 Datum
pg_stat_file_1arg(PG_FUNCTION_ARGS)463 pg_stat_file_1arg(PG_FUNCTION_ARGS)
464 {
465 return pg_stat_file(fcinfo);
466 }
467
468 /*
469 * List a directory (returns the filenames only)
470 */
471 Datum
pg_ls_dir(PG_FUNCTION_ARGS)472 pg_ls_dir(PG_FUNCTION_ARGS)
473 {
474 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
475 char *location;
476 bool missing_ok = false;
477 bool include_dot_dirs = false;
478 bool randomAccess;
479 TupleDesc tupdesc;
480 Tuplestorestate *tupstore;
481 DIR *dirdesc;
482 struct dirent *de;
483 MemoryContext oldcontext;
484
485 location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
486
487 /* check the optional arguments */
488 if (PG_NARGS() == 3)
489 {
490 if (!PG_ARGISNULL(1))
491 missing_ok = PG_GETARG_BOOL(1);
492 if (!PG_ARGISNULL(2))
493 include_dot_dirs = PG_GETARG_BOOL(2);
494 }
495
496 /* check to see if caller supports us returning a tuplestore */
497 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
498 ereport(ERROR,
499 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
500 errmsg("set-valued function called in context that cannot accept a set")));
501 if (!(rsinfo->allowedModes & SFRM_Materialize))
502 ereport(ERROR,
503 (errcode(ERRCODE_SYNTAX_ERROR),
504 errmsg("materialize mode required, but it is not allowed in this context")));
505
506 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
507 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
508
509 tupdesc = CreateTemplateTupleDesc(1, false);
510 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_ls_dir", TEXTOID, -1, 0);
511
512 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
513 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
514 rsinfo->returnMode = SFRM_Materialize;
515 rsinfo->setResult = tupstore;
516 rsinfo->setDesc = tupdesc;
517
518 MemoryContextSwitchTo(oldcontext);
519
520 dirdesc = AllocateDir(location);
521 if (!dirdesc)
522 {
523 /* Return empty tuplestore if appropriate */
524 if (missing_ok && errno == ENOENT)
525 return (Datum) 0;
526 /* Otherwise, we can let ReadDir() throw the error */
527 }
528
529 while ((de = ReadDir(dirdesc, location)) != NULL)
530 {
531 Datum values[1];
532 bool nulls[1];
533
534 if (!include_dot_dirs &&
535 (strcmp(de->d_name, ".") == 0 ||
536 strcmp(de->d_name, "..") == 0))
537 continue;
538
539 values[0] = CStringGetTextDatum(de->d_name);
540 nulls[0] = false;
541
542 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
543 }
544
545 FreeDir(dirdesc);
546 return (Datum) 0;
547 }
548
549 /*
550 * List a directory (1 argument version)
551 *
552 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
553 * which checks that all built-in functions that share the implementing C
554 * function take the same number of arguments.
555 */
556 Datum
pg_ls_dir_1arg(PG_FUNCTION_ARGS)557 pg_ls_dir_1arg(PG_FUNCTION_ARGS)
558 {
559 return pg_ls_dir(fcinfo);
560 }
561
562 /*
563 * Generic function to return a directory listing of files.
564 */
565 static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo,const char * dir)566 pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir)
567 {
568 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
569 bool randomAccess;
570 TupleDesc tupdesc;
571 Tuplestorestate *tupstore;
572 DIR *dirdesc;
573 struct dirent *de;
574 MemoryContext oldcontext;
575
576 /* check to see if caller supports us returning a tuplestore */
577 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
578 ereport(ERROR,
579 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
580 errmsg("set-valued function called in context that cannot accept a set")));
581 if (!(rsinfo->allowedModes & SFRM_Materialize))
582 ereport(ERROR,
583 (errcode(ERRCODE_SYNTAX_ERROR),
584 errmsg("materialize mode required, but it is not allowed in this context")));
585
586 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
587 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
588
589 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
590 elog(ERROR, "return type must be a row type");
591
592 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
593 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
594 rsinfo->returnMode = SFRM_Materialize;
595 rsinfo->setResult = tupstore;
596 rsinfo->setDesc = tupdesc;
597
598 MemoryContextSwitchTo(oldcontext);
599
600 /*
601 * Now walk the directory. Note that we must do this within a single SRF
602 * call, not leave the directory open across multiple calls, since we
603 * can't count on the SRF being run to completion.
604 */
605 dirdesc = AllocateDir(dir);
606 while ((de = ReadDir(dirdesc, dir)) != NULL)
607 {
608 Datum values[3];
609 bool nulls[3];
610 char path[MAXPGPATH * 2];
611 struct stat attrib;
612
613 /* Skip hidden files */
614 if (de->d_name[0] == '.')
615 continue;
616
617 /* Get the file info */
618 snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
619 if (stat(path, &attrib) < 0)
620 {
621 /* Ignore concurrently-deleted files, else complain */
622 if (errno == ENOENT)
623 continue;
624 ereport(ERROR,
625 (errcode_for_file_access(),
626 errmsg("could not stat file \"%s\": %m", path)));
627 }
628
629 /* Ignore anything but regular files */
630 if (!S_ISREG(attrib.st_mode))
631 continue;
632
633 values[0] = CStringGetTextDatum(de->d_name);
634 values[1] = Int64GetDatum((int64) attrib.st_size);
635 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
636 memset(nulls, 0, sizeof(nulls));
637
638 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
639 }
640
641 FreeDir(dirdesc);
642 return (Datum) 0;
643 }
644
645 /* Function to return the list of files in the log directory */
646 Datum
pg_ls_logdir(PG_FUNCTION_ARGS)647 pg_ls_logdir(PG_FUNCTION_ARGS)
648 {
649 return pg_ls_dir_files(fcinfo, Log_directory);
650 }
651
652 /* Function to return the list of files in the WAL directory */
653 Datum
pg_ls_waldir(PG_FUNCTION_ARGS)654 pg_ls_waldir(PG_FUNCTION_ARGS)
655 {
656 return pg_ls_dir_files(fcinfo, XLOGDIR);
657 }
658