1 /*-------------------------------------------------------------------------
2 *
3 * genfile.c
4 * Functions for direct access to files
5 *
6 *
7 * Copyright (c) 2004-2017, PostgreSQL Global Development Group
8 *
9 * Author: Andreas Pflug <pgadmin@pse-consulting.de>
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/genfile.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "postgres.h"
17
18 #include <sys/file.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <dirent.h>
22
23 #include "access/htup_details.h"
24 #include "access/xlog_internal.h"
25 #include "catalog/pg_type.h"
26 #include "funcapi.h"
27 #include "mb/pg_wchar.h"
28 #include "miscadmin.h"
29 #include "postmaster/syslogger.h"
30 #include "storage/fd.h"
31 #include "utils/builtins.h"
32 #include "utils/memutils.h"
33 #include "utils/timestamp.h"
34
35
36 /*
37 * Convert a "text" filename argument to C string, and check it's allowable.
38 *
39 * Filename may be absolute or relative to the DataDir, but we only allow
40 * absolute paths that match DataDir or Log_directory.
41 */
42 static char *
convert_and_check_filename(text * arg)43 convert_and_check_filename(text *arg)
44 {
45 char *filename;
46
47 filename = text_to_cstring(arg);
48 canonicalize_path(filename); /* filename can change length here */
49
50 if (is_absolute_path(filename))
51 {
52 /* Disallow '/a/b/data/..' */
53 if (path_contains_parent_reference(filename))
54 ereport(ERROR,
55 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
56 (errmsg("reference to parent directory (\"..\") not allowed"))));
57
58 /*
59 * Allow absolute paths if within DataDir or Log_directory, even
60 * though Log_directory might be outside DataDir.
61 */
62 if (!path_is_prefix_of_path(DataDir, filename) &&
63 (!is_absolute_path(Log_directory) ||
64 !path_is_prefix_of_path(Log_directory, filename)))
65 ereport(ERROR,
66 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
67 (errmsg("absolute path not allowed"))));
68 }
69 else if (!path_is_relative_and_below_cwd(filename))
70 ereport(ERROR,
71 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
72 (errmsg("path must be in or below the current directory"))));
73
74 return filename;
75 }
76
77
78 /*
79 * Read a section of a file, returning it as bytea
80 *
81 * Caller is responsible for all permissions checking.
82 *
83 * We read the whole of the file when bytes_to_read is negative.
84 */
85 static bytea *
read_binary_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)86 read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
87 bool missing_ok)
88 {
89 bytea *buf;
90 size_t nbytes;
91 FILE *file;
92
93 if (bytes_to_read < 0)
94 {
95 if (seek_offset < 0)
96 bytes_to_read = -seek_offset;
97 else
98 {
99 struct stat fst;
100
101 if (stat(filename, &fst) < 0)
102 {
103 if (missing_ok && errno == ENOENT)
104 return NULL;
105 else
106 ereport(ERROR,
107 (errcode_for_file_access(),
108 errmsg("could not stat file \"%s\": %m", filename)));
109 }
110
111 bytes_to_read = fst.st_size - seek_offset;
112 }
113 }
114
115 /* not sure why anyone thought that int64 length was a good idea */
116 if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
117 ereport(ERROR,
118 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
119 errmsg("requested length too large")));
120
121 if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
122 {
123 if (missing_ok && errno == ENOENT)
124 return NULL;
125 else
126 ereport(ERROR,
127 (errcode_for_file_access(),
128 errmsg("could not open file \"%s\" for reading: %m",
129 filename)));
130 }
131
132 if (fseeko(file, (off_t) seek_offset,
133 (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
134 ereport(ERROR,
135 (errcode_for_file_access(),
136 errmsg("could not seek in file \"%s\": %m", filename)));
137
138 buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
139
140 nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
141
142 if (ferror(file))
143 ereport(ERROR,
144 (errcode_for_file_access(),
145 errmsg("could not read file \"%s\": %m", filename)));
146
147 SET_VARSIZE(buf, nbytes + VARHDRSZ);
148
149 FreeFile(file);
150
151 return buf;
152 }
153
154 /*
155 * Similar to read_binary_file, but we verify that the contents are valid
156 * in the database encoding.
157 */
158 static text *
read_text_file(const char * filename,int64 seek_offset,int64 bytes_to_read,bool missing_ok)159 read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
160 bool missing_ok)
161 {
162 bytea *buf;
163
164 buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
165
166 if (buf != NULL)
167 {
168 /* Make sure the input is valid */
169 pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
170
171 /* OK, we can cast it to text safely */
172 return (text *) buf;
173 }
174 else
175 return NULL;
176 }
177
178 /*
179 * Read a section of a file, returning it as text
180 */
181 Datum
pg_read_file(PG_FUNCTION_ARGS)182 pg_read_file(PG_FUNCTION_ARGS)
183 {
184 text *filename_t = PG_GETARG_TEXT_PP(0);
185 int64 seek_offset = 0;
186 int64 bytes_to_read = -1;
187 bool missing_ok = false;
188 char *filename;
189 text *result;
190
191 if (!superuser())
192 ereport(ERROR,
193 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
194 (errmsg("must be superuser to read files"))));
195
196 /* handle optional arguments */
197 if (PG_NARGS() >= 3)
198 {
199 seek_offset = PG_GETARG_INT64(1);
200 bytes_to_read = PG_GETARG_INT64(2);
201
202 if (bytes_to_read < 0)
203 ereport(ERROR,
204 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
205 errmsg("requested length cannot be negative")));
206 }
207 if (PG_NARGS() >= 4)
208 missing_ok = PG_GETARG_BOOL(3);
209
210 filename = convert_and_check_filename(filename_t);
211
212 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
213 if (result)
214 PG_RETURN_TEXT_P(result);
215 else
216 PG_RETURN_NULL();
217 }
218
219 /*
220 * Read a section of a file, returning it as bytea
221 */
222 Datum
pg_read_binary_file(PG_FUNCTION_ARGS)223 pg_read_binary_file(PG_FUNCTION_ARGS)
224 {
225 text *filename_t = PG_GETARG_TEXT_PP(0);
226 int64 seek_offset = 0;
227 int64 bytes_to_read = -1;
228 bool missing_ok = false;
229 char *filename;
230 bytea *result;
231
232 if (!superuser())
233 ereport(ERROR,
234 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
235 (errmsg("must be superuser to read files"))));
236
237 /* handle optional arguments */
238 if (PG_NARGS() >= 3)
239 {
240 seek_offset = PG_GETARG_INT64(1);
241 bytes_to_read = PG_GETARG_INT64(2);
242
243 if (bytes_to_read < 0)
244 ereport(ERROR,
245 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
246 errmsg("requested length cannot be negative")));
247 }
248 if (PG_NARGS() >= 4)
249 missing_ok = PG_GETARG_BOOL(3);
250
251 filename = convert_and_check_filename(filename_t);
252
253 result = read_binary_file(filename, seek_offset,
254 bytes_to_read, missing_ok);
255 if (result)
256 PG_RETURN_BYTEA_P(result);
257 else
258 PG_RETURN_NULL();
259 }
260
261
/*
 * Wrapper functions for the 1 and 3 argument variants of pg_read_file()
 * and pg_read_binary_file().
 *
 * These are necessary to pass the sanity check in opr_sanity, which checks
 * that all built-in functions that share the implementing C function take
 * the same number of arguments.
 */
270 Datum
pg_read_file_off_len(PG_FUNCTION_ARGS)271 pg_read_file_off_len(PG_FUNCTION_ARGS)
272 {
273 return pg_read_file(fcinfo);
274 }
275
276 Datum
pg_read_file_all(PG_FUNCTION_ARGS)277 pg_read_file_all(PG_FUNCTION_ARGS)
278 {
279 return pg_read_file(fcinfo);
280 }
281
282 Datum
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)283 pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
284 {
285 return pg_read_binary_file(fcinfo);
286 }
287
288 Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)289 pg_read_binary_file_all(PG_FUNCTION_ARGS)
290 {
291 return pg_read_binary_file(fcinfo);
292 }
293
294 /*
295 * stat a file
296 */
297 Datum
pg_stat_file(PG_FUNCTION_ARGS)298 pg_stat_file(PG_FUNCTION_ARGS)
299 {
300 text *filename_t = PG_GETARG_TEXT_PP(0);
301 char *filename;
302 struct stat fst;
303 Datum values[6];
304 bool isnull[6];
305 HeapTuple tuple;
306 TupleDesc tupdesc;
307 bool missing_ok = false;
308
309 if (!superuser())
310 ereport(ERROR,
311 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
312 (errmsg("must be superuser to get file information"))));
313
314 /* check the optional argument */
315 if (PG_NARGS() == 2)
316 missing_ok = PG_GETARG_BOOL(1);
317
318 filename = convert_and_check_filename(filename_t);
319
320 if (stat(filename, &fst) < 0)
321 {
322 if (missing_ok && errno == ENOENT)
323 PG_RETURN_NULL();
324 else
325 ereport(ERROR,
326 (errcode_for_file_access(),
327 errmsg("could not stat file \"%s\": %m", filename)));
328 }
329
330 /*
331 * This record type had better match the output parameters declared for me
332 * in pg_proc.h.
333 */
334 tupdesc = CreateTemplateTupleDesc(6, false);
335 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
336 "size", INT8OID, -1, 0);
337 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
338 "access", TIMESTAMPTZOID, -1, 0);
339 TupleDescInitEntry(tupdesc, (AttrNumber) 3,
340 "modification", TIMESTAMPTZOID, -1, 0);
341 TupleDescInitEntry(tupdesc, (AttrNumber) 4,
342 "change", TIMESTAMPTZOID, -1, 0);
343 TupleDescInitEntry(tupdesc, (AttrNumber) 5,
344 "creation", TIMESTAMPTZOID, -1, 0);
345 TupleDescInitEntry(tupdesc, (AttrNumber) 6,
346 "isdir", BOOLOID, -1, 0);
347 BlessTupleDesc(tupdesc);
348
349 memset(isnull, false, sizeof(isnull));
350
351 values[0] = Int64GetDatum((int64) fst.st_size);
352 values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
353 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
354 /* Unix has file status change time, while Win32 has creation time */
355 #if !defined(WIN32) && !defined(__CYGWIN__)
356 values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
357 isnull[4] = true;
358 #else
359 isnull[3] = true;
360 values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
361 #endif
362 values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
363
364 tuple = heap_form_tuple(tupdesc, values, isnull);
365
366 pfree(filename);
367
368 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
369 }
370
371 /*
372 * stat a file (1 argument version)
373 *
374 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
375 * which checks that all built-in functions that share the implementing C
376 * function take the same number of arguments
377 */
378 Datum
pg_stat_file_1arg(PG_FUNCTION_ARGS)379 pg_stat_file_1arg(PG_FUNCTION_ARGS)
380 {
381 return pg_stat_file(fcinfo);
382 }
383
384 /*
385 * List a directory (returns the filenames only)
386 */
387 Datum
pg_ls_dir(PG_FUNCTION_ARGS)388 pg_ls_dir(PG_FUNCTION_ARGS)
389 {
390 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
391 char *location;
392 bool missing_ok = false;
393 bool include_dot_dirs = false;
394 bool randomAccess;
395 TupleDesc tupdesc;
396 Tuplestorestate *tupstore;
397 DIR *dirdesc;
398 struct dirent *de;
399 MemoryContext oldcontext;
400
401 if (!superuser())
402 ereport(ERROR,
403 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
404 (errmsg("must be superuser to get directory listings"))));
405
406 location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
407
408 /* check the optional arguments */
409 if (PG_NARGS() == 3)
410 {
411 if (!PG_ARGISNULL(1))
412 missing_ok = PG_GETARG_BOOL(1);
413 if (!PG_ARGISNULL(2))
414 include_dot_dirs = PG_GETARG_BOOL(2);
415 }
416
417 /* check to see if caller supports us returning a tuplestore */
418 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
419 ereport(ERROR,
420 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
421 errmsg("set-valued function called in context that cannot accept a set")));
422 if (!(rsinfo->allowedModes & SFRM_Materialize))
423 ereport(ERROR,
424 (errcode(ERRCODE_SYNTAX_ERROR),
425 errmsg("materialize mode required, but it is not allowed in this context")));
426
427 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
428 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
429
430 tupdesc = CreateTemplateTupleDesc(1, false);
431 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_ls_dir", TEXTOID, -1, 0);
432
433 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
434 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
435 rsinfo->returnMode = SFRM_Materialize;
436 rsinfo->setResult = tupstore;
437 rsinfo->setDesc = tupdesc;
438
439 MemoryContextSwitchTo(oldcontext);
440
441 dirdesc = AllocateDir(location);
442 if (!dirdesc)
443 {
444 /* Return empty tuplestore if appropriate */
445 if (missing_ok && errno == ENOENT)
446 return (Datum) 0;
447 /* Otherwise, we can let ReadDir() throw the error */
448 }
449
450 while ((de = ReadDir(dirdesc, location)) != NULL)
451 {
452 Datum values[1];
453 bool nulls[1];
454
455 if (!include_dot_dirs &&
456 (strcmp(de->d_name, ".") == 0 ||
457 strcmp(de->d_name, "..") == 0))
458 continue;
459
460 values[0] = CStringGetTextDatum(de->d_name);
461 nulls[0] = false;
462
463 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
464 }
465
466 FreeDir(dirdesc);
467 return (Datum) 0;
468 }
469
470 /*
471 * List a directory (1 argument version)
472 *
473 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
474 * which checks that all built-in functions that share the implementing C
475 * function take the same number of arguments.
476 */
477 Datum
pg_ls_dir_1arg(PG_FUNCTION_ARGS)478 pg_ls_dir_1arg(PG_FUNCTION_ARGS)
479 {
480 return pg_ls_dir(fcinfo);
481 }
482
483 /*
484 * Generic function to return a directory listing of files.
485 */
486 static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo,const char * dir)487 pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir)
488 {
489 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
490 bool randomAccess;
491 TupleDesc tupdesc;
492 Tuplestorestate *tupstore;
493 DIR *dirdesc;
494 struct dirent *de;
495 MemoryContext oldcontext;
496
497 /* check to see if caller supports us returning a tuplestore */
498 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
499 ereport(ERROR,
500 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
501 errmsg("set-valued function called in context that cannot accept a set")));
502 if (!(rsinfo->allowedModes & SFRM_Materialize))
503 ereport(ERROR,
504 (errcode(ERRCODE_SYNTAX_ERROR),
505 errmsg("materialize mode required, but it is not allowed in this context")));
506
507 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
508 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
509
510 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
511 elog(ERROR, "return type must be a row type");
512
513 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
514 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
515 rsinfo->returnMode = SFRM_Materialize;
516 rsinfo->setResult = tupstore;
517 rsinfo->setDesc = tupdesc;
518
519 MemoryContextSwitchTo(oldcontext);
520
521 /*
522 * Now walk the directory. Note that we must do this within a single SRF
523 * call, not leave the directory open across multiple calls, since we
524 * can't count on the SRF being run to completion.
525 */
526 dirdesc = AllocateDir(dir);
527 while ((de = ReadDir(dirdesc, dir)) != NULL)
528 {
529 Datum values[3];
530 bool nulls[3];
531 char path[MAXPGPATH * 2];
532 struct stat attrib;
533
534 /* Skip hidden files */
535 if (de->d_name[0] == '.')
536 continue;
537
538 /* Get the file info */
539 snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
540 if (stat(path, &attrib) < 0)
541 {
542 /* Ignore concurrently-deleted files, else complain */
543 if (errno == ENOENT)
544 continue;
545 ereport(ERROR,
546 (errcode_for_file_access(),
547 errmsg("could not stat file \"%s\": %m", path)));
548 }
549
550 /* Ignore anything but regular files */
551 if (!S_ISREG(attrib.st_mode))
552 continue;
553
554 values[0] = CStringGetTextDatum(de->d_name);
555 values[1] = Int64GetDatum((int64) attrib.st_size);
556 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
557 memset(nulls, 0, sizeof(nulls));
558
559 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
560 }
561
562 FreeDir(dirdesc);
563 return (Datum) 0;
564 }
565
566 /* Function to return the list of files in the log directory */
567 Datum
pg_ls_logdir(PG_FUNCTION_ARGS)568 pg_ls_logdir(PG_FUNCTION_ARGS)
569 {
570 return pg_ls_dir_files(fcinfo, Log_directory);
571 }
572
573 /* Function to return the list of files in the WAL directory */
574 Datum
pg_ls_waldir(PG_FUNCTION_ARGS)575 pg_ls_waldir(PG_FUNCTION_ARGS)
576 {
577 return pg_ls_dir_files(fcinfo, XLOGDIR);
578 }
579