1 /*------------------------------------------------------------------------- 2 * 3 * genfile.c 4 * Functions for direct access to files 5 * 6 * 7 * Copyright (c) 2004-2018, PostgreSQL Global Development Group 8 * 9 * Author: Andreas Pflug <pgadmin@pse-consulting.de> 10 * 11 * IDENTIFICATION 12 * src/backend/utils/adt/genfile.c 13 * 14 *------------------------------------------------------------------------- 15 */ 16 #include "postgres.h" 17 18 #include <sys/file.h> 19 #include <sys/stat.h> 20 #include <unistd.h> 21 #include <dirent.h> 22 23 #include "access/htup_details.h" 24 #include "access/xlog_internal.h" 25 #include "catalog/pg_authid.h" 26 #include "catalog/pg_type.h" 27 #include "funcapi.h" 28 #include "mb/pg_wchar.h" 29 #include "miscadmin.h" 30 #include "postmaster/syslogger.h" 31 #include "storage/fd.h" 32 #include "utils/builtins.h" 33 #include "utils/memutils.h" 34 #include "utils/timestamp.h" 35 36 37 /* 38 * Convert a "text" filename argument to C string, and check it's allowable. 39 * 40 * Filename may be absolute or relative to the DataDir, but we only allow 41 * absolute paths that match DataDir or Log_directory. 42 * 43 * This does a privilege check against the 'pg_read_server_files' role, so 44 * this function is really only appropriate for callers who are only checking 45 * 'read' access. Do not use this function if you are looking for a check 46 * for 'write' or 'program' access without updating it to access the type 47 * of check as an argument and checking the appropriate role membership. 48 */ 49 static char * 50 convert_and_check_filename(text *arg) 51 { 52 char *filename; 53 54 filename = text_to_cstring(arg); 55 canonicalize_path(filename); /* filename can change length here */ 56 57 /* 58 * Members of the 'pg_read_server_files' role are allowed to access any 59 * files on the server as the PG user, so no need to do any further checks 60 * here. 61 */ 62 if (is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_SERVER_FILES)) 63 return filename; 64 65 /* User isn't a member of the default role, so check if it's allowable */ 66 if (is_absolute_path(filename)) 67 { 68 /* Disallow '/a/b/data/..' */ 69 if (path_contains_parent_reference(filename)) 70 ereport(ERROR, 71 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), 72 (errmsg("reference to parent directory (\"..\") not allowed")))); 73 74 /* 75 * Allow absolute paths if within DataDir or Log_directory, even 76 * though Log_directory might be outside DataDir. 77 */ 78 if (!path_is_prefix_of_path(DataDir, filename) && 79 (!is_absolute_path(Log_directory) || 80 !path_is_prefix_of_path(Log_directory, filename))) 81 ereport(ERROR, 82 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), 83 (errmsg("absolute path not allowed")))); 84 } 85 else if (!path_is_relative_and_below_cwd(filename)) 86 ereport(ERROR, 87 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), 88 (errmsg("path must be in or below the current directory")))); 89 90 return filename; 91 } 92 93 94 /* 95 * Read a section of a file, returning it as bytea 96 * 97 * Caller is responsible for all permissions checking. 98 * 99 * We read the whole of the file when bytes_to_read is negative. 100 */ 101 static bytea * 102 read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read, 103 bool missing_ok) 104 { 105 bytea *buf; 106 size_t nbytes = 0; 107 FILE *file; 108 109 /* clamp request size to what we can actually deliver */ 110 if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ)) 111 ereport(ERROR, 112 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 113 errmsg("requested length too large"))); 114 115 if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL) 116 { 117 if (missing_ok && errno == ENOENT) 118 return NULL; 119 else 120 ereport(ERROR, 121 (errcode_for_file_access(), 122 errmsg("could not open file \"%s\" for reading: %m", 123 filename))); 124 } 125 126 if (fseeko(file, (off_t) seek_offset, 127 (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0) 128 ereport(ERROR, 129 (errcode_for_file_access(), 130 errmsg("could not seek in file \"%s\": %m", filename))); 131 132 if (bytes_to_read >= 0) 133 { 134 /* If passed explicit read size just do it */ 135 buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ); 136 137 nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file); 138 } 139 else 140 { 141 /* Negative read size, read rest of file */ 142 StringInfoData sbuf; 143 144 initStringInfo(&sbuf); 145 /* Leave room in the buffer for the varlena length word */ 146 sbuf.len += VARHDRSZ; 147 Assert(sbuf.len < sbuf.maxlen); 148 149 while (!(feof(file) || ferror(file))) 150 { 151 size_t rbytes; 152 153 /* Minimum amount to read at a time */ 154 #define MIN_READ_SIZE 4096 155 156 /* 157 * If not at end of file, and sbuf.len is equal to 158 * MaxAllocSize - 1, then either the file is too large, or 159 * there is nothing left to read. Attempt to read one more 160 * byte to see if the end of file has been reached. If not, 161 * the file is too large; we'd rather give the error message 162 * for that ourselves. 163 */ 164 if (sbuf.len == MaxAllocSize - 1) 165 { 166 char rbuf[1]; 167 168 if (fread(rbuf, 1, 1, file) != 0 || !feof(file)) 169 ereport(ERROR, 170 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 171 errmsg("file length too large"))); 172 else 173 break; 174 } 175 176 /* OK, ensure that we can read at least MIN_READ_SIZE */ 177 enlargeStringInfo(&sbuf, MIN_READ_SIZE); 178 179 /* 180 * stringinfo.c likes to allocate in powers of 2, so it's likely 181 * that much more space is available than we asked for. Use all 182 * of it, rather than making more fread calls than necessary. 183 */ 184 rbytes = fread(sbuf.data + sbuf.len, 1, 185 (size_t) (sbuf.maxlen - sbuf.len - 1), file); 186 sbuf.len += rbytes; 187 nbytes += rbytes; 188 } 189 190 /* Now we can commandeer the stringinfo's buffer as the result */ 191 buf = (bytea *) sbuf.data; 192 } 193 194 if (ferror(file)) 195 ereport(ERROR, 196 (errcode_for_file_access(), 197 errmsg("could not read file \"%s\": %m", filename))); 198 199 SET_VARSIZE(buf, nbytes + VARHDRSZ); 200 201 FreeFile(file); 202 203 return buf; 204 } 205 206 /* 207 * Similar to read_binary_file, but we verify that the contents are valid 208 * in the database encoding. 209 */ 210 static text * 211 read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read, 212 bool missing_ok) 213 { 214 bytea *buf; 215 216 buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok); 217 218 if (buf != NULL) 219 { 220 /* Make sure the input is valid */ 221 pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false); 222 223 /* OK, we can cast it to text safely */ 224 return (text *) buf; 225 } 226 else 227 return NULL; 228 } 229 230 /* 231 * Read a section of a file, returning it as text 232 * 233 * This function is kept to support adminpack 1.0. 234 */ 235 Datum 236 pg_read_file(PG_FUNCTION_ARGS) 237 { 238 text *filename_t = PG_GETARG_TEXT_PP(0); 239 int64 seek_offset = 0; 240 int64 bytes_to_read = -1; 241 bool missing_ok = false; 242 char *filename; 243 text *result; 244 245 if (!superuser()) 246 ereport(ERROR, 247 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), 248 (errmsg("must be superuser to read files with adminpack 1.0"), 249 errhint("Consider using pg_read_file(), which is part of core, instead.")))); 250 251 /* handle optional arguments */ 252 if (PG_NARGS() >= 3) 253 { 254 seek_offset = PG_GETARG_INT64(1); 255 bytes_to_read = PG_GETARG_INT64(2); 256 257 if (bytes_to_read < 0) 258 ereport(ERROR, 259 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 260 errmsg("requested length cannot be negative"))); 261 } 262 if (PG_NARGS() >= 4) 263 missing_ok = PG_GETARG_BOOL(3); 264 265 filename = convert_and_check_filename(filename_t); 266 267 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok); 268 if (result) 269 PG_RETURN_TEXT_P(result); 270 else 271 PG_RETURN_NULL(); 272 } 273 274 /* 275 * Read a section of a file, returning it as text 276 * 277 * No superuser check done here- instead privileges are handled by the 278 * GRANT system. 279 */ 280 Datum 281 pg_read_file_v2(PG_FUNCTION_ARGS) 282 { 283 text *filename_t = PG_GETARG_TEXT_PP(0); 284 int64 seek_offset = 0; 285 int64 bytes_to_read = -1; 286 bool missing_ok = false; 287 char *filename; 288 text *result; 289 290 /* handle optional arguments */ 291 if (PG_NARGS() >= 3) 292 { 293 seek_offset = PG_GETARG_INT64(1); 294 bytes_to_read = PG_GETARG_INT64(2); 295 296 if (bytes_to_read < 0) 297 ereport(ERROR, 298 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 299 errmsg("requested length cannot be negative"))); 300 } 301 if (PG_NARGS() >= 4) 302 missing_ok = PG_GETARG_BOOL(3); 303 304 filename = convert_and_check_filename(filename_t); 305 306 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok); 307 if (result) 308 PG_RETURN_TEXT_P(result); 309 else 310 PG_RETURN_NULL(); 311 } 312 313 /* 314 * Read a section of a file, returning it as bytea 315 */ 316 Datum 317 pg_read_binary_file(PG_FUNCTION_ARGS) 318 { 319 text *filename_t = PG_GETARG_TEXT_PP(0); 320 int64 seek_offset = 0; 321 int64 bytes_to_read = -1; 322 bool missing_ok = false; 323 char *filename; 324 bytea *result; 325 326 /* handle optional arguments */ 327 if (PG_NARGS() >= 3) 328 { 329 seek_offset = PG_GETARG_INT64(1); 330 bytes_to_read = PG_GETARG_INT64(2); 331 332 if (bytes_to_read < 0) 333 ereport(ERROR, 334 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 335 errmsg("requested length cannot be negative"))); 336 } 337 if (PG_NARGS() >= 4) 338 missing_ok = PG_GETARG_BOOL(3); 339 340 filename = convert_and_check_filename(filename_t); 341 342 result = read_binary_file(filename, seek_offset, 343 bytes_to_read, missing_ok); 344 if (result) 345 PG_RETURN_BYTEA_P(result); 346 else 347 PG_RETURN_NULL(); 348 } 349 350 351 /* 352 * Wrapper functions for the 1 and 3 argument variants of pg_read_file_v2() 353 * and pg_binary_read_file(). 354 * 355 * These are necessary to pass the sanity check in opr_sanity, which checks 356 * that all built-in functions that share the implementing C function take 357 * the same number of arguments. 358 */ 359 Datum 360 pg_read_file_off_len(PG_FUNCTION_ARGS) 361 { 362 return pg_read_file_v2(fcinfo); 363 } 364 365 Datum 366 pg_read_file_all(PG_FUNCTION_ARGS) 367 { 368 return pg_read_file_v2(fcinfo); 369 } 370 371 Datum 372 pg_read_binary_file_off_len(PG_FUNCTION_ARGS) 373 { 374 return pg_read_binary_file(fcinfo); 375 } 376 377 Datum 378 pg_read_binary_file_all(PG_FUNCTION_ARGS) 379 { 380 return pg_read_binary_file(fcinfo); 381 } 382 383 /* 384 * stat a file 385 */ 386 Datum 387 pg_stat_file(PG_FUNCTION_ARGS) 388 { 389 text *filename_t = PG_GETARG_TEXT_PP(0); 390 char *filename; 391 struct stat fst; 392 Datum values[6]; 393 bool isnull[6]; 394 HeapTuple tuple; 395 TupleDesc tupdesc; 396 bool missing_ok = false; 397 398 /* check the optional argument */ 399 if (PG_NARGS() == 2) 400 missing_ok = PG_GETARG_BOOL(1); 401 402 filename = convert_and_check_filename(filename_t); 403 404 if (stat(filename, &fst) < 0) 405 { 406 if (missing_ok && errno == ENOENT) 407 PG_RETURN_NULL(); 408 else 409 ereport(ERROR, 410 (errcode_for_file_access(), 411 errmsg("could not stat file \"%s\": %m", filename))); 412 } 413 414 /* 415 * This record type had better match the output parameters declared for me 416 * in pg_proc.h. 417 */ 418 tupdesc = CreateTemplateTupleDesc(6, false); 419 TupleDescInitEntry(tupdesc, (AttrNumber) 1, 420 "size", INT8OID, -1, 0); 421 TupleDescInitEntry(tupdesc, (AttrNumber) 2, 422 "access", TIMESTAMPTZOID, -1, 0); 423 TupleDescInitEntry(tupdesc, (AttrNumber) 3, 424 "modification", TIMESTAMPTZOID, -1, 0); 425 TupleDescInitEntry(tupdesc, (AttrNumber) 4, 426 "change", TIMESTAMPTZOID, -1, 0); 427 TupleDescInitEntry(tupdesc, (AttrNumber) 5, 428 "creation", TIMESTAMPTZOID, -1, 0); 429 TupleDescInitEntry(tupdesc, (AttrNumber) 6, 430 "isdir", BOOLOID, -1, 0); 431 BlessTupleDesc(tupdesc); 432 433 memset(isnull, false, sizeof(isnull)); 434 435 values[0] = Int64GetDatum((int64) fst.st_size); 436 values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime)); 437 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime)); 438 /* Unix has file status change time, while Win32 has creation time */ 439 #if !defined(WIN32) && !defined(__CYGWIN__) 440 values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); 441 isnull[4] = true; 442 #else 443 isnull[3] = true; 444 values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); 445 #endif 446 values[5] = BoolGetDatum(S_ISDIR(fst.st_mode)); 447 448 tuple = heap_form_tuple(tupdesc, values, isnull); 449 450 pfree(filename); 451 452 PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); 453 } 454 455 /* 456 * stat a file (1 argument version) 457 * 458 * note: this wrapper is necessary to pass the sanity check in opr_sanity, 459 * which checks that all built-in functions that share the implementing C 460 * function take the same number of arguments 461 */ 462 Datum 463 pg_stat_file_1arg(PG_FUNCTION_ARGS) 464 { 465 return pg_stat_file(fcinfo); 466 } 467 468 /* 469 * List a directory (returns the filenames only) 470 */ 471 Datum 472 pg_ls_dir(PG_FUNCTION_ARGS) 473 { 474 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; 475 char *location; 476 bool missing_ok = false; 477 bool include_dot_dirs = false; 478 bool randomAccess; 479 TupleDesc tupdesc; 480 Tuplestorestate *tupstore; 481 DIR *dirdesc; 482 struct dirent *de; 483 MemoryContext oldcontext; 484 485 location = convert_and_check_filename(PG_GETARG_TEXT_PP(0)); 486 487 /* check the optional arguments */ 488 if (PG_NARGS() == 3) 489 { 490 if (!PG_ARGISNULL(1)) 491 missing_ok = PG_GETARG_BOOL(1); 492 if (!PG_ARGISNULL(2)) 493 include_dot_dirs = PG_GETARG_BOOL(2); 494 } 495 496 /* check to see if caller supports us returning a tuplestore */ 497 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) 498 ereport(ERROR, 499 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 500 errmsg("set-valued function called in context that cannot accept a set"))); 501 if (!(rsinfo->allowedModes & SFRM_Materialize)) 502 ereport(ERROR, 503 (errcode(ERRCODE_SYNTAX_ERROR), 504 errmsg("materialize mode required, but it is not allowed in this context"))); 505 506 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ 507 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); 508 509 tupdesc = CreateTemplateTupleDesc(1, false); 510 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_ls_dir", TEXTOID, -1, 0); 511 512 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; 513 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); 514 rsinfo->returnMode = SFRM_Materialize; 515 rsinfo->setResult = tupstore; 516 rsinfo->setDesc = tupdesc; 517 518 MemoryContextSwitchTo(oldcontext); 519 520 dirdesc = AllocateDir(location); 521 if (!dirdesc) 522 { 523 /* Return empty tuplestore if appropriate */ 524 if (missing_ok && errno == ENOENT) 525 return (Datum) 0; 526 /* Otherwise, we can let ReadDir() throw the error */ 527 } 528 529 while ((de = ReadDir(dirdesc, location)) != NULL) 530 { 531 Datum values[1]; 532 bool nulls[1]; 533 534 if (!include_dot_dirs && 535 (strcmp(de->d_name, ".") == 0 || 536 strcmp(de->d_name, "..") == 0)) 537 continue; 538 539 values[0] = CStringGetTextDatum(de->d_name); 540 nulls[0] = false; 541 542 tuplestore_putvalues(tupstore, tupdesc, values, nulls); 543 } 544 545 FreeDir(dirdesc); 546 return (Datum) 0; 547 } 548 549 /* 550 * List a directory (1 argument version) 551 * 552 * note: this wrapper is necessary to pass the sanity check in opr_sanity, 553 * which checks that all built-in functions that share the implementing C 554 * function take the same number of arguments. 555 */ 556 Datum 557 pg_ls_dir_1arg(PG_FUNCTION_ARGS) 558 { 559 return pg_ls_dir(fcinfo); 560 } 561 562 /* 563 * Generic function to return a directory listing of files. 564 */ 565 static Datum 566 pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir) 567 { 568 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; 569 bool randomAccess; 570 TupleDesc tupdesc; 571 Tuplestorestate *tupstore; 572 DIR *dirdesc; 573 struct dirent *de; 574 MemoryContext oldcontext; 575 576 /* check to see if caller supports us returning a tuplestore */ 577 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) 578 ereport(ERROR, 579 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 580 errmsg("set-valued function called in context that cannot accept a set"))); 581 if (!(rsinfo->allowedModes & SFRM_Materialize)) 582 ereport(ERROR, 583 (errcode(ERRCODE_SYNTAX_ERROR), 584 errmsg("materialize mode required, but it is not allowed in this context"))); 585 586 /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ 587 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); 588 589 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) 590 elog(ERROR, "return type must be a row type"); 591 592 randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; 593 tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); 594 rsinfo->returnMode = SFRM_Materialize; 595 rsinfo->setResult = tupstore; 596 rsinfo->setDesc = tupdesc; 597 598 MemoryContextSwitchTo(oldcontext); 599 600 /* 601 * Now walk the directory. Note that we must do this within a single SRF 602 * call, not leave the directory open across multiple calls, since we 603 * can't count on the SRF being run to completion. 604 */ 605 dirdesc = AllocateDir(dir); 606 while ((de = ReadDir(dirdesc, dir)) != NULL) 607 { 608 Datum values[3]; 609 bool nulls[3]; 610 char path[MAXPGPATH * 2]; 611 struct stat attrib; 612 613 /* Skip hidden files */ 614 if (de->d_name[0] == '.') 615 continue; 616 617 /* Get the file info */ 618 snprintf(path, sizeof(path), "%s/%s", dir, de->d_name); 619 if (stat(path, &attrib) < 0) 620 { 621 /* Ignore concurrently-deleted files, else complain */ 622 if (errno == ENOENT) 623 continue; 624 ereport(ERROR, 625 (errcode_for_file_access(), 626 errmsg("could not stat file \"%s\": %m", path))); 627 } 628 629 /* Ignore anything but regular files */ 630 if (!S_ISREG(attrib.st_mode)) 631 continue; 632 633 values[0] = CStringGetTextDatum(de->d_name); 634 values[1] = Int64GetDatum((int64) attrib.st_size); 635 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime)); 636 memset(nulls, 0, sizeof(nulls)); 637 638 tuplestore_putvalues(tupstore, tupdesc, values, nulls); 639 } 640 641 FreeDir(dirdesc); 642 return (Datum) 0; 643 } 644 645 /* Function to return the list of files in the log directory */ 646 Datum 647 pg_ls_logdir(PG_FUNCTION_ARGS) 648 { 649 return pg_ls_dir_files(fcinfo, Log_directory); 650 } 651 652 /* Function to return the list of files in the WAL directory */ 653 Datum 654 pg_ls_waldir(PG_FUNCTION_ARGS) 655 { 656 return pg_ls_dir_files(fcinfo, XLOGDIR); 657 } 658