/*-------------------------------------------------------------------------
 *
 * buffile.c
 *	  Management of large buffered temporary files.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/file/buffile.c
 *
 * NOTES:
 *
 * BufFiles provide a very incomplete emulation of stdio atop virtual Files
 * (as managed by fd.c). Currently, we only support the buffered-I/O
 * aspect of stdio: a read or write of the low-level File occurs only
 * when the buffer is filled or emptied. This is an even bigger win
 * for virtual Files than for ordinary kernel files, since reducing the
 * frequency with which a virtual File is touched reduces "thrashing"
 * of opening/closing file descriptors.
 *
 * Note that BufFile structs are allocated with palloc(), and therefore
 * will go away automatically at query/transaction end. Since the underlying
 * virtual Files are made with OpenTemporaryFile, all resources for
 * the file are certain to be cleaned up even if processing is aborted
 * by ereport(ERROR). The data structures required are made in the
 * palloc context that was current when the BufFile was created, and
 * any external resources such as temp files are owned by the ResourceOwner
 * that was current at that time.
 *
 * BufFile also supports temporary files that exceed the OS file size limit
 * (by opening multiple fd.c temporary files). This is an essential feature
 * for sorts and hashjoins on large amounts of data.
 *
 * BufFile supports temporary files that can be made read-only and shared with
 * other backends, as infrastructure for parallel execution. Such files need
 * to be created as a member of a SharedFileSet that all participants are
 * attached to.
39 *------------------------------------------------------------------------- 40 */ 41 42 #include "postgres.h" 43 44 #include "commands/tablespace.h" 45 #include "executor/instrument.h" 46 #include "miscadmin.h" 47 #include "pgstat.h" 48 #include "storage/buf_internals.h" 49 #include "storage/buffile.h" 50 #include "storage/fd.h" 51 #include "utils/resowner.h" 52 53 /* 54 * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE. 55 * The reason is that we'd like large BufFiles to be spread across multiple 56 * tablespaces when available. 57 */ 58 #define MAX_PHYSICAL_FILESIZE 0x40000000 59 #define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ) 60 61 /* 62 * This data structure represents a buffered file that consists of one or 63 * more physical files (each accessed through a virtual file descriptor 64 * managed by fd.c). 65 */ 66 struct BufFile 67 { 68 int numFiles; /* number of physical files in set */ 69 /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */ 70 File *files; /* palloc'd array with numFiles entries */ 71 72 bool isInterXact; /* keep open over transactions? */ 73 bool dirty; /* does buffer need to be written? */ 74 bool readOnly; /* has the file been set to read only? */ 75 76 SharedFileSet *fileset; /* space for segment files if shared */ 77 const char *name; /* name of this BufFile if shared */ 78 79 /* 80 * resowner is the ResourceOwner to use for underlying temp files. (We 81 * don't need to remember the memory context we're using explicitly, 82 * because after creation we only repalloc our arrays larger.) 83 */ 84 ResourceOwner resowner; 85 86 /* 87 * "current pos" is position of start of buffer within the logical file. 88 * Position as seen by user of BufFile is (curFile, curOffset + pos). 
89 */ 90 int curFile; /* file index (0..n) part of current pos */ 91 off_t curOffset; /* offset part of current pos */ 92 int pos; /* next read/write position in buffer */ 93 int nbytes; /* total # of valid bytes in buffer */ 94 PGAlignedBlock buffer; 95 }; 96 97 static BufFile *makeBufFileCommon(int nfiles); 98 static BufFile *makeBufFile(File firstfile); 99 static void extendBufFile(BufFile *file); 100 static void BufFileLoadBuffer(BufFile *file); 101 static void BufFileDumpBuffer(BufFile *file); 102 static void BufFileFlush(BufFile *file); 103 static File MakeNewSharedSegment(BufFile *file, int segment); 104 105 /* 106 * Create BufFile and perform the common initialization. 107 */ 108 static BufFile * 109 makeBufFileCommon(int nfiles) 110 { 111 BufFile *file = (BufFile *) palloc(sizeof(BufFile)); 112 113 file->numFiles = nfiles; 114 file->isInterXact = false; 115 file->dirty = false; 116 file->resowner = CurrentResourceOwner; 117 file->curFile = 0; 118 file->curOffset = 0L; 119 file->pos = 0; 120 file->nbytes = 0; 121 122 return file; 123 } 124 125 /* 126 * Create a BufFile given the first underlying physical file. 127 * NOTE: caller must set isInterXact if appropriate. 128 */ 129 static BufFile * 130 makeBufFile(File firstfile) 131 { 132 BufFile *file = makeBufFileCommon(1); 133 134 file->files = (File *) palloc(sizeof(File)); 135 file->files[0] = firstfile; 136 file->readOnly = false; 137 file->fileset = NULL; 138 file->name = NULL; 139 140 return file; 141 } 142 143 /* 144 * Add another component temp file. 
145 */ 146 static void 147 extendBufFile(BufFile *file) 148 { 149 File pfile; 150 ResourceOwner oldowner; 151 152 /* Be sure to associate the file with the BufFile's resource owner */ 153 oldowner = CurrentResourceOwner; 154 CurrentResourceOwner = file->resowner; 155 156 if (file->fileset == NULL) 157 pfile = OpenTemporaryFile(file->isInterXact); 158 else 159 pfile = MakeNewSharedSegment(file, file->numFiles); 160 161 Assert(pfile >= 0); 162 163 CurrentResourceOwner = oldowner; 164 165 file->files = (File *) repalloc(file->files, 166 (file->numFiles + 1) * sizeof(File)); 167 file->files[file->numFiles] = pfile; 168 file->numFiles++; 169 } 170 171 /* 172 * Create a BufFile for a new temporary file (which will expand to become 173 * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are 174 * written to it). 175 * 176 * If interXact is true, the temp file will not be automatically deleted 177 * at end of transaction. 178 * 179 * Note: if interXact is true, the caller had better be calling us in a 180 * memory context, and with a resource owner, that will survive across 181 * transaction boundaries. 182 */ 183 BufFile * 184 BufFileCreateTemp(bool interXact) 185 { 186 BufFile *file; 187 File pfile; 188 189 /* 190 * Ensure that temp tablespaces are set up for OpenTemporaryFile to use. 191 * Possibly the caller will have done this already, but it seems useful to 192 * double-check here. Failure to do this at all would result in the temp 193 * files always getting placed in the default tablespace, which is a 194 * pretty hard-to-detect bug. Callers may prefer to do it earlier if they 195 * want to be sure that any required catalog access is done in some other 196 * resource context. 197 */ 198 PrepareTempTablespaces(); 199 200 pfile = OpenTemporaryFile(interXact); 201 Assert(pfile >= 0); 202 203 file = makeBufFile(pfile); 204 file->isInterXact = interXact; 205 206 return file; 207 } 208 209 /* 210 * Build the name for a given segment of a given BufFile. 
211 */ 212 static void 213 SharedSegmentName(char *name, const char *buffile_name, int segment) 214 { 215 snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment); 216 } 217 218 /* 219 * Create a new segment file backing a shared BufFile. 220 */ 221 static File 222 MakeNewSharedSegment(BufFile *buffile, int segment) 223 { 224 char name[MAXPGPATH]; 225 File file; 226 227 /* 228 * It is possible that there are files left over from before a crash 229 * restart with the same name. In order for BufFileOpenShared() not to 230 * get confused about how many segments there are, we'll unlink the next 231 * segment number if it already exists. 232 */ 233 SharedSegmentName(name, buffile->name, segment + 1); 234 SharedFileSetDelete(buffile->fileset, name, true); 235 236 /* Create the new segment. */ 237 SharedSegmentName(name, buffile->name, segment); 238 file = SharedFileSetCreate(buffile->fileset, name); 239 240 /* SharedFileSetCreate would've errored out */ 241 Assert(file > 0); 242 243 return file; 244 } 245 246 /* 247 * Create a BufFile that can be discovered and opened read-only by other 248 * backends that are attached to the same SharedFileSet using the same name. 249 * 250 * The naming scheme for shared BufFiles is left up to the calling code. The 251 * name will appear as part of one or more filenames on disk, and might 252 * provide clues to administrators about which subsystem is generating 253 * temporary file data. Since each SharedFileSet object is backed by one or 254 * more uniquely named temporary directory, names don't conflict with 255 * unrelated SharedFileSet objects. 
256 */ 257 BufFile * 258 BufFileCreateShared(SharedFileSet *fileset, const char *name) 259 { 260 BufFile *file; 261 262 file = makeBufFileCommon(1); 263 file->fileset = fileset; 264 file->name = pstrdup(name); 265 file->files = (File *) palloc(sizeof(File)); 266 file->files[0] = MakeNewSharedSegment(file, 0); 267 file->readOnly = false; 268 269 return file; 270 } 271 272 /* 273 * Open a file that was previously created in another backend (or this one) 274 * with BufFileCreateShared in the same SharedFileSet using the same name. 275 * The backend that created the file must have called BufFileClose() or 276 * BufFileExportShared() to make sure that it is ready to be opened by other 277 * backends and render it read-only. 278 */ 279 BufFile * 280 BufFileOpenShared(SharedFileSet *fileset, const char *name) 281 { 282 BufFile *file; 283 char segment_name[MAXPGPATH]; 284 Size capacity = 16; 285 File *files; 286 int nfiles = 0; 287 288 files = palloc(sizeof(File) * capacity); 289 290 /* 291 * We don't know how many segments there are, so we'll probe the 292 * filesystem to find out. 293 */ 294 for (;;) 295 { 296 /* See if we need to expand our file segment array. */ 297 if (nfiles + 1 > capacity) 298 { 299 capacity *= 2; 300 files = repalloc(files, sizeof(File) * capacity); 301 } 302 /* Try to load a segment. */ 303 SharedSegmentName(segment_name, name, nfiles); 304 files[nfiles] = SharedFileSetOpen(fileset, segment_name); 305 if (files[nfiles] <= 0) 306 break; 307 ++nfiles; 308 309 CHECK_FOR_INTERRUPTS(); 310 } 311 312 /* 313 * If we didn't find any files at all, then no BufFile exists with this 314 * name. 
315 */ 316 if (nfiles == 0) 317 ereport(ERROR, 318 (errcode_for_file_access(), 319 errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m", 320 segment_name, name))); 321 322 file = makeBufFileCommon(nfiles); 323 file->files = files; 324 file->readOnly = true; /* Can't write to files opened this way */ 325 file->fileset = fileset; 326 file->name = pstrdup(name); 327 328 return file; 329 } 330 331 /* 332 * Delete a BufFile that was created by BufFileCreateShared in the given 333 * SharedFileSet using the given name. 334 * 335 * It is not necessary to delete files explicitly with this function. It is 336 * provided only as a way to delete files proactively, rather than waiting for 337 * the SharedFileSet to be cleaned up. 338 * 339 * Only one backend should attempt to delete a given name, and should know 340 * that it exists and has been exported or closed. 341 */ 342 void 343 BufFileDeleteShared(SharedFileSet *fileset, const char *name) 344 { 345 char segment_name[MAXPGPATH]; 346 int segment = 0; 347 bool found = false; 348 349 /* 350 * We don't know how many segments the file has. We'll keep deleting 351 * until we run out. If we don't manage to find even an initial segment, 352 * raise an error. 353 */ 354 for (;;) 355 { 356 SharedSegmentName(segment_name, name, segment); 357 if (!SharedFileSetDelete(fileset, segment_name, true)) 358 break; 359 found = true; 360 ++segment; 361 362 CHECK_FOR_INTERRUPTS(); 363 } 364 365 if (!found) 366 elog(ERROR, "could not delete unknown shared BufFile \"%s\"", name); 367 } 368 369 /* 370 * BufFileExportShared --- flush and make read-only, in preparation for sharing. 371 */ 372 void 373 BufFileExportShared(BufFile *file) 374 { 375 /* Must be a file belonging to a SharedFileSet. */ 376 Assert(file->fileset != NULL); 377 378 /* It's probably a bug if someone calls this twice. 
*/ 379 Assert(!file->readOnly); 380 381 BufFileFlush(file); 382 file->readOnly = true; 383 } 384 385 /* 386 * Close a BufFile 387 * 388 * Like fclose(), this also implicitly FileCloses the underlying File. 389 */ 390 void 391 BufFileClose(BufFile *file) 392 { 393 int i; 394 395 /* flush any unwritten data */ 396 BufFileFlush(file); 397 /* close and delete the underlying file(s) */ 398 for (i = 0; i < file->numFiles; i++) 399 FileClose(file->files[i]); 400 /* release the buffer space */ 401 pfree(file->files); 402 pfree(file); 403 } 404 405 /* 406 * BufFileLoadBuffer 407 * 408 * Load some data into buffer, if possible, starting from curOffset. 409 * At call, must have dirty = false, pos and nbytes = 0. 410 * On exit, nbytes is number of bytes loaded. 411 */ 412 static void 413 BufFileLoadBuffer(BufFile *file) 414 { 415 File thisfile; 416 417 /* 418 * Advance to next component file if necessary and possible. 419 */ 420 if (file->curOffset >= MAX_PHYSICAL_FILESIZE && 421 file->curFile + 1 < file->numFiles) 422 { 423 file->curFile++; 424 file->curOffset = 0L; 425 } 426 427 /* 428 * Read whatever we can get, up to a full bufferload. 429 */ 430 thisfile = file->files[file->curFile]; 431 file->nbytes = FileRead(thisfile, 432 file->buffer.data, 433 sizeof(file->buffer), 434 file->curOffset, 435 WAIT_EVENT_BUFFILE_READ); 436 if (file->nbytes < 0) 437 { 438 file->nbytes = 0; 439 ereport(ERROR, 440 (errcode_for_file_access(), 441 errmsg("could not read file \"%s\": %m", 442 FilePathName(thisfile)))); 443 } 444 445 /* we choose not to advance curOffset here */ 446 447 if (file->nbytes > 0) 448 pgBufferUsage.temp_blks_read++; 449 } 450 451 /* 452 * BufFileDumpBuffer 453 * 454 * Dump buffer contents starting at curOffset. 455 * At call, should have dirty = true, nbytes > 0. 456 * On exit, dirty is cleared if successful write, and curOffset is advanced. 
457 */ 458 static void 459 BufFileDumpBuffer(BufFile *file) 460 { 461 int wpos = 0; 462 int bytestowrite; 463 File thisfile; 464 465 /* 466 * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it 467 * crosses a component-file boundary; so we need a loop. 468 */ 469 while (wpos < file->nbytes) 470 { 471 off_t availbytes; 472 473 /* 474 * Advance to next component file if necessary and possible. 475 */ 476 if (file->curOffset >= MAX_PHYSICAL_FILESIZE) 477 { 478 while (file->curFile + 1 >= file->numFiles) 479 extendBufFile(file); 480 file->curFile++; 481 file->curOffset = 0L; 482 } 483 484 /* 485 * Determine how much we need to write into this file. 486 */ 487 bytestowrite = file->nbytes - wpos; 488 availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset; 489 490 if ((off_t) bytestowrite > availbytes) 491 bytestowrite = (int) availbytes; 492 493 thisfile = file->files[file->curFile]; 494 bytestowrite = FileWrite(thisfile, 495 file->buffer.data + wpos, 496 bytestowrite, 497 file->curOffset, 498 WAIT_EVENT_BUFFILE_WRITE); 499 if (bytestowrite <= 0) 500 ereport(ERROR, 501 (errcode_for_file_access(), 502 errmsg("could not write to file \"%s\": %m", 503 FilePathName(thisfile)))); 504 file->curOffset += bytestowrite; 505 wpos += bytestowrite; 506 507 pgBufferUsage.temp_blks_written++; 508 } 509 file->dirty = false; 510 511 /* 512 * At this point, curOffset has been advanced to the end of the buffer, 513 * ie, its original value + nbytes. We need to make it point to the 514 * logical file position, ie, original value + pos, in case that is less 515 * (as could happen due to a small backwards seek in a dirty buffer!) 
516 */ 517 file->curOffset -= (file->nbytes - file->pos); 518 if (file->curOffset < 0) /* handle possible segment crossing */ 519 { 520 file->curFile--; 521 Assert(file->curFile >= 0); 522 file->curOffset += MAX_PHYSICAL_FILESIZE; 523 } 524 525 /* 526 * Now we can set the buffer empty without changing the logical position 527 */ 528 file->pos = 0; 529 file->nbytes = 0; 530 } 531 532 /* 533 * BufFileRead 534 * 535 * Like fread() except we assume 1-byte element size and report I/O errors via 536 * ereport(). 537 */ 538 size_t 539 BufFileRead(BufFile *file, void *ptr, size_t size) 540 { 541 size_t nread = 0; 542 size_t nthistime; 543 544 BufFileFlush(file); 545 546 while (size > 0) 547 { 548 if (file->pos >= file->nbytes) 549 { 550 /* Try to load more data into buffer. */ 551 file->curOffset += file->pos; 552 file->pos = 0; 553 file->nbytes = 0; 554 BufFileLoadBuffer(file); 555 if (file->nbytes <= 0) 556 break; /* no more data available */ 557 } 558 559 nthistime = file->nbytes - file->pos; 560 if (nthistime > size) 561 nthistime = size; 562 Assert(nthistime > 0); 563 564 memcpy(ptr, file->buffer.data + file->pos, nthistime); 565 566 file->pos += nthistime; 567 ptr = (void *) ((char *) ptr + nthistime); 568 size -= nthistime; 569 nread += nthistime; 570 } 571 572 return nread; 573 } 574 575 /* 576 * BufFileWrite 577 * 578 * Like fwrite() except we assume 1-byte element size and report errors via 579 * ereport(). 580 */ 581 size_t 582 BufFileWrite(BufFile *file, void *ptr, size_t size) 583 { 584 size_t nwritten = 0; 585 size_t nthistime; 586 587 Assert(!file->readOnly); 588 589 while (size > 0) 590 { 591 if (file->pos >= BLCKSZ) 592 { 593 /* Buffer full, dump it out */ 594 if (file->dirty) 595 BufFileDumpBuffer(file); 596 else 597 { 598 /* Hmm, went directly from reading to writing? 
*/ 599 file->curOffset += file->pos; 600 file->pos = 0; 601 file->nbytes = 0; 602 } 603 } 604 605 nthistime = BLCKSZ - file->pos; 606 if (nthistime > size) 607 nthistime = size; 608 Assert(nthistime > 0); 609 610 memcpy(file->buffer.data + file->pos, ptr, nthistime); 611 612 file->dirty = true; 613 file->pos += nthistime; 614 if (file->nbytes < file->pos) 615 file->nbytes = file->pos; 616 ptr = (void *) ((char *) ptr + nthistime); 617 size -= nthistime; 618 nwritten += nthistime; 619 } 620 621 return nwritten; 622 } 623 624 /* 625 * BufFileFlush 626 * 627 * Like fflush(), except that I/O errors are reported with ereport(). 628 */ 629 static void 630 BufFileFlush(BufFile *file) 631 { 632 if (file->dirty) 633 BufFileDumpBuffer(file); 634 635 Assert(!file->dirty); 636 } 637 638 /* 639 * BufFileSeek 640 * 641 * Like fseek(), except that target position needs two values in order to 642 * work when logical filesize exceeds maximum value representable by off_t. 643 * We do not support relative seeks across more than that, however. 644 * I/O errors are reported by ereport(). 645 * 646 * Result is 0 if OK, EOF if not. Logical position is not moved if an 647 * impossible seek is attempted. 648 */ 649 int 650 BufFileSeek(BufFile *file, int fileno, off_t offset, int whence) 651 { 652 int newFile; 653 off_t newOffset; 654 655 switch (whence) 656 { 657 case SEEK_SET: 658 if (fileno < 0) 659 return EOF; 660 newFile = fileno; 661 newOffset = offset; 662 break; 663 case SEEK_CUR: 664 665 /* 666 * Relative seek considers only the signed offset, ignoring 667 * fileno. Note that large offsets (> 1 GB) risk overflow in this 668 * add, unless we have 64-bit off_t. 
669 */ 670 newFile = file->curFile; 671 newOffset = (file->curOffset + file->pos) + offset; 672 break; 673 #ifdef NOT_USED 674 case SEEK_END: 675 /* could be implemented, not needed currently */ 676 break; 677 #endif 678 default: 679 elog(ERROR, "invalid whence: %d", whence); 680 return EOF; 681 } 682 while (newOffset < 0) 683 { 684 if (--newFile < 0) 685 return EOF; 686 newOffset += MAX_PHYSICAL_FILESIZE; 687 } 688 if (newFile == file->curFile && 689 newOffset >= file->curOffset && 690 newOffset <= file->curOffset + file->nbytes) 691 { 692 /* 693 * Seek is to a point within existing buffer; we can just adjust 694 * pos-within-buffer, without flushing buffer. Note this is OK 695 * whether reading or writing, but buffer remains dirty if we were 696 * writing. 697 */ 698 file->pos = (int) (newOffset - file->curOffset); 699 return 0; 700 } 701 /* Otherwise, must reposition buffer, so flush any dirty data */ 702 BufFileFlush(file); 703 704 /* 705 * At this point and no sooner, check for seek past last segment. The 706 * above flush could have created a new segment, so checking sooner would 707 * not work (at least not with this code). 708 */ 709 710 /* convert seek to "start of next seg" to "end of last seg" */ 711 if (newFile == file->numFiles && newOffset == 0) 712 { 713 newFile--; 714 newOffset = MAX_PHYSICAL_FILESIZE; 715 } 716 while (newOffset > MAX_PHYSICAL_FILESIZE) 717 { 718 if (++newFile >= file->numFiles) 719 return EOF; 720 newOffset -= MAX_PHYSICAL_FILESIZE; 721 } 722 if (newFile >= file->numFiles) 723 return EOF; 724 /* Seek is OK! 
*/ 725 file->curFile = newFile; 726 file->curOffset = newOffset; 727 file->pos = 0; 728 file->nbytes = 0; 729 return 0; 730 } 731 732 void 733 BufFileTell(BufFile *file, int *fileno, off_t *offset) 734 { 735 *fileno = file->curFile; 736 *offset = file->curOffset + file->pos; 737 } 738 739 /* 740 * BufFileSeekBlock --- block-oriented seek 741 * 742 * Performs absolute seek to the start of the n'th BLCKSZ-sized block of 743 * the file. Note that users of this interface will fail if their files 744 * exceed BLCKSZ * LONG_MAX bytes, but that is quite a lot; we don't work 745 * with tables bigger than that, either... 746 * 747 * Result is 0 if OK, EOF if not. Logical position is not moved if an 748 * impossible seek is attempted. 749 */ 750 int 751 BufFileSeekBlock(BufFile *file, long blknum) 752 { 753 return BufFileSeek(file, 754 (int) (blknum / BUFFILE_SEG_SIZE), 755 (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ, 756 SEEK_SET); 757 } 758 759 #ifdef NOT_USED 760 /* 761 * BufFileTellBlock --- block-oriented tell 762 * 763 * Any fractional part of a block in the current seek position is ignored. 764 */ 765 long 766 BufFileTellBlock(BufFile *file) 767 { 768 long blknum; 769 770 blknum = (file->curOffset + file->pos) / BLCKSZ; 771 blknum += file->curFile * BUFFILE_SEG_SIZE; 772 return blknum; 773 } 774 775 #endif 776 777 /* 778 * Return the current shared BufFile size. 779 * 780 * Counts any holes left behind by BufFileAppend as part of the size. 781 * ereport()s on failure. 782 */ 783 int64 784 BufFileSize(BufFile *file) 785 { 786 int64 lastFileSize; 787 788 Assert(file->fileset != NULL); 789 790 /* Get the size of the last physical file. 
*/ 791 lastFileSize = FileSize(file->files[file->numFiles - 1]); 792 if (lastFileSize < 0) 793 ereport(ERROR, 794 (errcode_for_file_access(), 795 errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m", 796 FilePathName(file->files[file->numFiles - 1]), 797 file->name))); 798 799 return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) + 800 lastFileSize; 801 } 802 803 /* 804 * Append the contents of source file (managed within shared fileset) to 805 * end of target file (managed within same shared fileset). 806 * 807 * Note that operation subsumes ownership of underlying resources from 808 * "source". Caller should never call BufFileClose against source having 809 * called here first. Resource owners for source and target must match, 810 * too. 811 * 812 * This operation works by manipulating lists of segment files, so the 813 * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned 814 * boundary, typically creating empty holes before the boundary. These 815 * areas do not contain any interesting data, and cannot be read from by 816 * caller. 817 * 818 * Returns the block number within target where the contents of source 819 * begins. Caller should apply this as an offset when working off block 820 * positions that are in terms of the original BufFile space. 
821 */ 822 long 823 BufFileAppend(BufFile *target, BufFile *source) 824 { 825 long startBlock = target->numFiles * BUFFILE_SEG_SIZE; 826 int newNumFiles = target->numFiles + source->numFiles; 827 int i; 828 829 Assert(target->fileset != NULL); 830 Assert(source->readOnly); 831 Assert(!source->dirty); 832 Assert(source->fileset != NULL); 833 834 if (target->resowner != source->resowner) 835 elog(ERROR, "could not append BufFile with non-matching resource owner"); 836 837 target->files = (File *) 838 repalloc(target->files, sizeof(File) * newNumFiles); 839 for (i = target->numFiles; i < newNumFiles; i++) 840 target->files[i] = source->files[i - target->numFiles]; 841 target->numFiles = newNumFiles; 842 843 return startBlock; 844 } 845