1 /*------------------------------------------------------------------------- 2 * 3 * md.c 4 * This code manages relations that reside on magnetic disk. 5 * 6 * Or at least, that was what the Berkeley folk had in mind when they named 7 * this file. In reality, what this code provides is an interface from 8 * the smgr API to Unix-like filesystem APIs, so it will work with any type 9 * of device for which the operating system provides filesystem support. 10 * It doesn't matter whether the bits are on spinning rust or some other 11 * storage technology. 12 * 13 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 14 * Portions Copyright (c) 1994, Regents of the University of California 15 * 16 * 17 * IDENTIFICATION 18 * src/backend/storage/smgr/md.c 19 * 20 *------------------------------------------------------------------------- 21 */ 22 #include "postgres.h" 23 24 #include <unistd.h> 25 #include <fcntl.h> 26 #include <sys/file.h> 27 28 #include "miscadmin.h" 29 #include "access/xlogutils.h" 30 #include "access/xlog.h" 31 #include "pgstat.h" 32 #include "postmaster/bgwriter.h" 33 #include "storage/fd.h" 34 #include "storage/bufmgr.h" 35 #include "storage/md.h" 36 #include "storage/relfilenode.h" 37 #include "storage/smgr.h" 38 #include "storage/sync.h" 39 #include "utils/hsearch.h" 40 #include "utils/memutils.h" 41 #include "pg_trace.h" 42 43 /* 44 * The magnetic disk storage manager keeps track of open file 45 * descriptors in its own descriptor pool. This is done to make it 46 * easier to support relations that are larger than the operating 47 * system's file size limit (often 2GBytes). In order to do that, 48 * we break relations up into "segment" files that are each shorter than 49 * the OS file size limit. The segment size is set by the RELSEG_SIZE 50 * configuration constant in pg_config.h. 51 * 52 * On disk, a relation must consist of consecutively numbered segment 53 * files in the pattern 54 * -- Zero or more full segments of exactly RELSEG_SIZE blocks each 55 * -- Exactly one partial segment of size 0 <= size < RELSEG_SIZE blocks 56 * -- Optionally, any number of inactive segments of size 0 blocks. 57 * The full and partial segments are collectively the "active" segments. 58 * Inactive segments are those that once contained data but are currently 59 * not needed because of an mdtruncate() operation. The reason for leaving 60 * them present at size zero, rather than unlinking them, is that other 61 * backends and/or the checkpointer might be holding open file references to 62 * such segments. If the relation expands again after mdtruncate(), such 63 * that a deactivated segment becomes active again, it is important that 64 * such file references still be valid --- else data might get written 65 * out to an unlinked old copy of a segment file that will eventually 66 * disappear. 67 * 68 * File descriptors are stored in the per-fork md_seg_fds arrays inside 69 * SMgrRelation. The length of these arrays is stored in md_num_open_segs. 70 * Note that a fork's md_num_open_segs having a specific value does not 71 * necessarily mean the relation doesn't have additional segments; we may 72 * just not have opened the next segment yet. (We could not have "all 73 * segments are in the array" as an invariant anyway, since another backend 74 * could extend the relation while we aren't looking.) We do not have 75 * entries for inactive segments, however; as soon as we find a partial 76 * segment, we assume that any subsequent segments are inactive. 77 * 78 * The entire MdfdVec array is palloc'd in the MdCxt memory context. 79 */ 80 81 typedef struct _MdfdVec 82 { 83 File mdfd_vfd; /* fd number in fd.c's pool */ 84 BlockNumber mdfd_segno; /* segment number, from 0 */ 85 } MdfdVec; 86 87 static MemoryContext MdCxt; /* context for all MdfdVec objects */ 88 89 90 /* Populate a file tag describing an md.c segment file. */ 91 #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \ 92 ( \ 93 memset(&(a), 0, sizeof(FileTag)), \ 94 (a).handler = SYNC_HANDLER_MD, \ 95 (a).rnode = (xx_rnode), \ 96 (a).forknum = (xx_forknum), \ 97 (a).segno = (xx_segno) \ 98 ) 99 100 101 /*** behavior for mdopen & _mdfd_getseg ***/ 102 /* ereport if segment not present */ 103 #define EXTENSION_FAIL (1 << 0) 104 /* return NULL if segment not present */ 105 #define EXTENSION_RETURN_NULL (1 << 1) 106 /* create new segments as needed */ 107 #define EXTENSION_CREATE (1 << 2) 108 /* create new segments if needed during recovery */ 109 #define EXTENSION_CREATE_RECOVERY (1 << 3) 110 /* 111 * Allow opening segments which are preceded by segments smaller than 112 * RELSEG_SIZE, e.g. inactive segments (see above). Note that this breaks 113 * mdnblocks() and related functionality henceforth - which currently is ok, 114 * because this is only required in the checkpointer which never uses 115 * mdnblocks(). 116 */ 117 #define EXTENSION_DONT_CHECK_SIZE (1 << 4) 118 119 120 /* local routines */ 121 static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, 122 bool isRedo); 123 static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, int behavior); 124 static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, 125 MdfdVec *seg); 126 static void register_unlink_segment(RelFileNodeBackend rnode, ForkNumber forknum, 127 BlockNumber segno); 128 static void register_forget_request(RelFileNodeBackend rnode, ForkNumber forknum, 129 BlockNumber segno); 130 static void _fdvec_resize(SMgrRelation reln, 131 ForkNumber forknum, 132 int nseg); 133 static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, 134 BlockNumber segno); 135 static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, 136 BlockNumber segno, int oflags); 137 static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, 138 BlockNumber blkno, bool skipFsync, int behavior); 139 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, 140 MdfdVec *seg); 141 142 143 /* 144 * mdinit() -- Initialize private state for magnetic disk storage manager. 145 */ 146 void 147 mdinit(void) 148 { 149 MdCxt = AllocSetContextCreate(TopMemoryContext, 150 "MdSmgr", 151 ALLOCSET_DEFAULT_SIZES); 152 } 153 154 /* 155 * mdexists() -- Does the physical file exist? 156 * 157 * Note: this will return true for lingering files, with pending deletions 158 */ 159 bool 160 mdexists(SMgrRelation reln, ForkNumber forkNum) 161 { 162 /* 163 * Close it first, to ensure that we notice if the fork has been unlinked 164 * since we opened it. 165 */ 166 mdclose(reln, forkNum); 167 168 return (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) != NULL); 169 } 170 171 /* 172 * mdcreate() -- Create a new relation on magnetic disk. 173 * 174 * If isRedo is true, it's okay for the relation to exist already. 175 */ 176 void 177 mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) 178 { 179 MdfdVec *mdfd; 180 char *path; 181 File fd; 182 183 if (isRedo && reln->md_num_open_segs[forkNum] > 0) 184 return; /* created and opened already... */ 185 186 Assert(reln->md_num_open_segs[forkNum] == 0); 187 188 path = relpath(reln->smgr_rnode, forkNum); 189 190 fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); 191 192 if (fd < 0) 193 { 194 int save_errno = errno; 195 196 if (isRedo) 197 fd = PathNameOpenFile(path, O_RDWR | PG_BINARY); 198 if (fd < 0) 199 { 200 /* be sure to report the error reported by create, not open */ 201 errno = save_errno; 202 ereport(ERROR, 203 (errcode_for_file_access(), 204 errmsg("could not create file \"%s\": %m", path))); 205 } 206 } 207 208 pfree(path); 209 210 _fdvec_resize(reln, forkNum, 1); 211 mdfd = &reln->md_seg_fds[forkNum][0]; 212 mdfd->mdfd_vfd = fd; 213 mdfd->mdfd_segno = 0; 214 } 215 216 /* 217 * mdunlink() -- Unlink a relation. 218 * 219 * Note that we're passed a RelFileNodeBackend --- by the time this is called, 220 * there won't be an SMgrRelation hashtable entry anymore. 221 * 222 * forkNum can be a fork number to delete a specific fork, or InvalidForkNumber 223 * to delete all forks. 224 * 225 * For regular relations, we don't unlink the first segment file of the rel, 226 * but just truncate it to zero length, and record a request to unlink it after 227 * the next checkpoint. Additional segments can be unlinked immediately, 228 * however. Leaving the empty file in place prevents that relfilenode 229 * number from being reused. The scenario this protects us from is: 230 * 1. We delete a relation (and commit, and actually remove its file). 231 * 2. We create a new relation, which by chance gets the same relfilenode as 232 * the just-deleted one (OIDs must've wrapped around for that to happen). 233 * 3. We crash before another checkpoint occurs. 234 * During replay, we would delete the file and then recreate it, which is fine 235 * if the contents of the file were repopulated by subsequent WAL entries. 236 * But if we didn't WAL-log insertions, but instead relied on fsyncing the 237 * file after populating it (as for instance CLUSTER and CREATE INDEX do), 238 * the contents of the file would be lost forever. By leaving the empty file 239 * until after the next checkpoint, we prevent reassignment of the relfilenode 240 * number until it's safe, because relfilenode assignment skips over any 241 * existing file. 242 * 243 * We do not need to go through this dance for temp relations, though, because 244 * we never make WAL entries for temp rels, and so a temp rel poses no threat 245 * to the health of a regular rel that has taken over its relfilenode number. 246 * The fact that temp rels and regular rels have different file naming 247 * patterns provides additional safety. 248 * 249 * All the above applies only to the relation's main fork; other forks can 250 * just be removed immediately, since they are not needed to prevent the 251 * relfilenode number from being recycled. Also, we do not carefully 252 * track whether other forks have been created or not, but just attempt to 253 * unlink them unconditionally; so we should never complain about ENOENT. 254 * 255 * If isRedo is true, it's unsurprising for the relation to be already gone. 256 * Also, we should remove the file immediately instead of queuing a request 257 * for later, since during redo there's no possibility of creating a 258 * conflicting relation. 259 * 260 * Note: any failure should be reported as WARNING not ERROR, because 261 * we are usually not in a transaction anymore when this is called. 262 */ 263 void 264 mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) 265 { 266 /* Now do the per-fork work */ 267 if (forkNum == InvalidForkNumber) 268 { 269 for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) 270 mdunlinkfork(rnode, forkNum, isRedo); 271 } 272 else 273 mdunlinkfork(rnode, forkNum, isRedo); 274 } 275 276 /* 277 * Truncate a file to release disk space. 278 */ 279 static int 280 do_truncate(const char *path) 281 { 282 int save_errno; 283 int ret; 284 int fd; 285 286 /* truncate(2) would be easier here, but Windows hasn't got it */ 287 fd = OpenTransientFile(path, O_RDWR | PG_BINARY); 288 if (fd >= 0) 289 { 290 ret = ftruncate(fd, 0); 291 save_errno = errno; 292 CloseTransientFile(fd); 293 errno = save_errno; 294 } 295 else 296 ret = -1; 297 298 /* Log a warning here to avoid repetition in callers. */ 299 if (ret < 0 && errno != ENOENT) 300 { 301 save_errno = errno; 302 ereport(WARNING, 303 (errcode_for_file_access(), 304 errmsg("could not truncate file \"%s\": %m", path))); 305 errno = save_errno; 306 } 307 308 return ret; 309 } 310 311 static void 312 mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) 313 { 314 char *path; 315 int ret; 316 317 path = relpath(rnode, forkNum); 318 319 /* 320 * Delete or truncate the first segment. 321 */ 322 if (isRedo || forkNum != MAIN_FORKNUM || RelFileNodeBackendIsTemp(rnode)) 323 { 324 if (!RelFileNodeBackendIsTemp(rnode)) 325 { 326 /* Prevent other backends' fds from holding on to the disk space */ 327 ret = do_truncate(path); 328 329 /* Forget any pending sync requests for the first segment */ 330 register_forget_request(rnode, forkNum, 0 /* first seg */ ); 331 } 332 else 333 ret = 0; 334 335 /* Next unlink the file, unless it was already found to be missing */ 336 if (ret == 0 || errno != ENOENT) 337 { 338 ret = unlink(path); 339 if (ret < 0 && errno != ENOENT) 340 ereport(WARNING, 341 (errcode_for_file_access(), 342 errmsg("could not remove file \"%s\": %m", path))); 343 } 344 } 345 else 346 { 347 /* Prevent other backends' fds from holding on to the disk space */ 348 ret = do_truncate(path); 349 350 /* Register request to unlink first segment later */ 351 register_unlink_segment(rnode, forkNum, 0 /* first seg */ ); 352 } 353 354 /* 355 * Delete any additional segments. 356 */ 357 if (ret >= 0) 358 { 359 char *segpath = (char *) palloc(strlen(path) + 12); 360 BlockNumber segno; 361 362 /* 363 * Note that because we loop until getting ENOENT, we will correctly 364 * remove all inactive segments as well as active ones. 365 */ 366 for (segno = 1;; segno++) 367 { 368 sprintf(segpath, "%s.%u", path, segno); 369 370 if (!RelFileNodeBackendIsTemp(rnode)) 371 { 372 /* 373 * Prevent other backends' fds from holding on to the disk 374 * space. 375 */ 376 if (do_truncate(segpath) < 0 && errno == ENOENT) 377 break; 378 379 /* 380 * Forget any pending sync requests for this segment before we 381 * try to unlink. 382 */ 383 register_forget_request(rnode, forkNum, segno); 384 } 385 386 if (unlink(segpath) < 0) 387 { 388 /* ENOENT is expected after the last segment... */ 389 if (errno != ENOENT) 390 ereport(WARNING, 391 (errcode_for_file_access(), 392 errmsg("could not remove file \"%s\": %m", segpath))); 393 break; 394 } 395 } 396 pfree(segpath); 397 } 398 399 pfree(path); 400 } 401 402 /* 403 * mdextend() -- Add a block to the specified relation. 404 * 405 * The semantics are nearly the same as mdwrite(): write at the 406 * specified position. However, this is to be used for the case of 407 * extending a relation (i.e., blocknum is at or beyond the current 408 * EOF). Note that we assume writing a block beyond current EOF 409 * causes intervening file space to become filled with zeroes. 410 */ 411 void 412 mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 413 char *buffer, bool skipFsync) 414 { 415 off_t seekpos; 416 int nbytes; 417 MdfdVec *v; 418 419 /* This assert is too expensive to have on normally ... */ 420 #ifdef CHECK_WRITE_VS_EXTEND 421 Assert(blocknum >= mdnblocks(reln, forknum)); 422 #endif 423 424 /* 425 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any 426 * more --- we mustn't create a block whose number actually is 427 * InvalidBlockNumber. (Note that this failure should be unreachable 428 * because of upstream checks in bufmgr.c.) 429 */ 430 if (blocknum == InvalidBlockNumber) 431 ereport(ERROR, 432 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 433 errmsg("cannot extend file \"%s\" beyond %u blocks", 434 relpath(reln->smgr_rnode, forknum), 435 InvalidBlockNumber))); 436 437 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); 438 439 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); 440 441 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); 442 443 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ) 444 { 445 if (nbytes < 0) 446 ereport(ERROR, 447 (errcode_for_file_access(), 448 errmsg("could not extend file \"%s\": %m", 449 FilePathName(v->mdfd_vfd)), 450 errhint("Check free disk space."))); 451 /* short write: complain appropriately */ 452 ereport(ERROR, 453 (errcode(ERRCODE_DISK_FULL), 454 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", 455 FilePathName(v->mdfd_vfd), 456 nbytes, BLCKSZ, blocknum), 457 errhint("Check free disk space."))); 458 } 459 460 if (!skipFsync && !SmgrIsTemp(reln)) 461 register_dirty_segment(reln, forknum, v); 462 463 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); 464 } 465 466 /* 467 * mdopen() -- Open the specified relation. 468 * 469 * Note we only open the first segment, when there are multiple segments. 470 * 471 * If first segment is not present, either ereport or return NULL according 472 * to "behavior". We treat EXTENSION_CREATE the same as EXTENSION_FAIL; 473 * EXTENSION_CREATE means it's OK to extend an existing relation, not to 474 * invent one out of whole cloth. 475 */ 476 static MdfdVec * 477 mdopen(SMgrRelation reln, ForkNumber forknum, int behavior) 478 { 479 MdfdVec *mdfd; 480 char *path; 481 File fd; 482 483 /* No work if already open */ 484 if (reln->md_num_open_segs[forknum] > 0) 485 return &reln->md_seg_fds[forknum][0]; 486 487 path = relpath(reln->smgr_rnode, forknum); 488 489 fd = PathNameOpenFile(path, O_RDWR | PG_BINARY); 490 491 if (fd < 0) 492 { 493 if ((behavior & EXTENSION_RETURN_NULL) && 494 FILE_POSSIBLY_DELETED(errno)) 495 { 496 pfree(path); 497 return NULL; 498 } 499 ereport(ERROR, 500 (errcode_for_file_access(), 501 errmsg("could not open file \"%s\": %m", path))); 502 } 503 504 pfree(path); 505 506 _fdvec_resize(reln, forknum, 1); 507 mdfd = &reln->md_seg_fds[forknum][0]; 508 mdfd->mdfd_vfd = fd; 509 mdfd->mdfd_segno = 0; 510 511 Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE)); 512 513 return mdfd; 514 } 515 516 /* 517 * mdclose() -- Close the specified relation, if it isn't closed already. 518 */ 519 void 520 mdclose(SMgrRelation reln, ForkNumber forknum) 521 { 522 int nopensegs = reln->md_num_open_segs[forknum]; 523 524 /* No work if already closed */ 525 if (nopensegs == 0) 526 return; 527 528 /* close segments starting from the end */ 529 while (nopensegs > 0) 530 { 531 MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; 532 533 FileClose(v->mdfd_vfd); 534 _fdvec_resize(reln, forknum, nopensegs - 1); 535 nopensegs--; 536 } 537 } 538 539 /* 540 * mdprefetch() -- Initiate asynchronous read of the specified block of a relation 541 */ 542 void 543 mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) 544 { 545 #ifdef USE_PREFETCH 546 off_t seekpos; 547 MdfdVec *v; 548 549 v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); 550 551 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); 552 553 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); 554 555 (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH); 556 #endif /* USE_PREFETCH */ 557 } 558 559 /* 560 * mdwriteback() -- Tell the kernel to write pages back to storage. 561 * 562 * This accepts a range of blocks because flushing several pages at once is 563 * considerably more efficient than doing so individually. 564 */ 565 void 566 mdwriteback(SMgrRelation reln, ForkNumber forknum, 567 BlockNumber blocknum, BlockNumber nblocks) 568 { 569 /* 570 * Issue flush requests in as few requests as possible; have to split at 571 * segment boundaries though, since those are actually separate files. 572 */ 573 while (nblocks > 0) 574 { 575 BlockNumber nflush = nblocks; 576 off_t seekpos; 577 MdfdVec *v; 578 int segnum_start, 579 segnum_end; 580 581 v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ , 582 EXTENSION_RETURN_NULL); 583 584 /* 585 * We might be flushing buffers of already removed relations, that's 586 * ok, just ignore that case. 587 */ 588 if (!v) 589 return; 590 591 /* compute offset inside the current segment */ 592 segnum_start = blocknum / RELSEG_SIZE; 593 594 /* compute number of desired writes within the current segment */ 595 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE; 596 if (segnum_start != segnum_end) 597 nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)); 598 599 Assert(nflush >= 1); 600 Assert(nflush <= nblocks); 601 602 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); 603 604 FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH); 605 606 nblocks -= nflush; 607 blocknum += nflush; 608 } 609 } 610 611 /* 612 * mdread() -- Read the specified block from a relation. 613 */ 614 void 615 mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 616 char *buffer) 617 { 618 off_t seekpos; 619 int nbytes; 620 MdfdVec *v; 621 622 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, 623 reln->smgr_rnode.node.spcNode, 624 reln->smgr_rnode.node.dbNode, 625 reln->smgr_rnode.node.relNode, 626 reln->smgr_rnode.backend); 627 628 v = _mdfd_getseg(reln, forknum, blocknum, false, 629 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); 630 631 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); 632 633 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); 634 635 nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ); 636 637 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, 638 reln->smgr_rnode.node.spcNode, 639 reln->smgr_rnode.node.dbNode, 640 reln->smgr_rnode.node.relNode, 641 reln->smgr_rnode.backend, 642 nbytes, 643 BLCKSZ); 644 645 if (nbytes != BLCKSZ) 646 { 647 if (nbytes < 0) 648 ereport(ERROR, 649 (errcode_for_file_access(), 650 errmsg("could not read block %u in file \"%s\": %m", 651 blocknum, FilePathName(v->mdfd_vfd)))); 652 653 /* 654 * Short read: we are at or past EOF, or we read a partial block at 655 * EOF. Normally this is an error; upper levels should never try to 656 * read a nonexistent block. However, if zero_damaged_pages is ON or 657 * we are InRecovery, we should instead return zeroes without 658 * complaining. This allows, for example, the case of trying to 659 * update a block that was later truncated away. 660 */ 661 if (zero_damaged_pages || InRecovery) 662 MemSet(buffer, 0, BLCKSZ); 663 else 664 ereport(ERROR, 665 (errcode(ERRCODE_DATA_CORRUPTED), 666 errmsg("could not read block %u in file \"%s\": read only %d of %d bytes", 667 blocknum, FilePathName(v->mdfd_vfd), 668 nbytes, BLCKSZ))); 669 } 670 } 671 672 /* 673 * mdwrite() -- Write the supplied block at the appropriate location. 674 * 675 * This is to be used only for updating already-existing blocks of a 676 * relation (ie, those before the current EOF). To extend a relation, 677 * use mdextend(). 678 */ 679 void 680 mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 681 char *buffer, bool skipFsync) 682 { 683 off_t seekpos; 684 int nbytes; 685 MdfdVec *v; 686 687 /* This assert is too expensive to have on normally ... */ 688 #ifdef CHECK_WRITE_VS_EXTEND 689 Assert(blocknum < mdnblocks(reln, forknum)); 690 #endif 691 692 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, 693 reln->smgr_rnode.node.spcNode, 694 reln->smgr_rnode.node.dbNode, 695 reln->smgr_rnode.node.relNode, 696 reln->smgr_rnode.backend); 697 698 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, 699 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); 700 701 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); 702 703 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); 704 705 nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE); 706 707 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, 708 reln->smgr_rnode.node.spcNode, 709 reln->smgr_rnode.node.dbNode, 710 reln->smgr_rnode.node.relNode, 711 reln->smgr_rnode.backend, 712 nbytes, 713 BLCKSZ); 714 715 if (nbytes != BLCKSZ) 716 { 717 if (nbytes < 0) 718 ereport(ERROR, 719 (errcode_for_file_access(), 720 errmsg("could not write block %u in file \"%s\": %m", 721 blocknum, FilePathName(v->mdfd_vfd)))); 722 /* short write: complain appropriately */ 723 ereport(ERROR, 724 (errcode(ERRCODE_DISK_FULL), 725 errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes", 726 blocknum, 727 FilePathName(v->mdfd_vfd), 728 nbytes, BLCKSZ), 729 errhint("Check free disk space."))); 730 } 731 732 if (!skipFsync && !SmgrIsTemp(reln)) 733 register_dirty_segment(reln, forknum, v); 734 } 735 736 /* 737 * mdnblocks() -- Get the number of blocks stored in a relation. 738 * 739 * Important side effect: all active segments of the relation are opened 740 * and added to the mdfd_seg_fds array. If this routine has not been 741 * called, then only segments up to the last one actually touched 742 * are present in the array. 743 */ 744 BlockNumber 745 mdnblocks(SMgrRelation reln, ForkNumber forknum) 746 { 747 MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL); 748 BlockNumber nblocks; 749 BlockNumber segno = 0; 750 751 /* mdopen has opened the first segment */ 752 Assert(reln->md_num_open_segs[forknum] > 0); 753 754 /* 755 * Start from the last open segments, to avoid redundant seeks. We have 756 * previously verified that these segments are exactly RELSEG_SIZE long, 757 * and it's useless to recheck that each time. 758 * 759 * NOTE: this assumption could only be wrong if another backend has 760 * truncated the relation. We rely on higher code levels to handle that 761 * scenario by closing and re-opening the md fd, which is handled via 762 * relcache flush. (Since the checkpointer doesn't participate in 763 * relcache flush, it could have segment entries for inactive segments; 764 * that's OK because the checkpointer never needs to compute relation 765 * size.) 766 */ 767 segno = reln->md_num_open_segs[forknum] - 1; 768 v = &reln->md_seg_fds[forknum][segno]; 769 770 for (;;) 771 { 772 nblocks = _mdnblocks(reln, forknum, v); 773 if (nblocks > ((BlockNumber) RELSEG_SIZE)) 774 elog(FATAL, "segment too big"); 775 if (nblocks < ((BlockNumber) RELSEG_SIZE)) 776 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks; 777 778 /* 779 * If segment is exactly RELSEG_SIZE, advance to next one. 780 */ 781 segno++; 782 783 /* 784 * We used to pass O_CREAT here, but that has the disadvantage that it 785 * might create a segment which has vanished through some operating 786 * system misadventure. In such a case, creating the segment here 787 * undermines _mdfd_getseg's attempts to notice and report an error 788 * upon access to a missing segment. 789 */ 790 v = _mdfd_openseg(reln, forknum, segno, 0); 791 if (v == NULL) 792 return segno * ((BlockNumber) RELSEG_SIZE); 793 } 794 } 795 796 /* 797 * mdtruncate() -- Truncate relation to specified number of blocks. 798 */ 799 void 800 mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) 801 { 802 BlockNumber curnblk; 803 BlockNumber priorblocks; 804 int curopensegs; 805 806 /* 807 * NOTE: mdnblocks makes sure we have opened all active segments, so that 808 * truncation loop will get them all! 809 */ 810 curnblk = mdnblocks(reln, forknum); 811 if (nblocks > curnblk) 812 { 813 /* Bogus request ... but no complaint if InRecovery */ 814 if (InRecovery) 815 return; 816 ereport(ERROR, 817 (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now", 818 relpath(reln->smgr_rnode, forknum), 819 nblocks, curnblk))); 820 } 821 if (nblocks == curnblk) 822 return; /* no work */ 823 824 /* 825 * Truncate segments, starting at the last one. Starting at the end makes 826 * managing the memory for the fd array easier, should there be errors. 827 */ 828 curopensegs = reln->md_num_open_segs[forknum]; 829 while (curopensegs > 0) 830 { 831 MdfdVec *v; 832 833 priorblocks = (curopensegs - 1) * RELSEG_SIZE; 834 835 v = &reln->md_seg_fds[forknum][curopensegs - 1]; 836 837 if (priorblocks > nblocks) 838 { 839 /* 840 * This segment is no longer active. We truncate the file, but do 841 * not delete it, for reasons explained in the header comments. 842 */ 843 if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) 844 ereport(ERROR, 845 (errcode_for_file_access(), 846 errmsg("could not truncate file \"%s\": %m", 847 FilePathName(v->mdfd_vfd)))); 848 849 if (!SmgrIsTemp(reln)) 850 register_dirty_segment(reln, forknum, v); 851 852 /* we never drop the 1st segment */ 853 Assert(v != &reln->md_seg_fds[forknum][0]); 854 855 FileClose(v->mdfd_vfd); 856 _fdvec_resize(reln, forknum, curopensegs - 1); 857 } 858 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) 859 { 860 /* 861 * This is the last segment we want to keep. Truncate the file to 862 * the right length. NOTE: if nblocks is exactly a multiple K of 863 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but 864 * keep it. This adheres to the invariant given in the header 865 * comments. 866 */ 867 BlockNumber lastsegblocks = nblocks - priorblocks; 868 869 if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) 870 ereport(ERROR, 871 (errcode_for_file_access(), 872 errmsg("could not truncate file \"%s\" to %u blocks: %m", 873 FilePathName(v->mdfd_vfd), 874 nblocks))); 875 if (!SmgrIsTemp(reln)) 876 register_dirty_segment(reln, forknum, v); 877 } 878 else 879 { 880 /* 881 * We still need this segment, so nothing to do for this and any 882 * earlier segment. 883 */ 884 break; 885 } 886 curopensegs--; 887 } 888 } 889 890 /* 891 * mdimmedsync() -- Immediately sync a relation to stable storage. 892 * 893 * Note that only writes already issued are synced; this routine knows 894 * nothing of dirty buffers that may exist inside the buffer manager. 895 */ 896 void 897 mdimmedsync(SMgrRelation reln, ForkNumber forknum) 898 { 899 int segno; 900 901 /* 902 * NOTE: mdnblocks makes sure we have opened all active segments, so that 903 * fsync loop will get them all! 904 */ 905 mdnblocks(reln, forknum); 906 907 segno = reln->md_num_open_segs[forknum]; 908 909 while (segno > 0) 910 { 911 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; 912 913 if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0) 914 ereport(data_sync_elevel(ERROR), 915 (errcode_for_file_access(), 916 errmsg("could not fsync file \"%s\": %m", 917 FilePathName(v->mdfd_vfd)))); 918 segno--; 919 } 920 } 921 922 /* 923 * register_dirty_segment() -- Mark a relation segment as needing fsync 924 * 925 * If there is a local pending-ops table, just make an entry in it for 926 * ProcessSyncRequests to process later. Otherwise, try to pass off the 927 * fsync request to the checkpointer process. If that fails, just do the 928 * fsync locally before returning (we hope this will not happen often 929 * enough to be a performance problem). 930 */ 931 static void 932 register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) 933 { 934 FileTag tag; 935 936 INIT_MD_FILETAG(tag, reln->smgr_rnode.node, forknum, seg->mdfd_segno); 937 938 /* Temp relations should never be fsync'd */ 939 Assert(!SmgrIsTemp(reln)); 940 941 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) 942 { 943 ereport(DEBUG1, 944 (errmsg("could not forward fsync request because request queue is full"))); 945 946 if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) 947 ereport(data_sync_elevel(ERROR), 948 (errcode_for_file_access(), 949 errmsg("could not fsync file \"%s\": %m", 950 FilePathName(seg->mdfd_vfd)))); 951 } 952 } 953 954 /* 955 * register_unlink_segment() -- Schedule a file to be deleted after next checkpoint 956 */ 957 static void 958 register_unlink_segment(RelFileNodeBackend rnode, ForkNumber forknum, 959 BlockNumber segno) 960 { 961 FileTag tag; 962 963 INIT_MD_FILETAG(tag, rnode.node, forknum, segno); 964 965 /* Should never be used with temp relations */ 966 Assert(!RelFileNodeBackendIsTemp(rnode)); 967 968 RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ ); 969 } 970 971 /* 972 * register_forget_request() -- forget any fsyncs for a relation fork's segment 973 */ 974 static void 975 register_forget_request(RelFileNodeBackend rnode, ForkNumber forknum, 976 BlockNumber segno) 977 { 978 FileTag tag; 979 980 INIT_MD_FILETAG(tag, rnode.node, forknum, segno); 981 982 RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ ); 983 } 984 985 /* 986 * ForgetDatabaseSyncRequests -- forget any fsyncs and unlinks for a DB 987 */ 988 void 989 ForgetDatabaseSyncRequests(Oid dbid) 990 { 991 FileTag tag; 992 RelFileNode rnode; 993 994 rnode.dbNode = dbid; 995 rnode.spcNode = 0; 996 rnode.relNode = 0; 997 998 INIT_MD_FILETAG(tag, rnode, InvalidForkNumber, InvalidBlockNumber); 999 1000 RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ ); 1001 } 1002 1003 /* 1004 * DropRelationFiles -- drop files of all given relations 1005 */ 1006 void 1007 DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo) 1008 { 1009 SMgrRelation *srels; 1010 int i; 1011 1012 srels = palloc(sizeof(SMgrRelation) * ndelrels); 1013 for (i = 0; i < ndelrels; i++) 1014 { 1015 SMgrRelation srel = smgropen(delrels[i], InvalidBackendId); 1016 1017 if (isRedo) 1018 { 1019 ForkNumber fork; 1020 1021 for (fork = 0; fork <= MAX_FORKNUM; fork++) 1022 XLogDropRelation(delrels[i], fork); 1023 } 1024 srels[i] = srel; 1025 } 1026 1027 smgrdounlinkall(srels, ndelrels, isRedo); 1028 1029 for (i = 0; i < ndelrels; i++) 1030 smgrclose(srels[i]); 1031 pfree(srels); 1032 } 1033 1034 1035 /* 1036 * _fdvec_resize() -- Resize the fork's open segments array 1037 */ 1038 static void 1039 _fdvec_resize(SMgrRelation reln, 1040 ForkNumber forknum, 1041 int nseg) 1042 { 1043 if (nseg == 0) 1044 { 1045 if (reln->md_num_open_segs[forknum] > 0) 1046 { 1047 pfree(reln->md_seg_fds[forknum]); 1048 reln->md_seg_fds[forknum] = NULL; 1049 } 1050 } 1051 else if (reln->md_num_open_segs[forknum] == 0) 1052 { 1053 reln->md_seg_fds[forknum] = 1054 MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg); 1055 } 1056 else 1057 { 1058 /* 1059 * It doesn't seem worthwhile complicating the code to amortize 1060 * repalloc() calls. Those are far faster than PathNameOpenFile() or 1061 * FileClose(), and the memory context internally will sometimes avoid 1062 * doing an actual reallocation. 1063 */ 1064 reln->md_seg_fds[forknum] = 1065 repalloc(reln->md_seg_fds[forknum], 1066 sizeof(MdfdVec) * nseg); 1067 } 1068 1069 reln->md_num_open_segs[forknum] = nseg; 1070 } 1071 1072 /* 1073 * Return the filename for the specified segment of the relation. The 1074 * returned string is palloc'd. 1075 */ 1076 static char * 1077 _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) 1078 { 1079 char *path, 1080 *fullpath; 1081 1082 path = relpath(reln->smgr_rnode, forknum); 1083 1084 if (segno > 0) 1085 { 1086 fullpath = psprintf("%s.%u", path, segno); 1087 pfree(path); 1088 } 1089 else 1090 fullpath = path; 1091 1092 return fullpath; 1093 } 1094 1095 /* 1096 * Open the specified segment of the relation, 1097 * and make a MdfdVec object for it. Returns NULL on failure. 1098 */ 1099 static MdfdVec * 1100 _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, 1101 int oflags) 1102 { 1103 MdfdVec *v; 1104 int fd; 1105 char *fullpath; 1106 1107 fullpath = _mdfd_segpath(reln, forknum, segno); 1108 1109 /* open the file */ 1110 fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags); 1111 1112 pfree(fullpath); 1113 1114 if (fd < 0) 1115 return NULL; 1116 1117 if (segno <= reln->md_num_open_segs[forknum]) 1118 _fdvec_resize(reln, forknum, segno + 1); 1119 1120 /* fill the entry */ 1121 v = &reln->md_seg_fds[forknum][segno]; 1122 v->mdfd_vfd = fd; 1123 v->mdfd_segno = segno; 1124 1125 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); 1126 1127 /* all done */ 1128 return v; 1129 } 1130 1131 /* 1132 * _mdfd_getseg() -- Find the segment of the relation holding the 1133 * specified block. 1134 * 1135 * If the segment doesn't exist, we ereport, return NULL, or create the 1136 * segment, according to "behavior". Note: skipFsync is only used in the 1137 * EXTENSION_CREATE case. 1138 */ 1139 static MdfdVec * 1140 _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, 1141 bool skipFsync, int behavior) 1142 { 1143 MdfdVec *v; 1144 BlockNumber targetseg; 1145 BlockNumber nextsegno; 1146 1147 /* some way to handle non-existent segments needs to be specified */ 1148 Assert(behavior & 1149 (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL)); 1150 1151 targetseg = blkno / ((BlockNumber) RELSEG_SIZE); 1152 1153 /* if an existing and opened segment, we're done */ 1154 if (targetseg < reln->md_num_open_segs[forknum]) 1155 { 1156 v = &reln->md_seg_fds[forknum][targetseg]; 1157 return v; 1158 } 1159 1160 /* 1161 * The target segment is not yet open. Iterate over all the segments 1162 * between the last opened and the target segment. This way missing 1163 * segments either raise an error, or get created (according to 1164 * 'behavior'). Start with either the last opened, or the first segment if 1165 * none was opened before. 1166 */ 1167 if (reln->md_num_open_segs[forknum] > 0) 1168 v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1]; 1169 else 1170 { 1171 v = mdopen(reln, forknum, behavior); 1172 if (!v) 1173 return NULL; /* if behavior & EXTENSION_RETURN_NULL */ 1174 } 1175 1176 for (nextsegno = reln->md_num_open_segs[forknum]; 1177 nextsegno <= targetseg; nextsegno++) 1178 { 1179 BlockNumber nblocks = _mdnblocks(reln, forknum, v); 1180 int flags = 0; 1181 1182 Assert(nextsegno == v->mdfd_segno + 1); 1183 1184 if (nblocks > ((BlockNumber) RELSEG_SIZE)) 1185 elog(FATAL, "segment too big"); 1186 1187 if ((behavior & EXTENSION_CREATE) || 1188 (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY))) 1189 { 1190 /* 1191 * Normally we will create new segments only if authorized by the 1192 * caller (i.e., we are doing mdextend()). But when doing WAL 1193 * recovery, create segments anyway; this allows cases such as 1194 * replaying WAL data that has a write into a high-numbered 1195 * segment of a relation that was later deleted. We want to go 1196 * ahead and create the segments so we can finish out the replay. 1197 * However if the caller has specified 1198 * EXTENSION_REALLY_RETURN_NULL, then extension is not desired 1199 * even in recovery; we won't reach this point in that case. 1200 * 1201 * We have to maintain the invariant that segments before the last 1202 * active segment are of size RELSEG_SIZE; therefore, if 1203 * extending, pad them out with zeroes if needed. (This only 1204 * matters if in recovery, or if the caller is extending the 1205 * relation discontiguously, but that can happen in hash indexes.) 1206 */ 1207 if (nblocks < ((BlockNumber) RELSEG_SIZE)) 1208 { 1209 char *zerobuf = palloc0(BLCKSZ); 1210 1211 mdextend(reln, forknum, 1212 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, 1213 zerobuf, skipFsync); 1214 pfree(zerobuf); 1215 } 1216 flags = O_CREAT; 1217 } 1218 else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) && 1219 nblocks < ((BlockNumber) RELSEG_SIZE)) 1220 { 1221 /* 1222 * When not extending (or explicitly including truncated 1223 * segments), only open the next segment if the current one is 1224 * exactly RELSEG_SIZE. If not (this branch), either return NULL 1225 * or fail. 1226 */ 1227 if (behavior & EXTENSION_RETURN_NULL) 1228 { 1229 /* 1230 * Some callers discern between reasons for _mdfd_getseg() 1231 * returning NULL based on errno. As there's no failing 1232 * syscall involved in this case, explicitly set errno to 1233 * ENOENT, as that seems the closest interpretation. 1234 */ 1235 errno = ENOENT; 1236 return NULL; 1237 } 1238 1239 ereport(ERROR, 1240 (errcode_for_file_access(), 1241 errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks", 1242 _mdfd_segpath(reln, forknum, nextsegno), 1243 blkno, nblocks))); 1244 } 1245 1246 v = _mdfd_openseg(reln, forknum, nextsegno, flags); 1247 1248 if (v == NULL) 1249 { 1250 if ((behavior & EXTENSION_RETURN_NULL) && 1251 FILE_POSSIBLY_DELETED(errno)) 1252 return NULL; 1253 ereport(ERROR, 1254 (errcode_for_file_access(), 1255 errmsg("could not open file \"%s\" (target block %u): %m", 1256 _mdfd_segpath(reln, forknum, nextsegno), 1257 blkno))); 1258 } 1259 } 1260 1261 return v; 1262 } 1263 1264 /* 1265 * Get number of blocks present in a single disk file 1266 */ 1267 static BlockNumber 1268 _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) 1269 { 1270 off_t len; 1271 1272 len = FileSize(seg->mdfd_vfd); 1273 if (len < 0) 1274 ereport(ERROR, 1275 (errcode_for_file_access(), 1276 errmsg("could not seek to end of file \"%s\": %m", 1277 FilePathName(seg->mdfd_vfd)))); 1278 /* note that this calculation will ignore any partial block at EOF */ 1279 return (BlockNumber) (len / BLCKSZ); 1280 } 1281 1282 /* 1283 * Sync a file to disk, given a file tag. Write the path into an output 1284 * buffer so the caller can use it in error messages. 1285 * 1286 * Return 0 on success, -1 on failure, with errno set. 1287 */ 1288 int 1289 mdsyncfiletag(const FileTag *ftag, char *path) 1290 { 1291 SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId); 1292 File file; 1293 bool need_to_close; 1294 int result, 1295 save_errno; 1296 1297 /* See if we already have the file open, or need to open it. */ 1298 if (ftag->segno < reln->md_num_open_segs[ftag->forknum]) 1299 { 1300 file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd; 1301 strlcpy(path, FilePathName(file), MAXPGPATH); 1302 need_to_close = false; 1303 } 1304 else 1305 { 1306 char *p; 1307 1308 p = _mdfd_segpath(reln, ftag->forknum, ftag->segno); 1309 strlcpy(path, p, MAXPGPATH); 1310 pfree(p); 1311 1312 file = PathNameOpenFile(path, O_RDWR | PG_BINARY); 1313 if (file < 0) 1314 return -1; 1315 need_to_close = true; 1316 } 1317 1318 /* Sync the file. */ 1319 result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC); 1320 save_errno = errno; 1321 1322 if (need_to_close) 1323 FileClose(file); 1324 1325 errno = save_errno; 1326 return result; 1327 } 1328 1329 /* 1330 * Unlink a file, given a file tag. Write the path into an output 1331 * buffer so the caller can use it in error messages. 1332 * 1333 * Return 0 on success, -1 on failure, with errno set. 1334 */ 1335 int 1336 mdunlinkfiletag(const FileTag *ftag, char *path) 1337 { 1338 char *p; 1339 1340 /* Compute the path. */ 1341 p = relpathperm(ftag->rnode, MAIN_FORKNUM); 1342 strlcpy(path, p, MAXPGPATH); 1343 pfree(p); 1344 1345 /* Try to unlink the file. */ 1346 return unlink(path); 1347 } 1348 1349 /* 1350 * Check if a given candidate request matches a given tag, when processing 1351 * a SYNC_FILTER_REQUEST request. This will be called for all pending 1352 * requests to find out whether to forget them. 1353 */ 1354 bool 1355 mdfiletagmatches(const FileTag *ftag, const FileTag *candidate) 1356 { 1357 /* 1358 * For now we only use filter requests as a way to drop all scheduled 1359 * callbacks relating to a given database, when dropping the database. 1360 * We'll return true for all candidates that have the same database OID as 1361 * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten. 1362 */ 1363 return ftag->rnode.dbNode == candidate->rnode.dbNode; 1364 } 1365