1 /*-------------------------------------------------------------------------
2 *
3 * smgr.c
4 * public interface routines to storage manager switch.
5 *
6 * All file system operations in POSTGRES dispatch through these
7 * routines.
8 *
9 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 *
13 * IDENTIFICATION
14 * src/backend/storage/smgr/smgr.c
15 *
16 *-------------------------------------------------------------------------
17 */
18 #include "postgres.h"
19
20 #include "commands/tablespace.h"
21 #include "lib/ilist.h"
22 #include "storage/bufmgr.h"
23 #include "storage/ipc.h"
24 #include "storage/smgr.h"
25 #include "utils/hsearch.h"
26 #include "utils/inval.h"
27
28
29 /*
30 * This struct of function pointers defines the API between smgr.c and
31 * any individual storage manager module. Note that smgr subfunctions are
32 * generally expected to report problems via elog(ERROR). An exception is
33 * that smgr_unlink should use elog(WARNING), rather than erroring out,
34 * because we normally unlink relations during post-commit/abort cleanup,
35 * and so it's too late to raise an error. Also, various conditions that
36 * would normally be errors should be allowed during bootstrap and/or WAL
37 * recovery --- see comments in md.c for details.
38 */
39 typedef struct f_smgr
40 {
41 void (*smgr_init) (void); /* may be NULL */
42 void (*smgr_shutdown) (void); /* may be NULL */
43 void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
44 void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
45 bool isRedo);
46 bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
47 void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
48 bool isRedo);
49 void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
50 BlockNumber blocknum, char *buffer, bool skipFsync);
51 void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
52 BlockNumber blocknum);
53 void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
54 BlockNumber blocknum, char *buffer);
55 void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
56 BlockNumber blocknum, char *buffer, bool skipFsync);
57 void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
58 BlockNumber blocknum, BlockNumber nblocks);
59 BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
60 void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
61 BlockNumber nblocks);
62 void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
63 void (*smgr_pre_ckpt) (void); /* may be NULL */
64 void (*smgr_sync) (void); /* may be NULL */
65 void (*smgr_post_ckpt) (void); /* may be NULL */
66 } f_smgr;
67
68
69 static const f_smgr smgrsw[] = {
70 /* magnetic disk */
71 {mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
72 mdprefetch, mdread, mdwrite, mdwriteback, mdnblocks, mdtruncate,
73 mdimmedsync, mdpreckpt, mdsync, mdpostckpt
74 }
75 };
76
77 static const int NSmgr = lengthof(smgrsw);
78
79
80 /*
81 * Each backend has a hashtable that stores all extant SMgrRelation objects.
82 * In addition, "unowned" SMgrRelation objects are chained together in a list.
83 */
84 static HTAB *SMgrRelationHash = NULL;
85
86 static dlist_head unowned_relns;
87
88 /* local function prototypes */
89 static void smgrshutdown(int code, Datum arg);
90
91
92 /*
93 * smgrinit(), smgrshutdown() -- Initialize or shut down storage
94 * managers.
95 *
96 * Note: smgrinit is called during backend startup (normal or standalone
97 * case), *not* during postmaster start. Therefore, any resources created
98 * here or destroyed in smgrshutdown are backend-local.
99 */
100 void
smgrinit(void)101 smgrinit(void)
102 {
103 int i;
104
105 for (i = 0; i < NSmgr; i++)
106 {
107 if (smgrsw[i].smgr_init)
108 (*(smgrsw[i].smgr_init)) ();
109 }
110
111 /* register the shutdown proc */
112 on_proc_exit(smgrshutdown, 0);
113 }
114
115 /*
116 * on_proc_exit hook for smgr cleanup during backend shutdown
117 */
118 static void
smgrshutdown(int code,Datum arg)119 smgrshutdown(int code, Datum arg)
120 {
121 int i;
122
123 for (i = 0; i < NSmgr; i++)
124 {
125 if (smgrsw[i].smgr_shutdown)
126 (*(smgrsw[i].smgr_shutdown)) ();
127 }
128 }
129
130 /*
131 * smgropen() -- Return an SMgrRelation object, creating it if need be.
132 *
133 * This does not attempt to actually open the underlying file.
134 */
135 SMgrRelation
smgropen(RelFileNode rnode,BackendId backend)136 smgropen(RelFileNode rnode, BackendId backend)
137 {
138 RelFileNodeBackend brnode;
139 SMgrRelation reln;
140 bool found;
141
142 if (SMgrRelationHash == NULL)
143 {
144 /* First time through: initialize the hash table */
145 HASHCTL ctl;
146
147 MemSet(&ctl, 0, sizeof(ctl));
148 ctl.keysize = sizeof(RelFileNodeBackend);
149 ctl.entrysize = sizeof(SMgrRelationData);
150 SMgrRelationHash = hash_create("smgr relation table", 400,
151 &ctl, HASH_ELEM | HASH_BLOBS);
152 dlist_init(&unowned_relns);
153 }
154
155 /* Look up or create an entry */
156 brnode.node = rnode;
157 brnode.backend = backend;
158 reln = (SMgrRelation) hash_search(SMgrRelationHash,
159 (void *) &brnode,
160 HASH_ENTER, &found);
161
162 /* Initialize it if not present before */
163 if (!found)
164 {
165 int forknum;
166
167 /* hash_search already filled in the lookup key */
168 reln->smgr_owner = NULL;
169 reln->smgr_targblock = InvalidBlockNumber;
170 reln->smgr_fsm_nblocks = InvalidBlockNumber;
171 reln->smgr_vm_nblocks = InvalidBlockNumber;
172 reln->smgr_which = 0; /* we only have md.c at present */
173
174 /* mark it not open */
175 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
176 reln->md_fd[forknum] = NULL;
177
178 /* it has no owner yet */
179 dlist_push_tail(&unowned_relns, &reln->node);
180 }
181
182 return reln;
183 }
184
185 /*
186 * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
187 *
188 * There can be only one owner at a time; this is sufficient since currently
189 * the only such owners exist in the relcache.
190 */
191 void
smgrsetowner(SMgrRelation * owner,SMgrRelation reln)192 smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
193 {
194 /* We don't support "disowning" an SMgrRelation here, use smgrclearowner */
195 Assert(owner != NULL);
196
197 /*
198 * First, unhook any old owner. (Normally there shouldn't be any, but it
199 * seems possible that this can happen during swap_relation_files()
200 * depending on the order of processing. It's ok to close the old
201 * relcache entry early in that case.)
202 *
203 * If there isn't an old owner, then the reln should be in the unowned
204 * list, and we need to remove it.
205 */
206 if (reln->smgr_owner)
207 *(reln->smgr_owner) = NULL;
208 else
209 dlist_delete(&reln->node);
210
211 /* Now establish the ownership relationship. */
212 reln->smgr_owner = owner;
213 *owner = reln;
214 }
215
216 /*
217 * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
218 * if one exists
219 */
220 void
smgrclearowner(SMgrRelation * owner,SMgrRelation reln)221 smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
222 {
223 /* Do nothing if the SMgrRelation object is not owned by the owner */
224 if (reln->smgr_owner != owner)
225 return;
226
227 /* unset the owner's reference */
228 *owner = NULL;
229
230 /* unset our reference to the owner */
231 reln->smgr_owner = NULL;
232
233 /* add to list of unowned relations */
234 dlist_push_tail(&unowned_relns, &reln->node);
235 }
236
237 /*
238 * smgrexists() -- Does the underlying file for a fork exist?
239 */
240 bool
smgrexists(SMgrRelation reln,ForkNumber forknum)241 smgrexists(SMgrRelation reln, ForkNumber forknum)
242 {
243 return (*(smgrsw[reln->smgr_which].smgr_exists)) (reln, forknum);
244 }
245
246 /*
247 * smgrclose() -- Close and delete an SMgrRelation object.
248 */
249 void
smgrclose(SMgrRelation reln)250 smgrclose(SMgrRelation reln)
251 {
252 SMgrRelation *owner;
253 ForkNumber forknum;
254
255 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
256 (*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum);
257
258 owner = reln->smgr_owner;
259
260 if (!owner)
261 dlist_delete(&reln->node);
262
263 if (hash_search(SMgrRelationHash,
264 (void *) &(reln->smgr_rnode),
265 HASH_REMOVE, NULL) == NULL)
266 elog(ERROR, "SMgrRelation hashtable corrupted");
267
268 /*
269 * Unhook the owner pointer, if any. We do this last since in the remote
270 * possibility of failure above, the SMgrRelation object will still exist.
271 */
272 if (owner)
273 *owner = NULL;
274 }
275
276 /*
277 * smgrcloseall() -- Close all existing SMgrRelation objects.
278 */
279 void
smgrcloseall(void)280 smgrcloseall(void)
281 {
282 HASH_SEQ_STATUS status;
283 SMgrRelation reln;
284
285 /* Nothing to do if hashtable not set up */
286 if (SMgrRelationHash == NULL)
287 return;
288
289 hash_seq_init(&status, SMgrRelationHash);
290
291 while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
292 smgrclose(reln);
293 }
294
295 /*
296 * smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
297 * if one exists.
298 *
299 * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
300 * uselessly creating a hashtable entry only to drop it again when no
301 * such entry exists already.
302 */
303 void
smgrclosenode(RelFileNodeBackend rnode)304 smgrclosenode(RelFileNodeBackend rnode)
305 {
306 SMgrRelation reln;
307
308 /* Nothing to do if hashtable not set up */
309 if (SMgrRelationHash == NULL)
310 return;
311
312 reln = (SMgrRelation) hash_search(SMgrRelationHash,
313 (void *) &rnode,
314 HASH_FIND, NULL);
315 if (reln != NULL)
316 smgrclose(reln);
317 }
318
319 /*
320 * smgrcreate() -- Create a new relation.
321 *
322 * Given an already-created (but presumably unused) SMgrRelation,
323 * cause the underlying disk file or other storage for the fork
324 * to be created.
325 *
326 * If isRedo is true, it is okay for the underlying file to exist
327 * already because we are in a WAL replay sequence.
328 */
329 void
smgrcreate(SMgrRelation reln,ForkNumber forknum,bool isRedo)330 smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
331 {
332 /*
333 * Exit quickly in WAL replay mode if we've already opened the file. If
334 * it's open, it surely must exist.
335 */
336 if (isRedo && reln->md_fd[forknum] != NULL)
337 return;
338
339 /*
340 * We may be using the target table space for the first time in this
341 * database, so create a per-database subdirectory if needed.
342 *
343 * XXX this is a fairly ugly violation of module layering, but this seems
344 * to be the best place to put the check. Maybe TablespaceCreateDbspace
345 * should be here and not in commands/tablespace.c? But that would imply
346 * importing a lot of stuff that smgr.c oughtn't know, either.
347 */
348 TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode,
349 reln->smgr_rnode.node.dbNode,
350 isRedo);
351
352 (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo);
353 }
354
355 /*
356 * smgrdounlink() -- Immediately unlink all forks of a relation.
357 *
358 * All forks of the relation are removed from the store. This should
359 * not be used during transactional operations, since it can't be undone.
360 *
361 * If isRedo is true, it is okay for the underlying file(s) to be gone
362 * already.
363 *
364 * This is equivalent to calling smgrdounlinkfork for each fork, but
365 * it's significantly quicker so should be preferred when possible.
366 */
367 void
smgrdounlink(SMgrRelation reln,bool isRedo)368 smgrdounlink(SMgrRelation reln, bool isRedo)
369 {
370 RelFileNodeBackend rnode = reln->smgr_rnode;
371 int which = reln->smgr_which;
372 ForkNumber forknum;
373
374 /* Close the forks at smgr level */
375 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
376 (*(smgrsw[which].smgr_close)) (reln, forknum);
377
378 /*
379 * Get rid of any remaining buffers for the relation. bufmgr will just
380 * drop them without bothering to write the contents.
381 */
382 DropRelFileNodesAllBuffers(&rnode, 1);
383
384 /*
385 * It'd be nice to tell the stats collector to forget it immediately, too.
386 * But we can't because we don't know the OID (and in cases involving
387 * relfilenode swaps, it's not always clear which table OID to forget,
388 * anyway).
389 */
390
391 /*
392 * Send a shared-inval message to force other backends to close any
393 * dangling smgr references they may have for this rel. We should do this
394 * before starting the actual unlinking, in case we fail partway through
395 * that step. Note that the sinval message will eventually come back to
396 * this backend, too, and thereby provide a backstop that we closed our
397 * own smgr rel.
398 */
399 CacheInvalidateSmgr(rnode);
400
401 /*
402 * Delete the physical file(s).
403 *
404 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
405 * ERROR, because we've already decided to commit or abort the current
406 * xact.
407 */
408 (*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
409 }
410
411 /*
412 * smgrdounlinkall() -- Immediately unlink all forks of all given relations
413 *
414 * All forks of all given relations are removed from the store. This
415 * should not be used during transactional operations, since it can't be
416 * undone.
417 *
418 * If isRedo is true, it is okay for the underlying file(s) to be gone
419 * already.
420 *
421 * This is equivalent to calling smgrdounlink for each relation, but it's
422 * significantly quicker so should be preferred when possible.
423 */
424 void
smgrdounlinkall(SMgrRelation * rels,int nrels,bool isRedo)425 smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
426 {
427 int i = 0;
428 RelFileNodeBackend *rnodes;
429 ForkNumber forknum;
430
431 if (nrels == 0)
432 return;
433
434 /*
435 * create an array which contains all relations to be dropped, and close
436 * each relation's forks at the smgr level while at it
437 */
438 rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
439 for (i = 0; i < nrels; i++)
440 {
441 RelFileNodeBackend rnode = rels[i]->smgr_rnode;
442 int which = rels[i]->smgr_which;
443
444 rnodes[i] = rnode;
445
446 /* Close the forks at smgr level */
447 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
448 (*(smgrsw[which].smgr_close)) (rels[i], forknum);
449 }
450
451 /*
452 * Get rid of any remaining buffers for the relations. bufmgr will just
453 * drop them without bothering to write the contents.
454 */
455 DropRelFileNodesAllBuffers(rnodes, nrels);
456
457 /*
458 * It'd be nice to tell the stats collector to forget them immediately,
459 * too. But we can't because we don't know the OIDs.
460 */
461
462 /*
463 * Send a shared-inval message to force other backends to close any
464 * dangling smgr references they may have for these rels. We should do
465 * this before starting the actual unlinking, in case we fail partway
466 * through that step. Note that the sinval messages will eventually come
467 * back to this backend, too, and thereby provide a backstop that we
468 * closed our own smgr rel.
469 */
470 for (i = 0; i < nrels; i++)
471 CacheInvalidateSmgr(rnodes[i]);
472
473 /*
474 * Delete the physical file(s).
475 *
476 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
477 * ERROR, because we've already decided to commit or abort the current
478 * xact.
479 */
480
481 for (i = 0; i < nrels; i++)
482 {
483 int which = rels[i]->smgr_which;
484
485 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
486 (*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo);
487 }
488
489 pfree(rnodes);
490 }
491
492 /*
493 * smgrdounlinkfork() -- Immediately unlink one fork of a relation.
494 *
495 * The specified fork of the relation is removed from the store. This
496 * should not be used during transactional operations, since it can't be
497 * undone.
498 *
499 * If isRedo is true, it is okay for the underlying file to be gone
500 * already.
501 */
502 void
smgrdounlinkfork(SMgrRelation reln,ForkNumber forknum,bool isRedo)503 smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo)
504 {
505 RelFileNodeBackend rnode = reln->smgr_rnode;
506 int which = reln->smgr_which;
507
508 /* Close the fork at smgr level */
509 (*(smgrsw[which].smgr_close)) (reln, forknum);
510
511 /*
512 * Get rid of any remaining buffers for the fork. bufmgr will just drop
513 * them without bothering to write the contents.
514 */
515 DropRelFileNodeBuffers(rnode, forknum, 0);
516
517 /*
518 * It'd be nice to tell the stats collector to forget it immediately, too.
519 * But we can't because we don't know the OID (and in cases involving
520 * relfilenode swaps, it's not always clear which table OID to forget,
521 * anyway).
522 */
523
524 /*
525 * Send a shared-inval message to force other backends to close any
526 * dangling smgr references they may have for this rel. We should do this
527 * before starting the actual unlinking, in case we fail partway through
528 * that step. Note that the sinval message will eventually come back to
529 * this backend, too, and thereby provide a backstop that we closed our
530 * own smgr rel.
531 */
532 CacheInvalidateSmgr(rnode);
533
534 /*
535 * Delete the physical file(s).
536 *
537 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
538 * ERROR, because we've already decided to commit or abort the current
539 * xact.
540 */
541 (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
542 }
543
544 /*
545 * smgrextend() -- Add a new block to a file.
546 *
547 * The semantics are nearly the same as smgrwrite(): write at the
548 * specified position. However, this is to be used for the case of
549 * extending a relation (i.e., blocknum is at or beyond the current
550 * EOF). Note that we assume writing a block beyond current EOF
551 * causes intervening file space to become filled with zeroes.
552 */
553 void
smgrextend(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer,bool skipFsync)554 smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
555 char *buffer, bool skipFsync)
556 {
557 (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
558 buffer, skipFsync);
559 }
560
561 /*
562 * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
563 */
564 void
smgrprefetch(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum)565 smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
566 {
567 (*(smgrsw[reln->smgr_which].smgr_prefetch)) (reln, forknum, blocknum);
568 }
569
570 /*
571 * smgrread() -- read a particular block from a relation into the supplied
572 * buffer.
573 *
574 * This routine is called from the buffer manager in order to
575 * instantiate pages in the shared buffer cache. All storage managers
576 * return pages in the format that POSTGRES expects.
577 */
578 void
smgrread(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer)579 smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
580 char *buffer)
581 {
582 (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
583 }
584
585 /*
586 * smgrwrite() -- Write the supplied buffer out.
587 *
588 * This is to be used only for updating already-existing blocks of a
589 * relation (ie, those before the current EOF). To extend a relation,
590 * use smgrextend().
591 *
592 * This is not a synchronous write -- the block is not necessarily
593 * on disk at return, only dumped out to the kernel. However,
594 * provisions will be made to fsync the write before the next checkpoint.
595 *
596 * skipFsync indicates that the caller will make other provisions to
597 * fsync the relation, so we needn't bother. Temporary relations also
598 * do not require fsync.
599 */
600 void
smgrwrite(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer,bool skipFsync)601 smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
602 char *buffer, bool skipFsync)
603 {
604 (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
605 buffer, skipFsync);
606 }
607
608
609 /*
610 * smgrwriteback() -- Trigger kernel writeback for the supplied range of
611 * blocks.
612 */
613 void
smgrwriteback(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,BlockNumber nblocks)614 smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
615 BlockNumber nblocks)
616 {
617 (*(smgrsw[reln->smgr_which].smgr_writeback)) (reln, forknum, blocknum,
618 nblocks);
619 }
620
621 /*
622 * smgrnblocks() -- Calculate the number of blocks in the
623 * supplied relation.
624 */
625 BlockNumber
smgrnblocks(SMgrRelation reln,ForkNumber forknum)626 smgrnblocks(SMgrRelation reln, ForkNumber forknum)
627 {
628 return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
629 }
630
631 /*
632 * smgrtruncate() -- Truncate supplied relation to the specified number
633 * of blocks
634 *
635 * The truncation is done immediately, so this can't be rolled back.
636 */
637 void
smgrtruncate(SMgrRelation reln,ForkNumber forknum,BlockNumber nblocks)638 smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
639 {
640 /*
641 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
642 * just drop them without bothering to write the contents.
643 */
644 DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks);
645
646 /*
647 * Send a shared-inval message to force other backends to close any smgr
648 * references they may have for this rel. This is useful because they
649 * might have open file pointers to segments that got removed, and/or
650 * smgr_targblock variables pointing past the new rel end. (The inval
651 * message will come back to our backend, too, causing a
652 * probably-unnecessary local smgr flush. But we don't expect that this
653 * is a performance-critical path.) As in the unlink code, we want to be
654 * sure the message is sent before we start changing things on-disk.
655 */
656 CacheInvalidateSmgr(reln->smgr_rnode);
657
658 /*
659 * Do the truncation.
660 */
661 (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks);
662 }
663
664 /*
665 * smgrimmedsync() -- Force the specified relation to stable storage.
666 *
667 * Synchronously force all previous writes to the specified relation
668 * down to disk.
669 *
670 * This is useful for building completely new relations (eg, new
671 * indexes). Instead of incrementally WAL-logging the index build
672 * steps, we can just write completed index pages to disk with smgrwrite
673 * or smgrextend, and then fsync the completed index file before
674 * committing the transaction. (This is sufficient for purposes of
675 * crash recovery, since it effectively duplicates forcing a checkpoint
676 * for the completed index. But it is *not* sufficient if one wishes
677 * to use the WAL log for PITR or replication purposes: in that case
678 * we have to make WAL entries as well.)
679 *
680 * The preceding writes should specify skipFsync = true to avoid
681 * duplicative fsyncs.
682 *
683 * Note that you need to do FlushRelationBuffers() first if there is
684 * any possibility that there are dirty buffers for the relation;
685 * otherwise the sync is not very meaningful.
686 */
687 void
smgrimmedsync(SMgrRelation reln,ForkNumber forknum)688 smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
689 {
690 (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum);
691 }
692
693
694 /*
695 * smgrpreckpt() -- Prepare for checkpoint.
696 */
697 void
smgrpreckpt(void)698 smgrpreckpt(void)
699 {
700 int i;
701
702 for (i = 0; i < NSmgr; i++)
703 {
704 if (smgrsw[i].smgr_pre_ckpt)
705 (*(smgrsw[i].smgr_pre_ckpt)) ();
706 }
707 }
708
709 /*
710 * smgrsync() -- Sync files to disk during checkpoint.
711 */
712 void
smgrsync(void)713 smgrsync(void)
714 {
715 int i;
716
717 for (i = 0; i < NSmgr; i++)
718 {
719 if (smgrsw[i].smgr_sync)
720 (*(smgrsw[i].smgr_sync)) ();
721 }
722 }
723
724 /*
725 * smgrpostckpt() -- Post-checkpoint cleanup.
726 */
727 void
smgrpostckpt(void)728 smgrpostckpt(void)
729 {
730 int i;
731
732 for (i = 0; i < NSmgr; i++)
733 {
734 if (smgrsw[i].smgr_post_ckpt)
735 (*(smgrsw[i].smgr_post_ckpt)) ();
736 }
737 }
738
739 /*
740 * AtEOXact_SMgr
741 *
742 * This routine is called during transaction commit or abort (it doesn't
743 * particularly care which). All transient SMgrRelation objects are closed.
744 *
745 * We do this as a compromise between wanting transient SMgrRelations to
746 * live awhile (to amortize the costs of blind writes of multiple blocks)
747 * and needing them to not live forever (since we're probably holding open
748 * a kernel file descriptor for the underlying file, and we need to ensure
749 * that gets closed reasonably soon if the file gets deleted).
750 */
751 void
AtEOXact_SMgr(void)752 AtEOXact_SMgr(void)
753 {
754 dlist_mutable_iter iter;
755
756 /*
757 * Zap all unowned SMgrRelations. We rely on smgrclose() to remove each
758 * one from the list.
759 */
760 dlist_foreach_modify(iter, &unowned_relns)
761 {
762 SMgrRelation rel = dlist_container(SMgrRelationData, node,
763 iter.cur);
764
765 Assert(rel->smgr_owner == NULL);
766
767 smgrclose(rel);
768 }
769 }
770