1 /*-------------------------------------------------------------------------
2  *
3  * smgr.c
4  *	  public interface routines to storage manager switch.
5  *
6  *	  All file system operations in POSTGRES dispatch through these
7  *	  routines.
8  *
9  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  *
13  * IDENTIFICATION
14  *	  src/backend/storage/smgr/smgr.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "commands/tablespace.h"
21 #include "lib/ilist.h"
22 #include "storage/bufmgr.h"
23 #include "storage/ipc.h"
24 #include "storage/smgr.h"
25 #include "utils/hsearch.h"
26 #include "utils/inval.h"
27 
28 
29 /*
30  * This struct of function pointers defines the API between smgr.c and
31  * any individual storage manager module.  Note that smgr subfunctions are
32  * generally expected to report problems via elog(ERROR).  An exception is
33  * that smgr_unlink should use elog(WARNING), rather than erroring out,
34  * because we normally unlink relations during post-commit/abort cleanup,
35  * and so it's too late to raise an error.  Also, various conditions that
36  * would normally be errors should be allowed during bootstrap and/or WAL
37  * recovery --- see comments in md.c for details.
38  */
39 typedef struct f_smgr
40 {
41 	void		(*smgr_init) (void);	/* may be NULL */
42 	void		(*smgr_shutdown) (void);		/* may be NULL */
43 	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
44 	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
45 											bool isRedo);
46 	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
47 	void		(*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
48 											bool isRedo);
49 	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
50 						 BlockNumber blocknum, char *buffer, bool skipFsync);
51 	void		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
52 											  BlockNumber blocknum);
53 	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
54 										  BlockNumber blocknum, char *buffer);
55 	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
56 						 BlockNumber blocknum, char *buffer, bool skipFsync);
57 	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
58 								  BlockNumber blocknum, BlockNumber nblocks);
59 	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
60 	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
61 											  BlockNumber nblocks);
62 	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
63 	void		(*smgr_pre_ckpt) (void);		/* may be NULL */
64 	void		(*smgr_sync) (void);	/* may be NULL */
65 	void		(*smgr_post_ckpt) (void);		/* may be NULL */
66 } f_smgr;
67 
68 
69 static const f_smgr smgrsw[] = {
70 	/* magnetic disk */
71 	{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
72 		mdprefetch, mdread, mdwrite, mdwriteback, mdnblocks, mdtruncate,
73 		mdimmedsync, mdpreckpt, mdsync, mdpostckpt
74 	}
75 };
76 
77 static const int NSmgr = lengthof(smgrsw);
78 
79 
80 /*
81  * Each backend has a hashtable that stores all extant SMgrRelation objects.
82  * In addition, "unowned" SMgrRelation objects are chained together in a list.
83  */
84 static HTAB *SMgrRelationHash = NULL;
85 
86 static dlist_head	unowned_relns;
87 
88 /* local function prototypes */
89 static void smgrshutdown(int code, Datum arg);
90 
91 
92 /*
93  *	smgrinit(), smgrshutdown() -- Initialize or shut down storage
94  *								  managers.
95  *
96  * Note: smgrinit is called during backend startup (normal or standalone
97  * case), *not* during postmaster start.  Therefore, any resources created
98  * here or destroyed in smgrshutdown are backend-local.
99  */
100 void
smgrinit(void)101 smgrinit(void)
102 {
103 	int			i;
104 
105 	for (i = 0; i < NSmgr; i++)
106 	{
107 		if (smgrsw[i].smgr_init)
108 			(*(smgrsw[i].smgr_init)) ();
109 	}
110 
111 	/* register the shutdown proc */
112 	on_proc_exit(smgrshutdown, 0);
113 }
114 
115 /*
116  * on_proc_exit hook for smgr cleanup during backend shutdown
117  */
118 static void
smgrshutdown(int code,Datum arg)119 smgrshutdown(int code, Datum arg)
120 {
121 	int			i;
122 
123 	for (i = 0; i < NSmgr; i++)
124 	{
125 		if (smgrsw[i].smgr_shutdown)
126 			(*(smgrsw[i].smgr_shutdown)) ();
127 	}
128 }
129 
130 /*
131  *	smgropen() -- Return an SMgrRelation object, creating it if need be.
132  *
133  *		This does not attempt to actually open the underlying file.
134  */
135 SMgrRelation
smgropen(RelFileNode rnode,BackendId backend)136 smgropen(RelFileNode rnode, BackendId backend)
137 {
138 	RelFileNodeBackend brnode;
139 	SMgrRelation reln;
140 	bool		found;
141 
142 	if (SMgrRelationHash == NULL)
143 	{
144 		/* First time through: initialize the hash table */
145 		HASHCTL		ctl;
146 
147 		MemSet(&ctl, 0, sizeof(ctl));
148 		ctl.keysize = sizeof(RelFileNodeBackend);
149 		ctl.entrysize = sizeof(SMgrRelationData);
150 		SMgrRelationHash = hash_create("smgr relation table", 400,
151 									   &ctl, HASH_ELEM | HASH_BLOBS);
152 		dlist_init(&unowned_relns);
153 	}
154 
155 	/* Look up or create an entry */
156 	brnode.node = rnode;
157 	brnode.backend = backend;
158 	reln = (SMgrRelation) hash_search(SMgrRelationHash,
159 									  (void *) &brnode,
160 									  HASH_ENTER, &found);
161 
162 	/* Initialize it if not present before */
163 	if (!found)
164 	{
165 		int			forknum;
166 
167 		/* hash_search already filled in the lookup key */
168 		reln->smgr_owner = NULL;
169 		reln->smgr_targblock = InvalidBlockNumber;
170 		reln->smgr_fsm_nblocks = InvalidBlockNumber;
171 		reln->smgr_vm_nblocks = InvalidBlockNumber;
172 		reln->smgr_which = 0;	/* we only have md.c at present */
173 
174 		/* mark it not open */
175 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
176 			reln->md_fd[forknum] = NULL;
177 
178 		/* it has no owner yet */
179 		dlist_push_tail(&unowned_relns, &reln->node);
180 	}
181 
182 	return reln;
183 }
184 
185 /*
186  * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
187  *
188  * There can be only one owner at a time; this is sufficient since currently
189  * the only such owners exist in the relcache.
190  */
191 void
smgrsetowner(SMgrRelation * owner,SMgrRelation reln)192 smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
193 {
194 	/* We don't support "disowning" an SMgrRelation here, use smgrclearowner */
195 	Assert(owner != NULL);
196 
197 	/*
198 	 * First, unhook any old owner.  (Normally there shouldn't be any, but it
199 	 * seems possible that this can happen during swap_relation_files()
200 	 * depending on the order of processing.  It's ok to close the old
201 	 * relcache entry early in that case.)
202 	 *
203 	 * If there isn't an old owner, then the reln should be in the unowned
204 	 * list, and we need to remove it.
205 	 */
206 	if (reln->smgr_owner)
207 		*(reln->smgr_owner) = NULL;
208 	else
209 		dlist_delete(&reln->node);
210 
211 	/* Now establish the ownership relationship. */
212 	reln->smgr_owner = owner;
213 	*owner = reln;
214 }
215 
216 /*
217  * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
218  *					   if one exists
219  */
220 void
smgrclearowner(SMgrRelation * owner,SMgrRelation reln)221 smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
222 {
223 	/* Do nothing if the SMgrRelation object is not owned by the owner */
224 	if (reln->smgr_owner != owner)
225 		return;
226 
227 	/* unset the owner's reference */
228 	*owner = NULL;
229 
230 	/* unset our reference to the owner */
231 	reln->smgr_owner = NULL;
232 
233 	/* add to list of unowned relations */
234 	dlist_push_tail(&unowned_relns, &reln->node);
235 }
236 
237 /*
238  *	smgrexists() -- Does the underlying file for a fork exist?
239  */
240 bool
smgrexists(SMgrRelation reln,ForkNumber forknum)241 smgrexists(SMgrRelation reln, ForkNumber forknum)
242 {
243 	return (*(smgrsw[reln->smgr_which].smgr_exists)) (reln, forknum);
244 }
245 
246 /*
247  *	smgrclose() -- Close and delete an SMgrRelation object.
248  */
249 void
smgrclose(SMgrRelation reln)250 smgrclose(SMgrRelation reln)
251 {
252 	SMgrRelation *owner;
253 	ForkNumber	forknum;
254 
255 	for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
256 		(*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum);
257 
258 	owner = reln->smgr_owner;
259 
260 	if (!owner)
261 		dlist_delete(&reln->node);
262 
263 	if (hash_search(SMgrRelationHash,
264 					(void *) &(reln->smgr_rnode),
265 					HASH_REMOVE, NULL) == NULL)
266 		elog(ERROR, "SMgrRelation hashtable corrupted");
267 
268 	/*
269 	 * Unhook the owner pointer, if any.  We do this last since in the remote
270 	 * possibility of failure above, the SMgrRelation object will still exist.
271 	 */
272 	if (owner)
273 		*owner = NULL;
274 }
275 
276 /*
277  *	smgrcloseall() -- Close all existing SMgrRelation objects.
278  */
279 void
smgrcloseall(void)280 smgrcloseall(void)
281 {
282 	HASH_SEQ_STATUS status;
283 	SMgrRelation reln;
284 
285 	/* Nothing to do if hashtable not set up */
286 	if (SMgrRelationHash == NULL)
287 		return;
288 
289 	hash_seq_init(&status, SMgrRelationHash);
290 
291 	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
292 		smgrclose(reln);
293 }
294 
295 /*
296  *	smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
297  *					   if one exists.
298  *
299  * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
300  * uselessly creating a hashtable entry only to drop it again when no
301  * such entry exists already.
302  */
303 void
smgrclosenode(RelFileNodeBackend rnode)304 smgrclosenode(RelFileNodeBackend rnode)
305 {
306 	SMgrRelation reln;
307 
308 	/* Nothing to do if hashtable not set up */
309 	if (SMgrRelationHash == NULL)
310 		return;
311 
312 	reln = (SMgrRelation) hash_search(SMgrRelationHash,
313 									  (void *) &rnode,
314 									  HASH_FIND, NULL);
315 	if (reln != NULL)
316 		smgrclose(reln);
317 }
318 
319 /*
320  *	smgrcreate() -- Create a new relation.
321  *
322  *		Given an already-created (but presumably unused) SMgrRelation,
323  *		cause the underlying disk file or other storage for the fork
324  *		to be created.
325  *
326  *		If isRedo is true, it is okay for the underlying file to exist
327  *		already because we are in a WAL replay sequence.
328  */
329 void
smgrcreate(SMgrRelation reln,ForkNumber forknum,bool isRedo)330 smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
331 {
332 	/*
333 	 * Exit quickly in WAL replay mode if we've already opened the file. If
334 	 * it's open, it surely must exist.
335 	 */
336 	if (isRedo && reln->md_fd[forknum] != NULL)
337 		return;
338 
339 	/*
340 	 * We may be using the target table space for the first time in this
341 	 * database, so create a per-database subdirectory if needed.
342 	 *
343 	 * XXX this is a fairly ugly violation of module layering, but this seems
344 	 * to be the best place to put the check.  Maybe TablespaceCreateDbspace
345 	 * should be here and not in commands/tablespace.c?  But that would imply
346 	 * importing a lot of stuff that smgr.c oughtn't know, either.
347 	 */
348 	TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode,
349 							reln->smgr_rnode.node.dbNode,
350 							isRedo);
351 
352 	(*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo);
353 }
354 
355 /*
356  *	smgrdounlink() -- Immediately unlink all forks of a relation.
357  *
358  *		All forks of the relation are removed from the store.  This should
359  *		not be used during transactional operations, since it can't be undone.
360  *
361  *		If isRedo is true, it is okay for the underlying file(s) to be gone
362  *		already.
363  *
364  *		This is equivalent to calling smgrdounlinkfork for each fork, but
365  *		it's significantly quicker so should be preferred when possible.
366  */
367 void
smgrdounlink(SMgrRelation reln,bool isRedo)368 smgrdounlink(SMgrRelation reln, bool isRedo)
369 {
370 	RelFileNodeBackend rnode = reln->smgr_rnode;
371 	int			which = reln->smgr_which;
372 	ForkNumber	forknum;
373 
374 	/* Close the forks at smgr level */
375 	for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
376 		(*(smgrsw[which].smgr_close)) (reln, forknum);
377 
378 	/*
379 	 * Get rid of any remaining buffers for the relation.  bufmgr will just
380 	 * drop them without bothering to write the contents.
381 	 */
382 	DropRelFileNodesAllBuffers(&rnode, 1);
383 
384 	/*
385 	 * It'd be nice to tell the stats collector to forget it immediately, too.
386 	 * But we can't because we don't know the OID (and in cases involving
387 	 * relfilenode swaps, it's not always clear which table OID to forget,
388 	 * anyway).
389 	 */
390 
391 	/*
392 	 * Send a shared-inval message to force other backends to close any
393 	 * dangling smgr references they may have for this rel.  We should do this
394 	 * before starting the actual unlinking, in case we fail partway through
395 	 * that step.  Note that the sinval message will eventually come back to
396 	 * this backend, too, and thereby provide a backstop that we closed our
397 	 * own smgr rel.
398 	 */
399 	CacheInvalidateSmgr(rnode);
400 
401 	/*
402 	 * Delete the physical file(s).
403 	 *
404 	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
405 	 * ERROR, because we've already decided to commit or abort the current
406 	 * xact.
407 	 */
408 	(*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
409 }
410 
411 /*
412  *	smgrdounlinkall() -- Immediately unlink all forks of all given relations
413  *
414  *		All forks of all given relations are removed from the store.  This
415  *		should not be used during transactional operations, since it can't be
416  *		undone.
417  *
418  *		If isRedo is true, it is okay for the underlying file(s) to be gone
419  *		already.
420  *
421  *		This is equivalent to calling smgrdounlink for each relation, but it's
422  *		significantly quicker so should be preferred when possible.
423  */
424 void
smgrdounlinkall(SMgrRelation * rels,int nrels,bool isRedo)425 smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
426 {
427 	int			i = 0;
428 	RelFileNodeBackend *rnodes;
429 	ForkNumber	forknum;
430 
431 	if (nrels == 0)
432 		return;
433 
434 	/*
435 	 * create an array which contains all relations to be dropped, and close
436 	 * each relation's forks at the smgr level while at it
437 	 */
438 	rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
439 	for (i = 0; i < nrels; i++)
440 	{
441 		RelFileNodeBackend rnode = rels[i]->smgr_rnode;
442 		int			which = rels[i]->smgr_which;
443 
444 		rnodes[i] = rnode;
445 
446 		/* Close the forks at smgr level */
447 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
448 			(*(smgrsw[which].smgr_close)) (rels[i], forknum);
449 	}
450 
451 	/*
452 	 * Get rid of any remaining buffers for the relations.  bufmgr will just
453 	 * drop them without bothering to write the contents.
454 	 */
455 	DropRelFileNodesAllBuffers(rnodes, nrels);
456 
457 	/*
458 	 * It'd be nice to tell the stats collector to forget them immediately,
459 	 * too. But we can't because we don't know the OIDs.
460 	 */
461 
462 	/*
463 	 * Send a shared-inval message to force other backends to close any
464 	 * dangling smgr references they may have for these rels.  We should do
465 	 * this before starting the actual unlinking, in case we fail partway
466 	 * through that step.  Note that the sinval messages will eventually come
467 	 * back to this backend, too, and thereby provide a backstop that we
468 	 * closed our own smgr rel.
469 	 */
470 	for (i = 0; i < nrels; i++)
471 		CacheInvalidateSmgr(rnodes[i]);
472 
473 	/*
474 	 * Delete the physical file(s).
475 	 *
476 	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
477 	 * ERROR, because we've already decided to commit or abort the current
478 	 * xact.
479 	 */
480 
481 	for (i = 0; i < nrels; i++)
482 	{
483 		int			which = rels[i]->smgr_which;
484 
485 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
486 			(*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo);
487 	}
488 
489 	pfree(rnodes);
490 }
491 
492 /*
493  *	smgrdounlinkfork() -- Immediately unlink one fork of a relation.
494  *
495  *		The specified fork of the relation is removed from the store.  This
496  *		should not be used during transactional operations, since it can't be
497  *		undone.
498  *
499  *		If isRedo is true, it is okay for the underlying file to be gone
500  *		already.
501  */
502 void
smgrdounlinkfork(SMgrRelation reln,ForkNumber forknum,bool isRedo)503 smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo)
504 {
505 	RelFileNodeBackend rnode = reln->smgr_rnode;
506 	int			which = reln->smgr_which;
507 
508 	/* Close the fork at smgr level */
509 	(*(smgrsw[which].smgr_close)) (reln, forknum);
510 
511 	/*
512 	 * Get rid of any remaining buffers for the fork.  bufmgr will just drop
513 	 * them without bothering to write the contents.
514 	 */
515 	DropRelFileNodeBuffers(rnode, forknum, 0);
516 
517 	/*
518 	 * It'd be nice to tell the stats collector to forget it immediately, too.
519 	 * But we can't because we don't know the OID (and in cases involving
520 	 * relfilenode swaps, it's not always clear which table OID to forget,
521 	 * anyway).
522 	 */
523 
524 	/*
525 	 * Send a shared-inval message to force other backends to close any
526 	 * dangling smgr references they may have for this rel.  We should do this
527 	 * before starting the actual unlinking, in case we fail partway through
528 	 * that step.  Note that the sinval message will eventually come back to
529 	 * this backend, too, and thereby provide a backstop that we closed our
530 	 * own smgr rel.
531 	 */
532 	CacheInvalidateSmgr(rnode);
533 
534 	/*
535 	 * Delete the physical file(s).
536 	 *
537 	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
538 	 * ERROR, because we've already decided to commit or abort the current
539 	 * xact.
540 	 */
541 	(*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
542 }
543 
544 /*
545  *	smgrextend() -- Add a new block to a file.
546  *
547  *		The semantics are nearly the same as smgrwrite(): write at the
548  *		specified position.  However, this is to be used for the case of
549  *		extending a relation (i.e., blocknum is at or beyond the current
550  *		EOF).  Note that we assume writing a block beyond current EOF
551  *		causes intervening file space to become filled with zeroes.
552  */
553 void
smgrextend(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer,bool skipFsync)554 smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
555 		   char *buffer, bool skipFsync)
556 {
557 	(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
558 											   buffer, skipFsync);
559 }
560 
561 /*
562  *	smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
563  */
564 void
smgrprefetch(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum)565 smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
566 {
567 	(*(smgrsw[reln->smgr_which].smgr_prefetch)) (reln, forknum, blocknum);
568 }
569 
570 /*
571  *	smgrread() -- read a particular block from a relation into the supplied
572  *				  buffer.
573  *
574  *		This routine is called from the buffer manager in order to
575  *		instantiate pages in the shared buffer cache.  All storage managers
576  *		return pages in the format that POSTGRES expects.
577  */
578 void
smgrread(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer)579 smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
580 		 char *buffer)
581 {
582 	(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
583 }
584 
585 /*
586  *	smgrwrite() -- Write the supplied buffer out.
587  *
588  *		This is to be used only for updating already-existing blocks of a
589  *		relation (ie, those before the current EOF).  To extend a relation,
590  *		use smgrextend().
591  *
592  *		This is not a synchronous write -- the block is not necessarily
593  *		on disk at return, only dumped out to the kernel.  However,
594  *		provisions will be made to fsync the write before the next checkpoint.
595  *
596  *		skipFsync indicates that the caller will make other provisions to
597  *		fsync the relation, so we needn't bother.  Temporary relations also
598  *		do not require fsync.
599  */
600 void
smgrwrite(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,char * buffer,bool skipFsync)601 smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
602 		  char *buffer, bool skipFsync)
603 {
604 	(*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
605 											  buffer, skipFsync);
606 }
607 
608 
609 /*
610  *	smgrwriteback() -- Trigger kernel writeback for the supplied range of
611  *					   blocks.
612  */
613 void
smgrwriteback(SMgrRelation reln,ForkNumber forknum,BlockNumber blocknum,BlockNumber nblocks)614 smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
615 			  BlockNumber nblocks)
616 {
617 	(*(smgrsw[reln->smgr_which].smgr_writeback)) (reln, forknum, blocknum,
618 												  nblocks);
619 }
620 
621 /*
622  *	smgrnblocks() -- Calculate the number of blocks in the
623  *					 supplied relation.
624  */
625 BlockNumber
smgrnblocks(SMgrRelation reln,ForkNumber forknum)626 smgrnblocks(SMgrRelation reln, ForkNumber forknum)
627 {
628 	return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
629 }
630 
631 /*
632  *	smgrtruncate() -- Truncate supplied relation to the specified number
633  *					  of blocks
634  *
635  * The truncation is done immediately, so this can't be rolled back.
636  */
637 void
smgrtruncate(SMgrRelation reln,ForkNumber forknum,BlockNumber nblocks)638 smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
639 {
640 	/*
641 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
642 	 * just drop them without bothering to write the contents.
643 	 */
644 	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks);
645 
646 	/*
647 	 * Send a shared-inval message to force other backends to close any smgr
648 	 * references they may have for this rel.  This is useful because they
649 	 * might have open file pointers to segments that got removed, and/or
650 	 * smgr_targblock variables pointing past the new rel end.  (The inval
651 	 * message will come back to our backend, too, causing a
652 	 * probably-unnecessary local smgr flush.  But we don't expect that this
653 	 * is a performance-critical path.)  As in the unlink code, we want to be
654 	 * sure the message is sent before we start changing things on-disk.
655 	 */
656 	CacheInvalidateSmgr(reln->smgr_rnode);
657 
658 	/*
659 	 * Do the truncation.
660 	 */
661 	(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks);
662 }
663 
664 /*
665  *	smgrimmedsync() -- Force the specified relation to stable storage.
666  *
667  *		Synchronously force all previous writes to the specified relation
668  *		down to disk.
669  *
670  *		This is useful for building completely new relations (eg, new
671  *		indexes).  Instead of incrementally WAL-logging the index build
672  *		steps, we can just write completed index pages to disk with smgrwrite
673  *		or smgrextend, and then fsync the completed index file before
674  *		committing the transaction.  (This is sufficient for purposes of
675  *		crash recovery, since it effectively duplicates forcing a checkpoint
676  *		for the completed index.  But it is *not* sufficient if one wishes
677  *		to use the WAL log for PITR or replication purposes: in that case
678  *		we have to make WAL entries as well.)
679  *
680  *		The preceding writes should specify skipFsync = true to avoid
681  *		duplicative fsyncs.
682  *
683  *		Note that you need to do FlushRelationBuffers() first if there is
684  *		any possibility that there are dirty buffers for the relation;
685  *		otherwise the sync is not very meaningful.
686  */
687 void
smgrimmedsync(SMgrRelation reln,ForkNumber forknum)688 smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
689 {
690 	(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum);
691 }
692 
693 
694 /*
695  *	smgrpreckpt() -- Prepare for checkpoint.
696  */
697 void
smgrpreckpt(void)698 smgrpreckpt(void)
699 {
700 	int			i;
701 
702 	for (i = 0; i < NSmgr; i++)
703 	{
704 		if (smgrsw[i].smgr_pre_ckpt)
705 			(*(smgrsw[i].smgr_pre_ckpt)) ();
706 	}
707 }
708 
709 /*
710  *	smgrsync() -- Sync files to disk during checkpoint.
711  */
712 void
smgrsync(void)713 smgrsync(void)
714 {
715 	int			i;
716 
717 	for (i = 0; i < NSmgr; i++)
718 	{
719 		if (smgrsw[i].smgr_sync)
720 			(*(smgrsw[i].smgr_sync)) ();
721 	}
722 }
723 
724 /*
725  *	smgrpostckpt() -- Post-checkpoint cleanup.
726  */
727 void
smgrpostckpt(void)728 smgrpostckpt(void)
729 {
730 	int			i;
731 
732 	for (i = 0; i < NSmgr; i++)
733 	{
734 		if (smgrsw[i].smgr_post_ckpt)
735 			(*(smgrsw[i].smgr_post_ckpt)) ();
736 	}
737 }
738 
739 /*
740  * AtEOXact_SMgr
741  *
742  * This routine is called during transaction commit or abort (it doesn't
743  * particularly care which).  All transient SMgrRelation objects are closed.
744  *
745  * We do this as a compromise between wanting transient SMgrRelations to
746  * live awhile (to amortize the costs of blind writes of multiple blocks)
747  * and needing them to not live forever (since we're probably holding open
748  * a kernel file descriptor for the underlying file, and we need to ensure
749  * that gets closed reasonably soon if the file gets deleted).
750  */
751 void
AtEOXact_SMgr(void)752 AtEOXact_SMgr(void)
753 {
754 	dlist_mutable_iter	iter;
755 
756 	/*
757 	 * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
758 	 * one from the list.
759 	 */
760 	dlist_foreach_modify(iter, &unowned_relns)
761 	{
762 		SMgrRelation	rel = dlist_container(SMgrRelationData, node,
763 											  iter.cur);
764 
765 		Assert(rel->smgr_owner == NULL);
766 
767 		smgrclose(rel);
768 	}
769 }
770