/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/buf_init.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/buf_internals.h"


BufferDescPadded *BufferDescriptors;
char	   *BufferBlocks;
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
WritebackContext BackendWritebackContext;
CkptSortItem *CkptBufferIds;


/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		One important note: the buffer has to be available
 *		for lookup BEFORE an IO begins.  Otherwise a second
 *		process trying to read the buffer will allocate its
 *		own copy and the buffer pool will become inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while in
 *		use either by data manager or during IO.
 *
 *
 * Synchronization/Locking:
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  see WaitIO and related routines.
 *
 * refcount --	Counts the number of processes holding pins on a buffer.
 *		A buffer is pinned during IO and immediately after a BufferAlloc().
 *		Pins must be released before end of transaction.  For efficiency the
 *		shared refcount isn't increased if an individual backend pins a buffer
 *		multiple times.  Check the PrivateRefCount infrastructure in bufmgr.c.
 *		(A usage sketch follows this comment.)
 */
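
/*
 * Illustrative sketch (not part of this module, and kept out of the build):
 * the pin/refcount protocol described above, as a backend would exercise it
 * through the standard bufmgr API.  ReadBuffer() pins the page, performing
 * IO if it isn't already resident; the content lock serializes access to the
 * page contents; ReleaseBuffer() drops the pin.  The function name and
 * arguments here are hypothetical.
 */
#ifdef NOT_USED
static void
examine_one_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf;
	Page		page;

	buf = ReadBuffer(rel, blkno);	/* pin; may fault the page in */
	LockBuffer(buf, BUFFER_LOCK_SHARE); /* content lock, distinct from pin */

	page = BufferGetPage(buf);
	/* ... inspect the page here ... */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);			/* unpin; required before end of xact */
}
#endif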


/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs,
				foundIOLocks,
				foundBufCkpt;

	/* Align descriptors to a cacheline boundary. */
	BufferDescriptors = (BufferDescPadded *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDescPadded),
						&foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	/* Align lwlocks to cacheline boundary */
	BufferIOLWLockArray = (LWLockMinimallyPadded *)
		ShmemInitStruct("Buffer IO Locks",
						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
						&foundIOLocks);

	LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
	LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");

	/*
	 * The array used to sort to-be-checkpointed buffer ids is located in
	 * shared memory, to avoid having to allocate significant amounts of
	 * memory at runtime. As that'd be in the middle of a checkpoint, or when
	 * the checkpointer is restarted, memory allocation failures would be
	 * painful.
	 */
	CkptBufferIds = (CkptSortItem *)
		ShmemInitStruct("Checkpoint BufferIds",
						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

	if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
	{
		/* should find all of these, or none of them */
		Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);

		/*
		 * note: this path is only taken in EXEC_BACKEND case (see the
		 * create-or-attach sketch after this function)
		 */
	}
	else
	{
		int			i;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *buf = GetBufferDescriptor(i);

			CLEAR_BUFFERTAG(buf->tag);

			pg_atomic_init_u32(&buf->state, 0);
			buf->wait_backend_pid = 0;

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			LWLockInitialize(BufferDescriptorGetContentLock(buf),
							 LWTRANCHE_BUFFER_CONTENT);

			LWLockInitialize(BufferDescriptorGetIOLock(buf),
							 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
		}

		/* Correct last entry of linked list */
		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	/* Initialize per-backend file flush context */
	WritebackContextInit(&BackendWritebackContext,
						 &backend_flush_after);
}
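
/*
 * Illustrative sketch (hypothetical names, kept out of the build): the
 * create-or-attach idiom ShmemInitStruct() provides, which InitBufferPool()
 * relies on above.  The first caller (normally the postmaster) gets a fresh
 * allocation with *found set to false and must initialize every field; under
 * EXEC_BACKEND, child processes re-attach to the same structure, see *found
 * set to true, and must leave the contents alone.
 */
#ifdef NOT_USED
typedef struct DemoShmemStruct
{
	int			counter;
} DemoShmemStruct;

static DemoShmemStruct *
demo_attach(void)
{
	DemoShmemStruct *ptr;
	bool		found;

	ptr = (DemoShmemStruct *)
		ShmemInitStruct("Demo Struct", sizeof(DemoShmemStruct), &found);
	if (!found)
		ptr->counter = 0;		/* first attach: initialize */
	return ptr;					/* re-attach: contents already valid */
}
#endif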

/*
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 */
Size
BufferShmemSize(void)
{
	Size		size = 0;

	/* size of buffer descriptors */
	size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
	/* to allow aligning buffer descriptors */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of data pages */
	size = add_size(size, mul_size(NBuffers, BLCKSZ));

	/* size of stuff controlled by freelist.c */
	size = add_size(size, StrategyShmemSize());

	/*
	 * It would be nice to include the I/O locks in the BufferDesc, but that
	 * would increase the size of a BufferDesc to more than one cache line,
	 * and benchmarking has shown that keeping every BufferDesc aligned on a
	 * cache line boundary is important for performance.  So, instead, the
	 * array of I/O locks is allocated in a separate tranche.  Because those
	 * locks are not highly contended, we lay out the array with minimal
	 * padding.
	 */
	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
	/* to allow aligning the above */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of checkpoint sort array in bufmgr.c */
	size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));

	return size;
}
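
/*
 * Worked example (the struct sizes are assumptions for a typical 64-bit
 * build, where BufferDescPadded is padded to 64 bytes, LWLockMinimallyPadded
 * to 32 bytes, and CkptSortItem comes to about 24 bytes): with
 * shared_buffers = 128MB and the default BLCKSZ of 8192, NBuffers is 16384,
 * so the terms above come to roughly
 *
 *		data pages:			16384 * 8192 = 128 MB
 *		buffer descriptors:	16384 * 64   =   1 MB
 *		buffer IO locks:	16384 * 32   = 512 kB
 *		ckpt sort array:	16384 * 24   = 384 kB
 *
 * plus StrategyShmemSize() and two cache lines of alignment slop.  The data
 * pages dominate; the bookkeeping adds only a couple of percent.
 */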