/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/buf_init.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/buf_internals.h"


BufferDescPadded *BufferDescriptors;
char	   *BufferBlocks;
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
LWLockTranche BufferIOLWLockTranche;
LWLockTranche BufferContentLWLockTranche;
WritebackContext BackendWritebackContext;
CkptSortItem *CkptBufferIds;


/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		The buffer must be available for lookup BEFORE an IO begins.
 *		Otherwise a second process trying to read the buffer would
 *		allocate its own copy, and the buffer pool would become
 *		inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while it is in
 *		use, either by the data manager or during IO.
 *
 *
 * Synchronization/Locking:
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  See WaitIO and related routines.
 *
 * refcount --	Counts the number of processes holding pins on a buffer.
 *		A buffer is pinned during IO and immediately after a BufferAlloc().
 *		Pins must be released before end of transaction.  For efficiency the
 *		shared refcount isn't increased if an individual backend pins a buffer
 *		multiple times.  See the PrivateRefCount infrastructure in bufmgr.c.
 */
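
/*
 * As an illustration of the pin discipline described above, here is a
 * minimal sketch of how a caller would use the bufmgr.c API.  It is not
 * part of this file (hence NOT_USED), and the function name is
 * hypothetical, but ReadBuffer(), LockBuffer() and ReleaseBuffer() are
 * the real entry points.
 */
#ifdef NOT_USED
static void
example_pin_cycle(Relation reln, BlockNumber blockNum)
{
	Buffer		buf;

	/* ReadBuffer() returns the page already pinned (refcount raised) */
	buf = ReadBuffer(reln, blockNum);

	/* the content lock, not the pin, protects the page contents */
	LockBuffer(buf, BUFFER_LOCK_SHARE);
	/* ... examine the page via BufferGetPage(buf) ... */
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);

	/* drop the pin; this must happen before end of transaction */
	ReleaseBuffer(buf);
}
#endif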


/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs,
				foundIOLocks,
				foundBufCkpt;

	/* Align descriptors to a cacheline boundary. */
	BufferDescriptors = (BufferDescPadded *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDescPadded),
						&foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	/* Align lwlocks to cacheline boundary */
	BufferIOLWLockArray = (LWLockMinimallyPadded *)
		ShmemInitStruct("Buffer IO Locks",
						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
						&foundIOLocks);

	BufferIOLWLockTranche.name = "buffer_io";
	BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
	BufferIOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
	LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS,
						  &BufferIOLWLockTranche);

	BufferContentLWLockTranche.name = "buffer_content";
	BufferContentLWLockTranche.array_base =
		((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
	BufferContentLWLockTranche.array_stride = sizeof(BufferDescPadded);
	LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
						  &BufferContentLWLockTranche);
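
	/*
	 * With base and stride registered, the lwlock code can map any
	 * individual lock back to its tranche name and index: the content
	 * lock of buffer i lives at array_base + i * array_stride, i.e.
	 * embedded in the i'th padded descriptor, while each IO lock is a
	 * separate entry in BufferIOLWLockArray.
	 */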

	/*
	 * The array used to sort to-be-checkpointed buffer ids is located in
	 * shared memory, to avoid having to allocate significant amounts of
	 * memory at runtime. As that'd be in the middle of a checkpoint, or when
	 * the checkpointer is restarted, memory allocation failures would be
	 * painful.
	 */
	CkptBufferIds = (CkptSortItem *)
		ShmemInitStruct("Checkpoint BufferIds",
						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

	if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
	{
		/* should find all of these, or none of them */
		Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
		/* note: this path is only taken in EXEC_BACKEND case */
	}
	else
	{
		int			i;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *buf = GetBufferDescriptor(i);

			CLEAR_BUFFERTAG(buf->tag);

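			/*
			 * buf->state packs the buffer's flag bits, refcount and
			 * usage count into a single atomic word, so zero marks the
			 * buffer as unpinned, unused and flag-free.
			 */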
			pg_atomic_init_u32(&buf->state, 0);
			buf->wait_backend_pid = 0;

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			LWLockInitialize(BufferDescriptorGetContentLock(buf),
							 LWTRANCHE_BUFFER_CONTENT);

			LWLockInitialize(BufferDescriptorGetIOLock(buf),
							 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
		}

		/* Correct last entry of linked list */
		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
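
		/*
		 * At this point the descriptors form a single chain 0 -> 1 ->
		 * ... -> NBuffers - 1 -> FREENEXT_END_OF_LIST, which freelist.c
		 * adopts as the initial free list in StrategyInitialize().
		 */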
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	/* Initialize per-backend file flush context */
	WritebackContextInit(&BackendWritebackContext,
						 &backend_flush_after);
}

/*
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 */
Size
BufferShmemSize(void)
{
	Size		size = 0;

	/* size of buffer descriptors */
	size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
	/* to allow aligning buffer descriptors */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of data pages */
	size = add_size(size, mul_size(NBuffers, BLCKSZ));

	/* size of stuff controlled by freelist.c */
	size = add_size(size, StrategyShmemSize());

	/*
	 * It would be nice to include the I/O locks in the BufferDesc, but that
	 * would increase the size of a BufferDesc to more than one cache line,
	 * and benchmarking has shown that keeping every BufferDesc aligned on a
	 * cache line boundary is important for performance.  So, instead, the
	 * array of I/O locks is allocated in a separate tranche.  Because those
	 * locks are not highly contended, we lay out the array with minimal
	 * padding.
	 */
	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
	/* to allow aligning the above */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of checkpoint sort array in bufmgr.c */
	size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
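
	/*
	 * For a sense of scale (the struct sizes here are assumptions for a
	 * typical 64-bit build): with NBuffers = 16384, i.e. shared_buffers =
	 * 128MB at the default BLCKSZ of 8192, this adds up to roughly 128MB
	 * of data pages, ~1MB of descriptors (64 bytes each), ~512kB of I/O
	 * locks (32 bytes each) and a few hundred kB of checkpoint sort
	 * entries, plus the freelist.c structures.
	 */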

	return size;
}