/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/buf_init.c
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
16
17 #include "storage/bufmgr.h"
18 #include "storage/buf_internals.h"
19
20
21 BufferDescPadded *BufferDescriptors;
22 char *BufferBlocks;
23 LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
24 LWLockTranche BufferIOLWLockTranche;
25 LWLockTranche BufferContentLWLockTranche;
26 WritebackContext BackendWritebackContext;
27 CkptSortItem *CkptBufferIds;
28
29
/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		Two important notes.  First, the buffer has to be
 *		available for lookup BEFORE an IO begins.  Otherwise
 *		a second process trying to read the buffer will
 *		allocate its own copy and the buffer pool will
 *		become inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while in
 *		use either by data manager or during IO.
 *
 *
 * Synchronization/Locking:
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  see WaitIO and related routines.
 *
 * refcount --	Counts the number of processes holding pins on a buffer.
 *		A buffer is pinned during IO and immediately after a BufferAlloc().
 *		Pins must be released before end of transaction.  For efficiency the
 *		shared refcount isn't increased if an individual backend pins a buffer
 *		multiple times. Check the PrivateRefCount infrastructure in bufmgr.c.
 */
61
62
63 /*
64 * Initialize shared buffer pool
65 *
66 * This is called once during shared-memory initialization (either in the
67 * postmaster, or in a standalone backend).
68 */
69 void
InitBufferPool(void)70 InitBufferPool(void)
71 {
72 bool foundBufs,
73 foundDescs,
74 foundIOLocks,
75 foundBufCkpt;
76
77 /* Align descriptors to a cacheline boundary. */
78 BufferDescriptors = (BufferDescPadded *)
79 ShmemInitStruct("Buffer Descriptors",
80 NBuffers * sizeof(BufferDescPadded),
81 &foundDescs);
82
83 BufferBlocks = (char *)
84 ShmemInitStruct("Buffer Blocks",
85 NBuffers * (Size) BLCKSZ, &foundBufs);
86
87 /* Align lwlocks to cacheline boundary */
88 BufferIOLWLockArray = (LWLockMinimallyPadded *)
89 ShmemInitStruct("Buffer IO Locks",
90 NBuffers * (Size) sizeof(LWLockMinimallyPadded),
91 &foundIOLocks);
92
93 BufferIOLWLockTranche.name = "buffer_io";
94 BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
95 BufferIOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
96 LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS,
97 &BufferIOLWLockTranche);
98
99 BufferContentLWLockTranche.name = "buffer_content";
100 BufferContentLWLockTranche.array_base =
101 ((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
102 BufferContentLWLockTranche.array_stride = sizeof(BufferDescPadded);
103 LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
104 &BufferContentLWLockTranche);
105
106 /*
107 * The array used to sort to-be-checkpointed buffer ids is located in
108 * shared memory, to avoid having to allocate significant amounts of
109 * memory at runtime. As that'd be in the middle of a checkpoint, or when
110 * the checkpointer is restarted, memory allocation failures would be
111 * painful.
112 */
113 CkptBufferIds = (CkptSortItem *)
114 ShmemInitStruct("Checkpoint BufferIds",
115 NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
116
117 if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
118 {
119 /* should find all of these, or none of them */
120 Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
121 /* note: this path is only taken in EXEC_BACKEND case */
122 }
123 else
124 {
125 int i;
126
127 /*
128 * Initialize all the buffer headers.
129 */
130 for (i = 0; i < NBuffers; i++)
131 {
132 BufferDesc *buf = GetBufferDescriptor(i);
133
134 CLEAR_BUFFERTAG(buf->tag);
135
136 pg_atomic_init_u32(&buf->state, 0);
137 buf->wait_backend_pid = 0;
138
139 buf->buf_id = i;
140
141 /*
142 * Initially link all the buffers together as unused. Subsequent
143 * management of this list is done by freelist.c.
144 */
145 buf->freeNext = i + 1;
146
147 LWLockInitialize(BufferDescriptorGetContentLock(buf),
148 LWTRANCHE_BUFFER_CONTENT);
149
150 LWLockInitialize(BufferDescriptorGetIOLock(buf),
151 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
152 }
153
154 /* Correct last entry of linked list */
155 GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
156 }
157
158 /* Init other shared buffer-management stuff */
159 StrategyInitialize(!foundDescs);
160
161 /* Initialize per-backend file flush context */
162 WritebackContextInit(&BackendWritebackContext,
163 &backend_flush_after);
164 }
165
166 /*
167 * BufferShmemSize
168 *
169 * compute the size of shared memory for the buffer pool including
170 * data pages, buffer descriptors, hash tables, etc.
171 */
172 Size
BufferShmemSize(void)173 BufferShmemSize(void)
174 {
175 Size size = 0;
176
177 /* size of buffer descriptors */
178 size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
179 /* to allow aligning buffer descriptors */
180 size = add_size(size, PG_CACHE_LINE_SIZE);
181
182 /* size of data pages */
183 size = add_size(size, mul_size(NBuffers, BLCKSZ));
184
185 /* size of stuff controlled by freelist.c */
186 size = add_size(size, StrategyShmemSize());
187
188 /*
189 * It would be nice to include the I/O locks in the BufferDesc, but that
190 * would increase the size of a BufferDesc to more than one cache line,
191 * and benchmarking has shown that keeping every BufferDesc aligned on a
192 * cache line boundary is important for performance. So, instead, the
193 * array of I/O locks is allocated in a separate tranche. Because those
194 * locks are not highly contentended, we lay out the array with minimal
195 * padding.
196 */
197 size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
198 /* to allow aligning the above */
199 size = add_size(size, PG_CACHE_LINE_SIZE);
200
201 /* size of checkpoint sort array in bufmgr.c */
202 size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
203
204 return size;
205 }
206