1 /*-------------------------------------------------------------------------
2 *
3 * blinsert.c
4 * Bloom index build and insert functions.
5 *
6 * Copyright (c) 2016-2018, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/bloom/blinsert.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "access/genam.h"
16 #include "access/generic_xlog.h"
17 #include "catalog/index.h"
18 #include "miscadmin.h"
19 #include "storage/bufmgr.h"
20 #include "storage/indexfsm.h"
21 #include "storage/smgr.h"
22 #include "utils/memutils.h"
23 #include "utils/rel.h"
24
25 #include "bloom.h"
26
27 PG_MODULE_MAGIC;
28
29 /*
30 * State of bloom index build. We accumulate one page data here before
31 * flushing it to buffer manager.
32 */
33 typedef struct
34 {
35 BloomState blstate; /* bloom index state */
36 int64 indtuples; /* total number of tuples indexed */
37 MemoryContext tmpCtx; /* temporary memory context reset after each
38 * tuple */
39 PGAlignedBlock data; /* cached page */
40 int count; /* number of tuples in cached page */
41 } BloomBuildState;
42
43 /*
44 * Flush page cached in BloomBuildState.
45 */
46 static void
flushCachedPage(Relation index,BloomBuildState * buildstate)47 flushCachedPage(Relation index, BloomBuildState *buildstate)
48 {
49 Page page;
50 Buffer buffer = BloomNewBuffer(index);
51 GenericXLogState *state;
52
53 state = GenericXLogStart(index);
54 page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
55 memcpy(page, buildstate->data.data, BLCKSZ);
56 GenericXLogFinish(state);
57 UnlockReleaseBuffer(buffer);
58 }
59
60 /*
61 * (Re)initialize cached page in BloomBuildState.
62 */
63 static void
initCachedPage(BloomBuildState * buildstate)64 initCachedPage(BloomBuildState *buildstate)
65 {
66 memset(buildstate->data.data, 0, BLCKSZ);
67 BloomInitPage(buildstate->data.data, 0);
68 buildstate->count = 0;
69 }
70
71 /*
72 * Per-tuple callback from IndexBuildHeapScan.
73 */
74 static void
bloomBuildCallback(Relation index,HeapTuple htup,Datum * values,bool * isnull,bool tupleIsAlive,void * state)75 bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
76 bool *isnull, bool tupleIsAlive, void *state)
77 {
78 BloomBuildState *buildstate = (BloomBuildState *) state;
79 MemoryContext oldCtx;
80 BloomTuple *itup;
81
82 oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
83
84 itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
85
86 /* Try to add next item to cached page */
87 if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
88 {
89 /* Next item was added successfully */
90 buildstate->count++;
91 }
92 else
93 {
94 /* Cached page is full, flush it out and make a new one */
95 flushCachedPage(index, buildstate);
96
97 CHECK_FOR_INTERRUPTS();
98
99 initCachedPage(buildstate);
100
101 if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
102 {
103 /* We shouldn't be here since we're inserting to the empty page */
104 elog(ERROR, "could not add new bloom tuple to empty page");
105 }
106
107 /* Next item was added successfully */
108 buildstate->count++;
109 }
110
111 /* Update total tuple count */
112 buildstate->indtuples += 1;
113
114 MemoryContextSwitchTo(oldCtx);
115 MemoryContextReset(buildstate->tmpCtx);
116 }
117
118 /*
119 * Build a new bloom index.
120 */
121 IndexBuildResult *
blbuild(Relation heap,Relation index,IndexInfo * indexInfo)122 blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
123 {
124 IndexBuildResult *result;
125 double reltuples;
126 BloomBuildState buildstate;
127
128 if (RelationGetNumberOfBlocks(index) != 0)
129 elog(ERROR, "index \"%s\" already contains data",
130 RelationGetRelationName(index));
131
132 /* Initialize the meta page */
133 BloomInitMetapage(index);
134
135 /* Initialize the bloom build state */
136 memset(&buildstate, 0, sizeof(buildstate));
137 initBloomState(&buildstate.blstate, index);
138 buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
139 "Bloom build temporary context",
140 ALLOCSET_DEFAULT_SIZES);
141 initCachedPage(&buildstate);
142
143 /* Do the heap scan */
144 reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
145 bloomBuildCallback, (void *) &buildstate,
146 NULL);
147
148 /* Flush last page if needed (it will be, unless heap was empty) */
149 if (buildstate.count > 0)
150 flushCachedPage(index, &buildstate);
151
152 MemoryContextDelete(buildstate.tmpCtx);
153
154 result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
155 result->heap_tuples = reltuples;
156 result->index_tuples = buildstate.indtuples;
157
158 return result;
159 }
160
161 /*
162 * Build an empty bloom index in the initialization fork.
163 */
164 void
blbuildempty(Relation index)165 blbuildempty(Relation index)
166 {
167 Page metapage;
168
169 /* Construct metapage. */
170 metapage = (Page) palloc(BLCKSZ);
171 BloomFillMetapage(index, metapage);
172
173 /*
174 * Write the page and log it. It might seem that an immediate sync would
175 * be sufficient to guarantee that the file exists on disk, but recovery
176 * itself might remove it while replaying, for example, an
177 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
178 * this even when wal_level=minimal.
179 */
180 PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
181 smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
182 (char *) metapage, true);
183 log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
184 BLOOM_METAPAGE_BLKNO, metapage, true);
185
186 /*
187 * An immediate sync is required even if we xlog'd the page, because the
188 * write did not go through shared_buffers and therefore a concurrent
189 * checkpoint may have moved the redo pointer past our xlog record.
190 */
191 smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
192 }
193
194 /*
195 * Insert new tuple to the bloom index.
196 */
197 bool
blinsert(Relation index,Datum * values,bool * isnull,ItemPointer ht_ctid,Relation heapRel,IndexUniqueCheck checkUnique,IndexInfo * indexInfo)198 blinsert(Relation index, Datum *values, bool *isnull,
199 ItemPointer ht_ctid, Relation heapRel,
200 IndexUniqueCheck checkUnique,
201 IndexInfo *indexInfo)
202 {
203 BloomState blstate;
204 BloomTuple *itup;
205 MemoryContext oldCtx;
206 MemoryContext insertCtx;
207 BloomMetaPageData *metaData;
208 Buffer buffer,
209 metaBuffer;
210 Page page,
211 metaPage;
212 BlockNumber blkno = InvalidBlockNumber;
213 OffsetNumber nStart;
214 GenericXLogState *state;
215
216 insertCtx = AllocSetContextCreate(CurrentMemoryContext,
217 "Bloom insert temporary context",
218 ALLOCSET_DEFAULT_SIZES);
219
220 oldCtx = MemoryContextSwitchTo(insertCtx);
221
222 initBloomState(&blstate, index);
223 itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
224
225 /*
226 * At first, try to insert new tuple to the first page in notFullPage
227 * array. If successful, we don't need to modify the meta page.
228 */
229 metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
230 LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
231 metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
232
233 if (metaData->nEnd > metaData->nStart)
234 {
235 Page page;
236
237 blkno = metaData->notFullPage[metaData->nStart];
238 Assert(blkno != InvalidBlockNumber);
239
240 /* Don't hold metabuffer lock while doing insert */
241 LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
242
243 buffer = ReadBuffer(index, blkno);
244 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
245
246 state = GenericXLogStart(index);
247 page = GenericXLogRegisterBuffer(state, buffer, 0);
248
249 /*
250 * We might have found a page that was recently deleted by VACUUM. If
251 * so, we can reuse it, but we must reinitialize it.
252 */
253 if (PageIsNew(page) || BloomPageIsDeleted(page))
254 BloomInitPage(page, 0);
255
256 if (BloomPageAddItem(&blstate, page, itup))
257 {
258 /* Success! Apply the change, clean up, and exit */
259 GenericXLogFinish(state);
260 UnlockReleaseBuffer(buffer);
261 ReleaseBuffer(metaBuffer);
262 MemoryContextSwitchTo(oldCtx);
263 MemoryContextDelete(insertCtx);
264 return false;
265 }
266
267 /* Didn't fit, must try other pages */
268 GenericXLogAbort(state);
269 UnlockReleaseBuffer(buffer);
270 }
271 else
272 {
273 /* No entries in notFullPage */
274 LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
275 }
276
277 /*
278 * Try other pages in notFullPage array. We will have to change nStart in
279 * metapage. Thus, grab exclusive lock on metapage.
280 */
281 LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
282
283 /* nStart might have changed while we didn't have lock */
284 nStart = metaData->nStart;
285
286 /* Skip first page if we already tried it above */
287 if (nStart < metaData->nEnd &&
288 blkno == metaData->notFullPage[nStart])
289 nStart++;
290
291 /*
292 * This loop iterates for each page we try from the notFullPage array, and
293 * will also initialize a GenericXLogState for the fallback case of having
294 * to allocate a new page.
295 */
296 for (;;)
297 {
298 state = GenericXLogStart(index);
299
300 /* get modifiable copy of metapage */
301 metaPage = GenericXLogRegisterBuffer(state, metaBuffer, 0);
302 metaData = BloomPageGetMeta(metaPage);
303
304 if (nStart >= metaData->nEnd)
305 break; /* no more entries in notFullPage array */
306
307 blkno = metaData->notFullPage[nStart];
308 Assert(blkno != InvalidBlockNumber);
309
310 buffer = ReadBuffer(index, blkno);
311 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
312 page = GenericXLogRegisterBuffer(state, buffer, 0);
313
314 /* Basically same logic as above */
315 if (PageIsNew(page) || BloomPageIsDeleted(page))
316 BloomInitPage(page, 0);
317
318 if (BloomPageAddItem(&blstate, page, itup))
319 {
320 /* Success! Apply the changes, clean up, and exit */
321 metaData->nStart = nStart;
322 GenericXLogFinish(state);
323 UnlockReleaseBuffer(buffer);
324 UnlockReleaseBuffer(metaBuffer);
325 MemoryContextSwitchTo(oldCtx);
326 MemoryContextDelete(insertCtx);
327 return false;
328 }
329
330 /* Didn't fit, must try other pages */
331 GenericXLogAbort(state);
332 UnlockReleaseBuffer(buffer);
333 nStart++;
334 }
335
336 /*
337 * Didn't find place to insert in notFullPage array. Allocate new page.
338 * (XXX is it good to do this while holding ex-lock on the metapage??)
339 */
340 buffer = BloomNewBuffer(index);
341
342 page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
343 BloomInitPage(page, 0);
344
345 if (!BloomPageAddItem(&blstate, page, itup))
346 {
347 /* We shouldn't be here since we're inserting to an empty page */
348 elog(ERROR, "could not add new bloom tuple to empty page");
349 }
350
351 /* Reset notFullPage array to contain just this new page */
352 metaData->nStart = 0;
353 metaData->nEnd = 1;
354 metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
355
356 /* Apply the changes, clean up, and exit */
357 GenericXLogFinish(state);
358
359 UnlockReleaseBuffer(buffer);
360 UnlockReleaseBuffer(metaBuffer);
361
362 MemoryContextSwitchTo(oldCtx);
363 MemoryContextDelete(insertCtx);
364
365 return false;
366 }
367