1 /*-------------------------------------------------------------------------
2  *
3  * blinsert.c
4  *		Bloom index build and insert functions.
5  *
6  * Copyright (c) 2016-2018, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  contrib/bloom/blinsert.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/genam.h"
16 #include "access/generic_xlog.h"
17 #include "catalog/index.h"
18 #include "miscadmin.h"
19 #include "storage/bufmgr.h"
20 #include "storage/indexfsm.h"
21 #include "storage/smgr.h"
22 #include "utils/memutils.h"
23 #include "utils/rel.h"
24 
25 #include "bloom.h"
26 
27 PG_MODULE_MAGIC;
28 
29 /*
30  * State of bloom index build.  We accumulate one page data here before
31  * flushing it to buffer manager.
32  */
33 typedef struct
34 {
35 	BloomState	blstate;		/* bloom index state */
36 	int64		indtuples;		/* total number of tuples indexed */
37 	MemoryContext tmpCtx;		/* temporary memory context reset after each
38 								 * tuple */
39 	PGAlignedBlock data;		/* cached page */
40 	int			count;			/* number of tuples in cached page */
41 } BloomBuildState;
42 
43 /*
44  * Flush page cached in BloomBuildState.
45  */
46 static void
flushCachedPage(Relation index,BloomBuildState * buildstate)47 flushCachedPage(Relation index, BloomBuildState *buildstate)
48 {
49 	Page		page;
50 	Buffer		buffer = BloomNewBuffer(index);
51 	GenericXLogState *state;
52 
53 	state = GenericXLogStart(index);
54 	page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
55 	memcpy(page, buildstate->data.data, BLCKSZ);
56 	GenericXLogFinish(state);
57 	UnlockReleaseBuffer(buffer);
58 }
59 
60 /*
61  * (Re)initialize cached page in BloomBuildState.
62  */
63 static void
initCachedPage(BloomBuildState * buildstate)64 initCachedPage(BloomBuildState *buildstate)
65 {
66 	memset(buildstate->data.data, 0, BLCKSZ);
67 	BloomInitPage(buildstate->data.data, 0);
68 	buildstate->count = 0;
69 }
70 
71 /*
72  * Per-tuple callback from IndexBuildHeapScan.
73  */
74 static void
bloomBuildCallback(Relation index,HeapTuple htup,Datum * values,bool * isnull,bool tupleIsAlive,void * state)75 bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
76 				   bool *isnull, bool tupleIsAlive, void *state)
77 {
78 	BloomBuildState *buildstate = (BloomBuildState *) state;
79 	MemoryContext oldCtx;
80 	BloomTuple *itup;
81 
82 	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
83 
84 	itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
85 
86 	/* Try to add next item to cached page */
87 	if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
88 	{
89 		/* Next item was added successfully */
90 		buildstate->count++;
91 	}
92 	else
93 	{
94 		/* Cached page is full, flush it out and make a new one */
95 		flushCachedPage(index, buildstate);
96 
97 		CHECK_FOR_INTERRUPTS();
98 
99 		initCachedPage(buildstate);
100 
101 		if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
102 		{
103 			/* We shouldn't be here since we're inserting to the empty page */
104 			elog(ERROR, "could not add new bloom tuple to empty page");
105 		}
106 
107 		/* Next item was added successfully */
108 		buildstate->count++;
109 	}
110 
111 	/* Update total tuple count */
112 	buildstate->indtuples += 1;
113 
114 	MemoryContextSwitchTo(oldCtx);
115 	MemoryContextReset(buildstate->tmpCtx);
116 }
117 
118 /*
119  * Build a new bloom index.
120  */
121 IndexBuildResult *
blbuild(Relation heap,Relation index,IndexInfo * indexInfo)122 blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
123 {
124 	IndexBuildResult *result;
125 	double		reltuples;
126 	BloomBuildState buildstate;
127 
128 	if (RelationGetNumberOfBlocks(index) != 0)
129 		elog(ERROR, "index \"%s\" already contains data",
130 			 RelationGetRelationName(index));
131 
132 	/* Initialize the meta page */
133 	BloomInitMetapage(index);
134 
135 	/* Initialize the bloom build state */
136 	memset(&buildstate, 0, sizeof(buildstate));
137 	initBloomState(&buildstate.blstate, index);
138 	buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
139 											  "Bloom build temporary context",
140 											  ALLOCSET_DEFAULT_SIZES);
141 	initCachedPage(&buildstate);
142 
143 	/* Do the heap scan */
144 	reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
145 								   bloomBuildCallback, (void *) &buildstate,
146 								   NULL);
147 
148 	/* Flush last page if needed (it will be, unless heap was empty) */
149 	if (buildstate.count > 0)
150 		flushCachedPage(index, &buildstate);
151 
152 	MemoryContextDelete(buildstate.tmpCtx);
153 
154 	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
155 	result->heap_tuples = reltuples;
156 	result->index_tuples = buildstate.indtuples;
157 
158 	return result;
159 }
160 
161 /*
162  * Build an empty bloom index in the initialization fork.
163  */
164 void
blbuildempty(Relation index)165 blbuildempty(Relation index)
166 {
167 	Page		metapage;
168 
169 	/* Construct metapage. */
170 	metapage = (Page) palloc(BLCKSZ);
171 	BloomFillMetapage(index, metapage);
172 
173 	/*
174 	 * Write the page and log it.  It might seem that an immediate sync would
175 	 * be sufficient to guarantee that the file exists on disk, but recovery
176 	 * itself might remove it while replaying, for example, an
177 	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we need
178 	 * this even when wal_level=minimal.
179 	 */
180 	PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
181 	smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
182 			  (char *) metapage, true);
183 	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
184 				BLOOM_METAPAGE_BLKNO, metapage, true);
185 
186 	/*
187 	 * An immediate sync is required even if we xlog'd the page, because the
188 	 * write did not go through shared_buffers and therefore a concurrent
189 	 * checkpoint may have moved the redo pointer past our xlog record.
190 	 */
191 	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
192 }
193 
194 /*
195  * Insert new tuple to the bloom index.
196  */
197 bool
blinsert(Relation index,Datum * values,bool * isnull,ItemPointer ht_ctid,Relation heapRel,IndexUniqueCheck checkUnique,IndexInfo * indexInfo)198 blinsert(Relation index, Datum *values, bool *isnull,
199 		 ItemPointer ht_ctid, Relation heapRel,
200 		 IndexUniqueCheck checkUnique,
201 		 IndexInfo *indexInfo)
202 {
203 	BloomState	blstate;
204 	BloomTuple *itup;
205 	MemoryContext oldCtx;
206 	MemoryContext insertCtx;
207 	BloomMetaPageData *metaData;
208 	Buffer		buffer,
209 				metaBuffer;
210 	Page		page,
211 				metaPage;
212 	BlockNumber blkno = InvalidBlockNumber;
213 	OffsetNumber nStart;
214 	GenericXLogState *state;
215 
216 	insertCtx = AllocSetContextCreate(CurrentMemoryContext,
217 									  "Bloom insert temporary context",
218 									  ALLOCSET_DEFAULT_SIZES);
219 
220 	oldCtx = MemoryContextSwitchTo(insertCtx);
221 
222 	initBloomState(&blstate, index);
223 	itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
224 
225 	/*
226 	 * At first, try to insert new tuple to the first page in notFullPage
227 	 * array.  If successful, we don't need to modify the meta page.
228 	 */
229 	metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
230 	LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
231 	metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
232 
233 	if (metaData->nEnd > metaData->nStart)
234 	{
235 		Page		page;
236 
237 		blkno = metaData->notFullPage[metaData->nStart];
238 		Assert(blkno != InvalidBlockNumber);
239 
240 		/* Don't hold metabuffer lock while doing insert */
241 		LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
242 
243 		buffer = ReadBuffer(index, blkno);
244 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
245 
246 		state = GenericXLogStart(index);
247 		page = GenericXLogRegisterBuffer(state, buffer, 0);
248 
249 		/*
250 		 * We might have found a page that was recently deleted by VACUUM.  If
251 		 * so, we can reuse it, but we must reinitialize it.
252 		 */
253 		if (PageIsNew(page) || BloomPageIsDeleted(page))
254 			BloomInitPage(page, 0);
255 
256 		if (BloomPageAddItem(&blstate, page, itup))
257 		{
258 			/* Success!  Apply the change, clean up, and exit */
259 			GenericXLogFinish(state);
260 			UnlockReleaseBuffer(buffer);
261 			ReleaseBuffer(metaBuffer);
262 			MemoryContextSwitchTo(oldCtx);
263 			MemoryContextDelete(insertCtx);
264 			return false;
265 		}
266 
267 		/* Didn't fit, must try other pages */
268 		GenericXLogAbort(state);
269 		UnlockReleaseBuffer(buffer);
270 	}
271 	else
272 	{
273 		/* No entries in notFullPage */
274 		LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
275 	}
276 
277 	/*
278 	 * Try other pages in notFullPage array.  We will have to change nStart in
279 	 * metapage.  Thus, grab exclusive lock on metapage.
280 	 */
281 	LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
282 
283 	/* nStart might have changed while we didn't have lock */
284 	nStart = metaData->nStart;
285 
286 	/* Skip first page if we already tried it above */
287 	if (nStart < metaData->nEnd &&
288 		blkno == metaData->notFullPage[nStart])
289 		nStart++;
290 
291 	/*
292 	 * This loop iterates for each page we try from the notFullPage array, and
293 	 * will also initialize a GenericXLogState for the fallback case of having
294 	 * to allocate a new page.
295 	 */
296 	for (;;)
297 	{
298 		state = GenericXLogStart(index);
299 
300 		/* get modifiable copy of metapage */
301 		metaPage = GenericXLogRegisterBuffer(state, metaBuffer, 0);
302 		metaData = BloomPageGetMeta(metaPage);
303 
304 		if (nStart >= metaData->nEnd)
305 			break;				/* no more entries in notFullPage array */
306 
307 		blkno = metaData->notFullPage[nStart];
308 		Assert(blkno != InvalidBlockNumber);
309 
310 		buffer = ReadBuffer(index, blkno);
311 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
312 		page = GenericXLogRegisterBuffer(state, buffer, 0);
313 
314 		/* Basically same logic as above */
315 		if (PageIsNew(page) || BloomPageIsDeleted(page))
316 			BloomInitPage(page, 0);
317 
318 		if (BloomPageAddItem(&blstate, page, itup))
319 		{
320 			/* Success!  Apply the changes, clean up, and exit */
321 			metaData->nStart = nStart;
322 			GenericXLogFinish(state);
323 			UnlockReleaseBuffer(buffer);
324 			UnlockReleaseBuffer(metaBuffer);
325 			MemoryContextSwitchTo(oldCtx);
326 			MemoryContextDelete(insertCtx);
327 			return false;
328 		}
329 
330 		/* Didn't fit, must try other pages */
331 		GenericXLogAbort(state);
332 		UnlockReleaseBuffer(buffer);
333 		nStart++;
334 	}
335 
336 	/*
337 	 * Didn't find place to insert in notFullPage array.  Allocate new page.
338 	 * (XXX is it good to do this while holding ex-lock on the metapage??)
339 	 */
340 	buffer = BloomNewBuffer(index);
341 
342 	page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
343 	BloomInitPage(page, 0);
344 
345 	if (!BloomPageAddItem(&blstate, page, itup))
346 	{
347 		/* We shouldn't be here since we're inserting to an empty page */
348 		elog(ERROR, "could not add new bloom tuple to empty page");
349 	}
350 
351 	/* Reset notFullPage array to contain just this new page */
352 	metaData->nStart = 0;
353 	metaData->nEnd = 1;
354 	metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
355 
356 	/* Apply the changes, clean up, and exit */
357 	GenericXLogFinish(state);
358 
359 	UnlockReleaseBuffer(buffer);
360 	UnlockReleaseBuffer(metaBuffer);
361 
362 	MemoryContextSwitchTo(oldCtx);
363 	MemoryContextDelete(insertCtx);
364 
365 	return false;
366 }
367