1 /*-------------------------------------------------------------------------
2  *
3  * gistsplit.c
4  *	  Multi-column page splitting algorithm
5  *
6  * This file is concerned with making good page-split decisions in multi-column
7  * GiST indexes.  The opclass-specific picksplit functions can only be expected
8  * to produce answers based on a single column.  We first run the picksplit
9  * function for column 1; then, if there are more columns, we check if any of
10  * the tuples are "don't cares" so far as the column 1 split is concerned
11  * (that is, they could go to either side for no additional penalty).  If so,
12  * we try to redistribute those tuples on the basis of the next column.
13  * Repeat till we're out of columns.
14  *
15  * gistSplitByKey() is the entry point to this file.
16  *
17  *
18  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * IDENTIFICATION
22  *	  src/backend/access/gist/gistsplit.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27 
28 #include "access/gist_private.h"
29 #include "utils/rel.h"
30 
31 typedef struct
32 {
33 	OffsetNumber *entries;
34 	int			len;
35 	Datum	   *attr;
36 	bool	   *isnull;
37 	bool	   *dontcare;
38 } GistSplitUnion;
39 
40 
41 /*
42  * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
43  * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
44  * gistunionsubkey.
45  */
46 static void
gistunionsubkeyvec(GISTSTATE * giststate,IndexTuple * itvec,GistSplitUnion * gsvp)47 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
48 				   GistSplitUnion *gsvp)
49 {
50 	IndexTuple *cleanedItVec;
51 	int			i,
52 				cleanedLen = 0;
53 
54 	cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
55 
56 	for (i = 0; i < gsvp->len; i++)
57 	{
58 		if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
59 			continue;
60 
61 		cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
62 	}
63 
64 	gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
65 					   gsvp->attr, gsvp->isnull);
66 
67 	pfree(cleanedItVec);
68 }
69 
70 /*
71  * Recompute unions of left- and right-side subkeys after a page split,
72  * ignoring any tuples that are marked in spl->spl_dontcare[].
73  *
74  * Note: we always recompute union keys for all index columns.  In some cases
75  * this might represent duplicate work for the leftmost column(s), but it's
76  * not safe to assume that "zero penalty to move a tuple" means "the union
77  * key doesn't change at all".  Penalty functions aren't 100% accurate.
78  */
79 static void
gistunionsubkey(GISTSTATE * giststate,IndexTuple * itvec,GistSplitVector * spl)80 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
81 {
82 	GistSplitUnion gsvp;
83 
84 	gsvp.dontcare = spl->spl_dontcare;
85 
86 	gsvp.entries = spl->splitVector.spl_left;
87 	gsvp.len = spl->splitVector.spl_nleft;
88 	gsvp.attr = spl->spl_lattr;
89 	gsvp.isnull = spl->spl_lisnull;
90 
91 	gistunionsubkeyvec(giststate, itvec, &gsvp);
92 
93 	gsvp.entries = spl->splitVector.spl_right;
94 	gsvp.len = spl->splitVector.spl_nright;
95 	gsvp.attr = spl->spl_rattr;
96 	gsvp.isnull = spl->spl_risnull;
97 
98 	gistunionsubkeyvec(giststate, itvec, &gsvp);
99 }
100 
101 /*
102  * Find tuples that are "don't cares", that is could be moved to the other
103  * side of the split with zero penalty, so far as the attno column is
104  * concerned.
105  *
106  * Don't-care tuples are marked by setting the corresponding entry in
107  * spl->spl_dontcare[] to "true".  Caller must have initialized that array
108  * to zeroes.
109  *
110  * Returns number of don't-cares found.
111  */
112 static int
findDontCares(Relation r,GISTSTATE * giststate,GISTENTRY * valvec,GistSplitVector * spl,int attno)113 findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
114 			  GistSplitVector *spl, int attno)
115 {
116 	int			i;
117 	GISTENTRY	entry;
118 	int			NumDontCare = 0;
119 
120 	/*
121 	 * First, search the left-side tuples to see if any have zero penalty to
122 	 * be added to the right-side union key.
123 	 *
124 	 * attno column is known all-not-null (see gistSplitByKey), so we need not
125 	 * check for nulls
126 	 */
127 	gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
128 				  (OffsetNumber) 0, FALSE);
129 	for (i = 0; i < spl->splitVector.spl_nleft; i++)
130 	{
131 		int			j = spl->splitVector.spl_left[i];
132 		float		penalty = gistpenalty(giststate, attno, &entry, false,
133 										  &valvec[j], false);
134 
135 		if (penalty == 0.0)
136 		{
137 			spl->spl_dontcare[j] = true;
138 			NumDontCare++;
139 		}
140 	}
141 
142 	/* And conversely for the right-side tuples */
143 	gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
144 				  (OffsetNumber) 0, FALSE);
145 	for (i = 0; i < spl->splitVector.spl_nright; i++)
146 	{
147 		int			j = spl->splitVector.spl_right[i];
148 		float		penalty = gistpenalty(giststate, attno, &entry, false,
149 										  &valvec[j], false);
150 
151 		if (penalty == 0.0)
152 		{
153 			spl->spl_dontcare[j] = true;
154 			NumDontCare++;
155 		}
156 	}
157 
158 	return NumDontCare;
159 }
160 
161 /*
162  * Remove tuples that are marked don't-cares from the tuple index array a[]
163  * of length *len.  This is applied separately to the spl_left and spl_right
164  * arrays.
165  */
166 static void
removeDontCares(OffsetNumber * a,int * len,const bool * dontcare)167 removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
168 {
169 	int			origlen,
170 				newlen,
171 				i;
172 	OffsetNumber *curwpos;
173 
174 	origlen = newlen = *len;
175 	curwpos = a;
176 	for (i = 0; i < origlen; i++)
177 	{
178 		OffsetNumber ai = a[i];
179 
180 		if (dontcare[ai] == FALSE)
181 		{
182 			/* re-emit item into a[] */
183 			*curwpos = ai;
184 			curwpos++;
185 		}
186 		else
187 			newlen--;
188 	}
189 
190 	*len = newlen;
191 }
192 
193 /*
194  * Place a single don't-care tuple into either the left or right side of the
195  * split, according to which has least penalty for merging the tuple into
196  * the previously-computed union keys.  We need consider only columns starting
197  * at attno.
198  */
199 static void
placeOne(Relation r,GISTSTATE * giststate,GistSplitVector * v,IndexTuple itup,OffsetNumber off,int attno)200 placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
201 		 IndexTuple itup, OffsetNumber off, int attno)
202 {
203 	GISTENTRY	identry[INDEX_MAX_KEYS];
204 	bool		isnull[INDEX_MAX_KEYS];
205 	bool		toLeft = true;
206 
207 	gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
208 					  identry, isnull);
209 
210 	for (; attno < giststate->tupdesc->natts; attno++)
211 	{
212 		float		lpenalty,
213 					rpenalty;
214 		GISTENTRY	entry;
215 
216 		gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
217 		lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
218 							   identry + attno, isnull[attno]);
219 		gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
220 		rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
221 							   identry + attno, isnull[attno]);
222 
223 		if (lpenalty != rpenalty)
224 		{
225 			if (lpenalty > rpenalty)
226 				toLeft = false;
227 			break;
228 		}
229 	}
230 
231 	if (toLeft)
232 		v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
233 	else
234 		v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
235 }
236 
237 #define SWAPVAR( s, d, t ) \
238 do {	\
239 	(t) = (s); \
240 	(s) = (d); \
241 	(d) = (t); \
242 } while(0)
243 
244 /*
245  * Clean up when we did a secondary split but the user-defined PickSplit
246  * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
247  * true).
248  *
249  * We consider whether to swap the left and right outputs of the secondary
250  * split; this can be worthwhile if the penalty for merging those tuples into
251  * the previously chosen sets is less that way.
252  *
253  * In any case we must update the union datums for the current column by
254  * adding in the previous union keys (oldL/oldR), since the user-defined
255  * PickSplit method didn't do so.
256  */
257 static void
supportSecondarySplit(Relation r,GISTSTATE * giststate,int attno,GIST_SPLITVEC * sv,Datum oldL,Datum oldR)258 supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
259 					  GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
260 {
261 	bool		leaveOnLeft = true,
262 				tmpBool;
263 	GISTENTRY	entryL,
264 				entryR,
265 				entrySL,
266 				entrySR;
267 
268 	gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
269 	gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
270 	gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
271 	gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
272 
273 	if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
274 	{
275 		float		penalty1,
276 					penalty2;
277 
278 		penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
279 			gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
280 		penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
281 			gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
282 
283 		if (penalty1 > penalty2)
284 			leaveOnLeft = false;
285 	}
286 	else
287 	{
288 		GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
289 		float		penalty1,
290 					penalty2;
291 
292 		/*
293 		 * There is only one previously defined union, so we just choose swap
294 		 * or not by lowest penalty for that side.  We can only get here if a
295 		 * secondary split happened to have all NULLs in its column in the
296 		 * tuples that the outer recursion level had assigned to one side.
297 		 * (Note that the null checks in gistSplitByKey don't prevent the
298 		 * case, because they'll only be checking tuples that were considered
299 		 * don't-cares at the outer recursion level, not the tuples that went
300 		 * into determining the passed-down left and right union keys.)
301 		 */
302 		penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
303 		penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
304 
305 		if (penalty1 < penalty2)
306 			leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
307 		else
308 			leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
309 	}
310 
311 	if (leaveOnLeft == false)
312 	{
313 		/*
314 		 * swap left and right
315 		 */
316 		OffsetNumber *off,
317 					noff;
318 		Datum		datum;
319 
320 		SWAPVAR(sv->spl_left, sv->spl_right, off);
321 		SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
322 		SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
323 		gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
324 		gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
325 	}
326 
327 	if (sv->spl_ldatum_exists)
328 		gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
329 						 &sv->spl_ldatum, &tmpBool);
330 
331 	if (sv->spl_rdatum_exists)
332 		gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
333 						 &sv->spl_rdatum, &tmpBool);
334 
335 	sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
336 }
337 
338 /*
339  * Trivial picksplit implementation. Function called only
340  * if user-defined picksplit puts all keys on the same side of the split.
341  * That is a bug of user-defined picksplit but we don't want to fail.
342  */
343 static void
genericPickSplit(GISTSTATE * giststate,GistEntryVector * entryvec,GIST_SPLITVEC * v,int attno)344 genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
345 {
346 	OffsetNumber i,
347 				maxoff;
348 	int			nbytes;
349 	GistEntryVector *evec;
350 
351 	maxoff = entryvec->n - 1;
352 
353 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
354 
355 	v->spl_left = (OffsetNumber *) palloc(nbytes);
356 	v->spl_right = (OffsetNumber *) palloc(nbytes);
357 	v->spl_nleft = v->spl_nright = 0;
358 
359 	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
360 	{
361 		if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
362 		{
363 			v->spl_left[v->spl_nleft] = i;
364 			v->spl_nleft++;
365 		}
366 		else
367 		{
368 			v->spl_right[v->spl_nright] = i;
369 			v->spl_nright++;
370 		}
371 	}
372 
373 	/*
374 	 * Form union datums for each side
375 	 */
376 	evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
377 
378 	evec->n = v->spl_nleft;
379 	memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
380 		   sizeof(GISTENTRY) * evec->n);
381 	v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
382 									  giststate->supportCollation[attno],
383 									  PointerGetDatum(evec),
384 									  PointerGetDatum(&nbytes));
385 
386 	evec->n = v->spl_nright;
387 	memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
388 		   sizeof(GISTENTRY) * evec->n);
389 	v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
390 									  giststate->supportCollation[attno],
391 									  PointerGetDatum(evec),
392 									  PointerGetDatum(&nbytes));
393 }
394 
395 /*
396  * Calls user picksplit method for attno column to split tuples into
397  * two vectors.
398  *
399  * Returns FALSE if split is complete (there are no more index columns, or
400  * there is no need to consider them because split is optimal already).
401  *
402  * Returns TRUE and v->spl_dontcare = NULL if the picksplit result is
403  * degenerate (all tuples seem to be don't-cares), so we should just
404  * disregard this column and split on the next column(s) instead.
405  *
406  * Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
407  * that could be relocated based on the next column(s).  The don't-care
408  * tuples have been removed from the split and must be reinserted by caller.
409  * There is at least one non-don't-care tuple on each side of the split,
410  * and union keys for all columns are updated to include just those tuples.
411  *
412  * A TRUE result implies there is at least one more index column.
413  */
414 static bool
gistUserPicksplit(Relation r,GistEntryVector * entryvec,int attno,GistSplitVector * v,IndexTuple * itup,int len,GISTSTATE * giststate)415 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
416 				  IndexTuple *itup, int len, GISTSTATE *giststate)
417 {
418 	GIST_SPLITVEC *sv = &v->splitVector;
419 
420 	/*
421 	 * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
422 	 * case we are doing a secondary split (see comments in gist.h).
423 	 */
424 	sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
425 	sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
426 	sv->spl_ldatum = v->spl_lattr[attno];
427 	sv->spl_rdatum = v->spl_rattr[attno];
428 
429 	/*
430 	 * Let the opclass-specific PickSplit method do its thing.  Note that at
431 	 * this point we know there are no null keys in the entryvec.
432 	 */
433 	FunctionCall2Coll(&giststate->picksplitFn[attno],
434 					  giststate->supportCollation[attno],
435 					  PointerGetDatum(entryvec),
436 					  PointerGetDatum(sv));
437 
438 	if (sv->spl_nleft == 0 || sv->spl_nright == 0)
439 	{
440 		/*
441 		 * User-defined picksplit failed to create an actual split, ie it put
442 		 * everything on the same side.  Complain but cope.
443 		 */
444 		ereport(DEBUG1,
445 				(errcode(ERRCODE_INTERNAL_ERROR),
446 				 errmsg("picksplit method for column %d of index \"%s\" failed",
447 						attno + 1, RelationGetRelationName(r)),
448 				 errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
449 
450 		/*
451 		 * Reinit GIST_SPLITVEC. Although these fields are not used by
452 		 * genericPickSplit(), set them up for further processing
453 		 */
454 		sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
455 		sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
456 		sv->spl_ldatum = v->spl_lattr[attno];
457 		sv->spl_rdatum = v->spl_rattr[attno];
458 
459 		/* Do a generic split */
460 		genericPickSplit(giststate, entryvec, sv, attno);
461 	}
462 	else
463 	{
464 		/* hack for compatibility with old picksplit API */
465 		if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
466 			sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
467 		if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
468 			sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
469 	}
470 
471 	/* Clean up if PickSplit didn't take care of a secondary split */
472 	if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
473 		supportSecondarySplit(r, giststate, attno, sv,
474 							  v->spl_lattr[attno], v->spl_rattr[attno]);
475 
476 	/* emit union datums computed by PickSplit back to v arrays */
477 	v->spl_lattr[attno] = sv->spl_ldatum;
478 	v->spl_rattr[attno] = sv->spl_rdatum;
479 	v->spl_lisnull[attno] = false;
480 	v->spl_risnull[attno] = false;
481 
482 	/*
483 	 * If index columns remain, then consider whether we can improve the split
484 	 * by using them.
485 	 */
486 	v->spl_dontcare = NULL;
487 
488 	if (attno + 1 < giststate->tupdesc->natts)
489 	{
490 		int			NumDontCare;
491 
492 		/*
493 		 * Make a quick check to see if left and right union keys are equal;
494 		 * if so, the split is certainly degenerate, so tell caller to
495 		 * re-split with the next column.
496 		 */
497 		if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
498 			return true;
499 
500 		/*
501 		 * Locate don't-care tuples, if any.  If there are none, the split is
502 		 * optimal, so just fall out and return false.
503 		 */
504 		v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
505 
506 		NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
507 
508 		if (NumDontCare > 0)
509 		{
510 			/*
511 			 * Remove don't-cares from spl_left[] and spl_right[].
512 			 */
513 			removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
514 			removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
515 
516 			/*
517 			 * If all tuples on either side were don't-cares, the split is
518 			 * degenerate, and we're best off to ignore it and split on the
519 			 * next column.  (We used to try to press on with a secondary
520 			 * split by forcing a random tuple on each side to be treated as
521 			 * non-don't-care, but it seems unlikely that that technique
522 			 * really gives a better result.  Note that we don't want to try a
523 			 * secondary split with empty left or right primary split sides,
524 			 * because then there is no union key on that side for the
525 			 * PickSplit function to try to expand, so it can have no good
526 			 * figure of merit for what it's doing.  Also note that this check
527 			 * ensures we can't produce a bogus one-side-only split in the
528 			 * NumDontCare == 1 special case below.)
529 			 */
530 			if (sv->spl_nleft == 0 || sv->spl_nright == 0)
531 			{
532 				v->spl_dontcare = NULL;
533 				return true;
534 			}
535 
536 			/*
537 			 * Recompute union keys, considering only non-don't-care tuples.
538 			 * NOTE: this will set union keys for remaining index columns,
539 			 * which will cause later calls of gistUserPicksplit to pass those
540 			 * values down to user-defined PickSplit methods with
541 			 * spl_ldatum_exists/spl_rdatum_exists set true.
542 			 */
543 			gistunionsubkey(giststate, itup, v);
544 
545 			if (NumDontCare == 1)
546 			{
547 				/*
548 				 * If there's only one don't-care tuple then we can't do a
549 				 * PickSplit on it, so just choose whether to send it left or
550 				 * right by comparing penalties.  We needed the
551 				 * gistunionsubkey step anyway so that we have appropriate
552 				 * union keys for figuring the penalties.
553 				 */
554 				OffsetNumber toMove;
555 
556 				/* find it ... */
557 				for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
558 				{
559 					if (v->spl_dontcare[toMove])
560 						break;
561 				}
562 				Assert(toMove < entryvec->n);
563 
564 				/* ... and assign it to cheaper side */
565 				placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
566 
567 				/*
568 				 * At this point the union keys are wrong, but we don't care
569 				 * because we're done splitting.  The outermost recursion
570 				 * level of gistSplitByKey will fix things before returning.
571 				 */
572 			}
573 			else
574 				return true;
575 		}
576 	}
577 
578 	return false;
579 }
580 
581 /*
582  * simply split page in half
583  */
584 static void
gistSplitHalf(GIST_SPLITVEC * v,int len)585 gistSplitHalf(GIST_SPLITVEC *v, int len)
586 {
587 	int			i;
588 
589 	v->spl_nright = v->spl_nleft = 0;
590 	v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
591 	v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
592 	for (i = 1; i <= len; i++)
593 		if (i < len / 2)
594 			v->spl_right[v->spl_nright++] = i;
595 		else
596 			v->spl_left[v->spl_nleft++] = i;
597 
598 	/* we need not compute union keys, caller took care of it */
599 }
600 
601 /*
602  * gistSplitByKey: main entry point for page-splitting algorithm
603  *
604  * r: index relation
605  * page: page being split
606  * itup: array of IndexTuples to be processed
607  * len: number of IndexTuples to be processed (must be at least 2)
608  * giststate: additional info about index
609  * v: working state and output area
610  * attno: column we are working on (zero-based index)
611  *
612  * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
613  * to all-TRUE.  On return, spl_left/spl_nleft contain indexes of tuples
614  * to go left, spl_right/spl_nright contain indexes of tuples to go right,
615  * spl_lattr/spl_lisnull contain left-side union key values, and
616  * spl_rattr/spl_risnull contain right-side union key values.  Other fields
617  * in this struct are workspace for this file.
618  *
619  * Outside caller must pass zero for attno.  The function may internally
620  * recurse to the next column by passing attno+1.
621  */
622 void
gistSplitByKey(Relation r,Page page,IndexTuple * itup,int len,GISTSTATE * giststate,GistSplitVector * v,int attno)623 gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
624 			   GISTSTATE *giststate, GistSplitVector *v, int attno)
625 {
626 	GistEntryVector *entryvec;
627 	OffsetNumber *offNullTuples;
628 	int			nOffNullTuples = 0;
629 	int			i;
630 
631 	/* generate the item array, and identify tuples with null keys */
632 	/* note that entryvec->vector[0] goes unused in this code */
633 	entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
634 	entryvec->n = len + 1;
635 	offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
636 
637 	for (i = 1; i <= len; i++)
638 	{
639 		Datum		datum;
640 		bool		IsNull;
641 
642 		datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
643 							  &IsNull);
644 		gistdentryinit(giststate, attno, &(entryvec->vector[i]),
645 					   datum, r, page, i,
646 					   FALSE, IsNull);
647 		if (IsNull)
648 			offNullTuples[nOffNullTuples++] = i;
649 	}
650 
651 	if (nOffNullTuples == len)
652 	{
653 		/*
654 		 * Corner case: All keys in attno column are null, so just transfer
655 		 * our attention to the next column.  If there's no next column, just
656 		 * split page in half.
657 		 */
658 		v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
659 
660 		if (attno + 1 < giststate->tupdesc->natts)
661 			gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
662 		else
663 			gistSplitHalf(&v->splitVector, len);
664 	}
665 	else if (nOffNullTuples > 0)
666 	{
667 		int			j = 0;
668 
669 		/*
670 		 * We don't want to mix NULL and not-NULL keys on one page, so split
671 		 * nulls to right page and not-nulls to left.
672 		 */
673 		v->splitVector.spl_right = offNullTuples;
674 		v->splitVector.spl_nright = nOffNullTuples;
675 		v->spl_risnull[attno] = TRUE;
676 
677 		v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
678 		v->splitVector.spl_nleft = 0;
679 		for (i = 1; i <= len; i++)
680 			if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
681 				j++;
682 			else
683 				v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
684 
685 		/* Compute union keys, unless outer recursion level will handle it */
686 		if (attno == 0 && giststate->tupdesc->natts == 1)
687 		{
688 			v->spl_dontcare = NULL;
689 			gistunionsubkey(giststate, itup, v);
690 		}
691 	}
692 	else
693 	{
694 		/*
695 		 * All keys are not-null, so apply user-defined PickSplit method
696 		 */
697 		if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
698 		{
699 			/*
700 			 * Splitting on attno column is not optimal, so consider
701 			 * redistributing don't-care tuples according to the next column
702 			 */
703 			Assert(attno + 1 < giststate->tupdesc->natts);
704 
705 			if (v->spl_dontcare == NULL)
706 			{
707 				/*
708 				 * This split was actually degenerate, so ignore it altogether
709 				 * and just split according to the next column.
710 				 */
711 				gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
712 			}
713 			else
714 			{
715 				/*
716 				 * Form an array of just the don't-care tuples to pass to a
717 				 * recursive invocation of this function for the next column.
718 				 */
719 				IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
720 				OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
721 				int			newlen = 0;
722 				GIST_SPLITVEC backupSplit;
723 
724 				for (i = 0; i < len; i++)
725 				{
726 					if (v->spl_dontcare[i + 1])
727 					{
728 						newitup[newlen] = itup[i];
729 						map[newlen] = i + 1;
730 						newlen++;
731 					}
732 				}
733 
734 				Assert(newlen > 0);
735 
736 				/*
737 				 * Make a backup copy of v->splitVector, since the recursive
738 				 * call will overwrite that with its own result.
739 				 */
740 				backupSplit = v->splitVector;
741 				backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
742 				memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
743 				backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
744 				memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
745 
746 				/* Recursively decide how to split the don't-care tuples */
747 				gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
748 
749 				/* Merge result of subsplit with non-don't-care tuples */
750 				for (i = 0; i < v->splitVector.spl_nleft; i++)
751 					backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
752 				for (i = 0; i < v->splitVector.spl_nright; i++)
753 					backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
754 
755 				v->splitVector = backupSplit;
756 			}
757 		}
758 	}
759 
760 	/*
761 	 * If we're handling a multicolumn index, at the end of the recursion
762 	 * recompute the left and right union datums for all index columns.  This
763 	 * makes sure we hand back correct union datums in all corner cases,
764 	 * including when we haven't processed all columns to start with, or when
765 	 * a secondary split moved "don't care" tuples from one side to the other
766 	 * (we really shouldn't assume that that didn't change the union datums).
767 	 *
768 	 * Note: when we're in an internal recursion (attno > 0), we do not worry
769 	 * about whether the union datums we return with are sensible, since
770 	 * calling levels won't care.  Also, in a single-column index, we expect
771 	 * that PickSplit (or the special cases above) produced correct union
772 	 * datums.
773 	 */
774 	if (attno == 0 && giststate->tupdesc->natts > 1)
775 	{
776 		v->spl_dontcare = NULL;
777 		gistunionsubkey(giststate, itup, v);
778 	}
779 }
780