/*-------------------------------------------------------------------------
 *
 * nbtsplitloc.c
 *	  Choose split point code for Postgres btree implementation.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/nbtree/nbtsplitloc.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/nbtree.h"
#include "storage/lmgr.h"

typedef enum
{
	/* strategy for searching through materialized list of split points */
	SPLIT_DEFAULT,				/* give some weight to truncation */
	SPLIT_MANY_DUPLICATES,		/* find minimally distinguishing point */
	SPLIT_SINGLE_VALUE			/* leave left page almost full */
} FindSplitStrat;

typedef struct
{
	/* details of free space left by split */
	int16		curdelta;		/* current leftfree/rightfree delta */
	int16		leftfree;		/* space left on left page post-split */
	int16		rightfree;		/* space left on right page post-split */

	/* split point identifying fields (returned by _bt_findsplitloc) */
	OffsetNumber firstrightoff; /* first origpage item on rightpage */
	bool		newitemonleft;	/* new item goes on left, or right? */

} SplitPoint;

typedef struct
{
	/* context data for _bt_recsplitloc */
	Relation	rel;			/* index relation */
	Page		origpage;		/* page undergoing split */
	IndexTuple	newitem;		/* new item (cause of page split) */
	Size		newitemsz;		/* size of newitem (includes line pointer) */
	bool		is_leaf;		/* T if splitting a leaf page */
	bool		is_rightmost;	/* T if splitting rightmost page on level */
	OffsetNumber newitemoff;	/* where the new item is to be inserted */
	int			leftspace;		/* space available for items on left page */
	int			rightspace;		/* space available for items on right page */
	int			olddataitemstotal;	/* space taken by old items */
	Size		minfirstrightsz;	/* smallest firstright size */

	/* candidate split point data */
	int			maxsplits;		/* maximum number of splits */
	int			nsplits;		/* current number of splits */
	SplitPoint *splits;			/* all candidate split points for page */
	int			interval;		/* current range of acceptable split points */
} FindSplitData;

static void _bt_recsplitloc(FindSplitData *state,
							OffsetNumber firstrightoff, bool newitemonleft,
							int olddataitemstoleft,
							Size firstrightofforigpagetuplesz);
static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
								bool usemult);
static int	_bt_splitcmp(const void *arg1, const void *arg2);
static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
								int leaffillfactor, bool *usemult);
static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
									 bool *newitemonleft, FindSplitStrat strategy);
static int	_bt_defaultinterval(FindSplitData *state);
static int	_bt_strategy(FindSplitData *state, SplitPoint *leftpage,
						 SplitPoint *rightpage, FindSplitStrat *strategy);
static void _bt_interval_edges(FindSplitData *state,
							   SplitPoint **leftinterval, SplitPoint **rightinterval);
static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
											SplitPoint *split);
static inline IndexTuple _bt_split_firstright(FindSplitData *state,
											  SplitPoint *split);


/*
 *	_bt_findsplitloc() -- find an appropriate place to split a page.
 *
 * The main goal here is to equalize the free space that will be on each
 * split page, *after accounting for the inserted tuple*.  (If we fail to
 * account for it, we might find ourselves with too little room on the page
 * that it needs to go into!)
 *
 * If the page is the rightmost page on its level, we instead try to arrange
 * to leave the left split page fillfactor% full.  In this way, when we are
 * inserting successively increasing keys (consider sequences, timestamps,
 * etc) we will end up with a tree whose pages are about fillfactor% full,
 * instead of the 50% full result that we'd get without this special case.
 * This is the same as nbtsort.c produces for a newly-created tree.  Note
 * that leaf and nonleaf pages use different fillfactors.  Note also that
 * there are a number of further special cases where fillfactor is not
 * applied in the standard way.
 *
 * We are passed the intended insert position of the new tuple, expressed as
 * the offsetnumber of the tuple it must go in front of (this could be
 * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
 * passed, since it's needed to give some weight to how effective suffix
 * truncation will be.  The implementation picks the split point that
 * maximizes the effectiveness of suffix truncation from a small list of
 * alternative candidate split points that leave each side of the split with
 * about the same share of free space.  Suffix truncation is secondary to
 * equalizing free space, except in cases with large numbers of duplicates.
 * Note that it is always assumed that caller goes on to perform truncation,
 * even with pg_upgrade'd indexes where that isn't actually the case
 * (!heapkeyspace indexes).  See nbtree/README for more information about
 * suffix truncation.
 *
 * We return the index of the first existing tuple that should go on the
 * righthand page (which is called firstrightoff), plus a boolean
 * indicating whether the new tuple goes on the left or right page.  You
 * can think of the returned state as a point _between_ two adjacent data
 * items (lastleft and firstright data items) on an imaginary version of
 * origpage that already includes newitem.  The bool is necessary to
 * disambiguate the case where firstrightoff == newitemoff (i.e. it is
 * sometimes needed to determine if the firstright tuple for the split is
 * newitem rather than the tuple from origpage at offset firstrightoff).
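 *
 * For example, with data items at offsets 1..4 and newitemoff = 3,
 * returning firstrightoff = 3 with *newitemonleft = false puts items 1
 * and 2 on the left and newitem plus items 3 and 4 on the right (newitem
 * is the split's firstright tuple), whereas *newitemonleft = true puts
 * items 1, 2 and newitem on the left and items 3 and 4 on the right
 * (newitem is the split's lastleft tuple).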
 */
OffsetNumber
_bt_findsplitloc(Relation rel,
				 Page origpage,
				 OffsetNumber newitemoff,
				 Size newitemsz,
				 IndexTuple newitem,
				 bool *newitemonleft)
{
	BTPageOpaque opaque;
	int			leftspace,
				rightspace,
				olddataitemstotal,
				olddataitemstoleft,
				perfectpenalty,
				leaffillfactor;
	FindSplitData state;
	FindSplitStrat strategy;
	ItemId		itemid;
	OffsetNumber offnum,
				maxoff,
				firstrightoff;
	double		fillfactormult;
	bool		usemult;
	SplitPoint	leftpage,
				rightpage;

	opaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
	maxoff = PageGetMaxOffsetNumber(origpage);

	/* Total free space available on a btree page, after fixed overhead */
	leftspace = rightspace =
		PageGetPageSize(origpage) - SizeOfPageHeaderData -
		MAXALIGN(sizeof(BTPageOpaqueData));
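
	/*
	 * With the default 8192 byte BLCKSZ this works out to 8192 - 24 - 16
	 * = 8152 bytes on typical 64-bit platforms: the page header and the
	 * MAXALIGN'd btree special space are the only fixed overhead.
	 */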

	/* The right page will have the same high key as the old page */
	if (!P_RIGHTMOST(opaque))
	{
		itemid = PageGetItemId(origpage, P_HIKEY);
		rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
							 sizeof(ItemIdData));
	}

	/* Count up total space in data items before actually scanning 'em */
	olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
	leaffillfactor = BTGetFillFactor(rel);

	/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
	newitemsz += sizeof(ItemIdData);
	state.rel = rel;
	state.origpage = origpage;
	state.newitem = newitem;
	state.newitemsz = newitemsz;
	state.is_leaf = P_ISLEAF(opaque);
	state.is_rightmost = P_RIGHTMOST(opaque);
	state.leftspace = leftspace;
	state.rightspace = rightspace;
	state.olddataitemstotal = olddataitemstotal;
	state.minfirstrightsz = SIZE_MAX;
	state.newitemoff = newitemoff;

	/* newitem cannot be a posting list item */
	Assert(!BTreeTupleIsPosting(newitem));

	/*
	 * maxsplits should never exceed maxoff because there will be at most as
	 * many candidate split points as there are points _between_ tuples, once
	 * you imagine that the new item is already on the original page (the
	 * final number of splits may be slightly lower because not all points
	 * between tuples will be legal).
	 */
	state.maxsplits = maxoff;
	state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
	state.nsplits = 0;

	/*
	 * Scan through the data items and calculate space usage for a split at
	 * each possible position
	 */
	olddataitemstoleft = 0;

	for (offnum = P_FIRSTDATAKEY(opaque);
		 offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		Size		itemsz;

		itemid = PageGetItemId(origpage, offnum);
		itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);

		/*
		 * When item offset number is not newitemoff, neither side of the
		 * split can be newitem.  Record a split after the previous data item
		 * from original page, but before the current data item from original
		 * page. (_bt_recsplitloc() will reject the split when there are no
		 * previous items, which we rely on.)
		 */
		if (offnum < newitemoff)
			_bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
		else if (offnum > newitemoff)
			_bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
		else
		{
			/*
			 * Record a split after all "offnum < newitemoff" original page
			 * data items, but before newitem
			 */
			_bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);

			/*
			 * Record a split after newitem, but before data item from
			 * original page at offset newitemoff/current offset
			 */
			_bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
		}

		olddataitemstoleft += itemsz;
	}

	/*
	 * Record a split after all original page data items, but before newitem.
	 * (Though only when it's possible that newitem will end up alone on new
	 * right page.)
	 */
	Assert(olddataitemstoleft == olddataitemstotal);
	if (newitemoff > maxoff)
		_bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);

	/*
	 * I believe it is not possible to fail to find a feasible split, but just
	 * in case ...
	 */
	if (state.nsplits == 0)
		elog(ERROR, "could not find a feasible split point for index \"%s\"",
			 RelationGetRelationName(rel));

	/*
	 * Start search for a split point among list of legal split points.  Give
	 * primary consideration to equalizing available free space in each half
	 * of the split initially (start with default strategy), while applying
	 * rightmost and split-after-new-item optimizations where appropriate.
	 * Either of the two other fallback strategies may be required for cases
	 * with a large number of duplicates around the original/space-optimal
	 * split point.
	 *
	 * Default strategy gives some weight to suffix truncation in deciding a
	 * split point on leaf pages.  It attempts to select a split point where a
	 * distinguishing attribute appears earlier in the new high key for the
	 * left side of the split, in order to maximize the number of trailing
	 * attributes that can be truncated away.  Only candidate split points
	 * that imply an acceptable balance of free space on each side are
	 * considered.  See _bt_defaultinterval().
	 */
	if (!state.is_leaf)
	{
		/* fillfactormult only used on rightmost page */
		usemult = state.is_rightmost;
		fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
	}
	else if (state.is_rightmost)
	{
		/* Rightmost leaf page -- fillfactormult always used */
		usemult = true;
		fillfactormult = leaffillfactor / 100.0;
	}
	else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
	{
		/*
		 * New item inserted at rightmost point among a localized grouping on
		 * a leaf page -- apply "split after new item" optimization, either by
		 * applying leaf fillfactor multiplier, or by choosing the exact split
		 * point that leaves newitem as lastleft. (usemult is set for us.)
		 */
		if (usemult)
		{
			/* fillfactormult should be set based on leaf fillfactor */
			fillfactormult = leaffillfactor / 100.0;
		}
		else
		{
			/* find precise split point after newitemoff */
			for (int i = 0; i < state.nsplits; i++)
			{
				SplitPoint *split = state.splits + i;

				if (split->newitemonleft &&
					newitemoff == split->firstrightoff)
				{
					pfree(state.splits);
					*newitemonleft = true;
					return newitemoff;
				}
			}

			/*
			 * Cannot legally split after newitemoff; proceed with split
			 * without using fillfactor multiplier.  This is defensive, and
			 * should never be needed in practice.
			 */
			fillfactormult = 0.50;
		}
	}
	else
	{
		/* Other leaf page.  50:50 page split. */
		usemult = false;
		/* fillfactormult not used, but be tidy */
		fillfactormult = 0.50;
	}

	/*
	 * Save leftmost and rightmost splits for page before original ordinal
	 * sort order is lost by delta/fillfactormult sort
	 */
	leftpage = state.splits[0];
	rightpage = state.splits[state.nsplits - 1];

	/* Give split points a fillfactormult-wise delta, and sort on deltas */
	_bt_deltasortsplits(&state, fillfactormult, usemult);

	/* Determine split interval for default strategy */
	state.interval = _bt_defaultinterval(&state);

	/*
	 * Determine if default strategy/split interval will produce a
	 * sufficiently distinguishing split, or if we should change strategies.
	 * Alternative strategies change the range of split points that are
	 * considered acceptable (split interval), and possibly change
	 * fillfactormult, in order to deal with pages with a large number of
	 * duplicates gracefully.
	 *
	 * Pass low and high splits for the entire page (actually, they're for an
	 * imaginary version of the page that includes newitem).  These are used
	 * when the initial split interval encloses split points that are full of
	 * duplicates, and we need to consider if it's even possible to avoid
	 * appending a heap TID.
	 */
	perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);

	if (strategy == SPLIT_DEFAULT)
	{
		/*
		 * Default strategy worked out (always works out with internal page).
		 * Original split interval still stands.
		 */
	}

	/*
	 * Many duplicates strategy is used when a heap TID would otherwise be
	 * appended, but the page isn't completely full of logical duplicates.
	 *
	 * The split interval is widened to include all legal candidate split
	 * points.  There might be as few as two distinct values in the whole-page
	 * split interval, though it's also possible that most of the values on
	 * the page are unique.  The final split point will either be to the
	 * immediate left or to the immediate right of the group of duplicate
	 * tuples that enclose the first/delta-optimal split point (perfect
	 * penalty was set so that the lowest delta split point that avoids
	 * appending a heap TID will be chosen).  Maximizing the number of
	 * attributes that can be truncated away is not a goal of the many
	 * duplicates strategy.
	 *
	 * Single value strategy is used when it is impossible to avoid appending
	 * a heap TID.  It arranges to leave the left page very full.  This
	 * maximizes space utilization in cases where tuples with the same
	 * attribute values span many pages.  Newly inserted duplicates will tend
	 * to have higher heap TID values, so we'll end up splitting to the right
	 * consistently.  (Single value strategy is harmless though not
	 * particularly useful with !heapkeyspace indexes.)
	 */
	else if (strategy == SPLIT_MANY_DUPLICATES)
	{
		Assert(state.is_leaf);
		/* Shouldn't try to truncate away extra user attributes */
		Assert(perfectpenalty ==
			   IndexRelationGetNumberOfKeyAttributes(state.rel));
		/* No need to resort splits -- no change in fillfactormult/deltas */
		state.interval = state.nsplits;
	}
	else if (strategy == SPLIT_SINGLE_VALUE)
	{
		Assert(state.is_leaf);
		/* Split near the end of the page */
		usemult = true;
		fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
		/* Resort split points with new delta */
		_bt_deltasortsplits(&state, fillfactormult, usemult);
		/* Appending a heap TID is unavoidable, so interval of 1 is fine */
		state.interval = 1;
	}

	/*
	 * Search among acceptable split points (using final split interval) for
	 * the entry that has the lowest penalty, and is therefore expected to
	 * maximize fan-out.  Sets *newitemonleft for us.
	 */
	firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
									 strategy);
	pfree(state.splits);

	return firstrightoff;
}

/*
 * Subroutine to record a particular point between two tuples (possibly the
 * new item) on page (ie, combination of firstrightoff and newitemonleft
 * settings) in *state for later analysis.  This is also a convenient point to
 * check if the split is legal (if it isn't, it won't be recorded).
 *
 * firstrightoff is the offset of the first item on the original page that
 * goes to the right page, and firstrightofforigpagetuplesz is the size of
 * that tuple.  firstrightoff can be > max offset, which means that all the
 * old items go to the left page and only the new item goes to the right page.
 * We don't actually use firstrightofforigpagetuplesz in that case (actually,
 * we don't use it for _any_ split where the firstright tuple happens to be
 * newitem).
 *
 * olddataitemstoleft is the total size of all old items to the left of the
 * split point that is recorded here when legal.  Should not include
 * newitemsz, since that is handled here.
 */
static void
_bt_recsplitloc(FindSplitData *state,
				OffsetNumber firstrightoff,
				bool newitemonleft,
				int olddataitemstoleft,
				Size firstrightofforigpagetuplesz)
{
	int16		leftfree,
				rightfree;
	Size		firstrightsz;
	Size		postingsz = 0;
	bool		newitemisfirstright;

	/* Is the new item going to be split point's firstright tuple? */
	newitemisfirstright = (firstrightoff == state->newitemoff &&
						   !newitemonleft);

	if (newitemisfirstright)
		firstrightsz = state->newitemsz;
	else
	{
		firstrightsz = firstrightofforigpagetuplesz;

		/*
		 * Calculate suffix truncation space saving when firstright tuple is a
		 * posting list tuple, though only when the tuple is over 64 bytes
		 * including line pointer overhead (arbitrary).  This avoids accessing
		 * the tuple in cases where its posting list must be very small (if
		 * tuple has one at all).
		 *
		 * Note: We don't do this in the case where firstright tuple is
		 * newitem, since newitem cannot have a posting list.
		 */
		if (state->is_leaf && firstrightsz > 64)
		{
			ItemId		itemid;
			IndexTuple	newhighkey;

			itemid = PageGetItemId(state->origpage, firstrightoff);
			newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);

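			/*
			 * postingsz is the space taken up by the posting list's heap
			 * TID array, all of which suffix truncation can strip from
			 * the new high key (pivot tuples never carry a posting list)
			 */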
			if (BTreeTupleIsPosting(newhighkey))
				postingsz = IndexTupleSize(newhighkey) -
					BTreeTupleGetPostingOffset(newhighkey);
		}
	}

	/* Account for all the old tuples */
	leftfree = state->leftspace - olddataitemstoleft;
	rightfree = state->rightspace -
		(state->olddataitemstotal - olddataitemstoleft);

	/*
	 * The first item on the right page becomes the high key of the left page;
	 * therefore it counts against left space as well as right space (we
	 * cannot assume that suffix truncation will make it any smaller).  When
	 * the index has included attributes, those attributes of the left page
	 * high key will be truncated, leaving that page with slightly more free
	 * space.  However, that shouldn't affect our ability to find a valid
	 * split location, since we err in the direction of being pessimistic
	 * about free space on the left half.  Besides, even when suffix
	 * truncation of non-TID attributes occurs, the new high key often won't
	 * even be a single MAXALIGN() quantum smaller than the firstright tuple
	 * it's based on.
	 *
	 * If we are on the leaf level, assume that suffix truncation cannot avoid
	 * adding a heap TID to the left half's new high key.  In practice the new
	 * high key will often be smaller and will rarely be larger, but
	 * conservatively assume the worst case.  We do go to the trouble of
	 * subtracting away posting list overhead, though only when it looks like
	 * it will make an appreciable difference.  (Posting lists are the only
	 * case where truncation will typically make the final high key far
	 * smaller than firstright, so being a bit more precise there noticeably
	 * improves the balance of free space.)
	 */
	if (state->is_leaf)
		leftfree -= (int16) (firstrightsz +
							 MAXALIGN(sizeof(ItemPointerData)) -
							 postingsz);
	else
		leftfree -= (int16) firstrightsz;

	/* account for the new item */
	if (newitemonleft)
		leftfree -= (int16) state->newitemsz;
	else
		rightfree -= (int16) state->newitemsz;

	/*
	 * If we are not on the leaf level, we will be able to discard the key
	 * data from the first item that winds up on the right page.
	 */
	if (!state->is_leaf)
		rightfree += (int16) firstrightsz -
			(int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
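
	/*
	 * (The key data is replaced by a "minus infinity" item that keeps only
	 * the bare IndexTupleData header: e.g., a 24 byte tuple plus its 4 byte
	 * line pointer gives back 28 - 12 = 16 bytes, assuming typical 64-bit
	 * MAXALIGN.)
	 */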

	/* Record split if legal */
	if (leftfree >= 0 && rightfree >= 0)
	{
		Assert(state->nsplits < state->maxsplits);

		/* Determine smallest firstright tuple size among legal splits */
		state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);

		state->splits[state->nsplits].curdelta = 0;
		state->splits[state->nsplits].leftfree = leftfree;
		state->splits[state->nsplits].rightfree = rightfree;
		state->splits[state->nsplits].firstrightoff = firstrightoff;
		state->splits[state->nsplits].newitemonleft = newitemonleft;
		state->nsplits++;
	}
}

/*
 * Subroutine to assign space deltas to materialized array of candidate split
 * points based on current fillfactor, and to sort array using that fillfactor
 */
static void
_bt_deltasortsplits(FindSplitData *state, double fillfactormult,
					bool usemult)
{
	for (int i = 0; i < state->nsplits; i++)
	{
		SplitPoint *split = state->splits + i;
		int16		delta;

		if (usemult)
			delta = fillfactormult * split->leftfree -
				(1.0 - fillfactormult) * split->rightfree;
		else
			delta = split->leftfree - split->rightfree;

		if (delta < 0)
			delta = -delta;

		/* Save delta */
		split->curdelta = delta;
	}
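
	/*
	 * For example, with usemult and fillfactormult = 0.90 (a rightmost leaf
	 * split at the default leaf fillfactor), a candidate leaving leftfree =
	 * 400 and rightfree = 4000 gets delta = |0.9*400 - 0.1*4000| = 40, and
	 * so sorts ahead of a perfectly even candidate leaving leftfree =
	 * rightfree = 2200, whose delta works out to 1760.
	 */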

	qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
}

/*
 * qsort-style comparator used by _bt_deltasortsplits()
 */
static int
_bt_splitcmp(const void *arg1, const void *arg2)
{
	SplitPoint *split1 = (SplitPoint *) arg1;
	SplitPoint *split2 = (SplitPoint *) arg2;

	if (split1->curdelta > split2->curdelta)
		return 1;
	if (split1->curdelta < split2->curdelta)
		return -1;

	return 0;
}

/*
 * Subroutine to determine whether or not a non-rightmost leaf page should be
 * split immediately after the would-be original page offset for the
 * new/incoming tuple (or should have leaf fillfactor applied when new item is
 * to the right on original page).  This is appropriate when there is a
 * pattern of localized monotonically increasing insertions into a composite
 * index, where leading attribute values form local groupings, and we
 * anticipate further insertions of the same/current grouping (new item's
 * grouping) in the near future.  This can be thought of as a variation on
 * applying leaf fillfactor during rightmost leaf page splits, since cases
 * that benefit will converge on packing leaf pages leaffillfactor% full over
 * time.
 *
 * We may leave extra free space remaining on the rightmost page of a "most
 * significant column" grouping of tuples if that grouping never ends up
 * having future insertions that use the free space.  That effect is
 * self-limiting; a future grouping that becomes the "nearest on the right"
 * grouping of the affected grouping usually puts the extra free space to good
 * use.
 *
 * Caller uses optimization when routine returns true, though the exact action
 * taken by caller varies.  Caller uses original leaf page fillfactor in
 * standard way rather than using the new item offset directly when *usemult
 * was also set to true here.  Otherwise, caller applies optimization by
 * locating the legal split point that makes the new tuple the lastleft tuple
 * for the split.
 */
static bool
_bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
					int leaffillfactor, bool *usemult)
{
	int16		nkeyatts;
	ItemId		itemid;
	IndexTuple	tup;
	int			keepnatts;

	Assert(state->is_leaf && !state->is_rightmost);

	nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

	/* Single key indexes not considered here */
	if (nkeyatts == 1)
		return false;

	/* Ascending insertion pattern never inferred when new item is first */
	if (state->newitemoff == P_FIRSTKEY)
		return false;

	/*
	 * Only apply optimization on pages with equisized tuples, since ordinal
	 * keys are likely to be fixed-width.  Testing if the new tuple is
	 * variable width directly might also work, but that fails to apply the
	 * optimization to indexes with a numeric_ops attribute.
	 *
	 * Conclude that page has equisized tuples when the new item is the same
	 * width as the smallest item observed during pass over page, and other
	 * non-pivot tuples must be the same width as well.  (Note that the
	 * possibly-truncated existing high key isn't counted in
	 * olddataitemstotal, and must be subtracted from maxoff.)
	 */
	if (state->newitemsz != state->minfirstrightsz)
		return false;
	if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
		return false;

	/*
	 * Avoid applying optimization when tuples are wider than a tuple
	 * consisting of two non-NULL int8/int64 attributes (or four non-NULL
	 * int4/int32 attributes)
	 */
	if (state->newitemsz >
		MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
		sizeof(ItemIdData))
		return false;
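
	/*
	 * (On typical 64-bit platforms this cap works out to MAXALIGN(8 + 16) +
	 * 4 = 28 bytes, line pointer included.)
	 */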

	/*
	 * At least the first attribute's value must be equal to the corresponding
	 * value in previous tuple to apply optimization.  New item cannot be a
	 * duplicate, either.
	 *
	 * Handle case where new item is to the right of all items on the existing
	 * page.  This is suggestive of monotonically increasing insertions in
	 * itself, so the "heap TID adjacency" test is not applied here.
	 */
	if (state->newitemoff > maxoff)
	{
		itemid = PageGetItemId(state->origpage, maxoff);
		tup = (IndexTuple) PageGetItem(state->origpage, itemid);
		keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

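		/*
		 * keepnatts > 1 means newitem's first attribute matches the last
		 * item's; keepnatts <= nkeyatts means newitem is not a full
		 * duplicate of it
		 */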
		if (keepnatts > 1 && keepnatts <= nkeyatts)
		{
			*usemult = true;
			return true;
		}

		return false;
	}

	/*
	 * "Low cardinality leading column, high cardinality suffix column"
	 * indexes with a random insertion pattern (e.g., an index with a boolean
	 * column, such as an index on '(book_is_in_print, book_isbn)') present us
	 * with a risk of consistently misapplying the optimization.  We're
	 * willing to accept very occasional misapplication of the optimization,
	 * provided the cases where we get it wrong are rare and self-limiting.
	 *
	 * Heap TID adjacency strongly suggests that the item just to the left was
	 * inserted very recently, which limits overapplication of the
	 * optimization.  Besides, all inappropriate cases triggered here will
	 * still split in the middle of the page on average.
	 */
	itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
	tup = (IndexTuple) PageGetItem(state->origpage, itemid);
	/* Do cheaper test first */
	if (BTreeTupleIsPosting(tup) ||
		!_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
		return false;
	/* Check same conditions as rightmost item case, too */
	keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

	if (keepnatts > 1 && keepnatts <= nkeyatts)
	{
		double		interp = (double) state->newitemoff / ((double) maxoff + 1);
		double		leaffillfactormult = (double) leaffillfactor / 100.0;

		/*
		 * Don't allow caller to split after a new item when it will result in
		 * a split point to the right of the point that a leaf fillfactor
		 * split would use -- have caller apply leaf fillfactor instead
		 */
		*usemult = interp > leaffillfactormult;

		return true;
	}

	return false;
}

/*
 * Subroutine for determining if two heap TIDs are "adjacent".
 *
 * Adjacent means that the high TID is very likely to have been inserted into
 * heap relation immediately after the low TID, probably during the current
 * transaction.
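 *
 * For example, heap TIDs '(42,7)' and '(42,9)' are treated as adjacent
 * (same heap block), as are '(42,7)' and '(43,1)'; '(42,7)' and '(44,1)'
 * are not.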
 */
static bool
_bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
{
	BlockNumber lowblk,
				highblk;

	lowblk = ItemPointerGetBlockNumber(lowhtid);
	highblk = ItemPointerGetBlockNumber(highhtid);

	/* Make optimistic assumption of adjacency when heap blocks match */
	if (lowblk == highblk)
		return true;

	/* When heap block one up, second offset should be FirstOffsetNumber */
	if (lowblk + 1 == highblk &&
		ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
		return true;

	return false;
}

/*
 * Subroutine to find the "best" split point among candidate split points.
 * The best split point is the split point with the lowest penalty among split
 * points that fall within current/final split interval.  Penalty is an
 * abstract score, with a definition that varies depending on whether we're
 * splitting a leaf page or an internal page.  See _bt_split_penalty() for
 * details.
 *
 * "perfectpenalty" is assumed to be the lowest possible penalty among
 * candidate split points.  This allows us to return early without wasting
 * cycles on calculating the first differing attribute for all candidate
 * splits when that clearly cannot improve our choice (or when we only want a
 * minimally distinguishing split point, and don't want to make the split any
 * more unbalanced than is necessary).
 *
 * We return the index of the first existing tuple that should go on the right
 * page, plus a boolean indicating if new item is on left of split point.
 */
static OffsetNumber
_bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
				 bool *newitemonleft, FindSplitStrat strategy)
{
	int			bestpenalty,
				lowsplit;
	int			highsplit = Min(state->interval, state->nsplits);
	SplitPoint *final;

	bestpenalty = INT_MAX;
	lowsplit = 0;
	for (int i = lowsplit; i < highsplit; i++)
	{
		int			penalty;

		penalty = _bt_split_penalty(state, state->splits + i);

		if (penalty < bestpenalty)
		{
			bestpenalty = penalty;
			lowsplit = i;
		}

		if (penalty <= perfectpenalty)
			break;
	}

	final = &state->splits[lowsplit];

	/*
	 * There is a risk that the "many duplicates" strategy will repeatedly do
	 * the wrong thing when there are monotonically decreasing insertions to
	 * the right of a large group of duplicates.  Repeated splits could leave
	 * a succession of right half pages with free space that can never be
	 * used.  This must be avoided.
	 *
	 * Consider the example of the leftmost page in a single integer attribute
	 * NULLS FIRST index which is almost filled with NULLs.  Monotonically
	 * decreasing integer insertions might cause the same leftmost page to
	 * split repeatedly at the same point.  Each split derives its new high
	 * key from the lowest current value to the immediate right of the large
	 * group of NULLs, which will always be higher than all future integer
	 * insertions, directing all future integer insertions to the same
	 * leftmost page.
	 */
	if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
		!final->newitemonleft && final->firstrightoff >= state->newitemoff &&
		final->firstrightoff < state->newitemoff + 9)
	{
		/*
		 * Avoid the problem by performing a 50:50 split when the new item is
		 * just to the right of the would-be "many duplicates" split point.
		 * (Note that the test used for an insert that is "just to the right"
		 * of the split point is conservative.)
		 */
		final = &state->splits[0];
	}

	*newitemonleft = final->newitemonleft;
	return final->firstrightoff;
}

#define LEAF_SPLIT_DISTANCE			0.050
#define INTERNAL_SPLIT_DISTANCE		0.075

/*
 * Return a split interval to use for the default strategy.  This is a limit
 * on the number of candidate split points to give further consideration to.
 * Only a fraction of all candidate split points (those located at the start
 * of the now-sorted splits array) fall within the split interval.  Split
 * interval is applied within _bt_bestsplitloc().
 *
 * Split interval represents an acceptable range of split points -- those that
 * have leftfree and rightfree values that are acceptably balanced.  The final
 * split point chosen is the split point with the lowest "penalty" among split
 * points in this split interval (unless we change our entire strategy, in
 * which case the interval also changes -- see _bt_strategy()).
 *
 * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
 * and sigma b for internal ("branch") splits.  It's hard to provide a
 * theoretical justification for the size of the split interval, though it's
 * clear that a small split interval can make tuples on level L+1 much smaller
 * on average, without noticeably affecting space utilization on level L.
 * (Note that the way that we calculate split interval might need to change if
 * suffix truncation is taught to truncate tuples "within" the last
 * attribute/datum for data types like text, which is more or less how it is
 * assumed to work in the paper.)
 */
static int
_bt_defaultinterval(FindSplitData *state)
{
	SplitPoint *spaceoptimal;
	int16		tolerance,
				lowleftfree,
				lowrightfree,
				highleftfree,
				highrightfree;

	/*
	 * Determine leftfree and rightfree values that are higher and lower than
	 * we're willing to tolerate.  Note that the final split interval will be
	 * about 10% of nsplits in the common case where all non-pivot tuples
	 * (data items) from a leaf page are uniformly sized.  We're a bit more
	 * aggressive when splitting internal pages.
	 */
	if (state->is_leaf)
		tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
	else
		tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;

	/* First candidate split point is the most evenly balanced */
	spaceoptimal = state->splits;
	lowleftfree = spaceoptimal->leftfree - tolerance;
	lowrightfree = spaceoptimal->rightfree - tolerance;
	highleftfree = spaceoptimal->leftfree + tolerance;
	highrightfree = spaceoptimal->rightfree + tolerance;
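
	/*
	 * For example, with 8,000 bytes of uniformly sized data items on a leaf
	 * page, tolerance works out to 400 bytes.  Moving the split point by one
	 * tuple shifts one tuple's worth of space from one side to the other, so
	 * only candidates within about 400 bytes' worth of tuples on either side
	 * of the space-optimal point stay within tolerance.
	 */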

	/*
	 * Iterate through split points, starting from the split immediately after
	 * 'spaceoptimal'.  Find the first split point that divides free space so
	 * unevenly that including it in the split interval would be unacceptable.
	 */
	for (int i = 1; i < state->nsplits; i++)
	{
		SplitPoint *split = state->splits + i;

		/* Cannot use curdelta here, since its value is often weighted */
		if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
			split->leftfree > highleftfree || split->rightfree > highrightfree)
			return i;
	}

	return state->nsplits;
}

/*
 * Subroutine to decide whether split should use default strategy/initial
 * split interval, or whether it should finish splitting the page using
 * alternative strategies (this is only possible with leaf pages).
 *
 * Caller uses alternative strategy (or sticks with default strategy) based
 * on how *strategy is set here.  Return value is "perfect penalty", which is
 * passed to _bt_bestsplitloc() as a final constraint on how far caller is
 * willing to go to avoid appending a heap TID when using the many duplicates
 * strategy (it also saves _bt_bestsplitloc() useless cycles).
 */
static int
_bt_strategy(FindSplitData *state, SplitPoint *leftpage,
			 SplitPoint *rightpage, FindSplitStrat *strategy)
{
	IndexTuple	leftmost,
				rightmost;
	SplitPoint *leftinterval,
			   *rightinterval;
	int			perfectpenalty;
	int			indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

	/* Assume that alternative strategy won't be used for now */
	*strategy = SPLIT_DEFAULT;

	/*
	 * Use smallest observed firstright item size for entire page (actually,
	 * entire imaginary version of page that includes newitem) as perfect
	 * penalty on internal pages.  This can save cycles in the common case
	 * where most or all splits (not just splits within interval) have
	 * firstright tuples that are the same size.
	 */
	if (!state->is_leaf)
		return state->minfirstrightsz;

	/*
	 * Use leftmost and rightmost tuples from leftmost and rightmost splits in
	 * current split interval
	 */
	_bt_interval_edges(state, &leftinterval, &rightinterval);
	leftmost = _bt_split_lastleft(state, leftinterval);
	rightmost = _bt_split_firstright(state, rightinterval);

	/*
	 * If initial split interval can produce a split point that will at least
	 * avoid appending a heap TID in new high key, we're done.  Finish split
	 * with default strategy and initial split interval.
	 */
	perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
	if (perfectpenalty <= indnkeyatts)
		return perfectpenalty;

	/*
	 * Work out how caller should finish split when even their "perfect"
	 * penalty for initial/default split interval indicates that the interval
	 * does not contain even a single split that avoids appending a heap TID.
	 *
	 * Use the leftmost split's lastleft tuple and the rightmost split's
	 * firstright tuple to assess every possible split.
	 */
	leftmost = _bt_split_lastleft(state, leftpage);
	rightmost = _bt_split_firstright(state, rightpage);

	/*
	 * If page (including new item) has many duplicates but is not entirely
	 * full of duplicates, a many duplicates strategy split will be performed.
	 * If page is entirely full of duplicates, a single value strategy split
	 * will be performed.
	 */
	perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
	if (perfectpenalty <= indnkeyatts)
	{
		*strategy = SPLIT_MANY_DUPLICATES;

		/*
		 * Many duplicates strategy should split at either side of the group of
		 * duplicates that enclose the delta-optimal split point.  Return
		 * indnkeyatts rather than the true perfect penalty to make that
		 * happen.  (If perfectpenalty was returned here then low cardinality
		 * composite indexes could have continual unbalanced splits.)
		 *
		 * Note that caller won't go through with a many duplicates split in
		 * rare cases where it looks like there are ever-decreasing insertions
		 * to the immediate right of the split point.  This must happen just
		 * before a final decision is made, within _bt_bestsplitloc().
		 */
		return indnkeyatts;
	}

	/*
	 * Single value strategy is only appropriate with ever-increasing heap
	 * TIDs; otherwise, original default strategy split should proceed to
	 * avoid pathological performance.  Use page high key to infer if this is
	 * the rightmost page among pages that store the same duplicate value.
	 * This should not prevent insertions of heap TIDs that are slightly out
	 * of order from using single value strategy, since that's expected with
	 * concurrent inserters of the same duplicate value.
	 */
	else if (state->is_rightmost)
		*strategy = SPLIT_SINGLE_VALUE;
	else
	{
		ItemId		itemid;
		IndexTuple	hikey;

		itemid = PageGetItemId(state->origpage, P_HIKEY);
		hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
		perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
											 state->newitem);
		if (perfectpenalty <= indnkeyatts)
			*strategy = SPLIT_SINGLE_VALUE;
		else
		{
			/*
			 * Have caller finish split using default strategy, since page
			 * does not appear to be the rightmost page for duplicates of the
			 * value the page is filled with
			 */
		}
	}

	return perfectpenalty;
}

/*
 * Subroutine to locate leftmost and rightmost splits for current/default
 * split interval.  Note that it will be the same split iff there is only one
 * split in interval.
 */
static void
_bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
				   SplitPoint **rightinterval)
{
	int			highsplit = Min(state->interval, state->nsplits);
	SplitPoint *deltaoptimal;

	deltaoptimal = state->splits;
	*leftinterval = NULL;
	*rightinterval = NULL;

	/*
	 * Delta is an absolute distance to optimal split point, so both the
	 * leftmost and rightmost split point will usually be at the end of the
	 * array
	 */
	for (int i = highsplit - 1; i >= 0; i--)
	{
		SplitPoint *distant = state->splits + i;

		if (distant->firstrightoff < deltaoptimal->firstrightoff)
		{
			if (*leftinterval == NULL)
				*leftinterval = distant;
		}
		else if (distant->firstrightoff > deltaoptimal->firstrightoff)
		{
			if (*rightinterval == NULL)
				*rightinterval = distant;
		}
		else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
		{
			/*
			 * "incoming tuple will become firstright" (distant) is to the
			 * left of "incoming tuple will become lastleft" (delta-optimal)
			 */
			Assert(distant->firstrightoff == state->newitemoff);
			if (*leftinterval == NULL)
				*leftinterval = distant;
		}
		else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
		{
			/*
			 * "incoming tuple will become lastleft" (distant) is to the right
			 * of "incoming tuple will become firstright" (delta-optimal)
			 */
			Assert(distant->firstrightoff == state->newitemoff);
			if (*rightinterval == NULL)
				*rightinterval = distant;
		}
		else
		{
			/* There were only one or two splits in initial split interval */
			Assert(distant == deltaoptimal);
			if (*leftinterval == NULL)
				*leftinterval = distant;
			if (*rightinterval == NULL)
				*rightinterval = distant;
		}

		if (*leftinterval && *rightinterval)
			return;
	}

	Assert(false);
}

/*
 * Subroutine to find penalty for caller's candidate split point.
 *
 * On leaf pages, penalty is the attribute number that distinguishes each side
 * of a split.  It's the last attribute that needs to be included in new high
 * key for left page.  It can be greater than the number of key attributes in
 * cases where a heap TID will need to be appended during truncation.
 *
 * On internal pages, penalty is simply the size of the firstright tuple for
 * the split (including line pointer overhead).  This tuple will become the
 * new high key for the left page.
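 *
 * For example, on a leaf page of a two-attribute index, a split with
 * lastleft ('usa', 50) and firstright ('usb', 12) gets a penalty of 1 (the
 * first attribute alone distinguishes the sides), while lastleft ('usa',
 * 50) and firstright ('usa', 70) gets 2, and two tuples equal on both
 * attributes get 3 (a heap TID must be appended).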
 */
static inline int
_bt_split_penalty(FindSplitData *state, SplitPoint *split)
{
	IndexTuple	lastleft;
	IndexTuple	firstright;

	if (!state->is_leaf)
	{
		ItemId		itemid;

		if (!split->newitemonleft &&
			split->firstrightoff == state->newitemoff)
			return state->newitemsz;

		itemid = PageGetItemId(state->origpage, split->firstrightoff);

		return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
	}

	lastleft = _bt_split_lastleft(state, split);
	firstright = _bt_split_firstright(state, split);

	return _bt_keep_natts_fast(state->rel, lastleft, firstright);
}

/*
 * Subroutine to get a lastleft IndexTuple for a split point
 */
static inline IndexTuple
_bt_split_lastleft(FindSplitData *state, SplitPoint *split)
{
	ItemId		itemid;

	if (split->newitemonleft && split->firstrightoff == state->newitemoff)
		return state->newitem;

	itemid = PageGetItemId(state->origpage,
						   OffsetNumberPrev(split->firstrightoff));
	return (IndexTuple) PageGetItem(state->origpage, itemid);
}

/*
 * Subroutine to get a firstright IndexTuple for a split point
 */
static inline IndexTuple
_bt_split_firstright(FindSplitData *state, SplitPoint *split)
{
	ItemId		itemid;

	if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
		return state->newitem;

	itemid = PageGetItemId(state->origpage, split->firstrightoff);
	return (IndexTuple) PageGetItem(state->origpage, itemid);
}