/*-------------------------------------------------------------------------
 *
 * hashsearch.c
 *	  search code for postgres hash tables
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashsearch.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/rel.h"
#include "storage/predicate.h"

static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
						   ScanDirection dir);
static int	_hash_load_qualified_items(IndexScanDesc scan, Page page,
									   OffsetNumber offnum, ScanDirection dir);
static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
								  OffsetNumber offnum, IndexTuple itup);
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
						   Page *pagep, HashPageOpaque *opaquep);

/*
 *	_hash_next() -- Get the next item in a scan.
 *
 *		On entry, so->currPos describes the current page, which may
 *		be pinned but not locked, and so->currPos.itemIndex identifies
 *		which item was previously returned.
 *
 *		On successful exit, scan->xs_heaptid is set to the TID of the next
 *		heap tuple, and so->currPos is updated as needed.
 *
 *		On failure exit (no more tuples), we return false with pin
 *		held on bucket page but no pins or locks held on overflow
 *		page.
 */
bool
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	HashScanPosItem *currItem;
	BlockNumber blkno;
	Buffer		buf;
	bool		end_of_scan = false;

	/*
	 * Advance to the next tuple on the current page; or if done, try to read
	 * data from the next or previous page based on the scan direction.
	 * Before moving to the next or previous page, make sure we deal with all
	 * the killed items.
	 */
	if (ScanDirectionIsForward(dir))
	{
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			blkno = so->currPos.nextPage;
			if (BlockNumberIsValid(blkno))
			{
				buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}
	else
	{
		if (--so->currPos.itemIndex < so->currPos.firstItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			blkno = so->currPos.prevPage;
			if (BlockNumberIsValid(blkno))
			{
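				/*
				 * When scanning backward, the previous page in the chain can
				 * be either an overflow page or the bucket page itself, so
				 * allow both page types here.
				 */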
				buf = _hash_getbuf(rel, blkno, HASH_READ,
								   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));

				/*
				 * We always maintain the pin on the bucket page for the
				 * whole scan operation, so release the additional pin we
				 * have acquired here.
				 */
				if (buf == so->hashso_bucket_buf ||
					buf == so->hashso_split_bucket_buf)
					_hash_dropbuf(rel, buf);

				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}

	if (end_of_scan)
	{
		_hash_dropscanbuf(rel, so);
		HashScanPosInvalidate(so->currPos);
		return false;
	}

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	return true;
}

/*
 * Advance to the next page in a bucket, if any.  If we are scanning the
 * bucket being populated during a split operation, then after the last page
 * of that bucket this function advances to the bucket being split.
 */
static void
_hash_readnext(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		block_found = false;

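	/* Grab the next page's block number before we let go of this page. */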
	blkno = (*opaquep)->hasho_nextblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for the reason for retaining the pin.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
	else
		_hash_relbuf(rel, *bufp);

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();
	if (BlockNumberIsValid(blkno))
	{
		*bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
		block_found = true;
	}
	else if (so->hashso_buc_populated && !so->hashso_buc_split)
	{
		/*
		 * End of bucket: scan the bucket being split if there was a split
		 * in progress at the start of the scan.
		 */
		*bufp = so->hashso_split_bucket_buf;

		/*
		 * The buffer for the bucket being split must be valid, as we
		 * acquire the pin on it before the start of the scan and retain it
		 * till the end of the scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);

		/*
		 * Setting hashso_buc_split to true indicates that we are scanning
		 * the bucket being split.
		 */
		so->hashso_buc_split = true;

		block_found = true;
	}

	if (block_found)
	{
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
	}
}

/*
 * Advance to the previous page in a bucket, if any.  If the current scan
 * started during a split operation, then after the first page of the bucket
 * being split this function advances to the bucket being populated.
 */
static void
_hash_readprev(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		haveprevblk;

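	/* Grab the previous page's block number before we let go of this page. */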
	blkno = (*opaquep)->hasho_prevblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for the reason for retaining the pin.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
	{
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
		haveprevblk = false;
	}
	else
	{
		_hash_relbuf(rel, *bufp);
		haveprevblk = true;
	}

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();

	if (haveprevblk)
	{
		Assert(BlockNumberIsValid(blkno));
		*bufp = _hash_getbuf(rel, blkno, HASH_READ,
							 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/*
		 * We always maintain the pin on the bucket page for the whole scan
		 * operation, so release the additional pin we have acquired here.
		 */
		if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
			_hash_dropbuf(rel, *bufp);
	}
	else if (so->hashso_buc_populated && so->hashso_buc_split)
	{
		/*
		 * End of bucket: scan the bucket being populated if there was a
		 * split in progress at the start of the scan.
		 */
		*bufp = so->hashso_bucket_buf;

		/*
		 * The buffer for the bucket being populated must be valid, as we
		 * acquire the pin on it before the start of the scan and retain it
		 * till the end of the scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		*pagep = BufferGetPage(*bufp);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/* move to the end of the bucket chain */
		while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
			_hash_readnext(scan, bufp, pagep, opaquep);

		/*
		 * Setting hashso_buc_split to false indicates that we are scanning
		 * the bucket being populated.
		 */
		so->hashso_buc_split = false;
	}
}

/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		We find the first item (or, if backward scan, the last item) in the
 *		index that satisfies the qualification associated with the scan
 *		descriptor.
 *
 *		On successful exit, data about the matching tuple(s) on the page has
 *		been loaded into so->currPos, and scan->xs_heaptid is set to the heap
 *		TID of the current tuple.  If the page containing the current index
 *		tuple is an overflow page, both its pin and lock are released,
 *		whereas if it is a bucket page then it remains pinned but not locked.
 *
 *		On failure exit (no more tuples), we return false, with pin held on
 *		bucket page but no pins or locks held on overflow page.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	ScanKey		cur;
	uint32		hashkey;
	Bucket		bucket;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	HashScanPosItem *currItem;

	pgstat_count_index_scan(rel);

	/*
	 * We do not support hash scans with no index qualification, because we
	 * would have to read the whole index rather than just one bucket. That
	 * creates a whole raft of problems, since we haven't got a practical way
	 * to lock all the buckets against splits or compactions.
	 */
	if (scan->numberOfKeys < 1)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("hash indexes do not support whole-index scans")));

	/* There may be more than one index qual, but we hash only the first */
	cur = &scan->keyData[0];

	/* We support only single-column hash indexes */
	Assert(cur->sk_attno == 1);
	/* And there's only one operator strategy, too */
	Assert(cur->sk_strategy == HTEqualStrategyNumber);

	/*
	 * If the constant in the index qual is NULL, assume it cannot match any
	 * items in the index.
	 */
	if (cur->sk_flags & SK_ISNULL)
		return false;

	/*
	 * Okay to compute the hash key.  We want to do this before acquiring any
	 * locks, in case a user-defined hash function happens to be slow.
	 *
	 * If the scankey operator is not a cross-type comparison, we can use the
	 * cached hash function; otherwise we have to look it up in the catalogs.
	 *
	 * We support the convention that sk_subtype == InvalidOid means the
	 * opclass input type; this is a hack to simplify life for ScanKeyInit().
	 */
	if (cur->sk_subtype == rel->rd_opcintype[0] ||
		cur->sk_subtype == InvalidOid)
		hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
	else
		hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
										   cur->sk_subtype);

	so->hashso_sk_hash = hashkey;

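	/*
	 * Map the hash key to its bucket and read the bucket's primary page,
	 * taking a pin and a shared lock on it.
	 */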
	buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
	page = BufferGetPage(buf);
	TestForOldSnapshot(scan->xs_snapshot, rel, page);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
	bucket = opaque->hasho_bucket;

	so->hashso_bucket_buf = buf;

	/*
	 * If a bucket split is in progress, then while scanning the bucket being
	 * populated, we need to skip tuples that were copied from the bucket
	 * being split.  We also need to maintain a pin on the bucket being split
	 * to ensure that split-cleanup work done by vacuum doesn't remove tuples
	 * from it till this scan is done.  We need to maintain a pin on the
	 * bucket being populated to ensure that vacuum doesn't squeeze that
	 * bucket till this scan is complete; otherwise, the ordering of tuples
	 * can't be maintained during forward and backward scans.  Here, we have
	 * to be cautious about locking order: first, acquire the lock on the
	 * bucket being split; then, release the lock on it but not the pin;
	 * then, acquire a lock on the bucket being populated and re-verify
	 * whether the bucket split is still in progress.  Acquiring the lock on
	 * the bucket being split first ensures that the vacuum waits for this
	 * scan to finish.
	 */
	if (H_BUCKET_BEING_POPULATED(opaque))
	{
		BlockNumber old_blkno;
		Buffer		old_buf;

		old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);

		/*
		 * Release the lock on the new bucket and re-acquire it after
		 * acquiring the lock on the old bucket.
		 */
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
		TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));

		/*
		 * Remember the split bucket buffer so as to use it later for
		 * scanning.
		 */
		so->hashso_split_bucket_buf = old_buf;
		LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);

		LockBuffer(buf, BUFFER_LOCK_SHARE);
		page = BufferGetPage(buf);
		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->hasho_bucket == bucket);

		if (H_BUCKET_BEING_POPULATED(opaque))
			so->hashso_buc_populated = true;
		else
		{
			_hash_dropbuf(rel, so->hashso_split_bucket_buf);
			so->hashso_split_bucket_buf = InvalidBuffer;
		}
	}

	/* If a backwards scan is requested, move to the end of the chain */
	if (ScanDirectionIsBackward(dir))
	{
		/*
		 * Backward scans that start during a split need to start from the
		 * end of the bucket being split.
		 */
		while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
			   (so->hashso_buc_populated && !so->hashso_buc_split))
			_hash_readnext(scan, &buf, &page, &opaque);
	}

	/* remember which buffer we have pinned, if any */
	Assert(BufferIsInvalid(so->currPos.buf));
	so->currPos.buf = buf;

	/* Now find all the tuples satisfying the qualification from a page */
	if (!_hash_readpage(scan, &buf, dir))
		return false;

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	/* if we're here, _hash_readpage found valid tuples */
	return true;
}

/*
 *	_hash_readpage() -- Load data from the current index page into so->currPos
 *
 *	We scan all the items on the current index page and save those that
 *	satisfy the qualification into so->currPos.  If no matching items are
 *	found on the current page, we move to the next or previous page in the
 *	bucket chain, as indicated by the scan direction.
 *
 *	Return true if any matching items are found, else return false.
 */
static bool
_hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	OffsetNumber offnum;
	uint16		itemIndex;

	buf = *bufP;
	Assert(BufferIsValid(buf));
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
	page = BufferGetPage(buf);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	so->currPos.buf = buf;
	so->currPos.currPage = BufferGetBlockNumber(buf);

	if (ScanDirectionIsForward(dir))
	{
		BlockNumber prev_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

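			/* itemIndex > 0 means we loaded at least one match from this page */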
			if (itemIndex != 0)
				break;

			/*
			 * Could not find any matching tuples in the current page, move to
			 * the next page. Before leaving the current page, deal with any
			 * killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			/*
			 * If this is a primary bucket page, hasho_prevblkno is not a real
			 * block number.
			 */
			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				prev_blkno = InvalidBlockNumber;
			else
				prev_blkno = opaque->hasho_prevblkno;

			_hash_readnext(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found. Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = prev_blkno;
				so->currPos.nextPage = InvalidBlockNumber;
				so->currPos.buf = buf;
				return false;
			}
		}

		so->currPos.firstItem = 0;
		so->currPos.lastItem = itemIndex - 1;
		so->currPos.itemIndex = 0;
	}
	else
	{
		BlockNumber next_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch_last(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

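			/*
			 * For backward scans items[] is filled from the end, so any
			 * itemIndex below MaxIndexTuplesPerPage means we loaded at least
			 * one match from this page.
			 */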
			if (itemIndex != MaxIndexTuplesPerPage)
				break;

			/*
			 * Could not find any matching tuples in the current page, move to
			 * the previous page. Before leaving the current page, deal with
			 * any killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

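			/*
			 * If this is a primary bucket page, remember the block number of
			 * the page following it, for use by scrollable cursors if we run
			 * off the end of the chain below.
			 */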
			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				next_blkno = opaque->hasho_nextblkno;

			_hash_readprev(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found. Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = InvalidBlockNumber;
				so->currPos.nextPage = next_blkno;
				so->currPos.buf = buf;
				return false;
			}
		}

		so->currPos.firstItem = itemIndex;
		so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
		so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
	}

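	/*
	 * We found matches on this page.  Record its neighbors in currPos, then
	 * drop the lock; an overflow page's pin is dropped too, while the
	 * primary bucket page stays pinned for the rest of the scan.
	 */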
	if (so->currPos.buf == so->hashso_bucket_buf ||
		so->currPos.buf == so->hashso_split_bucket_buf)
	{
		so->currPos.prevPage = InvalidBlockNumber;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
	}
	else
	{
		so->currPos.prevPage = opaque->hasho_prevblkno;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		_hash_relbuf(rel, so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	Assert(so->currPos.firstItem <= so->currPos.lastItem);
	return true;
}

/*
 * Load all the qualified items from the current index page into
 * so->currPos.  Helper function for _hash_readpage.
 */
static int
_hash_load_qualified_items(IndexScanDesc scan, Page page,
						   OffsetNumber offnum, ScanDirection dir)
{
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	IndexTuple	itup;
	int			itemIndex;
	OffsetNumber maxoff;

	maxoff = PageGetMaxOffsetNumber(page);

	if (ScanDirectionIsForward(dir))
	{
		/* load items[] in ascending order */
		itemIndex = 0;

		while (offnum <= maxoff)
		{
			Assert(offnum >= FirstOffsetNumber);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if this scan
			 * started while the split was in progress.  Also, skip tuples
			 * that are marked as dead.
			 */
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberNext(offnum);	/* move forward */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
				itemIndex++;
			}
			else
			{
				/*
				 * No more matching tuples exist on this page, so exit the
				 * while loop.
				 */
				break;
			}

			offnum = OffsetNumberNext(offnum);
		}

		Assert(itemIndex <= MaxIndexTuplesPerPage);
		return itemIndex;
	}
	else
	{
		/* load items[] in descending order */
		itemIndex = MaxIndexTuplesPerPage;

		while (offnum >= FirstOffsetNumber)
		{
			Assert(offnum <= maxoff);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if this scan
			 * started while the split was in progress.  Also, skip tuples
			 * that are marked as dead.
			 */
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberPrev(offnum);	/* move back */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				itemIndex--;
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
			}
			else
			{
				/*
				 * No more matching tuples exist on this page, so exit the
				 * while loop.
				 */
				break;
			}

			offnum = OffsetNumberPrev(offnum);
		}

		Assert(itemIndex >= 0);
		return itemIndex;
	}
}

/* Save an index item into so->currPos.items[itemIndex] */
static inline void
_hash_saveitem(HashScanOpaque so, int itemIndex,
			   OffsetNumber offnum, IndexTuple itup)
{
	HashScanPosItem *currItem = &so->currPos.items[itemIndex];

	currItem->heapTid = itup->t_tid;
	currItem->indexOffset = offnum;
}