1 /*-------------------------------------------------------------------------
2  *
3  * procarray.c
4  *	  POSTGRES process array code.
5  *
6  *
7  * This module maintains arrays of the PGPROC and PGXACT structures for all
8  * active backends.  Although there are several uses for this, the principal
9  * one is as a means of determining the set of currently running transactions.
10  *
11  * Because of various subtle race conditions it is critical that a backend
12  * hold the correct locks while setting or clearing its MyPgXact->xid field.
13  * See notes in src/backend/access/transam/README.
14  *
15  * The process arrays now also include structures representing prepared
16  * transactions.  The xid and subxids fields of these are valid, as are the
17  * myProcLocks lists.  They can be distinguished from regular backend PGPROCs
18  * at need by checking for pid == 0.
19  *
20  * During hot standby, we also keep a list of XIDs representing transactions
21  * that are known to be running in the master (or more precisely, were running
22  * as of the current point in the WAL stream).  This list is kept in the
23  * KnownAssignedXids array, and is updated by watching the sequence of
24  * arriving XIDs.  This is necessary because if we leave those XIDs out of
25  * snapshots taken for standby queries, then they will appear to be already
26  * complete, leading to MVCC failures.  Note that in hot standby, the PGPROC
27  * array represents standby processes, which by definition are not running
28  * transactions that have XIDs.
29  *
30  * It is perhaps possible for a backend on the master to terminate without
31  * writing an abort record for its transaction.  While that shouldn't really
32  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
33  * ourselves by pruning the array when a valid list of running XIDs arrives.
34  *
35  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *	  src/backend/storage/ipc/procarray.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45 
46 #include <signal.h>
47 
48 #include "access/clog.h"
49 #include "access/subtrans.h"
50 #include "access/transam.h"
51 #include "access/twophase.h"
52 #include "access/xact.h"
53 #include "access/xlog.h"
54 #include "catalog/catalog.h"
55 #include "miscadmin.h"
56 #include "pgstat.h"
57 #include "storage/proc.h"
58 #include "storage/procarray.h"
59 #include "storage/spin.h"
60 #include "utils/builtins.h"
61 #include "utils/rel.h"
62 #include "utils/snapmgr.h"
63 
64 
65 /* Our shared memory area */
66 typedef struct ProcArrayStruct
67 {
68 	int			numProcs;		/* number of valid procs entries */
69 	int			maxProcs;		/* allocated size of procs array */
70 
71 	/*
72 	 * Known assigned XIDs handling
73 	 */
74 	int			maxKnownAssignedXids;	/* allocated size of array */
75 	int			numKnownAssignedXids;	/* current # of valid entries */
76 	int			tailKnownAssignedXids;	/* index of oldest valid element */
77 	int			headKnownAssignedXids;	/* index of newest element, + 1 */
78 	slock_t		known_assigned_xids_lck;	/* protects head/tail pointers */
79 
80 	/*
81 	 * Highest subxid that has been removed from KnownAssignedXids array to
82 	 * prevent overflow; or InvalidTransactionId if none.  We track this for
83 	 * similar reasons to tracking overflowing cached subxids in PGXACT
84 	 * entries.  Must hold exclusive ProcArrayLock to change this, and shared
85 	 * lock to read it.
86 	 */
87 	TransactionId lastOverflowedXid;
88 
89 	/* oldest xmin of any replication slot */
90 	TransactionId replication_slot_xmin;
91 	/* oldest catalog xmin of any replication slot */
92 	TransactionId replication_slot_catalog_xmin;
93 
94 	/* indexes into allPgXact[], has PROCARRAY_MAXPROCS entries */
95 	int			pgprocnos[FLEXIBLE_ARRAY_MEMBER];
96 } ProcArrayStruct;
97 
98 static ProcArrayStruct *procArray;
99 
100 static PGPROC *allProcs;
101 static PGXACT *allPgXact;
102 
103 /*
104  * Bookkeeping for tracking emulated transactions in recovery
105  */
106 static TransactionId *KnownAssignedXids;
107 static bool *KnownAssignedXidsValid;
108 static TransactionId latestObservedXid = InvalidTransactionId;
109 
110 /*
111  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
112  * the highest xid that might still be running that we don't have in
113  * KnownAssignedXids.
114  */
115 static TransactionId standbySnapshotPendingXmin;
116 
117 #ifdef XIDCACHE_DEBUG
118 
119 /* counters for XidCache measurement */
120 static long xc_by_recent_xmin = 0;
121 static long xc_by_known_xact = 0;
122 static long xc_by_my_xact = 0;
123 static long xc_by_latest_xid = 0;
124 static long xc_by_main_xid = 0;
125 static long xc_by_child_xid = 0;
126 static long xc_by_known_assigned = 0;
127 static long xc_no_overflow = 0;
128 static long xc_slow_answer = 0;
129 
130 #define xc_by_recent_xmin_inc()		(xc_by_recent_xmin++)
131 #define xc_by_known_xact_inc()		(xc_by_known_xact++)
132 #define xc_by_my_xact_inc()			(xc_by_my_xact++)
133 #define xc_by_latest_xid_inc()		(xc_by_latest_xid++)
134 #define xc_by_main_xid_inc()		(xc_by_main_xid++)
135 #define xc_by_child_xid_inc()		(xc_by_child_xid++)
136 #define xc_by_known_assigned_inc()	(xc_by_known_assigned++)
137 #define xc_no_overflow_inc()		(xc_no_overflow++)
138 #define xc_slow_answer_inc()		(xc_slow_answer++)
139 
140 static void DisplayXidCache(void);
141 #else							/* !XIDCACHE_DEBUG */
142 
143 #define xc_by_recent_xmin_inc()		((void) 0)
144 #define xc_by_known_xact_inc()		((void) 0)
145 #define xc_by_my_xact_inc()			((void) 0)
146 #define xc_by_latest_xid_inc()		((void) 0)
147 #define xc_by_main_xid_inc()		((void) 0)
148 #define xc_by_child_xid_inc()		((void) 0)
149 #define xc_by_known_assigned_inc()	((void) 0)
150 #define xc_no_overflow_inc()		((void) 0)
151 #define xc_slow_answer_inc()		((void) 0)
152 #endif							/* XIDCACHE_DEBUG */
153 
154 /* Primitives for KnownAssignedXids array handling for standby */
155 static void KnownAssignedXidsCompress(bool force);
156 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
157 					 bool exclusive_lock);
158 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
159 static bool KnownAssignedXidExists(TransactionId xid);
160 static void KnownAssignedXidsRemove(TransactionId xid);
161 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
162 							TransactionId *subxids);
163 static void KnownAssignedXidsRemovePreceding(TransactionId xid);
164 static int	KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
165 static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
166 							   TransactionId *xmin,
167 							   TransactionId xmax);
168 static TransactionId KnownAssignedXidsGetOldestXmin(void);
169 static void KnownAssignedXidsDisplay(int trace_level);
170 static void KnownAssignedXidsReset(void);
171 static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
172 								PGXACT *pgxact, TransactionId latestXid);
173 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
174 
175 /*
176  * Report shared-memory space needed by CreateSharedProcArray.
177  */
178 Size
179 ProcArrayShmemSize(void)
180 {
181 	Size		size;
182 
183 	/* Size of the ProcArray structure itself */
184 #define PROCARRAY_MAXPROCS	(MaxBackends + max_prepared_xacts)
185 
186 	size = offsetof(ProcArrayStruct, pgprocnos);
187 	size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
188 
189 	/*
190 	 * During Hot Standby processing we have a data structure called
191 	 * KnownAssignedXids, created in shared memory. Local data structures are
192 	 * also created in various backends during GetSnapshotData(),
193 	 * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
194 	 * main structures created in those functions must be identically sized,
195 	 * since we may at times copy the whole of the data structures around. We
196 	 * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
197 	 *
198 	 * Ideally we'd only create this structure if we were actually doing hot
199 	 * standby in the current run, but we don't know that yet at the time
200 	 * shared memory is being set up.
201 	 */
202 #define TOTAL_MAX_CACHED_SUBXIDS \
203 	((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
204 
205 	if (EnableHotStandby)
206 	{
207 		size = add_size(size,
208 						mul_size(sizeof(TransactionId),
209 								 TOTAL_MAX_CACHED_SUBXIDS));
210 		size = add_size(size,
211 						mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
212 	}
213 
214 	return size;
215 }
216 
217 /*
218  * Initialize the shared PGPROC array during postmaster startup.
219  */
220 void
221 CreateSharedProcArray(void)
222 {
223 	bool		found;
224 
225 	/* Create or attach to the ProcArray shared structure */
226 	procArray = (ProcArrayStruct *)
227 		ShmemInitStruct("Proc Array",
228 						add_size(offsetof(ProcArrayStruct, pgprocnos),
229 								 mul_size(sizeof(int),
230 										  PROCARRAY_MAXPROCS)),
231 						&found);
232 
233 	if (!found)
234 	{
235 		/*
236 		 * We're the first - initialize.
237 		 */
238 		procArray->numProcs = 0;
239 		procArray->maxProcs = PROCARRAY_MAXPROCS;
240 		procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
241 		procArray->numKnownAssignedXids = 0;
242 		procArray->tailKnownAssignedXids = 0;
243 		procArray->headKnownAssignedXids = 0;
244 		SpinLockInit(&procArray->known_assigned_xids_lck);
245 		procArray->lastOverflowedXid = InvalidTransactionId;
246 		procArray->replication_slot_xmin = InvalidTransactionId;
247 		procArray->replication_slot_catalog_xmin = InvalidTransactionId;
248 	}
249 
250 	allProcs = ProcGlobal->allProcs;
251 	allPgXact = ProcGlobal->allPgXact;
252 
253 	/* Create or attach to the KnownAssignedXids arrays too, if needed */
254 	if (EnableHotStandby)
255 	{
256 		KnownAssignedXids = (TransactionId *)
257 			ShmemInitStruct("KnownAssignedXids",
258 							mul_size(sizeof(TransactionId),
259 									 TOTAL_MAX_CACHED_SUBXIDS),
260 							&found);
261 		KnownAssignedXidsValid = (bool *)
262 			ShmemInitStruct("KnownAssignedXidsValid",
263 							mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
264 							&found);
265 	}
266 
267 	/* Register and initialize fields of ProcLWLockTranche */
268 	LWLockRegisterTranche(LWTRANCHE_PROC, "proc");
269 }
270 
271 /*
272  * Add the specified PGPROC to the shared array.
273  */
274 void
275 ProcArrayAdd(PGPROC *proc)
276 {
277 	ProcArrayStruct *arrayP = procArray;
278 	int			index;
279 
280 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
281 
282 	if (arrayP->numProcs >= arrayP->maxProcs)
283 	{
284 		/*
285 		 * Oops, no room.  (This really shouldn't happen, since there is a
286 		 * fixed supply of PGPROC structs too, and so we should have failed
287 		 * earlier.)
288 		 */
289 		LWLockRelease(ProcArrayLock);
290 		ereport(FATAL,
291 				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
292 				 errmsg("sorry, too many clients already")));
293 	}
294 
295 	/*
296 	 * Keep the procs array sorted by (PGPROC *) so that we can utilize
297 	 * locality of references much better. This is useful while traversing the
298 	 * ProcArray because there is an increased likelihood of finding the next
299 	 * PGPROC structure in the cache.
300 	 *
301 	 * Since the occurrence of adding/removing a proc is much lower than the
302 	 * access to the ProcArray itself, the overhead should be marginal
303 	 */
304 	for (index = 0; index < arrayP->numProcs; index++)
305 	{
306 		/*
307 		 * If we are the first PGPROC or if we have found our right position
308 		 * in the array, break
309 		 */
310 		if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
311 			break;
312 	}
313 
314 	memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
315 			(arrayP->numProcs - index) * sizeof(int));
316 	arrayP->pgprocnos[index] = proc->pgprocno;
317 	arrayP->numProcs++;
318 
319 	LWLockRelease(ProcArrayLock);
320 }
321 
322 /*
323  * Remove the specified PGPROC from the shared array.
324  *
325  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
326  * array, and thus causing it to appear as "not running" anymore.  In this
327  * case we must advance latestCompletedXid.  (This is essentially the same
328  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
329  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
330  * twophase.c depends on the latter.)
331  */
332 void
333 ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
334 {
335 	ProcArrayStruct *arrayP = procArray;
336 	int			index;
337 
338 #ifdef XIDCACHE_DEBUG
339 	/* dump stats at backend shutdown, but not prepared-xact end */
340 	if (proc->pid != 0)
341 		DisplayXidCache();
342 #endif
343 
344 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
345 
346 	if (TransactionIdIsValid(latestXid))
347 	{
348 		Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
349 
350 		/* Advance global latestCompletedXid while holding the lock */
351 		if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
352 								  latestXid))
353 			ShmemVariableCache->latestCompletedXid = latestXid;
354 	}
355 	else
356 	{
357 		/* Shouldn't be trying to remove a live transaction here */
358 		Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
359 	}
360 
361 	for (index = 0; index < arrayP->numProcs; index++)
362 	{
363 		if (arrayP->pgprocnos[index] == proc->pgprocno)
364 		{
365 			/* Keep the PGPROC array sorted. See notes above */
366 			memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
367 					(arrayP->numProcs - index - 1) * sizeof(int));
368 			arrayP->pgprocnos[arrayP->numProcs - 1] = -1;	/* for debugging */
369 			arrayP->numProcs--;
370 			LWLockRelease(ProcArrayLock);
371 			return;
372 		}
373 	}
374 
375 	/* Oops */
376 	LWLockRelease(ProcArrayLock);
377 
378 	elog(LOG, "failed to find proc %p in ProcArray", proc);
379 }
380 
381 
382 /*
383  * ProcArrayEndTransaction -- mark a transaction as no longer running
384  *
385  * This is used interchangeably for commit and abort cases.  The transaction
386  * commit/abort must already be reported to WAL and pg_xact.
387  *
388  * proc is currently always MyProc, but we pass it explicitly for flexibility.
389  * latestXid is the latest Xid among the transaction's main XID and
390  * subtransactions, or InvalidTransactionId if it has no XID.  (We must ask
391  * the caller to pass latestXid, instead of computing it from the PGPROC's
392  * contents, because the subxid information in the PGPROC might be
393  * incomplete.)
394  */
395 void
396 ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
397 {
398 	PGXACT	   *pgxact = &allPgXact[proc->pgprocno];
399 
400 	if (TransactionIdIsValid(latestXid))
401 	{
402 		/*
403 		 * We must lock ProcArrayLock while clearing our advertised XID, so
404 		 * that we do not exit the set of "running" transactions while someone
405 		 * else is taking a snapshot.  See discussion in
406 		 * src/backend/access/transam/README.
407 		 */
408 		Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
409 
410 		/*
411 		 * If we can immediately acquire ProcArrayLock, we clear our own XID
412 		 * and release the lock.  If not, use group XID clearing to improve
413 		 * efficiency.
414 		 */
415 		if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
416 		{
417 			ProcArrayEndTransactionInternal(proc, pgxact, latestXid);
418 			LWLockRelease(ProcArrayLock);
419 		}
420 		else
421 			ProcArrayGroupClearXid(proc, latestXid);
422 	}
423 	else
424 	{
425 		/*
426 		 * If we have no XID, we don't need to lock, since we won't affect
427 		 * anyone else's calculation of a snapshot.  We might change their
428 		 * estimate of global xmin, but that's OK.
429 		 */
430 		Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
431 
432 		proc->lxid = InvalidLocalTransactionId;
433 		pgxact->xmin = InvalidTransactionId;
434 		/* must be cleared with xid/xmin: */
435 		pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
436 		pgxact->delayChkpt = false; /* be sure this is cleared in abort */
437 		proc->recoveryConflictPending = false;
438 
439 		Assert(pgxact->nxids == 0);
440 		Assert(pgxact->overflowed == false);
441 	}
442 }
443 
444 /*
445  * Mark a write transaction as no longer running.
446  *
447  * We don't do any locking here; caller must handle that.
448  */
449 static inline void
450 ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
451 								TransactionId latestXid)
452 {
453 	pgxact->xid = InvalidTransactionId;
454 	proc->lxid = InvalidLocalTransactionId;
455 	pgxact->xmin = InvalidTransactionId;
456 	/* must be cleared with xid/xmin: */
457 	pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
458 	pgxact->delayChkpt = false; /* be sure this is cleared in abort */
459 	proc->recoveryConflictPending = false;
460 
461 	/* Clear the subtransaction-XID cache too while holding the lock */
462 	pgxact->nxids = 0;
463 	pgxact->overflowed = false;
464 
465 	/* Also advance global latestCompletedXid while holding the lock */
466 	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
467 							  latestXid))
468 		ShmemVariableCache->latestCompletedXid = latestXid;
469 }
470 
471 /*
472  * ProcArrayGroupClearXid -- group XID clearing
473  *
474  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
475  * commit time, add ourselves to a list of processes that need their XIDs
476  * cleared.  The first process to add itself to the list will acquire
477  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
478  * on behalf of all group members.  This avoids a great deal of contention
479  * around ProcArrayLock when many processes are trying to commit at once,
480  * since the lock need not be repeatedly handed off from one committing
481  * process to the next.
482  */
483 static void
484 ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
485 {
486 	volatile PROC_HDR *procglobal = ProcGlobal;
487 	uint32		nextidx;
488 	uint32		wakeidx;
489 
490 	/* We should definitely have an XID to clear. */
491 	Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
492 
493 	/* Add ourselves to the list of processes needing a group XID clear. */
494 	proc->procArrayGroupMember = true;
495 	proc->procArrayGroupMemberXid = latestXid;
496 	while (true)
497 	{
498 		nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
499 		pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
500 
501 		if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
502 										   &nextidx,
503 										   (uint32) proc->pgprocno))
504 			break;
505 	}
506 
507 	/*
508 	 * If the list was not empty, the leader will clear our XID.  It is
509 	 * impossible to have followers without a leader because the first process
510 	 * that has added itself to the list will always have nextidx as
511 	 * INVALID_PGPROCNO.
512 	 */
513 	if (nextidx != INVALID_PGPROCNO)
514 	{
515 		int			extraWaits = 0;
516 
517 		/* Sleep until the leader clears our XID. */
518 		pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
519 		for (;;)
520 		{
521 			/* acts as a read barrier */
522 			PGSemaphoreLock(proc->sem);
523 			if (!proc->procArrayGroupMember)
524 				break;
525 			extraWaits++;
526 		}
527 		pgstat_report_wait_end();
528 
529 		Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);
530 
531 		/* Fix semaphore count for any absorbed wakeups */
532 		while (extraWaits-- > 0)
533 			PGSemaphoreUnlock(proc->sem);
534 		return;
535 	}
536 
537 	/* We are the leader.  Acquire the lock on behalf of everyone. */
538 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
539 
540 	/*
541 	 * Now that we've got the lock, clear the list of processes waiting for
542 	 * group XID clearing, saving a pointer to the head of the list.  Trying
r_prelude(struct SN_env * z)543 	 * to pop elements one at a time could lead to an ABA problem.
544 	 */
545 	while (true)
546 	{
547 		nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
548 		if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
549 										   &nextidx,
550 										   INVALID_PGPROCNO))
551 			break;
552 	}
553 
554 	/* Remember head of list so we can perform wakeups after dropping lock. */
555 	wakeidx = nextidx;
556 
557 	/* Walk the list and clear all XIDs. */
558 	while (nextidx != INVALID_PGPROCNO)
559 	{
560 		PGPROC	   *nextproc = &allProcs[nextidx];
561 		PGXACT	   *pgxact = &allPgXact[nextidx];
562 
563 		ProcArrayEndTransactionInternal(nextproc, pgxact, nextproc->procArrayGroupMemberXid);
564 
565 		/* Move to next proc in list. */
566 		nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
567 	}
568 
569 	/* We're done with the lock now. */
570 	LWLockRelease(ProcArrayLock);
571 
572 	/*
573 	 * Now that we've released the lock, go back and wake everybody up.  We
574 	 * don't do this under the lock so as to keep lock hold times to a
575 	 * minimum.  The system calls we need to perform to wake other processes
576 	 * up are probably much slower than the simple memory writes we did while
577 	 * holding the lock.
578 	 */
579 	while (wakeidx != INVALID_PGPROCNO)
580 	{
581 		PGPROC	   *nextproc = &allProcs[wakeidx];
582 
r_mark_regions(struct SN_env * z)583 		wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
584 		pg_atomic_write_u32(&nextproc->procArrayGroupNext, INVALID_PGPROCNO);
585 
586 		/* ensure all previous writes are visible before follower continues. */
587 		pg_write_barrier();
588 
589 		nextproc->procArrayGroupMember = false;
590 
591 		if (nextproc != MyProc)
592 			PGSemaphoreUnlock(nextproc->sem);
593 	}
594 }
595 
596 /*
597  * ProcArrayClearTransaction -- clear the transaction fields
598  *
599  * This is used after successfully preparing a 2-phase transaction.  We are
600  * not actually reporting the transaction's XID as no longer running --- it
601  * will still appear as running because the 2PC's gxact is in the ProcArray
602  * too.  We just have to clear out our own PGXACT.
603  */
604 void
605 ProcArrayClearTransaction(PGPROC *proc)
606 {
607 	PGXACT	   *pgxact = &allPgXact[proc->pgprocno];
608 
609 	/*
610 	 * We can skip locking ProcArrayLock here, because this action does not
611 	 * actually change anyone's view of the set of running XIDs: our entry is
612 	 * duplicate with the gxact that has already been inserted into the
613 	 * ProcArray.
614 	 */
615 	pgxact->xid = InvalidTransactionId;
616 	proc->lxid = InvalidLocalTransactionId;
617 	pgxact->xmin = InvalidTransactionId;
618 	proc->recoveryConflictPending = false;
619 
620 	/* redundant, but just in case */
621 	pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
622 	pgxact->delayChkpt = false;
623 
624 	/* Clear the subtransaction-XID cache too */
625 	pgxact->nxids = 0;
626 	pgxact->overflowed = false;
627 }
628 
629 /*
630  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
631  *
632  * Remember up to where the startup process initialized the CLOG and subtrans
633  * so we can ensure it's initialized gaplessly up to the point where necessary
634  * while in recovery.
635  */
636 void
637 ProcArrayInitRecovery(TransactionId initializedUptoXID)
638 {
639 	Assert(standbyState == STANDBY_INITIALIZED);
640 	Assert(TransactionIdIsNormal(initializedUptoXID));
641 
642 	/*
643 	 * we set latestObservedXid to the xid SUBTRANS has been initialized up
644 	 * to, so we can extend it from that point onwards in
645 	 * RecordKnownAssignedTransactionIds, and when we get consistent in
646 	 * ProcArrayApplyRecoveryInfo().
647 	 */
648 	latestObservedXid = initializedUptoXID;
649 	TransactionIdRetreat(latestObservedXid);
650 }
651 
652 /*
653  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
654  *
655  * Takes us through 3 states: Initialized, Pending and Ready.
656  * Normal case is to go all the way to Ready straight away, though there
657  * are atypical cases where we need to take it in steps.
658  *
659  * Use the data about running transactions on master to create the initial
660  * state of KnownAssignedXids. We also use these records to regularly prune
661  * KnownAssignedXids because we know it is possible that some transactions
662  * with FATAL errors fail to write abort records, which could cause eventual
r_postlude(struct SN_env * z)663  * overflow.
664  *
665  * See comments for LogStandbySnapshot().
666  */
667 void
668 ProcArrayApplyRecoveryInfo(RunningTransactions running)
669 {
670 	TransactionId *xids;
671 	int			nxids;
672 	TransactionId nextXid;
673 	int			i;
674 
675 	Assert(standbyState >= STANDBY_INITIALIZED);
676 	Assert(TransactionIdIsValid(running->nextXid));
677 	Assert(TransactionIdIsValid(running->oldestRunningXid));
678 	Assert(TransactionIdIsNormal(running->latestCompletedXid));
679 
680 	/*
681 	 * Remove stale transactions, if any.
682 	 */
683 	ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid);
684 
685 	/*
686 	 * Remove stale locks, if any.
687 	 */
688 	StandbyReleaseOldLocks(running->oldestRunningXid);
689 
690 	/*
691 	 * If our snapshot is already valid, nothing else to do...
692 	 */
693 	if (standbyState == STANDBY_SNAPSHOT_READY)
694 		return;
695 
696 	/*
697 	 * If our initial RunningTransactionsData had an overflowed snapshot then
698 	 * we knew we were missing some subxids from our snapshot. If we continue
699 	 * to see overflowed snapshots then we might never be able to start up, so
700 	 * we make another test to see if our snapshot is now valid. We know that
701 	 * the missing subxids are equal to or earlier than nextXid. After we
702 	 * initialise we continue to apply changes during recovery, so once the
703 	 * oldestRunningXid is later than the nextXid from the initial snapshot we
704 	 * know that we no longer have missing information and can mark the
705 	 * snapshot as valid.
706 	 */
707 	if (standbyState == STANDBY_SNAPSHOT_PENDING)
708 	{
709 		/*
710 		 * If the snapshot isn't overflowed or if its empty we can reset our
711 		 * pending state and use this snapshot instead.
712 		 */
713 		if (!running->subxid_overflow || running->xcnt == 0)
714 		{
715 			/*
716 			 * If we have already collected known assigned xids, we need to
717 			 * throw them away before we apply the recovery snapshot.
718 			 */
719 			KnownAssignedXidsReset();
720 			standbyState = STANDBY_INITIALIZED;
721 		}
722 		else
723 		{
724 			if (TransactionIdPrecedes(standbySnapshotPendingXmin,
725 									  running->oldestRunningXid))
726 			{
727 				standbyState = STANDBY_SNAPSHOT_READY;
728 				elog(trace_recovery(DEBUG1),
729 					 "recovery snapshots are now enabled");
730 			}
731 			else
732 				elog(trace_recovery(DEBUG1),
733 					 "recovery snapshot waiting for non-overflowed snapshot or "
734 					 "until oldest active xid on standby is at least %u (now %u)",
735 					 standbySnapshotPendingXmin,
736 					 running->oldestRunningXid);
737 			return;
738 		}
739 	}
740 
741 	Assert(standbyState == STANDBY_INITIALIZED);
742 
743 	/*
744 	 * OK, we need to initialise from the RunningTransactionsData record.
745 	 *
746 	 * NB: this can be reached at least twice, so make sure new code can deal
747 	 * with that.
748 	 */
749 
750 	/*
751 	 * Nobody else is running yet, but take locks anyhow
752 	 */
753 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
754 
755 	/*
756 	 * KnownAssignedXids is sorted so we cannot just add the xids, we have to
757 	 * sort them first.
758 	 *
759 	 * Some of the new xids are top-level xids and some are subtransactions.
760 	 * We don't call SubtransSetParent because it doesn't matter yet. If we
761 	 * aren't overflowed then all xids will fit in snapshot and so we don't
762 	 * need subtrans. If we later overflow, an xid assignment record will add
763 	 * xids to subtrans. If RunningXacts is overflowed then we don't have
764 	 * enough information to correctly update subtrans anyway.
765 	 */
766 
767 	/*
768 	 * Allocate a temporary array to avoid modifying the array passed as
769 	 * argument.
770 	 */
771 	xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
772 
773 	/*
774 	 * Add to the temp array any xids which have not already completed.
775 	 */
776 	nxids = 0;
777 	for (i = 0; i < running->xcnt + running->subxcnt; i++)
778 	{
779 		TransactionId xid = running->xids[i];
780 
781 		/*
782 		 * The running-xacts snapshot can contain xids that were still visible
783 		 * in the procarray when the snapshot was taken, but were already
784 		 * WAL-logged as completed. They're not running anymore, so ignore
785 		 * them.
786 		 */
787 		if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
788 			continue;
789 
790 		xids[nxids++] = xid;
791 	}
792 
793 	if (nxids > 0)
794 	{
795 		if (procArray->numKnownAssignedXids != 0)
796 		{
797 			LWLockRelease(ProcArrayLock);
798 			elog(ERROR, "KnownAssignedXids is not empty");
799 		}
800 
801 		/*
802 		 * Sort the array so that we can add them safely into
803 		 * KnownAssignedXids.
804 		 */
805 		qsort(xids, nxids, sizeof(TransactionId), xidComparator);
806 
807 		/*
808 		 * Add the sorted snapshot into KnownAssignedXids.  The running-xacts
809 		 * snapshot may include duplicated xids because of prepared
810 		 * transactions, so ignore them.
811 		 */
812 		for (i = 0; i < nxids; i++)
813 		{
814 			if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
815 			{
816 				elog(DEBUG1,
817 					 "found duplicated transaction %u for KnownAssignedXids insertion",
818 					 xids[i]);
819 				continue;
820 			}
821 			KnownAssignedXidsAdd(xids[i], xids[i], true);
822 		}
823 
824 		KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
825 	}
826 
827 	pfree(xids);
828 
829 	/*
830 	 * latestObservedXid is at least set to the point where SUBTRANS was
831 	 * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
832 	 * RecordKnownAssignedTransactionIds() was called for.  Initialize
833 	 * subtrans from thereon, up to nextXid - 1.
834 	 *
835 	 * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
836 	 * because we've just added xids to the known assigned xids machinery that
837 	 * haven't gone through RecordKnownAssignedTransactionId().
838 	 */
839 	Assert(TransactionIdIsNormal(latestObservedXid));
840 	TransactionIdAdvance(latestObservedXid);
841 	while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
842 	{
843 		ExtendSUBTRANS(latestObservedXid);
844 		TransactionIdAdvance(latestObservedXid);
845 	}
846 	TransactionIdRetreat(latestObservedXid);	/* = running->nextXid - 1 */
847 
848 	/* ----------
849 	 * Now we've got the running xids we need to set the global values that
850 	 * are used to track snapshots as they evolve further.
851 	 *
852 	 * - latestCompletedXid which will be the xmax for snapshots
853 	 * - lastOverflowedXid which shows whether snapshots overflow
854 	 * - nextXid
855 	 *
856 	 * If the snapshot overflowed, then we still initialise with what we know,
857 	 * but the recovery snapshot isn't fully valid yet because we know there
858 	 * are some subxids missing. We don't know the specific subxids that are
859 	 * missing, so conservatively assume the last one is latestObservedXid.
860 	 * ----------
861 	 */
862 	if (running->subxid_overflow)
863 	{
864 		standbyState = STANDBY_SNAPSHOT_PENDING;
865 
866 		standbySnapshotPendingXmin = latestObservedXid;
867 		procArray->lastOverflowedXid = latestObservedXid;
868 	}
869 	else
870 	{
871 		standbyState = STANDBY_SNAPSHOT_READY;
872 
873 		standbySnapshotPendingXmin = InvalidTransactionId;
874 	}
875 
876 	/*
877 	 * If a transaction wrote a commit record in the gap between taking and
878 	 * logging the snapshot then latestCompletedXid may already be higher than
879 	 * the value from the snapshot, so check before we use the incoming value.
880 	 */
881 	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
882 							  running->latestCompletedXid))
883 		ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
884 
885 	Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
886 
887 	LWLockRelease(ProcArrayLock);
888 
889 	/*
890 	 * ShmemVariableCache->nextXid must be beyond any observed xid.
891 	 *
892 	 * We don't expect anyone else to modify nextXid, hence we don't need to
893 	 * hold a lock while examining it.  We still acquire the lock to modify
894 	 * it, though.
895 	 */
896 	nextXid = latestObservedXid;
897 	TransactionIdAdvance(nextXid);
898 	if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid))
899 	{
900 		LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
901 		ShmemVariableCache->nextXid = nextXid;
902 		LWLockRelease(XidGenLock);
903 	}
904 
905 	Assert(TransactionIdIsValid(ShmemVariableCache->nextXid));
906 
907 	KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
908 	if (standbyState == STANDBY_SNAPSHOT_READY)
909 		elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
910 	else
911 		elog(trace_recovery(DEBUG1),
912 			 "recovery snapshot waiting for non-overflowed snapshot or "
913 			 "until oldest active xid on standby is at least %u (now %u)",
914 			 standbySnapshotPendingXmin,
915 			 running->oldestRunningXid);
916 }
917 
918 /*
919  * ProcArrayApplyXidAssignment
920  *		Process an XLOG_XACT_ASSIGNMENT WAL record
921  */
922 void
923 ProcArrayApplyXidAssignment(TransactionId topxid,
924 							int nsubxids, TransactionId *subxids)
romanian_ISO_8859_2_stem(struct SN_env * z)925 {
926 	TransactionId max_xid;
927 	int			i;
928 
929 	Assert(standbyState >= STANDBY_INITIALIZED);
930 
931 	max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
932 
933 	/*
934 	 * Mark all the subtransactions as observed.
935 	 *
936 	 * NOTE: This will fail if the subxid contains too many previously
937 	 * unobserved xids to fit into known-assigned-xids. That shouldn't happen
938 	 * as the code stands, because xid-assignment records should never contain
939 	 * more than PGPROC_MAX_CACHED_SUBXIDS entries.
940 	 */
941 	RecordKnownAssignedTransactionIds(max_xid);
942 
943 	/*
944 	 * Notice that we update pg_subtrans with the top-level xid, rather than
945 	 * the parent xid. This is a difference between normal processing and
946 	 * recovery, yet is still correct in all cases. The reason is that
947 	 * subtransaction commit is not marked in clog until commit processing, so
948 	 * all aborted subtransactions have already been clearly marked in clog.
949 	 * As a result we are able to refer directly to the top-level
950 	 * transaction's state rather than skipping through all the intermediate
951 	 * states in the subtransaction tree. This should be the first time we
952 	 * have attempted to SubTransSetParent().
953 	 */
954 	for (i = 0; i < nsubxids; i++)
955 		SubTransSetParent(subxids[i], topxid);
956 
957 	/* KnownAssignedXids isn't maintained yet, so we're done for now */
958 	if (standbyState == STANDBY_INITIALIZED)
959 		return;
960 
961 	/*
962 	 * Uses same locking as transaction commit
963 	 */
964 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
965 
966 	/*
967 	 * Remove subxids from known-assigned-xacts.
968 	 */
969 	KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
970 
971 	/*
972 	 * Advance lastOverflowedXid to be at least the last of these subxids.
973 	 */
974 	if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
975 		procArray->lastOverflowedXid = max_xid;
976 
977 	LWLockRelease(ProcArrayLock);
978 }
979 
980 /*
981  * TransactionIdIsInProgress -- is given transaction running in some backend
982  *
983  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
984  * there are four possibilities for finding a running transaction:
985  *
986  * 1. The given Xid is a main transaction Id.  We will find this out cheaply
987  * by looking at the PGXACT struct for each backend.
988  *
989  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
990  * We can find this out cheaply too.
991  *
992  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
993  * if the Xid is running on the master.
994  *
romanian_ISO_8859_2_create_env(void)995  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
996  * if that is running according to PGXACT or KnownAssignedXids.  This is the
997  * slowest way, but sadly it has to be done always if the others failed,
998  * unless we see that the cached subxact sets are complete (none have
999  * overflowed).
1000  *
1001  * ProcArrayLock has to be held while we do 1, 2, 3.  If we save the top Xids
1002  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
1003  * This buys back some concurrency (and we can't retrieve the main Xids from
1004  * PGXACT again anyway; see GetNewTransactionId).
1005  */
1006 bool
1007 TransactionIdIsInProgress(TransactionId xid)
1008 {
1009 	static TransactionId *xids = NULL;
1010 	int			nxids = 0;
1011 	ProcArrayStruct *arrayP = procArray;
1012 	TransactionId topxid;
1013 	int			i,
1014 				j;
1015 
1016 	/*
1017 	 * Don't bother checking a transaction older than RecentXmin; it could not
1018 	 * possibly still be running.  (Note: in particular, this guarantees that
1019 	 * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1020 	 * running.)
1021 	 */
1022 	if (TransactionIdPrecedes(xid, RecentXmin))
1023 	{
1024 		xc_by_recent_xmin_inc();
1025 		return false;
1026 	}
1027 
1028 	/*
1029 	 * We may have just checked the status of this transaction, so if it is
1030 	 * already known to be completed, we can fall out without any access to
1031 	 * shared memory.
1032 	 */
1033 	if (TransactionIdIsKnownCompleted(xid))
1034 	{
1035 		xc_by_known_xact_inc();
1036 		return false;
1037 	}
1038 
1039 	/*
1040 	 * Also, we can handle our own transaction (and subtransactions) without
1041 	 * any access to shared memory.
1042 	 */
1043 	if (TransactionIdIsCurrentTransactionId(xid))
1044 	{
1045 		xc_by_my_xact_inc();
1046 		return true;
1047 	}
1048 
1049 	/*
1050 	 * If first time through, get workspace to remember main XIDs in. We
1051 	 * malloc it permanently to avoid repeated palloc/pfree overhead.
1052 	 */
1053 	if (xids == NULL)
1054 	{
1055 		/*
1056 		 * In hot standby mode, reserve enough space to hold all xids in the
1057 		 * known-assigned list. If we later finish recovery, we no longer need
1058 		 * the bigger array, but we don't bother to shrink it.
1059 		 */
1060 		int			maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1061 
1062 		xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1063 		if (xids == NULL)
1064 			ereport(ERROR,
1065 					(errcode(ERRCODE_OUT_OF_MEMORY),
1066 					 errmsg("out of memory")));
1067 	}
1068 
1069 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1070 
1071 	/*
1072 	 * Now that we have the lock, we can check latestCompletedXid; if the
1073 	 * target Xid is after that, it's surely still running.
1074 	 */
1075 	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
1076 	{
1077 		LWLockRelease(ProcArrayLock);
1078 		xc_by_latest_xid_inc();
1079 		return true;
1080 	}
1081 
1082 	/* No shortcuts, gotta grovel through the array */
1083 	for (i = 0; i < arrayP->numProcs; i++)
1084 	{
1085 		int			pgprocno = arrayP->pgprocnos[i];
1086 		volatile PGPROC *proc = &allProcs[pgprocno];
1087 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
1088 		TransactionId pxid;
1089 
1090 		/* Ignore my own proc --- dealt with it above */
1091 		if (proc == MyProc)
1092 			continue;
1093 
1094 		/* Fetch xid just once - see GetNewTransactionId */
1095 		pxid = pgxact->xid;
1096 
1097 		if (!TransactionIdIsValid(pxid))
1098 			continue;
1099 
1100 		/*
1101 		 * Step 1: check the main Xid
1102 		 */
1103 		if (TransactionIdEquals(pxid, xid))
1104 		{
1105 			LWLockRelease(ProcArrayLock);
1106 			xc_by_main_xid_inc();
1107 			return true;
1108 		}
1109 
1110 		/*
1111 		 * We can ignore main Xids that are younger than the target Xid, since
1112 		 * the target could not possibly be their child.
1113 		 */
1114 		if (TransactionIdPrecedes(xid, pxid))
1115 			continue;
1116 
1117 		/*
1118 		 * Step 2: check the cached child-Xids arrays
1119 		 */
1120 		for (j = pgxact->nxids - 1; j >= 0; j--)
1121 		{
1122 			/* Fetch xid just once - see GetNewTransactionId */
1123 			TransactionId cxid = proc->subxids.xids[j];
1124 
1125 			if (TransactionIdEquals(cxid, xid))
1126 			{
1127 				LWLockRelease(ProcArrayLock);
1128 				xc_by_child_xid_inc();
1129 				return true;
1130 			}
1131 		}
1132 
1133 		/*
1134 		 * Save the main Xid for step 4.  We only need to remember main Xids
1135 		 * that have uncached children.  (Note: there is no race condition
1136 		 * here because the overflowed flag cannot be cleared, only set, while
1137 		 * we hold ProcArrayLock.  So we can't miss an Xid that we need to
1138 		 * worry about.)
1139 		 */
1140 		if (pgxact->overflowed)
1141 			xids[nxids++] = pxid;
1142 	}
1143 
1144 	/*
1145 	 * Step 3: in hot standby mode, check the known-assigned-xids list.  XIDs
1146 	 * in the list must be treated as running.
1147 	 */
1148 	if (RecoveryInProgress())
1149 	{
1150 		/* none of the PGXACT entries should have XIDs in hot standby mode */
1151 		Assert(nxids == 0);
1152 
1153 		if (KnownAssignedXidExists(xid))
1154 		{
1155 			LWLockRelease(ProcArrayLock);
1156 			xc_by_known_assigned_inc();
1157 			return true;
1158 		}
1159 
1160 		/*
1161 		 * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1162 		 * too.  Fetch all xids from KnownAssignedXids that are lower than
1163 		 * xid, since if xid is a subtransaction its parent will always have a
1164 		 * lower value.  Note we will collect both main and subXIDs here, but
1165 		 * there's no help for it.
1166 		 */
1167 		if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1168 			nxids = KnownAssignedXidsGet(xids, xid);
1169 	}
1170 
1171 	LWLockRelease(ProcArrayLock);
1172 
1173 	/*
1174 	 * If none of the relevant caches overflowed, we know the Xid is not
1175 	 * running without even looking at pg_subtrans.
1176 	 */
1177 	if (nxids == 0)
1178 	{
1179 		xc_no_overflow_inc();
1180 		return false;
1181 	}
1182 
1183 	/*
1184 	 * Step 4: have to check pg_subtrans.
1185 	 *
1186 	 * At this point, we know it's either a subtransaction of one of the Xids
1187 	 * in xids[], or it's not running.  If it's an already-failed
1188 	 * subtransaction, we want to say "not running" even though its parent may
1189 	 * still be running.  So first, check pg_xact to see if it's been aborted.
1190 	 */
1191 	xc_slow_answer_inc();
1192 
1193 	if (TransactionIdDidAbort(xid))
1194 		return false;
1195 
1196 	/*
1197 	 * It isn't aborted, so check whether the transaction tree it belongs to
1198 	 * is still running (or, more precisely, whether it was running when we
1199 	 * held ProcArrayLock).
1200 	 */
1201 	topxid = SubTransGetTopmostTransaction(xid);
1202 	Assert(TransactionIdIsValid(topxid));
1203 	if (!TransactionIdEquals(topxid, xid))
1204 	{
1205 		for (i = 0; i < nxids; i++)
1206 		{
1207 			if (TransactionIdEquals(xids[i], topxid))
1208 				return true;
1209 		}
1210 	}
1211 
1212 	return false;
1213 }
1214 
1215 /*
1216  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1217  *
1218  * This differs from TransactionIdIsInProgress in that it ignores prepared
1219  * transactions, as well as transactions running on the master if we're in
1220  * hot standby.  Also, we ignore subtransactions since that's not needed
1221  * for current uses.
1222  */
1223 bool
1224 TransactionIdIsActive(TransactionId xid)
1225 {
1226 	bool		result = false;
1227 	ProcArrayStruct *arrayP = procArray;
1228 	int			i;
1229 
1230 	/*
1231 	 * Don't bother checking a transaction older than RecentXmin; it could not
1232 	 * possibly still be running.
1233 	 */
1234 	if (TransactionIdPrecedes(xid, RecentXmin))
1235 		return false;
1236 
1237 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1238 
1239 	for (i = 0; i < arrayP->numProcs; i++)
1240 	{
1241 		int			pgprocno = arrayP->pgprocnos[i];
1242 		volatile PGPROC *proc = &allProcs[pgprocno];
1243 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
1244 		TransactionId pxid;
1245 
1246 		/* Fetch xid just once - see GetNewTransactionId */
1247 		pxid = pgxact->xid;
1248 
1249 		if (!TransactionIdIsValid(pxid))
1250 			continue;
1251 
1252 		if (proc->pid == 0)
1253 			continue;			/* ignore prepared transactions */
1254 
1255 		if (TransactionIdEquals(pxid, xid))
1256 		{
1257 			result = true;
1258 			break;
1259 		}
1260 	}
1261 
1262 	LWLockRelease(ProcArrayLock);
1263 
1264 	return result;
1265 }
1266 
1267 
1268 /*
1269  * GetOldestXmin -- returns oldest transaction that was running
1270  *					when any current transaction was started.
1271  *
1272  * If rel is NULL or a shared relation, all backends are considered, otherwise
1273  * only backends running in this database are considered.
1274  *
1275  * The flags are used to ignore the backends in calculation when any of the
1276  * corresponding flags is set. Typically, if you want to ignore ones with
1277  * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM.
1278  *
1279  * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and
1280  * catalog_xmin of any replication slots that exist in the system when
1281  * calculating the oldest xmin.
1282  *
1283  * This is used by VACUUM to decide which deleted tuples must be preserved in
1284  * the passed in table. For shared relations backends in all databases must be
1285  * considered, but for non-shared relations that's not required, since only
1286  * backends in my own database could ever see the tuples in them. Also, we can
1287  * ignore concurrently running lazy VACUUMs because (a) they must be working
1288  * on other tables, and (b) they don't need to do snapshot-based lookups.
1289  *
1290  * This is also used to determine where to truncate pg_subtrans.  For that
1291  * backends in all databases have to be considered, so rel = NULL has to be
1292  * passed in.
1293  *
1294  * Note: we include all currently running xids in the set of considered xids.
1295  * This ensures that if a just-started xact has not yet set its snapshot,
1296  * when it does set the snapshot it cannot set xmin less than what we compute.
1297  * See notes in src/backend/access/transam/README.
1298  *
1299  * Note: despite the above, it's possible for the calculated value to move
1300  * backwards on repeated calls. The calculated value is conservative, so that
1301  * anything older is definitely not considered as running by anyone anymore,
1302  * but the exact value calculated depends on a number of things. For example,
1303  * if rel = NULL and there are no transactions running in the current
1304  * database, GetOldestXmin() returns latestCompletedXid. If a transaction
1305  * begins after that, its xmin will include in-progress transactions in other
1306  * databases that started earlier, so another call will return a lower value.
1307  * Nonetheless it is safe to vacuum a table in the current database with the
1308  * first result.  There are also replication-related effects: a walsender
1309  * process can set its xmin based on transactions that are no longer running
1310  * in the master but are still being replayed on the standby, thus possibly
1311  * making the GetOldestXmin reading go backwards.  In this case there is a
1312  * possibility that we lose data that the standby would like to have, but
1313  * unless the standby uses a replication slot to make its xmin persistent
1314  * there is little we can do about that --- data is only protected if the
1315  * walsender runs continuously while queries are executed on the standby.
1316  * (The Hot Standby code deals with such cases by failing standby queries
1317  * that needed to access already-removed data, so there's no integrity bug.)
1318  * The return value is also adjusted with vacuum_defer_cleanup_age, so
1319  * increasing that setting on the fly is another easy way to make
1320  * GetOldestXmin() move backwards, with no consequences for data integrity.
1321  */
1322 TransactionId
1323 GetOldestXmin(Relation rel, int flags)
1324 {
1325 	ProcArrayStruct *arrayP = procArray;
1326 	TransactionId result;
1327 	int			index;
1328 	bool		allDbs;
1329 
1330 	volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1331 	volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1332 
1333 	/*
1334 	 * If we're not computing a relation specific limit, or if a shared
1335 	 * relation has been passed in, backends in all databases have to be
1336 	 * considered.
1337 	 */
1338 	allDbs = rel == NULL || rel->rd_rel->relisshared;
1339 
1340 	/* Cannot look for individual databases during recovery */
1341 	Assert(allDbs || !RecoveryInProgress());
1342 
1343 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1344 
1345 	/*
1346 	 * We initialize the MIN() calculation with latestCompletedXid + 1. This
1347 	 * is a lower bound for the XIDs that might appear in the ProcArray later,
1348 	 * and so protects us against overestimating the result due to future
1349 	 * additions.
1350 	 */
1351 	result = ShmemVariableCache->latestCompletedXid;
1352 	Assert(TransactionIdIsNormal(result));
1353 	TransactionIdAdvance(result);
1354 
1355 	for (index = 0; index < arrayP->numProcs; index++)
1356 	{
1357 		int			pgprocno = arrayP->pgprocnos[index];
1358 		volatile PGPROC *proc = &allProcs[pgprocno];
1359 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
1360 
1361 		if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK))
1362 			continue;
1363 
1364 		if (allDbs ||
1365 			proc->databaseId == MyDatabaseId ||
1366 			proc->databaseId == 0)	/* always include WalSender */
1367 		{
1368 			/* Fetch xid just once - see GetNewTransactionId */
1369 			TransactionId xid = pgxact->xid;
1370 
1371 			/* First consider the transaction's own Xid, if any */
1372 			if (TransactionIdIsNormal(xid) &&
1373 				TransactionIdPrecedes(xid, result))
1374 				result = xid;
1375 
1376 			/*
1377 			 * Also consider the transaction's Xmin, if set.
1378 			 *
1379 			 * We must check both Xid and Xmin because a transaction might
1380 			 * have an Xmin but not (yet) an Xid; conversely, if it has an
1381 			 * Xid, that could determine some not-yet-set Xmin.
1382 			 */
1383 			xid = pgxact->xmin; /* Fetch just once */
1384 			if (TransactionIdIsNormal(xid) &&
1385 				TransactionIdPrecedes(xid, result))
1386 				result = xid;
1387 		}
1388 	}
1389 
1390 	/* fetch into volatile var while ProcArrayLock is held */
1391 	replication_slot_xmin = procArray->replication_slot_xmin;
1392 	replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1393 
1394 	if (RecoveryInProgress())
1395 	{
1396 		/*
1397 		 * Check to see whether KnownAssignedXids contains an xid value older
1398 		 * than the main procarray.
1399 		 */
1400 		TransactionId kaxmin = KnownAssignedXidsGetOldestXmin();
1401 
1402 		LWLockRelease(ProcArrayLock);
1403 
1404 		if (TransactionIdIsNormal(kaxmin) &&
1405 			TransactionIdPrecedes(kaxmin, result))
1406 			result = kaxmin;
1407 	}
1408 	else
1409 	{
1410 		/*
1411 		 * No other information needed, so release the lock immediately.
1412 		 */
1413 		LWLockRelease(ProcArrayLock);
1414 
1415 		/*
1416 		 * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
1417 		 * being careful not to generate a "permanent" XID.
1418 		 *
1419 		 * vacuum_defer_cleanup_age provides some additional "slop" for the
1420 		 * benefit of hot standby queries on standby servers.  This is quick
1421 		 * and dirty, and perhaps not all that useful unless the master has a
1422 		 * predictable transaction rate, but it offers some protection when
1423 		 * there's no walsender connection.  Note that we are assuming
1424 		 * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1425 		 * so guc.c should limit it to no more than the xidStopLimit threshold
1426 		 * in varsup.c.  Also note that we intentionally don't apply
1427 		 * vacuum_defer_cleanup_age on standby servers.
1428 		 */
1429 		result -= vacuum_defer_cleanup_age;
1430 		if (!TransactionIdIsNormal(result))
1431 			result = FirstNormalTransactionId;
1432 	}
1433 
1434 	/*
1435 	 * Check whether there are replication slots requiring an older xmin.
1436 	 */
1437 	if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1438 		TransactionIdIsValid(replication_slot_xmin) &&
1439 		NormalTransactionIdPrecedes(replication_slot_xmin, result))
1440 		result = replication_slot_xmin;
1441 
1442 	/*
1443 	 * After locks have been released and defer_cleanup_age has been applied,
1444 	 * check whether we need to back up further to make logical decoding
1445 	 * possible. We need to do so if we're computing the global limit (rel =
1446 	 * NULL) or if the passed relation is a catalog relation of some kind.
1447 	 */
1448 	if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1449 		(rel == NULL ||
1450 		 RelationIsAccessibleInLogicalDecoding(rel)) &&
1451 		TransactionIdIsValid(replication_slot_catalog_xmin) &&
1452 		NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
1453 		result = replication_slot_catalog_xmin;
1454 
1455 	return result;
1456 }
1457 
1458 /*
1459  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1460  *
1461  * We have to export this for use by snapmgr.c.
1462  */
1463 int
1464 GetMaxSnapshotXidCount(void)
1465 {
1466 	return procArray->maxProcs;
1467 }
1468 
1469 /*
1470  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1471  *
1472  * We have to export this for use by snapmgr.c.
1473  */
1474 int
1475 GetMaxSnapshotSubxidCount(void)
1476 {
1477 	return TOTAL_MAX_CACHED_SUBXIDS;
1478 }
1479 
1480 /*
1481  * GetSnapshotData -- returns information about running transactions.
1482  *
1483  * The returned snapshot includes xmin (lowest still-running xact ID),
1484  * xmax (highest completed xact ID + 1), and a list of running xact IDs
1485  * in the range xmin <= xid < xmax.  It is used as follows:
1486  *		All xact IDs < xmin are considered finished.
1487  *		All xact IDs >= xmax are considered still running.
1488  *		For an xact ID xmin <= xid < xmax, consult list to see whether
1489  *		it is considered running or not.
1490  * This ensures that the set of transactions seen as "running" by the
1491  * current xact will not change after it takes the snapshot.
1492  *
1493  * All running top-level XIDs are included in the snapshot, except for lazy
1494  * VACUUM processes.  We also try to include running subtransaction XIDs,
1495  * but since PGPROC has only a limited cache area for subxact XIDs, full
1496  * information may not be available.  If we find any overflowed subxid arrays,
1497  * we have to mark the snapshot's subxid data as overflowed, and extra work
1498  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
1499  * in tqual.c).
1500  *
1501  * We also update the following backend-global variables:
1502  *		TransactionXmin: the oldest xmin of any snapshot in use in the
1503  *			current transaction (this is the same as MyPgXact->xmin).
1504  *		RecentXmin: the xmin computed for the most recent snapshot.  XIDs
1505  *			older than this are known not running any more.
1506  *		RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
1507  *			running transactions, except those running LAZY VACUUM).  This is
1508  *			the same computation done by
1509  *			GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM).
1510  *		RecentGlobalDataXmin: the global xmin for non-catalog tables
1511  *			>= RecentGlobalXmin
1512  *
1513  * Note: this function should probably not be called with an argument that's
1514  * not statically allocated (see xip allocation below).
1515  */
1516 Snapshot
1517 GetSnapshotData(Snapshot snapshot)
1518 {
1519 	ProcArrayStruct *arrayP = procArray;
1520 	TransactionId xmin;
1521 	TransactionId xmax;
1522 	TransactionId globalxmin;
1523 	int			index;
1524 	int			count = 0;
1525 	int			subcount = 0;
1526 	bool		suboverflowed = false;
1527 	volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1528 	volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1529 
1530 	Assert(snapshot != NULL);
1531 
1532 	/*
1533 	 * Allocating space for maxProcs xids is usually overkill; numProcs would
1534 	 * be sufficient.  But it seems better to do the malloc while not holding
1535 	 * the lock, so we can't look at numProcs.  Likewise, we allocate much
1536 	 * more subxip storage than is probably needed.
1537 	 *
1538 	 * This does open a possibility for avoiding repeated malloc/free: since
1539 	 * maxProcs does not change at runtime, we can simply reuse the previous
1540 	 * xip arrays if any.  (This relies on the fact that all callers pass
1541 	 * static SnapshotData structs.)
1542 	 */
1543 	if (snapshot->xip == NULL)
1544 	{
1545 		/*
1546 		 * First call for this snapshot. Snapshot is same size whether or not
1547 		 * we are in recovery, see later comments.
1548 		 */
1549 		snapshot->xip = (TransactionId *)
1550 			malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
1551 		if (snapshot->xip == NULL)
1552 			ereport(ERROR,
1553 					(errcode(ERRCODE_OUT_OF_MEMORY),
1554 					 errmsg("out of memory")));
1555 		Assert(snapshot->subxip == NULL);
1556 		snapshot->subxip = (TransactionId *)
1557 			malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
1558 		if (snapshot->subxip == NULL)
1559 			ereport(ERROR,
1560 					(errcode(ERRCODE_OUT_OF_MEMORY),
1561 					 errmsg("out of memory")));
1562 	}
1563 
1564 	/*
1565 	 * It is sufficient to get shared lock on ProcArrayLock, even if we are
1566 	 * going to set MyPgXact->xmin.
1567 	 */
1568 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1569 
1570 	/* xmax is always latestCompletedXid + 1 */
1571 	xmax = ShmemVariableCache->latestCompletedXid;
1572 	Assert(TransactionIdIsNormal(xmax));
1573 	TransactionIdAdvance(xmax);
1574 
1575 	/* initialize xmin calculation with xmax */
1576 	globalxmin = xmin = xmax;
1577 
1578 	snapshot->takenDuringRecovery = RecoveryInProgress();
1579 
1580 	if (!snapshot->takenDuringRecovery)
1581 	{
1582 		int		   *pgprocnos = arrayP->pgprocnos;
1583 		int			numProcs;
1584 
1585 		/*
1586 		 * Spin over procArray checking xid, xmin, and subxids.  The goal is
1587 		 * to gather all active xids, find the lowest xmin, and try to record
1588 		 * subxids.
1589 		 */
1590 		numProcs = arrayP->numProcs;
1591 		for (index = 0; index < numProcs; index++)
1592 		{
1593 			int			pgprocno = pgprocnos[index];
1594 			volatile PGXACT *pgxact = &allPgXact[pgprocno];
1595 			TransactionId xid;
1596 
1597 			/*
1598 			 * Backend is doing logical decoding which manages xmin
1599 			 * separately, check below.
1600 			 */
1601 			if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
1602 				continue;
1603 
1604 			/* Ignore procs running LAZY VACUUM */
1605 			if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1606 				continue;
1607 
1608 			/* Update globalxmin to be the smallest valid xmin */
1609 			xid = pgxact->xmin; /* fetch just once */
1610 			if (TransactionIdIsNormal(xid) &&
1611 				NormalTransactionIdPrecedes(xid, globalxmin))
1612 				globalxmin = xid;
1613 
1614 			/* Fetch xid just once - see GetNewTransactionId */
1615 			xid = pgxact->xid;
1616 
1617 			/*
1618 			 * If the transaction has no XID assigned, we can skip it; it
1619 			 * won't have sub-XIDs either.  If the XID is >= xmax, we can also
1620 			 * skip it; such transactions will be treated as running anyway
1621 			 * (and any sub-XIDs will also be >= xmax).
1622 			 */
1623 			if (!TransactionIdIsNormal(xid)
1624 				|| !NormalTransactionIdPrecedes(xid, xmax))
1625 				continue;
1626 
1627 			/*
1628 			 * We don't include our own XIDs (if any) in the snapshot, but we
1629 			 * must include them in xmin.
1630 			 */
1631 			if (NormalTransactionIdPrecedes(xid, xmin))
1632 				xmin = xid;
1633 			if (pgxact == MyPgXact)
1634 				continue;
1635 
1636 			/* Add XID to snapshot. */
1637 			snapshot->xip[count++] = xid;
1638 
1639 			/*
1640 			 * Save subtransaction XIDs if possible (if we've already
1641 			 * overflowed, there's no point).  Note that the subxact XIDs must
1642 			 * be later than their parent, so no need to check them against
1643 			 * xmin.  We could filter against xmax, but it seems better not to
1644 			 * do that much work while holding the ProcArrayLock.
1645 			 *
1646 			 * The other backend can add more subxids concurrently, but cannot
1647 			 * remove any.  Hence it's important to fetch nxids just once.
1648 			 * Should be safe to use memcpy, though.  (We needn't worry about
1649 			 * missing any xids added concurrently, because they must postdate
1650 			 * xmax.)
1651 			 *
1652 			 * Again, our own XIDs are not included in the snapshot.
1653 			 */
1654 			if (!suboverflowed)
1655 			{
1656 				if (pgxact->overflowed)
1657 					suboverflowed = true;
1658 				else
1659 				{
1660 					int			nxids = pgxact->nxids;
1661 
1662 					if (nxids > 0)
1663 					{
1664 						volatile PGPROC *proc = &allProcs[pgprocno];
1665 
1666 						memcpy(snapshot->subxip + subcount,
1667 							   (void *) proc->subxids.xids,
1668 							   nxids * sizeof(TransactionId));
1669 						subcount += nxids;
1670 					}
1671 				}
1672 			}
1673 		}
1674 	}
1675 	else
1676 	{
1677 		/*
1678 		 * We're in hot standby, so get XIDs from KnownAssignedXids.
1679 		 *
1680 		 * We store all xids directly into subxip[]. Here's why:
1681 		 *
1682 		 * In recovery we don't know which xids are top-level and which are
1683 		 * subxacts, a design choice that greatly simplifies xid processing.
1684 		 *
1685 		 * It seems like we would want to try to put xids into xip[] only, but
1686 		 * that is fairly small. We would either need to make that bigger or
1687 		 * to increase the rate at which we WAL-log xid assignment; neither is
1688 		 * an appealing choice.
1689 		 *
1690 		 * We could try to store xids into xip[] first and then into subxip[]
1691 		 * if there are too many xids. That only works if the snapshot doesn't
1692 		 * overflow because we do not search subxip[] in that case. A simpler
1693 		 * way is to just store all xids in the subxact array because this is
1694 		 * by far the bigger array. We just leave the xip array empty.
1695 		 *
1696 		 * Either way we need to change the way XidInMVCCSnapshot() works
1697 		 * depending upon when the snapshot was taken, or change normal
1698 		 * snapshot processing so it matches.
1699 		 *
1700 		 * Note: It is possible for recovery to end before we finish taking
1701 		 * the snapshot, and for newly assigned transaction ids to be added to
1702 		 * the ProcArray.  xmax cannot change while we hold ProcArrayLock, so
1703 		 * those newly added transaction ids would be filtered away, so we
1704 		 * need not be concerned about them.
1705 		 */
1706 		subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
1707 												  xmax);
1708 
1709 		if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
1710 			suboverflowed = true;
1711 	}
1712 
1713 
1714 	/* fetch into volatile var while ProcArrayLock is held */
1715 	replication_slot_xmin = procArray->replication_slot_xmin;
1716 	replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1717 
1718 	if (!TransactionIdIsValid(MyPgXact->xmin))
1719 		MyPgXact->xmin = TransactionXmin = xmin;
1720 
1721 	LWLockRelease(ProcArrayLock);
1722 
1723 	/*
1724 	 * Update globalxmin to include actual process xids.  This is a slightly
1725 	 * different way of computing it than GetOldestXmin uses, but should give
1726 	 * the same result.
1727 	 */
1728 	if (TransactionIdPrecedes(xmin, globalxmin))
1729 		globalxmin = xmin;
1730 
1731 	/* Update global variables too */
1732 	RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age;
1733 	if (!TransactionIdIsNormal(RecentGlobalXmin))
1734 		RecentGlobalXmin = FirstNormalTransactionId;
1735 
1736 	/* Check whether there's a replication slot requiring an older xmin. */
1737 	if (TransactionIdIsValid(replication_slot_xmin) &&
1738 		NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
1739 		RecentGlobalXmin = replication_slot_xmin;
1740 
1741 	/* Non-catalog tables can be vacuumed if older than this xid */
1742 	RecentGlobalDataXmin = RecentGlobalXmin;
1743 
1744 	/*
1745 	 * Check whether there's a replication slot requiring an older catalog
1746 	 * xmin.
1747 	 */
1748 	if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
1749 		NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
1750 		RecentGlobalXmin = replication_slot_catalog_xmin;
1751 
1752 	RecentXmin = xmin;
1753 
1754 	snapshot->xmin = xmin;
1755 	snapshot->xmax = xmax;
1756 	snapshot->xcnt = count;
1757 	snapshot->subxcnt = subcount;
1758 	snapshot->suboverflowed = suboverflowed;
1759 
1760 	snapshot->curcid = GetCurrentCommandId(false);
1761 
1762 	/*
1763 	 * This is a new snapshot, so set both refcounts are zero, and mark it as
1764 	 * not copied in persistent memory.
1765 	 */
1766 	snapshot->active_count = 0;
1767 	snapshot->regd_count = 0;
1768 	snapshot->copied = false;
1769 
1770 	if (old_snapshot_threshold < 0)
1771 	{
1772 		/*
1773 		 * If not using "snapshot too old" feature, fill related fields with
1774 		 * dummy values that don't require any locking.
1775 		 */
1776 		snapshot->lsn = InvalidXLogRecPtr;
1777 		snapshot->whenTaken = 0;
1778 	}
1779 	else
1780 	{
1781 		/*
1782 		 * Capture the current time and WAL stream location in case this
1783 		 * snapshot becomes old enough to need to fall back on the special
1784 		 * "old snapshot" logic.
1785 		 */
1786 		snapshot->lsn = GetXLogInsertRecPtr();
1787 		snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1788 		MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
1789 	}
1790 
1791 	return snapshot;
1792 }
1793 
1794 /*
1795  * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
1796  *
1797  * This is called when installing a snapshot imported from another
1798  * transaction.  To ensure that OldestXmin doesn't go backwards, we must
1799  * check that the source transaction is still running, and we'd better do
1800  * that atomically with installing the new xmin.
1801  *
1802  * Returns true if successful, false if source xact is no longer running.
1803  */
1804 bool
1805 ProcArrayInstallImportedXmin(TransactionId xmin,
1806 							 VirtualTransactionId *sourcevxid)
1807 {
1808 	bool		result = false;
1809 	ProcArrayStruct *arrayP = procArray;
1810 	int			index;
1811 
1812 	Assert(TransactionIdIsNormal(xmin));
1813 	if (!sourcevxid)
1814 		return false;
1815 
1816 	/* Get lock so source xact can't end while we're doing this */
1817 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1818 
1819 	for (index = 0; index < arrayP->numProcs; index++)
1820 	{
1821 		int			pgprocno = arrayP->pgprocnos[index];
1822 		volatile PGPROC *proc = &allProcs[pgprocno];
1823 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
1824 		TransactionId xid;
1825 
1826 		/* Ignore procs running LAZY VACUUM */
1827 		if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1828 			continue;
1829 
1830 		/* We are only interested in the specific virtual transaction. */
1831 		if (proc->backendId != sourcevxid->backendId)
1832 			continue;
1833 		if (proc->lxid != sourcevxid->localTransactionId)
1834 			continue;
1835 
1836 		/*
1837 		 * We check the transaction's database ID for paranoia's sake: if it's
1838 		 * in another DB then its xmin does not cover us.  Caller should have
1839 		 * detected this already, so we just treat any funny cases as
1840 		 * "transaction not found".
1841 		 */
1842 		if (proc->databaseId != MyDatabaseId)
1843 			continue;
1844 
1845 		/*
1846 		 * Likewise, let's just make real sure its xmin does cover us.
1847 		 */
1848 		xid = pgxact->xmin;		/* fetch just once */
1849 		if (!TransactionIdIsNormal(xid) ||
1850 			!TransactionIdPrecedesOrEquals(xid, xmin))
1851 			continue;
1852 
1853 		/*
1854 		 * We're good.  Install the new xmin.  As in GetSnapshotData, set
1855 		 * TransactionXmin too.  (Note that because snapmgr.c called
1856 		 * GetSnapshotData first, we'll be overwriting a valid xmin here, so
1857 		 * we don't check that.)
1858 		 */
1859 		MyPgXact->xmin = TransactionXmin = xmin;
1860 
1861 		result = true;
1862 		break;
1863 	}
1864 
1865 	LWLockRelease(ProcArrayLock);
1866 
1867 	return result;
1868 }
1869 
1870 /*
1871  * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin
1872  *
1873  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
1874  * PGPROC of the transaction from which we imported the snapshot, rather than
1875  * an XID.
1876  *
1877  * Returns true if successful, false if source xact is no longer running.
1878  */
1879 bool
1880 ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
1881 {
1882 	bool		result = false;
1883 	TransactionId xid;
1884 	volatile PGXACT *pgxact;
1885 
1886 	Assert(TransactionIdIsNormal(xmin));
1887 	Assert(proc != NULL);
1888 
1889 	/* Get lock so source xact can't end while we're doing this */
1890 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1891 
1892 	pgxact = &allPgXact[proc->pgprocno];
1893 
1894 	/*
1895 	 * Be certain that the referenced PGPROC has an advertised xmin which is
1896 	 * no later than the one we're installing, so that the system-wide xmin
1897 	 * can't go backwards.  Also, make sure it's running in the same database,
1898 	 * so that the per-database xmin cannot go backwards.
1899 	 */
1900 	xid = pgxact->xmin;			/* fetch just once */
1901 	if (proc->databaseId == MyDatabaseId &&
1902 		TransactionIdIsNormal(xid) &&
1903 		TransactionIdPrecedesOrEquals(xid, xmin))
1904 	{
1905 		MyPgXact->xmin = TransactionXmin = xmin;
1906 		result = true;
1907 	}
1908 
1909 	LWLockRelease(ProcArrayLock);
1910 
1911 	return result;
1912 }
1913 
1914 /*
1915  * GetRunningTransactionData -- returns information about running transactions.
1916  *
1917  * Similar to GetSnapshotData but returns more information. We include
1918  * all PGXACTs with an assigned TransactionId, even VACUUM processes and
1919  * prepared transactions.
1920  *
1921  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
1922  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
1923  * array until the caller has WAL-logged this snapshot, and releases the
1924  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
1925  * lock is released.
1926  *
1927  * The returned data structure is statically allocated; caller should not
1928  * modify it, and must not assume it is valid past the next call.
1929  *
1930  * This is never executed during recovery so there is no need to look at
1931  * KnownAssignedXids.
1932  *
1933  * Dummy PGXACTs from prepared transaction are included, meaning that this
1934  * may return entries with duplicated TransactionId values coming from
1935  * transaction finishing to prepare.  Nothing is done about duplicated
1936  * entries here to not hold on ProcArrayLock more than necessary.
1937  *
1938  * We don't worry about updating other counters, we want to keep this as
1939  * simple as possible and leave GetSnapshotData() as the primary code for
1940  * that bookkeeping.
1941  *
1942  * Note that if any transaction has overflowed its cached subtransactions
1943  * then there is no real need include any subtransactions.
1944  */
1945 RunningTransactions
1946 GetRunningTransactionData(void)
1947 {
1948 	/* result workspace */
1949 	static RunningTransactionsData CurrentRunningXactsData;
1950 
1951 	ProcArrayStruct *arrayP = procArray;
1952 	RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
1953 	TransactionId latestCompletedXid;
1954 	TransactionId oldestRunningXid;
1955 	TransactionId *xids;
1956 	int			index;
1957 	int			count;
1958 	int			subcount;
1959 	bool		suboverflowed;
1960 
1961 	Assert(!RecoveryInProgress());
1962 
1963 	/*
1964 	 * Allocating space for maxProcs xids is usually overkill; numProcs would
1965 	 * be sufficient.  But it seems better to do the malloc while not holding
1966 	 * the lock, so we can't look at numProcs.  Likewise, we allocate much
1967 	 * more subxip storage than is probably needed.
1968 	 *
1969 	 * Should only be allocated in bgwriter, since only ever executed during
1970 	 * checkpoints.
1971 	 */
1972 	if (CurrentRunningXacts->xids == NULL)
1973 	{
1974 		/*
1975 		 * First call
1976 		 */
1977 		CurrentRunningXacts->xids = (TransactionId *)
1978 			malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
1979 		if (CurrentRunningXacts->xids == NULL)
1980 			ereport(ERROR,
1981 					(errcode(ERRCODE_OUT_OF_MEMORY),
1982 					 errmsg("out of memory")));
1983 	}
1984 
1985 	xids = CurrentRunningXacts->xids;
1986 
1987 	count = subcount = 0;
1988 	suboverflowed = false;
1989 
1990 	/*
1991 	 * Ensure that no xids enter or leave the procarray while we obtain
1992 	 * snapshot.
1993 	 */
1994 	LWLockAcquire(ProcArrayLock, LW_SHARED);
1995 	LWLockAcquire(XidGenLock, LW_SHARED);
1996 
1997 	latestCompletedXid = ShmemVariableCache->latestCompletedXid;
1998 
1999 	oldestRunningXid = ShmemVariableCache->nextXid;
2000 
2001 	/*
2002 	 * Spin over procArray collecting all xids
2003 	 */
2004 	for (index = 0; index < arrayP->numProcs; index++)
2005 	{
2006 		int			pgprocno = arrayP->pgprocnos[index];
2007 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2008 		TransactionId xid;
2009 
2010 		/* Fetch xid just once - see GetNewTransactionId */
2011 		xid = pgxact->xid;
2012 
2013 		/*
2014 		 * We don't need to store transactions that don't have a TransactionId
2015 		 * yet because they will not show as running on a standby server.
2016 		 */
2017 		if (!TransactionIdIsValid(xid))
2018 			continue;
2019 
2020 		/*
2021 		 * Be careful not to exclude any xids before calculating the values of
2022 		 * oldestRunningXid and suboverflowed, since these are used to clean
2023 		 * up transaction information held on standbys.
2024 		 */
2025 		if (TransactionIdPrecedes(xid, oldestRunningXid))
2026 			oldestRunningXid = xid;
2027 
2028 		if (pgxact->overflowed)
2029 			suboverflowed = true;
2030 
2031 		/*
2032 		 * If we wished to exclude xids this would be the right place for it.
2033 		 * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
2034 		 * but they do during truncation at the end when they get the lock and
2035 		 * truncate, so it is not much of a problem to include them if they
2036 		 * are seen and it is cleaner to include them.
2037 		 */
2038 
2039 		xids[count++] = xid;
2040 	}
2041 
2042 	/*
2043 	 * Spin over procArray collecting all subxids, but only if there hasn't
2044 	 * been a suboverflow.
2045 	 */
2046 	if (!suboverflowed)
2047 	{
2048 		for (index = 0; index < arrayP->numProcs; index++)
2049 		{
2050 			int			pgprocno = arrayP->pgprocnos[index];
2051 			volatile PGPROC *proc = &allProcs[pgprocno];
2052 			volatile PGXACT *pgxact = &allPgXact[pgprocno];
2053 			int			nxids;
2054 
2055 			/*
2056 			 * Save subtransaction XIDs. Other backends can't add or remove
2057 			 * entries while we're holding XidGenLock.
2058 			 */
2059 			nxids = pgxact->nxids;
2060 			if (nxids > 0)
2061 			{
2062 				memcpy(&xids[count], (void *) proc->subxids.xids,
2063 					   nxids * sizeof(TransactionId));
2064 				count += nxids;
2065 				subcount += nxids;
2066 
2067 				/*
2068 				 * Top-level XID of a transaction is always less than any of
2069 				 * its subxids, so we don't need to check if any of the
2070 				 * subxids are smaller than oldestRunningXid
2071 				 */
2072 			}
2073 		}
2074 	}
2075 
2076 	/*
2077 	 * It's important *not* to include the limits set by slots here because
2078 	 * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2079 	 * were to be included here the initial value could never increase because
2080 	 * of a circular dependency where slots only increase their limits when
2081 	 * running xacts increases oldestRunningXid and running xacts only
2082 	 * increases if slots do.
2083 	 */
2084 
2085 	CurrentRunningXacts->xcnt = count - subcount;
2086 	CurrentRunningXacts->subxcnt = subcount;
2087 	CurrentRunningXacts->subxid_overflow = suboverflowed;
2088 	CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid;
2089 	CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2090 	CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2091 
2092 	Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2093 	Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2094 	Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2095 
2096 	/* We don't release the locks here, the caller is responsible for that */
2097 
2098 	return CurrentRunningXacts;
2099 }
2100 
2101 /*
2102  * GetOldestActiveTransactionId()
2103  *
2104  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2105  * all PGXACTs with an assigned TransactionId, even VACUUM processes.
2106  * We look at all databases, though there is no need to include WALSender
2107  * since this has no effect on hot standby conflicts.
2108  *
2109  * This is never executed during recovery so there is no need to look at
2110  * KnownAssignedXids.
2111  *
2112  * We don't worry about updating other counters, we want to keep this as
2113  * simple as possible and leave GetSnapshotData() as the primary code for
2114  * that bookkeeping.
2115  */
2116 TransactionId
2117 GetOldestActiveTransactionId(void)
2118 {
2119 	ProcArrayStruct *arrayP = procArray;
2120 	TransactionId oldestRunningXid;
2121 	int			index;
2122 
2123 	Assert(!RecoveryInProgress());
2124 
2125 	/*
2126 	 * Read nextXid, as the upper bound of what's still active.
2127 	 *
2128 	 * Reading a TransactionId is atomic, but we must grab the lock to make
2129 	 * sure that all XIDs < nextXid are already present in the proc array (or
2130 	 * have already completed), when we spin over it.
2131 	 */
2132 	LWLockAcquire(XidGenLock, LW_SHARED);
2133 	oldestRunningXid = ShmemVariableCache->nextXid;
2134 	LWLockRelease(XidGenLock);
2135 
2136 	/*
2137 	 * Spin over procArray collecting all xids and subxids.
2138 	 */
2139 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2140 	for (index = 0; index < arrayP->numProcs; index++)
2141 	{
2142 		int			pgprocno = arrayP->pgprocnos[index];
2143 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2144 		TransactionId xid;
2145 
2146 		/* Fetch xid just once - see GetNewTransactionId */
2147 		xid = pgxact->xid;
2148 
2149 		if (!TransactionIdIsNormal(xid))
2150 			continue;
2151 
2152 		if (TransactionIdPrecedes(xid, oldestRunningXid))
2153 			oldestRunningXid = xid;
2154 
2155 		/*
2156 		 * Top-level XID of a transaction is always less than any of its
2157 		 * subxids, so we don't need to check if any of the subxids are
2158 		 * smaller than oldestRunningXid
2159 		 */
2160 	}
2161 	LWLockRelease(ProcArrayLock);
2162 
2163 	return oldestRunningXid;
2164 }
2165 
2166 /*
2167  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2168  *
2169  * Returns the oldest xid that we can guarantee not to have been affected by
2170  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2171  * transaction aborted. Note that the value can (and most of the time will) be
2172  * much more conservative than what really has been affected by vacuum, but we
2173  * currently don't have better data available.
2174  *
2175  * This is useful to initialize the cutoff xid after which a new changeset
2176  * extraction replication slot can start decoding changes.
2177  *
2178  * Must be called with ProcArrayLock held either shared or exclusively,
2179  * although most callers will want to use exclusive mode since it is expected
2180  * that the caller will immediately use the xid to peg the xmin horizon.
2181  */
2182 TransactionId
2183 GetOldestSafeDecodingTransactionId(bool catalogOnly)
2184 {
2185 	ProcArrayStruct *arrayP = procArray;
2186 	TransactionId oldestSafeXid;
2187 	int			index;
2188 	bool		recovery_in_progress = RecoveryInProgress();
2189 
2190 	Assert(LWLockHeldByMe(ProcArrayLock));
2191 
2192 	/*
2193 	 * Acquire XidGenLock, so no transactions can acquire an xid while we're
2194 	 * running. If no transaction with xid were running concurrently a new xid
2195 	 * could influence the RecentXmin et al.
2196 	 *
2197 	 * We initialize the computation to nextXid since that's guaranteed to be
2198 	 * a safe, albeit pessimal, value.
2199 	 */
2200 	LWLockAcquire(XidGenLock, LW_SHARED);
2201 	oldestSafeXid = ShmemVariableCache->nextXid;
2202 
2203 	/*
2204 	 * If there's already a slot pegging the xmin horizon, we can start with
2205 	 * that value, it's guaranteed to be safe since it's computed by this
2206 	 * routine initially and has been enforced since.  We can always use the
2207 	 * slot's general xmin horizon, but the catalog horizon is only usable
2208 	 * when only catalog data is going to be looked at.
2209 	 */
2210 	if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2211 		TransactionIdPrecedes(procArray->replication_slot_xmin,
2212 							  oldestSafeXid))
2213 		oldestSafeXid = procArray->replication_slot_xmin;
2214 
2215 	if (catalogOnly &&
2216 		TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
2217 		TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
2218 							  oldestSafeXid))
2219 		oldestSafeXid = procArray->replication_slot_catalog_xmin;
2220 
2221 	/*
2222 	 * If we're not in recovery, we walk over the procarray and collect the
2223 	 * lowest xid. Since we're called with ProcArrayLock held and have
2224 	 * acquired XidGenLock, no entries can vanish concurrently, since
2225 	 * PGXACT->xid is only set with XidGenLock held and only cleared with
2226 	 * ProcArrayLock held.
2227 	 *
2228 	 * In recovery we can't lower the safe value besides what we've computed
2229 	 * above, so we'll have to wait a bit longer there. We unfortunately can
2230 	 * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2231 	 * machinery can miss values and return an older value than is safe.
2232 	 */
2233 	if (!recovery_in_progress)
2234 	{
2235 		/*
2236 		 * Spin over procArray collecting all min(PGXACT->xid)
2237 		 */
2238 		for (index = 0; index < arrayP->numProcs; index++)
2239 		{
2240 			int			pgprocno = arrayP->pgprocnos[index];
2241 			volatile PGXACT *pgxact = &allPgXact[pgprocno];
2242 			TransactionId xid;
2243 
2244 			/* Fetch xid just once - see GetNewTransactionId */
2245 			xid = pgxact->xid;
2246 
2247 			if (!TransactionIdIsNormal(xid))
2248 				continue;
2249 
2250 			if (TransactionIdPrecedes(xid, oldestSafeXid))
2251 				oldestSafeXid = xid;
2252 		}
2253 	}
2254 
2255 	LWLockRelease(XidGenLock);
2256 
2257 	return oldestSafeXid;
2258 }
2259 
2260 /*
2261  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2262  * delaying checkpoint because they have critical actions in progress.
2263  *
2264  * Constructs an array of VXIDs of transactions that are currently in commit
2265  * critical sections, as shown by having delayChkpt set in their PGXACT.
2266  *
2267  * Returns a palloc'd array that should be freed by the caller.
2268  * *nvxids is the number of valid entries.
2269  *
2270  * Note that because backends set or clear delayChkpt without holding any lock,
2271  * the result is somewhat indeterminate, but we don't really care.  Even in
2272  * a multiprocessor with delayed writes to shared memory, it should be certain
2273  * that setting of delayChkpt will propagate to shared memory when the backend
2274  * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2275  * it's already inserted its commit record.  Whether it takes a little while
2276  * for clearing of delayChkpt to propagate is unimportant for correctness.
2277  */
2278 VirtualTransactionId *
2279 GetVirtualXIDsDelayingChkpt(int *nvxids)
2280 {
2281 	VirtualTransactionId *vxids;
2282 	ProcArrayStruct *arrayP = procArray;
2283 	int			count = 0;
2284 	int			index;
2285 
2286 	/* allocate what's certainly enough result space */
2287 	vxids = (VirtualTransactionId *)
2288 		palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2289 
2290 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2291 
2292 	for (index = 0; index < arrayP->numProcs; index++)
2293 	{
2294 		int			pgprocno = arrayP->pgprocnos[index];
2295 		volatile PGPROC *proc = &allProcs[pgprocno];
2296 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2297 
2298 		if (pgxact->delayChkpt)
2299 		{
2300 			VirtualTransactionId vxid;
2301 
2302 			GET_VXID_FROM_PGPROC(vxid, *proc);
2303 			if (VirtualTransactionIdIsValid(vxid))
2304 				vxids[count++] = vxid;
2305 		}
2306 	}
2307 
2308 	LWLockRelease(ProcArrayLock);
2309 
2310 	*nvxids = count;
2311 	return vxids;
2312 }
2313 
2314 /*
2315  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2316  *
2317  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2318  * of the specified VXIDs are still in critical sections of code.
2319  *
2320  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2321  * those numbers should be small enough for it not to be a problem.
2322  */
2323 bool
2324 HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
2325 {
2326 	bool		result = false;
2327 	ProcArrayStruct *arrayP = procArray;
2328 	int			index;
2329 
2330 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2331 
2332 	for (index = 0; index < arrayP->numProcs; index++)
2333 	{
2334 		int			pgprocno = arrayP->pgprocnos[index];
2335 		volatile PGPROC *proc = &allProcs[pgprocno];
2336 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2337 		VirtualTransactionId vxid;
2338 
2339 		GET_VXID_FROM_PGPROC(vxid, *proc);
2340 
2341 		if (pgxact->delayChkpt && VirtualTransactionIdIsValid(vxid))
2342 		{
2343 			int			i;
2344 
2345 			for (i = 0; i < nvxids; i++)
2346 			{
2347 				if (VirtualTransactionIdEquals(vxid, vxids[i]))
2348 				{
2349 					result = true;
2350 					break;
2351 				}
2352 			}
2353 			if (result)
2354 				break;
2355 		}
2356 	}
2357 
2358 	LWLockRelease(ProcArrayLock);
2359 
2360 	return result;
2361 }
2362 
2363 /*
2364  * BackendPidGetProc -- get a backend's PGPROC given its PID
2365  *
2366  * Returns NULL if not found.  Note that it is up to the caller to be
2367  * sure that the question remains meaningful for long enough for the
2368  * answer to be used ...
2369  */
2370 PGPROC *
2371 BackendPidGetProc(int pid)
2372 {
2373 	PGPROC	   *result;
2374 
2375 	if (pid == 0)				/* never match dummy PGPROCs */
2376 		return NULL;
2377 
2378 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2379 
2380 	result = BackendPidGetProcWithLock(pid);
2381 
2382 	LWLockRelease(ProcArrayLock);
2383 
2384 	return result;
2385 }
2386 
2387 /*
2388  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
2389  *
2390  * Same as above, except caller must be holding ProcArrayLock.  The found
2391  * entry, if any, can be assumed to be valid as long as the lock remains held.
2392  */
2393 PGPROC *
2394 BackendPidGetProcWithLock(int pid)
2395 {
2396 	PGPROC	   *result = NULL;
2397 	ProcArrayStruct *arrayP = procArray;
2398 	int			index;
2399 
2400 	if (pid == 0)				/* never match dummy PGPROCs */
2401 		return NULL;
2402 
2403 	for (index = 0; index < arrayP->numProcs; index++)
2404 	{
2405 		PGPROC	   *proc = &allProcs[arrayP->pgprocnos[index]];
2406 
2407 		if (proc->pid == pid)
2408 		{
2409 			result = proc;
2410 			break;
2411 		}
2412 	}
2413 
2414 	return result;
2415 }
2416 
2417 /*
2418  * BackendXidGetPid -- get a backend's pid given its XID
2419  *
2420  * Returns 0 if not found or it's a prepared transaction.  Note that
2421  * it is up to the caller to be sure that the question remains
2422  * meaningful for long enough for the answer to be used ...
2423  *
2424  * Only main transaction Ids are considered.  This function is mainly
2425  * useful for determining what backend owns a lock.
2426  *
2427  * Beware that not every xact has an XID assigned.  However, as long as you
2428  * only call this using an XID found on disk, you're safe.
2429  */
2430 int
2431 BackendXidGetPid(TransactionId xid)
2432 {
2433 	int			result = 0;
2434 	ProcArrayStruct *arrayP = procArray;
2435 	int			index;
2436 
2437 	if (xid == InvalidTransactionId)	/* never match invalid xid */
2438 		return 0;
2439 
2440 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2441 
2442 	for (index = 0; index < arrayP->numProcs; index++)
2443 	{
2444 		int			pgprocno = arrayP->pgprocnos[index];
2445 		volatile PGPROC *proc = &allProcs[pgprocno];
2446 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2447 
2448 		if (pgxact->xid == xid)
2449 		{
2450 			result = proc->pid;
2451 			break;
2452 		}
2453 	}
2454 
2455 	LWLockRelease(ProcArrayLock);
2456 
2457 	return result;
2458 }
2459 
2460 /*
2461  * IsBackendPid -- is a given pid a running backend
2462  *
2463  * This is not called by the backend, but is called by external modules.
2464  */
2465 bool
2466 IsBackendPid(int pid)
2467 {
2468 	return (BackendPidGetProc(pid) != NULL);
2469 }
2470 
2471 
2472 /*
2473  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
2474  *
2475  * The array is palloc'd. The number of valid entries is returned into *nvxids.
2476  *
2477  * The arguments allow filtering the set of VXIDs returned.  Our own process
2478  * is always skipped.  In addition:
2479  *	If limitXmin is not InvalidTransactionId, skip processes with
2480  *		xmin > limitXmin.
2481  *	If excludeXmin0 is true, skip processes with xmin = 0.
2482  *	If allDbs is false, skip processes attached to other databases.
2483  *	If excludeVacuum isn't zero, skip processes for which
2484  *		(vacuumFlags & excludeVacuum) is not zero.
2485  *
2486  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
2487  * allow skipping backends whose oldest live snapshot is no older than
2488  * some snapshot we have.  Since we examine the procarray with only shared
2489  * lock, there are race conditions: a backend could set its xmin just after
2490  * we look.  Indeed, on multiprocessors with weak memory ordering, the
2491  * other backend could have set its xmin *before* we look.  We know however
2492  * that such a backend must have held shared ProcArrayLock overlapping our
2493  * own hold of ProcArrayLock, else we would see its xmin update.  Therefore,
2494  * any snapshot the other backend is taking concurrently with our scan cannot
2495  * consider any transactions as still running that we think are committed
2496  * (since backends must hold ProcArrayLock exclusive to commit).
2497  */
2498 VirtualTransactionId *
2499 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
2500 					  bool allDbs, int excludeVacuum,
2501 					  int *nvxids)
2502 {
2503 	VirtualTransactionId *vxids;
2504 	ProcArrayStruct *arrayP = procArray;
2505 	int			count = 0;
2506 	int			index;
2507 
2508 	/* allocate what's certainly enough result space */
2509 	vxids = (VirtualTransactionId *)
2510 		palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2511 
2512 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2513 
2514 	for (index = 0; index < arrayP->numProcs; index++)
2515 	{
2516 		int			pgprocno = arrayP->pgprocnos[index];
2517 		volatile PGPROC *proc = &allProcs[pgprocno];
2518 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2519 
2520 		if (proc == MyProc)
2521 			continue;
2522 
2523 		if (excludeVacuum & pgxact->vacuumFlags)
2524 			continue;
2525 
2526 		if (allDbs || proc->databaseId == MyDatabaseId)
2527 		{
2528 			/* Fetch xmin just once - might change on us */
2529 			TransactionId pxmin = pgxact->xmin;
2530 
2531 			if (excludeXmin0 && !TransactionIdIsValid(pxmin))
2532 				continue;
2533 
2534 			/*
2535 			 * InvalidTransactionId precedes all other XIDs, so a proc that
2536 			 * hasn't set xmin yet will not be rejected by this test.
2537 			 */
2538 			if (!TransactionIdIsValid(limitXmin) ||
2539 				TransactionIdPrecedesOrEquals(pxmin, limitXmin))
2540 			{
2541 				VirtualTransactionId vxid;
2542 
2543 				GET_VXID_FROM_PGPROC(vxid, *proc);
2544 				if (VirtualTransactionIdIsValid(vxid))
2545 					vxids[count++] = vxid;
2546 			}
2547 		}
2548 	}
2549 
2550 	LWLockRelease(ProcArrayLock);
2551 
2552 	*nvxids = count;
2553 	return vxids;
2554 }
2555 
2556 /*
2557  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
2558  *
2559  * Usage is limited to conflict resolution during recovery on standby servers.
2560  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
2561  * in cases where we cannot accurately determine a value for latestRemovedXid.
2562  *
2563  * If limitXmin is InvalidTransactionId then we want to kill everybody,
2564  * so we're not worried if they have a snapshot or not, nor does it really
2565  * matter what type of lock we hold.
2566  *
2567  * All callers that are checking xmins always now supply a valid and useful
2568  * value for limitXmin. The limitXmin is always lower than the lowest
2569  * numbered KnownAssignedXid that is not already a FATAL error. This is
2570  * because we only care about cleanup records that are cleaning up tuple
2571  * versions from committed transactions. In that case they will only occur
2572  * at the point where the record is less than the lowest running xid. That
2573  * allows us to say that if any backend takes a snapshot concurrently with
2574  * us then the conflict assessment made here would never include the snapshot
2575  * that is being derived. So we take LW_SHARED on the ProcArray and allow
2576  * concurrent snapshots when limitXmin is valid. We might think about adding
2577  *	 Assert(limitXmin < lowest(KnownAssignedXids))
2578  * but that would not be true in the case of FATAL errors lagging in array,
2579  * but we already know those are bogus anyway, so we skip that test.
2580  *
2581  * If dbOid is valid we skip backends attached to other databases.
2582  *
2583  * Be careful to *not* pfree the result from this function. We reuse
2584  * this array sufficiently often that we use malloc for the result.
2585  */
2586 VirtualTransactionId *
2587 GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
2588 {
2589 	static VirtualTransactionId *vxids;
2590 	ProcArrayStruct *arrayP = procArray;
2591 	int			count = 0;
2592 	int			index;
2593 
2594 	/*
2595 	 * If first time through, get workspace to remember main XIDs in. We
2596 	 * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
2597 	 * result space, remembering room for a terminator.
2598 	 */
2599 	if (vxids == NULL)
2600 	{
2601 		vxids = (VirtualTransactionId *)
2602 			malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
2603 		if (vxids == NULL)
2604 			ereport(ERROR,
2605 					(errcode(ERRCODE_OUT_OF_MEMORY),
2606 					 errmsg("out of memory")));
2607 	}
2608 
2609 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2610 
2611 	for (index = 0; index < arrayP->numProcs; index++)
2612 	{
2613 		int			pgprocno = arrayP->pgprocnos[index];
2614 		volatile PGPROC *proc = &allProcs[pgprocno];
2615 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2616 
2617 		/* Exclude prepared transactions */
2618 		if (proc->pid == 0)
2619 			continue;
2620 
2621 		if (!OidIsValid(dbOid) ||
2622 			proc->databaseId == dbOid)
2623 		{
2624 			/* Fetch xmin just once - can't change on us, but good coding */
2625 			TransactionId pxmin = pgxact->xmin;
2626 
2627 			/*
2628 			 * We ignore an invalid pxmin because this means that backend has
2629 			 * no snapshot currently. We hold a Share lock to avoid contention
2630 			 * with users taking snapshots.  That is not a problem because the
2631 			 * current xmin is always at least one higher than the latest
2632 			 * removed xid, so any new snapshot would never conflict with the
2633 			 * test here.
2634 			 */
2635 			if (!TransactionIdIsValid(limitXmin) ||
2636 				(TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
2637 			{
2638 				VirtualTransactionId vxid;
2639 
2640 				GET_VXID_FROM_PGPROC(vxid, *proc);
2641 				if (VirtualTransactionIdIsValid(vxid))
2642 					vxids[count++] = vxid;
2643 			}
2644 		}
2645 	}
2646 
2647 	LWLockRelease(ProcArrayLock);
2648 
2649 	/* add the terminator */
2650 	vxids[count].backendId = InvalidBackendId;
2651 	vxids[count].localTransactionId = InvalidLocalTransactionId;
2652 
2653 	return vxids;
2654 }
2655 
2656 /*
2657  * CancelVirtualTransaction - used in recovery conflict processing
2658  *
2659  * Returns pid of the process signaled, or 0 if not found.
2660  */
2661 pid_t
2662 CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
2663 {
2664 	return SignalVirtualTransaction(vxid, sigmode, true);
2665 }
2666 
2667 pid_t
2668 SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
2669 						 bool conflictPending)
2670 {
2671 	ProcArrayStruct *arrayP = procArray;
2672 	int			index;
2673 	pid_t		pid = 0;
2674 
2675 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2676 
2677 	for (index = 0; index < arrayP->numProcs; index++)
2678 	{
2679 		int			pgprocno = arrayP->pgprocnos[index];
2680 		volatile PGPROC *proc = &allProcs[pgprocno];
2681 		VirtualTransactionId procvxid;
2682 
2683 		GET_VXID_FROM_PGPROC(procvxid, *proc);
2684 
2685 		if (procvxid.backendId == vxid.backendId &&
2686 			procvxid.localTransactionId == vxid.localTransactionId)
2687 		{
2688 			proc->recoveryConflictPending = conflictPending;
2689 			pid = proc->pid;
2690 			if (pid != 0)
2691 			{
2692 				/*
2693 				 * Kill the pid if it's still here. If not, that's what we
2694 				 * wanted so ignore any errors.
2695 				 */
2696 				(void) SendProcSignal(pid, sigmode, vxid.backendId);
2697 			}
2698 			break;
2699 		}
2700 	}
2701 
2702 	LWLockRelease(ProcArrayLock);
2703 
2704 	return pid;
2705 }
2706 
2707 /*
2708  * MinimumActiveBackends --- count backends (other than myself) that are
2709  *		in active transactions.  Return true if the count exceeds the
2710  *		minimum threshold passed.  This is used as a heuristic to decide if
2711  *		a pre-XLOG-flush delay is worthwhile during commit.
2712  *
2713  * Do not count backends that are blocked waiting for locks, since they are
2714  * not going to get to run until someone else commits.
2715  */
2716 bool
2717 MinimumActiveBackends(int min)
2718 {
2719 	ProcArrayStruct *arrayP = procArray;
2720 	int			count = 0;
2721 	int			index;
2722 
2723 	/* Quick short-circuit if no minimum is specified */
2724 	if (min == 0)
2725 		return true;
2726 
2727 	/*
2728 	 * Note: for speed, we don't acquire ProcArrayLock.  This is a little bit
2729 	 * bogus, but since we are only testing fields for zero or nonzero, it
2730 	 * should be OK.  The result is only used for heuristic purposes anyway...
2731 	 */
2732 	for (index = 0; index < arrayP->numProcs; index++)
2733 	{
2734 		int			pgprocno = arrayP->pgprocnos[index];
2735 		volatile PGPROC *proc = &allProcs[pgprocno];
2736 		volatile PGXACT *pgxact = &allPgXact[pgprocno];
2737 
2738 		/*
2739 		 * Since we're not holding a lock, need to be prepared to deal with
2740 		 * garbage, as someone could have incremented numProcs but not yet
2741 		 * filled the structure.
2742 		 *
2743 		 * If someone just decremented numProcs, 'proc' could also point to a
2744 		 * PGPROC entry that's no longer in the array. It still points to a
2745 		 * PGPROC struct, though, because freed PGPROC entries just go to the
2746 		 * free list and are recycled. Its contents are nonsense in that case,
2747 		 * but that's acceptable for this function.
2748 		 */
2749 		if (pgprocno == -1)
2750 			continue;			/* do not count deleted entries */
2751 		if (proc == MyProc)
2752 			continue;			/* do not count myself */
2753 		if (pgxact->xid == InvalidTransactionId)
2754 			continue;			/* do not count if no XID assigned */
2755 		if (proc->pid == 0)
2756 			continue;			/* do not count prepared xacts */
2757 		if (proc->waitLock != NULL)
2758 			continue;			/* do not count if blocked on a lock */
2759 		count++;
2760 		if (count >= min)
2761 			break;
2762 	}
2763 
2764 	return count >= min;
2765 }
2766 
2767 /*
2768  * CountDBBackends --- count backends that are using specified database
2769  */
2770 int
2771 CountDBBackends(Oid databaseid)
2772 {
2773 	ProcArrayStruct *arrayP = procArray;
2774 	int			count = 0;
2775 	int			index;
2776 
2777 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2778 
2779 	for (index = 0; index < arrayP->numProcs; index++)
2780 	{
2781 		int			pgprocno = arrayP->pgprocnos[index];
2782 		volatile PGPROC *proc = &allProcs[pgprocno];
2783 
2784 		if (proc->pid == 0)
2785 			continue;			/* do not count prepared xacts */
2786 		if (!OidIsValid(databaseid) ||
2787 			proc->databaseId == databaseid)
2788 			count++;
2789 	}
2790 
2791 	LWLockRelease(ProcArrayLock);
2792 
2793 	return count;
2794 }
2795 
2796 /*
2797  * CountDBConnections --- counts database backends ignoring any background
2798  *		worker processes
2799  */
2800 int
2801 CountDBConnections(Oid databaseid)
2802 {
2803 	ProcArrayStruct *arrayP = procArray;
2804 	int			count = 0;
2805 	int			index;
2806 
2807 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2808 
2809 	for (index = 0; index < arrayP->numProcs; index++)
2810 	{
2811 		int			pgprocno = arrayP->pgprocnos[index];
2812 		volatile PGPROC *proc = &allProcs[pgprocno];
2813 
2814 		if (proc->pid == 0)
2815 			continue;			/* do not count prepared xacts */
2816 		if (proc->isBackgroundWorker)
2817 			continue;			/* do not count background workers */
2818 		if (!OidIsValid(databaseid) ||
2819 			proc->databaseId == databaseid)
2820 			count++;
2821 	}
2822 
2823 	LWLockRelease(ProcArrayLock);
2824 
2825 	return count;
2826 }
2827 
2828 /*
2829  * CancelDBBackends --- cancel backends that are using specified database
2830  */
2831 void
2832 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
2833 {
2834 	ProcArrayStruct *arrayP = procArray;
2835 	int			index;
2836 	pid_t		pid = 0;
2837 
2838 	/* tell all backends to die */
2839 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2840 
2841 	for (index = 0; index < arrayP->numProcs; index++)
2842 	{
2843 		int			pgprocno = arrayP->pgprocnos[index];
2844 		volatile PGPROC *proc = &allProcs[pgprocno];
2845 
2846 		if (databaseid == InvalidOid || proc->databaseId == databaseid)
2847 		{
2848 			VirtualTransactionId procvxid;
2849 
2850 			GET_VXID_FROM_PGPROC(procvxid, *proc);
2851 
2852 			proc->recoveryConflictPending = conflictPending;
2853 			pid = proc->pid;
2854 			if (pid != 0)
2855 			{
2856 				/*
2857 				 * Kill the pid if it's still here. If not, that's what we
2858 				 * wanted so ignore any errors.
2859 				 */
2860 				(void) SendProcSignal(pid, sigmode, procvxid.backendId);
2861 			}
2862 		}
2863 	}
2864 
2865 	LWLockRelease(ProcArrayLock);
2866 }
2867 
2868 /*
2869  * CountUserBackends --- count backends that are used by specified user
2870  */
2871 int
2872 CountUserBackends(Oid roleid)
2873 {
2874 	ProcArrayStruct *arrayP = procArray;
2875 	int			count = 0;
2876 	int			index;
2877 
2878 	LWLockAcquire(ProcArrayLock, LW_SHARED);
2879 
2880 	for (index = 0; index < arrayP->numProcs; index++)
2881 	{
2882 		int			pgprocno = arrayP->pgprocnos[index];
2883 		volatile PGPROC *proc = &allProcs[pgprocno];
2884 
2885 		if (proc->pid == 0)
2886 			continue;			/* do not count prepared xacts */
2887 		if (proc->isBackgroundWorker)
2888 			continue;			/* do not count background workers */
2889 		if (proc->roleId == roleid)
2890 			count++;
2891 	}
2892 
2893 	LWLockRelease(ProcArrayLock);
2894 
2895 	return count;
2896 }
2897 
2898 /*
2899  * CountOtherDBBackends -- check for other backends running in the given DB
2900  *
2901  * If there are other backends in the DB, we will wait a maximum of 5 seconds
2902  * for them to exit.  Autovacuum backends are encouraged to exit early by
2903  * sending them SIGTERM, but normal user backends are just waited for.
2904  *
2905  * The current backend is always ignored; it is caller's responsibility to
2906  * check whether the current backend uses the given DB, if it's important.
2907  *
2908  * Returns true if there are (still) other backends in the DB, false if not.
2909  * Also, *nbackends and *nprepared are set to the number of other backends
2910  * and prepared transactions in the DB, respectively.
2911  *
2912  * This function is used to interlock DROP DATABASE and related commands
2913  * against there being any active backends in the target DB --- dropping the
2914  * DB while active backends remain would be a Bad Thing.  Note that we cannot
2915  * detect here the possibility of a newly-started backend that is trying to
2916  * connect to the doomed database, so additional interlocking is needed during
2917  * backend startup.  The caller should normally hold an exclusive lock on the
2918  * target DB before calling this, which is one reason we mustn't wait
2919  * indefinitely.
2920  */
2921 bool
2922 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
2923 {
2924 	ProcArrayStruct *arrayP = procArray;
2925 
2926 #define MAXAUTOVACPIDS	10		/* max autovacs to SIGTERM per iteration */
2927 	int			autovac_pids[MAXAUTOVACPIDS];
2928 	int			tries;
2929 
2930 	/* 50 tries with 100ms sleep between tries makes 5 sec total wait */
2931 	for (tries = 0; tries < 50; tries++)
2932 	{
2933 		int			nautovacs = 0;
2934 		bool		found = false;
2935 		int			index;
2936 
2937 		CHECK_FOR_INTERRUPTS();
2938 
2939 		*nbackends = *nprepared = 0;
2940 
2941 		LWLockAcquire(ProcArrayLock, LW_SHARED);
2942 
2943 		for (index = 0; index < arrayP->numProcs; index++)
2944 		{
2945 			int			pgprocno = arrayP->pgprocnos[index];
2946 			volatile PGPROC *proc = &allProcs[pgprocno];
2947 			volatile PGXACT *pgxact = &allPgXact[pgprocno];
2948 
2949 			if (proc->databaseId != databaseId)
2950 				continue;
2951 			if (proc == MyProc)
2952 				continue;
2953 
2954 			found = true;
2955 
2956 			if (proc->pid == 0)
2957 				(*nprepared)++;
2958 			else
2959 			{
2960 				(*nbackends)++;
2961 				if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
2962 					nautovacs < MAXAUTOVACPIDS)
2963 					autovac_pids[nautovacs++] = proc->pid;
2964 			}
2965 		}
2966 
2967 		LWLockRelease(ProcArrayLock);
2968 
2969 		if (!found)
2970 			return false;		/* no conflicting backends, so done */
2971 
2972 		/*
2973 		 * Send SIGTERM to any conflicting autovacuums before sleeping. We
2974 		 * postpone this step until after the loop because we don't want to
2975 		 * hold ProcArrayLock while issuing kill(). We have no idea what might
2976 		 * block kill() inside the kernel...
2977 		 */
2978 		for (index = 0; index < nautovacs; index++)
2979 			(void) kill(autovac_pids[index], SIGTERM);	/* ignore any error */
2980 
2981 		/* sleep, then try again */
2982 		pg_usleep(100 * 1000L); /* 100ms */
2983 	}
2984 
2985 	return true;				/* timed out, still conflicts */
2986 }
2987 
2988 /*
2989  * ProcArraySetReplicationSlotXmin
2990  *
2991  * Install limits to future computations of the xmin horizon to prevent vacuum
2992  * and HOT pruning from removing affected rows still needed by clients with
2993  * replication slots.
2994  */
2995 void
2996 ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
2997 								bool already_locked)
2998 {
2999 	Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
3000 
3001 	if (!already_locked)
3002 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3003 
3004 	procArray->replication_slot_xmin = xmin;
3005 	procArray->replication_slot_catalog_xmin = catalog_xmin;
3006 
3007 	if (!already_locked)
3008 		LWLockRelease(ProcArrayLock);
3009 }
3010 
3011 /*
3012  * ProcArrayGetReplicationSlotXmin
3013  *
3014  * Return the current slot xmin limits. That's useful to be able to remove
3015  * data that's older than those limits.
3016  */
3017 void
3018 ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
3019 								TransactionId *catalog_xmin)
3020 {
3021 	LWLockAcquire(ProcArrayLock, LW_SHARED);
3022 
3023 	if (xmin != NULL)
3024 		*xmin = procArray->replication_slot_xmin;
3025 
3026 	if (catalog_xmin != NULL)
3027 		*catalog_xmin = procArray->replication_slot_catalog_xmin;
3028 
3029 	LWLockRelease(ProcArrayLock);
3030 }
3031 
3032 
3033 #define XidCacheRemove(i) \
3034 	do { \
3035 		MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
3036 		MyPgXact->nxids--; \
3037 	} while (0)
3038 
3039 /*
3040  * XidCacheRemoveRunningXids
3041  *
3042  * Remove a bunch of TransactionIds from the list of known-running
3043  * subtransactions for my backend.  Both the specified xid and those in
3044  * the xids[] array (of length nxids) are removed from the subxids cache.
3045  * latestXid must be the latest XID among the group.
3046  */
3047 void
3048 XidCacheRemoveRunningXids(TransactionId xid,
3049 						  int nxids, const TransactionId *xids,
3050 						  TransactionId latestXid)
3051 {
3052 	int			i,
3053 				j;
3054 
3055 	Assert(TransactionIdIsValid(xid));
3056 
3057 	/*
3058 	 * We must hold ProcArrayLock exclusively in order to remove transactions
3059 	 * from the PGPROC array.  (See src/backend/access/transam/README.)  It's
3060 	 * possible this could be relaxed since we know this routine is only used
3061 	 * to abort subtransactions, but pending closer analysis we'd best be
3062 	 * conservative.
3063 	 */
3064 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3065 
3066 	/*
3067 	 * Under normal circumstances xid and xids[] will be in increasing order,
3068 	 * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
3069 	 * behavior when removing a lot of xids.
3070 	 */
3071 	for (i = nxids - 1; i >= 0; i--)
3072 	{
3073 		TransactionId anxid = xids[i];
3074 
3075 		for (j = MyPgXact->nxids - 1; j >= 0; j--)
3076 		{
3077 			if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3078 			{
3079 				XidCacheRemove(j);
3080 				break;
3081 			}
3082 		}
3083 
3084 		/*
3085 		 * Ordinarily we should have found it, unless the cache has
3086 		 * overflowed. However it's also possible for this routine to be
3087 		 * invoked multiple times for the same subtransaction, in case of an
3088 		 * error during AbortSubTransaction.  So instead of Assert, emit a
3089 		 * debug warning.
3090 		 */
3091 		if (j < 0 && !MyPgXact->overflowed)
3092 			elog(WARNING, "did not find subXID %u in MyProc", anxid);
3093 	}
3094 
3095 	for (j = MyPgXact->nxids - 1; j >= 0; j--)
3096 	{
3097 		if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3098 		{
3099 			XidCacheRemove(j);
3100 			break;
3101 		}
3102 	}
3103 	/* Ordinarily we should have found it, unless the cache has overflowed */
3104 	if (j < 0 && !MyPgXact->overflowed)
3105 		elog(WARNING, "did not find subXID %u in MyProc", xid);
3106 
3107 	/* Also advance global latestCompletedXid while holding the lock */
3108 	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3109 							  latestXid))
3110 		ShmemVariableCache->latestCompletedXid = latestXid;
3111 
3112 	LWLockRelease(ProcArrayLock);
3113 }
3114 
3115 #ifdef XIDCACHE_DEBUG
3116 
3117 /*
3118  * Print stats about effectiveness of XID cache
3119  */
3120 static void
3121 DisplayXidCache(void)
3122 {
3123 	fprintf(stderr,
3124 			"XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
3125 			xc_by_recent_xmin,
3126 			xc_by_known_xact,
3127 			xc_by_my_xact,
3128 			xc_by_latest_xid,
3129 			xc_by_main_xid,
3130 			xc_by_child_xid,
3131 			xc_by_known_assigned,
3132 			xc_no_overflow,
3133 			xc_slow_answer);
3134 }
3135 #endif							/* XIDCACHE_DEBUG */
3136 
3137 
3138 /* ----------------------------------------------
3139  *		KnownAssignedTransactions sub-module
3140  * ----------------------------------------------
3141  */
3142 
3143 /*
3144  * In Hot Standby mode, we maintain a list of transactions that are (or were)
3145  * running in the master at the current point in WAL.  These XIDs must be
3146  * treated as running by standby transactions, even though they are not in
3147  * the standby server's PGXACT array.
3148  *
3149  * We record all XIDs that we know have been assigned.  That includes all the
3150  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
3151  * been assigned.  We can deduce the existence of unobserved XIDs because we
3152  * know XIDs are assigned in sequence, with no gaps.  The KnownAssignedXids
3153  * list expands as new XIDs are observed or inferred, and contracts when
3154  * transaction completion records arrive.
3155  *
3156  * During hot standby we do not fret too much about the distinction between
3157  * top-level XIDs and subtransaction XIDs. We store both together in the
3158  * KnownAssignedXids list.  In backends, this is copied into snapshots in
3159  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
3160  * doesn't care about the distinction either.  Subtransaction XIDs are
3161  * effectively treated as top-level XIDs and in the typical case pg_subtrans
3162  * links are *not* maintained (which does not affect visibility).
3163  *
3164  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
3165  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must
3166  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
3167  * least every PGPROC_MAX_CACHED_SUBXIDS.  When we receive one of these
3168  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
3169  * and then remove them from KnownAssignedXids.  This prevents overflow of
3170  * KnownAssignedXids and snapshots, at the cost that status checks for these
3171  * subXIDs will take a slower path through TransactionIdIsInProgress().
3172  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
3173  * though it should be complete for top-level XIDs; this is the same situation
3174  * that holds with respect to the PGPROC entries in normal running.
3175  *
3176  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
3177  * that, similarly to tracking overflow of a PGPROC's subxids array.  We do
3178  * that by remembering the lastOverflowedXID, ie the last thrown-away subXID.
3179  * As long as that is within the range of interesting XIDs, we have to assume
3180  * that subXIDs are missing from snapshots.  (Note that subXID overflow occurs
3181  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
3182  * subXID arrives - that is not an error.)
3183  *
3184  * Should a backend on primary somehow disappear before it can write an abort
3185  * record, then we just leave those XIDs in KnownAssignedXids. They actually
3186  * aborted but we think they were running; the distinction is irrelevant
3187  * because either way any changes done by the transaction are not visible to
3188  * backends in the standby.  We prune KnownAssignedXids when
3189  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
3190  * array due to such dead XIDs.
3191  */
3192 
3193 /*
3194  * RecordKnownAssignedTransactionIds
3195  *		Record the given XID in KnownAssignedXids, as well as any preceding
3196  *		unobserved XIDs.
3197  *
3198  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
3199  * associated with a transaction. Must be called for each record after we
3200  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc..
3201  *
3202  * Called during recovery in analogy with and in place of GetNewTransactionId()
3203  */
3204 void
3205 RecordKnownAssignedTransactionIds(TransactionId xid)
3206 {
3207 	Assert(standbyState >= STANDBY_INITIALIZED);
3208 	Assert(TransactionIdIsValid(xid));
3209 	Assert(TransactionIdIsValid(latestObservedXid));
3210 
3211 	elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
3212 		 xid, latestObservedXid);
3213 
3214 	/*
3215 	 * When a newly observed xid arrives, it is frequently the case that it is
3216 	 * *not* the next xid in sequence. When this occurs, we must treat the
3217 	 * intervening xids as running also.
3218 	 */
3219 	if (TransactionIdFollows(xid, latestObservedXid))
3220 	{
3221 		TransactionId next_expected_xid;
3222 
3223 		/*
3224 		 * Extend subtrans like we do in GetNewTransactionId() during normal
3225 		 * operation using individual extend steps. Note that we do not need
3226 		 * to extend clog since its extensions are WAL logged.
3227 		 *
3228 		 * This part has to be done regardless of standbyState since we
3229 		 * immediately start assigning subtransactions to their toplevel
3230 		 * transactions.
3231 		 */
3232 		next_expected_xid = latestObservedXid;
3233 		while (TransactionIdPrecedes(next_expected_xid, xid))
3234 		{
3235 			TransactionIdAdvance(next_expected_xid);
3236 			ExtendSUBTRANS(next_expected_xid);
3237 		}
3238 		Assert(next_expected_xid == xid);
3239 
3240 		/*
3241 		 * If the KnownAssignedXids machinery isn't up yet, there's nothing
3242 		 * more to do since we don't track assigned xids yet.
3243 		 */
3244 		if (standbyState <= STANDBY_INITIALIZED)
3245 		{
3246 			latestObservedXid = xid;
3247 			return;
3248 		}
3249 
3250 		/*
3251 		 * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
3252 		 */
3253 		next_expected_xid = latestObservedXid;
3254 		TransactionIdAdvance(next_expected_xid);
3255 		KnownAssignedXidsAdd(next_expected_xid, xid, false);
3256 
3257 		/*
3258 		 * Now we can advance latestObservedXid
3259 		 */
3260 		latestObservedXid = xid;
3261 
3262 		/* ShmemVariableCache->nextXid must be beyond any observed xid */
3263 		next_expected_xid = latestObservedXid;
3264 		TransactionIdAdvance(next_expected_xid);
3265 		LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
3266 		if (TransactionIdFollows(next_expected_xid, ShmemVariableCache->nextXid))
3267 			ShmemVariableCache->nextXid = next_expected_xid;
3268 		LWLockRelease(XidGenLock);
3269 	}
3270 }
3271 
3272 /*
3273  * ExpireTreeKnownAssignedTransactionIds
3274  *		Remove the given XIDs from KnownAssignedXids.
3275  *
3276  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
3277  */
3278 void
3279 ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
3280 									  TransactionId *subxids, TransactionId max_xid)
3281 {
3282 	Assert(standbyState >= STANDBY_INITIALIZED);
3283 
3284 	/*
3285 	 * Uses same locking as transaction commit
3286 	 */
3287 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3288 
3289 	KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
3290 
3291 	/* As in ProcArrayEndTransaction, advance latestCompletedXid */
3292 	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3293 							  max_xid))
3294 		ShmemVariableCache->latestCompletedXid = max_xid;
3295 
3296 	LWLockRelease(ProcArrayLock);
3297 }
3298 
3299 /*
3300  * ExpireAllKnownAssignedTransactionIds
3301  *		Remove all entries in KnownAssignedXids and reset lastOverflowedXid.
3302  */
3303 void
3304 ExpireAllKnownAssignedTransactionIds(void)
3305 {
3306 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3307 	KnownAssignedXidsRemovePreceding(InvalidTransactionId);
3308 
3309 	/*
3310 	 * Reset lastOverflowedXid.  Currently, lastOverflowedXid has no use after
3311 	 * the call of this function.  But do this for unification with what
3312 	 * ExpireOldKnownAssignedTransactionIds() do.
3313 	 */
3314 	procArray->lastOverflowedXid = InvalidTransactionId;
3315 	LWLockRelease(ProcArrayLock);
3316 }
3317 
3318 /*
3319  * ExpireOldKnownAssignedTransactionIds
3320  *		Remove KnownAssignedXids entries preceding the given XID and
3321  *		potentially reset lastOverflowedXid.
3322  */
3323 void
3324 ExpireOldKnownAssignedTransactionIds(TransactionId xid)
3325 {
3326 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3327 
3328 	/*
3329 	 * Reset lastOverflowedXid if we know all transactions that have been
3330 	 * possibly running are being gone.  Not doing so could cause an incorrect
3331 	 * lastOverflowedXid value, which makes extra snapshots be marked as
3332 	 * suboverflowed.
3333 	 */
3334 	if (TransactionIdPrecedes(procArray->lastOverflowedXid, xid))
3335 		procArray->lastOverflowedXid = InvalidTransactionId;
3336 	KnownAssignedXidsRemovePreceding(xid);
3337 	LWLockRelease(ProcArrayLock);
3338 }
3339 
3340 
3341 /*
3342  * Private module functions to manipulate KnownAssignedXids
3343  *
3344  * There are 5 main uses of the KnownAssignedXids data structure:
3345  *
3346  *	* backends taking snapshots - all valid XIDs need to be copied out
3347  *	* backends seeking to determine presence of a specific XID
3348  *	* startup process adding new known-assigned XIDs
3349  *	* startup process removing specific XIDs as transactions end
3350  *	* startup process pruning array when special WAL records arrive
3351  *
3352  * This data structure is known to be a hot spot during Hot Standby, so we
3353  * go to some lengths to make these operations as efficient and as concurrent
3354  * as possible.
3355  *
3356  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
3357  * order, to be exact --- to allow binary search for specific XIDs.  Note:
3358  * in general TransactionIdPrecedes would not provide a total order, but
3359  * we know that the entries present at any instant should not extend across
3360  * a large enough fraction of XID space to wrap around (the master would
3361  * shut down for fear of XID wrap long before that happens).  So it's OK to
3362  * use TransactionIdPrecedes as a binary-search comparator.
3363  *
3364  * It's cheap to maintain the sortedness during insertions, since new known
3365  * XIDs are always reported in XID order; we just append them at the right.
3366  *
3367  * To keep individual deletions cheap, we need to allow gaps in the array.
3368  * This is implemented by marking array elements as valid or invalid using
3369  * the parallel boolean array KnownAssignedXidsValid[].  A deletion is done
3370  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
3371  * XID entry itself.  This preserves the property that the XID entries are
3372  * sorted, so we can do binary searches easily.  Periodically we compress
3373  * out the unused entries; that's much cheaper than having to compress the
3374  * array immediately on every deletion.
3375  *
3376  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
3377  * are those with indexes tail <= i < head; items outside this subscript range
3378  * have unspecified contents.  When head reaches the end of the array, we
3379  * force compression of unused entries rather than wrapping around, since
3380  * allowing wraparound would greatly complicate the search logic.  We maintain
3381  * an explicit tail pointer so that pruning of old XIDs can be done without
3382  * immediately moving the array contents.  In most cases only a small fraction
3383  * of the array contains valid entries at any instant.
3384  *
3385  * Although only the startup process can ever change the KnownAssignedXids
3386  * data structure, we still need interlocking so that standby backends will
3387  * not observe invalid intermediate states.  The convention is that backends
3388  * must hold shared ProcArrayLock to examine the array.  To remove XIDs from
3389  * the array, the startup process must hold ProcArrayLock exclusively, for
3390  * the usual transactional reasons (compare commit/abort of a transaction
3391  * during normal running).  Compressing unused entries out of the array
3392  * likewise requires exclusive lock.  To add XIDs to the array, we just insert
3393  * them into slots to the right of the head pointer and then advance the head
3394  * pointer.  This wouldn't require any lock at all, except that on machines
3395  * with weak memory ordering we need to be careful that other processors
3396  * see the array element changes before they see the head pointer change.
3397  * We handle this by using a spinlock to protect reads and writes of the
3398  * head/tail pointers.  (We could dispense with the spinlock if we were to
3399  * create suitable memory access barrier primitives and use those instead.)
3400  * The spinlock must be taken to read or write the head/tail pointers unless
3401  * the caller holds ProcArrayLock exclusively.
3402  *
3403  * Algorithmic analysis:
3404  *
3405  * If we have a maximum of M slots, with N XIDs currently spread across
3406  * S elements then we have N <= S <= M always.
3407  *
3408  *	* Adding a new XID is O(1) and needs little locking (unless compression
3409  *		must happen)
3410  *	* Compressing the array is O(S) and requires exclusive lock
3411  *	* Removing an XID is O(logS) and requires exclusive lock
3412  *	* Taking a snapshot is O(S) and requires shared lock
3413  *	* Checking for an XID is O(logS) and requires shared lock
3414  *
3415  * In comparison, using a hash table for KnownAssignedXids would mean that
3416  * taking snapshots would be O(M). If we can maintain S << M then the
3417  * sorted array technique will deliver significantly faster snapshots.
3418  * If we try to keep S too small then we will spend too much time compressing,
3419  * so there is an optimal point for any workload mix. We use a heuristic to
3420  * decide when to compress the array, though trimming also helps reduce
3421  * frequency of compressing. The heuristic requires us to track the number of
3422  * currently valid XIDs in the array.
3423  */
3424 
3425 
3426 /*
3427  * Compress KnownAssignedXids by shifting valid data down to the start of the
3428  * array, removing any gaps.
3429  *
3430  * A compression step is forced if "force" is true, otherwise we do it
3431  * only if a heuristic indicates it's a good time to do it.
3432  *
3433  * Caller must hold ProcArrayLock in exclusive mode.
3434  */
3435 static void
3436 KnownAssignedXidsCompress(bool force)
3437 {
3438 	/* use volatile pointer to prevent code rearrangement */
3439 	volatile ProcArrayStruct *pArray = procArray;
3440 	int			head,
3441 				tail;
3442 	int			compress_index;
3443 	int			i;
3444 
3445 	/* no spinlock required since we hold ProcArrayLock exclusively */
3446 	head = pArray->headKnownAssignedXids;
3447 	tail = pArray->tailKnownAssignedXids;
3448 
3449 	if (!force)
3450 	{
3451 		/*
3452 		 * If we can choose how much to compress, use a heuristic to avoid
3453 		 * compressing too often or not often enough.
3454 		 *
3455 		 * Heuristic is if we have a large enough current spread and less than
3456 		 * 50% of the elements are currently in use, then compress. This
3457 		 * should ensure we compress fairly infrequently. We could compress
3458 		 * less often though the virtual array would spread out more and
3459 		 * snapshots would become more expensive.
3460 		 */
3461 		int			nelements = head - tail;
3462 
3463 		if (nelements < 4 * PROCARRAY_MAXPROCS ||
3464 			nelements < 2 * pArray->numKnownAssignedXids)
3465 			return;
3466 	}
3467 
3468 	/*
3469 	 * We compress the array by reading the valid values from tail to head,
3470 	 * re-aligning data to 0th element.
3471 	 */
3472 	compress_index = 0;
3473 	for (i = tail; i < head; i++)
3474 	{
3475 		if (KnownAssignedXidsValid[i])
3476 		{
3477 			KnownAssignedXids[compress_index] = KnownAssignedXids[i];
3478 			KnownAssignedXidsValid[compress_index] = true;
3479 			compress_index++;
3480 		}
3481 	}
3482 
3483 	pArray->tailKnownAssignedXids = 0;
3484 	pArray->headKnownAssignedXids = compress_index;
3485 }
3486 
3487 /*
3488  * Add xids into KnownAssignedXids at the head of the array.
3489  *
3490  * xids from from_xid to to_xid, inclusive, are added to the array.
3491  *
3492  * If exclusive_lock is true then caller already holds ProcArrayLock in
3493  * exclusive mode, so we need no extra locking here.  Else caller holds no
3494  * lock, so we need to be sure we maintain sufficient interlocks against
3495  * concurrent readers.  (Only the startup process ever calls this, so no need
3496  * to worry about concurrent writers.)
3497  */
3498 static void
3499 KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
3500 					 bool exclusive_lock)
3501 {
3502 	/* use volatile pointer to prevent code rearrangement */
3503 	volatile ProcArrayStruct *pArray = procArray;
3504 	TransactionId next_xid;
3505 	int			head,
3506 				tail;
3507 	int			nxids;
3508 	int			i;
3509 
3510 	Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
3511 
3512 	/*
3513 	 * Calculate how many array slots we'll need.  Normally this is cheap; in
3514 	 * the unusual case where the XIDs cross the wrap point, we do it the hard
3515 	 * way.
3516 	 */
3517 	if (to_xid >= from_xid)
3518 		nxids = to_xid - from_xid + 1;
3519 	else
3520 	{
3521 		nxids = 1;
3522 		next_xid = from_xid;
3523 		while (TransactionIdPrecedes(next_xid, to_xid))
3524 		{
3525 			nxids++;
3526 			TransactionIdAdvance(next_xid);
3527 		}
3528 	}
3529 
3530 	/*
3531 	 * Since only the startup process modifies the head/tail pointers, we
3532 	 * don't need a lock to read them here.
3533 	 */
3534 	head = pArray->headKnownAssignedXids;
3535 	tail = pArray->tailKnownAssignedXids;
3536 
3537 	Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
3538 	Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
3539 
3540 	/*
3541 	 * Verify that insertions occur in TransactionId sequence.  Note that even
3542 	 * if the last existing element is marked invalid, it must still have a
3543 	 * correctly sequenced XID value.
3544 	 */
3545 	if (head > tail &&
3546 		TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
3547 	{
3548 		KnownAssignedXidsDisplay(LOG);
3549 		elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
3550 	}
3551 
3552 	/*
3553 	 * If our xids won't fit in the remaining space, compress out free space
3554 	 */
3555 	if (head + nxids > pArray->maxKnownAssignedXids)
3556 	{
3557 		/* must hold lock to compress */
3558 		if (!exclusive_lock)
3559 			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3560 
3561 		KnownAssignedXidsCompress(true);
3562 
3563 		head = pArray->headKnownAssignedXids;
3564 		/* note: we no longer care about the tail pointer */
3565 
3566 		if (!exclusive_lock)
3567 			LWLockRelease(ProcArrayLock);
3568 
3569 		/*
3570 		 * If it still won't fit then we're out of memory
3571 		 */
3572 		if (head + nxids > pArray->maxKnownAssignedXids)
3573 			elog(ERROR, "too many KnownAssignedXids");
3574 	}
3575 
3576 	/* Now we can insert the xids into the space starting at head */
3577 	next_xid = from_xid;
3578 	for (i = 0; i < nxids; i++)
3579 	{
3580 		KnownAssignedXids[head] = next_xid;
3581 		KnownAssignedXidsValid[head] = true;
3582 		TransactionIdAdvance(next_xid);
3583 		head++;
3584 	}
3585 
3586 	/* Adjust count of number of valid entries */
3587 	pArray->numKnownAssignedXids += nxids;
3588 
3589 	/*
3590 	 * Now update the head pointer.  We use a spinlock to protect this
3591 	 * pointer, not because the update is likely to be non-atomic, but to
3592 	 * ensure that other processors see the above array updates before they
3593 	 * see the head pointer change.
3594 	 *
3595 	 * If we're holding ProcArrayLock exclusively, there's no need to take the
3596 	 * spinlock.
3597 	 */
3598 	if (exclusive_lock)
3599 		pArray->headKnownAssignedXids = head;
3600 	else
3601 	{
3602 		SpinLockAcquire(&pArray->known_assigned_xids_lck);
3603 		pArray->headKnownAssignedXids = head;
3604 		SpinLockRelease(&pArray->known_assigned_xids_lck);
3605 	}
3606 }
3607 
3608 /*
3609  * KnownAssignedXidsSearch
3610  *
3611  * Searches KnownAssignedXids for a specific xid and optionally removes it.
3612  * Returns true if it was found, false if not.
3613  *
3614  * Caller must hold ProcArrayLock in shared or exclusive mode.
3615  * Exclusive lock must be held for remove = true.
3616  */
3617 static bool
3618 KnownAssignedXidsSearch(TransactionId xid, bool remove)
3619 {
3620 	/* use volatile pointer to prevent code rearrangement */
3621 	volatile ProcArrayStruct *pArray = procArray;
3622 	int			first,
3623 				last;
3624 	int			head;
3625 	int			tail;
3626 	int			result_index = -1;
3627 
3628 	if (remove)
3629 	{
3630 		/* we hold ProcArrayLock exclusively, so no need for spinlock */
3631 		tail = pArray->tailKnownAssignedXids;
3632 		head = pArray->headKnownAssignedXids;
3633 	}
3634 	else
3635 	{
3636 		/* take spinlock to ensure we see up-to-date array contents */
3637 		SpinLockAcquire(&pArray->known_assigned_xids_lck);
3638 		tail = pArray->tailKnownAssignedXids;
3639 		head = pArray->headKnownAssignedXids;
3640 		SpinLockRelease(&pArray->known_assigned_xids_lck);
3641 	}
3642 
3643 	/*
3644 	 * Standard binary search.  Note we can ignore the KnownAssignedXidsValid
3645 	 * array here, since even invalid entries will contain sorted XIDs.
3646 	 */
3647 	first = tail;
3648 	last = head - 1;
3649 	while (first <= last)
3650 	{
3651 		int			mid_index;
3652 		TransactionId mid_xid;
3653 
3654 		mid_index = (first + last) / 2;
3655 		mid_xid = KnownAssignedXids[mid_index];
3656 
3657 		if (xid == mid_xid)
3658 		{
3659 			result_index = mid_index;
3660 			break;
3661 		}
3662 		else if (TransactionIdPrecedes(xid, mid_xid))
3663 			last = mid_index - 1;
3664 		else
3665 			first = mid_index + 1;
3666 	}
3667 
3668 	if (result_index < 0)
3669 		return false;			/* not in array */
3670 
3671 	if (!KnownAssignedXidsValid[result_index])
3672 		return false;			/* in array, but invalid */
3673 
3674 	if (remove)
3675 	{
3676 		KnownAssignedXidsValid[result_index] = false;
3677 
3678 		pArray->numKnownAssignedXids--;
3679 		Assert(pArray->numKnownAssignedXids >= 0);
3680 
3681 		/*
3682 		 * If we're removing the tail element then advance tail pointer over
3683 		 * any invalid elements.  This will speed future searches.
3684 		 */
3685 		if (result_index == tail)
3686 		{
3687 			tail++;
3688 			while (tail < head && !KnownAssignedXidsValid[tail])
3689 				tail++;
3690 			if (tail >= head)
3691 			{
3692 				/* Array is empty, so we can reset both pointers */
3693 				pArray->headKnownAssignedXids = 0;
3694 				pArray->tailKnownAssignedXids = 0;
3695 			}
3696 			else
3697 			{
3698 				pArray->tailKnownAssignedXids = tail;
3699 			}
3700 		}
3701 	}
3702 
3703 	return true;
3704 }
3705 
3706 /*
3707  * Is the specified XID present in KnownAssignedXids[]?
3708  *
3709  * Caller must hold ProcArrayLock in shared or exclusive mode.
3710  */
3711 static bool
3712 KnownAssignedXidExists(TransactionId xid)
3713 {
3714 	Assert(TransactionIdIsValid(xid));
3715 
3716 	return KnownAssignedXidsSearch(xid, false);
3717 }
3718 
3719 /*
3720  * Remove the specified XID from KnownAssignedXids[].
3721  *
3722  * Caller must hold ProcArrayLock in exclusive mode.
3723  */
3724 static void
3725 KnownAssignedXidsRemove(TransactionId xid)
3726 {
3727 	Assert(TransactionIdIsValid(xid));
3728 
3729 	elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
3730 
3731 	/*
3732 	 * Note: we cannot consider it an error to remove an XID that's not
3733 	 * present.  We intentionally remove subxact IDs while processing
3734 	 * XLOG_XACT_ASSIGNMENT, to avoid array overflow.  Then those XIDs will be
3735 	 * removed again when the top-level xact commits or aborts.
3736 	 *
3737 	 * It might be possible to track such XIDs to distinguish this case from
3738 	 * actual errors, but it would be complicated and probably not worth it.
3739 	 * So, just ignore the search result.
3740 	 */
3741 	(void) KnownAssignedXidsSearch(xid, true);
3742 }
3743 
3744 /*
3745  * KnownAssignedXidsRemoveTree
3746  *		Remove xid (if it's not InvalidTransactionId) and all the subxids.
3747  *
3748  * Caller must hold ProcArrayLock in exclusive mode.
3749  */
3750 static void
3751 KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
3752 							TransactionId *subxids)
3753 {
3754 	int			i;
3755 
3756 	if (TransactionIdIsValid(xid))
3757 		KnownAssignedXidsRemove(xid);
3758 
3759 	for (i = 0; i < nsubxids; i++)
3760 		KnownAssignedXidsRemove(subxids[i]);
3761 
3762 	/* Opportunistically compress the array */
3763 	KnownAssignedXidsCompress(false);
3764 }
3765 
3766 /*
3767  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
3768  * then clear the whole table.
3769  *
3770  * Caller must hold ProcArrayLock in exclusive mode.
3771  */
3772 static void
3773 KnownAssignedXidsRemovePreceding(TransactionId removeXid)
3774 {
3775 	/* use volatile pointer to prevent code rearrangement */
3776 	volatile ProcArrayStruct *pArray = procArray;
3777 	int			count = 0;
3778 	int			head,
3779 				tail,
3780 				i;
3781 
3782 	if (!TransactionIdIsValid(removeXid))
3783 	{
3784 		elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
3785 		pArray->numKnownAssignedXids = 0;
3786 		pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
3787 		return;
3788 	}
3789 
3790 	elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
3791 
3792 	/*
3793 	 * Mark entries invalid starting at the tail.  Since array is sorted, we
3794 	 * can stop as soon as we reach an entry >= removeXid.
3795 	 */
3796 	tail = pArray->tailKnownAssignedXids;
3797 	head = pArray->headKnownAssignedXids;
3798 
3799 	for (i = tail; i < head; i++)
3800 	{
3801 		if (KnownAssignedXidsValid[i])
3802 		{
3803 			TransactionId knownXid = KnownAssignedXids[i];
3804 
3805 			if (TransactionIdFollowsOrEquals(knownXid, removeXid))
3806 				break;
3807 
3808 			if (!StandbyTransactionIdIsPrepared(knownXid))
3809 			{
3810 				KnownAssignedXidsValid[i] = false;
3811 				count++;
3812 			}
3813 		}
3814 	}
3815 
3816 	pArray->numKnownAssignedXids -= count;
3817 	Assert(pArray->numKnownAssignedXids >= 0);
3818 
3819 	/*
3820 	 * Advance the tail pointer if we've marked the tail item invalid.
3821 	 */
3822 	for (i = tail; i < head; i++)
3823 	{
3824 		if (KnownAssignedXidsValid[i])
3825 			break;
3826 	}
3827 	if (i >= head)
3828 	{
3829 		/* Array is empty, so we can reset both pointers */
3830 		pArray->headKnownAssignedXids = 0;
3831 		pArray->tailKnownAssignedXids = 0;
3832 	}
3833 	else
3834 	{
3835 		pArray->tailKnownAssignedXids = i;
3836 	}
3837 
3838 	/* Opportunistically compress the array */
3839 	KnownAssignedXidsCompress(false);
3840 }
3841 
3842 /*
3843  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
3844  * We filter out anything >= xmax.
3845  *
3846  * Returns the number of XIDs stored into xarray[].  Caller is responsible
3847  * that array is large enough.
3848  *
3849  * Caller must hold ProcArrayLock in (at least) shared mode.
3850  */
3851 static int
3852 KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
3853 {
3854 	TransactionId xtmp = InvalidTransactionId;
3855 
3856 	return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
3857 }
3858 
3859 /*
3860  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
3861  * we reduce *xmin to the lowest xid value seen if not already lower.
3862  *
3863  * Caller must hold ProcArrayLock in (at least) shared mode.
3864  */
3865 static int
3866 KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
3867 							   TransactionId xmax)
3868 {
3869 	int			count = 0;
3870 	int			head,
3871 				tail;
3872 	int			i;
3873 
3874 	/*
3875 	 * Fetch head just once, since it may change while we loop. We can stop
3876 	 * once we reach the initially seen head, since we are certain that an xid
3877 	 * cannot enter and then leave the array while we hold ProcArrayLock.  We
3878 	 * might miss newly-added xids, but they should be >= xmax so irrelevant
3879 	 * anyway.
3880 	 *
3881 	 * Must take spinlock to ensure we see up-to-date array contents.
3882 	 */
3883 	SpinLockAcquire(&procArray->known_assigned_xids_lck);
3884 	tail = procArray->tailKnownAssignedXids;
3885 	head = procArray->headKnownAssignedXids;
3886 	SpinLockRelease(&procArray->known_assigned_xids_lck);
3887 
3888 	for (i = tail; i < head; i++)
3889 	{
3890 		/* Skip any gaps in the array */
3891 		if (KnownAssignedXidsValid[i])
3892 		{
3893 			TransactionId knownXid = KnownAssignedXids[i];
3894 
3895 			/*
3896 			 * Update xmin if required.  Only the first XID need be checked,
3897 			 * since the array is sorted.
3898 			 */
3899 			if (count == 0 &&
3900 				TransactionIdPrecedes(knownXid, *xmin))
3901 				*xmin = knownXid;
3902 
3903 			/*
3904 			 * Filter out anything >= xmax, again relying on sorted property
3905 			 * of array.
3906 			 */
3907 			if (TransactionIdIsValid(xmax) &&
3908 				TransactionIdFollowsOrEquals(knownXid, xmax))
3909 				break;
3910 
3911 			/* Add knownXid into output array */
3912 			xarray[count++] = knownXid;
3913 		}
3914 	}
3915 
3916 	return count;
3917 }
3918 
3919 /*
3920  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
3921  * if nothing there.
3922  */
3923 static TransactionId
3924 KnownAssignedXidsGetOldestXmin(void)
3925 {
3926 	int			head,
3927 				tail;
3928 	int			i;
3929 
3930 	/*
3931 	 * Fetch head just once, since it may change while we loop.
3932 	 */
3933 	SpinLockAcquire(&procArray->known_assigned_xids_lck);
3934 	tail = procArray->tailKnownAssignedXids;
3935 	head = procArray->headKnownAssignedXids;
3936 	SpinLockRelease(&procArray->known_assigned_xids_lck);
3937 
3938 	for (i = tail; i < head; i++)
3939 	{
3940 		/* Skip any gaps in the array */
3941 		if (KnownAssignedXidsValid[i])
3942 			return KnownAssignedXids[i];
3943 	}
3944 
3945 	return InvalidTransactionId;
3946 }
3947 
3948 /*
3949  * Display KnownAssignedXids to provide debug trail
3950  *
3951  * Currently this is only called within startup process, so we need no
3952  * special locking.
3953  *
3954  * Note this is pretty expensive, and much of the expense will be incurred
3955  * even if the elog message will get discarded.  It's not currently called
3956  * in any performance-critical places, however, so no need to be tenser.
3957  */
3958 static void
3959 KnownAssignedXidsDisplay(int trace_level)
3960 {
3961 	/* use volatile pointer to prevent code rearrangement */
3962 	volatile ProcArrayStruct *pArray = procArray;
3963 	StringInfoData buf;
3964 	int			head,
3965 				tail,
3966 				i;
3967 	int			nxids = 0;
3968 
3969 	tail = pArray->tailKnownAssignedXids;
3970 	head = pArray->headKnownAssignedXids;
3971 
3972 	initStringInfo(&buf);
3973 
3974 	for (i = tail; i < head; i++)
3975 	{
3976 		if (KnownAssignedXidsValid[i])
3977 		{
3978 			nxids++;
3979 			appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
3980 		}
3981 	}
3982 
3983 	elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
3984 		 nxids,
3985 		 pArray->numKnownAssignedXids,
3986 		 pArray->tailKnownAssignedXids,
3987 		 pArray->headKnownAssignedXids,
3988 		 buf.data);
3989 
3990 	pfree(buf.data);
3991 }
3992 
3993 /*
3994  * KnownAssignedXidsReset
3995  *		Resets KnownAssignedXids to be empty
3996  */
3997 static void
3998 KnownAssignedXidsReset(void)
3999 {
4000 	/* use volatile pointer to prevent code rearrangement */
4001 	volatile ProcArrayStruct *pArray = procArray;
4002 
4003 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4004 
4005 	pArray->numKnownAssignedXids = 0;
4006 	pArray->tailKnownAssignedXids = 0;
4007 	pArray->headKnownAssignedXids = 0;
4008 
4009 	LWLockRelease(ProcArrayLock);
4010 }
4011