1 /*------------------------------------------------------------------------- 2 * 3 * procarray.c 4 * POSTGRES process array code. 5 * 6 * 7 * This module maintains arrays of the PGPROC and PGXACT structures for all 8 * active backends. Although there are several uses for this, the principal 9 * one is as a means of determining the set of currently running transactions. 10 * 11 * Because of various subtle race conditions it is critical that a backend 12 * hold the correct locks while setting or clearing its MyPgXact->xid field. 13 * See notes in src/backend/access/transam/README. 14 * 15 * The process arrays now also include structures representing prepared 16 * transactions. The xid and subxids fields of these are valid, as are the 17 * myProcLocks lists. They can be distinguished from regular backend PGPROCs 18 * at need by checking for pid == 0. 19 * 20 * During hot standby, we also keep a list of XIDs representing transactions 21 * that are known to be running in the master (or more precisely, were running 22 * as of the current point in the WAL stream). This list is kept in the 23 * KnownAssignedXids array, and is updated by watching the sequence of 24 * arriving XIDs. This is necessary because if we leave those XIDs out of 25 * snapshots taken for standby queries, then they will appear to be already 26 * complete, leading to MVCC failures. Note that in hot standby, the PGPROC 27 * array represents standby processes, which by definition are not running 28 * transactions that have XIDs. 29 * 30 * It is perhaps possible for a backend on the master to terminate without 31 * writing an abort record for its transaction. While that shouldn't really 32 * happen, it would tie up KnownAssignedXids indefinitely, so we protect 33 * ourselves by pruning the array when a valid list of running XIDs arrives. 34 * 35 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 36 * Portions Copyright (c) 1994, Regents of the University of California 37 * 38 * 39 * IDENTIFICATION 40 * src/backend/storage/ipc/procarray.c 41 * 42 *------------------------------------------------------------------------- 43 */ 44 #include "postgres.h" 45 46 #include <signal.h> 47 48 #include "access/clog.h" 49 #include "access/subtrans.h" 50 #include "access/transam.h" 51 #include "access/twophase.h" 52 #include "access/xact.h" 53 #include "access/xlog.h" 54 #include "catalog/catalog.h" 55 #include "miscadmin.h" 56 #include "pgstat.h" 57 #include "storage/proc.h" 58 #include "storage/procarray.h" 59 #include "storage/spin.h" 60 #include "utils/builtins.h" 61 #include "utils/rel.h" 62 #include "utils/snapmgr.h" 63 64 65 /* Our shared memory area */ 66 typedef struct ProcArrayStruct 67 { 68 int numProcs; /* number of valid procs entries */ 69 int maxProcs; /* allocated size of procs array */ 70 71 /* 72 * Known assigned XIDs handling 73 */ 74 int maxKnownAssignedXids; /* allocated size of array */ 75 int numKnownAssignedXids; /* current # of valid entries */ 76 int tailKnownAssignedXids; /* index of oldest valid element */ 77 int headKnownAssignedXids; /* index of newest element, + 1 */ 78 slock_t known_assigned_xids_lck; /* protects head/tail pointers */ 79 80 /* 81 * Highest subxid that has been removed from KnownAssignedXids array to 82 * prevent overflow; or InvalidTransactionId if none. We track this for 83 * similar reasons to tracking overflowing cached subxids in PGXACT 84 * entries. Must hold exclusive ProcArrayLock to change this, and shared 85 * lock to read it. 86 */ 87 TransactionId lastOverflowedXid; 88 89 /* oldest xmin of any replication slot */ 90 TransactionId replication_slot_xmin; 91 /* oldest catalog xmin of any replication slot */ 92 TransactionId replication_slot_catalog_xmin; 93 94 /* indexes into allPgXact[], has PROCARRAY_MAXPROCS entries */ 95 int pgprocnos[FLEXIBLE_ARRAY_MEMBER]; 96 } ProcArrayStruct; 97 98 static ProcArrayStruct *procArray; 99 100 static PGPROC *allProcs; 101 static PGXACT *allPgXact; 102 103 /* 104 * Bookkeeping for tracking emulated transactions in recovery 105 */ 106 static TransactionId *KnownAssignedXids; 107 static bool *KnownAssignedXidsValid; 108 static TransactionId latestObservedXid = InvalidTransactionId; 109 110 /* 111 * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is 112 * the highest xid that might still be running that we don't have in 113 * KnownAssignedXids. 114 */ 115 static TransactionId standbySnapshotPendingXmin; 116 117 #ifdef XIDCACHE_DEBUG 118 119 /* counters for XidCache measurement */ 120 static long xc_by_recent_xmin = 0; 121 static long xc_by_known_xact = 0; 122 static long xc_by_my_xact = 0; 123 static long xc_by_latest_xid = 0; 124 static long xc_by_main_xid = 0; 125 static long xc_by_child_xid = 0; 126 static long xc_by_known_assigned = 0; 127 static long xc_no_overflow = 0; 128 static long xc_slow_answer = 0; 129 130 #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++) 131 #define xc_by_known_xact_inc() (xc_by_known_xact++) 132 #define xc_by_my_xact_inc() (xc_by_my_xact++) 133 #define xc_by_latest_xid_inc() (xc_by_latest_xid++) 134 #define xc_by_main_xid_inc() (xc_by_main_xid++) 135 #define xc_by_child_xid_inc() (xc_by_child_xid++) 136 #define xc_by_known_assigned_inc() (xc_by_known_assigned++) 137 #define xc_no_overflow_inc() (xc_no_overflow++) 138 #define xc_slow_answer_inc() (xc_slow_answer++) 139 140 static void DisplayXidCache(void); 141 #else /* !XIDCACHE_DEBUG */ 142 143 #define xc_by_recent_xmin_inc() ((void) 0) 144 #define xc_by_known_xact_inc() ((void) 0) 145 #define xc_by_my_xact_inc() ((void) 0) 146 #define xc_by_latest_xid_inc() ((void) 0) 147 #define xc_by_main_xid_inc() ((void) 0) 148 #define xc_by_child_xid_inc() ((void) 0) 149 #define xc_by_known_assigned_inc() ((void) 0) 150 #define xc_no_overflow_inc() ((void) 0) 151 #define xc_slow_answer_inc() ((void) 0) 152 #endif /* XIDCACHE_DEBUG */ 153 154 /* Primitives for KnownAssignedXids array handling for standby */ 155 static void KnownAssignedXidsCompress(bool force); 156 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, 157 bool exclusive_lock); 158 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove); 159 static bool KnownAssignedXidExists(TransactionId xid); 160 static void KnownAssignedXidsRemove(TransactionId xid); 161 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, 162 TransactionId *subxids); 163 static void KnownAssignedXidsRemovePreceding(TransactionId xid); 164 static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax); 165 static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, 166 TransactionId *xmin, 167 TransactionId xmax); 168 static TransactionId KnownAssignedXidsGetOldestXmin(void); 169 static void KnownAssignedXidsDisplay(int trace_level); 170 static void KnownAssignedXidsReset(void); 171 static inline void ProcArrayEndTransactionInternal(PGPROC *proc, 172 PGXACT *pgxact, TransactionId latestXid); 173 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); 174 175 /* 176 * Report shared-memory space needed by CreateSharedProcArray. 177 */ 178 Size 179 ProcArrayShmemSize(void) 180 { 181 Size size; 182 183 /* Size of the ProcArray structure itself */ 184 #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts) 185 186 size = offsetof(ProcArrayStruct, pgprocnos); 187 size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); 188 189 /* 190 * During Hot Standby processing we have a data structure called 191 * KnownAssignedXids, created in shared memory. Local data structures are 192 * also created in various backends during GetSnapshotData(), 193 * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the 194 * main structures created in those functions must be identically sized, 195 * since we may at times copy the whole of the data structures around. We 196 * refer to this size as TOTAL_MAX_CACHED_SUBXIDS. 197 * 198 * Ideally we'd only create this structure if we were actually doing hot 199 * standby in the current run, but we don't know that yet at the time 200 * shared memory is being set up. 201 */ 202 #define TOTAL_MAX_CACHED_SUBXIDS \ 203 ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS) 204 205 if (EnableHotStandby) 206 { 207 size = add_size(size, 208 mul_size(sizeof(TransactionId), 209 TOTAL_MAX_CACHED_SUBXIDS)); 210 size = add_size(size, 211 mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS)); 212 } 213 214 return size; 215 } 216 217 /* 218 * Initialize the shared PGPROC array during postmaster startup. 219 */ 220 void 221 CreateSharedProcArray(void) 222 { 223 bool found; 224 225 /* Create or attach to the ProcArray shared structure */ 226 procArray = (ProcArrayStruct *) 227 ShmemInitStruct("Proc Array", 228 add_size(offsetof(ProcArrayStruct, pgprocnos), 229 mul_size(sizeof(int), 230 PROCARRAY_MAXPROCS)), 231 &found); 232 233 if (!found) 234 { 235 /* 236 * We're the first - initialize. 237 */ 238 procArray->numProcs = 0; 239 procArray->maxProcs = PROCARRAY_MAXPROCS; 240 procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS; 241 procArray->numKnownAssignedXids = 0; 242 procArray->tailKnownAssignedXids = 0; 243 procArray->headKnownAssignedXids = 0; 244 SpinLockInit(&procArray->known_assigned_xids_lck); 245 procArray->lastOverflowedXid = InvalidTransactionId; 246 procArray->replication_slot_xmin = InvalidTransactionId; 247 procArray->replication_slot_catalog_xmin = InvalidTransactionId; 248 } 249 250 allProcs = ProcGlobal->allProcs; 251 allPgXact = ProcGlobal->allPgXact; 252 253 /* Create or attach to the KnownAssignedXids arrays too, if needed */ 254 if (EnableHotStandby) 255 { 256 KnownAssignedXids = (TransactionId *) 257 ShmemInitStruct("KnownAssignedXids", 258 mul_size(sizeof(TransactionId), 259 TOTAL_MAX_CACHED_SUBXIDS), 260 &found); 261 KnownAssignedXidsValid = (bool *) 262 ShmemInitStruct("KnownAssignedXidsValid", 263 mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS), 264 &found); 265 } 266 267 /* Register and initialize fields of ProcLWLockTranche */ 268 LWLockRegisterTranche(LWTRANCHE_PROC, "proc"); 269 } 270 271 /* 272 * Add the specified PGPROC to the shared array. 273 */ 274 void 275 ProcArrayAdd(PGPROC *proc) 276 { 277 ProcArrayStruct *arrayP = procArray; 278 int index; 279 280 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 281 282 if (arrayP->numProcs >= arrayP->maxProcs) 283 { 284 /* 285 * Oops, no room. (This really shouldn't happen, since there is a 286 * fixed supply of PGPROC structs too, and so we should have failed 287 * earlier.) 288 */ 289 LWLockRelease(ProcArrayLock); 290 ereport(FATAL, 291 (errcode(ERRCODE_TOO_MANY_CONNECTIONS), 292 errmsg("sorry, too many clients already"))); 293 } 294 295 /* 296 * Keep the procs array sorted by (PGPROC *) so that we can utilize 297 * locality of references much better. This is useful while traversing the 298 * ProcArray because there is an increased likelihood of finding the next 299 * PGPROC structure in the cache. 300 * 301 * Since the occurrence of adding/removing a proc is much lower than the 302 * access to the ProcArray itself, the overhead should be marginal 303 */ 304 for (index = 0; index < arrayP->numProcs; index++) 305 { 306 /* 307 * If we are the first PGPROC or if we have found our right position 308 * in the array, break 309 */ 310 if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno)) 311 break; 312 } 313 314 memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index], 315 (arrayP->numProcs - index) * sizeof(int)); 316 arrayP->pgprocnos[index] = proc->pgprocno; 317 arrayP->numProcs++; 318 319 LWLockRelease(ProcArrayLock); 320 } 321 322 /* 323 * Remove the specified PGPROC from the shared array. 324 * 325 * When latestXid is a valid XID, we are removing a live 2PC gxact from the 326 * array, and thus causing it to appear as "not running" anymore. In this 327 * case we must advance latestCompletedXid. (This is essentially the same 328 * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take 329 * the ProcArrayLock only once, and don't damage the content of the PGPROC; 330 * twophase.c depends on the latter.) 331 */ 332 void 333 ProcArrayRemove(PGPROC *proc, TransactionId latestXid) 334 { 335 ProcArrayStruct *arrayP = procArray; 336 int index; 337 338 #ifdef XIDCACHE_DEBUG 339 /* dump stats at backend shutdown, but not prepared-xact end */ 340 if (proc->pid != 0) 341 DisplayXidCache(); 342 #endif 343 344 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 345 346 if (TransactionIdIsValid(latestXid)) 347 { 348 Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); 349 350 /* Advance global latestCompletedXid while holding the lock */ 351 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, 352 latestXid)) 353 ShmemVariableCache->latestCompletedXid = latestXid; 354 } 355 else 356 { 357 /* Shouldn't be trying to remove a live transaction here */ 358 Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); 359 } 360 361 for (index = 0; index < arrayP->numProcs; index++) 362 { 363 if (arrayP->pgprocnos[index] == proc->pgprocno) 364 { 365 /* Keep the PGPROC array sorted. See notes above */ 366 memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1], 367 (arrayP->numProcs - index - 1) * sizeof(int)); 368 arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */ 369 arrayP->numProcs--; 370 LWLockRelease(ProcArrayLock); 371 return; 372 } 373 } 374 375 /* Oops */ 376 LWLockRelease(ProcArrayLock); 377 378 elog(LOG, "failed to find proc %p in ProcArray", proc); 379 } 380 381 382 /* 383 * ProcArrayEndTransaction -- mark a transaction as no longer running 384 * 385 * This is used interchangeably for commit and abort cases. The transaction 386 * commit/abort must already be reported to WAL and pg_xact. 387 * 388 * proc is currently always MyProc, but we pass it explicitly for flexibility. 389 * latestXid is the latest Xid among the transaction's main XID and 390 * subtransactions, or InvalidTransactionId if it has no XID. (We must ask 391 * the caller to pass latestXid, instead of computing it from the PGPROC's 392 * contents, because the subxid information in the PGPROC might be 393 * incomplete.) 394 */ 395 void 396 ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) 397 { 398 PGXACT *pgxact = &allPgXact[proc->pgprocno]; 399 400 if (TransactionIdIsValid(latestXid)) 401 { 402 /* 403 * We must lock ProcArrayLock while clearing our advertised XID, so 404 * that we do not exit the set of "running" transactions while someone 405 * else is taking a snapshot. See discussion in 406 * src/backend/access/transam/README. 407 */ 408 Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); 409 410 /* 411 * If we can immediately acquire ProcArrayLock, we clear our own XID 412 * and release the lock. If not, use group XID clearing to improve 413 * efficiency. 414 */ 415 if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE)) 416 { 417 ProcArrayEndTransactionInternal(proc, pgxact, latestXid); 418 LWLockRelease(ProcArrayLock); 419 } 420 else 421 ProcArrayGroupClearXid(proc, latestXid); 422 } 423 else 424 { 425 /* 426 * If we have no XID, we don't need to lock, since we won't affect 427 * anyone else's calculation of a snapshot. We might change their 428 * estimate of global xmin, but that's OK. 429 */ 430 Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); 431 432 proc->lxid = InvalidLocalTransactionId; 433 pgxact->xmin = InvalidTransactionId; 434 /* must be cleared with xid/xmin: */ 435 pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; 436 pgxact->delayChkpt = false; /* be sure this is cleared in abort */ 437 proc->recoveryConflictPending = false; 438 439 Assert(pgxact->nxids == 0); 440 Assert(pgxact->overflowed == false); 441 } 442 } 443 444 /* 445 * Mark a write transaction as no longer running. 446 * 447 * We don't do any locking here; caller must handle that. 448 */ 449 static inline void 450 ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact, 451 TransactionId latestXid) 452 { 453 pgxact->xid = InvalidTransactionId; 454 proc->lxid = InvalidLocalTransactionId; 455 pgxact->xmin = InvalidTransactionId; 456 /* must be cleared with xid/xmin: */ 457 pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; 458 pgxact->delayChkpt = false; /* be sure this is cleared in abort */ 459 proc->recoveryConflictPending = false; 460 461 /* Clear the subtransaction-XID cache too while holding the lock */ 462 pgxact->nxids = 0; 463 pgxact->overflowed = false; 464 465 /* Also advance global latestCompletedXid while holding the lock */ 466 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, 467 latestXid)) 468 ShmemVariableCache->latestCompletedXid = latestXid; 469 } 470 471 /* 472 * ProcArrayGroupClearXid -- group XID clearing 473 * 474 * When we cannot immediately acquire ProcArrayLock in exclusive mode at 475 * commit time, add ourselves to a list of processes that need their XIDs 476 * cleared. The first process to add itself to the list will acquire 477 * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal 478 * on behalf of all group members. This avoids a great deal of contention 479 * around ProcArrayLock when many processes are trying to commit at once, 480 * since the lock need not be repeatedly handed off from one committing 481 * process to the next. 482 */ 483 static void 484 ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid) 485 { 486 volatile PROC_HDR *procglobal = ProcGlobal; 487 uint32 nextidx; 488 uint32 wakeidx; 489 490 /* We should definitely have an XID to clear. */ 491 Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid)); 492 493 /* Add ourselves to the list of processes needing a group XID clear. */ 494 proc->procArrayGroupMember = true; 495 proc->procArrayGroupMemberXid = latestXid; 496 while (true) 497 { 498 nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst); 499 pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx); 500 501 if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst, 502 &nextidx, 503 (uint32) proc->pgprocno)) 504 break; 505 } 506 507 /* 508 * If the list was not empty, the leader will clear our XID. It is 509 * impossible to have followers without a leader because the first process 510 * that has added itself to the list will always have nextidx as 511 * INVALID_PGPROCNO. 512 */ 513 if (nextidx != INVALID_PGPROCNO) 514 { 515 int extraWaits = 0; 516 517 /* Sleep until the leader clears our XID. */ 518 pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE); 519 for (;;) 520 { 521 /* acts as a read barrier */ 522 PGSemaphoreLock(proc->sem); 523 if (!proc->procArrayGroupMember) 524 break; 525 extraWaits++; 526 } 527 pgstat_report_wait_end(); 528 529 Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO); 530 531 /* Fix semaphore count for any absorbed wakeups */ 532 while (extraWaits-- > 0) 533 PGSemaphoreUnlock(proc->sem); 534 return; 535 } 536 537 /* We are the leader. Acquire the lock on behalf of everyone. */ 538 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 539 540 /* 541 * Now that we've got the lock, clear the list of processes waiting for 542 * group XID clearing, saving a pointer to the head of the list. Trying 543 * to pop elements one at a time could lead to an ABA problem. 544 */ 545 while (true) 546 { 547 nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst); 548 if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst, 549 &nextidx, 550 INVALID_PGPROCNO)) 551 break; 552 } 553 554 /* Remember head of list so we can perform wakeups after dropping lock. */ 555 wakeidx = nextidx; 556 557 /* Walk the list and clear all XIDs. */ 558 while (nextidx != INVALID_PGPROCNO) 559 { 560 PGPROC *nextproc = &allProcs[nextidx]; 561 PGXACT *pgxact = &allPgXact[nextidx]; 562 563 ProcArrayEndTransactionInternal(nextproc, pgxact, nextproc->procArrayGroupMemberXid); 564 565 /* Move to next proc in list. */ 566 nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext); 567 } 568 569 /* We're done with the lock now. */ 570 LWLockRelease(ProcArrayLock); 571 572 /* 573 * Now that we've released the lock, go back and wake everybody up. We 574 * don't do this under the lock so as to keep lock hold times to a 575 * minimum. The system calls we need to perform to wake other processes 576 * up are probably much slower than the simple memory writes we did while 577 * holding the lock. 578 */ 579 while (wakeidx != INVALID_PGPROCNO) 580 { 581 PGPROC *nextproc = &allProcs[wakeidx]; 582 583 wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext); 584 pg_atomic_write_u32(&nextproc->procArrayGroupNext, INVALID_PGPROCNO); 585 586 /* ensure all previous writes are visible before follower continues. */ 587 pg_write_barrier(); 588 589 nextproc->procArrayGroupMember = false; 590 591 if (nextproc != MyProc) 592 PGSemaphoreUnlock(nextproc->sem); 593 } 594 } 595 596 /* 597 * ProcArrayClearTransaction -- clear the transaction fields 598 * 599 * This is used after successfully preparing a 2-phase transaction. We are 600 * not actually reporting the transaction's XID as no longer running --- it 601 * will still appear as running because the 2PC's gxact is in the ProcArray 602 * too. We just have to clear out our own PGXACT. 603 */ 604 void 605 ProcArrayClearTransaction(PGPROC *proc) 606 { 607 PGXACT *pgxact = &allPgXact[proc->pgprocno]; 608 609 /* 610 * We can skip locking ProcArrayLock here, because this action does not 611 * actually change anyone's view of the set of running XIDs: our entry is 612 * duplicate with the gxact that has already been inserted into the 613 * ProcArray. 614 */ 615 pgxact->xid = InvalidTransactionId; 616 proc->lxid = InvalidLocalTransactionId; 617 pgxact->xmin = InvalidTransactionId; 618 proc->recoveryConflictPending = false; 619 620 /* redundant, but just in case */ 621 pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; 622 pgxact->delayChkpt = false; 623 624 /* Clear the subtransaction-XID cache too */ 625 pgxact->nxids = 0; 626 pgxact->overflowed = false; 627 } 628 629 /* 630 * ProcArrayInitRecovery -- initialize recovery xid mgmt environment 631 * 632 * Remember up to where the startup process initialized the CLOG and subtrans 633 * so we can ensure it's initialized gaplessly up to the point where necessary 634 * while in recovery. 635 */ 636 void 637 ProcArrayInitRecovery(TransactionId initializedUptoXID) 638 { 639 Assert(standbyState == STANDBY_INITIALIZED); 640 Assert(TransactionIdIsNormal(initializedUptoXID)); 641 642 /* 643 * we set latestObservedXid to the xid SUBTRANS has been initialized up 644 * to, so we can extend it from that point onwards in 645 * RecordKnownAssignedTransactionIds, and when we get consistent in 646 * ProcArrayApplyRecoveryInfo(). 647 */ 648 latestObservedXid = initializedUptoXID; 649 TransactionIdRetreat(latestObservedXid); 650 } 651 652 /* 653 * ProcArrayApplyRecoveryInfo -- apply recovery info about xids 654 * 655 * Takes us through 3 states: Initialized, Pending and Ready. 656 * Normal case is to go all the way to Ready straight away, though there 657 * are atypical cases where we need to take it in steps. 658 * 659 * Use the data about running transactions on master to create the initial 660 * state of KnownAssignedXids. We also use these records to regularly prune 661 * KnownAssignedXids because we know it is possible that some transactions 662 * with FATAL errors fail to write abort records, which could cause eventual 663 * overflow. 664 * 665 * See comments for LogStandbySnapshot(). 666 */ 667 void 668 ProcArrayApplyRecoveryInfo(RunningTransactions running) 669 { 670 TransactionId *xids; 671 int nxids; 672 TransactionId nextXid; 673 int i; 674 675 Assert(standbyState >= STANDBY_INITIALIZED); 676 Assert(TransactionIdIsValid(running->nextXid)); 677 Assert(TransactionIdIsValid(running->oldestRunningXid)); 678 Assert(TransactionIdIsNormal(running->latestCompletedXid)); 679 680 /* 681 * Remove stale transactions, if any. 682 */ 683 ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid); 684 685 /* 686 * Remove stale locks, if any. 687 */ 688 StandbyReleaseOldLocks(running->oldestRunningXid); 689 690 /* 691 * If our snapshot is already valid, nothing else to do... 692 */ 693 if (standbyState == STANDBY_SNAPSHOT_READY) 694 return; 695 696 /* 697 * If our initial RunningTransactionsData had an overflowed snapshot then 698 * we knew we were missing some subxids from our snapshot. If we continue 699 * to see overflowed snapshots then we might never be able to start up, so 700 * we make another test to see if our snapshot is now valid. We know that 701 * the missing subxids are equal to or earlier than nextXid. After we 702 * initialise we continue to apply changes during recovery, so once the 703 * oldestRunningXid is later than the nextXid from the initial snapshot we 704 * know that we no longer have missing information and can mark the 705 * snapshot as valid. 706 */ 707 if (standbyState == STANDBY_SNAPSHOT_PENDING) 708 { 709 /* 710 * If the snapshot isn't overflowed or if its empty we can reset our 711 * pending state and use this snapshot instead. 712 */ 713 if (!running->subxid_overflow || running->xcnt == 0) 714 { 715 /* 716 * If we have already collected known assigned xids, we need to 717 * throw them away before we apply the recovery snapshot. 718 */ 719 KnownAssignedXidsReset(); 720 standbyState = STANDBY_INITIALIZED; 721 } 722 else 723 { 724 if (TransactionIdPrecedes(standbySnapshotPendingXmin, 725 running->oldestRunningXid)) 726 { 727 standbyState = STANDBY_SNAPSHOT_READY; 728 elog(trace_recovery(DEBUG1), 729 "recovery snapshots are now enabled"); 730 } 731 else 732 elog(trace_recovery(DEBUG1), 733 "recovery snapshot waiting for non-overflowed snapshot or " 734 "until oldest active xid on standby is at least %u (now %u)", 735 standbySnapshotPendingXmin, 736 running->oldestRunningXid); 737 return; 738 } 739 } 740 741 Assert(standbyState == STANDBY_INITIALIZED); 742 743 /* 744 * OK, we need to initialise from the RunningTransactionsData record. 745 * 746 * NB: this can be reached at least twice, so make sure new code can deal 747 * with that. 748 */ 749 750 /* 751 * Nobody else is running yet, but take locks anyhow 752 */ 753 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 754 755 /* 756 * KnownAssignedXids is sorted so we cannot just add the xids, we have to 757 * sort them first. 758 * 759 * Some of the new xids are top-level xids and some are subtransactions. 760 * We don't call SubtransSetParent because it doesn't matter yet. If we 761 * aren't overflowed then all xids will fit in snapshot and so we don't 762 * need subtrans. If we later overflow, an xid assignment record will add 763 * xids to subtrans. If RunningXacts is overflowed then we don't have 764 * enough information to correctly update subtrans anyway. 765 */ 766 767 /* 768 * Allocate a temporary array to avoid modifying the array passed as 769 * argument. 770 */ 771 xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt)); 772 773 /* 774 * Add to the temp array any xids which have not already completed. 775 */ 776 nxids = 0; 777 for (i = 0; i < running->xcnt + running->subxcnt; i++) 778 { 779 TransactionId xid = running->xids[i]; 780 781 /* 782 * The running-xacts snapshot can contain xids that were still visible 783 * in the procarray when the snapshot was taken, but were already 784 * WAL-logged as completed. They're not running anymore, so ignore 785 * them. 786 */ 787 if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) 788 continue; 789 790 xids[nxids++] = xid; 791 } 792 793 if (nxids > 0) 794 { 795 if (procArray->numKnownAssignedXids != 0) 796 { 797 LWLockRelease(ProcArrayLock); 798 elog(ERROR, "KnownAssignedXids is not empty"); 799 } 800 801 /* 802 * Sort the array so that we can add them safely into 803 * KnownAssignedXids. 804 */ 805 qsort(xids, nxids, sizeof(TransactionId), xidComparator); 806 807 /* 808 * Add the sorted snapshot into KnownAssignedXids. The running-xacts 809 * snapshot may include duplicated xids because of prepared 810 * transactions, so ignore them. 811 */ 812 for (i = 0; i < nxids; i++) 813 { 814 if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i])) 815 { 816 elog(DEBUG1, 817 "found duplicated transaction %u for KnownAssignedXids insertion", 818 xids[i]); 819 continue; 820 } 821 KnownAssignedXidsAdd(xids[i], xids[i], true); 822 } 823 824 KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); 825 } 826 827 pfree(xids); 828 829 /* 830 * latestObservedXid is at least set to the point where SUBTRANS was 831 * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid 832 * RecordKnownAssignedTransactionIds() was called for. Initialize 833 * subtrans from thereon, up to nextXid - 1. 834 * 835 * We need to duplicate parts of RecordKnownAssignedTransactionId() here, 836 * because we've just added xids to the known assigned xids machinery that 837 * haven't gone through RecordKnownAssignedTransactionId(). 838 */ 839 Assert(TransactionIdIsNormal(latestObservedXid)); 840 TransactionIdAdvance(latestObservedXid); 841 while (TransactionIdPrecedes(latestObservedXid, running->nextXid)) 842 { 843 ExtendSUBTRANS(latestObservedXid); 844 TransactionIdAdvance(latestObservedXid); 845 } 846 TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */ 847 848 /* ---------- 849 * Now we've got the running xids we need to set the global values that 850 * are used to track snapshots as they evolve further. 851 * 852 * - latestCompletedXid which will be the xmax for snapshots 853 * - lastOverflowedXid which shows whether snapshots overflow 854 * - nextXid 855 * 856 * If the snapshot overflowed, then we still initialise with what we know, 857 * but the recovery snapshot isn't fully valid yet because we know there 858 * are some subxids missing. We don't know the specific subxids that are 859 * missing, so conservatively assume the last one is latestObservedXid. 860 * ---------- 861 */ 862 if (running->subxid_overflow) 863 { 864 standbyState = STANDBY_SNAPSHOT_PENDING; 865 866 standbySnapshotPendingXmin = latestObservedXid; 867 procArray->lastOverflowedXid = latestObservedXid; 868 } 869 else 870 { 871 standbyState = STANDBY_SNAPSHOT_READY; 872 873 standbySnapshotPendingXmin = InvalidTransactionId; 874 } 875 876 /* 877 * If a transaction wrote a commit record in the gap between taking and 878 * logging the snapshot then latestCompletedXid may already be higher than 879 * the value from the snapshot, so check before we use the incoming value. 880 */ 881 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, 882 running->latestCompletedXid)) 883 ShmemVariableCache->latestCompletedXid = running->latestCompletedXid; 884 885 Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); 886 887 LWLockRelease(ProcArrayLock); 888 889 /* 890 * ShmemVariableCache->nextXid must be beyond any observed xid. 891 * 892 * We don't expect anyone else to modify nextXid, hence we don't need to 893 * hold a lock while examining it. We still acquire the lock to modify 894 * it, though. 895 */ 896 nextXid = latestObservedXid; 897 TransactionIdAdvance(nextXid); 898 if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid)) 899 { 900 LWLockAcquire(XidGenLock, LW_EXCLUSIVE); 901 ShmemVariableCache->nextXid = nextXid; 902 LWLockRelease(XidGenLock); 903 } 904 905 Assert(TransactionIdIsValid(ShmemVariableCache->nextXid)); 906 907 KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); 908 if (standbyState == STANDBY_SNAPSHOT_READY) 909 elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled"); 910 else 911 elog(trace_recovery(DEBUG1), 912 "recovery snapshot waiting for non-overflowed snapshot or " 913 "until oldest active xid on standby is at least %u (now %u)", 914 standbySnapshotPendingXmin, 915 running->oldestRunningXid); 916 } 917 918 /* 919 * ProcArrayApplyXidAssignment 920 * Process an XLOG_XACT_ASSIGNMENT WAL record 921 */ 922 void 923 ProcArrayApplyXidAssignment(TransactionId topxid, 924 int nsubxids, TransactionId *subxids) 925 { 926 TransactionId max_xid; 927 int i; 928 929 Assert(standbyState >= STANDBY_INITIALIZED); 930 931 max_xid = TransactionIdLatest(topxid, nsubxids, subxids); 932 933 /* 934 * Mark all the subtransactions as observed. 935 * 936 * NOTE: This will fail if the subxid contains too many previously 937 * unobserved xids to fit into known-assigned-xids. That shouldn't happen 938 * as the code stands, because xid-assignment records should never contain 939 * more than PGPROC_MAX_CACHED_SUBXIDS entries. 940 */ 941 RecordKnownAssignedTransactionIds(max_xid); 942 943 /* 944 * Notice that we update pg_subtrans with the top-level xid, rather than 945 * the parent xid. This is a difference between normal processing and 946 * recovery, yet is still correct in all cases. The reason is that 947 * subtransaction commit is not marked in clog until commit processing, so 948 * all aborted subtransactions have already been clearly marked in clog. 949 * As a result we are able to refer directly to the top-level 950 * transaction's state rather than skipping through all the intermediate 951 * states in the subtransaction tree. This should be the first time we 952 * have attempted to SubTransSetParent(). 953 */ 954 for (i = 0; i < nsubxids; i++) 955 SubTransSetParent(subxids[i], topxid); 956 957 /* KnownAssignedXids isn't maintained yet, so we're done for now */ 958 if (standbyState == STANDBY_INITIALIZED) 959 return; 960 961 /* 962 * Uses same locking as transaction commit 963 */ 964 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 965 966 /* 967 * Remove subxids from known-assigned-xacts. 968 */ 969 KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids); 970 971 /* 972 * Advance lastOverflowedXid to be at least the last of these subxids. 973 */ 974 if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid)) 975 procArray->lastOverflowedXid = max_xid; 976 977 LWLockRelease(ProcArrayLock); 978 } 979 980 /* 981 * TransactionIdIsInProgress -- is given transaction running in some backend 982 * 983 * Aside from some shortcuts such as checking RecentXmin and our own Xid, 984 * there are four possibilities for finding a running transaction: 985 * 986 * 1. The given Xid is a main transaction Id. We will find this out cheaply 987 * by looking at the PGXACT struct for each backend. 988 * 989 * 2. The given Xid is one of the cached subxact Xids in the PGPROC array. 990 * We can find this out cheaply too. 991 * 992 * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see 993 * if the Xid is running on the master. 994 * 995 * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see 996 * if that is running according to PGXACT or KnownAssignedXids. This is the 997 * slowest way, but sadly it has to be done always if the others failed, 998 * unless we see that the cached subxact sets are complete (none have 999 * overflowed). 1000 * 1001 * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids 1002 * while doing 1 and 3, we can release the ProcArrayLock while we do 4. 1003 * This buys back some concurrency (and we can't retrieve the main Xids from 1004 * PGXACT again anyway; see GetNewTransactionId). 1005 */ 1006 bool 1007 TransactionIdIsInProgress(TransactionId xid) 1008 { 1009 static TransactionId *xids = NULL; 1010 int nxids = 0; 1011 ProcArrayStruct *arrayP = procArray; 1012 TransactionId topxid; 1013 int i, 1014 j; 1015 1016 /* 1017 * Don't bother checking a transaction older than RecentXmin; it could not 1018 * possibly still be running. (Note: in particular, this guarantees that 1019 * we reject InvalidTransactionId, FrozenTransactionId, etc as not 1020 * running.) 1021 */ 1022 if (TransactionIdPrecedes(xid, RecentXmin)) 1023 { 1024 xc_by_recent_xmin_inc(); 1025 return false; 1026 } 1027 1028 /* 1029 * We may have just checked the status of this transaction, so if it is 1030 * already known to be completed, we can fall out without any access to 1031 * shared memory. 1032 */ 1033 if (TransactionIdIsKnownCompleted(xid)) 1034 { 1035 xc_by_known_xact_inc(); 1036 return false; 1037 } 1038 1039 /* 1040 * Also, we can handle our own transaction (and subtransactions) without 1041 * any access to shared memory. 1042 */ 1043 if (TransactionIdIsCurrentTransactionId(xid)) 1044 { 1045 xc_by_my_xact_inc(); 1046 return true; 1047 } 1048 1049 /* 1050 * If first time through, get workspace to remember main XIDs in. We 1051 * malloc it permanently to avoid repeated palloc/pfree overhead. 1052 */ 1053 if (xids == NULL) 1054 { 1055 /* 1056 * In hot standby mode, reserve enough space to hold all xids in the 1057 * known-assigned list. If we later finish recovery, we no longer need 1058 * the bigger array, but we don't bother to shrink it. 1059 */ 1060 int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs; 1061 1062 xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId)); 1063 if (xids == NULL) 1064 ereport(ERROR, 1065 (errcode(ERRCODE_OUT_OF_MEMORY), 1066 errmsg("out of memory"))); 1067 } 1068 1069 LWLockAcquire(ProcArrayLock, LW_SHARED); 1070 1071 /* 1072 * Now that we have the lock, we can check latestCompletedXid; if the 1073 * target Xid is after that, it's surely still running. 1074 */ 1075 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid)) 1076 { 1077 LWLockRelease(ProcArrayLock); 1078 xc_by_latest_xid_inc(); 1079 return true; 1080 } 1081 1082 /* No shortcuts, gotta grovel through the array */ 1083 for (i = 0; i < arrayP->numProcs; i++) 1084 { 1085 int pgprocno = arrayP->pgprocnos[i]; 1086 volatile PGPROC *proc = &allProcs[pgprocno]; 1087 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 1088 TransactionId pxid; 1089 1090 /* Ignore my own proc --- dealt with it above */ 1091 if (proc == MyProc) 1092 continue; 1093 1094 /* Fetch xid just once - see GetNewTransactionId */ 1095 pxid = pgxact->xid; 1096 1097 if (!TransactionIdIsValid(pxid)) 1098 continue; 1099 1100 /* 1101 * Step 1: check the main Xid 1102 */ 1103 if (TransactionIdEquals(pxid, xid)) 1104 { 1105 LWLockRelease(ProcArrayLock); 1106 xc_by_main_xid_inc(); 1107 return true; 1108 } 1109 1110 /* 1111 * We can ignore main Xids that are younger than the target Xid, since 1112 * the target could not possibly be their child. 1113 */ 1114 if (TransactionIdPrecedes(xid, pxid)) 1115 continue; 1116 1117 /* 1118 * Step 2: check the cached child-Xids arrays 1119 */ 1120 for (j = pgxact->nxids - 1; j >= 0; j--) 1121 { 1122 /* Fetch xid just once - see GetNewTransactionId */ 1123 TransactionId cxid = proc->subxids.xids[j]; 1124 1125 if (TransactionIdEquals(cxid, xid)) 1126 { 1127 LWLockRelease(ProcArrayLock); 1128 xc_by_child_xid_inc(); 1129 return true; 1130 } 1131 } 1132 1133 /* 1134 * Save the main Xid for step 4. We only need to remember main Xids 1135 * that have uncached children. (Note: there is no race condition 1136 * here because the overflowed flag cannot be cleared, only set, while 1137 * we hold ProcArrayLock. So we can't miss an Xid that we need to 1138 * worry about.) 1139 */ 1140 if (pgxact->overflowed) 1141 xids[nxids++] = pxid; 1142 } 1143 1144 /* 1145 * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs 1146 * in the list must be treated as running. 1147 */ 1148 if (RecoveryInProgress()) 1149 { 1150 /* none of the PGXACT entries should have XIDs in hot standby mode */ 1151 Assert(nxids == 0); 1152 1153 if (KnownAssignedXidExists(xid)) 1154 { 1155 LWLockRelease(ProcArrayLock); 1156 xc_by_known_assigned_inc(); 1157 return true; 1158 } 1159 1160 /* 1161 * If the KnownAssignedXids overflowed, we have to check pg_subtrans 1162 * too. Fetch all xids from KnownAssignedXids that are lower than 1163 * xid, since if xid is a subtransaction its parent will always have a 1164 * lower value. Note we will collect both main and subXIDs here, but 1165 * there's no help for it. 1166 */ 1167 if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid)) 1168 nxids = KnownAssignedXidsGet(xids, xid); 1169 } 1170 1171 LWLockRelease(ProcArrayLock); 1172 1173 /* 1174 * If none of the relevant caches overflowed, we know the Xid is not 1175 * running without even looking at pg_subtrans. 1176 */ 1177 if (nxids == 0) 1178 { 1179 xc_no_overflow_inc(); 1180 return false; 1181 } 1182 1183 /* 1184 * Step 4: have to check pg_subtrans. 1185 * 1186 * At this point, we know it's either a subtransaction of one of the Xids 1187 * in xids[], or it's not running. If it's an already-failed 1188 * subtransaction, we want to say "not running" even though its parent may 1189 * still be running. So first, check pg_xact to see if it's been aborted. 1190 */ 1191 xc_slow_answer_inc(); 1192 1193 if (TransactionIdDidAbort(xid)) 1194 return false; 1195 1196 /* 1197 * It isn't aborted, so check whether the transaction tree it belongs to 1198 * is still running (or, more precisely, whether it was running when we 1199 * held ProcArrayLock). 1200 */ 1201 topxid = SubTransGetTopmostTransaction(xid); 1202 Assert(TransactionIdIsValid(topxid)); 1203 if (!TransactionIdEquals(topxid, xid)) 1204 { 1205 for (i = 0; i < nxids; i++) 1206 { 1207 if (TransactionIdEquals(xids[i], topxid)) 1208 return true; 1209 } 1210 } 1211 1212 return false; 1213 } 1214 1215 /* 1216 * TransactionIdIsActive -- is xid the top-level XID of an active backend? 1217 * 1218 * This differs from TransactionIdIsInProgress in that it ignores prepared 1219 * transactions, as well as transactions running on the master if we're in 1220 * hot standby. Also, we ignore subtransactions since that's not needed 1221 * for current uses. 1222 */ 1223 bool 1224 TransactionIdIsActive(TransactionId xid) 1225 { 1226 bool result = false; 1227 ProcArrayStruct *arrayP = procArray; 1228 int i; 1229 1230 /* 1231 * Don't bother checking a transaction older than RecentXmin; it could not 1232 * possibly still be running. 1233 */ 1234 if (TransactionIdPrecedes(xid, RecentXmin)) 1235 return false; 1236 1237 LWLockAcquire(ProcArrayLock, LW_SHARED); 1238 1239 for (i = 0; i < arrayP->numProcs; i++) 1240 { 1241 int pgprocno = arrayP->pgprocnos[i]; 1242 volatile PGPROC *proc = &allProcs[pgprocno]; 1243 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 1244 TransactionId pxid; 1245 1246 /* Fetch xid just once - see GetNewTransactionId */ 1247 pxid = pgxact->xid; 1248 1249 if (!TransactionIdIsValid(pxid)) 1250 continue; 1251 1252 if (proc->pid == 0) 1253 continue; /* ignore prepared transactions */ 1254 1255 if (TransactionIdEquals(pxid, xid)) 1256 { 1257 result = true; 1258 break; 1259 } 1260 } 1261 1262 LWLockRelease(ProcArrayLock); 1263 1264 return result; 1265 } 1266 1267 1268 /* 1269 * GetOldestXmin -- returns oldest transaction that was running 1270 * when any current transaction was started. 1271 * 1272 * If rel is NULL or a shared relation, all backends are considered, otherwise 1273 * only backends running in this database are considered. 1274 * 1275 * The flags are used to ignore the backends in calculation when any of the 1276 * corresponding flags is set. Typically, if you want to ignore ones with 1277 * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM. 1278 * 1279 * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and 1280 * catalog_xmin of any replication slots that exist in the system when 1281 * calculating the oldest xmin. 1282 * 1283 * This is used by VACUUM to decide which deleted tuples must be preserved in 1284 * the passed in table. For shared relations backends in all databases must be 1285 * considered, but for non-shared relations that's not required, since only 1286 * backends in my own database could ever see the tuples in them. Also, we can 1287 * ignore concurrently running lazy VACUUMs because (a) they must be working 1288 * on other tables, and (b) they don't need to do snapshot-based lookups. 1289 * 1290 * This is also used to determine where to truncate pg_subtrans. For that 1291 * backends in all databases have to be considered, so rel = NULL has to be 1292 * passed in. 1293 * 1294 * Note: we include all currently running xids in the set of considered xids. 1295 * This ensures that if a just-started xact has not yet set its snapshot, 1296 * when it does set the snapshot it cannot set xmin less than what we compute. 1297 * See notes in src/backend/access/transam/README. 1298 * 1299 * Note: despite the above, it's possible for the calculated value to move 1300 * backwards on repeated calls. The calculated value is conservative, so that 1301 * anything older is definitely not considered as running by anyone anymore, 1302 * but the exact value calculated depends on a number of things. For example, 1303 * if rel = NULL and there are no transactions running in the current 1304 * database, GetOldestXmin() returns latestCompletedXid. If a transaction 1305 * begins after that, its xmin will include in-progress transactions in other 1306 * databases that started earlier, so another call will return a lower value. 1307 * Nonetheless it is safe to vacuum a table in the current database with the 1308 * first result. There are also replication-related effects: a walsender 1309 * process can set its xmin based on transactions that are no longer running 1310 * in the master but are still being replayed on the standby, thus possibly 1311 * making the GetOldestXmin reading go backwards. In this case there is a 1312 * possibility that we lose data that the standby would like to have, but 1313 * unless the standby uses a replication slot to make its xmin persistent 1314 * there is little we can do about that --- data is only protected if the 1315 * walsender runs continuously while queries are executed on the standby. 1316 * (The Hot Standby code deals with such cases by failing standby queries 1317 * that needed to access already-removed data, so there's no integrity bug.) 1318 * The return value is also adjusted with vacuum_defer_cleanup_age, so 1319 * increasing that setting on the fly is another easy way to make 1320 * GetOldestXmin() move backwards, with no consequences for data integrity. 1321 */ 1322 TransactionId 1323 GetOldestXmin(Relation rel, int flags) 1324 { 1325 ProcArrayStruct *arrayP = procArray; 1326 TransactionId result; 1327 int index; 1328 bool allDbs; 1329 1330 volatile TransactionId replication_slot_xmin = InvalidTransactionId; 1331 volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId; 1332 1333 /* 1334 * If we're not computing a relation specific limit, or if a shared 1335 * relation has been passed in, backends in all databases have to be 1336 * considered. 1337 */ 1338 allDbs = rel == NULL || rel->rd_rel->relisshared; 1339 1340 /* Cannot look for individual databases during recovery */ 1341 Assert(allDbs || !RecoveryInProgress()); 1342 1343 LWLockAcquire(ProcArrayLock, LW_SHARED); 1344 1345 /* 1346 * We initialize the MIN() calculation with latestCompletedXid + 1. This 1347 * is a lower bound for the XIDs that might appear in the ProcArray later, 1348 * and so protects us against overestimating the result due to future 1349 * additions. 1350 */ 1351 result = ShmemVariableCache->latestCompletedXid; 1352 Assert(TransactionIdIsNormal(result)); 1353 TransactionIdAdvance(result); 1354 1355 for (index = 0; index < arrayP->numProcs; index++) 1356 { 1357 int pgprocno = arrayP->pgprocnos[index]; 1358 volatile PGPROC *proc = &allProcs[pgprocno]; 1359 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 1360 1361 if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK)) 1362 continue; 1363 1364 if (allDbs || 1365 proc->databaseId == MyDatabaseId || 1366 proc->databaseId == 0) /* always include WalSender */ 1367 { 1368 /* Fetch xid just once - see GetNewTransactionId */ 1369 TransactionId xid = pgxact->xid; 1370 1371 /* First consider the transaction's own Xid, if any */ 1372 if (TransactionIdIsNormal(xid) && 1373 TransactionIdPrecedes(xid, result)) 1374 result = xid; 1375 1376 /* 1377 * Also consider the transaction's Xmin, if set. 1378 * 1379 * We must check both Xid and Xmin because a transaction might 1380 * have an Xmin but not (yet) an Xid; conversely, if it has an 1381 * Xid, that could determine some not-yet-set Xmin. 1382 */ 1383 xid = pgxact->xmin; /* Fetch just once */ 1384 if (TransactionIdIsNormal(xid) && 1385 TransactionIdPrecedes(xid, result)) 1386 result = xid; 1387 } 1388 } 1389 1390 /* fetch into volatile var while ProcArrayLock is held */ 1391 replication_slot_xmin = procArray->replication_slot_xmin; 1392 replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; 1393 1394 if (RecoveryInProgress()) 1395 { 1396 /* 1397 * Check to see whether KnownAssignedXids contains an xid value older 1398 * than the main procarray. 1399 */ 1400 TransactionId kaxmin = KnownAssignedXidsGetOldestXmin(); 1401 1402 LWLockRelease(ProcArrayLock); 1403 1404 if (TransactionIdIsNormal(kaxmin) && 1405 TransactionIdPrecedes(kaxmin, result)) 1406 result = kaxmin; 1407 } 1408 else 1409 { 1410 /* 1411 * No other information needed, so release the lock immediately. 1412 */ 1413 LWLockRelease(ProcArrayLock); 1414 1415 /* 1416 * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age, 1417 * being careful not to generate a "permanent" XID. 1418 * 1419 * vacuum_defer_cleanup_age provides some additional "slop" for the 1420 * benefit of hot standby queries on standby servers. This is quick 1421 * and dirty, and perhaps not all that useful unless the master has a 1422 * predictable transaction rate, but it offers some protection when 1423 * there's no walsender connection. Note that we are assuming 1424 * vacuum_defer_cleanup_age isn't large enough to cause wraparound --- 1425 * so guc.c should limit it to no more than the xidStopLimit threshold 1426 * in varsup.c. Also note that we intentionally don't apply 1427 * vacuum_defer_cleanup_age on standby servers. 1428 */ 1429 result -= vacuum_defer_cleanup_age; 1430 if (!TransactionIdIsNormal(result)) 1431 result = FirstNormalTransactionId; 1432 } 1433 1434 /* 1435 * Check whether there are replication slots requiring an older xmin. 1436 */ 1437 if (!(flags & PROCARRAY_SLOTS_XMIN) && 1438 TransactionIdIsValid(replication_slot_xmin) && 1439 NormalTransactionIdPrecedes(replication_slot_xmin, result)) 1440 result = replication_slot_xmin; 1441 1442 /* 1443 * After locks have been released and defer_cleanup_age has been applied, 1444 * check whether we need to back up further to make logical decoding 1445 * possible. We need to do so if we're computing the global limit (rel = 1446 * NULL) or if the passed relation is a catalog relation of some kind. 1447 */ 1448 if (!(flags & PROCARRAY_SLOTS_XMIN) && 1449 (rel == NULL || 1450 RelationIsAccessibleInLogicalDecoding(rel)) && 1451 TransactionIdIsValid(replication_slot_catalog_xmin) && 1452 NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result)) 1453 result = replication_slot_catalog_xmin; 1454 1455 return result; 1456 } 1457 1458 /* 1459 * GetMaxSnapshotXidCount -- get max size for snapshot XID array 1460 * 1461 * We have to export this for use by snapmgr.c. 1462 */ 1463 int 1464 GetMaxSnapshotXidCount(void) 1465 { 1466 return procArray->maxProcs; 1467 } 1468 1469 /* 1470 * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array 1471 * 1472 * We have to export this for use by snapmgr.c. 1473 */ 1474 int 1475 GetMaxSnapshotSubxidCount(void) 1476 { 1477 return TOTAL_MAX_CACHED_SUBXIDS; 1478 } 1479 1480 /* 1481 * GetSnapshotData -- returns information about running transactions. 1482 * 1483 * The returned snapshot includes xmin (lowest still-running xact ID), 1484 * xmax (highest completed xact ID + 1), and a list of running xact IDs 1485 * in the range xmin <= xid < xmax. It is used as follows: 1486 * All xact IDs < xmin are considered finished. 1487 * All xact IDs >= xmax are considered still running. 1488 * For an xact ID xmin <= xid < xmax, consult list to see whether 1489 * it is considered running or not. 1490 * This ensures that the set of transactions seen as "running" by the 1491 * current xact will not change after it takes the snapshot. 1492 * 1493 * All running top-level XIDs are included in the snapshot, except for lazy 1494 * VACUUM processes. We also try to include running subtransaction XIDs, 1495 * but since PGPROC has only a limited cache area for subxact XIDs, full 1496 * information may not be available. If we find any overflowed subxid arrays, 1497 * we have to mark the snapshot's subxid data as overflowed, and extra work 1498 * *may* need to be done to determine what's running (see XidInMVCCSnapshot() 1499 * in tqual.c). 1500 * 1501 * We also update the following backend-global variables: 1502 * TransactionXmin: the oldest xmin of any snapshot in use in the 1503 * current transaction (this is the same as MyPgXact->xmin). 1504 * RecentXmin: the xmin computed for the most recent snapshot. XIDs 1505 * older than this are known not running any more. 1506 * RecentGlobalXmin: the global xmin (oldest TransactionXmin across all 1507 * running transactions, except those running LAZY VACUUM). This is 1508 * the same computation done by 1509 * GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM). 1510 * RecentGlobalDataXmin: the global xmin for non-catalog tables 1511 * >= RecentGlobalXmin 1512 * 1513 * Note: this function should probably not be called with an argument that's 1514 * not statically allocated (see xip allocation below). 1515 */ 1516 Snapshot 1517 GetSnapshotData(Snapshot snapshot) 1518 { 1519 ProcArrayStruct *arrayP = procArray; 1520 TransactionId xmin; 1521 TransactionId xmax; 1522 TransactionId globalxmin; 1523 int index; 1524 int count = 0; 1525 int subcount = 0; 1526 bool suboverflowed = false; 1527 volatile TransactionId replication_slot_xmin = InvalidTransactionId; 1528 volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId; 1529 1530 Assert(snapshot != NULL); 1531 1532 /* 1533 * Allocating space for maxProcs xids is usually overkill; numProcs would 1534 * be sufficient. But it seems better to do the malloc while not holding 1535 * the lock, so we can't look at numProcs. Likewise, we allocate much 1536 * more subxip storage than is probably needed. 1537 * 1538 * This does open a possibility for avoiding repeated malloc/free: since 1539 * maxProcs does not change at runtime, we can simply reuse the previous 1540 * xip arrays if any. (This relies on the fact that all callers pass 1541 * static SnapshotData structs.) 1542 */ 1543 if (snapshot->xip == NULL) 1544 { 1545 /* 1546 * First call for this snapshot. Snapshot is same size whether or not 1547 * we are in recovery, see later comments. 1548 */ 1549 snapshot->xip = (TransactionId *) 1550 malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId)); 1551 if (snapshot->xip == NULL) 1552 ereport(ERROR, 1553 (errcode(ERRCODE_OUT_OF_MEMORY), 1554 errmsg("out of memory"))); 1555 Assert(snapshot->subxip == NULL); 1556 snapshot->subxip = (TransactionId *) 1557 malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId)); 1558 if (snapshot->subxip == NULL) 1559 ereport(ERROR, 1560 (errcode(ERRCODE_OUT_OF_MEMORY), 1561 errmsg("out of memory"))); 1562 } 1563 1564 /* 1565 * It is sufficient to get shared lock on ProcArrayLock, even if we are 1566 * going to set MyPgXact->xmin. 1567 */ 1568 LWLockAcquire(ProcArrayLock, LW_SHARED); 1569 1570 /* xmax is always latestCompletedXid + 1 */ 1571 xmax = ShmemVariableCache->latestCompletedXid; 1572 Assert(TransactionIdIsNormal(xmax)); 1573 TransactionIdAdvance(xmax); 1574 1575 /* initialize xmin calculation with xmax */ 1576 globalxmin = xmin = xmax; 1577 1578 snapshot->takenDuringRecovery = RecoveryInProgress(); 1579 1580 if (!snapshot->takenDuringRecovery) 1581 { 1582 int *pgprocnos = arrayP->pgprocnos; 1583 int numProcs; 1584 1585 /* 1586 * Spin over procArray checking xid, xmin, and subxids. The goal is 1587 * to gather all active xids, find the lowest xmin, and try to record 1588 * subxids. 1589 */ 1590 numProcs = arrayP->numProcs; 1591 for (index = 0; index < numProcs; index++) 1592 { 1593 int pgprocno = pgprocnos[index]; 1594 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 1595 TransactionId xid; 1596 1597 /* 1598 * Backend is doing logical decoding which manages xmin 1599 * separately, check below. 1600 */ 1601 if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING) 1602 continue; 1603 1604 /* Ignore procs running LAZY VACUUM */ 1605 if (pgxact->vacuumFlags & PROC_IN_VACUUM) 1606 continue; 1607 1608 /* Update globalxmin to be the smallest valid xmin */ 1609 xid = pgxact->xmin; /* fetch just once */ 1610 if (TransactionIdIsNormal(xid) && 1611 NormalTransactionIdPrecedes(xid, globalxmin)) 1612 globalxmin = xid; 1613 1614 /* Fetch xid just once - see GetNewTransactionId */ 1615 xid = pgxact->xid; 1616 1617 /* 1618 * If the transaction has no XID assigned, we can skip it; it 1619 * won't have sub-XIDs either. If the XID is >= xmax, we can also 1620 * skip it; such transactions will be treated as running anyway 1621 * (and any sub-XIDs will also be >= xmax). 1622 */ 1623 if (!TransactionIdIsNormal(xid) 1624 || !NormalTransactionIdPrecedes(xid, xmax)) 1625 continue; 1626 1627 /* 1628 * We don't include our own XIDs (if any) in the snapshot, but we 1629 * must include them in xmin. 1630 */ 1631 if (NormalTransactionIdPrecedes(xid, xmin)) 1632 xmin = xid; 1633 if (pgxact == MyPgXact) 1634 continue; 1635 1636 /* Add XID to snapshot. */ 1637 snapshot->xip[count++] = xid; 1638 1639 /* 1640 * Save subtransaction XIDs if possible (if we've already 1641 * overflowed, there's no point). Note that the subxact XIDs must 1642 * be later than their parent, so no need to check them against 1643 * xmin. We could filter against xmax, but it seems better not to 1644 * do that much work while holding the ProcArrayLock. 1645 * 1646 * The other backend can add more subxids concurrently, but cannot 1647 * remove any. Hence it's important to fetch nxids just once. 1648 * Should be safe to use memcpy, though. (We needn't worry about 1649 * missing any xids added concurrently, because they must postdate 1650 * xmax.) 1651 * 1652 * Again, our own XIDs are not included in the snapshot. 1653 */ 1654 if (!suboverflowed) 1655 { 1656 if (pgxact->overflowed) 1657 suboverflowed = true; 1658 else 1659 { 1660 int nxids = pgxact->nxids; 1661 1662 if (nxids > 0) 1663 { 1664 volatile PGPROC *proc = &allProcs[pgprocno]; 1665 1666 memcpy(snapshot->subxip + subcount, 1667 (void *) proc->subxids.xids, 1668 nxids * sizeof(TransactionId)); 1669 subcount += nxids; 1670 } 1671 } 1672 } 1673 } 1674 } 1675 else 1676 { 1677 /* 1678 * We're in hot standby, so get XIDs from KnownAssignedXids. 1679 * 1680 * We store all xids directly into subxip[]. Here's why: 1681 * 1682 * In recovery we don't know which xids are top-level and which are 1683 * subxacts, a design choice that greatly simplifies xid processing. 1684 * 1685 * It seems like we would want to try to put xids into xip[] only, but 1686 * that is fairly small. We would either need to make that bigger or 1687 * to increase the rate at which we WAL-log xid assignment; neither is 1688 * an appealing choice. 1689 * 1690 * We could try to store xids into xip[] first and then into subxip[] 1691 * if there are too many xids. That only works if the snapshot doesn't 1692 * overflow because we do not search subxip[] in that case. A simpler 1693 * way is to just store all xids in the subxact array because this is 1694 * by far the bigger array. We just leave the xip array empty. 1695 * 1696 * Either way we need to change the way XidInMVCCSnapshot() works 1697 * depending upon when the snapshot was taken, or change normal 1698 * snapshot processing so it matches. 1699 * 1700 * Note: It is possible for recovery to end before we finish taking 1701 * the snapshot, and for newly assigned transaction ids to be added to 1702 * the ProcArray. xmax cannot change while we hold ProcArrayLock, so 1703 * those newly added transaction ids would be filtered away, so we 1704 * need not be concerned about them. 1705 */ 1706 subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin, 1707 xmax); 1708 1709 if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid)) 1710 suboverflowed = true; 1711 } 1712 1713 1714 /* fetch into volatile var while ProcArrayLock is held */ 1715 replication_slot_xmin = procArray->replication_slot_xmin; 1716 replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; 1717 1718 if (!TransactionIdIsValid(MyPgXact->xmin)) 1719 MyPgXact->xmin = TransactionXmin = xmin; 1720 1721 LWLockRelease(ProcArrayLock); 1722 1723 /* 1724 * Update globalxmin to include actual process xids. This is a slightly 1725 * different way of computing it than GetOldestXmin uses, but should give 1726 * the same result. 1727 */ 1728 if (TransactionIdPrecedes(xmin, globalxmin)) 1729 globalxmin = xmin; 1730 1731 /* Update global variables too */ 1732 RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age; 1733 if (!TransactionIdIsNormal(RecentGlobalXmin)) 1734 RecentGlobalXmin = FirstNormalTransactionId; 1735 1736 /* Check whether there's a replication slot requiring an older xmin. */ 1737 if (TransactionIdIsValid(replication_slot_xmin) && 1738 NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin)) 1739 RecentGlobalXmin = replication_slot_xmin; 1740 1741 /* Non-catalog tables can be vacuumed if older than this xid */ 1742 RecentGlobalDataXmin = RecentGlobalXmin; 1743 1744 /* 1745 * Check whether there's a replication slot requiring an older catalog 1746 * xmin. 1747 */ 1748 if (TransactionIdIsNormal(replication_slot_catalog_xmin) && 1749 NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin)) 1750 RecentGlobalXmin = replication_slot_catalog_xmin; 1751 1752 RecentXmin = xmin; 1753 1754 snapshot->xmin = xmin; 1755 snapshot->xmax = xmax; 1756 snapshot->xcnt = count; 1757 snapshot->subxcnt = subcount; 1758 snapshot->suboverflowed = suboverflowed; 1759 1760 snapshot->curcid = GetCurrentCommandId(false); 1761 1762 /* 1763 * This is a new snapshot, so set both refcounts are zero, and mark it as 1764 * not copied in persistent memory. 1765 */ 1766 snapshot->active_count = 0; 1767 snapshot->regd_count = 0; 1768 snapshot->copied = false; 1769 1770 if (old_snapshot_threshold < 0) 1771 { 1772 /* 1773 * If not using "snapshot too old" feature, fill related fields with 1774 * dummy values that don't require any locking. 1775 */ 1776 snapshot->lsn = InvalidXLogRecPtr; 1777 snapshot->whenTaken = 0; 1778 } 1779 else 1780 { 1781 /* 1782 * Capture the current time and WAL stream location in case this 1783 * snapshot becomes old enough to need to fall back on the special 1784 * "old snapshot" logic. 1785 */ 1786 snapshot->lsn = GetXLogInsertRecPtr(); 1787 snapshot->whenTaken = GetSnapshotCurrentTimestamp(); 1788 MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin); 1789 } 1790 1791 return snapshot; 1792 } 1793 1794 /* 1795 * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin 1796 * 1797 * This is called when installing a snapshot imported from another 1798 * transaction. To ensure that OldestXmin doesn't go backwards, we must 1799 * check that the source transaction is still running, and we'd better do 1800 * that atomically with installing the new xmin. 1801 * 1802 * Returns true if successful, false if source xact is no longer running. 1803 */ 1804 bool 1805 ProcArrayInstallImportedXmin(TransactionId xmin, 1806 VirtualTransactionId *sourcevxid) 1807 { 1808 bool result = false; 1809 ProcArrayStruct *arrayP = procArray; 1810 int index; 1811 1812 Assert(TransactionIdIsNormal(xmin)); 1813 if (!sourcevxid) 1814 return false; 1815 1816 /* Get lock so source xact can't end while we're doing this */ 1817 LWLockAcquire(ProcArrayLock, LW_SHARED); 1818 1819 for (index = 0; index < arrayP->numProcs; index++) 1820 { 1821 int pgprocno = arrayP->pgprocnos[index]; 1822 volatile PGPROC *proc = &allProcs[pgprocno]; 1823 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 1824 TransactionId xid; 1825 1826 /* Ignore procs running LAZY VACUUM */ 1827 if (pgxact->vacuumFlags & PROC_IN_VACUUM) 1828 continue; 1829 1830 /* We are only interested in the specific virtual transaction. */ 1831 if (proc->backendId != sourcevxid->backendId) 1832 continue; 1833 if (proc->lxid != sourcevxid->localTransactionId) 1834 continue; 1835 1836 /* 1837 * We check the transaction's database ID for paranoia's sake: if it's 1838 * in another DB then its xmin does not cover us. Caller should have 1839 * detected this already, so we just treat any funny cases as 1840 * "transaction not found". 1841 */ 1842 if (proc->databaseId != MyDatabaseId) 1843 continue; 1844 1845 /* 1846 * Likewise, let's just make real sure its xmin does cover us. 1847 */ 1848 xid = pgxact->xmin; /* fetch just once */ 1849 if (!TransactionIdIsNormal(xid) || 1850 !TransactionIdPrecedesOrEquals(xid, xmin)) 1851 continue; 1852 1853 /* 1854 * We're good. Install the new xmin. As in GetSnapshotData, set 1855 * TransactionXmin too. (Note that because snapmgr.c called 1856 * GetSnapshotData first, we'll be overwriting a valid xmin here, so 1857 * we don't check that.) 1858 */ 1859 MyPgXact->xmin = TransactionXmin = xmin; 1860 1861 result = true; 1862 break; 1863 } 1864 1865 LWLockRelease(ProcArrayLock); 1866 1867 return result; 1868 } 1869 1870 /* 1871 * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin 1872 * 1873 * This is like ProcArrayInstallImportedXmin, but we have a pointer to the 1874 * PGPROC of the transaction from which we imported the snapshot, rather than 1875 * an XID. 1876 * 1877 * Returns true if successful, false if source xact is no longer running. 1878 */ 1879 bool 1880 ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) 1881 { 1882 bool result = false; 1883 TransactionId xid; 1884 volatile PGXACT *pgxact; 1885 1886 Assert(TransactionIdIsNormal(xmin)); 1887 Assert(proc != NULL); 1888 1889 /* Get lock so source xact can't end while we're doing this */ 1890 LWLockAcquire(ProcArrayLock, LW_SHARED); 1891 1892 pgxact = &allPgXact[proc->pgprocno]; 1893 1894 /* 1895 * Be certain that the referenced PGPROC has an advertised xmin which is 1896 * no later than the one we're installing, so that the system-wide xmin 1897 * can't go backwards. Also, make sure it's running in the same database, 1898 * so that the per-database xmin cannot go backwards. 1899 */ 1900 xid = pgxact->xmin; /* fetch just once */ 1901 if (proc->databaseId == MyDatabaseId && 1902 TransactionIdIsNormal(xid) && 1903 TransactionIdPrecedesOrEquals(xid, xmin)) 1904 { 1905 MyPgXact->xmin = TransactionXmin = xmin; 1906 result = true; 1907 } 1908 1909 LWLockRelease(ProcArrayLock); 1910 1911 return result; 1912 } 1913 1914 /* 1915 * GetRunningTransactionData -- returns information about running transactions. 1916 * 1917 * Similar to GetSnapshotData but returns more information. We include 1918 * all PGXACTs with an assigned TransactionId, even VACUUM processes and 1919 * prepared transactions. 1920 * 1921 * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for 1922 * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc 1923 * array until the caller has WAL-logged this snapshot, and releases the 1924 * lock. Acquiring ProcArrayLock ensures that no transactions commit until the 1925 * lock is released. 1926 * 1927 * The returned data structure is statically allocated; caller should not 1928 * modify it, and must not assume it is valid past the next call. 1929 * 1930 * This is never executed during recovery so there is no need to look at 1931 * KnownAssignedXids. 1932 * 1933 * Dummy PGXACTs from prepared transaction are included, meaning that this 1934 * may return entries with duplicated TransactionId values coming from 1935 * transaction finishing to prepare. Nothing is done about duplicated 1936 * entries here to not hold on ProcArrayLock more than necessary. 1937 * 1938 * We don't worry about updating other counters, we want to keep this as 1939 * simple as possible and leave GetSnapshotData() as the primary code for 1940 * that bookkeeping. 1941 * 1942 * Note that if any transaction has overflowed its cached subtransactions 1943 * then there is no real need include any subtransactions. 1944 */ 1945 RunningTransactions 1946 GetRunningTransactionData(void) 1947 { 1948 /* result workspace */ 1949 static RunningTransactionsData CurrentRunningXactsData; 1950 1951 ProcArrayStruct *arrayP = procArray; 1952 RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData; 1953 TransactionId latestCompletedXid; 1954 TransactionId oldestRunningXid; 1955 TransactionId *xids; 1956 int index; 1957 int count; 1958 int subcount; 1959 bool suboverflowed; 1960 1961 Assert(!RecoveryInProgress()); 1962 1963 /* 1964 * Allocating space for maxProcs xids is usually overkill; numProcs would 1965 * be sufficient. But it seems better to do the malloc while not holding 1966 * the lock, so we can't look at numProcs. Likewise, we allocate much 1967 * more subxip storage than is probably needed. 1968 * 1969 * Should only be allocated in bgwriter, since only ever executed during 1970 * checkpoints. 1971 */ 1972 if (CurrentRunningXacts->xids == NULL) 1973 { 1974 /* 1975 * First call 1976 */ 1977 CurrentRunningXacts->xids = (TransactionId *) 1978 malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId)); 1979 if (CurrentRunningXacts->xids == NULL) 1980 ereport(ERROR, 1981 (errcode(ERRCODE_OUT_OF_MEMORY), 1982 errmsg("out of memory"))); 1983 } 1984 1985 xids = CurrentRunningXacts->xids; 1986 1987 count = subcount = 0; 1988 suboverflowed = false; 1989 1990 /* 1991 * Ensure that no xids enter or leave the procarray while we obtain 1992 * snapshot. 1993 */ 1994 LWLockAcquire(ProcArrayLock, LW_SHARED); 1995 LWLockAcquire(XidGenLock, LW_SHARED); 1996 1997 latestCompletedXid = ShmemVariableCache->latestCompletedXid; 1998 1999 oldestRunningXid = ShmemVariableCache->nextXid; 2000 2001 /* 2002 * Spin over procArray collecting all xids 2003 */ 2004 for (index = 0; index < arrayP->numProcs; index++) 2005 { 2006 int pgprocno = arrayP->pgprocnos[index]; 2007 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2008 TransactionId xid; 2009 2010 /* Fetch xid just once - see GetNewTransactionId */ 2011 xid = pgxact->xid; 2012 2013 /* 2014 * We don't need to store transactions that don't have a TransactionId 2015 * yet because they will not show as running on a standby server. 2016 */ 2017 if (!TransactionIdIsValid(xid)) 2018 continue; 2019 2020 /* 2021 * Be careful not to exclude any xids before calculating the values of 2022 * oldestRunningXid and suboverflowed, since these are used to clean 2023 * up transaction information held on standbys. 2024 */ 2025 if (TransactionIdPrecedes(xid, oldestRunningXid)) 2026 oldestRunningXid = xid; 2027 2028 if (pgxact->overflowed) 2029 suboverflowed = true; 2030 2031 /* 2032 * If we wished to exclude xids this would be the right place for it. 2033 * Procs with the PROC_IN_VACUUM flag set don't usually assign xids, 2034 * but they do during truncation at the end when they get the lock and 2035 * truncate, so it is not much of a problem to include them if they 2036 * are seen and it is cleaner to include them. 2037 */ 2038 2039 xids[count++] = xid; 2040 } 2041 2042 /* 2043 * Spin over procArray collecting all subxids, but only if there hasn't 2044 * been a suboverflow. 2045 */ 2046 if (!suboverflowed) 2047 { 2048 for (index = 0; index < arrayP->numProcs; index++) 2049 { 2050 int pgprocno = arrayP->pgprocnos[index]; 2051 volatile PGPROC *proc = &allProcs[pgprocno]; 2052 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2053 int nxids; 2054 2055 /* 2056 * Save subtransaction XIDs. Other backends can't add or remove 2057 * entries while we're holding XidGenLock. 2058 */ 2059 nxids = pgxact->nxids; 2060 if (nxids > 0) 2061 { 2062 memcpy(&xids[count], (void *) proc->subxids.xids, 2063 nxids * sizeof(TransactionId)); 2064 count += nxids; 2065 subcount += nxids; 2066 2067 /* 2068 * Top-level XID of a transaction is always less than any of 2069 * its subxids, so we don't need to check if any of the 2070 * subxids are smaller than oldestRunningXid 2071 */ 2072 } 2073 } 2074 } 2075 2076 /* 2077 * It's important *not* to include the limits set by slots here because 2078 * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those 2079 * were to be included here the initial value could never increase because 2080 * of a circular dependency where slots only increase their limits when 2081 * running xacts increases oldestRunningXid and running xacts only 2082 * increases if slots do. 2083 */ 2084 2085 CurrentRunningXacts->xcnt = count - subcount; 2086 CurrentRunningXacts->subxcnt = subcount; 2087 CurrentRunningXacts->subxid_overflow = suboverflowed; 2088 CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid; 2089 CurrentRunningXacts->oldestRunningXid = oldestRunningXid; 2090 CurrentRunningXacts->latestCompletedXid = latestCompletedXid; 2091 2092 Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); 2093 Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); 2094 Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid)); 2095 2096 /* We don't release the locks here, the caller is responsible for that */ 2097 2098 return CurrentRunningXacts; 2099 } 2100 2101 /* 2102 * GetOldestActiveTransactionId() 2103 * 2104 * Similar to GetSnapshotData but returns just oldestActiveXid. We include 2105 * all PGXACTs with an assigned TransactionId, even VACUUM processes. 2106 * We look at all databases, though there is no need to include WALSender 2107 * since this has no effect on hot standby conflicts. 2108 * 2109 * This is never executed during recovery so there is no need to look at 2110 * KnownAssignedXids. 2111 * 2112 * We don't worry about updating other counters, we want to keep this as 2113 * simple as possible and leave GetSnapshotData() as the primary code for 2114 * that bookkeeping. 2115 */ 2116 TransactionId 2117 GetOldestActiveTransactionId(void) 2118 { 2119 ProcArrayStruct *arrayP = procArray; 2120 TransactionId oldestRunningXid; 2121 int index; 2122 2123 Assert(!RecoveryInProgress()); 2124 2125 /* 2126 * Read nextXid, as the upper bound of what's still active. 2127 * 2128 * Reading a TransactionId is atomic, but we must grab the lock to make 2129 * sure that all XIDs < nextXid are already present in the proc array (or 2130 * have already completed), when we spin over it. 2131 */ 2132 LWLockAcquire(XidGenLock, LW_SHARED); 2133 oldestRunningXid = ShmemVariableCache->nextXid; 2134 LWLockRelease(XidGenLock); 2135 2136 /* 2137 * Spin over procArray collecting all xids and subxids. 2138 */ 2139 LWLockAcquire(ProcArrayLock, LW_SHARED); 2140 for (index = 0; index < arrayP->numProcs; index++) 2141 { 2142 int pgprocno = arrayP->pgprocnos[index]; 2143 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2144 TransactionId xid; 2145 2146 /* Fetch xid just once - see GetNewTransactionId */ 2147 xid = pgxact->xid; 2148 2149 if (!TransactionIdIsNormal(xid)) 2150 continue; 2151 2152 if (TransactionIdPrecedes(xid, oldestRunningXid)) 2153 oldestRunningXid = xid; 2154 2155 /* 2156 * Top-level XID of a transaction is always less than any of its 2157 * subxids, so we don't need to check if any of the subxids are 2158 * smaller than oldestRunningXid 2159 */ 2160 } 2161 LWLockRelease(ProcArrayLock); 2162 2163 return oldestRunningXid; 2164 } 2165 2166 /* 2167 * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum 2168 * 2169 * Returns the oldest xid that we can guarantee not to have been affected by 2170 * vacuum, i.e. no rows >= that xid have been vacuumed away unless the 2171 * transaction aborted. Note that the value can (and most of the time will) be 2172 * much more conservative than what really has been affected by vacuum, but we 2173 * currently don't have better data available. 2174 * 2175 * This is useful to initialize the cutoff xid after which a new changeset 2176 * extraction replication slot can start decoding changes. 2177 * 2178 * Must be called with ProcArrayLock held either shared or exclusively, 2179 * although most callers will want to use exclusive mode since it is expected 2180 * that the caller will immediately use the xid to peg the xmin horizon. 2181 */ 2182 TransactionId 2183 GetOldestSafeDecodingTransactionId(bool catalogOnly) 2184 { 2185 ProcArrayStruct *arrayP = procArray; 2186 TransactionId oldestSafeXid; 2187 int index; 2188 bool recovery_in_progress = RecoveryInProgress(); 2189 2190 Assert(LWLockHeldByMe(ProcArrayLock)); 2191 2192 /* 2193 * Acquire XidGenLock, so no transactions can acquire an xid while we're 2194 * running. If no transaction with xid were running concurrently a new xid 2195 * could influence the RecentXmin et al. 2196 * 2197 * We initialize the computation to nextXid since that's guaranteed to be 2198 * a safe, albeit pessimal, value. 2199 */ 2200 LWLockAcquire(XidGenLock, LW_SHARED); 2201 oldestSafeXid = ShmemVariableCache->nextXid; 2202 2203 /* 2204 * If there's already a slot pegging the xmin horizon, we can start with 2205 * that value, it's guaranteed to be safe since it's computed by this 2206 * routine initially and has been enforced since. We can always use the 2207 * slot's general xmin horizon, but the catalog horizon is only usable 2208 * when only catalog data is going to be looked at. 2209 */ 2210 if (TransactionIdIsValid(procArray->replication_slot_xmin) && 2211 TransactionIdPrecedes(procArray->replication_slot_xmin, 2212 oldestSafeXid)) 2213 oldestSafeXid = procArray->replication_slot_xmin; 2214 2215 if (catalogOnly && 2216 TransactionIdIsValid(procArray->replication_slot_catalog_xmin) && 2217 TransactionIdPrecedes(procArray->replication_slot_catalog_xmin, 2218 oldestSafeXid)) 2219 oldestSafeXid = procArray->replication_slot_catalog_xmin; 2220 2221 /* 2222 * If we're not in recovery, we walk over the procarray and collect the 2223 * lowest xid. Since we're called with ProcArrayLock held and have 2224 * acquired XidGenLock, no entries can vanish concurrently, since 2225 * PGXACT->xid is only set with XidGenLock held and only cleared with 2226 * ProcArrayLock held. 2227 * 2228 * In recovery we can't lower the safe value besides what we've computed 2229 * above, so we'll have to wait a bit longer there. We unfortunately can 2230 * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids 2231 * machinery can miss values and return an older value than is safe. 2232 */ 2233 if (!recovery_in_progress) 2234 { 2235 /* 2236 * Spin over procArray collecting all min(PGXACT->xid) 2237 */ 2238 for (index = 0; index < arrayP->numProcs; index++) 2239 { 2240 int pgprocno = arrayP->pgprocnos[index]; 2241 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2242 TransactionId xid; 2243 2244 /* Fetch xid just once - see GetNewTransactionId */ 2245 xid = pgxact->xid; 2246 2247 if (!TransactionIdIsNormal(xid)) 2248 continue; 2249 2250 if (TransactionIdPrecedes(xid, oldestSafeXid)) 2251 oldestSafeXid = xid; 2252 } 2253 } 2254 2255 LWLockRelease(XidGenLock); 2256 2257 return oldestSafeXid; 2258 } 2259 2260 /* 2261 * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are 2262 * delaying checkpoint because they have critical actions in progress. 2263 * 2264 * Constructs an array of VXIDs of transactions that are currently in commit 2265 * critical sections, as shown by having delayChkpt set in their PGXACT. 2266 * 2267 * Returns a palloc'd array that should be freed by the caller. 2268 * *nvxids is the number of valid entries. 2269 * 2270 * Note that because backends set or clear delayChkpt without holding any lock, 2271 * the result is somewhat indeterminate, but we don't really care. Even in 2272 * a multiprocessor with delayed writes to shared memory, it should be certain 2273 * that setting of delayChkpt will propagate to shared memory when the backend 2274 * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if 2275 * it's already inserted its commit record. Whether it takes a little while 2276 * for clearing of delayChkpt to propagate is unimportant for correctness. 2277 */ 2278 VirtualTransactionId * 2279 GetVirtualXIDsDelayingChkpt(int *nvxids) 2280 { 2281 VirtualTransactionId *vxids; 2282 ProcArrayStruct *arrayP = procArray; 2283 int count = 0; 2284 int index; 2285 2286 /* allocate what's certainly enough result space */ 2287 vxids = (VirtualTransactionId *) 2288 palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); 2289 2290 LWLockAcquire(ProcArrayLock, LW_SHARED); 2291 2292 for (index = 0; index < arrayP->numProcs; index++) 2293 { 2294 int pgprocno = arrayP->pgprocnos[index]; 2295 volatile PGPROC *proc = &allProcs[pgprocno]; 2296 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2297 2298 if (pgxact->delayChkpt) 2299 { 2300 VirtualTransactionId vxid; 2301 2302 GET_VXID_FROM_PGPROC(vxid, *proc); 2303 if (VirtualTransactionIdIsValid(vxid)) 2304 vxids[count++] = vxid; 2305 } 2306 } 2307 2308 LWLockRelease(ProcArrayLock); 2309 2310 *nvxids = count; 2311 return vxids; 2312 } 2313 2314 /* 2315 * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying? 2316 * 2317 * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any 2318 * of the specified VXIDs are still in critical sections of code. 2319 * 2320 * Note: this is O(N^2) in the number of vxacts that are/were delaying, but 2321 * those numbers should be small enough for it not to be a problem. 2322 */ 2323 bool 2324 HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids) 2325 { 2326 bool result = false; 2327 ProcArrayStruct *arrayP = procArray; 2328 int index; 2329 2330 LWLockAcquire(ProcArrayLock, LW_SHARED); 2331 2332 for (index = 0; index < arrayP->numProcs; index++) 2333 { 2334 int pgprocno = arrayP->pgprocnos[index]; 2335 volatile PGPROC *proc = &allProcs[pgprocno]; 2336 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2337 VirtualTransactionId vxid; 2338 2339 GET_VXID_FROM_PGPROC(vxid, *proc); 2340 2341 if (pgxact->delayChkpt && VirtualTransactionIdIsValid(vxid)) 2342 { 2343 int i; 2344 2345 for (i = 0; i < nvxids; i++) 2346 { 2347 if (VirtualTransactionIdEquals(vxid, vxids[i])) 2348 { 2349 result = true; 2350 break; 2351 } 2352 } 2353 if (result) 2354 break; 2355 } 2356 } 2357 2358 LWLockRelease(ProcArrayLock); 2359 2360 return result; 2361 } 2362 2363 /* 2364 * BackendPidGetProc -- get a backend's PGPROC given its PID 2365 * 2366 * Returns NULL if not found. Note that it is up to the caller to be 2367 * sure that the question remains meaningful for long enough for the 2368 * answer to be used ... 2369 */ 2370 PGPROC * 2371 BackendPidGetProc(int pid) 2372 { 2373 PGPROC *result; 2374 2375 if (pid == 0) /* never match dummy PGPROCs */ 2376 return NULL; 2377 2378 LWLockAcquire(ProcArrayLock, LW_SHARED); 2379 2380 result = BackendPidGetProcWithLock(pid); 2381 2382 LWLockRelease(ProcArrayLock); 2383 2384 return result; 2385 } 2386 2387 /* 2388 * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID 2389 * 2390 * Same as above, except caller must be holding ProcArrayLock. The found 2391 * entry, if any, can be assumed to be valid as long as the lock remains held. 2392 */ 2393 PGPROC * 2394 BackendPidGetProcWithLock(int pid) 2395 { 2396 PGPROC *result = NULL; 2397 ProcArrayStruct *arrayP = procArray; 2398 int index; 2399 2400 if (pid == 0) /* never match dummy PGPROCs */ 2401 return NULL; 2402 2403 for (index = 0; index < arrayP->numProcs; index++) 2404 { 2405 PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; 2406 2407 if (proc->pid == pid) 2408 { 2409 result = proc; 2410 break; 2411 } 2412 } 2413 2414 return result; 2415 } 2416 2417 /* 2418 * BackendXidGetPid -- get a backend's pid given its XID 2419 * 2420 * Returns 0 if not found or it's a prepared transaction. Note that 2421 * it is up to the caller to be sure that the question remains 2422 * meaningful for long enough for the answer to be used ... 2423 * 2424 * Only main transaction Ids are considered. This function is mainly 2425 * useful for determining what backend owns a lock. 2426 * 2427 * Beware that not every xact has an XID assigned. However, as long as you 2428 * only call this using an XID found on disk, you're safe. 2429 */ 2430 int 2431 BackendXidGetPid(TransactionId xid) 2432 { 2433 int result = 0; 2434 ProcArrayStruct *arrayP = procArray; 2435 int index; 2436 2437 if (xid == InvalidTransactionId) /* never match invalid xid */ 2438 return 0; 2439 2440 LWLockAcquire(ProcArrayLock, LW_SHARED); 2441 2442 for (index = 0; index < arrayP->numProcs; index++) 2443 { 2444 int pgprocno = arrayP->pgprocnos[index]; 2445 volatile PGPROC *proc = &allProcs[pgprocno]; 2446 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2447 2448 if (pgxact->xid == xid) 2449 { 2450 result = proc->pid; 2451 break; 2452 } 2453 } 2454 2455 LWLockRelease(ProcArrayLock); 2456 2457 return result; 2458 } 2459 2460 /* 2461 * IsBackendPid -- is a given pid a running backend 2462 * 2463 * This is not called by the backend, but is called by external modules. 2464 */ 2465 bool 2466 IsBackendPid(int pid) 2467 { 2468 return (BackendPidGetProc(pid) != NULL); 2469 } 2470 2471 2472 /* 2473 * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs. 2474 * 2475 * The array is palloc'd. The number of valid entries is returned into *nvxids. 2476 * 2477 * The arguments allow filtering the set of VXIDs returned. Our own process 2478 * is always skipped. In addition: 2479 * If limitXmin is not InvalidTransactionId, skip processes with 2480 * xmin > limitXmin. 2481 * If excludeXmin0 is true, skip processes with xmin = 0. 2482 * If allDbs is false, skip processes attached to other databases. 2483 * If excludeVacuum isn't zero, skip processes for which 2484 * (vacuumFlags & excludeVacuum) is not zero. 2485 * 2486 * Note: the purpose of the limitXmin and excludeXmin0 parameters is to 2487 * allow skipping backends whose oldest live snapshot is no older than 2488 * some snapshot we have. Since we examine the procarray with only shared 2489 * lock, there are race conditions: a backend could set its xmin just after 2490 * we look. Indeed, on multiprocessors with weak memory ordering, the 2491 * other backend could have set its xmin *before* we look. We know however 2492 * that such a backend must have held shared ProcArrayLock overlapping our 2493 * own hold of ProcArrayLock, else we would see its xmin update. Therefore, 2494 * any snapshot the other backend is taking concurrently with our scan cannot 2495 * consider any transactions as still running that we think are committed 2496 * (since backends must hold ProcArrayLock exclusive to commit). 2497 */ 2498 VirtualTransactionId * 2499 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, 2500 bool allDbs, int excludeVacuum, 2501 int *nvxids) 2502 { 2503 VirtualTransactionId *vxids; 2504 ProcArrayStruct *arrayP = procArray; 2505 int count = 0; 2506 int index; 2507 2508 /* allocate what's certainly enough result space */ 2509 vxids = (VirtualTransactionId *) 2510 palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); 2511 2512 LWLockAcquire(ProcArrayLock, LW_SHARED); 2513 2514 for (index = 0; index < arrayP->numProcs; index++) 2515 { 2516 int pgprocno = arrayP->pgprocnos[index]; 2517 volatile PGPROC *proc = &allProcs[pgprocno]; 2518 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2519 2520 if (proc == MyProc) 2521 continue; 2522 2523 if (excludeVacuum & pgxact->vacuumFlags) 2524 continue; 2525 2526 if (allDbs || proc->databaseId == MyDatabaseId) 2527 { 2528 /* Fetch xmin just once - might change on us */ 2529 TransactionId pxmin = pgxact->xmin; 2530 2531 if (excludeXmin0 && !TransactionIdIsValid(pxmin)) 2532 continue; 2533 2534 /* 2535 * InvalidTransactionId precedes all other XIDs, so a proc that 2536 * hasn't set xmin yet will not be rejected by this test. 2537 */ 2538 if (!TransactionIdIsValid(limitXmin) || 2539 TransactionIdPrecedesOrEquals(pxmin, limitXmin)) 2540 { 2541 VirtualTransactionId vxid; 2542 2543 GET_VXID_FROM_PGPROC(vxid, *proc); 2544 if (VirtualTransactionIdIsValid(vxid)) 2545 vxids[count++] = vxid; 2546 } 2547 } 2548 } 2549 2550 LWLockRelease(ProcArrayLock); 2551 2552 *nvxids = count; 2553 return vxids; 2554 } 2555 2556 /* 2557 * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs. 2558 * 2559 * Usage is limited to conflict resolution during recovery on standby servers. 2560 * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId 2561 * in cases where we cannot accurately determine a value for latestRemovedXid. 2562 * 2563 * If limitXmin is InvalidTransactionId then we want to kill everybody, 2564 * so we're not worried if they have a snapshot or not, nor does it really 2565 * matter what type of lock we hold. 2566 * 2567 * All callers that are checking xmins always now supply a valid and useful 2568 * value for limitXmin. The limitXmin is always lower than the lowest 2569 * numbered KnownAssignedXid that is not already a FATAL error. This is 2570 * because we only care about cleanup records that are cleaning up tuple 2571 * versions from committed transactions. In that case they will only occur 2572 * at the point where the record is less than the lowest running xid. That 2573 * allows us to say that if any backend takes a snapshot concurrently with 2574 * us then the conflict assessment made here would never include the snapshot 2575 * that is being derived. So we take LW_SHARED on the ProcArray and allow 2576 * concurrent snapshots when limitXmin is valid. We might think about adding 2577 * Assert(limitXmin < lowest(KnownAssignedXids)) 2578 * but that would not be true in the case of FATAL errors lagging in array, 2579 * but we already know those are bogus anyway, so we skip that test. 2580 * 2581 * If dbOid is valid we skip backends attached to other databases. 2582 * 2583 * Be careful to *not* pfree the result from this function. We reuse 2584 * this array sufficiently often that we use malloc for the result. 2585 */ 2586 VirtualTransactionId * 2587 GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) 2588 { 2589 static VirtualTransactionId *vxids; 2590 ProcArrayStruct *arrayP = procArray; 2591 int count = 0; 2592 int index; 2593 2594 /* 2595 * If first time through, get workspace to remember main XIDs in. We 2596 * malloc it permanently to avoid repeated palloc/pfree overhead. Allow 2597 * result space, remembering room for a terminator. 2598 */ 2599 if (vxids == NULL) 2600 { 2601 vxids = (VirtualTransactionId *) 2602 malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1)); 2603 if (vxids == NULL) 2604 ereport(ERROR, 2605 (errcode(ERRCODE_OUT_OF_MEMORY), 2606 errmsg("out of memory"))); 2607 } 2608 2609 LWLockAcquire(ProcArrayLock, LW_SHARED); 2610 2611 for (index = 0; index < arrayP->numProcs; index++) 2612 { 2613 int pgprocno = arrayP->pgprocnos[index]; 2614 volatile PGPROC *proc = &allProcs[pgprocno]; 2615 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2616 2617 /* Exclude prepared transactions */ 2618 if (proc->pid == 0) 2619 continue; 2620 2621 if (!OidIsValid(dbOid) || 2622 proc->databaseId == dbOid) 2623 { 2624 /* Fetch xmin just once - can't change on us, but good coding */ 2625 TransactionId pxmin = pgxact->xmin; 2626 2627 /* 2628 * We ignore an invalid pxmin because this means that backend has 2629 * no snapshot currently. We hold a Share lock to avoid contention 2630 * with users taking snapshots. That is not a problem because the 2631 * current xmin is always at least one higher than the latest 2632 * removed xid, so any new snapshot would never conflict with the 2633 * test here. 2634 */ 2635 if (!TransactionIdIsValid(limitXmin) || 2636 (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin))) 2637 { 2638 VirtualTransactionId vxid; 2639 2640 GET_VXID_FROM_PGPROC(vxid, *proc); 2641 if (VirtualTransactionIdIsValid(vxid)) 2642 vxids[count++] = vxid; 2643 } 2644 } 2645 } 2646 2647 LWLockRelease(ProcArrayLock); 2648 2649 /* add the terminator */ 2650 vxids[count].backendId = InvalidBackendId; 2651 vxids[count].localTransactionId = InvalidLocalTransactionId; 2652 2653 return vxids; 2654 } 2655 2656 /* 2657 * CancelVirtualTransaction - used in recovery conflict processing 2658 * 2659 * Returns pid of the process signaled, or 0 if not found. 2660 */ 2661 pid_t 2662 CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) 2663 { 2664 return SignalVirtualTransaction(vxid, sigmode, true); 2665 } 2666 2667 pid_t 2668 SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, 2669 bool conflictPending) 2670 { 2671 ProcArrayStruct *arrayP = procArray; 2672 int index; 2673 pid_t pid = 0; 2674 2675 LWLockAcquire(ProcArrayLock, LW_SHARED); 2676 2677 for (index = 0; index < arrayP->numProcs; index++) 2678 { 2679 int pgprocno = arrayP->pgprocnos[index]; 2680 volatile PGPROC *proc = &allProcs[pgprocno]; 2681 VirtualTransactionId procvxid; 2682 2683 GET_VXID_FROM_PGPROC(procvxid, *proc); 2684 2685 if (procvxid.backendId == vxid.backendId && 2686 procvxid.localTransactionId == vxid.localTransactionId) 2687 { 2688 proc->recoveryConflictPending = conflictPending; 2689 pid = proc->pid; 2690 if (pid != 0) 2691 { 2692 /* 2693 * Kill the pid if it's still here. If not, that's what we 2694 * wanted so ignore any errors. 2695 */ 2696 (void) SendProcSignal(pid, sigmode, vxid.backendId); 2697 } 2698 break; 2699 } 2700 } 2701 2702 LWLockRelease(ProcArrayLock); 2703 2704 return pid; 2705 } 2706 2707 /* 2708 * MinimumActiveBackends --- count backends (other than myself) that are 2709 * in active transactions. Return true if the count exceeds the 2710 * minimum threshold passed. This is used as a heuristic to decide if 2711 * a pre-XLOG-flush delay is worthwhile during commit. 2712 * 2713 * Do not count backends that are blocked waiting for locks, since they are 2714 * not going to get to run until someone else commits. 2715 */ 2716 bool 2717 MinimumActiveBackends(int min) 2718 { 2719 ProcArrayStruct *arrayP = procArray; 2720 int count = 0; 2721 int index; 2722 2723 /* Quick short-circuit if no minimum is specified */ 2724 if (min == 0) 2725 return true; 2726 2727 /* 2728 * Note: for speed, we don't acquire ProcArrayLock. This is a little bit 2729 * bogus, but since we are only testing fields for zero or nonzero, it 2730 * should be OK. The result is only used for heuristic purposes anyway... 2731 */ 2732 for (index = 0; index < arrayP->numProcs; index++) 2733 { 2734 int pgprocno = arrayP->pgprocnos[index]; 2735 volatile PGPROC *proc = &allProcs[pgprocno]; 2736 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2737 2738 /* 2739 * Since we're not holding a lock, need to be prepared to deal with 2740 * garbage, as someone could have incremented numProcs but not yet 2741 * filled the structure. 2742 * 2743 * If someone just decremented numProcs, 'proc' could also point to a 2744 * PGPROC entry that's no longer in the array. It still points to a 2745 * PGPROC struct, though, because freed PGPROC entries just go to the 2746 * free list and are recycled. Its contents are nonsense in that case, 2747 * but that's acceptable for this function. 2748 */ 2749 if (pgprocno == -1) 2750 continue; /* do not count deleted entries */ 2751 if (proc == MyProc) 2752 continue; /* do not count myself */ 2753 if (pgxact->xid == InvalidTransactionId) 2754 continue; /* do not count if no XID assigned */ 2755 if (proc->pid == 0) 2756 continue; /* do not count prepared xacts */ 2757 if (proc->waitLock != NULL) 2758 continue; /* do not count if blocked on a lock */ 2759 count++; 2760 if (count >= min) 2761 break; 2762 } 2763 2764 return count >= min; 2765 } 2766 2767 /* 2768 * CountDBBackends --- count backends that are using specified database 2769 */ 2770 int 2771 CountDBBackends(Oid databaseid) 2772 { 2773 ProcArrayStruct *arrayP = procArray; 2774 int count = 0; 2775 int index; 2776 2777 LWLockAcquire(ProcArrayLock, LW_SHARED); 2778 2779 for (index = 0; index < arrayP->numProcs; index++) 2780 { 2781 int pgprocno = arrayP->pgprocnos[index]; 2782 volatile PGPROC *proc = &allProcs[pgprocno]; 2783 2784 if (proc->pid == 0) 2785 continue; /* do not count prepared xacts */ 2786 if (!OidIsValid(databaseid) || 2787 proc->databaseId == databaseid) 2788 count++; 2789 } 2790 2791 LWLockRelease(ProcArrayLock); 2792 2793 return count; 2794 } 2795 2796 /* 2797 * CountDBConnections --- counts database backends ignoring any background 2798 * worker processes 2799 */ 2800 int 2801 CountDBConnections(Oid databaseid) 2802 { 2803 ProcArrayStruct *arrayP = procArray; 2804 int count = 0; 2805 int index; 2806 2807 LWLockAcquire(ProcArrayLock, LW_SHARED); 2808 2809 for (index = 0; index < arrayP->numProcs; index++) 2810 { 2811 int pgprocno = arrayP->pgprocnos[index]; 2812 volatile PGPROC *proc = &allProcs[pgprocno]; 2813 2814 if (proc->pid == 0) 2815 continue; /* do not count prepared xacts */ 2816 if (proc->isBackgroundWorker) 2817 continue; /* do not count background workers */ 2818 if (!OidIsValid(databaseid) || 2819 proc->databaseId == databaseid) 2820 count++; 2821 } 2822 2823 LWLockRelease(ProcArrayLock); 2824 2825 return count; 2826 } 2827 2828 /* 2829 * CancelDBBackends --- cancel backends that are using specified database 2830 */ 2831 void 2832 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) 2833 { 2834 ProcArrayStruct *arrayP = procArray; 2835 int index; 2836 pid_t pid = 0; 2837 2838 /* tell all backends to die */ 2839 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 2840 2841 for (index = 0; index < arrayP->numProcs; index++) 2842 { 2843 int pgprocno = arrayP->pgprocnos[index]; 2844 volatile PGPROC *proc = &allProcs[pgprocno]; 2845 2846 if (databaseid == InvalidOid || proc->databaseId == databaseid) 2847 { 2848 VirtualTransactionId procvxid; 2849 2850 GET_VXID_FROM_PGPROC(procvxid, *proc); 2851 2852 proc->recoveryConflictPending = conflictPending; 2853 pid = proc->pid; 2854 if (pid != 0) 2855 { 2856 /* 2857 * Kill the pid if it's still here. If not, that's what we 2858 * wanted so ignore any errors. 2859 */ 2860 (void) SendProcSignal(pid, sigmode, procvxid.backendId); 2861 } 2862 } 2863 } 2864 2865 LWLockRelease(ProcArrayLock); 2866 } 2867 2868 /* 2869 * CountUserBackends --- count backends that are used by specified user 2870 */ 2871 int 2872 CountUserBackends(Oid roleid) 2873 { 2874 ProcArrayStruct *arrayP = procArray; 2875 int count = 0; 2876 int index; 2877 2878 LWLockAcquire(ProcArrayLock, LW_SHARED); 2879 2880 for (index = 0; index < arrayP->numProcs; index++) 2881 { 2882 int pgprocno = arrayP->pgprocnos[index]; 2883 volatile PGPROC *proc = &allProcs[pgprocno]; 2884 2885 if (proc->pid == 0) 2886 continue; /* do not count prepared xacts */ 2887 if (proc->isBackgroundWorker) 2888 continue; /* do not count background workers */ 2889 if (proc->roleId == roleid) 2890 count++; 2891 } 2892 2893 LWLockRelease(ProcArrayLock); 2894 2895 return count; 2896 } 2897 2898 /* 2899 * CountOtherDBBackends -- check for other backends running in the given DB 2900 * 2901 * If there are other backends in the DB, we will wait a maximum of 5 seconds 2902 * for them to exit. Autovacuum backends are encouraged to exit early by 2903 * sending them SIGTERM, but normal user backends are just waited for. 2904 * 2905 * The current backend is always ignored; it is caller's responsibility to 2906 * check whether the current backend uses the given DB, if it's important. 2907 * 2908 * Returns true if there are (still) other backends in the DB, false if not. 2909 * Also, *nbackends and *nprepared are set to the number of other backends 2910 * and prepared transactions in the DB, respectively. 2911 * 2912 * This function is used to interlock DROP DATABASE and related commands 2913 * against there being any active backends in the target DB --- dropping the 2914 * DB while active backends remain would be a Bad Thing. Note that we cannot 2915 * detect here the possibility of a newly-started backend that is trying to 2916 * connect to the doomed database, so additional interlocking is needed during 2917 * backend startup. The caller should normally hold an exclusive lock on the 2918 * target DB before calling this, which is one reason we mustn't wait 2919 * indefinitely. 2920 */ 2921 bool 2922 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) 2923 { 2924 ProcArrayStruct *arrayP = procArray; 2925 2926 #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */ 2927 int autovac_pids[MAXAUTOVACPIDS]; 2928 int tries; 2929 2930 /* 50 tries with 100ms sleep between tries makes 5 sec total wait */ 2931 for (tries = 0; tries < 50; tries++) 2932 { 2933 int nautovacs = 0; 2934 bool found = false; 2935 int index; 2936 2937 CHECK_FOR_INTERRUPTS(); 2938 2939 *nbackends = *nprepared = 0; 2940 2941 LWLockAcquire(ProcArrayLock, LW_SHARED); 2942 2943 for (index = 0; index < arrayP->numProcs; index++) 2944 { 2945 int pgprocno = arrayP->pgprocnos[index]; 2946 volatile PGPROC *proc = &allProcs[pgprocno]; 2947 volatile PGXACT *pgxact = &allPgXact[pgprocno]; 2948 2949 if (proc->databaseId != databaseId) 2950 continue; 2951 if (proc == MyProc) 2952 continue; 2953 2954 found = true; 2955 2956 if (proc->pid == 0) 2957 (*nprepared)++; 2958 else 2959 { 2960 (*nbackends)++; 2961 if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && 2962 nautovacs < MAXAUTOVACPIDS) 2963 autovac_pids[nautovacs++] = proc->pid; 2964 } 2965 } 2966 2967 LWLockRelease(ProcArrayLock); 2968 2969 if (!found) 2970 return false; /* no conflicting backends, so done */ 2971 2972 /* 2973 * Send SIGTERM to any conflicting autovacuums before sleeping. We 2974 * postpone this step until after the loop because we don't want to 2975 * hold ProcArrayLock while issuing kill(). We have no idea what might 2976 * block kill() inside the kernel... 2977 */ 2978 for (index = 0; index < nautovacs; index++) 2979 (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */ 2980 2981 /* sleep, then try again */ 2982 pg_usleep(100 * 1000L); /* 100ms */ 2983 } 2984 2985 return true; /* timed out, still conflicts */ 2986 } 2987 2988 /* 2989 * ProcArraySetReplicationSlotXmin 2990 * 2991 * Install limits to future computations of the xmin horizon to prevent vacuum 2992 * and HOT pruning from removing affected rows still needed by clients with 2993 * replication slots. 2994 */ 2995 void 2996 ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, 2997 bool already_locked) 2998 { 2999 Assert(!already_locked || LWLockHeldByMe(ProcArrayLock)); 3000 3001 if (!already_locked) 3002 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3003 3004 procArray->replication_slot_xmin = xmin; 3005 procArray->replication_slot_catalog_xmin = catalog_xmin; 3006 3007 if (!already_locked) 3008 LWLockRelease(ProcArrayLock); 3009 } 3010 3011 /* 3012 * ProcArrayGetReplicationSlotXmin 3013 * 3014 * Return the current slot xmin limits. That's useful to be able to remove 3015 * data that's older than those limits. 3016 */ 3017 void 3018 ProcArrayGetReplicationSlotXmin(TransactionId *xmin, 3019 TransactionId *catalog_xmin) 3020 { 3021 LWLockAcquire(ProcArrayLock, LW_SHARED); 3022 3023 if (xmin != NULL) 3024 *xmin = procArray->replication_slot_xmin; 3025 3026 if (catalog_xmin != NULL) 3027 *catalog_xmin = procArray->replication_slot_catalog_xmin; 3028 3029 LWLockRelease(ProcArrayLock); 3030 } 3031 3032 3033 #define XidCacheRemove(i) \ 3034 do { \ 3035 MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \ 3036 MyPgXact->nxids--; \ 3037 } while (0) 3038 3039 /* 3040 * XidCacheRemoveRunningXids 3041 * 3042 * Remove a bunch of TransactionIds from the list of known-running 3043 * subtransactions for my backend. Both the specified xid and those in 3044 * the xids[] array (of length nxids) are removed from the subxids cache. 3045 * latestXid must be the latest XID among the group. 3046 */ 3047 void 3048 XidCacheRemoveRunningXids(TransactionId xid, 3049 int nxids, const TransactionId *xids, 3050 TransactionId latestXid) 3051 { 3052 int i, 3053 j; 3054 3055 Assert(TransactionIdIsValid(xid)); 3056 3057 /* 3058 * We must hold ProcArrayLock exclusively in order to remove transactions 3059 * from the PGPROC array. (See src/backend/access/transam/README.) It's 3060 * possible this could be relaxed since we know this routine is only used 3061 * to abort subtransactions, but pending closer analysis we'd best be 3062 * conservative. 3063 */ 3064 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3065 3066 /* 3067 * Under normal circumstances xid and xids[] will be in increasing order, 3068 * as will be the entries in subxids. Scan backwards to avoid O(N^2) 3069 * behavior when removing a lot of xids. 3070 */ 3071 for (i = nxids - 1; i >= 0; i--) 3072 { 3073 TransactionId anxid = xids[i]; 3074 3075 for (j = MyPgXact->nxids - 1; j >= 0; j--) 3076 { 3077 if (TransactionIdEquals(MyProc->subxids.xids[j], anxid)) 3078 { 3079 XidCacheRemove(j); 3080 break; 3081 } 3082 } 3083 3084 /* 3085 * Ordinarily we should have found it, unless the cache has 3086 * overflowed. However it's also possible for this routine to be 3087 * invoked multiple times for the same subtransaction, in case of an 3088 * error during AbortSubTransaction. So instead of Assert, emit a 3089 * debug warning. 3090 */ 3091 if (j < 0 && !MyPgXact->overflowed) 3092 elog(WARNING, "did not find subXID %u in MyProc", anxid); 3093 } 3094 3095 for (j = MyPgXact->nxids - 1; j >= 0; j--) 3096 { 3097 if (TransactionIdEquals(MyProc->subxids.xids[j], xid)) 3098 { 3099 XidCacheRemove(j); 3100 break; 3101 } 3102 } 3103 /* Ordinarily we should have found it, unless the cache has overflowed */ 3104 if (j < 0 && !MyPgXact->overflowed) 3105 elog(WARNING, "did not find subXID %u in MyProc", xid); 3106 3107 /* Also advance global latestCompletedXid while holding the lock */ 3108 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, 3109 latestXid)) 3110 ShmemVariableCache->latestCompletedXid = latestXid; 3111 3112 LWLockRelease(ProcArrayLock); 3113 } 3114 3115 #ifdef XIDCACHE_DEBUG 3116 3117 /* 3118 * Print stats about effectiveness of XID cache 3119 */ 3120 static void 3121 DisplayXidCache(void) 3122 { 3123 fprintf(stderr, 3124 "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n", 3125 xc_by_recent_xmin, 3126 xc_by_known_xact, 3127 xc_by_my_xact, 3128 xc_by_latest_xid, 3129 xc_by_main_xid, 3130 xc_by_child_xid, 3131 xc_by_known_assigned, 3132 xc_no_overflow, 3133 xc_slow_answer); 3134 } 3135 #endif /* XIDCACHE_DEBUG */ 3136 3137 3138 /* ---------------------------------------------- 3139 * KnownAssignedTransactions sub-module 3140 * ---------------------------------------------- 3141 */ 3142 3143 /* 3144 * In Hot Standby mode, we maintain a list of transactions that are (or were) 3145 * running in the master at the current point in WAL. These XIDs must be 3146 * treated as running by standby transactions, even though they are not in 3147 * the standby server's PGXACT array. 3148 * 3149 * We record all XIDs that we know have been assigned. That includes all the 3150 * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have 3151 * been assigned. We can deduce the existence of unobserved XIDs because we 3152 * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids 3153 * list expands as new XIDs are observed or inferred, and contracts when 3154 * transaction completion records arrive. 3155 * 3156 * During hot standby we do not fret too much about the distinction between 3157 * top-level XIDs and subtransaction XIDs. We store both together in the 3158 * KnownAssignedXids list. In backends, this is copied into snapshots in 3159 * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot() 3160 * doesn't care about the distinction either. Subtransaction XIDs are 3161 * effectively treated as top-level XIDs and in the typical case pg_subtrans 3162 * links are *not* maintained (which does not affect visibility). 3163 * 3164 * We have room in KnownAssignedXids and in snapshots to hold maxProcs * 3165 * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must 3166 * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at 3167 * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these 3168 * records, we mark the subXIDs as children of the top XID in pg_subtrans, 3169 * and then remove them from KnownAssignedXids. This prevents overflow of 3170 * KnownAssignedXids and snapshots, at the cost that status checks for these 3171 * subXIDs will take a slower path through TransactionIdIsInProgress(). 3172 * This means that KnownAssignedXids is not necessarily complete for subXIDs, 3173 * though it should be complete for top-level XIDs; this is the same situation 3174 * that holds with respect to the PGPROC entries in normal running. 3175 * 3176 * When we throw away subXIDs from KnownAssignedXids, we need to keep track of 3177 * that, similarly to tracking overflow of a PGPROC's subxids array. We do 3178 * that by remembering the lastOverflowedXID, ie the last thrown-away subXID. 3179 * As long as that is within the range of interesting XIDs, we have to assume 3180 * that subXIDs are missing from snapshots. (Note that subXID overflow occurs 3181 * on primary when 65th subXID arrives, whereas on standby it occurs when 64th 3182 * subXID arrives - that is not an error.) 3183 * 3184 * Should a backend on primary somehow disappear before it can write an abort 3185 * record, then we just leave those XIDs in KnownAssignedXids. They actually 3186 * aborted but we think they were running; the distinction is irrelevant 3187 * because either way any changes done by the transaction are not visible to 3188 * backends in the standby. We prune KnownAssignedXids when 3189 * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the 3190 * array due to such dead XIDs. 3191 */ 3192 3193 /* 3194 * RecordKnownAssignedTransactionIds 3195 * Record the given XID in KnownAssignedXids, as well as any preceding 3196 * unobserved XIDs. 3197 * 3198 * RecordKnownAssignedTransactionIds() should be run for *every* WAL record 3199 * associated with a transaction. Must be called for each record after we 3200 * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.. 3201 * 3202 * Called during recovery in analogy with and in place of GetNewTransactionId() 3203 */ 3204 void 3205 RecordKnownAssignedTransactionIds(TransactionId xid) 3206 { 3207 Assert(standbyState >= STANDBY_INITIALIZED); 3208 Assert(TransactionIdIsValid(xid)); 3209 Assert(TransactionIdIsValid(latestObservedXid)); 3210 3211 elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u", 3212 xid, latestObservedXid); 3213 3214 /* 3215 * When a newly observed xid arrives, it is frequently the case that it is 3216 * *not* the next xid in sequence. When this occurs, we must treat the 3217 * intervening xids as running also. 3218 */ 3219 if (TransactionIdFollows(xid, latestObservedXid)) 3220 { 3221 TransactionId next_expected_xid; 3222 3223 /* 3224 * Extend subtrans like we do in GetNewTransactionId() during normal 3225 * operation using individual extend steps. Note that we do not need 3226 * to extend clog since its extensions are WAL logged. 3227 * 3228 * This part has to be done regardless of standbyState since we 3229 * immediately start assigning subtransactions to their toplevel 3230 * transactions. 3231 */ 3232 next_expected_xid = latestObservedXid; 3233 while (TransactionIdPrecedes(next_expected_xid, xid)) 3234 { 3235 TransactionIdAdvance(next_expected_xid); 3236 ExtendSUBTRANS(next_expected_xid); 3237 } 3238 Assert(next_expected_xid == xid); 3239 3240 /* 3241 * If the KnownAssignedXids machinery isn't up yet, there's nothing 3242 * more to do since we don't track assigned xids yet. 3243 */ 3244 if (standbyState <= STANDBY_INITIALIZED) 3245 { 3246 latestObservedXid = xid; 3247 return; 3248 } 3249 3250 /* 3251 * Add (latestObservedXid, xid] onto the KnownAssignedXids array. 3252 */ 3253 next_expected_xid = latestObservedXid; 3254 TransactionIdAdvance(next_expected_xid); 3255 KnownAssignedXidsAdd(next_expected_xid, xid, false); 3256 3257 /* 3258 * Now we can advance latestObservedXid 3259 */ 3260 latestObservedXid = xid; 3261 3262 /* ShmemVariableCache->nextXid must be beyond any observed xid */ 3263 next_expected_xid = latestObservedXid; 3264 TransactionIdAdvance(next_expected_xid); 3265 LWLockAcquire(XidGenLock, LW_EXCLUSIVE); 3266 if (TransactionIdFollows(next_expected_xid, ShmemVariableCache->nextXid)) 3267 ShmemVariableCache->nextXid = next_expected_xid; 3268 LWLockRelease(XidGenLock); 3269 } 3270 } 3271 3272 /* 3273 * ExpireTreeKnownAssignedTransactionIds 3274 * Remove the given XIDs from KnownAssignedXids. 3275 * 3276 * Called during recovery in analogy with and in place of ProcArrayEndTransaction() 3277 */ 3278 void 3279 ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, 3280 TransactionId *subxids, TransactionId max_xid) 3281 { 3282 Assert(standbyState >= STANDBY_INITIALIZED); 3283 3284 /* 3285 * Uses same locking as transaction commit 3286 */ 3287 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3288 3289 KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); 3290 3291 /* As in ProcArrayEndTransaction, advance latestCompletedXid */ 3292 if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, 3293 max_xid)) 3294 ShmemVariableCache->latestCompletedXid = max_xid; 3295 3296 LWLockRelease(ProcArrayLock); 3297 } 3298 3299 /* 3300 * ExpireAllKnownAssignedTransactionIds 3301 * Remove all entries in KnownAssignedXids and reset lastOverflowedXid. 3302 */ 3303 void 3304 ExpireAllKnownAssignedTransactionIds(void) 3305 { 3306 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3307 KnownAssignedXidsRemovePreceding(InvalidTransactionId); 3308 3309 /* 3310 * Reset lastOverflowedXid. Currently, lastOverflowedXid has no use after 3311 * the call of this function. But do this for unification with what 3312 * ExpireOldKnownAssignedTransactionIds() do. 3313 */ 3314 procArray->lastOverflowedXid = InvalidTransactionId; 3315 LWLockRelease(ProcArrayLock); 3316 } 3317 3318 /* 3319 * ExpireOldKnownAssignedTransactionIds 3320 * Remove KnownAssignedXids entries preceding the given XID and 3321 * potentially reset lastOverflowedXid. 3322 */ 3323 void 3324 ExpireOldKnownAssignedTransactionIds(TransactionId xid) 3325 { 3326 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3327 3328 /* 3329 * Reset lastOverflowedXid if we know all transactions that have been 3330 * possibly running are being gone. Not doing so could cause an incorrect 3331 * lastOverflowedXid value, which makes extra snapshots be marked as 3332 * suboverflowed. 3333 */ 3334 if (TransactionIdPrecedes(procArray->lastOverflowedXid, xid)) 3335 procArray->lastOverflowedXid = InvalidTransactionId; 3336 KnownAssignedXidsRemovePreceding(xid); 3337 LWLockRelease(ProcArrayLock); 3338 } 3339 3340 3341 /* 3342 * Private module functions to manipulate KnownAssignedXids 3343 * 3344 * There are 5 main uses of the KnownAssignedXids data structure: 3345 * 3346 * * backends taking snapshots - all valid XIDs need to be copied out 3347 * * backends seeking to determine presence of a specific XID 3348 * * startup process adding new known-assigned XIDs 3349 * * startup process removing specific XIDs as transactions end 3350 * * startup process pruning array when special WAL records arrive 3351 * 3352 * This data structure is known to be a hot spot during Hot Standby, so we 3353 * go to some lengths to make these operations as efficient and as concurrent 3354 * as possible. 3355 * 3356 * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes 3357 * order, to be exact --- to allow binary search for specific XIDs. Note: 3358 * in general TransactionIdPrecedes would not provide a total order, but 3359 * we know that the entries present at any instant should not extend across 3360 * a large enough fraction of XID space to wrap around (the master would 3361 * shut down for fear of XID wrap long before that happens). So it's OK to 3362 * use TransactionIdPrecedes as a binary-search comparator. 3363 * 3364 * It's cheap to maintain the sortedness during insertions, since new known 3365 * XIDs are always reported in XID order; we just append them at the right. 3366 * 3367 * To keep individual deletions cheap, we need to allow gaps in the array. 3368 * This is implemented by marking array elements as valid or invalid using 3369 * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done 3370 * by setting KnownAssignedXidsValid[i] to false, *without* clearing the 3371 * XID entry itself. This preserves the property that the XID entries are 3372 * sorted, so we can do binary searches easily. Periodically we compress 3373 * out the unused entries; that's much cheaper than having to compress the 3374 * array immediately on every deletion. 3375 * 3376 * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[] 3377 * are those with indexes tail <= i < head; items outside this subscript range 3378 * have unspecified contents. When head reaches the end of the array, we 3379 * force compression of unused entries rather than wrapping around, since 3380 * allowing wraparound would greatly complicate the search logic. We maintain 3381 * an explicit tail pointer so that pruning of old XIDs can be done without 3382 * immediately moving the array contents. In most cases only a small fraction 3383 * of the array contains valid entries at any instant. 3384 * 3385 * Although only the startup process can ever change the KnownAssignedXids 3386 * data structure, we still need interlocking so that standby backends will 3387 * not observe invalid intermediate states. The convention is that backends 3388 * must hold shared ProcArrayLock to examine the array. To remove XIDs from 3389 * the array, the startup process must hold ProcArrayLock exclusively, for 3390 * the usual transactional reasons (compare commit/abort of a transaction 3391 * during normal running). Compressing unused entries out of the array 3392 * likewise requires exclusive lock. To add XIDs to the array, we just insert 3393 * them into slots to the right of the head pointer and then advance the head 3394 * pointer. This wouldn't require any lock at all, except that on machines 3395 * with weak memory ordering we need to be careful that other processors 3396 * see the array element changes before they see the head pointer change. 3397 * We handle this by using a spinlock to protect reads and writes of the 3398 * head/tail pointers. (We could dispense with the spinlock if we were to 3399 * create suitable memory access barrier primitives and use those instead.) 3400 * The spinlock must be taken to read or write the head/tail pointers unless 3401 * the caller holds ProcArrayLock exclusively. 3402 * 3403 * Algorithmic analysis: 3404 * 3405 * If we have a maximum of M slots, with N XIDs currently spread across 3406 * S elements then we have N <= S <= M always. 3407 * 3408 * * Adding a new XID is O(1) and needs little locking (unless compression 3409 * must happen) 3410 * * Compressing the array is O(S) and requires exclusive lock 3411 * * Removing an XID is O(logS) and requires exclusive lock 3412 * * Taking a snapshot is O(S) and requires shared lock 3413 * * Checking for an XID is O(logS) and requires shared lock 3414 * 3415 * In comparison, using a hash table for KnownAssignedXids would mean that 3416 * taking snapshots would be O(M). If we can maintain S << M then the 3417 * sorted array technique will deliver significantly faster snapshots. 3418 * If we try to keep S too small then we will spend too much time compressing, 3419 * so there is an optimal point for any workload mix. We use a heuristic to 3420 * decide when to compress the array, though trimming also helps reduce 3421 * frequency of compressing. The heuristic requires us to track the number of 3422 * currently valid XIDs in the array. 3423 */ 3424 3425 3426 /* 3427 * Compress KnownAssignedXids by shifting valid data down to the start of the 3428 * array, removing any gaps. 3429 * 3430 * A compression step is forced if "force" is true, otherwise we do it 3431 * only if a heuristic indicates it's a good time to do it. 3432 * 3433 * Caller must hold ProcArrayLock in exclusive mode. 3434 */ 3435 static void 3436 KnownAssignedXidsCompress(bool force) 3437 { 3438 /* use volatile pointer to prevent code rearrangement */ 3439 volatile ProcArrayStruct *pArray = procArray; 3440 int head, 3441 tail; 3442 int compress_index; 3443 int i; 3444 3445 /* no spinlock required since we hold ProcArrayLock exclusively */ 3446 head = pArray->headKnownAssignedXids; 3447 tail = pArray->tailKnownAssignedXids; 3448 3449 if (!force) 3450 { 3451 /* 3452 * If we can choose how much to compress, use a heuristic to avoid 3453 * compressing too often or not often enough. 3454 * 3455 * Heuristic is if we have a large enough current spread and less than 3456 * 50% of the elements are currently in use, then compress. This 3457 * should ensure we compress fairly infrequently. We could compress 3458 * less often though the virtual array would spread out more and 3459 * snapshots would become more expensive. 3460 */ 3461 int nelements = head - tail; 3462 3463 if (nelements < 4 * PROCARRAY_MAXPROCS || 3464 nelements < 2 * pArray->numKnownAssignedXids) 3465 return; 3466 } 3467 3468 /* 3469 * We compress the array by reading the valid values from tail to head, 3470 * re-aligning data to 0th element. 3471 */ 3472 compress_index = 0; 3473 for (i = tail; i < head; i++) 3474 { 3475 if (KnownAssignedXidsValid[i]) 3476 { 3477 KnownAssignedXids[compress_index] = KnownAssignedXids[i]; 3478 KnownAssignedXidsValid[compress_index] = true; 3479 compress_index++; 3480 } 3481 } 3482 3483 pArray->tailKnownAssignedXids = 0; 3484 pArray->headKnownAssignedXids = compress_index; 3485 } 3486 3487 /* 3488 * Add xids into KnownAssignedXids at the head of the array. 3489 * 3490 * xids from from_xid to to_xid, inclusive, are added to the array. 3491 * 3492 * If exclusive_lock is true then caller already holds ProcArrayLock in 3493 * exclusive mode, so we need no extra locking here. Else caller holds no 3494 * lock, so we need to be sure we maintain sufficient interlocks against 3495 * concurrent readers. (Only the startup process ever calls this, so no need 3496 * to worry about concurrent writers.) 3497 */ 3498 static void 3499 KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, 3500 bool exclusive_lock) 3501 { 3502 /* use volatile pointer to prevent code rearrangement */ 3503 volatile ProcArrayStruct *pArray = procArray; 3504 TransactionId next_xid; 3505 int head, 3506 tail; 3507 int nxids; 3508 int i; 3509 3510 Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid)); 3511 3512 /* 3513 * Calculate how many array slots we'll need. Normally this is cheap; in 3514 * the unusual case where the XIDs cross the wrap point, we do it the hard 3515 * way. 3516 */ 3517 if (to_xid >= from_xid) 3518 nxids = to_xid - from_xid + 1; 3519 else 3520 { 3521 nxids = 1; 3522 next_xid = from_xid; 3523 while (TransactionIdPrecedes(next_xid, to_xid)) 3524 { 3525 nxids++; 3526 TransactionIdAdvance(next_xid); 3527 } 3528 } 3529 3530 /* 3531 * Since only the startup process modifies the head/tail pointers, we 3532 * don't need a lock to read them here. 3533 */ 3534 head = pArray->headKnownAssignedXids; 3535 tail = pArray->tailKnownAssignedXids; 3536 3537 Assert(head >= 0 && head <= pArray->maxKnownAssignedXids); 3538 Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids); 3539 3540 /* 3541 * Verify that insertions occur in TransactionId sequence. Note that even 3542 * if the last existing element is marked invalid, it must still have a 3543 * correctly sequenced XID value. 3544 */ 3545 if (head > tail && 3546 TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid)) 3547 { 3548 KnownAssignedXidsDisplay(LOG); 3549 elog(ERROR, "out-of-order XID insertion in KnownAssignedXids"); 3550 } 3551 3552 /* 3553 * If our xids won't fit in the remaining space, compress out free space 3554 */ 3555 if (head + nxids > pArray->maxKnownAssignedXids) 3556 { 3557 /* must hold lock to compress */ 3558 if (!exclusive_lock) 3559 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 3560 3561 KnownAssignedXidsCompress(true); 3562 3563 head = pArray->headKnownAssignedXids; 3564 /* note: we no longer care about the tail pointer */ 3565 3566 if (!exclusive_lock) 3567 LWLockRelease(ProcArrayLock); 3568 3569 /* 3570 * If it still won't fit then we're out of memory 3571 */ 3572 if (head + nxids > pArray->maxKnownAssignedXids) 3573 elog(ERROR, "too many KnownAssignedXids"); 3574 } 3575 3576 /* Now we can insert the xids into the space starting at head */ 3577 next_xid = from_xid; 3578 for (i = 0; i < nxids; i++) 3579 { 3580 KnownAssignedXids[head] = next_xid; 3581 KnownAssignedXidsValid[head] = true; 3582 TransactionIdAdvance(next_xid); 3583 head++; 3584 } 3585 3586 /* Adjust count of number of valid entries */ 3587 pArray->numKnownAssignedXids += nxids; 3588 3589 /* 3590 * Now update the head pointer. We use a spinlock to protect this 3591 * pointer, not because the update is likely to be non-atomic, but to 3592 * ensure that other processors see the above array updates before they 3593 * see the head pointer change. 3594 * 3595 * If we're holding ProcArrayLock exclusively, there's no need to take the 3596 * spinlock. 3597 */ 3598 if (exclusive_lock) 3599 pArray->headKnownAssignedXids = head; 3600 else 3601 { 3602 SpinLockAcquire(&pArray->known_assigned_xids_lck); 3603 pArray->headKnownAssignedXids = head; 3604 SpinLockRelease(&pArray->known_assigned_xids_lck); 3605 } 3606 } 3607 3608 /* 3609 * KnownAssignedXidsSearch 3610 * 3611 * Searches KnownAssignedXids for a specific xid and optionally removes it. 3612 * Returns true if it was found, false if not. 3613 * 3614 * Caller must hold ProcArrayLock in shared or exclusive mode. 3615 * Exclusive lock must be held for remove = true. 3616 */ 3617 static bool 3618 KnownAssignedXidsSearch(TransactionId xid, bool remove) 3619 { 3620 /* use volatile pointer to prevent code rearrangement */ 3621 volatile ProcArrayStruct *pArray = procArray; 3622 int first, 3623 last; 3624 int head; 3625 int tail; 3626 int result_index = -1; 3627 3628 if (remove) 3629 { 3630 /* we hold ProcArrayLock exclusively, so no need for spinlock */ 3631 tail = pArray->tailKnownAssignedXids; 3632 head = pArray->headKnownAssignedXids; 3633 } 3634 else 3635 { 3636 /* take spinlock to ensure we see up-to-date array contents */ 3637 SpinLockAcquire(&pArray->known_assigned_xids_lck); 3638 tail = pArray->tailKnownAssignedXids; 3639 head = pArray->headKnownAssignedXids; 3640 SpinLockRelease(&pArray->known_assigned_xids_lck); 3641 } 3642 3643 /* 3644 * Standard binary search. Note we can ignore the KnownAssignedXidsValid 3645 * array here, since even invalid entries will contain sorted XIDs. 3646 */ 3647 first = tail; 3648 last = head - 1; 3649 while (first <= last) 3650 { 3651 int mid_index; 3652 TransactionId mid_xid; 3653 3654 mid_index = (first + last) / 2; 3655 mid_xid = KnownAssignedXids[mid_index]; 3656 3657 if (xid == mid_xid) 3658 { 3659 result_index = mid_index; 3660 break; 3661 } 3662 else if (TransactionIdPrecedes(xid, mid_xid)) 3663 last = mid_index - 1; 3664 else 3665 first = mid_index + 1; 3666 } 3667 3668 if (result_index < 0) 3669 return false; /* not in array */ 3670 3671 if (!KnownAssignedXidsValid[result_index]) 3672 return false; /* in array, but invalid */ 3673 3674 if (remove) 3675 { 3676 KnownAssignedXidsValid[result_index] = false; 3677 3678 pArray->numKnownAssignedXids--; 3679 Assert(pArray->numKnownAssignedXids >= 0); 3680 3681 /* 3682 * If we're removing the tail element then advance tail pointer over 3683 * any invalid elements. This will speed future searches. 3684 */ 3685 if (result_index == tail) 3686 { 3687 tail++; 3688 while (tail < head && !KnownAssignedXidsValid[tail]) 3689 tail++; 3690 if (tail >= head) 3691 { 3692 /* Array is empty, so we can reset both pointers */ 3693 pArray->headKnownAssignedXids = 0; 3694 pArray->tailKnownAssignedXids = 0; 3695 } 3696 else 3697 { 3698 pArray->tailKnownAssignedXids = tail; 3699 } 3700 } 3701 } 3702 3703 return true; 3704 } 3705 3706 /* 3707 * Is the specified XID present in KnownAssignedXids[]? 3708 * 3709 * Caller must hold ProcArrayLock in shared or exclusive mode. 3710 */ 3711 static bool 3712 KnownAssignedXidExists(TransactionId xid) 3713 { 3714 Assert(TransactionIdIsValid(xid)); 3715 3716 return KnownAssignedXidsSearch(xid, false); 3717 } 3718 3719 /* 3720 * Remove the specified XID from KnownAssignedXids[]. 3721 * 3722 * Caller must hold ProcArrayLock in exclusive mode. 3723 */ 3724 static void 3725 KnownAssignedXidsRemove(TransactionId xid) 3726 { 3727 Assert(TransactionIdIsValid(xid)); 3728 3729 elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); 3730 3731 /* 3732 * Note: we cannot consider it an error to remove an XID that's not 3733 * present. We intentionally remove subxact IDs while processing 3734 * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be 3735 * removed again when the top-level xact commits or aborts. 3736 * 3737 * It might be possible to track such XIDs to distinguish this case from 3738 * actual errors, but it would be complicated and probably not worth it. 3739 * So, just ignore the search result. 3740 */ 3741 (void) KnownAssignedXidsSearch(xid, true); 3742 } 3743 3744 /* 3745 * KnownAssignedXidsRemoveTree 3746 * Remove xid (if it's not InvalidTransactionId) and all the subxids. 3747 * 3748 * Caller must hold ProcArrayLock in exclusive mode. 3749 */ 3750 static void 3751 KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, 3752 TransactionId *subxids) 3753 { 3754 int i; 3755 3756 if (TransactionIdIsValid(xid)) 3757 KnownAssignedXidsRemove(xid); 3758 3759 for (i = 0; i < nsubxids; i++) 3760 KnownAssignedXidsRemove(subxids[i]); 3761 3762 /* Opportunistically compress the array */ 3763 KnownAssignedXidsCompress(false); 3764 } 3765 3766 /* 3767 * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid 3768 * then clear the whole table. 3769 * 3770 * Caller must hold ProcArrayLock in exclusive mode. 3771 */ 3772 static void 3773 KnownAssignedXidsRemovePreceding(TransactionId removeXid) 3774 { 3775 /* use volatile pointer to prevent code rearrangement */ 3776 volatile ProcArrayStruct *pArray = procArray; 3777 int count = 0; 3778 int head, 3779 tail, 3780 i; 3781 3782 if (!TransactionIdIsValid(removeXid)) 3783 { 3784 elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids"); 3785 pArray->numKnownAssignedXids = 0; 3786 pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0; 3787 return; 3788 } 3789 3790 elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid); 3791 3792 /* 3793 * Mark entries invalid starting at the tail. Since array is sorted, we 3794 * can stop as soon as we reach an entry >= removeXid. 3795 */ 3796 tail = pArray->tailKnownAssignedXids; 3797 head = pArray->headKnownAssignedXids; 3798 3799 for (i = tail; i < head; i++) 3800 { 3801 if (KnownAssignedXidsValid[i]) 3802 { 3803 TransactionId knownXid = KnownAssignedXids[i]; 3804 3805 if (TransactionIdFollowsOrEquals(knownXid, removeXid)) 3806 break; 3807 3808 if (!StandbyTransactionIdIsPrepared(knownXid)) 3809 { 3810 KnownAssignedXidsValid[i] = false; 3811 count++; 3812 } 3813 } 3814 } 3815 3816 pArray->numKnownAssignedXids -= count; 3817 Assert(pArray->numKnownAssignedXids >= 0); 3818 3819 /* 3820 * Advance the tail pointer if we've marked the tail item invalid. 3821 */ 3822 for (i = tail; i < head; i++) 3823 { 3824 if (KnownAssignedXidsValid[i]) 3825 break; 3826 } 3827 if (i >= head) 3828 { 3829 /* Array is empty, so we can reset both pointers */ 3830 pArray->headKnownAssignedXids = 0; 3831 pArray->tailKnownAssignedXids = 0; 3832 } 3833 else 3834 { 3835 pArray->tailKnownAssignedXids = i; 3836 } 3837 3838 /* Opportunistically compress the array */ 3839 KnownAssignedXidsCompress(false); 3840 } 3841 3842 /* 3843 * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids. 3844 * We filter out anything >= xmax. 3845 * 3846 * Returns the number of XIDs stored into xarray[]. Caller is responsible 3847 * that array is large enough. 3848 * 3849 * Caller must hold ProcArrayLock in (at least) shared mode. 3850 */ 3851 static int 3852 KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax) 3853 { 3854 TransactionId xtmp = InvalidTransactionId; 3855 3856 return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax); 3857 } 3858 3859 /* 3860 * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus 3861 * we reduce *xmin to the lowest xid value seen if not already lower. 3862 * 3863 * Caller must hold ProcArrayLock in (at least) shared mode. 3864 */ 3865 static int 3866 KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, 3867 TransactionId xmax) 3868 { 3869 int count = 0; 3870 int head, 3871 tail; 3872 int i; 3873 3874 /* 3875 * Fetch head just once, since it may change while we loop. We can stop 3876 * once we reach the initially seen head, since we are certain that an xid 3877 * cannot enter and then leave the array while we hold ProcArrayLock. We 3878 * might miss newly-added xids, but they should be >= xmax so irrelevant 3879 * anyway. 3880 * 3881 * Must take spinlock to ensure we see up-to-date array contents. 3882 */ 3883 SpinLockAcquire(&procArray->known_assigned_xids_lck); 3884 tail = procArray->tailKnownAssignedXids; 3885 head = procArray->headKnownAssignedXids; 3886 SpinLockRelease(&procArray->known_assigned_xids_lck); 3887 3888 for (i = tail; i < head; i++) 3889 { 3890 /* Skip any gaps in the array */ 3891 if (KnownAssignedXidsValid[i]) 3892 { 3893 TransactionId knownXid = KnownAssignedXids[i]; 3894 3895 /* 3896 * Update xmin if required. Only the first XID need be checked, 3897 * since the array is sorted. 3898 */ 3899 if (count == 0 && 3900 TransactionIdPrecedes(knownXid, *xmin)) 3901 *xmin = knownXid; 3902 3903 /* 3904 * Filter out anything >= xmax, again relying on sorted property 3905 * of array. 3906 */ 3907 if (TransactionIdIsValid(xmax) && 3908 TransactionIdFollowsOrEquals(knownXid, xmax)) 3909 break; 3910 3911 /* Add knownXid into output array */ 3912 xarray[count++] = knownXid; 3913 } 3914 } 3915 3916 return count; 3917 } 3918 3919 /* 3920 * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId 3921 * if nothing there. 3922 */ 3923 static TransactionId 3924 KnownAssignedXidsGetOldestXmin(void) 3925 { 3926 int head, 3927 tail; 3928 int i; 3929 3930 /* 3931 * Fetch head just once, since it may change while we loop. 3932 */ 3933 SpinLockAcquire(&procArray->known_assigned_xids_lck); 3934 tail = procArray->tailKnownAssignedXids; 3935 head = procArray->headKnownAssignedXids; 3936 SpinLockRelease(&procArray->known_assigned_xids_lck); 3937 3938 for (i = tail; i < head; i++) 3939 { 3940 /* Skip any gaps in the array */ 3941 if (KnownAssignedXidsValid[i]) 3942 return KnownAssignedXids[i]; 3943 } 3944 3945 return InvalidTransactionId; 3946 } 3947 3948 /* 3949 * Display KnownAssignedXids to provide debug trail 3950 * 3951 * Currently this is only called within startup process, so we need no 3952 * special locking. 3953 * 3954 * Note this is pretty expensive, and much of the expense will be incurred 3955 * even if the elog message will get discarded. It's not currently called 3956 * in any performance-critical places, however, so no need to be tenser. 3957 */ 3958 static void 3959 KnownAssignedXidsDisplay(int trace_level) 3960 { 3961 /* use volatile pointer to prevent code rearrangement */ 3962 volatile ProcArrayStruct *pArray = procArray; 3963 StringInfoData buf; 3964 int head, 3965 tail, 3966 i; 3967 int nxids = 0; 3968 3969 tail = pArray->tailKnownAssignedXids; 3970 head = pArray->headKnownAssignedXids; 3971 3972 initStringInfo(&buf); 3973 3974 for (i = tail; i < head; i++) 3975 { 3976 if (KnownAssignedXidsValid[i]) 3977 { 3978 nxids++; 3979 appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]); 3980 } 3981 } 3982 3983 elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s", 3984 nxids, 3985 pArray->numKnownAssignedXids, 3986 pArray->tailKnownAssignedXids, 3987 pArray->headKnownAssignedXids, 3988 buf.data); 3989 3990 pfree(buf.data); 3991 } 3992 3993 /* 3994 * KnownAssignedXidsReset 3995 * Resets KnownAssignedXids to be empty 3996 */ 3997 static void 3998 KnownAssignedXidsReset(void) 3999 { 4000 /* use volatile pointer to prevent code rearrangement */ 4001 volatile ProcArrayStruct *pArray = procArray; 4002 4003 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); 4004 4005 pArray->numKnownAssignedXids = 0; 4006 pArray->tailKnownAssignedXids = 0; 4007 pArray->headKnownAssignedXids = 0; 4008 4009 LWLockRelease(ProcArrayLock); 4010 } 4011