1 /*------------------------------------------------------------------------- 2 * 3 * proc.h 4 * per-process shared memory data structures 5 * 6 * 7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * src/include/storage/proc.h 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #ifndef _PROC_H_ 15 #define _PROC_H_ 16 17 #include "access/clog.h" 18 #include "access/xlogdefs.h" 19 #include "lib/ilist.h" 20 #include "storage/latch.h" 21 #include "storage/lock.h" 22 #include "storage/pg_sema.h" 23 #include "storage/proclist_types.h" 24 25 /* 26 * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds 27 * for non-aborted subtransactions of its current top transaction. These 28 * have to be treated as running XIDs by other backends. 29 * 30 * We also keep track of whether the cache overflowed (ie, the transaction has 31 * generated at least one subtransaction that didn't fit in the cache). 32 * If none of the caches have overflowed, we can assume that an XID that's not 33 * listed anywhere in the PGPROC array is not a running transaction. Else we 34 * have to look at pg_subtrans. 35 */ 36 #define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */ 37 38 typedef struct XidCacheStatus 39 { 40 /* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */ 41 uint8 count; 42 /* has PGPROC->subxids overflowed */ 43 bool overflowed; 44 } XidCacheStatus; 45 46 struct XidCache 47 { 48 TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; 49 }; 50 51 /* 52 * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[] 53 */ 54 #define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */ 55 #define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */ 56 #define PROC_IN_SAFE_IC 0x04 /* currently running CREATE INDEX 57 * CONCURRENTLY or REINDEX 58 * CONCURRENTLY on non-expressional, 59 * non-partial index */ 60 #define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ 61 #define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical 62 * decoding outside xact */ 63 64 /* flags reset at EOXact */ 65 #define PROC_VACUUM_STATE_MASK \ 66 (PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND) 67 68 /* 69 * We allow a small number of "weak" relation locks (AccessShareLock, 70 * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure 71 * rather than the main lock table. This eases contention on the lock 72 * manager LWLocks. See storage/lmgr/README for additional details. 73 */ 74 #define FP_LOCK_SLOTS_PER_BACKEND 16 75 76 /* 77 * An invalid pgprocno. Must be larger than the maximum number of PGPROC 78 * structures we could possibly have. See comments for MAX_BACKENDS. 79 */ 80 #define INVALID_PGPROCNO PG_INT32_MAX 81 82 typedef enum 83 { 84 PROC_WAIT_STATUS_OK, 85 PROC_WAIT_STATUS_WAITING, 86 PROC_WAIT_STATUS_ERROR, 87 } ProcWaitStatus; 88 89 /* 90 * Each backend has a PGPROC struct in shared memory. There is also a list of 91 * currently-unused PGPROC structs that will be reallocated to new backends. 92 * 93 * links: list link for any list the PGPROC is in. When waiting for a lock, 94 * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC 95 * is linked into ProcGlobal's freeProcs list. 96 * 97 * Note: twophase.c also sets up a dummy PGPROC struct for each currently 98 * prepared transaction. These PGPROCs appear in the ProcArray data structure 99 * so that the prepared transactions appear to be still running and are 100 * correctly shown as holding locks. A prepared transaction PGPROC can be 101 * distinguished from a real one at need by the fact that it has pid == 0. 102 * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused, 103 * but its myProcLocks[] lists are valid. 104 * 105 * We allow many fields of this struct to be accessed without locks, such as 106 * delayChkpt and isBackgroundWorker. However, keep in mind that writing 107 * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in 108 * at least shared mode, so that pgxactoff does not change concurrently. 109 * 110 * Mirrored fields: 111 * 112 * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an 113 * element of more densely packed ProcGlobal arrays. These arrays are indexed 114 * by PGPROC->pgxactoff. Both copies need to be maintained coherently. 115 * 116 * NB: The pgxactoff indexed value can *never* be accessed without holding 117 * locks. 118 * 119 * See PROC_HDR for details. 120 */ 121 struct PGPROC 122 { 123 /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ 124 SHM_QUEUE links; /* list link if process is in a list */ 125 PGPROC **procgloballist; /* procglobal list that owns this PGPROC */ 126 127 PGSemaphore sem; /* ONE semaphore to sleep on */ 128 ProcWaitStatus waitStatus; 129 130 Latch procLatch; /* generic latch for process */ 131 132 133 TransactionId xid; /* id of top-level transaction currently being 134 * executed by this proc, if running and XID 135 * is assigned; else InvalidTransactionId. 136 * mirrored in ProcGlobal->xids[pgxactoff] */ 137 138 TransactionId xmin; /* minimal running XID as it was when we were 139 * starting our xact, excluding LAZY VACUUM: 140 * vacuum must not remove tuples deleted by 141 * xid >= xmin ! */ 142 143 LocalTransactionId lxid; /* local id of top-level transaction currently 144 * being executed by this proc, if running; 145 * else InvalidLocalTransactionId */ 146 int pid; /* Backend's process ID; 0 if prepared xact */ 147 148 int pgxactoff; /* offset into various ProcGlobal->arrays with 149 * data mirrored from this PGPROC */ 150 int pgprocno; 151 152 /* These fields are zero while a backend is still starting up: */ 153 BackendId backendId; /* This backend's backend ID (if assigned) */ 154 Oid databaseId; /* OID of database this backend is using */ 155 Oid roleId; /* OID of role using this backend */ 156 157 Oid tempNamespaceId; /* OID of temp schema this backend is 158 * using */ 159 160 bool isBackgroundWorker; /* true if background worker. */ 161 162 /* 163 * While in hot standby mode, shows that a conflict signal has been sent 164 * for the current transaction. Set/cleared while holding ProcArrayLock, 165 * though not required. Accessed without lock, if needed. 166 */ 167 bool recoveryConflictPending; 168 169 /* Info about LWLock the process is currently waiting for, if any. */ 170 bool lwWaiting; /* true if waiting for an LW lock */ 171 uint8 lwWaitMode; /* lwlock mode being waited for */ 172 proclist_node lwWaitLink; /* position in LW lock wait list */ 173 174 /* Support for condition variables. */ 175 proclist_node cvWaitLink; /* position in CV wait list */ 176 177 /* Info about lock the process is currently waiting for, if any. */ 178 /* waitLock and waitProcLock are NULL if not currently waiting. */ 179 LOCK *waitLock; /* Lock object we're sleeping on ... */ 180 PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */ 181 LOCKMODE waitLockMode; /* type of lock we're waiting for */ 182 LOCKMASK heldLocks; /* bitmask for lock types already held on this 183 * lock object by this backend */ 184 pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition 185 * started */ 186 187 bool delayChkpt; /* true if this proc delays checkpoint start */ 188 189 uint8 statusFlags; /* this backend's status flags, see PROC_* 190 * above. mirrored in 191 * ProcGlobal->statusFlags[pgxactoff] */ 192 193 /* 194 * Info to allow us to wait for synchronous replication, if needed. 195 * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. 196 * syncRepState must not be touched except by owning process or WALSender. 197 * syncRepLinks used only while holding SyncRepLock. 198 */ 199 XLogRecPtr waitLSN; /* waiting for this LSN or higher */ 200 int syncRepState; /* wait state for sync rep */ 201 SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */ 202 203 /* 204 * All PROCLOCK objects for locks held or awaited by this backend are 205 * linked into one of these lists, according to the partition number of 206 * their lock. 207 */ 208 SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; 209 210 XidCacheStatus subxidStatus; /* mirrored with 211 * ProcGlobal->subxidStates[i] */ 212 struct XidCache subxids; /* cache for subtransaction XIDs */ 213 214 /* Support for group XID clearing. */ 215 /* true, if member of ProcArray group waiting for XID clear */ 216 bool procArrayGroupMember; 217 /* next ProcArray group member waiting for XID clear */ 218 pg_atomic_uint32 procArrayGroupNext; 219 220 /* 221 * latest transaction id among the transaction's main XID and 222 * subtransactions 223 */ 224 TransactionId procArrayGroupMemberXid; 225 226 uint32 wait_event_info; /* proc's wait information */ 227 228 /* Support for group transaction status update. */ 229 bool clogGroupMember; /* true, if member of clog group */ 230 pg_atomic_uint32 clogGroupNext; /* next clog group member */ 231 TransactionId clogGroupMemberXid; /* transaction id of clog group member */ 232 XidStatus clogGroupMemberXidStatus; /* transaction status of clog 233 * group member */ 234 int clogGroupMemberPage; /* clog page corresponding to 235 * transaction id of clog group member */ 236 XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog 237 * group member */ 238 239 /* Lock manager data, recording fast-path locks taken by this backend. */ 240 LWLock fpInfoLock; /* protects per-backend fast-path state */ 241 uint64 fpLockBits; /* lock modes held for each fast-path slot */ 242 Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ 243 bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ 244 LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID 245 * lock */ 246 247 /* 248 * Support for lock groups. Use LockHashPartitionLockByProc on the group 249 * leader to get the LWLock protecting these fields. 250 */ 251 PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ 252 dlist_head lockGroupMembers; /* list of members, if I'm a leader */ 253 dlist_node lockGroupLink; /* my member link, if I'm a member */ 254 }; 255 256 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ 257 258 259 extern PGDLLIMPORT PGPROC *MyProc; 260 261 /* 262 * There is one ProcGlobal struct for the whole database cluster. 263 * 264 * Adding/Removing an entry into the procarray requires holding *both* 265 * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are 266 * needed because the dense arrays (see below) are accessed from 267 * GetNewTransactionId() and GetSnapshotData(), and we don't want to add 268 * further contention by both using the same lock. Adding/Removing a procarray 269 * entry is much less frequent. 270 * 271 * Some fields in PGPROC are mirrored into more densely packed arrays (e.g. 272 * xids), with one entry for each backend. These arrays only contain entries 273 * for PGPROCs that have been added to the shared array with ProcArrayAdd() 274 * (in contrast to PGPROC array which has unused PGPROCs interspersed). 275 * 276 * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent 277 * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray 278 * member to change. Therefore it is only safe to use PGPROC->pgxactoff to 279 * access the dense array while holding either ProcArrayLock or XidGenLock. 280 * 281 * As long as a PGPROC is in the procarray, the mirrored values need to be 282 * maintained in both places in a coherent manner. 283 * 284 * The denser separate arrays are beneficial for three main reasons: First, to 285 * allow for as tight loops accessing the data as possible. Second, to prevent 286 * updates of frequently changing data (e.g. xmin) from invalidating 287 * cachelines also containing less frequently changing data (e.g. xid, 288 * statusFlags). Third to condense frequently accessed data into as few 289 * cachelines as possible. 290 * 291 * There are two main reasons to have the data mirrored between these dense 292 * arrays and PGPROC. First, as explained above, a PGPROC's array entries can 293 * only be accessed with either ProcArrayLock or XidGenLock held, whereas the 294 * PGPROC entries do not require that (obviously there may still be locking 295 * requirements around the individual field, separate from the concerns 296 * here). That is particularly important for a backend to efficiently checks 297 * it own values, which it often can safely do without locking. Second, the 298 * PGPROC fields allow to avoid unnecessary accesses and modification to the 299 * dense arrays. A backend's own PGPROC is more likely to be in a local cache, 300 * whereas the cachelines for the dense array will be modified by other 301 * backends (often removing it from the cache for other cores/sockets). At 302 * commit/abort time a check of the PGPROC value can avoid accessing/dirtying 303 * the corresponding array value. 304 * 305 * Basically it makes sense to access the PGPROC variable when checking a 306 * single backend's data, especially when already looking at the PGPROC for 307 * other reasons already. It makes sense to look at the "dense" arrays if we 308 * need to look at many / most entries, because we then benefit from the 309 * reduced indirection and better cross-process cache-ability. 310 * 311 * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data 312 * in the dense arrays is initialized from the PGPROC while it already holds 313 * ProcArrayLock. 314 */ 315 typedef struct PROC_HDR 316 { 317 /* Array of PGPROC structures (not including dummies for prepared txns) */ 318 PGPROC *allProcs; 319 320 /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */ 321 TransactionId *xids; 322 323 /* 324 * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the 325 * procarray. 326 */ 327 XidCacheStatus *subxidStates; 328 329 /* 330 * Array mirroring PGPROC.statusFlags for each PGPROC currently in the 331 * procarray. 332 */ 333 uint8 *statusFlags; 334 335 /* Length of allProcs array */ 336 uint32 allProcCount; 337 /* Head of list of free PGPROC structures */ 338 PGPROC *freeProcs; 339 /* Head of list of autovacuum's free PGPROC structures */ 340 PGPROC *autovacFreeProcs; 341 /* Head of list of bgworker free PGPROC structures */ 342 PGPROC *bgworkerFreeProcs; 343 /* Head of list of walsender free PGPROC structures */ 344 PGPROC *walsenderFreeProcs; 345 /* First pgproc waiting for group XID clear */ 346 pg_atomic_uint32 procArrayGroupFirst; 347 /* First pgproc waiting for group transaction status update */ 348 pg_atomic_uint32 clogGroupFirst; 349 /* WALWriter process's latch */ 350 Latch *walwriterLatch; 351 /* Checkpointer process's latch */ 352 Latch *checkpointerLatch; 353 /* Current shared estimate of appropriate spins_per_delay value */ 354 int spins_per_delay; 355 /* The proc of the Startup process, since not in ProcArray */ 356 PGPROC *startupProc; 357 int startupProcPid; 358 /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ 359 int startupBufferPinWaitBufId; 360 } PROC_HDR; 361 362 extern PGDLLIMPORT PROC_HDR *ProcGlobal; 363 364 extern PGPROC *PreparedXactProcs; 365 366 /* Accessor for PGPROC given a pgprocno. */ 367 #define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)]) 368 369 /* 370 * We set aside some extra PGPROC structures for auxiliary processes, 371 * ie things that aren't full-fledged backends but need shmem access. 372 * 373 * Background writer, checkpointer, WAL writer and archiver run during normal 374 * operation. Startup process and WAL receiver also consume 2 slots, but WAL 375 * writer is launched only after startup has exited, so we only need 5 slots. 376 */ 377 #define NUM_AUXILIARY_PROCS 5 378 379 /* configurable options */ 380 extern PGDLLIMPORT int DeadlockTimeout; 381 extern PGDLLIMPORT int StatementTimeout; 382 extern PGDLLIMPORT int LockTimeout; 383 extern PGDLLIMPORT int IdleInTransactionSessionTimeout; 384 extern PGDLLIMPORT int IdleSessionTimeout; 385 extern bool log_lock_waits; 386 387 388 /* 389 * Function Prototypes 390 */ 391 extern int ProcGlobalSemas(void); 392 extern Size ProcGlobalShmemSize(void); 393 extern void InitProcGlobal(void); 394 extern void InitProcess(void); 395 extern void InitProcessPhase2(void); 396 extern void InitAuxiliaryProcess(void); 397 398 extern void PublishStartupProcessInformation(void); 399 extern void SetStartupBufferPinWaitBufId(int bufid); 400 extern int GetStartupBufferPinWaitBufId(void); 401 402 extern bool HaveNFreeProcs(int n); 403 extern void ProcReleaseLocks(bool isCommit); 404 405 extern void ProcQueueInit(PROC_QUEUE *queue); 406 extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); 407 extern PGPROC *ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus); 408 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); 409 extern void CheckDeadLockAlert(void); 410 extern bool IsWaitingForLock(void); 411 extern void LockErrorCleanup(void); 412 413 extern void ProcWaitForSignal(uint32 wait_event_info); 414 extern void ProcSendSignal(int pid); 415 416 extern PGPROC *AuxiliaryPidGetProc(int pid); 417 418 extern void BecomeLockGroupLeader(void); 419 extern bool BecomeLockGroupMember(PGPROC *leader, int pid); 420 421 #endif /* _PROC_H_ */ 422