1 /*-------------------------------------------------------------------------
2  *
3  * proc.h
4  *	  per-process shared memory data structures
5  *
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/proc.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef _PROC_H_
15 #define _PROC_H_
16 
17 #include "access/clog.h"
18 #include "access/xlogdefs.h"
19 #include "lib/ilist.h"
20 #include "storage/latch.h"
21 #include "storage/lock.h"
22 #include "storage/pg_sema.h"
23 #include "storage/proclist_types.h"
24 
/*
 * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
 * for non-aborted subtransactions of its current top transaction.  These
 * have to be treated as running XIDs by other backends.
 *
 * We also keep track of whether the cache overflowed (i.e., the transaction
 * has generated at least one subtransaction that didn't fit in the cache).
 * If none of the caches have overflowed, we can assume that an XID that's not
 * listed anywhere in the PGPROC array is not a running transaction.  Else we
 * have to look at pg_subtrans.
 *
 * The number of cached entries is stored in a uint8 (XidCacheStatus.count),
 * so this value must not be raised above 255 without widening that field.
 */
#define PGPROC_MAX_CACHED_SUBXIDS 64	/* XXX guessed-at value */
37 
38 typedef struct XidCacheStatus
39 {
40 	/* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */
41 	uint8		count;
42 	/* has PGPROC->subxids overflowed */
43 	bool		overflowed;
44 } XidCacheStatus;
45 
46 struct XidCache
47 {
48 	TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
49 };
50 
/*
 * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[]
 *
 * Both of those are declared as uint8, so every flag must fit in the low
 * eight bits.
 */
#define		PROC_IS_AUTOVACUUM	0x01	/* is it an autovac worker? */
#define		PROC_IN_VACUUM		0x02	/* currently running lazy vacuum */
#define		PROC_IN_SAFE_IC		0x04	/* currently running CREATE INDEX
										 * CONCURRENTLY or REINDEX
										 * CONCURRENTLY on non-expressional,
										 * non-partial index */
#define		PROC_VACUUM_FOR_WRAPAROUND	0x08	/* set by autovac only */
#define		PROC_IN_LOGICAL_DECODING	0x10	/* currently doing logical
												 * decoding outside xact */

/*
 * flags reset at EOXact
 *
 * Note that PROC_IS_AUTOVACUUM and PROC_IN_LOGICAL_DECODING are deliberately
 * not part of this mask.
 */
#define		PROC_VACUUM_STATE_MASK \
	(PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND)
67 
/*
 * We allow a small number of "weak" relation locks (AccessShareLock,
 * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
 * rather than the main lock table.  This eases contention on the lock
 * manager LWLocks.  See storage/lmgr/README for additional details.
 *
 * This is the length of PGPROC->fpRelId[]; the lock modes held for all slots
 * are packed together into the single uint64 PGPROC->fpLockBits.
 */
#define		FP_LOCK_SLOTS_PER_BACKEND 16
75 
/*
 * An invalid pgprocno.  Must be larger than the maximum number of PGPROC
 * structures we could possibly have.  See comments for MAX_BACKENDS.
 */
#define INVALID_PGPROCNO		PG_INT32_MAX
81 
/*
 * Outcome/state of a lock wait.  This is the type of PGPROC->waitStatus, the
 * return type of ProcSleep(), and the status handed to ProcWakeup().
 */
typedef enum
{
	PROC_WAIT_STATUS_OK,
	PROC_WAIT_STATUS_WAITING,
	PROC_WAIT_STATUS_ERROR,
} ProcWaitStatus;
88 
89 /*
90  * Each backend has a PGPROC struct in shared memory.  There is also a list of
91  * currently-unused PGPROC structs that will be reallocated to new backends.
92  *
93  * links: list link for any list the PGPROC is in.  When waiting for a lock,
94  * the PGPROC is linked into that lock's waitProcs queue.  A recycled PGPROC
95  * is linked into ProcGlobal's freeProcs list.
96  *
97  * Note: twophase.c also sets up a dummy PGPROC struct for each currently
98  * prepared transaction.  These PGPROCs appear in the ProcArray data structure
99  * so that the prepared transactions appear to be still running and are
100  * correctly shown as holding locks.  A prepared transaction PGPROC can be
101  * distinguished from a real one at need by the fact that it has pid == 0.
102  * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
103  * but its myProcLocks[] lists are valid.
104  *
105  * We allow many fields of this struct to be accessed without locks, such as
106  * delayChkpt and isBackgroundWorker. However, keep in mind that writing
107  * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in
108  * at least shared mode, so that pgxactoff does not change concurrently.
109  *
110  * Mirrored fields:
111  *
112  * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an
113  * element of more densely packed ProcGlobal arrays. These arrays are indexed
114  * by PGPROC->pgxactoff. Both copies need to be maintained coherently.
115  *
116  * NB: The pgxactoff indexed value can *never* be accessed without holding
117  * locks.
118  *
119  * See PROC_HDR for details.
120  */
121 struct PGPROC
122 {
123 	/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
124 	SHM_QUEUE	links;			/* list link if process is in a list */
125 	PGPROC	  **procgloballist; /* procglobal list that owns this PGPROC */
126 
127 	PGSemaphore sem;			/* ONE semaphore to sleep on */
128 	ProcWaitStatus waitStatus;
129 
130 	Latch		procLatch;		/* generic latch for process */
131 
132 
133 	TransactionId xid;			/* id of top-level transaction currently being
134 								 * executed by this proc, if running and XID
135 								 * is assigned; else InvalidTransactionId.
136 								 * mirrored in ProcGlobal->xids[pgxactoff] */
137 
138 	TransactionId xmin;			/* minimal running XID as it was when we were
139 								 * starting our xact, excluding LAZY VACUUM:
140 								 * vacuum must not remove tuples deleted by
141 								 * xid >= xmin ! */
142 
143 	LocalTransactionId lxid;	/* local id of top-level transaction currently
144 								 * being executed by this proc, if running;
145 								 * else InvalidLocalTransactionId */
146 	int			pid;			/* Backend's process ID; 0 if prepared xact */
147 
148 	int			pgxactoff;		/* offset into various ProcGlobal->arrays with
149 								 * data mirrored from this PGPROC */
150 	int			pgprocno;
151 
152 	/* These fields are zero while a backend is still starting up: */
153 	BackendId	backendId;		/* This backend's backend ID (if assigned) */
154 	Oid			databaseId;		/* OID of database this backend is using */
155 	Oid			roleId;			/* OID of role using this backend */
156 
157 	Oid			tempNamespaceId;	/* OID of temp schema this backend is
158 									 * using */
159 
160 	bool		isBackgroundWorker; /* true if background worker. */
161 
162 	/*
163 	 * While in hot standby mode, shows that a conflict signal has been sent
164 	 * for the current transaction. Set/cleared while holding ProcArrayLock,
165 	 * though not required. Accessed without lock, if needed.
166 	 */
167 	bool		recoveryConflictPending;
168 
169 	/* Info about LWLock the process is currently waiting for, if any. */
170 	bool		lwWaiting;		/* true if waiting for an LW lock */
171 	uint8		lwWaitMode;		/* lwlock mode being waited for */
172 	proclist_node lwWaitLink;	/* position in LW lock wait list */
173 
174 	/* Support for condition variables. */
175 	proclist_node cvWaitLink;	/* position in CV wait list */
176 
177 	/* Info about lock the process is currently waiting for, if any. */
178 	/* waitLock and waitProcLock are NULL if not currently waiting. */
179 	LOCK	   *waitLock;		/* Lock object we're sleeping on ... */
180 	PROCLOCK   *waitProcLock;	/* Per-holder info for awaited lock */
181 	LOCKMODE	waitLockMode;	/* type of lock we're waiting for */
182 	LOCKMASK	heldLocks;		/* bitmask for lock types already held on this
183 								 * lock object by this backend */
184 	pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition
185 								 * started */
186 
187 	bool		delayChkpt;		/* true if this proc delays checkpoint start */
188 
189 	uint8		statusFlags;	/* this backend's status flags, see PROC_*
190 								 * above. mirrored in
191 								 * ProcGlobal->statusFlags[pgxactoff] */
192 
193 	/*
194 	 * Info to allow us to wait for synchronous replication, if needed.
195 	 * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
196 	 * syncRepState must not be touched except by owning process or WALSender.
197 	 * syncRepLinks used only while holding SyncRepLock.
198 	 */
199 	XLogRecPtr	waitLSN;		/* waiting for this LSN or higher */
200 	int			syncRepState;	/* wait state for sync rep */
201 	SHM_QUEUE	syncRepLinks;	/* list link if process is in syncrep queue */
202 
203 	/*
204 	 * All PROCLOCK objects for locks held or awaited by this backend are
205 	 * linked into one of these lists, according to the partition number of
206 	 * their lock.
207 	 */
208 	SHM_QUEUE	myProcLocks[NUM_LOCK_PARTITIONS];
209 
210 	XidCacheStatus subxidStatus;	/* mirrored with
211 									 * ProcGlobal->subxidStates[i] */
212 	struct XidCache subxids;	/* cache for subtransaction XIDs */
213 
214 	/* Support for group XID clearing. */
215 	/* true, if member of ProcArray group waiting for XID clear */
216 	bool		procArrayGroupMember;
217 	/* next ProcArray group member waiting for XID clear */
218 	pg_atomic_uint32 procArrayGroupNext;
219 
220 	/*
221 	 * latest transaction id among the transaction's main XID and
222 	 * subtransactions
223 	 */
224 	TransactionId procArrayGroupMemberXid;
225 
226 	uint32		wait_event_info;	/* proc's wait information */
227 
228 	/* Support for group transaction status update. */
229 	bool		clogGroupMember;	/* true, if member of clog group */
230 	pg_atomic_uint32 clogGroupNext; /* next clog group member */
231 	TransactionId clogGroupMemberXid;	/* transaction id of clog group member */
232 	XidStatus	clogGroupMemberXidStatus;	/* transaction status of clog
233 											 * group member */
234 	int			clogGroupMemberPage;	/* clog page corresponding to
235 										 * transaction id of clog group member */
236 	XLogRecPtr	clogGroupMemberLsn; /* WAL location of commit record for clog
237 									 * group member */
238 
239 	/* Lock manager data, recording fast-path locks taken by this backend. */
240 	LWLock		fpInfoLock;		/* protects per-backend fast-path state */
241 	uint64		fpLockBits;		/* lock modes held for each fast-path slot */
242 	Oid			fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
243 	bool		fpVXIDLock;		/* are we holding a fast-path VXID lock? */
244 	LocalTransactionId fpLocalTransactionId;	/* lxid for fast-path VXID
245 												 * lock */
246 
247 	/*
248 	 * Support for lock groups.  Use LockHashPartitionLockByProc on the group
249 	 * leader to get the LWLock protecting these fields.
250 	 */
251 	PGPROC	   *lockGroupLeader;	/* lock group leader, if I'm a member */
252 	dlist_head	lockGroupMembers;	/* list of members, if I'm a leader */
253 	dlist_node	lockGroupLink;	/* my member link, if I'm a member */
254 };
255 
256 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
257 
258 
259 extern PGDLLIMPORT PGPROC *MyProc;
260 
261 /*
262  * There is one ProcGlobal struct for the whole database cluster.
263  *
264  * Adding/Removing an entry into the procarray requires holding *both*
265  * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are
266  * needed because the dense arrays (see below) are accessed from
267  * GetNewTransactionId() and GetSnapshotData(), and we don't want to add
268  * further contention by both using the same lock. Adding/Removing a procarray
269  * entry is much less frequent.
270  *
271  * Some fields in PGPROC are mirrored into more densely packed arrays (e.g.
272  * xids), with one entry for each backend. These arrays only contain entries
273  * for PGPROCs that have been added to the shared array with ProcArrayAdd()
274  * (in contrast to PGPROC array which has unused PGPROCs interspersed).
275  *
276  * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent
277  * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray
278  * member to change.  Therefore it is only safe to use PGPROC->pgxactoff to
279  * access the dense array while holding either ProcArrayLock or XidGenLock.
280  *
281  * As long as a PGPROC is in the procarray, the mirrored values need to be
282  * maintained in both places in a coherent manner.
283  *
284  * The denser separate arrays are beneficial for three main reasons: First, to
285  * allow for as tight loops accessing the data as possible. Second, to prevent
286  * updates of frequently changing data (e.g. xmin) from invalidating
287  * cachelines also containing less frequently changing data (e.g. xid,
288  * statusFlags). Third to condense frequently accessed data into as few
289  * cachelines as possible.
290  *
291  * There are two main reasons to have the data mirrored between these dense
292  * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
293  * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
294  * PGPROC entries do not require that (obviously there may still be locking
295  * requirements around the individual field, separate from the concerns
296  * here). That is particularly important for a backend to efficiently checks
297  * it own values, which it often can safely do without locking.  Second, the
298  * PGPROC fields allow to avoid unnecessary accesses and modification to the
299  * dense arrays. A backend's own PGPROC is more likely to be in a local cache,
300  * whereas the cachelines for the dense array will be modified by other
301  * backends (often removing it from the cache for other cores/sockets). At
302  * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
303  * the corresponding array value.
304  *
305  * Basically it makes sense to access the PGPROC variable when checking a
306  * single backend's data, especially when already looking at the PGPROC for
307  * other reasons already.  It makes sense to look at the "dense" arrays if we
308  * need to look at many / most entries, because we then benefit from the
309  * reduced indirection and better cross-process cache-ability.
310  *
311  * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
312  * in the dense arrays is initialized from the PGPROC while it already holds
313  * ProcArrayLock.
314  */
315 typedef struct PROC_HDR
316 {
317 	/* Array of PGPROC structures (not including dummies for prepared txns) */
318 	PGPROC	   *allProcs;
319 
320 	/* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
321 	TransactionId *xids;
322 
323 	/*
324 	 * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
325 	 * procarray.
326 	 */
327 	XidCacheStatus *subxidStates;
328 
329 	/*
330 	 * Array mirroring PGPROC.statusFlags for each PGPROC currently in the
331 	 * procarray.
332 	 */
333 	uint8	   *statusFlags;
334 
335 	/* Length of allProcs array */
336 	uint32		allProcCount;
337 	/* Head of list of free PGPROC structures */
338 	PGPROC	   *freeProcs;
339 	/* Head of list of autovacuum's free PGPROC structures */
340 	PGPROC	   *autovacFreeProcs;
341 	/* Head of list of bgworker free PGPROC structures */
342 	PGPROC	   *bgworkerFreeProcs;
343 	/* Head of list of walsender free PGPROC structures */
344 	PGPROC	   *walsenderFreeProcs;
345 	/* First pgproc waiting for group XID clear */
346 	pg_atomic_uint32 procArrayGroupFirst;
347 	/* First pgproc waiting for group transaction status update */
348 	pg_atomic_uint32 clogGroupFirst;
349 	/* WALWriter process's latch */
350 	Latch	   *walwriterLatch;
351 	/* Checkpointer process's latch */
352 	Latch	   *checkpointerLatch;
353 	/* Current shared estimate of appropriate spins_per_delay value */
354 	int			spins_per_delay;
355 	/* The proc of the Startup process, since not in ProcArray */
356 	PGPROC	   *startupProc;
357 	int			startupProcPid;
358 	/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
359 	int			startupBufferPinWaitBufId;
360 } PROC_HDR;
361 
362 extern PGDLLIMPORT PROC_HDR *ProcGlobal;
363 
364 extern PGPROC *PreparedXactProcs;
365 
/*
 * Accessor for PGPROC given a pgprocno.
 *
 * Evaluates to a pointer to element (n) of ProcGlobal->allProcs.  No range
 * check is performed on (n).
 */
#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
368 
/*
 * We set aside some extra PGPROC structures for auxiliary processes,
 * ie things that aren't full-fledged backends but need shmem access.
 *
 * Background writer, checkpointer, WAL writer and archiver run during normal
 * operation (4 slots).  Startup process and WAL receiver also consume 2
 * slots, but WAL writer is launched only after startup has exited, so the
 * two never hold a slot at the same time and we only need 5 slots.
 */
#define NUM_AUXILIARY_PROCS		5
378 
379 /* configurable options */
380 extern PGDLLIMPORT int DeadlockTimeout;
381 extern PGDLLIMPORT int StatementTimeout;
382 extern PGDLLIMPORT int LockTimeout;
383 extern PGDLLIMPORT int IdleInTransactionSessionTimeout;
384 extern PGDLLIMPORT int IdleSessionTimeout;
385 extern bool log_lock_waits;
386 
387 
388 /*
389  * Function Prototypes
390  */
391 extern int	ProcGlobalSemas(void);
392 extern Size ProcGlobalShmemSize(void);
393 extern void InitProcGlobal(void);
394 extern void InitProcess(void);
395 extern void InitProcessPhase2(void);
396 extern void InitAuxiliaryProcess(void);
397 
398 extern void PublishStartupProcessInformation(void);
399 extern void SetStartupBufferPinWaitBufId(int bufid);
400 extern int	GetStartupBufferPinWaitBufId(void);
401 
402 extern bool HaveNFreeProcs(int n);
403 extern void ProcReleaseLocks(bool isCommit);
404 
405 extern void ProcQueueInit(PROC_QUEUE *queue);
406 extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
407 extern PGPROC *ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
408 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
409 extern void CheckDeadLockAlert(void);
410 extern bool IsWaitingForLock(void);
411 extern void LockErrorCleanup(void);
412 
413 extern void ProcWaitForSignal(uint32 wait_event_info);
414 extern void ProcSendSignal(int pid);
415 
416 extern PGPROC *AuxiliaryPidGetProc(int pid);
417 
418 extern void BecomeLockGroupLeader(void);
419 extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
420 
421 #endif							/* _PROC_H_ */
422