1 /*-------------------------------------------------------------------------
2  *
3  * deadlock.c
4  *	  POSTGRES deadlock detection code
5  *
6  * See src/backend/storage/lmgr/README for a description of the deadlock
7  * detection and resolution algorithms.
8  *
9  *
10  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  *
14  * IDENTIFICATION
15  *	  src/backend/storage/lmgr/deadlock.c
16  *
17  *	Interface:
18  *
19  *	DeadLockCheck()
20  *	DeadLockReport()
21  *	RememberSimpleDeadLock()
22  *	InitDeadLockChecking()
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27 
28 #include "miscadmin.h"
29 #include "pg_trace.h"
30 #include "pgstat.h"
31 #include "storage/lmgr.h"
32 #include "storage/proc.h"
33 #include "utils/memutils.h"
34 
35 
/*
 * One edge in the waits-for graph.
 *
 * waiter and blocker may or may not be members of a lock group, but if either
 * is, it will be the leader rather than any other member of the lock group.
 * The group leaders act as representatives of the whole group even though
 * those particular processes need not be waiting at all.  There will be at
 * least one member of the waiter's lock group on the wait queue for the given
 * lock, maybe more.
 */
typedef struct
{
	PGPROC	   *waiter;			/* the leader of the waiting lock group */
	PGPROC	   *blocker;		/* the leader of the group it is waiting for */
	LOCK	   *lock;			/* the lock being waited for */
	int			pred;			/* workspace for TopoSort: topoProcs[] index
								 * of this edge's waiter */
	int			link;			/* workspace for TopoSort: link to next edge
								 * in a proc's after-constraints list */
} EDGE;
54 
/* One potential reordering of a lock's wait queue */
typedef struct
{
	LOCK	   *lock;			/* the lock whose wait queue is described */
	PGPROC	  **procs;			/* array of PGPROC *'s in new wait order */
	int			nProcs;			/* number of valid entries in procs[] */
} WAIT_ORDER;
62 
/*
 * Information saved about each edge in a detected deadlock cycle.  This
 * is used to print a diagnostic message upon failure.  (One entry is
 * recorded per cycle edge; the valid length is nDeadlockDetails.)
 *
 * Note: because we want to examine this info after releasing the lock
 * manager's partition locks, we can't just store LOCK and PGPROC pointers;
 * we must extract out all the info we want to be able to print.
 */
typedef struct
{
	LOCKTAG		locktag;		/* ID of awaited lock object */
	LOCKMODE	lockmode;		/* type of lock we're waiting for */
	int			pid;			/* PID of blocked backend */
} DEADLOCK_INFO;
77 
78 
79 static bool DeadLockCheckRecurse(PGPROC *proc);
80 static int	TestConfiguration(PGPROC *startProc);
81 static bool FindLockCycle(PGPROC *checkProc,
82 						  EDGE *softEdges, int *nSoftEdges);
83 static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
84 								 EDGE *softEdges, int *nSoftEdges);
85 static bool FindLockCycleRecurseMember(PGPROC *checkProc,
86 									   PGPROC *checkProcLeader,
87 									   int depth, EDGE *softEdges, int *nSoftEdges);
88 static bool ExpandConstraints(EDGE *constraints, int nConstraints);
89 static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
90 					 PGPROC **ordering);
91 
92 #ifdef DEBUG_DEADLOCK
93 static void PrintLockQueue(LOCK *lock, const char *info);
94 #endif
95 
96 
/*
 * Working space for the deadlock detector
 *
 * All of these arrays are allocated once per backend by
 * InitDeadLockChecking(); see that function for the sizing rationale.
 */

/* Workspace for FindLockCycle */
static PGPROC **visitedProcs;	/* Array of visited procs */
static int	nVisitedProcs;		/* number of valid visitedProcs[] entries */

/* Workspace for TopoSort */
static PGPROC **topoProcs;		/* Array of not-yet-output procs */
static int *beforeConstraints;	/* Counts of remaining before-constraints */
static int *afterConstraints;	/* List head for after-constraints */

/* Output area for ExpandConstraints */
static WAIT_ORDER *waitOrders;	/* Array of proposed queue rearrangements */
static int	nWaitOrders;
static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */

/* Current list of constraints being considered */
static EDGE *curConstraints;
static int	nCurConstraints;
static int	maxCurConstraints;

/* Storage space for results from FindLockCycle */
static EDGE *possibleConstraints;
static int	nPossibleConstraints;
static int	maxPossibleConstraints;
static DEADLOCK_INFO *deadlockDetails;	/* edges of last-detected cycle */
static int	nDeadlockDetails;	/* number of valid deadlockDetails[] entries */

/* PGPROC pointer of any blocking autovacuum worker found */
static PGPROC *blocking_autovacuum_proc = NULL;
129 
130 
/*
 * InitDeadLockChecking -- initialize deadlock checker during backend startup
 *
 * This does per-backend initialization of the deadlock checker; primarily,
 * allocation of working memory for DeadLockCheck.  We do this per-backend
 * since there's no percentage in making the kernel do copy-on-write
 * inheritance of workspace from the postmaster.  We want to allocate the
 * space at startup because (a) the deadlock checker might be invoked when
 * there's no free memory left, and (b) the checker is normally run inside a
 * signal handler, which is a very dangerous place to invoke palloc from.
 */
void
InitDeadLockChecking(void)
{
	MemoryContext oldcxt;

	/* Make sure allocations are permanent */
	oldcxt = MemoryContextSwitchTo(TopMemoryContext);

	/*
	 * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
	 * deadlockDetails[].
	 */
	visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
	deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));

	/*
	 * TopoSort needs to consider at most MaxBackends wait-queue entries, and
	 * it needn't run concurrently with FindLockCycle.
	 */
	topoProcs = visitedProcs;	/* re-use this space */
	beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
	afterConstraints = (int *) palloc(MaxBackends * sizeof(int));

	/*
	 * We need to consider rearranging at most MaxBackends/2 wait queues
	 * (since it takes at least two waiters in a queue to create a soft edge),
	 * and the expanded form of the wait queues can't involve more than
	 * MaxBackends total waiters.
	 */
	waitOrders = (WAIT_ORDER *)
		palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
	waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));

	/*
	 * Allow at most MaxBackends distinct constraints in a configuration. (Is
	 * this enough?  In practice it seems it should be, but I don't quite see
	 * how to prove it.  If we run out, we might fail to find a workable wait
	 * queue rearrangement even though one exists.)  NOTE that this number
	 * limits the maximum recursion depth of DeadLockCheckRecurse. Making it
	 * really big might potentially allow a stack-overflow problem.
	 */
	maxCurConstraints = MaxBackends;
	curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));

	/*
	 * Allow up to 3*MaxBackends constraints to be saved without having to
	 * re-run TestConfiguration.  (This is probably more than enough, but we
	 * can survive if we run low on space by doing excess runs of
	 * TestConfiguration to re-compute constraint lists each time needed.) The
	 * last MaxBackends entries in possibleConstraints[] are reserved as
	 * output workspace for FindLockCycle.
	 */
	maxPossibleConstraints = MaxBackends * 4;
	possibleConstraints =
		(EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));

	/* Restore caller's memory context */
	MemoryContextSwitchTo(oldcxt);
}
200 
/*
 * DeadLockCheck -- Checks for deadlocks for a given process
 *
 * This code looks for deadlocks involving the given process.  If any
 * are found, it tries to rearrange lock wait queues to resolve the
 * deadlock.  If resolution is impossible, return DS_HARD_DEADLOCK ---
 * the caller is then expected to abort the given proc's transaction.
 *
 * Caller must already have locked all partitions of the lock tables.
 *
 * On failure, deadlock details are recorded in deadlockDetails[] for
 * subsequent printing by DeadLockReport().  That activity is separate
 * because (a) we don't want to do it while holding all those LWLocks,
 * and (b) we are typically invoked inside a signal handler.
 */
DeadLockState
DeadLockCheck(PGPROC *proc)
{
	int			i,
				j;

	/* Initialize to "no constraints" */
	nCurConstraints = 0;
	nPossibleConstraints = 0;
	nWaitOrders = 0;

	/* Initialize to not blocked by an autovacuum worker */
	blocking_autovacuum_proc = NULL;

	/* Search for deadlocks and possible fixes */
	if (DeadLockCheckRecurse(proc))
	{
		/*
		 * Call FindLockCycle one more time, to record the correct
		 * deadlockDetails[] for the basic state with no rearrangements.
		 */
		int			nSoftEdges;

		TRACE_POSTGRESQL_DEADLOCK_FOUND();

		/* discard hypothetical queue orders so FindLockCycle sees reality */
		nWaitOrders = 0;
		if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
			elog(FATAL, "deadlock seems to have disappeared");

		return DS_HARD_DEADLOCK;	/* cannot find a non-deadlocked state */
	}

	/* Apply any needed rearrangements of wait queues */
	for (i = 0; i < nWaitOrders; i++)
	{
		LOCK	   *lock = waitOrders[i].lock;
		PGPROC	  **procs = waitOrders[i].procs;
		int			nProcs = waitOrders[i].nProcs;
		PROC_QUEUE *waitQueue = &(lock->waitProcs);

		Assert(nProcs == waitQueue->size);

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "DeadLockCheck:");
#endif

		/* Reset the queue and re-add procs in the desired order */
		ProcQueueInit(waitQueue);
		for (j = 0; j < nProcs; j++)
		{
			SHMQueueInsertBefore(&(waitQueue->links), &(procs[j]->links));
			waitQueue->size++;
		}

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "rearranged to:");
#endif

		/* See if any waiters for the lock can be woken up now */
		ProcLockWakeup(GetLocksMethodTable(lock), lock);
	}

	/* Return code tells caller if we had to escape a deadlock or not */
	if (nWaitOrders > 0)
		return DS_SOFT_DEADLOCK;
	else if (blocking_autovacuum_proc != NULL)
		return DS_BLOCKED_BY_AUTOVACUUM;
	else
		return DS_NO_DEADLOCK;
}
286 
287 /*
288  * Return the PGPROC of the autovacuum that's blocking a process.
289  *
290  * We reset the saved pointer as soon as we pass it back.
291  */
292 PGPROC *
GetBlockingAutoVacuumPgproc(void)293 GetBlockingAutoVacuumPgproc(void)
294 {
295 	PGPROC	   *ptr;
296 
297 	ptr = blocking_autovacuum_proc;
298 	blocking_autovacuum_proc = NULL;
299 
300 	return ptr;
301 }
302 
/*
 * DeadLockCheckRecurse -- recursively search for valid orderings
 *
 * curConstraints[] holds the current set of constraints being considered
 * by an outer level of recursion.  Add to this each possible solution
 * constraint for any cycle detected at this level.
 *
 * Returns true if no solution exists.  Returns false if a deadlock-free
 * state is attainable, in which case waitOrders[] shows the required
 * rearrangements of lock wait queues (if any).
 */
static bool
DeadLockCheckRecurse(PGPROC *proc)
{
	int			nEdges;
	int			oldPossibleConstraints;
	bool		savedList;
	int			i;

	nEdges = TestConfiguration(proc);
	if (nEdges < 0)
		return true;			/* hard deadlock --- no solution */
	if (nEdges == 0)
		return false;			/* good configuration found */
	if (nCurConstraints >= maxCurConstraints)
		return true;			/* out of room for active constraints? */
	oldPossibleConstraints = nPossibleConstraints;
	if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
	{
		/*
		 * We can save the edge list in possibleConstraints[] (still leaving
		 * the last MaxBackends entries free as FindLockCycle's workspace).
		 */
		nPossibleConstraints += nEdges;
		savedList = true;
	}
	else
	{
		/* Not room; will need to regenerate the edges on-the-fly */
		savedList = false;
	}

	/*
	 * Try each available soft edge as an addition to the configuration.
	 */
	for (i = 0; i < nEdges; i++)
	{
		if (!savedList && i > 0)
		{
			/* Regenerate the list of possible added constraints */
			if (nEdges != TestConfiguration(proc))
				elog(FATAL, "inconsistent results during deadlock check");
		}
		curConstraints[nCurConstraints] =
			possibleConstraints[oldPossibleConstraints + i];
		nCurConstraints++;
		if (!DeadLockCheckRecurse(proc))
			return false;		/* found a valid solution! */
		/* give up on that added constraint, try again */
		nCurConstraints--;
	}
	/* release any saved edge list before returning to outer level */
	nPossibleConstraints = oldPossibleConstraints;
	return true;				/* no solution found */
}
364 
365 
/*--------------------
 * Test a configuration (current set of constraints) for validity.
 *
 * Returns:
 *		0: the configuration is good (no deadlocks)
 *	   -1: the configuration has a hard deadlock or is not self-consistent
 *		>0: the configuration has one or more soft deadlocks
 *
 * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
 * and a list of its soft edges is returned beginning at
 * possibleConstraints+nPossibleConstraints.  The return value is the
 * number of soft edges.
 *--------------------
 */
static int
TestConfiguration(PGPROC *startProc)
{
	int			softFound = 0;
	/* FindLockCycle writes its soft-edge output just past the saved edges */
	EDGE	   *softEdges = possibleConstraints + nPossibleConstraints;
	int			nSoftEdges;
	int			i;

	/*
	 * Make sure we have room for FindLockCycle's output.
	 */
	if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
		return -1;

	/*
	 * Expand current constraint set into wait orderings.  Fail if the
	 * constraint set is not self-consistent.
	 */
	if (!ExpandConstraints(curConstraints, nCurConstraints))
		return -1;

	/*
	 * Check for cycles involving startProc or any of the procs mentioned in
	 * constraints.  We check startProc last because if it has a soft cycle
	 * still to be dealt with, we want to deal with that first.
	 */
	for (i = 0; i < nCurConstraints; i++)
	{
		if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
		if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
	}
	if (FindLockCycle(startProc, softEdges, &nSoftEdges))
	{
		if (nSoftEdges == 0)
			return -1;			/* hard deadlock detected */
		softFound = nSoftEdges;
	}
	return softFound;
}
429 
430 
/*
 * FindLockCycle -- basic check for deadlock cycles
 *
 * Scan outward from the given proc to see if there is a cycle in the
 * waits-for graph that includes this proc.  Return true if a cycle
 * is found, else false.  If a cycle is found, we return a list of
 * the "soft edges", if any, included in the cycle.  These edges could
 * potentially be eliminated by rearranging wait queues.  We also fill
 * deadlockDetails[] with information about the detected cycle; this info
 * is not used by the deadlock algorithm itself, only to print a useful
 * message after failing.
 *
 * Since we need to be able to check hypothetical configurations that would
 * exist after wait queue rearrangement, the routine pays attention to the
 * table of hypothetical queue orders in waitOrders[].  These orders will
 * be believed in preference to the actual ordering seen in the locktable.
 */
static bool
FindLockCycle(PGPROC *checkProc,
			  EDGE *softEdges,	/* output argument */
			  int *nSoftEdges)	/* output argument */
{
	/* Reset the per-search workspace before recursing */
	nVisitedProcs = 0;
	nDeadlockDetails = 0;
	*nSoftEdges = 0;
	return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
}
458 
/*
 * FindLockCycleRecurse -- recursive guts of FindLockCycle
 *
 * checkProc (or its lock group leader) is the current node of the
 * depth-first search; depth is the number of edges traversed so far.
 * Returns true if a cycle back to the starting proc was found, in which
 * case deadlockDetails[0..depth-1] will be filled by the unwinding
 * recursion levels.
 */
static bool
FindLockCycleRecurse(PGPROC *checkProc,
					 int depth,
					 EDGE *softEdges,	/* output argument */
					 int *nSoftEdges)	/* output argument */
{
	int			i;
	dlist_iter	iter;

	/*
	 * If this process is a lock group member, check the leader instead. (Note
	 * that we might be the leader, in which case this is a no-op.)
	 */
	if (checkProc->lockGroupLeader != NULL)
		checkProc = checkProc->lockGroupLeader;

	/*
	 * Have we already seen this proc?
	 */
	for (i = 0; i < nVisitedProcs; i++)
	{
		if (visitedProcs[i] == checkProc)
		{
			/* If we return to starting point, we have a deadlock cycle */
			if (i == 0)
			{
				/*
				 * record total length of cycle --- outer levels will now fill
				 * deadlockDetails[]
				 */
				Assert(depth <= MaxBackends);
				nDeadlockDetails = depth;

				return true;
			}

			/*
			 * Otherwise, we have a cycle but it does not include the start
			 * point, so say "no deadlock".
			 */
			return false;
		}
	}
	/* Mark proc as seen */
	Assert(nVisitedProcs < MaxBackends);
	visitedProcs[nVisitedProcs++] = checkProc;

	/*
	 * If the process is waiting, there is an outgoing waits-for edge to each
	 * process that blocks it.
	 */
	if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
		FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
								   nSoftEdges))
		return true;

	/*
	 * If the process is not waiting, there could still be outgoing waits-for
	 * edges if it is part of a lock group, because other members of the lock
	 * group might be waiting even though this process is not.  (Given lock
	 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
	 * that is a deadlock even if neither of B1 and A2 are waiting for
	 * anything.)
	 */
	dlist_foreach(iter, &checkProc->lockGroupMembers)
	{
		PGPROC	   *memberProc;

		memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);

		if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
			memberProc != checkProc &&
			FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
									   nSoftEdges))
			return true;
	}

	return false;
}
537 
/*
 * FindLockCycleRecurseMember -- examine edges from one waiting process
 *
 * checkProc is an actually-waiting process belonging to the lock group led
 * by checkProcLeader (possibly the leader itself).  Examine the lock that
 * checkProc waits for, and recurse into each process that hard- or
 * soft-blocks it.  Returns true if a deadlock cycle was found, filling in
 * deadlockDetails[depth] (and softEdges[] for a soft edge) on the way out.
 */
static bool
FindLockCycleRecurseMember(PGPROC *checkProc,
						   PGPROC *checkProcLeader,
						   int depth,
						   EDGE *softEdges, /* output argument */
						   int *nSoftEdges) /* output argument */
{
	PGPROC	   *proc;
	LOCK	   *lock = checkProc->waitLock;
	PROCLOCK   *proclock;
	SHM_QUEUE  *procLocks;
	LockMethod	lockMethodTable;
	PROC_QUEUE *waitQueue;
	int			queue_size;
	int			conflictMask;
	int			i;
	int			numLockModes,
				lm;

	/*
	 * The relation extension or page lock can never participate in actual
	 * deadlock cycle.  See Asserts in LockAcquireExtended.  So, there is no
	 * advantage in checking wait edges from them.
	 */
	if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND ||
		(LOCK_LOCKTAG(*lock) == LOCKTAG_PAGE))
		return false;

	lockMethodTable = GetLocksMethodTable(lock);
	numLockModes = lockMethodTable->numLockModes;
	/* bitmask of lock modes conflicting with the mode we are waiting for */
	conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];

	/*
	 * Scan for procs that already hold conflicting locks.  These are "hard"
	 * edges in the waits-for graph.
	 */
	procLocks = &(lock->procLocks);

	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
										 offsetof(PROCLOCK, lockLink));

	while (proclock)
	{
		PGPROC	   *leader;

		proc = proclock->tag.myProc;
		/* a proc in a lock group is represented by its leader */
		leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;

		/* A proc never blocks itself or any other lock group member */
		if (leader != checkProcLeader)
		{
			for (lm = 1; lm <= numLockModes; lm++)
			{
				if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
					(conflictMask & LOCKBIT_ON(lm)))
				{
					/* This proc hard-blocks checkProc */
					if (FindLockCycleRecurse(proc, depth + 1,
											 softEdges, nSoftEdges))
					{
						/* fill deadlockDetails[] */
						DEADLOCK_INFO *info = &deadlockDetails[depth];

						info->locktag = lock->tag;
						info->lockmode = checkProc->waitLockMode;
						info->pid = checkProc->pid;

						return true;
					}

					/*
					 * No deadlock here, but see if this proc is an autovacuum
					 * that is directly hard-blocking our own proc.  If so,
					 * report it so that the caller can send a cancel signal
					 * to it, if appropriate.  If there's more than one such
					 * proc, it's indeterminate which one will be reported.
					 *
					 * We don't touch autovacuums that are indirectly blocking
					 * us; it's up to the direct blockee to take action.  This
					 * rule simplifies understanding the behavior and ensures
					 * that an autovacuum won't be canceled with less than
					 * deadlock_timeout grace period.
					 *
					 * Note we read statusFlags without any locking.  This is
					 * OK only for checking the PROC_IS_AUTOVACUUM flag,
					 * because that flag is set at process start and never
					 * reset.  There is logic elsewhere to avoid canceling an
					 * autovacuum that is working to prevent XID wraparound
					 * problems (which needs to read a different statusFlags
					 * bit), but we don't do that here to avoid grabbing
					 * ProcArrayLock.
					 */
					if (checkProc == MyProc &&
						proc->statusFlags & PROC_IS_AUTOVACUUM)
						blocking_autovacuum_proc = proc;

					/* We're done looking at this proclock */
					break;
				}
			}
		}

		proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
											 offsetof(PROCLOCK, lockLink));
	}

	/*
	 * Scan for procs that are ahead of this one in the lock's wait queue.
	 * Those that have conflicting requests soft-block this one.  This must be
	 * done after the hard-block search, since if another proc both hard- and
	 * soft-blocks this one, we want to call it a hard edge.
	 *
	 * If there is a proposed re-ordering of the lock's wait order, use that
	 * rather than the current wait order.
	 */
	for (i = 0; i < nWaitOrders; i++)
	{
		if (waitOrders[i].lock == lock)
			break;
	}

	if (i < nWaitOrders)
	{
		/* Use the given hypothetical wait queue order */
		PGPROC	  **procs = waitOrders[i].procs;

		queue_size = waitOrders[i].nProcs;

		for (i = 0; i < queue_size; i++)
		{
			PGPROC	   *leader;

			proc = procs[i];
			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;

			/*
			 * TopoSort will always return an ordering with group members
			 * adjacent to each other in the wait queue (see comments
			 * therein). So, as soon as we reach a process in the same lock
			 * group as checkProc, we know we've found all the conflicts that
			 * precede any member of the lock group lead by checkProcLeader.
			 */
			if (leader == checkProcLeader)
				break;

			/* Is there a conflict with this guy's request? */
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
			{
				/* This proc soft-blocks checkProc */
				if (FindLockCycleRecurse(proc, depth + 1,
										 softEdges, nSoftEdges))
				{
					/* fill deadlockDetails[] */
					DEADLOCK_INFO *info = &deadlockDetails[depth];

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

					/*
					 * Add this edge to the list of soft edges in the cycle
					 */
					Assert(*nSoftEdges < MaxBackends);
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
					(*nSoftEdges)++;
					return true;
				}
			}
		}
	}
	else
	{
		PGPROC	   *lastGroupMember = NULL;

		/* Use the true lock wait queue order */
		waitQueue = &(lock->waitProcs);

		/*
		 * Find the last member of the lock group that is present in the wait
		 * queue.  Anything after this is not a soft lock conflict. If group
		 * locking is not in use, then we know immediately which process we're
		 * looking for, but otherwise we've got to search the wait queue to
		 * find the last process actually present.
		 */
		if (checkProc->lockGroupLeader == NULL)
			lastGroupMember = checkProc;
		else
		{
			proc = (PGPROC *) waitQueue->links.next;
			queue_size = waitQueue->size;
			while (queue_size-- > 0)
			{
				if (proc->lockGroupLeader == checkProcLeader)
					lastGroupMember = proc;
				proc = (PGPROC *) proc->links.next;
			}
			Assert(lastGroupMember != NULL);
		}

		/*
		 * OK, now rescan (or scan) the queue to identify the soft conflicts.
		 */
		queue_size = waitQueue->size;
		proc = (PGPROC *) waitQueue->links.next;
		while (queue_size-- > 0)
		{
			PGPROC	   *leader;

			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;

			/* Done when we reach the target proc */
			if (proc == lastGroupMember)
				break;

			/* Is there a conflict with this guy's request? */
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
				leader != checkProcLeader)
			{
				/* This proc soft-blocks checkProc */
				if (FindLockCycleRecurse(proc, depth + 1,
										 softEdges, nSoftEdges))
				{
					/* fill deadlockDetails[] */
					DEADLOCK_INFO *info = &deadlockDetails[depth];

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

					/*
					 * Add this edge to the list of soft edges in the cycle
					 */
					Assert(*nSoftEdges < MaxBackends);
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
					(*nSoftEdges)++;
					return true;
				}
			}

			proc = (PGPROC *) proc->links.next;
		}
	}

	/*
	 * No conflict detected here.
	 */
	return false;
}
792 
793 
/*
 * ExpandConstraints -- expand a list of constraints into a set of
 *		specific new orderings for affected wait queues
 *
 * Input is a list of soft edges to be reversed.  The output is a list
 * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
 * workspace in waitOrderProcs[].
 *
 * Returns true if able to build an ordering that satisfies all the
 * constraints, false if not (there are contradictory constraints).
 */
static bool
ExpandConstraints(EDGE *constraints,
				  int nConstraints)
{
	int			nWaitOrderProcs = 0;
	int			i,
				j;

	nWaitOrders = 0;

	/*
	 * Scan constraint list backwards.  This is because the last-added
	 * constraint is the only one that could fail, and so we want to test it
	 * for inconsistency first.
	 */
	for (i = nConstraints; --i >= 0;)
	{
		LOCK	   *lock = constraints[i].lock;

		/* Did we already make a list for this lock? */
		for (j = nWaitOrders; --j >= 0;)
		{
			if (waitOrders[j].lock == lock)
				break;
		}
		if (j >= 0)
			continue;
		/* No, so allocate a new list */
		waitOrders[nWaitOrders].lock = lock;
		waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
		waitOrders[nWaitOrders].nProcs = lock->waitProcs.size;
		nWaitOrderProcs += lock->waitProcs.size;
		Assert(nWaitOrderProcs <= MaxBackends);

		/*
		 * Do the topo sort.  TopoSort need not examine constraints after this
		 * one, since they must be for different locks.
		 */
		if (!TopoSort(lock, constraints, i + 1,
					  waitOrders[nWaitOrders].procs))
			return false;
		nWaitOrders++;
	}
	return true;
}
850 
851 
852 /*
853  * TopoSort -- topological sort of a wait queue
854  *
855  * Generate a re-ordering of a lock's wait queue that satisfies given
856  * constraints about certain procs preceding others.  (Each such constraint
857  * is a fact of a partial ordering.)  Minimize rearrangement of the queue
858  * not needed to achieve the partial ordering.
859  *
860  * This is a lot simpler and slower than, for example, the topological sort
861  * algorithm shown in Knuth's Volume 1.  However, Knuth's method doesn't
862  * try to minimize the damage to the existing order.  In practice we are
863  * not likely to be working with more than a few constraints, so the apparent
864  * slowness of the algorithm won't really matter.
865  *
866  * The initial queue ordering is taken directly from the lock's wait queue.
867  * The output is an array of PGPROC pointers, of length equal to the lock's
868  * wait queue length (the caller is responsible for providing this space).
869  * The partial order is specified by an array of EDGE structs.  Each EDGE
870  * is one that we need to reverse, therefore the "waiter" must appear before
871  * the "blocker" in the output array.  The EDGE array may well contain
872  * edges associated with other locks; these should be ignored.
873  *
874  * Returns true if able to build an ordering that satisfies all the
875  * constraints, false if not (there are contradictory constraints).
876  */
static bool
TopoSort(LOCK *lock,
		 EDGE *constraints,
		 int nConstraints,
		 PGPROC **ordering)		/* output argument */
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
	PGPROC	   *proc;
	int			i,
				j,
				jj,
				k,
				kk,
				last;

	/* First, fill topoProcs[] array with the procs in their current order */
	proc = (PGPROC *) waitQueue->links.next;
	for (i = 0; i < queue_size; i++)
	{
		topoProcs[i] = proc;
		proc = (PGPROC *) proc->links.next;
	}

	/*
	 * Scan the constraints, and for each proc in the array, generate a count
	 * of the number of constraints that say it must be before something else,
	 * plus a list of the constraints that say it must be after something
	 * else. The count for the j'th proc is stored in beforeConstraints[j],
	 * and the head of its list in afterConstraints[j].  Each constraint
	 * stores its list link in constraints[i].link (note any constraint will
	 * be in just one list). The array index for the before-proc of the i'th
	 * constraint is remembered in constraints[i].pred.
	 *
	 * Note that it's not necessarily the case that every constraint affects
	 * this particular wait queue.  Prior to group locking, a process could be
	 * waiting for at most one lock.  But a lock group can be waiting for
	 * zero, one, or multiple locks.  Since topoProcs[] is an array of the
	 * processes actually waiting, while constraints[] is an array of group
	 * leaders, we've got to scan through topoProcs[] for each constraint,
	 * checking whether both a waiter and a blocker for that group are
	 * present.  If so, the constraint is relevant to this wait queue; if not,
	 * it isn't.
	 */
	MemSet(beforeConstraints, 0, queue_size * sizeof(int));
	MemSet(afterConstraints, 0, queue_size * sizeof(int));
	for (i = 0; i < nConstraints; i++)
	{
		/*
		 * Find a representative process that is on the lock queue and part of
		 * the waiting lock group.  This may or may not be the leader, which
		 * may or may not be waiting at all.  If there are any other processes
		 * in the same lock group on the queue, set their number of
		 * beforeConstraints to -1 to indicate that they should be emitted
		 * with their groupmates rather than considered separately.
		 *
		 * In this loop and the similar one just below, it's critical that we
		 * consistently select the same representative member of any one lock
		 * group, so that all the constraints are associated with the same
		 * proc, and the -1's are only associated with not-representative
		 * members.  We select the last one in the topoProcs array.
		 */
		proc = constraints[i].waiter;
		Assert(proc != NULL);
		/* jj will be the representative waiter's topoProcs index, or -1 */
		jj = -1;
		for (j = queue_size; --j >= 0;)
		{
			PGPROC	   *waiter = topoProcs[j];

			if (waiter == proc || waiter->lockGroupLeader == proc)
			{
				Assert(waiter->waitLock == lock);
				if (jj == -1)
					jj = j;
				else
				{
					Assert(beforeConstraints[j] <= 0);
					beforeConstraints[j] = -1;
				}
			}
		}

		/* If no matching waiter, constraint is not relevant to this lock. */
		if (jj < 0)
			continue;

		/*
		 * Similarly, find a representative process that is on the lock queue
		 * and waiting for the blocking lock group.  Again, this could be the
		 * leader but does not need to be.
		 */
		proc = constraints[i].blocker;
		Assert(proc != NULL);
		/* kk will be the representative blocker's topoProcs index, or -1 */
		kk = -1;
		for (k = queue_size; --k >= 0;)
		{
			PGPROC	   *blocker = topoProcs[k];

			if (blocker == proc || blocker->lockGroupLeader == proc)
			{
				Assert(blocker->waitLock == lock);
				if (kk == -1)
					kk = k;
				else
				{
					Assert(beforeConstraints[k] <= 0);
					beforeConstraints[k] = -1;
				}
			}
		}

		/* If no matching blocker, constraint is not relevant to this lock. */
		if (kk < 0)
			continue;

		Assert(beforeConstraints[jj] >= 0);
		beforeConstraints[jj]++;	/* waiter must come before */
		/* add this constraint to list of after-constraints for blocker */
		constraints[i].pred = jj;
		constraints[i].link = afterConstraints[kk];
		/* list entries are stored as i+1 so that zero means "end of list" */
		afterConstraints[kk] = i + 1;
	}

	/*--------------------
	 * Now scan the topoProcs array backwards.  At each step, output the
	 * last proc that has no remaining before-constraints plus any other
	 * members of the same lock group; then decrease the beforeConstraints
	 * count of each of the procs it was constrained against.
	 * i = index of ordering[] entry we want to output this time
	 * j = search index for topoProcs[]
	 * k = temp for scanning constraint list for proc j
	 * last = last non-null index in topoProcs (avoid redundant searches)
	 *--------------------
	 */
	last = queue_size - 1;
	for (i = queue_size - 1; i >= 0;)
	{
		int			c;
		int			nmatches = 0;

		/* Find next candidate to output */
		while (topoProcs[last] == NULL)
			last--;
		for (j = last; j >= 0; j--)
		{
			if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
				break;
		}

		/* If no available candidate, topological sort fails */
		if (j < 0)
			return false;

		/*
		 * Output everything in the lock group.  There's no point in
		 * outputting an ordering where members of the same lock group are not
		 * consecutive on the wait queue: if some other waiter is between two
		 * requests that belong to the same group, then either it conflicts
		 * with both of them and is certainly not a solution; or it conflicts
		 * with at most one of them and is thus isomorphic to an ordering
		 * where the group members are consecutive.
		 */
		proc = topoProcs[j];
		if (proc->lockGroupLeader != NULL)
			proc = proc->lockGroupLeader;
		Assert(proc != NULL);
		for (c = 0; c <= last; ++c)
		{
			/* emit proc itself plus every groupmate still in topoProcs[] */
			if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
										 topoProcs[c]->lockGroupLeader == proc))
			{
				ordering[i - nmatches] = topoProcs[c];
				topoProcs[c] = NULL;
				++nmatches;
			}
		}
		Assert(nmatches > 0);
		i -= nmatches;

		/* Update beforeConstraints counts of its predecessors */
		for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
			beforeConstraints[constraints[k - 1].pred]--;
	}

	/* Done */
	return true;
}
1064 
#ifdef DEBUG_DEADLOCK
/*
 * Print the current wait queue of a lock to stdout, for debugging.
 *
 * "info" is a caller-supplied label emitted before the lock's address and
 * the PIDs of the processes on its wait queue, in queue order.
 */
static void
PrintLockQueue(LOCK *lock, const char *info)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
	PGPROC	   *proc;
	int			i;

	/* %p requires a void * argument; passing LOCK * directly is UB */
	printf("%s lock %p queue ", info, (void *) lock);
	proc = (PGPROC *) waitQueue->links.next;
	for (i = 0; i < queue_size; i++)
	{
		printf(" %d", proc->pid);
		proc = (PGPROC *) proc->links.next;
	}
	printf("\n");
	fflush(stdout);
}
#endif
1085 
1086 /*
1087  * Report a detected deadlock, with available details.
1088  */
1089 void
DeadLockReport(void)1090 DeadLockReport(void)
1091 {
1092 	StringInfoData clientbuf;	/* errdetail for client */
1093 	StringInfoData logbuf;		/* errdetail for server log */
1094 	StringInfoData locktagbuf;
1095 	int			i;
1096 
1097 	initStringInfo(&clientbuf);
1098 	initStringInfo(&logbuf);
1099 	initStringInfo(&locktagbuf);
1100 
1101 	/* Generate the "waits for" lines sent to the client */
1102 	for (i = 0; i < nDeadlockDetails; i++)
1103 	{
1104 		DEADLOCK_INFO *info = &deadlockDetails[i];
1105 		int			nextpid;
1106 
1107 		/* The last proc waits for the first one... */
1108 		if (i < nDeadlockDetails - 1)
1109 			nextpid = info[1].pid;
1110 		else
1111 			nextpid = deadlockDetails[0].pid;
1112 
1113 		/* reset locktagbuf to hold next object description */
1114 		resetStringInfo(&locktagbuf);
1115 
1116 		DescribeLockTag(&locktagbuf, &info->locktag);
1117 
1118 		if (i > 0)
1119 			appendStringInfoChar(&clientbuf, '\n');
1120 
1121 		appendStringInfo(&clientbuf,
1122 						 _("Process %d waits for %s on %s; blocked by process %d."),
1123 						 info->pid,
1124 						 GetLockmodeName(info->locktag.locktag_lockmethodid,
1125 										 info->lockmode),
1126 						 locktagbuf.data,
1127 						 nextpid);
1128 	}
1129 
1130 	/* Duplicate all the above for the server ... */
1131 	appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len);
1132 
1133 	/* ... and add info about query strings */
1134 	for (i = 0; i < nDeadlockDetails; i++)
1135 	{
1136 		DEADLOCK_INFO *info = &deadlockDetails[i];
1137 
1138 		appendStringInfoChar(&logbuf, '\n');
1139 
1140 		appendStringInfo(&logbuf,
1141 						 _("Process %d: %s"),
1142 						 info->pid,
1143 						 pgstat_get_backend_current_activity(info->pid, false));
1144 	}
1145 
1146 	pgstat_report_deadlock();
1147 
1148 	ereport(ERROR,
1149 			(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
1150 			 errmsg("deadlock detected"),
1151 			 errdetail_internal("%s", clientbuf.data),
1152 			 errdetail_log("%s", logbuf.data),
1153 			 errhint("See server log for query details.")));
1154 }
1155 
1156 /*
1157  * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
1158  * detects a trivial (two-way) deadlock.  proc1 wants to block for lockmode
1159  * on lock, but proc2 is already waiting and would be blocked by proc1.
1160  */
1161 void
RememberSimpleDeadLock(PGPROC * proc1,LOCKMODE lockmode,LOCK * lock,PGPROC * proc2)1162 RememberSimpleDeadLock(PGPROC *proc1,
1163 					   LOCKMODE lockmode,
1164 					   LOCK *lock,
1165 					   PGPROC *proc2)
1166 {
1167 	DEADLOCK_INFO *info = &deadlockDetails[0];
1168 
1169 	info->locktag = lock->tag;
1170 	info->lockmode = lockmode;
1171 	info->pid = proc1->pid;
1172 	info++;
1173 	info->locktag = proc2->waitLock->tag;
1174 	info->lockmode = proc2->waitLockMode;
1175 	info->pid = proc2->pid;
1176 	nDeadlockDetails = 2;
1177 }
1178