1 /*-------------------------------------------------------------------------
2 *
3 * deadlock.c
4 * POSTGRES deadlock detection code
5 *
6 * See src/backend/storage/lmgr/README for a description of the deadlock
7 * detection and resolution algorithms.
8 *
9 *
10 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/storage/lmgr/deadlock.c
16 *
17 * Interface:
18 *
19 * DeadLockCheck()
20 * DeadLockReport()
21 * RememberSimpleDeadLock()
22 * InitDeadLockChecking()
23 *
24 *-------------------------------------------------------------------------
25 */
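/*
 * A rough sketch of how these entry points fit together (hypothetical
 * caller shown for orientation only; the real call sites are in the lock
 * wait/timeout code):
 *
 *		InitDeadLockChecking();			-- once, at backend startup
 *		...
 *		state = DeadLockCheck(MyProc);	-- with all lock partition LWLocks held
 *		...
 *		if (state == DS_HARD_DEADLOCK)
 *			DeadLockReport();			-- after the LWLocks are released
 */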
26 #include "postgres.h"
27
28 #include "miscadmin.h"
29 #include "pg_trace.h"
30 #include "pgstat.h"
31 #include "storage/lmgr.h"
32 #include "storage/proc.h"
33 #include "utils/memutils.h"
34
35
36 /*
37 * One edge in the waits-for graph.
38 *
39 * waiter and blocker may or may not be members of a lock group, but if either
40 * is, it will be the leader rather than any other member of the lock group.
41 * The group leaders act as representatives of the whole group even though
42 * those particular processes need not be waiting at all. There will be at
43 * least one member of the waiter's lock group on the wait queue for the given
44 * lock, maybe more.
45 */
46 typedef struct
47 {
48 PGPROC *waiter; /* the leader of the waiting lock group */
49 PGPROC *blocker; /* the leader of the group it is waiting for */
50 LOCK *lock; /* the lock being waited for */
51 int pred; /* workspace for TopoSort */
52 int link; /* workspace for TopoSort */
53 } EDGE;
54
55 /* One potential reordering of a lock's wait queue */
56 typedef struct
57 {
58 LOCK *lock; /* the lock whose wait queue is described */
59 PGPROC **procs; /* array of PGPROC *'s in new wait order */
60 int nProcs;
61 } WAIT_ORDER;
62
63 /*
64 * Information saved about each edge in a detected deadlock cycle. This
65 * is used to print a diagnostic message upon failure.
66 *
67 * Note: because we want to examine this info after releasing the lock
68 * manager's partition locks, we can't just store LOCK and PGPROC pointers;
69 * we must extract out all the info we want to be able to print.
70 */
71 typedef struct
72 {
73 LOCKTAG locktag; /* ID of awaited lock object */
74 LOCKMODE lockmode; /* type of lock we're waiting for */
75 int pid; /* PID of blocked backend */
76 } DEADLOCK_INFO;
77
78
79 static bool DeadLockCheckRecurse(PGPROC *proc);
80 static int TestConfiguration(PGPROC *startProc);
81 static bool FindLockCycle(PGPROC *checkProc,
82 EDGE *softEdges, int *nSoftEdges);
83 static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
84 EDGE *softEdges, int *nSoftEdges);
85 static bool FindLockCycleRecurseMember(PGPROC *checkProc,
86 PGPROC *checkProcLeader,
87 int depth, EDGE *softEdges, int *nSoftEdges);
88 static bool ExpandConstraints(EDGE *constraints, int nConstraints);
89 static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
90 PGPROC **ordering);
91
92 #ifdef DEBUG_DEADLOCK
93 static void PrintLockQueue(LOCK *lock, const char *info);
94 #endif
95
96
97 /*
98 * Working space for the deadlock detector
99 */
100
101 /* Workspace for FindLockCycle */
102 static PGPROC **visitedProcs; /* Array of visited procs */
103 static int nVisitedProcs;
104
105 /* Workspace for TopoSort */
106 static PGPROC **topoProcs; /* Array of not-yet-output procs */
107 static int *beforeConstraints; /* Counts of remaining before-constraints */
108 static int *afterConstraints; /* List head for after-constraints */
109
110 /* Output area for ExpandConstraints */
111 static WAIT_ORDER *waitOrders; /* Array of proposed queue rearrangements */
112 static int nWaitOrders;
113 static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */
114
115 /* Current list of constraints being considered */
116 static EDGE *curConstraints;
117 static int nCurConstraints;
118 static int maxCurConstraints;
119
120 /* Storage space for results from FindLockCycle */
121 static EDGE *possibleConstraints;
122 static int nPossibleConstraints;
123 static int maxPossibleConstraints;
124 static DEADLOCK_INFO *deadlockDetails;
125 static int nDeadlockDetails;
126
127 /* PGPROC pointer of any blocking autovacuum worker found */
128 static PGPROC *blocking_autovacuum_proc = NULL;
129
130
131 /*
132 * InitDeadLockChecking -- initialize deadlock checker during backend startup
133 *
134 * This does per-backend initialization of the deadlock checker; primarily,
135 * allocation of working memory for DeadLockCheck. We do this per-backend
136 * since there's no percentage in making the kernel do copy-on-write
137 * inheritance of workspace from the postmaster. We want to allocate the
138 * space at startup because (a) the deadlock checker might be invoked when
139 * there's no free memory left, and (b) the checker is normally run inside a
140 * signal handler, which is a very dangerous place to invoke palloc from.
141 */
142 void
143 InitDeadLockChecking(void)
144 {
145 MemoryContext oldcxt;
146
147 /* Make sure allocations are permanent */
148 oldcxt = MemoryContextSwitchTo(TopMemoryContext);
149
150 /*
151 * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
152 * deadlockDetails[].
153 */
154 visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
155 deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
156
157 /*
158 * TopoSort needs to consider at most MaxBackends wait-queue entries, and
159 * it needn't run concurrently with FindLockCycle.
160 */
161 topoProcs = visitedProcs; /* re-use this space */
162 beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
163 afterConstraints = (int *) palloc(MaxBackends * sizeof(int));
164
165 /*
166 * We need to consider rearranging at most MaxBackends/2 wait queues
167 * (since it takes at least two waiters in a queue to create a soft edge),
168 * and the expanded form of the wait queues can't involve more than
169 * MaxBackends total waiters.
170 */
171 waitOrders = (WAIT_ORDER *)
172 palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
173 waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
174
175 /*
176 * Allow at most MaxBackends distinct constraints in a configuration. (Is
177 * this enough? In practice it seems it should be, but I don't quite see
178 * how to prove it. If we run out, we might fail to find a workable wait
179 * queue rearrangement even though one exists.) NOTE that this number
180 * limits the maximum recursion depth of DeadLockCheckRecurse. Making it
181 * really big might potentially allow a stack-overflow problem.
182 */
183 maxCurConstraints = MaxBackends;
184 curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));
185
186 /*
187 * Allow up to 3*MaxBackends constraints to be saved without having to
188 * re-run TestConfiguration. (This is probably more than enough, but we
189 * can survive if we run low on space by doing excess runs of
190 * TestConfiguration to re-compute constraint lists each time needed.) The
191 * last MaxBackends entries in possibleConstraints[] are reserved as
192 * output workspace for FindLockCycle.
193 */
194 maxPossibleConstraints = MaxBackends * 4;
195 possibleConstraints =
196 (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));
197
198 MemoryContextSwitchTo(oldcxt);
199 }
200
201 /*
202 * DeadLockCheck -- Checks for deadlocks for a given process
203 *
204 * This code looks for deadlocks involving the given process. If any
205 * are found, it tries to rearrange lock wait queues to resolve the
206 * deadlock. If resolution is impossible, return DS_HARD_DEADLOCK ---
207 * the caller is then expected to abort the given proc's transaction.
208 *
209 * Caller must already have locked all partitions of the lock tables.
210 *
211 * On failure, deadlock details are recorded in deadlockDetails[] for
212 * subsequent printing by DeadLockReport(). That activity is separate
213 * because (a) we don't want to do it while holding all those LWLocks,
214 * and (b) we are typically invoked inside a signal handler.
215 */
216 DeadLockState
217 DeadLockCheck(PGPROC *proc)
218 {
219 int i,
220 j;
221
222 /* Initialize to "no constraints" */
223 nCurConstraints = 0;
224 nPossibleConstraints = 0;
225 nWaitOrders = 0;
226
227 /* Initialize to not blocked by an autovacuum worker */
228 blocking_autovacuum_proc = NULL;
229
230 /* Search for deadlocks and possible fixes */
231 if (DeadLockCheckRecurse(proc))
232 {
233 /*
234 * Call FindLockCycle one more time, to record the correct
235 * deadlockDetails[] for the basic state with no rearrangements.
236 */
237 int nSoftEdges;
238
239 TRACE_POSTGRESQL_DEADLOCK_FOUND();
240
241 nWaitOrders = 0;
242 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
243 elog(FATAL, "deadlock seems to have disappeared");
244
245 return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */
246 }
247
248 /* Apply any needed rearrangements of wait queues */
249 for (i = 0; i < nWaitOrders; i++)
250 {
251 LOCK *lock = waitOrders[i].lock;
252 PGPROC **procs = waitOrders[i].procs;
253 int nProcs = waitOrders[i].nProcs;
254 PROC_QUEUE *waitQueue = &(lock->waitProcs);
255
256 Assert(nProcs == waitQueue->size);
257
258 #ifdef DEBUG_DEADLOCK
259 PrintLockQueue(lock, "DeadLockCheck:");
260 #endif
261
262 /* Reset the queue and re-add procs in the desired order */
263 ProcQueueInit(waitQueue);
264 for (j = 0; j < nProcs; j++)
265 {
266 SHMQueueInsertBefore(&(waitQueue->links), &(procs[j]->links));
267 waitQueue->size++;
268 }
269
270 #ifdef DEBUG_DEADLOCK
271 PrintLockQueue(lock, "rearranged to:");
272 #endif
273
274 /* See if any waiters for the lock can be woken up now */
275 ProcLockWakeup(GetLocksMethodTable(lock), lock);
276 }
277
278 /* Return code tells caller if we had to escape a deadlock or not */
279 if (nWaitOrders > 0)
280 return DS_SOFT_DEADLOCK;
281 else if (blocking_autovacuum_proc != NULL)
282 return DS_BLOCKED_BY_AUTOVACUUM;
283 else
284 return DS_NO_DEADLOCK;
285 }
286
287 /*
288 * Return the PGPROC of the autovacuum that's blocking a process.
289 *
290 * We reset the saved pointer as soon as we pass it back.
291 */
292 PGPROC *
293 GetBlockingAutoVacuumPgproc(void)
294 {
295 PGPROC *ptr;
296
297 ptr = blocking_autovacuum_proc;
298 blocking_autovacuum_proc = NULL;
299
300 return ptr;
301 }
302
303 /*
304 * DeadLockCheckRecurse -- recursively search for valid orderings
305 *
306 * curConstraints[] holds the current set of constraints being considered
307 * by an outer level of recursion. Add to this each possible solution
308 * constraint for any cycle detected at this level.
309 *
310 * Returns true if no solution exists. Returns false if a deadlock-free
311 * state is attainable, in which case waitOrders[] shows the required
312 * rearrangements of lock wait queues (if any).
313 */
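/*
 * In outline (a paraphrase of the code below, not additional behavior):
 * test the current constraint set; on a hard deadlock give up, on a clean
 * configuration succeed, and otherwise try each reported soft edge in turn
 * as an extra constraint, recursing with it added to curConstraints[].
 */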
314 static bool
315 DeadLockCheckRecurse(PGPROC *proc)
316 {
317 int nEdges;
318 int oldPossibleConstraints;
319 bool savedList;
320 int i;
321
322 nEdges = TestConfiguration(proc);
323 if (nEdges < 0)
324 return true; /* hard deadlock --- no solution */
325 if (nEdges == 0)
326 return false; /* good configuration found */
327 if (nCurConstraints >= maxCurConstraints)
328 return true; /* out of room for active constraints? */
329 oldPossibleConstraints = nPossibleConstraints;
330 if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
331 {
332 /* We can save the edge list in possibleConstraints[] */
333 nPossibleConstraints += nEdges;
334 savedList = true;
335 }
336 else
337 {
338 /* No room; will need to regenerate the edges on-the-fly */
339 savedList = false;
340 }
341
342 /*
343 * Try each available soft edge as an addition to the configuration.
344 */
345 for (i = 0; i < nEdges; i++)
346 {
347 if (!savedList && i > 0)
348 {
349 /* Regenerate the list of possible added constraints */
350 if (nEdges != TestConfiguration(proc))
351 elog(FATAL, "inconsistent results during deadlock check");
352 }
353 curConstraints[nCurConstraints] =
354 possibleConstraints[oldPossibleConstraints + i];
355 nCurConstraints++;
356 if (!DeadLockCheckRecurse(proc))
357 return false; /* found a valid solution! */
358 /* give up on that added constraint, try again */
359 nCurConstraints--;
360 }
361 nPossibleConstraints = oldPossibleConstraints;
362 return true; /* no solution found */
363 }
364
365
366 /*--------------------
367 * Test a configuration (current set of constraints) for validity.
368 *
369 * Returns:
370 * 0: the configuration is good (no deadlocks)
371 * -1: the configuration has a hard deadlock or is not self-consistent
372 * >0: the configuration has one or more soft deadlocks
373 *
374 * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
375 * and a list of its soft edges is returned beginning at
376 * possibleConstraints+nPossibleConstraints. The return value is the
377 * number of soft edges.
378 *--------------------
379 */
380 static int
381 TestConfiguration(PGPROC *startProc)
382 {
383 int softFound = 0;
384 EDGE *softEdges = possibleConstraints + nPossibleConstraints;
385 int nSoftEdges;
386 int i;
387
388 /*
389 * Make sure we have room for FindLockCycle's output.
390 */
391 if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
392 return -1;
393
394 /*
395 * Expand current constraint set into wait orderings. Fail if the
396 * constraint set is not self-consistent.
397 */
398 if (!ExpandConstraints(curConstraints, nCurConstraints))
399 return -1;
400
401 /*
402 * Check for cycles involving startProc or any of the procs mentioned in
403 * constraints. We check startProc last because if it has a soft cycle
404 * still to be dealt with, we want to deal with that first.
405 */
406 for (i = 0; i < nCurConstraints; i++)
407 {
408 if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
409 {
410 if (nSoftEdges == 0)
411 return -1; /* hard deadlock detected */
412 softFound = nSoftEdges;
413 }
414 if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
415 {
416 if (nSoftEdges == 0)
417 return -1; /* hard deadlock detected */
418 softFound = nSoftEdges;
419 }
420 }
421 if (FindLockCycle(startProc, softEdges, &nSoftEdges))
422 {
423 if (nSoftEdges == 0)
424 return -1; /* hard deadlock detected */
425 softFound = nSoftEdges;
426 }
427 return softFound;
428 }
429
430
431 /*
432 * FindLockCycle -- basic check for deadlock cycles
433 *
434 * Scan outward from the given proc to see if there is a cycle in the
435 * waits-for graph that includes this proc. Return true if a cycle
436 * is found, else false. If a cycle is found, we return a list of
437 * the "soft edges", if any, included in the cycle. These edges could
438 * potentially be eliminated by rearranging wait queues. We also fill
439 * deadlockDetails[] with information about the detected cycle; this info
440 * is not used by the deadlock algorithm itself, only to print a useful
441 * message after failing.
442 *
443 * Since we need to be able to check hypothetical configurations that would
444 * exist after wait queue rearrangement, the routine pays attention to the
445 * table of hypothetical queue orders in waitOrders[]. These orders will
446 * be believed in preference to the actual ordering seen in the locktable.
447 */
448 static bool
449 FindLockCycle(PGPROC *checkProc,
450 EDGE *softEdges, /* output argument */
451 int *nSoftEdges) /* output argument */
452 {
453 nVisitedProcs = 0;
454 nDeadlockDetails = 0;
455 *nSoftEdges = 0;
456 return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
457 }
458
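/*
 * FindLockCycleRecurse -- recursive guts of the cycle search
 *
 * checkProc (or its lock group leader, which we substitute immediately) is
 * examined for outgoing waits-for edges.  "depth" is the number of edges
 * traversed so far; it records the cycle length when we return to
 * visitedProcs[0], and each recursion level uses its own depth as the index
 * it fills in deadlockDetails[] while unwinding.
 */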
459 static bool
460 FindLockCycleRecurse(PGPROC *checkProc,
461 int depth,
462 EDGE *softEdges, /* output argument */
463 int *nSoftEdges) /* output argument */
464 {
465 int i;
466 dlist_iter iter;
467
468 /*
469 * If this process is a lock group member, check the leader instead. (Note
470 * that we might be the leader, in which case this is a no-op.)
471 */
472 if (checkProc->lockGroupLeader != NULL)
473 checkProc = checkProc->lockGroupLeader;
474
475 /*
476 * Have we already seen this proc?
477 */
478 for (i = 0; i < nVisitedProcs; i++)
479 {
480 if (visitedProcs[i] == checkProc)
481 {
482 /* If we return to starting point, we have a deadlock cycle */
483 if (i == 0)
484 {
485 /*
486 * record total length of cycle --- outer levels will now fill
487 * deadlockDetails[]
488 */
489 Assert(depth <= MaxBackends);
490 nDeadlockDetails = depth;
491
492 return true;
493 }
494
495 /*
496 * Otherwise, we have a cycle but it does not include the start
497 * point, so say "no deadlock".
498 */
499 return false;
500 }
501 }
502 /* Mark proc as seen */
503 Assert(nVisitedProcs < MaxBackends);
504 visitedProcs[nVisitedProcs++] = checkProc;
505
506 /*
507 * If the process is waiting, there is an outgoing waits-for edge to each
508 * process that blocks it.
509 */
510 if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
511 FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
512 nSoftEdges))
513 return true;
514
515 /*
516 * If the process is not waiting, there could still be outgoing waits-for
517 * edges if it is part of a lock group, because other members of the lock
518 * group might be waiting even though this process is not. (Given lock
519 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
520 * that is a deadlock even if neither B1 nor A2 is waiting for anything.)
521 */
522 dlist_foreach(iter, &checkProc->lockGroupMembers)
523 {
524 PGPROC *memberProc;
525
526 memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
527
528 if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
529 memberProc != checkProc &&
530 FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
531 nSoftEdges))
532 return true;
533 }
534
535 return false;
536 }
537
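/*
 * FindLockCycleRecurseMember -- examine one waiting member of a lock group
 *
 * checkProc is a process actually waiting on checkProc->waitLock, while
 * checkProcLeader is the group leader used to label waits-for edges.  We
 * scan the lock's current holders for hard edges, then the waiters queued
 * ahead of checkProc's group in the (possibly hypothetical) wait order for
 * soft edges, recursing into FindLockCycleRecurse for each blocker found.
 */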
538 static bool
539 FindLockCycleRecurseMember(PGPROC *checkProc,
540 PGPROC *checkProcLeader,
541 int depth,
542 EDGE *softEdges, /* output argument */
543 int *nSoftEdges) /* output argument */
544 {
545 PGPROC *proc;
546 LOCK *lock = checkProc->waitLock;
547 PROCLOCK *proclock;
548 SHM_QUEUE *procLocks;
549 LockMethod lockMethodTable;
550 PROC_QUEUE *waitQueue;
551 int queue_size;
552 int conflictMask;
553 int i;
554 int numLockModes,
555 lm;
556
557 /*
558 * A relation extension or page lock can never participate in an actual
559 * deadlock cycle (see the Asserts in LockAcquireExtended), so there is no
560 * advantage in checking wait edges from them.
561 */
562 if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND ||
563 (LOCK_LOCKTAG(*lock) == LOCKTAG_PAGE))
564 return false;
565
566 lockMethodTable = GetLocksMethodTable(lock);
567 numLockModes = lockMethodTable->numLockModes;
568 conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
569
570 /*
571 * Scan for procs that already hold conflicting locks. These are "hard"
572 * edges in the waits-for graph.
573 */
574 procLocks = &(lock->procLocks);
575
576 proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
577 offsetof(PROCLOCK, lockLink));
578
579 while (proclock)
580 {
581 PGPROC *leader;
582
583 proc = proclock->tag.myProc;
584 leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
585
586 /* A proc never blocks itself or any other lock group member */
587 if (leader != checkProcLeader)
588 {
589 for (lm = 1; lm <= numLockModes; lm++)
590 {
591 if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
592 (conflictMask & LOCKBIT_ON(lm)))
593 {
594 /* This proc hard-blocks checkProc */
595 if (FindLockCycleRecurse(proc, depth + 1,
596 softEdges, nSoftEdges))
597 {
598 /* fill deadlockDetails[] */
599 DEADLOCK_INFO *info = &deadlockDetails[depth];
600
601 info->locktag = lock->tag;
602 info->lockmode = checkProc->waitLockMode;
603 info->pid = checkProc->pid;
604
605 return true;
606 }
607
608 /*
609 * No deadlock here, but see if this proc is an autovacuum
610 * that is directly hard-blocking our own proc. If so,
611 * report it so that the caller can send a cancel signal
612 * to it, if appropriate. If there's more than one such
613 * proc, it's indeterminate which one will be reported.
614 *
615 * We don't touch autovacuums that are indirectly blocking
616 * us; it's up to the direct blockee to take action. This
617 * rule simplifies understanding the behavior and ensures
618 * that an autovacuum won't be canceled with less than
619 * deadlock_timeout grace period.
620 *
621 * Note we read statusFlags without any locking. This is
622 * OK only for checking the PROC_IS_AUTOVACUUM flag,
623 * because that flag is set at process start and never
624 * reset. There is logic elsewhere to avoid canceling an
625 * autovacuum that is working to prevent XID wraparound
626 * problems (which needs to read a different statusFlags
627 * bit), but we don't do that here to avoid grabbing
628 * ProcArrayLock.
629 */
630 if (checkProc == MyProc &&
631 proc->statusFlags & PROC_IS_AUTOVACUUM)
632 blocking_autovacuum_proc = proc;
633
634 /* We're done looking at this proclock */
635 break;
636 }
637 }
638 }
639
640 proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
641 offsetof(PROCLOCK, lockLink));
642 }
643
644 /*
645 * Scan for procs that are ahead of this one in the lock's wait queue.
646 * Those that have conflicting requests soft-block this one. This must be
647 * done after the hard-block search, since if another proc both hard- and
648 * soft-blocks this one, we want to call it a hard edge.
649 *
650 * If there is a proposed re-ordering of the lock's wait order, use that
651 * rather than the current wait order.
652 */
653 for (i = 0; i < nWaitOrders; i++)
654 {
655 if (waitOrders[i].lock == lock)
656 break;
657 }
658
659 if (i < nWaitOrders)
660 {
661 /* Use the given hypothetical wait queue order */
662 PGPROC **procs = waitOrders[i].procs;
663
664 queue_size = waitOrders[i].nProcs;
665
666 for (i = 0; i < queue_size; i++)
667 {
668 PGPROC *leader;
669
670 proc = procs[i];
671 leader = proc->lockGroupLeader == NULL ? proc :
672 proc->lockGroupLeader;
673
674 /*
675 * TopoSort will always return an ordering with group members
676 * adjacent to each other in the wait queue (see comments
677 * therein). So, as soon as we reach a process in the same lock
678 * group as checkProc, we know we've found all the conflicts that
679 * precede any member of the lock group led by checkProcLeader.
680 */
681 if (leader == checkProcLeader)
682 break;
683
684 /* Is there a conflict with this guy's request? */
685 if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
686 {
687 /* This proc soft-blocks checkProc */
688 if (FindLockCycleRecurse(proc, depth + 1,
689 softEdges, nSoftEdges))
690 {
691 /* fill deadlockDetails[] */
692 DEADLOCK_INFO *info = &deadlockDetails[depth];
693
694 info->locktag = lock->tag;
695 info->lockmode = checkProc->waitLockMode;
696 info->pid = checkProc->pid;
697
698 /*
699 * Add this edge to the list of soft edges in the cycle
700 */
701 Assert(*nSoftEdges < MaxBackends);
702 softEdges[*nSoftEdges].waiter = checkProcLeader;
703 softEdges[*nSoftEdges].blocker = leader;
704 softEdges[*nSoftEdges].lock = lock;
705 (*nSoftEdges)++;
706 return true;
707 }
708 }
709 }
710 }
711 else
712 {
713 PGPROC *lastGroupMember = NULL;
714
715 /* Use the true lock wait queue order */
716 waitQueue = &(lock->waitProcs);
717
718 /*
719 * Find the last member of the lock group that is present in the wait
720 * queue. Anything after this is not a soft lock conflict. If group
721 * locking is not in use, then we know immediately which process we're
722 * looking for, but otherwise we've got to search the wait queue to
723 * find the last process actually present.
724 */
725 if (checkProc->lockGroupLeader == NULL)
726 lastGroupMember = checkProc;
727 else
728 {
729 proc = (PGPROC *) waitQueue->links.next;
730 queue_size = waitQueue->size;
731 while (queue_size-- > 0)
732 {
733 if (proc->lockGroupLeader == checkProcLeader)
734 lastGroupMember = proc;
735 proc = (PGPROC *) proc->links.next;
736 }
737 Assert(lastGroupMember != NULL);
738 }
739
740 /*
741 * OK, now rescan (or scan) the queue to identify the soft conflicts.
742 */
743 queue_size = waitQueue->size;
744 proc = (PGPROC *) waitQueue->links.next;
745 while (queue_size-- > 0)
746 {
747 PGPROC *leader;
748
749 leader = proc->lockGroupLeader == NULL ? proc :
750 proc->lockGroupLeader;
751
752 /* Done when we reach the target proc */
753 if (proc == lastGroupMember)
754 break;
755
756 /* Is there a conflict with this guy's request? */
757 if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
758 leader != checkProcLeader)
759 {
760 /* This proc soft-blocks checkProc */
761 if (FindLockCycleRecurse(proc, depth + 1,
762 softEdges, nSoftEdges))
763 {
764 /* fill deadlockDetails[] */
765 DEADLOCK_INFO *info = &deadlockDetails[depth];
766
767 info->locktag = lock->tag;
768 info->lockmode = checkProc->waitLockMode;
769 info->pid = checkProc->pid;
770
771 /*
772 * Add this edge to the list of soft edges in the cycle
773 */
774 Assert(*nSoftEdges < MaxBackends);
775 softEdges[*nSoftEdges].waiter = checkProcLeader;
776 softEdges[*nSoftEdges].blocker = leader;
777 softEdges[*nSoftEdges].lock = lock;
778 (*nSoftEdges)++;
779 return true;
780 }
781 }
782
783 proc = (PGPROC *) proc->links.next;
784 }
785 }
786
787 /*
788 * No conflict detected here.
789 */
790 return false;
791 }
792
793
794 /*
795 * ExpandConstraints -- expand a list of constraints into a set of
796 * specific new orderings for affected wait queues
797 *
798 * Input is a list of soft edges to be reversed. The output is a list
799 * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
800 * workspace in waitOrderProcs[].
801 *
802 * Returns true if able to build an ordering that satisfies all the
803 * constraints, false if not (there are contradictory constraints).
804 */
805 static bool
806 ExpandConstraints(EDGE *constraints,
807 int nConstraints)
808 {
809 int nWaitOrderProcs = 0;
810 int i,
811 j;
812
813 nWaitOrders = 0;
814
815 /*
816 * Scan constraint list backwards. This is because the last-added
817 * constraint is the only one that could fail, and so we want to test it
818 * for inconsistency first.
819 */
820 for (i = nConstraints; --i >= 0;)
821 {
822 LOCK *lock = constraints[i].lock;
823
824 /* Did we already make a list for this lock? */
825 for (j = nWaitOrders; --j >= 0;)
826 {
827 if (waitOrders[j].lock == lock)
828 break;
829 }
830 if (j >= 0)
831 continue;
832 /* No, so allocate a new list */
833 waitOrders[nWaitOrders].lock = lock;
834 waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
835 waitOrders[nWaitOrders].nProcs = lock->waitProcs.size;
836 nWaitOrderProcs += lock->waitProcs.size;
837 Assert(nWaitOrderProcs <= MaxBackends);
838
839 /*
840 * Do the topo sort. TopoSort need not examine constraints after this
841 * one, since they must be for different locks.
842 */
843 if (!TopoSort(lock, constraints, i + 1,
844 waitOrders[nWaitOrders].procs))
845 return false;
846 nWaitOrders++;
847 }
848 return true;
849 }
850
851
852 /*
853 * TopoSort -- topological sort of a wait queue
854 *
855 * Generate a re-ordering of a lock's wait queue that satisfies given
856 * constraints about certain procs preceding others. (Each such constraint
857 * is a fact of a partial ordering.) Minimize rearrangement of the queue
858 * not needed to achieve the partial ordering.
859 *
860 * This is a lot simpler and slower than, for example, the topological sort
861 * algorithm shown in Knuth's Volume 1. However, Knuth's method doesn't
862 * try to minimize the damage to the existing order. In practice we are
863 * not likely to be working with more than a few constraints, so the apparent
864 * slowness of the algorithm won't really matter.
865 *
866 * The initial queue ordering is taken directly from the lock's wait queue.
867 * The output is an array of PGPROC pointers, of length equal to the lock's
868 * wait queue length (the caller is responsible for providing this space).
869 * The partial order is specified by an array of EDGE structs. Each EDGE
870 * is one that we need to reverse, therefore the "waiter" must appear before
871 * the "blocker" in the output array. The EDGE array may well contain
872 * edges associated with other locks; these should be ignored.
873 *
874 * Returns true if able to build an ordering that satisfies all the
875 * constraints, false if not (there are contradictory constraints).
876 */
877 static bool
878 TopoSort(LOCK *lock,
879 EDGE *constraints,
880 int nConstraints,
881 PGPROC **ordering) /* output argument */
882 {
883 PROC_QUEUE *waitQueue = &(lock->waitProcs);
884 int queue_size = waitQueue->size;
885 PGPROC *proc;
886 int i,
887 j,
888 jj,
889 k,
890 kk,
891 last;
892
893 /* First, fill topoProcs[] array with the procs in their current order */
894 proc = (PGPROC *) waitQueue->links.next;
895 for (i = 0; i < queue_size; i++)
896 {
897 topoProcs[i] = proc;
898 proc = (PGPROC *) proc->links.next;
899 }
900
901 /*
902 * Scan the constraints, and for each proc in the array, generate a count
903 * of the number of constraints that say it must be before something else,
904 * plus a list of the constraints that say it must be after something
905 * else. The count for the j'th proc is stored in beforeConstraints[j],
906 * and the head of its list in afterConstraints[j]. Each constraint
907 * stores its list link in constraints[i].link (note any constraint will
908 * be in just one list). The array index for the before-proc of the i'th
909 * constraint is remembered in constraints[i].pred.
910 *
911 * Note that it's not necessarily the case that every constraint affects
912 * this particular wait queue. Prior to group locking, a process could be
913 * waiting for at most one lock. But a lock group can be waiting for
914 * zero, one, or multiple locks. Since topoProcs[] is an array of the
915 * processes actually waiting, while constraints[] is an array of group
916 * leaders, we've got to scan through topoProcs[] for each constraint,
917 * checking whether both a waiter and a blocker for that group are
918 * present. If so, the constraint is relevant to this wait queue; if not,
919 * it isn't.
920 */
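/*
 * Worked example (a hypothetical queue of three unrelated backends A,B,C in
 * that order, with one relevant constraint saying C must precede A):
 * beforeConstraints becomes {0,0,1} and the constraint is linked from
 * afterConstraints[] at A's index.  The backwards output pass below then
 * emits B, then A (which releases C's count), then C, yielding the final
 * ordering C,A,B.
 */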
921 MemSet(beforeConstraints, 0, queue_size * sizeof(int));
922 MemSet(afterConstraints, 0, queue_size * sizeof(int));
923 for (i = 0; i < nConstraints; i++)
924 {
925 /*
926 * Find a representative process that is on the lock queue and part of
927 * the waiting lock group. This may or may not be the leader, which
928 * may or may not be waiting at all. If there are any other processes
929 * in the same lock group on the queue, set their number of
930 * beforeConstraints to -1 to indicate that they should be emitted
931 * with their groupmates rather than considered separately.
932 *
933 * In this loop and the similar one just below, it's critical that we
934 * consistently select the same representative member of any one lock
935 * group, so that all the constraints are associated with the same
936 * proc, and the -1's are only associated with not-representative
937 * members. We select the last one in the topoProcs array.
938 */
939 proc = constraints[i].waiter;
940 Assert(proc != NULL);
941 jj = -1;
942 for (j = queue_size; --j >= 0;)
943 {
944 PGPROC *waiter = topoProcs[j];
945
946 if (waiter == proc || waiter->lockGroupLeader == proc)
947 {
948 Assert(waiter->waitLock == lock);
949 if (jj == -1)
950 jj = j;
951 else
952 {
953 Assert(beforeConstraints[j] <= 0);
954 beforeConstraints[j] = -1;
955 }
956 }
957 }
958
959 /* If no matching waiter, constraint is not relevant to this lock. */
960 if (jj < 0)
961 continue;
962
963 /*
964 * Similarly, find a representative process that is on the lock queue
965 * and waiting for the blocking lock group. Again, this could be the
966 * leader but does not need to be.
967 */
968 proc = constraints[i].blocker;
969 Assert(proc != NULL);
970 kk = -1;
971 for (k = queue_size; --k >= 0;)
972 {
973 PGPROC *blocker = topoProcs[k];
974
975 if (blocker == proc || blocker->lockGroupLeader == proc)
976 {
977 Assert(blocker->waitLock == lock);
978 if (kk == -1)
979 kk = k;
980 else
981 {
982 Assert(beforeConstraints[k] <= 0);
983 beforeConstraints[k] = -1;
984 }
985 }
986 }
987
988 /* If no matching blocker, constraint is not relevant to this lock. */
989 if (kk < 0)
990 continue;
991
992 Assert(beforeConstraints[jj] >= 0);
993 beforeConstraints[jj]++; /* waiter must come before */
994 /* add this constraint to list of after-constraints for blocker */
995 constraints[i].pred = jj;
996 constraints[i].link = afterConstraints[kk];
997 afterConstraints[kk] = i + 1;
998 }
999
1000 /*--------------------
1001 * Now scan the topoProcs array backwards. At each step, output the
1002 * last proc that has no remaining before-constraints plus any other
1003 * members of the same lock group; then decrease the beforeConstraints
1004 * count of each of the procs it was constrained against.
1005 * i = index of ordering[] entry we want to output this time
1006 * j = search index for topoProcs[]
1007 * k = temp for scanning constraint list for proc j
1008 * last = last non-null index in topoProcs (avoid redundant searches)
1009 *--------------------
1010 */
1011 last = queue_size - 1;
1012 for (i = queue_size - 1; i >= 0;)
1013 {
1014 int c;
1015 int nmatches = 0;
1016
1017 /* Find next candidate to output */
1018 while (topoProcs[last] == NULL)
1019 last--;
1020 for (j = last; j >= 0; j--)
1021 {
1022 if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
1023 break;
1024 }
1025
1026 /* If no available candidate, topological sort fails */
1027 if (j < 0)
1028 return false;
1029
1030 /*
1031 * Output everything in the lock group. There's no point in
1032 * outputting an ordering where members of the same lock group are not
1033 * consecutive on the wait queue: if some other waiter is between two
1034 * requests that belong to the same group, then either it conflicts
1035 * with both of them and is certainly not a solution; or it conflicts
1036 * with at most one of them and is thus isomorphic to an ordering
1037 * where the group members are consecutive.
1038 */
1039 proc = topoProcs[j];
1040 if (proc->lockGroupLeader != NULL)
1041 proc = proc->lockGroupLeader;
1042 Assert(proc != NULL);
1043 for (c = 0; c <= last; ++c)
1044 {
1045 if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
1046 topoProcs[c]->lockGroupLeader == proc))
1047 {
1048 ordering[i - nmatches] = topoProcs[c];
1049 topoProcs[c] = NULL;
1050 ++nmatches;
1051 }
1052 }
1053 Assert(nmatches > 0);
1054 i -= nmatches;
1055
1056 /* Update beforeConstraints counts of its predecessors */
1057 for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
1058 beforeConstraints[constraints[k - 1].pred]--;
1059 }
1060
1061 /* Done */
1062 return true;
1063 }
1064
1065 #ifdef DEBUG_DEADLOCK
1066 static void
1067 PrintLockQueue(LOCK *lock, const char *info)
1068 {
1069 PROC_QUEUE *waitQueue = &(lock->waitProcs);
1070 int queue_size = waitQueue->size;
1071 PGPROC *proc;
1072 int i;
1073
1074 printf("%s lock %p queue ", info, lock);
1075 proc = (PGPROC *) waitQueue->links.next;
1076 for (i = 0; i < queue_size; i++)
1077 {
1078 printf(" %d", proc->pid);
1079 proc = (PGPROC *) proc->links.next;
1080 }
1081 printf("\n");
1082 fflush(stdout);
1083 }
1084 #endif
1085
1086 /*
1087 * Report a detected deadlock, with available details.
1088 */
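/*
 * For a simple two-process cycle, the client-visible errdetail built below
 * comes out looking like this (illustrative PIDs and lock tags):
 *
 *		Process 8540 waits for ShareLock on transaction 641; blocked by process 8541.
 *		Process 8541 waits for ShareLock on transaction 640; blocked by process 8540.
 */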
1089 void
1090 DeadLockReport(void)
1091 {
1092 StringInfoData clientbuf; /* errdetail for client */
1093 StringInfoData logbuf; /* errdetail for server log */
1094 StringInfoData locktagbuf;
1095 int i;
1096
1097 initStringInfo(&clientbuf);
1098 initStringInfo(&logbuf);
1099 initStringInfo(&locktagbuf);
1100
1101 /* Generate the "waits for" lines sent to the client */
1102 for (i = 0; i < nDeadlockDetails; i++)
1103 {
1104 DEADLOCK_INFO *info = &deadlockDetails[i];
1105 int nextpid;
1106
1107 /* The last proc waits for the first one... */
1108 if (i < nDeadlockDetails - 1)
1109 nextpid = info[1].pid;
1110 else
1111 nextpid = deadlockDetails[0].pid;
1112
1113 /* reset locktagbuf to hold next object description */
1114 resetStringInfo(&locktagbuf);
1115
1116 DescribeLockTag(&locktagbuf, &info->locktag);
1117
1118 if (i > 0)
1119 appendStringInfoChar(&clientbuf, '\n');
1120
1121 appendStringInfo(&clientbuf,
1122 _("Process %d waits for %s on %s; blocked by process %d."),
1123 info->pid,
1124 GetLockmodeName(info->locktag.locktag_lockmethodid,
1125 info->lockmode),
1126 locktagbuf.data,
1127 nextpid);
1128 }
1129
1130 /* Duplicate all the above for the server ... */
1131 appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len);
1132
1133 /* ... and add info about query strings */
1134 for (i = 0; i < nDeadlockDetails; i++)
1135 {
1136 DEADLOCK_INFO *info = &deadlockDetails[i];
1137
1138 appendStringInfoChar(&logbuf, '\n');
1139
1140 appendStringInfo(&logbuf,
1141 _("Process %d: %s"),
1142 info->pid,
1143 pgstat_get_backend_current_activity(info->pid, false));
1144 }
1145
1146 pgstat_report_deadlock();
1147
1148 ereport(ERROR,
1149 (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
1150 errmsg("deadlock detected"),
1151 errdetail_internal("%s", clientbuf.data),
1152 errdetail_log("%s", logbuf.data),
1153 errhint("See server log for query details.")));
1154 }
1155
1156 /*
1157 * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
1158 * detects a trivial (two-way) deadlock. proc1 wants to block for lockmode
1159 * on lock, but proc2 is already waiting and would be blocked by proc1.
1160 */
1161 void
1162 RememberSimpleDeadLock(PGPROC *proc1,
1163 LOCKMODE lockmode,
1164 LOCK *lock,
1165 PGPROC *proc2)
1166 {
1167 DEADLOCK_INFO *info = &deadlockDetails[0];
1168
1169 info->locktag = lock->tag;
1170 info->lockmode = lockmode;
1171 info->pid = proc1->pid;
1172 info++;
1173 info->locktag = proc2->waitLock->tag;
1174 info->lockmode = proc2->waitLockMode;
1175 info->pid = proc2->pid;
1176 nDeadlockDetails = 2;
1177 }
1178