1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import re
17from collections import defaultdict
18from enum import Enum
19
20import gdb
21
22
class DiGraph(object):
    """
    Adapted from networkx: http://networkx.github.io/
    Represents a directed graph. Edges can store (key, value) attributes.
    """

    def __init__(self):
        # node -> set of successor nodes
        self.adjacency_map = {}
        # (node1, node2) -> dict of string -> arbitrary edge attribute.
        # This will not be copied in subgraph().
        self.attributes_map = {}

    def neighbors(self, node):
        """Return the set of successors of `node` (empty set if unknown)."""
        return self.adjacency_map.get(node, set())

    def edges(self):
        """Return every edge as a list of (source, target) pairs."""
        return [
            (source, target)
            for source, targets in self.adjacency_map.items()
            for target in targets
        ]

    def nodes(self):
        """Return a view of all nodes in the graph."""
        return self.adjacency_map.keys()

    def attributes(self, node1, node2):
        """Return the attribute dict stored on edge (node1, node2)."""
        return self.attributes_map[(node1, node2)]

    def add_edge(self, node1, node2, **kwargs):
        """Add edge node1 -> node2, storing kwargs as its attributes."""
        self.adjacency_map.setdefault(node1, set()).add(node2)
        self.adjacency_map.setdefault(node2, set())
        self.attributes_map[(node1, node2)] = kwargs

    def remove_node(self, node):
        """Remove `node` and every edge touching it (attributes are kept)."""
        self.adjacency_map.pop(node, None)
        for successors in self.adjacency_map.values():
            successors.discard(node)

    def subgraph(self, nodes):
        """Return a new DiGraph induced by `nodes`; edge attributes dropped."""
        induced = DiGraph()
        for node in nodes:
            for target in self.neighbors(node):
                if target in nodes:
                    induced.add_edge(node, target)
        return induced

    def node_link_data(self):
        """
        Returns the graph as a dictionary in a format that can be
        serialized.
        """
        data = {
            "directed": True,
            "multigraph": False,
            "graph": {},
            "links": [],
            "nodes": [],
        }

        # First pass: assign each node its position inside data["nodes"]
        node_to_number = {}
        for node in self.adjacency_map:
            node_to_number[node] = len(data["nodes"])
            data["nodes"].append({"id": node})

        # Second pass: emit one link record per edge, carrying its attributes
        for source, targets in self.adjacency_map.items():
            for target in targets:
                link = self.attributes_map[(source, target)].copy()
                link["source"] = node_to_number[source]
                link["target"] = node_to_number[target]
                data["links"].append(link)
        return data
100
101
def strongly_connected_components(G):  # noqa: C901
    """
    Adapted from networkx: http://networkx.github.io/
    Parameters
    ----------
    G : DiGraph
    Returns
    -------
    comp : generator of sets
        A generator of sets of nodes, one for each strongly connected
        component of G.
    """
    # Iterative Tarjan-style algorithm: an explicit stack replaces recursion.
    preorder = {}  # node -> DFS discovery index
    lowlink = {}  # node -> smallest preorder value reachable from the node
    assigned = {}  # nodes already placed in an emitted component
    component_stack = []  # candidates for the component currently being built
    counter = 0  # preorder counter
    for root in G.nodes():
        if root in assigned:
            continue
        dfs_stack = [root]
        while dfs_stack:
            node = dfs_stack[-1]
            if node not in preorder:
                counter += 1
                preorder[node] = counter
            # Descend into the first unvisited successor, if any.
            descend = False
            successors = G.neighbors(node)
            for succ in successors:
                if succ not in preorder:
                    dfs_stack.append(succ)
                    descend = True
                    break
            if descend:
                continue
            # All successors visited: finalize lowlink, maybe emit a component.
            lowlink[node] = preorder[node]
            for succ in successors:
                if succ in assigned:
                    continue
                if preorder[succ] > preorder[node]:
                    lowlink[node] = min(lowlink[node], lowlink[succ])
                else:
                    lowlink[node] = min(lowlink[node], preorder[succ])
            dfs_stack.pop()
            if lowlink[node] == preorder[node]:
                # `node` is the root of a component: collect every node on the
                # component stack discovered after it.
                assigned[node] = True
                component = {node}
                while (
                    component_stack
                    and preorder[component_stack[-1]] > preorder[node]
                ):
                    member = component_stack.pop()
                    assigned[member] = True
                    component.add(member)
                yield component
            else:
                component_stack.append(node)
153
154
def simple_cycles(G):  # noqa: C901
    """
    Adapted from networkx: http://networkx.github.io/
    Parameters
    ----------
    G : DiGraph
    Returns
    -------
    cycle_generator: generator
       A generator that produces elementary cycles of the graph.
       Each cycle is represented by a list of nodes along the cycle.
    """

    def _unblock(node, blocked, no_circuit):
        # Unblock `node` and, transitively, everything recorded against it.
        pending = {node}
        while pending:
            candidate = pending.pop()
            if candidate in blocked:
                blocked.remove(candidate)
                pending.update(no_circuit[candidate])
                no_circuit[candidate].clear()

    # Johnson's algorithm requires some ordering of the nodes.
    # We assign the arbitrary ordering given by the strongly connected comps
    # There is no need to track the ordering as each node removed as processed.
    # Work on a copy of the edge structure so it can be mutated here; edge and
    # node attributes are deliberately not copied.
    subG = G.subgraph(G.nodes())
    sccs = list(strongly_connected_components(subG))
    while sccs:
        scc = sccs.pop()
        # Any member of the SCC can serve as the start node.
        startnode = scc.pop()
        # Iterative version of the recursive 'circuit' routine.
        path = [startnode]
        blocked = {startnode}  # vertices blocked from the current search
        closed = set()  # vertices known to lie on some cycle
        B = defaultdict(set)  # graph portions that yield no elementary circuit
        stack = [(startnode, list(subG.neighbors(startnode)))]
        while stack:
            current, frontier = stack[-1]
            if frontier:
                candidate = frontier.pop()
                if candidate == startnode:
                    # Closed a loop back to the start: emit a copy of the path.
                    yield path[:]
                    closed.update(path)
                elif candidate not in blocked:
                    path.append(candidate)
                    stack.append((candidate, list(subG.neighbors(candidate))))
                    closed.discard(candidate)
                    blocked.add(candidate)
                    continue
            # done with candidate... look for more neighbors
            if not frontier:  # every neighbor of `current` has been tried
                if current in closed:
                    _unblock(current, blocked, B)
                else:
                    for nbr in subG.neighbors(current):
                        if current not in B[nbr]:
                            B[nbr].add(current)
                stack.pop()
                path.pop()
        # done processing this start node
        subG.remove_node(startnode)
        H = subG.subgraph(scc)  # shrink the graph before the next SCC pass
        sccs.extend(list(strongly_connected_components(H)))
223
224
def find_cycle(graph):
    """
    Looks for a cycle in the graph. If found, returns the first cycle.
    If nodes a1, a2, ..., an are in a cycle, then this returns:
        [(a1,a2), (a2,a3), ... (an-1,an), (an, a1)]
    Otherwise returns an empty list.
    """
    # Only the first cycle is needed, so pull a single item from the
    # generator instead of materializing every elementary cycle (the number
    # of cycles can be exponential in the size of the graph).
    nodes = next(simple_cycles(graph), None)
    if nodes is None:
        return []
    # Pair each node with its successor, wrapping around to close the cycle.
    return list(zip(nodes, nodes[1:] + nodes[:1]))
244
245
def get_stacktrace(thread_id):
    """
    Returns the stack trace for the thread id as a list of strings.
    """
    # Switch gdb's focus to the requested thread, then capture `bt` output.
    gdb.execute("thread %d" % thread_id, from_tty=False, to_string=True)
    backtrace = gdb.execute("bt", from_tty=False, to_string=True)
    return backtrace.strip().split("\n")
254
255
def is_thread_blocked_with_frame(
    thread_id, top_line, expected_top_lines, expected_frame
):
    """
    Returns True if we found expected_top_line in top_line, and
    we found the expected_frame in the thread's stack trace.
    """
    # Fast path: bail out unless the top frame matches one of the expected
    # wait functions; only then pay for fetching the full backtrace.
    if not any(expected in top_line for expected in expected_top_lines):
        return False
    return any(expected_frame in line for line in get_stacktrace(thread_id))
267
268
class MutexType(Enum):
    """Types of mutexes that we can detect deadlocks."""

    PTHREAD_MUTEX_T = "pthread_mutex_t"
    PTHREAD_RWLOCK_T = "pthread_rwlock_t"

    @staticmethod
    def get_mutex_type(thread_id, top_line):
        """
        Returns the probable mutex type, based on the first line
        of the thread's stack. Returns None if not found.
        """
        # Futex-related wait functions whose presence in the top frame
        # suggests the thread is blocked on a lock.
        WAITLIST = [
            "__lll_lock_wait",
            "futex_abstimed_wait",
            "futex_abstimed_wait_cancelable",
            "futex_reltimed_wait",
            "futex_reltimed_wait_cancelable",
            "futex_wait",
            "futex_wait_cancelable",
        ]

        # Probe each lock flavor in turn; order matches the original checks.
        for frame_marker, mutex_type in (
            ("pthread_mutex", MutexType.PTHREAD_MUTEX_T),
            ("pthread_rwlock", MutexType.PTHREAD_RWLOCK_T),
        ):
            if is_thread_blocked_with_frame(
                thread_id, top_line, WAITLIST, frame_marker
            ):
                return mutex_type
        return None

    @staticmethod
    def get_mutex_owner_and_address_func_for_type(mutex_type):
        """
        Returns a function to resolve the mutex owner and address for
        the given type. The returned function f has the following
        signature:

            f: args: (map of thread lwp -> thread id), blocked thread lwp
               returns: (lwp of thread owning mutex, mutex address)
                        or (None, None) if not found.

        Returns None if there is no function for this mutex_type.
        """
        if mutex_type == MutexType.PTHREAD_MUTEX_T:
            return get_pthread_mutex_t_owner_and_address
        elif mutex_type == MutexType.PTHREAD_RWLOCK_T:
            return get_pthread_rwlock_t_owner_and_address
        return None
318
319
def print_cycle(graph, lwp_to_thread_id, cycle):
    """Prints the threads and mutexes involved in the deadlock."""
    # Each edge (waiter, holder) means `waiter` waits on a mutex held by
    # `holder`; the mutex address and type live on the edge attributes.
    for waiter, holder in cycle:
        edge_attrs = graph.attributes(waiter, holder)
        print(
            "Thread %d (LWP %d) is waiting on %s (0x%016x) held by "
            "Thread %d (LWP %d)"
            % (
                lwp_to_thread_id[waiter],
                waiter,
                edge_attrs["mutex_type"].value,
                edge_attrs["mutex"],
                lwp_to_thread_id[holder],
                holder,
            )
        )
335
336
def get_thread_info():
    """
    Returns a pair of:
    - map of LWP -> thread ID
    - map of blocked threads LWP -> potential mutex type
    """
    # LWP -> thread ID
    lwp_to_thread_id = {}

    # LWP -> potential mutex type it is blocked on
    blocked_threads = {}

    output = gdb.execute("info threads", from_tty=False, to_string=True)
    lines = output.strip().split("\n")[1:]  # skip the header row
    regex = re.compile(r"[\s\*]*(\d+).*Thread.*\(LWP (\d+)\).*")
    for line in lines:
        # Match once per line (the original matched twice) and skip
        # non-matching lines explicitly instead of relying on the
        # AttributeError raised by None.group().
        match = regex.match(line)
        if match is None:
            continue
        try:
            thread_id = int(match.group(1))
            thread_lwp = int(match.group(2))
            lwp_to_thread_id[thread_lwp] = thread_id
            mutex_type = MutexType.get_mutex_type(thread_id, line)
            if mutex_type:
                blocked_threads[thread_lwp] = mutex_type
        except Exception:
            # Best-effort scan: a thread we fail to inspect (e.g. a gdb
            # error while walking its stack) must not abort the whole pass.
            continue

    return (lwp_to_thread_id, blocked_threads)
364
365
def get_pthread_mutex_t_owner_and_address(lwp_to_thread_id, thread_lwp):
    """
    Finds the thread holding the mutex that this thread is blocked on.
    Returns a pair of (lwp of thread owning mutex, mutex address),
    or (None, None) if not found.
    """
    # Select the blocked thread, then move one frame up the stack to the
    # pthread_mutex_lock frame where the `mutex` argument is visible.
    gdb.execute(
        "thread %d" % lwp_to_thread_id[thread_lwp], from_tty=False, to_string=True
    )
    gdb.execute("frame 1", from_tty=False, to_string=True)

    # Read the owner LWP out of the mutex's internal __data struct.
    try:
        mutex_info = gdb.parse_and_eval("mutex").dereference()
        owner_lwp = int(mutex_info["__data"]["__owner"])
        return (owner_lwp, int(mutex_info.address))
    except gdb.error:
        # `mutex` was not resolvable (e.g. missing debug symbols).
        return (None, None)
386
387
def get_pthread_rwlock_t_owner_and_address(lwp_to_thread_id, thread_lwp):
    """
    If the thread is waiting on a write-locked pthread_rwlock_t, this will
    return the pair of:
        (lwp of thread that is write-owning the mutex, mutex address)
    or (None, None) if not found, or if the mutex is read-locked.
    """
    # Select the blocked thread, then move two frames up the stack to the
    # pthread_rwlock_{rd|wr}lock frame where `rwlock` is visible.
    gdb.execute(
        "thread %d" % lwp_to_thread_id[thread_lwp], from_tty=False, to_string=True
    )
    gdb.execute("frame 2", from_tty=False, to_string=True)

    # Read the write-owner LWP out of the rwlock's internal __data struct.
    try:
        rwlock_info = gdb.parse_and_eval("rwlock").dereference()
        rwlock_data = rwlock_info["__data"]
        # The owner field name differs across glibc versions; accept either.
        candidates = ("__cur_writer", "__writer")
        owner_field = [
            f for f in rwlock_data.type.fields() if f.name in candidates
        ][0]
        owner_lwp = int(rwlock_data[owner_field])
        # We can only track the owner if it is currently write-locked.
        # If it is not write-locked or if it is currently read-locked,
        # possibly by multiple threads, we cannot find the owner.
        if owner_lwp == 0:
            return (None, None)
        return (owner_lwp, int(rwlock_info.address))
    except gdb.error:
        return (None, None)
419
420
class Deadlock(gdb.Command):
    """Detects deadlocks"""

    def __init__(self):
        # Registering under the name "deadlock" makes it callable from gdb.
        super(Deadlock, self).__init__("deadlock", gdb.COMMAND_NONE)

    def invoke(self, arg, from_tty):
        """Prints the threads and mutexes in a deadlock, if it exists."""
        lwp_to_thread_id, blocked_threads = get_thread_info()

        # Nodes represent threads. Edge (A,B) exists if thread A
        # is waiting on a mutex held by thread B.
        graph = DiGraph()

        # For every blocked thread, resolve which thread owns the mutex it
        # waits on and record that wait-for relationship as an edge.
        for thread_lwp, mutex_type in blocked_threads.items():
            resolve_owner = MutexType.get_mutex_owner_and_address_func_for_type(
                mutex_type
            )
            if resolve_owner is None:
                continue
            owner_lwp, mutex_address = resolve_owner(lwp_to_thread_id, thread_lwp)
            if owner_lwp and mutex_address:
                graph.add_edge(
                    thread_lwp,
                    owner_lwp,
                    mutex=mutex_address,
                    mutex_type=mutex_type,
                )

        # A deadlock exists if and only if the wait-for graph has a cycle.
        cycle = find_cycle(graph)
        if cycle:
            print("Found deadlock!")
            print_cycle(graph, lwp_to_thread_id, cycle)
        else:
            print("No deadlock detected. Do you have debug symbols installed?")
461
462
def load():
    """Instantiates (and thereby registers) the Deadlock command."""
    Deadlock()
    print('Type "deadlock" to detect deadlocks.')
467
468
def info():
    """Returns a one-line description of this command."""
    return "Detect deadlocks"
471
472
if __name__ == "__main__":
    # Register the "deadlock" command when this file is run as a script
    # (e.g. sourced directly into gdb) — TODO confirm intended entry point.
    load()
475