#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from collections import defaultdict
from enum import Enum

import gdb


class DiGraph(object):
    """
    Adapted from networkx: http://networkx.github.io/
    Represents a directed graph. Edges can store (key, value) attributes.
    """

    def __init__(self):
        # Map of node -> set of nodes
        self.adjacency_map = {}
        # Map of (node1, node2) -> map string -> arbitrary attribute
        # This will not be copied in subgraph()
        self.attributes_map = {}

    def neighbors(self, node):
        """Returns the set of successors of `node` (empty set if unknown)."""
        return self.adjacency_map.get(node, set())

    def edges(self):
        """Returns every edge as a list of (source, target) pairs."""
        edges = []
        for node, neighbors in self.adjacency_map.items():
            for neighbor in neighbors:
                edges.append((node, neighbor))
        return edges

    def nodes(self):
        """Returns a view of all nodes in the graph."""
        return self.adjacency_map.keys()

    def attributes(self, node1, node2):
        """Returns the attribute dict stored on edge (node1, node2).

        Raises KeyError if the edge does not exist.
        """
        return self.attributes_map[(node1, node2)]

    def add_edge(self, node1, node2, **kwargs):
        """Adds edge (node1, node2), creating either node as needed.

        Keyword arguments become the edge's attributes, replacing any
        attributes a previous identical edge may have stored.
        """
        if node1 not in self.adjacency_map:
            self.adjacency_map[node1] = set()
        if node2 not in self.adjacency_map:
            self.adjacency_map[node2] = set()
        self.adjacency_map[node1].add(node2)
        self.attributes_map[(node1, node2)] = kwargs

    def remove_node(self, node):
        """Removes `node` and all edges pointing to it.

        Stale entries in attributes_map are intentionally left behind;
        they are unreachable once the adjacency entries are gone.
        """
        self.adjacency_map.pop(node, None)
        for _, neighbors in self.adjacency_map.items():
            neighbors.discard(node)

    def subgraph(self, nodes):
        """Returns the subgraph induced by `nodes`.

        Edge attributes are NOT copied (see attributes_map comment above).
        """
        graph = DiGraph()
        for node in nodes:
            for neighbor in self.neighbors(node):
                if neighbor in nodes:
                    graph.add_edge(node, neighbor)
        return graph

    def node_link_data(self):
        """
        Returns the graph as a dictionary in a format that can be
        serialized.
        """
        data = {
            "directed": True,
            "multigraph": False,
            "graph": {},
            "links": [],
            "nodes": [],
        }

        # Do one pass to build a map of node -> position in nodes
        node_to_number = {}
        for node in self.adjacency_map.keys():
            node_to_number[node] = len(data["nodes"])
            data["nodes"].append({"id": node})

        # Do another pass to build the link information
        for node, neighbors in self.adjacency_map.items():
            for neighbor in neighbors:
                link = self.attributes_map[(node, neighbor)].copy()
                link["source"] = node_to_number[node]
                link["target"] = node_to_number[neighbor]
                data["links"].append(link)
        return data


def strongly_connected_components(G):  # noqa: C901
    """
    Adapted from networkx: http://networkx.github.io/
    Nonrecursive variant of Tarjan's algorithm.

    Parameters
    ----------
    G : DiGraph

    Returns
    -------
    comp : generator of sets
        A generator of sets of nodes, one for each strongly connected
        component of G.
    """
    preorder = {}
    lowlink = {}
    scc_found = {}
    scc_queue = []
    i = 0  # Preorder counter
    for source in G.nodes():
        if source not in scc_found:
            queue = [source]
            while queue:
                v = queue[-1]
                if v not in preorder:
                    i = i + 1
                    preorder[v] = i
                done = 1
                v_nbrs = G.neighbors(v)
                # Depth-first: descend into the first unvisited neighbor.
                for w in v_nbrs:
                    if w not in preorder:
                        queue.append(w)
                        done = 0
                        break
                if done == 1:
                    # All neighbors visited: compute v's lowlink and decide
                    # whether v is the root of a component.
                    lowlink[v] = preorder[v]
                    for w in v_nbrs:
                        if w not in scc_found:
                            if preorder[w] > preorder[v]:
                                lowlink[v] = min([lowlink[v], lowlink[w]])
                            else:
                                lowlink[v] = min([lowlink[v], preorder[w]])
                    queue.pop()
                    if lowlink[v] == preorder[v]:
                        # v is a component root: pop its members off scc_queue.
                        scc_found[v] = True
                        scc = {v}
                        while scc_queue and preorder[scc_queue[-1]] > preorder[v]:
                            k = scc_queue.pop()
                            scc_found[k] = True
                            scc.add(k)
                        yield scc
                    else:
                        scc_queue.append(v)


def simple_cycles(G):  # noqa: C901
    """
    Adapted from networkx: http://networkx.github.io/
    Johnson's algorithm for enumerating elementary circuits.

    Parameters
    ----------
    G : DiGraph

    Returns
    -------
    cycle_generator: generator
        A generator that produces elementary cycles of the graph.
        Each cycle is represented by a list of nodes along the cycle.
    """

    def _unblock(thisnode, blocked, B):
        # Unblock `thisnode` and, transitively, everything that was
        # blocked waiting on it.
        stack = set([thisnode])
        while stack:
            node = stack.pop()
            if node in blocked:
                blocked.remove(node)
                stack.update(B[node])
                B[node].clear()

    # Johnson's algorithm requires some ordering of the nodes.
    # We assign the arbitrary ordering given by the strongly connected comps
    # There is no need to track the ordering as each node removed as processed.
    # save the actual graph so we can mutate it here
    # We only take the edges because we do not want to
    # copy edge and node attributes here.
    subG = G.subgraph(G.nodes())
    sccs = list(strongly_connected_components(subG))
    while sccs:
        scc = sccs.pop()
        # order of scc determines ordering of nodes
        startnode = scc.pop()
        # Processing node runs 'circuit' routine from recursive version
        path = [startnode]
        blocked = set()  # vertex: blocked from search?
        closed = set()  # nodes involved in a cycle
        blocked.add(startnode)
        B = defaultdict(set)  # graph portions that yield no elementary circuit
        stack = [(startnode, list(subG.neighbors(startnode)))]
        while stack:
            thisnode, nbrs = stack[-1]
            if nbrs:
                nextnode = nbrs.pop()
                if nextnode == startnode:
                    # Closed a cycle back to the start node.
                    yield path[:]
                    closed.update(path)
                elif nextnode not in blocked:
                    path.append(nextnode)
                    stack.append((nextnode, list(subG.neighbors(nextnode))))
                    closed.discard(nextnode)
                    blocked.add(nextnode)
                    continue
            # done with nextnode... look for more neighbors
            if not nbrs:  # no more nbrs
                if thisnode in closed:
                    _unblock(thisnode, blocked, B)
                else:
                    for nbr in subG.neighbors(thisnode):
                        if thisnode not in B[nbr]:
                            B[nbr].add(thisnode)
                stack.pop()
                path.pop()
        # done processing this node
        subG.remove_node(startnode)
        H = subG.subgraph(scc)  # make smaller to avoid work in SCC routine
        sccs.extend(list(strongly_connected_components(H)))


def find_cycle(graph):
    """
    Looks for a cycle in the graph. If found, returns the first cycle.
    If nodes a1, a2, ..., an are in a cycle, then this returns:
        [(a1,a2), (a2,a3), ... (an-1,an), (an, a1)]
    Otherwise returns an empty list.
    """
    cycles = list(simple_cycles(graph))
    if cycles:
        nodes = cycles[0]
        # Close the loop so consecutive pairs include (an, a1).
        nodes.append(nodes[0])
        edges = []
        prev = nodes[0]
        for node in nodes[1:]:
            edges.append((prev, node))
            prev = node
        return edges
    else:
        return []


def get_stacktrace(thread_id):
    """
    Returns the stack trace for the thread id as a list of strings.
    """
    gdb.execute("thread %d" % thread_id, from_tty=False, to_string=True)
    output = gdb.execute("bt", from_tty=False, to_string=True)
    stacktrace_lines = output.strip().split("\n")
    return stacktrace_lines


def is_thread_blocked_with_frame(
    thread_id, top_line, expected_top_lines, expected_frame
):
    """
    Returns True if we found expected_top_line in top_line, and
    we found the expected_frame in the thread's stack trace.
    """
    if all(expected not in top_line for expected in expected_top_lines):
        return False
    stacktrace_lines = get_stacktrace(thread_id)
    return any(expected_frame in line for line in stacktrace_lines)


class MutexType(Enum):
    """Types of mutexes that we can detect deadlocks."""

    PTHREAD_MUTEX_T = "pthread_mutex_t"
    PTHREAD_RWLOCK_T = "pthread_rwlock_t"

    @staticmethod
    def get_mutex_type(thread_id, top_line):
        """
        Returns the probable mutex type, based on the first line
        of the thread's stack. Returns None if not found.
        """

        # glibc futex-wait symbols that indicate a thread blocked on a lock.
        WAITLIST = [
            "__lll_lock_wait",
            "futex_abstimed_wait",
            "futex_abstimed_wait_cancelable",
            "futex_reltimed_wait",
            "futex_reltimed_wait_cancelable",
            "futex_wait",
            "futex_wait_cancelable",
        ]

        if is_thread_blocked_with_frame(thread_id, top_line, WAITLIST, "pthread_mutex"):
            return MutexType.PTHREAD_MUTEX_T
        if is_thread_blocked_with_frame(
            thread_id, top_line, WAITLIST, "pthread_rwlock"
        ):
            return MutexType.PTHREAD_RWLOCK_T
        return None

    @staticmethod
    def get_mutex_owner_and_address_func_for_type(mutex_type):
        """
        Returns a function to resolve the mutex owner and address for
        the given type. The returned function f has the following
        signature:

            f: args: (map of thread lwp -> thread id), blocked thread lwp
               returns: (lwp of thread owning mutex, mutex address)
                        or (None, None) if not found.

        Returns None if there is no function for this mutex_type.
        """
        if mutex_type == MutexType.PTHREAD_MUTEX_T:
            return get_pthread_mutex_t_owner_and_address
        if mutex_type == MutexType.PTHREAD_RWLOCK_T:
            return get_pthread_rwlock_t_owner_and_address
        return None


def print_cycle(graph, lwp_to_thread_id, cycle):
    """Prints the threads and mutexes involved in the deadlock."""
    for (m, n) in cycle:
        print(
            "Thread %d (LWP %d) is waiting on %s (0x%016x) held by "
            "Thread %d (LWP %d)"
            % (
                lwp_to_thread_id[m],
                m,
                graph.attributes(m, n)["mutex_type"].value,
                graph.attributes(m, n)["mutex"],
                lwp_to_thread_id[n],
                n,
            )
        )


def get_thread_info():
    """
    Returns a pair of:
    - map of LWP -> thread ID
    - map of blocked threads LWP -> potential mutex type
    """
    # LWP -> thread ID
    lwp_to_thread_id = {}

    # LWP -> potential mutex type it is blocked on
    blocked_threads = {}

    output = gdb.execute("info threads", from_tty=False, to_string=True)
    lines = output.strip().split("\n")[1:]  # skip the header row
    regex = re.compile(r"[\s\*]*(\d+).*Thread.*\(LWP (\d+)\).*")
    for line in lines:
        try:
            # Match once and skip non-matching lines explicitly, instead of
            # matching twice and relying on an AttributeError from None.
            match = regex.match(line)
            if match is None:
                continue
            thread_id = int(match.group(1))
            thread_lwp = int(match.group(2))
            lwp_to_thread_id[thread_lwp] = thread_id
            mutex_type = MutexType.get_mutex_type(thread_id, line)
            if mutex_type:
                blocked_threads[thread_lwp] = mutex_type
        except Exception:
            # Best-effort: skip threads whose state cannot be inspected
            # (e.g. gdb errors while walking the stack).
            continue

    return (lwp_to_thread_id, blocked_threads)


def get_pthread_mutex_t_owner_and_address(lwp_to_thread_id, thread_lwp):
    """
    Finds the thread holding the mutex that this thread is blocked on.
    Returns a pair of (lwp of thread owning mutex, mutex address),
    or (None, None) if not found.
    """
    # Go up the stack to the pthread_mutex_lock frame
    gdb.execute(
        "thread %d" % lwp_to_thread_id[thread_lwp], from_tty=False, to_string=True
    )
    gdb.execute("frame 1", from_tty=False, to_string=True)

    # Get the owner of the mutex by inspecting the internal
    # fields of the mutex.
    try:
        mutex_info = gdb.parse_and_eval("mutex").dereference()
        mutex_owner_lwp = int(mutex_info["__data"]["__owner"])
        return (mutex_owner_lwp, int(mutex_info.address))
    except gdb.error:
        return (None, None)


def get_pthread_rwlock_t_owner_and_address(lwp_to_thread_id, thread_lwp):
    """
    If the thread is waiting on a write-locked pthread_rwlock_t, this will
    return the pair of:
        (lwp of thread that is write-owning the mutex, mutex address)
    or (None, None) if not found, or if the mutex is read-locked.
    """
    # Go up the stack to the pthread_rwlock_{rd|wr}lock frame
    gdb.execute(
        "thread %d" % lwp_to_thread_id[thread_lwp], from_tty=False, to_string=True
    )
    gdb.execute("frame 2", from_tty=False, to_string=True)

    # Get the owner of the mutex by inspecting the internal
    # fields of the mutex.
    try:
        rwlock_info = gdb.parse_and_eval("rwlock").dereference()
        rwlock_data = rwlock_info["__data"]
        # The writer-owner field is named differently across glibc versions.
        field_names = ["__cur_writer", "__writer"]
        fields = rwlock_data.type.fields()
        field = [f for f in fields if f.name in field_names][0]
        rwlock_owner_lwp = int(rwlock_data[field])
        # We can only track the owner if it is currently write-locked.
        # If it is not write-locked or if it is currently read-locked,
        # possibly by multiple threads, we cannot find the owner.
        if rwlock_owner_lwp != 0:
            return (rwlock_owner_lwp, int(rwlock_info.address))
        else:
            return (None, None)
    except gdb.error:
        return (None, None)


class Deadlock(gdb.Command):
    """Detects deadlocks"""

    def __init__(self):
        super(Deadlock, self).__init__("deadlock", gdb.COMMAND_NONE)

    def invoke(self, arg, from_tty):
        """Prints the threads and mutexes in a deadlock, if it exists."""
        lwp_to_thread_id, blocked_threads = get_thread_info()

        # Nodes represent threads. Edge (A,B) exists if thread A
        # is waiting on a mutex held by thread B.
        graph = DiGraph()

        # Go through all the blocked threads and see which threads
        # they are blocked on, and build the thread wait graph.
        for thread_lwp, mutex_type in blocked_threads.items():
            get_owner_and_address_func = (
                MutexType.get_mutex_owner_and_address_func_for_type(mutex_type)
            )
            if not get_owner_and_address_func:
                continue
            mutex_owner_lwp, mutex_address = get_owner_and_address_func(
                lwp_to_thread_id, thread_lwp
            )
            if mutex_owner_lwp and mutex_address:
                graph.add_edge(
                    thread_lwp,
                    mutex_owner_lwp,
                    mutex=mutex_address,
                    mutex_type=mutex_type,
                )

        # A deadlock exists if there is a cycle in the graph.
        cycle = find_cycle(graph)
        if cycle:
            print("Found deadlock!")
            print_cycle(graph, lwp_to_thread_id, cycle)
        else:
            print("No deadlock detected. " "Do you have debug symbols installed?")


def load():
    # instantiate the Deadlock command
    Deadlock()
    print('Type "deadlock" to detect deadlocks.')


def info():
    return "Detect deadlocks"


if __name__ == "__main__":
    load()