1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Compare two dictionaries key by key.

    Returns a ``(removed, added)`` tuple of key lists. ``removed`` holds the
    keys of ``prev`` (in ``prev`` order) that are missing from ``curr`` or map
    to a different value there; ``added`` holds the keys of ``curr`` (in
    ``curr`` order) that are missing from ``prev`` or map to a different
    value there. A key whose value changed therefore shows up in both lists.
    """
    def differs_from(other, key):
        # Values are compared only when both dictionaries contain the key.
        return key not in other or curr[key] != prev[key]

    removed = [key for key in prev if differs_from(curr, key)]
    added = [key for key in curr if differs_from(prev, key)]
    return removed, added
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap:
    def __init__(self, items):
        # Keep the pairs in the order in which they were supplied.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return the (removed, added) key lists relative to ``prev``."""
        current = self.generic_map
        previous = prev.generic_map
        return diff_dicts(current, previous)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed its value."""
        gone, new = self.diff(prev)
        return bool(gone) or bool(new)
46
47
48# A deserialized source location.
class SourceLocation:
    def __init__(self, json_loc):
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        # Only the base name of the file is kept; a location without a
        # 'file' entry is labelled as belonging to the main file.
        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        # An optional nested 'spelling' location is deserialized recursively.
        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        """Return True when this location carries a spelling location."""
        return self.spelling is not None
61
62
63# A deserialized program point.
class ProgramPoint:
    def __init__(self, json_pp):
        # Fields shared by every kind of program point.
        kind = json_pp['kind']
        self.kind = kind
        self.tag = json_pp['tag']
        self.node_id = json_pp['node_id']
        self.is_sink = bool(json_pp['is_sink'])
        self.has_report = bool(json_pp['has_report'])

        # Kind-specific fields; other kinds carry no extra attributes.
        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif kind == 'Statement':
            logging.debug(json_pp)
            self.stmt_kind = json_pp['stmt_kind']
            # 'cast_kind' is optional.
            self.cast_kind = json_pp.get('cast_kind')
            self.stmt_point_kind = json_pp['stmt_point_kind']
            self.stmt_id = json_pp['stmt_id']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # 'location' must be present but may be null.
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
        elif kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
87
88
89# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey:
    def __init__(self, json_ek):
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        # 'kind' is optional.
        self.kind = json_ek.get('kind')

    def _key(self):
        # Identity of a binding key is its statement (or initializer) id.
        return self.stmt_id

    def __eq__(self, other):
        # Decline the comparison for foreign objects instead of failing with
        # an AttributeError on the missing _key() method.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__, so define it
        # explicitly to keep '!=' consistent with '=='.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
106
107
108# Deserialized description of a location context.
class LocationContext:
    def __init__(self, json_frame):
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        # 'location' must be present but may be null.
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        # Two location contexts are the same when their ids match.
        return self.lctx_id

    def __eq__(self, other):
        # Decline the comparison for foreign objects instead of failing with
        # an AttributeError on the missing _key() method.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__, so define it
        # explicitly to keep '!=' consistent with '=='.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
125
126
127# A group of deserialized Environment bindings that correspond to a specific
128# location context.
class EnvironmentFrame:
    def __init__(self, json_frame):
        self.location_context = LocationContext(json_frame)

        # 'items' must be present but may be null, meaning "no bindings".
        json_items = json_frame['items']
        pairs = []
        if json_items is not None:
            pairs = [(EnvironmentBindingKey(item), item['value'])
                     for item in json_items]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
143
144
145# A deserialized Environment. This class can also hold other entities that
146# are similar to Environment, such as Objects Under Construction or
147# Indices Of Elements Under Construction.
class GenericEnvironment:
    def __init__(self, json_e):
        self.frames = [EnvironmentFrame(f) for f in json_e]

    def diff_frames(self, prev):
        """Return the indices of frames whose bindings differ from ``prev``.

        Returns None when no per-frame diff can be produced: either the
        number of frames differs or a frame has been replaced by a frame of
        another location context.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(
                zip(self.frames, prev.frames)):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        """Tell whether the environment changed (or cannot be diffed)."""
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
175
176
177# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey:
    def __init__(self, json_sk):
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        # A binding is identified by its kind together with its offset.
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Decline the comparison for foreign objects instead of failing with
        # an AttributeError on the missing _key() method.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__, so define it
        # explicitly to keep '!=' consistent with '=='.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
191
192
193# A single cluster of the deserialized RegionStore.
class StoreCluster:
    def __init__(self, json_sc):
        self.base_region = json_sc['cluster']
        # Bindings keep the order in which they appear in the dump.
        pairs = [(StoreBindingKey(item), item['value'])
                 for item in json_sc['items']]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
206
207
208# A deserialized RegionStore.
class Store:
    def __init__(self, json_s):
        self.ptr = json_s['pointer']
        # Clusters are keyed by their 'pointer' field, in dump order.
        self.clusters = collections.OrderedDict()
        for json_cluster in json_s['items']:
            self.clusters[json_cluster['pointer']] = StoreCluster(json_cluster)

    def diff_clusters(self, prev):
        """Return (removed, added, updated) cluster keys relative to ``prev``.

        ``removed`` and ``updated`` follow the order of ``prev``; ``added``
        follows the order of this store.
        """
        current = self.clusters
        previous = prev.clusters

        removed = []
        updated = []
        for key, old_cluster in previous.items():
            if key not in current:
                removed.append(key)
            elif old_cluster.is_different(current[key]):
                updated.append(key)

        added = [key for key in current if key not in previous]
        return removed, added, updated

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed) or bool(added) or bool(updated)
225
226
227# Deserialized messages from a single checker in a single program state.
228# Basically a list of raw strings.
class CheckerLines:
    def __init__(self, json_lines):
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped ndiff lines that start with '+' or '-'.

        The diff runs from ``prev.lines`` to ``self.lines``; every other
        line produced by ndiff is dropped.
        """
        changed = []
        for entry in difflib.ndiff(prev.lines, self.lines):
            if entry.startswith(('+', '-')):
                changed.append(entry.strip())
        return changed

    def is_different(self, prev):
        """Tell whether at least one line was added or removed."""
        return bool(self.diff_lines(prev))
240
241
242# Deserialized messages of all checkers, separated by checker.
class CheckerMessages:
    def __init__(self, json_m):
        # One CheckerLines entry per checker, keyed by checker name.
        self.items = collections.OrderedDict()
        for json_entry in json_m:
            self.items[json_entry['checker']] = \
                CheckerLines(json_entry['messages'])

    def diff_messages(self, prev):
        """Return (removed, added, updated) checker names relative to ``prev``.

        ``removed`` and ``updated`` follow the order of ``prev``; ``added``
        follows the order of this object.
        """
        current = self.items
        previous = prev.items

        removed = []
        updated = []
        for name, old_lines in previous.items():
            if name not in current:
                removed.append(name)
            elif old_lines.is_different(current[name]):
                updated.append(name)

        added = [name for name in current if name not in previous]
        return removed, added, updated

    def is_different(self, prev):
        """Tell whether any checker was removed, added or updated."""
        removed, added, updated = self.diff_messages(prev)
        return bool(removed) or bool(added) or bool(updated)
258
259
260# A deserialized program state.
261class ProgramState:
262    def __init__(self, state_id, json_ps):
263        logging.debug('Adding ProgramState ' + str(state_id))
264
265        store_key = 'store'
266        env_key = 'environment'
267        constraints_key = 'constraints'
268        dyn_ty_key = 'dynamic_types'
269        ctor_key = 'constructing_objects'
270        ind_key = 'index_of_element'
271        init_loop_key = 'pending_init_loops'
272        dtor_key = 'pending_destructors'
273        msg_key = 'checker_messages'
274
275        if json_ps is None:
276            json_ps = {
277                store_key: None,
278                env_key: None,
279                constraints_key: None,
280                dyn_ty_key: None,
281                ctor_key: None,
282                ind_key: None,
283                init_loop_key: None,
284                dtor_key: None,
285                msg_key: None
286            }
287
288        self.state_id = state_id
289
290        self.store = Store(json_ps[store_key]) \
291            if json_ps[store_key] is not None else None
292
293        self.environment = \
294            GenericEnvironment(json_ps[env_key]['items']) \
295            if json_ps[env_key] is not None else None
296
297        self.constraints = GenericMap([
298            (c['symbol'], c['range']) for c in json_ps[constraints_key]
299        ]) if json_ps[constraints_key] is not None else None
300
301        self.dynamic_types = GenericMap([
302                (t['region'], '%s%s' % (t['dyn_type'],
303                                        ' (or a sub-class)'
304                                        if t['sub_classable'] else ''))
305                for t in json_ps[dyn_ty_key]]) \
306            if json_ps[dyn_ty_key] is not None else None
307
308        self.checker_messages = CheckerMessages(json_ps[msg_key]) \
309            if json_ps[msg_key] is not None else None
310
311        # State traits
312        #
313        # For traits we always check if a key exists because if a trait
314        # has no imformation, nothing will be printed in the .dot file
315        # we parse.
316
317        self.constructing_objects = \
318            GenericEnvironment(json_ps[ctor_key]) \
319            if ctor_key in json_ps and json_ps[ctor_key] is not None else None
320
321        self.index_of_element = \
322            GenericEnvironment(json_ps[ind_key]) \
323            if ind_key in json_ps and json_ps[ind_key] is not None else None
324
325        self.pending_init_loops = \
326            GenericEnvironment(json_ps[init_loop_key]) \
327            if init_loop_key in json_ps and json_ps[init_loop_key] is not None else None
328
329        self.pending_destructors = \
330            GenericEnvironment(json_ps[dtor_key]) \
331            if dtor_key in json_ps and json_ps[dtor_key] is not None else None
332
333
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode:
    def __init__(self):
        # Names of neighbouring nodes; filled in while edges are parsed.
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node contents from its deserialized JSON label."""
        logging.debug('Adding ' + node_id)
        # Drop the 4-character 'Node' prefix; node_name() puts it back.
        self.ptr = node_id[4:]
        self.points = [ProgramPoint(json_point)
                       for json_point in json_node['program_points']]
        # The node takes its id from its last program point.
        self.node_id = self.points[-1].node_id
        self.state = ProgramState(json_node['state_id'],
                                  json_node['program_state'])

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node name: 'Node' followed by the stored pointer."""
        return 'Node' + self.ptr
355
356
357# A deserialized ExplodedGraph. Constructed by consuming a .dot file
358# line-by-line.
class ExplodedGraph:
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        # Nodes are created on first reference, so an edge may mention a
        # node before its own line has been parsed.
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file and update the graph."""
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        edge_match = self.edge_re.match(raw_line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred = edge_match.group(1)
            succ = edge_match.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(raw_line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node_match.group(1)
        # The first node line seen while the graph is still empty is the root.
        if not self.nodes:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The replacements are applied strictly in this order.
        node_label = node_match.group(2)
        for escaped, plain in ((' ', ''),
                               ('\\"', '"'),
                               ('\\{', '{'),
                               ('\\}', '}'),
                               ('\\\\', '\\'),
                               ('\\|', '|'),
                               ('\\<', '\\\\<'),
                               ('\\>', '\\\\>')):
            node_label = node_label.replace(escaped, plain)
        node_label = node_label.rstrip(',')

        # Handle `\l` separately because a string literal can be in code
        # like "string\\literal" with the `\l` inside.
        # Also on Windows macros __FILE__ produces specific delimiters `\`
        # and a directory or file may starts with the letter `l`.
        # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
        # because the literal as a rule contains multiple `\` before `\l`.
        node_label = re.sub(r'(?<!\\)\\l', '', node_label)
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
423
424
425#===-----------------------------------------------------------------------===#
426# Visitors traverse a deserialized ExplodedGraph and do different things
427# with every node and edge.
428#===-----------------------------------------------------------------------===#
429
430
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
433class DotDumpVisitor:
434    def __init__(self, do_diffs, dark_mode, gray_mode,
435                 topo_mode, dump_dot_only):
436        self._do_diffs = do_diffs
437        self._dark_mode = dark_mode
438        self._gray_mode = gray_mode
439        self._topo_mode = topo_mode
440        self._dump_dot_only = dump_dot_only
441        self._output = []
442
443    def _dump_raw(self, s):
444        if self._dump_dot_only:
445            print(s, end='')
446        else:
447            self._output.append(s)
448
449    def output(self):
450        assert not self._dump_dot_only
451        return ''.join(self._output)
452
453    def _dump(self, s):
454        s = s.replace('&', '&amp;') \
455             .replace('{', '\\{') \
456             .replace('}', '\\}') \
457             .replace('\\<', '&lt;') \
458             .replace('\\>', '&gt;') \
459             .replace('|', '\\|')
460        s = re.sub(r'(?<!\\)\\l', '<br />', s)
461        if self._gray_mode:
462            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
463            s = re.sub(r'</font>', '', s)
464        self._dump_raw(s)
465
466    @staticmethod
467    def _diff_plus_minus(is_added):
468        if is_added is None:
469            return ''
470        if is_added:
471            return '<font color="forestgreen">+</font>'
472        return '<font color="red">-</font>'
473
474    @staticmethod
475    def _short_pretty(s):
476        if s is None:
477            return None
478        if len(s) < 20:
479            return s
480        left = s.find('{')
481        right = s.rfind('}')
482        if left == -1 or right == -1 or left >= right:
483            return s
484        candidate = s[0:left + 1] + ' ... ' + s[right:]
485        if len(candidate) >= len(s):
486            return s
487        return candidate
488
489    @staticmethod
490    def _make_sloc(loc):
491        if loc is None:
492            return '<i>Invalid Source Location</i>'
493
494        def make_plain_loc(loc):
495            return '%s:<b>%s</b>:<b>%s</b>' \
496                % (loc.filename, loc.line, loc.col)
497
498        if loc.is_macro():
499            return '%s <font color="royalblue1">' \
500                   '(<i>spelling at </i> %s)</font>' \
501                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
502
503        return make_plain_loc(loc)
504
505    def visit_begin_graph(self, graph):
506        self._graph = graph
507        self._dump_raw('digraph "ExplodedGraph" {\n')
508        if self._dark_mode:
509            self._dump_raw('bgcolor="gray10";\n')
510        self._dump_raw('label="";\n')
511
512    def visit_program_point(self, p):
513        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
514            color = 'gold3'
515        elif p.kind in ['PreStmtPurgeDeadSymbols',
516                        'PostStmtPurgeDeadSymbols']:
517            color = 'red'
518        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
519            color = 'dodgerblue' if self._dark_mode else 'blue'
520        elif p.kind in ['Statement']:
521            color = 'cyan4'
522        else:
523            color = 'forestgreen'
524
525        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
526
527        if p.kind == 'Statement':
528            # This avoids pretty-printing huge statements such as CompoundStmt.
529            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
530            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
531            stmt_color = 'cyan3'
532            self._dump('<td align="left" width="0">%s:</td>'
533                       '<td align="left" width="0"><font color="%s">'
534                       '%s</font> </td>'
535                       '<td align="left"><i>S%s</i></td>'
536                       '<td align="left"><font color="%s">%s</font></td>'
537                       '<td align="left">%s</td></tr>'
538                       % (self._make_sloc(p.loc), color,
539                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
540                          if p.cast_kind is not None else p.stmt_kind,
541                          p.stmt_id, stmt_color, p.stmt_point_kind,
542                          self._short_pretty(p.pretty)
543                          if not skip_pretty else ''))
544        elif p.kind == 'Edge':
545            self._dump('<td width="0"></td>'
546                       '<td align="left" width="0">'
547                       '<font color="%s">%s</font></td><td align="left">'
548                       '[B%d] -\\> [B%d]</td></tr>'
549                       % (color, 'BlockEdge', p.src_id, p.dst_id))
550        elif p.kind == 'BlockEntrance':
551            self._dump('<td width="0"></td>'
552                       '<td align="left" width="0">'
553                       '<font color="%s">%s</font></td>'
554                       '<td align="left">[B%d]</td></tr>'
555                       % (color, p.kind, p.block_id))
556        else:
557            # TODO: Print more stuff for other kinds of points.
558            self._dump('<td width="0"></td>'
559                       '<td align="left" width="0" colspan="2">'
560                       '<font color="%s">%s</font></td></tr>'
561                       % (color, p.kind))
562
563        if p.tag is not None:
564            self._dump('<tr><td width="0"></td><td width="0"></td>'
565                       '<td colspan="3" align="left">'
566                       '<b>Tag: </b> <font color="crimson">'
567                       '%s</font></td></tr>' % p.tag)
568
569        if p.has_report:
570            self._dump('<tr><td width="0"></td><td width="0"></td>'
571                       '<td colspan="3" align="left">'
572                       '<font color="red"><b>Bug Report Attached'
573                       '</b></font></td></tr>')
574        if p.is_sink:
575            self._dump('<tr><td width="0"></td><td width="0"></td>'
576                       '<td colspan="3" align="left">'
577                       '<font color="cornflowerblue"><b>Sink Node'
578                       '</b></font></td></tr>')
579
580    def visit_environment(self, e, prev_e=None):
581        self._dump('<table border="0">')
582
583        def dump_location_context(lc, is_added=None):
584            self._dump('<tr><td>%s</td>'
585                       '<td align="left"><b>%s</b></td>'
586                       '<td align="left" colspan="2">'
587                       '<font color="gray60">%s </font>'
588                       '%s</td></tr>'
589                       % (self._diff_plus_minus(is_added),
590                          lc.caption, lc.decl,
591                          ('(%s)' % self._make_sloc(lc.loc))
592                          if lc.loc is not None else ''))
593
594        def dump_binding(f, b, is_added=None):
595            self._dump('<tr><td>%s</td>'
596                       '<td align="left"><i>S%s</i></td>'
597                       '%s'
598                       '<td align="left">%s</td>'
599                       '<td align="left">%s</td></tr>'
600                       % (self._diff_plus_minus(is_added),
601                          b.stmt_id,
602                          '<td align="left"><font color="%s"><i>'
603                          '%s</i></font></td>' % (
604                              'lavender' if self._dark_mode else 'darkgreen',
605                              ('(%s)' % b.kind) if b.kind is not None else ' '
606                          ),
607                          self._short_pretty(b.pretty), f.bindings[b]))
608
609        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
610        if frames_updated:
611            for i in frames_updated:
612                f = e.frames[i]
613                prev_f = prev_e.frames[i]
614                dump_location_context(f.location_context)
615                bindings_removed, bindings_added = f.diff_bindings(prev_f)
616                for b in bindings_removed:
617                    dump_binding(prev_f, b, False)
618                for b in bindings_added:
619                    dump_binding(f, b, True)
620        else:
621            for f in e.frames:
622                dump_location_context(f.location_context)
623                for b in f.bindings:
624                    dump_binding(f, b)
625
626        self._dump('</table>')
627
628    def visit_environment_in_state(self, selector, title, s, prev_s=None):
629        e = getattr(s, selector)
630        prev_e = getattr(prev_s, selector) if prev_s is not None else None
631        if e is None and prev_e is None:
632            return
633
634        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
635        if e is None:
636            self._dump('<i> Nothing!</i>')
637        else:
638            if prev_e is not None:
639                if e.is_different(prev_e):
640                    self._dump('</td></tr><tr><td align="left">')
641                    self.visit_environment(e, prev_e)
642                else:
643                    self._dump('<i> No changes!</i>')
644            else:
645                self._dump('</td></tr><tr><td align="left">')
646                self.visit_environment(e)
647
648        self._dump('</td></tr>')
649
650    def visit_store(self, s, prev_s=None):
651        self._dump('<table border="0">')
652
653        def dump_binding(s, c, b, is_added=None):
654            self._dump('<tr><td>%s</td>'
655                       '<td align="left">%s</td>'
656                       '<td align="left">%s</td>'
657                       '<td align="left">%s</td>'
658                       '<td align="left">%s</td></tr>'
659                       % (self._diff_plus_minus(is_added),
660                          s.clusters[c].base_region, b.offset,
661                          '(<i>Default</i>)' if b.kind == 'Default'
662                          else '',
663                          s.clusters[c].bindings[b]))
664
665        if prev_s is not None:
666            clusters_removed, clusters_added, clusters_updated = \
667                s.diff_clusters(prev_s)
668            for c in clusters_removed:
669                for b in prev_s.clusters[c].bindings:
670                    dump_binding(prev_s, c, b, False)
671            for c in clusters_updated:
672                bindings_removed, bindings_added = \
673                    s.clusters[c].diff_bindings(prev_s.clusters[c])
674                for b in bindings_removed:
675                    dump_binding(prev_s, c, b, False)
676                for b in bindings_added:
677                    dump_binding(s, c, b, True)
678            for c in clusters_added:
679                for b in s.clusters[c].bindings:
680                    dump_binding(s, c, b, True)
681        else:
682            for c in s.clusters:
683                for b in s.clusters[c].bindings:
684                    dump_binding(s, c, b)
685
686        self._dump('</table>')
687
688    def visit_store_in_state(self, s, prev_s=None):
689        st = s.store
690        prev_st = prev_s.store if prev_s is not None else None
691        if st is None and prev_st is None:
692            return
693
694        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
695        if st is None:
696            self._dump('<i> Nothing!</i>')
697        else:
698            if self._dark_mode:
699                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
700            else:
701                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
702            if prev_st is not None:
703                if s.store.is_different(prev_st):
704                    self._dump('</td></tr><tr><td align="left">')
705                    self.visit_store(st, prev_st)
706                else:
707                    self._dump('<i> No changes!</i>')
708            else:
709                self._dump('</td></tr><tr><td align="left">')
710                self.visit_store(st)
711        self._dump('</td></tr>')
712
713    def visit_generic_map(self, m, prev_m=None):
714        self._dump('<table border="0">')
715
716        def dump_pair(m, k, is_added=None):
717            self._dump('<tr><td>%s</td>'
718                       '<td align="left">%s</td>'
719                       '<td align="left">%s</td></tr>'
720                       % (self._diff_plus_minus(is_added),
721                          k, m.generic_map[k]))
722
723        if prev_m is not None:
724            removed, added = m.diff(prev_m)
725            for k in removed:
726                dump_pair(prev_m, k, False)
727            for k in added:
728                dump_pair(m, k, True)
729        else:
730            for k in m.generic_map:
731                dump_pair(m, k, None)
732
733        self._dump('</table>')
734
735    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
736        m = getattr(s, selector)
737        prev_m = getattr(prev_s, selector) if prev_s is not None else None
738        if m is None and prev_m is None:
739            return
740
741        self._dump('<hr />')
742        self._dump('<tr><td align="left">'
743                   '<b>%s: </b>' % title)
744        if m is None:
745            self._dump('<i> Nothing!</i>')
746        else:
747            if prev_m is not None:
748                if m.is_different(prev_m):
749                    self._dump('</td></tr><tr><td align="left">')
750                    self.visit_generic_map(m, prev_m)
751                else:
752                    self._dump('<i> No changes!</i>')
753            else:
754                self._dump('</td></tr><tr><td align="left">')
755                self.visit_generic_map(m)
756
757        self._dump('</td></tr>')
758
759    def visit_checker_messages(self, m, prev_m=None):
760        self._dump('<table border="0">')
761
762        def dump_line(l, is_added=None):
763            self._dump('<tr><td>%s</td>'
764                       '<td align="left">%s</td></tr>'
765                       % (self._diff_plus_minus(is_added), l))
766
767        def dump_chk(chk, is_added=None):
768            dump_line('<i>%s</i>:' % chk, is_added)
769
770        if prev_m is not None:
771            removed, added, updated = m.diff_messages(prev_m)
772            for chk in removed:
773                dump_chk(chk, False)
774                for l in prev_m.items[chk].lines:
775                    dump_line(l, False)
776            for chk in updated:
777                dump_chk(chk)
778                for l in m.items[chk].diff_lines(prev_m.items[chk]):
779                    dump_line(l[1:], l.startswith('+'))
780            for chk in added:
781                dump_chk(chk, True)
782                for l in m.items[chk].lines:
783                    dump_line(l, True)
784        else:
785            for chk in m.items:
786                dump_chk(chk)
787                for l in m.items[chk].lines:
788                    dump_line(l)
789
790        self._dump('</table>')
791
792    def visit_checker_messages_in_state(self, s, prev_s=None):
793        m = s.checker_messages
794        prev_m = prev_s.checker_messages if prev_s is not None else None
795        if m is None and prev_m is None:
796            return
797
798        self._dump('<hr />')
799        self._dump('<tr><td align="left">'
800                   '<b>Checker State: </b>')
801        if m is None:
802            self._dump('<i> Nothing!</i>')
803        else:
804            if prev_m is not None:
805                if m.is_different(prev_m):
806                    self._dump('</td></tr><tr><td align="left">')
807                    self.visit_checker_messages(m, prev_m)
808                else:
809                    self._dump('<i> No changes!</i>')
810            else:
811                self._dump('</td></tr><tr><td align="left">')
812                self.visit_checker_messages(m)
813
814        self._dump('</td></tr>')
815
816    def visit_state(self, s, prev_s):
817        self.visit_store_in_state(s, prev_s)
818        self.visit_environment_in_state('environment', 'Expressions',
819                                        s, prev_s)
820        self.visit_generic_map_in_state('constraints', 'Ranges',
821                                        s, prev_s)
822        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
823                                        s, prev_s)
824        self.visit_environment_in_state('constructing_objects',
825                                        'Objects Under Construction',
826                                        s, prev_s)
827        self.visit_environment_in_state('index_of_element',
828                                        'Indices Of Elements Under Construction',
829                                        s, prev_s)
830        self.visit_environment_in_state('pending_init_loops',
831                                        'Pending Array Init Loop Expressions',
832                                        s, prev_s)
833        self.visit_environment_in_state('pending_destructors',
834                                        'Indices of Elements Under Destruction',
835                                        s, prev_s)
836        self.visit_checker_messages_in_state(s, prev_s)
837
838    def visit_node(self, node):
839        self._dump('%s [shape=record,'
840                   % (node.node_name()))
841        if self._dark_mode:
842            self._dump('color="white",fontcolor="gray80",')
843        self._dump('label=<<table border="0">')
844
845        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
846                   % ("gray20" if self._dark_mode else "gray70",
847                      node.state.state_id
848                      if node.state is not None else 'Unspecified'))
849        if not self._topo_mode:
850            self._dump('<tr><td align="left" width="0">')
851            if len(node.points) > 1:
852                self._dump('<b>Program points:</b></td></tr>')
853            else:
854                self._dump('<b>Program point:</b></td></tr>')
855        self._dump('<tr><td align="left" width="0">'
856                   '<table border="0" align="left" width="0">')
857        for p in node.points:
858            self.visit_program_point(p)
859        self._dump('</table></td></tr>')
860
861        if node.state is not None and not self._topo_mode:
862            prev_s = None
863            # Do diffs only when we have a unique predecessor.
864            # Don't do diffs on the leaf nodes because they're
865            # the important ones.
866            if self._do_diffs and len(node.predecessors) == 1 \
867               and len(node.successors) > 0:
868                prev_s = self._graph.nodes[node.predecessors[0]].state
869            self.visit_state(node.state, prev_s)
870        self._dump_raw('</table>>];\n')
871
872    def visit_edge(self, pred, succ):
873        self._dump_raw('%s -> %s%s;\n' % (
874            pred.node_name(), succ.node_name(),
875            ' [color="white"]' if self._dark_mode else ''
876        ))
877
878    def visit_end_of_graph(self):
879        self._dump_raw('}\n')
880
881        if not self._dump_dot_only:
882            import sys
883            import tempfile
884
885            def write_temp_file(suffix, prefix, data):
886                fd, filename = tempfile.mkstemp(suffix, prefix, '.', True)
887                print('Writing "%s"...' % filename)
888                with os.fdopen(fd, 'w') as fp:
889                    fp.write(data)
890                print('Done! Please remember to remove the file.')
891                return filename
892
893            try:
894                import graphviz
895            except ImportError:
896                # The fallback behavior if graphviz is not installed!
897                print('Python graphviz not found. Please invoke')
898                print('  $ pip install graphviz')
899                print('in order to enable automatic conversion to HTML.')
900                print()
901                print('You may also convert DOT to SVG manually via')
902                print('  $ dot -Tsvg input.dot -o output.svg')
903                print()
904                write_temp_file('.dot', 'egraph-', self.output())
905                return
906
907            svg = graphviz.pipe('dot', 'svg', self.output().encode()).decode()
908
909            filename = write_temp_file(
910                '.html', 'egraph-', '<html><body bgcolor="%s">%s</body></html>' % (
911                             '#1a1a1a' if self._dark_mode else 'white', svg))
912            if sys.platform == 'win32':
913                os.startfile(filename)
914            elif sys.platform == 'darwin':
915                os.system('open "%s"' % filename)
916            else:
917                os.system('xdg-open "%s"' % filename)
918
919
920#===-----------------------------------------------------------------------===#
921# Explorers know how to traverse the ExplodedGraph in a certain order.
922# They would invoke a Visitor on every node or edge they encounter.
923#===-----------------------------------------------------------------------===#
924
925
# BasicExplorer walks over the whole graph, visiting nodes in sorted order
# of their ids; each node is followed by its outgoing edges, also sorted.
class BasicExplorer:
    def explore(self, graph, visitor):
        """Feed every node and edge of graph to visitor.

        The visitor is told about the beginning of the graph first and
        about the end of the graph last.
        """
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            node = graph.nodes[node_id]
            visitor.visit_node(node)
            for succ_id in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(node, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
937
938
939#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph in order to focus on other parts.
941# Trimmers can be combined together by applying them sequentially.
942#===-----------------------------------------------------------------------===#
943
944
# SinglePathTrimmer keeps only a single path: starting at the root it always
# follows the first successor until it reaches a node without successors or
# a node it has already seen. Useful when the trimmed graph is still too
# large.
class SinglePathTrimmer:
    def trim(self, graph):
        """Reduce graph, in place, to the path through first successors.

        Every node on the path keeps only its first successor, that
        successor keeps only the path node as its predecessor, and all
        nodes off the path are dropped from graph.nodes.
        """
        kept_ids = set()
        current_id = graph.root_id
        while True:
            kept_ids.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                break
            next_id = current.successors[0]
            following = graph.nodes[next_id]
            current.successors = [next_id]
            following.predecessors = [current_id]
            if next_id in kept_ids:
                # The path loops back onto itself; stop here.
                break
            current_id = next_id
        graph.nodes = dict((kept_id, graph.nodes[kept_id])
                           for kept_id in kept_ids)
966
967
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer:
    def __init__(self, target_nodes):
        # List of keys of graph.nodes identifying the nodes to keep paths to.
        self._target_nodes = target_nodes

    @staticmethod
    def parse_target_node(node, graph):
        """Translate one user-supplied node reference into a graph.nodes key.

        node is either a pointer ('0x...'), which maps to the key
        'Node0x...', or a decimal number that is compared against the
        node_id attribute of every node in the graph.

        Raises ValueError when node is neither a pointer nor a number, or
        when the graph contains no such node.
        """
        if node.startswith('0x'):
            ret = 'Node' + node
            # Raise explicitly rather than assert: asserts vanish under -O.
            if ret not in graph.nodes:
                raise ValueError(
                    'no node with pointer %s in the graph' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Falling through would return None and surface much later as an
        # unrelated-looking KeyError in trim().
        raise ValueError('no node with stable ID %s in the graph' % node)

    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        """Translate a comma-separated list of node references.

        Returns the list of graph.nodes keys in the order given; see
        parse_target_node() for the accepted forms and errors.
        """
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    def trim(self, graph):
        """Drop, in place, every node that has no path to a target node.

        Walks predecessor links backwards from the target nodes, keeps
        only the nodes reached and removes references to dropped nodes
        from the successor and predecessor lists of the kept ones.
        """
        # Work on a copy: popping from self._target_nodes itself would
        # empty the caller's list and make this trimmer single-use.
        queue = list(self._target_nodes)
        visited_nodes = set()

        while queue:
            node_id = queue.pop()
            if node_id in visited_nodes:
                # Already handled; it was queued more than once.
                continue
            visited_nodes.add(node_id)
            node = graph.nodes[node_id]
            for pred_id in node.predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
1011
1012
1013#===-----------------------------------------------------------------------===#
1014# The entry point to the script.
1015#===-----------------------------------------------------------------------===#
1016
1017
def main():
    """Parse the command line, load the graph dump and display it.

    The input file is fed line by line into an ExplodedGraph, the
    requested trimmers are applied in order (--to before --single-path)
    and the result is handed to a DotDumpVisitor through a BasicExplorer.
    """
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')

    def add_flag(names, dest, help_text):
        # A boolean switch: False unless given on the command line.
        parser.add_argument(*names, action='store_const', dest=dest,
                            const=True, default=False, help=help_text)

    parser.add_argument('filename', type=str,
                        help='the .dot file produced by the Static Analyzer')
    parser.add_argument('-v', '--verbose', action='store_const',
                        dest='loglevel', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    add_flag(['-d', '--diff'], 'diff',
             'display differences between states')
    add_flag(['-t', '--topology'], 'topology',
             'only display program points, omit states')
    add_flag(['-s', '--single-path'], 'single_path',
             'only display the leftmost path in the graph '
             '(useful for trimmed graphs that still '
             'branch too much)')
    parser.add_argument('--to', type=str, default=None,
                        help='only display execution paths from the root '
                             'to the given comma-separated list of nodes '
                             'identified by a pointer or a stable ID; '
                             'compatible with --single-path')
    add_flag(['--dark'], 'dark', 'dark mode')
    add_flag(['--gray'], 'gray', 'black-and-white mode')
    add_flag(['--dump-dot-only'], 'dump_dot_only',
             'instead of writing an HTML file and immediately '
             'displaying it, dump the rewritten dot file '
             'to stdout')

    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as dot_file:
        for line in dot_file:
            graph.add_raw_line(line.strip())

    # Target nodes can only be resolved once the whole graph is loaded.
    trimmers = []
    if args.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(args.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()

    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
                             args.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()
1083