1#!/usr/bin/env python
2
3import argparse
4import itertools
5import os
6import re
7import sys
8from collections import defaultdict
9
10from use_lldb_suite import lldb_root
11
12parser = argparse.ArgumentParser(
13    description='Analyze LLDB project #include dependencies.')
14parser.add_argument('--show-counts', default=False, action='store_true',
15    help='When true, show the number of dependencies from each subproject')
16parser.add_argument('--discover-cycles', default=False, action='store_true',
17    help='When true, find and display all project dependency cycles.  Note,'
18         'this option is very slow')
19
20args = parser.parse_args()
21
22src_dir = os.path.join(lldb_root, "source")
23inc_dir = os.path.join(lldb_root, "include")
24
25src_map = {}
26
27include_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')
28
29def is_sublist(small, big):
30    it = iter(big)
31    return all(c in it for c in small)
32
33def normalize_host(str):
34    if str.startswith("lldb/Host"):
35        return "lldb/Host"
36    if str.startswith("Plugins"):
37        return "lldb/" + str
38    if str.startswith("lldb/../../source"):
39        return str.replace("lldb/../../source", "lldb")
40    return str
41
42def scan_deps(this_dir, file):
43    global src_map
44    deps = {}
45    this_dir = normalize_host(this_dir)
46    if this_dir in src_map:
47        deps = src_map[this_dir]
48
49    with open(file) as f:
50        for line in list(f):
51            m = include_regex.match(line)
52            if m is None:
53                continue
54            relative = m.groups()[0].rstrip("/")
55            if relative == this_dir:
56                continue
57            relative = normalize_host(relative)
58            if relative in deps:
59                deps[relative] += 1
60            elif relative != this_dir:
61                deps[relative] = 1
62    if this_dir not in src_map and len(deps) > 0:
63        src_map[this_dir] = deps
64
65for (base, dirs, files) in os.walk(inc_dir):
66    dir = os.path.basename(base)
67    relative = os.path.relpath(base, inc_dir)
68    inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
69    relative = relative.replace("\\", "/")
70    for inc in inc_files:
71        inc_path = os.path.join(base, inc)
72        scan_deps(relative, inc_path)
73
74for (base, dirs, files) in os.walk(src_dir):
75    dir = os.path.basename(base)
76    relative = os.path.relpath(base, src_dir)
77    src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
78    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
79    norm_base_path = norm_base_path.replace("\\", "/")
80    for src in src_files:
81        src_path = os.path.join(base, src)
82        scan_deps(norm_base_path, src_path)
83    pass
84
85def is_existing_cycle(path, cycles):
86    # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
87    # then we don't just want to check for an occurrence of A -> B -> C in the
88    # list of known cycles, but every possible rotation of A -> B -> C.  For
89    # example, if we previously encountered B -> C -> A (with an implicit -> B
90    # at the end), then A -> B -> C is also a cycle.  This is an important
91    # optimization which reduces the search space by multiple orders of
92    # magnitude.
93    for i in range(0,len(path)):
94        if any(is_sublist(x, path) for x in cycles):
95            return True
96        path = [path[-1]] + path[0:-1]
97    return False
98
99def expand(path_queue, path_lengths, cycles, src_map):
100    # We do a breadth first search, to make sure we visit all paths in order
101    # of ascending length.  This is an important optimization to make sure that
102    # short cycles are discovered first, which will allow us to discard longer
103    # cycles which grow the search space exponentially the longer they get.
104    while len(path_queue) > 0:
105        cur_path = path_queue.pop(0)
106        if is_existing_cycle(cur_path, cycles):
107            continue
108
109        next_len = path_lengths.pop(0) + 1
110        last_component = cur_path[-1]
111
112        for item in src_map.get(last_component, []):
113            if item.startswith("clang"):
114                continue
115
116            if item in cur_path:
117                # This is a cycle.  Minimize it and then check if the result is
118                # already in the list of cycles.  Insert it (or not) and then
119                # exit.
120                new_index = cur_path.index(item)
121                cycle = cur_path[new_index:]
122                if not is_existing_cycle(cycle, cycles):
123                    cycles.append(cycle)
124                continue
125
126            path_lengths.append(next_len)
127            path_queue.append(cur_path + [item])
128    pass
129
130cycles = []
131
132path_queue = [[x] for x in iter(src_map)]
133path_lens = [1] * len(path_queue)
134
135items = list(src_map.items())
136items.sort(key = lambda A : A[0])
137
138for (path, deps) in items:
139    print(path + ":")
140    sorted_deps = list(deps.items())
141    if args.show_counts:
142        sorted_deps.sort(key = lambda A: (A[1], A[0]))
143        for dep in sorted_deps:
144            print("\t{} [{}]".format(dep[0], dep[1]))
145    else:
146        sorted_deps.sort(key = lambda A: A[0])
147        for dep in sorted_deps:
148            print("\t{}".format(dep[0]))
149
150def iter_cycles(cycles):
151    global src_map
152    for cycle in cycles:
153        cycle.append(cycle[0])
154        zipper = list(zip(cycle[0:-1], cycle[1:]))
155        result = [(x, src_map[x][y], y) for (x,y) in zipper]
156        total = 0
157        smallest = result[0][1]
158        for (first, value, last) in result:
159            total += value
160            smallest = min(smallest, value)
161        yield (total, smallest, result)
162
163if args.discover_cycles:
164    print("Analyzing cycles...")
165
166    expand(path_queue, path_lens, cycles, src_map)
167
168    average = sum([len(x)+1 for x in cycles]) / len(cycles)
169
170    print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
171    counted = list(iter_cycles(cycles))
172    if args.show_counts:
173        counted.sort(key = lambda A: A[0])
174        for (total, smallest, cycle) in counted:
175            sys.stdout.write("{} deps to break: ".format(total))
176            sys.stdout.write(cycle[0][0])
177            for (first, count, last) in cycle:
178                sys.stdout.write(" [{}->] {}".format(count, last))
179            sys.stdout.write("\n")
180    else:
181        for cycle in cycles:
182            cycle.append(cycle[0])
183            print(" -> ".join(cycle))
184
185    print("Analyzing islands...")
186    islands = []
187    outgoing_counts = defaultdict(int)
188    incoming_counts = defaultdict(int)
189    for (total, smallest, cycle) in counted:
190        for (first, count, last) in cycle:
191            outgoing_counts[first] += count
192            incoming_counts[last] += count
193    for cycle in cycles:
194        this_cycle = set(cycle)
195        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
196        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
197        islands = disjoints + [set.union(this_cycle, *overlaps)]
198    print("Found {} disjoint cycle islands...".format(len(islands)))
199    for island in islands:
200        print("Island ({} elements)".format(len(island)))
201        sorted = []
202        for node in island:
203            sorted.append((node, incoming_counts[node], outgoing_counts[node]))
204        sorted.sort(key = lambda x: x[1]+x[2])
205        for (node, inc, outg) in sorted:
206            print("  {} [{} in, {} out]".format(node, inc, outg))
207    sys.stdout.flush()
208pass
209