1#!/usr/local/bin/python3.8
2
3from collections import defaultdict
4import io
5import re
6import subprocess
7import sys
8
9# --------------------------------------------------------------------------------------------
10# globals
11# --------------------------------------------------------------------------------------------
12
13definitionSet = set() # set of method_name
14definitionToSourceLocationMap = dict()
15
16# for the "unused methods" analysis
17callDict = defaultdict(set) # map of from_method_name -> set(method_name)
18
19# clang does not always use exactly the same numbers in the type-parameter vars it generates
20# so I need to substitute them to ensure we can match correctly.
21normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
22def normalizeTypeParams( line ):
23    line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
24    # make some of the types a little prettier
25    line = line.replace("std::__debug", "std::")
26    line = line.replace("class ", "")
27    line = line.replace("struct ", "")
28    line = line.replace("_Bool", "bool")
29    return line
30
31# --------------------------------------------------------------------------------------------
32# primary input loop
33# --------------------------------------------------------------------------------------------
34
35cnt = 0
36with io.open("workdir/loplugin.methodcycles.log", "rb", buffering=1024*1024) as txt:
37    for line in txt:
38        tokens = line.strip().split("\t")
39        if tokens[0] == "definition:":
40            returnType = tokens[1]
41            nameAndParams = tokens[2]
42            sourceLocation = tokens[3]
43            funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
44            definitionSet.add(funcInfo)
45            definitionToSourceLocationMap[funcInfo] = sourceLocation
46        elif tokens[0] == "call:":
47            returnTypeFrom = tokens[1]
48            nameAndParamsFrom = tokens[2]
49            returnTypeTo = tokens[3]
50            nameAndParamsTo = tokens[4]
51            caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
52            callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
53            callDict[caller].add(callee)
54        else:
55            print( "unknown line: " + line)
56        cnt = cnt + 1
57        #if cnt > 100000: break
58
59# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
60def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
61    return [int(text) if text.isdigit() else text.lower()
62            for text in re.split(_nsre, s)]
63# sort by both the source-line and the datatype, so the output file ordering is stable
64# when we have multiple items on the same source line
65def v_sort_key(v):
66    return natural_sort_key(v[1]) + [v[0]]
67def sort_set_by_natural_key(s):
68    return sorted(s, key=lambda v: v_sort_key(v))
69
70
71# --------------------------------------------------------------------------------------------
72#  analysis
73# --------------------------------------------------------------------------------------------
74
75# follow caller-callee chains, removing all methods reachable from a root method
76def remove_reachable(callDict, startCaller):
77    worklist = list()
78    worklist.append(startCaller)
79    while len(worklist) > 0:
80        caller = worklist.pop()
81        if not caller in callDict:
82            continue
83        calleeSet = callDict[caller]
84        del callDict[caller]
85        if caller in definitionSet:
86            definitionSet.remove(caller)
87        for c in calleeSet:
88            worklist.append(c)
89
90# look for all the external entry points and remove code called from there
91to_be_removed = set()
92to_be_removed.add("int main(int,char **)")
93# random dynload entrypoints that we don't otherwise find
94to_be_removed.add("bool TestImportOLE2(SvStream &)")
95to_be_removed.add("void SbiRuntime::StepREDIMP()")
96to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)");
97for caller in definitionSet:
98    if not caller in definitionToSourceLocationMap:
99        to_be_removed.append(caller)
100        continue
101    location = definitionToSourceLocationMap[caller]
102    if "include/com/" in location \
103      or "include/cppu/" in location \
104      or "include/cppuhelper/" in location \
105      or "include/osl/" in location \
106      or "include/rtl/" in location \
107      or "include/sal/" in location \
108      or "include/salhelper/" in location \
109      or "include/typelib/" in location \
110      or "include/uno/" in location \
111      or "workdir/UnpackedTarball/" in location \
112      or "workdir/UnoApiHeadersTarget/" in location \
113      or "workdir/CustomTarget/officecfg/" in location \
114      or "workdir/LexTarget/" in location \
115      or "workdir/CustomTarget/i18npool/localedata/" in location \
116      or "workdir/SdiTarget/" in location \
117      or "/qa/" in location \
118      or "include/test/" in location:
119        to_be_removed.add(caller)
120    # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
121    # which is not ideal
122    if "::~" in caller:
123        to_be_removed.add(caller)
124    # dyload entry points for VCL builder
125    if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
126        to_be_removed.add(caller)
127    if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
128        to_be_removed.add(caller)
129# find all the UNO load-by-symbol-name entrypoints
130uno_constructor_entrypoints = set()
131git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
132with git_grep_process.stdout as txt:
133    for line in txt:
134        idx1 = line.find("\"")
135        idx2 = line.find("\"", idx1 + 1)
136        func = line[idx1+1 : idx2]
137        uno_constructor_entrypoints.add(func)
138for caller in callDict:
139    if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
140        for func in uno_constructor_entrypoints:
141            if func in caller:
142                to_be_removed.add(caller)
143# remove everything reachable from the found entry points
144for caller in to_be_removed:
145    remove_reachable(callDict, caller)
146for caller in callDict:
147    callDict[caller] -= to_be_removed
148
149# create a reverse call graph
150inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
151for caller in callDict:
152    for callee in callDict[caller]:
153        inverseCallDict[callee].add(caller)
154
155print_tree_recurse_set = set() # protect against cycles
156def print_tree(f, callDict, caller, depth):
157    if depth == 0:
158        f.write("\n") # add an empty line before each tree
159        print_tree_recurse_set.clear()
160    # protect against cycles
161    if caller in print_tree_recurse_set:
162        return
163    # when printing out trees, things that are not in the map are things that are reachable,
164    # so we're not interested in them
165    if not caller in callDict:
166        return
167    print_tree_recurse_set.add(caller)
168    f.write("  " * depth + caller + "\n")
169    f.write("  " * depth + definitionToSourceLocationMap[caller] + "\n")
170    calleeSet = callDict[caller]
171    for c in calleeSet:
172        print_tree(f, callDict, c, depth+1)
173
174# find possible roots (ie. entrypoints) by looking for methods that are not called
175def dump_possible_roots():
176    possibleRootList = list()
177    for caller in callDict:
178        if not caller in inverseCallDict and caller in definitionToSourceLocationMap:
179            possibleRootList.append(caller)
180    possibleRootList.sort()
181
182    # print out first 100 trees of caller->callees
183    count = 0
184    with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
185        f.write("callDict size " + str(len(callDict)) + "\n")
186        f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
187        f.write("\n")
188        for caller in possibleRootList:
189            f.write(caller + "\n")
190            f.write("  " + definitionToSourceLocationMap[caller] + "\n")
191            #print_tree(f, caller, 0)
192            count = count + 1
193            #if count>1000: break
194
195# Look for cycles in a directed graph
196# Adapted from:
197# https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
198def print_cycles():
199    with open("compilerplugins/clang/methodcycles.results", "wt") as f:
200        path = set()
201        visited = set()
202
203        def printPath(path):
204            if len(path) < 2:
205                return
206            # we may have found a cycle, but if the cycle is called from outside the cycle
207            # the code is still in use.
208            for p in path:
209                for caller in inverseCallDict[p]:
210                    if not caller in path:
211                        return
212            f.write("found cycle\n")
213            for p in path:
214                f.write("    " + p + "\n")
215                f.write("    " + definitionToSourceLocationMap[p] + "\n")
216                f.write("\n")
217
218        def checkCyclic(vertex):
219            if vertex in visited:
220                return
221            visited.add(vertex)
222            path.add(vertex)
223            if vertex in callDict:
224                for neighbour in callDict[vertex]:
225                    if neighbour in path:
226                        printPath(path)
227                        break
228                    else:
229                        checkCyclic(neighbour)
230            path.remove(vertex)
231
232        for caller in callDict:
233            checkCyclic(caller)
234
235print_cycles()
236
237# print partitioned sub-graphs
238def print_partitions():
239    callDict2 = callDict
240    # Remove anything with no callees, and that is itself not called.
241    # After this stage, we should only be left with closed sub-graphs ie. partitions
242    while True:
243        to_be_removed.clear()
244        for caller in callDict2:
245            if len(callDict2[caller]) == 0 \
246                or not caller in inverseCallDict[caller]:
247                to_be_removed.add(caller)
248        if len(to_be_removed) == 0:
249            break
250        for caller in to_be_removed:
251            remove_reachable(callDict2, caller)
252        for caller in callDict2:
253            callDict2[caller] -= to_be_removed
254
255    count = 0
256    with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
257        f.write("callDict size " + str(len(callDict2)) + "\n")
258        f.write("\n")
259        while len(callDict2) > 0:
260            print_tree(f, callDict2, next(iter(callDict2)), 0)
261            for c in print_tree_recurse_set:
262                callDict2.pop(c, None)
263            count = count + 1
264            if count>1000: break
265
266print_partitions()