1#!/usr/local/bin/python3.8 2 3from collections import defaultdict 4import io 5import re 6import subprocess 7import sys 8 9# -------------------------------------------------------------------------------------------- 10# globals 11# -------------------------------------------------------------------------------------------- 12 13definitionSet = set() # set of method_name 14definitionToSourceLocationMap = dict() 15 16# for the "unused methods" analysis 17callDict = defaultdict(set) # map of from_method_name -> set(method_name) 18 19# clang does not always use exactly the same numbers in the type-parameter vars it generates 20# so I need to substitute them to ensure we can match correctly. 21normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+") 22def normalizeTypeParams( line ): 23 line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line) 24 # make some of the types a little prettier 25 line = line.replace("std::__debug", "std::") 26 line = line.replace("class ", "") 27 line = line.replace("struct ", "") 28 line = line.replace("_Bool", "bool") 29 return line 30 31# -------------------------------------------------------------------------------------------- 32# primary input loop 33# -------------------------------------------------------------------------------------------- 34 35cnt = 0 36with io.open("workdir/loplugin.methodcycles.log", "rb", buffering=1024*1024) as txt: 37 for line in txt: 38 tokens = line.strip().split("\t") 39 if tokens[0] == "definition:": 40 returnType = tokens[1] 41 nameAndParams = tokens[2] 42 sourceLocation = tokens[3] 43 funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip() 44 definitionSet.add(funcInfo) 45 definitionToSourceLocationMap[funcInfo] = sourceLocation 46 elif tokens[0] == "call:": 47 returnTypeFrom = tokens[1] 48 nameAndParamsFrom = tokens[2] 49 returnTypeTo = tokens[3] 50 nameAndParamsTo = tokens[4] 51 caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip() 52 callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip() 53 callDict[caller].add(callee) 54 else: 55 print( "unknown line: " + line) 56 cnt = cnt + 1 57 #if cnt > 100000: break 58 59# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely 60def natural_sort_key(s, _nsre=re.compile('([0-9]+)')): 61 return [int(text) if text.isdigit() else text.lower() 62 for text in re.split(_nsre, s)] 63# sort by both the source-line and the datatype, so the output file ordering is stable 64# when we have multiple items on the same source line 65def v_sort_key(v): 66 return natural_sort_key(v[1]) + [v[0]] 67def sort_set_by_natural_key(s): 68 return sorted(s, key=lambda v: v_sort_key(v)) 69 70 71# -------------------------------------------------------------------------------------------- 72# analysis 73# -------------------------------------------------------------------------------------------- 74 75# follow caller-callee chains, removing all methods reachable from a root method 76def remove_reachable(callDict, startCaller): 77 worklist = list() 78 worklist.append(startCaller) 79 while len(worklist) > 0: 80 caller = worklist.pop() 81 if not caller in callDict: 82 continue 83 calleeSet = callDict[caller] 84 del callDict[caller] 85 if caller in definitionSet: 86 definitionSet.remove(caller) 87 for c in calleeSet: 88 worklist.append(c) 89 90# look for all the external entry points and remove code called from there 91to_be_removed = set() 92to_be_removed.add("int main(int,char **)") 93# random dynload entrypoints that we don't otherwise find 94to_be_removed.add("bool TestImportOLE2(SvStream &)") 95to_be_removed.add("void SbiRuntime::StepREDIMP()") 96to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)"); 97for caller in definitionSet: 98 if not caller in definitionToSourceLocationMap: 99 to_be_removed.append(caller) 100 continue 101 location = definitionToSourceLocationMap[caller] 102 if "include/com/" in location \ 103 or "include/cppu/" in location \ 104 or "include/cppuhelper/" in location \ 105 or "include/osl/" in location \ 106 or "include/rtl/" in location \ 107 or "include/sal/" in location \ 108 or "include/salhelper/" in location \ 109 or "include/typelib/" in location \ 110 or "include/uno/" in location \ 111 or "workdir/UnpackedTarball/" in location \ 112 or "workdir/UnoApiHeadersTarget/" in location \ 113 or "workdir/CustomTarget/officecfg/" in location \ 114 or "workdir/LexTarget/" in location \ 115 or "workdir/CustomTarget/i18npool/localedata/" in location \ 116 or "workdir/SdiTarget/" in location \ 117 or "/qa/" in location \ 118 or "include/test/" in location: 119 to_be_removed.add(caller) 120 # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called, 121 # which is not ideal 122 if "::~" in caller: 123 to_be_removed.add(caller) 124 # dyload entry points for VCL builder 125 if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller: 126 to_be_removed.add(caller) 127 if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller: 128 to_be_removed.add(caller) 129# find all the UNO load-by-symbol-name entrypoints 130uno_constructor_entrypoints = set() 131git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True) 132with git_grep_process.stdout as txt: 133 for line in txt: 134 idx1 = line.find("\"") 135 idx2 = line.find("\"", idx1 + 1) 136 func = line[idx1+1 : idx2] 137 uno_constructor_entrypoints.add(func) 138for caller in callDict: 139 if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller: 140 for func in uno_constructor_entrypoints: 141 if func in caller: 142 to_be_removed.add(caller) 143# remove everything reachable from the found entry points 144for caller in to_be_removed: 145 remove_reachable(callDict, caller) 146for caller in callDict: 147 callDict[caller] -= to_be_removed 148 149# create a reverse call graph 150inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name) 151for caller in callDict: 152 for callee in callDict[caller]: 153 inverseCallDict[callee].add(caller) 154 155print_tree_recurse_set = set() # protect against cycles 156def print_tree(f, callDict, caller, depth): 157 if depth == 0: 158 f.write("\n") # add an empty line before each tree 159 print_tree_recurse_set.clear() 160 # protect against cycles 161 if caller in print_tree_recurse_set: 162 return 163 # when printing out trees, things that are not in the map are things that are reachable, 164 # so we're not interested in them 165 if not caller in callDict: 166 return 167 print_tree_recurse_set.add(caller) 168 f.write(" " * depth + caller + "\n") 169 f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n") 170 calleeSet = callDict[caller] 171 for c in calleeSet: 172 print_tree(f, callDict, c, depth+1) 173 174# find possible roots (ie. entrypoints) by looking for methods that are not called 175def dump_possible_roots(): 176 possibleRootList = list() 177 for caller in callDict: 178 if not caller in inverseCallDict and caller in definitionToSourceLocationMap: 179 possibleRootList.append(caller) 180 possibleRootList.sort() 181 182 # print out first 100 trees of caller->callees 183 count = 0 184 with open("compilerplugins/clang/methodcycles.roots", "wt") as f: 185 f.write("callDict size " + str(len(callDict)) + "\n") 186 f.write("possibleRootList size " + str(len(possibleRootList)) + "\n") 187 f.write("\n") 188 for caller in possibleRootList: 189 f.write(caller + "\n") 190 f.write(" " + definitionToSourceLocationMap[caller] + "\n") 191 #print_tree(f, caller, 0) 192 count = count + 1 193 #if count>1000: break 194 195# Look for cycles in a directed graph 196# Adapted from: 197# https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle 198def print_cycles(): 199 with open("compilerplugins/clang/methodcycles.results", "wt") as f: 200 path = set() 201 visited = set() 202 203 def printPath(path): 204 if len(path) < 2: 205 return 206 # we may have found a cycle, but if the cycle is called from outside the cycle 207 # the code is still in use. 208 for p in path: 209 for caller in inverseCallDict[p]: 210 if not caller in path: 211 return 212 f.write("found cycle\n") 213 for p in path: 214 f.write(" " + p + "\n") 215 f.write(" " + definitionToSourceLocationMap[p] + "\n") 216 f.write("\n") 217 218 def checkCyclic(vertex): 219 if vertex in visited: 220 return 221 visited.add(vertex) 222 path.add(vertex) 223 if vertex in callDict: 224 for neighbour in callDict[vertex]: 225 if neighbour in path: 226 printPath(path) 227 break 228 else: 229 checkCyclic(neighbour) 230 path.remove(vertex) 231 232 for caller in callDict: 233 checkCyclic(caller) 234 235print_cycles() 236 237# print partitioned sub-graphs 238def print_partitions(): 239 callDict2 = callDict 240 # Remove anything with no callees, and that is itself not called. 241 # After this stage, we should only be left with closed sub-graphs ie. partitions 242 while True: 243 to_be_removed.clear() 244 for caller in callDict2: 245 if len(callDict2[caller]) == 0 \ 246 or not caller in inverseCallDict[caller]: 247 to_be_removed.add(caller) 248 if len(to_be_removed) == 0: 249 break 250 for caller in to_be_removed: 251 remove_reachable(callDict2, caller) 252 for caller in callDict2: 253 callDict2[caller] -= to_be_removed 254 255 count = 0 256 with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f: 257 f.write("callDict size " + str(len(callDict2)) + "\n") 258 f.write("\n") 259 while len(callDict2) > 0: 260 print_tree(f, callDict2, next(iter(callDict2)), 0) 261 for c in print_tree_recurse_set: 262 callDict2.pop(c, None) 263 count = count + 1 264 if count>1000: break 265 266print_partitions()