#!/usr/local/bin/python3.8
#
# Find exported symbols that can be made non-exported.
#
# Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
# dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
# is rather complex.
#
# Takes a while to run on a decent machine (the demangling step is batched to keep that down).
#
# The standalone function analysis is reasonably reliable, but the class/method analysis is less so
# (something to do with destructor thunks not showing up in my results?)
#
# Also, the class/method analysis will not catch problems like
#    'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
# but loplugin:dyncastvisibility will do that for you
#

import subprocess
import sys
import re

# nm -D lines for exported text symbols look something like:
#   0000000000036ed0 T flash_component_getFactory
NM_EXPORT_RE = re.compile(r'^[0-9a-fA-F]+ T ')

# Equivalent of "grep -w U": a standalone "U" marks an undefined symbol in nm output.
NM_UNDEFINED_RE = re.compile(r'\bU\b')

# Number of leading objdump -T output lines that are skipped as header.
OBJDUMP_HEADER_LINES = 4

THUNK_PREFIXES = ("non-virtual thunk to ", "virtual thunk to ")

# Filter out most of the noise.
# No idea where these are coming from, but not our code.
NOISE_FUNCTION_PREFIXES = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_",
    "LU", "MIP", "MPS", "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_",
    "PRP_", "PR_", "PT_", "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS",
    "SSL_", "VFY_", "_PR_", "_", "ber_", "bfp_", "ldap_", "ne_", "opj_",
    "pg_", "pq", "presolve_", "sqlite3_",
)

# Entry points that are looked up dynamically, so they never show up as imports.
DYNAMIC_FUNCTION_SUFFIXES = ("get_implementation", "component_getFactory")
DYNAMIC_FUNCTION_NAMES = frozenset((
    "CreateDialogFactory",
    "CreateUnoWrapper",
    "CreateWindow",
    "ExportDOC",
    "ExportPPT",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
))
DYNAMIC_FUNCTION_PREFIXES = (
    "Import",
    "Java_com_sun_star_",
    "TestImport",
    "getAllCalendars_",
    "getAllCurrencies_",
    "getAllFormats",
    "getBreakIteratorRules_",
    "getCollationOptions_",
    "getCollatorImplementation_",
    "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_",
    "getForbiddenCharacters_",
    "getIndexAlgorithm_",
    "getLCInfo_",
    "getLocaleItem_",
    "getOutlineNumberingLevels_",
    "getReservedWords_",
    "getSTC_",
    "getSearchOptions_",
    "getTransliterations_",
    "getUnicodeScripts_",
    "lok_",
)

# UDK API
UDK_FUNCTION_PREFIXES = ("osl_", "rtl_", "typelib_", "typereg_", "uno_")

# externals
EXTERNAL_CLASS_PREFIXES = (
    "libcdr", "libabw", "libebook", "libepubgen", "libfreehand", "libmspub",
    "libpagemaker", "libqxp", "libvisio", "libzmf", "lucene::",
)

FUNCTIONS_RESULTS_PATH = "bin/find-can-be-private-symbols.functions.results"
CLASSES_RESULTS_PATH = "bin/find-can-be-private-symbols.classes.results"


def run_lines(argv):
    """Run the command `argv` (a list, no shell) and return its stdout as text lines.

    The exit status is deliberately ignored, matching the best-effort behaviour
    of the original pipeline; stderr is passed through to the terminal.
    """
    completed = subprocess.run(argv, stdout=subprocess.PIPE, text=True)
    return completed.stdout.splitlines()


def find_files(root, pattern):
    """Return the paths below `root` whose name matches the find(1) `pattern`.

    The pattern is handed to find as a single argument, so the shell never
    gets a chance to expand it against the current directory.
    """
    return [path for path in (line.strip() for line in run_lines(["find", root, "-name", pattern])) if path]


def parse_nm_exports(lines):
    """Return the names of the exported text ("T") symbols in `nm -D` output lines."""
    symbols = set()
    for line in lines:
        line = line.strip()
        if NM_EXPORT_RE.match(line):
            symbols.add(line.split(" ")[2])
    return symbols


def parse_nm_undefined(lines):
    """Return the names of the undefined ("U") symbols in `nm -D` output lines.

    We are looking for lines something like:
                     U sal_detail_deinitialize
    """
    symbols = set()
    for line in lines:
        if not NM_UNDEFINED_RE.search(line):
            continue
        tokens = line.strip().split(" ")
        if len(tokens) < 2:
            continue
        symbols.add(tokens[1])
    return symbols


def parse_objdump_imports(lines):
    """Return the names of the undefined symbols in `objdump -T` output lines.

    The first OBJDUMP_HEADER_LINES lines are skipped as header. After that we
    are looking for lines something like:
    0000000000000000      DF *UND*  0000000000000000     _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
    where, after splitting on single spaces, token 7 is the section column.
    """
    symbols = set()
    for line in list(lines)[OBJDUMP_HEADER_LINES:]:
        tokens = line.strip().split(" ")
        # tokens[7] is read below, so at least 8 tokens are required
        if len(tokens) < 8 or not tokens[7].startswith("*UND*"):
            continue
        symbols.add(tokens[-1])
    return symbols


def demangle(symbols, chunk_size=200):
    """Return a dict mapping each symbol in `symbols` to its c++filt output.

    Symbols are passed as command-line arguments (as the original did), but
    `chunk_size` at a time rather than one process per symbol. c++filt prints
    one line per argument; should the line count ever not match, that chunk is
    redone one symbol at a time.
    """
    ordered = sorted(symbols)
    demangled = {}
    for start in range(0, len(ordered), chunk_size):
        chunk = ordered[start:start + chunk_size]
        names = subprocess.check_output(["c++filt"] + chunk, text=True).splitlines()
        if len(names) != len(chunk):
            names = [subprocess.check_output(["c++filt", sym], text=True) for sym in chunk]
        demangled.update((sym, name.strip()) for sym, name in zip(chunk, names))
    return demangled


def strip_thunk_prefix(name):
    """Return `name` without a leading "[non-]virtual thunk to " marker."""
    for prefix in THUNK_PREFIXES:
        if name.startswith(prefix):
            return name[len(prefix):]
    return name


def class_of(demangled_name):
    """Return the qualifier before the last "::" of the function name, or None.

    Only the text before the first "(" is searched, so "::" inside the
    parameter list does not count. None means a standalone function.
    """
    name = strip_thunk_prefix(demangled_name)
    paren = name.find("(")
    end = paren if paren != -1 else len(name)
    sep = name.rfind("::", 0, end)
    if sep == -1:
        return None
    return name[:sep]


def classify_exports(exported_symbols, imported_symbols, demangled):
    """Split the exported symbols into unused standalone functions and classes.

    Returns a pair (unused_function_exports, classes_with_exported_symbols):
    the demangled names of standalone functions which are exported but not
    imported, and the set of every class that has at least one exported symbol.
    """
    unused_function_exports = set()
    classes_with_exported_symbols = set()
    for sym in exported_symbols:
        name = strip_thunk_prefix(demangled[sym])
        classname = class_of(name)
        if classname is not None:
            classes_with_exported_symbols.add(classname)
        elif sym not in imported_symbols:
            unused_function_exports.add(name)
    return unused_function_exports, classes_with_exported_symbols


def classes_of(symbols, demangled):
    """Return the set of classes that have at least one member in `symbols`."""
    classnames = (class_of(demangled[sym]) for sym in symbols)
    return {classname for classname in classnames if classname is not None}


def is_noise_function(sym):
    """Return True if the standalone function `sym` should be left out of the results."""
    return (
        sym.startswith(NOISE_FUNCTION_PREFIXES)
        or sym.endswith(DYNAMIC_FUNCTION_SUFFIXES)
        or sym in DYNAMIC_FUNCTION_NAMES
        or sym.startswith(DYNAMIC_FUNCTION_PREFIXES)
        or sym.startswith(UDK_FUNCTION_PREFIXES)
    )


def is_external_class(sym):
    """Return True if the class `sym` belongs to one of the listed external libraries."""
    return sym.startswith(EXTERNAL_CLASS_PREFIXES)


def write_results(path, names):
    """Write `names` to `path`, one per line."""
    with open(path, "wt") as f:
        f.writelines(name + "\n" for name in names)


def main():
    """Collect exports/imports from the build output and write the two result files."""
    exported_symbols = set()
    imported_symbols = set()

    # Both trees are always searched; a failure in the first no longer hides the second.
    shared_libs = find_files("./instdir", "*.so") + find_files("./workdir/LinkTarget/CppunitTest", "*.so")
    for sharedlib in shared_libs:
        # look for exported symbols
        exported_symbols |= parse_nm_exports(run_lines(["nm", "-D", sharedlib]))
        # look for imported symbols
        imported_symbols |= parse_objdump_imports(run_lines(["objdump", "-T", sharedlib]))

    # look for imported symbols in executables
    for executable in find_files("./instdir", "*.bin"):
        imported_symbols |= parse_nm_undefined(run_lines(["nm", "-D", executable]))

    diff = exported_symbols - imported_symbols
    print("exported = " + str(len(exported_symbols)))
    print("imported = " + str(len(imported_symbols)))
    print("diff     = " + str(len(diff)))

    # Demangle every symbol exactly once, even if it is both exported and imported.
    demangled = demangle(exported_symbols | imported_symbols)

    unused_function_exports, classes_with_exported_symbols = classify_exports(
        exported_symbols, imported_symbols, demangled)
    classes_with_imported_symbols = classes_of(imported_symbols, demangled)

    write_results(
        FUNCTIONS_RESULTS_PATH,
        (sym for sym in sorted(unused_function_exports) if not is_noise_function(sym)))

    # classes where none of the exported symbols are imported anywhere
    unused_classes = classes_with_exported_symbols - classes_with_imported_symbols
    write_results(
        CLASSES_RESULTS_PATH,
        (sym for sym in sorted(unused_classes) if not is_external_class(sym)))


if __name__ == "__main__":
    main()