1#!/usr/local/bin/python3.8
2#
3# Find exported symbols that can be made non-exported.
4#
# Note that (a) parsing the output of these commands is a pain because it is quite irregular, and
# (b) I'm fumbling in the dark here, trying to guess what exactly constitutes an "import" vs. an
# "export" of a symbol; Linux linking is rather complex.
8#
9# Takes about 5min to run on a decent machine.
10#
# The standalone function analysis is reasonably reliable, but the class/method analysis is less so
12#   (something to do with destructor thunks not showing up in my results?)
13#
14# Also, the class/method analysis will not catch problems like
15#    'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
16#    but loplugin:dyncastvisibility will do that for you
17#
18
19import subprocess
20import sys
21import re
22
exported_symbols = set()
imported_symbols = set()

# Matches `nm -D` lines for symbols defined in the text section, e.g.:
# 0000000000036ed0 T flash_component_getFactory
_NM_EXPORT_REGEX = re.compile(r'^[0-9a-fA-F]+ T ')


def run_and_read_lines(cmd, shell=False):
    """Run cmd to completion and return its stdout as a list of str lines.

    cmd is an argument list, or a shell command string when shell is True.
    The pipe is read in text mode: under Python 3 a binary pipe yields bytes,
    which can neither be concatenated with str nor matched by a str regex.
    subprocess.run() waits for the child, so no unreaped processes pile up.
    The exit status is not checked; a failing tool simply contributes
    whatever it wrote to stdout (possibly nothing).
    """
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, shell=shell,
                               universal_newlines=True)
    return completed.stdout.splitlines()


def parse_nm_exports(lines):
    """Return the set of symbol names on `nm -D` lines of type 'T'."""
    found = set()
    for line in lines:
        line = line.strip()
        if _NM_EXPORT_REGEX.match(line):
            found.add(line.split(" ")[2])
    return found


def parse_objdump_undefined(lines):
    """Return the set of *UND* symbol names in `objdump -T` output lines.

    We are looking for lines something like:
    0000000000000000      DF *UND*  0000000000000000     _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
    The section name is expected at index 7 of the single-space split, and
    the symbol name is the last token on the line.
    """
    found = set()
    # ignore some header bumf: the first four lines are not symbol entries
    for line in lines[4:]:
        tokens = line.strip().split(" ")
        # tokens[7] needs at least 8 tokens; shorter lines cannot be entries
        if len(tokens) < 8 or not tokens[7].startswith("*UND*"):
            continue
        found.add(tokens[-1])
    return found


# The globs are quoted so that the shell passes them to find unexpanded.
shared_libs = run_and_read_lines(
    "find ./instdir -name '*.so' && find ./workdir/LinkTarget/CppunitTest -name '*.so'",
    shell=True)
for sharedlib in shared_libs:
    if not sharedlib:
        continue
    # Argument lists (no shell) keep unusual characters in paths harmless.
    # look for exported symbols
    exported_symbols |= parse_nm_exports(run_and_read_lines(["nm", "-D", sharedlib]))
    # look for imported symbols
    imported_symbols |= parse_objdump_undefined(run_and_read_lines(["objdump", "-T", sharedlib]))
57
# look for imported symbols in executables
def parse_nm_undefined(lines):
    """Return the set of undefined symbol names in `nm -D` output lines.

    We are looking for lines something like:
                     U sal_detail_deinitialize
    i.e. exactly two whitespace-separated fields, the first being "U".
    Lines for defined symbols (address, type, name) are ignored.
    """
    found = set()
    for line in lines:
        tokens = line.split()
        if len(tokens) == 2 and tokens[0] == "U":
            found.add(tokens[1])
    return found


# The glob is quoted so that the shell passes it to find unexpanded, and the
# pipes are read in text mode so that paths and symbols are str, not bytes.
find_executables = subprocess.run("find ./instdir -name '*.bin'", stdout=subprocess.PIPE,
                                  shell=True, universal_newlines=True)
for executable in find_executables.stdout.splitlines():
    if not executable:
        continue
    # An argument list (no shell) keeps unusual characters in paths harmless;
    # the undefined-symbol filtering is done in parse_nm_undefined instead of grep.
    nm_result = subprocess.run(["nm", "-D", executable], stdout=subprocess.PIPE,
                               universal_newlines=True)
    imported_symbols.update(parse_nm_undefined(nm_result.stdout.splitlines()))
73
# Summary: how many symbols are exported, imported, and exported-but-never-imported.
diff = exported_symbols.difference(imported_symbols)
for label, symbol_set in (
    ("exported = ", exported_symbols),
    ("imported = ", imported_symbols),
    ("diff     = ", diff),
):
    print(label + str(len(symbol_set)))
78
# standalone functions that are exported but not imported
unused_function_exports = set()
classes_with_exported_symbols = set()
classes_with_imported_symbols = set()


def demangle_symbols(symbols, batch_size=500):
    """Return a dict mapping each symbol to its c++filt output (stripped).

    c++filt prints one line per command-line argument, so symbols are passed
    in batches instead of spawning one process per symbol. The output is read
    in text mode so the names are str and can be compared with str literals
    under Python 3. If a batch does not yield exactly one line per symbol,
    that batch is redone with one c++filt call per symbol.
    Raises subprocess.CalledProcessError if c++filt exits with an error.
    """
    symbols = list(symbols)
    demangled = {}
    for start in range(0, len(symbols), batch_size):
        batch = symbols[start:start + batch_size]
        output = subprocess.check_output(["c++filt"] + batch, universal_newlines=True)
        names = output.splitlines()
        if len(names) != len(batch):
            names = [subprocess.check_output(["c++filt", sym], universal_newlines=True)
                     for sym in batch]
        for sym, name in zip(batch, names):
            demangled[sym] = name.strip()
    return demangled


def owning_class(demangled):
    """Return the qualifier before the last '::' of a demangled name, or None.

    A leading "non-virtual thunk to " / "virtual thunk to " is dropped first.
    Only the text before the first "(" is searched for "::". Returns None
    when no "::" is found there, i.e. for a standalone function.
    """
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if demangled.startswith(thunk_prefix):
            demangled = demangled[len(thunk_prefix):]
            break
    paren = demangled.find("(")
    # paren may be -1 (no parameter list); it is passed through unchanged as
    # the end bound, exactly as the search has always been done here.
    separator = demangled.rfind("::", 0, paren)
    if separator == -1:
        return None
    return demangled[:separator]


def strip_thunk_prefix(demangled):
    """Return demangled without a leading thunk marker, if it has one."""
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if demangled.startswith(thunk_prefix):
            return demangled[len(thunk_prefix):]
    return demangled


# Demangle every symbol once, even if it is both exported and imported.
demangled_names = demangle_symbols(exported_symbols | imported_symbols)

for sym in exported_symbols:
    filtered_sym = demangled_names[sym]
    classname = owning_class(filtered_sym)
    if classname is not None:
        # find classes where all of the exported symbols are not imported
        classes_with_exported_symbols.add(classname)
    elif sym not in imported_symbols:
        # find standalone functions which are exported but not imported
        unused_function_exports.add(strip_thunk_prefix(filtered_sym))

for sym in imported_symbols:
    classname = owning_class(demangled_names[sym])
    if classname is not None:
        classes_with_imported_symbols.add(classname)
108
# Filter out most of the noise.
# No idea where these are coming from, but not our code.
external_function_prefixes = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_",
    "LU", "MIP", "MPS", "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_",
    "PRP_", "PR_", "PT_", "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS",
    "SSL_", "VFY_", "_PR_", "_", "ber_", "bfp_", "ldap_", "ne_", "opj_",
    "pg_", "pq", "presolve_", "sqlite3_",
)

# dynamically loaded
dynamically_loaded_suffixes = ("get_implementation", "component_getFactory")
dynamically_loaded_names = (
    "CreateDialogFactory",
    "CreateUnoWrapper",
    "CreateWindow",
    "ExportDOC",
    "ExportPPT",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
)
dynamically_loaded_prefixes = (
    "Import",
    "Java_com_sun_star_",
    "TestImport",
    "getAllCalendars_",
    "getAllCurrencies_",
    "getAllFormats",
    "getBreakIteratorRules_",
    "getCollationOptions_",
    "getCollatorImplementation_",
    "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_",
    "getForbiddenCharacters_",
    "getIndexAlgorithm_",
    "getLCInfo_",
    "getLocaleItem_",
    "getOutlineNumberingLevels_",
    "getReservedWords_",
    "getSTC_",
    "getSearchOptions_",
    "getTransliterations_",
    "getUnicodeScripts_",
    "lok_",
)

# UDK API
udk_api_prefixes = ("osl_", "rtl_", "typelib_", "typereg_", "uno_")

# Write every unused exported function that survives the filters, one per line.
with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_function_exports):
        if sym.startswith(external_function_prefixes):
            continue
        if sym.endswith(dynamically_loaded_suffixes):
            continue
        if sym in dynamically_loaded_names:
            continue
        if sym.startswith(dynamically_loaded_prefixes):
            continue
        if sym.startswith(udk_api_prefixes):
            continue
        f.write(sym + "\n")
193
# externals
external_class_prefixes = (
    "libcdr",
    "libabw",
    "libebook",
    "libepubgen",
    "libfreehand",
    "libmspub",
    "libpagemaker",
    "libqxp",
    "libvisio",
    "libzmf",
    "lucene::",
)

# Write every class that has exported symbols but no imported ones, one per
# line, leaving out classes that belong to external libraries.
with open("bin/find-can-be-private-symbols.classes.results", "wt") as f:
    never_imported_classes = classes_with_exported_symbols.difference(classes_with_imported_symbols)
    for sym in sorted(never_imported_classes):
        if not sym.startswith(external_class_prefixes):
            f.write(sym + "\n")
209