1#!/usr/bin/perl -w
2
# Check namespace cleanness of a library.
# Allowed symbols are passed as arguments.
# They may have a trailing * as a wildcard.
# Wildcards may also be specified as *::* (e.g. K*::* for all KDE classes).
# Symbols are listed as full unmangled function names without arguments,
# e.g. 'foo bar* nspace::*' allows foo(), foo(int), bar(), barbar()
# and all symbols in namespace/class nspace.
# If an argument has a dot in it, it is the name of a file containing
# allowed symbols, one per line.
12
13
$thisProg   = "$0";     # Name under which this script was started (shown by --help)

# State filled in from the command line.
$library = "";          # library to check: the first non-option argument
$allowed_symbols = "";  # all further non-option arguments, each preceded by a space
$debug = 0;             # set to 1 by -v / --verbose
$allowed_weak = "";     # values of all --allowweak options, each preceded by a space
$weak_specified = 0;    # set to 1 as soon as any --allowweak option is seen

# Walk through @ARGV front to back, removing each argument as it is handled.
while( defined( $ARGV[ 0 ] ))
{
    my $arg = shift @ARGV;

    if( $arg =~ /^--verbose$|^-v$/ )
    {
        $debug = 1;
        next;
    }

    if( $arg =~ /^--help$|^-h$/ )
    {
        my @usage = (
            "Usage $thisProg [OPTION] ... library [allowed symbols] ...\n",
            "\n",
            "Check if the given library has only allowed public symbols.\n",
            "\n",
            "  --allowweak=[symbol] allow only these weak symbols\n",
            "  -v, --verbose        verbosely list files processed\n",
            "  -h, --help           print this help, then exit\n",
        );
        print STDOUT @usage;
        exit 0;
    }

    if( $arg =~ /^--allowweak=(.*)$/ )
    {
        $allowed_weak = $allowed_weak . " " . $1;
        $weak_specified = 1;
        next;
    }

    if( $arg =~ /^--allowweak$/ ) # bare option: marks the list as given but adds no names
    {
        $allowed_weak = $allowed_weak . " ";
        $weak_specified = 1;
        next;
    }

    # Note: this pattern matches every remaining argument that starts with a dash.
    die "Invalid argument!\n" if $arg =~ /^--*/;

    # Plain argument: the first one names the library, the rest are allowed symbols.
    if( $library )
    {
        $allowed_symbols = $allowed_symbols . " " . $arg;
    }
    else
    {
        $library = $arg;
    }
}
66
# Without any --allowweak option every weak symbol is accepted ("*").
# Weak symbols are instances of templates and similar stuff, which unfortunately
# also includes things from other libraries, so checking them is opt-in.
$allowed_weak = "*" unless $weak_specified;

if( $debug )
{
    print STDERR "library:", $library, "\n";
    print STDERR "allowed_symbols:", $allowed_symbols, "\n";
    print STDERR "allowed_weak:", $allowed_weak, "\n";
}

# System symbols that are always allowed.
# According to the original author, every .so on their system has:
#   A _DYNAMIC
#   A _GLOBAL_OFFSET_TABLE_
#   A __bss_start
#   A _edata
#   A _end
#   T _fini
#   T _init
# Symbols of type A are skipped by the scan anyway, so only the T ones are listed.
$default_symbols = "_fini _init";

print STDERR "default_symbols: ", $default_symbols, "\n" if $debug;

print STDOUT "Namespace cleanness check for ", $library, " :\n";
93
# Extract the first file name from a libtool "library_names='...'" line.
#
# Parameters:
#   $line - one line of a .la file
# Returns:
#   the first name inside the quotes ("" if the list is empty),
#   or undef if $line contains no library_names='...' assignment.
sub la_library_name
{
    my ( $line ) = @_;
    # Stop at whitespace or at the closing quote, so that a single-entry list
    # ('libfoo.so') does not drag the quote and the newline into the name.
    if( $line =~ /library_names='([^\s']*)/ )
    {
        return $1;
    }
    return undef;
}

# Work out which file nm should be run on.  For a libtool .la file this is the
# first entry of library_names, looked up in .libs/ next to the .la file first
# and directly next to it otherwise.  Any other argument is used as is.
$lib_file = "";
if( $library =~ /\.la$/ )
{
    # get the real library file from .la
    open( my $la_fh, '<', $library ) || die "Couldn't open $library: $!\n";
    while( my $la_line = <$la_fh> )
    {
        my $name = la_library_name( $la_line );
        $lib_file = $name if defined $name;   # the last assignment in the file wins
    }
    close( $la_fh );
    if( ! $lib_file )
    {
        print STDERR "Library file not found in .la file!\n";
        exit 1;
    }
    # directory part of the .la path, including the trailing slash (may be empty)
    my $libpath = $library;
    $libpath =~ s%[^/]*$%%;
    if(  -e $libpath . ".libs/" . $lib_file )
    {
        $lib_file = $libpath . ".libs/" . $lib_file;
    }
    else
    {
        $lib_file = $libpath . $lib_file;
    }
}
else
{
    $lib_file = $library;
}

print STDERR "libfile: ". $lib_file . "\n" if $debug;
129
# Forward declaration: with this prototype in effect the calls below pass their
# array and hash arguments by reference.
sub process_symbols($\@\%\@);

# The system symbols are always allowed in addition to the user supplied ones.
$allowed_symbols = $allowed_symbols . " " . $default_symbols;

# Lookup tables for symbols of any type:
# prefix wildcards, exact names, and regexp wildcards.
@wildcards = ();
@regwildcards = ();
%exacts = ();
process_symbols( $allowed_symbols, @wildcards, %exacts, @regwildcards );

# The same three tables for symbols that are allowed only when they are weak.
@weak_wildcards = ();
@weak_regwildcards = ();
%weak_exacts = ();
process_symbols( $allowed_weak, @weak_wildcards, %weak_exacts, @weak_regwildcards );
142
# Reduce a demangled symbol, as printed by nm -C, to the qualified name that is
# compared against the allowed symbols.
#
# Parameters:
#   $symbol - symbol text from nm (everything after the type letter)
# Returns:
#   the name with descriptive prefixes ("vtable for" etc.), template arguments,
#   the argument list, a trailing [...] and the return type removed.
sub normalize_symbol
{
    my ( $symbol ) = @_;

    # strip prefixes
    # the :: appending is to make "CLASS::*" work also for "vtable for CLASS"
    $symbol =~ s/^typeinfo for (.*)$/$1::/;
    $symbol =~ s/^typeinfo fn for (.*)$/$1::/;
    $symbol =~ s/^typeinfo name for (.*)$/$1::/;
    $symbol =~ s/^vtable for (.*)$/$1::/;
    $symbol =~ s/^guard variable for (.*)$/$1::/;
    $symbol =~ s/^reference temporary for (.*)$/$1::/;
    $symbol =~ s/^VTT for (.*)$/$1::/;
    $symbol =~ s/^virtual thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^non-virtual thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^covariant return thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^construction vtable thunk for (.*)$/$1::/;
    $symbol =~ s/^construction vtable for .*-in-(.*) [0-9]*$/$1::/;

    # templates seem to have also return types mangled in their name, and nm prints it too
    # they have also template arguments in the symbol
    # get rid of both of those
    while( $symbol =~ /<.*>/ )
    {
        $symbol =~ s/<[^<>]*>//; # strip innermost <>
    }
    if( $symbol !~ /operator\(\)/ )
    {
        $symbol =~ s/ ?\(.*\).*$//;  # strip () and all after it
    }
    else
    {
        $symbol =~ s/(^|:| )operator\(\) ?\(.*\).*$//;  # strip () and all after it
    }
    $symbol =~ s/\[.*\] *$//;   # strip [in-charge] etc.
    if( $symbol =~ /(^|:| )operator / )
    {
        $symbol =~ s/.* ([^\s]*)operator /$1/; # strip everything before 'X::operator blah'
    }
    else
    {
        $symbol =~ s/.* ([^\s]+) *$/$1/;  # get last word (strip return type)
    }

    return $symbol;
}

# Check one normalized symbol against one set of lookup tables.
#
# Parameters:
#   $symbol           - normalized symbol name
#   $wildcards_ref    - ref to array of prefixes; the symbol must start with one
#   $exacts_ref       - ref to hash whose keys are exact symbol names
#   $regwildcards_ref - ref to array of regexp sources; one must match the whole symbol
# Returns:
#   1 if the symbol is allowed by any of the tables, 0 otherwise.
sub symbol_matches
{
    my ( $symbol, $wildcards_ref, $exacts_ref, $regwildcards_ref ) = @_;

    return 1 if $exacts_ref->{ $symbol };
    for my $wild ( @{$wildcards_ref} )
    {
        return 1 if index( $symbol, $wild ) == 0;
    }
    for my $wild ( @{$regwildcards_ref} )
    {
        return 1 if $symbol =~ /^$wild$/;
    }
    return 0;
}

# nm is started directly, without a shell, so that a library path containing
# spaces or shell metacharacters is passed through unchanged.
my @nm_args = ( "-BDCg", $lib_file );
$nm_command = "nm " . join( " ", @nm_args );

# TODO how portable is this nmcheck stuff?

print STDERR "nm command:" . $nm_command . "\n" if $debug;

open( my $nm_fh, '-|', "nm", @nm_args ) || die "nm command failed\n";

my $exit_code = 0;

while( my $line = <$nm_fh> )
{
    # skip not exported symbols, which don't have address (=first column)
    next if $line =~ /^ /;

    my $type;
    my $symbol;
    if( $line =~ /^[^ ]* (.) (.*)$/ )
    {
        $type = $1;
        $symbol = $2;
    }
    else
    {
        die "Invalid line: " . $line . "\n";
    }

    print STDERR "Type: " . $type . " , symbol: " . $symbol . "\n" if $debug;
    if( $type eq "A" )
    { # these should be system symbols, so ignore them
        next;
    }

    my $orig_symbol = $symbol;

    if( $symbol =~ /\(anonymous namespace\)/ )
    { # TODO tell to prefer named namespaces? (shorter symbols)
        next;
    }

    $symbol = normalize_symbol( $symbol );

    # print STDERR "Processed symbol: " . $symbol . "\n" if $debug;

    my $found = symbol_matches( $symbol, \@wildcards, \%exacts, \@regwildcards );
    if( ( ! $found ) && ( $type eq "W" || $type eq "V" ))
    {
        # weak symbols get a second chance against the weak-only tables
        $found = symbol_matches( $symbol, \@weak_wildcards, \%weak_exacts, \@weak_regwildcards );
    }

    if( ! $found )
    {
        print STDERR "Public symbol " . $orig_symbol . " is not allowed!\n";
        $exit_code = 1;
    }
}

# close() on a pipe returns false when the child exited with a non-zero status.
# Without this check a library that nm could not read would be reported as OK.
if( ! close( $nm_fh ) )
{
    print STDERR "nm failed for " . $lib_file . " (exit status " . ( $? >> 8 ) . ")\n";
    $exit_code = 1;
}

print STDOUT $exit_code == 0 ? "OK\n" : "FAILED\n";

exit $exit_code;
292
# Sort a list of allowed symbols into three lookup structures.
#
# Parameters (the prototype makes callers pass the containers by reference):
#   $allowed_symbols - space separated entries; the single word "NONE" means
#                      an empty list.  An entry containing a dot is the name of
#                      a file with one entry per line; blank lines and lines
#                      starting with # are skipped.
#   @wildcards       - receives the prefix of every "prefix*" entry
#   %exacts          - receives every literal entry as a key with value 1
#   @regwildcards    - receives a regexp source for every entry that ends in *,
#                      does not start with * and contains *:: or ::*
# Entries ending in "operator*" are always treated as literal names.
#
# Dies if a symbol file cannot be opened, if an entry contains a double
# underscore or starts with an underscore and an upper-case letter, or if an
# entry is "main" or "main*".
sub process_symbols($\@\%\@)
{
    my ( $allowed_symbols, $wildcards_ref, $exacts_ref, $regwildcards_ref ) = @_;

    $allowed_symbols =~ s/^ *//;  # strip whitespace
    $allowed_symbols =~ s/ *$//;

    if( $allowed_symbols eq "NONE" )
    {
        $allowed_symbols = "";
    }

    # First pass: expand file entries into the symbols they contain.
    my @symbols = ();
    for my $entry ( split( ' ', $allowed_symbols ) )
    {
        if( $entry !~ /\./ )
        {
            push @symbols, $entry;
            next;
        }
        # dot in name -> file; three-argument open so that the name can never
        # be taken as an open mode or a command
        open( my $sym_fh, '<', $entry ) or die( "Cannot open file $entry: $!\n" );
        while( my $line = <$sym_fh> )
        {
            $line =~ s/^\s*//;  # strip whitespace
            $line =~ s/\s*$//;
            next if $line eq "";       # empty line
            next if $line =~ /^#/;     # comment line starting with #
            push @symbols, $line;
        }
        close( $sym_fh );
    }

    # Second pass: validate every entry and file it under the right structure.
    for my $symbol ( @symbols )
    {
        if( $symbol =~ /__/
            || $symbol =~ /^_[A-Z]/ )
        { # ISO C++ 2.10.2
            die "Symbols containing a double underscore or beginning with an underscore and an upper-case letter are reserved!\n";
        }
        elsif( $symbol eq "main"
            || $symbol eq "main*" )
        {
            die "Symbol main is not allowed!\n";
        }

        # the * of "operator*" is part of the name, not a wildcard
        my $is_operator_star = ( $symbol =~ /operator\*$/ ) ? 1 : 0;

        if( ! $is_operator_star
            && $symbol =~ /^([^\*]*)\*$/ )   # trailing * without any * before it
        {
            my $prefix = $1;
            print STDERR "wildcard:" . $symbol . "\n" if $debug;
            push @{$wildcards_ref}, $prefix;
        }
        elsif( ! $is_operator_star
            && $symbol =~ /\*$/
            && ( $symbol =~ /\*::/ || $symbol =~ /::\*/ )
            && $symbol !~ /^\*/ )
        {
            print STDERR "regwildcard:" . $symbol . "\n" if $debug;
            # change * to .* (regexp) and escape everything in between, so that
            # characters such as + ( ) [ ] in operator names match literally
            my $pattern = join( '.*', map { quotemeta( $_ ) } split( /\*/, $symbol, -1 ) );
            push @{$regwildcards_ref}, $pattern;
        }
        else
        {
            print STDERR "exact:" . $symbol . "\n" if $debug;
            $exacts_ref->{ $symbol } = 1;
        }
    }
}
372