#!/usr/bin/perl -w

# Check namespace cleanness of a library.
# Allowed symbols are passed as arguments.
# They may have trailing * = wildcard.
# Wildcards may be also specified as *::* (e.g. K*::* for all KDE classes)
# Symbols are listed as full function unmangled names without arguments,
# e.g. 'foo bar* nspace::*' allows foo(), foo(int), bar(), barbar()
# and all symbols in namespace/class nspace.
# If an argument has a dot in it, it's a filename of a file containing
# allowed symbols, one per line (empty lines and lines starting with # are
# ignored).
#
# Exit status is 0 when every exported symbol is allowed, non-zero otherwise
# (including when nm itself fails).

use strict;
use warnings;

my $thisProg = "$0";    # This program's name

my $library         = "";
my $allowed_symbols = "";
my $debug           = 0;
my $allowed_weak    = "";
my $weak_specified  = 0;

while( @ARGV )
{
    my $arg = shift @ARGV;
    if( $arg =~ /^--verbose$|^-v$/ )
    {
        $debug = 1;
    }
    elsif( $arg =~ /^--help$|^-h$/ )
    {
        print STDOUT "Usage $thisProg [OPTION] ... library [allowed symbols] ...\n",
                "\n",
                "Check if the given library has only allowed public symbols.\n",
                "\n",
                "      --allowweak=[symbol] allow only these weak symbols\n",
                "  -v, --verbose            verbosely list files processed\n",
                "  -h, --help               print this help, then exit\n";
        exit 0;
    }
    elsif( $arg =~ /^--allowweak=(.*)$/ )
    {
        $allowed_weak .= " " . $1;
        $weak_specified = 1;
    }
    elsif( $arg =~ /^--allowweak$/ ) # simply list all weak
    {
        $allowed_weak .= " ";
        $weak_specified = 1;
    }
    elsif( $arg =~ /^-/ )   # any other argument starting with a dash
    {
        die "Invalid argument!\n";
    }
    elsif( $library eq "" )
    {
        # the first non-option argument is the library to check
        $library = $arg;
    }
    else
    {
        # every further non-option argument is an allowed symbol (or symbol file)
        $allowed_symbols .= " " . $arg;
    }
}

if( $library eq "" )
{
    # without a library there is nothing nm could meaningfully inspect
    die "No library specified!\n";
}

if( ! $weak_specified )
{
    $allowed_weak = "*";
    # Allow all weak symbols by default: they are mostly instances of
    # templates and similar stuff, which unfortunately includes also things
    # from other libraries, so checking them cannot be on by default.
}

print STDERR "library:" . $library . "\n" if $debug;
print STDERR "allowed_symbols:" . $allowed_symbols . "\n" if $debug;
print STDERR "allowed_weak:" . $allowed_weak . "\n" if $debug;

my $default_symbols = "_fini _init";  # system symbols
# on my system, every .so has :
# A _DYNAMIC
# A _GLOBAL_OFFSET_TABLE_
# A __bss_start
# A _edata
# A _end
# T _fini
# T _init
# no need to list A symbols in $default_symbols

print STDERR "default_symbols: " . $default_symbols . "\n" if $debug;

print STDOUT "Namespace cleanness check for " . $library . " :\n";

my $lib_file = "";
if( $library =~ /\.la$/ )
{
    # get the real library file from .la
    open( my $la_fh, '<', $library ) or die "Couldn't open $library: $!\n";
    while( my $line = <$la_fh> )
    {
        # Take the first name listed in library_names='...'.  The quote is
        # excluded from the capture so that a single-name entry does not end
        # up with the closing quote (and newline) glued to the file name.
        if( $line =~ /library_names='([^ ']*)/ )
        {
            $lib_file = $1;
        }
    }
    close( $la_fh );
    if( $lib_file eq "" )
    {
        print STDERR "Library file not found in .la file!\n";
        exit 1;
    }
    my $libpath = $library;
    $libpath =~ s%[^/]*$%%;     # keep only the directory part of the .la path
    if( -e $libpath . ".libs/" . $lib_file )
    {
        $lib_file = $libpath . ".libs/" . $lib_file;
    }
    else
    {
        $lib_file = $libpath . $lib_file;
    }
}
else
{
    $lib_file = $library;
}

print STDERR "libfile: ". $lib_file . "\n" if $debug;

$allowed_symbols .= " " . $default_symbols;

sub process_symbols($\@\%\@);

my @wildcards = ();
my %exacts = ();
my @regwildcards = ();
process_symbols( $allowed_symbols, @wildcards, %exacts, @regwildcards );
my @weak_wildcards = ();
my %weak_exacts = ();
my @weak_regwildcards = ();
process_symbols( $allowed_weak, @weak_wildcards, %weak_exacts, @weak_regwildcards );

# nm is started without a shell (list-form pipe open), so the library path is
# passed to it verbatim whatever characters it contains.
my @nm_command = ( "nm", "-BDCg", $lib_file );

# TODO how portable is this nmcheck stuff?

print STDERR "nm command:" . join( " ", @nm_command ) . "\n" if $debug;

open( my $nm_fh, '-|', @nm_command ) or die "nm command failed\n";

my $exit_code = 0;

while( my $line = <$nm_fh> )
{
    # skip not exported symbols, which don't have address (=first column)
    next if $line =~ /^ /;

    my $type;
    my $symbol;
    if( $line =~ /^[^ ]* (.) (.*)$/ )
    {
        $type = $1;
        $symbol = $2;
    }
    else
    {
        die "Invalid line: " . $line . "\n";
    }

    print STDERR "Type: " . $type . " , symbol: " . $symbol . "\n" if $debug;
    if( $type eq "A" )
    { # these should be system symbols, so ignore them
        next;
    }

    if( $symbol =~ /\(anonymous namespace\)/ )
    { # TODO tell to prefer named namespaces? (shorter symbols)
        next;
    }

    my $stripped = normalize_symbol( $symbol );

    my $found = symbol_allowed( $stripped, \@wildcards, \%exacts, \@regwildcards );
    if( ( ! $found ) && ( $type eq "W" || $type eq "V" ))
    {
        # weak symbols get a second chance against the weak allow-list
        $found = symbol_allowed( $stripped, \@weak_wildcards, \%weak_exacts,
            \@weak_regwildcards );
    }

    if( ! $found )
    {
        print STDERR "Public symbol " . $symbol . " is not allowed!\n";
        $exit_code = 1;
    }
}

# For a pipe, close() returns false when the child exited with a non-zero
# status; an nm that could not read the library must not be reported as "OK".
if( ! close( $nm_fh ) )
{
    print STDERR "nm command failed\n";
    $exit_code = 1;
}

print STDOUT $exit_code == 0 ? "OK\n" : "FAILED\n";

exit $exit_code;

# Reduce a demangled symbol as printed by nm to the bare (qualified) name
# that is compared against the allowed symbols.
#   $_[0] - demangled symbol
# Returns the reduced name.
sub normalize_symbol
{
    my( $symbol ) = @_;

    # strip prefixes
    # the :: appending is to make "CLASS::*" work also for "vtable for CLASS"
    $symbol =~ s/^typeinfo for (.*)$/${1}::/;
    $symbol =~ s/^typeinfo fn for (.*)$/${1}::/;
    $symbol =~ s/^typeinfo name for (.*)$/${1}::/;
    $symbol =~ s/^vtable for (.*)$/${1}::/;
    $symbol =~ s/^guard variable for (.*)$/${1}::/;
    $symbol =~ s/^reference temporary for (.*)$/${1}::/;
    $symbol =~ s/^VTT for (.*)$/${1}::/;
    $symbol =~ s/^virtual thunk \[[^\]]*\] to (.*)$/${1}::/;
    $symbol =~ s/^non-virtual thunk \[[^\]]*\] to (.*)$/${1}::/;
    $symbol =~ s/^covariant return thunk \[[^\]]*\] to (.*)$/${1}::/;
    $symbol =~ s/^construction vtable thunk for (.*)$/${1}::/;
    $symbol =~ s/^construction vtable for .*-in-(.*) [0-9]*$/${1}::/;

    # templates seem to have also return types mangled in their name, and nm prints it too
    # they have also template arguments in the symbol
    # get rid of both of those
    1 while $symbol =~ s/<[^<>]*>//;    # repeatedly strip innermost <>

    if( $symbol !~ /operator\(\)/ )
    {
        $symbol =~ s/ ?\(.*\).*$//;  # strip () and all after it
    }
    else
    {
        # strip the argument list and all after it, but keep the
        # "operator()" name itself so that e.g. "CLASS::*" still matches
        $symbol =~ s/(^|:| )operator\(\) ?\(.*\).*$/${1}operator()/;
    }
    $symbol =~ s/\[.*\] *$//;   # strip [in-charge] etc.
    if( $symbol =~ /(^|:| )operator / )
    {
        $symbol =~ s/.* ([^\s]*)operator /$1/;  # strip everything before 'X::operator blah'
    }
    else
    {
        $symbol =~ s/.* ([^\s]+) *$/$1/;  # get last word (strip return type)
    }

    return $symbol;
}

# Check one reduced symbol name against one set of allowed symbols.
#   $_[0] - reduced symbol name
#   $_[1] - ref to array of allowed prefixes (trailing-* wildcards)
#   $_[2] - ref to hash whose keys are exactly allowed names
#   $_[3] - ref to array of regexp sources that must match the whole name
# Returns 1 if the symbol is allowed, 0 otherwise.
sub symbol_allowed
{
    my( $symbol, $wildcards_ref, $exacts_ref, $regwildcards_ref ) = @_;

    return 1 if $exacts_ref->{ $symbol };
    for my $wild ( @{ $wildcards_ref } )
    {
        return 1 if index( $symbol, $wild ) == 0;   # symbol starts with $wild
    }
    for my $wild ( @{ $regwildcards_ref } )
    {
        return 1 if $symbol =~ /^$wild$/;
    }
    return 0;
}

# Parse a whitespace separated list of allowed symbols and sort them into
# the three lookup structures used for matching.
#   $_[0] - the symbol list; the single word "NONE" means an empty list,
#           entries containing a dot are names of files listing symbols
#   $_[1] - array receiving prefixes of "foo*" style wildcards
#   $_[2] - hash receiving exact names as keys
#   $_[3] - array receiving "K*::*" style wildcards converted to regexp
#           source (each * becomes .*, the rest is used as regexp as-is)
# Dies when a symbol file cannot be opened, or when a listed symbol is
# reserved or is main.
sub process_symbols($\@\%\@)
{
    my( $symbol_list, $wildcards_ref, $exacts_ref, $regwildcards_ref ) = @_;

    $symbol_list =~ s/^ *//;  # strip whitespace
    $symbol_list =~ s/ *$//;

    if( $symbol_list eq "NONE" )
    {
        $symbol_list = "";
    }

    # first pass: expand symbol files into one flat list of symbols
    my @symbols = ();
    for my $entry ( split( ' ', $symbol_list ) )
    {
        if( $entry =~ /\./ )  # dot in name -> file
        {
            open( my $sym_fh, '<', $entry )
                or die( "Cannot open file " . $entry . "! ($!)\n" );
            while( my $line = <$sym_fh> )
            {
                $line =~ s/^\s*//;  # strip whitespace
                $line =~ s/\s*$//;
                next if $line eq "";    # empty line
                next if $line =~ /^#/;  # comment line starting with #
                push @symbols, $line;
            }
            close( $sym_fh );
        }
        else
        {
            push @symbols, $entry;
        }
    }

    # second pass: validate and classify every symbol
    for my $symbol ( @symbols )
    {
        if( $symbol =~ /__/
            || $symbol =~ /^_[A-Z]/ )
        { # ISO C++ 2.10.2
            die "Symbols containing a double underscore or beginning with an underscore and an upper-case letter are reserved!\n";
        }
        elsif( $symbol eq "main"
            || $symbol eq "main*" )
        {
            die "Symbol main is not allowed!\n";
        }

        my( $prefix ) = $symbol =~ /^([^\*]*)\*$/;  # trailing * without any * before it
        if( defined( $prefix )
            && $symbol !~ /operator\*$/ )
        {
            print STDERR "wildcard:" . $symbol . "\n" if $debug;
            push @{ $wildcards_ref }, $prefix;
        }
        elsif( $symbol =~ /\*$/
            && ( $symbol =~ /\*::/ || $symbol =~ /::\*/ )
            && $symbol !~ /^\*/
            && $symbol !~ /operator\*$/ )
        {
            print STDERR "regwildcard:" . $symbol . "\n" if $debug;
            my $pattern = $symbol;
            $pattern =~ s/\*/\.\*/g;  # change * to .* (regexp)
            push @{ $regwildcards_ref }, $pattern;
        }
        else
        {
            print STDERR "exact:" . $symbol . "\n" if $debug;
            $exacts_ref->{ $symbol } = 1;
        }
    }
}