1#!/usr/bin/perl -w 2 3# Try opening libperl.a with nm, and verifying it has the kind of 4# symbols we expect, and no symbols we should avoid. 5# 6# Fail softly, expect things only on known platforms: 7# - linux, x86 only (ppc linux has odd symbol tables) 8# - darwin (OS X), both x86 and ppc 9# - freebsd 10# and on other platforms, and if things seem odd, just give up (skip_all). 11# 12# Debugging tip: nm output (this script's input) can be faked by 13# giving one command line argument for this script: it should be 14# either the filename to read, or "-" for STDIN. You can also append 15# "@style" (where style is a supported nm style, like "gnu" or "darwin") 16# to this filename for "cross-parsing". 17# 18# Some terminology: 19# - "text" symbols are code 20# - "data" symbols are data (duh), with subdivisions: 21# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), 22# uninitialized data, which often even doesn't exist in the object 23# file as such, only its size does, which is then created on demand 24# by the loader 25# - "const": initialized read-only data, like string literals 26# - "common": uninitialized data unless initialized... 27# (the full story is too long for here, see "man nm") 28# - "data": initialized read-write data 29# (somewhat confusingly below: "data data", but it makes code simpler) 30# - "undefined": external symbol referred to by an object, 31# most likely a text symbol. Can be either a symbol defined by 32# a Perl object file but referred to by other Perl object files, 33# or a completely external symbol from libc, or other system libraries. 34 35BEGIN { 36 chdir 't' if -d 't'; 37 @INC = '../lib'; 38 require "./test.pl"; 39} 40 41use strict; 42 43use Config; 44 45if ($Config{cc} =~ /g\+\+/) { 46 # XXX Could use c++filt, maybe. 47 skip_all "on g++"; 48} 49 50my $libperl_a; 51 52for my $f (qw(../libperl.a libperl.a)) { 53 if (-f $f) { 54 $libperl_a = $f; 55 last; 56 } 57} 58 59unless (defined $libperl_a) { 60 skip_all "no libperl.a"; 61} 62 63print "# \$^O = $^O\n"; 64print "# \$Config{archname} = $Config{archname}\n"; 65print "# \$Config{cc} = $Config{cc}\n"; 66print "# libperl = $libperl_a\n"; 67 68my $nm; 69my $nm_opt = ''; 70my $nm_style; 71my $nm_fh; 72my $nm_err_tmp = "libperl$$"; 73 74END { 75 # this is still executed when we skip_all above, avoid a warning 76 unlink $nm_err_tmp if $nm_err_tmp; 77} 78 79my $fake_input; 80my $fake_style; 81 82if (@ARGV == 1) { 83 $fake_input = shift @ARGV; 84 print "# Faking nm output from $fake_input\n"; 85 if ($fake_input =~ s/\@(.+)$//) { 86 $fake_style = $1; 87 print "# Faking nm style from $fake_style\n"; 88 if ($fake_style eq 'gnu' || 89 $fake_style eq 'linux' || 90 $fake_style eq 'freebsd') { 91 $nm_style = 'gnu' 92 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { 93 $nm_style = 'darwin' 94 } else { 95 die "$0: Unknown explicit nm style '$fake_style'\n"; 96 } 97 } 98} 99 100unless (defined $nm_style) { 101 if ($^O eq 'linux') { 102 # The 'gnu' style could be equally well be called 'bsd' style, 103 # since the output format of the GNU binutils nm is really BSD. 104 $nm_style = 'gnu'; 105 } elsif ($^O eq 'freebsd') { 106 $nm_style = 'gnu'; 107 } elsif ($^O eq 'darwin') { 108 $nm_style = 'darwin'; 109 } 110} 111 112if (defined $nm_style) { 113 if ($nm_style eq 'gnu') { 114 $nm = '/usr/bin/nm'; 115 } elsif ($nm_style eq 'darwin') { 116 $nm = '/usr/bin/nm'; 117 # With the -m option we get better information than the BSD-like 118 # default: with the default, a lot of symbols get dumped into 'S' 119 # or 's', for example one cannot tell the difference between const 120 # and non-const data symbols. 121 $nm_opt = '-m'; 122 } else { 123 die "$0: Unexpected nm style '$nm_style'\n"; 124 } 125} 126 127if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) { 128 # For example in ppc most (but not all!) code symbols are placed 129 # in 'D' (data), not in ' T '. We cannot work under such conditions. 130 skip_all "linux but archname $Config{archname} not x86*"; 131} 132 133unless (defined $nm) { 134 skip_all "no nm"; 135} 136 137unless (defined $nm_style) { 138 skip_all "no nm style"; 139} 140 141print "# nm = $nm\n"; 142print "# nm_style = $nm_style\n"; 143print "# nm_opt = $nm_opt\n"; 144 145unless (-x $nm) { 146 skip_all "no executable nm $nm"; 147} 148 149if ($nm_style eq 'gnu' && !defined $fake_style) { 150 open(my $gnu_verify, "$nm --version|") or 151 skip_all "nm failed: $!"; 152 my $gnu_verified; 153 while (<$gnu_verify>) { 154 if (/^GNU nm/) { 155 $gnu_verified = 1; 156 last; 157 } 158 } 159 unless ($gnu_verified) { 160 skip_all "no GNU nm"; 161 } 162} 163 164if (defined $fake_input) { 165 if ($fake_input eq '-') { 166 open($nm_fh, "<&STDIN") or 167 skip_all "Duping STDIN failed: $!"; 168 } else { 169 open($nm_fh, "<", $fake_input) or 170 skip_all "Opening '$fake_input' failed: $!"; 171 } 172 undef $nm_err_tmp; # In this case there will be no nm errors. 173} else { 174 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n}; 175 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or 176 skip_all "$nm $nm_opt $libperl_a failed: $!"; 177} 178 179sub is_perlish_symbol { 180 $_[0] =~ /^(?:PL_|Perl|PerlIO)/; 181} 182 183# XXX Implement "internal test" for this script (option -t?) 184# to verify that the parsing does what it's intended to. 185 186sub nm_parse_gnu { 187 my $symbols = shift; 188 my $line = $_; 189 if (m{^(\w+\.o):$}) { 190 # object file name 191 $symbols->{obj}{$1}++; 192 $symbols->{o} = $1; 193 return; 194 } else { 195 die "$0: undefined current object: $line" 196 unless defined $symbols->{o}; 197 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 198 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 199 if (/^[Rr] (\w+)$/) { 200 # R: read only (const) 201 $symbols->{data}{const}{$1}{$symbols->{o}}++; 202 } elsif (/^r .+$/) { 203 # Skip local const (read only). 204 } elsif (/^([Tti]) (\w+)(\..+)?$/) { 205 $symbols->{text}{$2}{$symbols->{o}}{$1}++; 206 } elsif (/^C (\w+)$/) { 207 $symbols->{data}{common}{$1}{$symbols->{o}}++; 208 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { 209 # Bb: uninitialized data (bss) 210 # Ss: uninitialized data "for small objects" 211 $symbols->{data}{bss}{$1}{$symbols->{o}}++; 212 } elsif (/^D _LIB_VERSION$/) { 213 # Skip the _LIB_VERSION (not ours, probably libm) 214 } elsif (/^[DdGg] (\w+)$/) { 215 # Dd: initialized data 216 # Gg: initialized "for small objects" 217 $symbols->{data}{data}{$1}{$symbols->{o}}++; 218 } elsif (/^. \.?(\w+)$/) { 219 # Skip the unknown types. 220 print "# Unknown type: $line ($symbols->{o})\n"; 221 } 222 return; 223 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { 224 my ($symbol) = $1; 225 return if is_perlish_symbol($symbol); 226 $symbols->{undef}{$symbol}{$symbols->{o}}++; 227 return; 228 } 229 } 230 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 231} 232 233sub nm_parse_darwin { 234 my $symbols = shift; 235 my $line = $_; 236 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { 237 # object file name 238 $symbols->{obj}{$1}++; 239 $symbols->{o} = $1; 240 return; 241 } else { 242 die "$0: undefined current object: $line" unless defined $symbols->{o}; 243 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 244 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 245 # String literals can live in different sections 246 # depending on the compiler and os release, assumedly 247 # also linker flags. 248 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 249 my ($symbol, $suffix) = ($1, $2); 250 # Ignore function-local constants like 251 # _Perl_av_extend_guts.oom_array_extend 252 return if defined $suffix && /__TEXT,__const/; 253 # Ignore the cstring unnamed strings. 254 return if $symbol =~ /^L\.str\d+$/; 255 $symbols->{data}{const}{$symbol}{$symbols->{o}}++; 256 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) { 257 my ($exp, $sym) = ($1, $2); 258 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++; 259 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 260 my ($dtype, $symbol, $suffix) = ($1, $2, $3); 261 # Ignore function-local constants like 262 # _Perl_pp_gmtime.dayname 263 return if defined $suffix; 264 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; 265 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { 266 # Skip this, whatever it is (some inlined leakage from 267 # darwin libc?) 268 } elsif (/^\(__TEXT,__eh_frame/) { 269 # Skip the eh_frame (exception handling) symbols. 270 return; 271 } elsif (/^\(__\w+,__\w+\) /) { 272 # Skip the unknown types. 273 print "# Unknown type: $line ($symbols->{o})\n"; 274 } 275 return; 276 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { 277 # darwin/ppc marks most undefined text symbols 278 # as "[lazy bound]". 279 my ($symbol) = $1 =~ s/\$UNIX2003\z//r; 280 return if is_perlish_symbol($symbol); 281 $symbols->{undef}{$symbol}{$symbols->{o}}++; 282 return; 283 } 284 } 285 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 286} 287 288my $nm_parse; 289 290if ($nm_style eq 'gnu') { 291 $nm_parse = \&nm_parse_gnu; 292} elsif ($nm_style eq 'darwin') { 293 $nm_parse = \&nm_parse_darwin; 294} 295 296unless (defined $nm_parse) { 297 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; 298} 299 300my %symbols; 301 302while (<$nm_fh>) { 303 next if /^$/; 304 chomp; 305 $nm_parse->(\%symbols); 306} 307 308# use Data::Dumper; print Dumper(\%symbols); 309 310# Something went awfully wrong. Wrong nm? Wrong options? 311unless (keys %symbols) { 312 skip_all "no symbols\n"; 313} 314unless (exists $symbols{text}) { 315 skip_all "no text symbols\n"; 316} 317 318# These should always be true for everyone. 319 320ok($symbols{obj}{'pp.o'}, "has object pp.o"); 321ok($symbols{text}{'Perl_peep'}, "has text Perl_peep"); 322ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o"); 323ok(exists $symbols{data}{const}, "has data const symbols"); 324ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); 325 326my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0; 327 328print "# nocommon = $nocommon\n"; 329 330my %data_symbols; 331 332for my $dtype (sort keys %{$symbols{data}}) { 333 for my $symbol (sort keys %{$symbols{data}{$dtype}}) { 334 $data_symbols{$symbol}++; 335 } 336} 337 338if ( !$symbols{data}{common} ) { 339 # This is likely because Perl was compiled with 340 # -Accflags="-fno-common" 341 $symbols{data}{common} = $symbols{data}{bss}; 342} 343 344ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w"); 345ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); 346 347# See the comments in the beginning for what "undefined symbols" 348# really means. We *should* have many of those, that is a good thing. 349ok(keys %{$symbols{undef}}, "has undefined symbols"); 350 351# There are certain symbols we expect to see. 352 353# chmod, socket, getenv, sigaction, exp, time are system/library 354# calls that should each see at least one use. exp can be expl 355# if so configured. 356my %expected = ( 357 chmod => undef, # There is no Configure symbol for chmod. 358 socket => 'd_socket', 359 getenv => undef, # There is no Configure symbol for getenv, 360 sigaction => 'd_sigaction', 361 time => 'd_time', 362 ); 363 364if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) { 365 $expected{expl} = undef; # There is no Configure symbol for expl. 366} elsif ($Config{usequadmath}) { 367 $expected{expq} = undef; # There is no Configure symbol for expq. 368} else { 369 $expected{exp} = undef; # There is no Configure symbol for exp. 370} 371 372# DynaLoader will use dlopen, unless we are building static, 373# and it is used in the platforms we are supporting in this test. 374if ($Config{usedl} ) { 375 $expected{dlopen} = 'd_dlopen'; 376} 377 378for my $symbol (sort keys %expected) { 379 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { 380 SKIP: { 381 skip("no $symbol"); 382 } 383 next; 384 } 385 my @o = exists $symbols{undef}{$symbol} ? 386 sort keys %{ $symbols{undef}{$symbol} } : (); 387 ok(@o, "uses $symbol (@o)"); 388} 389 390# There are certain symbols we expect NOT to see. 391# 392# gets is horribly unsafe. 393# 394# fgets should not be used (Perl has its own API, sv_gets), 395# even without perlio. 396# 397# tmpfile is unsafe. 398# 399# strcat, strcpy, strncat, strncpy are unsafe. 400# 401# sprintf and vsprintf should not be used because 402# Perl has its own safer and more portable implementations. 403# (One exception: for certain floating point outputs 404# the native sprintf is still used in some platforms, see below.) 405# 406# atoi has unsafe and undefined failure modes, and is affected by locale. 407# Its cousins include atol and atoll. 408# 409# strtol and strtoul are affected by locale. 410# Cousins include strtoq. 411# 412# system should not be used, use pp_system or my_popen. 413# 414 415my %unexpected; 416 417for my $str (qw(system)) { 418 $unexpected{$str} = "d_$str"; 419} 420 421for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { 422 $unexpected{$stdio} = undef; # No Configure symbol for these. 423} 424for my $str (qw(strcat strcpy strncat strncpy)) { 425 $unexpected{$str} = undef; # No Configure symbol for these. 426} 427 428$unexpected{atoi} = undef; # No Configure symbol for atoi. 429$unexpected{atol} = undef; # No Configure symbol for atol. 430 431for my $str (qw(atoll strtol strtoul strtoq)) { 432 $unexpected{$str} = "d_$str"; 433} 434 435for my $symbol (sort keys %unexpected) { 436 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { 437 SKIP: { 438 skip("no $symbol"); 439 } 440 next; 441 } 442 my @o = exists $symbols{undef}{$symbol} ? 443 sort keys %{ $symbols{undef}{$symbol} } : (); 444 # While sprintf() is bad in the general case, 445 # some platforms implement Gconvert via sprintf, in sv.o. 446 if ($symbol eq 'sprintf' && 447 $Config{d_Gconvert} =~ /^sprintf/ && 448 @o == 1 && $o[0] eq 'sv.o') { 449 SKIP: { 450 skip("uses sprintf for Gconvert in sv.o"); 451 } 452 } else { 453 is(@o, 0, "uses no $symbol (@o)"); 454 } 455} 456 457# Check that any text symbols named S_ are not exported. 458my $export_S_prefix = 0; 459for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) { 460 for my $o (sort keys %{$symbols{text}{$t}}) { 461 if (exists $symbols{text}{$t}{$o}{T}) { 462 fail($t, "$t exported from $o"); 463 $export_S_prefix++; 464 } 465 } 466} 467is($export_S_prefix, 0, "no S_ exports"); 468 469if (defined $nm_err_tmp) { 470 if (open(my $nm_err_fh, $nm_err_tmp)) { 471 my $error; 472 while (<$nm_err_fh>) { 473 # OS X has weird error where nm warns about 474 # "no name list" but then outputs fine. 475 # llvm-nm may also complain about 'no symbols'. In some 476 # versions this is exactly the string "no symbols\n" but in later 477 # versions becomes a string followed by ": no symbols\n". For this 478 # test it is typically "../libperl.a:perlapi.o: no symbols\n" 479 if ( $^O eq 'darwin' ) { 480 if (/nm: no name list/ || /^(.*: )?no symbols$/ ) { 481 print "# $^O ignoring $nm output: $_"; 482 next; 483 } 484 } 485 warn "$0: Unexpected $nm error: $_"; 486 $error++; 487 } 488 die "$0: Unexpected $nm errors\n" if $error; 489 } else { 490 warn "Failed to open '$nm_err_tmp': $!\n"; 491 } 492} 493 494done_testing(); 495