1#!/usr/bin/perl -w 2 3# Try opening libperl.a with nm, and verifying it has the kind of 4# symbols we expect, and no symbols we should avoid. 5# 6# Fail softly, expect things only on known platforms: 7# - linux, x86 only (ppc linux has odd symbol tables) 8# - darwin (OS X), both x86 and ppc 9# - freebsd 10# and on other platforms, and if things seem odd, just give up (skip_all). 11# 12# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or 13# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what 14# they were meant to do, hide the global variables (see perlguts for 15# the details). 16# 17# Debugging tip: nm output (this script's input) can be faked by 18# giving one command line argument for this script: it should be 19# either the filename to read, or "-" for STDIN. You can also append 20# "@style" (where style is a supported nm style, like "gnu" or "darwin") 21# to this filename for "cross-parsing". 22# 23# Some terminology: 24# - "text" symbols are code 25# - "data" symbols are data (duh), with subdivisions: 26# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), 27# uninitialized data, which often even doesn't exist in the object 28# file as such, only its size does, which is then created on demand 29# by the loader 30# - "const": initialized read-only data, like string literals 31# - "common": uninitialized data unless initialized... 32# (the full story is too long for here, see "man nm") 33# - "data": initialized read-write data 34# (somewhat confusingly below: "data data", but it makes code simpler) 35# - "undefined": external symbol referred to by an object, 36# most likely a text symbol. Can be either a symbol defined by 37# a Perl object file but referred to by other Perl object files, 38# or a completely external symbol from libc, or other system libraries. 39 40BEGIN { 41 chdir 't' if -d 't'; 42 @INC = '../lib'; 43 require "./test.pl"; 44} 45 46use strict; 47 48use Config; 49 50if ($Config{cc} =~ /g\+\+/) { 51 # XXX Could use c++filt, maybe. 52 skip_all "on g++"; 53} 54 55my $libperl_a; 56 57for my $f (qw(../libperl.a libperl.a)) { 58 if (-f $f) { 59 $libperl_a = $f; 60 last; 61 } 62} 63 64unless (defined $libperl_a) { 65 skip_all "no libperl.a"; 66} 67 68print "# \$^O = $^O\n"; 69print "# \$Config{archname} = $Config{archname}\n"; 70print "# \$Config{cc} = $Config{cc}\n"; 71print "# libperl = $libperl_a\n"; 72 73my $nm; 74my $nm_opt = ''; 75my $nm_style; 76my $nm_fh; 77my $nm_err_tmp = "libperl$$"; 78 79END { 80 # this is still executed when we skip_all above, avoid a warning 81 unlink $nm_err_tmp if $nm_err_tmp; 82} 83 84my $fake_input; 85my $fake_style; 86 87if (@ARGV == 1) { 88 $fake_input = shift @ARGV; 89 print "# Faking nm output from $fake_input\n"; 90 if ($fake_input =~ s/\@(.+)$//) { 91 $fake_style = $1; 92 print "# Faking nm style from $fake_style\n"; 93 if ($fake_style eq 'gnu' || 94 $fake_style eq 'linux' || 95 $fake_style eq 'freebsd') { 96 $nm_style = 'gnu' 97 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { 98 $nm_style = 'darwin' 99 } else { 100 die "$0: Unknown explicit nm style '$fake_style'\n"; 101 } 102 } 103} 104 105unless (defined $nm_style) { 106 if ($^O eq 'linux') { 107 # The 'gnu' style could be equally well be called 'bsd' style, 108 # since the output format of the GNU binutils nm is really BSD. 109 $nm_style = 'gnu'; 110 } elsif ($^O eq 'freebsd') { 111 $nm_style = 'gnu'; 112 } elsif ($^O eq 'darwin') { 113 $nm_style = 'darwin'; 114 } 115} 116 117if (defined $nm_style) { 118 if ($nm_style eq 'gnu') { 119 $nm = '/usr/bin/nm'; 120 } elsif ($nm_style eq 'darwin') { 121 $nm = '/usr/bin/nm'; 122 # With the -m option we get better information than the BSD-like 123 # default: with the default, a lot of symbols get dumped into 'S' 124 # or 's', for example one cannot tell the difference between const 125 # and non-const data symbols. 126 $nm_opt = '-m'; 127 } else { 128 die "$0: Unexpected nm style '$nm_style'\n"; 129 } 130} 131 132if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) { 133 # For example in ppc most (but not all!) code symbols are placed 134 # in 'D' (data), not in ' T '. We cannot work under such conditions. 135 skip_all "linux but archname $Config{archname} not x86*"; 136} 137 138unless (defined $nm) { 139 skip_all "no nm"; 140} 141 142unless (defined $nm_style) { 143 skip_all "no nm style"; 144} 145 146print "# nm = $nm\n"; 147print "# nm_style = $nm_style\n"; 148print "# nm_opt = $nm_opt\n"; 149 150unless (-x $nm) { 151 skip_all "no executable nm $nm"; 152} 153 154if ($nm_style eq 'gnu' && !defined $fake_style) { 155 open(my $gnu_verify, "$nm --version|") or 156 skip_all "nm failed: $!"; 157 my $gnu_verified; 158 while (<$gnu_verify>) { 159 if (/^GNU nm/) { 160 $gnu_verified = 1; 161 last; 162 } 163 } 164 unless ($gnu_verified) { 165 skip_all "no GNU nm"; 166 } 167} 168 169if (defined $fake_input) { 170 if ($fake_input eq '-') { 171 open($nm_fh, "<&STDIN") or 172 skip_all "Duping STDIN failed: $!"; 173 } else { 174 open($nm_fh, "<", $fake_input) or 175 skip_all "Opening '$fake_input' failed: $!"; 176 } 177 undef $nm_err_tmp; # In this case there will be no nm errors. 178} else { 179 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n}; 180 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or 181 skip_all "$nm $nm_opt $libperl_a failed: $!"; 182} 183 184sub is_perlish_symbol { 185 $_[0] =~ /^(?:PL_|Perl|PerlIO)/; 186} 187 188# XXX Implement "internal test" for this script (option -t?) 189# to verify that the parsing does what it's intended to. 190 191sub nm_parse_gnu { 192 my $symbols = shift; 193 my $line = $_; 194 if (m{^(\w+\.o):$}) { 195 # object file name 196 $symbols->{obj}{$1}++; 197 $symbols->{o} = $1; 198 return; 199 } else { 200 die "$0: undefined current object: $line" 201 unless defined $symbols->{o}; 202 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 203 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 204 if (/^[Rr] (\w+)$/) { 205 # R: read only (const) 206 $symbols->{data}{const}{$1}{$symbols->{o}}++; 207 } elsif (/^r .+$/) { 208 # Skip local const (read only). 209 } elsif (/^([Tti]) (\w+)(\..+)?$/) { 210 $symbols->{text}{$2}{$symbols->{o}}{$1}++; 211 } elsif (/^C (\w+)$/) { 212 $symbols->{data}{common}{$1}{$symbols->{o}}++; 213 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { 214 # Bb: uninitialized data (bss) 215 # Ss: uninitialized data "for small objects" 216 $symbols->{data}{bss}{$1}{$symbols->{o}}++; 217 } elsif (/^D _LIB_VERSION$/) { 218 # Skip the _LIB_VERSION (not ours, probably libm) 219 } elsif (/^[DdGg] (\w+)$/) { 220 # Dd: initialized data 221 # Gg: initialized "for small objects" 222 $symbols->{data}{data}{$1}{$symbols->{o}}++; 223 } elsif (/^. \.?(\w+)$/) { 224 # Skip the unknown types. 225 print "# Unknown type: $line ($symbols->{o})\n"; 226 } 227 return; 228 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { 229 my ($symbol) = $1; 230 return if is_perlish_symbol($symbol); 231 $symbols->{undef}{$symbol}{$symbols->{o}}++; 232 return; 233 } 234 } 235 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 236} 237 238sub nm_parse_darwin { 239 my $symbols = shift; 240 my $line = $_; 241 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { 242 # object file name 243 $symbols->{obj}{$1}++; 244 $symbols->{o} = $1; 245 return; 246 } else { 247 die "$0: undefined current object: $line" unless defined $symbols->{o}; 248 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 249 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 250 # String literals can live in different sections 251 # depending on the compiler and os release, assumedly 252 # also linker flags. 253 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 254 my ($symbol, $suffix) = ($1, $2); 255 # Ignore function-local constants like 256 # _Perl_av_extend_guts.oom_array_extend 257 return if defined $suffix && /__TEXT,__const/; 258 # Ignore the cstring unnamed strings. 259 return if $symbol =~ /^L\.str\d+$/; 260 $symbols->{data}{const}{$symbol}{$symbols->{o}}++; 261 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) { 262 my ($exp, $sym) = ($1, $2); 263 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++; 264 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 265 my ($dtype, $symbol, $suffix) = ($1, $2, $3); 266 # Ignore function-local constants like 267 # _Perl_pp_gmtime.dayname 268 return if defined $suffix; 269 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; 270 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { 271 # Skip this, whatever it is (some inlined leakage from 272 # darwin libc?) 273 } elsif (/^\(__TEXT,__eh_frame/) { 274 # Skip the eh_frame (exception handling) symbols. 275 return; 276 } elsif (/^\(__\w+,__\w+\) /) { 277 # Skip the unknown types. 278 print "# Unknown type: $line ($symbols->{o})\n"; 279 } 280 return; 281 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { 282 # darwin/ppc marks most undefined text symbols 283 # as "[lazy bound]". 284 my ($symbol) = $1 =~ s/\$UNIX2003\z//r; 285 return if is_perlish_symbol($symbol); 286 $symbols->{undef}{$symbol}{$symbols->{o}}++; 287 return; 288 } 289 } 290 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 291} 292 293my $nm_parse; 294 295if ($nm_style eq 'gnu') { 296 $nm_parse = \&nm_parse_gnu; 297} elsif ($nm_style eq 'darwin') { 298 $nm_parse = \&nm_parse_darwin; 299} 300 301unless (defined $nm_parse) { 302 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; 303} 304 305my %symbols; 306 307while (<$nm_fh>) { 308 next if /^$/; 309 chomp; 310 $nm_parse->(\%symbols); 311} 312 313# use Data::Dumper; print Dumper(\%symbols); 314 315# Something went awfully wrong. Wrong nm? Wrong options? 316unless (keys %symbols) { 317 skip_all "no symbols\n"; 318} 319unless (exists $symbols{text}) { 320 skip_all "no text symbols\n"; 321} 322 323# These should always be true for everyone. 324 325ok($symbols{obj}{'pp.o'}, "has object pp.o"); 326ok($symbols{text}{'Perl_peep'}, "has text Perl_peep"); 327ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o"); 328ok(exists $symbols{data}{const}, "has data const symbols"); 329ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); 330 331my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0; 332my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0; 333my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0; 334 335print "# GS = $GS\n"; 336print "# GSP = $GSP\n"; 337print "# nocommon = $nocommon\n"; 338 339my %data_symbols; 340 341for my $dtype (sort keys %{$symbols{data}}) { 342 for my $symbol (sort keys %{$symbols{data}{$dtype}}) { 343 $data_symbols{$symbol}++; 344 } 345} 346 347# The following tests differ between vanilla vs $GSP or $GS. 348 349if ($GSP) { 350 print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n"; 351 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); 352 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); 353 354 ok(! exists $symbols{data}{bss}, "has no data bss symbols") 355 or do { 356 my $bad = "BSS entries (there are supposed to be none):\n"; 357 $bad .= " bss sym: $_\n" for sort keys %{$symbols{data}{bss}}; 358 diag($bad); 359 }; 360 361 ok(! exists $symbols{data}{data} || 362 # clang with ASAN seems to add this symbol to every object file: 363 !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}), 364 "has no data data symbols") 365 or do { 366 my $bad = "DATA entries (there are supposed to be none):\n"; 367 $bad .= " data sym: $_\n" for sort keys %{$symbols{data}{data}}; 368 diag($bad); 369 }; 370 371 ok(! exists $symbols{data}{common}, "has no data common symbols") 372 or do { 373 my $bad = "COMMON entries (there are supposed to be none):\n"; 374 $bad .= " common sym: $_\n" for sort keys %{$symbols{data}{common}}; 375 diag($bad); 376 }; 377 378 # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have 379 # the extra text symbol for accessing the vars 380 # (as opposed to "just" -DPERL_GLOBAL_STRUCT) 381 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); 382} elsif ($GS) { 383 print "# -DPERL_GLOBAL_STRUCT\n"; 384 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); 385 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); 386 387 if ($nocommon) { 388 $symbols{data}{common} = $symbols{data}{bss}; 389 delete $symbols{data}{bss}; 390 } 391 392 ok(! exists $symbols{data}{bss}, "has no data bss symbols") 393 or do { 394 my $bad = "BSS entries (there are supposed to be none):\n"; 395 $bad .= " bss sym: $_\n" for sort keys %{$symbols{data}{bss}}; 396 diag($bad); 397 }; 398 399 400 # These PerlIO data symbols are left visible with 401 # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE) 402 my @PerlIO = 403 qw( 404 PerlIO_byte 405 PerlIO_crlf 406 PerlIO_pending 407 PerlIO_perlio 408 PerlIO_raw 409 PerlIO_remove 410 PerlIO_stdio 411 PerlIO_unix 412 PerlIO_utf8 413 ); 414 415 # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but 416 # otherwise not const -- because of SWIG which wants to modify 417 # the table. Evil SWIG, eeevil. 418 419 # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which 420 # -DPERL_GLOBAL_STRUCT has turned on. 421 eq_array([sort keys %{$symbols{data}{data}}], 422 [sort('PL_VarsPtr', 423 @PerlIO, 424 'PL_magic_vtables', 425 'my_cxt_index')], 426 "data data symbols"); 427 428 # Only one data common symbol, our "supervariable". 429 eq_array([sort keys %{$symbols{data}{common}}], 430 ['PL_Vars'], 431 "data common symbols"); 432 433 ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr"); 434 ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars"); 435 436 # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars. 437 ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars"); 438} else { 439 print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n"; 440 441 if ( !$symbols{data}{common} ) { 442 # This is likely because Perl was compiled with 443 # -Accflags="-fno-common" 444 $symbols{data}{common} = $symbols{data}{bss}; 445 } 446 447 ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed"); 448 ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); 449 450 # None of the GLOBAL_STRUCT* business here. 451 ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr"); 452 ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars"); 453 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); 454} 455 456# See the comments in the beginning for what "undefined symbols" 457# really means. We *should* have many of those, that is a good thing. 458ok(keys %{$symbols{undef}}, "has undefined symbols"); 459 460# There are certain symbols we expect to see. 461 462# chmod, socket, getenv, sigaction, exp, time are system/library 463# calls that should each see at least one use. exp can be expl 464# if so configured. 465my %expected = ( 466 chmod => undef, # There is no Configure symbol for chmod. 467 socket => 'd_socket', 468 getenv => undef, # There is no Configure symbol for getenv, 469 sigaction => 'd_sigaction', 470 time => 'd_time', 471 ); 472 473if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) { 474 $expected{expl} = undef; # There is no Configure symbol for expl. 475} elsif ($Config{usequadmath}) { 476 $expected{expq} = undef; # There is no Configure symbol for expq. 477} else { 478 $expected{exp} = undef; # There is no Configure symbol for exp. 479} 480 481# DynaLoader will use dlopen, unless we are building static, 482# and it is used in the platforms we are supporting in this test. 483if ($Config{usedl} ) { 484 $expected{dlopen} = 'd_dlopen'; 485} 486 487for my $symbol (sort keys %expected) { 488 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { 489 SKIP: { 490 skip("no $symbol"); 491 } 492 next; 493 } 494 my @o = exists $symbols{undef}{$symbol} ? 495 sort keys %{ $symbols{undef}{$symbol} } : (); 496 ok(@o, "uses $symbol (@o)"); 497} 498 499# There are certain symbols we expect NOT to see. 500# 501# gets is horribly unsafe. 502# 503# fgets should not be used (Perl has its own API, sv_gets), 504# even without perlio. 505# 506# tmpfile is unsafe. 507# 508# strcat, strcpy, strncat, strncpy are unsafe. 509# 510# sprintf and vsprintf should not be used because 511# Perl has its own safer and more portable implementations. 512# (One exception: for certain floating point outputs 513# the native sprintf is still used in some platforms, see below.) 514# 515# atoi has unsafe and undefined failure modes, and is affected by locale. 516# Its cousins include atol and atoll. 517# 518# strtol and strtoul are affected by locale. 519# Cousins include strtoq. 520# 521# system should not be used, use pp_system or my_popen. 522# 523 524my %unexpected; 525 526for my $str (qw(system)) { 527 $unexpected{$str} = "d_$str"; 528} 529 530for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { 531 $unexpected{$stdio} = undef; # No Configure symbol for these. 532} 533for my $str (qw(strcat strcpy strncat strncpy)) { 534 $unexpected{$str} = undef; # No Configure symbol for these. 535} 536 537$unexpected{atoi} = undef; # No Configure symbol for atoi. 538$unexpected{atol} = undef; # No Configure symbol for atol. 539 540for my $str (qw(atoll strtol strtoul strtoq)) { 541 $unexpected{$str} = "d_$str"; 542} 543 544for my $symbol (sort keys %unexpected) { 545 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { 546 SKIP: { 547 skip("no $symbol"); 548 } 549 next; 550 } 551 my @o = exists $symbols{undef}{$symbol} ? 552 sort keys %{ $symbols{undef}{$symbol} } : (); 553 # While sprintf() is bad in the general case, 554 # some platforms implement Gconvert via sprintf, in sv.o. 555 if ($symbol eq 'sprintf' && 556 $Config{d_Gconvert} =~ /^sprintf/ && 557 @o == 1 && $o[0] eq 'sv.o') { 558 SKIP: { 559 skip("uses sprintf for Gconvert in sv.o"); 560 } 561 } else { 562 is(@o, 0, "uses no $symbol (@o)"); 563 } 564} 565 566# Check that any text symbols named S_ are not exported. 567my $export_S_prefix = 0; 568for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) { 569 for my $o (sort keys %{$symbols{text}{$t}}) { 570 if (exists $symbols{text}{$t}{$o}{T}) { 571 fail($t, "$t exported from $o"); 572 $export_S_prefix++; 573 } 574 } 575} 576is($export_S_prefix, 0, "no S_ exports"); 577 578if (defined $nm_err_tmp) { 579 if (open(my $nm_err_fh, $nm_err_tmp)) { 580 my $error; 581 while (<$nm_err_fh>) { 582 # OS X has weird error where nm warns about 583 # "no name list" but then outputs fine. 584 if ( $^O eq 'darwin' ) { 585 if (/nm: no name list/ || /^no symbols$/ ) { 586 print "# $^O ignoring $nm output: $_"; 587 next; 588 } 589 } 590 warn "$0: Unexpected $nm error: $_"; 591 $error++; 592 } 593 die "$0: Unexpected $nm errors\n" if $error; 594 } else { 595 warn "Failed to open '$nm_err_tmp': $!\n"; 596 } 597} 598 599done_testing(); 600