1#!/usr/bin/perl -w 2 3# Try opening libperl.a with nm, and verifying it has the kind of 4# symbols we expect, and no symbols we should avoid. 5# 6# Fail softly, expect things only on known platforms: 7# - linux, x86 only (ppc linux has odd symbol tables) 8# - darwin (OS X), both x86 and ppc 9# - freebsd 10# and on other platforms, and if things seem odd, just give up (skip_all). 11# 12# Symbol types for LTO builds don't seem to match their final section, so 13# skip on LTO builds too. 14# 15# Debugging tip: nm output (this script's input) can be faked by 16# giving one command line argument for this script: it should be 17# either the filename to read, or "-" for STDIN. You can also append 18# "@style" (where style is a supported nm style, like "gnu" or "darwin") 19# to this filename for "cross-parsing". 20# 21# Some terminology: 22# - "text" symbols are code 23# - "data" symbols are data (duh), with subdivisions: 24# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), 25# uninitialized data, which often even doesn't exist in the object 26# file as such, only its size does, which is then created on demand 27# by the loader 28# - "const": initialized read-only data, like string literals 29# - "common": uninitialized data unless initialized... 30# (the full story is too long for here, see "man nm") 31# - "data": initialized read-write data 32# (somewhat confusingly below: "data data", but it makes code simpler) 33# - "undefined": external symbol referred to by an object, 34# most likely a text symbol. Can be either a symbol defined by 35# a Perl object file but referred to by other Perl object files, 36# or a completely external symbol from libc, or other system libraries. 37 38BEGIN { 39 chdir 't' if -d 't'; 40 @INC = '../lib'; 41 require "./test.pl"; 42} 43 44use strict; 45 46use Config; 47 48# maint (and tarballs of maint releases) may not have updates here to 49# deal with changes to nm's output in some toolchains 50$^V =~ /^v\d+\.\d*[13579]\./ 51 or skip_all "on maint"; 52 53if ($Config{cc} =~ /g\+\+/) { 54 # XXX Could use c++filt, maybe. 55 skip_all "on g++"; 56} 57 58# ccname is gcc for both gcc and clang 59if ($Config{ccname} eq "gcc" && $Config{ccflags} =~ /-flto\b/) { 60 # If we compile with gcc nm marks PL_no_mem as "D" (normal data) rather than a R (read only) 61 # but the symbol still ends up in the .rodata section of the image on linking. 62 # If we compile with clang 14, nm marks PL_no_mem as "T" (text, aka code) rather than R 63 # but the symbol still ends up in the .rodata section on linking. 64 skip_all "LTO libperl.a flags don't match the final linker sections"; 65} 66 67my $libperl_a; 68 69for my $f (qw(../libperl.a libperl.a)) { 70 if (-f $f) { 71 $libperl_a = $f; 72 last; 73 } 74} 75 76unless (defined $libperl_a) { 77 skip_all "no libperl.a"; 78} 79 80print "# \$^O = $^O\n"; 81print "# \$Config{archname} = $Config{archname}\n"; 82print "# \$Config{cc} = $Config{cc}\n"; 83print "# libperl = $libperl_a\n"; 84 85my $nm; 86my $nm_opt = ''; 87my $nm_style; 88my $nm_fh; 89my $nm_err_tmp = "libperl$$"; 90 91END { 92 # this is still executed when we skip_all above, avoid a warning 93 unlink $nm_err_tmp if $nm_err_tmp; 94} 95 96my $fake_input; 97my $fake_style; 98 99if (@ARGV == 1) { 100 $fake_input = shift @ARGV; 101 print "# Faking nm output from $fake_input\n"; 102 if ($fake_input =~ s/\@(.+)$//) { 103 $fake_style = $1; 104 print "# Faking nm style from $fake_style\n"; 105 if ($fake_style eq 'gnu' || 106 $fake_style eq 'linux' || 107 $fake_style eq 'freebsd') { 108 $nm_style = 'gnu' 109 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { 110 $nm_style = 'darwin' 111 } else { 112 die "$0: Unknown explicit nm style '$fake_style'\n"; 113 } 114 } 115} 116 117unless (defined $nm_style) { 118 if ($^O eq 'linux') { 119 # The 'gnu' style could be equally well be called 'bsd' style, 120 # since the output format of the GNU binutils nm is really BSD. 121 $nm_style = 'gnu'; 122 } elsif ($^O eq 'freebsd') { 123 $nm_style = 'gnu'; 124 } elsif ($^O eq 'darwin') { 125 $nm_style = 'darwin'; 126 } 127} 128 129if (defined $nm_style) { 130 if ($nm_style eq 'gnu') { 131 $nm = '/usr/bin/nm'; 132 } elsif ($nm_style eq 'darwin') { 133 $nm = '/usr/bin/nm'; 134 # With the -m option we get better information than the BSD-like 135 # default: with the default, a lot of symbols get dumped into 'S' 136 # or 's', for example one cannot tell the difference between const 137 # and non-const data symbols. 138 $nm_opt = '-m'; 139 } else { 140 die "$0: Unexpected nm style '$nm_style'\n"; 141 } 142} 143 144if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) { 145 # For example in ppc most (but not all!) code symbols are placed 146 # in 'D' (data), not in ' T '. We cannot work under such conditions. 147 skip_all "linux but archname $Config{archname} not x86*"; 148} 149 150unless (defined $nm) { 151 skip_all "no nm"; 152} 153 154unless (defined $nm_style) { 155 skip_all "no nm style"; 156} 157 158print "# nm = $nm\n"; 159print "# nm_style = $nm_style\n"; 160print "# nm_opt = $nm_opt\n"; 161 162unless (-x $nm) { 163 skip_all "no executable nm $nm"; 164} 165 166if ($nm_style eq 'gnu' && !defined $fake_style) { 167 open(my $gnu_verify, "$nm --version|") or 168 skip_all "nm failed: $!"; 169 my $gnu_verified; 170 while (<$gnu_verify>) { 171 if (/^GNU nm/) { 172 $gnu_verified = 1; 173 last; 174 } 175 } 176 unless ($gnu_verified) { 177 skip_all "no GNU nm"; 178 } 179} 180 181if (defined $fake_input) { 182 if ($fake_input eq '-') { 183 open($nm_fh, "<&STDIN") or 184 skip_all "Duping STDIN failed: $!"; 185 } else { 186 open($nm_fh, "<", $fake_input) or 187 skip_all "Opening '$fake_input' failed: $!"; 188 } 189 undef $nm_err_tmp; # In this case there will be no nm errors. 190} else { 191 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n}; 192 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or 193 skip_all "$nm $nm_opt $libperl_a failed: $!"; 194} 195 196sub is_perlish_symbol { 197 $_[0] =~ /^(?:PL_|Perl|PerlIO)/; 198} 199 200# XXX Implement "internal test" for this script (option -t?) 201# to verify that the parsing does what it's intended to. 202 203sub nm_parse_gnu { 204 my $symbols = shift; 205 my $line = $_; 206 if (m{^(\w+\.o):$}) { 207 # object file name 208 $symbols->{obj}{$1}++; 209 $symbols->{o} = $1; 210 return; 211 } else { 212 die "$0: undefined current object: $line" 213 unless defined $symbols->{o}; 214 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 215 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 216 if (/^[Rr] (\w+)$/) { 217 # R: read only (const) 218 $symbols->{data}{const}{$1}{$symbols->{o}}++; 219 } elsif (/^r .+$/) { 220 # Skip local const (read only). 221 } elsif (/^([Tti]) (\w+)(\..+)?$/) { 222 $symbols->{text}{$2}{$symbols->{o}}{$1}++; 223 } elsif (/^C (\w+)$/) { 224 $symbols->{data}{common}{$1}{$symbols->{o}}++; 225 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { 226 # Bb: uninitialized data (bss) 227 # Ss: uninitialized data "for small objects" 228 $symbols->{data}{bss}{$1}{$symbols->{o}}++; 229 } elsif (/^D _LIB_VERSION$/) { 230 # Skip the _LIB_VERSION (not ours, probably libm) 231 } elsif (/^[DdGg] (\w+)$/) { 232 # Dd: initialized data 233 # Gg: initialized "for small objects" 234 $symbols->{data}{data}{$1}{$symbols->{o}}++; 235 } elsif (/^. \.?(\w+)$/) { 236 # Skip the unknown types. 237 print "# Unknown type: $line ($symbols->{o})\n"; 238 } 239 return; 240 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { 241 my ($symbol) = $1; 242 return if is_perlish_symbol($symbol); 243 $symbols->{undef}{$symbol}{$symbols->{o}}++; 244 return; 245 } 246 } 247 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 248} 249 250sub nm_parse_darwin { 251 my $symbols = shift; 252 my $line = $_; 253 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$} || 254 m{^(\w+\.o):$}) { 255 # object file name 256 $symbols->{obj}{$1}++; 257 $symbols->{o} = $1; 258 return; 259 } else { 260 die "$0: undefined current object: $line" unless defined $symbols->{o}; 261 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 262 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 263 # String literals can live in different sections 264 # depending on the compiler and os release, assumedly 265 # also linker flags. 266 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+){0,2}$/) { 267 my ($symbol, $suffix) = ($1, $2); 268 # Ignore function-local constants like 269 # _Perl_av_extend_guts.oom_array_extend 270 return if defined $suffix && /__TEXT,__const/; 271 # Ignore the cstring unnamed strings. 272 return if $symbol =~ /^L\.str\d+$/; 273 $symbols->{data}{const}{$symbol}{$symbols->{o}}++; 274 } elsif (/^\(__TEXT,__text\) ((?:non-|private )?external) \[cold func\] _(\w+\.cold\.[1-9][0-9]*)$/) { 275 # for N_COLD_FUNC symbols in MachO 276 # eg. 0000000000022c60 (__TEXT,__text) non-external [cold func] _Perl_lex_next_chunk.cold.1 (toke.o) 277 } elsif (/^\(__TEXT,__text\) ((?:non-|private )?external) _(\w+)$/) { 278 my ($exp, $sym) = ($1, $2); 279 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++; 280 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+){0,3}$/) { 281 my ($dtype, $symbol, $suffix) = ($1, $2, $3); 282 # Ignore function-local constants like 283 # _Perl_pp_gmtime.dayname 284 return if defined $suffix; 285 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; 286 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { 287 # Skip this, whatever it is (some inlined leakage from 288 # darwin libc?) 289 } elsif (/^\(__TEXT,__eh_frame/) { 290 # Skip the eh_frame (exception handling) symbols. 291 return; 292 } elsif (/^\(__\w+,__\w+\) /) { 293 # Skip the unknown types. 294 print "# Unknown type: $line ($symbols->{o})\n"; 295 } 296 return; 297 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { 298 # darwin/ppc marks most undefined text symbols 299 # as "[lazy bound]". 300 my ($symbol) = $1 =~ s/\$UNIX2003\z//r; 301 return if is_perlish_symbol($symbol); 302 $symbols->{undef}{$symbol}{$symbols->{o}}++; 303 return; 304 } 305 } 306 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 307} 308 309my $nm_parse; 310 311if ($nm_style eq 'gnu') { 312 $nm_parse = \&nm_parse_gnu; 313} elsif ($nm_style eq 'darwin') { 314 $nm_parse = \&nm_parse_darwin; 315} 316 317unless (defined $nm_parse) { 318 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; 319} 320 321my %symbols; 322 323while (<$nm_fh>) { 324 next if /^$/; 325 chomp; 326 $nm_parse->(\%symbols); 327} 328 329# use Data::Dumper; print Dumper(\%symbols); 330 331# Something went awfully wrong. Wrong nm? Wrong options? 332unless (keys %symbols) { 333 skip_all "no symbols\n"; 334} 335unless (exists $symbols{text}) { 336 skip_all "no text symbols\n"; 337} 338 339# These should always be true for everyone. 340 341ok($symbols{obj}{'util.o'}, "has object util.o"); 342ok($symbols{text}{'Perl_croak'}{'util.o'}, "has text Perl_croak in util.o"); 343ok(exists $symbols{data}{const}, "has data const symbols"); 344ok($symbols{data}{const}{PL_no_modify}{'globals.o'}, "has PL_no_modify"); 345 346my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0; 347 348print "# nocommon = $nocommon\n"; 349 350my %data_symbols; 351 352for my $dtype (sort keys %{$symbols{data}}) { 353 for my $symbol (sort keys %{$symbols{data}{$dtype}}) { 354 $data_symbols{$symbol}++; 355 } 356} 357 358if ( !$symbols{data}{common} ) { 359 # This is likely because Perl was compiled with 360 # -Accflags="-fno-common" 361 $symbols{data}{common} = $symbols{data}{bss}; 362} 363 364ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w"); 365ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); 366 367# See the comments in the beginning for what "undefined symbols" 368# really means. We *should* have many of those, that is a good thing. 369ok(keys %{$symbols{undef}}, "has undefined symbols"); 370 371# There are certain symbols we expect to see. 372 373# chmod, socket, getenv, sigaction, exp, time are system/library 374# calls that should each see at least one use. exp can be expl 375# if so configured. 376my %expected = ( 377 chmod => undef, # There is no Configure symbol for chmod. 378 socket => 'd_socket', 379 getenv => undef, # There is no Configure symbol for getenv, 380 sigaction => 'd_sigaction', 381 time => 'd_time', 382 ); 383 384if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) { 385 $expected{expl} = undef; # There is no Configure symbol for expl. 386} elsif ($Config{usequadmath}) { 387 $expected{expq} = undef; # There is no Configure symbol for expq. 388} else { 389 $expected{exp} = undef; # There is no Configure symbol for exp. 390} 391 392# DynaLoader will use dlopen, unless we are building static, 393# and it is used in the platforms we are supporting in this test. 394if ($Config{usedl} ) { 395 $expected{dlopen} = 'd_dlopen'; 396} 397 398for my $symbol (sort keys %expected) { 399 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { 400 SKIP: { 401 skip("no $symbol"); 402 } 403 next; 404 } 405 my @o = exists $symbols{undef}{$symbol} ? 406 sort keys %{ $symbols{undef}{$symbol} } : (); 407 ok(@o, "uses $symbol (@o)"); 408} 409 410# There are certain symbols we expect NOT to see. 411# 412# gets is horribly unsafe. 413# 414# fgets should not be used (Perl has its own API, sv_gets), 415# even without perlio. 416# 417# tmpfile is unsafe. 418# 419# strcat, strcpy, strncat, strncpy are unsafe. 420# 421# sprintf and vsprintf should not be used because 422# Perl has its own safer and more portable implementations. 423# (One exception: for certain floating point outputs 424# the native sprintf is still used in some platforms, see below.) 425# 426# atoi has unsafe and undefined failure modes, and is affected by locale. 427# Its cousins include atol and atoll. 428# 429# strtol and strtoul are affected by locale. 430# Cousins include strtoq. 431# 432# system should not be used, use pp_system or my_popen. 433# 434 435my %unexpected; 436 437for my $str (qw(system)) { 438 $unexpected{$str} = "d_$str"; 439} 440 441for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { 442 $unexpected{$stdio} = undef; # No Configure symbol for these. 443} 444for my $str (qw(strcat strcpy strncat strncpy)) { 445 $unexpected{$str} = undef; # No Configure symbol for these. 446} 447 448$unexpected{atoi} = undef; # No Configure symbol for atoi. 449$unexpected{atol} = undef; # No Configure symbol for atol. 450 451for my $str (qw(atoll strtol strtoul strtoq)) { 452 $unexpected{$str} = "d_$str"; 453} 454 455for my $symbol (sort keys %unexpected) { 456 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { 457 SKIP: { 458 skip("no $symbol"); 459 } 460 next; 461 } 462 my @o = exists $symbols{undef}{$symbol} ? 463 sort keys %{ $symbols{undef}{$symbol} } : (); 464 # While sprintf() is bad in the general case, 465 # some platforms implement Gconvert via sprintf, in sv.o. 466 if ($symbol eq 'sprintf' && 467 $Config{d_Gconvert} =~ /^sprintf/ && 468 @o == 1 && $o[0] eq 'sv.o') { 469 SKIP: { 470 skip("uses sprintf for Gconvert in sv.o"); 471 } 472 } else { 473 is(@o, 0, "uses no $symbol (@o)"); 474 } 475} 476 477# Check that any text symbols named S_ are not exported. 478my $export_S_prefix = 0; 479for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) { 480 for my $o (sort keys %{$symbols{text}{$t}}) { 481 if (exists $symbols{text}{$t}{$o}{T}) { 482 fail($t, "$t exported from $o"); 483 $export_S_prefix++; 484 } 485 } 486} 487is($export_S_prefix, 0, "no S_ exports"); 488 489if (defined $nm_err_tmp) { 490 if (open(my $nm_err_fh, $nm_err_tmp)) { 491 my $error; 492 while (<$nm_err_fh>) { 493 # OS X has weird error where nm warns about 494 # "no name list" but then outputs fine. 495 # llvm-nm may also complain about 'no symbols'. In some 496 # versions this is exactly the string "no symbols\n" but in later 497 # versions becomes a string followed by ": no symbols\n". For this 498 # test it is typically "../libperl.a:perlapi.o: no symbols\n" 499 if ( $^O eq 'darwin' ) { 500 if (/nm: no name list/ || /^(.*: )?no symbols$/ ) { 501 print "# $^O ignoring $nm output: $_"; 502 next; 503 } 504 } 505 warn "$0: Unexpected $nm error: $_"; 506 $error++; 507 } 508 die "$0: Unexpected $nm errors\n" if $error; 509 } else { 510 warn "Failed to open '$nm_err_tmp': $!\n"; 511 } 512} 513 514done_testing(); 515