1#!/usr/bin/perl -w
2
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
6# Fail softly, expect things only on known platforms:
7# - linux, x86 only (ppc linux has odd symbol tables)
8# - darwin (OS X), both x86 and ppc
9# - freebsd
10# and on other platforms, and if things seem odd, just give up (skip_all).
11#
12# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or
13# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what
14# they were meant to do, hide the global variables (see perlguts for
15# the details).
16#
17# Debugging tip: nm output (this script's input) can be faked by
18# giving one command line argument for this script: it should be
19# either the filename to read, or "-" for STDIN.  You can also append
20# "@style" (where style is a supported nm style, like "gnu" or "darwin")
21# to this filename for "cross-parsing".
22#
23# Some terminology:
24# - "text" symbols are code
25# - "data" symbols are data (duh), with subdivisions:
26#   - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
27#     uninitialized data, which often even doesn't exist in the object
28#     file as such, only its size does, which is then created on demand
29#     by the loader
30#  - "const": initialized read-only data, like string literals
31#  - "common": uninitialized data unless initialized...
32#    (the full story is too long for here, see "man nm")
33#  - "data": initialized read-write data
34#    (somewhat confusingly below: "data data", but it makes code simpler)
35#  - "undefined": external symbol referred to by an object,
36#    most likely a text symbol.  Can be either a symbol defined by
37#    a Perl object file but referred to by other Perl object files,
38#    or a completely external symbol from libc, or other system libraries.
39
40BEGIN {
41    chdir 't' if -d 't';
42    @INC = '../lib';
43    require "./test.pl";
44}
45
46use strict;
47
48use Config;
49
50if ($Config{cc} =~ /g\+\+/) {
51    # XXX Could use c++filt, maybe.
52    skip_all "on g++";
53}
54
55my $libperl_a;
56
57for my $f (qw(../libperl.a libperl.a)) {
58  if (-f $f) {
59    $libperl_a = $f;
60    last;
61  }
62}
63
64unless (defined $libperl_a) {
65  skip_all "no libperl.a";
66}
67
68print "# \$^O = $^O\n";
69print "# \$Config{archname} = $Config{archname}\n";
70print "# \$Config{cc} = $Config{cc}\n";
71print "# libperl = $libperl_a\n";
72
73my $nm;
74my $nm_opt = '';
75my $nm_style;
76my $nm_fh;
77my $nm_err_tmp = "libperl$$";
78
79END {
80    # this is still executed when we skip_all above, avoid a warning
81    unlink $nm_err_tmp if $nm_err_tmp;
82}
83
84my $fake_input;
85my $fake_style;
86
87if (@ARGV == 1) {
88    $fake_input = shift @ARGV;
89    print "# Faking nm output from $fake_input\n";
90    if ($fake_input =~ s/\@(.+)$//) {
91        $fake_style = $1;
92        print "# Faking nm style from $fake_style\n";
93        if ($fake_style eq 'gnu' ||
94            $fake_style eq 'linux' ||
95            $fake_style eq 'freebsd') {
96            $nm_style = 'gnu'
97        } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
98            $nm_style = 'darwin'
99        } else {
100            die "$0: Unknown explicit nm style '$fake_style'\n";
101        }
102    }
103}
104
105unless (defined $nm_style) {
106    if ($^O eq 'linux') {
107        # The 'gnu' style could be equally well be called 'bsd' style,
108        # since the output format of the GNU binutils nm is really BSD.
109        $nm_style = 'gnu';
110    } elsif ($^O eq 'freebsd') {
111        $nm_style = 'gnu';
112    } elsif ($^O eq 'darwin') {
113        $nm_style = 'darwin';
114    }
115}
116
117if (defined $nm_style) {
118    if ($nm_style eq 'gnu') {
119        $nm = '/usr/bin/nm';
120    } elsif ($nm_style eq 'darwin') {
121        $nm = '/usr/bin/nm';
122        # With the -m option we get better information than the BSD-like
123        # default: with the default, a lot of symbols get dumped into 'S'
124        # or 's', for example one cannot tell the difference between const
125        # and non-const data symbols.
126        $nm_opt = '-m';
127    } else {
128        die "$0: Unexpected nm style '$nm_style'\n";
129    }
130}
131
132if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) {
133    # For example in ppc most (but not all!) code symbols are placed
134    # in 'D' (data), not in ' T '.  We cannot work under such conditions.
135    skip_all "linux but archname $Config{archname} not x86*";
136}
137
138unless (defined $nm) {
139  skip_all "no nm";
140}
141
142unless (defined $nm_style) {
143  skip_all "no nm style";
144}
145
146print "# nm = $nm\n";
147print "# nm_style = $nm_style\n";
148print "# nm_opt = $nm_opt\n";
149
150unless (-x $nm) {
151    skip_all "no executable nm $nm";
152}
153
154if ($nm_style eq 'gnu' && !defined $fake_style) {
155    open(my $gnu_verify, "$nm --version|") or
156        skip_all "nm failed: $!";
157    my $gnu_verified;
158    while (<$gnu_verify>) {
159        if (/^GNU nm/) {
160            $gnu_verified = 1;
161            last;
162        }
163    }
164    unless ($gnu_verified) {
165        skip_all "no GNU nm";
166    }
167}
168
169if (defined $fake_input) {
170    if ($fake_input eq '-') {
171        open($nm_fh, "<&STDIN") or
172            skip_all "Duping STDIN failed: $!";
173    } else {
174        open($nm_fh, "<", $fake_input) or
175            skip_all "Opening '$fake_input' failed: $!";
176    }
177    undef $nm_err_tmp; # In this case there will be no nm errors.
178} else {
179    print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n};
180    open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
181        skip_all "$nm $nm_opt $libperl_a failed: $!";
182}
183
184sub is_perlish_symbol {
185    $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
186}
187
188# XXX Implement "internal test" for this script (option -t?)
189# to verify that the parsing does what it's intended to.
190
191sub nm_parse_gnu {
192    my $symbols = shift;
193    my $line = $_;
194    if (m{^(\w+\.o):$}) {
195        # object file name
196        $symbols->{obj}{$1}++;
197        $symbols->{o} = $1;
198        return;
199    } else {
200        die "$0: undefined current object: $line"
201            unless defined $symbols->{o};
202        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
203        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
204            if (/^[Rr] (\w+)$/) {
205                # R: read only (const)
206                $symbols->{data}{const}{$1}{$symbols->{o}}++;
207            } elsif (/^r .+$/) {
208                # Skip local const (read only).
209            } elsif (/^([Tti]) (\w+)(\..+)?$/) {
210                $symbols->{text}{$2}{$symbols->{o}}{$1}++;
211            } elsif (/^C (\w+)$/) {
212                $symbols->{data}{common}{$1}{$symbols->{o}}++;
213            } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
214                # Bb: uninitialized data (bss)
215                # Ss: uninitialized data "for small objects"
216                $symbols->{data}{bss}{$1}{$symbols->{o}}++;
217            } elsif (/^D _LIB_VERSION$/) {
218                # Skip the _LIB_VERSION (not ours, probably libm)
219            } elsif (/^[DdGg] (\w+)$/) {
220                # Dd: initialized data
221                # Gg: initialized "for small objects"
222                $symbols->{data}{data}{$1}{$symbols->{o}}++;
223            } elsif (/^. \.?(\w+)$/) {
224                # Skip the unknown types.
225                print "# Unknown type: $line ($symbols->{o})\n";
226            }
227            return;
228        } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
229            my ($symbol) = $1;
230            return if is_perlish_symbol($symbol);
231            $symbols->{undef}{$symbol}{$symbols->{o}}++;
232            return;
233	}
234    }
235    print "# Unexpected nm output '$line' ($symbols->{o})\n";
236}
237
238sub nm_parse_darwin {
239    my $symbols = shift;
240    my $line = $_;
241    if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
242        # object file name
243        $symbols->{obj}{$1}++;
244        $symbols->{o} = $1;
245        return;
246    } else {
247        die "$0: undefined current object: $line" unless defined $symbols->{o};
248        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
249        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
250            # String literals can live in different sections
251            # depending on the compiler and os release, assumedly
252            # also linker flags.
253            if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
254                my ($symbol, $suffix) = ($1, $2);
255                # Ignore function-local constants like
256                # _Perl_av_extend_guts.oom_array_extend
257                return if defined $suffix && /__TEXT,__const/;
258                # Ignore the cstring unnamed strings.
259                return if $symbol =~ /^L\.str\d+$/;
260                $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
261            } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) {
262                my ($exp, $sym) = ($1, $2);
263                $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++;
264            } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
265                my ($dtype, $symbol, $suffix) = ($1, $2, $3);
266                # Ignore function-local constants like
267                # _Perl_pp_gmtime.dayname
268                return if defined $suffix;
269                $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
270            } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
271                # Skip this, whatever it is (some inlined leakage from
272                # darwin libc?)
273            } elsif (/^\(__TEXT,__eh_frame/) {
274                # Skip the eh_frame (exception handling) symbols.
275                return;
276            } elsif (/^\(__\w+,__\w+\) /) {
277                # Skip the unknown types.
278                print "# Unknown type: $line ($symbols->{o})\n";
279            }
280            return;
281        } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
282            # darwin/ppc marks most undefined text symbols
283            # as "[lazy bound]".
284            my ($symbol) = $1 =~ s/\$UNIX2003\z//r;
285            return if is_perlish_symbol($symbol);
286            $symbols->{undef}{$symbol}{$symbols->{o}}++;
287            return;
288        }
289    }
290    print "# Unexpected nm output '$line' ($symbols->{o})\n";
291}
292
293my $nm_parse;
294
295if ($nm_style eq 'gnu') {
296    $nm_parse = \&nm_parse_gnu;
297} elsif ($nm_style eq 'darwin') {
298    $nm_parse = \&nm_parse_darwin;
299}
300
301unless (defined $nm_parse) {
302    skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
303}
304
305my %symbols;
306
307while (<$nm_fh>) {
308    next if /^$/;
309    chomp;
310    $nm_parse->(\%symbols);
311}
312
313# use Data::Dumper; print Dumper(\%symbols);
314
315# Something went awfully wrong.  Wrong nm?  Wrong options?
316unless (keys %symbols) {
317    skip_all "no symbols\n";
318}
319unless (exists $symbols{text}) {
320    skip_all "no text symbols\n";
321}
322
323# These should always be true for everyone.
324
325ok($symbols{obj}{'pp.o'}, "has object pp.o");
326ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
327ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
328ok(exists $symbols{data}{const}, "has data const symbols");
329ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
330
331my $GS  = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0;
332my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0;
333my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0;
334
335print "# GS  = $GS\n";
336print "# GSP = $GSP\n";
337print "# nocommon = $nocommon\n";
338
339my %data_symbols;
340
341for my $dtype (sort keys %{$symbols{data}}) {
342    for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
343        $data_symbols{$symbol}++;
344    }
345}
346
347# The following tests differ between vanilla vs $GSP or $GS.
348
349if ($GSP) {
350    print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n";
351    ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
352    ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
353
354    ok(! exists $symbols{data}{bss}, "has no data bss symbols")
355        or do {
356            my $bad = "BSS entries (there are supposed to be none):\n";
357            $bad .= "  bss sym: $_\n" for sort keys %{$symbols{data}{bss}};
358            diag($bad);
359        };
360
361    ok(! exists $symbols{data}{data} ||
362            # clang with ASAN seems to add this symbol to every object file:
363            !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}),
364        "has no data data symbols")
365        or do {
366            my $bad = "DATA entries (there are supposed to be none):\n";
367            $bad .= "  data sym: $_\n" for sort keys %{$symbols{data}{data}};
368            diag($bad);
369        };
370
371    ok(! exists $symbols{data}{common}, "has no data common symbols")
372        or do {
373            my $bad = "COMMON entries (there are supposed to be none):\n";
374            $bad .= "  common sym: $_\n" for sort keys %{$symbols{data}{common}};
375            diag($bad);
376        };
377
378    # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have
379    # the extra text symbol for accessing the vars
380    # (as opposed to "just" -DPERL_GLOBAL_STRUCT)
381    ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
382} elsif ($GS) {
383    print "# -DPERL_GLOBAL_STRUCT\n";
384    ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
385    ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
386
387    if ($nocommon) {
388        $symbols{data}{common} = $symbols{data}{bss};
389        delete $symbols{data}{bss};
390    }
391
392    ok(! exists $symbols{data}{bss}, "has no data bss symbols")
393        or do {
394            my $bad = "BSS entries (there are supposed to be none):\n";
395            $bad .= "  bss sym: $_\n" for sort keys %{$symbols{data}{bss}};
396            diag($bad);
397        };
398
399
400    # These PerlIO data symbols are left visible with
401    # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE)
402    my @PerlIO =
403        qw(
404           PerlIO_byte
405           PerlIO_crlf
406           PerlIO_pending
407           PerlIO_perlio
408           PerlIO_raw
409           PerlIO_remove
410           PerlIO_stdio
411           PerlIO_unix
412           PerlIO_utf8
413          );
414
415    # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but
416    # otherwise not const -- because of SWIG which wants to modify
417    # the table.  Evil SWIG, eeevil.
418
419    # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which
420    # -DPERL_GLOBAL_STRUCT has turned on.
421    eq_array([sort keys %{$symbols{data}{data}}],
422             [sort('PL_VarsPtr',
423                   @PerlIO,
424                   'PL_magic_vtables',
425                   'my_cxt_index')],
426             "data data symbols");
427
428    # Only one data common symbol, our "supervariable".
429    eq_array([sort keys %{$symbols{data}{common}}],
430             ['PL_Vars'],
431             "data common symbols");
432
433    ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr");
434    ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars");
435
436    # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars.
437    ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars");
438} else {
439    print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n";
440
441    if ( !$symbols{data}{common} ) {
442        # This is likely because Perl was compiled with
443        # -Accflags="-fno-common"
444        $symbols{data}{common} = $symbols{data}{bss};
445    }
446
447    ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed");
448    ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
449
450    # None of the GLOBAL_STRUCT* business here.
451    ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr");
452    ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars");
453    ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
454}
455
456# See the comments in the beginning for what "undefined symbols"
457# really means.  We *should* have many of those, that is a good thing.
458ok(keys %{$symbols{undef}}, "has undefined symbols");
459
460# There are certain symbols we expect to see.
461
462# chmod, socket, getenv, sigaction, exp, time are system/library
463# calls that should each see at least one use. exp can be expl
464# if so configured.
465my %expected = (
466    chmod  => undef, # There is no Configure symbol for chmod.
467    socket => 'd_socket',
468    getenv => undef, # There is no Configure symbol for getenv,
469    sigaction => 'd_sigaction',
470    time   => 'd_time',
471    );
472
473if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) {
474    $expected{expl} = undef; # There is no Configure symbol for expl.
475} elsif ($Config{usequadmath}) {
476    $expected{expq} = undef; # There is no Configure symbol for expq.
477} else {
478    $expected{exp} = undef; # There is no Configure symbol for exp.
479}
480
481# DynaLoader will use dlopen, unless we are building static,
482# and it is used in the platforms we are supporting in this test.
483if ($Config{usedl} ) {
484    $expected{dlopen} = 'd_dlopen';
485}
486
487for my $symbol (sort keys %expected) {
488    if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
489      SKIP: {
490        skip("no $symbol");
491      }
492      next;
493    }
494    my @o = exists $symbols{undef}{$symbol} ?
495        sort keys %{ $symbols{undef}{$symbol} } : ();
496    ok(@o, "uses $symbol (@o)");
497}
498
499# There are certain symbols we expect NOT to see.
500#
501# gets is horribly unsafe.
502#
503# fgets should not be used (Perl has its own API, sv_gets),
504# even without perlio.
505#
506# tmpfile is unsafe.
507#
508# strcat, strcpy, strncat, strncpy are unsafe.
509#
510# sprintf and vsprintf should not be used because
511# Perl has its own safer and more portable implementations.
512# (One exception: for certain floating point outputs
513# the native sprintf is still used in some platforms, see below.)
514#
515# atoi has unsafe and undefined failure modes, and is affected by locale.
516# Its cousins include atol and atoll.
517#
518# strtol and strtoul are affected by locale.
519# Cousins include strtoq.
520#
521# system should not be used, use pp_system or my_popen.
522#
523
524my %unexpected;
525
526for my $str (qw(system)) {
527    $unexpected{$str} = "d_$str";
528}
529
530for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
531    $unexpected{$stdio} = undef; # No Configure symbol for these.
532}
533for my $str (qw(strcat strcpy strncat strncpy)) {
534    $unexpected{$str} = undef; # No Configure symbol for these.
535}
536
537$unexpected{atoi} = undef; # No Configure symbol for atoi.
538$unexpected{atol} = undef; # No Configure symbol for atol.
539
540for my $str (qw(atoll strtol strtoul strtoq)) {
541    $unexpected{$str} = "d_$str";
542}
543
544for my $symbol (sort keys %unexpected) {
545    if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
546      SKIP: {
547        skip("no $symbol");
548      }
549      next;
550    }
551    my @o = exists $symbols{undef}{$symbol} ?
552        sort keys %{ $symbols{undef}{$symbol} } : ();
553    # While sprintf() is bad in the general case,
554    # some platforms implement Gconvert via sprintf, in sv.o.
555    if ($symbol eq 'sprintf' &&
556        $Config{d_Gconvert} =~ /^sprintf/ &&
557        @o == 1 && $o[0] eq 'sv.o') {
558      SKIP: {
559        skip("uses sprintf for Gconvert in sv.o");
560      }
561    } else {
562        is(@o, 0, "uses no $symbol (@o)");
563    }
564}
565
566# Check that any text symbols named S_ are not exported.
567my $export_S_prefix = 0;
568for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) {
569    for my $o (sort keys %{$symbols{text}{$t}}) {
570        if (exists $symbols{text}{$t}{$o}{T}) {
571            fail($t, "$t exported from $o");
572            $export_S_prefix++;
573        }
574    }
575}
576is($export_S_prefix, 0, "no S_ exports");
577
578if (defined $nm_err_tmp) {
579    if (open(my $nm_err_fh, $nm_err_tmp)) {
580        my $error;
581        while (<$nm_err_fh>) {
582            # OS X has weird error where nm warns about
583            # "no name list" but then outputs fine.
584            if ( $^O eq 'darwin' ) {
585                if (/nm: no name list/ || /^no symbols$/ ) {
586                    print "# $^O ignoring $nm output: $_";
587                    next;
588                }
589            }
590            warn "$0: Unexpected $nm error: $_";
591            $error++;
592        }
593        die "$0: Unexpected $nm errors\n" if $error;
594    } else {
595        warn "Failed to open '$nm_err_tmp': $!\n";
596    }
597}
598
599done_testing();
600