1#!/usr/bin/perl -w
2
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
6# Fail softly, expect things only on known platforms:
7# - linux, x86 only (ppc linux has odd symbol tables)
8# - darwin (OS X), both x86 and ppc
9# - freebsd
10# and on other platforms, and if things seem odd, just give up (skip_all).
11#
12# Debugging tip: nm output (this script's input) can be faked by
13# giving one command line argument for this script: it should be
14# either the filename to read, or "-" for STDIN.  You can also append
15# "@style" (where style is a supported nm style, like "gnu" or "darwin")
16# to this filename for "cross-parsing".
17#
18# Some terminology:
19# - "text" symbols are code
20# - "data" symbols are data (duh), with subdivisions:
21#   - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
22#     uninitialized data, which often even doesn't exist in the object
23#     file as such, only its size does, which is then created on demand
24#     by the loader
25#  - "const": initialized read-only data, like string literals
26#  - "common": uninitialized data unless initialized...
27#    (the full story is too long for here, see "man nm")
28#  - "data": initialized read-write data
29#    (somewhat confusingly below: "data data", but it makes code simpler)
30#  - "undefined": external symbol referred to by an object,
31#    most likely a text symbol.  Can be either a symbol defined by
32#    a Perl object file but referred to by other Perl object files,
33#    or a completely external symbol from libc, or other system libraries.
34
35BEGIN {
36    chdir 't' if -d 't';
37    @INC = '../lib';
38    require "./test.pl";
39}
40
41use strict;
42
43use Config;
44
45if ($Config{cc} =~ /g\+\+/) {
46    # XXX Could use c++filt, maybe.
47    skip_all "on g++";
48}
49
50my $libperl_a;
51
52for my $f (qw(../libperl.a libperl.a)) {
53  if (-f $f) {
54    $libperl_a = $f;
55    last;
56  }
57}
58
59unless (defined $libperl_a) {
60  skip_all "no libperl.a";
61}
62
63print "# \$^O = $^O\n";
64print "# \$Config{archname} = $Config{archname}\n";
65print "# \$Config{cc} = $Config{cc}\n";
66print "# libperl = $libperl_a\n";
67
68my $nm;
69my $nm_opt = '';
70my $nm_style;
71my $nm_fh;
72my $nm_err_tmp = "libperl$$";
73
74END {
75    # this is still executed when we skip_all above, avoid a warning
76    unlink $nm_err_tmp if $nm_err_tmp;
77}
78
79my $fake_input;
80my $fake_style;
81
82if (@ARGV == 1) {
83    $fake_input = shift @ARGV;
84    print "# Faking nm output from $fake_input\n";
85    if ($fake_input =~ s/\@(.+)$//) {
86        $fake_style = $1;
87        print "# Faking nm style from $fake_style\n";
88        if ($fake_style eq 'gnu' ||
89            $fake_style eq 'linux' ||
90            $fake_style eq 'freebsd') {
91            $nm_style = 'gnu'
92        } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
93            $nm_style = 'darwin'
94        } else {
95            die "$0: Unknown explicit nm style '$fake_style'\n";
96        }
97    }
98}
99
100unless (defined $nm_style) {
101    if ($^O eq 'linux') {
102        # The 'gnu' style could be equally well be called 'bsd' style,
103        # since the output format of the GNU binutils nm is really BSD.
104        $nm_style = 'gnu';
105    } elsif ($^O eq 'freebsd') {
106        $nm_style = 'gnu';
107    } elsif ($^O eq 'darwin') {
108        $nm_style = 'darwin';
109    }
110}
111
112if (defined $nm_style) {
113    if ($nm_style eq 'gnu') {
114        $nm = '/usr/bin/nm';
115    } elsif ($nm_style eq 'darwin') {
116        $nm = '/usr/bin/nm';
117        # With the -m option we get better information than the BSD-like
118        # default: with the default, a lot of symbols get dumped into 'S'
119        # or 's', for example one cannot tell the difference between const
120        # and non-const data symbols.
121        $nm_opt = '-m';
122    } else {
123        die "$0: Unexpected nm style '$nm_style'\n";
124    }
125}
126
127if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) {
128    # For example in ppc most (but not all!) code symbols are placed
129    # in 'D' (data), not in ' T '.  We cannot work under such conditions.
130    skip_all "linux but archname $Config{archname} not x86*";
131}
132
133unless (defined $nm) {
134  skip_all "no nm";
135}
136
137unless (defined $nm_style) {
138  skip_all "no nm style";
139}
140
141print "# nm = $nm\n";
142print "# nm_style = $nm_style\n";
143print "# nm_opt = $nm_opt\n";
144
145unless (-x $nm) {
146    skip_all "no executable nm $nm";
147}
148
149if ($nm_style eq 'gnu' && !defined $fake_style) {
150    open(my $gnu_verify, "$nm --version|") or
151        skip_all "nm failed: $!";
152    my $gnu_verified;
153    while (<$gnu_verify>) {
154        if (/^GNU nm/) {
155            $gnu_verified = 1;
156            last;
157        }
158    }
159    unless ($gnu_verified) {
160        skip_all "no GNU nm";
161    }
162}
163
164if (defined $fake_input) {
165    if ($fake_input eq '-') {
166        open($nm_fh, "<&STDIN") or
167            skip_all "Duping STDIN failed: $!";
168    } else {
169        open($nm_fh, "<", $fake_input) or
170            skip_all "Opening '$fake_input' failed: $!";
171    }
172    undef $nm_err_tmp; # In this case there will be no nm errors.
173} else {
174    print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n};
175    open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
176        skip_all "$nm $nm_opt $libperl_a failed: $!";
177}
178
179sub is_perlish_symbol {
180    $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
181}
182
183# XXX Implement "internal test" for this script (option -t?)
184# to verify that the parsing does what it's intended to.
185
186sub nm_parse_gnu {
187    my $symbols = shift;
188    my $line = $_;
189    if (m{^(\w+\.o):$}) {
190        # object file name
191        $symbols->{obj}{$1}++;
192        $symbols->{o} = $1;
193        return;
194    } else {
195        die "$0: undefined current object: $line"
196            unless defined $symbols->{o};
197        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
198        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
199            if (/^[Rr] (\w+)$/) {
200                # R: read only (const)
201                $symbols->{data}{const}{$1}{$symbols->{o}}++;
202            } elsif (/^r .+$/) {
203                # Skip local const (read only).
204            } elsif (/^([Tti]) (\w+)(\..+)?$/) {
205                $symbols->{text}{$2}{$symbols->{o}}{$1}++;
206            } elsif (/^C (\w+)$/) {
207                $symbols->{data}{common}{$1}{$symbols->{o}}++;
208            } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
209                # Bb: uninitialized data (bss)
210                # Ss: uninitialized data "for small objects"
211                $symbols->{data}{bss}{$1}{$symbols->{o}}++;
212            } elsif (/^D _LIB_VERSION$/) {
213                # Skip the _LIB_VERSION (not ours, probably libm)
214            } elsif (/^[DdGg] (\w+)$/) {
215                # Dd: initialized data
216                # Gg: initialized "for small objects"
217                $symbols->{data}{data}{$1}{$symbols->{o}}++;
218            } elsif (/^. \.?(\w+)$/) {
219                # Skip the unknown types.
220                print "# Unknown type: $line ($symbols->{o})\n";
221            }
222            return;
223        } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
224            my ($symbol) = $1;
225            return if is_perlish_symbol($symbol);
226            $symbols->{undef}{$symbol}{$symbols->{o}}++;
227            return;
228	}
229    }
230    print "# Unexpected nm output '$line' ($symbols->{o})\n";
231}
232
233sub nm_parse_darwin {
234    my $symbols = shift;
235    my $line = $_;
236    if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
237        # object file name
238        $symbols->{obj}{$1}++;
239        $symbols->{o} = $1;
240        return;
241    } else {
242        die "$0: undefined current object: $line" unless defined $symbols->{o};
243        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
244        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
245            # String literals can live in different sections
246            # depending on the compiler and os release, assumedly
247            # also linker flags.
248            if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
249                my ($symbol, $suffix) = ($1, $2);
250                # Ignore function-local constants like
251                # _Perl_av_extend_guts.oom_array_extend
252                return if defined $suffix && /__TEXT,__const/;
253                # Ignore the cstring unnamed strings.
254                return if $symbol =~ /^L\.str\d+$/;
255                $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
256            } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) {
257                my ($exp, $sym) = ($1, $2);
258                $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++;
259            } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
260                my ($dtype, $symbol, $suffix) = ($1, $2, $3);
261                # Ignore function-local constants like
262                # _Perl_pp_gmtime.dayname
263                return if defined $suffix;
264                $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
265            } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
266                # Skip this, whatever it is (some inlined leakage from
267                # darwin libc?)
268            } elsif (/^\(__TEXT,__eh_frame/) {
269                # Skip the eh_frame (exception handling) symbols.
270                return;
271            } elsif (/^\(__\w+,__\w+\) /) {
272                # Skip the unknown types.
273                print "# Unknown type: $line ($symbols->{o})\n";
274            }
275            return;
276        } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
277            # darwin/ppc marks most undefined text symbols
278            # as "[lazy bound]".
279            my ($symbol) = $1 =~ s/\$UNIX2003\z//r;
280            return if is_perlish_symbol($symbol);
281            $symbols->{undef}{$symbol}{$symbols->{o}}++;
282            return;
283        }
284    }
285    print "# Unexpected nm output '$line' ($symbols->{o})\n";
286}
287
288my $nm_parse;
289
290if ($nm_style eq 'gnu') {
291    $nm_parse = \&nm_parse_gnu;
292} elsif ($nm_style eq 'darwin') {
293    $nm_parse = \&nm_parse_darwin;
294}
295
296unless (defined $nm_parse) {
297    skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
298}
299
300my %symbols;
301
302while (<$nm_fh>) {
303    next if /^$/;
304    chomp;
305    $nm_parse->(\%symbols);
306}
307
308# use Data::Dumper; print Dumper(\%symbols);
309
310# Something went awfully wrong.  Wrong nm?  Wrong options?
311unless (keys %symbols) {
312    skip_all "no symbols\n";
313}
314unless (exists $symbols{text}) {
315    skip_all "no text symbols\n";
316}
317
318# These should always be true for everyone.
319
320ok($symbols{obj}{'pp.o'}, "has object pp.o");
321ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
322ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
323ok(exists $symbols{data}{const}, "has data const symbols");
324ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
325
326my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0;
327
328print "# nocommon = $nocommon\n";
329
330my %data_symbols;
331
332for my $dtype (sort keys %{$symbols{data}}) {
333    for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
334        $data_symbols{$symbol}++;
335    }
336}
337
338if ( !$symbols{data}{common} ) {
339    # This is likely because Perl was compiled with
340    # -Accflags="-fno-common"
341    $symbols{data}{common} = $symbols{data}{bss};
342}
343
344ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w");
345ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
346
347# See the comments in the beginning for what "undefined symbols"
348# really means.  We *should* have many of those, that is a good thing.
349ok(keys %{$symbols{undef}}, "has undefined symbols");
350
351# There are certain symbols we expect to see.
352
353# chmod, socket, getenv, sigaction, exp, time are system/library
354# calls that should each see at least one use. exp can be expl
355# if so configured.
356my %expected = (
357    chmod  => undef, # There is no Configure symbol for chmod.
358    socket => 'd_socket',
359    getenv => undef, # There is no Configure symbol for getenv,
360    sigaction => 'd_sigaction',
361    time   => 'd_time',
362    );
363
364if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) {
365    $expected{expl} = undef; # There is no Configure symbol for expl.
366} elsif ($Config{usequadmath}) {
367    $expected{expq} = undef; # There is no Configure symbol for expq.
368} else {
369    $expected{exp} = undef; # There is no Configure symbol for exp.
370}
371
372# DynaLoader will use dlopen, unless we are building static,
373# and it is used in the platforms we are supporting in this test.
374if ($Config{usedl} ) {
375    $expected{dlopen} = 'd_dlopen';
376}
377
378for my $symbol (sort keys %expected) {
379    if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
380      SKIP: {
381        skip("no $symbol");
382      }
383      next;
384    }
385    my @o = exists $symbols{undef}{$symbol} ?
386        sort keys %{ $symbols{undef}{$symbol} } : ();
387    ok(@o, "uses $symbol (@o)");
388}
389
390# There are certain symbols we expect NOT to see.
391#
392# gets is horribly unsafe.
393#
394# fgets should not be used (Perl has its own API, sv_gets),
395# even without perlio.
396#
397# tmpfile is unsafe.
398#
399# strcat, strcpy, strncat, strncpy are unsafe.
400#
401# sprintf and vsprintf should not be used because
402# Perl has its own safer and more portable implementations.
403# (One exception: for certain floating point outputs
404# the native sprintf is still used in some platforms, see below.)
405#
406# atoi has unsafe and undefined failure modes, and is affected by locale.
407# Its cousins include atol and atoll.
408#
409# strtol and strtoul are affected by locale.
410# Cousins include strtoq.
411#
412# system should not be used, use pp_system or my_popen.
413#
414
415my %unexpected;
416
417for my $str (qw(system)) {
418    $unexpected{$str} = "d_$str";
419}
420
421for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
422    $unexpected{$stdio} = undef; # No Configure symbol for these.
423}
424for my $str (qw(strcat strcpy strncat strncpy)) {
425    $unexpected{$str} = undef; # No Configure symbol for these.
426}
427
428$unexpected{atoi} = undef; # No Configure symbol for atoi.
429$unexpected{atol} = undef; # No Configure symbol for atol.
430
431for my $str (qw(atoll strtol strtoul strtoq)) {
432    $unexpected{$str} = "d_$str";
433}
434
435for my $symbol (sort keys %unexpected) {
436    if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
437      SKIP: {
438        skip("no $symbol");
439      }
440      next;
441    }
442    my @o = exists $symbols{undef}{$symbol} ?
443        sort keys %{ $symbols{undef}{$symbol} } : ();
444    # While sprintf() is bad in the general case,
445    # some platforms implement Gconvert via sprintf, in sv.o.
446    if ($symbol eq 'sprintf' &&
447        $Config{d_Gconvert} =~ /^sprintf/ &&
448        @o == 1 && $o[0] eq 'sv.o') {
449      SKIP: {
450        skip("uses sprintf for Gconvert in sv.o");
451      }
452    } else {
453        is(@o, 0, "uses no $symbol (@o)");
454    }
455}
456
457# Check that any text symbols named S_ are not exported.
458my $export_S_prefix = 0;
459for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) {
460    for my $o (sort keys %{$symbols{text}{$t}}) {
461        if (exists $symbols{text}{$t}{$o}{T}) {
462            fail($t, "$t exported from $o");
463            $export_S_prefix++;
464        }
465    }
466}
467is($export_S_prefix, 0, "no S_ exports");
468
469if (defined $nm_err_tmp) {
470    if (open(my $nm_err_fh, $nm_err_tmp)) {
471        my $error;
472        while (<$nm_err_fh>) {
473            # OS X has weird error where nm warns about
474            # "no name list" but then outputs fine.
475            # llvm-nm may also complain about 'no symbols'. In some
476            # versions this is exactly the string "no symbols\n" but in later
477            # versions becomes a string followed by ": no symbols\n". For this
478            # test it is typically "../libperl.a:perlapi.o: no symbols\n"
479            if ( $^O eq 'darwin' ) {
480                if (/nm: no name list/ || /^(.*: )?no symbols$/ ) {
481                    print "# $^O ignoring $nm output: $_";
482                    next;
483                }
484            }
485            warn "$0: Unexpected $nm error: $_";
486            $error++;
487        }
488        die "$0: Unexpected $nm errors\n" if $error;
489    } else {
490        warn "Failed to open '$nm_err_tmp': $!\n";
491    }
492}
493
494done_testing();
495