xref: /openbsd/gnu/usr.bin/perl/t/porting/libperl.t (revision 3d61058a)
1#!/usr/bin/perl -w
2
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
6# Fail softly, expect things only on known platforms:
7# - linux, x86 only (ppc linux has odd symbol tables)
8# - darwin (OS X), both x86 and ppc
9# - freebsd
10# and on other platforms, and if things seem odd, just give up (skip_all).
11#
12# Symbol types for LTO builds don't seem to match their final section, so
13# skip on LTO builds too.
14#
15# Debugging tip: nm output (this script's input) can be faked by
16# giving one command line argument for this script: it should be
17# either the filename to read, or "-" for STDIN.  You can also append
18# "@style" (where style is a supported nm style, like "gnu" or "darwin")
19# to this filename for "cross-parsing".
20#
21# Some terminology:
22# - "text" symbols are code
23# - "data" symbols are data (duh), with subdivisions:
24#   - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
25#     uninitialized data, which often even doesn't exist in the object
26#     file as such, only its size does, which is then created on demand
27#     by the loader
28#  - "const": initialized read-only data, like string literals
29#  - "common": uninitialized data unless initialized...
30#    (the full story is too long for here, see "man nm")
31#  - "data": initialized read-write data
32#    (somewhat confusingly below: "data data", but it makes code simpler)
33#  - "undefined": external symbol referred to by an object,
34#    most likely a text symbol.  Can be either a symbol defined by
35#    a Perl object file but referred to by other Perl object files,
36#    or a completely external symbol from libc, or other system libraries.
37
38BEGIN {
39    chdir 't' if -d 't';
40    @INC = '../lib';
41    require "./test.pl";
42}
43
44use strict;
45
46use Config;
47
48# maint (and tarballs of maint releases) may not have updates here to
49# deal with changes to nm's output in some toolchains
50$^V =~ /^v\d+\.\d*[13579]\./
51  or skip_all "on maint";
52
53if ($Config{cc} =~ /g\+\+/) {
54    # XXX Could use c++filt, maybe.
55    skip_all "on g++";
56}
57
58# ccname is gcc for both gcc and clang
59if ($Config{ccname} eq "gcc" && $Config{ccflags} =~ /-flto\b/) {
60    # If we compile with gcc nm marks PL_no_mem as "D" (normal data) rather than a R (read only)
61    # but the symbol still ends up in the .rodata section of the image on linking.
62    # If we compile with clang 14, nm marks PL_no_mem as "T" (text, aka code) rather than R
63    # but the symbol still ends up in the .rodata section on linking.
64    skip_all "LTO libperl.a flags don't match the final linker sections";
65}
66
67my $libperl_a;
68
69for my $f (qw(../libperl.a libperl.a)) {
70  if (-f $f) {
71    $libperl_a = $f;
72    last;
73  }
74}
75
76unless (defined $libperl_a) {
77  skip_all "no libperl.a";
78}
79
80print "# \$^O = $^O\n";
81print "# \$Config{archname} = $Config{archname}\n";
82print "# \$Config{cc} = $Config{cc}\n";
83print "# libperl = $libperl_a\n";
84
85my $nm;
86my $nm_opt = '';
87my $nm_style;
88my $nm_fh;
89my $nm_err_tmp = "libperl$$";
90
91END {
92    # this is still executed when we skip_all above, avoid a warning
93    unlink $nm_err_tmp if $nm_err_tmp;
94}
95
96my $fake_input;
97my $fake_style;
98
99if (@ARGV == 1) {
100    $fake_input = shift @ARGV;
101    print "# Faking nm output from $fake_input\n";
102    if ($fake_input =~ s/\@(.+)$//) {
103        $fake_style = $1;
104        print "# Faking nm style from $fake_style\n";
105        if ($fake_style eq 'gnu' ||
106            $fake_style eq 'linux' ||
107            $fake_style eq 'freebsd') {
108            $nm_style = 'gnu'
109        } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
110            $nm_style = 'darwin'
111        } else {
112            die "$0: Unknown explicit nm style '$fake_style'\n";
113        }
114    }
115}
116
117unless (defined $nm_style) {
118    if ($^O eq 'linux') {
119        # The 'gnu' style could be equally well be called 'bsd' style,
120        # since the output format of the GNU binutils nm is really BSD.
121        $nm_style = 'gnu';
122    } elsif ($^O eq 'freebsd') {
123        $nm_style = 'gnu';
124    } elsif ($^O eq 'darwin') {
125        $nm_style = 'darwin';
126    }
127}
128
129if (defined $nm_style) {
130    if ($nm_style eq 'gnu') {
131        $nm = '/usr/bin/nm';
132    } elsif ($nm_style eq 'darwin') {
133        $nm = '/usr/bin/nm';
134        # With the -m option we get better information than the BSD-like
135        # default: with the default, a lot of symbols get dumped into 'S'
136        # or 's', for example one cannot tell the difference between const
137        # and non-const data symbols.
138        $nm_opt = '-m';
139    } else {
140        die "$0: Unexpected nm style '$nm_style'\n";
141    }
142}
143
144if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) {
145    # For example in ppc most (but not all!) code symbols are placed
146    # in 'D' (data), not in ' T '.  We cannot work under such conditions.
147    skip_all "linux but archname $Config{archname} not x86*";
148}
149
150unless (defined $nm) {
151  skip_all "no nm";
152}
153
154unless (defined $nm_style) {
155  skip_all "no nm style";
156}
157
158print "# nm = $nm\n";
159print "# nm_style = $nm_style\n";
160print "# nm_opt = $nm_opt\n";
161
162unless (-x $nm) {
163    skip_all "no executable nm $nm";
164}
165
166if ($nm_style eq 'gnu' && !defined $fake_style) {
167    open(my $gnu_verify, "$nm --version|") or
168        skip_all "nm failed: $!";
169    my $gnu_verified;
170    while (<$gnu_verify>) {
171        if (/^GNU nm/) {
172            $gnu_verified = 1;
173            last;
174        }
175    }
176    unless ($gnu_verified) {
177        skip_all "no GNU nm";
178    }
179}
180
181if (defined $fake_input) {
182    if ($fake_input eq '-') {
183        open($nm_fh, "<&STDIN") or
184            skip_all "Duping STDIN failed: $!";
185    } else {
186        open($nm_fh, "<", $fake_input) or
187            skip_all "Opening '$fake_input' failed: $!";
188    }
189    undef $nm_err_tmp; # In this case there will be no nm errors.
190} else {
191    print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n};
192    open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
193        skip_all "$nm $nm_opt $libperl_a failed: $!";
194}
195
196sub is_perlish_symbol {
197    $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
198}
199
200# XXX Implement "internal test" for this script (option -t?)
201# to verify that the parsing does what it's intended to.
202
203sub nm_parse_gnu {
204    my $symbols = shift;
205    my $line = $_;
206    if (m{^(\w+\.o):$}) {
207        # object file name
208        $symbols->{obj}{$1}++;
209        $symbols->{o} = $1;
210        return;
211    } else {
212        die "$0: undefined current object: $line"
213            unless defined $symbols->{o};
214        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
215        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
216            if (/^[Rr] (\w+)$/) {
217                # R: read only (const)
218                $symbols->{data}{const}{$1}{$symbols->{o}}++;
219            } elsif (/^r .+$/) {
220                # Skip local const (read only).
221            } elsif (/^([Tti]) (\w+)(\..+)?$/) {
222                $symbols->{text}{$2}{$symbols->{o}}{$1}++;
223            } elsif (/^C (\w+)$/) {
224                $symbols->{data}{common}{$1}{$symbols->{o}}++;
225            } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
226                # Bb: uninitialized data (bss)
227                # Ss: uninitialized data "for small objects"
228                $symbols->{data}{bss}{$1}{$symbols->{o}}++;
229            } elsif (/^D _LIB_VERSION$/) {
230                # Skip the _LIB_VERSION (not ours, probably libm)
231            } elsif (/^[DdGg] (\w+)$/) {
232                # Dd: initialized data
233                # Gg: initialized "for small objects"
234                $symbols->{data}{data}{$1}{$symbols->{o}}++;
235            } elsif (/^. \.?(\w+)$/) {
236                # Skip the unknown types.
237                print "# Unknown type: $line ($symbols->{o})\n";
238            }
239            return;
240        } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
241            my ($symbol) = $1;
242            return if is_perlish_symbol($symbol);
243            $symbols->{undef}{$symbol}{$symbols->{o}}++;
244            return;
245	}
246    }
247    print "# Unexpected nm output '$line' ($symbols->{o})\n";
248}
249
250sub nm_parse_darwin {
251    my $symbols = shift;
252    my $line = $_;
253    if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$} ||
254        m{^(\w+\.o):$}) {
255        # object file name
256        $symbols->{obj}{$1}++;
257        $symbols->{o} = $1;
258        return;
259    } else {
260        die "$0: undefined current object: $line" unless defined $symbols->{o};
261        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
262        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
263            # String literals can live in different sections
264            # depending on the compiler and os release, assumedly
265            # also linker flags.
266            if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+){0,2}$/) {
267                my ($symbol, $suffix) = ($1, $2);
268                # Ignore function-local constants like
269                # _Perl_av_extend_guts.oom_array_extend
270                return if defined $suffix && /__TEXT,__const/;
271                # Ignore the cstring unnamed strings.
272                return if $symbol =~ /^L\.str\d+$/;
273                $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
274            } elsif (/^\(__TEXT,__text\) ((?:non-|private )?external) \[cold func\] _(\w+\.cold\.[1-9][0-9]*)$/) {
275                # for N_COLD_FUNC symbols in MachO
276                # eg. 0000000000022c60 (__TEXT,__text) non-external [cold func] _Perl_lex_next_chunk.cold.1 (toke.o)
277            } elsif (/^\(__TEXT,__text\) ((?:non-|private )?external) _(\w+)$/) {
278                my ($exp, $sym) = ($1, $2);
279                $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++;
280            } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+){0,3}$/) {
281                my ($dtype, $symbol, $suffix) = ($1, $2, $3);
282                # Ignore function-local constants like
283                # _Perl_pp_gmtime.dayname
284                return if defined $suffix;
285                $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
286            } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
287                # Skip this, whatever it is (some inlined leakage from
288                # darwin libc?)
289            } elsif (/^\(__TEXT,__eh_frame/) {
290                # Skip the eh_frame (exception handling) symbols.
291                return;
292            } elsif (/^\(__\w+,__\w+\) /) {
293                # Skip the unknown types.
294                print "# Unknown type: $line ($symbols->{o})\n";
295            }
296            return;
297        } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
298            # darwin/ppc marks most undefined text symbols
299            # as "[lazy bound]".
300            my ($symbol) = $1 =~ s/\$UNIX2003\z//r;
301            return if is_perlish_symbol($symbol);
302            $symbols->{undef}{$symbol}{$symbols->{o}}++;
303            return;
304        }
305    }
306    print "# Unexpected nm output '$line' ($symbols->{o})\n";
307}
308
309my $nm_parse;
310
311if ($nm_style eq 'gnu') {
312    $nm_parse = \&nm_parse_gnu;
313} elsif ($nm_style eq 'darwin') {
314    $nm_parse = \&nm_parse_darwin;
315}
316
317unless (defined $nm_parse) {
318    skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
319}
320
321my %symbols;
322
323while (<$nm_fh>) {
324    next if /^$/;
325    chomp;
326    $nm_parse->(\%symbols);
327}
328
329# use Data::Dumper; print Dumper(\%symbols);
330
331# Something went awfully wrong.  Wrong nm?  Wrong options?
332unless (keys %symbols) {
333    skip_all "no symbols\n";
334}
335unless (exists $symbols{text}) {
336    skip_all "no text symbols\n";
337}
338
339# These should always be true for everyone.
340
341ok($symbols{obj}{'util.o'}, "has object util.o");
342ok($symbols{text}{'Perl_croak'}{'util.o'}, "has text Perl_croak in util.o");
343ok(exists $symbols{data}{const}, "has data const symbols");
344ok($symbols{data}{const}{PL_no_modify}{'globals.o'}, "has PL_no_modify");
345
346my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0;
347
348print "# nocommon = $nocommon\n";
349
350my %data_symbols;
351
352for my $dtype (sort keys %{$symbols{data}}) {
353    for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
354        $data_symbols{$symbol}++;
355    }
356}
357
358if ( !$symbols{data}{common} ) {
359    # This is likely because Perl was compiled with
360    # -Accflags="-fno-common"
361    $symbols{data}{common} = $symbols{data}{bss};
362}
363
364ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w");
365ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
366
367# See the comments in the beginning for what "undefined symbols"
368# really means.  We *should* have many of those, that is a good thing.
369ok(keys %{$symbols{undef}}, "has undefined symbols");
370
371# There are certain symbols we expect to see.
372
373# chmod, socket, getenv, sigaction, exp, time are system/library
374# calls that should each see at least one use. exp can be expl
375# if so configured.
376my %expected = (
377    chmod  => undef, # There is no Configure symbol for chmod.
378    socket => 'd_socket',
379    getenv => undef, # There is no Configure symbol for getenv,
380    sigaction => 'd_sigaction',
381    time   => 'd_time',
382    );
383
384if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) {
385    $expected{expl} = undef; # There is no Configure symbol for expl.
386} elsif ($Config{usequadmath}) {
387    $expected{expq} = undef; # There is no Configure symbol for expq.
388} else {
389    $expected{exp} = undef; # There is no Configure symbol for exp.
390}
391
392# DynaLoader will use dlopen, unless we are building static,
393# and it is used in the platforms we are supporting in this test.
394if ($Config{usedl} ) {
395    $expected{dlopen} = 'd_dlopen';
396}
397
398for my $symbol (sort keys %expected) {
399    if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
400      SKIP: {
401        skip("no $symbol");
402      }
403      next;
404    }
405    my @o = exists $symbols{undef}{$symbol} ?
406        sort keys %{ $symbols{undef}{$symbol} } : ();
407    ok(@o, "uses $symbol (@o)");
408}
409
410# There are certain symbols we expect NOT to see.
411#
412# gets is horribly unsafe.
413#
414# fgets should not be used (Perl has its own API, sv_gets),
415# even without perlio.
416#
417# tmpfile is unsafe.
418#
419# strcat, strcpy, strncat, strncpy are unsafe.
420#
421# sprintf and vsprintf should not be used because
422# Perl has its own safer and more portable implementations.
423# (One exception: for certain floating point outputs
424# the native sprintf is still used in some platforms, see below.)
425#
426# atoi has unsafe and undefined failure modes, and is affected by locale.
427# Its cousins include atol and atoll.
428#
429# strtol and strtoul are affected by locale.
430# Cousins include strtoq.
431#
432# system should not be used, use pp_system or my_popen.
433#
434
435my %unexpected;
436
437for my $str (qw(system)) {
438    $unexpected{$str} = "d_$str";
439}
440
441for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
442    $unexpected{$stdio} = undef; # No Configure symbol for these.
443}
444for my $str (qw(strcat strcpy strncat strncpy)) {
445    $unexpected{$str} = undef; # No Configure symbol for these.
446}
447
448$unexpected{atoi} = undef; # No Configure symbol for atoi.
449$unexpected{atol} = undef; # No Configure symbol for atol.
450
451for my $str (qw(atoll strtol strtoul strtoq)) {
452    $unexpected{$str} = "d_$str";
453}
454
455for my $symbol (sort keys %unexpected) {
456    if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
457      SKIP: {
458        skip("no $symbol");
459      }
460      next;
461    }
462    my @o = exists $symbols{undef}{$symbol} ?
463        sort keys %{ $symbols{undef}{$symbol} } : ();
464    # While sprintf() is bad in the general case,
465    # some platforms implement Gconvert via sprintf, in sv.o.
466    if ($symbol eq 'sprintf' &&
467        $Config{d_Gconvert} =~ /^sprintf/ &&
468        @o == 1 && $o[0] eq 'sv.o') {
469      SKIP: {
470        skip("uses sprintf for Gconvert in sv.o");
471      }
472    } else {
473        is(@o, 0, "uses no $symbol (@o)");
474    }
475}
476
477# Check that any text symbols named S_ are not exported.
478my $export_S_prefix = 0;
479for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) {
480    for my $o (sort keys %{$symbols{text}{$t}}) {
481        if (exists $symbols{text}{$t}{$o}{T}) {
482            fail($t, "$t exported from $o");
483            $export_S_prefix++;
484        }
485    }
486}
487is($export_S_prefix, 0, "no S_ exports");
488
489if (defined $nm_err_tmp) {
490    if (open(my $nm_err_fh, $nm_err_tmp)) {
491        my $error;
492        while (<$nm_err_fh>) {
493            # OS X has weird error where nm warns about
494            # "no name list" but then outputs fine.
495            # llvm-nm may also complain about 'no symbols'. In some
496            # versions this is exactly the string "no symbols\n" but in later
497            # versions becomes a string followed by ": no symbols\n". For this
498            # test it is typically "../libperl.a:perlapi.o: no symbols\n"
499            if ( $^O eq 'darwin' ) {
500                if (/nm: no name list/ || /^(.*: )?no symbols$/ ) {
501                    print "# $^O ignoring $nm output: $_";
502                    next;
503                }
504            }
505            warn "$0: Unexpected $nm error: $_";
506            $error++;
507        }
508        die "$0: Unexpected $nm errors\n" if $error;
509    } else {
510        warn "Failed to open '$nm_err_tmp': $!\n";
511    }
512}
513
514done_testing();
515