1################################################################################
2#
3#  ppptools.pl -- various utility functions
4#
5#  WARNING: This will be called by old perls.  You can't use modern constructs
6#  in it.
7#
8################################################################################
9#
10#  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
11#  Version 2.x, Copyright (C) 2001, Paul Marquess.
12#  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13#
14#  This program is free software; you can redistribute it and/or
15#  modify it under the same terms as Perl itself.
16#
17################################################################################
18
19require './parts/inc/inctools';
20
21sub cat_file
22{
23  eval { require File::Spec };
24  return $@ ? join('/', @_) : File::Spec->catfile(@_);
25}
26
27sub all_files_in_dir
28{
29  my $dir = shift;
30  local *DIR;
31
32  opendir DIR, $dir or die "cannot open directory $dir: $!\n";
33  my @files = grep { !-d && !/^\./ } readdir DIR;  # no dirs or hidden files
34  closedir DIR;
35
36  return map { cat_file($dir, $_) } sort @files;
37}
38
39sub parse_todo
40{
41  # Creates a hash with the keys being all symbols found in all the files in
42  # the input directory (default 'parts/todo'), and the values being each a
43  # subhash like so:
44  #     'utf8_hop_forward' => {
45  #                               'code' => 'U',
46  #                               'version' => '5.025007'
47  #                           },
48  #
49  # The input line that generated that was this:
50  #
51  #     utf8_hop_forward               # U
52
53  my $dir = shift || 'parts/todo';
54  local *TODO;
55  my %todo;
56  my $todo;
57
58  for $todo (all_files_in_dir($dir)) {
59    open TODO, $todo or die "cannot open $todo: $!\n";
60    my $version = <TODO>;
61    chomp $version;
62    while (<TODO>) {
63      chomp;
64      s/#(?: (\w)\b)?.*//;  # 'code' is optional
65      my $code = $1;
66      s/^\s+//; s/\s+$//;
67      /^\s*$/ and next;
68      /^\w+$/ or die "invalid identifier: $_\n";
69      exists $todo{$_} and die "duplicate identifier: $_ ($todo{$_} <=> $version)\n";
70      $todo{$_}{'version'} = $version;
71      $todo{$_}{'code'} = $code if $code;
72    }
73    close TODO;
74  }
75
76  return \%todo;
77}
78
79sub expand_version
80{
81  my($op, $ver) = @_;
82  my($r, $v, $s) = parse_version($ver);
83  $r == 5 or die "only Perl revision 5 is supported\n";
84  my $bcdver = sprintf "0x%d%03d%03d", $r, $v, $s;
85  return "(PERL_BCDVERSION $op $bcdver)";
86}
87
88sub parse_partspec
89{
90  my $file = shift;
91  my $section = 'implementation';
92
93  my $vsec = join '|', qw( provides dontwarn implementation
94                           xsubs xsinit xsmisc xshead xsboot tests );
95  my(%data, %options);
96  local *F;
97
98  open F, $file or die "$file: $!\n";
99  while (<F>) {
100    /[ \t]+$/ and warn "$file:$.: warning: trailing whitespace\n";
101    if ($section eq 'implementation') {
102      m!//! && !m!(?:=~|s/).*//! && !m!(?:ht|f)tp(?:s)://!
103          and warn "$file:$.: warning: potential C++ comment\n";
104    }
105
106    /^##/ and next;
107
108    if (/^=($vsec)(?:\s+(.*))?/) {
109      $section = $1;
110      if (defined $2) {
111        my $opt = $2;
112        $options{$section} = eval "{ $opt }";
113        $@ and die "$file:$.: invalid options ($opt) in section $section: $@\n";
114      }
115      next;
116    }
117    push @{$data{$section}}, $_;
118  }
119  close F;
120
121  for (keys %data) {
122    my @v = @{$data{$_}};
123    shift @v while @v && $v[0]  =~ /^\s*$/;
124    pop   @v while @v && $v[-1] =~ /^\s*$/;
125    $data{$_} = join '', @v;
126  }
127
128  if (! exists $data{provides}) {
129    if ($file =~ /inctools$/) { # This file is special, it doesn't 'provide'
130                                # any API, but has subs to use internally
131      $data{provides} = "";
132    }
133    else {
134      $data{provides} = ($file =~ /(\w+)\.?$/)[0];
135    }
136  }
137  $data{provides} = [$data{provides} =~ /(\S+)/g];
138
139  if (exists $data{dontwarn}) {
140    $data{dontwarn} = [$data{dontwarn} =~ /(\S+)/g];
141  }
142
143  my @prov;
144  my %proto;
145
146  if (exists $data{tests} && (!exists $data{implementation} || $data{implementation} !~ /\S/)) {
147    $data{implementation} = '';
148  }
149  else {
150    $data{implementation} =~ /\S/ or die "Empty implementation in $file\n";
151
152    my $p;
153
154    for $p (@{$data{provides}}) {
155      if ($p =~ m#^/.*/\w*$#) {
156        my @tmp = eval "\$data{implementation} =~ ${p}gm";
157        $@ and die "invalid regex $p in $file\n";
158        @tmp or warn "no matches for regex $p in $file\n";
159        push @prov, do { my %h; grep !$h{$_}++, @tmp };
160      }
161      elsif ($p eq '__UNDEFINED__') {
162        my @tmp = $data{implementation} =~ /^\s*__UNDEFINED__[^\r\n\S]+(\w+)/gm;
163        @tmp or warn "no __UNDEFINED__ macros in $file\n";
164        push @prov, @tmp;
165      }
166      else {
167        push @prov, $p;
168      }
169    }
170
171    for (@prov) {
172      if ($data{implementation} !~ /\b\Q$_\E\b/) {
173        warn "$file claims to provide $_, but doesn't seem to do so\n";
174        next;
175      }
176
177      # scan for prototypes
178      my($proto) = $data{implementation} =~ /
179                   ( ^ (?:[\w*]|[^\S\r\n])+
180                       [\r\n]*?
181                     ^ \b$_\b \s*
182                       \( [^{]* \)
183                   )
184                       \s* \{
185                   /xm or next;
186
187      $proto =~ s/^\s+//;
188      $proto =~ s/\s+$//;
189      $proto =~ s/\s+/ /g;
190
191      exists $proto{$_} and warn "$file: duplicate prototype for $_\n";
192      $proto{$_} = $proto;
193    }
194  }
195
196  for $section (qw( implementation xsubs xsinit xsmisc xshead xsboot )) {
197    if (exists $data{$section}) {
198      $data{$section} =~ s/\{\s*version\s*(<|>|==|!=|>=|<=)\s*([\d._]+)\s*\}/expand_version($1, $2)/gei;
199    }
200  }
201
202  $data{provides}   = \@prov;
203  $data{prototypes} = \%proto;
204  $data{OPTIONS}    = \%options;
205
206  my %prov     = map { ($_ => 1) } @prov;
207  my %dontwarn = exists $data{dontwarn} ? map { ($_ => 1) } @{$data{dontwarn}} : ();
208  my @maybeprov = do { my %h;
209                       grep {
210                         my($nop) = /^Perl_(.*)/;
211                         not exists $prov{$_}                         ||
212                             exists $dontwarn{$_}                     ||
213                             /^D_PPP_/                                ||
214                             (defined $nop && exists $prov{$nop}    ) ||
215                             (defined $nop && exists $dontwarn{$nop}) ||
216                             $h{$_}++;
217                       }
218                       $data{implementation} =~ /^\s*#\s*define\s+(\w+)/gm };
219
220  if (@maybeprov) {
221    warn "$file seems to provide these macros, but doesn't list them:\n  "
222         . join("\n  ", @maybeprov) . "\n";
223  }
224
225  return \%data;
226}
227
228sub compare_prototypes
229{
230  my($p1, $p2) = @_;
231  for ($p1, $p2) {
232    s/^\s+//;
233    s/\s+$//;
234    s/\s+/ /g;
235    s/(\w)\s(\W)/$1$2/g;
236    s/(\W)\s(\w)/$1$2/g;
237  }
238  return $p1 cmp $p2;
239}
240
241sub ppcond
242{
243  my $s = shift;
244  my @c;
245  my $p;
246
247  for $p (@$s) {
248    push @c, map "!($_)", @{$p->{pre}};
249    defined $p->{cur} and push @c, "($p->{cur})";
250  }
251
252  join " && ", @c;
253}
254
255sub trim_arg        # Splits the argument into type and name, returning the
256                    # pair: (type, name)
257{
258  my $in = shift;
259  my $remove = join '|', qw( NN NULLOK VOL );
260
261  $in eq '...' and return ($in);
262
263  local $_ = $in;
264  my $name;                 # Work on the name
265
266  s/[*()]/ /g;              # Get rid of this punctuation
267  s/ \[ [^\]]* \] / /xg;    # Get rid of dimensions
268  s/\b(?:auto|const|extern|inline|register|static|volatile|restrict)\b//g;
269  s/\b(?:$remove)\b//;
270  s/^\s+//; s/\s+$//;       # No leading, trailing space
271
272  if( /^\b (?:struct|union|enum) \s+ \w+ (?: \s+ ( \w+ ) )? $/x ) {
273    defined $1 and $name = $1;    # Extract the name for one of these declarations
274  }
275  else {
276    if( s/\b(?:char|double|float|int|long|short|signed|unsigned|void)\b//g ) {
277      /^ \s* (\w+) \s* $/x and $name = $1;    # Similarly for these
278    }
279    elsif (/^ \s* " [^"]+ " \s+ (\w+) \s* $/x) { # A literal string (is special)
280        $name = $1;
281    }
282    else {
283      /^ \s* \w+ \s+ (\w+) \s* $/x and $name = $1; # Everything else.
284    }
285  }
286
287  $_ = $in;     # Now work on the type.
288
289  # Get rid of the name if we found one
290  defined $name and s/\b$name\b//;
291
292  # these don't matter at all; note that const does matter
293  s/\b(?:auto|extern|inline|register|static|volatile|restrict)\b//g;
294  s/\b(?:$remove)\b//;
295
296  while (s/ \* \s+ \* /**/xg) {}  # No spaces within pointer sequences
297  s/ \s* ( \*+ ) \s* / $1 /xg;    # Normalize pointer sequences to be surrounded
298                                  # by a single space
299  s/^\s+//; s/\s+$//;             # No leading, trailing spacd
300  s/\s+/ /g;                      # Collapse multiple space into one
301
302  return ($_, $name);
303}
304
305sub parse_embed
306{
307  my @files = @_;
308  my @func;
309  my @pps;
310  my $file;
311  local *FILE;
312
313  for $file (@files) {
314    open FILE, $file or die "$file: $!\n";
315    my($line, $l);
316
317    while (defined($line = <FILE>)) {
318      while ($line =~ /\\$/ && defined($l = <FILE>)) {
319        $line =~ s/\\\s*//;
320        $line .= $l;
321      }
322      next if $line =~ /^\s*:/;
323      $line =~ s/^\s+|\s+$//gs;
324      my($dir, $args) = ($line =~ /^\s*#\s*(\w+)(?:\s*(.*?)\s*)?$/);
325      if (defined $dir and defined $args) {
326        for ($dir) {
327          /^ifdef$/   and do { push @pps, { pre => [], cur => "defined($args)"  }         ; last };
328          /^ifndef$/  and do { push @pps, { pre => [], cur => "!defined($args)" }         ; last };
329          /^if$/      and do { push @pps, { pre => [], cur => $args             }         ; last };
330          /^elif$/    and do { push @{$pps[-1]{pre}}, $pps[-1]{cur}; $pps[-1]{cur} = $args; last };
331          /^else$/    and do { push @{$pps[-1]{pre}}, $pps[-1]{cur}; $pps[-1]{cur} = undef; last };
332          /^endif$/   and do { pop @pps                                                   ; last };
333          /^include$/ and last;
334          /^define$/  and last;
335          /^undef$/   and last;
336          warn "unhandled preprocessor directive: $dir\n";
337        }
338      }
339      else {
340        my @e = split /\s*\|\s*/, $line;
341        if( @e >= 3 ) {
342          my($flags, $ret, $name, @args) = @e;
343
344          # Skip non-name entries, like
345          #    PL_parser-E<gt>linestr
346          # which documents a struct entry rather than a function.  We retain
347          # all other entries, so that our caller has full information, and
348          # may skip things like non-public functions.
349          next if $flags =~ /N/;
350
351          # M implies m for the purposes of this module.
352          $flags .= 'm' if $flags =~ /M/;
353
354          # An entry marked 'b' is in mathoms, so is effectively deprecated,
355          # as it can be removed at anytime.  But if it also has a macro to
356          # implement it, that macro stays when mathoms is removed, so the
357          # non-'Perl_' form isn't deprecated.  embed.fnc is supposed to have
358          # already set this up, but make sure.
359          if ($flags =~ /b/ && $flags !~ /m/ && $flags !~ /D/) {
360            warn "Expecting D flag for '$name', since it is b without [Mm]";
361            $flags .= 'D';
362          }
363
364          if ($name =~ /^[^\W\d]\w*$/) {
365            for (@args) {
366              $_ = [trim_arg($_)];
367            }
368            ($ret) = trim_arg($ret);
369            push @func, {
370              name  => $name,
371              flags => { map { $_, 1 } $flags =~ /./g },
372              ret   => $ret,
373              args  => \@args,
374              cond  => ppcond(\@pps),
375            };
376            $func[-1]{'ppport_fnc'} = 1 if $file =~ /ppport\.fnc/;
377          }
378          else {
379            warn "mysterious name [$name] in $file, line $.\n";
380          }
381        }
382      }
383    }
384
385    close FILE;
386  }
387
388  # Here's what two elements of the array look like:
389  # {
390  #              'args' => [
391  #                          [
392  #                            'const nl_item',
393  #                            'item'
394  #                          ]
395  #                        ],
396  #              'cond' => '(defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H))',
397  #              'flags' => {
398  #                           'A' => 1,
399  #                           'T' => 1,
400  #                           'd' => 1,
401  #                           'o' => 1
402  #                         },
403  #              'name' => 'Perl_langinfo',
404  #              'ret' => 'const char *'
405  #            },
406  #            {
407  #              'args' => [
408  #                          [
409  #                            'const int',
410  #                            'item'
411  #                          ]
412  #                        ],
413  #              'cond' => '!(defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H))',
414  #              'flags' => {
415  #                           'A' => 1,
416  #                           'T' => 1,
417  #                           'd' => 1,
418  #                           'o' => 1
419  #                         },
420  #              'name' => 'Perl_langinfo',
421  #              'ret' => 'const char *'
422  #            },
423
424  return @func;
425}
426
427sub known_but_hard_to_test_for
428{
429    # This returns a list of functions/symbols that are in Perl, but the tests
430    # for their existence don't work, usually as a result of them being XS,
431    # and using XS to test.  Effectively, any XS code that compiles and works
432    # is exercising most of these XS-related ones.
433    #
434    # The values for the keys are each the version that ppport.h makes them
435    # work on, and were gleaned by manually looking at the code parts/inc/*.
436    # For non-ppport.h, scanprov will automatically figure out the version
437    # they were introduced in.
438
439    my %return;
440
441    for (qw(CLASS dXSI32 items ix pTHX_ RETVAL StructCopy svtype
442            STMT_START STMT_END STR_WITH_LEN THIS XS))
443    {
444        # __MIN_PERL__ is this at the time of this commit.  This is the
445        # earliest these have been tested to at the time of the commit, but
446        # likely go back further.
447        $return{$_} = '5.003_07';
448    }
449    for (qw(_pMY_CXT pMY_CXT_)) {
450        $return{$_} = '5.9.0';
451    }
452    for (qw(XopDISABLE XopENABLE XopENTRY XopENTRYCUSTOM XopENTRY_set)) {
453        $return{$_} = '5.13.7';
454    }
455    for (qw(XS_EXTERNAL XS_INTERNAL)) {
456        $return{$_} = '5.15.2';
457    }
458
459    return \%return;
460}
461
462sub normalize_prototype  # So that they can be compared more easily
463{
464    my $proto = shift;
465    $proto =~ s/\s* \* \s* / * /xg;
466    return $proto;
467}
468
469sub make_prototype
470{
471  my $f = shift;
472  my @args = map { "@$_" } @{$f->{args}};
473  my $proto;
474  my $pTHX_ = exists $f->{flags}{T} ? "" : "pTHX_ ";
475  $proto = "$f->{ret} $f->{name}" . "($pTHX_" . join(', ', @args) . ')';
476  return normalize_prototype($proto);
477}
4781;
479