lib/B/BUtils.pm

# forked version of B::Utils; needs to merge it ASAP
package B::Utils;

use 5.006;
use warnings;
use vars '$DEBUG';
our @EXPORT_OK = qw(all_starts all_roots anon_subs
                    walkoptree_simple walkoptree_filtered
                    walkallops_simple walkallops_filtered
                    carp croak
                    opgrep
                   );
sub import {
  my $pack = __PACKAGE__; shift;
  my @exports = @_;
  my $caller = caller;
  my %EOK = map {$_ => 1} @EXPORT_OK;
  for (@exports) {
    unless ($EOK{$_}) {
      require Carp;
      Carp::croak(qq{"$_" is not exported by the $pack module});
    }
    no strict 'refs';
    *{"$caller\::$_"} = \&{"$pack\::$_"};
  }
}

our $VERSION = '0.04_02'; # 0.04 with some Schwern patches

use B qw(main_start main_root walksymtable class OPf_KIDS);

my (%starts, %roots, @anon_subs);

our @bad_stashes = qw(B Carp DB Exporter warnings Cwd Config CORE blib strict DynaLoader vars XSLoader AutoLoader base);

sub null {
    my $op = shift;
    class( $op ) eq 'NULL';
}

{ my $_subsdone=0;
sub _init { # To ensure runtimeness.
    return if $_subsdone;
    %starts = ( '__MAIN__' =>  main_start() );
    %roots  = ( '__MAIN__' =>  main_root()  );
    walksymtable(\%main::,
                '_push_starts',
                sub {
                    return if scalar grep {$_[0] eq $_."::"} @bad_stashes;
                    1;
                }, # Do not eat our own children!
                '');
    push @anon_subs, { root => $_->ROOT, start => $_->START}
        for grep { class($_) eq "CV" } B::main_cv->PADLIST->ARRAY->ARRAY;
    $_subsdone=1;
}
}

=head1 NAME

B::Utils - Helper functions for op tree manipulation

=head1 SYNOPSIS

  use B::Utils;

=head1 DESCRIPTION

These functions make it easier to manipulate the op tree.

=head1 FUNCTIONS

=over 3

=item C<all_starts>

=item C<all_roots>

Returns a hash of all of the starting ops or root ops of optrees, keyed
to subroutine name; the optree for main program is simply keyed to C<__MAIN__>.

B<Note>: Certain "dangerous" stashes are not scanned for subroutines:
the list of such stashes can be found in C<@B::Utils::bad_stashes>. Feel
free to examine and/or modify this to suit your needs. The intention is
that a simple program which uses no modules other than C<B> and
C<B::Utils> would show no addition symbols.

This does B<not> return the details of ops in anonymous subroutines
compiled at compile time. For instance, given

    $a = sub { ... };

the subroutine will not appear in the hash. This is just as well, since
they're anonymous... If you want to get at them, use...

=item C<anon_subs()>

This returns an array of hash references. Each element has the keys
"start" and "root". These are the starting and root ops of all of
the anonymous subroutines in the program.

=cut

sub all_starts { _init(); return %starts; }
sub all_roots  { _init(); return %roots; }
sub anon_subs { _init(); return @anon_subs }

sub B::GV::_push_starts {
    my $name = $_[0]->STASH->NAME."::".$_[0]->SAFENAME;
    return unless ${$_[0]->CV};
    my $cv = $_[0]->CV;

    if ($cv->PADLIST->can("ARRAY") and $cv->PADLIST->ARRAY and $cv->PADLIST->ARRAY->can("ARRAY")) {
        push @anon_subs, { root => $_->ROOT, start => $_->START}
            for grep { class($_) eq "CV" } $cv->PADLIST->ARRAY->ARRAY;
    }
    return unless ${$cv->START} and ${$cv->ROOT};
    $starts{$name} = $cv->START;
    $roots{$name} = $cv->ROOT;
};

sub B::SPECIAL::_push_starts{}

=item C<< $op->oldname >>

Returns the name of the op, even if it is currently optimized to null.
This helps you understand the stucture of the op tree.

=cut

sub B::OP::oldname {
    return substr(B::ppname($_[0]->targ),3) if $_[0]->name eq "null" and $_[0]->targ;
    return $_[0]->name;
}

=item C<< $op->kids >>

Returns an array of all this op's non-null children, in order.

=cut

sub B::OP::kids {
    my $op = shift;
    my @rv = ();

    foreach my $type (qw(first last other)) {
        my $kid = $op->$type();
        next if !$kid || class($kid) eq 'NULL';
        if( $kid->name eq 'null' ) {
            push @rv, $kid->kids;
        }
        else {
            push @rv, $kid;
        }
    }

    my @more_rv = ();
    foreach my $more_op (@rv) {
        my $next_op = $more_op;
        while( $next_op->can("sibling") ) {
            $next_op = $next_op->sibling;
            last if !$next_op || class($next_op) eq 'NULL';
            if( $next_op->name eq 'null' ) {
                push @more_rv, $next_op->kids;
            }
            else {
                push @more_rv, $next_op;
            }
        }
    }

    return @rv, @more_rv;
}

=item C<< $op->first >>

=item C<< $op->last >>

=item C<< $op->other >>

Normally if you call first, last or other on anything which is not an
UNOP, BINOP or LOGOP respectivly it will die.  This leads to lots of
code like:

    $op->first if $op->can('first');

B::Utils provides every op with first, last and other methods which
will simply return nothing if it isn't relevent.

=cut

foreach my $type (qw(first last other)) {
    no strict 'refs';
    *{'B::OP::'.$type} = sub {
        my($op) = shift;
        if( $op->can("SUPER::$type") ) {
            return $op->$type();
        }
        else {
            return;
        }
    }
}

=item C<< $op->parent >>

Returns the parent node in the op tree, if possible. Currently "possible" means
"if the tree has already been optimized"; that is, if we're during a C<CHECK>
block. (and hence, if we have valid C<next> pointers.)

In the future, it may be possible to search for the parent before we have the
C<next> pointers in place, but it'll take me a while to figure out how to do
that.

=cut

sub B::OP::parent {
    my $target = shift;
    printf( "parent %s %s=(0x%07x)\n",
	    B::class( $target),
	    $target->oldname,
	    $$target )
	if $DEBUG;

    die "I'm not sure how to do this yet. I'm sure there is a way. If you know, please email me."
        if (!$target->seq);

    my (%deadend, $search_kids);
    $search_kids = sub {
        my $node = shift || return undef;

	printf( "Searching from %s %s=(0x%07x)\n",
		class($node)||'?',
		$node->oldname,
		$$node )
	    if $DEBUG;

        # Go up a level if we've got stuck, and search (for the same
        # $target) from a higher vantage point.
        return $search->($node->parent) if exists $deadend{$node};

        # Test the immediate children
        return $node if scalar grep {$_ == $target} $node->kids;

        # Recurse
        my $x;
        defined($x = $search->($_)) and return $x for $node->kids;

        # Not in this subtree.
        $deadend{$node}++;
        return undef;
   };
   my $result;
   my $start = $target;
   $result = $search->($start) and return $result while $start = $start->next;
   return $search->($start);
}

=item C<< $op->previous >>

Like C<< $op->next >>, but not quite.

=cut

sub B::OP::previous {
    my $target = shift;
    my $start = $target;
    my (%deadend, $search);
    $search = sub {
        my $node = shift || die;
        return $search->(find_parent($node)) if exists $deadend{$node};
        return $node if $node->{next}==$target;
        # Recurse
        my $x;
        ($_->next == $target and return $_) for $node->kids;
        defined($x = $search->($_)) and return $x for $node->{kids};

        # Not in this subtree.
        $deadend{$node}++;
        return undef;
   };
   my $result;
   $result = $search->($start) and return $result
        while $start = $start->next;
}

=item walkoptree_simple($op, \&callback, [$data])

The C<B> module provides various functions to walk the op tree, but
they're all rather difficult to use, requiring you to inject methods
into the C<B::OP> class. This is a very simple op tree walker with
more expected semantics.

The &callback is called at each op with the op itself passed in as the
first argument and any additional $data as the second.

All the C<walk> functions set C<$B::Utils::file> and C<$B::Utils::line>
to the appropriate values of file and line number in the program
being examined.  Since only COPs contain this information it may be
unavailable in the first few callback calls.

=cut

our ($file, $line);

# Make sure we reset $file and $line between runs.
sub walkoptree_simple {
    ($file, $line) = ('__none__', 0);

    _walkoptree_simple(@_);
}

sub _walkoptree_simple {
    my ($op, $callback, $data) = @_;
    ($file, $line) = ($op->file, $op->line) if $op->isa("B::COP");
    $callback->($op,$data);
    if ($$op && ($op->flags & OPf_KIDS)) {
        my $kid;
        for ($kid = $op->first; $$kid; $kid = $kid->sibling) {
            _walkoptree_simple($kid, $callback, $data);
        }
    }
}

=item walkoptree_filtered($op, \&filter, \&callback, [$data])

This is much the same as C<walkoptree_simple>, but will only call the
callback if the C<filter> returns true. The C<filter> is passed the
op in question as a parameter; the C<opgrep> function is fantastic
for building your own filters.

=cut

sub walkoptree_filtered {
    ($file, $line) = ('__none__', 0);

    _walkoptree_filtered(@_);
}

sub _walkoptree_filtered {
    my ($op, $filter, $callback, $data) = @_;
    ($file, $line) = ($op->file, $op->line) if $op->isa("B::COP");
    $callback->($op,$data) if $filter->($op);
    if ($$op && ($op->flags & OPf_KIDS)) {
        my $kid;
        for ($kid = $op->first; $$kid; $kid = $kid->sibling) {
            _walkoptree_filtered($kid, $filter, $callback, $data);
        }
    }
}

=item walkallops_simple(\&callback, [$data])

This combines C<walkoptree_simple> with C<all_roots> and C<anon_subs>
to examine every op in the program. C<$B::Utils::sub> is set to the
subroutine name if you're in a subroutine, C<__MAIN__> if you're in
the main program and C<__ANON__> if you're in an anonymous subroutine.

=cut

our $sub;

sub walkallops_simple {
    my ($callback, $data) = @_;
    _init();
    for $sub (keys %roots) {
        walkoptree_simple($roots{$sub}, $callback, $data);
    }
    $sub = "__ANON__";
    for (@anon_subs) {
        walkoptree_simple($_->{root}, $callback, $data);
    }
}

=item walkallops_filtered(\&filter, \&callback, [$data])

Same as above, but filtered.

=cut

sub walkallops_filtered {
    my ($filter, $callback, $data) = @_;
    _init();
    for $sub (keys %roots) {
        walkoptree_filtered($roots{$sub}, $filter, $callback, $data);
    }
    $sub = "__ANON__";
    for (@anon_subs) {
        walkoptree_filtered($_->{root}, $filter, $callback, $data);
    }
}

=item carp(@args)

=item croak(@args)

Warn and die, respectively, from the perspective of the position of the op in
the program. Sounds complicated, but it's exactly the kind of error reporting
you expect when you're grovelling through an op tree.

=cut

sub _preparewarn {
    my $args = join '', @_;
    $args = "Something's wrong " unless $args;
    $args .= " at $file line $line.\n" unless substr($args, length($args) -1) eq "\n";
}

sub carp  (@) { CORE::warn(_preparewarn(@_)) }
sub croak (@) { CORE::die(_preparewarn(@_)) }

=item opgrep(\%conditions, @ops)

Returns the ops which meet the given conditions. The conditions should be
specified like this:

    @barewords = opgrep(
                        { name => "const", private => OPpCONST_BARE },
                        @ops
                       );

You can specify alternation by giving an arrayref of values:

    @svs = opgrep ( { name => ["padsv", "gvsv"] }, @ops)

And you can specify inversion by making the first element of the arrayref
a "!". (Hint: if you want to say "anything", say "not nothing": C<["!"]>)

You may also specify the conditions to be matched in nearby ops.

    walkallops_filtered(
        sub { opgrep( {name => "exec",
                       next => {
                                 name    => "nextstate",
                                 sibling => { name => [qw(! exit warn die)] }
                               }
                      }, @_)},
        sub {
              carp("Statement unlikely to be reached");
              carp("\t(Maybe you meant system() when you said exec()?)\n");
        }
    )

Get that?

Here are the things that can be tested:

        name targ type seq flags private pmflags pmpermflags
        first other last sibling next pmreplroot pmreplstart pmnext

=cut

sub opgrep {
    my ($cref, @ops) = @_;
    my %conds = %$cref;
    my @rv = ();

    OPLOOP: for my $o (grep defined, @ops) {
        # First, let's skim off ops of the wrong type.
        for my $type (qw(first other last pmreplroot pmreplstart pmnext pmflags pmpermflags)) {
            next OPLOOP if exists $conds{$type} and !$o->can($type);
        }

        for my $test (qw(name targ type seq flags private pmflags pmpermflags)) {
            next unless exists $conds{$test};
            next OPLOOP unless $o->can($test);

            my @conds = ref $conds{$test} ? @{$conds{$test}} : $conds{$test};

            if ($conds[0] eq "!") {
                my @conds = @{$conds{$test}}; shift @conds;
                next OPLOOP if grep {$o->$test eq $_} @conds;
            } else {
                next OPLOOP unless grep {$o->$test eq $_} @conds;
            }
        }

        for my $neighbour (qw(first other last sibling next pmreplroot pmreplstart pmnext)) {
            next unless exists $conds{$neighbour};
            # We know it can, because we tested that above
            # Recurse, recurse!
            next OPLOOP unless opgrep($conds{$neighbour}, $o->$neighbour);
        }

        push @rv, $o;
    }
    return @rv;
}

package B::BUtils;

@ISA = qw(B::Utils);

1;

=back

=head2 EXPORT

None by default.

=head1 AUTHOR

Simon Cozens, C<simon@cpan.org>

=head1 TODO

I need to add more Fun Things, and possibly clean up some parts where
the (previous/parent) algorithm has catastrophic cases, but it's more
important to get this out right now than get it right.

=head1 SEE ALSO

L<B>, L<B::Generate>.

=cut