xref: /openbsd/gnu/usr.bin/perl/ext/B/B/Xref.pm (revision 5759b3d2)
1package B::Xref;
2
3our $VERSION = '1.07';
4
5=head1 NAME
6
7B::Xref - Generates cross reference reports for Perl programs
8
9=head1 SYNOPSIS
10
11perl -MO=Xref[,OPTIONS] foo.pl
12
13=head1 DESCRIPTION
14
15The B::Xref module is used to generate a cross reference listing of all
16definitions and uses of variables, subroutines and formats in a Perl program.
17It is implemented as a backend for the Perl compiler.
18
19The report generated is in the following format:
20
21    File filename1
22      Subroutine subname1
23	Package package1
24	  object1        line numbers
25	  object2        line numbers
26	  ...
27	Package package2
28	...
29
30Each B<File> section reports on a single file. Each B<Subroutine> section
31reports on a single subroutine apart from the special cases
32"(definitions)" and "(main)". These report, respectively, on subroutine
33definitions found by the initial symbol table walk and on the main part of
34the program or module external to all subroutines.
35
36The report is then grouped by the B<Package> of each variable,
37subroutine or format with the special case "(lexicals)" meaning
38lexical variables. Each B<object> name (implicitly qualified by its
39containing B<Package>) includes its type character(s) at the beginning
40where possible. Lexical variables are easier to track and even
41included dereferencing information where possible.
42
43The C<line numbers> are a comma separated list of line numbers (some
44preceded by code letters) where that object is used in some way.
45Simple uses aren't preceded by a code letter. Introductions (such as
46where a lexical is first defined with C<my>) are indicated with the
47letter "i". Subroutine and method calls are indicated by the character
48"&".  Subroutine definitions are indicated by "s" and format
49definitions by "f".
50
51For instance, here's part of the report from the I<pod2man> program that
52comes with Perl:
53
54  Subroutine clear_noremap
55    Package (lexical)
56      $ready_to_print   i1069, 1079
57    Package main
58      $&                1086
59      $.                1086
60      $0                1086
61      $1                1087
62      $2                1085, 1085
63      $3                1085, 1085
64      $ARGV             1086
65      %HTML_Escapes     1085, 1085
66
67This shows the variables used in the subroutine C<clear_noremap>.  The
68variable C<$ready_to_print> is a my() (lexical) variable,
69B<i>ntroduced (first declared with my()) on line 1069, and used on
70line 1079.  The variable C<$&> from the main package is used on 1086,
71and so on.
72
73A line number may be prefixed by a single letter:
74
75=over 4
76
77=item i
78
79Lexical variable introduced (declared with my()) for the first time.
80
81=item &
82
83Subroutine or method call.
84
85=item s
86
87Subroutine defined.
88
89=item r
90
91Format defined.
92
93=back
94
95The most useful option the cross referencer has is to save the report
96to a separate file.  For instance, to save the report on
97I<myperlprogram> to the file I<report>:
98
99  $ perl -MO=Xref,-oreport myperlprogram
100
101=head1 OPTIONS
102
103Option words are separated by commas (not whitespace) and follow the
104usual conventions of compiler backend options.
105
106=over 8
107
108=item C<-oFILENAME>
109
110Directs output to C<FILENAME> instead of standard output.
111
112=item C<-r>
113
114Raw output. Instead of producing a human-readable report, outputs a line
115in machine-readable form for each definition/use of a variable/sub/format.
116
117=item C<-d>
118
119Don't output the "(definitions)" sections.
120
121=item C<-D[tO]>
122
123(Internal) debug options, probably only useful if C<-r> included.
124The C<t> option prints the object on the top of the stack as it's
125being tracked. The C<O> option prints each operator as it's being
126processed in the execution order of the program.
127
128=back
129
130=head1 BUGS
131
132Non-lexical variables are quite difficult to track through a program.
133Sometimes the type of a non-lexical variable's use is impossible to
134determine. Introductions of non-lexical non-scalars don't seem to be
135reported properly.
136
137=head1 AUTHOR
138
139Malcolm Beattie, mbeattie@sable.ox.ac.uk.
140
141=cut
142
143use strict;
144use Config;
145use B qw(peekop class comppadlist main_start svref_2object walksymtable
146         OPpLVAL_INTRO SVf_POK SVf_ROK OPpOUR_INTRO cstring
147        );
148
149sub UNKNOWN { ["?", "?", "?"] }
150
151my @pad;			# lexicals in current pad
152				# as ["(lexical)", type, name]
153my %done;			# keyed by $$op: set when each $op is done
154my $top = UNKNOWN;		# shadows top element of stack as
155				# [pack, type, name] (pack can be "(lexical)")
156my $file;			# shadows current filename
157my $line;			# shadows current line number
158my $subname;			# shadows current sub name
159my %table;			# Multi-level hash to record all uses etc.
160my @todo = ();			# List of CVs that need processing
161
162my %code = (intro => "i", used => "",
163	    subdef => "s", subused => "&",
164	    formdef => "f", meth => "->");
165
166
167# Options
168my ($debug_op, $debug_top, $nodefs, $raw);
169
170sub process {
171    my ($var, $event) = @_;
172    my ($pack, $type, $name) = @$var;
173    if ($type eq "*") {
174	if ($event eq "used") {
175	    return;
176	} elsif ($event eq "subused") {
177	    $type = "&";
178	}
179    }
180    $type =~ s/(.)\*$/$1/g;
181    if ($raw) {
182	printf "%-16s %-12s %5d %-12s %4s %-16s %s\n",
183	    $file, $subname, $line, $pack, $type, $name, $event;
184    } else {
185	# Wheee
186	push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}},
187	    $line);
188    }
189}
190
191sub load_pad {
192    my $padlist = shift;
193    my ($namelistav, $vallistav, @namelist, $ix);
194    @pad = ();
195    return if class($padlist) =~ '^(?:SPECIAL|NULL)\z';
196    ($namelistav,$vallistav) = $padlist->ARRAY;
197    @namelist = $namelistav->ARRAY;
198    for ($ix = 1; $ix < @namelist; $ix++) {
199	my $namesv = $namelist[$ix];
200	next if class($namesv) eq "SPECIAL";
201	my ($type, $name) = $namesv->PV =~ /^(.)([^\0]*)(\0.*)?$/;
202	$pad[$ix] = ["(lexical)", $type || '?', $name || '?'];
203    }
204    if ($Config{useithreads}) {
205	my (@vallist);
206	@vallist = $vallistav->ARRAY;
207	for ($ix = 1; $ix < @vallist; $ix++) {
208	    my $valsv = $vallist[$ix];
209	    next unless class($valsv) eq "GV";
210            next if class($valsv->STASH) eq 'SPECIAL';
211	    # these pad GVs don't have corresponding names, so same @pad
212	    # array can be used without collisions
213	    $pad[$ix] = [$valsv->STASH->NAME, "*", $valsv->NAME];
214	}
215    }
216}
217
218sub xref {
219    my $start = shift;
220    my $op;
221    for ($op = $start; $$op; $op = $op->next) {
222	last if $done{$$op}++;
223	warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top;
224	warn peekop($op), "\n" if $debug_op;
225	my $opname = $op->name;
226	if ($opname =~ /^(or|and|mapwhile|grepwhile|range|cond_expr)$/) {
227	    xref($op->other);
228	} elsif ($opname eq "match" || $opname eq "subst") {
229	    xref($op->pmreplstart);
230	} elsif ($opname eq "substcont") {
231	    xref($op->other->pmreplstart);
232	    $op = $op->other;
233	    redo;
234	} elsif ($opname eq "enterloop") {
235	    xref($op->redoop);
236	    xref($op->nextop);
237	    xref($op->lastop);
238	} elsif ($opname eq "subst") {
239	    xref($op->pmreplstart);
240	} else {
241	    no strict 'refs';
242	    my $ppname = "pp_$opname";
243	    &$ppname($op) if defined(&$ppname);
244	}
245    }
246}
247
248sub xref_cv {
249    my $cv = shift;
250    my $pack = $cv->GV->STASH->NAME;
251    $subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME;
252    load_pad($cv->PADLIST);
253    xref($cv->START);
254    $subname = "(main)";
255}
256
257sub xref_object {
258    my $cvref = shift;
259    xref_cv(svref_2object($cvref));
260}
261
262sub xref_main {
263    $subname = "(main)";
264    load_pad(comppadlist);
265    xref(main_start);
266    while (@todo) {
267	xref_cv(shift @todo);
268    }
269}
270
271sub pp_nextstate {
272    my $op = shift;
273    $file = $op->file;
274    $line = $op->line;
275    $top = UNKNOWN;
276}
277
278sub pp_padrange {
279    my $op = shift;
280    my $count = $op->private & 127;
281    for my $i (0..$count-1) {
282	$top = $pad[$op->targ + $i];
283	process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
284    }
285}
286
287sub pp_padsv {
288    my $op = shift;
289    $top = $pad[$op->targ];
290    process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
291}
292
293sub pp_padav { pp_padsv(@_) }
294sub pp_padhv { pp_padsv(@_) }
295
296sub deref {
297    my ($op, $var, $as) = @_;
298    $var->[1] = $as . $var->[1];
299    process($var, $op->private & OPpOUR_INTRO ? "intro" : "used");
300}
301
302sub pp_rv2cv { deref(shift, $top, "&"); }
303sub pp_rv2hv { deref(shift, $top, "%"); }
304sub pp_rv2sv { deref(shift, $top, "\$"); }
305sub pp_rv2av { deref(shift, $top, "\@"); }
306sub pp_rv2gv { deref(shift, $top, "*"); }
307
308sub pp_gvsv {
309    my $op = shift;
310    my $gv;
311    if ($Config{useithreads}) {
312	$top = $pad[$op->padix];
313	$top = UNKNOWN unless $top;
314	$top->[1] = '$';
315    }
316    else {
317	$gv = $op->gv;
318	$top = [$gv->STASH->NAME, '$', $gv->SAFENAME];
319    }
320    process($top, $op->private & OPpLVAL_INTRO ||
321                  $op->private & OPpOUR_INTRO   ? "intro" : "used");
322}
323
324sub pp_gv {
325    my $op = shift;
326    my $gv;
327    if ($Config{useithreads}) {
328	$top = $pad[$op->padix];
329	$top = UNKNOWN unless $top;
330	$top->[1] = '*';
331    }
332    else {
333	$gv = $op->gv;
334	if ($gv->FLAGS & SVf_ROK) { # sub ref
335	    my $cv = $gv->RV;
336	    $top = [$cv->STASH->NAME, '*', B::safename($cv->NAME_HEK)]
337	}
338	else {
339	    $top = [$gv->STASH->NAME, '*', $gv->SAFENAME];
340	}
341    }
342    process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
343}
344
345sub pp_const {
346    my $op = shift;
347    my $sv = $op->sv;
348    # constant could be in the pad (under useithreads)
349    if ($$sv) {
350	$top = ["?", "",
351		(class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK)
352		? cstring($sv->PV) : "?"];
353    }
354    else {
355	$top = $pad[$op->targ];
356	$top = UNKNOWN unless $top;
357    }
358}
359
360sub pp_method {
361    my $op = shift;
362    $top = ["(method)", "->".$top->[1], $top->[2]];
363}
364
365sub pp_entersub {
366    my $op = shift;
367    if ($top->[1] eq "m") {
368	process($top, "meth");
369    } else {
370	process($top, "subused");
371    }
372    $top = UNKNOWN;
373}
374
375#
376# Stuff for cross referencing definitions of variables and subs
377#
378
379sub B::GV::xref {
380    my $gv = shift;
381    my $cv = $gv->CV;
382    if ($$cv) {
383	#return if $done{$$cv}++;
384	$file = $gv->FILE;
385	$line = $gv->LINE;
386	process([$gv->STASH->NAME, "&", $gv->NAME], "subdef");
387	push(@todo, $cv);
388    }
389    my $form = $gv->FORM;
390    if ($$form) {
391	return if $done{$$form}++;
392	$file = $gv->FILE;
393	$line = $gv->LINE;
394	process([$gv->STASH->NAME, "", $gv->NAME], "formdef");
395    }
396}
397
398sub xref_definitions {
399    my ($pack, %exclude);
400    return if $nodefs;
401    $subname = "(definitions)";
402    foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS
403		      strict vars FileHandle Exporter Carp PerlIO::Layer
404		      attributes utf8 warnings)) {
405        $exclude{$pack."::"} = 1;
406    }
407    no strict qw(vars refs);
408    walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) });
409}
410
411sub output {
412    return if $raw;
413    my ($file, $subname, $pack, $name, $ev, $perfile, $persubname,
414	$perpack, $pername, $perev);
415    foreach $file (sort(keys(%table))) {
416	$perfile = $table{$file};
417	print "File $file\n";
418	foreach $subname (sort(keys(%$perfile))) {
419	    $persubname = $perfile->{$subname};
420	    print "  Subroutine $subname\n";
421	    foreach $pack (sort(keys(%$persubname))) {
422		$perpack = $persubname->{$pack};
423		print "    Package $pack\n";
424		foreach $name (sort(keys(%$perpack))) {
425		    $pername = $perpack->{$name};
426		    my @lines;
427		    foreach $ev (qw(intro formdef subdef meth subused used)) {
428			$perev = $pername->{$ev};
429			if (defined($perev) && @$perev) {
430			    my $code = $code{$ev};
431			    push(@lines, map("$code$_", @$perev));
432			}
433		    }
434		    printf "      %-16s  %s\n", $name, join(", ", @lines);
435		}
436	    }
437	}
438    }
439}
440
441sub compile {
442    my @options = @_;
443    my ($option, $opt, $arg);
444  OPTION:
445    while ($option = shift @options) {
446	if ($option =~ /^-(.)(.*)/) {
447	    $opt = $1;
448	    $arg = $2;
449	} else {
450	    unshift @options, $option;
451	    last OPTION;
452	}
453	if ($opt eq "-" && $arg eq "-") {
454	    shift @options;
455	    last OPTION;
456	} elsif ($opt eq "o") {
457	    $arg ||= shift @options;
458	    open(STDOUT, '>', $arg) or return "$arg: $!\n";
459	} elsif ($opt eq "d") {
460	    $nodefs = 1;
461	} elsif ($opt eq "r") {
462	    $raw = 1;
463	} elsif ($opt eq "D") {
464            $arg ||= shift @options;
465	    foreach $arg (split(//, $arg)) {
466		if ($arg eq "o") {
467		    B->debug(1);
468		} elsif ($arg eq "O") {
469		    $debug_op = 1;
470		} elsif ($arg eq "t") {
471		    $debug_top = 1;
472		}
473	    }
474	}
475    }
476    if (@options) {
477	return sub {
478	    my $objname;
479	    xref_definitions();
480	    foreach $objname (@options) {
481		$objname = "main::$objname" unless $objname =~ /::/;
482		eval "xref_object(\\&$objname)";
483		die "xref_object(\\&$objname) failed: $@" if $@;
484	    }
485	    output();
486	}
487    } else {
488	return sub {
489	    xref_definitions();
490	    xref_main();
491	    output();
492	}
493    }
494}
495
4961;
497