xref: /openbsd/gnu/usr.bin/perl/ext/B/B/Concise.pm (revision e0680481)
1package B::Concise;
2# Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
3# This program is free software; you can redistribute and/or modify it
4# under the same terms as Perl itself.
5
6# Note: we need to keep track of how many use declarations/BEGIN
7# blocks this module uses, so we can avoid printing them when user
8# asks for the BEGIN blocks in her program. Update the comments and
9# the count in concise_specials if you add or delete one. The
10# -MO=Concise counts as use #1.
11
12use strict; # use #2
13use warnings; # uses #3 and #4, since warnings uses Carp
14
15use Exporter 'import'; # use #5
16
17our $VERSION   = "1.007";
18our @EXPORT_OK = qw( set_style set_style_standard add_callback
19		     concise_subref concise_cv concise_main
20		     add_style walk_output compile reset_sequence );
21our %EXPORT_TAGS =
22    ( io	=> [qw( walk_output compile reset_sequence )],
23      style	=> [qw( add_style set_style_standard )],
24      cb	=> [qw( add_callback )],
25      mech	=> [qw( concise_subref concise_cv concise_main )],  );
26
27# use #6
28use B qw(class ppname main_start main_root main_cv cstring svref_2object
29	 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL
30         OPf_STACKED
31         OPpSPLIT_ASSIGN OPpSPLIT_LEX
32	 CVf_ANON CVf_LEXICAL CVf_NAMED
33	 PAD_FAKELEX_ANON PAD_FAKELEX_MULTI SVf_ROK);
34
35my %style =
36  ("terse" =>
37   ["(?(#label =>\n)?)(*(    )*)#class (#addr) #name (?([#targ])?) "
38    . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
39    "(*(    )*)goto #class (#addr)\n",
40    "#class pp_#name"],
41   "concise" =>
42   ["#hyphseq2 (*(   (x( ;)x))*)<#classsym> #exname#arg(?([#targarglife])?)"
43    . "~#flags(?(/#private)?)(?(:#hints)?)(x(;~->#next)x)\n"
44    , "  (*(    )*)     goto #seq\n",
45    "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
46   "linenoise" =>
47   ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
48    "gt_#seq ",
49    "(?(#seq)?)#noise#arg(?([#targarg])?)"],
50   "debug" =>
51   ["#class (#addr)\n\top_next\t\t#nextaddr\n\t(?(op_other\t#otheraddr\n\t)?)"
52    . "op_sibling\t#sibaddr\n\t"
53    . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n"
54    . "\top_flags\t#flagval\n\top_private\t#privval\t#hintsval\n"
55    . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
56    . "(?(\top_sv\t\t#svaddr\n)?)",
57    "    GOTO #addr\n",
58    "#addr"],
59   "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
60	     $ENV{B_CONCISE_TREE_FORMAT}],
61  );
62
63# Renderings, ie how Concise prints, is controlled by these vars
64# primary:
65our $stylename;		# selects current style from %style
66my $order = "basic";	# how optree is walked & printed: basic, exec, tree
67
68# rendering mechanics:
69# these 'formats' are the line-rendering templates
70# they're updated from %style when $stylename changes
71my ($format, $gotofmt, $treefmt);
72
73# lesser players:
74my $base = 36;		# how <sequence#> is displayed
75my $big_endian = 1;	# more <sequence#> display
76my $tree_style = 0;	# tree-order details
77my $banner = 1;		# print banner before optree is traversed
78my $do_main = 0;	# force printing of main routine
79my $show_src;		# show source code
80
81# another factor: can affect all styles!
82our @callbacks;		# allow external management
83
84set_style_standard("concise");
85
86my $curcv;
87my $cop_seq_base;
88
89sub set_style {
90    ($format, $gotofmt, $treefmt) = @_;
91    #warn "set_style: deprecated, use set_style_standard instead\n"; # someday
92    die "expecting 3 style-format args\n" unless @_ == 3;
93}
94
95sub add_style {
96    my ($newstyle,@args) = @_;
97    die "style '$newstyle' already exists, choose a new name\n"
98	if exists $style{$newstyle};
99    die "expecting 3 style-format args\n" unless @args == 3;
100    $style{$newstyle} = [@args];
101    $stylename = $newstyle; # update rendering state
102}
103
104sub set_style_standard {
105    ($stylename) = @_; # update rendering state
106    die "err: style '$stylename' unknown\n" unless exists $style{$stylename};
107    set_style(@{$style{$stylename}});
108}
109
110sub add_callback {
111    push @callbacks, @_;
112}
113
114# output handle, used with all Concise-output printing
115our $walkHandle;	# public for your convenience
116BEGIN { $walkHandle = \*STDOUT }
117
118sub walk_output { # updates $walkHandle
119    my $handle = shift;
120    return $walkHandle unless $handle; # allow use as accessor
121
122    if (ref $handle eq 'SCALAR') {
123	require Config;
124	die "no perlio in this build, can't call walk_output (\\\$scalar)\n"
125	    unless $Config::Config{useperlio};
126	# in 5.8+, open(FILEHANDLE,MODE,REFERENCE) writes to string
127	open my $tmp, '>', $handle;	# but cant re-set existing STDOUT
128	$walkHandle = $tmp;		# so use my $tmp as intermediate var
129	return $walkHandle;
130    }
131    my $iotype = ref $handle;
132    die "expecting argument/object that can print\n"
133	unless $iotype eq 'GLOB' or $iotype and $handle->can('print');
134    $walkHandle = $handle;
135}
136
137sub concise_subref {
138    my($order, $coderef, $name) = @_;
139    my $codeobj = svref_2object($coderef);
140
141    return concise_stashref(@_)
142	unless ref($codeobj) =~ '^B::(?:CV|FM)\z';
143    concise_cv_obj($order, $codeobj, $name);
144}
145
146sub concise_stashref {
147    my($order, $h) = @_;
148    my $name = svref_2object($h)->NAME;
149    foreach my $k (sort keys %$h) {
150	next unless defined $h->{$k};
151	my $coderef = ref $h->{$k} eq 'CODE' ? $h->{$k}
152		    : ref\$h->{$k} eq 'GLOB' ? *{$h->{$k}}{CODE} || next
153		    : next;
154	reset_sequence();
155	print "FUNC: *", $name, "::", $k, "\n";
156	my $codeobj = svref_2object($coderef);
157	next unless ref $codeobj eq 'B::CV';
158	eval { concise_cv_obj($order, $codeobj, $k) };
159	warn "err $@ on $codeobj" if $@;
160    }
161}
162
163# This should have been called concise_subref, but it was exported
164# under this name in versions before 0.56
165*concise_cv = \&concise_subref;
166
167sub concise_cv_obj {
168    my ($order, $cv, $name) = @_;
169    # name is either a string, or a CODE ref (copy of $cv arg??)
170
171    $curcv = $cv;
172
173    if (ref($cv->XSUBANY) =~ /B::(\w+)/) {
174	print $walkHandle "$name is a constant sub, optimized to a $1\n";
175	return;
176    }
177    if ($cv->XSUB) {
178	print $walkHandle "$name is XS code\n";
179	return;
180    }
181    if (class($cv->START) eq "NULL") {
182	no strict 'refs';
183	if (ref $name eq 'CODE') {
184	    print $walkHandle "coderef $name has no START\n";
185	}
186	elsif (exists &$name) {
187	    print $walkHandle "$name exists in stash, but has no START\n";
188	}
189	else {
190	    print $walkHandle "$name not in symbol table\n";
191	}
192	return;
193    }
194    sequence($cv->START);
195    if ($order eq "exec") {
196	walk_exec($cv->START);
197    }
198    elsif ($order eq "basic") {
199	# walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
200	my $root = $cv->ROOT;
201	unless (ref $root eq 'B::NULL') {
202	    walk_topdown($root, sub { $_[0]->concise($_[1]) }, 0);
203	} else {
204	    print $walkHandle "B::NULL encountered doing ROOT on $cv. avoiding disaster\n";
205	}
206    } else {
207	print $walkHandle tree($cv->ROOT, 0);
208    }
209}
210
211sub concise_main {
212    my($order) = @_;
213    sequence(main_start);
214    $curcv = main_cv;
215    if ($order eq "exec") {
216	return if class(main_start) eq "NULL";
217	walk_exec(main_start);
218    } elsif ($order eq "tree") {
219	return if class(main_root) eq "NULL";
220	print $walkHandle tree(main_root, 0);
221    } elsif ($order eq "basic") {
222	return if class(main_root) eq "NULL";
223	walk_topdown(main_root,
224		     sub { $_[0]->concise($_[1]) }, 0);
225    }
226}
227
228sub concise_specials {
229    my($name, $order, @cv_s) = @_;
230    my $i = 1;
231    if ($name eq "BEGIN") {
232	splice(@cv_s, 0, 8); # skip 7 BEGIN blocks in this file. NOW 8 ??
233    } elsif ($name eq "CHECK") {
234	pop @cv_s; # skip the CHECK block that calls us
235    }
236    for my $cv (@cv_s) {
237	print $walkHandle "$name $i:\n";
238	$i++;
239	concise_cv_obj($order, $cv, $name);
240    }
241}
242
243my $start_sym = "\e(0"; # "\cN" sometimes also works
244my $end_sym   = "\e(B"; # "\cO" respectively
245
246my @tree_decorations =
247  (["  ", "--", "+-", "|-", "| ", "`-", "-", 1],
248   [" ", "-", "+", "+", "|", "`", "", 0],
249   ["  ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
250   [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
251  );
252
253my @render_packs; # collect -stash=<packages>
254
255sub compileOpts {
256    # set rendering state from options and args
257    my (@options,@args);
258    if (@_) {
259	@options = grep(/^-/, @_);
260	@args = grep(!/^-/, @_);
261    }
262    for my $o (@options) {
263	# mode/order
264	if ($o eq "-basic") {
265	    $order = "basic";
266	} elsif ($o eq "-exec") {
267	    $order = "exec";
268	} elsif ($o eq "-tree") {
269	    $order = "tree";
270	}
271	# tree-specific
272	elsif ($o eq "-compact") {
273	    $tree_style |= 1;
274	} elsif ($o eq "-loose") {
275	    $tree_style &= ~1;
276	} elsif ($o eq "-vt") {
277	    $tree_style |= 2;
278	} elsif ($o eq "-ascii") {
279	    $tree_style &= ~2;
280	}
281	# sequence numbering
282	elsif ($o =~ /^-base(\d+)$/) {
283	    $base = $1;
284	} elsif ($o eq "-bigendian") {
285	    $big_endian = 1;
286	} elsif ($o eq "-littleendian") {
287	    $big_endian = 0;
288	}
289	# miscellaneous, presentation
290	elsif ($o eq "-nobanner") {
291	    $banner = 0;
292	} elsif ($o eq "-banner") {
293	    $banner = 1;
294	}
295	elsif ($o eq "-main") {
296	    $do_main = 1;
297	} elsif ($o eq "-nomain") {
298	    $do_main = 0;
299	} elsif ($o eq "-src") {
300	    $show_src = 1;
301	}
302	elsif ($o =~ /^-stash=(.*)/) {
303	    my $pkg = $1;
304	    no strict 'refs';
305	    if (! %{$pkg.'::'}) {
306		eval "require $pkg";
307	    } else {
308		require Config;
309		if (!$Config::Config{usedl}
310		    && keys %{$pkg.'::'} == 1
311		    && $pkg->can('bootstrap')) {
312		    # It is something that we're statically linked to, but hasn't
313		    # yet been used.
314		    eval "require $pkg";
315		}
316	    }
317	    push @render_packs, $pkg;
318	}
319	# line-style options
320	elsif (exists $style{substr($o, 1)}) {
321	    $stylename = substr($o, 1);
322	    set_style_standard($stylename);
323	} else {
324	    warn "Option $o unrecognized";
325	}
326    }
327    return (@args);
328}
329
330sub compile {
331    my (@args) = compileOpts(@_);
332    return sub {
333	my @newargs = compileOpts(@_); # accept new rendering options
334	warn "disregarding non-options: @newargs\n" if @newargs;
335
336	for my $objname (@args) {
337	    next unless $objname; # skip null args to avoid noisy responses
338
339	    if ($objname eq "BEGIN") {
340		concise_specials("BEGIN", $order,
341				 B::begin_av->isa("B::AV") ?
342				 B::begin_av->ARRAY : ());
343	    } elsif ($objname eq "INIT") {
344		concise_specials("INIT", $order,
345				 B::init_av->isa("B::AV") ?
346				 B::init_av->ARRAY : ());
347	    } elsif ($objname eq "CHECK") {
348		concise_specials("CHECK", $order,
349				 B::check_av->isa("B::AV") ?
350				 B::check_av->ARRAY : ());
351	    } elsif ($objname eq "UNITCHECK") {
352		concise_specials("UNITCHECK", $order,
353				 B::unitcheck_av->isa("B::AV") ?
354				 B::unitcheck_av->ARRAY : ());
355	    } elsif ($objname eq "END") {
356		concise_specials("END", $order,
357				 B::end_av->isa("B::AV") ?
358				 B::end_av->ARRAY : ());
359	    }
360	    else {
361		# convert function names to subrefs
362		if (ref $objname) {
363		    print $walkHandle "B::Concise::compile($objname)\n"
364			if $banner;
365		    concise_subref($order, ($objname)x2);
366		    next;
367		} else {
368		    $objname = "main::" . $objname unless $objname =~ /::/;
369		    no strict 'refs';
370		    my $glob = \*$objname;
371		    unless (*$glob{CODE} || *$glob{FORMAT}) {
372			print $walkHandle "$objname:\n" if $banner;
373			print $walkHandle "err: unknown function ($objname)\n";
374			return;
375		    }
376		    if (my $objref = *$glob{CODE}) {
377			print $walkHandle "$objname:\n" if $banner;
378			concise_subref($order, $objref, $objname);
379		    }
380		    if (my $objref = *$glob{FORMAT}) {
381			print $walkHandle "$objname (FORMAT):\n"
382			    if $banner;
383			concise_subref($order, $objref, $objname);
384		    }
385		}
386	    }
387	}
388	for my $pkg (@render_packs) {
389	    no strict 'refs';
390	    concise_stashref($order, \%{$pkg.'::'});
391	}
392
393	if (!@args or $do_main or @render_packs) {
394	    print $walkHandle "main program:\n" if $do_main;
395	    concise_main($order);
396	}
397	return @args;	# something
398    }
399}
400
401my %labels;
402my $lastnext;	# remembers op-chain, used to insert gotos
403
404my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
405	       'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
406	       'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#",
407	       'METHOP' => '.', UNOP_AUX => '+');
408
409no warnings 'qw'; # "Possible attempt to put comments..."; use #7
410my @linenoise =
411  qw'#  () sc (  @? 1  $* gv *{ m$ m@ m% m? p/ *$ $  $# & a& pt \\ s\\ rf bl
412     `  *? <> ?? ?/ r/ c/ // qr s/ /c y/ =  @= C  sC Cp sp df un BM po +1 +I
413     -1 -I 1+ I+ 1- I- ** *  i* /  i/ %$ i% x  +  i+ -  i- .  "  << >> <  i<
414     >  i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
415     !  ~  a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
416     uf lf uc lc qm @  [f [  @[ eh vl ky dl ex %  ${ @{ uk pk st jn )  )[ a@
417     a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
418     v} ca wa di rs ;; ;  ;d }{ {  }  {} f{ it {l l} rt }l }n }r dm }g }e ^o
419     ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
420     ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
421     -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
422     co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
423     g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
424     e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
425     Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
426
427my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
428
429sub op_flags { # common flags (see BASOP.op_flags in op.h)
430    my($x) = @_;
431    my(@v);
432    push @v, "v" if ($x & 3) == 1;
433    push @v, "s" if ($x & 3) == 2;
434    push @v, "l" if ($x & 3) == 3;
435    push @v, "K" if $x & 4;
436    push @v, "P" if $x & 8;
437    push @v, "R" if $x & 16;
438    push @v, "M" if $x & 32;
439    push @v, "S" if $x & 64;
440    push @v, "*" if $x & 128;
441    return join("", @v);
442}
443
444sub base_n {
445    my $x = shift;
446    return "-" . base_n(-$x) if $x < 0;
447    my $str = "";
448    do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
449    $str = reverse $str if $big_endian;
450    return $str;
451}
452
453my %sequence_num;
454my $seq_max = 1;
455
456sub reset_sequence {
457    # reset the sequence
458    %sequence_num = ();
459    $seq_max = 1;
460    $lastnext = 0;
461}
462
463sub seq {
464    my($op) = @_;
465    return "-" if not exists $sequence_num{$$op};
466    return base_n($sequence_num{$$op});
467}
468
469sub walk_topdown {
470    my($op, $sub, $level) = @_;
471    $sub->($op, $level);
472    if ($op->flags & OPf_KIDS) {
473	for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
474	    walk_topdown($kid, $sub, $level + 1);
475	}
476    }
477    if (class($op) eq "PMOP") {
478	my $maybe_root = $op->code_list;
479	if ( ref($maybe_root) and $maybe_root->isa("B::OP")
480	 and not $op->flags & OPf_KIDS) {
481	    walk_topdown($maybe_root, $sub, $level + 1);
482	}
483	$maybe_root = $op->pmreplroot;
484	if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
485	    # It really is the root of the replacement, not something
486	    # else stored here for lack of space elsewhere
487	    walk_topdown($maybe_root, $sub, $level + 1);
488	}
489    }
490}
491
492sub walklines {
493    my($ar, $level) = @_;
494    for my $l (@$ar) {
495	if (ref($l) eq "ARRAY") {
496	    walklines($l, $level + 1);
497	} else {
498	    $l->concise($level);
499	}
500    }
501}
502
503sub walk_exec {
504    my($top, $level) = @_;
505    my %opsseen;
506    my @lines;
507    my @todo = ([$top, \@lines]);
508    while (@todo and my($op, $targ) = @{shift @todo}) {
509	for (; $$op; $op = $op->next) {
510	    last if $opsseen{$$op}++;
511	    push @$targ, $op;
512	    my $name = $op->name;
513	    if (class($op) eq "LOGOP") {
514		my $ar = [];
515		push @$targ, $ar;
516		push @todo, [$op->other, $ar];
517	    } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
518		my $ar = [];
519		push @$targ, $ar;
520		push @todo, [$op->pmreplstart, $ar];
521	    } elsif ($name =~ /^enter(loop|iter)$/) {
522		$labels{${$op->nextop}} = "NEXT";
523		$labels{${$op->lastop}} = "LAST";
524		$labels{${$op->redoop}} = "REDO";
525	    }
526	}
527    }
528    walklines(\@lines, 0);
529}
530
531# The structure of this routine is purposely modeled after op.c's peep()
532sub sequence {
533    my($op) = @_;
534    my $oldop = 0;
535    return if class($op) eq "NULL" or exists $sequence_num{$$op};
536    for (; $$op; $op = $op->next) {
537	last if exists $sequence_num{$$op};
538	my $name = $op->name;
539	$sequence_num{$$op} = $seq_max++;
540	if (class($op) eq "LOGOP") {
541	    sequence($op->other);
542	} elsif (class($op) eq "LOOP") {
543	    sequence($op->redoop);
544	    sequence( $op->nextop);
545	    sequence($op->lastop);
546	} elsif ($name eq "subst" and $ {$op->pmreplstart}) {
547	    sequence($op->pmreplstart);
548	}
549	$oldop = $op;
550    }
551}
552
553sub fmt_line {    # generate text-line for op.
554    my($hr, $op, $text, $level) = @_;
555
556    $_->($hr, $op, \$text, \$level, $stylename) for @callbacks;
557
558    return '' if $hr->{SKIP};	# suppress line if a callback said so
559    return '' if $hr->{goto} and $hr->{goto} eq '-';	# no goto nowhere
560
561    # spec: (?(text1#varText2)?)
562    $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
563	$hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
564
565    # spec: (x(exec_text;basic_text)x)
566    $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
567
568    # spec: (*(text)*)
569    $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
570
571    # spec: (*(text1;text2)*)
572    $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
573
574    # convert #Var to tag=>val form: Var\t#var
575    $text =~ s/\#([A-Z][a-z]+)(\d+)?/\t\u$1\t\L#$1$2/gs;
576
577    # spec: #varN
578    $text =~ s/\#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
579
580    $text =~ s/\#([a-zA-Z]+)/$hr->{$1}/eg;	# populate #var's
581    $text =~ s/[ \t]*~+[ \t]*/ /g;		# squeeze tildes
582
583    $text = "# $hr->{src}\n$text" if $show_src and $hr->{src};
584
585    chomp $text;
586    return "$text\n" if $text ne "" and $order ne "tree";
587    return $text; # suppress empty lines
588}
589
590
591
592# use require rather than use here to avoid disturbing tests that dump
593# BEGIN blocks
594require B::Op_private;
595
596
597
598our %hints; # used to display each COP's op_hints values
599
600# strict refs, subs, vars
601@hints{0x2,0x200,0x400,0x20,0x40,0x80} = ('$', '&', '*', 'x$', 'x&', 'x*');
602# integers, locale, bytes
603@hints{0x1,0x4,0x8,0x10} = ('i', 'l', 'b');
604# block scope, localise %^H, $^OPEN (in), $^OPEN (out)
605@hints{0x100,0x20000,0x40000,0x80000} = ('{','%','<','>');
606# overload new integer, float, binary, string, re
607@hints{0x1000,0x2000,0x4000,0x8000,0x10000} = ('I', 'F', 'B', 'S', 'R');
608# taint and eval
609@hints{0x100000,0x200000} = ('T', 'E');
610# filetest access, use utf8, unicode_strings feature
611@hints{0x400000,0x800000,0x800} = ('X', 'U', 'us');
612
613# pick up the feature hints constants.
614# Note that we're relying on non-API parts of feature.pm,
615# but its less naughty than just blindly copying those constants into
616# this src file.
617#
618require feature;
619
620sub hints_flags {
621    my($x) = @_;
622    my @s;
623    for my $flag (sort {$b <=> $a} keys %hints) {
624	if ($hints{$flag} and $x & $flag and $x >= $flag) {
625	    $x -= $flag;
626	    push @s, $hints{$flag};
627	}
628    }
629    if ($x & $feature::hint_mask) {
630        push @s, "fea=" . (($x & $feature::hint_mask) >> $feature::hint_shift);
631        $x &= ~$feature::hint_mask;
632    }
633    push @s, sprintf "0x%x", $x if $x;
634    return join(",", @s);
635}
636
637
638# return a string like 'LVINTRO,1' for the op $name with op_private
639# value $x
640
641sub private_flags {
642    my($name, $x) = @_;
643    my $entry = $B::Op_private::bits{$name};
644    return $x ? "$x" : '' unless $entry;
645
646    my @flags;
647    my $bit;
648    for ($bit = 7; $bit >= 0; $bit--) {
649        next unless exists $entry->{$bit};
650        my $e = $entry->{$bit};
651        if (ref($e) eq 'HASH') {
652            # bit field
653
654            my ($bitmin, $bitmax, $bitmask, $enum, $label) =
655                    @{$e}{qw(bitmin bitmax bitmask enum label)};
656            $bit = $bitmin;
657            next if defined $label && $label eq '-'; # display as raw number
658
659            my $val = $x & $bitmask;
660            $x &= ~$bitmask;
661            $val >>= $bitmin;
662
663            if (defined $enum) {
664                # try to convert numeric $val into symbolic
665                my @enum = @$enum;
666                while (@enum) {
667                    my $ix    = shift @enum;
668                    my $name  = shift @enum;
669                    my $label = shift @enum;
670                    if ($val == $ix) {
671                        $val = $label;
672                        last;
673                    }
674                }
675            }
676            next if $val eq '0'; # don't display anonymous zero values
677            push @flags, defined $label ? "$label=$val" : $val;
678
679        }
680        else {
681            # flag bit
682            my $label = $B::Op_private::labels{$e};
683            next if defined $label && $label eq '-'; # display as raw number
684            if ($x & (1<<$bit)) {
685                $x -= (1<<$bit);
686                push @flags, $label;
687            }
688        }
689    }
690
691    push @flags, $x if $x; # display unknown bits numerically
692    return join ",", @flags;
693}
694
695sub concise_sv {
696    my($sv, $hr, $preferpv) = @_;
697    $hr->{svclass} = class($sv);
698    $hr->{svclass} = "UV"
699      if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
700    Carp::cluck("bad concise_sv: $sv") unless $sv and $$sv;
701    $hr->{svaddr} = sprintf("%#x", $$sv);
702    if ($hr->{svclass} eq "GV" && $sv->isGV_with_GP()) {
703	my $gv = $sv;
704	my $stash = $gv->STASH;
705	if (class($stash) eq "SPECIAL") {
706	    $stash = "<none>";
707	}
708	else {
709	    $stash = $stash->NAME;
710	}
711	if ($stash eq "main") {
712	    $stash = "";
713	} else {
714	    $stash = $stash . "::";
715	}
716	$hr->{svval} = "*$stash" . $gv->SAFENAME;
717	return "*$stash" . $gv->SAFENAME;
718    } else {
719	while (class($sv) eq "IV" && $sv->FLAGS & SVf_ROK) {
720	    $hr->{svval} .= "\\";
721	    $sv = $sv->RV;
722	}
723	if (class($sv) eq "SPECIAL") {
724	    $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no",
725                             '', '', '', "sv_zero"]->[$$sv];
726	} elsif ($preferpv
727	      && ($sv->FLAGS & SVf_POK)) {
728	    $hr->{svval} .= cstring($sv->PV);
729	} elsif ($sv->FLAGS & SVf_NOK) {
730	    $hr->{svval} .= $sv->NV;
731	} elsif ($sv->FLAGS & SVf_IOK) {
732	    $hr->{svval} .= $sv->int_value;
733	} elsif ($sv->FLAGS & SVf_POK) {
734	    $hr->{svval} .= cstring($sv->PV);
735	} elsif (class($sv) eq "HV") {
736	    $hr->{svval} .= 'HASH';
737	} elsif (class($sv) eq "AV") {
738	    $hr->{svval} .= 'ARRAY';
739	} elsif (class($sv) eq "CV") {
740	    if ($sv->CvFLAGS & CVf_ANON) {
741		$hr->{svval} .= 'CODE';
742	    } elsif ($sv->CvFLAGS & CVf_NAMED) {
743		$hr->{svval} .= "&";
744		unless ($sv->CvFLAGS & CVf_LEXICAL) {
745		    my $stash = $sv->STASH;
746		    unless (class($stash) eq "SPECIAL") {
747			$hr->{svval} .= $stash->NAME . "::";
748		    }
749		}
750		$hr->{svval} .= $sv->NAME_HEK;
751	    } else {
752		$hr->{svval} .= "&";
753		$sv = $sv->GV;
754		my $stash = $sv->STASH;
755		unless (class($stash) eq "SPECIAL") {
756		    $hr->{svval} .= $stash->NAME . "::";
757		}
758		$hr->{svval} .= $sv->SAFENAME;
759	    }
760	}
761
762	$hr->{svval} = 'undef' unless defined $hr->{svval};
763	my $out = $hr->{svclass};
764	return $out .= " $hr->{svval}" ;
765    }
766}
767
768my %srclines;
769
770sub fill_srclines {
771    my $fullnm = shift;
772    if ($fullnm eq '-e') {
773	$srclines{$fullnm} = [ $fullnm, "-src not supported for -e" ];
774	return;
775    }
776    open (my $fh, '<', $fullnm)
777	or warn "# $fullnm: $!, (chdirs not supported by this feature yet)\n"
778	and return;
779    my @l = <$fh>;
780    chomp @l;
781    unshift @l, $fullnm; # like @{_<$fullnm} in debug, array starts at 1
782    $srclines{$fullnm} = \@l;
783}
784
785# Given a pad target, return the pad var's name and cop range /
786# fakeness, or failing that, its target number.
787# e.g.
788#   ('$i', '$i:5,7')
789# or
790#   ('$i', '$i:fake:a')
791# or
792#   ('t5', 't5')
793
794sub padname {
795    my ($targ) = @_;
796
797    my ($targarg, $targarglife);
798    my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$targ];
799    if (defined $padname and class($padname) ne "SPECIAL" and
800        $padname->LEN)
801    {
802        $targarg  = $padname->PVX;
803        if ($padname->FLAGS & SVf_FAKE) {
804            # These changes relate to the jumbo closure fix.
805            # See changes 19939 and 20005
806            my $fake = '';
807            $fake .= 'a'
808                if $padname->PARENT_FAKELEX_FLAGS & PAD_FAKELEX_ANON;
809            $fake .= 'm'
810                if $padname->PARENT_FAKELEX_FLAGS & PAD_FAKELEX_MULTI;
811            $fake .= ':' . $padname->PARENT_PAD_INDEX
812                if $curcv->CvFLAGS & CVf_ANON;
813            $targarglife = "$targarg:FAKE:$fake";
814        }
815        else {
816            my $intro = $padname->COP_SEQ_RANGE_LOW - $cop_seq_base;
817            my $finish = int($padname->COP_SEQ_RANGE_HIGH) - $cop_seq_base;
818            $finish = "end" if $finish == 999999999 - $cop_seq_base;
819            $targarglife = "$targarg:$intro,$finish";
820        }
821    } else {
822        $targarglife = $targarg = "t" . $targ;
823    }
824    return $targarg, $targarglife;
825}
826
827
828
829sub concise_op {
830    my ($op, $level, $format) = @_;
831    my %h;
832    $h{exname} = $h{name} = $op->name;
833    $h{NAME} = uc $h{name};
834    $h{class} = class($op);
835    $h{extarg} = $h{targ} = $op->targ;
836    $h{extarg} = "" unless $h{extarg};
837    $h{privval} = $op->private;
838    # for null ops, targ holds the old type
839    my $origname = $h{name} eq "null" && $h{targ}
840      ? substr(ppname($h{targ}), 3)
841      : $h{name};
842    $h{private} = private_flags($origname, $op->private);
843    if ($op->folded) {
844      $h{private} &&= "$h{private},";
845      $h{private} .= "FOLD";
846    }
847
848    if ($h{name} ne $origname) { # a null op
849	$h{exname} = "ex-$origname";
850	$h{extarg} = "";
851    } elsif ($h{private} =~ /\bREFC\b/) {
852	# targ holds a reference count
853        my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
854        $h{targarglife} = $h{targarg} = "$h{targ} $refs";
855    } elsif ($h{targ} && $h{name} ne 'iter') {
856        # for my ($q, $r, $s) () {} syntax hijacks the targ of the iter op,
857        # (which is the ->next of the enteriter) hence the special cases above
858        # and just below:
859	my $count = $h{name} eq 'padrange'
860            ? ($op->private & $B::Op_private::defines{'OPpPADRANGE_COUNTMASK'})
861            : $h{name} eq 'enteriter'
862            ? $op->next->targ + 1
863            : 1;
864	my (@targarg, @targarglife);
865	for my $i (0..$count-1) {
866	    my ($targarg, $targarglife) = padname($h{targ} + $i);
867	    push @targarg,     $targarg;
868	    push @targarglife, $targarglife;
869	}
870	$h{targarg}     = join '; ', @targarg;
871	$h{targarglife} = join '; ', @targarglife;
872    }
873
874    $h{arg} = "";
875    $h{svclass} = $h{svaddr} = $h{svval} = "";
876    if ($h{class} eq "PMOP") {
877	my $extra = '';
878	my $precomp = $op->precomp;
879	if (defined $precomp) {
880	    $precomp = cstring($precomp); # Escape literal control sequences
881 	    $precomp = "/$precomp/";
882	} else {
883	    $precomp = "";
884	}
885	if ($op->name eq 'subst') {
886	    if (class($op->pmreplstart) ne "NULL") {
887		undef $lastnext;
888		$extra = " replstart->" . seq($op->pmreplstart);
889	    }
890	}
891	elsif ($op->name eq 'split') {
892            if (    ($op->private & OPpSPLIT_ASSIGN) # @array  = split
893                 && (not $op->flags & OPf_STACKED))  # @{expr} = split
894            {
895                # with C<@array = split(/pat/, str);>,
896                #  array is stored in /pat/'s pmreplroot; either
897                # as an integer index into the pad (for a lexical array)
898                # or as GV for a package array (which will be a pad index
899                # on threaded builds)
900
901                if ($op->private & $B::Op_private::defines{'OPpSPLIT_LEX'}) {
902                    my $off = $op->pmreplroot; # union with op_pmtargetoff
903                    my ($name, $full) = padname($off);
904                    $extra = " => $full";
905                }
906                else {
907                    # union with op_pmtargetoff, op_pmtargetgv
908                    my $gv = $op->pmreplroot;
909                    if (!ref($gv)) {
910                        # the value is actually a pad offset
911                        $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$gv]->NAME;
912                    }
913                    else {
914                        # unthreaded: its a GV
915                        $gv = $gv->NAME;
916                    }
917                    $extra = " => \@$gv";
918                }
919            }
920	}
921	$h{arg} = "($precomp$extra)";
922    } elsif ($h{class} eq "PVOP" and $h{name} !~ '^transr?\z') {
923	$h{arg} = '("' . $op->pv . '")';
924	$h{svval} = '"' . $op->pv . '"';
925    } elsif ($h{class} eq "COP") {
926	my $label = $op->label;
927	$h{coplabel} = $label;
928	$label = $label ? "$label: " : "";
929	my $loc = $op->file;
930	my $pathnm = $loc;
931	$loc =~ s[.*/][];
932	my $ln = $op->line;
933	$loc .= ":$ln";
934	my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
935	$h{arg} = "($label$stash $cseq $loc)";
936	if ($show_src) {
937	    fill_srclines($pathnm) unless exists $srclines{$pathnm};
938	    my $line = $srclines{$pathnm}[$ln] // "-src unavailable under -e";
939	    $h{src} = "$ln: $line";
940	}
941    } elsif ($h{class} eq "LOOP") {
942	$h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
943	  . " redo->" . seq($op->redoop) . ")";
944    } elsif ($h{class} eq "LOGOP") {
945	undef $lastnext;
946	$h{arg} = "(other->" . seq($op->other) . ")";
947	$h{otheraddr} = sprintf("%#x", $ {$op->other});
948        if ($h{name} eq "argdefelem") {
949            # targ used for element index
950            $h{targarglife} = $h{targarg} = "";
951            $h{arg} .= "[" . $op->targ . "]";
952        }
953    }
954    elsif ($h{class} eq "SVOP" or $h{class} eq "PADOP") {
955	unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) {
956	    my $idx = ($h{class} eq "SVOP") ? $op->targ : $op->padix;
957	    if ($h{class} eq "PADOP" or !${$op->sv}) {
958		my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$idx];
959		$h{arg} = "[" . concise_sv($sv, \%h, 0) . "]";
960		$h{targarglife} = $h{targarg} = "";
961	    } else {
962		$h{arg} = "(" . concise_sv($op->sv, \%h, 0) . ")";
963	    }
964	}
965    }
966    elsif ($h{class} eq "METHOP") {
967        my $prefix = '';
968        if ($h{name} eq 'method_redir' or $h{name} eq 'method_redir_super') {
969            my $rclass_sv = $op->rclass;
970            $rclass_sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$rclass_sv]
971                unless ref $rclass_sv;
972            $prefix .= 'PACKAGE "'.$rclass_sv->PV.'", ';
973        }
974        if ($h{name} ne "method") {
975            if (${$op->meth_sv}) {
976                $h{arg} = "($prefix" . concise_sv($op->meth_sv, \%h, 1) . ")";
977            } else {
978                my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
979                $h{arg} = "[$prefix" . concise_sv($sv, \%h, 1) . "]";
980                $h{targarglife} = $h{targarg} = "";
981            }
982        }
983    }
984    elsif ($h{class} eq "UNOP_AUX") {
985        $h{arg} = "(" . $op->string($curcv) . ")";
986    }
987
988    $h{seq} = $h{hyphseq} = seq($op);
989    $h{seq} = "" if $h{seq} eq "-";
990    $h{opt} = $op->opt;
991    $h{label} = $labels{$$op};
992    $h{next} = $op->next;
993    $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
994    $h{nextaddr} = sprintf("%#x", $ {$op->next});
995    $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
996    $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
997    $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
998
999    $h{classsym} = $opclass{$h{class}};
1000    $h{flagval} = $op->flags;
1001    $h{flags} = op_flags($op->flags);
1002    if ($op->can("hints")) {
1003      $h{hintsval} = $op->hints;
1004      $h{hints} = hints_flags($h{hintsval});
1005    } else {
1006      $h{hintsval} = $h{hints} = '';
1007    }
1008    $h{addr} = sprintf("%#x", $$op);
1009    $h{typenum} = $op->type;
1010    $h{noise} = $linenoise[$op->type];
1011
1012    return fmt_line(\%h, $op, $format, $level);
1013}
1014
1015sub B::OP::concise {
1016    my($op, $level) = @_;
1017    if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
1018	# insert a 'goto' line
1019	my $synth = {"seq" => seq($lastnext), "class" => class($lastnext),
1020		     "addr" => sprintf("%#x", $$lastnext),
1021		     "goto" => seq($lastnext), # simplify goto '-' removal
1022	     };
1023	print $walkHandle fmt_line($synth, $op, $gotofmt, $level+1);
1024    }
1025    $lastnext = $op->next;
1026    print $walkHandle concise_op($op, $level, $format);
1027}
1028
1029# B::OP::terse (see Terse.pm) now just calls this
1030sub b_terse {
1031    my($op, $level) = @_;
1032
1033    # This isn't necessarily right, but there's no easy way to get
1034    # from an OP to the right CV. This is a limitation of the
1035    # ->terse() interface style, and there isn't much to do about
1036    # it. In particular, we can die in concise_op if the main pad
1037    # isn't long enough, or has the wrong kind of entries, compared to
1038    # the pad a sub was compiled with. The fix for that would be to
1039    # make a backwards compatible "terse" format that never even
1040    # looked at the pad, just like the old B::Terse. I don't think
1041    # that's worth the effort, though.
1042    $curcv = main_cv unless $curcv;
1043
1044    if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
1045	# insert a 'goto'
1046	my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
1047		 "addr" => sprintf("%#x", $$lastnext)};
1048	print # $walkHandle
1049	    fmt_line($h, $op, $style{"terse"}[1], $level+1);
1050    }
1051    $lastnext = $op->next;
1052    print # $walkHandle
1053	concise_op($op, $level, $style{"terse"}[0]);
1054}
1055
1056sub tree {
1057    my $op = shift;
1058    my $level = shift;
1059    my $style = $tree_decorations[$tree_style];
1060    my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
1061    my $name = concise_op($op, $level, $treefmt);
1062    if (not $op->flags & OPf_KIDS) {
1063	return $name . "\n";
1064    }
1065    my @lines;
1066    for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
1067	push @lines, tree($kid, $level+1);
1068    }
1069    my $i;
1070    for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
1071	$lines[$i] = $space . $lines[$i];
1072    }
1073    if ($i > 0) {
1074	$lines[$i] = $last . $lines[$i];
1075	while ($i-- > 1) {
1076	    if (substr($lines[$i], 0, 1) eq " ") {
1077		$lines[$i] = $nokid . $lines[$i];
1078	    } else {
1079		$lines[$i] = $kid . $lines[$i];
1080	    }
1081	}
1082	$lines[$i] = $kids . $lines[$i];
1083    } else {
1084	$lines[0] = $single . $lines[0];
1085    }
1086    return("$name$lead" . shift @lines,
1087           map(" " x (length($name)+$size) . $_, @lines));
1088}
1089
1090# *** Warning: fragile kludge ahead ***
1091# Because the B::* modules run in the same interpreter as the code
1092# they're compiling, their presence tends to distort the view we have of
1093# the code we're looking at. In particular, perl gives sequence numbers
1094# to COPs. If the program we're looking at were run on its own, this
1095# would start at 1. Because all of B::Concise and all the modules it
1096# uses are compiled first, though, by the time we get to the user's
1097# program the sequence number is already pretty high, which could be
1098# distracting if you're trying to tell OPs apart. Therefore we'd like to
1099# subtract an offset from all the sequence numbers we display, to
1100# restore the simpler view of the world. The trick is to know what that
1101# offset will be, when we're still compiling B::Concise!  If we
1102# hardcoded a value, it would have to change every time B::Concise or
1103# other modules we use do. To help a little, what we do here is compile
1104# a little code at the end of the module, and compute the base sequence
1105# number for the user's program as being a small offset later, so all we
1106# have to worry about are changes in the offset.
1107
1108# When you say "perl -MO=Concise -e '$a'", the output should look like:
1109
1110# 4  <@> leave[t1] vKP/REFC ->(end)
1111# 1     <0> enter ->2
1112 #^ smallest OP sequence number should be 1
1113# 2     <;> nextstate(main 1 -e:1) v ->3
1114 #                         ^ smallest COP sequence number should be 1
1115# -     <1> ex-rv2sv vK/1 ->4
1116# 3        <$> gvsv(*a) s ->4
1117
1118# If the second of the marked numbers there isn't 1, it means you need
1119# to update the corresponding magic number in the next line.
1120# Remember, this needs to stay the last things in the module.
1121
1122my $cop_seq_mnum = 12;
1123$cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
1124
11251;
1126
1127__END__
1128
1129=head1 NAME
1130
1131B::Concise - Walk Perl syntax tree, printing concise info about ops
1132
1133=head1 SYNOPSIS
1134
1135    perl -MO=Concise[,OPTIONS] foo.pl
1136
1137    use B::Concise qw(set_style add_callback);
1138
1139=head1 DESCRIPTION
1140
1141This compiler backend prints the internal OPs of a Perl program's syntax
1142tree in one of several space-efficient text formats suitable for debugging
1143the inner workings of perl or other compiler backends. It can print OPs in
1144the order they appear in the OP tree, in the order they will execute, or
1145in a text approximation to their tree structure, and the format of the
1146information displayed is customizable. Its function is similar to that of
1147perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
1148sophisticated and flexible.
1149
1150=head1 EXAMPLE
1151
1152Here's two outputs (or 'renderings'), using the -exec and -basic
1153(i.e. default) formatting conventions on the same code snippet.
1154
1155    % perl -MO=Concise,-exec -e '$a = $b + 42'
1156    1  <0> enter
1157    2  <;> nextstate(main 1 -e:1) v
1158    3  <#> gvsv[*b] s
1159    4  <$> const[IV 42] s
1160 *  5  <2> add[t3] sK/2
1161    6  <#> gvsv[*a] s
1162    7  <2> sassign vKS/2
1163    8  <@> leave[1 ref] vKP/REFC
1164
1165In this -exec rendering, each opcode is executed in the order shown.
1166The add opcode, marked with '*', is discussed in more detail.
1167
1168The 1st column is the op's sequence number, starting at 1, and is
1169displayed in base 36 by default.  Here they're purely linear; the
1170sequences are very helpful when looking at code with loops and
1171branches.
1172
1173The symbol between angle brackets indicates the op's type, for
1174example; <2> is a BINOP, <@> a LISTOP, and <#> is a PADOP, which is
1175used in threaded perls. (see L</"OP class abbreviations">).
1176
1177The opname, as in B<'add[t1]'>, may be followed by op-specific
1178information in parentheses or brackets (ex B<'[t1]'>).
1179
1180The op-flags (ex B<'sK/2'>) are described in (L</"OP flags
1181abbreviations">).
1182
1183    % perl -MO=Concise -e '$a = $b + 42'
1184    8  <@> leave[1 ref] vKP/REFC ->(end)
1185    1     <0> enter ->2
1186    2     <;> nextstate(main 1 -e:1) v ->3
1187    7     <2> sassign vKS/2 ->8
1188 *  5        <2> add[t1] sK/2 ->6
1189    -           <1> ex-rv2sv sK/1 ->4
1190    3              <$> gvsv(*b) s ->4
1191    4           <$> const(IV 42) s ->5
1192    -        <1> ex-rv2sv sKRM*/1 ->7
1193    6           <$> gvsv(*a) s ->7
1194
1195The default rendering is top-down, so they're not in execution order.
1196This form reflects the way the stack is used to parse and evaluate
1197expressions; the add operates on the two terms below it in the tree.
1198
1199Nullops appear as C<ex-opname>, where I<opname> is an op that has been
1200optimized away by perl.  They're displayed with a sequence-number of
1201'-', because they are not executed (they don't appear in previous
1202example), they're printed here because they reflect the parse.
1203
1204The arrow points to the sequence number of the next op; they're not
1205displayed in -exec mode, for obvious reasons.
1206
1207Note that because this rendering was done on a non-threaded perl, the
1208PADOPs in the previous examples are now SVOPs, and some (but not all)
1209of the square brackets have been replaced by round ones.  This is a
1210subtle feature to provide some visual distinction between renderings
1211on threaded and un-threaded perls.
1212
1213
1214=head1 OPTIONS
1215
1216Arguments that don't start with a hyphen are taken to be the names of
1217subroutines or formats to render; if no
1218such functions are specified, the main
1219body of the program (outside any subroutines, and not including use'd
1220or require'd files) is rendered.  Passing C<BEGIN>, C<UNITCHECK>,
1221C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
1222special blocks to be printed.  Arguments must follow options.
1223
1224Options affect how things are rendered (ie printed).  They're presented
1225here by their visual effect, 1st being strongest.  They're grouped
1226according to how they interrelate; within each group the options are
1227mutually exclusive (unless otherwise stated).
1228
1229=head2 Options for Opcode Ordering
1230
1231These options control the 'vertical display' of opcodes.  The display
1232'order' is also called 'mode' elsewhere in this document.
1233
1234=over 4
1235
1236=item B<-basic>
1237
1238Print OPs in the order they appear in the OP tree (a preorder
1239traversal, starting at the root). The indentation of each OP shows its
1240level in the tree, and the '->' at the end of the line indicates the
1241next opcode in execution order.  This mode is the default, so the flag
1242is included simply for completeness.
1243
1244=item B<-exec>
1245
1246Print OPs in the order they would normally execute (for the majority
1247of constructs this is a postorder traversal of the tree, ending at the
1248root). In most cases the OP that usually follows a given OP will
1249appear directly below it; alternate paths are shown by indentation. In
1250cases like loops when control jumps out of a linear path, a 'goto'
1251line is generated.
1252
1253=item B<-tree>
1254
1255Print OPs in a text approximation of a tree, with the root of the tree
1256at the left and 'left-to-right' order of children transformed into
1257'top-to-bottom'. Because this mode grows both to the right and down,
1258it isn't suitable for large programs (unless you have a very wide
1259terminal).
1260
1261=back
1262
1263=head2 Options for Line-Style
1264
1265These options select the line-style (or just style) used to render
1266each opcode, and dictates what info is actually printed into each line.
1267
1268=over 4
1269
1270=item B<-concise>
1271
1272Use the author's favorite set of formatting conventions. This is the
1273default, of course.
1274
1275=item B<-terse>
1276
1277Use formatting conventions that emulate the output of B<B::Terse>. The
1278basic mode is almost indistinguishable from the real B<B::Terse>, and the
1279exec mode looks very similar, but is in a more logical order and lacks
1280curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
1281is only vaguely reminiscent of B<B::Terse>.
1282
1283=item B<-linenoise>
1284
1285Use formatting conventions in which the name of each OP, rather than being
1286written out in full, is represented by a one- or two-character abbreviation.
1287This is mainly a joke.
1288
1289=item B<-debug>
1290
1291Use formatting conventions reminiscent of CPAN module B<B::Debug>; these aren't
1292very concise at all.
1293
1294=item B<-env>
1295
1296Use formatting conventions read from the environment variables
1297C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
1298
1299=back
1300
1301=head2 Options for tree-specific formatting
1302
1303=over 4
1304
1305=item B<-compact>
1306
1307Use a tree format in which the minimum amount of space is used for the
1308lines connecting nodes (one character in most cases). This squeezes out
1309a few precious columns of screen real estate.
1310
1311=item B<-loose>
1312
1313Use a tree format that uses longer edges to separate OP nodes. This format
1314tends to look better than the compact one, especially in ASCII, and is
1315the default.
1316
1317=item B<-vt>
1318
1319Use tree connecting characters drawn from the VT100 line-drawing set.
1320This looks better if your terminal supports it.
1321
1322=item B<-ascii>
1323
1324Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
1325look as clean as the VT100 characters, but they'll work with almost any
1326terminal (or the horizontal scrolling mode of less(1)) and are suitable
1327for text documentation or email. This is the default.
1328
1329=back
1330
1331These are pairwise exclusive, i.e. compact or loose, vt or ascii.
1332
1333=head2 Options controlling sequence numbering
1334
1335=over 4
1336
1337=item B<-base>I<n>
1338
1339Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
1340digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
1341for 37 will be 'A', and so on until 62. Values greater than 62 are not
1342currently supported. The default is 36.
1343
1344=item B<-bigendian>
1345
1346Print sequence numbers with the most significant digit first. This is the
1347usual convention for Arabic numerals, and the default.
1348
1349=item B<-littleendian>
1350
1351Print sequence numbers with the least significant digit first.  This is
1352obviously mutually exclusive with bigendian.
1353
1354=back
1355
1356=head2 Other options
1357
1358=over 4
1359
1360=item B<-src>
1361
1362With this option, the rendering of each statement (starting with the
1363nextstate OP) will be preceded by the 1st line of source code that
1364generates it.  For example:
1365
1366    1  <0> enter
1367    # 1: my $i;
1368    2  <;> nextstate(main 1 junk.pl:1) v:{
1369    3  <0> padsv[$i:1,10] vM/LVINTRO
1370    # 3: for $i (0..9) {
1371    4  <;> nextstate(main 3 junk.pl:3) v:{
1372    5  <0> pushmark s
1373    6  <$> const[IV 0] s
1374    7  <$> const[IV 9] s
1375    8  <{> enteriter(next->j last->m redo->9)[$i:1,10] lKS
1376    k  <0> iter s
1377    l  <|> and(other->9) vK/1
1378    # 4:     print "line ";
1379    9      <;> nextstate(main 2 junk.pl:4) v
1380    a      <0> pushmark s
1381    b      <$> const[PV "line "] s
1382    c      <@> print vK
1383    # 5:     print "$i\n";
1384    ...
1385
1386=item B<-stash="somepackage">
1387
1388With this, "somepackage" will be required, then the stash is
1389inspected, and each function is rendered.
1390
1391=back
1392
1393The following options are pairwise exclusive.
1394
1395=over 4
1396
1397=item B<-main>
1398
1399Include the main program in the output, even if subroutines were also
1400specified.  This rendering is normally suppressed when a subroutine
1401name or reference is given.
1402
1403=item B<-nomain>
1404
1405This restores the default behavior after you've changed it with '-main'
1406(it's not normally needed).  If no subroutine name/ref is given, main is
1407rendered, regardless of this flag.
1408
1409=item B<-nobanner>
1410
1411Renderings usually include a banner line identifying the function name
1412or stringified subref.  This suppresses the printing of the banner.
1413
1414TBC: Remove the stringified coderef; while it provides a 'cookie' for
1415each function rendered, the cookies used should be 1,2,3.. not a
1416random hex-address.  It also complicates string comparison of two
1417different trees.
1418
1419=item B<-banner>
1420
1421restores default banner behavior.
1422
1423=item B<-banneris> => subref
1424
1425TBC: a hookpoint (and an option to set it) for a user-supplied
1426function to produce a banner appropriate for users needs.  It's not
1427ideal, because the rendering-state variables, which are a natural
1428candidate for use in concise.t, are unavailable to the user.
1429
1430=back
1431
1432=head2 Option Stickiness
1433
1434If you invoke Concise more than once in a program, you should know that
1435the options are 'sticky'.  This means that the options you provide in
1436the first call will be remembered for the 2nd call, unless you
1437re-specify or change them.
1438
1439=head1 ABBREVIATIONS
1440
1441The concise style uses symbols to convey maximum info with minimal
1442clutter (like hex addresses).  With just a little practice, you can
1443start to see the flowers, not just the branches, in the trees.
1444
1445=head2 OP class abbreviations
1446
1447These symbols appear before the op-name, and indicate the
1448B:: namespace that represents the ops in your Perl code.
1449
1450    0      OP (aka BASEOP)  An OP with no children
1451    1      UNOP             An OP with one child
1452    +      UNOP_AUX         A UNOP with auxillary fields
1453    2      BINOP            An OP with two children
1454    |      LOGOP            A control branch OP
1455    @      LISTOP           An OP that could have lots of children
1456    /      PMOP             An OP with a regular expression
1457    $      SVOP             An OP with an SV
1458    "      PVOP             An OP with a string
1459    {      LOOP             An OP that holds pointers for a loop
1460    ;      COP              An OP that marks the start of a statement
1461    #      PADOP            An OP with a GV on the pad
1462    .      METHOP           An OP with method call info
1463
1464=head2 OP flags abbreviations
1465
1466OP flags are either public or private.  The public flags alter the
1467behavior of each opcode in consistent ways, and are represented by 0
1468or more single characters.
1469
1470    v      OPf_WANT_VOID    Want nothing (void context)
1471    s      OPf_WANT_SCALAR  Want single value (scalar context)
1472    l      OPf_WANT_LIST    Want list of any length (list context)
1473                            Want is unknown
1474    K      OPf_KIDS         There is a firstborn child.
1475    P      OPf_PARENS       This operator was parenthesized.
1476                             (Or block needs explicit scope entry.)
1477    R      OPf_REF          Certified reference.
1478                             (Return container, not containee).
1479    M      OPf_MOD          Will modify (lvalue).
1480    S      OPf_STACKED      Some arg is arriving on the stack.
1481    *      OPf_SPECIAL      Do something weird for this op (see op.h)
1482
1483Private flags, if any are set for an opcode, are displayed after a '/'
1484
1485    8  <@> leave[1 ref] vKP/REFC ->(end)
1486    7     <2> sassign vKS/2 ->8
1487
1488They're opcode specific, and occur less often than the public ones, so
1489they're represented by short mnemonics instead of single-chars; see
1490B::Op_private and F<regen/op_private> for more details.
1491
1492Note that a number after a '/' often indicates the number of arguments.
1493In the I<sassign> example above, the OP takes 2 arguments. These values
1494are sometimes used at runtime: in particular, the MAXARG macro makes use
1495of them.
1496
1497=head1 FORMATTING SPECIFICATIONS
1498
1499For each line-style ('concise', 'terse', 'linenoise', etc.) there are
15003 format-specs which control how OPs are rendered.
1501
1502The first is the 'default' format, which is used in both basic and exec
1503modes to print all opcodes.  The 2nd, goto-format, is used in exec
1504mode when branches are encountered.  They're not real opcodes, and are
1505inserted to look like a closing curly brace.  The tree-format is tree
1506specific.
1507
1508When a line is rendered, the correct format-spec is copied and scanned
1509for the following items; data is substituted in, and other
1510manipulations like basic indenting are done, for each opcode rendered.
1511
1512There are 3 kinds of items that may be populated; special patterns,
1513#vars, and literal text, which is copied verbatim.  (Yes, it's a set
1514of s///g steps.)
1515
1516=head2 Special Patterns
1517
1518These items are the primitives used to perform indenting, and to
1519select text from amongst alternatives.
1520
1521=over 4
1522
1523=item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
1524
1525Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
1526
1527=item B<(*(>I<text>B<)*)>
1528
1529Generates one copy of I<text> for each indentation level.
1530
1531=item B<(*(>I<text1>B<;>I<text2>B<)*)>
1532
1533Generates one fewer copies of I<text1> than the indentation level, followed
1534by one copy of I<text2> if the indentation level is more than 0.
1535
1536=item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
1537
1538If the value of I<var> is true (not empty or zero), generates the
1539value of I<var> surrounded by I<text1> and I<Text2>, otherwise
1540nothing.
1541
1542=item B<~>
1543
1544Any number of tildes and surrounding whitespace will be collapsed to
1545a single space.
1546
1547=back
1548
1549=head2 # Variables
1550
1551These #vars represent opcode properties that you may want as part of
1552your rendering.  The '#' is intended as a private sigil; a #var's
1553value is interpolated into the style-line, much like "read $this".
1554
1555These vars take 3 forms:
1556
1557=over 4
1558
1559=item B<#>I<var>
1560
1561A property named 'var' is assumed to exist for the opcodes, and is
1562interpolated into the rendering.
1563
1564=item B<#>I<var>I<N>
1565
1566Generates the value of I<var>, left justified to fill I<N> spaces.
1567Note that this means while you can have properties 'foo' and 'foo2',
1568you cannot render 'foo2', but you could with 'foo2a'.  You would be
1569wise not to rely on this behavior going forward ;-)
1570
1571=item B<#>I<Var>
1572
1573This ucfirst form of #var generates a tag-value form of itself for
1574display; it converts '#Var' into a 'Var => #var' style, which is then
1575handled as described above.  (Imp-note: #Vars cannot be used for
1576conditional-fills, because the => #var transform is done after the check
1577for #Var's value).
1578
1579=back
1580
1581The following variables are 'defined' by B::Concise; when they are
1582used in a style, their respective values are plugged into the
1583rendering of each opcode.
1584
1585Only some of these are used by the standard styles, the others are
1586provided for you to delve into optree mechanics, should you wish to
1587add a new style (see L</add_style> below) that uses them.  You can
1588also add new ones using L</add_callback>.
1589
1590=over 4
1591
1592=item B<#addr>
1593
1594The address of the OP, in hexadecimal.
1595
1596=item B<#arg>
1597
1598The OP-specific information of the OP (such as the SV for an SVOP, the
1599non-local exit pointers for a LOOP, etc.) enclosed in parentheses.
1600
1601=item B<#class>
1602
1603The B-determined class of the OP, in all caps.
1604
1605=item B<#classsym>
1606
1607A single symbol abbreviating the class of the OP.
1608
1609=item B<#coplabel>
1610
1611The label of the statement or block the OP is the start of, if any.
1612
1613=item B<#exname>
1614
1615The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
1616
1617=item B<#extarg>
1618
1619The target of the OP, or nothing for a nulled OP.
1620
1621=item B<#firstaddr>
1622
1623The address of the OP's first child, in hexadecimal.
1624
1625=item B<#flags>
1626
1627The OP's flags, abbreviated as a series of symbols.
1628
1629=item B<#flagval>
1630
1631The numeric value of the OP's flags.
1632
1633=item B<#hints>
1634
1635The COP's hint flags, rendered with abbreviated names if possible. An empty
1636string if this is not a COP. Here are the symbols used:
1637
1638    $ strict refs
1639    & strict subs
1640    * strict vars
1641   x$ explicit use/no strict refs
1642   x& explicit use/no strict subs
1643   x* explicit use/no strict vars
1644    i integers
1645    l locale
1646    b bytes
1647    { block scope
1648    % localise %^H
1649    < open in
1650    > open out
1651    I overload int
1652    F overload float
1653    B overload binary
1654    S overload string
1655    R overload re
1656    T taint
1657    E eval
1658    X filetest access
1659    U utf-8
1660
1661    us      use feature 'unicode_strings'
1662    fea=NNN feature bundle number
1663
1664=item B<#hintsval>
1665
1666The numeric value of the COP's hint flags, or an empty string if this is not
1667a COP.
1668
1669=item B<#hyphseq>
1670
1671The sequence number of the OP, or a hyphen if it doesn't have one.
1672
1673=item B<#label>
1674
1675'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
1676mode, or empty otherwise.
1677
1678=item B<#lastaddr>
1679
1680The address of the OP's last child, in hexadecimal.
1681
1682=item B<#name>
1683
1684The OP's name.
1685
1686=item B<#NAME>
1687
1688The OP's name, in all caps.
1689
1690=item B<#next>
1691
1692The sequence number of the OP's next OP.
1693
1694=item B<#nextaddr>
1695
1696The address of the OP's next OP, in hexadecimal.
1697
1698=item B<#noise>
1699
1700A one- or two-character abbreviation for the OP's name.
1701
1702=item B<#private>
1703
1704The OP's private flags, rendered with abbreviated names if possible.
1705
1706=item B<#privval>
1707
1708The numeric value of the OP's private flags.
1709
1710=item B<#seq>
1711
1712The sequence number of the OP. Note that this is a sequence number
1713generated by B::Concise.
1714
1715=item B<#opt>
1716
1717Whether or not the op has been optimized by the peephole optimizer.
1718
1719=item B<#sibaddr>
1720
1721The address of the OP's next youngest sibling, in hexadecimal.
1722
1723=item B<#svaddr>
1724
1725The address of the OP's SV, if it has an SV, in hexadecimal.
1726
1727=item B<#svclass>
1728
1729The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1730
1731=item B<#svval>
1732
1733The value of the OP's SV, if it has one, in a short human-readable format.
1734
1735=item B<#targ>
1736
1737The numeric value of the OP's targ.
1738
1739=item B<#targarg>
1740
1741The name of the variable the OP's targ refers to, if any, otherwise the
1742letter t followed by the OP's targ in decimal.
1743
1744=item B<#targarglife>
1745
1746Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1747the variable's lifetime (or 'end' for a variable in an open scope) for a
1748variable.
1749
1750=item B<#typenum>
1751
1752The numeric value of the OP's type, in decimal.
1753
1754=back
1755
1756=head1 One-Liner Command tips
1757
1758=over 4
1759
1760=item perl -MO=Concise,bar foo.pl
1761
1762Renders only bar() from foo.pl.  To see main, drop the ',bar'.  To see
1763both, add ',-main'
1764
1765=item perl -MDigest::MD5=md5 -MO=Concise,md5 -e1
1766
1767Identifies md5 as an XS function.  The export is needed so that BC can
1768find it in main.
1769
1770=item perl -MPOSIX -MO=Concise,_POSIX_ARG_MAX -e1
1771
1772Identifies _POSIX_ARG_MAX as a constant sub, optimized to an IV.
1773Although POSIX isn't entirely consistent across platforms, this is
1774likely to be present in virtually all of them.
1775
1776=item perl -MPOSIX -MO=Concise,a -e 'print _POSIX_SAVED_IDS'
1777
1778This renders a print statement, which includes a call to the function.
1779It's identical to rendering a file with a use call and that single
1780statement, except for the filename which appears in the nextstate ops.
1781
1782=item perl -MPOSIX -MO=Concise,a -e 'sub a{_POSIX_SAVED_IDS}'
1783
1784This is B<very> similar to previous, only the first two ops differ.  This
1785subroutine rendering is more representative, insofar as a single main
1786program will have many subs.
1787
1788=item perl -MB::Concise -e 'B::Concise::compile("-exec","-src", \%B::Concise::)->()'
1789
1790This renders all functions in the B::Concise package with the source
1791lines.  It eschews the O framework so that the stashref can be passed
1792directly to B::Concise::compile().  See -stash option for a more
1793convenient way to render a package.
1794
1795=back
1796
1797=head1 Using B::Concise outside of the O framework
1798
1799The common (and original) usage of B::Concise was for command-line
1800renderings of simple code, as given in EXAMPLE.  But you can also use
1801B<B::Concise> from your code, and call compile() directly, and
1802repeatedly.  By doing so, you can avoid the compile-time only
1803operation of O.pm, and even use the debugger to step through
1804B::Concise::compile() itself.
1805
1806Once you're doing this, you may alter Concise output by adding new
1807rendering styles, and by optionally adding callback routines which
1808populate new variables, if such were referenced from those (just
1809added) styles.
1810
1811=head2 Example: Altering Concise Renderings
1812
1813    use B::Concise qw(set_style add_callback);
1814    add_style($yourStyleName => $defaultfmt, $gotofmt, $treefmt);
1815    add_callback
1816      ( sub {
1817            my ($h, $op, $format, $level, $stylename) = @_;
1818            $h->{variable} = some_func($op);
1819        });
1820    $walker = B::Concise::compile(@options,@subnames,@subrefs);
1821    $walker->();
1822
1823=head2 set_style()
1824
1825B<set_style> accepts 3 arguments, and updates the three format-specs
1826comprising a line-style (basic-exec, goto, tree).  It has one minor
1827drawback though; it doesn't register the style under a new name.  This
1828can become an issue if you render more than once and switch styles.
1829Thus you may prefer to use add_style() and/or set_style_standard()
1830instead.
1831
1832=head2 set_style_standard($name)
1833
1834This restores one of the standard line-styles: C<terse>, C<concise>,
1835C<linenoise>, C<debug>, C<env>, into effect.  It also accepts style
1836names previously defined with add_style().
1837
1838=head2 add_style ()
1839
1840This subroutine accepts a new style name and three style arguments as
1841above, and creates, registers, and selects the newly named style.  It is
1842an error to re-add a style; call set_style_standard() to switch between
1843several styles.
1844
1845=head2 add_callback ()
1846
1847If your newly minted styles refer to any new #variables, you'll need
1848to define a callback subroutine that will populate (or modify) those
1849variables.  They are then available for use in the style you've
1850chosen.
1851
1852The callbacks are called for each opcode visited by Concise, in the
1853same order as they are added.  Each subroutine is passed five
1854parameters.
1855
1856  1. A hashref, containing the variable names and values which are
1857     populated into the report-line for the op
1858  2. the op, as a B<B::OP> object
1859  3. a reference to the format string
1860  4. the formatting (indent) level
1861  5. the selected stylename
1862
1863To define your own variables, simply add them to the hash, or change
1864existing values if you need to.  The level and format are passed in as
1865references to scalars, but it is unlikely that they will need to be
1866changed or even used.
1867
1868=head2 Running B::Concise::compile()
1869
1870B<compile> accepts options as described above in L</OPTIONS>, and
1871arguments, which are either coderefs, or subroutine names.
1872
1873It constructs and returns a $treewalker coderef, which when invoked,
1874traverses, or walks, and renders the optrees of the given arguments to
1875STDOUT.  You can reuse this, and can change the rendering style used
1876each time; thereafter the coderef renders in the new style.
1877
1878B<walk_output> lets you change the print destination from STDOUT to
1879another open filehandle, or into a string passed as a ref (unless
1880you've built perl with -Uuseperlio).
1881
1882  my $walker = B::Concise::compile('-terse','aFuncName', \&aSubRef); # 1
1883  walk_output(\my $buf);
1884  $walker->();			        # 1 renders -terse
1885  set_style_standard('concise');	# 2
1886  $walker->();  		        # 2 renders -concise
1887  $walker->(@new);			# 3 renders whatever
1888  print "3 different renderings: terse, concise, and @new: $buf\n";
1889
1890When $walker is called, it traverses the subroutines supplied when it
1891was created, and renders them using the current style.  You can change
1892the style afterwards in several different ways:
1893
1894  1. call C<compile>, altering style or mode/order
1895  2. call C<set_style_standard>
1896  3. call $walker, passing @new options
1897
1898Passing new options to the $walker is the easiest way to change
1899amongst any pre-defined styles (the ones you add are automatically
1900recognized as options), and is the only way to alter rendering order
1901without calling compile again.  Note however that rendering state is
1902still shared amongst multiple $walker objects, so they must still be
1903used in a coordinated manner.
1904
1905=head2 B::Concise::reset_sequence()
1906
1907This function (not exported) lets you reset the sequence numbers (note
1908that they're numbered arbitrarily, their goal being to be human
1909readable).  Its purpose is mostly to support testing, i.e. to compare
1910the concise output from two identical anonymous subroutines (but
1911different instances).  Without the reset, B::Concise, seeing that
1912they're separate optrees, generates different sequence numbers in
1913the output.
1914
1915=head2 Errors
1916
1917Errors in rendering (non-existent function-name, non-existent coderef)
1918are written to the STDOUT, or wherever you've set it via
1919walk_output().
1920
1921Errors using the various *style* calls, and bad args to walk_output(),
1922result in die().  Use an eval if you wish to catch these errors and
1923continue processing.
1924
1925=head1 AUTHOR
1926
1927Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.
1928
1929=cut
1930