1##
2# name:      Module::Compile
3# abstract:  Perl Module Compilation
4# author:
5# - Ingy döt Net <ingy@ingy.net>
6# - Audrey Tang <autrijus@autrijus.org>
7# license:   perl
8# copyright: 2006, 2011
9
10# To Do:
11#
12# - Make preface part of parsed code, since it might contain `package`
13#   statements or other scoping stuff.
14# - Build code into an AST.
15use 5.008003;
16package Module::Compile;
17use strict;
18use warnings;
19
20our $VERSION = '0.23';
21
22use Digest::SHA1 2.13 ();
23
24# A lexical hash to keep track of which files have already been filtered
25my $filtered = {};
26
27# A map of digests to code blocks
28my $digest_map = {};
29
30# All subroutines are prefixed with pmc_ so subclasses don't
31# accidentally override things they didn't intend to.
32
33# Determine which stack frame points to the code we are filtering.
34# This is a method in case it needs to be overridden.
35sub pmc_caller_stack_frame { 0 };
36
37# This is called while parsing source code to determine if the
38# module/class in a use/no line is part of the Module::Compile game.
39#
40# Return true if this class supports PMC compilation.
41#
42# The hope is that this will allow interoperability with modules that
43# do not inherit from Module::Compile but still want to do this sort
44# of thing.
45sub pmc_is_compiler_module { 1 };
46
47sub new {
48    return bless {}, shift;
49}
50
51# This is called to determine whether the meaning of use/no is reversed.
52sub pmc_use_means_no { 0 }
53
54# This is called to determine whether the use line means a one line section.
55sub pmc_use_means_now { 0 }
56
57# All Module::Compile based modules inherit this import routine.
58sub import {
59    my ($class) = @_;
60    return if $class->pmc_use_means_no;
61    goto &{$class->can('pmc_import')};
62}
63
64# Treat unimport like import if use means no
65sub unimport {
66    my ($class) = @_;
67    return unless $class->pmc_use_means_no;
68    goto &{$class->can('pmc_import')};
69}
70
71sub pmc_import {
72    my ($class, @args) = @_;
73
74    # Handler modules can do C< use Module::Compile -base; >. Make
75    # them ISA Module::Compile and get the hell out of Dodge.
76    $class->pmc_set_base(@args) and return;
77
78    my ($module, $line) = (caller($class->pmc_caller_stack_frame))[1, 2];
79
80    return if $filtered->{$module}++;
81
82    my $callback = sub {
83        my ($class, $content, $data) = @_;
84        my $output = $class->pmc_template($module, $content, $data);
85        $class->pmc_output($module, $output);
86    };
87
88    $class->pmc_check_compiled_file($module);
89
90    $class->pmc_filter($module, $line, $callback);
91
92    # Is there a meaningful return value here?
93    return;
94}
95
96# File might not be a module (.pm) and might be compiled already.
97# If so, run the compiled file.
98sub pmc_check_compiled_file {
99    my ($class, $file) = @_;
100
101    if (defined $file and $file !~ /\.pm$/i) {
102        # Do the freshness check ourselves
103        my $pmc = $file.'c';
104        $class->pmc_run_compiled_file($pmc), die
105          if -s $pmc and (-M $pmc <= -M $file);
106    }
107}
108
109sub pmc_run_compiled_file {
110    my ($class, $pmc) = @_;
111    my ($package) = caller($class->pmc_file_caller_frame());
112    eval "package $package; do \$pmc";
113    die $@ if $@;
114    exit 0;
115}
116
117sub pmc_file_caller_frame { 2 }
118
119# Set up inheritance
120sub pmc_set_base {
121    my ($class, $flag) = @_;
122
123    # Handle the C<use Module::Compile -base;> command.
124    if ($class->isa(__PACKAGE__) and defined $flag and $flag eq '-base') {
125        my $descendant = (caller 1)[0];;
126        no strict 'refs';
127        push @{$descendant . '::ISA'}, $class;
128        return 1;
129    }
130
131    return 0;
132}
133
134# Generate the actual code that will go into the .pmc file.
135sub pmc_template {
136    my ($class, $module, $content, $data) = @_;
137    my $base = __PACKAGE__;
138    my $check = $class->freshness_check($module);
139    my $version = $class->VERSION || '0';
140    return join "\n",
141        "# Generated by $class $version ($base $VERSION) - do not edit!",
142        "$check$content$data";
143}
144
145# This returns a piece of Perl code to do a runtime check to see if the
146# .pmc file is fresh.  By default we use a 32-bit running checksum.
147sub freshness_check {
148    my ($class, $module) = @_;
149    my $sum = sprintf('%08X', do {
150        local $/;
151        open my $fh, "<", $module
152          or die "Cannot open $module: $!";
153        binmode($fh, ':crlf'); # normalize CRLF for consistent checksum
154        unpack('%32N*', <$fh>);
155    });
156    return << "...";
157################((( 32-bit Checksum Validator III )))################
158#line 1
159BEGIN { use 5.006; local (*F, \$/); (\$F = __FILE__) =~ s!c\$!!; open(F)
160or die "Cannot open \$F: \$!"; binmode(F, ':crlf'); if (unpack('%32N*',
161\$F=readline(*F)) != 0x$sum) { use Filter::Util::Call; my \$f = \$F;
162filter_add(sub { filter_del(); 1 while &filter_read; \$_ = \$f; 1; })}}
163#line 1
164...
165}
166
167# Write the output to the .pmc file
168sub pmc_output {
169    my ($class, $module, $output) = @_;
170    $class->pmc_can_output($module)
171      or return 0;
172    my $pmc = $module . 'c';
173
174    # If we can't open the file, just return. The filtering will not be cached,
175    # but that might be ok.
176    open my $fh, ">", $pmc
177      or return 0;
178
179    # Protect against disk full or whatever else.
180    local $@;
181    eval {
182        print $fh $output
183           or die;
184        close $fh
185           or die;
186    };
187    if ( my $e = $@ ) {
188        # close $fh? die if unlink?
189        if ( -e $pmc ) {
190            unlink $pmc
191                or die "Can't delete errant $pmc: $!";
192        }
193        return 0;
194    }
195
196    return 1;
197}
198
199# Check whether output can be written.
200sub pmc_can_output {
201    my ($class, $file_path) = @_;
202    return 1;
203#     return $file_path =~ /\.pm$/;
204}
205
206# We use a source filter to get all the code for compiling.
207sub pmc_filter {
208    my ($class, $module, $line_number, $post_process) = @_;
209
210    # Read original module source code instead of taking from filter,
211    # because we need all the lines including the ones before the `use`
212    # statement, so we can parse Perl into packages and such.
213    open my $fh, $module
214        or die "Can't open $module for input:\n$!";
215    my $module_content = do { local $/; <$fh> };
216    close $fh;
217
218    # Find the real __DATA__ or __END__ line. (Not one hidden in a Pod
219    # section or heredoc).
220    my $folded_content = $class->pmc_fold_blocks($module_content);
221    my $folded_data = '';
222    if ($folded_content =~ s/^((?:__(?:DATA|END)__$).*)//ms) {
223        $folded_data = $1;
224    }
225    my $real_content = $class->pmc_unfold_blocks($folded_content);
226    my $real_data = $class->pmc_unfold_blocks($folded_data);
227
228    # Calculate the number of lines to skip in the source filter, since
229    # we already have them in $real_content.
230    my @lines = ($real_content =~ /(.*\n)/g);
231    my $lines_to_skip = @lines;
232    $lines_to_skip -= $line_number;
233
234    # Use filter to skip past that many lines
235    # Leave __DATA__ section intact
236    my $done = 0;
237    require Filter::Util::Call;
238    Filter::Util::Call::filter_add(sub {
239        return 0 if $done;
240        my $data_line = '';
241        while (1) {
242            my $status = Filter::Util::Call::filter_read();
243            last unless $status;
244            return $status if $status < 0;
245            # Skip lines up to the DATA section.
246            next if $lines_to_skip-- > 0;
247            if (/^__(?:END|DATA)__$/) {
248                # Don't filter the DATA section, or else the DATA file
249                # handle becomes invalid.
250
251                # XXX - Maybe there is a way to simply recreate the DATA
252                # file handle, or at least seek back to the start of it.
253                # Needs investigation.
254
255                # For now this means that we only allow compilation on
256                # the module content; not the DATA section. Because we
257                # want to make sure that the program runs the same way
258                # as both a .pm and a .pmc.
259
260                $data_line = $_;
261                last;
262            }
263        }
264        continue {
265            $_ = '';
266        }
267
268        $real_content =~ s/\r//g;
269        my $filtered_content = $class->pmc_process($real_content);
270        $class->$post_process($filtered_content, $real_data);
271
272        $filtered_content =~ s/(.*\n){$line_number}//;
273
274        $_ = $filtered_content . $data_line;
275
276        $done = 1;
277    });
278}
279
280use constant TEXT => 0;
281use constant CONTEXT => 1;
282use constant CLASSES => 2;
283# Break the code into blocks. Compile the blocks.
284# Fold out heredocs etc
285# Parse the code into packages, blocks and subs
286# Parse the code by `use/no *::Compiler`
287# Build an AST
288# Reduce the AST until fully reduced
289# Return the result
290sub pmc_process {
291    my $class = shift;
292    my $data = shift;
293    my @blocks = $class->pmc_parse_blocks($data);
294    while (@blocks = $class->pmc_reduce(@blocks)) {
295        if (@blocks == 1 and @{$blocks[0][CLASSES]} == 0) {
296            my $content = $blocks[0][TEXT];
297            $content .= "\n" unless $content =~ /\n\z/;
298            return $content;
299        }
300    }
301    die "How did I get here?!?";
302}
303
304# Analyze the remaining blocks and determine which compilers to call to reduce
305# the problem.
306#
307# XXX This routine must do some kind of reduction each pass, or infinite loop
308# will ensue. It is not yet certain if this is the case.
309sub pmc_reduce {
310    my $class = shift;
311    my @blocks;
312    my $prev;
313    while (@_) {
314        my $block = shift;
315        my $next = $_[TEXT];
316        if ($next and "@{$block->[CLASSES]}" eq "@{$next->[CLASSES]}") {
317            shift;
318            $block->[TEXT] .= $next->[TEXT];
319        }
320        elsif (
321            (not $prev or @{$prev->[CLASSES]} < @{$block->[CLASSES]}) and
322            (not $next or @{$next->[CLASSES]} < @{$block->[CLASSES]})
323        ) {
324            my $prev_len = $prev ? @{$prev->[CLASSES]} : 0;
325            my $next_len = $next ? @{$next->[CLASSES]} : 0;
326            my $offset = ($prev_len > $next_len) ? $prev_len : $next_len;
327            my $length = @{$block->[CLASSES]} - $offset;
328            $class->pmc_call($block, $offset, $length);
329        }
330        push @blocks, $block;
331        $prev = $block;
332    }
333    return @blocks;
334}
335
336# Call a set of compilers on a piece of source code.
337sub pmc_call {
338    my $class = shift;
339    my $block = shift;
340    my $offset = shift;
341    my $length = shift;
342
343    my $text = $block->[TEXT];
344    my $context = $block->[CONTEXT];
345    my @classes = splice(@{$block->[CLASSES]}, $offset, $length);
346    for my $klass (@classes) {
347        local $_ = $text;
348        my $return = $klass->pmc_compile($text, ($context->{$klass} || {}));
349        $text = (defined $return and $return !~ /^\d+\z/)
350            ? $return
351            : $_;
352    }
353    $block->[TEXT] = $text;
354}
355
356# Divide a Perl module into blocks. This code divides a module based on
357# lines that use/no a Module::Compile subclass.
358sub pmc_parse_blocks {
359    my $class = shift;
360    my $data = shift;
361    my @blocks = ();
362    my @classes = ();
363    my $context = {};
364    my $text = '';
365    my @parts = split /^([^\S\n]*(?:use|no)[^\S\n]+[\w\:\']+[^\n]*\n)/m, $data;
366    for my $part (@parts) {
367        if ($part =~ /^[^\S\n]*(use|no)[^\S\n]+([\w\:\']+)[^\n]*\n/) {
368            my ($use, $klass, $file) = ($1, $2, $2);
369            $file =~ s{(?:::|')}{/}g;
370            if ($klass =~ /^\d+$/) {
371                $text .= $part;
372                next;
373            }
374            {
375                local $@;
376                eval { require "$file.pm" };
377                die $@ if $@ and "$@" !~ /^Can't locate /;
378            }
379            if ($klass->can('pmc_is_compiler_module') and
380                $klass->pmc_is_compiler_module) {
381                push @blocks, [$text, {%$context}, [@classes]];
382                $text = '';
383                @classes = grep {$_ ne $klass} @classes;
384                if (($use eq 'use') xor $klass->pmc_use_means_no) {
385                    push @classes, $klass;
386                    $context->{$klass} = {%{$context->{$klass} || {}}};
387                    $context->{$klass}{use} = $part;
388                    if ($klass->pmc_use_means_now) {
389                        push @blocks, ['', {%$context}, [@classes]];
390                        @classes = grep {$_ ne $klass} @classes;
391                        delete $context->{$klass};
392                    }
393                }
394                else {
395                    delete $context->{$klass};
396                }
397            }
398            else {
399                $text .= $part;
400            }
401        }
402        else {
403            $text .= $part;
404        }
405    }
406    push @blocks, [$text, {%$context}, [@classes]]
407        if length $text;
408    return @blocks;
409}
410
411# Compile/Filter some source code into something else. This is almost
412# always overridden in a subclass.
413sub pmc_compile {
414    my ($class, $source_code_string, $context_hashref) = @_;
415    return $source_code_string;
416}
417
418# Regexp fragments for matching heredoc, pod section, comment block and
419# data section.
420my $re_here = qr/
421(?:                     # Heredoc starting line
422    ^                   # Start of some line
423    ((?-s:.*?))         # $2 - text before heredoc marker
424    <<(?!=)             # heredoc marker
425    [\t\x20]*           # whitespace between marker and quote
426    ((?>['"]?))         # $3 - possible left quote
427    ([\w\-\.]*)         # $4 - heredoc terminator
428    (\3                 # $5 - possible right quote
429     (?-s:.*\n))        #      and rest of the line
430    (.*?\n)             # $6 - Heredoc content
431    (?<!\n[0-9a-fA-F]{40}\n)  # Not another digest
432    (\4\n)              # $7 - Heredoc terminating line
433)
434/xsm;
435
436my $re_pod = qr/
437(?:
438    (?-s:^=(?!cut\b)\w+.*\n)        # Pod starter line
439    .*?                             # Pod lines
440    (?:(?-s:^=cut\b.*\n)|\z)        # Pod terminator
441)
442/xsm;
443
444my $re_comment = qr/
445(?:
446    (?m-s:^[^\S\n]*\#.*\n)+           # one or more comment lines
447)
448/xsm;
449
450my $re_data = qr/
451(?:
452    ^(?:__END__|__DATA__)\n   # DATA starter line
453    .*                              # Rest of lines
454)
455/xsm;
456
457# Fold each heredoc, pod section, comment block and data section, each
458# into a single line containing a digest of the original content.
459#
460# This makes further dividing of Perl code less troublesome.
461sub pmc_fold_blocks {
462    my ($class, $source) = @_;
463
464    $source =~ s/(~{3,})/$1~/g;
465    $source =~ s/(^'{3,})/$1'/gm;
466    $source =~ s/(^`{3,})/$1`/gm;
467    $source =~ s/(^={3,})/$1=/gm;
468
469    while (1) {
470        no warnings;
471        $source =~ s/
472            (
473                $re_pod |
474                $re_comment |
475                $re_here |
476                $re_data
477            )
478        /
479            my $result = $1;
480            $result =~ m{\A($re_data)}    ? $class->pmc_fold_data()    :
481            $result =~ m{\A($re_pod)}     ? $class->pmc_fold_pod()     :
482            $result =~ m{\A($re_comment)} ? $class->pmc_fold_comment() :
483            $result =~ m{\A($re_here)}    ? $class->pmc_fold_here()    :
484                die "'$result' didn't match '$re_comment'";
485        /ex or last;
486    }
487
488    $source =~ s/(?<!~)~~~(?!~)/<</g;
489    $source =~ s/^'''(?!') /__DATA__\n/gm;
490    $source =~ s/^```(?!`)/#/gm;
491    $source =~ s/^===(?!=)/=/gm;
492
493    $source =~ s/^(={3,})=/$1/gm;
494    $source =~ s/^('{3,})'/$1/gm;
495    $source =~ s/^(`{3,})`/$1/gm;
496    $source =~ s/(~{3,})~/$1/g;
497
498    return $source;
499}
500
501sub pmc_unfold_blocks {
502    my ($class, $source) = @_;
503
504    $source =~ s/
505        (
506            ^__DATA__\n[0-9a-fA-F]{40}\n
507        |
508            ^=pod\s[0-9a-fA-F]{40}\n=cut\n
509        )
510    /
511        my $match = $1;
512        $match =~ s!.*?([0-9a-fA-F]{40}).*!$1!s or die;
513        $digest_map->{$match}
514    /xmeg;
515
516    return $source;
517}
518
519# Fold a heredoc's content but don't fold other heredocs from the
520# same line.
521sub pmc_fold_here {
522    my $class = shift;
523    my $result = "$2~~~$3$4$5";
524    my $preface = '';
525    my $text = $6;
526    my $stop = $7;
527    while (1) {
528        if ($text =~ s!^(([0-9a-fA-F]{40})\n.*\n)!!) {
529            if (defined $digest_map->{$2}) {
530                $preface .= $1;
531                next;
532            }
533            else {
534                $text = $1 . $text;
535                last;
536            }
537        }
538        last;
539    }
540    my $digest = $class->pmc_fold($text);
541    $result = "$result$preface$digest\n$stop";
542    $result;
543}
544
545sub pmc_fold_pod {
546    my $class = shift;
547    my $text = $1;
548    my $digest = $class->pmc_fold($text);
549    return qq{===pod $digest\n===cut\n};
550}
551
552sub pmc_fold_comment {
553    my $class = shift;
554    my $text = $1;
555    my $digest = $class->pmc_fold($text);
556    return qq{``` $digest\n};
557}
558
559sub pmc_fold_data {
560    my $class = shift;
561    my $text = $1;
562    my $digest = $class->pmc_fold($text);
563    return qq{''' $digest\n};
564}
565
566# Fold a piece of code into a unique string.
567sub pmc_fold {
568    require Digest::SHA1;
569    my ($class, $text) = @_;
570    my $digest = Digest::SHA1::sha1_hex($text);
571    $digest_map->{$digest} = $text;
572    return $digest;
573}
574
575# Expand folded code into original content.
576sub pmc_unfold {
577    my ($class, $digest) = @_;
578    return $digest_map->{$digest};
579}
580
5811;
582
583=head1 SYNOPSIS
584
585    package Foo;
586    use Module::Compile -base;
587
588    sub pmc_compile {
589        my ($class, $source) = @_;
590        # Convert $source into (most likely Perl 5) $compiled_output
591        return $compiled_output;
592    }
593
594In F<Bar.pm>:
595
596    package Bar;
597
598    use Foo;
599    ...
600    no Foo
601
602or (implied "no Foo;"):
603
604    package Bar;
605
606    {
607        use Foo;
608        ...
609    }
610
611To compile F<Bar.pm> into F<Bar.pmc>:
612
613    perl -c Bar.pm
614
615=head1 DESCRIPTION
616
617This module provides a system for writing modules that I<compile> other
618Perl modules.
619
620Modules that use these compilation modules get compiled into some
621altered form the first time they are run. The result is cached into
622C<.pmc> files.
623
624Perl has native support for C<.pmc> files. It always checks for them, before
625loading a C<.pm> file.
626
627=head1 EXAMPLE
628
629You can declare a C<v6.pm> compiler with:
630
631    package v6;
632    use Module::Compile -base;
633
634    sub pmc_compile {
635        my ($class, $source) = @_;
636        # ... some way to invoke pugs and give p5 code back ...
637    }
638
639and use it like:
640
641    # MyModule.pm
642    use v6-pugs;
643    module MyModule;
644    # ...some p6 code here...
645    no v6;
646    # ...back to p5 land...
647
648On the first time this module is loaded, it will compile Perl 6
649blocks into Perl 5 (as soon as the C<no v6> line is seen), and
650merge it with the Perl 5 blocks, saving the result into a
651F<MyModule.pmc> file.
652
653The next time around, Perl 5 will automatically load F<MyModule.pmc>
654when someone says C<use MyModule>. On the other hand, Perl 6 can run
655MyModule.pm s a Perl 6 module just fine, as C<use v6-pugs> and C<no v6>
656both works in a Perl 6 setting.
657
658The B<v6.pm> module will also check if F<MyModule.pmc> is up to date. If
659it is, then it will touch its timestamp so the C<.pmc> is loaded on the
660next time.
661
662=head1 BENEFITS
663
664Module::Compile compilers gives you the following benefits:
665
666=over
667
668=item *
669
670Ability to mix many source filterish modules in a much more sane manner.
671Module::Compile controls the compilation process, calling each compiler
672at the right time with the right data.
673
674=item *
675
676Ability to ship precompiled modules without shipping Module::Compile and
677the compiler modules themselves.
678
679=item *
680
681Easier debugging of compiled/filtered code. The C<.pmc> has the real
682code you want to see.
683
684=item *
685
686Zero additional runtime penalty after compilation, because C<perl> has
687already been doing the C<.pmc> check on every module load since 1999!
688
689=back
690
691=head1 PARSING AND DISPATCH
692
693NOTE: *** NOT FULLY IMPLEMENTED YET ***
694
695Module::Compile attempts to make source filtering a sane process, by
696parsing up your module's source code into various blocks; so that by the
697time a compiler is called it only gets the source code that it should be
698looking at.
699
700This section describes the rather complex algorithm that
701Module::Compile uses.
702
703First, the source module is preprocessed to hide heredocs, since the content
704inside heredocs can possibly confuse further parsing.
705
706Next, the source module is divided into a shallow tree of blocks:
707
708    PREAMBLE:
709        (SUBROUTINE | BAREBLOCK | POD | PLAIN)S
710    PACKAGES:
711        PREFACE
712        (SUBROUTINE | BAREBLOCK | POD | PLAIN)S
713    DATA
714
715All of these blocks begin and end on line boundaries. They are described
716as follows:
717
718    PREAMBLE - Lines before the first C<package> statement.
719    PACKAGES - Lines beginning with a C<package statement and continuing
720        until the next C<package> or C<DATA> section.
721    DATA - The DATA section. Begins with the line C<__DATA__> or
722        C<__END__>.
723    SUBROUTINE - A top level (not nested) subroutine. Ending '}' must be
724        on its own line in the first column.
725    BAREBLOCK - A top level (not nested) code block. Ending '}' must be
726        on its own line in the first column.
727    POD - Pod sections beginning with C<^=\w+> and ending with C<=cut>.
728    PLAIN - Lines not in SUBROUTINE, BAREBLOCK or POD.
729    PREFACE - Lines before the first block in a package.
730
731Next, all the blocks are scanned for lines like:
732
733    use Foo qw'x y z';
734    no Foo;
735
736Where Foo is a Module::Compile subclass.
737
738The lines within a given block between a C<use> and C<no> statement
739are marked to be passed to that compiler. The end of an inner block
740effectively acts as a C<no> statement for any compile sections in
741that block. C<use> statements in a PREFACE apply to all the code in
742a PACKAGE. C<use> statements in a PREAMBLE apply to all the code in
743all PACKAGES.
744
745After all the code has been parsed into blocks and the blocks have been
746marked for various compilers, Module::Compile dispatches the code blocks
747to the compilers. It does so in a most specific to most general order.
748So inner blocks get compiled first, then outer blocks.
749
750A compiler may choose to declare that its result not be recompiled by
751some other containing parser. In this case the result of the compilation
752is replaced by a single line containing the hexadecimal digest of the
753result in double quotes followed by a semicolon. Like:
754
755    "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15";
756
757The rationale of this is that randoms strings are usally left alone by
758compilers. After all the compilers have finished, the digest lines will
759be expanded again.
760
761Every bit of the default process described above is overridable by
762various methods.
763
764=head1 DISTRIBUTION SUPPORT
765
766Module::Install makes it terribly easy to prepare a module distribution
767with compiled .pmc files. Module::Compile installs a
768Module::Install::PMC plugin. All you need to do is add this line to your
769Makefile.PL:
770
771    pmc_support;
772
773Any of your distrbution's modules that use Module::Compile based modules
774will automatically be compiled into .pmc files and shipped with your
775distribtution precompiled. This means that people who install your
776module distribtution do not need to have the compilers installed
777themselves. So you don't need to make the compiler modules be
778prerequisites.
779
780=head1 SEE ALSO
781
782Module::Install
783