1## 2# name: Module::Compile 3# abstract: Perl Module Compilation 4# author: 5# - Ingy döt Net <ingy@ingy.net> 6# - Audrey Tang <autrijus@autrijus.org> 7# license: perl 8# copyright: 2006, 2011 9 10# To Do: 11# 12# - Make preface part of parsed code, since it might contain `package` 13# statements or other scoping stuff. 14# - Build code into an AST. 15use 5.008003; 16package Module::Compile; 17use strict; 18use warnings; 19 20our $VERSION = '0.23'; 21 22use Digest::SHA1 2.13 (); 23 24# A lexical hash to keep track of which files have already been filtered 25my $filtered = {}; 26 27# A map of digests to code blocks 28my $digest_map = {}; 29 30# All subroutines are prefixed with pmc_ so subclasses don't 31# accidentally override things they didn't intend to. 32 33# Determine which stack frame points to the code we are filtering. 34# This is a method in case it needs to be overridden. 35sub pmc_caller_stack_frame { 0 }; 36 37# This is called while parsing source code to determine if the 38# module/class in a use/no line is part of the Module::Compile game. 39# 40# Return true if this class supports PMC compilation. 41# 42# The hope is that this will allow interoperability with modules that 43# do not inherit from Module::Compile but still want to do this sort 44# of thing. 45sub pmc_is_compiler_module { 1 }; 46 47sub new { 48 return bless {}, shift; 49} 50 51# This is called to determine whether the meaning of use/no is reversed. 52sub pmc_use_means_no { 0 } 53 54# This is called to determine whether the use line means a one line section. 55sub pmc_use_means_now { 0 } 56 57# All Module::Compile based modules inherit this import routine. 58sub import { 59 my ($class) = @_; 60 return if $class->pmc_use_means_no; 61 goto &{$class->can('pmc_import')}; 62} 63 64# Treat unimport like import if use means no 65sub unimport { 66 my ($class) = @_; 67 return unless $class->pmc_use_means_no; 68 goto &{$class->can('pmc_import')}; 69} 70 71sub pmc_import { 72 my ($class, @args) = @_; 73 74 # Handler modules can do C< use Module::Compile -base; >. Make 75 # them ISA Module::Compile and get the hell out of Dodge. 76 $class->pmc_set_base(@args) and return; 77 78 my ($module, $line) = (caller($class->pmc_caller_stack_frame))[1, 2]; 79 80 return if $filtered->{$module}++; 81 82 my $callback = sub { 83 my ($class, $content, $data) = @_; 84 my $output = $class->pmc_template($module, $content, $data); 85 $class->pmc_output($module, $output); 86 }; 87 88 $class->pmc_check_compiled_file($module); 89 90 $class->pmc_filter($module, $line, $callback); 91 92 # Is there a meaningful return value here? 93 return; 94} 95 96# File might not be a module (.pm) and might be compiled already. 97# If so, run the compiled file. 98sub pmc_check_compiled_file { 99 my ($class, $file) = @_; 100 101 if (defined $file and $file !~ /\.pm$/i) { 102 # Do the freshness check ourselves 103 my $pmc = $file.'c'; 104 $class->pmc_run_compiled_file($pmc), die 105 if -s $pmc and (-M $pmc <= -M $file); 106 } 107} 108 109sub pmc_run_compiled_file { 110 my ($class, $pmc) = @_; 111 my ($package) = caller($class->pmc_file_caller_frame()); 112 eval "package $package; do \$pmc"; 113 die $@ if $@; 114 exit 0; 115} 116 117sub pmc_file_caller_frame { 2 } 118 119# Set up inheritance 120sub pmc_set_base { 121 my ($class, $flag) = @_; 122 123 # Handle the C<use Module::Compile -base;> command. 124 if ($class->isa(__PACKAGE__) and defined $flag and $flag eq '-base') { 125 my $descendant = (caller 1)[0];; 126 no strict 'refs'; 127 push @{$descendant . '::ISA'}, $class; 128 return 1; 129 } 130 131 return 0; 132} 133 134# Generate the actual code that will go into the .pmc file. 135sub pmc_template { 136 my ($class, $module, $content, $data) = @_; 137 my $base = __PACKAGE__; 138 my $check = $class->freshness_check($module); 139 my $version = $class->VERSION || '0'; 140 return join "\n", 141 "# Generated by $class $version ($base $VERSION) - do not edit!", 142 "$check$content$data"; 143} 144 145# This returns a piece of Perl code to do a runtime check to see if the 146# .pmc file is fresh. By default we use a 32-bit running checksum. 147sub freshness_check { 148 my ($class, $module) = @_; 149 my $sum = sprintf('%08X', do { 150 local $/; 151 open my $fh, "<", $module 152 or die "Cannot open $module: $!"; 153 binmode($fh, ':crlf'); # normalize CRLF for consistent checksum 154 unpack('%32N*', <$fh>); 155 }); 156 return << "..."; 157################((( 32-bit Checksum Validator III )))################ 158#line 1 159BEGIN { use 5.006; local (*F, \$/); (\$F = __FILE__) =~ s!c\$!!; open(F) 160or die "Cannot open \$F: \$!"; binmode(F, ':crlf'); if (unpack('%32N*', 161\$F=readline(*F)) != 0x$sum) { use Filter::Util::Call; my \$f = \$F; 162filter_add(sub { filter_del(); 1 while &filter_read; \$_ = \$f; 1; })}} 163#line 1 164... 165} 166 167# Write the output to the .pmc file 168sub pmc_output { 169 my ($class, $module, $output) = @_; 170 $class->pmc_can_output($module) 171 or return 0; 172 my $pmc = $module . 'c'; 173 174 # If we can't open the file, just return. The filtering will not be cached, 175 # but that might be ok. 176 open my $fh, ">", $pmc 177 or return 0; 178 179 # Protect against disk full or whatever else. 180 local $@; 181 eval { 182 print $fh $output 183 or die; 184 close $fh 185 or die; 186 }; 187 if ( my $e = $@ ) { 188 # close $fh? die if unlink? 189 if ( -e $pmc ) { 190 unlink $pmc 191 or die "Can't delete errant $pmc: $!"; 192 } 193 return 0; 194 } 195 196 return 1; 197} 198 199# Check whether output can be written. 200sub pmc_can_output { 201 my ($class, $file_path) = @_; 202 return 1; 203# return $file_path =~ /\.pm$/; 204} 205 206# We use a source filter to get all the code for compiling. 207sub pmc_filter { 208 my ($class, $module, $line_number, $post_process) = @_; 209 210 # Read original module source code instead of taking from filter, 211 # because we need all the lines including the ones before the `use` 212 # statement, so we can parse Perl into packages and such. 213 open my $fh, $module 214 or die "Can't open $module for input:\n$!"; 215 my $module_content = do { local $/; <$fh> }; 216 close $fh; 217 218 # Find the real __DATA__ or __END__ line. (Not one hidden in a Pod 219 # section or heredoc). 220 my $folded_content = $class->pmc_fold_blocks($module_content); 221 my $folded_data = ''; 222 if ($folded_content =~ s/^((?:__(?:DATA|END)__$).*)//ms) { 223 $folded_data = $1; 224 } 225 my $real_content = $class->pmc_unfold_blocks($folded_content); 226 my $real_data = $class->pmc_unfold_blocks($folded_data); 227 228 # Calculate the number of lines to skip in the source filter, since 229 # we already have them in $real_content. 230 my @lines = ($real_content =~ /(.*\n)/g); 231 my $lines_to_skip = @lines; 232 $lines_to_skip -= $line_number; 233 234 # Use filter to skip past that many lines 235 # Leave __DATA__ section intact 236 my $done = 0; 237 require Filter::Util::Call; 238 Filter::Util::Call::filter_add(sub { 239 return 0 if $done; 240 my $data_line = ''; 241 while (1) { 242 my $status = Filter::Util::Call::filter_read(); 243 last unless $status; 244 return $status if $status < 0; 245 # Skip lines up to the DATA section. 246 next if $lines_to_skip-- > 0; 247 if (/^__(?:END|DATA)__$/) { 248 # Don't filter the DATA section, or else the DATA file 249 # handle becomes invalid. 250 251 # XXX - Maybe there is a way to simply recreate the DATA 252 # file handle, or at least seek back to the start of it. 253 # Needs investigation. 254 255 # For now this means that we only allow compilation on 256 # the module content; not the DATA section. Because we 257 # want to make sure that the program runs the same way 258 # as both a .pm and a .pmc. 259 260 $data_line = $_; 261 last; 262 } 263 } 264 continue { 265 $_ = ''; 266 } 267 268 $real_content =~ s/\r//g; 269 my $filtered_content = $class->pmc_process($real_content); 270 $class->$post_process($filtered_content, $real_data); 271 272 $filtered_content =~ s/(.*\n){$line_number}//; 273 274 $_ = $filtered_content . $data_line; 275 276 $done = 1; 277 }); 278} 279 280use constant TEXT => 0; 281use constant CONTEXT => 1; 282use constant CLASSES => 2; 283# Break the code into blocks. Compile the blocks. 284# Fold out heredocs etc 285# Parse the code into packages, blocks and subs 286# Parse the code by `use/no *::Compiler` 287# Build an AST 288# Reduce the AST until fully reduced 289# Return the result 290sub pmc_process { 291 my $class = shift; 292 my $data = shift; 293 my @blocks = $class->pmc_parse_blocks($data); 294 while (@blocks = $class->pmc_reduce(@blocks)) { 295 if (@blocks == 1 and @{$blocks[0][CLASSES]} == 0) { 296 my $content = $blocks[0][TEXT]; 297 $content .= "\n" unless $content =~ /\n\z/; 298 return $content; 299 } 300 } 301 die "How did I get here?!?"; 302} 303 304# Analyze the remaining blocks and determine which compilers to call to reduce 305# the problem. 306# 307# XXX This routine must do some kind of reduction each pass, or infinite loop 308# will ensue. It is not yet certain if this is the case. 309sub pmc_reduce { 310 my $class = shift; 311 my @blocks; 312 my $prev; 313 while (@_) { 314 my $block = shift; 315 my $next = $_[TEXT]; 316 if ($next and "@{$block->[CLASSES]}" eq "@{$next->[CLASSES]}") { 317 shift; 318 $block->[TEXT] .= $next->[TEXT]; 319 } 320 elsif ( 321 (not $prev or @{$prev->[CLASSES]} < @{$block->[CLASSES]}) and 322 (not $next or @{$next->[CLASSES]} < @{$block->[CLASSES]}) 323 ) { 324 my $prev_len = $prev ? @{$prev->[CLASSES]} : 0; 325 my $next_len = $next ? @{$next->[CLASSES]} : 0; 326 my $offset = ($prev_len > $next_len) ? $prev_len : $next_len; 327 my $length = @{$block->[CLASSES]} - $offset; 328 $class->pmc_call($block, $offset, $length); 329 } 330 push @blocks, $block; 331 $prev = $block; 332 } 333 return @blocks; 334} 335 336# Call a set of compilers on a piece of source code. 337sub pmc_call { 338 my $class = shift; 339 my $block = shift; 340 my $offset = shift; 341 my $length = shift; 342 343 my $text = $block->[TEXT]; 344 my $context = $block->[CONTEXT]; 345 my @classes = splice(@{$block->[CLASSES]}, $offset, $length); 346 for my $klass (@classes) { 347 local $_ = $text; 348 my $return = $klass->pmc_compile($text, ($context->{$klass} || {})); 349 $text = (defined $return and $return !~ /^\d+\z/) 350 ? $return 351 : $_; 352 } 353 $block->[TEXT] = $text; 354} 355 356# Divide a Perl module into blocks. This code divides a module based on 357# lines that use/no a Module::Compile subclass. 358sub pmc_parse_blocks { 359 my $class = shift; 360 my $data = shift; 361 my @blocks = (); 362 my @classes = (); 363 my $context = {}; 364 my $text = ''; 365 my @parts = split /^([^\S\n]*(?:use|no)[^\S\n]+[\w\:\']+[^\n]*\n)/m, $data; 366 for my $part (@parts) { 367 if ($part =~ /^[^\S\n]*(use|no)[^\S\n]+([\w\:\']+)[^\n]*\n/) { 368 my ($use, $klass, $file) = ($1, $2, $2); 369 $file =~ s{(?:::|')}{/}g; 370 if ($klass =~ /^\d+$/) { 371 $text .= $part; 372 next; 373 } 374 { 375 local $@; 376 eval { require "$file.pm" }; 377 die $@ if $@ and "$@" !~ /^Can't locate /; 378 } 379 if ($klass->can('pmc_is_compiler_module') and 380 $klass->pmc_is_compiler_module) { 381 push @blocks, [$text, {%$context}, [@classes]]; 382 $text = ''; 383 @classes = grep {$_ ne $klass} @classes; 384 if (($use eq 'use') xor $klass->pmc_use_means_no) { 385 push @classes, $klass; 386 $context->{$klass} = {%{$context->{$klass} || {}}}; 387 $context->{$klass}{use} = $part; 388 if ($klass->pmc_use_means_now) { 389 push @blocks, ['', {%$context}, [@classes]]; 390 @classes = grep {$_ ne $klass} @classes; 391 delete $context->{$klass}; 392 } 393 } 394 else { 395 delete $context->{$klass}; 396 } 397 } 398 else { 399 $text .= $part; 400 } 401 } 402 else { 403 $text .= $part; 404 } 405 } 406 push @blocks, [$text, {%$context}, [@classes]] 407 if length $text; 408 return @blocks; 409} 410 411# Compile/Filter some source code into something else. This is almost 412# always overridden in a subclass. 413sub pmc_compile { 414 my ($class, $source_code_string, $context_hashref) = @_; 415 return $source_code_string; 416} 417 418# Regexp fragments for matching heredoc, pod section, comment block and 419# data section. 420my $re_here = qr/ 421(?: # Heredoc starting line 422 ^ # Start of some line 423 ((?-s:.*?)) # $2 - text before heredoc marker 424 <<(?!=) # heredoc marker 425 [\t\x20]* # whitespace between marker and quote 426 ((?>['"]?)) # $3 - possible left quote 427 ([\w\-\.]*) # $4 - heredoc terminator 428 (\3 # $5 - possible right quote 429 (?-s:.*\n)) # and rest of the line 430 (.*?\n) # $6 - Heredoc content 431 (?<!\n[0-9a-fA-F]{40}\n) # Not another digest 432 (\4\n) # $7 - Heredoc terminating line 433) 434/xsm; 435 436my $re_pod = qr/ 437(?: 438 (?-s:^=(?!cut\b)\w+.*\n) # Pod starter line 439 .*? # Pod lines 440 (?:(?-s:^=cut\b.*\n)|\z) # Pod terminator 441) 442/xsm; 443 444my $re_comment = qr/ 445(?: 446 (?m-s:^[^\S\n]*\#.*\n)+ # one or more comment lines 447) 448/xsm; 449 450my $re_data = qr/ 451(?: 452 ^(?:__END__|__DATA__)\n # DATA starter line 453 .* # Rest of lines 454) 455/xsm; 456 457# Fold each heredoc, pod section, comment block and data section, each 458# into a single line containing a digest of the original content. 459# 460# This makes further dividing of Perl code less troublesome. 461sub pmc_fold_blocks { 462 my ($class, $source) = @_; 463 464 $source =~ s/(~{3,})/$1~/g; 465 $source =~ s/(^'{3,})/$1'/gm; 466 $source =~ s/(^`{3,})/$1`/gm; 467 $source =~ s/(^={3,})/$1=/gm; 468 469 while (1) { 470 no warnings; 471 $source =~ s/ 472 ( 473 $re_pod | 474 $re_comment | 475 $re_here | 476 $re_data 477 ) 478 / 479 my $result = $1; 480 $result =~ m{\A($re_data)} ? $class->pmc_fold_data() : 481 $result =~ m{\A($re_pod)} ? $class->pmc_fold_pod() : 482 $result =~ m{\A($re_comment)} ? $class->pmc_fold_comment() : 483 $result =~ m{\A($re_here)} ? $class->pmc_fold_here() : 484 die "'$result' didn't match '$re_comment'"; 485 /ex or last; 486 } 487 488 $source =~ s/(?<!~)~~~(?!~)/<</g; 489 $source =~ s/^'''(?!') /__DATA__\n/gm; 490 $source =~ s/^```(?!`)/#/gm; 491 $source =~ s/^===(?!=)/=/gm; 492 493 $source =~ s/^(={3,})=/$1/gm; 494 $source =~ s/^('{3,})'/$1/gm; 495 $source =~ s/^(`{3,})`/$1/gm; 496 $source =~ s/(~{3,})~/$1/g; 497 498 return $source; 499} 500 501sub pmc_unfold_blocks { 502 my ($class, $source) = @_; 503 504 $source =~ s/ 505 ( 506 ^__DATA__\n[0-9a-fA-F]{40}\n 507 | 508 ^=pod\s[0-9a-fA-F]{40}\n=cut\n 509 ) 510 / 511 my $match = $1; 512 $match =~ s!.*?([0-9a-fA-F]{40}).*!$1!s or die; 513 $digest_map->{$match} 514 /xmeg; 515 516 return $source; 517} 518 519# Fold a heredoc's content but don't fold other heredocs from the 520# same line. 521sub pmc_fold_here { 522 my $class = shift; 523 my $result = "$2~~~$3$4$5"; 524 my $preface = ''; 525 my $text = $6; 526 my $stop = $7; 527 while (1) { 528 if ($text =~ s!^(([0-9a-fA-F]{40})\n.*\n)!!) { 529 if (defined $digest_map->{$2}) { 530 $preface .= $1; 531 next; 532 } 533 else { 534 $text = $1 . $text; 535 last; 536 } 537 } 538 last; 539 } 540 my $digest = $class->pmc_fold($text); 541 $result = "$result$preface$digest\n$stop"; 542 $result; 543} 544 545sub pmc_fold_pod { 546 my $class = shift; 547 my $text = $1; 548 my $digest = $class->pmc_fold($text); 549 return qq{===pod $digest\n===cut\n}; 550} 551 552sub pmc_fold_comment { 553 my $class = shift; 554 my $text = $1; 555 my $digest = $class->pmc_fold($text); 556 return qq{``` $digest\n}; 557} 558 559sub pmc_fold_data { 560 my $class = shift; 561 my $text = $1; 562 my $digest = $class->pmc_fold($text); 563 return qq{''' $digest\n}; 564} 565 566# Fold a piece of code into a unique string. 567sub pmc_fold { 568 require Digest::SHA1; 569 my ($class, $text) = @_; 570 my $digest = Digest::SHA1::sha1_hex($text); 571 $digest_map->{$digest} = $text; 572 return $digest; 573} 574 575# Expand folded code into original content. 576sub pmc_unfold { 577 my ($class, $digest) = @_; 578 return $digest_map->{$digest}; 579} 580 5811; 582 583=head1 SYNOPSIS 584 585 package Foo; 586 use Module::Compile -base; 587 588 sub pmc_compile { 589 my ($class, $source) = @_; 590 # Convert $source into (most likely Perl 5) $compiled_output 591 return $compiled_output; 592 } 593 594In F<Bar.pm>: 595 596 package Bar; 597 598 use Foo; 599 ... 600 no Foo 601 602or (implied "no Foo;"): 603 604 package Bar; 605 606 { 607 use Foo; 608 ... 609 } 610 611To compile F<Bar.pm> into F<Bar.pmc>: 612 613 perl -c Bar.pm 614 615=head1 DESCRIPTION 616 617This module provides a system for writing modules that I<compile> other 618Perl modules. 619 620Modules that use these compilation modules get compiled into some 621altered form the first time they are run. The result is cached into 622C<.pmc> files. 623 624Perl has native support for C<.pmc> files. It always checks for them, before 625loading a C<.pm> file. 626 627=head1 EXAMPLE 628 629You can declare a C<v6.pm> compiler with: 630 631 package v6; 632 use Module::Compile -base; 633 634 sub pmc_compile { 635 my ($class, $source) = @_; 636 # ... some way to invoke pugs and give p5 code back ... 637 } 638 639and use it like: 640 641 # MyModule.pm 642 use v6-pugs; 643 module MyModule; 644 # ...some p6 code here... 645 no v6; 646 # ...back to p5 land... 647 648On the first time this module is loaded, it will compile Perl 6 649blocks into Perl 5 (as soon as the C<no v6> line is seen), and 650merge it with the Perl 5 blocks, saving the result into a 651F<MyModule.pmc> file. 652 653The next time around, Perl 5 will automatically load F<MyModule.pmc> 654when someone says C<use MyModule>. On the other hand, Perl 6 can run 655MyModule.pm s a Perl 6 module just fine, as C<use v6-pugs> and C<no v6> 656both works in a Perl 6 setting. 657 658The B<v6.pm> module will also check if F<MyModule.pmc> is up to date. If 659it is, then it will touch its timestamp so the C<.pmc> is loaded on the 660next time. 661 662=head1 BENEFITS 663 664Module::Compile compilers gives you the following benefits: 665 666=over 667 668=item * 669 670Ability to mix many source filterish modules in a much more sane manner. 671Module::Compile controls the compilation process, calling each compiler 672at the right time with the right data. 673 674=item * 675 676Ability to ship precompiled modules without shipping Module::Compile and 677the compiler modules themselves. 678 679=item * 680 681Easier debugging of compiled/filtered code. The C<.pmc> has the real 682code you want to see. 683 684=item * 685 686Zero additional runtime penalty after compilation, because C<perl> has 687already been doing the C<.pmc> check on every module load since 1999! 688 689=back 690 691=head1 PARSING AND DISPATCH 692 693NOTE: *** NOT FULLY IMPLEMENTED YET *** 694 695Module::Compile attempts to make source filtering a sane process, by 696parsing up your module's source code into various blocks; so that by the 697time a compiler is called it only gets the source code that it should be 698looking at. 699 700This section describes the rather complex algorithm that 701Module::Compile uses. 702 703First, the source module is preprocessed to hide heredocs, since the content 704inside heredocs can possibly confuse further parsing. 705 706Next, the source module is divided into a shallow tree of blocks: 707 708 PREAMBLE: 709 (SUBROUTINE | BAREBLOCK | POD | PLAIN)S 710 PACKAGES: 711 PREFACE 712 (SUBROUTINE | BAREBLOCK | POD | PLAIN)S 713 DATA 714 715All of these blocks begin and end on line boundaries. They are described 716as follows: 717 718 PREAMBLE - Lines before the first C<package> statement. 719 PACKAGES - Lines beginning with a C<package statement and continuing 720 until the next C<package> or C<DATA> section. 721 DATA - The DATA section. Begins with the line C<__DATA__> or 722 C<__END__>. 723 SUBROUTINE - A top level (not nested) subroutine. Ending '}' must be 724 on its own line in the first column. 725 BAREBLOCK - A top level (not nested) code block. Ending '}' must be 726 on its own line in the first column. 727 POD - Pod sections beginning with C<^=\w+> and ending with C<=cut>. 728 PLAIN - Lines not in SUBROUTINE, BAREBLOCK or POD. 729 PREFACE - Lines before the first block in a package. 730 731Next, all the blocks are scanned for lines like: 732 733 use Foo qw'x y z'; 734 no Foo; 735 736Where Foo is a Module::Compile subclass. 737 738The lines within a given block between a C<use> and C<no> statement 739are marked to be passed to that compiler. The end of an inner block 740effectively acts as a C<no> statement for any compile sections in 741that block. C<use> statements in a PREFACE apply to all the code in 742a PACKAGE. C<use> statements in a PREAMBLE apply to all the code in 743all PACKAGES. 744 745After all the code has been parsed into blocks and the blocks have been 746marked for various compilers, Module::Compile dispatches the code blocks 747to the compilers. It does so in a most specific to most general order. 748So inner blocks get compiled first, then outer blocks. 749 750A compiler may choose to declare that its result not be recompiled by 751some other containing parser. In this case the result of the compilation 752is replaced by a single line containing the hexadecimal digest of the 753result in double quotes followed by a semicolon. Like: 754 755 "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15"; 756 757The rationale of this is that randoms strings are usally left alone by 758compilers. After all the compilers have finished, the digest lines will 759be expanded again. 760 761Every bit of the default process described above is overridable by 762various methods. 763 764=head1 DISTRIBUTION SUPPORT 765 766Module::Install makes it terribly easy to prepare a module distribution 767with compiled .pmc files. Module::Compile installs a 768Module::Install::PMC plugin. All you need to do is add this line to your 769Makefile.PL: 770 771 pmc_support; 772 773Any of your distrbution's modules that use Module::Compile based modules 774will automatically be compiled into .pmc files and shipped with your 775distribtution precompiled. This means that people who install your 776module distribtution do not need to have the compilers installed 777themselves. So you don't need to make the compiler modules be 778prerequisites. 779 780=head1 SEE ALSO 781 782Module::Install 783