1#line 1 2# 3############################################################ 4# 5# perltidy - a perl script indenter and formatter 6# 7# Copyright (c) 2000-2009 by Steve Hancock 8# Distributed under the GPL license agreement; see file COPYING 9# 10# This program is free software; you can redistribute it and/or modify 11# it under the terms of the GNU General Public License as published by 12# the Free Software Foundation; either version 2 of the License, or 13# (at your option) any later version. 14# 15# This program is distributed in the hope that it will be useful, 16# but WITHOUT ANY WARRANTY; without even the implied warranty of 17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18# GNU General Public License for more details. 19# 20# You should have received a copy of the GNU General Public License 21# along with this program; if not, write to the Free Software 22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23# 24# For brief instructions instructions, try 'perltidy -h'. 25# For more complete documentation, try 'man perltidy' 26# or visit http://perltidy.sourceforge.net 27# 28# This script is an example of the default style. It was formatted with: 29# 30# perltidy Tidy.pm 31# 32# Code Contributions: See ChangeLog.html for a complete history. 33# Michael Cartmell supplied code for adaptation to VMS and helped with 34# v-strings. 35# Hugh S. Myers supplied sub streamhandle and the supporting code to 36# create a Perl::Tidy module which can operate on strings, arrays, etc. 37# Yves Orton supplied coding to help detect Windows versions. 38# Axel Rose supplied a patch for MacPerl. 39# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. 40# Dan Tyrell contributed a patch for binary I/O. 41# Ueli Hugenschmidt contributed a patch for -fpsc 42# Sam Kington supplied a patch to identify the initial indentation of 43# entabbed code. 44# jonathan swartz supplied patches for: 45# * .../ pattern, which looks upwards from directory 46# * --notidy, to be used in directories where we want to avoid 47# accidentally tidying 48# * prefilter and postfilter 49# * iterations option 50# 51# Many others have supplied key ideas, suggestions, and bug reports; 52# see the CHANGES file. 53# 54############################################################ 55 56package Perl::Tidy; 57use 5.004; # need IO::File from 5.004 or later 58BEGIN { $^W = 1; } # turn on warnings 59 60use strict; 61use Exporter; 62use Carp; 63$|++; 64 65use vars qw{ 66 $VERSION 67 @ISA 68 @EXPORT 69 $missing_file_spec 70}; 71 72@ISA = qw( Exporter ); 73@EXPORT = qw( &perltidy ); 74 75use Cwd; 76use IO::File; 77use File::Basename; 78 79BEGIN { 80 ( $VERSION = q($Id: Tidy.pm,v 1.74 2010/12/17 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker 81} 82 83sub streamhandle { 84 85 # given filename and mode (r or w), create an object which: 86 # has a 'getline' method if mode='r', and 87 # has a 'print' method if mode='w'. 88 # The objects also need a 'close' method. 89 # 90 # How the object is made: 91 # 92 # if $filename is: Make object using: 93 # ---------------- ----------------- 94 # '-' (STDIN if mode = 'r', STDOUT if mode='w') 95 # string IO::File 96 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) 97 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) 98 # object object 99 # (check for 'print' method for 'w' mode) 100 # (check for 'getline' method for 'r' mode) 101 my $ref = ref( my $filename = shift ); 102 my $mode = shift; 103 my $New; 104 my $fh; 105 106 # handle a reference 107 if ($ref) { 108 if ( $ref eq 'ARRAY' ) { 109 $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; 110 } 111 elsif ( $ref eq 'SCALAR' ) { 112 $New = sub { Perl::Tidy::IOScalar->new(@_) }; 113 } 114 else { 115 116 # Accept an object with a getline method for reading. Note: 117 # IO::File is built-in and does not respond to the defined 118 # operator. If this causes trouble, the check can be 119 # skipped and we can just let it crash if there is no 120 # getline. 121 if ( $mode =~ /[rR]/ ) { 122 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { 123 $New = sub { $filename }; 124 } 125 else { 126 $New = sub { undef }; 127 confess <<EOM; 128------------------------------------------------------------------------ 129No 'getline' method is defined for object of class $ref 130Please check your call to Perl::Tidy::perltidy. Trace follows. 131------------------------------------------------------------------------ 132EOM 133 } 134 } 135 136 # Accept an object with a print method for writing. 137 # See note above about IO::File 138 if ( $mode =~ /[wW]/ ) { 139 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { 140 $New = sub { $filename }; 141 } 142 else { 143 $New = sub { undef }; 144 confess <<EOM; 145------------------------------------------------------------------------ 146No 'print' method is defined for object of class $ref 147Please check your call to Perl::Tidy::perltidy. Trace follows. 148------------------------------------------------------------------------ 149EOM 150 } 151 } 152 } 153 } 154 155 # handle a string 156 else { 157 if ( $filename eq '-' ) { 158 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } 159 } 160 else { 161 $New = sub { IO::File->new(@_) }; 162 } 163 } 164 $fh = $New->( $filename, $mode ) 165 or warn "Couldn't open file:$filename in mode:$mode : $!\n"; 166 return $fh, ( $ref or $filename ); 167} 168 169sub find_input_line_ending { 170 171 # Peek at a file and return first line ending character. 172 # Quietly return undef in case of any trouble. 173 my ($input_file) = @_; 174 my $ending; 175 176 # silently ignore input from object or stdin 177 if ( ref($input_file) || $input_file eq '-' ) { 178 return $ending; 179 } 180 open( INFILE, $input_file ) || return $ending; 181 182 binmode INFILE; 183 my $buf; 184 read( INFILE, $buf, 1024 ); 185 close INFILE; 186 if ( $buf && $buf =~ /([\012\015]+)/ ) { 187 my $test = $1; 188 189 # dos 190 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } 191 192 # mac 193 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } 194 195 # unix 196 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } 197 198 # unknown 199 else { } 200 } 201 202 # no ending seen 203 else { } 204 205 return $ending; 206} 207 208sub catfile { 209 210 # concatenate a path and file basename 211 # returns undef in case of error 212 213 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } 214 215 # use File::Spec if we can 216 unless ($missing_file_spec) { 217 return File::Spec->catfile(@_); 218 } 219 220 # Perl 5.004 systems may not have File::Spec so we'll make 221 # a simple try. We assume File::Basename is available. 222 # return undef if not successful. 223 my $name = pop @_; 224 my $path = join '/', @_; 225 my $test_file = $path . $name; 226 my ( $test_name, $test_path ) = fileparse($test_file); 227 return $test_file if ( $test_name eq $name ); 228 return undef if ( $^O eq 'VMS' ); 229 230 # this should work at least for Windows and Unix: 231 $test_file = $path . '/' . $name; 232 ( $test_name, $test_path ) = fileparse($test_file); 233 return $test_file if ( $test_name eq $name ); 234 return undef; 235} 236 237sub make_temporary_filename { 238 239 # Make a temporary filename. 240 # 241 # The POSIX tmpnam() function tends to be unreliable for non-unix 242 # systems (at least for the win32 systems that I've tested), so use 243 # a pre-defined name. A slight disadvantage of this is that two 244 # perltidy runs in the same working directory may conflict. 245 # However, the chance of that is small and managable by the user. 246 # An alternative would be to check for the file's existance and use, 247 # say .TMP0, .TMP1, etc, but that scheme has its own problems. So, 248 # keep it simple. 249 my $name = "perltidy.TMP"; 250 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { 251 return $name; 252 } 253 eval "use POSIX qw(tmpnam)"; 254 if ($@) { return $name } 255 use IO::File; 256 257 # just make a couple of tries before giving up and using the default 258 for ( 0 .. 1 ) { 259 my $tmpname = tmpnam(); 260 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); 261 if ($fh) { 262 $fh->close(); 263 return ($tmpname); 264 last; 265 } 266 } 267 return ($name); 268} 269 270# Here is a map of the flow of data from the input source to the output 271# line sink: 272# 273# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> 274# input groups output 275# lines tokens lines of lines lines 276# lines 277# 278# The names correspond to the package names responsible for the unit processes. 279# 280# The overall process is controlled by the "main" package. 281# 282# LineSource is the stream of input lines 283# 284# Tokenizer analyzes a line and breaks it into tokens, peeking ahead 285# if necessary. A token is any section of the input line which should be 286# manipulated as a single entity during formatting. For example, a single 287# ',' character is a token, and so is an entire side comment. It handles 288# the complexities of Perl syntax, such as distinguishing between '<<' as 289# a shift operator and as a here-document, or distinguishing between '/' 290# as a divide symbol and as a pattern delimiter. 291# 292# Formatter inserts and deletes whitespace between tokens, and breaks 293# sequences of tokens at appropriate points as output lines. It bases its 294# decisions on the default rules as modified by any command-line options. 295# 296# VerticalAligner collects groups of lines together and tries to line up 297# certain tokens, such as '=>', '#', and '=' by adding whitespace. 298# 299# FileWriter simply writes lines to the output stream. 300# 301# The Logger package, not shown, records significant events and warning 302# messages. It writes a .LOG file, which may be saved with a 303# '-log' or a '-g' flag. 304 305{ 306 307 # variables needed by interrupt handler: 308 my $tokenizer; 309 my $input_file; 310 311 # this routine may be called to give a status report if interrupted. If a 312 # parameter is given, it will call exit with that parameter. This is no 313 # longer used because it works under Unix but not under Windows. 314 sub interrupt_handler { 315 316 my $exit_flag = shift; 317 print STDERR "perltidy interrupted"; 318 if ($tokenizer) { 319 my $input_line_number = 320 Perl::Tidy::Tokenizer::get_input_line_number(); 321 print STDERR " at line $input_line_number"; 322 } 323 if ($input_file) { 324 325 if ( ref $input_file ) { print STDERR " of reference to:" } 326 else { print STDERR " of file:" } 327 print STDERR " $input_file"; 328 } 329 print STDERR "\n"; 330 exit $exit_flag if defined($exit_flag); 331 } 332 333 sub perltidy { 334 335 my %defaults = ( 336 argv => undef, 337 destination => undef, 338 formatter => undef, 339 logfile => undef, 340 errorfile => undef, 341 perltidyrc => undef, 342 source => undef, 343 stderr => undef, 344 dump_options => undef, 345 dump_options_type => undef, 346 dump_getopt_flags => undef, 347 dump_options_category => undef, 348 dump_options_range => undef, 349 dump_abbreviations => undef, 350 prefilter => undef, 351 postfilter => undef, 352 ); 353 354 # don't overwrite callers ARGV 355 local @ARGV = @ARGV; 356 357 my %input_hash = @_; 358 359 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { 360 local $" = ')('; 361 my @good_keys = sort keys %defaults; 362 @bad_keys = sort @bad_keys; 363 confess <<EOM; 364------------------------------------------------------------------------ 365Unknown perltidy parameter : (@bad_keys) 366perltidy only understands : (@good_keys) 367------------------------------------------------------------------------ 368 369EOM 370 } 371 372 my $get_hash_ref = sub { 373 my ($key) = @_; 374 my $hash_ref = $input_hash{$key}; 375 if ( defined($hash_ref) ) { 376 unless ( ref($hash_ref) eq 'HASH' ) { 377 my $what = ref($hash_ref); 378 my $but_is = 379 $what ? "but is ref to $what" : "but is not a reference"; 380 croak <<EOM; 381------------------------------------------------------------------------ 382error in call to perltidy: 383-$key must be reference to HASH $but_is 384------------------------------------------------------------------------ 385EOM 386 } 387 } 388 return $hash_ref; 389 }; 390 391 %input_hash = ( %defaults, %input_hash ); 392 my $argv = $input_hash{'argv'}; 393 my $destination_stream = $input_hash{'destination'}; 394 my $errorfile_stream = $input_hash{'errorfile'}; 395 my $logfile_stream = $input_hash{'logfile'}; 396 my $perltidyrc_stream = $input_hash{'perltidyrc'}; 397 my $source_stream = $input_hash{'source'}; 398 my $stderr_stream = $input_hash{'stderr'}; 399 my $user_formatter = $input_hash{'formatter'}; 400 my $prefilter = $input_hash{'prefilter'}; 401 my $postfilter = $input_hash{'postfilter'}; 402 403 # various dump parameters 404 my $dump_options_type = $input_hash{'dump_options_type'}; 405 my $dump_options = $get_hash_ref->('dump_options'); 406 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); 407 my $dump_options_category = $get_hash_ref->('dump_options_category'); 408 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); 409 my $dump_options_range = $get_hash_ref->('dump_options_range'); 410 411 # validate dump_options_type 412 if ( defined($dump_options) ) { 413 unless ( defined($dump_options_type) ) { 414 $dump_options_type = 'perltidyrc'; 415 } 416 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { 417 croak <<EOM; 418------------------------------------------------------------------------ 419Please check value of -dump_options_type in call to perltidy; 420saw: '$dump_options_type' 421expecting: 'perltidyrc' or 'full' 422------------------------------------------------------------------------ 423EOM 424 425 } 426 } 427 else { 428 $dump_options_type = ""; 429 } 430 431 if ($user_formatter) { 432 433 # if the user defines a formatter, there is no output stream, 434 # but we need a null stream to keep coding simple 435 $destination_stream = Perl::Tidy::DevNull->new(); 436 } 437 438 # see if ARGV is overridden 439 if ( defined($argv) ) { 440 441 my $rargv = ref $argv; 442 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } 443 444 # ref to ARRAY 445 if ($rargv) { 446 if ( $rargv eq 'ARRAY' ) { 447 @ARGV = @$argv; 448 } 449 else { 450 croak <<EOM; 451------------------------------------------------------------------------ 452Please check value of -argv in call to perltidy; 453it must be a string or ref to ARRAY but is: $rargv 454------------------------------------------------------------------------ 455EOM 456 } 457 } 458 459 # string 460 else { 461 my ( $rargv, $msg ) = parse_args($argv); 462 if ($msg) { 463 die <<EOM; 464Error parsing this string passed to to perltidy with 'argv': 465$msg 466EOM 467 } 468 @ARGV = @{$rargv}; 469 } 470 } 471 472 # redirect STDERR if requested 473 if ($stderr_stream) { 474 my ( $fh_stderr, $stderr_file ) = 475 Perl::Tidy::streamhandle( $stderr_stream, 'w' ); 476 if ($fh_stderr) { *STDERR = $fh_stderr } 477 else { 478 croak <<EOM; 479------------------------------------------------------------------------ 480Unable to redirect STDERR to $stderr_stream 481Please check value of -stderr in call to perltidy 482------------------------------------------------------------------------ 483EOM 484 } 485 } 486 487 my $rpending_complaint; 488 $$rpending_complaint = ""; 489 my $rpending_logfile_message; 490 $$rpending_logfile_message = ""; 491 492 my ( $is_Windows, $Windows_type ) = 493 look_for_Windows($rpending_complaint); 494 495 # VMS file names are restricted to a 40.40 format, so we append _tdy 496 # instead of .tdy, etc. (but see also sub check_vms_filename) 497 my $dot; 498 my $dot_pattern; 499 if ( $^O eq 'VMS' ) { 500 $dot = '_'; 501 $dot_pattern = '_'; 502 } 503 else { 504 $dot = '.'; 505 $dot_pattern = '\.'; # must escape for use in regex 506 } 507 508 # handle command line options 509 my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string, 510 $rexpansion, $roption_category, $roption_range ) 511 = process_command_line( 512 $perltidyrc_stream, $is_Windows, $Windows_type, 513 $rpending_complaint, $dump_options_type, 514 ); 515 516 # return or exit immediately after all dumps 517 my $quit_now = 0; 518 519 # Getopt parameters and their flags 520 if ( defined($dump_getopt_flags) ) { 521 $quit_now = 1; 522 foreach my $op ( @{$roption_string} ) { 523 my $opt = $op; 524 my $flag = ""; 525 526 # Examples: 527 # some-option=s 528 # some-option=i 529 # some-option:i 530 # some-option! 531 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) { 532 $opt = $1; 533 $flag = $2; 534 } 535 $dump_getopt_flags->{$opt} = $flag; 536 } 537 } 538 539 if ( defined($dump_options_category) ) { 540 $quit_now = 1; 541 %{$dump_options_category} = %{$roption_category}; 542 } 543 544 if ( defined($dump_options_range) ) { 545 $quit_now = 1; 546 %{$dump_options_range} = %{$roption_range}; 547 } 548 549 if ( defined($dump_abbreviations) ) { 550 $quit_now = 1; 551 %{$dump_abbreviations} = %{$rexpansion}; 552 } 553 554 if ( defined($dump_options) ) { 555 $quit_now = 1; 556 %{$dump_options} = %{$rOpts}; 557 } 558 559 return if ($quit_now); 560 561 # make printable string of options for this run as possible diagnostic 562 my $readable_options = readable_options( $rOpts, $roption_string ); 563 564 # dump from command line 565 if ( $rOpts->{'dump-options'} ) { 566 print STDOUT $readable_options; 567 exit 1; 568 } 569 570 check_options( $rOpts, $is_Windows, $Windows_type, 571 $rpending_complaint ); 572 573 if ($user_formatter) { 574 $rOpts->{'format'} = 'user'; 575 } 576 577 # there must be one entry here for every possible format 578 my %default_file_extension = ( 579 tidy => 'tdy', 580 html => 'html', 581 user => '', 582 ); 583 584 # be sure we have a valid output format 585 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { 586 my $formats = join ' ', 587 sort map { "'" . $_ . "'" } keys %default_file_extension; 588 my $fmt = $rOpts->{'format'}; 589 die "-format='$fmt' but must be one of: $formats\n"; 590 } 591 592 my $output_extension = 593 make_extension( $rOpts->{'output-file-extension'}, 594 $default_file_extension{ $rOpts->{'format'} }, $dot ); 595 596 my $backup_extension = 597 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); 598 599 my $html_toc_extension = 600 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); 601 602 my $html_src_extension = 603 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); 604 605 # check for -b option; 606 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} 607 && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode 608 && @ARGV > 0; # silently ignore if standard input; 609 # this allows -b to be in a .perltidyrc file 610 # without error messages when running from an editor 611 612 # turn off -b with warnings in case of conflicts with other options 613 if ($in_place_modify) { 614 if ( $rOpts->{'standard-output'} ) { 615 warn "Ignoring -b; you may not use -b and -st together\n"; 616 $in_place_modify = 0; 617 } 618 if ($destination_stream) { 619 warn 620"Ignoring -b; you may not specify a destination array and -b together\n"; 621 $in_place_modify = 0; 622 } 623 if ($source_stream) { 624 warn 625"Ignoring -b; you may not specify a source array and -b together\n"; 626 $in_place_modify = 0; 627 } 628 if ( $rOpts->{'outfile'} ) { 629 warn "Ignoring -b; you may not use -b and -o together\n"; 630 $in_place_modify = 0; 631 } 632 if ( defined( $rOpts->{'output-path'} ) ) { 633 warn "Ignoring -b; you may not use -b and -opath together\n"; 634 $in_place_modify = 0; 635 } 636 } 637 638 Perl::Tidy::Formatter::check_options($rOpts); 639 if ( $rOpts->{'format'} eq 'html' ) { 640 Perl::Tidy::HtmlWriter->check_options($rOpts); 641 } 642 643 # make the pattern of file extensions that we shouldn't touch 644 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; 645 if ($output_extension) { 646 my $ext = quotemeta($output_extension); 647 $forbidden_file_extensions .= "|$ext"; 648 } 649 if ( $in_place_modify && $backup_extension ) { 650 my $ext = quotemeta($backup_extension); 651 $forbidden_file_extensions .= "|$ext"; 652 } 653 $forbidden_file_extensions .= ')$'; 654 655 # Create a diagnostics object if requested; 656 # This is only useful for code development 657 my $diagnostics_object = undef; 658 if ( $rOpts->{'DIAGNOSTICS'} ) { 659 $diagnostics_object = Perl::Tidy::Diagnostics->new(); 660 } 661 662 # no filenames should be given if input is from an array 663 if ($source_stream) { 664 if ( @ARGV > 0 ) { 665 die 666"You may not specify any filenames when a source array is given\n"; 667 } 668 669 # we'll stuff the source array into ARGV 670 unshift( @ARGV, $source_stream ); 671 672 # No special treatment for source stream which is a filename. 673 # This will enable checks for binary files and other bad stuff. 674 $source_stream = undef unless ref($source_stream); 675 } 676 677 # use stdin by default if no source array and no args 678 else { 679 unshift( @ARGV, '-' ) unless @ARGV; 680 } 681 682 # loop to process all files in argument list 683 my $number_of_files = @ARGV; 684 my $formatter = undef; 685 $tokenizer = undef; 686 while ( $input_file = shift @ARGV ) { 687 my $fileroot; 688 my $input_file_permissions; 689 690 #--------------------------------------------------------------- 691 # determine the input file name 692 #--------------------------------------------------------------- 693 if ($source_stream) { 694 $fileroot = "perltidy"; 695 } 696 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN 697 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc 698 $in_place_modify = 0; 699 } 700 else { 701 $fileroot = $input_file; 702 unless ( -e $input_file ) { 703 704 # file doesn't exist - check for a file glob 705 if ( $input_file =~ /([\?\*\[\{])/ ) { 706 707 # Windows shell may not remove quotes, so do it 708 my $input_file = $input_file; 709 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } 710 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } 711 my $pattern = fileglob_to_re($input_file); 712 ##eval "/$pattern/"; 713 if ( !$@ && opendir( DIR, './' ) ) { 714 my @files = 715 grep { /$pattern/ && !-d $_ } readdir(DIR); 716 closedir(DIR); 717 if (@files) { 718 unshift @ARGV, @files; 719 next; 720 } 721 } 722 } 723 print "skipping file: '$input_file': no matches found\n"; 724 next; 725 } 726 727 unless ( -f $input_file ) { 728 print "skipping file: $input_file: not a regular file\n"; 729 next; 730 } 731 732 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { 733 print 734"skipping file: $input_file: Non-text (override with -f)\n"; 735 next; 736 } 737 738 # we should have a valid filename now 739 $fileroot = $input_file; 740 $input_file_permissions = ( stat $input_file )[2] & 07777; 741 742 if ( $^O eq 'VMS' ) { 743 ( $fileroot, $dot ) = check_vms_filename($fileroot); 744 } 745 746 # add option to change path here 747 if ( defined( $rOpts->{'output-path'} ) ) { 748 749 my ( $base, $old_path ) = fileparse($fileroot); 750 my $new_path = $rOpts->{'output-path'}; 751 unless ( -d $new_path ) { 752 unless ( mkdir $new_path, 0777 ) { 753 die "unable to create directory $new_path: $!\n"; 754 } 755 } 756 my $path = $new_path; 757 $fileroot = catfile( $path, $base ); 758 unless ($fileroot) { 759 die <<EOM; 760------------------------------------------------------------------------ 761Problem combining $new_path and $base to make a filename; check -opath 762------------------------------------------------------------------------ 763EOM 764 } 765 } 766 } 767 768 # Skip files with same extension as the output files because 769 # this can lead to a messy situation with files like 770 # script.tdy.tdy.tdy ... or worse problems ... when you 771 # rerun perltidy over and over with wildcard input. 772 if ( 773 !$source_stream 774 && ( $input_file =~ /$forbidden_file_extensions/o 775 || $input_file eq 'DIAGNOSTICS' ) 776 ) 777 { 778 print "skipping file: $input_file: wrong extension\n"; 779 next; 780 } 781 782 # the 'source_object' supplies a method to read the input file 783 my $source_object = 784 Perl::Tidy::LineSource->new( $input_file, $rOpts, 785 $rpending_logfile_message ); 786 next unless ($source_object); 787 788 # Prefilters and postfilters: The prefilter is a code reference 789 # that will be applied to the source before tidying, and the 790 # postfilter is a code reference to the result before outputting. 791 if ($prefilter) { 792 my $buf = ''; 793 while ( my $line = $source_object->get_line() ) { 794 $buf .= $line; 795 } 796 $buf = $prefilter->($buf); 797 798 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, 799 $rpending_logfile_message ); 800 } 801 802 # register this file name with the Diagnostics package 803 $diagnostics_object->set_input_file($input_file) 804 if $diagnostics_object; 805 806 #--------------------------------------------------------------- 807 # determine the output file name 808 #--------------------------------------------------------------- 809 my $output_file = undef; 810 my $actual_output_extension; 811 812 if ( $rOpts->{'outfile'} ) { 813 814 if ( $number_of_files <= 1 ) { 815 816 if ( $rOpts->{'standard-output'} ) { 817 die "You may not use -o and -st together\n"; 818 } 819 elsif ($destination_stream) { 820 die 821"You may not specify a destination array and -o together\n"; 822 } 823 elsif ( defined( $rOpts->{'output-path'} ) ) { 824 die "You may not specify -o and -opath together\n"; 825 } 826 elsif ( defined( $rOpts->{'output-file-extension'} ) ) { 827 die "You may not specify -o and -oext together\n"; 828 } 829 $output_file = $rOpts->{outfile}; 830 831 # make sure user gives a file name after -o 832 if ( $output_file =~ /^-/ ) { 833 die "You must specify a valid filename after -o\n"; 834 } 835 836 # do not overwrite input file with -o 837 if ( defined($input_file_permissions) 838 && ( $output_file eq $input_file ) ) 839 { 840 die 841 "Use 'perltidy -b $input_file' to modify in-place\n"; 842 } 843 } 844 else { 845 die "You may not use -o with more than one input file\n"; 846 } 847 } 848 elsif ( $rOpts->{'standard-output'} ) { 849 if ($destination_stream) { 850 die 851"You may not specify a destination array and -st together\n"; 852 } 853 $output_file = '-'; 854 855 if ( $number_of_files <= 1 ) { 856 } 857 else { 858 die "You may not use -st with more than one input file\n"; 859 } 860 } 861 elsif ($destination_stream) { 862 $output_file = $destination_stream; 863 } 864 elsif ($source_stream) { # source but no destination goes to stdout 865 $output_file = '-'; 866 } 867 elsif ( $input_file eq '-' ) { 868 $output_file = '-'; 869 } 870 else { 871 if ($in_place_modify) { 872 $output_file = IO::File->new_tmpfile() 873 or die "cannot open temp file for -b option: $!\n"; 874 } 875 else { 876 $actual_output_extension = $output_extension; 877 $output_file = $fileroot . $output_extension; 878 } 879 } 880 881 # the 'sink_object' knows how to write the output file 882 my $tee_file = $fileroot . $dot . "TEE"; 883 884 my $line_separator = $rOpts->{'output-line-ending'}; 885 if ( $rOpts->{'preserve-line-endings'} ) { 886 $line_separator = find_input_line_ending($input_file); 887 } 888 889 # Eventually all I/O may be done with binmode, but for now it is 890 # only done when a user requests a particular line separator 891 # through the -ple or -ole flags 892 my $binmode = 0; 893 if ( defined($line_separator) ) { $binmode = 1 } 894 else { $line_separator = "\n" } 895 896 my ( $sink_object, $postfilter_buffer ); 897 if ($postfilter) { 898 $sink_object = 899 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, 900 $line_separator, $rOpts, $rpending_logfile_message, 901 $binmode ); 902 } 903 else { 904 $sink_object = 905 Perl::Tidy::LineSink->new( $output_file, $tee_file, 906 $line_separator, $rOpts, $rpending_logfile_message, 907 $binmode ); 908 } 909 910 #--------------------------------------------------------------- 911 # initialize the error logger 912 #--------------------------------------------------------------- 913 my $warning_file = $fileroot . $dot . "ERR"; 914 if ($errorfile_stream) { $warning_file = $errorfile_stream } 915 my $log_file = $fileroot . $dot . "LOG"; 916 if ($logfile_stream) { $log_file = $logfile_stream } 917 918 my $logger_object = 919 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, 920 $saw_extrude ); 921 write_logfile_header( 922 $rOpts, $logger_object, $config_file, 923 $rraw_options, $Windows_type, $readable_options, 924 ); 925 if ($$rpending_logfile_message) { 926 $logger_object->write_logfile_entry($$rpending_logfile_message); 927 } 928 if ($$rpending_complaint) { 929 $logger_object->complain($$rpending_complaint); 930 } 931 932 #--------------------------------------------------------------- 933 # initialize the debug object, if any 934 #--------------------------------------------------------------- 935 my $debugger_object = undef; 936 if ( $rOpts->{DEBUG} ) { 937 $debugger_object = 938 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); 939 } 940 941 # loop over iterations 942 my $max_iterations = $rOpts->{'iterations'}; 943 my $sink_object_final = $sink_object; 944 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { 945 my $temp_buffer; 946 947 # local copies of some debugging objects which get deleted 948 # after first iteration, but will reappear after this loop 949 my $debugger_object = $debugger_object; 950 my $logger_object = $logger_object; 951 my $diagnostics_object = $diagnostics_object; 952 953 # output to temp buffer until last iteration 954 if ( $iter < $max_iterations ) { 955 $sink_object = 956 Perl::Tidy::LineSink->new( \$temp_buffer, $tee_file, 957 $line_separator, $rOpts, $rpending_logfile_message, 958 $binmode ); 959 } 960 else { 961 $sink_object = $sink_object_final; 962 963 # terminate some debugging output after first pass 964 # to avoid needless output. 965 $debugger_object = undef; 966 $logger_object = undef; 967 $diagnostics_object = undef; 968 } 969 970 #--------------------------------------------------------------- 971 # create a formatter for this file : html writer or pretty printer 972 #--------------------------------------------------------------- 973 974 # we have to delete any old formatter because, for safety, 975 # the formatter will check to see that there is only one. 976 $formatter = undef; 977 978 if ($user_formatter) { 979 $formatter = $user_formatter; 980 } 981 elsif ( $rOpts->{'format'} eq 'html' ) { 982 $formatter = 983 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, 984 $actual_output_extension, $html_toc_extension, 985 $html_src_extension ); 986 } 987 elsif ( $rOpts->{'format'} eq 'tidy' ) { 988 $formatter = Perl::Tidy::Formatter->new( 989 logger_object => $logger_object, 990 diagnostics_object => $diagnostics_object, 991 sink_object => $sink_object, 992 ); 993 } 994 else { 995 die "I don't know how to do -format=$rOpts->{'format'}\n"; 996 } 997 998 unless ($formatter) { 999 die 1000 "Unable to continue with $rOpts->{'format'} formatting\n"; 1001 } 1002 1003 #--------------------------------------------------------------- 1004 # create the tokenizer for this file 1005 #--------------------------------------------------------------- 1006 $tokenizer = undef; # must destroy old tokenizer 1007 $tokenizer = Perl::Tidy::Tokenizer->new( 1008 source_object => $source_object, 1009 logger_object => $logger_object, 1010 debugger_object => $debugger_object, 1011 diagnostics_object => $diagnostics_object, 1012 starting_level => $rOpts->{'starting-indentation-level'}, 1013 tabs => $rOpts->{'tabs'}, 1014 entab_leading_space => $rOpts->{'entab-leading-whitespace'}, 1015 indent_columns => $rOpts->{'indent-columns'}, 1016 look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, 1017 look_for_autoloader => $rOpts->{'look-for-autoloader'}, 1018 look_for_selfloader => $rOpts->{'look-for-selfloader'}, 1019 trim_qw => $rOpts->{'trim-qw'}, 1020 ); 1021 1022 #--------------------------------------------------------------- 1023 # now we can do it 1024 #--------------------------------------------------------------- 1025 process_this_file( $tokenizer, $formatter ); 1026 1027 #--------------------------------------------------------------- 1028 # close the input source and report errors 1029 #--------------------------------------------------------------- 1030 $source_object->close_input_file(); 1031 1032 # line source for next iteration (if any) comes from the current 1033 # temporary buffer 1034 if ( $iter < $max_iterations ) { 1035 $source_object = 1036 Perl::Tidy::LineSource->new( \$temp_buffer, $rOpts, 1037 $rpending_logfile_message ); 1038 } 1039 1040 } # end loop over iterations 1041 1042 # get file names to use for syntax check 1043 my $ifname = $source_object->get_input_file_copy_name(); 1044 my $ofname = $sink_object->get_output_file_copy(); 1045 1046 #--------------------------------------------------------------- 1047 # handle the -b option (backup and modify in-place) 1048 #--------------------------------------------------------------- 1049 if ($in_place_modify) { 1050 unless ( -f $input_file ) { 1051 1052 # oh, oh, no real file to backup .. 1053 # shouldn't happen because of numerous preliminary checks 1054 die print 1055"problem with -b backing up input file '$input_file': not a file\n"; 1056 } 1057 my $backup_name = $input_file . $backup_extension; 1058 if ( -f $backup_name ) { 1059 unlink($backup_name) 1060 or die 1061"unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; 1062 } 1063 rename( $input_file, $backup_name ) 1064 or die 1065"problem renaming $input_file to $backup_name for -b option: $!\n"; 1066 $ifname = $backup_name; 1067 1068 seek( $output_file, 0, 0 ) 1069 or die "unable to rewind tmp file for -b option: $!\n"; 1070 1071 my $fout = IO::File->new("> $input_file") 1072 or die 1073"problem opening $input_file for write for -b option; check directory permissions: $!\n"; 1074 binmode $fout; 1075 my $line; 1076 while ( $line = $output_file->getline() ) { 1077 $fout->print($line); 1078 } 1079 $fout->close(); 1080 $output_file = $input_file; 1081 $ofname = $input_file; 1082 } 1083 1084 #--------------------------------------------------------------- 1085 # clean up and report errors 1086 #--------------------------------------------------------------- 1087 $sink_object->close_output_file() if $sink_object; 1088 $debugger_object->close_debug_file() if $debugger_object; 1089 1090 if ($postfilter) { 1091 my $new_sink = 1092 Perl::Tidy::LineSink->new( $output_file, $tee_file, 1093 $line_separator, $rOpts, $rpending_logfile_message, 1094 $binmode ); 1095 my $buf = $postfilter->($postfilter_buffer); 1096 foreach my $line ( split( "\n", $buf ) ) { 1097 $new_sink->write_line($line); 1098 } 1099 } 1100 1101 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes 1102 if ($output_file) { 1103 1104 if ($input_file_permissions) { 1105 1106 # give output script same permissions as input script, but 1107 # make it user-writable or else we can't run perltidy again. 1108 # Thus we retain whatever executable flags were set. 1109 if ( $rOpts->{'format'} eq 'tidy' ) { 1110 chmod( $input_file_permissions | 0600, $output_file ); 1111 } 1112 1113 # else use default permissions for html and any other format 1114 1115 } 1116 if ( $logger_object && $rOpts->{'check-syntax'} ) { 1117 $infile_syntax_ok = 1118 check_syntax( $ifname, $ofname, $logger_object, $rOpts ); 1119 } 1120 } 1121 1122 $logger_object->finish( $infile_syntax_ok, $formatter ) 1123 if $logger_object; 1124 } # end of loop to process all files 1125 } # end of main program 1126} 1127 1128sub fileglob_to_re { 1129 1130 # modified (corrected) from version in find2perl 1131 my $x = shift; 1132 $x =~ s#([./^\$()])#\\$1#g; # escape special characters 1133 $x =~ s#\*#.*#g; # '*' -> '.*' 1134 $x =~ s#\?#.#g; # '?' -> '.' 1135 "^$x\\z"; # match whole word 1136} 1137 1138sub make_extension { 1139 1140 # Make a file extension, including any leading '.' if necessary 1141 # The '.' may actually be an '_' under VMS 1142 my ( $extension, $default, $dot ) = @_; 1143 1144 # Use the default if none specified 1145 $extension = $default unless ($extension); 1146 1147 # Only extensions with these leading characters get a '.' 1148 # This rule gives the user some freedom 1149 if ( $extension =~ /^[a-zA-Z0-9]/ ) { 1150 $extension = $dot . $extension; 1151 } 1152 return $extension; 1153} 1154 1155sub write_logfile_header { 1156 my ( 1157 $rOpts, $logger_object, $config_file, 1158 $rraw_options, $Windows_type, $readable_options 1159 ) = @_; 1160 $logger_object->write_logfile_entry( 1161"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" 1162 ); 1163 if ($Windows_type) { 1164 $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); 1165 } 1166 my $options_string = join( ' ', @$rraw_options ); 1167 1168 if ($config_file) { 1169 $logger_object->write_logfile_entry( 1170 "Found Configuration File >>> $config_file \n"); 1171 } 1172 $logger_object->write_logfile_entry( 1173 "Configuration and command line parameters for this run:\n"); 1174 $logger_object->write_logfile_entry("$options_string\n"); 1175 1176 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { 1177 $rOpts->{'logfile'} = 1; # force logfile to be saved 1178 $logger_object->write_logfile_entry( 1179 "Final parameter set for this run\n"); 1180 $logger_object->write_logfile_entry( 1181 "------------------------------------\n"); 1182 1183 $logger_object->write_logfile_entry($readable_options); 1184 1185 $logger_object->write_logfile_entry( 1186 "------------------------------------\n"); 1187 } 1188 $logger_object->write_logfile_entry( 1189 "To find error messages search for 'WARNING' with your editor\n"); 1190} 1191 1192sub generate_options { 1193 1194 ###################################################################### 1195 # Generate and return references to: 1196 # @option_string - the list of options to be passed to Getopt::Long 1197 # @defaults - the list of default options 1198 # %expansion - a hash showing how all abbreviations are expanded 1199 # %category - a hash giving the general category of each option 1200 # %option_range - a hash giving the valid ranges of certain options 1201 1202 # Note: a few options are not documented in the man page and usage 1203 # message. This is because these are experimental or debug options and 1204 # may or may not be retained in future versions. 1205 # 1206 # Here are the undocumented flags as far as I know. Any of them 1207 # may disappear at any time. They are mainly for fine-tuning 1208 # and debugging. 1209 # 1210 # fll --> fuzzy-line-length # a trivial parameter which gets 1211 # turned off for the extrude option 1212 # which is mainly for debugging 1213 # chk --> check-multiline-quotes # check for old bug; to be deleted 1214 # scl --> short-concatenation-item-length # helps break at '.' 1215 # recombine # for debugging line breaks 1216 # valign # for debugging vertical alignment 1217 # I --> DIAGNOSTICS # for debugging 1218 ###################################################################### 1219 1220 # here is a summary of the Getopt codes: 1221 # <none> does not take an argument 1222 # =s takes a mandatory string 1223 # :s takes an optional string (DO NOT USE - filenames will get eaten up) 1224 # =i takes a mandatory integer 1225 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) 1226 # ! does not take an argument and may be negated 1227 # i.e., -foo and -nofoo are allowed 1228 # a double dash signals the end of the options list 1229 # 1230 #--------------------------------------------------------------- 1231 # Define the option string passed to GetOptions. 1232 #--------------------------------------------------------------- 1233 1234 my @option_string = (); 1235 my %expansion = (); 1236 my %option_category = (); 1237 my %option_range = (); 1238 my $rexpansion = \%expansion; 1239 1240 # names of categories in manual 1241 # leading integers will allow sorting 1242 my @category_name = ( 1243 '0. I/O control', 1244 '1. Basic formatting options', 1245 '2. Code indentation control', 1246 '3. Whitespace control', 1247 '4. Comment controls', 1248 '5. Linebreak controls', 1249 '6. Controlling list formatting', 1250 '7. Retaining or ignoring existing line breaks', 1251 '8. Blank line control', 1252 '9. Other controls', 1253 '10. HTML options', 1254 '11. pod2html options', 1255 '12. Controlling HTML properties', 1256 '13. Debugging', 1257 ); 1258 1259 # These options are parsed directly by perltidy: 1260 # help h 1261 # version v 1262 # However, they are included in the option set so that they will 1263 # be seen in the options dump. 1264 1265 # These long option names have no abbreviations or are treated specially 1266 @option_string = qw( 1267 html! 1268 noprofile 1269 no-profile 1270 npro 1271 recombine! 1272 valign! 1273 notidy 1274 ); 1275 1276 my $category = 13; # Debugging 1277 foreach (@option_string) { 1278 my $opt = $_; # must avoid changing the actual flag 1279 $opt =~ s/!$//; 1280 $option_category{$opt} = $category_name[$category]; 1281 } 1282 1283 $category = 11; # HTML 1284 $option_category{html} = $category_name[$category]; 1285 1286 # routine to install and check options 1287 my $add_option = sub { 1288 my ( $long_name, $short_name, $flag ) = @_; 1289 push @option_string, $long_name . $flag; 1290 $option_category{$long_name} = $category_name[$category]; 1291 if ($short_name) { 1292 if ( $expansion{$short_name} ) { 1293 my $existing_name = $expansion{$short_name}[0]; 1294 die 1295"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; 1296 } 1297 $expansion{$short_name} = [$long_name]; 1298 if ( $flag eq '!' ) { 1299 my $nshort_name = 'n' . $short_name; 1300 my $nolong_name = 'no' . $long_name; 1301 if ( $expansion{$nshort_name} ) { 1302 my $existing_name = $expansion{$nshort_name}[0]; 1303 die 1304"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; 1305 } 1306 $expansion{$nshort_name} = [$nolong_name]; 1307 } 1308 } 1309 }; 1310 1311 # Install long option names which have a simple abbreviation. 1312 # Options with code '!' get standard negation ('no' for long names, 1313 # 'n' for abbreviations). Categories follow the manual. 1314 1315 ########################### 1316 $category = 0; # I/O_Control 1317 ########################### 1318 $add_option->( 'backup-and-modify-in-place', 'b', '!' ); 1319 $add_option->( 'backup-file-extension', 'bext', '=s' ); 1320 $add_option->( 'force-read-binary', 'f', '!' ); 1321 $add_option->( 'format', 'fmt', '=s' ); 1322 $add_option->( 'iterations', 'it', '=i' ); 1323 $add_option->( 'logfile', 'log', '!' ); 1324 $add_option->( 'logfile-gap', 'g', ':i' ); 1325 $add_option->( 'outfile', 'o', '=s' ); 1326 $add_option->( 'output-file-extension', 'oext', '=s' ); 1327 $add_option->( 'output-path', 'opath', '=s' ); 1328 $add_option->( 'profile', 'pro', '=s' ); 1329 $add_option->( 'quiet', 'q', '!' ); 1330 $add_option->( 'standard-error-output', 'se', '!' ); 1331 $add_option->( 'standard-output', 'st', '!' ); 1332 $add_option->( 'warning-output', 'w', '!' ); 1333 1334 # options which are both toggle switches and values moved here 1335 # to hide from tidyview (which does not show category 0 flags): 1336 # -ole moved here from category 1 1337 # -sil moved here from category 2 1338 $add_option->( 'output-line-ending', 'ole', '=s' ); 1339 $add_option->( 'starting-indentation-level', 'sil', '=i' ); 1340 1341 ######################################## 1342 $category = 1; # Basic formatting options 1343 ######################################## 1344 $add_option->( 'check-syntax', 'syn', '!' ); 1345 $add_option->( 'entab-leading-whitespace', 'et', '=i' ); 1346 $add_option->( 'indent-columns', 'i', '=i' ); 1347 $add_option->( 'maximum-line-length', 'l', '=i' ); 1348 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); 1349 $add_option->( 'preserve-line-endings', 'ple', '!' ); 1350 $add_option->( 'tabs', 't', '!' ); 1351 1352 ######################################## 1353 $category = 2; # Code indentation control 1354 ######################################## 1355 $add_option->( 'continuation-indentation', 'ci', '=i' ); 1356 $add_option->( 'line-up-parentheses', 'lp', '!' ); 1357 $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); 1358 $add_option->( 'outdent-keywords', 'okw', '!' ); 1359 $add_option->( 'outdent-labels', 'ola', '!' ); 1360 $add_option->( 'outdent-long-quotes', 'olq', '!' ); 1361 $add_option->( 'indent-closing-brace', 'icb', '!' ); 1362 $add_option->( 'closing-token-indentation', 'cti', '=i' ); 1363 $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); 1364 $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); 1365 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); 1366 $add_option->( 'brace-left-and-indent', 'bli', '!' ); 1367 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); 1368 1369 ######################################## 1370 $category = 3; # Whitespace control 1371 ######################################## 1372 $add_option->( 'add-semicolons', 'asc', '!' ); 1373 $add_option->( 'add-whitespace', 'aws', '!' ); 1374 $add_option->( 'block-brace-tightness', 'bbt', '=i' ); 1375 $add_option->( 'brace-tightness', 'bt', '=i' ); 1376 $add_option->( 'delete-old-whitespace', 'dws', '!' ); 1377 $add_option->( 'delete-semicolons', 'dsm', '!' ); 1378 $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); 1379 $add_option->( 'nowant-left-space', 'nwls', '=s' ); 1380 $add_option->( 'nowant-right-space', 'nwrs', '=s' ); 1381 $add_option->( 'paren-tightness', 'pt', '=i' ); 1382 $add_option->( 'space-after-keyword', 'sak', '=s' ); 1383 $add_option->( 'space-for-semicolon', 'sfs', '!' ); 1384 $add_option->( 'space-function-paren', 'sfp', '!' ); 1385 $add_option->( 'space-keyword-paren', 'skp', '!' ); 1386 $add_option->( 'space-terminal-semicolon', 'sts', '!' ); 1387 $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); 1388 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); 1389 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); 1390 $add_option->( 'trim-qw', 'tqw', '!' ); 1391 $add_option->( 'want-left-space', 'wls', '=s' ); 1392 $add_option->( 'want-right-space', 'wrs', '=s' ); 1393 1394 ######################################## 1395 $category = 4; # Comment controls 1396 ######################################## 1397 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); 1398 $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); 1399 $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); 1400 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); 1401 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); 1402 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); 1403 $add_option->( 'closing-side-comments', 'csc', '!' ); 1404 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); 1405 $add_option->( 'format-skipping', 'fs', '!' ); 1406 $add_option->( 'format-skipping-begin', 'fsb', '=s' ); 1407 $add_option->( 'format-skipping-end', 'fse', '=s' ); 1408 $add_option->( 'hanging-side-comments', 'hsc', '!' ); 1409 $add_option->( 'indent-block-comments', 'ibc', '!' ); 1410 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); 1411 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); 1412 $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); 1413 $add_option->( 'outdent-long-comments', 'olc', '!' ); 1414 $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); 1415 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); 1416 $add_option->( 'static-block-comments', 'sbc', '!' ); 1417 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); 1418 $add_option->( 'static-side-comments', 'ssc', '!' ); 1419 1420 ######################################## 1421 $category = 5; # Linebreak controls 1422 ######################################## 1423 $add_option->( 'add-newlines', 'anl', '!' ); 1424 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); 1425 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); 1426 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); 1427 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); 1428 $add_option->( 'cuddled-else', 'ce', '!' ); 1429 $add_option->( 'delete-old-newlines', 'dnl', '!' ); 1430 $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); 1431 $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); 1432 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); 1433 $add_option->( 'opening-paren-right', 'opr', '!' ); 1434 $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); 1435 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); 1436 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); 1437 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); 1438 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); 1439 $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); 1440 $add_option->( 'stack-closing-paren', 'scp', '!' ); 1441 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); 1442 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); 1443 $add_option->( 'stack-opening-paren', 'sop', '!' ); 1444 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); 1445 $add_option->( 'vertical-tightness', 'vt', '=i' ); 1446 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); 1447 $add_option->( 'want-break-after', 'wba', '=s' ); 1448 $add_option->( 'want-break-before', 'wbb', '=s' ); 1449 $add_option->( 'break-after-all-operators', 'baao', '!' ); 1450 $add_option->( 'break-before-all-operators', 'bbao', '!' ); 1451 $add_option->( 'keep-interior-semicolons', 'kis', '!' ); 1452 1453 ######################################## 1454 $category = 6; # Controlling list formatting 1455 ######################################## 1456 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); 1457 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); 1458 $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); 1459 1460 ######################################## 1461 $category = 7; # Retaining or ignoring existing line breaks 1462 ######################################## 1463 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); 1464 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); 1465 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); 1466 $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); 1467 1468 ######################################## 1469 $category = 8; # Blank line control 1470 ######################################## 1471 $add_option->( 'blanks-before-blocks', 'bbb', '!' ); 1472 $add_option->( 'blanks-before-comments', 'bbc', '!' ); 1473 $add_option->( 'blanks-before-subs', 'bbs', '!' ); 1474 $add_option->( 'long-block-line-count', 'lbl', '=i' ); 1475 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); 1476 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); 1477 1478 ######################################## 1479 $category = 9; # Other controls 1480 ######################################## 1481 $add_option->( 'delete-block-comments', 'dbc', '!' ); 1482 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); 1483 $add_option->( 'delete-pod', 'dp', '!' ); 1484 $add_option->( 'delete-side-comments', 'dsc', '!' ); 1485 $add_option->( 'tee-block-comments', 'tbc', '!' ); 1486 $add_option->( 'tee-pod', 'tp', '!' ); 1487 $add_option->( 'tee-side-comments', 'tsc', '!' ); 1488 $add_option->( 'look-for-autoloader', 'lal', '!' ); 1489 $add_option->( 'look-for-hash-bang', 'x', '!' ); 1490 $add_option->( 'look-for-selfloader', 'lsl', '!' ); 1491 $add_option->( 'pass-version-line', 'pvl', '!' ); 1492 1493 ######################################## 1494 $category = 13; # Debugging 1495 ######################################## 1496 $add_option->( 'DEBUG', 'D', '!' ); 1497 $add_option->( 'DIAGNOSTICS', 'I', '!' ); 1498 $add_option->( 'check-multiline-quotes', 'chk', '!' ); 1499 $add_option->( 'dump-defaults', 'ddf', '!' ); 1500 $add_option->( 'dump-long-names', 'dln', '!' ); 1501 $add_option->( 'dump-options', 'dop', '!' ); 1502 $add_option->( 'dump-profile', 'dpro', '!' ); 1503 $add_option->( 'dump-short-names', 'dsn', '!' ); 1504 $add_option->( 'dump-token-types', 'dtt', '!' ); 1505 $add_option->( 'dump-want-left-space', 'dwls', '!' ); 1506 $add_option->( 'dump-want-right-space', 'dwrs', '!' ); 1507 $add_option->( 'fuzzy-line-length', 'fll', '!' ); 1508 $add_option->( 'help', 'h', '' ); 1509 $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); 1510 $add_option->( 'show-options', 'opt', '!' ); 1511 $add_option->( 'version', 'v', '' ); 1512 1513 #--------------------------------------------------------------------- 1514 1515 # The Perl::Tidy::HtmlWriter will add its own options to the string 1516 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); 1517 1518 ######################################## 1519 # Set categories 10, 11, 12 1520 ######################################## 1521 # Based on their known order 1522 $category = 12; # HTML properties 1523 foreach my $opt (@option_string) { 1524 my $long_name = $opt; 1525 $long_name =~ s/(!|=.*|:.*)$//; 1526 unless ( defined( $option_category{$long_name} ) ) { 1527 if ( $long_name =~ /^html-linked/ ) { 1528 $category = 10; # HTML options 1529 } 1530 elsif ( $long_name =~ /^pod2html/ ) { 1531 $category = 11; # Pod2html 1532 } 1533 $option_category{$long_name} = $category_name[$category]; 1534 } 1535 } 1536 1537 #--------------------------------------------------------------- 1538 # Assign valid ranges to certain options 1539 #--------------------------------------------------------------- 1540 # In the future, these may be used to make preliminary checks 1541 # hash keys are long names 1542 # If key or value is undefined: 1543 # strings may have any value 1544 # integer ranges are >=0 1545 # If value is defined: 1546 # value is [qw(any valid words)] for strings 1547 # value is [min, max] for integers 1548 # if min is undefined, there is no lower limit 1549 # if max is undefined, there is no upper limit 1550 # Parameters not listed here have defaults 1551 %option_range = ( 1552 'format' => [ 'tidy', 'html', 'user' ], 1553 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], 1554 1555 'block-brace-tightness' => [ 0, 2 ], 1556 'brace-tightness' => [ 0, 2 ], 1557 'paren-tightness' => [ 0, 2 ], 1558 'square-bracket-tightness' => [ 0, 2 ], 1559 1560 'block-brace-vertical-tightness' => [ 0, 2 ], 1561 'brace-vertical-tightness' => [ 0, 2 ], 1562 'brace-vertical-tightness-closing' => [ 0, 2 ], 1563 'paren-vertical-tightness' => [ 0, 2 ], 1564 'paren-vertical-tightness-closing' => [ 0, 2 ], 1565 'square-bracket-vertical-tightness' => [ 0, 2 ], 1566 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], 1567 'vertical-tightness' => [ 0, 2 ], 1568 'vertical-tightness-closing' => [ 0, 2 ], 1569 1570 'closing-brace-indentation' => [ 0, 3 ], 1571 'closing-paren-indentation' => [ 0, 3 ], 1572 'closing-square-bracket-indentation' => [ 0, 3 ], 1573 'closing-token-indentation' => [ 0, 3 ], 1574 1575 'closing-side-comment-else-flag' => [ 0, 2 ], 1576 'comma-arrow-breakpoints' => [ 0, 3 ], 1577 ); 1578 1579 # Note: we could actually allow negative ci if someone really wants it: 1580 # $option_range{'continuation-indentation'} = [ undef, undef ]; 1581 1582 #--------------------------------------------------------------- 1583 # Assign default values to the above options here, except 1584 # for 'outfile' and 'help'. 1585 # These settings should approximate the perlstyle(1) suggestions. 1586 #--------------------------------------------------------------- 1587 my @defaults = qw( 1588 add-newlines 1589 add-semicolons 1590 add-whitespace 1591 blanks-before-blocks 1592 blanks-before-comments 1593 blanks-before-subs 1594 block-brace-tightness=0 1595 block-brace-vertical-tightness=0 1596 brace-tightness=1 1597 brace-vertical-tightness-closing=0 1598 brace-vertical-tightness=0 1599 break-at-old-logical-breakpoints 1600 break-at-old-ternary-breakpoints 1601 break-at-old-keyword-breakpoints 1602 comma-arrow-breakpoints=1 1603 nocheck-syntax 1604 closing-side-comment-interval=6 1605 closing-side-comment-maximum-text=20 1606 closing-side-comment-else-flag=0 1607 closing-side-comments-balanced 1608 closing-paren-indentation=0 1609 closing-brace-indentation=0 1610 closing-square-bracket-indentation=0 1611 continuation-indentation=2 1612 delete-old-newlines 1613 delete-semicolons 1614 fuzzy-line-length 1615 hanging-side-comments 1616 indent-block-comments 1617 indent-columns=4 1618 iterations=1 1619 keep-old-blank-lines=1 1620 long-block-line-count=8 1621 look-for-autoloader 1622 look-for-selfloader 1623 maximum-consecutive-blank-lines=1 1624 maximum-fields-per-table=0 1625 maximum-line-length=80 1626 minimum-space-to-comment=4 1627 nobrace-left-and-indent 1628 nocuddled-else 1629 nodelete-old-whitespace 1630 nohtml 1631 nologfile 1632 noquiet 1633 noshow-options 1634 nostatic-side-comments 1635 notabs 1636 nowarning-output 1637 outdent-labels 1638 outdent-long-quotes 1639 outdent-long-comments 1640 paren-tightness=1 1641 paren-vertical-tightness-closing=0 1642 paren-vertical-tightness=0 1643 pass-version-line 1644 recombine 1645 valign 1646 short-concatenation-item-length=8 1647 space-for-semicolon 1648 square-bracket-tightness=1 1649 square-bracket-vertical-tightness-closing=0 1650 square-bracket-vertical-tightness=0 1651 static-block-comments 1652 trim-qw 1653 format=tidy 1654 backup-file-extension=bak 1655 format-skipping 1656 1657 pod2html 1658 html-table-of-contents 1659 html-entities 1660 ); 1661 1662 push @defaults, "perl-syntax-check-flags=-c -T"; 1663 1664 #--------------------------------------------------------------- 1665 # Define abbreviations which will be expanded into the above primitives. 1666 # These may be defined recursively. 1667 #--------------------------------------------------------------- 1668 %expansion = ( 1669 %expansion, 1670 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], 1671 'fnl' => [qw(freeze-newlines)], 1672 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], 1673 'fws' => [qw(freeze-whitespace)], 1674 'freeze-blank-lines' => 1675 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], 1676 'fbl' => [qw(freeze-blank-lines)], 1677 'indent-only' => [qw(freeze-newlines freeze-whitespace)], 1678 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 1679 'nooutdent-long-lines' => 1680 [qw(nooutdent-long-quotes nooutdent-long-comments)], 1681 'noll' => [qw(nooutdent-long-lines)], 1682 'io' => [qw(indent-only)], 1683 'delete-all-comments' => 1684 [qw(delete-block-comments delete-side-comments delete-pod)], 1685 'nodelete-all-comments' => 1686 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], 1687 'dac' => [qw(delete-all-comments)], 1688 'ndac' => [qw(nodelete-all-comments)], 1689 'gnu' => [qw(gnu-style)], 1690 'pbp' => [qw(perl-best-practices)], 1691 'tee-all-comments' => 1692 [qw(tee-block-comments tee-side-comments tee-pod)], 1693 'notee-all-comments' => 1694 [qw(notee-block-comments notee-side-comments notee-pod)], 1695 'tac' => [qw(tee-all-comments)], 1696 'ntac' => [qw(notee-all-comments)], 1697 'html' => [qw(format=html)], 1698 'nhtml' => [qw(format=tidy)], 1699 'tidy' => [qw(format=tidy)], 1700 1701 'swallow-optional-blank-lines' => [qw(kbl=0)], 1702 'noswallow-optional-blank-lines' => [qw(kbl=1)], 1703 'sob' => [qw(kbl=0)], 1704 'nsob' => [qw(kbl=1)], 1705 1706 'break-after-comma-arrows' => [qw(cab=0)], 1707 'nobreak-after-comma-arrows' => [qw(cab=1)], 1708 'baa' => [qw(cab=0)], 1709 'nbaa' => [qw(cab=1)], 1710 1711 'break-at-old-trinary-breakpoints' => [qw(bot)], 1712 1713 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], 1714 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], 1715 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], 1716 'icp' => [qw(cpi=2 cbi=2 csbi=2)], 1717 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], 1718 1719 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], 1720 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], 1721 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], 1722 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], 1723 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], 1724 1725 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], 1726 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], 1727 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], 1728 1729 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], 1730 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], 1731 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], 1732 1733 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 1734 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 1735 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 1736 1737 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 1738 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 1739 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 1740 1741 'otr' => [qw(opr ohbr osbr)], 1742 'opening-token-right' => [qw(opr ohbr osbr)], 1743 'notr' => [qw(nopr nohbr nosbr)], 1744 'noopening-token-right' => [qw(nopr nohbr nosbr)], 1745 1746 'sot' => [qw(sop sohb sosb)], 1747 'nsot' => [qw(nsop nsohb nsosb)], 1748 'stack-opening-tokens' => [qw(sop sohb sosb)], 1749 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], 1750 1751 'sct' => [qw(scp schb scsb)], 1752 'stack-closing-tokens' => => [qw(scp schb scsb)], 1753 'nsct' => [qw(nscp nschb nscsb)], 1754 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], 1755 1756 # 'mangle' originally deleted pod and comments, but to keep it 1757 # reversible, it no longer does. But if you really want to 1758 # delete them, just use: 1759 # -mangle -dac 1760 1761 # An interesting use for 'mangle' is to do this: 1762 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new 1763 # which will form as many one-line blocks as possible 1764 1765 'mangle' => [ 1766 qw( 1767 check-syntax 1768 keep-old-blank-lines=0 1769 delete-old-newlines 1770 delete-old-whitespace 1771 delete-semicolons 1772 indent-columns=0 1773 maximum-consecutive-blank-lines=0 1774 maximum-line-length=100000 1775 noadd-newlines 1776 noadd-semicolons 1777 noadd-whitespace 1778 noblanks-before-blocks 1779 noblanks-before-subs 1780 notabs 1781 ) 1782 ], 1783 1784 # 'extrude' originally deleted pod and comments, but to keep it 1785 # reversible, it no longer does. But if you really want to 1786 # delete them, just use 1787 # extrude -dac 1788 # 1789 # An interesting use for 'extrude' is to do this: 1790 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new 1791 # which will break up all one-line blocks. 1792 1793 'extrude' => [ 1794 qw( 1795 check-syntax 1796 ci=0 1797 delete-old-newlines 1798 delete-old-whitespace 1799 delete-semicolons 1800 indent-columns=0 1801 maximum-consecutive-blank-lines=0 1802 maximum-line-length=1 1803 noadd-semicolons 1804 noadd-whitespace 1805 noblanks-before-blocks 1806 noblanks-before-subs 1807 nofuzzy-line-length 1808 notabs 1809 norecombine 1810 ) 1811 ], 1812 1813 # this style tries to follow the GNU Coding Standards (which do 1814 # not really apply to perl but which are followed by some perl 1815 # programmers). 1816 'gnu-style' => [ 1817 qw( 1818 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 1819 ) 1820 ], 1821 1822 # Style suggested in Damian Conway's Perl Best Practices 1823 'perl-best-practices' => [ 1824 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), 1825q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) 1826 ], 1827 1828 # Additional styles can be added here 1829 ); 1830 1831 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); 1832 1833 # Uncomment next line to dump all expansions for debugging: 1834 # dump_short_names(\%expansion); 1835 return ( 1836 \@option_string, \@defaults, \%expansion, 1837 \%option_category, \%option_range 1838 ); 1839 1840} # end of generate_options 1841 1842sub process_command_line { 1843 1844 my ( 1845 $perltidyrc_stream, $is_Windows, $Windows_type, 1846 $rpending_complaint, $dump_options_type 1847 ) = @_; 1848 1849 use Getopt::Long; 1850 1851 my ( 1852 $roption_string, $rdefaults, $rexpansion, 1853 $roption_category, $roption_range 1854 ) = generate_options(); 1855 1856 #--------------------------------------------------------------- 1857 # set the defaults by passing the above list through GetOptions 1858 #--------------------------------------------------------------- 1859 my %Opts = (); 1860 { 1861 local @ARGV; 1862 my $i; 1863 1864 # do not load the defaults if we are just dumping perltidyrc 1865 unless ( $dump_options_type eq 'perltidyrc' ) { 1866 for $i (@$rdefaults) { push @ARGV, "--" . $i } 1867 } 1868 1869 # Patch to save users Getopt::Long configuration 1870 # and set to Getopt::Long defaults. Use eval to avoid 1871 # breaking old versions of Perl without these routines. 1872 my $glc; 1873 eval { $glc = Getopt::Long::Configure() }; 1874 unless ($@) { 1875 eval { Getopt::Long::ConfigDefaults() }; 1876 } 1877 else { $glc = undef } 1878 1879 if ( !GetOptions( \%Opts, @$roption_string ) ) { 1880 die "Programming Bug: error in setting default options"; 1881 } 1882 1883 # Patch to put the previous Getopt::Long configuration back 1884 eval { Getopt::Long::Configure($glc) } if defined $glc; 1885 } 1886 1887 my $word; 1888 my @raw_options = (); 1889 my $config_file = ""; 1890 my $saw_ignore_profile = 0; 1891 my $saw_extrude = 0; 1892 my $saw_dump_profile = 0; 1893 my $i; 1894 1895 #--------------------------------------------------------------- 1896 # Take a first look at the command-line parameters. Do as many 1897 # immediate dumps as possible, which can avoid confusion if the 1898 # perltidyrc file has an error. 1899 #--------------------------------------------------------------- 1900 foreach $i (@ARGV) { 1901 1902 $i =~ s/^--/-/; 1903 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { 1904 $saw_ignore_profile = 1; 1905 } 1906 1907 # note: this must come before -pro and -profile, below: 1908 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { 1909 $saw_dump_profile = 1; 1910 } 1911 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { 1912 if ($config_file) { 1913 warn 1914"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; 1915 } 1916 $config_file = $2; 1917 1918 # resolve <dir>/.../<file>, meaning look upwards from directory 1919 if ( defined($config_file) ) { 1920 if ( my ( $start_dir, $search_file ) = 1921 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) 1922 { 1923 $start_dir = '.' if !$start_dir; 1924 $start_dir = Cwd::realpath($start_dir); 1925 if ( my $found_file = 1926 find_file_upwards( $start_dir, $search_file ) ) 1927 { 1928 $config_file = $found_file; 1929 } 1930 } 1931 } 1932 unless ( -e $config_file ) { 1933 warn "cannot find file given with -pro=$config_file: $!\n"; 1934 $config_file = ""; 1935 } 1936 } 1937 elsif ( $i =~ /^-(pro|profile)=?$/ ) { 1938 die "usage: -pro=filename or --profile=filename, no spaces\n"; 1939 } 1940 elsif ( $i =~ /^-extrude$/ ) { 1941 $saw_extrude = 1; 1942 } 1943 elsif ( $i =~ /^-(help|h|HELP|H)$/ ) { 1944 usage(); 1945 exit 1; 1946 } 1947 elsif ( $i =~ /^-(version|v)$/ ) { 1948 show_version(); 1949 exit 1; 1950 } 1951 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { 1952 dump_defaults(@$rdefaults); 1953 exit 1; 1954 } 1955 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { 1956 dump_long_names(@$roption_string); 1957 exit 1; 1958 } 1959 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { 1960 dump_short_names($rexpansion); 1961 exit 1; 1962 } 1963 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { 1964 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); 1965 exit 1; 1966 } 1967 } 1968 1969 if ( $saw_dump_profile && $saw_ignore_profile ) { 1970 warn "No profile to dump because of -npro\n"; 1971 exit 1; 1972 } 1973 1974 #--------------------------------------------------------------- 1975 # read any .perltidyrc configuration file 1976 #--------------------------------------------------------------- 1977 unless ($saw_ignore_profile) { 1978 1979 # resolve possible conflict between $perltidyrc_stream passed 1980 # as call parameter to perltidy and -pro=filename on command 1981 # line. 1982 if ($perltidyrc_stream) { 1983 if ($config_file) { 1984 warn <<EOM; 1985 Conflict: a perltidyrc configuration file was specified both as this 1986 perltidy call parameter: $perltidyrc_stream 1987 and with this -profile=$config_file. 1988 Using -profile=$config_file. 1989EOM 1990 } 1991 else { 1992 $config_file = $perltidyrc_stream; 1993 } 1994 } 1995 1996 # look for a config file if we don't have one yet 1997 my $rconfig_file_chatter; 1998 $$rconfig_file_chatter = ""; 1999 $config_file = 2000 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter, 2001 $rpending_complaint ) 2002 unless $config_file; 2003 2004 # open any config file 2005 my $fh_config; 2006 if ($config_file) { 2007 ( $fh_config, $config_file ) = 2008 Perl::Tidy::streamhandle( $config_file, 'r' ); 2009 unless ($fh_config) { 2010 $$rconfig_file_chatter .= 2011 "# $config_file exists but cannot be opened\n"; 2012 } 2013 } 2014 2015 if ($saw_dump_profile) { 2016 if ($saw_dump_profile) { 2017 dump_config_file( $fh_config, $config_file, 2018 $rconfig_file_chatter ); 2019 exit 1; 2020 } 2021 } 2022 2023 if ($fh_config) { 2024 2025 my ( $rconfig_list, $death_message ) = 2026 read_config_file( $fh_config, $config_file, $rexpansion ); 2027 die $death_message if ($death_message); 2028 2029 # process any .perltidyrc parameters right now so we can 2030 # localize errors 2031 if (@$rconfig_list) { 2032 local @ARGV = @$rconfig_list; 2033 2034 expand_command_abbreviations( $rexpansion, \@raw_options, 2035 $config_file ); 2036 2037 if ( !GetOptions( \%Opts, @$roption_string ) ) { 2038 die 2039"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; 2040 } 2041 2042 # Anything left in this local @ARGV is an error and must be 2043 # invalid bare words from the configuration file. We cannot 2044 # check this earlier because bare words may have been valid 2045 # values for parameters. We had to wait for GetOptions to have 2046 # a look at @ARGV. 2047 if (@ARGV) { 2048 my $count = @ARGV; 2049 my $str = "\'" . pop(@ARGV) . "\'"; 2050 while ( my $param = pop(@ARGV) ) { 2051 if ( length($str) < 70 ) { 2052 $str .= ", '$param'"; 2053 } 2054 else { 2055 $str .= ", ..."; 2056 last; 2057 } 2058 } 2059 die <<EOM; 2060There are $count unrecognized values in the configuration file '$config_file': 2061$str 2062Use leading dashes for parameters. Use -npro to ignore this file. 2063EOM 2064 } 2065 2066 # Undo any options which cause premature exit. They are not 2067 # appropriate for a config file, and it could be hard to 2068 # diagnose the cause of the premature exit. 2069 foreach ( 2070 qw{ 2071 dump-defaults 2072 dump-long-names 2073 dump-options 2074 dump-profile 2075 dump-short-names 2076 dump-token-types 2077 dump-want-left-space 2078 dump-want-right-space 2079 help 2080 stylesheet 2081 version 2082 } 2083 ) 2084 { 2085 2086 if ( defined( $Opts{$_} ) ) { 2087 delete $Opts{$_}; 2088 warn "ignoring --$_ in config file: $config_file\n"; 2089 } 2090 } 2091 } 2092 } 2093 } 2094 2095 #--------------------------------------------------------------- 2096 # now process the command line parameters 2097 #--------------------------------------------------------------- 2098 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file ); 2099 2100 if ( !GetOptions( \%Opts, @$roption_string ) ) { 2101 die "Error on command line; for help try 'perltidy -h'\n"; 2102 } 2103 2104 return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string, 2105 $rexpansion, $roption_category, $roption_range ); 2106} # end of process_command_line 2107 2108sub check_options { 2109 2110 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_; 2111 2112 #--------------------------------------------------------------- 2113 # check and handle any interactions among the basic options.. 2114 #--------------------------------------------------------------- 2115 2116 # Since -vt, -vtc, and -cti are abbreviations, but under 2117 # msdos, an unquoted input parameter like vtc=1 will be 2118 # seen as 2 parameters, vtc and 1, so the abbreviations 2119 # won't be seen. Therefore, we will catch them here if 2120 # they get through. 2121 2122 if ( defined $rOpts->{'vertical-tightness'} ) { 2123 my $vt = $rOpts->{'vertical-tightness'}; 2124 $rOpts->{'paren-vertical-tightness'} = $vt; 2125 $rOpts->{'square-bracket-vertical-tightness'} = $vt; 2126 $rOpts->{'brace-vertical-tightness'} = $vt; 2127 } 2128 2129 if ( defined $rOpts->{'vertical-tightness-closing'} ) { 2130 my $vtc = $rOpts->{'vertical-tightness-closing'}; 2131 $rOpts->{'paren-vertical-tightness-closing'} = $vtc; 2132 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; 2133 $rOpts->{'brace-vertical-tightness-closing'} = $vtc; 2134 } 2135 2136 if ( defined $rOpts->{'closing-token-indentation'} ) { 2137 my $cti = $rOpts->{'closing-token-indentation'}; 2138 $rOpts->{'closing-square-bracket-indentation'} = $cti; 2139 $rOpts->{'closing-brace-indentation'} = $cti; 2140 $rOpts->{'closing-paren-indentation'} = $cti; 2141 } 2142 2143 # In quiet mode, there is no log file and hence no way to report 2144 # results of syntax check, so don't do it. 2145 if ( $rOpts->{'quiet'} ) { 2146 $rOpts->{'check-syntax'} = 0; 2147 } 2148 2149 # can't check syntax if no output 2150 if ( $rOpts->{'format'} ne 'tidy' ) { 2151 $rOpts->{'check-syntax'} = 0; 2152 } 2153 2154 # Never let Windows 9x/Me systems run syntax check -- this will prevent a 2155 # wide variety of nasty problems on these systems, because they cannot 2156 # reliably run backticks. Don't even think about changing this! 2157 if ( $rOpts->{'check-syntax'} 2158 && $is_Windows 2159 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) 2160 { 2161 $rOpts->{'check-syntax'} = 0; 2162 } 2163 2164 # It's really a bad idea to check syntax as root unless you wrote 2165 # the script yourself. FIXME: not sure if this works with VMS 2166 unless ($is_Windows) { 2167 2168 if ( $< == 0 && $rOpts->{'check-syntax'} ) { 2169 $rOpts->{'check-syntax'} = 0; 2170 $$rpending_complaint .= 2171"Syntax check deactivated for safety; you shouldn't run this as root\n"; 2172 } 2173 } 2174 2175 # check iteration count and quietly fix if necessary: 2176 # - iterations option only applies to code beautification mode 2177 # - it shouldn't be nessary to use more than about 2 iterations 2178 if ( $rOpts->{'format'} ne 'tidy' ) { 2179 $rOpts->{'iterations'} = 1; 2180 } 2181 elsif ( defined( $rOpts->{'iterations'} ) ) { 2182 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } 2183 elsif ( $rOpts->{'iterations'} > 5 ) { $rOpts->{'iterations'} = 5 } 2184 } 2185 else { 2186 $rOpts->{'iterations'} = 1; 2187 } 2188 2189 # see if user set a non-negative logfile-gap 2190 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { 2191 2192 # a zero gap will be taken as a 1 2193 if ( $rOpts->{'logfile-gap'} == 0 ) { 2194 $rOpts->{'logfile-gap'} = 1; 2195 } 2196 2197 # setting a non-negative logfile gap causes logfile to be saved 2198 $rOpts->{'logfile'} = 1; 2199 } 2200 2201 # not setting logfile gap, or setting it negative, causes default of 50 2202 else { 2203 $rOpts->{'logfile-gap'} = 50; 2204 } 2205 2206 # set short-cut flag when only indentation is to be done. 2207 # Note that the user may or may not have already set the 2208 # indent-only flag. 2209 if ( !$rOpts->{'add-whitespace'} 2210 && !$rOpts->{'delete-old-whitespace'} 2211 && !$rOpts->{'add-newlines'} 2212 && !$rOpts->{'delete-old-newlines'} ) 2213 { 2214 $rOpts->{'indent-only'} = 1; 2215 } 2216 2217 # -isbc implies -ibc 2218 if ( $rOpts->{'indent-spaced-block-comments'} ) { 2219 $rOpts->{'indent-block-comments'} = 1; 2220 } 2221 2222 # -bli flag implies -bl 2223 if ( $rOpts->{'brace-left-and-indent'} ) { 2224 $rOpts->{'opening-brace-on-new-line'} = 1; 2225 } 2226 2227 if ( $rOpts->{'opening-brace-always-on-right'} 2228 && $rOpts->{'opening-brace-on-new-line'} ) 2229 { 2230 warn <<EOM; 2231 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and 2232 'opening-brace-on-new-line' (-bl). Ignoring -bl. 2233EOM 2234 $rOpts->{'opening-brace-on-new-line'} = 0; 2235 } 2236 2237 # it simplifies things if -bl is 0 rather than undefined 2238 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { 2239 $rOpts->{'opening-brace-on-new-line'} = 0; 2240 } 2241 2242 # -sbl defaults to -bl if not defined 2243 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { 2244 $rOpts->{'opening-sub-brace-on-new-line'} = 2245 $rOpts->{'opening-brace-on-new-line'}; 2246 } 2247 2248 if ( $rOpts->{'entab-leading-whitespace'} ) { 2249 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { 2250 warn "-et=n must use a positive integer; ignoring -et\n"; 2251 $rOpts->{'entab-leading-whitespace'} = undef; 2252 } 2253 2254 # entab leading whitespace has priority over the older 'tabs' option 2255 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } 2256 } 2257} 2258 2259sub find_file_upwards { 2260 my ( $search_dir, $search_file ) = @_; 2261 2262 $search_dir =~ s{/+$}{}; 2263 $search_file =~ s{^/+}{}; 2264 2265 while (1) { 2266 my $try_path = "$search_dir/$search_file"; 2267 if ( -f $try_path ) { 2268 return $try_path; 2269 } 2270 elsif ( $search_dir eq '/' ) { 2271 return undef; 2272 } 2273 else { 2274 $search_dir = dirname($search_dir); 2275 } 2276 } 2277} 2278 2279sub expand_command_abbreviations { 2280 2281 # go through @ARGV and expand any abbreviations 2282 2283 my ( $rexpansion, $rraw_options, $config_file ) = @_; 2284 my ($word); 2285 2286 # set a pass limit to prevent an infinite loop; 2287 # 10 should be plenty, but it may be increased to allow deeply 2288 # nested expansions. 2289 my $max_passes = 10; 2290 my @new_argv = (); 2291 2292 # keep looping until all expansions have been converted into actual 2293 # dash parameters.. 2294 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { 2295 my @new_argv = (); 2296 my $abbrev_count = 0; 2297 2298 # loop over each item in @ARGV.. 2299 foreach $word (@ARGV) { 2300 2301 # convert any leading 'no-' to just 'no' 2302 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } 2303 2304 # if it is a dash flag (instead of a file name).. 2305 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { 2306 2307 my $abr = $1; 2308 my $flags = $2; 2309 2310 # save the raw input for debug output in case of circular refs 2311 if ( $pass_count == 0 ) { 2312 push( @$rraw_options, $word ); 2313 } 2314 2315 # recombine abbreviation and flag, if necessary, 2316 # to allow abbreviations with arguments such as '-vt=1' 2317 if ( $rexpansion->{ $abr . $flags } ) { 2318 $abr = $abr . $flags; 2319 $flags = ""; 2320 } 2321 2322 # if we see this dash item in the expansion hash.. 2323 if ( $rexpansion->{$abr} ) { 2324 $abbrev_count++; 2325 2326 # stuff all of the words that it expands to into the 2327 # new arg list for the next pass 2328 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { 2329 next unless $abbrev; # for safety; shouldn't happen 2330 push( @new_argv, '--' . $abbrev . $flags ); 2331 } 2332 } 2333 2334 # not in expansion hash, must be actual long name 2335 else { 2336 push( @new_argv, $word ); 2337 } 2338 } 2339 2340 # not a dash item, so just save it for the next pass 2341 else { 2342 push( @new_argv, $word ); 2343 } 2344 } # end of this pass 2345 2346 # update parameter list @ARGV to the new one 2347 @ARGV = @new_argv; 2348 last unless ( $abbrev_count > 0 ); 2349 2350 # make sure we are not in an infinite loop 2351 if ( $pass_count == $max_passes ) { 2352 print STDERR 2353"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; 2354 print STDERR "Here are the raw options\n"; 2355 local $" = ')('; 2356 print STDERR "(@$rraw_options)\n"; 2357 my $num = @new_argv; 2358 2359 if ( $num < 50 ) { 2360 print STDERR "After $max_passes passes here is ARGV\n"; 2361 print STDERR "(@new_argv)\n"; 2362 } 2363 else { 2364 print STDERR "After $max_passes passes ARGV has $num entries\n"; 2365 } 2366 2367 if ($config_file) { 2368 die <<"DIE"; 2369Please check your configuration file $config_file for circular-references. 2370To deactivate it, use -npro. 2371DIE 2372 } 2373 else { 2374 die <<'DIE'; 2375Program bug - circular-references in the %expansion hash, probably due to 2376a recent program change. 2377DIE 2378 } 2379 } # end of check for circular references 2380 } # end of loop over all passes 2381} 2382 2383# Debug routine -- this will dump the expansion hash 2384sub dump_short_names { 2385 my $rexpansion = shift; 2386 print STDOUT <<EOM; 2387List of short names. This list shows how all abbreviations are 2388translated into other abbreviations and, eventually, into long names. 2389New abbreviations may be defined in a .perltidyrc file. 2390For a list of all long names, use perltidy --dump-long-names (-dln). 2391-------------------------------------------------------------------------- 2392EOM 2393 foreach my $abbrev ( sort keys %$rexpansion ) { 2394 my @list = @{ $$rexpansion{$abbrev} }; 2395 print STDOUT "$abbrev --> @list\n"; 2396 } 2397} 2398 2399sub check_vms_filename { 2400 2401 # given a valid filename (the perltidy input file) 2402 # create a modified filename and separator character 2403 # suitable for VMS. 2404 # 2405 # Contributed by Michael Cartmell 2406 # 2407 my ( $base, $path ) = fileparse( $_[0] ); 2408 2409 # remove explicit ; version 2410 $base =~ s/;-?\d*$// 2411 2412 # remove explicit . version ie two dots in filename NB ^ escapes a dot 2413 or $base =~ s/( # begin capture $1 2414 (?:^|[^^])\. # match a dot not preceded by a caret 2415 (?: # followed by nothing 2416 | # or 2417 .*[^^] # anything ending in a non caret 2418 ) 2419 ) # end capture $1 2420 \.-?\d*$ # match . version number 2421 /$1/x; 2422 2423 # normalise filename, if there are no unescaped dots then append one 2424 $base .= '.' unless $base =~ /(?:^|[^^])\./; 2425 2426 # if we don't already have an extension then we just append the extention 2427 my $separator = ( $base =~ /\.$/ ) ? "" : "_"; 2428 return ( $path . $base, $separator ); 2429} 2430 2431sub Win_OS_Type { 2432 2433 # TODO: are these more standard names? 2434 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 2435 2436 # Returns a string that determines what MS OS we are on. 2437 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 2438 # Returns blank string if not an MS system. 2439 # Original code contributed by: Yves Orton 2440 # We need to know this to decide where to look for config files 2441 2442 my $rpending_complaint = shift; 2443 my $os = ""; 2444 return $os unless $^O =~ /win32|dos/i; # is it a MS box? 2445 2446 # Systems built from Perl source may not have Win32.pm 2447 # But probably have Win32::GetOSVersion() anyway so the 2448 # following line is not 'required': 2449 # return $os unless eval('require Win32'); 2450 2451 # Use the standard API call to determine the version 2452 my ( $undef, $major, $minor, $build, $id ); 2453 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; 2454 2455 # 2456 # NAME ID MAJOR MINOR 2457 # Windows NT 4 2 4 0 2458 # Windows 2000 2 5 0 2459 # Windows XP 2 5 1 2460 # Windows Server 2003 2 5 2 2461 2462 return "win32s" unless $id; # If id==0 then its a win32s box. 2463 $os = { # Magic numbers from MSDN 2464 # documentation of GetOSVersion 2465 1 => { 2466 0 => "95", 2467 10 => "98", 2468 90 => "Me" 2469 }, 2470 2 => { 2471 0 => "2000", # or NT 4, see below 2472 1 => "XP/.Net", 2473 2 => "Win2003", 2474 51 => "NT3.51" 2475 } 2476 }->{$id}->{$minor}; 2477 2478 # If $os is undefined, the above code is out of date. Suggested updates 2479 # are welcome. 2480 unless ( defined $os ) { 2481 $os = ""; 2482 $$rpending_complaint .= <<EOS; 2483Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record! 2484We won't be able to look for a system-wide config file. 2485EOS 2486 } 2487 2488 # Unfortunately the logic used for the various versions isnt so clever.. 2489 # so we have to handle an outside case. 2490 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; 2491} 2492 2493sub is_unix { 2494 return 2495 ( $^O !~ /win32|dos/i ) 2496 && ( $^O ne 'VMS' ) 2497 && ( $^O ne 'OS2' ) 2498 && ( $^O ne 'MacOS' ); 2499} 2500 2501sub look_for_Windows { 2502 2503 # determine Windows sub-type and location of 2504 # system-wide configuration files 2505 my $rpending_complaint = shift; 2506 my $is_Windows = ( $^O =~ /win32|dos/i ); 2507 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows; 2508 return ( $is_Windows, $Windows_type ); 2509} 2510 2511sub find_config_file { 2512 2513 # look for a .perltidyrc configuration file 2514 # For Windows also look for a file named perltidy.ini 2515 my ( $is_Windows, $Windows_type, $rconfig_file_chatter, 2516 $rpending_complaint ) = @_; 2517 2518 $$rconfig_file_chatter .= "# Config file search...system reported as:"; 2519 if ($is_Windows) { 2520 $$rconfig_file_chatter .= "Windows $Windows_type\n"; 2521 } 2522 else { 2523 $$rconfig_file_chatter .= " $^O\n"; 2524 } 2525 2526 # sub to check file existance and record all tests 2527 my $exists_config_file = sub { 2528 my $config_file = shift; 2529 return 0 unless $config_file; 2530 $$rconfig_file_chatter .= "# Testing: $config_file\n"; 2531 return -f $config_file; 2532 }; 2533 2534 my $config_file; 2535 2536 # look in current directory first 2537 $config_file = ".perltidyrc"; 2538 return $config_file if $exists_config_file->($config_file); 2539 if ($is_Windows) { 2540 $config_file = "perltidy.ini"; 2541 return $config_file if $exists_config_file->($config_file); 2542 } 2543 2544 # Default environment vars. 2545 my @envs = qw(PERLTIDY HOME); 2546 2547 # Check the NT/2k/XP locations, first a local machine def, then a 2548 # network def 2549 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; 2550 2551 # Now go through the enviornment ... 2552 foreach my $var (@envs) { 2553 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; 2554 if ( defined( $ENV{$var} ) ) { 2555 $$rconfig_file_chatter .= " = $ENV{$var}\n"; 2556 2557 # test ENV{ PERLTIDY } as file: 2558 if ( $var eq 'PERLTIDY' ) { 2559 $config_file = "$ENV{$var}"; 2560 return $config_file if $exists_config_file->($config_file); 2561 } 2562 2563 # test ENV as directory: 2564 $config_file = catfile( $ENV{$var}, ".perltidyrc" ); 2565 return $config_file if $exists_config_file->($config_file); 2566 2567 if ($is_Windows) { 2568 $config_file = catfile( $ENV{$var}, "perltidy.ini" ); 2569 return $config_file if $exists_config_file->($config_file); 2570 } 2571 } 2572 else { 2573 $$rconfig_file_chatter .= "\n"; 2574 } 2575 } 2576 2577 # then look for a system-wide definition 2578 # where to look varies with OS 2579 if ($is_Windows) { 2580 2581 if ($Windows_type) { 2582 my ( $os, $system, $allusers ) = 2583 Win_Config_Locs( $rpending_complaint, $Windows_type ); 2584 2585 # Check All Users directory, if there is one. 2586 # i.e. C:\Documents and Settings\User\perltidy.ini 2587 if ($allusers) { 2588 2589 $config_file = catfile( $allusers, ".perltidyrc" ); 2590 return $config_file if $exists_config_file->($config_file); 2591 2592 $config_file = catfile( $allusers, "perltidy.ini" ); 2593 return $config_file if $exists_config_file->($config_file); 2594 } 2595 2596 # Check system directory. 2597 # retain old code in case someone has been able to create 2598 # a file with a leading period. 2599 $config_file = catfile( $system, ".perltidyrc" ); 2600 return $config_file if $exists_config_file->($config_file); 2601 2602 $config_file = catfile( $system, "perltidy.ini" ); 2603 return $config_file if $exists_config_file->($config_file); 2604 } 2605 } 2606 2607 # Place to add customization code for other systems 2608 elsif ( $^O eq 'OS2' ) { 2609 } 2610 elsif ( $^O eq 'MacOS' ) { 2611 } 2612 elsif ( $^O eq 'VMS' ) { 2613 } 2614 2615 # Assume some kind of Unix 2616 else { 2617 2618 $config_file = "/usr/local/etc/perltidyrc"; 2619 return $config_file if $exists_config_file->($config_file); 2620 2621 $config_file = "/etc/perltidyrc"; 2622 return $config_file if $exists_config_file->($config_file); 2623 } 2624 2625 # Couldn't find a config file 2626 return; 2627} 2628 2629sub Win_Config_Locs { 2630 2631 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), 2632 # or undef if its not a win32 OS. In list context returns OS, System 2633 # Directory, and All Users Directory. All Users will be empty on a 2634 # 9x/Me box. Contributed by: Yves Orton. 2635 2636 my $rpending_complaint = shift; 2637 my $os = (@_) ? shift : Win_OS_Type(); 2638 return unless $os; 2639 2640 my $system = ""; 2641 my $allusers = ""; 2642 2643 if ( $os =~ /9[58]|Me/ ) { 2644 $system = "C:/Windows"; 2645 } 2646 elsif ( $os =~ /NT|XP|200?/ ) { 2647 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; 2648 $allusers = 2649 ( $os =~ /NT/ ) 2650 ? "C:/WinNT/profiles/All Users/" 2651 : "C:/Documents and Settings/All Users/"; 2652 } 2653 else { 2654 2655 # This currently would only happen on a win32s computer. I dont have 2656 # one to test, so I am unsure how to proceed. Suggestions welcome! 2657 $$rpending_complaint .= 2658"I dont know a sensible place to look for config files on an $os system.\n"; 2659 return; 2660 } 2661 return wantarray ? ( $os, $system, $allusers ) : $os; 2662} 2663 2664sub dump_config_file { 2665 my $fh = shift; 2666 my $config_file = shift; 2667 my $rconfig_file_chatter = shift; 2668 print STDOUT "$$rconfig_file_chatter"; 2669 if ($fh) { 2670 print STDOUT "# Dump of file: '$config_file'\n"; 2671 while ( my $line = $fh->getline() ) { print STDOUT $line } 2672 eval { $fh->close() }; 2673 } 2674 else { 2675 print STDOUT "# ...no config file found\n"; 2676 } 2677} 2678 2679sub read_config_file { 2680 2681 my ( $fh, $config_file, $rexpansion ) = @_; 2682 my @config_list = (); 2683 2684 # file is bad if non-empty $death_message is returned 2685 my $death_message = ""; 2686 2687 my $name = undef; 2688 my $line_no; 2689 while ( my $line = $fh->getline() ) { 2690 $line_no++; 2691 chomp $line; 2692 next if $line =~ /^\s*#/; # skip full-line comment 2693 ( $line, $death_message ) = 2694 strip_comment( $line, $config_file, $line_no ); 2695 last if ($death_message); 2696 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends 2697 next unless $line; 2698 2699 # look for something of the general form 2700 # newname { body } 2701 # or just 2702 # body 2703 2704 if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { 2705 my ( $newname, $body, $curly ) = ( $2, $3, $4 ); 2706 2707 # handle a new alias definition 2708 if ($newname) { 2709 if ($name) { 2710 $death_message = 2711"No '}' seen after $name and before $newname in config file $config_file line $.\n"; 2712 last; 2713 } 2714 $name = $newname; 2715 2716 if ( ${$rexpansion}{$name} ) { 2717 local $" = ')('; 2718 my @names = sort keys %$rexpansion; 2719 $death_message = 2720 "Here is a list of all installed aliases\n(@names)\n" 2721 . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; 2722 last; 2723 } 2724 ${$rexpansion}{$name} = []; 2725 } 2726 2727 # now do the body 2728 if ($body) { 2729 2730 my ( $rbody_parts, $msg ) = parse_args($body); 2731 if ($msg) { 2732 $death_message = <<EOM; 2733Error reading file '$config_file' at line number $line_no. 2734$msg 2735Please fix this line or use -npro to avoid reading this file 2736EOM 2737 last; 2738 } 2739 2740 if ($name) { 2741 2742 # remove leading dashes if this is an alias 2743 foreach (@$rbody_parts) { s/^\-+//; } 2744 push @{ ${$rexpansion}{$name} }, @$rbody_parts; 2745 } 2746 else { 2747 push( @config_list, @$rbody_parts ); 2748 } 2749 } 2750 2751 if ($curly) { 2752 unless ($name) { 2753 $death_message = 2754"Unexpected '}' seen in config file $config_file line $.\n"; 2755 last; 2756 } 2757 $name = undef; 2758 } 2759 } 2760 } 2761 eval { $fh->close() }; 2762 return ( \@config_list, $death_message ); 2763} 2764 2765sub strip_comment { 2766 2767 my ( $instr, $config_file, $line_no ) = @_; 2768 my $msg = ""; 2769 2770 # nothing to do if no comments 2771 if ( $instr !~ /#/ ) { 2772 return ( $instr, $msg ); 2773 } 2774 2775 # use simple method of no quotes 2776 elsif ( $instr !~ /['"]/ ) { 2777 $instr =~ s/\s*\#.*$//; # simple trim 2778 return ( $instr, $msg ); 2779 } 2780 2781 # handle comments and quotes 2782 my $outstr = ""; 2783 my $quote_char = ""; 2784 while (1) { 2785 2786 # looking for ending quote character 2787 if ($quote_char) { 2788 if ( $instr =~ /\G($quote_char)/gc ) { 2789 $quote_char = ""; 2790 $outstr .= $1; 2791 } 2792 elsif ( $instr =~ /\G(.)/gc ) { 2793 $outstr .= $1; 2794 } 2795 2796 # error..we reached the end without seeing the ending quote char 2797 else { 2798 $msg = <<EOM; 2799Error reading file $config_file at line number $line_no. 2800Did not see ending quote character <$quote_char> in this text: 2801$instr 2802Please fix this line or use -npro to avoid reading this file 2803EOM 2804 last; 2805 } 2806 } 2807 2808 # accumulating characters and looking for start of a quoted string 2809 else { 2810 if ( $instr =~ /\G([\"\'])/gc ) { 2811 $outstr .= $1; 2812 $quote_char = $1; 2813 } 2814 elsif ( $instr =~ /\G#/gc ) { 2815 last; 2816 } 2817 elsif ( $instr =~ /\G(.)/gc ) { 2818 $outstr .= $1; 2819 } 2820 else { 2821 last; 2822 } 2823 } 2824 } 2825 return ( $outstr, $msg ); 2826} 2827 2828sub parse_args { 2829 2830 # Parse a command string containing multiple string with possible 2831 # quotes, into individual commands. It might look like this, for example: 2832 # 2833 # -wba=" + - " -some-thing -wbb='. && ||' 2834 # 2835 # There is no need, at present, to handle escaped quote characters. 2836 # (They are not perltidy tokens, so needn't be in strings). 2837 2838 my ($body) = @_; 2839 my @body_parts = (); 2840 my $quote_char = ""; 2841 my $part = ""; 2842 my $msg = ""; 2843 while (1) { 2844 2845 # looking for ending quote character 2846 if ($quote_char) { 2847 if ( $body =~ /\G($quote_char)/gc ) { 2848 $quote_char = ""; 2849 } 2850 elsif ( $body =~ /\G(.)/gc ) { 2851 $part .= $1; 2852 } 2853 2854 # error..we reached the end without seeing the ending quote char 2855 else { 2856 if ( length($part) ) { push @body_parts, $part; } 2857 $msg = <<EOM; 2858Did not see ending quote character <$quote_char> in this text: 2859$body 2860EOM 2861 last; 2862 } 2863 } 2864 2865 # accumulating characters and looking for start of a quoted string 2866 else { 2867 if ( $body =~ /\G([\"\'])/gc ) { 2868 $quote_char = $1; 2869 } 2870 elsif ( $body =~ /\G(\s+)/gc ) { 2871 if ( length($part) ) { push @body_parts, $part; } 2872 $part = ""; 2873 } 2874 elsif ( $body =~ /\G(.)/gc ) { 2875 $part .= $1; 2876 } 2877 else { 2878 if ( length($part) ) { push @body_parts, $part; } 2879 last; 2880 } 2881 } 2882 } 2883 return ( \@body_parts, $msg ); 2884} 2885 2886sub dump_long_names { 2887 2888 my @names = sort @_; 2889 print STDOUT <<EOM; 2890# Command line long names (passed to GetOptions) 2891#--------------------------------------------------------------- 2892# here is a summary of the Getopt codes: 2893# <none> does not take an argument 2894# =s takes a mandatory string 2895# :s takes an optional string 2896# =i takes a mandatory integer 2897# :i takes an optional integer 2898# ! does not take an argument and may be negated 2899# i.e., -foo and -nofoo are allowed 2900# a double dash signals the end of the options list 2901# 2902#--------------------------------------------------------------- 2903EOM 2904 2905 foreach (@names) { print STDOUT "$_\n" } 2906} 2907 2908sub dump_defaults { 2909 my @defaults = sort @_; 2910 print STDOUT "Default command line options:\n"; 2911 foreach (@_) { print STDOUT "$_\n" } 2912} 2913 2914sub readable_options { 2915 2916 # return options for this run as a string which could be 2917 # put in a perltidyrc file 2918 my ( $rOpts, $roption_string ) = @_; 2919 my %Getopt_flags; 2920 my $rGetopt_flags = \%Getopt_flags; 2921 my $readable_options = "# Final parameter set for this run.\n"; 2922 $readable_options .= 2923 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; 2924 foreach my $opt ( @{$roption_string} ) { 2925 my $flag = ""; 2926 if ( $opt =~ /(.*)(!|=.*)$/ ) { 2927 $opt = $1; 2928 $flag = $2; 2929 } 2930 if ( defined( $rOpts->{$opt} ) ) { 2931 $rGetopt_flags->{$opt} = $flag; 2932 } 2933 } 2934 foreach my $key ( sort keys %{$rOpts} ) { 2935 my $flag = $rGetopt_flags->{$key}; 2936 my $value = $rOpts->{$key}; 2937 my $prefix = '--'; 2938 my $suffix = ""; 2939 if ($flag) { 2940 if ( $flag =~ /^=/ ) { 2941 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } 2942 $suffix = "=" . $value; 2943 } 2944 elsif ( $flag =~ /^!/ ) { 2945 $prefix .= "no" unless ($value); 2946 } 2947 else { 2948 2949 # shouldn't happen 2950 $readable_options .= 2951 "# ERROR in dump_options: unrecognized flag $flag for $key\n"; 2952 } 2953 } 2954 $readable_options .= $prefix . $key . $suffix . "\n"; 2955 } 2956 return $readable_options; 2957} 2958 2959sub show_version { 2960 print <<"EOM"; 2961This is perltidy, v$VERSION 2962 2963Copyright 2000-2010, Steve Hancock 2964 2965Perltidy is free software and may be copied under the terms of the GNU 2966General Public License, which is included in the distribution files. 2967 2968Complete documentation for perltidy can be found using 'man perltidy' 2969or on the internet at http://perltidy.sourceforge.net. 2970EOM 2971} 2972 2973sub usage { 2974 2975 print STDOUT <<EOF; 2976This is perltidy version $VERSION, a perl script indenter. Usage: 2977 2978 perltidy [ options ] file1 file2 file3 ... 2979 (output goes to file1.tdy, file2.tdy, file3.tdy, ...) 2980 perltidy [ options ] file1 -o outfile 2981 perltidy [ options ] file1 -st >outfile 2982 perltidy [ options ] <infile >outfile 2983 2984Options have short and long forms. Short forms are shown; see 2985man pages for long forms. Note: '=s' indicates a required string, 2986and '=n' indicates a required integer. 2987 2988I/O control 2989 -h show this help 2990 -o=file name of the output file (only if single input file) 2991 -oext=s change output extension from 'tdy' to s 2992 -opath=path change path to be 'path' for output files 2993 -b backup original to .bak and modify file in-place 2994 -bext=s change default backup extension from 'bak' to s 2995 -q deactivate error messages (for running under editor) 2996 -w include non-critical warning messages in the .ERR error output 2997 -syn run perl -c to check syntax (default under unix systems) 2998 -log save .LOG file, which has useful diagnostics 2999 -f force perltidy to read a binary file 3000 -g like -log but writes more detailed .LOG file, for debugging scripts 3001 -opt write the set of options actually used to a .LOG file 3002 -npro ignore .perltidyrc configuration command file 3003 -pro=file read configuration commands from file instead of .perltidyrc 3004 -st send output to standard output, STDOUT 3005 -se send error output to standard error output, STDERR 3006 -v display version number to standard output and quit 3007 3008Basic Options: 3009 -i=n use n columns per indentation level (default n=4) 3010 -t tabs: use one tab character per indentation level, not recommeded 3011 -nt no tabs: use n spaces per indentation level (default) 3012 -et=n entab leading whitespace n spaces per tab; not recommended 3013 -io "indent only": just do indentation, no other formatting. 3014 -sil=n set starting indentation level to n; use if auto detection fails 3015 -ole=s specify output line ending (s=dos or win, mac, unix) 3016 -ple keep output line endings same as input (input must be filename) 3017 3018Whitespace Control 3019 -fws freeze whitespace; this disables all whitespace changes 3020 and disables the following switches: 3021 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) 3022 -bbt same as -bt but for code block braces; same as -bt if not given 3023 -bbvt block braces vertically tight; use with -bl or -bli 3024 -bbvtl=s make -bbvt to apply to selected list of block types 3025 -pt=n paren tightness (n=0, 1 or 2) 3026 -sbt=n square bracket tightness (n=0, 1, or 2) 3027 -bvt=n brace vertical tightness, 3028 n=(0=open, 1=close unless multiple steps on a line, 2=always close) 3029 -pvt=n paren vertical tightness (see -bvt for n) 3030 -sbvt=n square bracket vertical tightness (see -bvt for n) 3031 -bvtc=n closing brace vertical tightness: 3032 n=(0=open, 1=sometimes close, 2=always close) 3033 -pvtc=n closing paren vertical tightness, see -bvtc for n. 3034 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. 3035 -ci=n sets continuation indentation=n, default is n=2 spaces 3036 -lp line up parentheses, brackets, and non-BLOCK braces 3037 -sfs add space before semicolon in for( ; ; ) 3038 -aws allow perltidy to add whitespace (default) 3039 -dws delete all old non-essential whitespace 3040 -icb indent closing brace of a code block 3041 -cti=n closing indentation of paren, square bracket, or non-block brace: 3042 n=0 none, =1 align with opening, =2 one full indentation level 3043 -icp equivalent to -cti=2 3044 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' 3045 -wrs=s want space right of tokens in string; 3046 -sts put space before terminal semicolon of a statement 3047 -sak=s put space between keywords given in s and '('; 3048 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' 3049 3050Line Break Control 3051 -fnl freeze newlines; this disables all line break changes 3052 and disables the following switches: 3053 -anl add newlines; ok to introduce new line breaks 3054 -bbs add blank line before subs and packages 3055 -bbc add blank line before block comments 3056 -bbb add blank line between major blocks 3057 -kbl=n keep old blank lines? 0=no, 1=some, 2=all 3058 -mbl=n maximum consecutive blank lines to output (default=1) 3059 -ce cuddled else; use this style: '} else {' 3060 -dnl delete old newlines (default) 3061 -l=n maximum line length; default n=80 3062 -bl opening brace on new line 3063 -sbl opening sub brace on new line. value of -bl is used if not given. 3064 -bli opening brace on new line and indented 3065 -bar opening brace always on right, even for long clauses 3066 -vt=n vertical tightness (requires -lp); n controls break after opening 3067 token: 0=never 1=no break if next line balanced 2=no break 3068 -vtc=n vertical tightness of closing container; n controls if closing 3069 token starts new line: 0=always 1=not unless list 1=never 3070 -wba=s want break after tokens in string; i.e. wba=': .' 3071 -wbb=s want break before tokens in string 3072 3073Following Old Breakpoints 3074 -kis keep interior semicolons. Allows multiple statements per line. 3075 -boc break at old comma breaks: turns off all automatic list formatting 3076 -bol break at old logical breakpoints: or, and, ||, && (default) 3077 -bok break at old list keyword breakpoints such as map, sort (default) 3078 -bot break at old conditional (ternary ?:) operator breakpoints (default) 3079 -cab=n break at commas after a comma-arrow (=>): 3080 n=0 break at all commas after => 3081 n=1 stable: break unless this breaks an existing one-line container 3082 n=2 break only if a one-line container cannot be formed 3083 n=3 do not treat commas after => specially at all 3084 3085Comment controls 3086 -ibc indent block comments (default) 3087 -isbc indent spaced block comments; may indent unless no leading space 3088 -msc=n minimum desired spaces to side comment, default 4 3089 -fpsc=n fix position for side comments; default 0; 3090 -csc add or update closing side comments after closing BLOCK brace 3091 -dcsc delete closing side comments created by a -csc command 3092 -cscp=s change closing side comment prefix to be other than '## end' 3093 -cscl=s change closing side comment to apply to selected list of blocks 3094 -csci=n minimum number of lines needed to apply a -csc tag, default n=6 3095 -csct=n maximum number of columns of appended text, default n=20 3096 -cscw causes warning if old side comment is overwritten with -csc 3097 3098 -sbc use 'static block comments' identified by leading '##' (default) 3099 -sbcp=s change static block comment identifier to be other than '##' 3100 -osbc outdent static block comments 3101 3102 -ssc use 'static side comments' identified by leading '##' (default) 3103 -sscp=s change static side comment identifier to be other than '##' 3104 3105Delete selected text 3106 -dac delete all comments AND pod 3107 -dbc delete block comments 3108 -dsc delete side comments 3109 -dp delete pod 3110 3111Send selected text to a '.TEE' file 3112 -tac tee all comments AND pod 3113 -tbc tee block comments 3114 -tsc tee side comments 3115 -tp tee pod 3116 3117Outdenting 3118 -olq outdent long quoted strings (default) 3119 -olc outdent a long block comment line 3120 -ola outdent statement labels 3121 -okw outdent control keywords (redo, next, last, goto, return) 3122 -okwl=s specify alternative keywords for -okw command 3123 3124Other controls 3125 -mft=n maximum fields per table; default n=40 3126 -x do not format lines before hash-bang line (i.e., for VMS) 3127 -asc allows perltidy to add a ';' when missing (default) 3128 -dsm allows perltidy to delete an unnecessary ';' (default) 3129 3130Combinations of other parameters 3131 -gnu attempt to follow GNU Coding Standards as applied to perl 3132 -mangle remove as many newlines as possible (but keep comments and pods) 3133 -extrude insert as many newlines as possible 3134 3135Dump and die, debugging 3136 -dop dump options used in this run to standard output and quit 3137 -ddf dump default options to standard output and quit 3138 -dsn dump all option short names to standard output and quit 3139 -dln dump option long names to standard output and quit 3140 -dpro dump whatever configuration file is in effect to standard output 3141 -dtt dump all token types to standard output and quit 3142 3143HTML 3144 -html write an html file (see 'man perl2web' for many options) 3145 Note: when -html is used, no indentation or formatting are done. 3146 Hint: try perltidy -html -css=mystyle.css filename.pl 3147 and edit mystyle.css to change the appearance of filename.html. 3148 -nnn gives line numbers 3149 -pre only writes out <pre>..</pre> code section 3150 -toc places a table of contents to subs at the top (default) 3151 -pod passes pod text through pod2html (default) 3152 -frm write html as a frame (3 files) 3153 -text=s extra extension for table of contents if -frm, default='toc' 3154 -sext=s extra extension for file content if -frm, default='src' 3155 3156A prefix of "n" negates short form toggle switches, and a prefix of "no" 3157negates the long forms. For example, -nasc means don't add missing 3158semicolons. 3159 3160If you are unable to see this entire text, try "perltidy -h | more" 3161For more detailed information, and additional options, try "man perltidy", 3162or go to the perltidy home page at http://perltidy.sourceforge.net 3163EOF 3164 3165} 3166 3167sub process_this_file { 3168 3169 my ( $truth, $beauty ) = @_; 3170 3171 # loop to process each line of this file 3172 while ( my $line_of_tokens = $truth->get_line() ) { 3173 $beauty->write_line($line_of_tokens); 3174 } 3175 3176 # finish up 3177 eval { $beauty->finish_formatting() }; 3178 $truth->report_tokenization_errors(); 3179} 3180 3181sub check_syntax { 3182 3183 # Use 'perl -c' to make sure that we did not create bad syntax 3184 # This is a very good independent check for programming errors 3185 # 3186 # Given names of the input and output files, ($ifname, $ofname), 3187 # we do the following: 3188 # - check syntax of the input file 3189 # - if bad, all done (could be an incomplete code snippet) 3190 # - if infile syntax ok, then check syntax of the output file; 3191 # - if outfile syntax bad, issue warning; this implies a code bug! 3192 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good 3193 3194 my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; 3195 my $infile_syntax_ok = 0; 3196 my $line_of_dashes = '-' x 42 . "\n"; 3197 3198 my $flags = $rOpts->{'perl-syntax-check-flags'}; 3199 3200 # be sure we invoke perl with -c 3201 # note: perl will accept repeated flags like '-c -c'. It is safest 3202 # to append another -c than try to find an interior bundled c, as 3203 # in -Tc, because such a 'c' might be in a quoted string, for example. 3204 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } 3205 3206 # be sure we invoke perl with -x if requested 3207 # same comments about repeated parameters applies 3208 if ( $rOpts->{'look-for-hash-bang'} ) { 3209 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } 3210 } 3211 3212 # this shouldn't happen unless a termporary file couldn't be made 3213 if ( $ifname eq '-' ) { 3214 $logger_object->write_logfile_entry( 3215 "Cannot run perl -c on STDIN and STDOUT\n"); 3216 return $infile_syntax_ok; 3217 } 3218 3219 $logger_object->write_logfile_entry( 3220 "checking input file syntax with perl $flags\n"); 3221 $logger_object->write_logfile_entry($line_of_dashes); 3222 3223 # Not all operating systems/shells support redirection of the standard 3224 # error output. 3225 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; 3226 3227 my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); 3228 $logger_object->write_logfile_entry("$perl_output\n"); 3229 3230 if ( $perl_output =~ /syntax\s*OK/ ) { 3231 $infile_syntax_ok = 1; 3232 $logger_object->write_logfile_entry($line_of_dashes); 3233 $logger_object->write_logfile_entry( 3234 "checking output file syntax with perl $flags ...\n"); 3235 $logger_object->write_logfile_entry($line_of_dashes); 3236 3237 my $perl_output = 3238 do_syntax_check( $ofname, $flags, $error_redirection ); 3239 $logger_object->write_logfile_entry("$perl_output\n"); 3240 3241 unless ( $perl_output =~ /syntax\s*OK/ ) { 3242 $logger_object->write_logfile_entry($line_of_dashes); 3243 $logger_object->warning( 3244"The output file has a syntax error when tested with perl $flags $ofname !\n" 3245 ); 3246 $logger_object->warning( 3247 "This implies an error in perltidy; the file $ofname is bad\n"); 3248 $logger_object->report_definite_bug(); 3249 3250 # the perl version number will be helpful for diagnosing the problem 3251 $logger_object->write_logfile_entry( 3252 qx/perl -v $error_redirection/ . "\n" ); 3253 } 3254 } 3255 else { 3256 3257 # Only warn of perl -c syntax errors. Other messages, 3258 # such as missing modules, are too common. They can be 3259 # seen by running with perltidy -w 3260 $logger_object->complain("A syntax check using perl $flags gives: \n"); 3261 $logger_object->complain($line_of_dashes); 3262 $logger_object->complain("$perl_output\n"); 3263 $logger_object->complain($line_of_dashes); 3264 $infile_syntax_ok = -1; 3265 $logger_object->write_logfile_entry($line_of_dashes); 3266 $logger_object->write_logfile_entry( 3267"The output file will not be checked because of input file problems\n" 3268 ); 3269 } 3270 return $infile_syntax_ok; 3271} 3272 3273sub do_syntax_check { 3274 my ( $fname, $flags, $error_redirection ) = @_; 3275 3276 # We have to quote the filename in case it has unusual characters 3277 # or spaces. Example: this filename #CM11.pm# gives trouble. 3278 $fname = '"' . $fname . '"'; 3279 3280 # Under VMS something like -T will become -t (and an error) so we 3281 # will put quotes around the flags. Double quotes seem to work on 3282 # Unix/Windows/VMS, but this may not work on all systems. (Single 3283 # quotes do not work under Windows). It could become necessary to 3284 # put double quotes around each flag, such as: -"c" -"T" 3285 # We may eventually need some system-dependent coding here. 3286 $flags = '"' . $flags . '"'; 3287 3288 # now wish for luck... 3289 return qx/perl $flags $fname $error_redirection/; 3290} 3291 3292##################################################################### 3293# 3294# This is a stripped down version of IO::Scalar 3295# Given a reference to a scalar, it supplies either: 3296# a getline method which reads lines (mode='r'), or 3297# a print method which reads lines (mode='w') 3298# 3299##################################################################### 3300package Perl::Tidy::IOScalar; 3301use Carp; 3302 3303sub new { 3304 my ( $package, $rscalar, $mode ) = @_; 3305 my $ref = ref $rscalar; 3306 if ( $ref ne 'SCALAR' ) { 3307 confess <<EOM; 3308------------------------------------------------------------------------ 3309expecting ref to SCALAR but got ref to ($ref); trace follows: 3310------------------------------------------------------------------------ 3311EOM 3312 3313 } 3314 if ( $mode eq 'w' ) { 3315 $$rscalar = ""; 3316 return bless [ $rscalar, $mode ], $package; 3317 } 3318 elsif ( $mode eq 'r' ) { 3319 3320 # Convert a scalar to an array. 3321 # This avoids looking for "\n" on each call to getline 3322 my @array = map { $_ .= "\n" } split /\n/, ${$rscalar}; 3323 my $i_next = 0; 3324 return bless [ \@array, $mode, $i_next ], $package; 3325 } 3326 else { 3327 confess <<EOM; 3328------------------------------------------------------------------------ 3329expecting mode = 'r' or 'w' but got mode ($mode); trace follows: 3330------------------------------------------------------------------------ 3331EOM 3332 } 3333} 3334 3335sub getline { 3336 my $self = shift; 3337 my $mode = $self->[1]; 3338 if ( $mode ne 'r' ) { 3339 confess <<EOM; 3340------------------------------------------------------------------------ 3341getline call requires mode = 'r' but mode = ($mode); trace follows: 3342------------------------------------------------------------------------ 3343EOM 3344 } 3345 my $i = $self->[2]++; 3346 ##my $line = $self->[0]->[$i]; 3347 return $self->[0]->[$i]; 3348} 3349 3350sub print { 3351 my $self = shift; 3352 my $mode = $self->[1]; 3353 if ( $mode ne 'w' ) { 3354 confess <<EOM; 3355------------------------------------------------------------------------ 3356print call requires mode = 'w' but mode = ($mode); trace follows: 3357------------------------------------------------------------------------ 3358EOM 3359 } 3360 ${ $self->[0] } .= $_[0]; 3361} 3362sub close { return } 3363 3364##################################################################### 3365# 3366# This is a stripped down version of IO::ScalarArray 3367# Given a reference to an array, it supplies either: 3368# a getline method which reads lines (mode='r'), or 3369# a print method which reads lines (mode='w') 3370# 3371# NOTE: this routine assumes that that there aren't any embedded 3372# newlines within any of the array elements. There are no checks 3373# for that. 3374# 3375##################################################################### 3376package Perl::Tidy::IOScalarArray; 3377use Carp; 3378 3379sub new { 3380 my ( $package, $rarray, $mode ) = @_; 3381 my $ref = ref $rarray; 3382 if ( $ref ne 'ARRAY' ) { 3383 confess <<EOM; 3384------------------------------------------------------------------------ 3385expecting ref to ARRAY but got ref to ($ref); trace follows: 3386------------------------------------------------------------------------ 3387EOM 3388 3389 } 3390 if ( $mode eq 'w' ) { 3391 @$rarray = (); 3392 return bless [ $rarray, $mode ], $package; 3393 } 3394 elsif ( $mode eq 'r' ) { 3395 my $i_next = 0; 3396 return bless [ $rarray, $mode, $i_next ], $package; 3397 } 3398 else { 3399 confess <<EOM; 3400------------------------------------------------------------------------ 3401expecting mode = 'r' or 'w' but got mode ($mode); trace follows: 3402------------------------------------------------------------------------ 3403EOM 3404 } 3405} 3406 3407sub getline { 3408 my $self = shift; 3409 my $mode = $self->[1]; 3410 if ( $mode ne 'r' ) { 3411 confess <<EOM; 3412------------------------------------------------------------------------ 3413getline requires mode = 'r' but mode = ($mode); trace follows: 3414------------------------------------------------------------------------ 3415EOM 3416 } 3417 my $i = $self->[2]++; 3418 return $self->[0]->[$i]; 3419} 3420 3421sub print { 3422 my $self = shift; 3423 my $mode = $self->[1]; 3424 if ( $mode ne 'w' ) { 3425 confess <<EOM; 3426------------------------------------------------------------------------ 3427print requires mode = 'w' but mode = ($mode); trace follows: 3428------------------------------------------------------------------------ 3429EOM 3430 } 3431 push @{ $self->[0] }, $_[0]; 3432} 3433sub close { return } 3434 3435##################################################################### 3436# 3437# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method 3438# which returns the next line to be parsed 3439# 3440##################################################################### 3441 3442package Perl::Tidy::LineSource; 3443 3444sub new { 3445 3446 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; 3447 my $input_file_copy = undef; 3448 my $fh_copy; 3449 3450 my $input_line_ending; 3451 if ( $rOpts->{'preserve-line-endings'} ) { 3452 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); 3453 } 3454 3455 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); 3456 return undef unless $fh; 3457 3458 # in order to check output syntax when standard output is used, 3459 # or when it is an object, we have to make a copy of the file 3460 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) 3461 { 3462 3463 # Turning off syntax check when input output is used. 3464 # The reason is that temporary files cause problems on 3465 # on many systems. 3466 $rOpts->{'check-syntax'} = 0; 3467 $input_file_copy = '-'; 3468 3469 $$rpending_logfile_message .= <<EOM; 3470Note: --syntax check will be skipped because standard input is used 3471EOM 3472 3473 } 3474 3475 return bless { 3476 _fh => $fh, 3477 _fh_copy => $fh_copy, 3478 _filename => $input_file, 3479 _input_file_copy => $input_file_copy, 3480 _input_line_ending => $input_line_ending, 3481 _rinput_buffer => [], 3482 _started => 0, 3483 }, $class; 3484} 3485 3486sub get_input_file_copy_name { 3487 my $self = shift; 3488 my $ifname = $self->{_input_file_copy}; 3489 unless ($ifname) { 3490 $ifname = $self->{_filename}; 3491 } 3492 return $ifname; 3493} 3494 3495sub close_input_file { 3496 my $self = shift; 3497 eval { $self->{_fh}->close() }; 3498 eval { $self->{_fh_copy}->close() } if $self->{_fh_copy}; 3499} 3500 3501sub get_line { 3502 my $self = shift; 3503 my $line = undef; 3504 my $fh = $self->{_fh}; 3505 my $fh_copy = $self->{_fh_copy}; 3506 my $rinput_buffer = $self->{_rinput_buffer}; 3507 3508 if ( scalar(@$rinput_buffer) ) { 3509 $line = shift @$rinput_buffer; 3510 } 3511 else { 3512 $line = $fh->getline(); 3513 3514 # patch to read raw mac files under unix, dos 3515 # see if the first line has embedded \r's 3516 if ( $line && !$self->{_started} ) { 3517 if ( $line =~ /[\015][^\015\012]/ ) { 3518 3519 # found one -- break the line up and store in a buffer 3520 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; 3521 my $count = @$rinput_buffer; 3522 $line = shift @$rinput_buffer; 3523 } 3524 $self->{_started}++; 3525 } 3526 } 3527 if ( $line && $fh_copy ) { $fh_copy->print($line); } 3528 return $line; 3529} 3530 3531##################################################################### 3532# 3533# the Perl::Tidy::LineSink class supplies a write_line method for 3534# actual file writing 3535# 3536##################################################################### 3537 3538package Perl::Tidy::LineSink; 3539 3540sub new { 3541 3542 my ( $class, $output_file, $tee_file, $line_separator, $rOpts, 3543 $rpending_logfile_message, $binmode ) 3544 = @_; 3545 my $fh = undef; 3546 my $fh_copy = undef; 3547 my $fh_tee = undef; 3548 my $output_file_copy = ""; 3549 my $output_file_open = 0; 3550 3551 if ( $rOpts->{'format'} eq 'tidy' ) { 3552 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); 3553 unless ($fh) { die "Cannot write to output stream\n"; } 3554 $output_file_open = 1; 3555 if ($binmode) { 3556 if ( ref($fh) eq 'IO::File' ) { 3557 binmode $fh; 3558 } 3559 if ( $output_file eq '-' ) { binmode STDOUT } 3560 } 3561 } 3562 3563 # in order to check output syntax when standard output is used, 3564 # or when it is an object, we have to make a copy of the file 3565 if ( $output_file eq '-' || ref $output_file ) { 3566 if ( $rOpts->{'check-syntax'} ) { 3567 3568 # Turning off syntax check when standard output is used. 3569 # The reason is that temporary files cause problems on 3570 # on many systems. 3571 $rOpts->{'check-syntax'} = 0; 3572 $output_file_copy = '-'; 3573 $$rpending_logfile_message .= <<EOM; 3574Note: --syntax check will be skipped because standard output is used 3575EOM 3576 3577 } 3578 } 3579 3580 bless { 3581 _fh => $fh, 3582 _fh_copy => $fh_copy, 3583 _fh_tee => $fh_tee, 3584 _output_file => $output_file, 3585 _output_file_open => $output_file_open, 3586 _output_file_copy => $output_file_copy, 3587 _tee_flag => 0, 3588 _tee_file => $tee_file, 3589 _tee_file_opened => 0, 3590 _line_separator => $line_separator, 3591 _binmode => $binmode, 3592 }, $class; 3593} 3594 3595sub write_line { 3596 3597 my $self = shift; 3598 my $fh = $self->{_fh}; 3599 my $fh_copy = $self->{_fh_copy}; 3600 3601 my $output_file_open = $self->{_output_file_open}; 3602 chomp $_[0]; 3603 $_[0] .= $self->{_line_separator}; 3604 3605 $fh->print( $_[0] ) if ( $self->{_output_file_open} ); 3606 print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} ); 3607 3608 if ( $self->{_tee_flag} ) { 3609 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } 3610 my $fh_tee = $self->{_fh_tee}; 3611 print $fh_tee $_[0]; 3612 } 3613} 3614 3615sub get_output_file_copy { 3616 my $self = shift; 3617 my $ofname = $self->{_output_file_copy}; 3618 unless ($ofname) { 3619 $ofname = $self->{_output_file}; 3620 } 3621 return $ofname; 3622} 3623 3624sub tee_on { 3625 my $self = shift; 3626 $self->{_tee_flag} = 1; 3627} 3628 3629sub tee_off { 3630 my $self = shift; 3631 $self->{_tee_flag} = 0; 3632} 3633 3634sub really_open_tee_file { 3635 my $self = shift; 3636 my $tee_file = $self->{_tee_file}; 3637 my $fh_tee; 3638 $fh_tee = IO::File->new(">$tee_file") 3639 or die("couldn't open TEE file $tee_file: $!\n"); 3640 binmode $fh_tee if $self->{_binmode}; 3641 $self->{_tee_file_opened} = 1; 3642 $self->{_fh_tee} = $fh_tee; 3643} 3644 3645sub close_output_file { 3646 my $self = shift; 3647 eval { $self->{_fh}->close() } if $self->{_output_file_open}; 3648 eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} ); 3649 $self->close_tee_file(); 3650} 3651 3652sub close_tee_file { 3653 my $self = shift; 3654 3655 if ( $self->{_tee_file_opened} ) { 3656 eval { $self->{_fh_tee}->close() }; 3657 $self->{_tee_file_opened} = 0; 3658 } 3659} 3660 3661##################################################################### 3662# 3663# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is 3664# useful for program development. 3665# 3666# Only one such file is created regardless of the number of input 3667# files processed. This allows the results of processing many files 3668# to be summarized in a single file. 3669# 3670##################################################################### 3671 3672package Perl::Tidy::Diagnostics; 3673 3674sub new { 3675 3676 my $class = shift; 3677 bless { 3678 _write_diagnostics_count => 0, 3679 _last_diagnostic_file => "", 3680 _input_file => "", 3681 _fh => undef, 3682 }, $class; 3683} 3684 3685sub set_input_file { 3686 my $self = shift; 3687 $self->{_input_file} = $_[0]; 3688} 3689 3690# This is a diagnostic routine which is useful for program development. 3691# Output from debug messages go to a file named DIAGNOSTICS, where 3692# they are labeled by file and line. This allows many files to be 3693# scanned at once for some particular condition of interest. 3694sub write_diagnostics { 3695 my $self = shift; 3696 3697 unless ( $self->{_write_diagnostics_count} ) { 3698 open DIAGNOSTICS, ">DIAGNOSTICS" 3699 or death("couldn't open DIAGNOSTICS: $!\n"); 3700 } 3701 3702 my $last_diagnostic_file = $self->{_last_diagnostic_file}; 3703 my $input_file = $self->{_input_file}; 3704 if ( $last_diagnostic_file ne $input_file ) { 3705 print DIAGNOSTICS "\nFILE:$input_file\n"; 3706 } 3707 $self->{_last_diagnostic_file} = $input_file; 3708 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); 3709 print DIAGNOSTICS "$input_line_number:\t@_"; 3710 $self->{_write_diagnostics_count}++; 3711} 3712 3713##################################################################### 3714# 3715# The Perl::Tidy::Logger class writes the .LOG and .ERR files 3716# 3717##################################################################### 3718 3719package Perl::Tidy::Logger; 3720 3721sub new { 3722 my $class = shift; 3723 my $fh; 3724 my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; 3725 3726 # remove any old error output file 3727 unless ( ref($warning_file) ) { 3728 if ( -e $warning_file ) { unlink($warning_file) } 3729 } 3730 3731 bless { 3732 _log_file => $log_file, 3733 _fh_warnings => undef, 3734 _rOpts => $rOpts, 3735 _fh_warnings => undef, 3736 _last_input_line_written => 0, 3737 _at_end_of_file => 0, 3738 _use_prefix => 1, 3739 _block_log_output => 0, 3740 _line_of_tokens => undef, 3741 _output_line_number => undef, 3742 _wrote_line_information_string => 0, 3743 _wrote_column_headings => 0, 3744 _warning_file => $warning_file, 3745 _warning_count => 0, 3746 _complaint_count => 0, 3747 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure 3748 _saw_brace_error => 0, 3749 _saw_extrude => $saw_extrude, 3750 _output_array => [], 3751 }, $class; 3752} 3753 3754sub close_log_file { 3755 3756 my $self = shift; 3757 if ( $self->{_fh_warnings} ) { 3758 eval { $self->{_fh_warnings}->close() }; 3759 $self->{_fh_warnings} = undef; 3760 } 3761} 3762 3763sub get_warning_count { 3764 my $self = shift; 3765 return $self->{_warning_count}; 3766} 3767 3768sub get_use_prefix { 3769 my $self = shift; 3770 return $self->{_use_prefix}; 3771} 3772 3773sub block_log_output { 3774 my $self = shift; 3775 $self->{_block_log_output} = 1; 3776} 3777 3778sub unblock_log_output { 3779 my $self = shift; 3780 $self->{_block_log_output} = 0; 3781} 3782 3783sub interrupt_logfile { 3784 my $self = shift; 3785 $self->{_use_prefix} = 0; 3786 $self->warning("\n"); 3787 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); 3788} 3789 3790sub resume_logfile { 3791 my $self = shift; 3792 $self->write_logfile_entry( '#' x 60 . "\n" ); 3793 $self->{_use_prefix} = 1; 3794} 3795 3796sub we_are_at_the_last_line { 3797 my $self = shift; 3798 unless ( $self->{_wrote_line_information_string} ) { 3799 $self->write_logfile_entry("Last line\n\n"); 3800 } 3801 $self->{_at_end_of_file} = 1; 3802} 3803 3804# record some stuff in case we go down in flames 3805sub black_box { 3806 my $self = shift; 3807 my ( $line_of_tokens, $output_line_number ) = @_; 3808 my $input_line = $line_of_tokens->{_line_text}; 3809 my $input_line_number = $line_of_tokens->{_line_number}; 3810 3811 # save line information in case we have to write a logfile message 3812 $self->{_line_of_tokens} = $line_of_tokens; 3813 $self->{_output_line_number} = $output_line_number; 3814 $self->{_wrote_line_information_string} = 0; 3815 3816 my $last_input_line_written = $self->{_last_input_line_written}; 3817 my $rOpts = $self->{_rOpts}; 3818 if ( 3819 ( 3820 ( $input_line_number - $last_input_line_written ) >= 3821 $rOpts->{'logfile-gap'} 3822 ) 3823 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) 3824 ) 3825 { 3826 my $rlevels = $line_of_tokens->{_rlevels}; 3827 my $structural_indentation_level = $$rlevels[0]; 3828 $self->{_last_input_line_written} = $input_line_number; 3829 ( my $out_str = $input_line ) =~ s/^\s*//; 3830 chomp $out_str; 3831 3832 $out_str = ( '.' x $structural_indentation_level ) . $out_str; 3833 3834 if ( length($out_str) > 35 ) { 3835 $out_str = substr( $out_str, 0, 35 ) . " ...."; 3836 } 3837 $self->logfile_output( "", "$out_str\n" ); 3838 } 3839} 3840 3841sub write_logfile_entry { 3842 my $self = shift; 3843 3844 # add leading >>> to avoid confusing error mesages and code 3845 $self->logfile_output( ">>>", "@_" ); 3846} 3847 3848sub write_column_headings { 3849 my $self = shift; 3850 3851 $self->{_wrote_column_headings} = 1; 3852 my $routput_array = $self->{_output_array}; 3853 push @{$routput_array}, <<EOM; 3854The nesting depths in the table below are at the start of the lines. 3855The indicated output line numbers are not always exact. 3856ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. 3857 3858in:out indent c b nesting code + messages; (messages begin with >>>) 3859lines levels i k (code begins with one '.' per indent level) 3860------ ----- - - -------- ------------------------------------------- 3861EOM 3862} 3863 3864sub make_line_information_string { 3865 3866 # make columns of information when a logfile message needs to go out 3867 my $self = shift; 3868 my $line_of_tokens = $self->{_line_of_tokens}; 3869 my $input_line_number = $line_of_tokens->{_line_number}; 3870 my $line_information_string = ""; 3871 if ($input_line_number) { 3872 3873 my $output_line_number = $self->{_output_line_number}; 3874 my $brace_depth = $line_of_tokens->{_curly_brace_depth}; 3875 my $paren_depth = $line_of_tokens->{_paren_depth}; 3876 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; 3877 my $python_indentation_level = 3878 $line_of_tokens->{_python_indentation_level}; 3879 my $rlevels = $line_of_tokens->{_rlevels}; 3880 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 3881 my $rci_levels = $line_of_tokens->{_rci_levels}; 3882 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 3883 3884 my $structural_indentation_level = $$rlevels[0]; 3885 3886 $self->write_column_headings() unless $self->{_wrote_column_headings}; 3887 3888 # keep logfile columns aligned for scripts up to 999 lines; 3889 # for longer scripts it doesn't really matter 3890 my $extra_space = ""; 3891 $extra_space .= 3892 ( $input_line_number < 10 ) ? " " 3893 : ( $input_line_number < 100 ) ? " " 3894 : ""; 3895 $extra_space .= 3896 ( $output_line_number < 10 ) ? " " 3897 : ( $output_line_number < 100 ) ? " " 3898 : ""; 3899 3900 # there are 2 possible nesting strings: 3901 # the original which looks like this: (0 [1 {2 3902 # the new one, which looks like this: {{[ 3903 # the new one is easier to read, and shows the order, but 3904 # could be arbitrarily long, so we use it unless it is too long 3905 my $nesting_string = 3906 "($paren_depth [$square_bracket_depth {$brace_depth"; 3907 my $nesting_string_new = $$rnesting_tokens[0]; 3908 3909 my $ci_level = $$rci_levels[0]; 3910 if ( $ci_level > 9 ) { $ci_level = '*' } 3911 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; 3912 3913 if ( length($nesting_string_new) <= 8 ) { 3914 $nesting_string = 3915 $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); 3916 } 3917 if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 } 3918 $line_information_string = 3919"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; 3920 } 3921 return $line_information_string; 3922} 3923 3924sub logfile_output { 3925 my $self = shift; 3926 my ( $prompt, $msg ) = @_; 3927 return if ( $self->{_block_log_output} ); 3928 3929 my $routput_array = $self->{_output_array}; 3930 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { 3931 push @{$routput_array}, "$msg"; 3932 } 3933 else { 3934 my $line_information_string = $self->make_line_information_string(); 3935 $self->{_wrote_line_information_string} = 1; 3936 3937 if ($line_information_string) { 3938 push @{$routput_array}, "$line_information_string $prompt$msg"; 3939 } 3940 else { 3941 push @{$routput_array}, "$msg"; 3942 } 3943 } 3944} 3945 3946sub get_saw_brace_error { 3947 my $self = shift; 3948 return $self->{_saw_brace_error}; 3949} 3950 3951sub increment_brace_error { 3952 my $self = shift; 3953 $self->{_saw_brace_error}++; 3954} 3955 3956sub brace_warning { 3957 my $self = shift; 3958 use constant BRACE_WARNING_LIMIT => 10; 3959 my $saw_brace_error = $self->{_saw_brace_error}; 3960 3961 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { 3962 $self->warning(@_); 3963 } 3964 $saw_brace_error++; 3965 $self->{_saw_brace_error} = $saw_brace_error; 3966 3967 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { 3968 $self->warning("No further warnings of this type will be given\n"); 3969 } 3970} 3971 3972sub complain { 3973 3974 # handle non-critical warning messages based on input flag 3975 my $self = shift; 3976 my $rOpts = $self->{_rOpts}; 3977 3978 # these appear in .ERR output only if -w flag is used 3979 if ( $rOpts->{'warning-output'} ) { 3980 $self->warning(@_); 3981 } 3982 3983 # otherwise, they go to the .LOG file 3984 else { 3985 $self->{_complaint_count}++; 3986 $self->write_logfile_entry(@_); 3987 } 3988} 3989 3990sub warning { 3991 3992 # report errors to .ERR file (or stdout) 3993 my $self = shift; 3994 use constant WARNING_LIMIT => 50; 3995 3996 my $rOpts = $self->{_rOpts}; 3997 unless ( $rOpts->{'quiet'} ) { 3998 3999 my $warning_count = $self->{_warning_count}; 4000 unless ($warning_count) { 4001 my $warning_file = $self->{_warning_file}; 4002 my $fh_warnings; 4003 if ( $rOpts->{'standard-error-output'} ) { 4004 $fh_warnings = *STDERR; 4005 } 4006 else { 4007 ( $fh_warnings, my $filename ) = 4008 Perl::Tidy::streamhandle( $warning_file, 'w' ); 4009 $fh_warnings or die("couldn't open $filename $!\n"); 4010 warn "## Please see file $filename\n"; 4011 } 4012 $self->{_fh_warnings} = $fh_warnings; 4013 } 4014 4015 my $fh_warnings = $self->{_fh_warnings}; 4016 if ( $warning_count < WARNING_LIMIT ) { 4017 if ( $self->get_use_prefix() > 0 ) { 4018 my $input_line_number = 4019 Perl::Tidy::Tokenizer::get_input_line_number(); 4020 $fh_warnings->print("$input_line_number:\t@_"); 4021 $self->write_logfile_entry("WARNING: @_"); 4022 } 4023 else { 4024 $fh_warnings->print(@_); 4025 $self->write_logfile_entry(@_); 4026 } 4027 } 4028 $warning_count++; 4029 $self->{_warning_count} = $warning_count; 4030 4031 if ( $warning_count == WARNING_LIMIT ) { 4032 $fh_warnings->print("No further warnings will be given\n"); 4033 } 4034 } 4035} 4036 4037# programming bug codes: 4038# -1 = no bug 4039# 0 = maybe, not sure. 4040# 1 = definitely 4041sub report_possible_bug { 4042 my $self = shift; 4043 my $saw_code_bug = $self->{_saw_code_bug}; 4044 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; 4045} 4046 4047sub report_definite_bug { 4048 my $self = shift; 4049 $self->{_saw_code_bug} = 1; 4050} 4051 4052sub ask_user_for_bug_report { 4053 my $self = shift; 4054 4055 my ( $infile_syntax_ok, $formatter ) = @_; 4056 my $saw_code_bug = $self->{_saw_code_bug}; 4057 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { 4058 $self->warning(<<EOM); 4059 4060You may have encountered a code bug in perltidy. If you think so, and 4061the problem is not listed in the BUGS file at 4062http://perltidy.sourceforge.net, please report it so that it can be 4063corrected. Include the smallest possible script which has the problem, 4064along with the .LOG file. See the manual pages for contact information. 4065Thank you! 4066EOM 4067 4068 } 4069 elsif ( $saw_code_bug == 1 ) { 4070 if ( $self->{_saw_extrude} ) { 4071 $self->warning(<<EOM); 4072 4073You may have encountered a bug in perltidy. However, since you are using the 4074-extrude option, the problem may be with perl or one of its modules, which have 4075occasional problems with this type of file. If you believe that the 4076problem is with perltidy, and the problem is not listed in the BUGS file at 4077http://perltidy.sourceforge.net, please report it so that it can be corrected. 4078Include the smallest possible script which has the problem, along with the .LOG 4079file. See the manual pages for contact information. 4080Thank you! 4081EOM 4082 } 4083 else { 4084 $self->warning(<<EOM); 4085 4086Oops, you seem to have encountered a bug in perltidy. Please check the 4087BUGS file at http://perltidy.sourceforge.net. If the problem is not 4088listed there, please report it so that it can be corrected. Include the 4089smallest possible script which produces this message, along with the 4090.LOG file if appropriate. See the manual pages for contact information. 4091Your efforts are appreciated. 4092Thank you! 4093EOM 4094 my $added_semicolon_count = 0; 4095 eval { 4096 $added_semicolon_count = 4097 $formatter->get_added_semicolon_count(); 4098 }; 4099 if ( $added_semicolon_count > 0 ) { 4100 $self->warning(<<EOM); 4101 4102The log file shows that perltidy added $added_semicolon_count semicolons. 4103Please rerun with -nasc to see if that is the cause of the syntax error. Even 4104if that is the problem, please report it so that it can be fixed. 4105EOM 4106 4107 } 4108 } 4109 } 4110} 4111 4112sub finish { 4113 4114 # called after all formatting to summarize errors 4115 my $self = shift; 4116 my ( $infile_syntax_ok, $formatter ) = @_; 4117 4118 my $rOpts = $self->{_rOpts}; 4119 my $warning_count = $self->{_warning_count}; 4120 my $saw_code_bug = $self->{_saw_code_bug}; 4121 4122 my $save_logfile = 4123 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) 4124 || $saw_code_bug == 1 4125 || $rOpts->{'logfile'}; 4126 my $log_file = $self->{_log_file}; 4127 if ($warning_count) { 4128 if ($save_logfile) { 4129 $self->block_log_output(); # avoid echoing this to the logfile 4130 $self->warning( 4131 "The logfile $log_file may contain useful information\n"); 4132 $self->unblock_log_output(); 4133 } 4134 4135 if ( $self->{_complaint_count} > 0 ) { 4136 $self->warning( 4137"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" 4138 ); 4139 } 4140 4141 if ( $self->{_saw_brace_error} 4142 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) 4143 { 4144 $self->warning("To save a full .LOG file rerun with -g\n"); 4145 } 4146 } 4147 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); 4148 4149 if ($save_logfile) { 4150 my $log_file = $self->{_log_file}; 4151 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); 4152 if ($fh) { 4153 my $routput_array = $self->{_output_array}; 4154 foreach ( @{$routput_array} ) { $fh->print($_) } 4155 eval { $fh->close() }; 4156 } 4157 } 4158} 4159 4160##################################################################### 4161# 4162# The Perl::Tidy::DevNull class supplies a dummy print method 4163# 4164##################################################################### 4165 4166package Perl::Tidy::DevNull; 4167sub new { return bless {}, $_[0] } 4168sub print { return } 4169sub close { return } 4170 4171##################################################################### 4172# 4173# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html 4174# 4175##################################################################### 4176 4177package Perl::Tidy::HtmlWriter; 4178 4179use File::Basename; 4180 4181# class variables 4182use vars qw{ 4183 %html_color 4184 %html_bold 4185 %html_italic 4186 %token_short_names 4187 %short_to_long_names 4188 $rOpts 4189 $css_filename 4190 $css_linkname 4191 $missing_html_entities 4192}; 4193 4194# replace unsafe characters with HTML entity representation if HTML::Entities 4195# is available 4196{ eval "use HTML::Entities"; $missing_html_entities = $@; } 4197 4198sub new { 4199 4200 my ( $class, $input_file, $html_file, $extension, $html_toc_extension, 4201 $html_src_extension ) 4202 = @_; 4203 4204 my $html_file_opened = 0; 4205 my $html_fh; 4206 ( $html_fh, my $html_filename ) = 4207 Perl::Tidy::streamhandle( $html_file, 'w' ); 4208 unless ($html_fh) { 4209 warn("can't open $html_file: $!\n"); 4210 return undef; 4211 } 4212 $html_file_opened = 1; 4213 4214 if ( !$input_file || $input_file eq '-' || ref($input_file) ) { 4215 $input_file = "NONAME"; 4216 } 4217 4218 # write the table of contents to a string 4219 my $toc_string; 4220 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); 4221 4222 my $html_pre_fh; 4223 my @pre_string_stack; 4224 if ( $rOpts->{'html-pre-only'} ) { 4225 4226 # pre section goes directly to the output stream 4227 $html_pre_fh = $html_fh; 4228 $html_pre_fh->print( <<"PRE_END"); 4229<pre> 4230PRE_END 4231 } 4232 else { 4233 4234 # pre section go out to a temporary string 4235 my $pre_string; 4236 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 4237 push @pre_string_stack, \$pre_string; 4238 } 4239 4240 # pod text gets diverted if the 'pod2html' is used 4241 my $html_pod_fh; 4242 my $pod_string; 4243 if ( $rOpts->{'pod2html'} ) { 4244 if ( $rOpts->{'html-pre-only'} ) { 4245 undef $rOpts->{'pod2html'}; 4246 } 4247 else { 4248 eval "use Pod::Html"; 4249 if ($@) { 4250 warn 4251"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; 4252 undef $rOpts->{'pod2html'}; 4253 } 4254 else { 4255 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); 4256 } 4257 } 4258 } 4259 4260 my $toc_filename; 4261 my $src_filename; 4262 if ( $rOpts->{'frames'} ) { 4263 unless ($extension) { 4264 warn 4265"cannot use frames without a specified output extension; ignoring -frm\n"; 4266 undef $rOpts->{'frames'}; 4267 } 4268 else { 4269 $toc_filename = $input_file . $html_toc_extension . $extension; 4270 $src_filename = $input_file . $html_src_extension . $extension; 4271 } 4272 } 4273 4274 # ---------------------------------------------------------- 4275 # Output is now directed as follows: 4276 # html_toc_fh <-- table of contents items 4277 # html_pre_fh <-- the <pre> section of formatted code, except: 4278 # html_pod_fh <-- pod goes here with the pod2html option 4279 # ---------------------------------------------------------- 4280 4281 my $title = $rOpts->{'title'}; 4282 unless ($title) { 4283 ( $title, my $path ) = fileparse($input_file); 4284 } 4285 my $toc_item_count = 0; 4286 my $in_toc_package = ""; 4287 my $last_level = 0; 4288 bless { 4289 _input_file => $input_file, # name of input file 4290 _title => $title, # title, unescaped 4291 _html_file => $html_file, # name of .html output file 4292 _toc_filename => $toc_filename, # for frames option 4293 _src_filename => $src_filename, # for frames option 4294 _html_file_opened => $html_file_opened, # a flag 4295 _html_fh => $html_fh, # the output stream 4296 _html_pre_fh => $html_pre_fh, # pre section goes here 4297 _rpre_string_stack => \@pre_string_stack, # stack of pre sections 4298 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html 4299 _rpod_string => \$pod_string, # string holding pod 4300 _pod_cut_count => 0, # how many =cut's? 4301 _html_toc_fh => $html_toc_fh, # fh for table of contents 4302 _rtoc_string => \$toc_string, # string holding toc 4303 _rtoc_item_count => \$toc_item_count, # how many toc items 4304 _rin_toc_package => \$in_toc_package, # package name 4305 _rtoc_name_count => {}, # hash to track unique names 4306 _rpackage_stack => [], # stack to check for package 4307 # name changes 4308 _rlast_level => \$last_level, # brace indentation level 4309 }, $class; 4310} 4311 4312sub add_toc_item { 4313 4314 # Add an item to the html table of contents. 4315 # This is called even if no table of contents is written, 4316 # because we still want to put the anchors in the <pre> text. 4317 # We are given an anchor name and its type; types are: 4318 # 'package', 'sub', '__END__', '__DATA__', 'EOF' 4319 # There must be an 'EOF' call at the end to wrap things up. 4320 my $self = shift; 4321 my ( $name, $type ) = @_; 4322 my $html_toc_fh = $self->{_html_toc_fh}; 4323 my $html_pre_fh = $self->{_html_pre_fh}; 4324 my $rtoc_name_count = $self->{_rtoc_name_count}; 4325 my $rtoc_item_count = $self->{_rtoc_item_count}; 4326 my $rlast_level = $self->{_rlast_level}; 4327 my $rin_toc_package = $self->{_rin_toc_package}; 4328 my $rpackage_stack = $self->{_rpackage_stack}; 4329 4330 # packages contain sublists of subs, so to avoid errors all package 4331 # items are written and finished with the following routines 4332 my $end_package_list = sub { 4333 if ($$rin_toc_package) { 4334 $html_toc_fh->print("</ul>\n</li>\n"); 4335 $$rin_toc_package = ""; 4336 } 4337 }; 4338 4339 my $start_package_list = sub { 4340 my ( $unique_name, $package ) = @_; 4341 if ($$rin_toc_package) { $end_package_list->() } 4342 $html_toc_fh->print(<<EOM); 4343<li><a href=\"#$unique_name\">package $package</a> 4344<ul> 4345EOM 4346 $$rin_toc_package = $package; 4347 }; 4348 4349 # start the table of contents on the first item 4350 unless ($$rtoc_item_count) { 4351 4352 # but just quit if we hit EOF without any other entries 4353 # in this case, there will be no toc 4354 return if ( $type eq 'EOF' ); 4355 $html_toc_fh->print( <<"TOC_END"); 4356<!-- BEGIN CODE INDEX --><a name="code-index"></a> 4357<ul> 4358TOC_END 4359 } 4360 $$rtoc_item_count++; 4361 4362 # make a unique anchor name for this location: 4363 # - packages get a 'package-' prefix 4364 # - subs use their names 4365 my $unique_name = $name; 4366 if ( $type eq 'package' ) { $unique_name = "package-$name" } 4367 4368 # append '-1', '-2', etc if necessary to make unique; this will 4369 # be unique because subs and packages cannot have a '-' 4370 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { 4371 $unique_name .= "-$count"; 4372 } 4373 4374 # - all names get terminal '-' if pod2html is used, to avoid 4375 # conflicts with anchor names created by pod2html 4376 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } 4377 4378 # start/stop lists of subs 4379 if ( $type eq 'sub' ) { 4380 my $package = $rpackage_stack->[$$rlast_level]; 4381 unless ($package) { $package = 'main' } 4382 4383 # if we're already in a package/sub list, be sure its the right 4384 # package or else close it 4385 if ( $$rin_toc_package && $$rin_toc_package ne $package ) { 4386 $end_package_list->(); 4387 } 4388 4389 # start a package/sub list if necessary 4390 unless ($$rin_toc_package) { 4391 $start_package_list->( $unique_name, $package ); 4392 } 4393 } 4394 4395 # now write an entry in the toc for this item 4396 if ( $type eq 'package' ) { 4397 $start_package_list->( $unique_name, $name ); 4398 } 4399 elsif ( $type eq 'sub' ) { 4400 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); 4401 } 4402 else { 4403 $end_package_list->(); 4404 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); 4405 } 4406 4407 # write the anchor in the <pre> section 4408 $html_pre_fh->print("<a name=\"$unique_name\"></a>"); 4409 4410 # end the table of contents, if any, on the end of file 4411 if ( $type eq 'EOF' ) { 4412 $html_toc_fh->print( <<"TOC_END"); 4413</ul> 4414<!-- END CODE INDEX --> 4415TOC_END 4416 } 4417} 4418 4419BEGIN { 4420 4421 # This is the official list of tokens which may be identified by the 4422 # user. Long names are used as getopt keys. Short names are 4423 # convenient short abbreviations for specifying input. Short names 4424 # somewhat resemble token type characters, but are often different 4425 # because they may only be alphanumeric, to allow command line 4426 # input. Also, note that because of case insensitivity of html, 4427 # this table must be in a single case only (I've chosen to use all 4428 # lower case). 4429 # When adding NEW_TOKENS: update this hash table 4430 # short names => long names 4431 %short_to_long_names = ( 4432 'n' => 'numeric', 4433 'p' => 'paren', 4434 'q' => 'quote', 4435 's' => 'structure', 4436 'c' => 'comment', 4437 'v' => 'v-string', 4438 'cm' => 'comma', 4439 'w' => 'bareword', 4440 'co' => 'colon', 4441 'pu' => 'punctuation', 4442 'i' => 'identifier', 4443 'j' => 'label', 4444 'h' => 'here-doc-target', 4445 'hh' => 'here-doc-text', 4446 'k' => 'keyword', 4447 'sc' => 'semicolon', 4448 'm' => 'subroutine', 4449 'pd' => 'pod-text', 4450 ); 4451 4452 # Now we have to map actual token types into one of the above short 4453 # names; any token types not mapped will get 'punctuation' 4454 # properties. 4455 4456 # The values of this hash table correspond to the keys of the 4457 # previous hash table. 4458 # The keys of this hash table are token types and can be seen 4459 # by running with --dump-token-types (-dtt). 4460 4461 # When adding NEW_TOKENS: update this hash table 4462 # $type => $short_name 4463 %token_short_names = ( 4464 '#' => 'c', 4465 'n' => 'n', 4466 'v' => 'v', 4467 'k' => 'k', 4468 'F' => 'k', 4469 'Q' => 'q', 4470 'q' => 'q', 4471 'J' => 'j', 4472 'j' => 'j', 4473 'h' => 'h', 4474 'H' => 'hh', 4475 'w' => 'w', 4476 ',' => 'cm', 4477 '=>' => 'cm', 4478 ';' => 'sc', 4479 ':' => 'co', 4480 'f' => 'sc', 4481 '(' => 'p', 4482 ')' => 'p', 4483 'M' => 'm', 4484 'P' => 'pd', 4485 'A' => 'co', 4486 ); 4487 4488 # These token types will all be called identifiers for now 4489 # FIXME: need to separate user defined modules as separate type 4490 my @identifier = qw" i t U C Y Z G :: "; 4491 @token_short_names{@identifier} = ('i') x scalar(@identifier); 4492 4493 # These token types will be called 'structure' 4494 my @structure = qw" { } "; 4495 @token_short_names{@structure} = ('s') x scalar(@structure); 4496 4497 # OLD NOTES: save for reference 4498 # Any of these could be added later if it would be useful. 4499 # For now, they will by default become punctuation 4500 # my @list = qw" L R [ ] "; 4501 # @token_long_names{@list} = ('non-structure') x scalar(@list); 4502 # 4503 # my @list = qw" 4504 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm 4505 # "; 4506 # @token_long_names{@list} = ('math') x scalar(@list); 4507 # 4508 # my @list = qw" & &= ~ ~= ^ ^= | |= "; 4509 # @token_long_names{@list} = ('bit') x scalar(@list); 4510 # 4511 # my @list = qw" == != < > <= <=> "; 4512 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); 4513 # 4514 # my @list = qw" && || ! &&= ||= //= "; 4515 # @token_long_names{@list} = ('logical') x scalar(@list); 4516 # 4517 # my @list = qw" . .= =~ !~ x x= "; 4518 # @token_long_names{@list} = ('string-operators') x scalar(@list); 4519 # 4520 # # Incomplete.. 4521 # my @list = qw" .. -> <> ... \ ? "; 4522 # @token_long_names{@list} = ('misc-operators') x scalar(@list); 4523 4524} 4525 4526sub make_getopt_long_names { 4527 my $class = shift; 4528 my ($rgetopt_names) = @_; 4529 while ( my ( $short_name, $name ) = each %short_to_long_names ) { 4530 push @$rgetopt_names, "html-color-$name=s"; 4531 push @$rgetopt_names, "html-italic-$name!"; 4532 push @$rgetopt_names, "html-bold-$name!"; 4533 } 4534 push @$rgetopt_names, "html-color-background=s"; 4535 push @$rgetopt_names, "html-linked-style-sheet=s"; 4536 push @$rgetopt_names, "nohtml-style-sheets"; 4537 push @$rgetopt_names, "html-pre-only"; 4538 push @$rgetopt_names, "html-line-numbers"; 4539 push @$rgetopt_names, "html-entities!"; 4540 push @$rgetopt_names, "stylesheet"; 4541 push @$rgetopt_names, "html-table-of-contents!"; 4542 push @$rgetopt_names, "pod2html!"; 4543 push @$rgetopt_names, "frames!"; 4544 push @$rgetopt_names, "html-toc-extension=s"; 4545 push @$rgetopt_names, "html-src-extension=s"; 4546 4547 # Pod::Html parameters: 4548 push @$rgetopt_names, "backlink=s"; 4549 push @$rgetopt_names, "cachedir=s"; 4550 push @$rgetopt_names, "htmlroot=s"; 4551 push @$rgetopt_names, "libpods=s"; 4552 push @$rgetopt_names, "podpath=s"; 4553 push @$rgetopt_names, "podroot=s"; 4554 push @$rgetopt_names, "title=s"; 4555 4556 # Pod::Html parameters with leading 'pod' which will be removed 4557 # before the call to Pod::Html 4558 push @$rgetopt_names, "podquiet!"; 4559 push @$rgetopt_names, "podverbose!"; 4560 push @$rgetopt_names, "podrecurse!"; 4561 push @$rgetopt_names, "podflush"; 4562 push @$rgetopt_names, "podheader!"; 4563 push @$rgetopt_names, "podindex!"; 4564} 4565 4566sub make_abbreviated_names { 4567 4568 # We're appending things like this to the expansion list: 4569 # 'hcc' => [qw(html-color-comment)], 4570 # 'hck' => [qw(html-color-keyword)], 4571 # etc 4572 my $class = shift; 4573 my ($rexpansion) = @_; 4574 4575 # abbreviations for color/bold/italic properties 4576 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 4577 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; 4578 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; 4579 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; 4580 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; 4581 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; 4582 } 4583 4584 # abbreviations for all other html options 4585 ${$rexpansion}{"hcbg"} = ["html-color-background"]; 4586 ${$rexpansion}{"pre"} = ["html-pre-only"]; 4587 ${$rexpansion}{"toc"} = ["html-table-of-contents"]; 4588 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; 4589 ${$rexpansion}{"nnn"} = ["html-line-numbers"]; 4590 ${$rexpansion}{"hent"} = ["html-entities"]; 4591 ${$rexpansion}{"nhent"} = ["nohtml-entities"]; 4592 ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; 4593 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; 4594 ${$rexpansion}{"ss"} = ["stylesheet"]; 4595 ${$rexpansion}{"pod"} = ["pod2html"]; 4596 ${$rexpansion}{"npod"} = ["nopod2html"]; 4597 ${$rexpansion}{"frm"} = ["frames"]; 4598 ${$rexpansion}{"nfrm"} = ["noframes"]; 4599 ${$rexpansion}{"text"} = ["html-toc-extension"]; 4600 ${$rexpansion}{"sext"} = ["html-src-extension"]; 4601} 4602 4603sub check_options { 4604 4605 # This will be called once after options have been parsed 4606 my $class = shift; 4607 $rOpts = shift; 4608 4609 # X11 color names for default settings that seemed to look ok 4610 # (these color names are only used for programming clarity; the hex 4611 # numbers are actually written) 4612 use constant ForestGreen => "#228B22"; 4613 use constant SaddleBrown => "#8B4513"; 4614 use constant magenta4 => "#8B008B"; 4615 use constant IndianRed3 => "#CD5555"; 4616 use constant DeepSkyBlue4 => "#00688B"; 4617 use constant MediumOrchid3 => "#B452CD"; 4618 use constant black => "#000000"; 4619 use constant white => "#FFFFFF"; 4620 use constant red => "#FF0000"; 4621 4622 # set default color, bold, italic properties 4623 # anything not listed here will be given the default (punctuation) color -- 4624 # these types currently not listed and get default: ws pu s sc cm co p 4625 # When adding NEW_TOKENS: add an entry here if you don't want defaults 4626 4627 # set_default_properties( $short_name, default_color, bold?, italic? ); 4628 set_default_properties( 'c', ForestGreen, 0, 0 ); 4629 set_default_properties( 'pd', ForestGreen, 0, 1 ); 4630 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown 4631 set_default_properties( 'q', IndianRed3, 0, 0 ); 4632 set_default_properties( 'hh', IndianRed3, 0, 1 ); 4633 set_default_properties( 'h', IndianRed3, 1, 0 ); 4634 set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); 4635 set_default_properties( 'w', black, 0, 0 ); 4636 set_default_properties( 'n', MediumOrchid3, 0, 0 ); 4637 set_default_properties( 'v', MediumOrchid3, 0, 0 ); 4638 set_default_properties( 'j', IndianRed3, 1, 0 ); 4639 set_default_properties( 'm', red, 1, 0 ); 4640 4641 set_default_color( 'html-color-background', white ); 4642 set_default_color( 'html-color-punctuation', black ); 4643 4644 # setup property lookup tables for tokens based on their short names 4645 # every token type has a short name, and will use these tables 4646 # to do the html markup 4647 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 4648 $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; 4649 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; 4650 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; 4651 } 4652 4653 # write style sheet to STDOUT and die if requested 4654 if ( defined( $rOpts->{'stylesheet'} ) ) { 4655 write_style_sheet_file('-'); 4656 exit 1; 4657 } 4658 4659 # make sure user gives a file name after -css 4660 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { 4661 $css_linkname = $rOpts->{'html-linked-style-sheet'}; 4662 if ( $css_linkname =~ /^-/ ) { 4663 die "You must specify a valid filename after -css\n"; 4664 } 4665 } 4666 4667 # check for conflict 4668 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { 4669 $rOpts->{'nohtml-style-sheets'} = 0; 4670 warning("You can't specify both -css and -nss; -nss ignored\n"); 4671 } 4672 4673 # write a style sheet file if necessary 4674 if ($css_linkname) { 4675 4676 # if the selected filename exists, don't write, because user may 4677 # have done some work by hand to create it; use backup name instead 4678 # Also, this will avoid a potential disaster in which the user 4679 # forgets to specify the style sheet, like this: 4680 # perltidy -html -css myfile1.pl myfile2.pl 4681 # This would cause myfile1.pl to parsed as the style sheet by GetOpts 4682 my $css_filename = $css_linkname; 4683 unless ( -e $css_filename ) { 4684 write_style_sheet_file($css_filename); 4685 } 4686 } 4687 $missing_html_entities = 1 unless $rOpts->{'html-entities'}; 4688} 4689 4690sub write_style_sheet_file { 4691 4692 my $css_filename = shift; 4693 my $fh; 4694 unless ( $fh = IO::File->new("> $css_filename") ) { 4695 die "can't open $css_filename: $!\n"; 4696 } 4697 write_style_sheet_data($fh); 4698 eval { $fh->close }; 4699} 4700 4701sub write_style_sheet_data { 4702 4703 # write the style sheet data to an open file handle 4704 my $fh = shift; 4705 4706 my $bg_color = $rOpts->{'html-color-background'}; 4707 my $text_color = $rOpts->{'html-color-punctuation'}; 4708 4709 # pre-bgcolor is new, and may not be defined 4710 my $pre_bg_color = $rOpts->{'html-pre-color-background'}; 4711 $pre_bg_color = $bg_color unless $pre_bg_color; 4712 4713 $fh->print(<<"EOM"); 4714/* default style sheet generated by perltidy */ 4715body {background: $bg_color; color: $text_color} 4716pre { color: $text_color; 4717 background: $pre_bg_color; 4718 font-family: courier; 4719 } 4720 4721EOM 4722 4723 foreach my $short_name ( sort keys %short_to_long_names ) { 4724 my $long_name = $short_to_long_names{$short_name}; 4725 4726 my $abbrev = '.' . $short_name; 4727 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment 4728 my $color = $html_color{$short_name}; 4729 if ( !defined($color) ) { $color = $text_color } 4730 $fh->print("$abbrev \{ color: $color;"); 4731 4732 if ( $html_bold{$short_name} ) { 4733 $fh->print(" font-weight:bold;"); 4734 } 4735 4736 if ( $html_italic{$short_name} ) { 4737 $fh->print(" font-style:italic;"); 4738 } 4739 $fh->print("} /* $long_name */\n"); 4740 } 4741} 4742 4743sub set_default_color { 4744 4745 # make sure that options hash $rOpts->{$key} contains a valid color 4746 my ( $key, $color ) = @_; 4747 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } 4748 $rOpts->{$key} = check_RGB($color); 4749} 4750 4751sub check_RGB { 4752 4753 # if color is a 6 digit hex RGB value, prepend a #, otherwise 4754 # assume that it is a valid ascii color name 4755 my ($color) = @_; 4756 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } 4757 return $color; 4758} 4759 4760sub set_default_properties { 4761 my ( $short_name, $color, $bold, $italic ) = @_; 4762 4763 set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); 4764 my $key; 4765 $key = "html-bold-$short_to_long_names{$short_name}"; 4766 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; 4767 $key = "html-italic-$short_to_long_names{$short_name}"; 4768 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; 4769} 4770 4771sub pod_to_html { 4772 4773 # Use Pod::Html to process the pod and make the page 4774 # then merge the perltidy code sections into it. 4775 # return 1 if success, 0 otherwise 4776 my $self = shift; 4777 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; 4778 my $input_file = $self->{_input_file}; 4779 my $title = $self->{_title}; 4780 my $success_flag = 0; 4781 4782 # don't try to use pod2html if no pod 4783 unless ($pod_string) { 4784 return $success_flag; 4785 } 4786 4787 # Pod::Html requires a real temporary filename 4788 # If we are making a frame, we have a name available 4789 # Otherwise, we have to fine one 4790 my $tmpfile; 4791 if ( $rOpts->{'frames'} ) { 4792 $tmpfile = $self->{_toc_filename}; 4793 } 4794 else { 4795 $tmpfile = Perl::Tidy::make_temporary_filename(); 4796 } 4797 my $fh_tmp = IO::File->new( $tmpfile, 'w' ); 4798 unless ($fh_tmp) { 4799 warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; 4800 return $success_flag; 4801 } 4802 4803 #------------------------------------------------------------------ 4804 # Warning: a temporary file is open; we have to clean up if 4805 # things go bad. From here on all returns should be by going to 4806 # RETURN so that the temporary file gets unlinked. 4807 #------------------------------------------------------------------ 4808 4809 # write the pod text to the temporary file 4810 $fh_tmp->print($pod_string); 4811 $fh_tmp->close(); 4812 4813 # Hand off the pod to pod2html. 4814 # Note that we can use the same temporary filename for input and output 4815 # because of the way pod2html works. 4816 { 4817 4818 my @args; 4819 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; 4820 my $kw; 4821 4822 # Flags with string args: 4823 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", 4824 # "podpath=s", "podroot=s" 4825 # Note: -css=s is handled by perltidy itself 4826 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { 4827 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } 4828 } 4829 4830 # Toggle switches; these have extra leading 'pod' 4831 # "header!", "index!", "recurse!", "quiet!", "verbose!" 4832 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { 4833 my $kwd = $kw; # allows us to strip 'pod' 4834 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } 4835 elsif ( defined( $rOpts->{$kw} ) ) { 4836 $kwd =~ s/^pod//; 4837 push @args, "--no$kwd"; 4838 } 4839 } 4840 4841 # "flush", 4842 $kw = 'podflush'; 4843 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } 4844 4845 # Must clean up if pod2html dies (it can); 4846 # Be careful not to overwrite callers __DIE__ routine 4847 local $SIG{__DIE__} = sub { 4848 print $_[0]; 4849 unlink $tmpfile if -e $tmpfile; 4850 exit 1; 4851 }; 4852 4853 pod2html(@args); 4854 } 4855 $fh_tmp = IO::File->new( $tmpfile, 'r' ); 4856 unless ($fh_tmp) { 4857 4858 # this error shouldn't happen ... we just used this filename 4859 warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; 4860 goto RETURN; 4861 } 4862 4863 my $html_fh = $self->{_html_fh}; 4864 my @toc; 4865 my $in_toc; 4866 my $no_print; 4867 4868 # This routine will write the html selectively and store the toc 4869 my $html_print = sub { 4870 foreach (@_) { 4871 $html_fh->print($_) unless ($no_print); 4872 if ($in_toc) { push @toc, $_ } 4873 } 4874 }; 4875 4876 # loop over lines of html output from pod2html and merge in 4877 # the necessary perltidy html sections 4878 my ( $saw_body, $saw_index, $saw_body_end ); 4879 while ( my $line = $fh_tmp->getline() ) { 4880 4881 if ( $line =~ /^\s*<html>\s*$/i ) { 4882 my $date = localtime; 4883 $html_print->("<!-- Generated by perltidy on $date -->\n"); 4884 $html_print->($line); 4885 } 4886 4887 # Copy the perltidy css, if any, after <body> tag 4888 elsif ( $line =~ /^\s*<body.*>\s*$/i ) { 4889 $saw_body = 1; 4890 $html_print->($css_string) if $css_string; 4891 $html_print->($line); 4892 4893 # add a top anchor and heading 4894 $html_print->("<a name=\"-top-\"></a>\n"); 4895 $title = escape_html($title); 4896 $html_print->("<h1>$title</h1>\n"); 4897 } 4898 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) { 4899 $in_toc = 1; 4900 4901 # when frames are used, an extra table of contents in the 4902 # contents panel is confusing, so don't print it 4903 $no_print = $rOpts->{'frames'} 4904 || !$rOpts->{'html-table-of-contents'}; 4905 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; 4906 $html_print->($line); 4907 } 4908 4909 # Copy the perltidy toc, if any, after the Pod::Html toc 4910 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) { 4911 $saw_index = 1; 4912 $html_print->($line); 4913 if ($toc_string) { 4914 $html_print->("<hr />\n") if $rOpts->{'frames'}; 4915 $html_print->("<h2>Code Index:</h2>\n"); 4916 my @toc = map { $_ .= "\n" } split /\n/, $toc_string; 4917 $html_print->(@toc); 4918 } 4919 $in_toc = 0; 4920 $no_print = 0; 4921 } 4922 4923 # Copy one perltidy section after each marker 4924 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) { 4925 $line = $2; 4926 $html_print->($1) if $1; 4927 4928 # Intermingle code and pod sections if we saw multiple =cut's. 4929 if ( $self->{_pod_cut_count} > 1 ) { 4930 my $rpre_string = shift(@$rpre_string_stack); 4931 if ($$rpre_string) { 4932 $html_print->('<pre>'); 4933 $html_print->($$rpre_string); 4934 $html_print->('</pre>'); 4935 } 4936 else { 4937 4938 # shouldn't happen: we stored a string before writing 4939 # each marker. 4940 warn 4941"Problem merging html stream with pod2html; order may be wrong\n"; 4942 } 4943 $html_print->($line); 4944 } 4945 4946 # If didn't see multiple =cut lines, we'll put the pod out first 4947 # and then the code, because it's less confusing. 4948 else { 4949 4950 # since we are not intermixing code and pod, we don't need 4951 # or want any <hr> lines which separated pod and code 4952 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i ); 4953 } 4954 } 4955 4956 # Copy any remaining code section before the </body> tag 4957 elsif ( $line =~ /^\s*<\/body>\s*$/i ) { 4958 $saw_body_end = 1; 4959 if (@$rpre_string_stack) { 4960 unless ( $self->{_pod_cut_count} > 1 ) { 4961 $html_print->('<hr />'); 4962 } 4963 while ( my $rpre_string = shift(@$rpre_string_stack) ) { 4964 $html_print->('<pre>'); 4965 $html_print->($$rpre_string); 4966 $html_print->('</pre>'); 4967 } 4968 } 4969 $html_print->($line); 4970 } 4971 else { 4972 $html_print->($line); 4973 } 4974 } 4975 4976 $success_flag = 1; 4977 unless ($saw_body) { 4978 warn "Did not see <body> in pod2html output\n"; 4979 $success_flag = 0; 4980 } 4981 unless ($saw_body_end) { 4982 warn "Did not see </body> in pod2html output\n"; 4983 $success_flag = 0; 4984 } 4985 unless ($saw_index) { 4986 warn "Did not find INDEX END in pod2html output\n"; 4987 $success_flag = 0; 4988 } 4989 4990 RETURN: 4991 eval { $html_fh->close() }; 4992 4993 # note that we have to unlink tmpfile before making frames 4994 # because the tmpfile may be one of the names used for frames 4995 unlink $tmpfile if -e $tmpfile; 4996 if ( $success_flag && $rOpts->{'frames'} ) { 4997 $self->make_frame( \@toc ); 4998 } 4999 return $success_flag; 5000} 5001 5002sub make_frame { 5003 5004 # Make a frame with table of contents in the left panel 5005 # and the text in the right panel. 5006 # On entry: 5007 # $html_filename contains the no-frames html output 5008 # $rtoc is a reference to an array with the table of contents 5009 my $self = shift; 5010 my ($rtoc) = @_; 5011 my $input_file = $self->{_input_file}; 5012 my $html_filename = $self->{_html_file}; 5013 my $toc_filename = $self->{_toc_filename}; 5014 my $src_filename = $self->{_src_filename}; 5015 my $title = $self->{_title}; 5016 $title = escape_html($title); 5017 5018 # FUTURE input parameter: 5019 my $top_basename = ""; 5020 5021 # We need to produce 3 html files: 5022 # 1. - the table of contents 5023 # 2. - the contents (source code) itself 5024 # 3. - the frame which contains them 5025 5026 # get basenames for relative links 5027 my ( $toc_basename, $toc_path ) = fileparse($toc_filename); 5028 my ( $src_basename, $src_path ) = fileparse($src_filename); 5029 5030 # 1. Make the table of contents panel, with appropriate changes 5031 # to the anchor names 5032 my $src_frame_name = 'SRC'; 5033 my $first_anchor = 5034 write_toc_html( $title, $toc_filename, $src_basename, $rtoc, 5035 $src_frame_name ); 5036 5037 # 2. The current .html filename is renamed to be the contents panel 5038 rename( $html_filename, $src_filename ) 5039 or die "Cannot rename $html_filename to $src_filename:$!\n"; 5040 5041 # 3. Then use the original html filename for the frame 5042 write_frame_html( 5043 $title, $html_filename, $top_basename, 5044 $toc_basename, $src_basename, $src_frame_name 5045 ); 5046} 5047 5048sub write_toc_html { 5049 5050 # write a separate html table of contents file for frames 5051 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; 5052 my $fh = IO::File->new( $toc_filename, 'w' ) 5053 or die "Cannot open $toc_filename:$!\n"; 5054 $fh->print(<<EOM); 5055<html> 5056<head> 5057<title>$title</title> 5058</head> 5059<body> 5060<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1> 5061EOM 5062 5063 my $first_anchor = 5064 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); 5065 $fh->print( join "", @$rtoc ); 5066 5067 $fh->print(<<EOM); 5068</body> 5069</html> 5070EOM 5071 5072} 5073 5074sub write_frame_html { 5075 5076 # write an html file to be the table of contents frame 5077 my ( 5078 $title, $frame_filename, $top_basename, 5079 $toc_basename, $src_basename, $src_frame_name 5080 ) = @_; 5081 5082 my $fh = IO::File->new( $frame_filename, 'w' ) 5083 or die "Cannot open $toc_basename:$!\n"; 5084 5085 $fh->print(<<EOM); 5086<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" 5087 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 5088<?xml version="1.0" encoding="iso-8859-1" ?> 5089<html xmlns="http://www.w3.org/1999/xhtml"> 5090<head> 5091<title>$title</title> 5092</head> 5093EOM 5094 5095 # two left panels, one right, if master index file 5096 if ($top_basename) { 5097 $fh->print(<<EOM); 5098<frameset cols="20%,80%"> 5099<frameset rows="30%,70%"> 5100<frame src = "$top_basename" /> 5101<frame src = "$toc_basename" /> 5102</frameset> 5103EOM 5104 } 5105 5106 # one left panels, one right, if no master index file 5107 else { 5108 $fh->print(<<EOM); 5109<frameset cols="20%,*"> 5110<frame src = "$toc_basename" /> 5111EOM 5112 } 5113 $fh->print(<<EOM); 5114<frame src = "$src_basename" name = "$src_frame_name" /> 5115<noframes> 5116<body> 5117<p>If you see this message, you are using a non-frame-capable web client.</p> 5118<p>This document contains:</p> 5119<ul> 5120<li><a href="$toc_basename">A table of contents</a></li> 5121<li><a href="$src_basename">The source code</a></li> 5122</ul> 5123</body> 5124</noframes> 5125</frameset> 5126</html> 5127EOM 5128} 5129 5130sub change_anchor_names { 5131 5132 # add a filename and target to anchors 5133 # also return the first anchor 5134 my ( $rlines, $filename, $target ) = @_; 5135 my $first_anchor; 5136 foreach my $line (@$rlines) { 5137 5138 # We're looking for lines like this: 5139 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI> 5140 # ---- - -------- ----------------- 5141 # $1 $4 $5 5142 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) { 5143 my $pre = $1; 5144 my $name = $4; 5145 my $post = $5; 5146 my $href = "$filename#$name"; 5147 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n"; 5148 unless ($first_anchor) { $first_anchor = $href } 5149 } 5150 } 5151 return $first_anchor; 5152} 5153 5154sub close_html_file { 5155 my $self = shift; 5156 return unless $self->{_html_file_opened}; 5157 5158 my $html_fh = $self->{_html_fh}; 5159 my $rtoc_string = $self->{_rtoc_string}; 5160 5161 # There are 3 basic paths to html output... 5162 5163 # --------------------------------- 5164 # Path 1: finish up if in -pre mode 5165 # --------------------------------- 5166 if ( $rOpts->{'html-pre-only'} ) { 5167 $html_fh->print( <<"PRE_END"); 5168</pre> 5169PRE_END 5170 eval { $html_fh->close() }; 5171 return; 5172 } 5173 5174 # Finish the index 5175 $self->add_toc_item( 'EOF', 'EOF' ); 5176 5177 my $rpre_string_stack = $self->{_rpre_string_stack}; 5178 5179 # Patch to darken the <pre> background color in case of pod2html and 5180 # interleaved code/documentation. Otherwise, the distinction 5181 # between code and documentation is blurred. 5182 if ( $rOpts->{pod2html} 5183 && $self->{_pod_cut_count} >= 1 5184 && $rOpts->{'html-color-background'} eq '#FFFFFF' ) 5185 { 5186 $rOpts->{'html-pre-color-background'} = '#F0F0F0'; 5187 } 5188 5189 # put the css or its link into a string, if used 5190 my $css_string; 5191 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); 5192 5193 # use css linked to another file 5194 if ( $rOpts->{'html-linked-style-sheet'} ) { 5195 $fh_css->print( 5196 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) 5197 ); 5198 } 5199 5200 # use css embedded in this file 5201 elsif ( !$rOpts->{'nohtml-style-sheets'} ) { 5202 $fh_css->print( <<'ENDCSS'); 5203<style type="text/css"> 5204<!-- 5205ENDCSS 5206 write_style_sheet_data($fh_css); 5207 $fh_css->print( <<"ENDCSS"); 5208--> 5209</style> 5210ENDCSS 5211 } 5212 5213 # ----------------------------------------------------------- 5214 # path 2: use pod2html if requested 5215 # If we fail for some reason, continue on to path 3 5216 # ----------------------------------------------------------- 5217 if ( $rOpts->{'pod2html'} ) { 5218 my $rpod_string = $self->{_rpod_string}; 5219 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, 5220 $rpre_string_stack ) 5221 && return; 5222 } 5223 5224 # -------------------------------------------------- 5225 # path 3: write code in html, with pod only in italics 5226 # -------------------------------------------------- 5227 my $input_file = $self->{_input_file}; 5228 my $title = escape_html($input_file); 5229 my $date = localtime; 5230 $html_fh->print( <<"HTML_START"); 5231<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 5232 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 5233<!-- Generated by perltidy on $date --> 5234<html xmlns="http://www.w3.org/1999/xhtml"> 5235<head> 5236<title>$title</title> 5237HTML_START 5238 5239 # output the css, if used 5240 if ($css_string) { 5241 $html_fh->print($css_string); 5242 $html_fh->print( <<"ENDCSS"); 5243</head> 5244<body> 5245ENDCSS 5246 } 5247 else { 5248 5249 $html_fh->print( <<"HTML_START"); 5250</head> 5251<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> 5252HTML_START 5253 } 5254 5255 $html_fh->print("<a name=\"-top-\"></a>\n"); 5256 $html_fh->print( <<"EOM"); 5257<h1>$title</h1> 5258EOM 5259 5260 # copy the table of contents 5261 if ( $$rtoc_string 5262 && !$rOpts->{'frames'} 5263 && $rOpts->{'html-table-of-contents'} ) 5264 { 5265 $html_fh->print($$rtoc_string); 5266 } 5267 5268 # copy the pre section(s) 5269 my $fname_comment = $input_file; 5270 $fname_comment =~ s/--+/-/g; # protect HTML comment tags 5271 $html_fh->print( <<"END_PRE"); 5272<hr /> 5273<!-- contents of filename: $fname_comment --> 5274<pre> 5275END_PRE 5276 5277 foreach my $rpre_string (@$rpre_string_stack) { 5278 $html_fh->print($$rpre_string); 5279 } 5280 5281 # and finish the html page 5282 $html_fh->print( <<"HTML_END"); 5283</pre> 5284</body> 5285</html> 5286HTML_END 5287 eval { $html_fh->close() }; # could be object without close method 5288 5289 if ( $rOpts->{'frames'} ) { 5290 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; 5291 $self->make_frame( \@toc ); 5292 } 5293} 5294 5295sub markup_tokens { 5296 my $self = shift; 5297 my ( $rtokens, $rtoken_type, $rlevels ) = @_; 5298 my ( @colored_tokens, $j, $string, $type, $token, $level ); 5299 my $rlast_level = $self->{_rlast_level}; 5300 my $rpackage_stack = $self->{_rpackage_stack}; 5301 5302 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 5303 $type = $$rtoken_type[$j]; 5304 $token = $$rtokens[$j]; 5305 $level = $$rlevels[$j]; 5306 $level = 0 if ( $level < 0 ); 5307 5308 #------------------------------------------------------- 5309 # Update the package stack. The package stack is needed to keep 5310 # the toc correct because some packages may be declared within 5311 # blocks and go out of scope when we leave the block. 5312 #------------------------------------------------------- 5313 if ( $level > $$rlast_level ) { 5314 unless ( $rpackage_stack->[ $level - 1 ] ) { 5315 $rpackage_stack->[ $level - 1 ] = 'main'; 5316 } 5317 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; 5318 } 5319 elsif ( $level < $$rlast_level ) { 5320 my $package = $rpackage_stack->[$level]; 5321 unless ($package) { $package = 'main' } 5322 5323 # if we change packages due to a nesting change, we 5324 # have to make an entry in the toc 5325 if ( $package ne $rpackage_stack->[ $level + 1 ] ) { 5326 $self->add_toc_item( $package, 'package' ); 5327 } 5328 } 5329 $$rlast_level = $level; 5330 5331 #------------------------------------------------------- 5332 # Intercept a sub name here; split it 5333 # into keyword 'sub' and sub name; and add an 5334 # entry in the toc 5335 #------------------------------------------------------- 5336 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { 5337 $token = $self->markup_html_element( $1, 'k' ); 5338 push @colored_tokens, $token; 5339 $token = $2; 5340 $type = 'M'; 5341 5342 # but don't include sub declarations in the toc; 5343 # these wlll have leading token types 'i;' 5344 my $signature = join "", @$rtoken_type; 5345 unless ( $signature =~ /^i;/ ) { 5346 my $subname = $token; 5347 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype 5348 $self->add_toc_item( $subname, 'sub' ); 5349 } 5350 } 5351 5352 #------------------------------------------------------- 5353 # Intercept a package name here; split it 5354 # into keyword 'package' and name; add to the toc, 5355 # and update the package stack 5356 #------------------------------------------------------- 5357 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { 5358 $token = $self->markup_html_element( $1, 'k' ); 5359 push @colored_tokens, $token; 5360 $token = $2; 5361 $type = 'i'; 5362 $self->add_toc_item( "$token", 'package' ); 5363 $rpackage_stack->[$level] = $token; 5364 } 5365 5366 $token = $self->markup_html_element( $token, $type ); 5367 push @colored_tokens, $token; 5368 } 5369 return ( \@colored_tokens ); 5370} 5371 5372sub markup_html_element { 5373 my $self = shift; 5374 my ( $token, $type ) = @_; 5375 5376 return $token if ( $type eq 'b' ); # skip a blank token 5377 return $token if ( $token =~ /^\s*$/ ); # skip a blank line 5378 $token = escape_html($token); 5379 5380 # get the short abbreviation for this token type 5381 my $short_name = $token_short_names{$type}; 5382 if ( !defined($short_name) ) { 5383 $short_name = "pu"; # punctuation is default 5384 } 5385 5386 # handle style sheets.. 5387 if ( !$rOpts->{'nohtml-style-sheets'} ) { 5388 if ( $short_name ne 'pu' ) { 5389 $token = qq(<span class="$short_name">) . $token . "</span>"; 5390 } 5391 } 5392 5393 # handle no style sheets.. 5394 else { 5395 my $color = $html_color{$short_name}; 5396 5397 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { 5398 $token = qq(<font color="$color">) . $token . "</font>"; 5399 } 5400 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } 5401 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } 5402 } 5403 return $token; 5404} 5405 5406sub escape_html { 5407 5408 my $token = shift; 5409 if ($missing_html_entities) { 5410 $token =~ s/\&/&/g; 5411 $token =~ s/\</</g; 5412 $token =~ s/\>/>/g; 5413 $token =~ s/\"/"/g; 5414 } 5415 else { 5416 HTML::Entities::encode_entities($token); 5417 } 5418 return $token; 5419} 5420 5421sub finish_formatting { 5422 5423 # called after last line 5424 my $self = shift; 5425 $self->close_html_file(); 5426 return; 5427} 5428 5429sub write_line { 5430 5431 my $self = shift; 5432 return unless $self->{_html_file_opened}; 5433 my $html_pre_fh = $self->{_html_pre_fh}; 5434 my ($line_of_tokens) = @_; 5435 my $line_type = $line_of_tokens->{_line_type}; 5436 my $input_line = $line_of_tokens->{_line_text}; 5437 my $line_number = $line_of_tokens->{_line_number}; 5438 chomp $input_line; 5439 5440 # markup line of code.. 5441 my $html_line; 5442 if ( $line_type eq 'CODE' ) { 5443 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 5444 my $rtokens = $line_of_tokens->{_rtokens}; 5445 my $rlevels = $line_of_tokens->{_rlevels}; 5446 5447 if ( $input_line =~ /(^\s*)/ ) { 5448 $html_line = $1; 5449 } 5450 else { 5451 $html_line = ""; 5452 } 5453 my ($rcolored_tokens) = 5454 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); 5455 $html_line .= join '', @$rcolored_tokens; 5456 } 5457 5458 # markup line of non-code.. 5459 else { 5460 my $line_character; 5461 if ( $line_type eq 'HERE' ) { $line_character = 'H' } 5462 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } 5463 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } 5464 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } 5465 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } 5466 elsif ( $line_type eq 'END_START' ) { 5467 $line_character = 'k'; 5468 $self->add_toc_item( '__END__', '__END__' ); 5469 } 5470 elsif ( $line_type eq 'DATA_START' ) { 5471 $line_character = 'k'; 5472 $self->add_toc_item( '__DATA__', '__DATA__' ); 5473 } 5474 elsif ( $line_type =~ /^POD/ ) { 5475 $line_character = 'P'; 5476 if ( $rOpts->{'pod2html'} ) { 5477 my $html_pod_fh = $self->{_html_pod_fh}; 5478 if ( $line_type eq 'POD_START' ) { 5479 5480 my $rpre_string_stack = $self->{_rpre_string_stack}; 5481 my $rpre_string = $rpre_string_stack->[-1]; 5482 5483 # if we have written any non-blank lines to the 5484 # current pre section, start writing to a new output 5485 # string 5486 if ( $$rpre_string =~ /\S/ ) { 5487 my $pre_string; 5488 $html_pre_fh = 5489 Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 5490 $self->{_html_pre_fh} = $html_pre_fh; 5491 push @$rpre_string_stack, \$pre_string; 5492 5493 # leave a marker in the pod stream so we know 5494 # where to put the pre section we just 5495 # finished. 5496 my $for_html = '=for html'; # don't confuse pod utils 5497 $html_pod_fh->print(<<EOM); 5498 5499$for_html 5500<!-- pERLTIDY sECTION --> 5501 5502EOM 5503 } 5504 5505 # otherwise, just clear the current string and start 5506 # over 5507 else { 5508 $$rpre_string = ""; 5509 $html_pod_fh->print("\n"); 5510 } 5511 } 5512 $html_pod_fh->print( $input_line . "\n" ); 5513 if ( $line_type eq 'POD_END' ) { 5514 $self->{_pod_cut_count}++; 5515 $html_pod_fh->print("\n"); 5516 } 5517 return; 5518 } 5519 } 5520 else { $line_character = 'Q' } 5521 $html_line = $self->markup_html_element( $input_line, $line_character ); 5522 } 5523 5524 # add the line number if requested 5525 if ( $rOpts->{'html-line-numbers'} ) { 5526 my $extra_space .= 5527 ( $line_number < 10 ) ? " " 5528 : ( $line_number < 100 ) ? " " 5529 : ( $line_number < 1000 ) ? " " 5530 : ""; 5531 $html_line = $extra_space . $line_number . " " . $html_line; 5532 } 5533 5534 # write the line 5535 $html_pre_fh->print("$html_line\n"); 5536} 5537 5538##################################################################### 5539# 5540# The Perl::Tidy::Formatter package adds indentation, whitespace, and 5541# line breaks to the token stream 5542# 5543# WARNING: This is not a real class for speed reasons. Only one 5544# Formatter may be used. 5545# 5546##################################################################### 5547 5548package Perl::Tidy::Formatter; 5549 5550BEGIN { 5551 5552 # Caution: these debug flags produce a lot of output 5553 # They should all be 0 except when debugging small scripts 5554 use constant FORMATTER_DEBUG_FLAG_BOND => 0; 5555 use constant FORMATTER_DEBUG_FLAG_BREAK => 0; 5556 use constant FORMATTER_DEBUG_FLAG_CI => 0; 5557 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; 5558 use constant FORMATTER_DEBUG_FLAG_FORCE => 0; 5559 use constant FORMATTER_DEBUG_FLAG_LIST => 0; 5560 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; 5561 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; 5562 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; 5563 use constant FORMATTER_DEBUG_FLAG_STORE => 0; 5564 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; 5565 use constant FORMATTER_DEBUG_FLAG_WHITE => 0; 5566 5567 my $debug_warning = sub { 5568 print "FORMATTER_DEBUGGING with key $_[0]\n"; 5569 }; 5570 5571 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); 5572 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); 5573 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); 5574 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); 5575 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); 5576 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); 5577 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); 5578 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); 5579 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); 5580 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); 5581 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); 5582 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); 5583} 5584 5585use Carp; 5586use vars qw{ 5587 5588 @gnu_stack 5589 $max_gnu_stack_index 5590 $gnu_position_predictor 5591 $line_start_index_to_go 5592 $last_indentation_written 5593 $last_unadjusted_indentation 5594 $last_leading_token 5595 5596 $saw_VERSION_in_this_file 5597 $saw_END_or_DATA_ 5598 5599 @gnu_item_list 5600 $max_gnu_item_index 5601 $gnu_sequence_number 5602 $last_output_indentation 5603 %last_gnu_equals 5604 %gnu_comma_count 5605 %gnu_arrow_count 5606 5607 @block_type_to_go 5608 @type_sequence_to_go 5609 @container_environment_to_go 5610 @bond_strength_to_go 5611 @forced_breakpoint_to_go 5612 @lengths_to_go 5613 @levels_to_go 5614 @leading_spaces_to_go 5615 @reduced_spaces_to_go 5616 @matching_token_to_go 5617 @mate_index_to_go 5618 @nesting_blocks_to_go 5619 @ci_levels_to_go 5620 @nesting_depth_to_go 5621 @nobreak_to_go 5622 @old_breakpoint_to_go 5623 @tokens_to_go 5624 @types_to_go 5625 5626 %saved_opening_indentation 5627 5628 $max_index_to_go 5629 $comma_count_in_batch 5630 $old_line_count_in_batch 5631 $last_nonblank_index_to_go 5632 $last_nonblank_type_to_go 5633 $last_nonblank_token_to_go 5634 $last_last_nonblank_index_to_go 5635 $last_last_nonblank_type_to_go 5636 $last_last_nonblank_token_to_go 5637 @nonblank_lines_at_depth 5638 $starting_in_quote 5639 $ending_in_quote 5640 5641 $in_format_skipping_section 5642 $format_skipping_pattern_begin 5643 $format_skipping_pattern_end 5644 5645 $forced_breakpoint_count 5646 $forced_breakpoint_undo_count 5647 @forced_breakpoint_undo_stack 5648 %postponed_breakpoint 5649 5650 $tabbing 5651 $embedded_tab_count 5652 $first_embedded_tab_at 5653 $last_embedded_tab_at 5654 $deleted_semicolon_count 5655 $first_deleted_semicolon_at 5656 $last_deleted_semicolon_at 5657 $added_semicolon_count 5658 $first_added_semicolon_at 5659 $last_added_semicolon_at 5660 $first_tabbing_disagreement 5661 $last_tabbing_disagreement 5662 $in_tabbing_disagreement 5663 $tabbing_disagreement_count 5664 $input_line_tabbing 5665 5666 $last_line_type 5667 $last_line_leading_type 5668 $last_line_leading_level 5669 $last_last_line_leading_level 5670 5671 %block_leading_text 5672 %block_opening_line_number 5673 $csc_new_statement_ok 5674 $accumulating_text_for_block 5675 $leading_block_text 5676 $rleading_block_if_elsif_text 5677 $leading_block_text_level 5678 $leading_block_text_length_exceeded 5679 $leading_block_text_line_length 5680 $leading_block_text_line_number 5681 $closing_side_comment_prefix_pattern 5682 $closing_side_comment_list_pattern 5683 5684 $last_nonblank_token 5685 $last_nonblank_type 5686 $last_last_nonblank_token 5687 $last_last_nonblank_type 5688 $last_nonblank_block_type 5689 $last_output_level 5690 %is_do_follower 5691 %is_if_brace_follower 5692 %space_after_keyword 5693 $rbrace_follower 5694 $looking_for_else 5695 %is_last_next_redo_return 5696 %is_other_brace_follower 5697 %is_else_brace_follower 5698 %is_anon_sub_brace_follower 5699 %is_anon_sub_1_brace_follower 5700 %is_sort_map_grep 5701 %is_sort_map_grep_eval 5702 %is_sort_map_grep_eval_do 5703 %is_block_without_semicolon 5704 %is_if_unless 5705 %is_and_or 5706 %is_assignment 5707 %is_chain_operator 5708 %is_if_unless_and_or_last_next_redo_return 5709 %is_until_while_for_if_elsif_else 5710 5711 @has_broken_sublist 5712 @dont_align 5713 @want_comma_break 5714 5715 $is_static_block_comment 5716 $index_start_one_line_block 5717 $semicolons_before_block_self_destruct 5718 $index_max_forced_break 5719 $input_line_number 5720 $diagnostics_object 5721 $vertical_aligner_object 5722 $logger_object 5723 $file_writer_object 5724 $formatter_self 5725 @ci_stack 5726 $last_line_had_side_comment 5727 %want_break_before 5728 %outdent_keyword 5729 $static_block_comment_pattern 5730 $static_side_comment_pattern 5731 %opening_vertical_tightness 5732 %closing_vertical_tightness 5733 %closing_token_indentation 5734 5735 %opening_token_right 5736 %stack_opening_token 5737 %stack_closing_token 5738 5739 $block_brace_vertical_tightness_pattern 5740 5741 $rOpts_add_newlines 5742 $rOpts_add_whitespace 5743 $rOpts_block_brace_tightness 5744 $rOpts_block_brace_vertical_tightness 5745 $rOpts_brace_left_and_indent 5746 $rOpts_comma_arrow_breakpoints 5747 $rOpts_break_at_old_keyword_breakpoints 5748 $rOpts_break_at_old_comma_breakpoints 5749 $rOpts_break_at_old_logical_breakpoints 5750 $rOpts_break_at_old_ternary_breakpoints 5751 $rOpts_closing_side_comment_else_flag 5752 $rOpts_closing_side_comment_maximum_text 5753 $rOpts_continuation_indentation 5754 $rOpts_cuddled_else 5755 $rOpts_delete_old_whitespace 5756 $rOpts_fuzzy_line_length 5757 $rOpts_indent_columns 5758 $rOpts_line_up_parentheses 5759 $rOpts_maximum_fields_per_table 5760 $rOpts_maximum_line_length 5761 $rOpts_short_concatenation_item_length 5762 $rOpts_keep_old_blank_lines 5763 $rOpts_ignore_old_breakpoints 5764 $rOpts_format_skipping 5765 $rOpts_space_function_paren 5766 $rOpts_space_keyword_paren 5767 $rOpts_keep_interior_semicolons 5768 5769 $half_maximum_line_length 5770 5771 %is_opening_type 5772 %is_closing_type 5773 %is_keyword_returning_list 5774 %tightness 5775 %matching_token 5776 $rOpts 5777 %right_bond_strength 5778 %left_bond_strength 5779 %binary_ws_rules 5780 %want_left_space 5781 %want_right_space 5782 %is_digraph 5783 %is_trigraph 5784 $bli_pattern 5785 $bli_list_string 5786 %is_closing_type 5787 %is_opening_type 5788 %is_closing_token 5789 %is_opening_token 5790}; 5791 5792BEGIN { 5793 5794 # default list of block types for which -bli would apply 5795 $bli_list_string = 'if else elsif unless while for foreach do : sub'; 5796 5797 @_ = qw( 5798 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 5799 <= >= == =~ !~ != ++ -- /= x= 5800 ); 5801 @is_digraph{@_} = (1) x scalar(@_); 5802 5803 @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); 5804 @is_trigraph{@_} = (1) x scalar(@_); 5805 5806 @_ = qw( 5807 = **= += *= &= <<= &&= 5808 -= /= |= >>= ||= //= 5809 .= %= ^= 5810 x= 5811 ); 5812 @is_assignment{@_} = (1) x scalar(@_); 5813 5814 @_ = qw( 5815 grep 5816 keys 5817 map 5818 reverse 5819 sort 5820 split 5821 ); 5822 @is_keyword_returning_list{@_} = (1) x scalar(@_); 5823 5824 @_ = qw(is if unless and or err last next redo return); 5825 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); 5826 5827 # always break after a closing curly of these block types: 5828 @_ = qw(until while for if elsif else); 5829 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_); 5830 5831 @_ = qw(last next redo return); 5832 @is_last_next_redo_return{@_} = (1) x scalar(@_); 5833 5834 @_ = qw(sort map grep); 5835 @is_sort_map_grep{@_} = (1) x scalar(@_); 5836 5837 @_ = qw(sort map grep eval); 5838 @is_sort_map_grep_eval{@_} = (1) x scalar(@_); 5839 5840 @_ = qw(sort map grep eval do); 5841 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); 5842 5843 @_ = qw(if unless); 5844 @is_if_unless{@_} = (1) x scalar(@_); 5845 5846 @_ = qw(and or err); 5847 @is_and_or{@_} = (1) x scalar(@_); 5848 5849 # Identify certain operators which often occur in chains. 5850 # Note: the minus (-) causes a side effect of padding of the first line in 5851 # something like this (by sub set_logical_padding): 5852 # Checkbutton => 'Transmission checked', 5853 # -variable => \$TRANS 5854 # This usually improves appearance so it seems ok. 5855 @_ = qw(&& || and or : ? . + - * /); 5856 @is_chain_operator{@_} = (1) x scalar(@_); 5857 5858 # We can remove semicolons after blocks preceded by these keywords 5859 @_ = 5860 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else 5861 unless while until for foreach); 5862 @is_block_without_semicolon{@_} = (1) x scalar(@_); 5863 5864 # 'L' is token for opening { at hash key 5865 @_ = qw" L { ( [ "; 5866 @is_opening_type{@_} = (1) x scalar(@_); 5867 5868 # 'R' is token for closing } at hash key 5869 @_ = qw" R } ) ] "; 5870 @is_closing_type{@_} = (1) x scalar(@_); 5871 5872 @_ = qw" { ( [ "; 5873 @is_opening_token{@_} = (1) x scalar(@_); 5874 5875 @_ = qw" } ) ] "; 5876 @is_closing_token{@_} = (1) x scalar(@_); 5877} 5878 5879# whitespace codes 5880use constant WS_YES => 1; 5881use constant WS_OPTIONAL => 0; 5882use constant WS_NO => -1; 5883 5884# Token bond strengths. 5885use constant NO_BREAK => 10000; 5886use constant VERY_STRONG => 100; 5887use constant STRONG => 2.1; 5888use constant NOMINAL => 1.1; 5889use constant WEAK => 0.8; 5890use constant VERY_WEAK => 0.55; 5891 5892# values for testing indexes in output array 5893use constant UNDEFINED_INDEX => -1; 5894 5895# Maximum number of little messages; probably need not be changed. 5896use constant MAX_NAG_MESSAGES => 6; 5897 5898# increment between sequence numbers for each type 5899# For example, ?: pairs might have numbers 7,11,15,... 5900use constant TYPE_SEQUENCE_INCREMENT => 4; 5901 5902{ 5903 5904 # methods to count instances 5905 my $_count = 0; 5906 sub get_count { $_count; } 5907 sub _increment_count { ++$_count } 5908 sub _decrement_count { --$_count } 5909} 5910 5911sub trim { 5912 5913 # trim leading and trailing whitespace from a string 5914 $_[0] =~ s/\s+$//; 5915 $_[0] =~ s/^\s+//; 5916 return $_[0]; 5917} 5918 5919sub split_words { 5920 5921 # given a string containing words separated by whitespace, 5922 # return the list of words 5923 my ($str) = @_; 5924 return unless $str; 5925 $str =~ s/\s+$//; 5926 $str =~ s/^\s+//; 5927 return split( /\s+/, $str ); 5928} 5929 5930# interface to Perl::Tidy::Logger routines 5931sub warning { 5932 if ($logger_object) { 5933 $logger_object->warning(@_); 5934 } 5935} 5936 5937sub complain { 5938 if ($logger_object) { 5939 $logger_object->complain(@_); 5940 } 5941} 5942 5943sub write_logfile_entry { 5944 if ($logger_object) { 5945 $logger_object->write_logfile_entry(@_); 5946 } 5947} 5948 5949sub black_box { 5950 if ($logger_object) { 5951 $logger_object->black_box(@_); 5952 } 5953} 5954 5955sub report_definite_bug { 5956 if ($logger_object) { 5957 $logger_object->report_definite_bug(); 5958 } 5959} 5960 5961sub get_saw_brace_error { 5962 if ($logger_object) { 5963 $logger_object->get_saw_brace_error(); 5964 } 5965} 5966 5967sub we_are_at_the_last_line { 5968 if ($logger_object) { 5969 $logger_object->we_are_at_the_last_line(); 5970 } 5971} 5972 5973# interface to Perl::Tidy::Diagnostics routine 5974sub write_diagnostics { 5975 5976 if ($diagnostics_object) { 5977 $diagnostics_object->write_diagnostics(@_); 5978 } 5979} 5980 5981sub get_added_semicolon_count { 5982 my $self = shift; 5983 return $added_semicolon_count; 5984} 5985 5986sub DESTROY { 5987 $_[0]->_decrement_count(); 5988} 5989 5990sub new { 5991 5992 my $class = shift; 5993 5994 # we are given an object with a write_line() method to take lines 5995 my %defaults = ( 5996 sink_object => undef, 5997 diagnostics_object => undef, 5998 logger_object => undef, 5999 ); 6000 my %args = ( %defaults, @_ ); 6001 6002 $logger_object = $args{logger_object}; 6003 $diagnostics_object = $args{diagnostics_object}; 6004 6005 # we create another object with a get_line() and peek_ahead() method 6006 my $sink_object = $args{sink_object}; 6007 $file_writer_object = 6008 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); 6009 6010 # initialize the leading whitespace stack to negative levels 6011 # so that we can never run off the end of the stack 6012 $gnu_position_predictor = 0; # where the current token is predicted to be 6013 $max_gnu_stack_index = 0; 6014 $max_gnu_item_index = -1; 6015 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); 6016 @gnu_item_list = (); 6017 $last_output_indentation = 0; 6018 $last_indentation_written = 0; 6019 $last_unadjusted_indentation = 0; 6020 $last_leading_token = ""; 6021 6022 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; 6023 $saw_END_or_DATA_ = 0; 6024 6025 @block_type_to_go = (); 6026 @type_sequence_to_go = (); 6027 @container_environment_to_go = (); 6028 @bond_strength_to_go = (); 6029 @forced_breakpoint_to_go = (); 6030 @lengths_to_go = (); # line length to start of ith token 6031 @levels_to_go = (); 6032 @matching_token_to_go = (); 6033 @mate_index_to_go = (); 6034 @nesting_blocks_to_go = (); 6035 @ci_levels_to_go = (); 6036 @nesting_depth_to_go = (0); 6037 @nobreak_to_go = (); 6038 @old_breakpoint_to_go = (); 6039 @tokens_to_go = (); 6040 @types_to_go = (); 6041 @leading_spaces_to_go = (); 6042 @reduced_spaces_to_go = (); 6043 6044 @dont_align = (); 6045 @has_broken_sublist = (); 6046 @want_comma_break = (); 6047 6048 @ci_stack = (""); 6049 $first_tabbing_disagreement = 0; 6050 $last_tabbing_disagreement = 0; 6051 $tabbing_disagreement_count = 0; 6052 $in_tabbing_disagreement = 0; 6053 $input_line_tabbing = undef; 6054 6055 $last_line_type = ""; 6056 $last_last_line_leading_level = 0; 6057 $last_line_leading_level = 0; 6058 $last_line_leading_type = '#'; 6059 6060 $last_nonblank_token = ';'; 6061 $last_nonblank_type = ';'; 6062 $last_last_nonblank_token = ';'; 6063 $last_last_nonblank_type = ';'; 6064 $last_nonblank_block_type = ""; 6065 $last_output_level = 0; 6066 $looking_for_else = 0; 6067 $embedded_tab_count = 0; 6068 $first_embedded_tab_at = 0; 6069 $last_embedded_tab_at = 0; 6070 $deleted_semicolon_count = 0; 6071 $first_deleted_semicolon_at = 0; 6072 $last_deleted_semicolon_at = 0; 6073 $added_semicolon_count = 0; 6074 $first_added_semicolon_at = 0; 6075 $last_added_semicolon_at = 0; 6076 $last_line_had_side_comment = 0; 6077 $is_static_block_comment = 0; 6078 %postponed_breakpoint = (); 6079 6080 # variables for adding side comments 6081 %block_leading_text = (); 6082 %block_opening_line_number = (); 6083 $csc_new_statement_ok = 1; 6084 6085 %saved_opening_indentation = (); 6086 $in_format_skipping_section = 0; 6087 6088 reset_block_text_accumulator(); 6089 6090 prepare_for_new_input_lines(); 6091 6092 $vertical_aligner_object = 6093 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, 6094 $logger_object, $diagnostics_object ); 6095 6096 if ( $rOpts->{'entab-leading-whitespace'} ) { 6097 write_logfile_entry( 6098"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" 6099 ); 6100 } 6101 elsif ( $rOpts->{'tabs'} ) { 6102 write_logfile_entry("Indentation will be with a tab character\n"); 6103 } 6104 else { 6105 write_logfile_entry( 6106 "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); 6107 } 6108 6109 # This was the start of a formatter referent, but object-oriented 6110 # coding has turned out to be too slow here. 6111 $formatter_self = {}; 6112 6113 bless $formatter_self, $class; 6114 6115 # Safety check..this is not a class yet 6116 if ( _increment_count() > 1 ) { 6117 confess 6118"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 6119 } 6120 return $formatter_self; 6121} 6122 6123sub prepare_for_new_input_lines { 6124 6125 $gnu_sequence_number++; # increment output batch counter 6126 %last_gnu_equals = (); 6127 %gnu_comma_count = (); 6128 %gnu_arrow_count = (); 6129 $line_start_index_to_go = 0; 6130 $max_gnu_item_index = UNDEFINED_INDEX; 6131 $index_max_forced_break = UNDEFINED_INDEX; 6132 $max_index_to_go = UNDEFINED_INDEX; 6133 $last_nonblank_index_to_go = UNDEFINED_INDEX; 6134 $last_nonblank_type_to_go = ''; 6135 $last_nonblank_token_to_go = ''; 6136 $last_last_nonblank_index_to_go = UNDEFINED_INDEX; 6137 $last_last_nonblank_type_to_go = ''; 6138 $last_last_nonblank_token_to_go = ''; 6139 $forced_breakpoint_count = 0; 6140 $forced_breakpoint_undo_count = 0; 6141 $rbrace_follower = undef; 6142 $lengths_to_go[0] = 0; 6143 $old_line_count_in_batch = 1; 6144 $comma_count_in_batch = 0; 6145 $starting_in_quote = 0; 6146 6147 destroy_one_line_block(); 6148} 6149 6150sub write_line { 6151 6152 my $self = shift; 6153 my ($line_of_tokens) = @_; 6154 6155 my $line_type = $line_of_tokens->{_line_type}; 6156 my $input_line = $line_of_tokens->{_line_text}; 6157 6158 if ( $rOpts->{notidy} ) { 6159 write_unindented_line($input_line); 6160 $last_line_type = $line_type; 6161 return; 6162 } 6163 6164 # _line_type codes are: 6165 # SYSTEM - system-specific code before hash-bang line 6166 # CODE - line of perl code (including comments) 6167 # POD_START - line starting pod, such as '=head' 6168 # POD - pod documentation text 6169 # POD_END - last line of pod section, '=cut' 6170 # HERE - text of here-document 6171 # HERE_END - last line of here-doc (target word) 6172 # FORMAT - format section 6173 # FORMAT_END - last line of format section, '.' 6174 # DATA_START - __DATA__ line 6175 # DATA - unidentified text following __DATA__ 6176 # END_START - __END__ line 6177 # END - unidentified text following __END__ 6178 # ERROR - we are in big trouble, probably not a perl script 6179 6180 # put a blank line after an =cut which comes before __END__ and __DATA__ 6181 # (required by podchecker) 6182 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { 6183 $file_writer_object->reset_consecutive_blank_lines(); 6184 if ( $input_line !~ /^\s*$/ ) { want_blank_line() } 6185 } 6186 6187 # handle line of code.. 6188 if ( $line_type eq 'CODE' ) { 6189 6190 # let logger see all non-blank lines of code 6191 if ( $input_line !~ /^\s*$/ ) { 6192 my $output_line_number = 6193 $vertical_aligner_object->get_output_line_number(); 6194 black_box( $line_of_tokens, $output_line_number ); 6195 } 6196 print_line_of_tokens($line_of_tokens); 6197 } 6198 6199 # handle line of non-code.. 6200 else { 6201 6202 # set special flags 6203 my $skip_line = 0; 6204 my $tee_line = 0; 6205 if ( $line_type =~ /^POD/ ) { 6206 6207 # Pod docs should have a preceding blank line. But be 6208 # very careful in __END__ and __DATA__ sections, because: 6209 # 1. the user may be using this section for any purpose whatsoever 6210 # 2. the blank counters are not active there 6211 # It should be safe to request a blank line between an 6212 # __END__ or __DATA__ and an immediately following '=head' 6213 # type line, (types END_START and DATA_START), but not for 6214 # any other lines of type END or DATA. 6215 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } 6216 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } 6217 if ( !$skip_line 6218 && $line_type eq 'POD_START' 6219 && $last_line_type !~ /^(END|DATA)$/ ) 6220 { 6221 want_blank_line(); 6222 } 6223 } 6224 6225 # leave the blank counters in a predictable state 6226 # after __END__ or __DATA__ 6227 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { 6228 $file_writer_object->reset_consecutive_blank_lines(); 6229 $saw_END_or_DATA_ = 1; 6230 } 6231 6232 # write unindented non-code line 6233 if ( !$skip_line ) { 6234 if ($tee_line) { $file_writer_object->tee_on() } 6235 write_unindented_line($input_line); 6236 if ($tee_line) { $file_writer_object->tee_off() } 6237 } 6238 } 6239 $last_line_type = $line_type; 6240} 6241 6242sub create_one_line_block { 6243 $index_start_one_line_block = $_[0]; 6244 $semicolons_before_block_self_destruct = $_[1]; 6245} 6246 6247sub destroy_one_line_block { 6248 $index_start_one_line_block = UNDEFINED_INDEX; 6249 $semicolons_before_block_self_destruct = 0; 6250} 6251 6252sub leading_spaces_to_go { 6253 6254 # return the number of indentation spaces for a token in the output stream; 6255 # these were previously stored by 'set_leading_whitespace'. 6256 6257 return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); 6258 6259} 6260 6261sub get_SPACES { 6262 6263 # return the number of leading spaces associated with an indentation 6264 # variable $indentation is either a constant number of spaces or an object 6265 # with a get_SPACES method. 6266 my $indentation = shift; 6267 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 6268} 6269 6270sub get_RECOVERABLE_SPACES { 6271 6272 # return the number of spaces (+ means shift right, - means shift left) 6273 # that we would like to shift a group of lines with the same indentation 6274 # to get them to line up with their opening parens 6275 my $indentation = shift; 6276 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; 6277} 6278 6279sub get_AVAILABLE_SPACES_to_go { 6280 6281 my $item = $leading_spaces_to_go[ $_[0] ]; 6282 6283 # return the number of available leading spaces associated with an 6284 # indentation variable. $indentation is either a constant number of 6285 # spaces or an object with a get_AVAILABLE_SPACES method. 6286 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; 6287} 6288 6289sub new_lp_indentation_item { 6290 6291 # this is an interface to the IndentationItem class 6292 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; 6293 6294 # A negative level implies not to store the item in the item_list 6295 my $index = 0; 6296 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } 6297 6298 my $item = Perl::Tidy::IndentationItem->new( 6299 $spaces, $level, 6300 $ci_level, $available_spaces, 6301 $index, $gnu_sequence_number, 6302 $align_paren, $max_gnu_stack_index, 6303 $line_start_index_to_go, 6304 ); 6305 6306 if ( $level >= 0 ) { 6307 $gnu_item_list[$max_gnu_item_index] = $item; 6308 } 6309 6310 return $item; 6311} 6312 6313sub set_leading_whitespace { 6314 6315 # This routine defines leading whitespace 6316 # given: the level and continuation_level of a token, 6317 # define: space count of leading string which would apply if it 6318 # were the first token of a new line. 6319 6320 my ( $level, $ci_level, $in_continued_quote ) = @_; 6321 6322 # modify for -bli, which adds one continuation indentation for 6323 # opening braces 6324 if ( $rOpts_brace_left_and_indent 6325 && $max_index_to_go == 0 6326 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) 6327 { 6328 $ci_level++; 6329 } 6330 6331 # patch to avoid trouble when input file has negative indentation. 6332 # other logic should catch this error. 6333 if ( $level < 0 ) { $level = 0 } 6334 6335 #------------------------------------------- 6336 # handle the standard indentation scheme 6337 #------------------------------------------- 6338 unless ($rOpts_line_up_parentheses) { 6339 my $space_count = 6340 $ci_level * $rOpts_continuation_indentation + 6341 $level * $rOpts_indent_columns; 6342 my $ci_spaces = 6343 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; 6344 6345 if ($in_continued_quote) { 6346 $space_count = 0; 6347 $ci_spaces = 0; 6348 } 6349 $leading_spaces_to_go[$max_index_to_go] = $space_count; 6350 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; 6351 return; 6352 } 6353 6354 #------------------------------------------------------------- 6355 # handle case of -lp indentation.. 6356 #------------------------------------------------------------- 6357 6358 # The continued_quote flag means that this is the first token of a 6359 # line, and it is the continuation of some kind of multi-line quote 6360 # or pattern. It requires special treatment because it must have no 6361 # added leading whitespace. So we create a special indentation item 6362 # which is not in the stack. 6363 if ($in_continued_quote) { 6364 my $space_count = 0; 6365 my $available_space = 0; 6366 $level = -1; # flag to prevent storing in item_list 6367 $leading_spaces_to_go[$max_index_to_go] = 6368 $reduced_spaces_to_go[$max_index_to_go] = 6369 new_lp_indentation_item( $space_count, $level, $ci_level, 6370 $available_space, 0 ); 6371 return; 6372 } 6373 6374 # get the top state from the stack 6375 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 6376 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 6377 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 6378 6379 my $type = $types_to_go[$max_index_to_go]; 6380 my $token = $tokens_to_go[$max_index_to_go]; 6381 my $total_depth = $nesting_depth_to_go[$max_index_to_go]; 6382 6383 if ( $type eq '{' || $type eq '(' ) { 6384 6385 $gnu_comma_count{ $total_depth + 1 } = 0; 6386 $gnu_arrow_count{ $total_depth + 1 } = 0; 6387 6388 # If we come to an opening token after an '=' token of some type, 6389 # see if it would be helpful to 'break' after the '=' to save space 6390 my $last_equals = $last_gnu_equals{$total_depth}; 6391 if ( $last_equals && $last_equals > $line_start_index_to_go ) { 6392 6393 # find the position if we break at the '=' 6394 my $i_test = $last_equals; 6395 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } 6396 6397 # TESTING 6398 ##my $too_close = ($i_test==$max_index_to_go-1); 6399 6400 my $test_position = total_line_length( $i_test, $max_index_to_go ); 6401 6402 if ( 6403 6404 # the equals is not just before an open paren (testing) 6405 ##!$too_close && 6406 6407 # if we are beyond the midpoint 6408 $gnu_position_predictor > $half_maximum_line_length 6409 6410 # or we are beyont the 1/4 point and there was an old 6411 # break at the equals 6412 || ( 6413 $gnu_position_predictor > $half_maximum_line_length / 2 6414 && ( 6415 $old_breakpoint_to_go[$last_equals] 6416 || ( $last_equals > 0 6417 && $old_breakpoint_to_go[ $last_equals - 1 ] ) 6418 || ( $last_equals > 1 6419 && $types_to_go[ $last_equals - 1 ] eq 'b' 6420 && $old_breakpoint_to_go[ $last_equals - 2 ] ) 6421 ) 6422 ) 6423 ) 6424 { 6425 6426 # then make the switch -- note that we do not set a real 6427 # breakpoint here because we may not really need one; sub 6428 # scan_list will do that if necessary 6429 $line_start_index_to_go = $i_test + 1; 6430 $gnu_position_predictor = $test_position; 6431 } 6432 } 6433 } 6434 6435 # Check for decreasing depth .. 6436 # Note that one token may have both decreasing and then increasing 6437 # depth. For example, (level, ci) can go from (1,1) to (2,0). So, 6438 # in this example we would first go back to (1,0) then up to (2,0) 6439 # in a single call. 6440 if ( $level < $current_level || $ci_level < $current_ci_level ) { 6441 6442 # loop to find the first entry at or completely below this level 6443 my ( $lev, $ci_lev ); 6444 while (1) { 6445 if ($max_gnu_stack_index) { 6446 6447 # save index of token which closes this level 6448 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); 6449 6450 # Undo any extra indentation if we saw no commas 6451 my $available_spaces = 6452 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); 6453 6454 my $comma_count = 0; 6455 my $arrow_count = 0; 6456 if ( $type eq '}' || $type eq ')' ) { 6457 $comma_count = $gnu_comma_count{$total_depth}; 6458 $arrow_count = $gnu_arrow_count{$total_depth}; 6459 $comma_count = 0 unless $comma_count; 6460 $arrow_count = 0 unless $arrow_count; 6461 } 6462 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); 6463 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); 6464 6465 if ( $available_spaces > 0 ) { 6466 6467 if ( $comma_count <= 0 || $arrow_count > 0 ) { 6468 6469 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); 6470 my $seqno = 6471 $gnu_stack[$max_gnu_stack_index] 6472 ->get_SEQUENCE_NUMBER(); 6473 6474 # Be sure this item was created in this batch. This 6475 # should be true because we delete any available 6476 # space from open items at the end of each batch. 6477 if ( $gnu_sequence_number != $seqno 6478 || $i > $max_gnu_item_index ) 6479 { 6480 warning( 6481"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" 6482 ); 6483 report_definite_bug(); 6484 } 6485 6486 else { 6487 if ( $arrow_count == 0 ) { 6488 $gnu_item_list[$i] 6489 ->permanently_decrease_AVAILABLE_SPACES( 6490 $available_spaces); 6491 } 6492 else { 6493 $gnu_item_list[$i] 6494 ->tentatively_decrease_AVAILABLE_SPACES( 6495 $available_spaces); 6496 } 6497 6498 my $j; 6499 for ( 6500 $j = $i + 1 ; 6501 $j <= $max_gnu_item_index ; 6502 $j++ 6503 ) 6504 { 6505 $gnu_item_list[$j] 6506 ->decrease_SPACES($available_spaces); 6507 } 6508 } 6509 } 6510 } 6511 6512 # go down one level 6513 --$max_gnu_stack_index; 6514 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 6515 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 6516 6517 # stop when we reach a level at or below the current level 6518 if ( $lev <= $level && $ci_lev <= $ci_level ) { 6519 $space_count = 6520 $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 6521 $current_level = $lev; 6522 $current_ci_level = $ci_lev; 6523 last; 6524 } 6525 } 6526 6527 # reached bottom of stack .. should never happen because 6528 # only negative levels can get here, and $level was forced 6529 # to be positive above. 6530 else { 6531 warning( 6532"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" 6533 ); 6534 report_definite_bug(); 6535 last; 6536 } 6537 } 6538 } 6539 6540 # handle increasing depth 6541 if ( $level > $current_level || $ci_level > $current_ci_level ) { 6542 6543 # Compute the standard incremental whitespace. This will be 6544 # the minimum incremental whitespace that will be used. This 6545 # choice results in a smooth transition between the gnu-style 6546 # and the standard style. 6547 my $standard_increment = 6548 ( $level - $current_level ) * $rOpts_indent_columns + 6549 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; 6550 6551 # Now we have to define how much extra incremental space 6552 # ("$available_space") we want. This extra space will be 6553 # reduced as necessary when long lines are encountered or when 6554 # it becomes clear that we do not have a good list. 6555 my $available_space = 0; 6556 my $align_paren = 0; 6557 my $excess = 0; 6558 6559 # initialization on empty stack.. 6560 if ( $max_gnu_stack_index == 0 ) { 6561 $space_count = $level * $rOpts_indent_columns; 6562 } 6563 6564 # if this is a BLOCK, add the standard increment 6565 elsif ($last_nonblank_block_type) { 6566 $space_count += $standard_increment; 6567 } 6568 6569 # if last nonblank token was not structural indentation, 6570 # just use standard increment 6571 elsif ( $last_nonblank_type ne '{' ) { 6572 $space_count += $standard_increment; 6573 } 6574 6575 # otherwise use the space to the first non-blank level change token 6576 else { 6577 6578 $space_count = $gnu_position_predictor; 6579 6580 my $min_gnu_indentation = 6581 $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 6582 6583 $available_space = $space_count - $min_gnu_indentation; 6584 if ( $available_space >= $standard_increment ) { 6585 $min_gnu_indentation += $standard_increment; 6586 } 6587 elsif ( $available_space > 1 ) { 6588 $min_gnu_indentation += $available_space + 1; 6589 } 6590 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { 6591 if ( ( $tightness{$last_nonblank_token} < 2 ) ) { 6592 $min_gnu_indentation += 2; 6593 } 6594 else { 6595 $min_gnu_indentation += 1; 6596 } 6597 } 6598 else { 6599 $min_gnu_indentation += $standard_increment; 6600 } 6601 $available_space = $space_count - $min_gnu_indentation; 6602 6603 if ( $available_space < 0 ) { 6604 $space_count = $min_gnu_indentation; 6605 $available_space = 0; 6606 } 6607 $align_paren = 1; 6608 } 6609 6610 # update state, but not on a blank token 6611 if ( $types_to_go[$max_index_to_go] ne 'b' ) { 6612 6613 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); 6614 6615 ++$max_gnu_stack_index; 6616 $gnu_stack[$max_gnu_stack_index] = 6617 new_lp_indentation_item( $space_count, $level, $ci_level, 6618 $available_space, $align_paren ); 6619 6620 # If the opening paren is beyond the half-line length, then 6621 # we will use the minimum (standard) indentation. This will 6622 # help avoid problems associated with running out of space 6623 # near the end of a line. As a result, in deeply nested 6624 # lists, there will be some indentations which are limited 6625 # to this minimum standard indentation. But the most deeply 6626 # nested container will still probably be able to shift its 6627 # parameters to the right for proper alignment, so in most 6628 # cases this will not be noticable. 6629 if ( $available_space > 0 6630 && $space_count > $half_maximum_line_length ) 6631 { 6632 $gnu_stack[$max_gnu_stack_index] 6633 ->tentatively_decrease_AVAILABLE_SPACES($available_space); 6634 } 6635 } 6636 } 6637 6638 # Count commas and look for non-list characters. Once we see a 6639 # non-list character, we give up and don't look for any more commas. 6640 if ( $type eq '=>' ) { 6641 $gnu_arrow_count{$total_depth}++; 6642 6643 # tentatively treating '=>' like '=' for estimating breaks 6644 # TODO: this could use some experimentation 6645 $last_gnu_equals{$total_depth} = $max_index_to_go; 6646 } 6647 6648 elsif ( $type eq ',' ) { 6649 $gnu_comma_count{$total_depth}++; 6650 } 6651 6652 elsif ( $is_assignment{$type} ) { 6653 $last_gnu_equals{$total_depth} = $max_index_to_go; 6654 } 6655 6656 # this token might start a new line 6657 # if this is a non-blank.. 6658 if ( $type ne 'b' ) { 6659 6660 # and if .. 6661 if ( 6662 6663 # this is the first nonblank token of the line 6664 $max_index_to_go == 1 && $types_to_go[0] eq 'b' 6665 6666 # or previous character was one of these: 6667 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ 6668 6669 # or previous character was opening and this does not close it 6670 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) 6671 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) 6672 6673 # or this token is one of these: 6674 || $type =~ /^([\.]|\|\||\&\&)$/ 6675 6676 # or this is a closing structure 6677 || ( $last_nonblank_type_to_go eq '}' 6678 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) 6679 6680 # or previous token was keyword 'return' 6681 || ( $last_nonblank_type_to_go eq 'k' 6682 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) 6683 6684 # or starting a new line at certain keywords is fine 6685 || ( $type eq 'k' 6686 && $is_if_unless_and_or_last_next_redo_return{$token} ) 6687 6688 # or this is after an assignment after a closing structure 6689 || ( 6690 $is_assignment{$last_nonblank_type_to_go} 6691 && ( 6692 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ 6693 6694 # and it is significantly to the right 6695 || $gnu_position_predictor > $half_maximum_line_length 6696 ) 6697 ) 6698 ) 6699 { 6700 check_for_long_gnu_style_lines(); 6701 $line_start_index_to_go = $max_index_to_go; 6702 6703 # back up 1 token if we want to break before that type 6704 # otherwise, we may strand tokens like '?' or ':' on a line 6705 if ( $line_start_index_to_go > 0 ) { 6706 if ( $last_nonblank_type_to_go eq 'k' ) { 6707 6708 if ( $want_break_before{$last_nonblank_token_to_go} ) { 6709 $line_start_index_to_go--; 6710 } 6711 } 6712 elsif ( $want_break_before{$last_nonblank_type_to_go} ) { 6713 $line_start_index_to_go--; 6714 } 6715 } 6716 } 6717 } 6718 6719 # remember the predicted position of this token on the output line 6720 if ( $max_index_to_go > $line_start_index_to_go ) { 6721 $gnu_position_predictor = 6722 total_line_length( $line_start_index_to_go, $max_index_to_go ); 6723 } 6724 else { 6725 $gnu_position_predictor = $space_count + 6726 token_sequence_length( $max_index_to_go, $max_index_to_go ); 6727 } 6728 6729 # store the indentation object for this token 6730 # this allows us to manipulate the leading whitespace 6731 # (in case we have to reduce indentation to fit a line) without 6732 # having to change any token values 6733 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; 6734 $reduced_spaces_to_go[$max_index_to_go] = 6735 ( $max_gnu_stack_index > 0 && $ci_level ) 6736 ? $gnu_stack[ $max_gnu_stack_index - 1 ] 6737 : $gnu_stack[$max_gnu_stack_index]; 6738 return; 6739} 6740 6741sub check_for_long_gnu_style_lines { 6742 6743 # look at the current estimated maximum line length, and 6744 # remove some whitespace if it exceeds the desired maximum 6745 6746 # this is only for the '-lp' style 6747 return unless ($rOpts_line_up_parentheses); 6748 6749 # nothing can be done if no stack items defined for this line 6750 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 6751 6752 # see if we have exceeded the maximum desired line length 6753 # keep 2 extra free because they are needed in some cases 6754 # (result of trial-and-error testing) 6755 my $spaces_needed = 6756 $gnu_position_predictor - $rOpts_maximum_line_length + 2; 6757 6758 return if ( $spaces_needed <= 0 ); 6759 6760 # We are over the limit, so try to remove a requested number of 6761 # spaces from leading whitespace. We are only allowed to remove 6762 # from whitespace items created on this batch, since others have 6763 # already been used and cannot be undone. 6764 my @candidates = (); 6765 my $i; 6766 6767 # loop over all whitespace items created for the current batch 6768 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 6769 my $item = $gnu_item_list[$i]; 6770 6771 # item must still be open to be a candidate (otherwise it 6772 # cannot influence the current token) 6773 next if ( $item->get_CLOSED() >= 0 ); 6774 6775 my $available_spaces = $item->get_AVAILABLE_SPACES(); 6776 6777 if ( $available_spaces > 0 ) { 6778 push( @candidates, [ $i, $available_spaces ] ); 6779 } 6780 } 6781 6782 return unless (@candidates); 6783 6784 # sort by available whitespace so that we can remove whitespace 6785 # from the maximum available first 6786 @candidates = sort { $b->[1] <=> $a->[1] } @candidates; 6787 6788 # keep removing whitespace until we are done or have no more 6789 my $candidate; 6790 foreach $candidate (@candidates) { 6791 my ( $i, $available_spaces ) = @{$candidate}; 6792 my $deleted_spaces = 6793 ( $available_spaces > $spaces_needed ) 6794 ? $spaces_needed 6795 : $available_spaces; 6796 6797 # remove the incremental space from this item 6798 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); 6799 6800 my $i_debug = $i; 6801 6802 # update the leading whitespace of this item and all items 6803 # that came after it 6804 for ( ; $i <= $max_gnu_item_index ; $i++ ) { 6805 6806 my $old_spaces = $gnu_item_list[$i]->get_SPACES(); 6807 if ( $old_spaces >= $deleted_spaces ) { 6808 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); 6809 } 6810 6811 # shouldn't happen except for code bug: 6812 else { 6813 my $level = $gnu_item_list[$i_debug]->get_LEVEL(); 6814 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); 6815 my $old_level = $gnu_item_list[$i]->get_LEVEL(); 6816 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); 6817 warning( 6818"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" 6819 ); 6820 report_definite_bug(); 6821 } 6822 } 6823 $gnu_position_predictor -= $deleted_spaces; 6824 $spaces_needed -= $deleted_spaces; 6825 last unless ( $spaces_needed > 0 ); 6826 } 6827} 6828 6829sub finish_lp_batch { 6830 6831 # This routine is called once after each each output stream batch is 6832 # finished to undo indentation for all incomplete -lp 6833 # indentation levels. It is too risky to leave a level open, 6834 # because then we can't backtrack in case of a long line to follow. 6835 # This means that comments and blank lines will disrupt this 6836 # indentation style. But the vertical aligner may be able to 6837 # get the space back if there are side comments. 6838 6839 # this is only for the 'lp' style 6840 return unless ($rOpts_line_up_parentheses); 6841 6842 # nothing can be done if no stack items defined for this line 6843 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 6844 6845 # loop over all whitespace items created for the current batch 6846 my $i; 6847 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 6848 my $item = $gnu_item_list[$i]; 6849 6850 # only look for open items 6851 next if ( $item->get_CLOSED() >= 0 ); 6852 6853 # Tentatively remove all of the available space 6854 # (The vertical aligner will try to get it back later) 6855 my $available_spaces = $item->get_AVAILABLE_SPACES(); 6856 if ( $available_spaces > 0 ) { 6857 6858 # delete incremental space for this item 6859 $gnu_item_list[$i] 6860 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); 6861 6862 # Reduce the total indentation space of any nodes that follow 6863 # Note that any such nodes must necessarily be dependents 6864 # of this node. 6865 foreach ( $i + 1 .. $max_gnu_item_index ) { 6866 $gnu_item_list[$_]->decrease_SPACES($available_spaces); 6867 } 6868 } 6869 } 6870 return; 6871} 6872 6873sub reduce_lp_indentation { 6874 6875 # reduce the leading whitespace at token $i if possible by $spaces_needed 6876 # (a large value of $spaces_needed will remove all excess space) 6877 # NOTE: to be called from scan_list only for a sequence of tokens 6878 # contained between opening and closing parens/braces/brackets 6879 6880 my ( $i, $spaces_wanted ) = @_; 6881 my $deleted_spaces = 0; 6882 6883 my $item = $leading_spaces_to_go[$i]; 6884 my $available_spaces = $item->get_AVAILABLE_SPACES(); 6885 6886 if ( 6887 $available_spaces > 0 6888 && ( ( $spaces_wanted <= $available_spaces ) 6889 || !$item->get_HAVE_CHILD() ) 6890 ) 6891 { 6892 6893 # we'll remove these spaces, but mark them as recoverable 6894 $deleted_spaces = 6895 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); 6896 } 6897 6898 return $deleted_spaces; 6899} 6900 6901sub token_sequence_length { 6902 6903 # return length of tokens ($ifirst .. $ilast) including first & last 6904 # returns 0 if $ifirst > $ilast 6905 my $ifirst = shift; 6906 my $ilast = shift; 6907 return 0 if ( $ilast < 0 || $ifirst > $ilast ); 6908 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 ); 6909 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst]; 6910} 6911 6912sub total_line_length { 6913 6914 # return length of a line of tokens ($ifirst .. $ilast) 6915 my $ifirst = shift; 6916 my $ilast = shift; 6917 if ( $ifirst < 0 ) { $ifirst = 0 } 6918 6919 return leading_spaces_to_go($ifirst) + 6920 token_sequence_length( $ifirst, $ilast ); 6921} 6922 6923sub excess_line_length { 6924 6925 # return number of characters by which a line of tokens ($ifirst..$ilast) 6926 # exceeds the allowable line length. 6927 my $ifirst = shift; 6928 my $ilast = shift; 6929 if ( $ifirst < 0 ) { $ifirst = 0 } 6930 return leading_spaces_to_go($ifirst) + 6931 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; 6932} 6933 6934sub finish_formatting { 6935 6936 # flush buffer and write any informative messages 6937 my $self = shift; 6938 6939 flush(); 6940 $file_writer_object->decrement_output_line_number() 6941 ; # fix up line number since it was incremented 6942 we_are_at_the_last_line(); 6943 if ( $added_semicolon_count > 0 ) { 6944 my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; 6945 my $what = 6946 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; 6947 write_logfile_entry("$added_semicolon_count $what added:\n"); 6948 write_logfile_entry( 6949 " $first at input line $first_added_semicolon_at\n"); 6950 6951 if ( $added_semicolon_count > 1 ) { 6952 write_logfile_entry( 6953 " Last at input line $last_added_semicolon_at\n"); 6954 } 6955 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); 6956 write_logfile_entry("\n"); 6957 } 6958 6959 if ( $deleted_semicolon_count > 0 ) { 6960 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; 6961 my $what = 6962 ( $deleted_semicolon_count > 1 ) 6963 ? "semicolons were" 6964 : "semicolon was"; 6965 write_logfile_entry( 6966 "$deleted_semicolon_count unnecessary $what deleted:\n"); 6967 write_logfile_entry( 6968 " $first at input line $first_deleted_semicolon_at\n"); 6969 6970 if ( $deleted_semicolon_count > 1 ) { 6971 write_logfile_entry( 6972 " Last at input line $last_deleted_semicolon_at\n"); 6973 } 6974 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); 6975 write_logfile_entry("\n"); 6976 } 6977 6978 if ( $embedded_tab_count > 0 ) { 6979 my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; 6980 my $what = 6981 ( $embedded_tab_count > 1 ) 6982 ? "quotes or patterns" 6983 : "quote or pattern"; 6984 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); 6985 write_logfile_entry( 6986"This means the display of this script could vary with device or software\n" 6987 ); 6988 write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); 6989 6990 if ( $embedded_tab_count > 1 ) { 6991 write_logfile_entry( 6992 " Last at input line $last_embedded_tab_at\n"); 6993 } 6994 write_logfile_entry("\n"); 6995 } 6996 6997 if ($first_tabbing_disagreement) { 6998 write_logfile_entry( 6999"First indentation disagreement seen at input line $first_tabbing_disagreement\n" 7000 ); 7001 } 7002 7003 if ($in_tabbing_disagreement) { 7004 write_logfile_entry( 7005"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" 7006 ); 7007 } 7008 else { 7009 7010 if ($last_tabbing_disagreement) { 7011 7012 write_logfile_entry( 7013"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" 7014 ); 7015 } 7016 else { 7017 write_logfile_entry("No indentation disagreement seen\n"); 7018 } 7019 } 7020 write_logfile_entry("\n"); 7021 7022 $vertical_aligner_object->report_anything_unusual(); 7023 7024 $file_writer_object->report_line_length_errors(); 7025} 7026 7027sub check_options { 7028 7029 # This routine is called to check the Opts hash after it is defined 7030 7031 ($rOpts) = @_; 7032 my ( $tabbing_string, $tab_msg ); 7033 7034 make_static_block_comment_pattern(); 7035 make_static_side_comment_pattern(); 7036 make_closing_side_comment_prefix(); 7037 make_closing_side_comment_list_pattern(); 7038 $format_skipping_pattern_begin = 7039 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); 7040 $format_skipping_pattern_end = 7041 make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); 7042 7043 # If closing side comments ARE selected, then we can safely 7044 # delete old closing side comments unless closing side comment 7045 # warnings are requested. This is a good idea because it will 7046 # eliminate any old csc's which fall below the line count threshold. 7047 # We cannot do this if warnings are turned on, though, because we 7048 # might delete some text which has been added. So that must 7049 # be handled when comments are created. 7050 if ( $rOpts->{'closing-side-comments'} ) { 7051 if ( !$rOpts->{'closing-side-comment-warnings'} ) { 7052 $rOpts->{'delete-closing-side-comments'} = 1; 7053 } 7054 } 7055 7056 # If closing side comments ARE NOT selected, but warnings ARE 7057 # selected and we ARE DELETING csc's, then we will pretend to be 7058 # adding with a huge interval. This will force the comments to be 7059 # generated for comparison with the old comments, but not added. 7060 elsif ( $rOpts->{'closing-side-comment-warnings'} ) { 7061 if ( $rOpts->{'delete-closing-side-comments'} ) { 7062 $rOpts->{'delete-closing-side-comments'} = 0; 7063 $rOpts->{'closing-side-comments'} = 1; 7064 $rOpts->{'closing-side-comment-interval'} = 100000000; 7065 } 7066 } 7067 7068 make_bli_pattern(); 7069 make_block_brace_vertical_tightness_pattern(); 7070 7071 if ( $rOpts->{'line-up-parentheses'} ) { 7072 7073 if ( $rOpts->{'indent-only'} 7074 || !$rOpts->{'add-newlines'} 7075 || !$rOpts->{'delete-old-newlines'} ) 7076 { 7077 warn <<EOM; 7078----------------------------------------------------------------------- 7079Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp 7080 7081The -lp indentation logic requires that perltidy be able to coordinate 7082arbitrarily large numbers of line breakpoints. This isn't possible 7083with these flags. Sometimes an acceptable workaround is to use -wocb=3 7084----------------------------------------------------------------------- 7085EOM 7086 $rOpts->{'line-up-parentheses'} = 0; 7087 } 7088 } 7089 7090 # At present, tabs are not compatable with the line-up-parentheses style 7091 # (it would be possible to entab the total leading whitespace 7092 # just prior to writing the line, if desired). 7093 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { 7094 warn <<EOM; 7095Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. 7096EOM 7097 $rOpts->{'tabs'} = 0; 7098 } 7099 7100 # Likewise, tabs are not compatable with outdenting.. 7101 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { 7102 warn <<EOM; 7103Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. 7104EOM 7105 $rOpts->{'tabs'} = 0; 7106 } 7107 7108 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { 7109 warn <<EOM; 7110Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. 7111EOM 7112 $rOpts->{'tabs'} = 0; 7113 } 7114 7115 if ( !$rOpts->{'space-for-semicolon'} ) { 7116 $want_left_space{'f'} = -1; 7117 } 7118 7119 if ( $rOpts->{'space-terminal-semicolon'} ) { 7120 $want_left_space{';'} = 1; 7121 } 7122 7123 # implement outdenting preferences for keywords 7124 %outdent_keyword = (); 7125 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { 7126 @_ = qw(next last redo goto return); # defaults 7127 } 7128 7129 # FUTURE: if not a keyword, assume that it is an identifier 7130 foreach (@_) { 7131 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { 7132 $outdent_keyword{$_} = 1; 7133 } 7134 else { 7135 warn "ignoring '$_' in -okwl list; not a perl keyword"; 7136 } 7137 } 7138 7139 # implement user whitespace preferences 7140 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { 7141 @want_left_space{@_} = (1) x scalar(@_); 7142 } 7143 7144 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { 7145 @want_right_space{@_} = (1) x scalar(@_); 7146 } 7147 7148 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { 7149 @want_left_space{@_} = (-1) x scalar(@_); 7150 } 7151 7152 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { 7153 @want_right_space{@_} = (-1) x scalar(@_); 7154 } 7155 if ( $rOpts->{'dump-want-left-space'} ) { 7156 dump_want_left_space(*STDOUT); 7157 exit 1; 7158 } 7159 7160 if ( $rOpts->{'dump-want-right-space'} ) { 7161 dump_want_right_space(*STDOUT); 7162 exit 1; 7163 } 7164 7165 # default keywords for which space is introduced before an opening paren 7166 # (at present, including them messes up vertical alignment) 7167 @_ = qw(my local our and or err eq ne if else elsif until 7168 unless while for foreach return switch case given when); 7169 @space_after_keyword{@_} = (1) x scalar(@_); 7170 7171 # allow user to modify these defaults 7172 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { 7173 @space_after_keyword{@_} = (1) x scalar(@_); 7174 } 7175 7176 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { 7177 @space_after_keyword{@_} = (0) x scalar(@_); 7178 } 7179 7180 # implement user break preferences 7181 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & 7182 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= 7183 . : ? && || and or err xor 7184 ); 7185 7186 my $break_after = sub { 7187 foreach my $tok (@_) { 7188 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: 7189 my $lbs = $left_bond_strength{$tok}; 7190 my $rbs = $right_bond_strength{$tok}; 7191 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { 7192 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7193 ( $lbs, $rbs ); 7194 } 7195 } 7196 }; 7197 7198 my $break_before = sub { 7199 foreach my $tok (@_) { 7200 my $lbs = $left_bond_strength{$tok}; 7201 my $rbs = $right_bond_strength{$tok}; 7202 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { 7203 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7204 ( $lbs, $rbs ); 7205 } 7206 } 7207 }; 7208 7209 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); 7210 $break_before->(@all_operators) 7211 if ( $rOpts->{'break-before-all-operators'} ); 7212 7213 $break_after->( split_words( $rOpts->{'want-break-after'} ) ); 7214 $break_before->( split_words( $rOpts->{'want-break-before'} ) ); 7215 7216 # make note if breaks are before certain key types 7217 %want_break_before = (); 7218 foreach my $tok ( @all_operators, ',' ) { 7219 $want_break_before{$tok} = 7220 $left_bond_strength{$tok} < $right_bond_strength{$tok}; 7221 } 7222 7223 # Coordinate ?/: breaks, which must be similar 7224 if ( !$want_break_before{':'} ) { 7225 $want_break_before{'?'} = $want_break_before{':'}; 7226 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; 7227 $left_bond_strength{'?'} = NO_BREAK; 7228 } 7229 7230 # Define here tokens which may follow the closing brace of a do statement 7231 # on the same line, as in: 7232 # } while ( $something); 7233 @_ = qw(until while unless if ; : ); 7234 push @_, ','; 7235 @is_do_follower{@_} = (1) x scalar(@_); 7236 7237 # These tokens may follow the closing brace of an if or elsif block. 7238 # In other words, for cuddled else we want code to look like: 7239 # } elsif ( $something) { 7240 # } else { 7241 if ( $rOpts->{'cuddled-else'} ) { 7242 @_ = qw(else elsif); 7243 @is_if_brace_follower{@_} = (1) x scalar(@_); 7244 } 7245 else { 7246 %is_if_brace_follower = (); 7247 } 7248 7249 # nothing can follow the closing curly of an else { } block: 7250 %is_else_brace_follower = (); 7251 7252 # what can follow a multi-line anonymous sub definition closing curly: 7253 @_ = qw# ; : => or and && || ~~ !~~ ) #; 7254 push @_, ','; 7255 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); 7256 7257 # what can follow a one-line anonynomous sub closing curly: 7258 # one-line anonumous subs also have ']' here... 7259 # see tk3.t and PP.pm 7260 @_ = qw# ; : => or and && || ) ] ~~ !~~ #; 7261 push @_, ','; 7262 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); 7263 7264 # What can follow a closing curly of a block 7265 # which is not an if/elsif/else/do/sort/map/grep/eval/sub 7266 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' 7267 @_ = qw# ; : => or and && || ) #; 7268 push @_, ','; 7269 7270 # allow cuddled continue if cuddled else is specified 7271 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } 7272 7273 @is_other_brace_follower{@_} = (1) x scalar(@_); 7274 7275 $right_bond_strength{'{'} = WEAK; 7276 $left_bond_strength{'{'} = VERY_STRONG; 7277 7278 # make -l=0 equal to -l=infinite 7279 if ( !$rOpts->{'maximum-line-length'} ) { 7280 $rOpts->{'maximum-line-length'} = 1000000; 7281 } 7282 7283 # make -lbl=0 equal to -lbl=infinite 7284 if ( !$rOpts->{'long-block-line-count'} ) { 7285 $rOpts->{'long-block-line-count'} = 1000000; 7286 } 7287 7288 my $ole = $rOpts->{'output-line-ending'}; 7289 if ($ole) { 7290 my %endings = ( 7291 dos => "\015\012", 7292 win => "\015\012", 7293 mac => "\015", 7294 unix => "\012", 7295 ); 7296 $ole = lc $ole; 7297 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { 7298 my $str = join " ", keys %endings; 7299 die <<EOM; 7300Unrecognized line ending '$ole'; expecting one of: $str 7301EOM 7302 } 7303 if ( $rOpts->{'preserve-line-endings'} ) { 7304 warn "Ignoring -ple; conflicts with -ole\n"; 7305 $rOpts->{'preserve-line-endings'} = undef; 7306 } 7307 } 7308 7309 # hashes used to simplify setting whitespace 7310 %tightness = ( 7311 '{' => $rOpts->{'brace-tightness'}, 7312 '}' => $rOpts->{'brace-tightness'}, 7313 '(' => $rOpts->{'paren-tightness'}, 7314 ')' => $rOpts->{'paren-tightness'}, 7315 '[' => $rOpts->{'square-bracket-tightness'}, 7316 ']' => $rOpts->{'square-bracket-tightness'}, 7317 ); 7318 %matching_token = ( 7319 '{' => '}', 7320 '(' => ')', 7321 '[' => ']', 7322 '?' => ':', 7323 ); 7324 7325 # frequently used parameters 7326 $rOpts_add_newlines = $rOpts->{'add-newlines'}; 7327 $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; 7328 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; 7329 $rOpts_block_brace_vertical_tightness = 7330 $rOpts->{'block-brace-vertical-tightness'}; 7331 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; 7332 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; 7333 $rOpts_break_at_old_ternary_breakpoints = 7334 $rOpts->{'break-at-old-ternary-breakpoints'}; 7335 $rOpts_break_at_old_comma_breakpoints = 7336 $rOpts->{'break-at-old-comma-breakpoints'}; 7337 $rOpts_break_at_old_keyword_breakpoints = 7338 $rOpts->{'break-at-old-keyword-breakpoints'}; 7339 $rOpts_break_at_old_logical_breakpoints = 7340 $rOpts->{'break-at-old-logical-breakpoints'}; 7341 $rOpts_closing_side_comment_else_flag = 7342 $rOpts->{'closing-side-comment-else-flag'}; 7343 $rOpts_closing_side_comment_maximum_text = 7344 $rOpts->{'closing-side-comment-maximum-text'}; 7345 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; 7346 $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; 7347 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; 7348 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; 7349 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 7350 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; 7351 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; 7352 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 7353 $rOpts_short_concatenation_item_length = 7354 $rOpts->{'short-concatenation-item-length'}; 7355 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; 7356 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; 7357 $rOpts_format_skipping = $rOpts->{'format-skipping'}; 7358 $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; 7359 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; 7360 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; 7361 $half_maximum_line_length = $rOpts_maximum_line_length / 2; 7362 7363 # Note that both opening and closing tokens can access the opening 7364 # and closing flags of their container types. 7365 %opening_vertical_tightness = ( 7366 '(' => $rOpts->{'paren-vertical-tightness'}, 7367 '{' => $rOpts->{'brace-vertical-tightness'}, 7368 '[' => $rOpts->{'square-bracket-vertical-tightness'}, 7369 ')' => $rOpts->{'paren-vertical-tightness'}, 7370 '}' => $rOpts->{'brace-vertical-tightness'}, 7371 ']' => $rOpts->{'square-bracket-vertical-tightness'}, 7372 ); 7373 7374 %closing_vertical_tightness = ( 7375 '(' => $rOpts->{'paren-vertical-tightness-closing'}, 7376 '{' => $rOpts->{'brace-vertical-tightness-closing'}, 7377 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7378 ')' => $rOpts->{'paren-vertical-tightness-closing'}, 7379 '}' => $rOpts->{'brace-vertical-tightness-closing'}, 7380 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7381 ); 7382 7383 # assume flag for '>' same as ')' for closing qw quotes 7384 %closing_token_indentation = ( 7385 ')' => $rOpts->{'closing-paren-indentation'}, 7386 '}' => $rOpts->{'closing-brace-indentation'}, 7387 ']' => $rOpts->{'closing-square-bracket-indentation'}, 7388 '>' => $rOpts->{'closing-paren-indentation'}, 7389 ); 7390 7391 %opening_token_right = ( 7392 '(' => $rOpts->{'opening-paren-right'}, 7393 '{' => $rOpts->{'opening-hash-brace-right'}, 7394 '[' => $rOpts->{'opening-square-bracket-right'}, 7395 ); 7396 7397 %stack_opening_token = ( 7398 '(' => $rOpts->{'stack-opening-paren'}, 7399 '{' => $rOpts->{'stack-opening-hash-brace'}, 7400 '[' => $rOpts->{'stack-opening-square-bracket'}, 7401 ); 7402 7403 %stack_closing_token = ( 7404 ')' => $rOpts->{'stack-closing-paren'}, 7405 '}' => $rOpts->{'stack-closing-hash-brace'}, 7406 ']' => $rOpts->{'stack-closing-square-bracket'}, 7407 ); 7408} 7409 7410sub make_static_block_comment_pattern { 7411 7412 # create the pattern used to identify static block comments 7413 $static_block_comment_pattern = '^\s*##'; 7414 7415 # allow the user to change it 7416 if ( $rOpts->{'static-block-comment-prefix'} ) { 7417 my $prefix = $rOpts->{'static-block-comment-prefix'}; 7418 $prefix =~ s/^\s*//; 7419 my $pattern = $prefix; 7420 7421 # user may give leading caret to force matching left comments only 7422 if ( $prefix !~ /^\^#/ ) { 7423 if ( $prefix !~ /^#/ ) { 7424 die 7425"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; 7426 } 7427 $pattern = '^\s*' . $prefix; 7428 } 7429 eval "'##'=~/$pattern/"; 7430 if ($@) { 7431 die 7432"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; 7433 } 7434 $static_block_comment_pattern = $pattern; 7435 } 7436} 7437 7438sub make_format_skipping_pattern { 7439 my ( $opt_name, $default ) = @_; 7440 my $param = $rOpts->{$opt_name}; 7441 unless ($param) { $param = $default } 7442 $param =~ s/^\s*//; 7443 if ( $param !~ /^#/ ) { 7444 die "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; 7445 } 7446 my $pattern = '^' . $param . '\s'; 7447 eval "'#'=~/$pattern/"; 7448 if ($@) { 7449 die 7450"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; 7451 } 7452 return $pattern; 7453} 7454 7455sub make_closing_side_comment_list_pattern { 7456 7457 # turn any input list into a regex for recognizing selected block types 7458 $closing_side_comment_list_pattern = '^\w+'; 7459 if ( defined( $rOpts->{'closing-side-comment-list'} ) 7460 && $rOpts->{'closing-side-comment-list'} ) 7461 { 7462 $closing_side_comment_list_pattern = 7463 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); 7464 } 7465} 7466 7467sub make_bli_pattern { 7468 7469 if ( defined( $rOpts->{'brace-left-and-indent-list'} ) 7470 && $rOpts->{'brace-left-and-indent-list'} ) 7471 { 7472 $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; 7473 } 7474 7475 $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); 7476} 7477 7478sub make_block_brace_vertical_tightness_pattern { 7479 7480 # turn any input list into a regex for recognizing selected block types 7481 $block_brace_vertical_tightness_pattern = 7482 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7483 7484 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) 7485 && $rOpts->{'block-brace-vertical-tightness-list'} ) 7486 { 7487 $block_brace_vertical_tightness_pattern = 7488 make_block_pattern( '-bbvtl', 7489 $rOpts->{'block-brace-vertical-tightness-list'} ); 7490 } 7491} 7492 7493sub make_block_pattern { 7494 7495 # given a string of block-type keywords, return a regex to match them 7496 # The only tricky part is that labels are indicated with a single ':' 7497 # and the 'sub' token text may have additional text after it (name of 7498 # sub). 7499 # 7500 # Example: 7501 # 7502 # input string: "if else elsif unless while for foreach do : sub"; 7503 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7504 7505 my ( $abbrev, $string ) = @_; 7506 my @list = split_words($string); 7507 my @words = (); 7508 my %seen; 7509 for my $i (@list) { 7510 next if $seen{$i}; 7511 $seen{$i} = 1; 7512 if ( $i eq 'sub' ) { 7513 } 7514 elsif ( $i eq ':' ) { 7515 push @words, '\w+:'; 7516 } 7517 elsif ( $i =~ /^\w/ ) { 7518 push @words, $i; 7519 } 7520 else { 7521 warn "unrecognized block type $i after $abbrev, ignoring\n"; 7522 } 7523 } 7524 my $pattern = '(' . join( '|', @words ) . ')$'; 7525 if ( $seen{'sub'} ) { 7526 $pattern = '(' . $pattern . '|sub)'; 7527 } 7528 $pattern = '^' . $pattern; 7529 return $pattern; 7530} 7531 7532sub make_static_side_comment_pattern { 7533 7534 # create the pattern used to identify static side comments 7535 $static_side_comment_pattern = '^##'; 7536 7537 # allow the user to change it 7538 if ( $rOpts->{'static-side-comment-prefix'} ) { 7539 my $prefix = $rOpts->{'static-side-comment-prefix'}; 7540 $prefix =~ s/^\s*//; 7541 my $pattern = '^' . $prefix; 7542 eval "'##'=~/$pattern/"; 7543 if ($@) { 7544 die 7545"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; 7546 } 7547 $static_side_comment_pattern = $pattern; 7548 } 7549} 7550 7551sub make_closing_side_comment_prefix { 7552 7553 # Be sure we have a valid closing side comment prefix 7554 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; 7555 my $csc_prefix_pattern; 7556 if ( !defined($csc_prefix) ) { 7557 $csc_prefix = '## end'; 7558 $csc_prefix_pattern = '^##\s+end'; 7559 } 7560 else { 7561 my $test_csc_prefix = $csc_prefix; 7562 if ( $test_csc_prefix !~ /^#/ ) { 7563 $test_csc_prefix = '#' . $test_csc_prefix; 7564 } 7565 7566 # make a regex to recognize the prefix 7567 my $test_csc_prefix_pattern = $test_csc_prefix; 7568 7569 # escape any special characters 7570 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; 7571 7572 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; 7573 7574 # allow exact number of intermediate spaces to vary 7575 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; 7576 7577 # make sure we have a good pattern 7578 # if we fail this we probably have an error in escaping 7579 # characters. 7580 eval "'##'=~/$test_csc_prefix_pattern/"; 7581 if ($@) { 7582 7583 # shouldn't happen..must have screwed up escaping, above 7584 report_definite_bug(); 7585 warn 7586"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; 7587 7588 # just warn and keep going with defaults 7589 warn "Please consider using a simpler -cscp prefix\n"; 7590 warn "Using default -cscp instead; please check output\n"; 7591 } 7592 else { 7593 $csc_prefix = $test_csc_prefix; 7594 $csc_prefix_pattern = $test_csc_prefix_pattern; 7595 } 7596 } 7597 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; 7598 $closing_side_comment_prefix_pattern = $csc_prefix_pattern; 7599} 7600 7601sub dump_want_left_space { 7602 my $fh = shift; 7603 local $" = "\n"; 7604 print $fh <<EOM; 7605These values are the main control of whitespace to the left of a token type; 7606They may be altered with the -wls parameter. 7607For a list of token types, use perltidy --dump-token-types (-dtt) 7608 1 means the token wants a space to its left 7609-1 means the token does not want a space to its left 7610------------------------------------------------------------------------ 7611EOM 7612 foreach ( sort keys %want_left_space ) { 7613 print $fh "$_\t$want_left_space{$_}\n"; 7614 } 7615} 7616 7617sub dump_want_right_space { 7618 my $fh = shift; 7619 local $" = "\n"; 7620 print $fh <<EOM; 7621These values are the main control of whitespace to the right of a token type; 7622They may be altered with the -wrs parameter. 7623For a list of token types, use perltidy --dump-token-types (-dtt) 7624 1 means the token wants a space to its right 7625-1 means the token does not want a space to its right 7626------------------------------------------------------------------------ 7627EOM 7628 foreach ( sort keys %want_right_space ) { 7629 print $fh "$_\t$want_right_space{$_}\n"; 7630 } 7631} 7632 7633{ # begin is_essential_whitespace 7634 7635 my %is_sort_grep_map; 7636 my %is_for_foreach; 7637 7638 BEGIN { 7639 7640 @_ = qw(sort grep map); 7641 @is_sort_grep_map{@_} = (1) x scalar(@_); 7642 7643 @_ = qw(for foreach); 7644 @is_for_foreach{@_} = (1) x scalar(@_); 7645 7646 } 7647 7648 sub is_essential_whitespace { 7649 7650 # Essential whitespace means whitespace which cannot be safely deleted 7651 # without risking the introduction of a syntax error. 7652 # We are given three tokens and their types: 7653 # ($tokenl, $typel) is the token to the left of the space in question 7654 # ($tokenr, $typer) is the token to the right of the space in question 7655 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl 7656 # 7657 # This is a slow routine but is not needed too often except when -mangle 7658 # is used. 7659 # 7660 # Note: This routine should almost never need to be changed. It is 7661 # for avoiding syntax problems rather than for formatting. 7662 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; 7663 7664 my $result = 7665 7666 # never combine two bare words or numbers 7667 # examples: and ::ok(1) 7668 # return ::spw(...) 7669 # for bla::bla:: abc 7670 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 7671 # $input eq"quit" to make $inputeq"quit" 7672 # my $size=-s::SINK if $file; <==OK but we won't do it 7673 # don't join something like: for bla::bla:: abc 7674 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 7675 ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) 7676 7677 # do not combine a number with a concatination dot 7678 # example: pom.caputo: 7679 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); 7680 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) 7681 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) 7682 7683 # do not join a minus with a bare word, because you might form 7684 # a file test operator. Example from Complex.pm: 7685 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. 7686 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) 7687 7688 # and something like this could become ambiguous without space 7689 # after the '-': 7690 # use constant III=>1; 7691 # $a = $b - III; 7692 # and even this: 7693 # $a = - III; 7694 || ( ( $tokenl eq '-' ) 7695 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) 7696 7697 # '= -' should not become =- or you will get a warning 7698 # about reversed -= 7699 # || ($tokenr eq '-') 7700 7701 # keep a space between a quote and a bareword to prevent the 7702 # bareword from becomming a quote modifier. 7703 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7704 7705 # keep a space between a token ending in '$' and any word; 7706 # this caused trouble: "die @$ if $@" 7707 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) 7708 && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7709 7710 # perl is very fussy about spaces before << 7711 || ( $tokenr =~ /^\<\</ ) 7712 7713 # avoid combining tokens to create new meanings. Example: 7714 # $a+ +$b must not become $a++$b 7715 || ( $is_digraph{ $tokenl . $tokenr } ) 7716 || ( $is_trigraph{ $tokenl . $tokenr } ) 7717 7718 # another example: do not combine these two &'s: 7719 # allow_options & &OPT_EXECCGI 7720 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) 7721 7722 # don't combine $$ or $# with any alphanumeric 7723 # (testfile mangle.t with --mangle) 7724 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) 7725 7726 # retain any space after possible filehandle 7727 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) 7728 || ( $typel eq 'Z' ) 7729 7730 # Perl is sensitive to whitespace after the + here: 7731 # $b = xvals $a + 0.1 * yvals $a; 7732 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ ) 7733 7734 # keep paren separate in 'use Foo::Bar ()' 7735 || ( $tokenr eq '(' 7736 && $typel eq 'w' 7737 && $typell eq 'k' 7738 && $tokenll eq 'use' ) 7739 7740 # keep any space between filehandle and paren: 7741 # file mangle.t with --mangle: 7742 || ( $typel eq 'Y' && $tokenr eq '(' ) 7743 7744 # retain any space after here doc operator ( hereerr.t) 7745 || ( $typel eq 'h' ) 7746 7747 # be careful with a space around ++ and --, to avoid ambiguity as to 7748 # which token it applies 7749 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) 7750 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) 7751 7752 # need space after foreach my; for example, this will fail in 7753 # older versions of Perl: 7754 # foreach my$ft(@filetypes)... 7755 || ( 7756 $tokenl eq 'my' 7757 7758 # /^(for|foreach)$/ 7759 && $is_for_foreach{$tokenll} 7760 && $tokenr =~ /^\$/ 7761 ) 7762 7763 # must have space between grep and left paren; "grep(" will fail 7764 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) 7765 7766 # don't stick numbers next to left parens, as in: 7767 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) 7768 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) 7769 7770 # We must be sure that a space between a ? and a quoted string 7771 # remains if the space before the ? remains. [Loca.pm, lockarea] 7772 # ie, 7773 # $b=join $comma ? ',' : ':', @_; # ok 7774 # $b=join $comma?',' : ':', @_; # ok! 7775 # $b=join $comma ?',' : ':', @_; # error! 7776 # Not really required: 7777 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) 7778 7779 # do not remove space between an '&' and a bare word because 7780 # it may turn into a function evaluation, like here 7781 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] 7782 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); 7783 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7784 7785 ; # the value of this long logic sequence is the result we want 7786 return $result; 7787 } 7788} 7789 7790sub set_white_space_flag { 7791 7792 # This routine examines each pair of nonblank tokens and 7793 # sets values for array @white_space_flag. 7794 # 7795 # $white_space_flag[$j] is a flag indicating whether a white space 7796 # BEFORE token $j is needed, with the following values: 7797 # 7798 # -1 do not want a space before token $j 7799 # 0 optional space or $j is a whitespace 7800 # 1 want a space before token $j 7801 # 7802 # 7803 # The values for the first token will be defined based 7804 # upon the contents of the "to_go" output array. 7805 # 7806 # Note: retain debug print statements because they are usually 7807 # required after adding new token types. 7808 7809 BEGIN { 7810 7811 # initialize these global hashes, which control the use of 7812 # whitespace around tokens: 7813 # 7814 # %binary_ws_rules 7815 # %want_left_space 7816 # %want_right_space 7817 # %space_after_keyword 7818 # 7819 # Many token types are identical to the tokens themselves. 7820 # See the tokenizer for a complete list. Here are some special types: 7821 # k = perl keyword 7822 # f = semicolon in for statement 7823 # m = unary minus 7824 # p = unary plus 7825 # Note that :: is excluded since it should be contained in an identifier 7826 # Note that '->' is excluded because it never gets space 7827 # parentheses and brackets are excluded since they are handled specially 7828 # curly braces are included but may be overridden by logic, such as 7829 # newline logic. 7830 7831 # NEW_TOKENS: create a whitespace rule here. This can be as 7832 # simple as adding your new letter to @spaces_both_sides, for 7833 # example. 7834 7835 @_ = qw" L { ( [ "; 7836 @is_opening_type{@_} = (1) x scalar(@_); 7837 7838 @_ = qw" R } ) ] "; 7839 @is_closing_type{@_} = (1) x scalar(@_); 7840 7841 my @spaces_both_sides = qw" 7842 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= 7843 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ 7844 &&= ||= //= <=> A k f w F n C Y U G v 7845 "; 7846 7847 my @spaces_left_side = qw" 7848 t ! ~ m p { \ h pp mm Z j 7849 "; 7850 push( @spaces_left_side, '#' ); # avoids warning message 7851 7852 my @spaces_right_side = qw" 7853 ; } ) ] R J ++ -- **= 7854 "; 7855 push( @spaces_right_side, ',' ); # avoids warning message 7856 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); 7857 @want_right_space{@spaces_both_sides} = 7858 (1) x scalar(@spaces_both_sides); 7859 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); 7860 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); 7861 @want_left_space{@spaces_right_side} = 7862 (-1) x scalar(@spaces_right_side); 7863 @want_right_space{@spaces_right_side} = 7864 (1) x scalar(@spaces_right_side); 7865 $want_left_space{'L'} = WS_NO; 7866 $want_left_space{'->'} = WS_NO; 7867 $want_right_space{'->'} = WS_NO; 7868 $want_left_space{'**'} = WS_NO; 7869 $want_right_space{'**'} = WS_NO; 7870 7871 # hash type information must stay tightly bound 7872 # as in : ${xxxx} 7873 $binary_ws_rules{'i'}{'L'} = WS_NO; 7874 $binary_ws_rules{'i'}{'{'} = WS_YES; 7875 $binary_ws_rules{'k'}{'{'} = WS_YES; 7876 $binary_ws_rules{'U'}{'{'} = WS_YES; 7877 $binary_ws_rules{'i'}{'['} = WS_NO; 7878 $binary_ws_rules{'R'}{'L'} = WS_NO; 7879 $binary_ws_rules{'R'}{'{'} = WS_NO; 7880 $binary_ws_rules{'t'}{'L'} = WS_NO; 7881 $binary_ws_rules{'t'}{'{'} = WS_NO; 7882 $binary_ws_rules{'}'}{'L'} = WS_NO; 7883 $binary_ws_rules{'}'}{'{'} = WS_NO; 7884 $binary_ws_rules{'$'}{'L'} = WS_NO; 7885 $binary_ws_rules{'$'}{'{'} = WS_NO; 7886 $binary_ws_rules{'@'}{'L'} = WS_NO; 7887 $binary_ws_rules{'@'}{'{'} = WS_NO; 7888 $binary_ws_rules{'='}{'L'} = WS_YES; 7889 7890 # the following includes ') {' 7891 # as in : if ( xxx ) { yyy } 7892 $binary_ws_rules{']'}{'L'} = WS_NO; 7893 $binary_ws_rules{']'}{'{'} = WS_NO; 7894 $binary_ws_rules{')'}{'{'} = WS_YES; 7895 $binary_ws_rules{')'}{'['} = WS_NO; 7896 $binary_ws_rules{']'}{'['} = WS_NO; 7897 $binary_ws_rules{']'}{'{'} = WS_NO; 7898 $binary_ws_rules{'}'}{'['} = WS_NO; 7899 $binary_ws_rules{'R'}{'['} = WS_NO; 7900 7901 $binary_ws_rules{']'}{'++'} = WS_NO; 7902 $binary_ws_rules{']'}{'--'} = WS_NO; 7903 $binary_ws_rules{')'}{'++'} = WS_NO; 7904 $binary_ws_rules{')'}{'--'} = WS_NO; 7905 7906 $binary_ws_rules{'R'}{'++'} = WS_NO; 7907 $binary_ws_rules{'R'}{'--'} = WS_NO; 7908 7909 ######################################################## 7910 # should no longer be necessary (see niek.pl) 7911 ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label 7912 ##$binary_ws_rules{'w'}{':'} = WS_NO; 7913 ######################################################## 7914 $binary_ws_rules{'i'}{'Q'} = WS_YES; 7915 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' 7916 7917 # FIXME: we need to split 'i' into variables and functions 7918 # and have no space for functions but space for variables. For now, 7919 # I have a special patch in the special rules below 7920 $binary_ws_rules{'i'}{'('} = WS_NO; 7921 7922 $binary_ws_rules{'w'}{'('} = WS_NO; 7923 $binary_ws_rules{'w'}{'{'} = WS_YES; 7924 } 7925 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; 7926 my ( $last_token, $last_type, $last_block_type, $token, $type, 7927 $block_type ); 7928 my (@white_space_flag); 7929 my $j_tight_closing_paren = -1; 7930 7931 if ( $max_index_to_go >= 0 ) { 7932 $token = $tokens_to_go[$max_index_to_go]; 7933 $type = $types_to_go[$max_index_to_go]; 7934 $block_type = $block_type_to_go[$max_index_to_go]; 7935 } 7936 else { 7937 $token = ' '; 7938 $type = 'b'; 7939 $block_type = ''; 7940 } 7941 7942 # loop over all tokens 7943 my ( $j, $ws ); 7944 7945 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 7946 7947 if ( $$rtoken_type[$j] eq 'b' ) { 7948 $white_space_flag[$j] = WS_OPTIONAL; 7949 next; 7950 } 7951 7952 # set a default value, to be changed as needed 7953 $ws = undef; 7954 $last_token = $token; 7955 $last_type = $type; 7956 $last_block_type = $block_type; 7957 $token = $$rtokens[$j]; 7958 $type = $$rtoken_type[$j]; 7959 $block_type = $$rblock_type[$j]; 7960 7961 #--------------------------------------------------------------- 7962 # section 1: 7963 # handle space on the inside of opening braces 7964 #--------------------------------------------------------------- 7965 7966 # /^[L\{\(\[]$/ 7967 if ( $is_opening_type{$last_type} ) { 7968 7969 $j_tight_closing_paren = -1; 7970 7971 # let's keep empty matched braces together: () {} [] 7972 # except for BLOCKS 7973 if ( $token eq $matching_token{$last_token} ) { 7974 if ($block_type) { 7975 $ws = WS_YES; 7976 } 7977 else { 7978 $ws = WS_NO; 7979 } 7980 } 7981 else { 7982 7983 # we're considering the right of an opening brace 7984 # tightness = 0 means always pad inside with space 7985 # tightness = 1 means pad inside if "complex" 7986 # tightness = 2 means never pad inside with space 7987 7988 my $tightness; 7989 if ( $last_type eq '{' 7990 && $last_token eq '{' 7991 && $last_block_type ) 7992 { 7993 $tightness = $rOpts_block_brace_tightness; 7994 } 7995 else { $tightness = $tightness{$last_token} } 7996 7997 #================================================================= 7998 # Patch for fabrice_bug.pl 7999 # We must always avoid spaces around a bare word beginning with ^ as in: 8000 # my $before = ${^PREMATCH}; 8001 # Because all of the following cause an error in perl: 8002 # my $before = ${ ^PREMATCH }; 8003 # my $before = ${ ^PREMATCH}; 8004 # my $before = ${^PREMATCH }; 8005 # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1. 8006 # Note that here we must set tightness=1 and not 2 so that the closing space 8007 # is also avoided (via the $j_tight_closing_paren flag in coding) 8008 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } 8009 8010 #================================================================= 8011 8012 if ( $tightness <= 0 ) { 8013 $ws = WS_YES; 8014 } 8015 elsif ( $tightness > 1 ) { 8016 $ws = WS_NO; 8017 } 8018 else { 8019 8020 # Patch to count '-foo' as single token so that 8021 # each of $a{-foo} and $a{foo} and $a{'foo'} do 8022 # not get spaces with default formatting. 8023 my $j_here = $j; 8024 ++$j_here 8025 if ( $token eq '-' 8026 && $last_token eq '{' 8027 && $$rtoken_type[ $j + 1 ] eq 'w' ); 8028 8029 # $j_next is where a closing token should be if 8030 # the container has a single token 8031 my $j_next = 8032 ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) 8033 ? $j_here + 2 8034 : $j_here + 1; 8035 my $tok_next = $$rtokens[$j_next]; 8036 my $type_next = $$rtoken_type[$j_next]; 8037 8038 # for tightness = 1, if there is just one token 8039 # within the matching pair, we will keep it tight 8040 if ( 8041 $tok_next eq $matching_token{$last_token} 8042 8043 # but watch out for this: [ [ ] (misc.t) 8044 && $last_token ne $token 8045 ) 8046 { 8047 8048 # remember where to put the space for the closing paren 8049 $j_tight_closing_paren = $j_next; 8050 $ws = WS_NO; 8051 } 8052 else { 8053 $ws = WS_YES; 8054 } 8055 } 8056 } 8057 } # done with opening braces and brackets 8058 my $ws_1 = $ws 8059 if FORMATTER_DEBUG_FLAG_WHITE; 8060 8061 #--------------------------------------------------------------- 8062 # section 2: 8063 # handle space on inside of closing brace pairs 8064 #--------------------------------------------------------------- 8065 8066 # /[\}\)\]R]/ 8067 if ( $is_closing_type{$type} ) { 8068 8069 if ( $j == $j_tight_closing_paren ) { 8070 8071 $j_tight_closing_paren = -1; 8072 $ws = WS_NO; 8073 } 8074 else { 8075 8076 if ( !defined($ws) ) { 8077 8078 my $tightness; 8079 if ( $type eq '}' && $token eq '}' && $block_type ) { 8080 $tightness = $rOpts_block_brace_tightness; 8081 } 8082 else { $tightness = $tightness{$token} } 8083 8084 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; 8085 } 8086 } 8087 } 8088 8089 my $ws_2 = $ws 8090 if FORMATTER_DEBUG_FLAG_WHITE; 8091 8092 #--------------------------------------------------------------- 8093 # section 3: 8094 # use the binary table 8095 #--------------------------------------------------------------- 8096 if ( !defined($ws) ) { 8097 $ws = $binary_ws_rules{$last_type}{$type}; 8098 } 8099 my $ws_3 = $ws 8100 if FORMATTER_DEBUG_FLAG_WHITE; 8101 8102 #--------------------------------------------------------------- 8103 # section 4: 8104 # some special cases 8105 #--------------------------------------------------------------- 8106 if ( $token eq '(' ) { 8107 8108 # This will have to be tweaked as tokenization changes. 8109 # We usually want a space at '} (', for example: 8110 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); 8111 # 8112 # But not others: 8113 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); 8114 # At present, the above & block is marked as type L/R so this case 8115 # won't go through here. 8116 if ( $last_type eq '}' ) { $ws = WS_YES } 8117 8118 # NOTE: some older versions of Perl had occasional problems if 8119 # spaces are introduced between keywords or functions and opening 8120 # parens. So the default is not to do this except is certain 8121 # cases. The current Perl seems to tolerate spaces. 8122 8123 # Space between keyword and '(' 8124 elsif ( $last_type eq 'k' ) { 8125 $ws = WS_NO 8126 unless ( $rOpts_space_keyword_paren 8127 || $space_after_keyword{$last_token} ); 8128 } 8129 8130 # Space between function and '(' 8131 # ----------------------------------------------------- 8132 # 'w' and 'i' checks for something like: 8133 # myfun( &myfun( ->myfun( 8134 # ----------------------------------------------------- 8135 elsif (( $last_type =~ /^[wUG]$/ ) 8136 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) 8137 { 8138 $ws = WS_NO unless ($rOpts_space_function_paren); 8139 } 8140 8141 # space between something like $i and ( in 8142 # for $i ( 0 .. 20 ) { 8143 # FIXME: eventually, type 'i' needs to be split into multiple 8144 # token types so this can be a hardwired rule. 8145 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { 8146 $ws = WS_YES; 8147 } 8148 8149 # allow constant function followed by '()' to retain no space 8150 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { 8151 $ws = WS_NO; 8152 } 8153 } 8154 8155 # patch for SWITCH/CASE: make space at ']{' optional 8156 # since the '{' might begin a case or when block 8157 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { 8158 $ws = WS_OPTIONAL; 8159 } 8160 8161 # keep space between 'sub' and '{' for anonymous sub definition 8162 if ( $type eq '{' ) { 8163 if ( $last_token eq 'sub' ) { 8164 $ws = WS_YES; 8165 } 8166 8167 # this is needed to avoid no space in '){' 8168 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } 8169 8170 # avoid any space before the brace or bracket in something like 8171 # @opts{'a','b',...} 8172 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { 8173 $ws = WS_NO; 8174 } 8175 } 8176 8177 elsif ( $type eq 'i' ) { 8178 8179 # never a space before -> 8180 if ( $token =~ /^\-\>/ ) { 8181 $ws = WS_NO; 8182 } 8183 } 8184 8185 # retain any space between '-' and bare word 8186 elsif ( $type eq 'w' || $type eq 'C' ) { 8187 $ws = WS_OPTIONAL if $last_type eq '-'; 8188 8189 # never a space before -> 8190 if ( $token =~ /^\-\>/ ) { 8191 $ws = WS_NO; 8192 } 8193 } 8194 8195 # retain any space between '-' and bare word 8196 # example: avoid space between 'USER' and '-' here: 8197 # $myhash{USER-NAME}='steve'; 8198 elsif ( $type eq 'm' || $type eq '-' ) { 8199 $ws = WS_OPTIONAL if ( $last_type eq 'w' ); 8200 } 8201 8202 # always space before side comment 8203 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } 8204 8205 # always preserver whatever space was used after a possible 8206 # filehandle (except _) or here doc operator 8207 if ( 8208 $type ne '#' 8209 && ( ( $last_type eq 'Z' && $last_token ne '_' ) 8210 || $last_type eq 'h' ) 8211 ) 8212 { 8213 $ws = WS_OPTIONAL; 8214 } 8215 8216 my $ws_4 = $ws 8217 if FORMATTER_DEBUG_FLAG_WHITE; 8218 8219 #--------------------------------------------------------------- 8220 # section 5: 8221 # default rules not covered above 8222 #--------------------------------------------------------------- 8223 # if we fall through to here, 8224 # look at the pre-defined hash tables for the two tokens, and 8225 # if (they are equal) use the common value 8226 # if (either is zero or undef) use the other 8227 # if (either is -1) use it 8228 # That is, 8229 # left vs right 8230 # 1 vs 1 --> 1 8231 # 0 vs 0 --> 0 8232 # -1 vs -1 --> -1 8233 # 8234 # 0 vs -1 --> -1 8235 # 0 vs 1 --> 1 8236 # 1 vs 0 --> 1 8237 # -1 vs 0 --> -1 8238 # 8239 # -1 vs 1 --> -1 8240 # 1 vs -1 --> -1 8241 if ( !defined($ws) ) { 8242 my $wl = $want_left_space{$type}; 8243 my $wr = $want_right_space{$last_type}; 8244 if ( !defined($wl) ) { $wl = 0 } 8245 if ( !defined($wr) ) { $wr = 0 } 8246 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; 8247 } 8248 8249 if ( !defined($ws) ) { 8250 $ws = 0; 8251 write_diagnostics( 8252 "WS flag is undefined for tokens $last_token $token\n"); 8253 } 8254 8255 # Treat newline as a whitespace. Otherwise, we might combine 8256 # 'Send' and '-recipients' here according to the above rules: 8257 # my $msg = new Fax::Send 8258 # -recipients => $to, 8259 # -data => $data; 8260 if ( $ws == 0 && $j == 0 ) { $ws = 1 } 8261 8262 if ( ( $ws == 0 ) 8263 && $j > 0 8264 && $j < $jmax 8265 && ( $last_type !~ /^[Zh]$/ ) ) 8266 { 8267 8268 # If this happens, we have a non-fatal but undesirable 8269 # hole in the above rules which should be patched. 8270 write_diagnostics( 8271 "WS flag is zero for tokens $last_token $token\n"); 8272 } 8273 $white_space_flag[$j] = $ws; 8274 8275 FORMATTER_DEBUG_FLAG_WHITE && do { 8276 my $str = substr( $last_token, 0, 15 ); 8277 $str .= ' ' x ( 16 - length($str) ); 8278 if ( !defined($ws_1) ) { $ws_1 = "*" } 8279 if ( !defined($ws_2) ) { $ws_2 = "*" } 8280 if ( !defined($ws_3) ) { $ws_3 = "*" } 8281 if ( !defined($ws_4) ) { $ws_4 = "*" } 8282 print 8283"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; 8284 }; 8285 } 8286 return \@white_space_flag; 8287} 8288 8289{ # begin print_line_of_tokens 8290 8291 my $rtoken_type; 8292 my $rtokens; 8293 my $rlevels; 8294 my $rslevels; 8295 my $rblock_type; 8296 my $rcontainer_type; 8297 my $rcontainer_environment; 8298 my $rtype_sequence; 8299 my $input_line; 8300 my $rnesting_tokens; 8301 my $rci_levels; 8302 my $rnesting_blocks; 8303 8304 my $in_quote; 8305 my $python_indentation_level; 8306 8307 # These local token variables are stored by store_token_to_go: 8308 my $block_type; 8309 my $ci_level; 8310 my $container_environment; 8311 my $container_type; 8312 my $in_continued_quote; 8313 my $level; 8314 my $nesting_blocks; 8315 my $no_internal_newlines; 8316 my $slevel; 8317 my $token; 8318 my $type; 8319 my $type_sequence; 8320 8321 # routine to pull the jth token from the line of tokens 8322 sub extract_token { 8323 my $j = shift; 8324 $token = $$rtokens[$j]; 8325 $type = $$rtoken_type[$j]; 8326 $block_type = $$rblock_type[$j]; 8327 $container_type = $$rcontainer_type[$j]; 8328 $container_environment = $$rcontainer_environment[$j]; 8329 $type_sequence = $$rtype_sequence[$j]; 8330 $level = $$rlevels[$j]; 8331 $slevel = $$rslevels[$j]; 8332 $nesting_blocks = $$rnesting_blocks[$j]; 8333 $ci_level = $$rci_levels[$j]; 8334 } 8335 8336 { 8337 my @saved_token; 8338 8339 sub save_current_token { 8340 8341 @saved_token = ( 8342 $block_type, $ci_level, 8343 $container_environment, $container_type, 8344 $in_continued_quote, $level, 8345 $nesting_blocks, $no_internal_newlines, 8346 $slevel, $token, 8347 $type, $type_sequence, 8348 ); 8349 } 8350 8351 sub restore_current_token { 8352 ( 8353 $block_type, $ci_level, 8354 $container_environment, $container_type, 8355 $in_continued_quote, $level, 8356 $nesting_blocks, $no_internal_newlines, 8357 $slevel, $token, 8358 $type, $type_sequence, 8359 ) = @saved_token; 8360 } 8361 } 8362 8363 # Routine to place the current token into the output stream. 8364 # Called once per output token. 8365 sub store_token_to_go { 8366 8367 my $flag = $no_internal_newlines; 8368 if ( $_[0] ) { $flag = 1 } 8369 8370 $tokens_to_go[ ++$max_index_to_go ] = $token; 8371 $types_to_go[$max_index_to_go] = $type; 8372 $nobreak_to_go[$max_index_to_go] = $flag; 8373 $old_breakpoint_to_go[$max_index_to_go] = 0; 8374 $forced_breakpoint_to_go[$max_index_to_go] = 0; 8375 $block_type_to_go[$max_index_to_go] = $block_type; 8376 $type_sequence_to_go[$max_index_to_go] = $type_sequence; 8377 $container_environment_to_go[$max_index_to_go] = $container_environment; 8378 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; 8379 $ci_levels_to_go[$max_index_to_go] = $ci_level; 8380 $mate_index_to_go[$max_index_to_go] = -1; 8381 $matching_token_to_go[$max_index_to_go] = ''; 8382 $bond_strength_to_go[$max_index_to_go] = 0; 8383 8384 # Note: negative levels are currently retained as a diagnostic so that 8385 # the 'final indentation level' is correctly reported for bad scripts. 8386 # But this means that every use of $level as an index must be checked. 8387 # If this becomes too much of a problem, we might give up and just clip 8388 # them at zero. 8389 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; 8390 $levels_to_go[$max_index_to_go] = $level; 8391 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; 8392 $lengths_to_go[ $max_index_to_go + 1 ] = 8393 $lengths_to_go[$max_index_to_go] + length($token); 8394 8395 # Define the indentation that this token would have if it started 8396 # a new line. We have to do this now because we need to know this 8397 # when considering one-line blocks. 8398 set_leading_whitespace( $level, $ci_level, $in_continued_quote ); 8399 8400 if ( $type ne 'b' ) { 8401 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; 8402 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; 8403 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; 8404 $last_nonblank_index_to_go = $max_index_to_go; 8405 $last_nonblank_type_to_go = $type; 8406 $last_nonblank_token_to_go = $token; 8407 if ( $type eq ',' ) { 8408 $comma_count_in_batch++; 8409 } 8410 } 8411 8412 FORMATTER_DEBUG_FLAG_STORE && do { 8413 my ( $a, $b, $c ) = caller(); 8414 print 8415"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; 8416 }; 8417 } 8418 8419 sub insert_new_token_to_go { 8420 8421 # insert a new token into the output stream. use same level as 8422 # previous token; assumes a character at max_index_to_go. 8423 save_current_token(); 8424 ( $token, $type, $slevel, $no_internal_newlines ) = @_; 8425 8426 if ( $max_index_to_go == UNDEFINED_INDEX ) { 8427 warning("code bug: bad call to insert_new_token_to_go\n"); 8428 } 8429 $level = $levels_to_go[$max_index_to_go]; 8430 8431 # FIXME: it seems to be necessary to use the next, rather than 8432 # previous, value of this variable when creating a new blank (align.t) 8433 #my $slevel = $nesting_depth_to_go[$max_index_to_go]; 8434 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 8435 $ci_level = $ci_levels_to_go[$max_index_to_go]; 8436 $container_environment = $container_environment_to_go[$max_index_to_go]; 8437 $in_continued_quote = 0; 8438 $block_type = ""; 8439 $type_sequence = ""; 8440 store_token_to_go(); 8441 restore_current_token(); 8442 return; 8443 } 8444 8445 sub print_line_of_tokens { 8446 8447 my $line_of_tokens = shift; 8448 8449 # This routine is called once per input line to process all of 8450 # the tokens on that line. This is the first stage of 8451 # beautification. 8452 # 8453 # Full-line comments and blank lines may be processed immediately. 8454 # 8455 # For normal lines of code, the tokens are stored one-by-one, 8456 # via calls to 'sub store_token_to_go', until a known line break 8457 # point is reached. Then, the batch of collected tokens is 8458 # passed along to 'sub output_line_to_go' for further 8459 # processing. This routine decides if there should be 8460 # whitespace between each pair of non-white tokens, so later 8461 # routines only need to decide on any additional line breaks. 8462 # Any whitespace is initally a single space character. Later, 8463 # the vertical aligner may expand that to be multiple space 8464 # characters if necessary for alignment. 8465 8466 # extract input line number for error messages 8467 $input_line_number = $line_of_tokens->{_line_number}; 8468 8469 $rtoken_type = $line_of_tokens->{_rtoken_type}; 8470 $rtokens = $line_of_tokens->{_rtokens}; 8471 $rlevels = $line_of_tokens->{_rlevels}; 8472 $rslevels = $line_of_tokens->{_rslevels}; 8473 $rblock_type = $line_of_tokens->{_rblock_type}; 8474 $rcontainer_type = $line_of_tokens->{_rcontainer_type}; 8475 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; 8476 $rtype_sequence = $line_of_tokens->{_rtype_sequence}; 8477 $input_line = $line_of_tokens->{_line_text}; 8478 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 8479 $rci_levels = $line_of_tokens->{_rci_levels}; 8480 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 8481 8482 $in_continued_quote = $starting_in_quote = 8483 $line_of_tokens->{_starting_in_quote}; 8484 $in_quote = $line_of_tokens->{_ending_in_quote}; 8485 $ending_in_quote = $in_quote; 8486 $python_indentation_level = 8487 $line_of_tokens->{_python_indentation_level}; 8488 8489 my $j; 8490 my $j_next; 8491 my $jmax; 8492 my $next_nonblank_token; 8493 my $next_nonblank_token_type; 8494 my $rwhite_space_flag; 8495 8496 $jmax = @$rtokens - 1; 8497 $block_type = ""; 8498 $container_type = ""; 8499 $container_environment = ""; 8500 $type_sequence = ""; 8501 $no_internal_newlines = 1 - $rOpts_add_newlines; 8502 $is_static_block_comment = 0; 8503 8504 # Handle a continued quote.. 8505 if ($in_continued_quote) { 8506 8507 # A line which is entirely a quote or pattern must go out 8508 # verbatim. Note: the \n is contained in $input_line. 8509 if ( $jmax <= 0 ) { 8510 if ( ( $input_line =~ "\t" ) ) { 8511 note_embedded_tab(); 8512 } 8513 write_unindented_line("$input_line"); 8514 $last_line_had_side_comment = 0; 8515 return; 8516 } 8517 8518 # prior to version 20010406, perltidy had a bug which placed 8519 # continuation indentation before the last line of some multiline 8520 # quotes and patterns -- exactly the lines passing this way. 8521 # To help find affected lines in scripts run with these 8522 # versions, run with '-chk', and it will warn of any quotes or 8523 # patterns which might have been modified by these early 8524 # versions. 8525 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { 8526 warning( 8527"-chk: please check this line for extra leading whitespace\n" 8528 ); 8529 } 8530 } 8531 8532 # Write line verbatim if we are in a formatting skip section 8533 if ($in_format_skipping_section) { 8534 write_unindented_line("$input_line"); 8535 $last_line_had_side_comment = 0; 8536 8537 # Note: extra space appended to comment simplifies pattern matching 8538 if ( $jmax == 0 8539 && $$rtoken_type[0] eq '#' 8540 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) 8541 { 8542 $in_format_skipping_section = 0; 8543 write_logfile_entry("Exiting formatting skip section\n"); 8544 } 8545 return; 8546 } 8547 8548 # See if we are entering a formatting skip section 8549 if ( $rOpts_format_skipping 8550 && $jmax == 0 8551 && $$rtoken_type[0] eq '#' 8552 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) 8553 { 8554 flush(); 8555 $in_format_skipping_section = 1; 8556 write_logfile_entry("Entering formatting skip section\n"); 8557 write_unindented_line("$input_line"); 8558 $last_line_had_side_comment = 0; 8559 return; 8560 } 8561 8562 # delete trailing blank tokens 8563 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } 8564 8565 # Handle a blank line.. 8566 if ( $jmax < 0 ) { 8567 8568 # If keep-old-blank-lines is zero, we delete all 8569 # old blank lines and let the blank line rules generate any 8570 # needed blanks. 8571 if ($rOpts_keep_old_blank_lines) { 8572 flush(); 8573 $file_writer_object->write_blank_code_line( 8574 $rOpts_keep_old_blank_lines == 2 ); 8575 $last_line_leading_type = 'b'; 8576 } 8577 $last_line_had_side_comment = 0; 8578 return; 8579 } 8580 8581 # see if this is a static block comment (starts with ## by default) 8582 my $is_static_block_comment_without_leading_space = 0; 8583 if ( $jmax == 0 8584 && $$rtoken_type[0] eq '#' 8585 && $rOpts->{'static-block-comments'} 8586 && $input_line =~ /$static_block_comment_pattern/o ) 8587 { 8588 $is_static_block_comment = 1; 8589 $is_static_block_comment_without_leading_space = 8590 substr( $input_line, 0, 1 ) eq '#'; 8591 } 8592 8593 # Check for comments which are line directives 8594 # Treat exactly as static block comments without leading space 8595 # reference: perlsyn, near end, section Plain Old Comments (Not!) 8596 # example: '# line 42 "new_filename.plx"' 8597 if ( 8598 $jmax == 0 8599 && $$rtoken_type[0] eq '#' 8600 && $input_line =~ /^\# \s* 8601 line \s+ (\d+) \s* 8602 (?:\s("?)([^"]+)\2)? \s* 8603 $/x 8604 ) 8605 { 8606 $is_static_block_comment = 1; 8607 $is_static_block_comment_without_leading_space = 1; 8608 } 8609 8610 # create a hanging side comment if appropriate 8611 if ( 8612 $jmax == 0 8613 && $$rtoken_type[0] eq '#' # only token is a comment 8614 && $last_line_had_side_comment # last line had side comment 8615 && $input_line =~ /^\s/ # there is some leading space 8616 && !$is_static_block_comment # do not make static comment hanging 8617 && $rOpts->{'hanging-side-comments'} # user is allowing this 8618 ) 8619 { 8620 8621 # We will insert an empty qw string at the start of the token list 8622 # to force this comment to be a side comment. The vertical aligner 8623 # should then line it up with the previous side comment. 8624 unshift @$rtoken_type, 'q'; 8625 unshift @$rtokens, ''; 8626 unshift @$rlevels, $$rlevels[0]; 8627 unshift @$rslevels, $$rslevels[0]; 8628 unshift @$rblock_type, ''; 8629 unshift @$rcontainer_type, ''; 8630 unshift @$rcontainer_environment, ''; 8631 unshift @$rtype_sequence, ''; 8632 unshift @$rnesting_tokens, $$rnesting_tokens[0]; 8633 unshift @$rci_levels, $$rci_levels[0]; 8634 unshift @$rnesting_blocks, $$rnesting_blocks[0]; 8635 $jmax = 1; 8636 } 8637 8638 # remember if this line has a side comment 8639 $last_line_had_side_comment = 8640 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); 8641 8642 # Handle a block (full-line) comment.. 8643 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { 8644 8645 if ( $rOpts->{'delete-block-comments'} ) { return } 8646 8647 if ( $rOpts->{'tee-block-comments'} ) { 8648 $file_writer_object->tee_on(); 8649 } 8650 8651 destroy_one_line_block(); 8652 output_line_to_go(); 8653 8654 # output a blank line before block comments 8655 if ( 8656 $last_line_leading_type !~ /^[#b]$/ 8657 && $rOpts->{'blanks-before-comments'} # only if allowed 8658 && ! 8659 $is_static_block_comment # never before static block comments 8660 ) 8661 { 8662 flush(); # switching to new output stream 8663 $file_writer_object->write_blank_code_line(); 8664 $last_line_leading_type = 'b'; 8665 } 8666 8667 # TRIM COMMENTS -- This could be turned off as a option 8668 $$rtokens[0] =~ s/\s*$//; # trim right end 8669 8670 if ( 8671 $rOpts->{'indent-block-comments'} 8672 && ( !$rOpts->{'indent-spaced-block-comments'} 8673 || $input_line =~ /^\s+/ ) 8674 && !$is_static_block_comment_without_leading_space 8675 ) 8676 { 8677 extract_token(0); 8678 store_token_to_go(); 8679 output_line_to_go(); 8680 } 8681 else { 8682 flush(); # switching to new output stream 8683 $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); 8684 $last_line_leading_type = '#'; 8685 } 8686 if ( $rOpts->{'tee-block-comments'} ) { 8687 $file_writer_object->tee_off(); 8688 } 8689 return; 8690 } 8691 8692 # compare input/output indentation except for continuation lines 8693 # (because they have an unknown amount of initial blank space) 8694 # and lines which are quotes (because they may have been outdented) 8695 # Note: this test is placed here because we know the continuation flag 8696 # at this point, which allows us to avoid non-meaningful checks. 8697 my $structural_indentation_level = $$rlevels[0]; 8698 compare_indentation_levels( $python_indentation_level, 8699 $structural_indentation_level ) 8700 unless ( $python_indentation_level < 0 8701 || ( $$rci_levels[0] > 0 ) 8702 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) 8703 ); 8704 8705 # Patch needed for MakeMaker. Do not break a statement 8706 # in which $VERSION may be calculated. See MakeMaker.pm; 8707 # this is based on the coding in it. 8708 # The first line of a file that matches this will be eval'd: 8709 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 8710 # Examples: 8711 # *VERSION = \'1.01'; 8712 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; 8713 # We will pass such a line straight through without breaking 8714 # it unless -npvl is used 8715 8716 my $is_VERSION_statement = 0; 8717 8718 if ( 8719 !$saw_VERSION_in_this_file 8720 && $input_line =~ /VERSION/ # quick check to reject most lines 8721 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 8722 ) 8723 { 8724 $saw_VERSION_in_this_file = 1; 8725 $is_VERSION_statement = 1; 8726 write_logfile_entry("passing VERSION line; -npvl deactivates\n"); 8727 $no_internal_newlines = 1; 8728 } 8729 8730 # take care of indentation-only 8731 # NOTE: In previous versions we sent all qw lines out immediately here. 8732 # No longer doing this: also write a line which is entirely a 'qw' list 8733 # to allow stacking of opening and closing tokens. Note that interior 8734 # qw lines will still go out at the end of this routine. 8735 if ( $rOpts->{'indent-only'} ) { 8736 flush(); 8737 trim($input_line); 8738 8739 extract_token(0); 8740 $token = $input_line; 8741 $type = 'q'; 8742 $block_type = ""; 8743 $container_type = ""; 8744 $container_environment = ""; 8745 $type_sequence = ""; 8746 store_token_to_go(); 8747 output_line_to_go(); 8748 return; 8749 } 8750 8751 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding 8752 push( @$rtoken_type, 'b', 'b' ); 8753 ($rwhite_space_flag) = 8754 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); 8755 8756 # find input tabbing to allow checks for tabbing disagreement 8757 ## not used for now 8758 ##$input_line_tabbing = ""; 8759 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } 8760 8761 # if the buffer hasn't been flushed, add a leading space if 8762 # necessary to keep essential whitespace. This is really only 8763 # necessary if we are squeezing out all ws. 8764 if ( $max_index_to_go >= 0 ) { 8765 8766 $old_line_count_in_batch++; 8767 8768 if ( 8769 is_essential_whitespace( 8770 $last_last_nonblank_token, 8771 $last_last_nonblank_type, 8772 $tokens_to_go[$max_index_to_go], 8773 $types_to_go[$max_index_to_go], 8774 $$rtokens[0], 8775 $$rtoken_type[0] 8776 ) 8777 ) 8778 { 8779 my $slevel = $$rslevels[0]; 8780 insert_new_token_to_go( ' ', 'b', $slevel, 8781 $no_internal_newlines ); 8782 } 8783 } 8784 8785 # If we just saw the end of an elsif block, write nag message 8786 # if we do not see another elseif or an else. 8787 if ($looking_for_else) { 8788 8789 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { 8790 write_logfile_entry("(No else block)\n"); 8791 } 8792 $looking_for_else = 0; 8793 } 8794 8795 # This is a good place to kill incomplete one-line blocks 8796 if ( ( $semicolons_before_block_self_destruct == 0 ) 8797 && ( $max_index_to_go >= 0 ) 8798 && ( $types_to_go[$max_index_to_go] eq ';' ) 8799 && ( $$rtokens[0] ne '}' ) ) 8800 { 8801 destroy_one_line_block(); 8802 output_line_to_go(); 8803 } 8804 8805 # loop to process the tokens one-by-one 8806 $type = 'b'; 8807 $token = ""; 8808 8809 foreach $j ( 0 .. $jmax ) { 8810 8811 # pull out the local values for this token 8812 extract_token($j); 8813 8814 if ( $type eq '#' ) { 8815 8816 # trim trailing whitespace 8817 # (there is no option at present to prevent this) 8818 $token =~ s/\s*$//; 8819 8820 if ( 8821 $rOpts->{'delete-side-comments'} 8822 8823 # delete closing side comments if necessary 8824 || ( $rOpts->{'delete-closing-side-comments'} 8825 && $token =~ /$closing_side_comment_prefix_pattern/o 8826 && $last_nonblank_block_type =~ 8827 /$closing_side_comment_list_pattern/o ) 8828 ) 8829 { 8830 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 8831 unstore_token_to_go(); 8832 } 8833 last; 8834 } 8835 } 8836 8837 # If we are continuing after seeing a right curly brace, flush 8838 # buffer unless we see what we are looking for, as in 8839 # } else ... 8840 if ( $rbrace_follower && $type ne 'b' ) { 8841 8842 unless ( $rbrace_follower->{$token} ) { 8843 output_line_to_go(); 8844 } 8845 $rbrace_follower = undef; 8846 } 8847 8848 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; 8849 $next_nonblank_token = $$rtokens[$j_next]; 8850 $next_nonblank_token_type = $$rtoken_type[$j_next]; 8851 8852 #-------------------------------------------------------- 8853 # Start of section to patch token text 8854 #-------------------------------------------------------- 8855 8856 # Modify certain tokens here for whitespace 8857 # The following is not yet done, but could be: 8858 # sub (x x x) 8859 if ( $type =~ /^[wit]$/ ) { 8860 8861 # Examples: 8862 # change '$ var' to '$var' etc 8863 # '-> new' to '->new' 8864 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { 8865 $token =~ s/\s*//g; 8866 } 8867 8868 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } 8869 } 8870 8871 # change 'LABEL :' to 'LABEL:' 8872 elsif ( $type eq 'J' ) { $token =~ s/\s+//g } 8873 8874 # patch to add space to something like "x10" 8875 # This avoids having to split this token in the pre-tokenizer 8876 elsif ( $type eq 'n' ) { 8877 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } 8878 } 8879 8880 elsif ( $type eq 'Q' ) { 8881 note_embedded_tab() if ( $token =~ "\t" ); 8882 8883 # make note of something like '$var = s/xxx/yyy/;' 8884 # in case it should have been '$var =~ s/xxx/yyy/;' 8885 if ( 8886 $token =~ /^(s|tr|y|m|\/)/ 8887 && $last_nonblank_token =~ /^(=|==|!=)$/ 8888 8889 # precededed by simple scalar 8890 && $last_last_nonblank_type eq 'i' 8891 && $last_last_nonblank_token =~ /^\$/ 8892 8893 # followed by some kind of termination 8894 # (but give complaint if we can's see far enough ahead) 8895 && $next_nonblank_token =~ /^[; \)\}]$/ 8896 8897 # scalar is not decleared 8898 && !( 8899 $types_to_go[0] eq 'k' 8900 && $tokens_to_go[0] =~ /^(my|our|local)$/ 8901 ) 8902 ) 8903 { 8904 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; 8905 complain( 8906"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" 8907 ); 8908 } 8909 } 8910 8911 # trim blanks from right of qw quotes 8912 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) 8913 elsif ( $type eq 'q' ) { 8914 $token =~ s/\s*$//; 8915 note_embedded_tab() if ( $token =~ "\t" ); 8916 } 8917 8918 #-------------------------------------------------------- 8919 # End of section to patch token text 8920 #-------------------------------------------------------- 8921 8922 # insert any needed whitespace 8923 if ( ( $type ne 'b' ) 8924 && ( $max_index_to_go >= 0 ) 8925 && ( $types_to_go[$max_index_to_go] ne 'b' ) 8926 && $rOpts_add_whitespace ) 8927 { 8928 my $ws = $$rwhite_space_flag[$j]; 8929 8930 if ( $ws == 1 ) { 8931 insert_new_token_to_go( ' ', 'b', $slevel, 8932 $no_internal_newlines ); 8933 } 8934 } 8935 8936 # Do not allow breaks which would promote a side comment to a 8937 # block comment. In order to allow a break before an opening 8938 # or closing BLOCK, followed by a side comment, those sections 8939 # of code will handle this flag separately. 8940 my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); 8941 my $is_opening_BLOCK = 8942 ( $type eq '{' 8943 && $token eq '{' 8944 && $block_type 8945 && $block_type ne 't' ); 8946 my $is_closing_BLOCK = 8947 ( $type eq '}' 8948 && $token eq '}' 8949 && $block_type 8950 && $block_type ne 't' ); 8951 8952 if ( $side_comment_follows 8953 && !$is_opening_BLOCK 8954 && !$is_closing_BLOCK ) 8955 { 8956 $no_internal_newlines = 1; 8957 } 8958 8959 # We're only going to handle breaking for code BLOCKS at this 8960 # (top) level. Other indentation breaks will be handled by 8961 # sub scan_list, which is better suited to dealing with them. 8962 if ($is_opening_BLOCK) { 8963 8964 # Tentatively output this token. This is required before 8965 # calling starting_one_line_block. We may have to unstore 8966 # it, though, if we have to break before it. 8967 store_token_to_go($side_comment_follows); 8968 8969 # Look ahead to see if we might form a one-line block 8970 my $too_long = 8971 starting_one_line_block( $j, $jmax, $level, $slevel, 8972 $ci_level, $rtokens, $rtoken_type, $rblock_type ); 8973 clear_breakpoint_undo_stack(); 8974 8975 # to simplify the logic below, set a flag to indicate if 8976 # this opening brace is far from the keyword which introduces it 8977 my $keyword_on_same_line = 1; 8978 if ( ( $max_index_to_go >= 0 ) 8979 && ( $last_nonblank_type eq ')' ) ) 8980 { 8981 if ( $block_type =~ /^(if|else|elsif)$/ 8982 && ( $tokens_to_go[0] eq '}' ) 8983 && $rOpts_cuddled_else ) 8984 { 8985 $keyword_on_same_line = 1; 8986 } 8987 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) 8988 { 8989 $keyword_on_same_line = 0; 8990 } 8991 } 8992 8993 # decide if user requested break before '{' 8994 my $want_break = 8995 8996 # use -bl flag if not a sub block of any type 8997 $block_type !~ /^sub/ 8998 ? $rOpts->{'opening-brace-on-new-line'} 8999 9000 # use -sbl flag for a named sub block 9001 : $block_type !~ /^sub\W*$/ 9002 ? $rOpts->{'opening-sub-brace-on-new-line'} 9003 9004 # use -asbl flag for an anonymous sub block 9005 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; 9006 9007 # Break before an opening '{' ... 9008 if ( 9009 9010 # if requested 9011 $want_break 9012 9013 # and we were unable to start looking for a block, 9014 && $index_start_one_line_block == UNDEFINED_INDEX 9015 9016 # or if it will not be on same line as its keyword, so that 9017 # it will be outdented (eval.t, overload.t), and the user 9018 # has not insisted on keeping it on the right 9019 || ( !$keyword_on_same_line 9020 && !$rOpts->{'opening-brace-always-on-right'} ) 9021 9022 ) 9023 { 9024 9025 # but only if allowed 9026 unless ($no_internal_newlines) { 9027 9028 # since we already stored this token, we must unstore it 9029 unstore_token_to_go(); 9030 9031 # then output the line 9032 output_line_to_go(); 9033 9034 # and now store this token at the start of a new line 9035 store_token_to_go($side_comment_follows); 9036 } 9037 } 9038 9039 # Now update for side comment 9040 if ($side_comment_follows) { $no_internal_newlines = 1 } 9041 9042 # now output this line 9043 unless ($no_internal_newlines) { 9044 output_line_to_go(); 9045 } 9046 } 9047 9048 elsif ($is_closing_BLOCK) { 9049 9050 # If there is a pending one-line block .. 9051 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 9052 9053 # we have to terminate it if.. 9054 if ( 9055 9056 # it is too long (final length may be different from 9057 # initial estimate). note: must allow 1 space for this token 9058 excess_line_length( $index_start_one_line_block, 9059 $max_index_to_go ) >= 0 9060 9061 # or if it has too many semicolons 9062 || ( $semicolons_before_block_self_destruct == 0 9063 && $last_nonblank_type ne ';' ) 9064 ) 9065 { 9066 destroy_one_line_block(); 9067 } 9068 } 9069 9070 # put a break before this closing curly brace if appropriate 9071 unless ( $no_internal_newlines 9072 || $index_start_one_line_block != UNDEFINED_INDEX ) 9073 { 9074 9075 # add missing semicolon if ... 9076 # there are some tokens 9077 if ( 9078 ( $max_index_to_go > 0 ) 9079 9080 # and we don't have one 9081 && ( $last_nonblank_type ne ';' ) 9082 9083 # patch until some block type issues are fixed: 9084 # Do not add semi-colon for block types '{', 9085 # '}', and ';' because we cannot be sure yet 9086 # that this is a block and not an anonomyous 9087 # hash (blktype.t, blktype1.t) 9088 && ( $block_type !~ /^[\{\};]$/ ) 9089 9090 # patch: and do not add semi-colons for recently 9091 # added block types (see tmp/semicolon.t) 9092 && ( $block_type !~ 9093 /^(switch|case|given|when|default)$/ ) 9094 9095 # it seems best not to add semicolons in these 9096 # special block types: sort|map|grep 9097 && ( !$is_sort_map_grep{$block_type} ) 9098 9099 # and we are allowed to do so. 9100 && $rOpts->{'add-semicolons'} 9101 ) 9102 { 9103 9104 save_current_token(); 9105 $token = ';'; 9106 $type = ';'; 9107 $level = $levels_to_go[$max_index_to_go]; 9108 $slevel = $nesting_depth_to_go[$max_index_to_go]; 9109 $nesting_blocks = 9110 $nesting_blocks_to_go[$max_index_to_go]; 9111 $ci_level = $ci_levels_to_go[$max_index_to_go]; 9112 $block_type = ""; 9113 $container_type = ""; 9114 $container_environment = ""; 9115 $type_sequence = ""; 9116 9117 # Note - we remove any blank AFTER extracting its 9118 # parameters such as level, etc, above 9119 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 9120 unstore_token_to_go(); 9121 } 9122 store_token_to_go(); 9123 9124 note_added_semicolon(); 9125 restore_current_token(); 9126 } 9127 9128 # then write out everything before this closing curly brace 9129 output_line_to_go(); 9130 9131 } 9132 9133 # Now update for side comment 9134 if ($side_comment_follows) { $no_internal_newlines = 1 } 9135 9136 # store the closing curly brace 9137 store_token_to_go(); 9138 9139 # ok, we just stored a closing curly brace. Often, but 9140 # not always, we want to end the line immediately. 9141 # So now we have to check for special cases. 9142 9143 # if this '}' successfully ends a one-line block.. 9144 my $is_one_line_block = 0; 9145 my $keep_going = 0; 9146 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 9147 9148 # Remember the type of token just before the 9149 # opening brace. It would be more general to use 9150 # a stack, but this will work for one-line blocks. 9151 $is_one_line_block = 9152 $types_to_go[$index_start_one_line_block]; 9153 9154 # we have to actually make it by removing tentative 9155 # breaks that were set within it 9156 undo_forced_breakpoint_stack(0); 9157 set_nobreaks( $index_start_one_line_block, 9158 $max_index_to_go - 1 ); 9159 9160 # then re-initialize for the next one-line block 9161 destroy_one_line_block(); 9162 9163 # then decide if we want to break after the '}' .. 9164 # We will keep going to allow certain brace followers as in: 9165 # do { $ifclosed = 1; last } unless $losing; 9166 # 9167 # But make a line break if the curly ends a 9168 # significant block: 9169 if ( 9170 $is_block_without_semicolon{$block_type} 9171 9172 # if needless semicolon follows we handle it later 9173 && $next_nonblank_token ne ';' 9174 ) 9175 { 9176 output_line_to_go() unless ($no_internal_newlines); 9177 } 9178 } 9179 9180 # set string indicating what we need to look for brace follower 9181 # tokens 9182 if ( $block_type eq 'do' ) { 9183 $rbrace_follower = \%is_do_follower; 9184 } 9185 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { 9186 $rbrace_follower = \%is_if_brace_follower; 9187 } 9188 elsif ( $block_type eq 'else' ) { 9189 $rbrace_follower = \%is_else_brace_follower; 9190 } 9191 9192 # added eval for borris.t 9193 elsif ($is_sort_map_grep_eval{$block_type} 9194 || $is_one_line_block eq 'G' ) 9195 { 9196 $rbrace_follower = undef; 9197 $keep_going = 1; 9198 } 9199 9200 # anonymous sub 9201 elsif ( $block_type =~ /^sub\W*$/ ) { 9202 9203 if ($is_one_line_block) { 9204 $rbrace_follower = \%is_anon_sub_1_brace_follower; 9205 } 9206 else { 9207 $rbrace_follower = \%is_anon_sub_brace_follower; 9208 } 9209 } 9210 9211 # None of the above: specify what can follow a closing 9212 # brace of a block which is not an 9213 # if/elsif/else/do/sort/map/grep/eval 9214 # Testfiles: 9215 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t 9216 else { 9217 $rbrace_follower = \%is_other_brace_follower; 9218 } 9219 9220 # See if an elsif block is followed by another elsif or else; 9221 # complain if not. 9222 if ( $block_type eq 'elsif' ) { 9223 9224 if ( $next_nonblank_token_type eq 'b' ) { # end of line? 9225 $looking_for_else = 1; # ok, check on next line 9226 } 9227 else { 9228 9229 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { 9230 write_logfile_entry("No else block :(\n"); 9231 } 9232 } 9233 } 9234 9235 # keep going after certain block types (map,sort,grep,eval) 9236 # added eval for borris.t 9237 if ($keep_going) { 9238 9239 # keep going 9240 } 9241 9242 # if no more tokens, postpone decision until re-entring 9243 elsif ( ( $next_nonblank_token_type eq 'b' ) 9244 && $rOpts_add_newlines ) 9245 { 9246 unless ($rbrace_follower) { 9247 output_line_to_go() unless ($no_internal_newlines); 9248 } 9249 } 9250 9251 elsif ($rbrace_follower) { 9252 9253 unless ( $rbrace_follower->{$next_nonblank_token} ) { 9254 output_line_to_go() unless ($no_internal_newlines); 9255 } 9256 $rbrace_follower = undef; 9257 } 9258 9259 else { 9260 output_line_to_go() unless ($no_internal_newlines); 9261 } 9262 9263 } # end treatment of closing block token 9264 9265 # handle semicolon 9266 elsif ( $type eq ';' ) { 9267 9268 # kill one-line blocks with too many semicolons 9269 $semicolons_before_block_self_destruct--; 9270 if ( 9271 ( $semicolons_before_block_self_destruct < 0 ) 9272 || ( $semicolons_before_block_self_destruct == 0 9273 && $next_nonblank_token_type !~ /^[b\}]$/ ) 9274 ) 9275 { 9276 destroy_one_line_block(); 9277 } 9278 9279 # Remove unnecessary semicolons, but not after bare 9280 # blocks, where it could be unsafe if the brace is 9281 # mistokenized. 9282 if ( 9283 ( 9284 $last_nonblank_token eq '}' 9285 && ( 9286 $is_block_without_semicolon{ 9287 $last_nonblank_block_type} 9288 || $last_nonblank_block_type =~ /^sub\s+\w/ 9289 || $last_nonblank_block_type =~ /^\w+:$/ ) 9290 ) 9291 || $last_nonblank_type eq ';' 9292 ) 9293 { 9294 9295 if ( 9296 $rOpts->{'delete-semicolons'} 9297 9298 # don't delete ; before a # because it would promote it 9299 # to a block comment 9300 && ( $next_nonblank_token_type ne '#' ) 9301 ) 9302 { 9303 note_deleted_semicolon(); 9304 output_line_to_go() 9305 unless ( $no_internal_newlines 9306 || $index_start_one_line_block != UNDEFINED_INDEX ); 9307 next; 9308 } 9309 else { 9310 write_logfile_entry("Extra ';'\n"); 9311 } 9312 } 9313 store_token_to_go(); 9314 9315 output_line_to_go() 9316 unless ( $no_internal_newlines 9317 || ( $rOpts_keep_interior_semicolons && $j < $jmax ) 9318 || ( $next_nonblank_token eq '}' ) ); 9319 9320 } 9321 9322 # handle here_doc target string 9323 elsif ( $type eq 'h' ) { 9324 $no_internal_newlines = 9325 1; # no newlines after seeing here-target 9326 destroy_one_line_block(); 9327 store_token_to_go(); 9328 } 9329 9330 # handle all other token types 9331 else { 9332 9333 # if this is a blank... 9334 if ( $type eq 'b' ) { 9335 9336 # make it just one character 9337 $token = ' ' if $rOpts_add_whitespace; 9338 9339 # delete it if unwanted by whitespace rules 9340 # or we are deleting all whitespace 9341 my $ws = $$rwhite_space_flag[ $j + 1 ]; 9342 if ( ( defined($ws) && $ws == -1 ) 9343 || $rOpts_delete_old_whitespace ) 9344 { 9345 9346 # unless it might make a syntax error 9347 next 9348 unless is_essential_whitespace( 9349 $last_last_nonblank_token, 9350 $last_last_nonblank_type, 9351 $tokens_to_go[$max_index_to_go], 9352 $types_to_go[$max_index_to_go], 9353 $$rtokens[ $j + 1 ], 9354 $$rtoken_type[ $j + 1 ] 9355 ); 9356 } 9357 } 9358 store_token_to_go(); 9359 } 9360 9361 # remember two previous nonblank OUTPUT tokens 9362 if ( $type ne '#' && $type ne 'b' ) { 9363 $last_last_nonblank_token = $last_nonblank_token; 9364 $last_last_nonblank_type = $last_nonblank_type; 9365 $last_nonblank_token = $token; 9366 $last_nonblank_type = $type; 9367 $last_nonblank_block_type = $block_type; 9368 } 9369 9370 # unset the continued-quote flag since it only applies to the 9371 # first token, and we want to resume normal formatting if 9372 # there are additional tokens on the line 9373 $in_continued_quote = 0; 9374 9375 } # end of loop over all tokens in this 'line_of_tokens' 9376 9377 # we have to flush .. 9378 if ( 9379 9380 # if there is a side comment 9381 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) 9382 9383 # if this line ends in a quote 9384 # NOTE: This is critically important for insuring that quoted lines 9385 # do not get processed by things like -sot and -sct 9386 || $in_quote 9387 9388 # if this is a VERSION statement 9389 || $is_VERSION_statement 9390 9391 # to keep a label on one line if that is how it is now 9392 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) ) 9393 9394 # if we are instructed to keep all old line breaks 9395 || !$rOpts->{'delete-old-newlines'} 9396 ) 9397 { 9398 destroy_one_line_block(); 9399 output_line_to_go(); 9400 } 9401 9402 # mark old line breakpoints in current output stream 9403 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { 9404 $old_breakpoint_to_go[$max_index_to_go] = 1; 9405 } 9406 } # end sub print_line_of_tokens 9407} # end print_line_of_tokens 9408 9409# sub output_line_to_go sends one logical line of tokens on down the 9410# pipeline to the VerticalAligner package, breaking the line into continuation 9411# lines as necessary. The line of tokens is ready to go in the "to_go" 9412# arrays. 9413sub output_line_to_go { 9414 9415 # debug stuff; this routine can be called from many points 9416 FORMATTER_DEBUG_FLAG_OUTPUT && do { 9417 my ( $a, $b, $c ) = caller; 9418 write_diagnostics( 9419"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" 9420 ); 9421 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; 9422 write_diagnostics("$output_str\n"); 9423 }; 9424 9425 # just set a tentative breakpoint if we might be in a one-line block 9426 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 9427 set_forced_breakpoint($max_index_to_go); 9428 return; 9429 } 9430 9431 my $cscw_block_comment; 9432 $cscw_block_comment = add_closing_side_comment() 9433 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); 9434 9435 match_opening_and_closing_tokens(); 9436 9437 # tell the -lp option we are outputting a batch so it can close 9438 # any unfinished items in its stack 9439 finish_lp_batch(); 9440 9441 # If this line ends in a code block brace, set breaks at any 9442 # previous closing code block braces to breakup a chain of code 9443 # blocks on one line. This is very rare but can happen for 9444 # user-defined subs. For example we might be looking at this: 9445 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { 9446 my $saw_good_break = 0; # flag to force breaks even if short line 9447 if ( 9448 9449 # looking for opening or closing block brace 9450 $block_type_to_go[$max_index_to_go] 9451 9452 # but not one of these which are never duplicated on a line: 9453 # until|while|for|if|elsif|else 9454 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } 9455 ) 9456 { 9457 my $lev = $nesting_depth_to_go[$max_index_to_go]; 9458 9459 # Walk backwards from the end and 9460 # set break at any closing block braces at the same level. 9461 # But quit if we are not in a chain of blocks. 9462 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { 9463 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level 9464 next if ( $levels_to_go[$i] > $lev ); # skip past higher level 9465 9466 if ( $block_type_to_go[$i] ) { 9467 if ( $tokens_to_go[$i] eq '}' ) { 9468 set_forced_breakpoint($i); 9469 $saw_good_break = 1; 9470 } 9471 } 9472 9473 # quit if we see anything besides words, function, blanks 9474 # at this level 9475 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } 9476 } 9477 } 9478 9479 my $imin = 0; 9480 my $imax = $max_index_to_go; 9481 9482 # trim any blank tokens 9483 if ( $max_index_to_go >= 0 ) { 9484 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 9485 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 9486 } 9487 9488 # anything left to write? 9489 if ( $imin <= $imax ) { 9490 9491 # add a blank line before certain key types 9492 if ( $last_line_leading_type !~ /^[#b]/ ) { 9493 my $want_blank = 0; 9494 my $leading_token = $tokens_to_go[$imin]; 9495 my $leading_type = $types_to_go[$imin]; 9496 9497 # blank lines before subs except declarations and one-liners 9498 # MCONVERSION LOCATION - for sub tokenization change 9499 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { 9500 $want_blank = ( $rOpts->{'blanks-before-subs'} ) 9501 && ( 9502 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 9503 $imax ) !~ /^[\;\}]$/ 9504 ); 9505 } 9506 9507 # break before all package declarations 9508 # MCONVERSION LOCATION - for tokenizaton change 9509 elsif ($leading_token =~ /^(package\s)/ 9510 && $leading_type eq 'i' ) 9511 { 9512 $want_blank = ( $rOpts->{'blanks-before-subs'} ); 9513 } 9514 9515 # break before certain key blocks except one-liners 9516 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { 9517 $want_blank = ( $rOpts->{'blanks-before-subs'} ) 9518 && ( 9519 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 9520 $imax ) ne '}' 9521 ); 9522 } 9523 9524 # Break before certain block types if we haven't had a 9525 # break at this level for a while. This is the 9526 # difficult decision.. 9527 elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/ 9528 && $leading_type eq 'k' ) 9529 { 9530 my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; 9531 if ( !defined($lc) ) { $lc = 0 } 9532 9533 $want_blank = 9534 $rOpts->{'blanks-before-blocks'} 9535 && $lc >= $rOpts->{'long-block-line-count'} 9536 && $file_writer_object->get_consecutive_nonblank_lines() >= 9537 $rOpts->{'long-block-line-count'} 9538 && ( 9539 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 9540 $imax ) ne '}' 9541 ); 9542 } 9543 9544 if ($want_blank) { 9545 9546 # future: send blank line down normal path to VerticalAligner 9547 Perl::Tidy::VerticalAligner::flush(); 9548 $file_writer_object->write_blank_code_line(); 9549 } 9550 } 9551 9552 # update blank line variables and count number of consecutive 9553 # non-blank, non-comment lines at this level 9554 $last_last_line_leading_level = $last_line_leading_level; 9555 $last_line_leading_level = $levels_to_go[$imin]; 9556 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } 9557 $last_line_leading_type = $types_to_go[$imin]; 9558 if ( $last_line_leading_level == $last_last_line_leading_level 9559 && $last_line_leading_type ne 'b' 9560 && $last_line_leading_type ne '#' 9561 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) 9562 { 9563 $nonblank_lines_at_depth[$last_line_leading_level]++; 9564 } 9565 else { 9566 $nonblank_lines_at_depth[$last_line_leading_level] = 1; 9567 } 9568 9569 FORMATTER_DEBUG_FLAG_FLUSH && do { 9570 my ( $package, $file, $line ) = caller; 9571 print 9572"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; 9573 }; 9574 9575 # add a couple of extra terminal blank tokens 9576 pad_array_to_go(); 9577 9578 # set all forced breakpoints for good list formatting 9579 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; 9580 9581 if ( 9582 $max_index_to_go > 0 9583 && ( 9584 $is_long_line 9585 || $old_line_count_in_batch > 1 9586 || is_unbalanced_batch() 9587 || ( 9588 $comma_count_in_batch 9589 && ( $rOpts_maximum_fields_per_table > 0 9590 || $rOpts_comma_arrow_breakpoints == 0 ) 9591 ) 9592 ) 9593 ) 9594 { 9595 $saw_good_break ||= scan_list(); 9596 } 9597 9598 # let $ri_first and $ri_last be references to lists of 9599 # first and last tokens of line fragments to output.. 9600 my ( $ri_first, $ri_last ); 9601 9602 # write a single line if.. 9603 if ( 9604 9605 # we aren't allowed to add any newlines 9606 !$rOpts_add_newlines 9607 9608 # or, we don't already have an interior breakpoint 9609 # and we didn't see a good breakpoint 9610 || ( 9611 !$forced_breakpoint_count 9612 && !$saw_good_break 9613 9614 # and this line is 'short' 9615 && !$is_long_line 9616 ) 9617 ) 9618 { 9619 @$ri_first = ($imin); 9620 @$ri_last = ($imax); 9621 } 9622 9623 # otherwise use multiple lines 9624 else { 9625 9626 ( $ri_first, $ri_last, my $colon_count ) = 9627 set_continuation_breaks($saw_good_break); 9628 9629 break_all_chain_tokens( $ri_first, $ri_last ); 9630 9631 break_equals( $ri_first, $ri_last ); 9632 9633 # now we do a correction step to clean this up a bit 9634 # (The only time we would not do this is for debugging) 9635 if ( $rOpts->{'recombine'} ) { 9636 ( $ri_first, $ri_last ) = 9637 recombine_breakpoints( $ri_first, $ri_last ); 9638 } 9639 9640 insert_final_breaks( $ri_first, $ri_last ) if $colon_count; 9641 } 9642 9643 # do corrector step if -lp option is used 9644 my $do_not_pad = 0; 9645 if ($rOpts_line_up_parentheses) { 9646 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); 9647 } 9648 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); 9649 } 9650 prepare_for_new_input_lines(); 9651 9652 # output any new -cscw block comment 9653 if ($cscw_block_comment) { 9654 flush(); 9655 $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); 9656 } 9657} 9658 9659sub note_added_semicolon { 9660 $last_added_semicolon_at = $input_line_number; 9661 if ( $added_semicolon_count == 0 ) { 9662 $first_added_semicolon_at = $last_added_semicolon_at; 9663 } 9664 $added_semicolon_count++; 9665 write_logfile_entry("Added ';' here\n"); 9666} 9667 9668sub note_deleted_semicolon { 9669 $last_deleted_semicolon_at = $input_line_number; 9670 if ( $deleted_semicolon_count == 0 ) { 9671 $first_deleted_semicolon_at = $last_deleted_semicolon_at; 9672 } 9673 $deleted_semicolon_count++; 9674 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) 9675} 9676 9677sub note_embedded_tab { 9678 $embedded_tab_count++; 9679 $last_embedded_tab_at = $input_line_number; 9680 if ( !$first_embedded_tab_at ) { 9681 $first_embedded_tab_at = $last_embedded_tab_at; 9682 } 9683 9684 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { 9685 write_logfile_entry("Embedded tabs in quote or pattern\n"); 9686 } 9687} 9688 9689sub starting_one_line_block { 9690 9691 # after seeing an opening curly brace, look for the closing brace 9692 # and see if the entire block will fit on a line. This routine is 9693 # not always right because it uses the old whitespace, so a check 9694 # is made later (at the closing brace) to make sure we really 9695 # have a one-line block. We have to do this preliminary check, 9696 # though, because otherwise we would always break at a semicolon 9697 # within a one-line block if the block contains multiple statements. 9698 9699 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, 9700 $rblock_type ) 9701 = @_; 9702 9703 # kill any current block - we can only go 1 deep 9704 destroy_one_line_block(); 9705 9706 # return value: 9707 # 1=distance from start of block to opening brace exceeds line length 9708 # 0=otherwise 9709 9710 my $i_start = 0; 9711 9712 # shouldn't happen: there must have been a prior call to 9713 # store_token_to_go to put the opening brace in the output stream 9714 if ( $max_index_to_go < 0 ) { 9715 warning("program bug: store_token_to_go called incorrectly\n"); 9716 report_definite_bug(); 9717 } 9718 else { 9719 9720 # cannot use one-line blocks with cuddled else else/elsif lines 9721 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { 9722 return 0; 9723 } 9724 } 9725 9726 my $block_type = $$rblock_type[$j]; 9727 9728 # find the starting keyword for this block (such as 'if', 'else', ...) 9729 9730 if ( $block_type =~ /^[\{\}\;\:]$/ ) { 9731 $i_start = $max_index_to_go; 9732 } 9733 9734 elsif ( $last_last_nonblank_token_to_go eq ')' ) { 9735 9736 # For something like "if (xxx) {", the keyword "if" will be 9737 # just after the most recent break. This will be 0 unless 9738 # we have just killed a one-line block and are starting another. 9739 # (doif.t) 9740 $i_start = $index_max_forced_break + 1; 9741 if ( $types_to_go[$i_start] eq 'b' ) { 9742 $i_start++; 9743 } 9744 9745 unless ( $tokens_to_go[$i_start] eq $block_type ) { 9746 return 0; 9747 } 9748 } 9749 9750 # the previous nonblank token should start these block types 9751 elsif ( 9752 ( $last_last_nonblank_token_to_go eq $block_type ) 9753 || ( $block_type =~ /^sub/ 9754 && $last_last_nonblank_token_to_go =~ /^sub/ ) 9755 ) 9756 { 9757 $i_start = $last_last_nonblank_index_to_go; 9758 } 9759 9760 # patch for SWITCH/CASE to retain one-line case/when blocks 9761 elsif ( $block_type eq 'case' || $block_type eq 'when' ) { 9762 $i_start = $index_max_forced_break + 1; 9763 if ( $types_to_go[$i_start] eq 'b' ) { 9764 $i_start++; 9765 } 9766 unless ( $tokens_to_go[$i_start] eq $block_type ) { 9767 return 0; 9768 } 9769 } 9770 9771 else { 9772 return 1; 9773 } 9774 9775 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; 9776 9777 my $i; 9778 9779 # see if length is too long to even start 9780 if ( $pos > $rOpts_maximum_line_length ) { 9781 return 1; 9782 } 9783 9784 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { 9785 9786 # old whitespace could be arbitrarily large, so don't use it 9787 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } 9788 else { $pos += length( $$rtokens[$i] ) } 9789 9790 # Return false result if we exceed the maximum line length, 9791 if ( $pos > $rOpts_maximum_line_length ) { 9792 return 0; 9793 } 9794 9795 # or encounter another opening brace before finding the closing brace. 9796 elsif ($$rtokens[$i] eq '{' 9797 && $$rtoken_type[$i] eq '{' 9798 && $$rblock_type[$i] ) 9799 { 9800 return 0; 9801 } 9802 9803 # if we find our closing brace.. 9804 elsif ($$rtokens[$i] eq '}' 9805 && $$rtoken_type[$i] eq '}' 9806 && $$rblock_type[$i] ) 9807 { 9808 9809 # be sure any trailing comment also fits on the line 9810 my $i_nonblank = 9811 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; 9812 9813 if ( $$rtoken_type[$i_nonblank] eq '#' ) { 9814 $pos += length( $$rtokens[$i_nonblank] ); 9815 9816 if ( $i_nonblank > $i + 1 ) { 9817 $pos += length( $$rtokens[ $i + 1 ] ); 9818 } 9819 9820 if ( $pos > $rOpts_maximum_line_length ) { 9821 return 0; 9822 } 9823 } 9824 9825 # ok, it's a one-line block 9826 create_one_line_block( $i_start, 20 ); 9827 return 0; 9828 } 9829 9830 # just keep going for other characters 9831 else { 9832 } 9833 } 9834 9835 # Allow certain types of new one-line blocks to form by joining 9836 # input lines. These can be safely done, but for other block types, 9837 # we keep old one-line blocks but do not form new ones. It is not 9838 # always a good idea to make as many one-line blocks as possible, 9839 # so other types are not done. The user can always use -mangle. 9840 if ( $is_sort_map_grep_eval{$block_type} ) { 9841 create_one_line_block( $i_start, 1 ); 9842 } 9843 9844 return 0; 9845} 9846 9847sub unstore_token_to_go { 9848 9849 # remove most recent token from output stream 9850 if ( $max_index_to_go > 0 ) { 9851 $max_index_to_go--; 9852 } 9853 else { 9854 $max_index_to_go = UNDEFINED_INDEX; 9855 } 9856 9857} 9858 9859sub want_blank_line { 9860 flush(); 9861 $file_writer_object->want_blank_line(); 9862} 9863 9864sub write_unindented_line { 9865 flush(); 9866 $file_writer_object->write_line( $_[0] ); 9867} 9868 9869sub undo_ci { 9870 9871 # Undo continuation indentation in certain sequences 9872 # For example, we can undo continuation indation in sort/map/grep chains 9873 # my $dat1 = pack( "n*", 9874 # map { $_, $lookup->{$_} } 9875 # sort { $a <=> $b } 9876 # grep { $lookup->{$_} ne $default } keys %$lookup ); 9877 # To align the map/sort/grep keywords like this: 9878 # my $dat1 = pack( "n*", 9879 # map { $_, $lookup->{$_} } 9880 # sort { $a <=> $b } 9881 # grep { $lookup->{$_} ne $default } keys %$lookup ); 9882 my ( $ri_first, $ri_last ) = @_; 9883 my ( $line_1, $line_2, $lev_last ); 9884 my $this_line_is_semicolon_terminated; 9885 my $max_line = @$ri_first - 1; 9886 9887 # looking at each line of this batch.. 9888 # We are looking at leading tokens and looking for a sequence 9889 # all at the same level and higher level than enclosing lines. 9890 foreach my $line ( 0 .. $max_line ) { 9891 9892 my $ibeg = $$ri_first[$line]; 9893 my $lev = $levels_to_go[$ibeg]; 9894 if ( $line > 0 ) { 9895 9896 # if we have started a chain.. 9897 if ($line_1) { 9898 9899 # see if it continues.. 9900 if ( $lev == $lev_last ) { 9901 if ( $types_to_go[$ibeg] eq 'k' 9902 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) 9903 { 9904 9905 # chain continues... 9906 # check for chain ending at end of a a statement 9907 if ( $line == $max_line ) { 9908 9909 # see of this line ends a statement 9910 my $iend = $$ri_last[$line]; 9911 $this_line_is_semicolon_terminated = 9912 $types_to_go[$iend] eq ';' 9913 9914 # with possible side comment 9915 || ( $types_to_go[$iend] eq '#' 9916 && $iend - $ibeg >= 2 9917 && $types_to_go[ $iend - 2 ] eq ';' 9918 && $types_to_go[ $iend - 1 ] eq 'b' ); 9919 } 9920 $line_2 = $line if ($this_line_is_semicolon_terminated); 9921 } 9922 else { 9923 9924 # kill chain 9925 $line_1 = undef; 9926 } 9927 } 9928 elsif ( $lev < $lev_last ) { 9929 9930 # chain ends with previous line 9931 $line_2 = $line - 1; 9932 } 9933 elsif ( $lev > $lev_last ) { 9934 9935 # kill chain 9936 $line_1 = undef; 9937 } 9938 9939 # undo the continuation indentation if a chain ends 9940 if ( defined($line_2) && defined($line_1) ) { 9941 my $continuation_line_count = $line_2 - $line_1 + 1; 9942 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = 9943 (0) x ($continuation_line_count); 9944 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = 9945 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]; 9946 $line_1 = undef; 9947 } 9948 } 9949 9950 # not in a chain yet.. 9951 else { 9952 9953 # look for start of a new sort/map/grep chain 9954 if ( $lev > $lev_last ) { 9955 if ( $types_to_go[$ibeg] eq 'k' 9956 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) 9957 { 9958 $line_1 = $line; 9959 } 9960 } 9961 } 9962 } 9963 $lev_last = $lev; 9964 } 9965} 9966 9967sub undo_lp_ci { 9968 9969 # If there is a single, long parameter within parens, like this: 9970 # 9971 # $self->command( "/msg " 9972 # . $infoline->chan 9973 # . " You said $1, but did you know that it's square was " 9974 # . $1 * $1 . " ?" ); 9975 # 9976 # we can remove the continuation indentation of the 2nd and higher lines 9977 # to achieve this effect, which is more pleasing: 9978 # 9979 # $self->command("/msg " 9980 # . $infoline->chan 9981 # . " You said $1, but did you know that it's square was " 9982 # . $1 * $1 . " ?"); 9983 9984 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; 9985 my $max_line = @$ri_first - 1; 9986 9987 # must be multiple lines 9988 return unless $max_line > $line_open; 9989 9990 my $lev_start = $levels_to_go[$i_start]; 9991 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; 9992 9993 # see if all additional lines in this container have continuation 9994 # indentation 9995 my $n; 9996 my $line_1 = 1 + $line_open; 9997 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { 9998 my $ibeg = $$ri_first[$n]; 9999 my $iend = $$ri_last[$n]; 10000 if ( $ibeg eq $closing_index ) { $n--; last } 10001 return if ( $lev_start != $levels_to_go[$ibeg] ); 10002 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); 10003 last if ( $closing_index <= $iend ); 10004 } 10005 10006 # we can reduce the indentation of all continuation lines 10007 my $continuation_line_count = $n - $line_open; 10008 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = 10009 (0) x ($continuation_line_count); 10010 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = 10011 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; 10012} 10013 10014sub set_logical_padding { 10015 10016 # Look at a batch of lines and see if extra padding can improve the 10017 # alignment when there are certain leading operators. Here is an 10018 # example, in which some extra space is introduced before 10019 # '( $year' to make it line up with the subsequent lines: 10020 # 10021 # if ( ( $Year < 1601 ) 10022 # || ( $Year > 2899 ) 10023 # || ( $EndYear < 1601 ) 10024 # || ( $EndYear > 2899 ) ) 10025 # { 10026 # &Error_OutOfRange; 10027 # } 10028 # 10029 my ( $ri_first, $ri_last ) = @_; 10030 my $max_line = @$ri_first - 1; 10031 10032 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces, 10033 $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); 10034 10035 # looking at each line of this batch.. 10036 foreach $line ( 0 .. $max_line - 1 ) { 10037 10038 # see if the next line begins with a logical operator 10039 $ibeg = $$ri_first[$line]; 10040 $iend = $$ri_last[$line]; 10041 $ibeg_next = $$ri_first[ $line + 1 ]; 10042 $tok_next = $tokens_to_go[$ibeg_next]; 10043 $type_next = $types_to_go[$ibeg_next]; 10044 10045 $has_leading_op_next = ( $tok_next =~ /^\w/ ) 10046 ? $is_chain_operator{$tok_next} # + - * / : ? && || 10047 : $is_chain_operator{$type_next}; # and, or 10048 10049 next unless ($has_leading_op_next); 10050 10051 # next line must not be at lesser depth 10052 next 10053 if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] ); 10054 10055 # identify the token in this line to be padded on the left 10056 $ipad = undef; 10057 10058 # handle lines at same depth... 10059 if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) { 10060 10061 # if this is not first line of the batch ... 10062 if ( $line > 0 ) { 10063 10064 # and we have leading operator.. 10065 next if $has_leading_op; 10066 10067 # Introduce padding if.. 10068 # 1. the previous line is at lesser depth, or 10069 # 2. the previous line ends in an assignment 10070 # 3. the previous line ends in a 'return' 10071 # 4. the previous line ends in a comma 10072 # Example 1: previous line at lesser depth 10073 # if ( ( $Year < 1601 ) # <- we are here but 10074 # || ( $Year > 2899 ) # list has not yet 10075 # || ( $EndYear < 1601 ) # collapsed vertically 10076 # || ( $EndYear > 2899 ) ) 10077 # { 10078 # 10079 # Example 2: previous line ending in assignment: 10080 # $leapyear = 10081 # $year % 4 ? 0 # <- We are here 10082 # : $year % 100 ? 1 10083 # : $year % 400 ? 0 10084 # : 1; 10085 # 10086 # Example 3: previous line ending in comma: 10087 # push @expr, 10088 # /test/ ? undef 10089 # : eval($_) ? 1 10090 # : eval($_) ? 1 10091 # : 0; 10092 10093 # be sure levels agree (do not indent after an indented 'if') 10094 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); 10095 10096 # allow padding on first line after a comma but only if: 10097 # (1) this is line 2 and 10098 # (2) there are at more than three lines and 10099 # (3) lines 3 and 4 have the same leading operator 10100 # These rules try to prevent padding within a long 10101 # comma-separated list. 10102 my $ok_comma; 10103 if ( $types_to_go[$iendm] eq ',' 10104 && $line == 1 10105 && $max_line > 2 ) 10106 { 10107 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 10108 my $tok_next_next = $tokens_to_go[$ibeg_next_next]; 10109 $ok_comma = $tok_next_next eq $tok_next; 10110 } 10111 10112 next 10113 unless ( 10114 $is_assignment{ $types_to_go[$iendm] } 10115 || $ok_comma 10116 || ( $nesting_depth_to_go[$ibegm] < 10117 $nesting_depth_to_go[$ibeg] ) 10118 || ( $types_to_go[$iendm] eq 'k' 10119 && $tokens_to_go[$iendm] eq 'return' ) 10120 ); 10121 10122 # we will add padding before the first token 10123 $ipad = $ibeg; 10124 } 10125 10126 # for first line of the batch.. 10127 else { 10128 10129 # WARNING: Never indent if first line is starting in a 10130 # continued quote, which would change the quote. 10131 next if $starting_in_quote; 10132 10133 # if this is text after closing '}' 10134 # then look for an interior token to pad 10135 if ( $types_to_go[$ibeg] eq '}' ) { 10136 10137 } 10138 10139 # otherwise, we might pad if it looks really good 10140 else { 10141 10142 # we might pad token $ibeg, so be sure that it 10143 # is at the same depth as the next line. 10144 next 10145 if ( $nesting_depth_to_go[$ibeg] != 10146 $nesting_depth_to_go[$ibeg_next] ); 10147 10148 # We can pad on line 1 of a statement if at least 3 10149 # lines will be aligned. Otherwise, it 10150 # can look very confusing. 10151 10152 # We have to be careful not to pad if there are too few 10153 # lines. The current rule is: 10154 # (1) in general we require at least 3 consecutive lines 10155 # with the same leading chain operator token, 10156 # (2) but an exception is that we only require two lines 10157 # with leading colons if there are no more lines. For example, 10158 # the first $i in the following snippet would get padding 10159 # by the second rule: 10160 # 10161 # $i == 1 ? ( "First", "Color" ) 10162 # : $i == 2 ? ( "Then", "Rarity" ) 10163 # : ( "Then", "Name" ); 10164 10165 if ( $max_line > 1 ) { 10166 my $leading_token = $tokens_to_go[$ibeg_next]; 10167 my $tokens_differ; 10168 10169 # never indent line 1 of a '.' series because 10170 # previous line is most likely at same level. 10171 # TODO: we should also look at the leasing_spaces 10172 # of the last output line and skip if it is same 10173 # as this line. 10174 next if ( $leading_token eq '.' ); 10175 10176 my $count = 1; 10177 foreach my $l ( 2 .. 3 ) { 10178 last if ( $line + $l > $max_line ); 10179 my $ibeg_next_next = $$ri_first[ $line + $l ]; 10180 if ( $tokens_to_go[$ibeg_next_next] ne 10181 $leading_token ) 10182 { 10183 $tokens_differ = 1; 10184 last; 10185 } 10186 $count++; 10187 } 10188 next if ($tokens_differ); 10189 next if ( $count < 3 && $leading_token ne ':' ); 10190 $ipad = $ibeg; 10191 } 10192 else { 10193 next; 10194 } 10195 } 10196 } 10197 } 10198 10199 # find interior token to pad if necessary 10200 if ( !defined($ipad) ) { 10201 10202 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { 10203 10204 # find any unclosed container 10205 next 10206 unless ( $type_sequence_to_go[$i] 10207 && $mate_index_to_go[$i] > $iend ); 10208 10209 # find next nonblank token to pad 10210 $ipad = $i + 1; 10211 if ( $types_to_go[$ipad] eq 'b' ) { 10212 $ipad++; 10213 last if ( $ipad > $iend ); 10214 } 10215 } 10216 last unless $ipad; 10217 } 10218 10219 # next line must not be at greater depth 10220 my $iend_next = $$ri_last[ $line + 1 ]; 10221 next 10222 if ( $nesting_depth_to_go[ $iend_next + 1 ] > 10223 $nesting_depth_to_go[$ipad] ); 10224 10225 # lines must be somewhat similar to be padded.. 10226 my $inext_next = $ibeg_next + 1; 10227 if ( $types_to_go[$inext_next] eq 'b' ) { 10228 $inext_next++; 10229 } 10230 my $type = $types_to_go[$ipad]; 10231 my $type_next = $types_to_go[ $ipad + 1 ]; 10232 10233 # see if there are multiple continuation lines 10234 my $logical_continuation_lines = 1; 10235 if ( $line + 2 <= $max_line ) { 10236 my $leading_token = $tokens_to_go[$ibeg_next]; 10237 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 10238 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token 10239 && $nesting_depth_to_go[$ibeg_next] eq 10240 $nesting_depth_to_go[$ibeg_next_next] ) 10241 { 10242 $logical_continuation_lines++; 10243 } 10244 } 10245 10246 # see if leading types match 10247 my $types_match = $types_to_go[$inext_next] eq $type; 10248 my $matches_without_bang; 10249 10250 # if first line has leading ! then compare the following token 10251 if ( !$types_match && $type eq '!' ) { 10252 $types_match = $matches_without_bang = 10253 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; 10254 } 10255 10256 if ( 10257 10258 # either we have multiple continuation lines to follow 10259 # and we are not padding the first token 10260 ( $logical_continuation_lines > 1 && $ipad > 0 ) 10261 10262 # or.. 10263 || ( 10264 10265 # types must match 10266 $types_match 10267 10268 # and keywords must match if keyword 10269 && !( 10270 $type eq 'k' 10271 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] 10272 ) 10273 ) 10274 ) 10275 { 10276 10277 #----------------------begin special checks-------------- 10278 # 10279 # SPECIAL CHECK 1: 10280 # A check is needed before we can make the pad. 10281 # If we are in a list with some long items, we want each 10282 # item to stand out. So in the following example, the 10283 # first line begining with '$casefold->' would look good 10284 # padded to align with the next line, but then it 10285 # would be indented more than the last line, so we 10286 # won't do it. 10287 # 10288 # ok( 10289 # $casefold->{code} eq '0041' 10290 # && $casefold->{status} eq 'C' 10291 # && $casefold->{mapping} eq '0061', 10292 # 'casefold 0x41' 10293 # ); 10294 # 10295 # Note: 10296 # It would be faster, and almost as good, to use a comma 10297 # count, and not pad if comma_count > 1 and the previous 10298 # line did not end with a comma. 10299 # 10300 my $ok_to_pad = 1; 10301 10302 my $ibg = $$ri_first[ $line + 1 ]; 10303 my $depth = $nesting_depth_to_go[ $ibg + 1 ]; 10304 10305 # just use simplified formula for leading spaces to avoid 10306 # needless sub calls 10307 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; 10308 10309 # look at each line beyond the next .. 10310 my $l = $line + 1; 10311 foreach $l ( $line + 2 .. $max_line ) { 10312 my $ibg = $$ri_first[$l]; 10313 10314 # quit looking at the end of this container 10315 last 10316 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) 10317 || ( $nesting_depth_to_go[$ibg] < $depth ); 10318 10319 # cannot do the pad if a later line would be 10320 # outdented more 10321 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { 10322 $ok_to_pad = 0; 10323 last; 10324 } 10325 } 10326 10327 # don't pad if we end in a broken list 10328 if ( $l == $max_line ) { 10329 my $i2 = $$ri_last[$l]; 10330 if ( $types_to_go[$i2] eq '#' ) { 10331 my $i1 = $$ri_first[$l]; 10332 next 10333 if ( 10334 terminal_type( \@types_to_go, \@block_type_to_go, $i1, 10335 $i2 ) eq ',' 10336 ); 10337 } 10338 } 10339 10340 # SPECIAL CHECK 2: 10341 # a minus may introduce a quoted variable, and we will 10342 # add the pad only if this line begins with a bare word, 10343 # such as for the word 'Button' here: 10344 # [ 10345 # Button => "Print letter \"~$_\"", 10346 # -command => [ sub { print "$_[0]\n" }, $_ ], 10347 # -accelerator => "Meta+$_" 10348 # ]; 10349 # 10350 # On the other hand, if 'Button' is quoted, it looks best 10351 # not to pad: 10352 # [ 10353 # 'Button' => "Print letter \"~$_\"", 10354 # -command => [ sub { print "$_[0]\n" }, $_ ], 10355 # -accelerator => "Meta+$_" 10356 # ]; 10357 if ( $types_to_go[$ibeg_next] eq 'm' ) { 10358 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; 10359 } 10360 10361 next unless $ok_to_pad; 10362 10363 #----------------------end special check--------------- 10364 10365 my $length_1 = total_line_length( $ibeg, $ipad - 1 ); 10366 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); 10367 $pad_spaces = $length_2 - $length_1; 10368 10369 # If the first line has a leading ! and the second does 10370 # not, then remove one space to try to align the next 10371 # leading characters, which are often the same. For example: 10372 # if ( !$ts 10373 # || $ts == $self->Holder 10374 # || $self->Holder->Type eq "Arena" ) 10375 # 10376 # This usually helps readability, but if there are subsequent 10377 # ! operators things will still get messed up. For example: 10378 # 10379 # if ( !exists $Net::DNS::typesbyname{$qtype} 10380 # && exists $Net::DNS::classesbyname{$qtype} 10381 # && !exists $Net::DNS::classesbyname{$qclass} 10382 # && exists $Net::DNS::typesbyname{$qclass} ) 10383 # We can't fix that. 10384 if ($matches_without_bang) { $pad_spaces-- } 10385 10386 # make sure this won't change if -lp is used 10387 my $indentation_1 = $leading_spaces_to_go[$ibeg]; 10388 if ( ref($indentation_1) ) { 10389 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { 10390 my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; 10391 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) { 10392 $pad_spaces = 0; 10393 } 10394 } 10395 } 10396 10397 # we might be able to handle a pad of -1 by removing a blank 10398 # token 10399 if ( $pad_spaces < 0 ) { 10400 10401 if ( $pad_spaces == -1 ) { 10402 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) { 10403 $tokens_to_go[ $ipad - 1 ] = ''; 10404 } 10405 } 10406 $pad_spaces = 0; 10407 } 10408 10409 # now apply any padding for alignment 10410 if ( $ipad >= 0 && $pad_spaces ) { 10411 10412 my $length_t = total_line_length( $ibeg, $iend ); 10413 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) { 10414 $tokens_to_go[$ipad] = 10415 ' ' x $pad_spaces . $tokens_to_go[$ipad]; 10416 } 10417 } 10418 } 10419 } 10420 continue { 10421 $iendm = $iend; 10422 $ibegm = $ibeg; 10423 $has_leading_op = $has_leading_op_next; 10424 } # end of loop over lines 10425 return; 10426} 10427 10428sub correct_lp_indentation { 10429 10430 # When the -lp option is used, we need to make a last pass through 10431 # each line to correct the indentation positions in case they differ 10432 # from the predictions. This is necessary because perltidy uses a 10433 # predictor/corrector method for aligning with opening parens. The 10434 # predictor is usually good, but sometimes stumbles. The corrector 10435 # tries to patch things up once the actual opening paren locations 10436 # are known. 10437 my ( $ri_first, $ri_last ) = @_; 10438 my $do_not_pad = 0; 10439 10440 # Note on flag '$do_not_pad': 10441 # We want to avoid a situation like this, where the aligner inserts 10442 # whitespace before the '=' to align it with a previous '=', because 10443 # otherwise the parens might become mis-aligned in a situation like 10444 # this, where the '=' has become aligned with the previous line, 10445 # pushing the opening '(' forward beyond where we want it. 10446 # 10447 # $mkFloor::currentRoom = ''; 10448 # $mkFloor::c_entry = $c->Entry( 10449 # -width => '10', 10450 # -relief => 'sunken', 10451 # ... 10452 # ); 10453 # 10454 # We leave it to the aligner to decide how to do this. 10455 10456 # first remove continuation indentation if appropriate 10457 my $max_line = @$ri_first - 1; 10458 10459 # looking at each line of this batch.. 10460 my ( $ibeg, $iend ); 10461 my $line; 10462 foreach $line ( 0 .. $max_line ) { 10463 $ibeg = $$ri_first[$line]; 10464 $iend = $$ri_last[$line]; 10465 10466 # looking at each token in this output line.. 10467 my $i; 10468 foreach $i ( $ibeg .. $iend ) { 10469 10470 # How many space characters to place before this token 10471 # for special alignment. Actual padding is done in the 10472 # continue block. 10473 10474 # looking for next unvisited indentation item 10475 my $indentation = $leading_spaces_to_go[$i]; 10476 if ( !$indentation->get_MARKED() ) { 10477 $indentation->set_MARKED(1); 10478 10479 # looking for indentation item for which we are aligning 10480 # with parens, braces, and brackets 10481 next unless ( $indentation->get_ALIGN_PAREN() ); 10482 10483 # skip closed container on this line 10484 if ( $i > $ibeg ) { 10485 my $im = $i - 1; 10486 if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- } 10487 if ( $type_sequence_to_go[$im] 10488 && $mate_index_to_go[$im] <= $iend ) 10489 { 10490 next; 10491 } 10492 } 10493 10494 if ( $line == 1 && $i == $ibeg ) { 10495 $do_not_pad = 1; 10496 } 10497 10498 # Ok, let's see what the error is and try to fix it 10499 my $actual_pos; 10500 my $predicted_pos = $indentation->get_SPACES(); 10501 if ( $i > $ibeg ) { 10502 10503 # token is mid-line - use length to previous token 10504 $actual_pos = total_line_length( $ibeg, $i - 1 ); 10505 10506 # for mid-line token, we must check to see if all 10507 # additional lines have continuation indentation, 10508 # and remove it if so. Otherwise, we do not get 10509 # good alignment. 10510 my $closing_index = $indentation->get_CLOSED(); 10511 if ( $closing_index > $iend ) { 10512 my $ibeg_next = $$ri_first[ $line + 1 ]; 10513 if ( $ci_levels_to_go[$ibeg_next] > 0 ) { 10514 undo_lp_ci( $line, $i, $closing_index, $ri_first, 10515 $ri_last ); 10516 } 10517 } 10518 } 10519 elsif ( $line > 0 ) { 10520 10521 # handle case where token starts a new line; 10522 # use length of previous line 10523 my $ibegm = $$ri_first[ $line - 1 ]; 10524 my $iendm = $$ri_last[ $line - 1 ]; 10525 $actual_pos = total_line_length( $ibegm, $iendm ); 10526 10527 # follow -pt style 10528 ++$actual_pos 10529 if ( $types_to_go[ $iendm + 1 ] eq 'b' ); 10530 } 10531 else { 10532 10533 # token is first character of first line of batch 10534 $actual_pos = $predicted_pos; 10535 } 10536 10537 my $move_right = $actual_pos - $predicted_pos; 10538 10539 # done if no error to correct (gnu2.t) 10540 if ( $move_right == 0 ) { 10541 $indentation->set_RECOVERABLE_SPACES($move_right); 10542 next; 10543 } 10544 10545 # if we have not seen closure for this indentation in 10546 # this batch, we can only pass on a request to the 10547 # vertical aligner 10548 my $closing_index = $indentation->get_CLOSED(); 10549 10550 if ( $closing_index < 0 ) { 10551 $indentation->set_RECOVERABLE_SPACES($move_right); 10552 next; 10553 } 10554 10555 # If necessary, look ahead to see if there is really any 10556 # leading whitespace dependent on this whitespace, and 10557 # also find the longest line using this whitespace. 10558 # Since it is always safe to move left if there are no 10559 # dependents, we only need to do this if we may have 10560 # dependent nodes or need to move right. 10561 10562 my $right_margin = 0; 10563 my $have_child = $indentation->get_HAVE_CHILD(); 10564 10565 my %saw_indentation; 10566 my $line_count = 1; 10567 $saw_indentation{$indentation} = $indentation; 10568 10569 if ( $have_child || $move_right > 0 ) { 10570 $have_child = 0; 10571 my $max_length = 0; 10572 if ( $i == $ibeg ) { 10573 $max_length = total_line_length( $ibeg, $iend ); 10574 } 10575 10576 # look ahead at the rest of the lines of this batch.. 10577 my $line_t; 10578 foreach $line_t ( $line + 1 .. $max_line ) { 10579 my $ibeg_t = $$ri_first[$line_t]; 10580 my $iend_t = $$ri_last[$line_t]; 10581 last if ( $closing_index <= $ibeg_t ); 10582 10583 # remember all different indentation objects 10584 my $indentation_t = $leading_spaces_to_go[$ibeg_t]; 10585 $saw_indentation{$indentation_t} = $indentation_t; 10586 $line_count++; 10587 10588 # remember longest line in the group 10589 my $length_t = total_line_length( $ibeg_t, $iend_t ); 10590 if ( $length_t > $max_length ) { 10591 $max_length = $length_t; 10592 } 10593 } 10594 $right_margin = $rOpts_maximum_line_length - $max_length; 10595 if ( $right_margin < 0 ) { $right_margin = 0 } 10596 } 10597 10598 my $first_line_comma_count = 10599 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; 10600 my $comma_count = $indentation->get_COMMA_COUNT(); 10601 my $arrow_count = $indentation->get_ARROW_COUNT(); 10602 10603 # This is a simple approximate test for vertical alignment: 10604 # if we broke just after an opening paren, brace, bracket, 10605 # and there are 2 or more commas in the first line, 10606 # and there are no '=>'s, 10607 # then we are probably vertically aligned. We could set 10608 # an exact flag in sub scan_list, but this is good 10609 # enough. 10610 my $indentation_count = keys %saw_indentation; 10611 my $is_vertically_aligned = 10612 ( $i == $ibeg 10613 && $first_line_comma_count > 1 10614 && $indentation_count == 1 10615 && ( $arrow_count == 0 || $arrow_count == $line_count ) ); 10616 10617 # Make the move if possible .. 10618 if ( 10619 10620 # we can always move left 10621 $move_right < 0 10622 10623 # but we should only move right if we are sure it will 10624 # not spoil vertical alignment 10625 || ( $comma_count == 0 ) 10626 || ( $comma_count > 0 && !$is_vertically_aligned ) 10627 ) 10628 { 10629 my $move = 10630 ( $move_right <= $right_margin ) 10631 ? $move_right 10632 : $right_margin; 10633 10634 foreach ( keys %saw_indentation ) { 10635 $saw_indentation{$_} 10636 ->permanently_decrease_AVAILABLE_SPACES( -$move ); 10637 } 10638 } 10639 10640 # Otherwise, record what we want and the vertical aligner 10641 # will try to recover it. 10642 else { 10643 $indentation->set_RECOVERABLE_SPACES($move_right); 10644 } 10645 } 10646 } 10647 } 10648 return $do_not_pad; 10649} 10650 10651# flush is called to output any tokens in the pipeline, so that 10652# an alternate source of lines can be written in the correct order 10653 10654sub flush { 10655 destroy_one_line_block(); 10656 output_line_to_go(); 10657 Perl::Tidy::VerticalAligner::flush(); 10658} 10659 10660sub reset_block_text_accumulator { 10661 10662 # save text after 'if' and 'elsif' to append after 'else' 10663 if ($accumulating_text_for_block) { 10664 10665 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { 10666 push @{$rleading_block_if_elsif_text}, $leading_block_text; 10667 } 10668 } 10669 $accumulating_text_for_block = ""; 10670 $leading_block_text = ""; 10671 $leading_block_text_level = 0; 10672 $leading_block_text_length_exceeded = 0; 10673 $leading_block_text_line_number = 0; 10674 $leading_block_text_line_length = 0; 10675} 10676 10677sub set_block_text_accumulator { 10678 my $i = shift; 10679 $accumulating_text_for_block = $tokens_to_go[$i]; 10680 if ( $accumulating_text_for_block !~ /^els/ ) { 10681 $rleading_block_if_elsif_text = []; 10682 } 10683 $leading_block_text = ""; 10684 $leading_block_text_level = $levels_to_go[$i]; 10685 $leading_block_text_line_number = 10686 $vertical_aligner_object->get_output_line_number(); 10687 $leading_block_text_length_exceeded = 0; 10688 10689 # this will contain the column number of the last character 10690 # of the closing side comment 10691 $leading_block_text_line_length = 10692 length($accumulating_text_for_block) + 10693 length( $rOpts->{'closing-side-comment-prefix'} ) + 10694 $leading_block_text_level * $rOpts_indent_columns + 3; 10695} 10696 10697sub accumulate_block_text { 10698 my $i = shift; 10699 10700 # accumulate leading text for -csc, ignoring any side comments 10701 if ( $accumulating_text_for_block 10702 && !$leading_block_text_length_exceeded 10703 && $types_to_go[$i] ne '#' ) 10704 { 10705 10706 my $added_length = length( $tokens_to_go[$i] ); 10707 $added_length += 1 if $i == 0; 10708 my $new_line_length = $leading_block_text_line_length + $added_length; 10709 10710 # we can add this text if we don't exceed some limits.. 10711 if ( 10712 10713 # we must not have already exceeded the text length limit 10714 length($leading_block_text) < 10715 $rOpts_closing_side_comment_maximum_text 10716 10717 # and either: 10718 # the new total line length must be below the line length limit 10719 # or the new length must be below the text length limit 10720 # (ie, we may allow one token to exceed the text length limit) 10721 && ( $new_line_length < $rOpts_maximum_line_length 10722 || length($leading_block_text) + $added_length < 10723 $rOpts_closing_side_comment_maximum_text ) 10724 10725 # UNLESS: we are adding a closing paren before the brace we seek. 10726 # This is an attempt to avoid situations where the ... to be 10727 # added are longer than the omitted right paren, as in: 10728 10729 # foreach my $item (@a_rather_long_variable_name_here) { 10730 # &whatever; 10731 # } ## end foreach my $item (@a_rather_long_variable_name_here... 10732 10733 || ( 10734 $tokens_to_go[$i] eq ')' 10735 && ( 10736 ( 10737 $i + 1 <= $max_index_to_go 10738 && $block_type_to_go[ $i + 1 ] eq 10739 $accumulating_text_for_block 10740 ) 10741 || ( $i + 2 <= $max_index_to_go 10742 && $block_type_to_go[ $i + 2 ] eq 10743 $accumulating_text_for_block ) 10744 ) 10745 ) 10746 ) 10747 { 10748 10749 # add an extra space at each newline 10750 if ( $i == 0 ) { $leading_block_text .= ' ' } 10751 10752 # add the token text 10753 $leading_block_text .= $tokens_to_go[$i]; 10754 $leading_block_text_line_length = $new_line_length; 10755 } 10756 10757 # show that text was truncated if necessary 10758 elsif ( $types_to_go[$i] ne 'b' ) { 10759 $leading_block_text_length_exceeded = 1; 10760 $leading_block_text .= '...'; 10761 } 10762 } 10763} 10764 10765{ 10766 my %is_if_elsif_else_unless_while_until_for_foreach; 10767 10768 BEGIN { 10769 10770 # These block types may have text between the keyword and opening 10771 # curly. Note: 'else' does not, but must be included to allow trailing 10772 # if/elsif text to be appended. 10773 # patch for SWITCH/CASE: added 'case' and 'when' 10774 @_ = qw(if elsif else unless while until for foreach case when); 10775 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); 10776 } 10777 10778 sub accumulate_csc_text { 10779 10780 # called once per output buffer when -csc is used. Accumulates 10781 # the text placed after certain closing block braces. 10782 # Defines and returns the following for this buffer: 10783 10784 my $block_leading_text = ""; # the leading text of the last '}' 10785 my $rblock_leading_if_elsif_text; 10786 my $i_block_leading_text = 10787 -1; # index of token owning block_leading_text 10788 my $block_line_count = 100; # how many lines the block spans 10789 my $terminal_type = 'b'; # type of last nonblank token 10790 my $i_terminal = 0; # index of last nonblank token 10791 my $terminal_block_type = ""; 10792 10793 for my $i ( 0 .. $max_index_to_go ) { 10794 my $type = $types_to_go[$i]; 10795 my $block_type = $block_type_to_go[$i]; 10796 my $token = $tokens_to_go[$i]; 10797 10798 # remember last nonblank token type 10799 if ( $type ne '#' && $type ne 'b' ) { 10800 $terminal_type = $type; 10801 $terminal_block_type = $block_type; 10802 $i_terminal = $i; 10803 } 10804 10805 my $type_sequence = $type_sequence_to_go[$i]; 10806 if ( $block_type && $type_sequence ) { 10807 10808 if ( $token eq '}' ) { 10809 10810 # restore any leading text saved when we entered this block 10811 if ( defined( $block_leading_text{$type_sequence} ) ) { 10812 ( $block_leading_text, $rblock_leading_if_elsif_text ) = 10813 @{ $block_leading_text{$type_sequence} }; 10814 $i_block_leading_text = $i; 10815 delete $block_leading_text{$type_sequence}; 10816 $rleading_block_if_elsif_text = 10817 $rblock_leading_if_elsif_text; 10818 } 10819 10820 # if we run into a '}' then we probably started accumulating 10821 # at something like a trailing 'if' clause..no harm done. 10822 if ( $accumulating_text_for_block 10823 && $levels_to_go[$i] <= $leading_block_text_level ) 10824 { 10825 my $lev = $levels_to_go[$i]; 10826 reset_block_text_accumulator(); 10827 } 10828 10829 if ( defined( $block_opening_line_number{$type_sequence} ) ) 10830 { 10831 my $output_line_number = 10832 $vertical_aligner_object->get_output_line_number(); 10833 $block_line_count = 10834 $output_line_number - 10835 $block_opening_line_number{$type_sequence} + 1; 10836 delete $block_opening_line_number{$type_sequence}; 10837 } 10838 else { 10839 10840 # Error: block opening line undefined for this line.. 10841 # This shouldn't be possible, but it is not a 10842 # significant problem. 10843 } 10844 } 10845 10846 elsif ( $token eq '{' ) { 10847 10848 my $line_number = 10849 $vertical_aligner_object->get_output_line_number(); 10850 $block_opening_line_number{$type_sequence} = $line_number; 10851 10852 if ( $accumulating_text_for_block 10853 && $levels_to_go[$i] == $leading_block_text_level ) 10854 { 10855 10856 if ( $accumulating_text_for_block eq $block_type ) { 10857 10858 # save any leading text before we enter this block 10859 $block_leading_text{$type_sequence} = [ 10860 $leading_block_text, 10861 $rleading_block_if_elsif_text 10862 ]; 10863 $block_opening_line_number{$type_sequence} = 10864 $leading_block_text_line_number; 10865 reset_block_text_accumulator(); 10866 } 10867 else { 10868 10869 # shouldn't happen, but not a serious error. 10870 # We were accumulating -csc text for block type 10871 # $accumulating_text_for_block and unexpectedly 10872 # encountered a '{' for block type $block_type. 10873 } 10874 } 10875 } 10876 } 10877 10878 if ( $type eq 'k' 10879 && $csc_new_statement_ok 10880 && $is_if_elsif_else_unless_while_until_for_foreach{$token} 10881 && $token =~ /$closing_side_comment_list_pattern/o ) 10882 { 10883 set_block_text_accumulator($i); 10884 } 10885 else { 10886 10887 # note: ignoring type 'q' because of tricks being played 10888 # with 'q' for hanging side comments 10889 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { 10890 $csc_new_statement_ok = 10891 ( $block_type || $type eq 'J' || $type eq ';' ); 10892 } 10893 if ( $type eq ';' 10894 && $accumulating_text_for_block 10895 && $levels_to_go[$i] == $leading_block_text_level ) 10896 { 10897 reset_block_text_accumulator(); 10898 } 10899 else { 10900 accumulate_block_text($i); 10901 } 10902 } 10903 } 10904 10905 # Treat an 'else' block specially by adding preceding 'if' and 10906 # 'elsif' text. Otherwise, the 'end else' is not helpful, 10907 # especially for cuddled-else formatting. 10908 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { 10909 $block_leading_text = 10910 make_else_csc_text( $i_terminal, $terminal_block_type, 10911 $block_leading_text, $rblock_leading_if_elsif_text ); 10912 } 10913 10914 return ( $terminal_type, $i_terminal, $i_block_leading_text, 10915 $block_leading_text, $block_line_count ); 10916 } 10917} 10918 10919sub make_else_csc_text { 10920 10921 # create additional -csc text for an 'else' and optionally 'elsif', 10922 # depending on the value of switch 10923 # $rOpts_closing_side_comment_else_flag: 10924 # 10925 # = 0 add 'if' text to trailing else 10926 # = 1 same as 0 plus: 10927 # add 'if' to 'elsif's if can fit in line length 10928 # add last 'elsif' to trailing else if can fit in one line 10929 # = 2 same as 1 but do not check if exceed line length 10930 # 10931 # $rif_elsif_text = a reference to a list of all previous closing 10932 # side comments created for this if block 10933 # 10934 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; 10935 my $csc_text = $block_leading_text; 10936 10937 if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) 10938 { 10939 return $csc_text; 10940 } 10941 10942 my $count = @{$rif_elsif_text}; 10943 return $csc_text unless ($count); 10944 10945 my $if_text = '[ if' . $rif_elsif_text->[0]; 10946 10947 # always show the leading 'if' text on 'else' 10948 if ( $block_type eq 'else' ) { 10949 $csc_text .= $if_text; 10950 } 10951 10952 # see if that's all 10953 if ( $rOpts_closing_side_comment_else_flag == 0 ) { 10954 return $csc_text; 10955 } 10956 10957 my $last_elsif_text = ""; 10958 if ( $count > 1 ) { 10959 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; 10960 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } 10961 } 10962 10963 # tentatively append one more item 10964 my $saved_text = $csc_text; 10965 if ( $block_type eq 'else' ) { 10966 $csc_text .= $last_elsif_text; 10967 } 10968 else { 10969 $csc_text .= ' ' . $if_text; 10970 } 10971 10972 # all done if no length checks requested 10973 if ( $rOpts_closing_side_comment_else_flag == 2 ) { 10974 return $csc_text; 10975 } 10976 10977 # undo it if line length exceeded 10978 my $length = 10979 length($csc_text) + 10980 length($block_type) + 10981 length( $rOpts->{'closing-side-comment-prefix'} ) + 10982 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; 10983 if ( $length > $rOpts_maximum_line_length ) { 10984 $csc_text = $saved_text; 10985 } 10986 return $csc_text; 10987} 10988 10989{ # sub balance_csc_text 10990 10991 my %matching_char; 10992 10993 BEGIN { 10994 %matching_char = ( 10995 '{' => '}', 10996 '(' => ')', 10997 '[' => ']', 10998 '}' => '{', 10999 ')' => '(', 11000 ']' => '[', 11001 ); 11002 } 11003 11004 sub balance_csc_text { 11005 11006 # Append characters to balance a closing side comment so that editors 11007 # such as vim can correctly jump through code. 11008 # Simple Example: 11009 # input = ## end foreach my $foo ( sort { $b ... 11010 # output = ## end foreach my $foo ( sort { $b ...}) 11011 11012 # NOTE: This routine does not currently filter out structures within 11013 # quoted text because the bounce algorithims in text editors do not 11014 # necessarily do this either (a version of vim was checked and 11015 # did not do this). 11016 11017 # Some complex examples which will cause trouble for some editors: 11018 # while ( $mask_string =~ /\{[^{]*?\}/g ) { 11019 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { 11020 # if ( $1 eq '{' ) { 11021 # test file test1/braces.pl has many such examples. 11022 11023 my ($csc) = @_; 11024 11025 # loop to examine characters one-by-one, RIGHT to LEFT and 11026 # build a balancing ending, LEFT to RIGHT. 11027 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { 11028 11029 my $char = substr( $csc, $pos, 1 ); 11030 11031 # ignore everything except structural characters 11032 next unless ( $matching_char{$char} ); 11033 11034 # pop most recently appended character 11035 my $top = chop($csc); 11036 11037 # push it back plus the mate to the newest character 11038 # unless they balance each other. 11039 $csc = $csc . $top . $matching_char{$char} unless $top eq $char; 11040 } 11041 11042 # return the balanced string 11043 return $csc; 11044 } 11045} 11046 11047sub add_closing_side_comment { 11048 11049 # add closing side comments after closing block braces if -csc used 11050 my $cscw_block_comment; 11051 11052 #--------------------------------------------------------------- 11053 # Step 1: loop through all tokens of this line to accumulate 11054 # the text needed to create the closing side comments. Also see 11055 # how the line ends. 11056 #--------------------------------------------------------------- 11057 11058 my ( $terminal_type, $i_terminal, $i_block_leading_text, 11059 $block_leading_text, $block_line_count ) 11060 = accumulate_csc_text(); 11061 11062 #--------------------------------------------------------------- 11063 # Step 2: make the closing side comment if this ends a block 11064 #--------------------------------------------------------------- 11065 my $have_side_comment = $i_terminal != $max_index_to_go; 11066 11067 # if this line might end in a block closure.. 11068 if ( 11069 $terminal_type eq '}' 11070 11071 # ..and either 11072 && ( 11073 11074 # the block is long enough 11075 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) 11076 11077 # or there is an existing comment to check 11078 || ( $have_side_comment 11079 && $rOpts->{'closing-side-comment-warnings'} ) 11080 ) 11081 11082 # .. and if this is one of the types of interest 11083 && $block_type_to_go[$i_terminal] =~ 11084 /$closing_side_comment_list_pattern/o 11085 11086 # .. but not an anonymous sub 11087 # These are not normally of interest, and their closing braces are 11088 # often followed by commas or semicolons anyway. This also avoids 11089 # possible erratic output due to line numbering inconsistencies 11090 # in the cases where their closing braces terminate a line. 11091 && $block_type_to_go[$i_terminal] ne 'sub' 11092 11093 # ..and the corresponding opening brace must is not in this batch 11094 # (because we do not need to tag one-line blocks, although this 11095 # should also be caught with a positive -csci value) 11096 && $mate_index_to_go[$i_terminal] < 0 11097 11098 # ..and either 11099 && ( 11100 11101 # this is the last token (line doesnt have a side comment) 11102 !$have_side_comment 11103 11104 # or the old side comment is a closing side comment 11105 || $tokens_to_go[$max_index_to_go] =~ 11106 /$closing_side_comment_prefix_pattern/o 11107 ) 11108 ) 11109 { 11110 11111 # then make the closing side comment text 11112 my $token = 11113"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; 11114 11115 # append any extra descriptive text collected above 11116 if ( $i_block_leading_text == $i_terminal ) { 11117 $token .= $block_leading_text; 11118 } 11119 11120 $token = balance_csc_text($token) 11121 if $rOpts->{'closing-side-comments-balanced'}; 11122 11123 $token =~ s/\s*$//; # trim any trailing whitespace 11124 11125 # handle case of existing closing side comment 11126 if ($have_side_comment) { 11127 11128 # warn if requested and tokens differ significantly 11129 if ( $rOpts->{'closing-side-comment-warnings'} ) { 11130 my $old_csc = $tokens_to_go[$max_index_to_go]; 11131 my $new_csc = $token; 11132 $new_csc =~ s/\s+//g; # trim all whitespace 11133 $old_csc =~ s/\s+//g; # trim all whitespace 11134 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures 11135 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures 11136 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' 11137 my $new_trailing_dots = $1; 11138 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' 11139 11140 # Patch to handle multiple closing side comments at 11141 # else and elsif's. These have become too complicated 11142 # to check, so if we see an indication of 11143 # '[ if' or '[ # elsif', then assume they were made 11144 # by perltidy. 11145 if ( $block_type_to_go[$i_terminal] eq 'else' ) { 11146 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } 11147 } 11148 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { 11149 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } 11150 } 11151 11152 # if old comment is contained in new comment, 11153 # only compare the common part. 11154 if ( length($new_csc) > length($old_csc) ) { 11155 $new_csc = substr( $new_csc, 0, length($old_csc) ); 11156 } 11157 11158 # if the new comment is shorter and has been limited, 11159 # only compare the common part. 11160 if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) 11161 { 11162 $old_csc = substr( $old_csc, 0, length($new_csc) ); 11163 } 11164 11165 # any remaining difference? 11166 if ( $new_csc ne $old_csc ) { 11167 11168 # just leave the old comment if we are below the threshold 11169 # for creating side comments 11170 if ( $block_line_count < 11171 $rOpts->{'closing-side-comment-interval'} ) 11172 { 11173 $token = undef; 11174 } 11175 11176 # otherwise we'll make a note of it 11177 else { 11178 11179 warning( 11180"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" 11181 ); 11182 11183 # save the old side comment in a new trailing block comment 11184 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; 11185 $year += 1900; 11186 $month += 1; 11187 $cscw_block_comment = 11188"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; 11189 } 11190 } 11191 else { 11192 11193 # No differences.. we can safely delete old comment if we 11194 # are below the threshold 11195 if ( $block_line_count < 11196 $rOpts->{'closing-side-comment-interval'} ) 11197 { 11198 $token = undef; 11199 unstore_token_to_go() 11200 if ( $types_to_go[$max_index_to_go] eq '#' ); 11201 unstore_token_to_go() 11202 if ( $types_to_go[$max_index_to_go] eq 'b' ); 11203 } 11204 } 11205 } 11206 11207 # switch to the new csc (unless we deleted it!) 11208 $tokens_to_go[$max_index_to_go] = $token if $token; 11209 } 11210 11211 # handle case of NO existing closing side comment 11212 else { 11213 11214 # insert the new side comment into the output token stream 11215 my $type = '#'; 11216 my $block_type = ''; 11217 my $type_sequence = ''; 11218 my $container_environment = 11219 $container_environment_to_go[$max_index_to_go]; 11220 my $level = $levels_to_go[$max_index_to_go]; 11221 my $slevel = $nesting_depth_to_go[$max_index_to_go]; 11222 my $no_internal_newlines = 0; 11223 11224 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 11225 my $ci_level = $ci_levels_to_go[$max_index_to_go]; 11226 my $in_continued_quote = 0; 11227 11228 # first insert a blank token 11229 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); 11230 11231 # then the side comment 11232 insert_new_token_to_go( $token, $type, $slevel, 11233 $no_internal_newlines ); 11234 } 11235 } 11236 return $cscw_block_comment; 11237} 11238 11239sub previous_nonblank_token { 11240 my ($i) = @_; 11241 my $name = ""; 11242 my $im = $i - 1; 11243 return "" if ( $im < 0 ); 11244 if ( $types_to_go[$im] eq 'b' ) { $im--; } 11245 return "" if ( $im < 0 ); 11246 $name = $tokens_to_go[$im]; 11247 11248 # prepend any sub name to an isolated -> to avoid unwanted alignments 11249 # [test case is test8/penco.pl] 11250 if ( $name eq '->' ) { 11251 $im--; 11252 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { 11253 $name = $tokens_to_go[$im] . $name; 11254 } 11255 } 11256 return $name; 11257} 11258 11259sub send_lines_to_vertical_aligner { 11260 11261 my ( $ri_first, $ri_last, $do_not_pad ) = @_; 11262 11263 my $rindentation_list = [0]; # ref to indentations for each line 11264 11265 # define the array @matching_token_to_go for the output tokens 11266 # which will be non-blank for each special token (such as =>) 11267 # for which alignment is required. 11268 set_vertical_alignment_markers( $ri_first, $ri_last ); 11269 11270 # flush if necessary to avoid unwanted alignment 11271 my $must_flush = 0; 11272 if ( @$ri_first > 1 ) { 11273 11274 # flush before a long if statement 11275 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { 11276 $must_flush = 1; 11277 } 11278 } 11279 if ($must_flush) { 11280 Perl::Tidy::VerticalAligner::flush(); 11281 } 11282 11283 undo_ci( $ri_first, $ri_last ); 11284 11285 set_logical_padding( $ri_first, $ri_last ); 11286 11287 # loop to prepare each line for shipment 11288 my $n_last_line = @$ri_first - 1; 11289 my $in_comma_list; 11290 for my $n ( 0 .. $n_last_line ) { 11291 my $ibeg = $$ri_first[$n]; 11292 my $iend = $$ri_last[$n]; 11293 11294 my ( $rtokens, $rfields, $rpatterns ) = 11295 make_alignment_patterns( $ibeg, $iend ); 11296 11297 my ( $indentation, $lev, $level_end, $terminal_type, 11298 $is_semicolon_terminated, $is_outdented_line ) 11299 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, 11300 $ri_first, $ri_last, $rindentation_list ); 11301 11302 # we will allow outdenting of long lines.. 11303 my $outdent_long_lines = ( 11304 11305 # which are long quotes, if allowed 11306 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) 11307 11308 # which are long block comments, if allowed 11309 || ( 11310 $types_to_go[$ibeg] eq '#' 11311 && $rOpts->{'outdent-long-comments'} 11312 11313 # but not if this is a static block comment 11314 && !$is_static_block_comment 11315 ) 11316 ); 11317 11318 my $level_jump = 11319 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; 11320 11321 my $rvertical_tightness_flags = 11322 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, 11323 $ri_first, $ri_last ); 11324 11325 # flush an outdented line to avoid any unwanted vertical alignment 11326 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); 11327 11328 my $is_terminal_ternary = 0; 11329 if ( $tokens_to_go[$ibeg] eq ':' 11330 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) 11331 { 11332 if ( ( $terminal_type eq ';' && $level_end <= $lev ) 11333 || ( $level_end < $lev ) ) 11334 { 11335 $is_terminal_ternary = 1; 11336 } 11337 } 11338 11339 # send this new line down the pipe 11340 my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; 11341 Perl::Tidy::VerticalAligner::append_line( 11342 $lev, 11343 $level_end, 11344 $indentation, 11345 $rfields, 11346 $rtokens, 11347 $rpatterns, 11348 $forced_breakpoint_to_go[$iend] || $in_comma_list, 11349 $outdent_long_lines, 11350 $is_terminal_ternary, 11351 $is_semicolon_terminated, 11352 $do_not_pad, 11353 $rvertical_tightness_flags, 11354 $level_jump, 11355 ); 11356 $in_comma_list = 11357 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; 11358 11359 # flush an outdented line to avoid any unwanted vertical alignment 11360 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); 11361 11362 $do_not_pad = 0; 11363 11364 } # end of loop to output each line 11365 11366 # remember indentation of lines containing opening containers for 11367 # later use by sub set_adjusted_indentation 11368 save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); 11369} 11370 11371{ # begin make_alignment_patterns 11372 11373 my %block_type_map; 11374 my %keyword_map; 11375 11376 BEGIN { 11377 11378 # map related block names into a common name to 11379 # allow alignment 11380 %block_type_map = ( 11381 'unless' => 'if', 11382 'else' => 'if', 11383 'elsif' => 'if', 11384 'when' => 'if', 11385 'default' => 'if', 11386 'case' => 'if', 11387 'sort' => 'map', 11388 'grep' => 'map', 11389 ); 11390 11391 # map certain keywords to the same 'if' class to align 11392 # long if/elsif sequences. [elsif.pl] 11393 %keyword_map = ( 11394 'unless' => 'if', 11395 'else' => 'if', 11396 'elsif' => 'if', 11397 'when' => 'given', 11398 'default' => 'given', 11399 'case' => 'switch', 11400 11401 # treat an 'undef' similar to numbers and quotes 11402 'undef' => 'Q', 11403 ); 11404 } 11405 11406 sub make_alignment_patterns { 11407 11408 # Here we do some important preliminary work for the 11409 # vertical aligner. We create three arrays for one 11410 # output line. These arrays contain strings that can 11411 # be tested by the vertical aligner to see if 11412 # consecutive lines can be aligned vertically. 11413 # 11414 # The three arrays are indexed on the vertical 11415 # alignment fields and are: 11416 # @tokens - a list of any vertical alignment tokens for this line. 11417 # These are tokens, such as '=' '&&' '#' etc which 11418 # we want to might align vertically. These are 11419 # decorated with various information such as 11420 # nesting depth to prevent unwanted vertical 11421 # alignment matches. 11422 # @fields - the actual text of the line between the vertical alignment 11423 # tokens. 11424 # @patterns - a modified list of token types, one for each alignment 11425 # field. These should normally each match before alignment is 11426 # allowed, even when the alignment tokens match. 11427 my ( $ibeg, $iend ) = @_; 11428 my @tokens = (); 11429 my @fields = (); 11430 my @patterns = (); 11431 my $i_start = $ibeg; 11432 my $i; 11433 11434 my $depth = 0; 11435 my @container_name = (""); 11436 my @multiple_comma_arrows = (undef); 11437 11438 my $j = 0; # field index 11439 11440 $patterns[0] = ""; 11441 for $i ( $ibeg .. $iend ) { 11442 11443 # Keep track of containers balanced on this line only. 11444 # These are used below to prevent unwanted cross-line alignments. 11445 # Unbalanced containers already avoid aligning across 11446 # container boundaries. 11447 if ( $tokens_to_go[$i] eq '(' ) { 11448 11449 # if container is balanced on this line... 11450 my $i_mate = $mate_index_to_go[$i]; 11451 if ( $i_mate > $i && $i_mate <= $iend ) { 11452 $depth++; 11453 my $seqno = $type_sequence_to_go[$i]; 11454 my $count = comma_arrow_count($seqno); 11455 $multiple_comma_arrows[$depth] = $count && $count > 1; 11456 11457 # Append the previous token name to make the container name 11458 # more unique. This name will also be given to any commas 11459 # within this container, and it helps avoid undesirable 11460 # alignments of different types of containers. 11461 my $name = previous_nonblank_token($i); 11462 $name =~ s/^->//; 11463 $container_name[$depth] = "+" . $name; 11464 11465 # Make the container name even more unique if necessary. 11466 # If we are not vertically aligning this opening paren, 11467 # append a character count to avoid bad alignment because 11468 # it usually looks bad to align commas within continers 11469 # for which the opening parens do not align. Here 11470 # is an example very BAD alignment of commas (because 11471 # the atan2 functions are not all aligned): 11472 # $XY = 11473 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + 11474 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - 11475 # $X * atan2( $X, 1 ) - 11476 # $Y * atan2( $Y, 1 ); 11477 # 11478 # On the other hand, it is usually okay to align commas if 11479 # opening parens align, such as: 11480 # glVertex3d( $cx + $s * $xs, $cy, $z ); 11481 # glVertex3d( $cx, $cy + $s * $ys, $z ); 11482 # glVertex3d( $cx - $s * $xs, $cy, $z ); 11483 # glVertex3d( $cx, $cy - $s * $ys, $z ); 11484 # 11485 # To distinguish between these situations, we will 11486 # append the length of the line from the previous matching 11487 # token, or beginning of line, to the function name. This 11488 # will allow the vertical aligner to reject undesirable 11489 # matches. 11490 11491 # if we are not aligning on this paren... 11492 if ( $matching_token_to_go[$i] eq '' ) { 11493 11494 # Sum length from previous alignment, or start of line. 11495 # Note that we have to sum token lengths here because 11496 # padding has been done and so array $lengths_to_go 11497 # is now wrong. 11498 my $len = 11499 length( 11500 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); 11501 $len += leading_spaces_to_go($i_start) 11502 if ( $i_start == $ibeg ); 11503 11504 # tack length onto the container name to make unique 11505 $container_name[$depth] .= "-" . $len; 11506 } 11507 } 11508 } 11509 elsif ( $tokens_to_go[$i] eq ')' ) { 11510 $depth-- if $depth > 0; 11511 } 11512 11513 # if we find a new synchronization token, we are done with 11514 # a field 11515 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { 11516 11517 my $tok = my $raw_tok = $matching_token_to_go[$i]; 11518 11519 # make separators in different nesting depths unique 11520 # by appending the nesting depth digit. 11521 if ( $raw_tok ne '#' ) { 11522 $tok .= "$nesting_depth_to_go[$i]"; 11523 } 11524 11525 # also decorate commas with any container name to avoid 11526 # unwanted cross-line alignments. 11527 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { 11528 if ( $container_name[$depth] ) { 11529 $tok .= $container_name[$depth]; 11530 } 11531 } 11532 11533 # Patch to avoid aligning leading and trailing if, unless. 11534 # Mark trailing if, unless statements with container names. 11535 # This makes them different from leading if, unless which 11536 # are not so marked at present. If we ever need to name 11537 # them too, we could use ci to distinguish them. 11538 # Example problem to avoid: 11539 # return ( 2, "DBERROR" ) 11540 # if ( $retval == 2 ); 11541 # if ( scalar @_ ) { 11542 # my ( $a, $b, $c, $d, $e, $f ) = @_; 11543 # } 11544 if ( $raw_tok eq '(' ) { 11545 my $ci = $ci_levels_to_go[$ibeg]; 11546 if ( $container_name[$depth] =~ /^\+(if|unless)/ 11547 && $ci ) 11548 { 11549 $tok .= $container_name[$depth]; 11550 } 11551 } 11552 11553 # Decorate block braces with block types to avoid 11554 # unwanted alignments such as the following: 11555 # foreach ( @{$routput_array} ) { $fh->print($_) } 11556 # eval { $fh->close() }; 11557 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { 11558 my $block_type = $block_type_to_go[$i]; 11559 11560 # map certain related block types to allow 11561 # else blocks to align 11562 $block_type = $block_type_map{$block_type} 11563 if ( defined( $block_type_map{$block_type} ) ); 11564 11565 # remove sub names to allow one-line sub braces to align 11566 # regardless of name 11567 if ( $block_type =~ /^sub / ) { $block_type = 'sub' } 11568 11569 # allow all control-type blocks to align 11570 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } 11571 11572 $tok .= $block_type; 11573 } 11574 11575 # concatenate the text of the consecutive tokens to form 11576 # the field 11577 push( @fields, 11578 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); 11579 11580 # store the alignment token for this field 11581 push( @tokens, $tok ); 11582 11583 # get ready for the next batch 11584 $i_start = $i; 11585 $j++; 11586 $patterns[$j] = ""; 11587 } 11588 11589 # continue accumulating tokens 11590 # handle non-keywords.. 11591 if ( $types_to_go[$i] ne 'k' ) { 11592 my $type = $types_to_go[$i]; 11593 11594 # Mark most things before arrows as a quote to 11595 # get them to line up. Testfile: mixed.pl. 11596 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { 11597 my $next_type = $types_to_go[ $i + 1 ]; 11598 my $i_next_nonblank = 11599 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 11600 11601 if ( $types_to_go[$i_next_nonblank] eq '=>' ) { 11602 $type = 'Q'; 11603 11604 # Patch to ignore leading minus before words, 11605 # by changing pattern 'mQ' into just 'Q', 11606 # so that we can align things like this: 11607 # Button => "Print letter \"~$_\"", 11608 # -command => [ sub { print "$_[0]\n" }, $_ ], 11609 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } 11610 } 11611 } 11612 11613 # patch to make numbers and quotes align 11614 if ( $type eq 'n' ) { $type = 'Q' } 11615 11616 # patch to ignore any ! in patterns 11617 if ( $type eq '!' ) { $type = '' } 11618 11619 $patterns[$j] .= $type; 11620 } 11621 11622 # for keywords we have to use the actual text 11623 else { 11624 11625 my $tok = $tokens_to_go[$i]; 11626 11627 # but map certain keywords to a common string to allow 11628 # alignment. 11629 $tok = $keyword_map{$tok} 11630 if ( defined( $keyword_map{$tok} ) ); 11631 $patterns[$j] .= $tok; 11632 } 11633 } 11634 11635 # done with this line .. join text of tokens to make the last field 11636 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); 11637 return ( \@tokens, \@fields, \@patterns ); 11638 } 11639 11640} # end make_alignment_patterns 11641 11642{ # begin unmatched_indexes 11643 11644 # closure to keep track of unbalanced containers. 11645 # arrays shared by the routines in this block: 11646 my @unmatched_opening_indexes_in_this_batch; 11647 my @unmatched_closing_indexes_in_this_batch; 11648 my %comma_arrow_count; 11649 11650 sub is_unbalanced_batch { 11651 @unmatched_opening_indexes_in_this_batch + 11652 @unmatched_closing_indexes_in_this_batch; 11653 } 11654 11655 sub comma_arrow_count { 11656 my $seqno = $_[0]; 11657 return $comma_arrow_count{$seqno}; 11658 } 11659 11660 sub match_opening_and_closing_tokens { 11661 11662 # Match up indexes of opening and closing braces, etc, in this batch. 11663 # This has to be done after all tokens are stored because unstoring 11664 # of tokens would otherwise cause trouble. 11665 11666 @unmatched_opening_indexes_in_this_batch = (); 11667 @unmatched_closing_indexes_in_this_batch = (); 11668 %comma_arrow_count = (); 11669 11670 my ( $i, $i_mate, $token ); 11671 foreach $i ( 0 .. $max_index_to_go ) { 11672 if ( $type_sequence_to_go[$i] ) { 11673 $token = $tokens_to_go[$i]; 11674 if ( $token =~ /^[\(\[\{\?]$/ ) { 11675 push @unmatched_opening_indexes_in_this_batch, $i; 11676 } 11677 elsif ( $token =~ /^[\)\]\}\:]$/ ) { 11678 11679 $i_mate = pop @unmatched_opening_indexes_in_this_batch; 11680 if ( defined($i_mate) && $i_mate >= 0 ) { 11681 if ( $type_sequence_to_go[$i_mate] == 11682 $type_sequence_to_go[$i] ) 11683 { 11684 $mate_index_to_go[$i] = $i_mate; 11685 $mate_index_to_go[$i_mate] = $i; 11686 } 11687 else { 11688 push @unmatched_opening_indexes_in_this_batch, 11689 $i_mate; 11690 push @unmatched_closing_indexes_in_this_batch, $i; 11691 } 11692 } 11693 else { 11694 push @unmatched_closing_indexes_in_this_batch, $i; 11695 } 11696 } 11697 } 11698 elsif ( $tokens_to_go[$i] eq '=>' ) { 11699 if (@unmatched_opening_indexes_in_this_batch) { 11700 my $j = $unmatched_opening_indexes_in_this_batch[-1]; 11701 my $seqno = $type_sequence_to_go[$j]; 11702 $comma_arrow_count{$seqno}++; 11703 } 11704 } 11705 } 11706 } 11707 11708 sub save_opening_indentation { 11709 11710 # This should be called after each batch of tokens is output. It 11711 # saves indentations of lines of all unmatched opening tokens. 11712 # These will be used by sub get_opening_indentation. 11713 11714 my ( $ri_first, $ri_last, $rindentation_list ) = @_; 11715 11716 # we no longer need indentations of any saved indentations which 11717 # are unmatched closing tokens in this batch, because we will 11718 # never encounter them again. So we can delete them to keep 11719 # the hash size down. 11720 foreach (@unmatched_closing_indexes_in_this_batch) { 11721 my $seqno = $type_sequence_to_go[$_]; 11722 delete $saved_opening_indentation{$seqno}; 11723 } 11724 11725 # we need to save indentations of any unmatched opening tokens 11726 # in this batch because we may need them in a subsequent batch. 11727 foreach (@unmatched_opening_indexes_in_this_batch) { 11728 my $seqno = $type_sequence_to_go[$_]; 11729 $saved_opening_indentation{$seqno} = [ 11730 lookup_opening_indentation( 11731 $_, $ri_first, $ri_last, $rindentation_list 11732 ) 11733 ]; 11734 } 11735 } 11736} # end unmatched_indexes 11737 11738sub get_opening_indentation { 11739 11740 # get the indentation of the line which output the opening token 11741 # corresponding to a given closing token in the current output batch. 11742 # 11743 # given: 11744 # $i_closing - index in this line of a closing token ')' '}' or ']' 11745 # 11746 # $ri_first - reference to list of the first index $i for each output 11747 # line in this batch 11748 # $ri_last - reference to list of the last index $i for each output line 11749 # in this batch 11750 # $rindentation_list - reference to a list containing the indentation 11751 # used for each line. 11752 # 11753 # return: 11754 # -the indentation of the line which contained the opening token 11755 # which matches the token at index $i_opening 11756 # -and its offset (number of columns) from the start of the line 11757 # 11758 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; 11759 11760 # first, see if the opening token is in the current batch 11761 my $i_opening = $mate_index_to_go[$i_closing]; 11762 my ( $indent, $offset, $is_leading, $exists ); 11763 $exists = 1; 11764 if ( $i_opening >= 0 ) { 11765 11766 # it is..look up the indentation 11767 ( $indent, $offset, $is_leading ) = 11768 lookup_opening_indentation( $i_opening, $ri_first, $ri_last, 11769 $rindentation_list ); 11770 } 11771 11772 # if not, it should have been stored in the hash by a previous batch 11773 else { 11774 my $seqno = $type_sequence_to_go[$i_closing]; 11775 if ($seqno) { 11776 if ( $saved_opening_indentation{$seqno} ) { 11777 ( $indent, $offset, $is_leading ) = 11778 @{ $saved_opening_indentation{$seqno} }; 11779 } 11780 11781 # some kind of serious error 11782 # (example is badfile.t) 11783 else { 11784 $indent = 0; 11785 $offset = 0; 11786 $is_leading = 0; 11787 $exists = 0; 11788 } 11789 } 11790 11791 # if no sequence number it must be an unbalanced container 11792 else { 11793 $indent = 0; 11794 $offset = 0; 11795 $is_leading = 0; 11796 $exists = 0; 11797 } 11798 } 11799 return ( $indent, $offset, $is_leading, $exists ); 11800} 11801 11802sub lookup_opening_indentation { 11803 11804 # get the indentation of the line in the current output batch 11805 # which output a selected opening token 11806 # 11807 # given: 11808 # $i_opening - index of an opening token in the current output batch 11809 # whose line indentation we need 11810 # $ri_first - reference to list of the first index $i for each output 11811 # line in this batch 11812 # $ri_last - reference to list of the last index $i for each output line 11813 # in this batch 11814 # $rindentation_list - reference to a list containing the indentation 11815 # used for each line. (NOTE: the first slot in 11816 # this list is the last returned line number, and this is 11817 # followed by the list of indentations). 11818 # 11819 # return 11820 # -the indentation of the line which contained token $i_opening 11821 # -and its offset (number of columns) from the start of the line 11822 11823 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; 11824 11825 my $nline = $rindentation_list->[0]; # line number of previous lookup 11826 11827 # reset line location if necessary 11828 $nline = 0 if ( $i_opening < $ri_start->[$nline] ); 11829 11830 # find the correct line 11831 unless ( $i_opening > $ri_last->[-1] ) { 11832 while ( $i_opening > $ri_last->[$nline] ) { $nline++; } 11833 } 11834 11835 # error - token index is out of bounds - shouldn't happen 11836 else { 11837 warning( 11838"non-fatal program bug in lookup_opening_indentation - index out of range\n" 11839 ); 11840 report_definite_bug(); 11841 $nline = $#{$ri_last}; 11842 } 11843 11844 $rindentation_list->[0] = 11845 $nline; # save line number to start looking next call 11846 my $ibeg = $ri_start->[$nline]; 11847 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; 11848 my $is_leading = ( $ibeg == $i_opening ); 11849 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); 11850} 11851 11852{ 11853 my %is_if_elsif_else_unless_while_until_for_foreach; 11854 11855 BEGIN { 11856 11857 # These block types may have text between the keyword and opening 11858 # curly. Note: 'else' does not, but must be included to allow trailing 11859 # if/elsif text to be appended. 11860 # patch for SWITCH/CASE: added 'case' and 'when' 11861 @_ = qw(if elsif else unless while until for foreach case when); 11862 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); 11863 } 11864 11865 sub set_adjusted_indentation { 11866 11867 # This routine has the final say regarding the actual indentation of 11868 # a line. It starts with the basic indentation which has been 11869 # defined for the leading token, and then takes into account any 11870 # options that the user has set regarding special indenting and 11871 # outdenting. 11872 11873 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, 11874 $rindentation_list ) 11875 = @_; 11876 11877 # we need to know the last token of this line 11878 my ( $terminal_type, $i_terminal ) = 11879 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); 11880 11881 my $is_outdented_line = 0; 11882 11883 my $is_semicolon_terminated = $terminal_type eq ';' 11884 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; 11885 11886 ########################################################## 11887 # Section 1: set a flag and a default indentation 11888 # 11889 # Most lines are indented according to the initial token. 11890 # But it is common to outdent to the level just after the 11891 # terminal token in certain cases... 11892 # adjust_indentation flag: 11893 # 0 - do not adjust 11894 # 1 - outdent 11895 # 2 - vertically align with opening token 11896 # 3 - indent 11897 ########################################################## 11898 my $adjust_indentation = 0; 11899 my $default_adjust_indentation = $adjust_indentation; 11900 11901 my ( 11902 $opening_indentation, $opening_offset, 11903 $is_leading, $opening_exists 11904 ); 11905 11906 # if we are at a closing token of some type.. 11907 if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) { 11908 11909 # get the indentation of the line containing the corresponding 11910 # opening token 11911 ( 11912 $opening_indentation, $opening_offset, 11913 $is_leading, $opening_exists 11914 ) 11915 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 11916 $rindentation_list ); 11917 11918 # First set the default behavior: 11919 # default behavior is to outdent closing lines 11920 # of the form: "); }; ]; )->xxx;" 11921 if ( 11922 $is_semicolon_terminated 11923 11924 # and 'cuddled parens' of the form: ")->pack(" 11925 || ( 11926 $terminal_type eq '(' 11927 && $types_to_go[$ibeg] eq ')' 11928 && ( $nesting_depth_to_go[$iend] + 1 == 11929 $nesting_depth_to_go[$ibeg] ) 11930 ) 11931 ) 11932 { 11933 $adjust_indentation = 1; 11934 } 11935 11936 # TESTING: outdent something like '),' 11937 if ( 11938 $terminal_type eq ',' 11939 11940 # allow just one character before the comma 11941 && $i_terminal == $ibeg + 1 11942 11943 # requre LIST environment; otherwise, we may outdent too much -- 11944 # this can happen in calls without parentheses (overload.t); 11945 && $container_environment_to_go[$i_terminal] eq 'LIST' 11946 ) 11947 { 11948 $adjust_indentation = 1; 11949 } 11950 11951 # undo continuation indentation of a terminal closing token if 11952 # it is the last token before a level decrease. This will allow 11953 # a closing token to line up with its opening counterpart, and 11954 # avoids a indentation jump larger than 1 level. 11955 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ 11956 && $i_terminal == $ibeg ) 11957 { 11958 my $ci = $ci_levels_to_go[$ibeg]; 11959 my $lev = $levels_to_go[$ibeg]; 11960 my $next_type = $types_to_go[ $ibeg + 1 ]; 11961 my $i_next_nonblank = 11962 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); 11963 if ( $i_next_nonblank <= $max_index_to_go 11964 && $levels_to_go[$i_next_nonblank] < $lev ) 11965 { 11966 $adjust_indentation = 1; 11967 } 11968 } 11969 11970 # YVES patch 1 of 2: 11971 # Undo ci of line with leading closing eval brace, 11972 # but not beyond the indention of the line with 11973 # the opening brace. 11974 if ( $block_type_to_go[$ibeg] eq 'eval' 11975 && !$rOpts->{'line-up-parentheses'} 11976 && !$rOpts->{'indent-closing-brace'} ) 11977 { 11978 ( 11979 $opening_indentation, $opening_offset, 11980 $is_leading, $opening_exists 11981 ) 11982 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 11983 $rindentation_list ); 11984 my $indentation = $leading_spaces_to_go[$ibeg]; 11985 if ( defined($opening_indentation) 11986 && $indentation > $opening_indentation ) 11987 { 11988 $adjust_indentation = 1; 11989 } 11990 } 11991 11992 $default_adjust_indentation = $adjust_indentation; 11993 11994 # Now modify default behavior according to user request: 11995 # handle option to indent non-blocks of the form ); }; ]; 11996 # But don't do special indentation to something like ')->pack(' 11997 if ( !$block_type_to_go[$ibeg] ) { 11998 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; 11999 if ( $cti == 1 ) { 12000 if ( $i_terminal <= $ibeg + 1 12001 || $is_semicolon_terminated ) 12002 { 12003 $adjust_indentation = 2; 12004 } 12005 else { 12006 $adjust_indentation = 0; 12007 } 12008 } 12009 elsif ( $cti == 2 ) { 12010 if ($is_semicolon_terminated) { 12011 $adjust_indentation = 3; 12012 } 12013 else { 12014 $adjust_indentation = 0; 12015 } 12016 } 12017 elsif ( $cti == 3 ) { 12018 $adjust_indentation = 3; 12019 } 12020 } 12021 12022 # handle option to indent blocks 12023 else { 12024 if ( 12025 $rOpts->{'indent-closing-brace'} 12026 && ( 12027 $i_terminal == $ibeg # isolated terminal '}' 12028 || $is_semicolon_terminated 12029 ) 12030 ) # } xxxx ; 12031 { 12032 $adjust_indentation = 3; 12033 } 12034 } 12035 } 12036 12037 # if at ');', '};', '>;', and '];' of a terminal qw quote 12038 elsif ($$rpatterns[0] =~ /^qb*;$/ 12039 && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) 12040 { 12041 if ( $closing_token_indentation{$1} == 0 ) { 12042 $adjust_indentation = 1; 12043 } 12044 else { 12045 $adjust_indentation = 3; 12046 } 12047 } 12048 12049 # if line begins with a ':', align it with any 12050 # previous line leading with corresponding ? 12051 elsif ( $types_to_go[$ibeg] eq ':' ) { 12052 ( 12053 $opening_indentation, $opening_offset, 12054 $is_leading, $opening_exists 12055 ) 12056 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 12057 $rindentation_list ); 12058 if ($is_leading) { $adjust_indentation = 2; } 12059 } 12060 12061 ########################################################## 12062 # Section 2: set indentation according to flag set above 12063 # 12064 # Select the indentation object to define leading 12065 # whitespace. If we are outdenting something like '} } );' 12066 # then we want to use one level below the last token 12067 # ($i_terminal) in order to get it to fully outdent through 12068 # all levels. 12069 ########################################################## 12070 my $indentation; 12071 my $lev; 12072 my $level_end = $levels_to_go[$iend]; 12073 12074 if ( $adjust_indentation == 0 ) { 12075 $indentation = $leading_spaces_to_go[$ibeg]; 12076 $lev = $levels_to_go[$ibeg]; 12077 } 12078 elsif ( $adjust_indentation == 1 ) { 12079 $indentation = $reduced_spaces_to_go[$i_terminal]; 12080 $lev = $levels_to_go[$i_terminal]; 12081 } 12082 12083 # handle indented closing token which aligns with opening token 12084 elsif ( $adjust_indentation == 2 ) { 12085 12086 # handle option to align closing token with opening token 12087 $lev = $levels_to_go[$ibeg]; 12088 12089 # calculate spaces needed to align with opening token 12090 my $space_count = 12091 get_SPACES($opening_indentation) + $opening_offset; 12092 12093 # Indent less than the previous line. 12094 # 12095 # Problem: For -lp we don't exactly know what it was if there 12096 # were recoverable spaces sent to the aligner. A good solution 12097 # would be to force a flush of the vertical alignment buffer, so 12098 # that we would know. For now, this rule is used for -lp: 12099 # 12100 # When the last line did not start with a closing token we will 12101 # be optimistic that the aligner will recover everything wanted. 12102 # 12103 # This rule will prevent us from breaking a hierarchy of closing 12104 # tokens, and in a worst case will leave a closing paren too far 12105 # indented, but this is better than frequently leaving it not 12106 # indented enough. 12107 my $last_spaces = get_SPACES($last_indentation_written); 12108 if ( $last_leading_token !~ /^[\}\]\)]$/ ) { 12109 $last_spaces += 12110 get_RECOVERABLE_SPACES($last_indentation_written); 12111 } 12112 12113 # reset the indentation to the new space count if it works 12114 # only options are all or none: nothing in-between looks good 12115 $lev = $levels_to_go[$ibeg]; 12116 if ( $space_count < $last_spaces ) { 12117 if ($rOpts_line_up_parentheses) { 12118 my $lev = $levels_to_go[$ibeg]; 12119 $indentation = 12120 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); 12121 } 12122 else { 12123 $indentation = $space_count; 12124 } 12125 } 12126 12127 # revert to default if it doesnt work 12128 else { 12129 $space_count = leading_spaces_to_go($ibeg); 12130 if ( $default_adjust_indentation == 0 ) { 12131 $indentation = $leading_spaces_to_go[$ibeg]; 12132 } 12133 elsif ( $default_adjust_indentation == 1 ) { 12134 $indentation = $reduced_spaces_to_go[$i_terminal]; 12135 $lev = $levels_to_go[$i_terminal]; 12136 } 12137 } 12138 } 12139 12140 # Full indentaion of closing tokens (-icb and -icp or -cti=2) 12141 else { 12142 12143 # handle -icb (indented closing code block braces) 12144 # Updated method for indented block braces: indent one full level if 12145 # there is no continuation indentation. This will occur for major 12146 # structures such as sub, if, else, but not for things like map 12147 # blocks. 12148 # 12149 # Note: only code blocks without continuation indentation are 12150 # handled here (if, else, unless, ..). In the following snippet, 12151 # the terminal brace of the sort block will have continuation 12152 # indentation as shown so it will not be handled by the coding 12153 # here. We would have to undo the continuation indentation to do 12154 # this, but it probably looks ok as is. This is a possible future 12155 # update for semicolon terminated lines. 12156 # 12157 # if ($sortby eq 'date' or $sortby eq 'size') { 12158 # @files = sort { 12159 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} 12160 # or $a cmp $b 12161 # } @files; 12162 # } 12163 # 12164 if ( $block_type_to_go[$ibeg] 12165 && $ci_levels_to_go[$i_terminal] == 0 ) 12166 { 12167 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] ); 12168 $indentation = $spaces + $rOpts_indent_columns; 12169 12170 # NOTE: for -lp we could create a new indentation object, but 12171 # there is probably no need to do it 12172 } 12173 12174 # handle -icp and any -icb block braces which fall through above 12175 # test such as the 'sort' block mentioned above. 12176 else { 12177 12178 # There are currently two ways to handle -icp... 12179 # One way is to use the indentation of the previous line: 12180 # $indentation = $last_indentation_written; 12181 12182 # The other way is to use the indentation that the previous line 12183 # would have had if it hadn't been adjusted: 12184 $indentation = $last_unadjusted_indentation; 12185 12186 # Current method: use the minimum of the two. This avoids 12187 # inconsistent indentation. 12188 if ( get_SPACES($last_indentation_written) < 12189 get_SPACES($indentation) ) 12190 { 12191 $indentation = $last_indentation_written; 12192 } 12193 } 12194 12195 # use previous indentation but use own level 12196 # to cause list to be flushed properly 12197 $lev = $levels_to_go[$ibeg]; 12198 } 12199 12200 # remember indentation except for multi-line quotes, which get 12201 # no indentation 12202 unless ( $ibeg == 0 && $starting_in_quote ) { 12203 $last_indentation_written = $indentation; 12204 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; 12205 $last_leading_token = $tokens_to_go[$ibeg]; 12206 } 12207 12208 # be sure lines with leading closing tokens are not outdented more 12209 # than the line which contained the corresponding opening token. 12210 12211 ############################################################# 12212 # updated per bug report in alex_bug.pl: we must not 12213 # mess with the indentation of closing logical braces so 12214 # we must treat something like '} else {' as if it were 12215 # an isolated brace my $is_isolated_block_brace = ( 12216 # $iend == $ibeg ) && $block_type_to_go[$ibeg]; 12217 ############################################################# 12218 my $is_isolated_block_brace = $block_type_to_go[$ibeg] 12219 && ( $iend == $ibeg 12220 || $is_if_elsif_else_unless_while_until_for_foreach{ 12221 $block_type_to_go[$ibeg] } ); 12222 12223 # only do this for a ':; which is aligned with its leading '?' 12224 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; 12225 if ( defined($opening_indentation) 12226 && !$is_isolated_block_brace 12227 && !$is_unaligned_colon ) 12228 { 12229 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { 12230 $indentation = $opening_indentation; 12231 } 12232 } 12233 12234 # remember the indentation of each line of this batch 12235 push @{$rindentation_list}, $indentation; 12236 12237 # outdent lines with certain leading tokens... 12238 if ( 12239 12240 # must be first word of this batch 12241 $ibeg == 0 12242 12243 # and ... 12244 && ( 12245 12246 # certain leading keywords if requested 12247 ( 12248 $rOpts->{'outdent-keywords'} 12249 && $types_to_go[$ibeg] eq 'k' 12250 && $outdent_keyword{ $tokens_to_go[$ibeg] } 12251 ) 12252 12253 # or labels if requested 12254 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) 12255 12256 # or static block comments if requested 12257 || ( $types_to_go[$ibeg] eq '#' 12258 && $rOpts->{'outdent-static-block-comments'} 12259 && $is_static_block_comment ) 12260 ) 12261 ) 12262 12263 { 12264 my $space_count = leading_spaces_to_go($ibeg); 12265 if ( $space_count > 0 ) { 12266 $space_count -= $rOpts_continuation_indentation; 12267 $is_outdented_line = 1; 12268 if ( $space_count < 0 ) { $space_count = 0 } 12269 12270 # do not promote a spaced static block comment to non-spaced; 12271 # this is not normally necessary but could be for some 12272 # unusual user inputs (such as -ci = -i) 12273 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { 12274 $space_count = 1; 12275 } 12276 12277 if ($rOpts_line_up_parentheses) { 12278 $indentation = 12279 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); 12280 } 12281 else { 12282 $indentation = $space_count; 12283 } 12284 } 12285 } 12286 12287 return ( $indentation, $lev, $level_end, $terminal_type, 12288 $is_semicolon_terminated, $is_outdented_line ); 12289 } 12290} 12291 12292sub set_vertical_tightness_flags { 12293 12294 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; 12295 12296 # Define vertical tightness controls for the nth line of a batch. 12297 # We create an array of parameters which tell the vertical aligner 12298 # if we should combine this line with the next line to achieve the 12299 # desired vertical tightness. The array of parameters contains: 12300 # 12301 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace 12302 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok 12303 # if closing: spaces of padding to use 12304 # [2] sequence number of container 12305 # [3] valid flag: do not append if this flag is false. Will be 12306 # true if appropriate -vt flag is set. Otherwise, Will be 12307 # made true only for 2 line container in parens with -lp 12308 # 12309 # These flags are used by sub set_leading_whitespace in 12310 # the vertical aligner 12311 12312 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; 12313 12314 # For non-BLOCK tokens, we will need to examine the next line 12315 # too, so we won't consider the last line. 12316 if ( $n < $n_last_line ) { 12317 12318 # see if last token is an opening token...not a BLOCK... 12319 my $ibeg_next = $$ri_first[ $n + 1 ]; 12320 my $token_end = $tokens_to_go[$iend]; 12321 my $iend_next = $$ri_last[ $n + 1 ]; 12322 if ( 12323 $type_sequence_to_go[$iend] 12324 && !$block_type_to_go[$iend] 12325 && $is_opening_token{$token_end} 12326 && ( 12327 $opening_vertical_tightness{$token_end} > 0 12328 12329 # allow 2-line method call to be closed up 12330 || ( $rOpts_line_up_parentheses 12331 && $token_end eq '(' 12332 && $iend > $ibeg 12333 && $types_to_go[ $iend - 1 ] ne 'b' ) 12334 ) 12335 ) 12336 { 12337 12338 # avoid multiple jumps in nesting depth in one line if 12339 # requested 12340 my $ovt = $opening_vertical_tightness{$token_end}; 12341 my $iend_next = $$ri_last[ $n + 1 ]; 12342 unless ( 12343 $ovt < 2 12344 && ( $nesting_depth_to_go[ $iend_next + 1 ] != 12345 $nesting_depth_to_go[$ibeg_next] ) 12346 ) 12347 { 12348 12349 # If -vt flag has not been set, mark this as invalid 12350 # and aligner will validate it if it sees the closing paren 12351 # within 2 lines. 12352 my $valid_flag = $ovt; 12353 @{$rvertical_tightness_flags} = 12354 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); 12355 } 12356 } 12357 12358 # see if first token of next line is a closing token... 12359 # ..and be sure this line does not have a side comment 12360 my $token_next = $tokens_to_go[$ibeg_next]; 12361 if ( $type_sequence_to_go[$ibeg_next] 12362 && !$block_type_to_go[$ibeg_next] 12363 && $is_closing_token{$token_next} 12364 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! 12365 { 12366 my $ovt = $opening_vertical_tightness{$token_next}; 12367 my $cvt = $closing_vertical_tightness{$token_next}; 12368 if ( 12369 12370 # never append a trailing line like )->pack( 12371 # because it will throw off later alignment 12372 ( 12373 $nesting_depth_to_go[$ibeg_next] == 12374 $nesting_depth_to_go[ $iend_next + 1 ] + 1 12375 ) 12376 && ( 12377 $cvt == 2 12378 || ( 12379 $container_environment_to_go[$ibeg_next] ne 'LIST' 12380 && ( 12381 $cvt == 1 12382 12383 # allow closing up 2-line method calls 12384 || ( $rOpts_line_up_parentheses 12385 && $token_next eq ')' ) 12386 ) 12387 ) 12388 ) 12389 ) 12390 { 12391 12392 # decide which trailing closing tokens to append.. 12393 my $ok = 0; 12394 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } 12395 else { 12396 my $str = join( '', 12397 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); 12398 12399 # append closing token if followed by comment or ';' 12400 if ( $str =~ /^b?[#;]/ ) { $ok = 1 } 12401 } 12402 12403 if ($ok) { 12404 my $valid_flag = $cvt; 12405 @{$rvertical_tightness_flags} = ( 12406 2, 12407 $tightness{$token_next} == 2 ? 0 : 1, 12408 $type_sequence_to_go[$ibeg_next], $valid_flag, 12409 ); 12410 } 12411 } 12412 } 12413 12414 # Opening Token Right 12415 # If requested, move an isolated trailing opening token to the end of 12416 # the previous line which ended in a comma. We could do this 12417 # in sub recombine_breakpoints but that would cause problems 12418 # with -lp formatting. The problem is that indentation will 12419 # quickly move far to the right in nested expressions. By 12420 # doing it after indentation has been set, we avoid changes 12421 # to the indentation. Actual movement of the token takes place 12422 # in sub write_leader_and_string. 12423 if ( 12424 $opening_token_right{ $tokens_to_go[$ibeg_next] } 12425 12426 # previous line is not opening 12427 # (use -sot to combine with it) 12428 && !$is_opening_token{$token_end} 12429 12430 # previous line ended in one of these 12431 # (add other cases if necessary; '=>' and '.' are not necessary 12432 ##&& ($is_opening_token{$token_end} || $token_end eq ',') 12433 && !$block_type_to_go[$ibeg_next] 12434 12435 # this is a line with just an opening token 12436 && ( $iend_next == $ibeg_next 12437 || $iend_next == $ibeg_next + 2 12438 && $types_to_go[$iend_next] eq '#' ) 12439 12440 # looks bad if we align vertically with the wrong container 12441 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] 12442 ) 12443 { 12444 my $valid_flag = 1; 12445 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; 12446 @{$rvertical_tightness_flags} = 12447 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); 12448 } 12449 12450 # Stacking of opening and closing tokens 12451 my $stackable; 12452 my $token_beg_next = $tokens_to_go[$ibeg_next]; 12453 12454 # patch to make something like 'qw(' behave like an opening paren 12455 # (aran.t) 12456 if ( $types_to_go[$ibeg_next] eq 'q' ) { 12457 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { 12458 $token_beg_next = $1; 12459 } 12460 } 12461 12462 if ( $is_closing_token{$token_end} 12463 && $is_closing_token{$token_beg_next} ) 12464 { 12465 $stackable = $stack_closing_token{$token_beg_next} 12466 unless ( $block_type_to_go[$ibeg_next] ) 12467 ; # shouldn't happen; just checking 12468 } 12469 elsif ($is_opening_token{$token_end} 12470 && $is_opening_token{$token_beg_next} ) 12471 { 12472 $stackable = $stack_opening_token{$token_beg_next} 12473 unless ( $block_type_to_go[$ibeg_next] ) 12474 ; # shouldn't happen; just checking 12475 } 12476 12477 if ($stackable) { 12478 12479 my $is_semicolon_terminated; 12480 if ( $n + 1 == $n_last_line ) { 12481 my ( $terminal_type, $i_terminal ) = terminal_type( 12482 \@types_to_go, \@block_type_to_go, 12483 $ibeg_next, $iend_next 12484 ); 12485 $is_semicolon_terminated = $terminal_type eq ';' 12486 && $nesting_depth_to_go[$iend_next] < 12487 $nesting_depth_to_go[$ibeg_next]; 12488 } 12489 12490 # this must be a line with just an opening token 12491 # or end in a semicolon 12492 if ( 12493 $is_semicolon_terminated 12494 || ( $iend_next == $ibeg_next 12495 || $iend_next == $ibeg_next + 2 12496 && $types_to_go[$iend_next] eq '#' ) 12497 ) 12498 { 12499 my $valid_flag = 1; 12500 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; 12501 @{$rvertical_tightness_flags} = 12502 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, 12503 ); 12504 } 12505 } 12506 } 12507 12508 # Check for a last line with isolated opening BLOCK curly 12509 elsif ($rOpts_block_brace_vertical_tightness 12510 && $ibeg eq $iend 12511 && $types_to_go[$iend] eq '{' 12512 && $block_type_to_go[$iend] =~ 12513 /$block_brace_vertical_tightness_pattern/o ) 12514 { 12515 @{$rvertical_tightness_flags} = 12516 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); 12517 } 12518 12519 # pack in the sequence numbers of the ends of this line 12520 $rvertical_tightness_flags->[4] = get_seqno($ibeg); 12521 $rvertical_tightness_flags->[5] = get_seqno($iend); 12522 return $rvertical_tightness_flags; 12523} 12524 12525sub get_seqno { 12526 12527 # get opening and closing sequence numbers of a token for the vertical 12528 # aligner. Assign qw quotes a value to allow qw opening and closing tokens 12529 # to be treated somewhat like opening and closing tokens for stacking 12530 # tokens by the vertical aligner. 12531 my ($ii) = @_; 12532 my $seqno = $type_sequence_to_go[$ii]; 12533 if ( $types_to_go[$ii] eq 'q' ) { 12534 my $SEQ_QW = -1; 12535 if ( $ii > 0 ) { 12536 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); 12537 } 12538 else { 12539 if ( !$ending_in_quote ) { 12540 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); 12541 } 12542 } 12543 } 12544 return ($seqno); 12545} 12546 12547{ 12548 my %is_vertical_alignment_type; 12549 my %is_vertical_alignment_keyword; 12550 12551 BEGIN { 12552 12553 @_ = qw# 12554 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= 12555 { ? : => =~ && || // ~~ !~~ 12556 #; 12557 @is_vertical_alignment_type{@_} = (1) x scalar(@_); 12558 12559 @_ = qw(if unless and or err eq ne for foreach while until); 12560 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); 12561 } 12562 12563 sub set_vertical_alignment_markers { 12564 12565 # This routine takes the first step toward vertical alignment of the 12566 # lines of output text. It looks for certain tokens which can serve as 12567 # vertical alignment markers (such as an '='). 12568 # 12569 # Method: We look at each token $i in this output batch and set 12570 # $matching_token_to_go[$i] equal to those tokens at which we would 12571 # accept vertical alignment. 12572 12573 # nothing to do if we aren't allowed to change whitespace 12574 if ( !$rOpts_add_whitespace ) { 12575 for my $i ( 0 .. $max_index_to_go ) { 12576 $matching_token_to_go[$i] = ''; 12577 } 12578 return; 12579 } 12580 12581 my ( $ri_first, $ri_last ) = @_; 12582 12583 # remember the index of last nonblank token before any sidecomment 12584 my $i_terminal = $max_index_to_go; 12585 if ( $types_to_go[$i_terminal] eq '#' ) { 12586 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { 12587 if ( $i_terminal > 0 ) { --$i_terminal } 12588 } 12589 } 12590 12591 # look at each line of this batch.. 12592 my $last_vertical_alignment_before_index; 12593 my $vert_last_nonblank_type; 12594 my $vert_last_nonblank_token; 12595 my $vert_last_nonblank_block_type; 12596 my $max_line = @$ri_first - 1; 12597 my ( $i, $type, $token, $block_type, $alignment_type ); 12598 my ( $ibeg, $iend, $line ); 12599 12600 foreach $line ( 0 .. $max_line ) { 12601 $ibeg = $$ri_first[$line]; 12602 $iend = $$ri_last[$line]; 12603 $last_vertical_alignment_before_index = -1; 12604 $vert_last_nonblank_type = ''; 12605 $vert_last_nonblank_token = ''; 12606 $vert_last_nonblank_block_type = ''; 12607 12608 # look at each token in this output line.. 12609 foreach $i ( $ibeg .. $iend ) { 12610 $alignment_type = ''; 12611 $type = $types_to_go[$i]; 12612 $block_type = $block_type_to_go[$i]; 12613 $token = $tokens_to_go[$i]; 12614 12615 # check for flag indicating that we should not align 12616 # this token 12617 if ( $matching_token_to_go[$i] ) { 12618 $matching_token_to_go[$i] = ''; 12619 next; 12620 } 12621 12622 #-------------------------------------------------------- 12623 # First see if we want to align BEFORE this token 12624 #-------------------------------------------------------- 12625 12626 # The first possible token that we can align before 12627 # is index 2 because: 1) it doesn't normally make sense to 12628 # align before the first token and 2) the second 12629 # token must be a blank if we are to align before 12630 # the third 12631 if ( $i < $ibeg + 2 ) { } 12632 12633 # must follow a blank token 12634 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } 12635 12636 # align a side comment -- 12637 elsif ( $type eq '#' ) { 12638 12639 unless ( 12640 12641 # it is a static side comment 12642 ( 12643 $rOpts->{'static-side-comments'} 12644 && $token =~ /$static_side_comment_pattern/o 12645 ) 12646 12647 # or a closing side comment 12648 || ( $vert_last_nonblank_block_type 12649 && $token =~ 12650 /$closing_side_comment_prefix_pattern/o ) 12651 ) 12652 { 12653 $alignment_type = $type; 12654 } ## Example of a static side comment 12655 } 12656 12657 # otherwise, do not align two in a row to create a 12658 # blank field 12659 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } 12660 12661 # align before one of these keywords 12662 # (within a line, since $i>1) 12663 elsif ( $type eq 'k' ) { 12664 12665 # /^(if|unless|and|or|eq|ne)$/ 12666 if ( $is_vertical_alignment_keyword{$token} ) { 12667 $alignment_type = $token; 12668 } 12669 } 12670 12671 # align before one of these types.. 12672 # Note: add '.' after new vertical aligner is operational 12673 elsif ( $is_vertical_alignment_type{$type} ) { 12674 $alignment_type = $token; 12675 12676 # Do not align a terminal token. Although it might 12677 # occasionally look ok to do this, it has been found to be 12678 # a good general rule. The main problems are: 12679 # (1) that the terminal token (such as an = or :) might get 12680 # moved far to the right where it is hard to see because 12681 # nothing follows it, and 12682 # (2) doing so may prevent other good alignments. 12683 if ( $i == $iend || $i >= $i_terminal ) { 12684 $alignment_type = ""; 12685 } 12686 12687 # Do not align leading ': (' or '. ('. This would prevent 12688 # alignment in something like the following: 12689 # $extra_space .= 12690 # ( $input_line_number < 10 ) ? " " 12691 # : ( $input_line_number < 100 ) ? " " 12692 # : ""; 12693 # or 12694 # $code = 12695 # ( $case_matters ? $accessor : " lc($accessor) " ) 12696 # . ( $yesno ? " eq " : " ne " ) 12697 if ( $i == $ibeg + 2 12698 && $types_to_go[$ibeg] =~ /^[\.\:]$/ 12699 && $types_to_go[ $i - 1 ] eq 'b' ) 12700 { 12701 $alignment_type = ""; 12702 } 12703 12704 # For a paren after keyword, only align something like this: 12705 # if ( $a ) { &a } 12706 # elsif ( $b ) { &b } 12707 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { 12708 $alignment_type = "" 12709 unless $vert_last_nonblank_token =~ 12710 /^(if|unless|elsif)$/; 12711 } 12712 12713 # be sure the alignment tokens are unique 12714 # This didn't work well: reason not determined 12715 # if ($token ne $type) {$alignment_type .= $type} 12716 } 12717 12718 # NOTE: This is deactivated because it causes the previous 12719 # if/elsif alignment to fail 12720 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) 12721 #{ $alignment_type = $type; } 12722 12723 if ($alignment_type) { 12724 $last_vertical_alignment_before_index = $i; 12725 } 12726 12727 #-------------------------------------------------------- 12728 # Next see if we want to align AFTER the previous nonblank 12729 #-------------------------------------------------------- 12730 12731 # We want to line up ',' and interior ';' tokens, with the added 12732 # space AFTER these tokens. (Note: interior ';' is included 12733 # because it may occur in short blocks). 12734 if ( 12735 12736 # we haven't already set it 12737 !$alignment_type 12738 12739 # and its not the first token of the line 12740 && ( $i > $ibeg ) 12741 12742 # and it follows a blank 12743 && $types_to_go[ $i - 1 ] eq 'b' 12744 12745 # and previous token IS one of these: 12746 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) 12747 12748 # and it's NOT one of these 12749 && ( $type !~ /^[b\#\)\]\}]$/ ) 12750 12751 # then go ahead and align 12752 ) 12753 12754 { 12755 $alignment_type = $vert_last_nonblank_type; 12756 } 12757 12758 #-------------------------------------------------------- 12759 # then store the value 12760 #-------------------------------------------------------- 12761 $matching_token_to_go[$i] = $alignment_type; 12762 if ( $type ne 'b' ) { 12763 $vert_last_nonblank_type = $type; 12764 $vert_last_nonblank_token = $token; 12765 $vert_last_nonblank_block_type = $block_type; 12766 } 12767 } 12768 } 12769 } 12770} 12771 12772sub terminal_type { 12773 12774 # returns type of last token on this line (terminal token), as follows: 12775 # returns # for a full-line comment 12776 # returns ' ' for a blank line 12777 # otherwise returns final token type 12778 12779 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; 12780 12781 # check for full-line comment.. 12782 if ( $$rtype[$ibeg] eq '#' ) { 12783 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; 12784 } 12785 else { 12786 12787 # start at end and walk bakwards.. 12788 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { 12789 12790 # skip past any side comment and blanks 12791 next if ( $$rtype[$i] eq 'b' ); 12792 next if ( $$rtype[$i] eq '#' ); 12793 12794 # found it..make sure it is a BLOCK termination, 12795 # but hide a terminal } after sort/grep/map because it is not 12796 # necessarily the end of the line. (terminal.t) 12797 my $terminal_type = $$rtype[$i]; 12798 if ( 12799 $terminal_type eq '}' 12800 && ( !$$rblock_type[$i] 12801 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) 12802 ) 12803 { 12804 $terminal_type = 'b'; 12805 } 12806 return wantarray ? ( $terminal_type, $i ) : $terminal_type; 12807 } 12808 12809 # empty line 12810 return wantarray ? ( ' ', $ibeg ) : ' '; 12811 } 12812} 12813 12814{ 12815 my %is_good_keyword_breakpoint; 12816 my %is_lt_gt_le_ge; 12817 12818 sub set_bond_strengths { 12819 12820 BEGIN { 12821 12822 @_ = qw(if unless while until for foreach); 12823 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); 12824 12825 @_ = qw(lt gt le ge); 12826 @is_lt_gt_le_ge{@_} = (1) x scalar(@_); 12827 12828 ############################################################### 12829 # NOTE: NO_BREAK's set here are HINTS which may not be honored; 12830 # essential NO_BREAKS's must be enforced in section 2, below. 12831 ############################################################### 12832 12833 # adding NEW_TOKENS: add a left and right bond strength by 12834 # mimmicking what is done for an existing token type. You 12835 # can skip this step at first and take the default, then 12836 # tweak later to get desired results. 12837 12838 # The bond strengths should roughly follow precenence order where 12839 # possible. If you make changes, please check the results very 12840 # carefully on a variety of scripts. 12841 12842 # no break around possible filehandle 12843 $left_bond_strength{'Z'} = NO_BREAK; 12844 $right_bond_strength{'Z'} = NO_BREAK; 12845 12846 # never put a bare word on a new line: 12847 # example print (STDERR, "bla"); will fail with break after ( 12848 $left_bond_strength{'w'} = NO_BREAK; 12849 12850 # blanks always have infinite strength to force breaks after real tokens 12851 $right_bond_strength{'b'} = NO_BREAK; 12852 12853 # try not to break on exponentation 12854 @_ = qw" ** .. ... <=> "; 12855 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12856 @right_bond_strength{@_} = (STRONG) x scalar(@_); 12857 12858 # The comma-arrow has very low precedence but not a good break point 12859 $left_bond_strength{'=>'} = NO_BREAK; 12860 $right_bond_strength{'=>'} = NOMINAL; 12861 12862 # ok to break after label 12863 $left_bond_strength{'J'} = NO_BREAK; 12864 $right_bond_strength{'J'} = NOMINAL; 12865 $left_bond_strength{'j'} = STRONG; 12866 $right_bond_strength{'j'} = STRONG; 12867 $left_bond_strength{'A'} = STRONG; 12868 $right_bond_strength{'A'} = STRONG; 12869 12870 $left_bond_strength{'->'} = STRONG; 12871 $right_bond_strength{'->'} = VERY_STRONG; 12872 12873 # breaking AFTER modulus operator is ok: 12874 @_ = qw" % "; 12875 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12876 @right_bond_strength{@_} = 12877 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); 12878 12879 # Break AFTER math operators * and / 12880 @_ = qw" * / x "; 12881 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12882 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 12883 12884 # Break AFTER weakest math operators + and - 12885 # Make them weaker than * but a bit stronger than '.' 12886 @_ = qw" + - "; 12887 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12888 @right_bond_strength{@_} = 12889 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); 12890 12891 # breaking BEFORE these is just ok: 12892 @_ = qw" >> << "; 12893 @right_bond_strength{@_} = (STRONG) x scalar(@_); 12894 @left_bond_strength{@_} = (NOMINAL) x scalar(@_); 12895 12896 # breaking before the string concatenation operator seems best 12897 # because it can be hard to see at the end of a line 12898 $right_bond_strength{'.'} = STRONG; 12899 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; 12900 12901 @_ = qw"} ] ) "; 12902 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12903 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 12904 12905 # make these a little weaker than nominal so that they get 12906 # favored for end-of-line characters 12907 @_ = qw"!= == =~ !~ ~~ !~~"; 12908 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12909 @right_bond_strength{@_} = 12910 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); 12911 12912 # break AFTER these 12913 @_ = qw" < > | & >= <="; 12914 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); 12915 @right_bond_strength{@_} = 12916 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); 12917 12918 # breaking either before or after a quote is ok 12919 # but bias for breaking before a quote 12920 $left_bond_strength{'Q'} = NOMINAL; 12921 $right_bond_strength{'Q'} = NOMINAL + 0.02; 12922 $left_bond_strength{'q'} = NOMINAL; 12923 $right_bond_strength{'q'} = NOMINAL; 12924 12925 # starting a line with a keyword is usually ok 12926 $left_bond_strength{'k'} = NOMINAL; 12927 12928 # we usually want to bond a keyword strongly to what immediately 12929 # follows, rather than leaving it stranded at the end of a line 12930 $right_bond_strength{'k'} = STRONG; 12931 12932 $left_bond_strength{'G'} = NOMINAL; 12933 $right_bond_strength{'G'} = STRONG; 12934 12935 # it is good to break AFTER various assignment operators 12936 @_ = qw( 12937 = **= += *= &= <<= &&= 12938 -= /= |= >>= ||= //= 12939 .= %= ^= 12940 x= 12941 ); 12942 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12943 @right_bond_strength{@_} = 12944 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); 12945 12946 # break BEFORE '&&' and '||' and '//' 12947 # set strength of '||' to same as '=' so that chains like 12948 # $a = $b || $c || $d will break before the first '||' 12949 $right_bond_strength{'||'} = NOMINAL; 12950 $left_bond_strength{'||'} = $right_bond_strength{'='}; 12951 12952 # same thing for '//' 12953 $right_bond_strength{'//'} = NOMINAL; 12954 $left_bond_strength{'//'} = $right_bond_strength{'='}; 12955 12956 # set strength of && a little higher than || 12957 $right_bond_strength{'&&'} = NOMINAL; 12958 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; 12959 12960 $left_bond_strength{';'} = VERY_STRONG; 12961 $right_bond_strength{';'} = VERY_WEAK; 12962 $left_bond_strength{'f'} = VERY_STRONG; 12963 12964 # make right strength of for ';' a little less than '=' 12965 # to make for contents break after the ';' to avoid this: 12966 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += 12967 # $number_of_fields ) 12968 # and make it weaker than ',' and 'and' too 12969 $right_bond_strength{'f'} = VERY_WEAK - 0.03; 12970 12971 # The strengths of ?/: should be somewhere between 12972 # an '=' and a quote (NOMINAL), 12973 # make strength of ':' slightly less than '?' to help 12974 # break long chains of ? : after the colons 12975 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; 12976 $right_bond_strength{':'} = NO_BREAK; 12977 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; 12978 $right_bond_strength{'?'} = NO_BREAK; 12979 12980 $left_bond_strength{','} = VERY_STRONG; 12981 $right_bond_strength{','} = VERY_WEAK; 12982 12983 # Set bond strengths of certain keywords 12984 # make 'or', 'err', 'and' slightly weaker than a ',' 12985 $left_bond_strength{'and'} = VERY_WEAK - 0.01; 12986 $left_bond_strength{'or'} = VERY_WEAK - 0.02; 12987 $left_bond_strength{'err'} = VERY_WEAK - 0.02; 12988 $left_bond_strength{'xor'} = NOMINAL; 12989 $right_bond_strength{'and'} = NOMINAL; 12990 $right_bond_strength{'or'} = NOMINAL; 12991 $right_bond_strength{'err'} = NOMINAL; 12992 $right_bond_strength{'xor'} = STRONG; 12993 } 12994 12995 # patch-its always ok to break at end of line 12996 $nobreak_to_go[$max_index_to_go] = 0; 12997 12998 # adding a small 'bias' to strengths is a simple way to make a line 12999 # break at the first of a sequence of identical terms. For example, 13000 # to force long string of conditional operators to break with 13001 # each line ending in a ':', we can add a small number to the bond 13002 # strength of each ':' 13003 my $colon_bias = 0; 13004 my $amp_bias = 0; 13005 my $bar_bias = 0; 13006 my $and_bias = 0; 13007 my $or_bias = 0; 13008 my $dot_bias = 0; 13009 my $f_bias = 0; 13010 my $code_bias = -.01; 13011 my $type = 'b'; 13012 my $token = ' '; 13013 my $last_type; 13014 my $last_nonblank_type = $type; 13015 my $last_nonblank_token = $token; 13016 my $delta_bias = 0.0001; 13017 my $list_str = $left_bond_strength{'?'}; 13018 13019 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, 13020 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, 13021 ); 13022 13023 # preliminary loop to compute bond strengths 13024 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { 13025 $last_type = $type; 13026 if ( $type ne 'b' ) { 13027 $last_nonblank_type = $type; 13028 $last_nonblank_token = $token; 13029 } 13030 $type = $types_to_go[$i]; 13031 13032 # strength on both sides of a blank is the same 13033 if ( $type eq 'b' && $last_type ne 'b' ) { 13034 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; 13035 next; 13036 } 13037 13038 $token = $tokens_to_go[$i]; 13039 $block_type = $block_type_to_go[$i]; 13040 $i_next = $i + 1; 13041 $next_type = $types_to_go[$i_next]; 13042 $next_token = $tokens_to_go[$i_next]; 13043 $total_nesting_depth = $nesting_depth_to_go[$i_next]; 13044 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 13045 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 13046 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 13047 13048 # Some token chemistry... The decision about where to break a 13049 # line depends upon a "bond strength" between tokens. The LOWER 13050 # the bond strength, the MORE likely a break. The strength 13051 # values are based on trial-and-error, and need to be tweaked 13052 # occasionally to get desired results. Things to keep in mind 13053 # are: 13054 # 1. relative strengths are important. small differences 13055 # in strengths can make big formatting differences. 13056 # 2. each indentation level adds one unit of bond strength 13057 # 3. a value of NO_BREAK makes an unbreakable bond 13058 # 4. a value of VERY_WEAK is the strength of a ',' 13059 # 5. values below NOMINAL are considered ok break points 13060 # 6. values above NOMINAL are considered poor break points 13061 # We are computing the strength of the bond between the current 13062 # token and the NEXT token. 13063 my $bond_str = VERY_STRONG; # a default, high strength 13064 13065 #--------------------------------------------------------------- 13066 # section 1: 13067 # use minimum of left and right bond strengths if defined; 13068 # digraphs and trigraphs like to break on their left 13069 #--------------------------------------------------------------- 13070 my $bsr = $right_bond_strength{$type}; 13071 13072 if ( !defined($bsr) ) { 13073 13074 if ( $is_digraph{$type} || $is_trigraph{$type} ) { 13075 $bsr = STRONG; 13076 } 13077 else { 13078 $bsr = VERY_STRONG; 13079 } 13080 } 13081 13082 # define right bond strengths of certain keywords 13083 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { 13084 $bsr = $right_bond_strength{$token}; 13085 } 13086 elsif ( $token eq 'ne' or $token eq 'eq' ) { 13087 $bsr = NOMINAL; 13088 } 13089 my $bsl = $left_bond_strength{$next_nonblank_type}; 13090 13091 # set terminal bond strength to the nominal value 13092 # this will cause good preceding breaks to be retained 13093 if ( $i_next_nonblank > $max_index_to_go ) { 13094 $bsl = NOMINAL; 13095 } 13096 13097 if ( !defined($bsl) ) { 13098 13099 if ( $is_digraph{$next_nonblank_type} 13100 || $is_trigraph{$next_nonblank_type} ) 13101 { 13102 $bsl = WEAK; 13103 } 13104 else { 13105 $bsl = VERY_STRONG; 13106 } 13107 } 13108 13109 # define right bond strengths of certain keywords 13110 if ( $next_nonblank_type eq 'k' 13111 && defined( $left_bond_strength{$next_nonblank_token} ) ) 13112 { 13113 $bsl = $left_bond_strength{$next_nonblank_token}; 13114 } 13115 elsif ($next_nonblank_token eq 'ne' 13116 or $next_nonblank_token eq 'eq' ) 13117 { 13118 $bsl = NOMINAL; 13119 } 13120 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { 13121 $bsl = 0.9 * NOMINAL + 0.1 * STRONG; 13122 } 13123 13124 # Note: it might seem that we would want to keep a NO_BREAK if 13125 # either token has this value. This didn't work, because in an 13126 # arrow list, it prevents the comma from separating from the 13127 # following bare word (which is probably quoted by its arrow). 13128 # So necessary NO_BREAK's have to be handled as special cases 13129 # in the final section. 13130 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; 13131 my $bond_str_1 = $bond_str; 13132 13133 #--------------------------------------------------------------- 13134 # section 2: 13135 # special cases 13136 #--------------------------------------------------------------- 13137 13138 # allow long lines before final { in an if statement, as in: 13139 # if (.......... 13140 # ..........) 13141 # { 13142 # 13143 # Otherwise, the line before the { tends to be too short. 13144 if ( $type eq ')' ) { 13145 if ( $next_nonblank_type eq '{' ) { 13146 $bond_str = VERY_WEAK + 0.03; 13147 } 13148 } 13149 13150 elsif ( $type eq '(' ) { 13151 if ( $next_nonblank_type eq '{' ) { 13152 $bond_str = NOMINAL; 13153 } 13154 } 13155 13156 # break on something like '} (', but keep this stronger than a ',' 13157 # example is in 'howe.pl' 13158 elsif ( $type eq 'R' or $type eq '}' ) { 13159 if ( $next_nonblank_type eq '(' ) { 13160 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; 13161 } 13162 } 13163 13164 #----------------------------------------------------------------- 13165 # adjust bond strength bias 13166 #----------------------------------------------------------------- 13167 13168 # TESTING: add any bias set by sub scan_list at old comma 13169 # break points. 13170 elsif ( $type eq ',' ) { 13171 $bond_str += $bond_strength_to_go[$i]; 13172 } 13173 13174 elsif ( $type eq 'f' ) { 13175 $bond_str += $f_bias; 13176 $f_bias += $delta_bias; 13177 } 13178 13179 # in long ?: conditionals, bias toward just one set per line (colon.t) 13180 elsif ( $type eq ':' ) { 13181 if ( !$want_break_before{$type} ) { 13182 $bond_str += $colon_bias; 13183 $colon_bias += $delta_bias; 13184 } 13185 } 13186 13187 if ( $next_nonblank_type eq ':' 13188 && $want_break_before{$next_nonblank_type} ) 13189 { 13190 $bond_str += $colon_bias; 13191 $colon_bias += $delta_bias; 13192 } 13193 13194 # if leading '.' is used, align all but 'short' quotes; 13195 # the idea is to not place something like "\n" on a single line. 13196 elsif ( $next_nonblank_type eq '.' ) { 13197 if ( $want_break_before{'.'} ) { 13198 unless ( 13199 $last_nonblank_type eq '.' 13200 && ( 13201 length($token) <= 13202 $rOpts_short_concatenation_item_length ) 13203 && ( $token !~ /^[\)\]\}]$/ ) 13204 ) 13205 { 13206 $dot_bias += $delta_bias; 13207 } 13208 $bond_str += $dot_bias; 13209 } 13210 } 13211 elsif ($next_nonblank_type eq '&&' 13212 && $want_break_before{$next_nonblank_type} ) 13213 { 13214 $bond_str += $amp_bias; 13215 $amp_bias += $delta_bias; 13216 } 13217 elsif ($next_nonblank_type eq '||' 13218 && $want_break_before{$next_nonblank_type} ) 13219 { 13220 $bond_str += $bar_bias; 13221 $bar_bias += $delta_bias; 13222 } 13223 elsif ( $next_nonblank_type eq 'k' ) { 13224 13225 if ( $next_nonblank_token eq 'and' 13226 && $want_break_before{$next_nonblank_token} ) 13227 { 13228 $bond_str += $and_bias; 13229 $and_bias += $delta_bias; 13230 } 13231 elsif ($next_nonblank_token =~ /^(or|err)$/ 13232 && $want_break_before{$next_nonblank_token} ) 13233 { 13234 $bond_str += $or_bias; 13235 $or_bias += $delta_bias; 13236 } 13237 13238 # FIXME: needs more testing 13239 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { 13240 $bond_str = $list_str if ( $bond_str > $list_str ); 13241 } 13242 elsif ( $token eq 'err' 13243 && !$want_break_before{$token} ) 13244 { 13245 $bond_str += $or_bias; 13246 $or_bias += $delta_bias; 13247 } 13248 } 13249 13250 if ( $type eq ':' 13251 && !$want_break_before{$type} ) 13252 { 13253 $bond_str += $colon_bias; 13254 $colon_bias += $delta_bias; 13255 } 13256 elsif ( $type eq '&&' 13257 && !$want_break_before{$type} ) 13258 { 13259 $bond_str += $amp_bias; 13260 $amp_bias += $delta_bias; 13261 } 13262 elsif ( $type eq '||' 13263 && !$want_break_before{$type} ) 13264 { 13265 $bond_str += $bar_bias; 13266 $bar_bias += $delta_bias; 13267 } 13268 elsif ( $type eq 'k' ) { 13269 13270 if ( $token eq 'and' 13271 && !$want_break_before{$token} ) 13272 { 13273 $bond_str += $and_bias; 13274 $and_bias += $delta_bias; 13275 } 13276 elsif ( $token eq 'or' 13277 && !$want_break_before{$token} ) 13278 { 13279 $bond_str += $or_bias; 13280 $or_bias += $delta_bias; 13281 } 13282 } 13283 13284 # keep matrix and hash indices together 13285 # but make them a little below STRONG to allow breaking open 13286 # something like {'some-word'}{'some-very-long-word'} at the }{ 13287 # (bracebrk.t) 13288 if ( ( $type eq ']' or $type eq 'R' ) 13289 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) 13290 ) 13291 { 13292 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; 13293 } 13294 13295 if ( $next_nonblank_token =~ /^->/ ) { 13296 13297 # increase strength to the point where a break in the following 13298 # will be after the opening paren rather than at the arrow: 13299 # $a->$b($c); 13300 if ( $type eq 'i' ) { 13301 $bond_str = 1.45 * STRONG; 13302 } 13303 13304 elsif ( $type =~ /^[\)\]\}R]$/ ) { 13305 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; 13306 } 13307 13308 # otherwise make strength before an '->' a little over a '+' 13309 else { 13310 if ( $bond_str <= NOMINAL ) { 13311 $bond_str = NOMINAL + 0.01; 13312 } 13313 } 13314 } 13315 13316 if ( $token eq ')' && $next_nonblank_token eq '[' ) { 13317 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; 13318 } 13319 13320 # map1.t -- correct for a quirk in perl 13321 if ( $token eq '(' 13322 && $next_nonblank_type eq 'i' 13323 && $last_nonblank_type eq 'k' 13324 && $is_sort_map_grep{$last_nonblank_token} ) 13325 13326 # /^(sort|map|grep)$/ ) 13327 { 13328 $bond_str = NO_BREAK; 13329 } 13330 13331 # extrude.t: do not break before paren at: 13332 # -l pid_filename( 13333 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { 13334 $bond_str = NO_BREAK; 13335 } 13336 13337 # good to break after end of code blocks 13338 if ( $type eq '}' && $block_type ) { 13339 13340 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; 13341 $code_bias += $delta_bias; 13342 } 13343 13344 if ( $type eq 'k' ) { 13345 13346 # allow certain control keywords to stand out 13347 if ( $next_nonblank_type eq 'k' 13348 && $is_last_next_redo_return{$token} ) 13349 { 13350 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; 13351 } 13352 13353# Don't break after keyword my. This is a quick fix for a 13354# rare problem with perl. An example is this line from file 13355# Container.pm: 13356# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) 13357 13358 if ( $token eq 'my' ) { 13359 $bond_str = NO_BREAK; 13360 } 13361 13362 } 13363 13364 # good to break before 'if', 'unless', etc 13365 if ( $is_if_brace_follower{$next_nonblank_token} ) { 13366 $bond_str = VERY_WEAK; 13367 } 13368 13369 if ( $next_nonblank_type eq 'k' ) { 13370 13371 # keywords like 'unless', 'if', etc, within statements 13372 # make good breaks 13373 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { 13374 $bond_str = VERY_WEAK / 1.05; 13375 } 13376 } 13377 13378 # try not to break before a comma-arrow 13379 elsif ( $next_nonblank_type eq '=>' ) { 13380 if ( $bond_str < STRONG ) { $bond_str = STRONG } 13381 } 13382 13383 #---------------------------------------------------------------------- 13384 # only set NO_BREAK's from here on 13385 #---------------------------------------------------------------------- 13386 if ( $type eq 'C' or $type eq 'U' ) { 13387 13388 # use strict requires that bare word and => not be separated 13389 if ( $next_nonblank_type eq '=>' ) { 13390 $bond_str = NO_BREAK; 13391 } 13392 13393 # Never break between a bareword and a following paren because 13394 # perl may give an error. For example, if a break is placed 13395 # between 'to_filehandle' and its '(' the following line will 13396 # give a syntax error [Carp.pm]: my( $no) =fileno( 13397 # to_filehandle( $in)) ; 13398 if ( $next_nonblank_token eq '(' ) { 13399 $bond_str = NO_BREAK; 13400 } 13401 } 13402 13403 # use strict requires that bare word within braces not start new line 13404 elsif ( $type eq 'L' ) { 13405 13406 if ( $next_nonblank_type eq 'w' ) { 13407 $bond_str = NO_BREAK; 13408 } 13409 } 13410 13411 # in older version of perl, use strict can cause problems with 13412 # breaks before bare words following opening parens. For example, 13413 # this will fail under older versions if a break is made between 13414 # '(' and 'MAIL': 13415 # use strict; 13416 # open( MAIL, "a long filename or command"); 13417 # close MAIL; 13418 elsif ( $type eq '{' ) { 13419 13420 if ( $token eq '(' && $next_nonblank_type eq 'w' ) { 13421 13422 # but it's fine to break if the word is followed by a '=>' 13423 # or if it is obviously a sub call 13424 my $i_next_next_nonblank = $i_next_nonblank + 1; 13425 my $next_next_type = $types_to_go[$i_next_next_nonblank]; 13426 if ( $next_next_type eq 'b' 13427 && $i_next_nonblank < $max_index_to_go ) 13428 { 13429 $i_next_next_nonblank++; 13430 $next_next_type = $types_to_go[$i_next_next_nonblank]; 13431 } 13432 13433 ##if ( $next_next_type ne '=>' ) { 13434 # these are ok: '->xxx', '=>', '(' 13435 13436 # We'll check for an old breakpoint and keep a leading 13437 # bareword if it was that way in the input file. 13438 # Presumably it was ok that way. For example, the 13439 # following would remain unchanged: 13440 # 13441 # @months = ( 13442 # January, February, March, April, 13443 # May, June, July, August, 13444 # September, October, November, December, 13445 # ); 13446 # 13447 # This should be sufficient: 13448 if ( !$old_breakpoint_to_go[$i] 13449 && ( $next_next_type eq ',' || $next_next_type eq '}' ) 13450 ) 13451 { 13452 $bond_str = NO_BREAK; 13453 } 13454 } 13455 } 13456 13457 elsif ( $type eq 'w' ) { 13458 13459 if ( $next_nonblank_type eq 'R' ) { 13460 $bond_str = NO_BREAK; 13461 } 13462 13463 # use strict requires that bare word and => not be separated 13464 if ( $next_nonblank_type eq '=>' ) { 13465 $bond_str = NO_BREAK; 13466 } 13467 } 13468 13469 # in fact, use strict hates bare words on any new line. For 13470 # example, a break before the underscore here provokes the 13471 # wrath of use strict: 13472 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { 13473 elsif ( $type eq 'F' ) { 13474 $bond_str = NO_BREAK; 13475 } 13476 13477 # use strict does not allow separating type info from trailing { } 13478 # testfile is readmail.pl 13479 elsif ( $type eq 't' or $type eq 'i' ) { 13480 13481 if ( $next_nonblank_type eq 'L' ) { 13482 $bond_str = NO_BREAK; 13483 } 13484 } 13485 13486 # Do not break between a possible filehandle and a ? or / and do 13487 # not introduce a break after it if there is no blank 13488 # (extrude.t) 13489 elsif ( $type eq 'Z' ) { 13490 13491 # dont break.. 13492 if ( 13493 13494 # if there is no blank and we do not want one. Examples: 13495 # print $x++ # do not break after $x 13496 # print HTML"HELLO" # break ok after HTML 13497 ( 13498 $next_type ne 'b' 13499 && defined( $want_left_space{$next_type} ) 13500 && $want_left_space{$next_type} == WS_NO 13501 ) 13502 13503 # or we might be followed by the start of a quote 13504 || $next_nonblank_type =~ /^[\/\?]$/ 13505 ) 13506 { 13507 $bond_str = NO_BREAK; 13508 } 13509 } 13510 13511 # Do not break before a possible file handle 13512 if ( $next_nonblank_type eq 'Z' ) { 13513 $bond_str = NO_BREAK; 13514 } 13515 13516 # As a defensive measure, do not break between a '(' and a 13517 # filehandle. In some cases, this can cause an error. For 13518 # example, the following program works: 13519 # my $msg="hi!\n"; 13520 # print 13521 # ( STDOUT 13522 # $msg 13523 # ); 13524 # 13525 # But this program fails: 13526 # my $msg="hi!\n"; 13527 # print 13528 # ( 13529 # STDOUT 13530 # $msg 13531 # ); 13532 # 13533 # This is normally only a problem with the 'extrude' option 13534 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { 13535 $bond_str = NO_BREAK; 13536 } 13537 13538 # Breaking before a ++ can cause perl to guess wrong. For 13539 # example the following line will cause a syntax error 13540 # with -extrude if we break between '$i' and '++' [fixstyle2] 13541 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); 13542 elsif ( $next_nonblank_type eq '++' ) { 13543 $bond_str = NO_BREAK; 13544 } 13545 13546 # Breaking before a ? before a quote can cause trouble if 13547 # they are not separated by a blank. 13548 # Example: a syntax error occurs if you break before the ? here 13549 # my$logic=join$all?' && ':' || ',@regexps; 13550 # From: Professional_Perl_Programming_Code/multifind.pl 13551 elsif ( $next_nonblank_type eq '?' ) { 13552 $bond_str = NO_BREAK 13553 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); 13554 } 13555 13556 # Breaking before a . followed by a number 13557 # can cause trouble if there is no intervening space 13558 # Example: a syntax error occurs if you break before the .2 here 13559 # $str .= pack($endian.2, ensurrogate($ord)); 13560 # From: perl58/Unicode.pm 13561 elsif ( $next_nonblank_type eq '.' ) { 13562 $bond_str = NO_BREAK 13563 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); 13564 } 13565 13566 # patch to put cuddled elses back together when on multiple 13567 # lines, as in: } \n else \n { \n 13568 if ($rOpts_cuddled_else) { 13569 13570 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) 13571 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) 13572 { 13573 $bond_str = NO_BREAK; 13574 } 13575 } 13576 13577 # keep '}' together with ';' 13578 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { 13579 $bond_str = NO_BREAK; 13580 } 13581 13582 # never break between sub name and opening paren 13583 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { 13584 $bond_str = NO_BREAK; 13585 } 13586 13587 #--------------------------------------------------------------- 13588 # section 3: 13589 # now take nesting depth into account 13590 #--------------------------------------------------------------- 13591 # final strength incorporates the bond strength and nesting depth 13592 my $strength; 13593 13594 if ( defined($bond_str) && !$nobreak_to_go[$i] ) { 13595 if ( $total_nesting_depth > 0 ) { 13596 $strength = $bond_str + $total_nesting_depth; 13597 } 13598 else { 13599 $strength = $bond_str; 13600 } 13601 } 13602 else { 13603 $strength = NO_BREAK; 13604 } 13605 13606 # always break after side comment 13607 if ( $type eq '#' ) { $strength = 0 } 13608 13609 $bond_strength_to_go[$i] = $strength; 13610 13611 FORMATTER_DEBUG_FLAG_BOND && do { 13612 my $str = substr( $token, 0, 15 ); 13613 $str .= ' ' x ( 16 - length($str) ); 13614 print 13615"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; 13616 }; 13617 } 13618 } 13619 13620} 13621 13622sub pad_array_to_go { 13623 13624 # to simplify coding in scan_list and set_bond_strengths, it helps 13625 # to create some extra blank tokens at the end of the arrays 13626 $tokens_to_go[ $max_index_to_go + 1 ] = ''; 13627 $tokens_to_go[ $max_index_to_go + 2 ] = ''; 13628 $types_to_go[ $max_index_to_go + 1 ] = 'b'; 13629 $types_to_go[ $max_index_to_go + 2 ] = 'b'; 13630 $nesting_depth_to_go[ $max_index_to_go + 1 ] = 13631 $nesting_depth_to_go[$max_index_to_go]; 13632 13633 # /^[R\}\)\]]$/ 13634 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { 13635 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { 13636 13637 # shouldn't happen: 13638 unless ( get_saw_brace_error() ) { 13639 warning( 13640"Program bug in scan_list: hit nesting error which should have been caught\n" 13641 ); 13642 report_definite_bug(); 13643 } 13644 } 13645 else { 13646 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; 13647 } 13648 } 13649 13650 # /^[L\{\(\[]$/ 13651 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { 13652 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; 13653 } 13654} 13655 13656{ # begin scan_list 13657 13658 my ( 13659 $block_type, $current_depth, 13660 $depth, $i, 13661 $i_last_nonblank_token, $last_colon_sequence_number, 13662 $last_nonblank_token, $last_nonblank_type, 13663 $last_old_breakpoint_count, $minimum_depth, 13664 $next_nonblank_block_type, $next_nonblank_token, 13665 $next_nonblank_type, $old_breakpoint_count, 13666 $starting_breakpoint_count, $starting_depth, 13667 $token, $type, 13668 $type_sequence, 13669 ); 13670 13671 my ( 13672 @breakpoint_stack, @breakpoint_undo_stack, 13673 @comma_index, @container_type, 13674 @identifier_count_stack, @index_before_arrow, 13675 @interrupted_list, @item_count_stack, 13676 @last_comma_index, @last_dot_index, 13677 @last_nonblank_type, @old_breakpoint_count_stack, 13678 @opening_structure_index_stack, @rfor_semicolon_list, 13679 @has_old_logical_breakpoints, @rand_or_list, 13680 @i_equals, 13681 ); 13682 13683 # routine to define essential variables when we go 'up' to 13684 # a new depth 13685 sub check_for_new_minimum_depth { 13686 my $depth = shift; 13687 if ( $depth < $minimum_depth ) { 13688 13689 $minimum_depth = $depth; 13690 13691 # these arrays need not retain values between calls 13692 $breakpoint_stack[$depth] = $starting_breakpoint_count; 13693 $container_type[$depth] = ""; 13694 $identifier_count_stack[$depth] = 0; 13695 $index_before_arrow[$depth] = -1; 13696 $interrupted_list[$depth] = 1; 13697 $item_count_stack[$depth] = 0; 13698 $last_nonblank_type[$depth] = ""; 13699 $opening_structure_index_stack[$depth] = -1; 13700 13701 $breakpoint_undo_stack[$depth] = undef; 13702 $comma_index[$depth] = undef; 13703 $last_comma_index[$depth] = undef; 13704 $last_dot_index[$depth] = undef; 13705 $old_breakpoint_count_stack[$depth] = undef; 13706 $has_old_logical_breakpoints[$depth] = 0; 13707 $rand_or_list[$depth] = []; 13708 $rfor_semicolon_list[$depth] = []; 13709 $i_equals[$depth] = -1; 13710 13711 # these arrays must retain values between calls 13712 if ( !defined( $has_broken_sublist[$depth] ) ) { 13713 $dont_align[$depth] = 0; 13714 $has_broken_sublist[$depth] = 0; 13715 $want_comma_break[$depth] = 0; 13716 } 13717 } 13718 } 13719 13720 # routine to decide which commas to break at within a container; 13721 # returns: 13722 # $bp_count = number of comma breakpoints set 13723 # $do_not_break_apart = a flag indicating if container need not 13724 # be broken open 13725 sub set_comma_breakpoints { 13726 13727 my $dd = shift; 13728 my $bp_count = 0; 13729 my $do_not_break_apart = 0; 13730 13731 # anything to do? 13732 if ( $item_count_stack[$dd] ) { 13733 13734 # handle commas not in containers... 13735 if ( $dont_align[$dd] ) { 13736 do_uncontained_comma_breaks($dd); 13737 } 13738 13739 # handle commas within containers... 13740 else { 13741 my $fbc = $forced_breakpoint_count; 13742 13743 # always open comma lists not preceded by keywords, 13744 # barewords, identifiers (that is, anything that doesn't 13745 # look like a function call) 13746 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; 13747 13748 set_comma_breakpoints_do( 13749 $dd, 13750 $opening_structure_index_stack[$dd], 13751 $i, 13752 $item_count_stack[$dd], 13753 $identifier_count_stack[$dd], 13754 $comma_index[$dd], 13755 $next_nonblank_type, 13756 $container_type[$dd], 13757 $interrupted_list[$dd], 13758 \$do_not_break_apart, 13759 $must_break_open, 13760 ); 13761 $bp_count = $forced_breakpoint_count - $fbc; 13762 $do_not_break_apart = 0 if $must_break_open; 13763 } 13764 } 13765 return ( $bp_count, $do_not_break_apart ); 13766 } 13767 13768 sub do_uncontained_comma_breaks { 13769 13770 # Handle commas not in containers... 13771 # This is a catch-all routine for commas that we 13772 # don't know what to do with because the don't fall 13773 # within containers. We will bias the bond strength 13774 # to break at commas which ended lines in the input 13775 # file. This usually works better than just trying 13776 # to put as many items on a line as possible. A 13777 # downside is that if the input file is garbage it 13778 # won't work very well. However, the user can always 13779 # prevent following the old breakpoints with the 13780 # -iob flag. 13781 my $dd = shift; 13782 my $bias = -.01; 13783 foreach my $ii ( @{ $comma_index[$dd] } ) { 13784 if ( $old_breakpoint_to_go[$ii] ) { 13785 $bond_strength_to_go[$ii] = $bias; 13786 13787 # reduce bias magnitude to force breaks in order 13788 $bias *= 0.99; 13789 } 13790 } 13791 13792 # Also put a break before the first comma if 13793 # (1) there was a break there in the input, and 13794 # (2) that was exactly one previous break in the input 13795 # 13796 # For example, we will follow the user and break after 13797 # 'print' in this snippet: 13798 # print 13799 # "conformability (Not the same dimension)\n", 13800 # "\t", $have, " is ", text_unit($hu), "\n", 13801 # "\t", $want, " is ", text_unit($wu), "\n", 13802 # ; 13803 my $i_first_comma = $comma_index[$dd]->[0]; 13804 if ( $old_breakpoint_to_go[$i_first_comma] ) { 13805 my $level_comma = $levels_to_go[$i_first_comma]; 13806 my $ibreak = -1; 13807 my $obp_count = 0; 13808 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { 13809 if ( $old_breakpoint_to_go[$ii] ) { 13810 $obp_count++; 13811 last if ( $obp_count > 1 ); 13812 $ibreak = $ii 13813 if ( $levels_to_go[$ii] == $level_comma ); 13814 } 13815 } 13816 if ( $ibreak >= 0 && $obp_count == 1 ) { 13817 set_forced_breakpoint($ibreak); 13818 } 13819 } 13820 } 13821 13822 my %is_logical_container; 13823 13824 BEGIN { 13825 @_ = qw# if elsif unless while and or err not && | || ? : ! #; 13826 @is_logical_container{@_} = (1) x scalar(@_); 13827 } 13828 13829 sub set_for_semicolon_breakpoints { 13830 my $dd = shift; 13831 foreach ( @{ $rfor_semicolon_list[$dd] } ) { 13832 set_forced_breakpoint($_); 13833 } 13834 } 13835 13836 sub set_logical_breakpoints { 13837 my $dd = shift; 13838 if ( 13839 $item_count_stack[$dd] == 0 13840 && $is_logical_container{ $container_type[$dd] } 13841 13842 # TESTING: 13843 || $has_old_logical_breakpoints[$dd] 13844 ) 13845 { 13846 13847 # Look for breaks in this order: 13848 # 0 1 2 3 13849 # or and || && 13850 foreach my $i ( 0 .. 3 ) { 13851 if ( $rand_or_list[$dd][$i] ) { 13852 foreach ( @{ $rand_or_list[$dd][$i] } ) { 13853 set_forced_breakpoint($_); 13854 } 13855 13856 # break at any 'if' and 'unless' too 13857 foreach ( @{ $rand_or_list[$dd][4] } ) { 13858 set_forced_breakpoint($_); 13859 } 13860 $rand_or_list[$dd] = []; 13861 last; 13862 } 13863 } 13864 } 13865 } 13866 13867 sub is_unbreakable_container { 13868 13869 # never break a container of one of these types 13870 # because bad things can happen (map1.t) 13871 my $dd = shift; 13872 $is_sort_map_grep{ $container_type[$dd] }; 13873 } 13874 13875 sub scan_list { 13876 13877 # This routine is responsible for setting line breaks for all lists, 13878 # so that hierarchical structure can be displayed and so that list 13879 # items can be vertically aligned. The output of this routine is 13880 # stored in the array @forced_breakpoint_to_go, which is used to set 13881 # final breakpoints. 13882 13883 $starting_depth = $nesting_depth_to_go[0]; 13884 13885 $block_type = ' '; 13886 $current_depth = $starting_depth; 13887 $i = -1; 13888 $last_colon_sequence_number = -1; 13889 $last_nonblank_token = ';'; 13890 $last_nonblank_type = ';'; 13891 $last_nonblank_block_type = ' '; 13892 $last_old_breakpoint_count = 0; 13893 $minimum_depth = $current_depth + 1; # forces update in check below 13894 $old_breakpoint_count = 0; 13895 $starting_breakpoint_count = $forced_breakpoint_count; 13896 $token = ';'; 13897 $type = ';'; 13898 $type_sequence = ''; 13899 13900 check_for_new_minimum_depth($current_depth); 13901 13902 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; 13903 my $want_previous_breakpoint = -1; 13904 13905 my $saw_good_breakpoint; 13906 my $i_line_end = -1; 13907 my $i_line_start = -1; 13908 13909 # loop over all tokens in this batch 13910 while ( ++$i <= $max_index_to_go ) { 13911 if ( $type ne 'b' ) { 13912 $i_last_nonblank_token = $i - 1; 13913 $last_nonblank_type = $type; 13914 $last_nonblank_token = $token; 13915 $last_nonblank_block_type = $block_type; 13916 } 13917 $type = $types_to_go[$i]; 13918 $block_type = $block_type_to_go[$i]; 13919 $token = $tokens_to_go[$i]; 13920 $type_sequence = $type_sequence_to_go[$i]; 13921 my $next_type = $types_to_go[ $i + 1 ]; 13922 my $next_token = $tokens_to_go[ $i + 1 ]; 13923 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 13924 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 13925 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 13926 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 13927 13928 # set break if flag was set 13929 if ( $want_previous_breakpoint >= 0 ) { 13930 set_forced_breakpoint($want_previous_breakpoint); 13931 $want_previous_breakpoint = -1; 13932 } 13933 13934 $last_old_breakpoint_count = $old_breakpoint_count; 13935 if ( $old_breakpoint_to_go[$i] ) { 13936 $i_line_end = $i; 13937 $i_line_start = $i_next_nonblank; 13938 13939 $old_breakpoint_count++; 13940 13941 # Break before certain keywords if user broke there and 13942 # this is a 'safe' break point. The idea is to retain 13943 # any preferred breaks for sequential list operations, 13944 # like a schwartzian transform. 13945 if ($rOpts_break_at_old_keyword_breakpoints) { 13946 if ( 13947 $next_nonblank_type eq 'k' 13948 && $is_keyword_returning_list{$next_nonblank_token} 13949 && ( $type =~ /^[=\)\]\}Riw]$/ 13950 || $type eq 'k' 13951 && $is_keyword_returning_list{$token} ) 13952 ) 13953 { 13954 13955 # we actually have to set this break next time through 13956 # the loop because if we are at a closing token (such 13957 # as '}') which forms a one-line block, this break might 13958 # get undone. 13959 $want_previous_breakpoint = $i; 13960 } 13961 } 13962 } 13963 next if ( $type eq 'b' ); 13964 $depth = $nesting_depth_to_go[ $i + 1 ]; 13965 13966 # safety check - be sure we always break after a comment 13967 # Shouldn't happen .. an error here probably means that the 13968 # nobreak flag did not get turned off correctly during 13969 # formatting. 13970 if ( $type eq '#' ) { 13971 if ( $i != $max_index_to_go ) { 13972 warning( 13973"Non-fatal program bug: backup logic needed to break after a comment\n" 13974 ); 13975 report_definite_bug(); 13976 $nobreak_to_go[$i] = 0; 13977 set_forced_breakpoint($i); 13978 } 13979 } 13980 13981 # Force breakpoints at certain tokens in long lines. 13982 # Note that such breakpoints will be undone later if these tokens 13983 # are fully contained within parens on a line. 13984 if ( 13985 13986 # break before a keyword within a line 13987 $type eq 'k' 13988 && $i > 0 13989 13990 # if one of these keywords: 13991 && $token =~ /^(if|unless|while|until|for)$/ 13992 13993 # but do not break at something like '1 while' 13994 && ( $last_nonblank_type ne 'n' || $i > 2 ) 13995 13996 # and let keywords follow a closing 'do' brace 13997 && $last_nonblank_block_type ne 'do' 13998 13999 && ( 14000 $is_long_line 14001 14002 # or container is broken (by side-comment, etc) 14003 || ( $next_nonblank_token eq '(' 14004 && $mate_index_to_go[$i_next_nonblank] < $i ) 14005 ) 14006 ) 14007 { 14008 set_forced_breakpoint( $i - 1 ); 14009 } 14010 14011 # remember locations of '||' and '&&' for possible breaks if we 14012 # decide this is a long logical expression. 14013 if ( $type eq '||' ) { 14014 push @{ $rand_or_list[$depth][2] }, $i; 14015 ++$has_old_logical_breakpoints[$depth] 14016 if ( ( $i == $i_line_start || $i == $i_line_end ) 14017 && $rOpts_break_at_old_logical_breakpoints ); 14018 } 14019 elsif ( $type eq '&&' ) { 14020 push @{ $rand_or_list[$depth][3] }, $i; 14021 ++$has_old_logical_breakpoints[$depth] 14022 if ( ( $i == $i_line_start || $i == $i_line_end ) 14023 && $rOpts_break_at_old_logical_breakpoints ); 14024 } 14025 elsif ( $type eq 'f' ) { 14026 push @{ $rfor_semicolon_list[$depth] }, $i; 14027 } 14028 elsif ( $type eq 'k' ) { 14029 if ( $token eq 'and' ) { 14030 push @{ $rand_or_list[$depth][1] }, $i; 14031 ++$has_old_logical_breakpoints[$depth] 14032 if ( ( $i == $i_line_start || $i == $i_line_end ) 14033 && $rOpts_break_at_old_logical_breakpoints ); 14034 } 14035 14036 # break immediately at 'or's which are probably not in a logical 14037 # block -- but we will break in logical breaks below so that 14038 # they do not add to the forced_breakpoint_count 14039 elsif ( $token eq 'or' ) { 14040 push @{ $rand_or_list[$depth][0] }, $i; 14041 ++$has_old_logical_breakpoints[$depth] 14042 if ( ( $i == $i_line_start || $i == $i_line_end ) 14043 && $rOpts_break_at_old_logical_breakpoints ); 14044 if ( $is_logical_container{ $container_type[$depth] } ) { 14045 } 14046 else { 14047 if ($is_long_line) { set_forced_breakpoint($i) } 14048 elsif ( ( $i == $i_line_start || $i == $i_line_end ) 14049 && $rOpts_break_at_old_logical_breakpoints ) 14050 { 14051 $saw_good_breakpoint = 1; 14052 } 14053 } 14054 } 14055 elsif ( $token eq 'if' || $token eq 'unless' ) { 14056 push @{ $rand_or_list[$depth][4] }, $i; 14057 if ( ( $i == $i_line_start || $i == $i_line_end ) 14058 && $rOpts_break_at_old_logical_breakpoints ) 14059 { 14060 set_forced_breakpoint($i); 14061 } 14062 } 14063 } 14064 elsif ( $is_assignment{$type} ) { 14065 $i_equals[$depth] = $i; 14066 } 14067 14068 if ($type_sequence) { 14069 14070 # handle any postponed closing breakpoints 14071 if ( $token =~ /^[\)\]\}\:]$/ ) { 14072 if ( $type eq ':' ) { 14073 $last_colon_sequence_number = $type_sequence; 14074 14075 # TESTING: retain break at a ':' line break 14076 if ( ( $i == $i_line_start || $i == $i_line_end ) 14077 && $rOpts_break_at_old_ternary_breakpoints ) 14078 { 14079 14080 # TESTING: 14081 set_forced_breakpoint($i); 14082 14083 # break at previous '=' 14084 if ( $i_equals[$depth] > 0 ) { 14085 set_forced_breakpoint( $i_equals[$depth] ); 14086 $i_equals[$depth] = -1; 14087 } 14088 } 14089 } 14090 if ( defined( $postponed_breakpoint{$type_sequence} ) ) { 14091 my $inc = ( $type eq ':' ) ? 0 : 1; 14092 set_forced_breakpoint( $i - $inc ); 14093 delete $postponed_breakpoint{$type_sequence}; 14094 } 14095 } 14096 14097 # set breaks at ?/: if they will get separated (and are 14098 # not a ?/: chain), or if the '?' is at the end of the 14099 # line 14100 elsif ( $token eq '?' ) { 14101 my $i_colon = $mate_index_to_go[$i]; 14102 if ( 14103 $i_colon <= 0 # the ':' is not in this batch 14104 || $i == 0 # this '?' is the first token of the line 14105 || $i == 14106 $max_index_to_go # or this '?' is the last token 14107 ) 14108 { 14109 14110 # don't break at a '?' if preceded by ':' on 14111 # this line of previous ?/: pair on this line. 14112 # This is an attempt to preserve a chain of ?/: 14113 # expressions (elsif2.t). And don't break if 14114 # this has a side comment. 14115 set_forced_breakpoint($i) 14116 unless ( 14117 $type_sequence == ( 14118 $last_colon_sequence_number + 14119 TYPE_SEQUENCE_INCREMENT 14120 ) 14121 || $tokens_to_go[$max_index_to_go] eq '#' 14122 ); 14123 set_closing_breakpoint($i); 14124 } 14125 } 14126 } 14127 14128#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; 14129 14130 #------------------------------------------------------------ 14131 # Handle Increasing Depth.. 14132 # 14133 # prepare for a new list when depth increases 14134 # token $i is a '(','{', or '[' 14135 #------------------------------------------------------------ 14136 if ( $depth > $current_depth ) { 14137 14138 $breakpoint_stack[$depth] = $forced_breakpoint_count; 14139 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; 14140 $has_broken_sublist[$depth] = 0; 14141 $identifier_count_stack[$depth] = 0; 14142 $index_before_arrow[$depth] = -1; 14143 $interrupted_list[$depth] = 0; 14144 $item_count_stack[$depth] = 0; 14145 $last_comma_index[$depth] = undef; 14146 $last_dot_index[$depth] = undef; 14147 $last_nonblank_type[$depth] = $last_nonblank_type; 14148 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; 14149 $opening_structure_index_stack[$depth] = $i; 14150 $rand_or_list[$depth] = []; 14151 $rfor_semicolon_list[$depth] = []; 14152 $i_equals[$depth] = -1; 14153 $want_comma_break[$depth] = 0; 14154 $container_type[$depth] = 14155 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) 14156 ? $last_nonblank_token 14157 : ""; 14158 $has_old_logical_breakpoints[$depth] = 0; 14159 14160 # if line ends here then signal closing token to break 14161 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) 14162 { 14163 set_closing_breakpoint($i); 14164 } 14165 14166 # Not all lists of values should be vertically aligned.. 14167 $dont_align[$depth] = 14168 14169 # code BLOCKS are handled at a higher level 14170 ( $block_type ne "" ) 14171 14172 # certain paren lists 14173 || ( $type eq '(' ) && ( 14174 14175 # it does not usually look good to align a list of 14176 # identifiers in a parameter list, as in: 14177 # my($var1, $var2, ...) 14178 # (This test should probably be refined, for now I'm just 14179 # testing for any keyword) 14180 ( $last_nonblank_type eq 'k' ) 14181 14182 # a trailing '(' usually indicates a non-list 14183 || ( $next_nonblank_type eq '(' ) 14184 ); 14185 14186 # patch to outdent opening brace of long if/for/.. 14187 # statements (like this one). See similar coding in 14188 # set_continuation breaks. We have also catch it here for 14189 # short line fragments which otherwise will not go through 14190 # set_continuation_breaks. 14191 if ( 14192 $block_type 14193 14194 # if we have the ')' but not its '(' in this batch.. 14195 && ( $last_nonblank_token eq ')' ) 14196 && $mate_index_to_go[$i_last_nonblank_token] < 0 14197 14198 # and user wants brace to left 14199 && !$rOpts->{'opening-brace-always-on-right'} 14200 14201 && ( $type eq '{' ) # should be true 14202 && ( $token eq '{' ) # should be true 14203 ) 14204 { 14205 set_forced_breakpoint( $i - 1 ); 14206 } 14207 } 14208 14209 #------------------------------------------------------------ 14210 # Handle Decreasing Depth.. 14211 # 14212 # finish off any old list when depth decreases 14213 # token $i is a ')','}', or ']' 14214 #------------------------------------------------------------ 14215 elsif ( $depth < $current_depth ) { 14216 14217 check_for_new_minimum_depth($depth); 14218 14219 # force all outer logical containers to break after we see on 14220 # old breakpoint 14221 $has_old_logical_breakpoints[$depth] ||= 14222 $has_old_logical_breakpoints[$current_depth]; 14223 14224 # Patch to break between ') {' if the paren list is broken. 14225 # There is similar logic in set_continuation_breaks for 14226 # non-broken lists. 14227 if ( $token eq ')' 14228 && $next_nonblank_block_type 14229 && $interrupted_list[$current_depth] 14230 && $next_nonblank_type eq '{' 14231 && !$rOpts->{'opening-brace-always-on-right'} ) 14232 { 14233 set_forced_breakpoint($i); 14234 } 14235 14236#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; 14237 14238 # set breaks at commas if necessary 14239 my ( $bp_count, $do_not_break_apart ) = 14240 set_comma_breakpoints($current_depth); 14241 14242 my $i_opening = $opening_structure_index_stack[$current_depth]; 14243 my $saw_opening_structure = ( $i_opening >= 0 ); 14244 14245 # this term is long if we had to break at interior commas.. 14246 my $is_long_term = $bp_count > 0; 14247 14248 # ..or if the length between opening and closing parens exceeds 14249 # allowed line length 14250 if ( !$is_long_term && $saw_opening_structure ) { 14251 my $i_opening_minus = find_token_starting_list($i_opening); 14252 14253 # Note: we have to allow for one extra space after a 14254 # closing token so that we do not strand a comma or 14255 # semicolon, hence the '>=' here (oneline.t) 14256 $is_long_term = 14257 excess_line_length( $i_opening_minus, $i ) >= 0; 14258 } 14259 14260 # We've set breaks after all comma-arrows. Now we have to 14261 # undo them if this can be a one-line block 14262 # (the only breakpoints set will be due to comma-arrows) 14263 if ( 14264 14265 # user doesn't require breaking after all comma-arrows 14266 ( $rOpts_comma_arrow_breakpoints != 0 ) 14267 14268 # and if the opening structure is in this batch 14269 && $saw_opening_structure 14270 14271 # and either on the same old line 14272 && ( 14273 $old_breakpoint_count_stack[$current_depth] == 14274 $last_old_breakpoint_count 14275 14276 # or user wants to form long blocks with arrows 14277 || $rOpts_comma_arrow_breakpoints == 2 14278 ) 14279 14280 # and we made some breakpoints between the opening and closing 14281 && ( $breakpoint_undo_stack[$current_depth] < 14282 $forced_breakpoint_undo_count ) 14283 14284 # and this block is short enough to fit on one line 14285 # Note: use < because need 1 more space for possible comma 14286 && !$is_long_term 14287 14288 ) 14289 { 14290 undo_forced_breakpoint_stack( 14291 $breakpoint_undo_stack[$current_depth] ); 14292 } 14293 14294 # now see if we have any comma breakpoints left 14295 my $has_comma_breakpoints = 14296 ( $breakpoint_stack[$current_depth] != 14297 $forced_breakpoint_count ); 14298 14299 # update broken-sublist flag of the outer container 14300 $has_broken_sublist[$depth] = 14301 $has_broken_sublist[$depth] 14302 || $has_broken_sublist[$current_depth] 14303 || $is_long_term 14304 || $has_comma_breakpoints; 14305 14306# Having come to the closing ')', '}', or ']', now we have to decide if we 14307# should 'open up' the structure by placing breaks at the opening and 14308# closing containers. This is a tricky decision. Here are some of the 14309# basic considerations: 14310# 14311# -If this is a BLOCK container, then any breakpoints will have already 14312# been set (and according to user preferences), so we need do nothing here. 14313# 14314# -If we have a comma-separated list for which we can align the list items, 14315# then we need to do so because otherwise the vertical aligner cannot 14316# currently do the alignment. 14317# 14318# -If this container does itself contain a container which has been broken 14319# open, then it should be broken open to properly show the structure. 14320# 14321# -If there is nothing to align, and no other reason to break apart, 14322# then do not do it. 14323# 14324# We will not break open the parens of a long but 'simple' logical expression. 14325# For example: 14326# 14327# This is an example of a simple logical expression and its formatting: 14328# 14329# if ( $bigwasteofspace1 && $bigwasteofspace2 14330# || $bigwasteofspace3 && $bigwasteofspace4 ) 14331# 14332# Most people would prefer this than the 'spacey' version: 14333# 14334# if ( 14335# $bigwasteofspace1 && $bigwasteofspace2 14336# || $bigwasteofspace3 && $bigwasteofspace4 14337# ) 14338# 14339# To illustrate the rules for breaking logical expressions, consider: 14340# 14341# FULLY DENSE: 14342# if ( $opt_excl 14343# and ( exists $ids_excl_uc{$id_uc} 14344# or grep $id_uc =~ /$_/, @ids_excl_uc )) 14345# 14346# This is on the verge of being difficult to read. The current default is to 14347# open it up like this: 14348# 14349# DEFAULT: 14350# if ( 14351# $opt_excl 14352# and ( exists $ids_excl_uc{$id_uc} 14353# or grep $id_uc =~ /$_/, @ids_excl_uc ) 14354# ) 14355# 14356# This is a compromise which tries to avoid being too dense and to spacey. 14357# A more spaced version would be: 14358# 14359# SPACEY: 14360# if ( 14361# $opt_excl 14362# and ( 14363# exists $ids_excl_uc{$id_uc} 14364# or grep $id_uc =~ /$_/, @ids_excl_uc 14365# ) 14366# ) 14367# 14368# Some people might prefer the spacey version -- an option could be added. The 14369# innermost expression contains a long block '( exists $ids_... ')'. 14370# 14371# Here is how the logic goes: We will force a break at the 'or' that the 14372# innermost expression contains, but we will not break apart its opening and 14373# closing containers because (1) it contains no multi-line sub-containers itself, 14374# and (2) there is no alignment to be gained by breaking it open like this 14375# 14376# and ( 14377# exists $ids_excl_uc{$id_uc} 14378# or grep $id_uc =~ /$_/, @ids_excl_uc 14379# ) 14380# 14381# (although this looks perfectly ok and might be good for long expressions). The 14382# outer 'if' container, though, contains a broken sub-container, so it will be 14383# broken open to avoid too much density. Also, since it contains no 'or's, there 14384# will be a forced break at its 'and'. 14385 14386 # set some flags telling something about this container.. 14387 my $is_simple_logical_expression = 0; 14388 if ( $item_count_stack[$current_depth] == 0 14389 && $saw_opening_structure 14390 && $tokens_to_go[$i_opening] eq '(' 14391 && $is_logical_container{ $container_type[$current_depth] } 14392 ) 14393 { 14394 14395 # This seems to be a simple logical expression with 14396 # no existing breakpoints. Set a flag to prevent 14397 # opening it up. 14398 if ( !$has_comma_breakpoints ) { 14399 $is_simple_logical_expression = 1; 14400 } 14401 14402 # This seems to be a simple logical expression with 14403 # breakpoints (broken sublists, for example). Break 14404 # at all 'or's and '||'s. 14405 else { 14406 set_logical_breakpoints($current_depth); 14407 } 14408 } 14409 14410 if ( $is_long_term 14411 && @{ $rfor_semicolon_list[$current_depth] } ) 14412 { 14413 set_for_semicolon_breakpoints($current_depth); 14414 14415 # open up a long 'for' or 'foreach' container to allow 14416 # leading term alignment unless -lp is used. 14417 $has_comma_breakpoints = 1 14418 unless $rOpts_line_up_parentheses; 14419 } 14420 14421 if ( 14422 14423 # breaks for code BLOCKS are handled at a higher level 14424 !$block_type 14425 14426 # we do not need to break at the top level of an 'if' 14427 # type expression 14428 && !$is_simple_logical_expression 14429 14430 ## modification to keep ': (' containers vertically tight; 14431 ## but probably better to let user set -vt=1 to avoid 14432 ## inconsistency with other paren types 14433 ## && ($container_type[$current_depth] ne ':') 14434 14435 # otherwise, we require one of these reasons for breaking: 14436 && ( 14437 14438 # - this term has forced line breaks 14439 $has_comma_breakpoints 14440 14441 # - the opening container is separated from this batch 14442 # for some reason (comment, blank line, code block) 14443 # - this is a non-paren container spanning multiple lines 14444 || !$saw_opening_structure 14445 14446 # - this is a long block contained in another breakable 14447 # container 14448 || ( $is_long_term 14449 && $container_environment_to_go[$i_opening] ne 14450 'BLOCK' ) 14451 ) 14452 ) 14453 { 14454 14455 # For -lp option, we must put a breakpoint before 14456 # the token which has been identified as starting 14457 # this indentation level. This is necessary for 14458 # proper alignment. 14459 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) 14460 { 14461 my $item = $leading_spaces_to_go[ $i_opening + 1 ]; 14462 if ( $i_opening + 1 < $max_index_to_go 14463 && $types_to_go[ $i_opening + 1 ] eq 'b' ) 14464 { 14465 $item = $leading_spaces_to_go[ $i_opening + 2 ]; 14466 } 14467 if ( defined($item) ) { 14468 my $i_start_2 = $item->get_STARTING_INDEX(); 14469 if ( 14470 defined($i_start_2) 14471 14472 # we are breaking after an opening brace, paren, 14473 # so don't break before it too 14474 && $i_start_2 ne $i_opening 14475 ) 14476 { 14477 14478 # Only break for breakpoints at the same 14479 # indentation level as the opening paren 14480 my $test1 = $nesting_depth_to_go[$i_opening]; 14481 my $test2 = $nesting_depth_to_go[$i_start_2]; 14482 if ( $test2 == $test1 ) { 14483 set_forced_breakpoint( $i_start_2 - 1 ); 14484 } 14485 } 14486 } 14487 } 14488 14489 # break after opening structure. 14490 # note: break before closing structure will be automatic 14491 if ( $minimum_depth <= $current_depth ) { 14492 14493 set_forced_breakpoint($i_opening) 14494 unless ( $do_not_break_apart 14495 || is_unbreakable_container($current_depth) ); 14496 14497 # break at '.' of lower depth level before opening token 14498 if ( $last_dot_index[$depth] ) { 14499 set_forced_breakpoint( $last_dot_index[$depth] ); 14500 } 14501 14502 # break before opening structure if preeced by another 14503 # closing structure and a comma. This is normally 14504 # done by the previous closing brace, but not 14505 # if it was a one-line block. 14506 if ( $i_opening > 2 ) { 14507 my $i_prev = 14508 ( $types_to_go[ $i_opening - 1 ] eq 'b' ) 14509 ? $i_opening - 2 14510 : $i_opening - 1; 14511 14512 if ( $types_to_go[$i_prev] eq ',' 14513 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) 14514 { 14515 set_forced_breakpoint($i_prev); 14516 } 14517 14518 # also break before something like ':(' or '?(' 14519 # if appropriate. 14520 elsif ( 14521 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) 14522 { 14523 my $token_prev = $tokens_to_go[$i_prev]; 14524 if ( $want_break_before{$token_prev} ) { 14525 set_forced_breakpoint($i_prev); 14526 } 14527 } 14528 } 14529 } 14530 14531 # break after comma following closing structure 14532 if ( $next_type eq ',' ) { 14533 set_forced_breakpoint( $i + 1 ); 14534 } 14535 14536 # break before an '=' following closing structure 14537 if ( 14538 $is_assignment{$next_nonblank_type} 14539 && ( $breakpoint_stack[$current_depth] != 14540 $forced_breakpoint_count ) 14541 ) 14542 { 14543 set_forced_breakpoint($i); 14544 } 14545 14546 # break at any comma before the opening structure Added 14547 # for -lp, but seems to be good in general. It isn't 14548 # obvious how far back to look; the '5' below seems to 14549 # work well and will catch the comma in something like 14550 # push @list, myfunc( $param, $param, .. 14551 14552 my $icomma = $last_comma_index[$depth]; 14553 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { 14554 unless ( $forced_breakpoint_to_go[$icomma] ) { 14555 set_forced_breakpoint($icomma); 14556 } 14557 } 14558 } # end logic to open up a container 14559 14560 # Break open a logical container open if it was already open 14561 elsif ($is_simple_logical_expression 14562 && $has_old_logical_breakpoints[$current_depth] ) 14563 { 14564 set_logical_breakpoints($current_depth); 14565 } 14566 14567 # Handle long container which does not get opened up 14568 elsif ($is_long_term) { 14569 14570 # must set fake breakpoint to alert outer containers that 14571 # they are complex 14572 set_fake_breakpoint(); 14573 } 14574 } 14575 14576 #------------------------------------------------------------ 14577 # Handle this token 14578 #------------------------------------------------------------ 14579 14580 $current_depth = $depth; 14581 14582 # handle comma-arrow 14583 if ( $type eq '=>' ) { 14584 next if ( $last_nonblank_type eq '=>' ); 14585 next if $rOpts_break_at_old_comma_breakpoints; 14586 next if $rOpts_comma_arrow_breakpoints == 3; 14587 $want_comma_break[$depth] = 1; 14588 $index_before_arrow[$depth] = $i_last_nonblank_token; 14589 next; 14590 } 14591 14592 elsif ( $type eq '.' ) { 14593 $last_dot_index[$depth] = $i; 14594 } 14595 14596 # Turn off alignment if we are sure that this is not a list 14597 # environment. To be safe, we will do this if we see certain 14598 # non-list tokens, such as ';', and also the environment is 14599 # not a list. Note that '=' could be in any of the = operators 14600 # (lextest.t). We can't just use the reported environment 14601 # because it can be incorrect in some cases. 14602 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) 14603 && $container_environment_to_go[$i] ne 'LIST' ) 14604 { 14605 $dont_align[$depth] = 1; 14606 $want_comma_break[$depth] = 0; 14607 $index_before_arrow[$depth] = -1; 14608 } 14609 14610 # now just handle any commas 14611 next unless ( $type eq ',' ); 14612 14613 $last_dot_index[$depth] = undef; 14614 $last_comma_index[$depth] = $i; 14615 14616 # break here if this comma follows a '=>' 14617 # but not if there is a side comment after the comma 14618 if ( $want_comma_break[$depth] ) { 14619 14620 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { 14621 $want_comma_break[$depth] = 0; 14622 $index_before_arrow[$depth] = -1; 14623 next; 14624 } 14625 14626 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 14627 14628 # break before the previous token if it looks safe 14629 # Example of something that we will not try to break before: 14630 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, 14631 # Also we don't want to break at a binary operator (like +): 14632 # $c->createOval( 14633 # $x + $R, $y + 14634 # $R => $x - $R, 14635 # $y - $R, -fill => 'black', 14636 # ); 14637 my $ibreak = $index_before_arrow[$depth] - 1; 14638 if ( $ibreak > 0 14639 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) 14640 { 14641 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } 14642 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } 14643 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { 14644 14645 # don't break pointer calls, such as the following: 14646 # File::Spec->curdir => 1, 14647 # (This is tokenized as adjacent 'w' tokens) 14648 if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { 14649 set_forced_breakpoint($ibreak); 14650 } 14651 } 14652 } 14653 14654 $want_comma_break[$depth] = 0; 14655 $index_before_arrow[$depth] = -1; 14656 14657 # handle list which mixes '=>'s and ','s: 14658 # treat any list items so far as an interrupted list 14659 $interrupted_list[$depth] = 1; 14660 next; 14661 } 14662 14663 # break after all commas above starting depth 14664 if ( $depth < $starting_depth && !$dont_align[$depth] ) { 14665 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 14666 next; 14667 } 14668 14669 # add this comma to the list.. 14670 my $item_count = $item_count_stack[$depth]; 14671 if ( $item_count == 0 ) { 14672 14673 # but do not form a list with no opening structure 14674 # for example: 14675 14676 # open INFILE_COPY, ">$input_file_copy" 14677 # or die ("very long message"); 14678 14679 if ( ( $opening_structure_index_stack[$depth] < 0 ) 14680 && $container_environment_to_go[$i] eq 'BLOCK' ) 14681 { 14682 $dont_align[$depth] = 1; 14683 } 14684 } 14685 14686 $comma_index[$depth][$item_count] = $i; 14687 ++$item_count_stack[$depth]; 14688 if ( $last_nonblank_type =~ /^[iR\]]$/ ) { 14689 $identifier_count_stack[$depth]++; 14690 } 14691 } 14692 14693 #------------------------------------------- 14694 # end of loop over all tokens in this batch 14695 #------------------------------------------- 14696 14697 # set breaks for any unfinished lists .. 14698 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { 14699 14700 $interrupted_list[$dd] = 1; 14701 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); 14702 set_comma_breakpoints($dd); 14703 set_logical_breakpoints($dd) 14704 if ( $has_old_logical_breakpoints[$dd] ); 14705 set_for_semicolon_breakpoints($dd); 14706 14707 # break open container... 14708 my $i_opening = $opening_structure_index_stack[$dd]; 14709 set_forced_breakpoint($i_opening) 14710 unless ( 14711 is_unbreakable_container($dd) 14712 14713 # Avoid a break which would place an isolated ' or " 14714 # on a line 14715 || ( $type eq 'Q' 14716 && $i_opening >= $max_index_to_go - 2 14717 && $token =~ /^['"]$/ ) 14718 ); 14719 } 14720 14721 # Return a flag indicating if the input file had some good breakpoints. 14722 # This flag will be used to force a break in a line shorter than the 14723 # allowed line length. 14724 if ( $has_old_logical_breakpoints[$current_depth] ) { 14725 $saw_good_breakpoint = 1; 14726 } 14727 return $saw_good_breakpoint; 14728 } 14729} # end scan_list 14730 14731sub find_token_starting_list { 14732 14733 # When testing to see if a block will fit on one line, some 14734 # previous token(s) may also need to be on the line; particularly 14735 # if this is a sub call. So we will look back at least one 14736 # token. NOTE: This isn't perfect, but not critical, because 14737 # if we mis-identify a block, it will be wrapped and therefore 14738 # fixed the next time it is formatted. 14739 my $i_opening_paren = shift; 14740 my $i_opening_minus = $i_opening_paren; 14741 my $im1 = $i_opening_paren - 1; 14742 my $im2 = $i_opening_paren - 2; 14743 my $im3 = $i_opening_paren - 3; 14744 my $typem1 = $types_to_go[$im1]; 14745 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; 14746 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { 14747 $i_opening_minus = $i_opening_paren; 14748 } 14749 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { 14750 $i_opening_minus = $im1 if $im1 >= 0; 14751 14752 # walk back to improve length estimate 14753 for ( my $j = $im1 ; $j >= 0 ; $j-- ) { 14754 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); 14755 $i_opening_minus = $j; 14756 } 14757 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } 14758 } 14759 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } 14760 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { 14761 $i_opening_minus = $im2; 14762 } 14763 return $i_opening_minus; 14764} 14765 14766{ # begin set_comma_breakpoints_do 14767 14768 my %is_keyword_with_special_leading_term; 14769 14770 BEGIN { 14771 14772 # These keywords have prototypes which allow a special leading item 14773 # followed by a list 14774 @_ = 14775 qw(formline grep kill map printf sprintf push chmod join pack unshift); 14776 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); 14777 } 14778 14779 sub set_comma_breakpoints_do { 14780 14781 # Given a list with some commas, set breakpoints at some of the 14782 # commas, if necessary, to make it easy to read. This list is 14783 # an example: 14784 my ( 14785 $depth, $i_opening_paren, $i_closing_paren, 14786 $item_count, $identifier_count, $rcomma_index, 14787 $next_nonblank_type, $list_type, $interrupted, 14788 $rdo_not_break_apart, $must_break_open, 14789 ) = @_; 14790 14791 # nothing to do if no commas seen 14792 return if ( $item_count < 1 ); 14793 my $i_first_comma = $$rcomma_index[0]; 14794 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; 14795 my $i_last_comma = $i_true_last_comma; 14796 if ( $i_last_comma >= $max_index_to_go ) { 14797 $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; 14798 return if ( $item_count < 1 ); 14799 } 14800 14801 #--------------------------------------------------------------- 14802 # find lengths of all items in the list to calculate page layout 14803 #--------------------------------------------------------------- 14804 my $comma_count = $item_count; 14805 my @item_lengths; 14806 my @i_term_begin; 14807 my @i_term_end; 14808 my @i_term_comma; 14809 my $i_prev_plus; 14810 my @max_length = ( 0, 0 ); 14811 my $first_term_length; 14812 my $i = $i_opening_paren; 14813 my $is_odd = 1; 14814 14815 for ( my $j = 0 ; $j < $comma_count ; $j++ ) { 14816 $is_odd = 1 - $is_odd; 14817 $i_prev_plus = $i + 1; 14818 $i = $$rcomma_index[$j]; 14819 14820 my $i_term_end = 14821 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; 14822 my $i_term_begin = 14823 ( $types_to_go[$i_prev_plus] eq 'b' ) 14824 ? $i_prev_plus + 1 14825 : $i_prev_plus; 14826 push @i_term_begin, $i_term_begin; 14827 push @i_term_end, $i_term_end; 14828 push @i_term_comma, $i; 14829 14830 # note: currently adding 2 to all lengths (for comma and space) 14831 my $length = 14832 2 + token_sequence_length( $i_term_begin, $i_term_end ); 14833 push @item_lengths, $length; 14834 14835 if ( $j == 0 ) { 14836 $first_term_length = $length; 14837 } 14838 else { 14839 14840 if ( $length > $max_length[$is_odd] ) { 14841 $max_length[$is_odd] = $length; 14842 } 14843 } 14844 } 14845 14846 # now we have to make a distinction between the comma count and item 14847 # count, because the item count will be one greater than the comma 14848 # count if the last item is not terminated with a comma 14849 my $i_b = 14850 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) 14851 ? $i_last_comma + 1 14852 : $i_last_comma; 14853 my $i_e = 14854 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) 14855 ? $i_closing_paren - 2 14856 : $i_closing_paren - 1; 14857 my $i_effective_last_comma = $i_last_comma; 14858 14859 my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); 14860 14861 if ( $last_item_length > 0 ) { 14862 14863 # add 2 to length because other lengths include a comma and a blank 14864 $last_item_length += 2; 14865 push @item_lengths, $last_item_length; 14866 push @i_term_begin, $i_b + 1; 14867 push @i_term_end, $i_e; 14868 push @i_term_comma, undef; 14869 14870 my $i_odd = $item_count % 2; 14871 14872 if ( $last_item_length > $max_length[$i_odd] ) { 14873 $max_length[$i_odd] = $last_item_length; 14874 } 14875 14876 $item_count++; 14877 $i_effective_last_comma = $i_e + 1; 14878 14879 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { 14880 $identifier_count++; 14881 } 14882 } 14883 14884 #--------------------------------------------------------------- 14885 # End of length calculations 14886 #--------------------------------------------------------------- 14887 14888 #--------------------------------------------------------------- 14889 # Compound List Rule 1: 14890 # Break at (almost) every comma for a list containing a broken 14891 # sublist. This has higher priority than the Interrupted List 14892 # Rule. 14893 #--------------------------------------------------------------- 14894 if ( $has_broken_sublist[$depth] ) { 14895 14896 # Break at every comma except for a comma between two 14897 # simple, small terms. This prevents long vertical 14898 # columns of, say, just 0's. 14899 my $small_length = 10; # 2 + actual maximum length wanted 14900 14901 # We'll insert a break in long runs of small terms to 14902 # allow alignment in uniform tables. 14903 my $skipped_count = 0; 14904 my $columns = table_columns_available($i_first_comma); 14905 my $fields = int( $columns / $small_length ); 14906 if ( $rOpts_maximum_fields_per_table 14907 && $fields > $rOpts_maximum_fields_per_table ) 14908 { 14909 $fields = $rOpts_maximum_fields_per_table; 14910 } 14911 my $max_skipped_count = $fields - 1; 14912 14913 my $is_simple_last_term = 0; 14914 my $is_simple_next_term = 0; 14915 foreach my $j ( 0 .. $item_count ) { 14916 $is_simple_last_term = $is_simple_next_term; 14917 $is_simple_next_term = 0; 14918 if ( $j < $item_count 14919 && $i_term_end[$j] == $i_term_begin[$j] 14920 && $item_lengths[$j] <= $small_length ) 14921 { 14922 $is_simple_next_term = 1; 14923 } 14924 next if $j == 0; 14925 if ( $is_simple_last_term 14926 && $is_simple_next_term 14927 && $skipped_count < $max_skipped_count ) 14928 { 14929 $skipped_count++; 14930 } 14931 else { 14932 $skipped_count = 0; 14933 my $i = $i_term_comma[ $j - 1 ]; 14934 last unless defined $i; 14935 set_forced_breakpoint($i); 14936 } 14937 } 14938 14939 # always break at the last comma if this list is 14940 # interrupted; we wouldn't want to leave a terminal '{', for 14941 # example. 14942 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } 14943 return; 14944 } 14945 14946#my ( $a, $b, $c ) = caller(); 14947#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count 14948#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; 14949#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; 14950 14951 #--------------------------------------------------------------- 14952 # Interrupted List Rule: 14953 # A list is is forced to use old breakpoints if it was interrupted 14954 # by side comments or blank lines, or requested by user. 14955 #--------------------------------------------------------------- 14956 if ( $rOpts_break_at_old_comma_breakpoints 14957 || $interrupted 14958 || $i_opening_paren < 0 ) 14959 { 14960 copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); 14961 return; 14962 } 14963 14964 #--------------------------------------------------------------- 14965 # Looks like a list of items. We have to look at it and size it up. 14966 #--------------------------------------------------------------- 14967 14968 my $opening_token = $tokens_to_go[$i_opening_paren]; 14969 my $opening_environment = 14970 $container_environment_to_go[$i_opening_paren]; 14971 14972 #------------------------------------------------------------------- 14973 # Return if this will fit on one line 14974 #------------------------------------------------------------------- 14975 14976 my $i_opening_minus = find_token_starting_list($i_opening_paren); 14977 return 14978 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; 14979 14980 #------------------------------------------------------------------- 14981 # Now we know that this block spans multiple lines; we have to set 14982 # at least one breakpoint -- real or fake -- as a signal to break 14983 # open any outer containers. 14984 #------------------------------------------------------------------- 14985 set_fake_breakpoint(); 14986 14987 # be sure we do not extend beyond the current list length 14988 if ( $i_effective_last_comma >= $max_index_to_go ) { 14989 $i_effective_last_comma = $max_index_to_go - 1; 14990 } 14991 14992 # Set a flag indicating if we need to break open to keep -lp 14993 # items aligned. This is necessary if any of the list terms 14994 # exceeds the available space after the '('. 14995 my $need_lp_break_open = $must_break_open; 14996 if ( $rOpts_line_up_parentheses && !$must_break_open ) { 14997 my $columns_if_unbroken = $rOpts_maximum_line_length - 14998 total_line_length( $i_opening_minus, $i_opening_paren ); 14999 $need_lp_break_open = 15000 ( $max_length[0] > $columns_if_unbroken ) 15001 || ( $max_length[1] > $columns_if_unbroken ) 15002 || ( $first_term_length > $columns_if_unbroken ); 15003 } 15004 15005 # Specify if the list must have an even number of fields or not. 15006 # It is generally safest to assume an even number, because the 15007 # list items might be a hash list. But if we can be sure that 15008 # it is not a hash, then we can allow an odd number for more 15009 # flexibility. 15010 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count 15011 15012 if ( $identifier_count >= $item_count - 1 15013 || $is_assignment{$next_nonblank_type} 15014 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) 15015 ) 15016 { 15017 $odd_or_even = 1; 15018 } 15019 15020 # do we have a long first term which should be 15021 # left on a line by itself? 15022 my $use_separate_first_term = ( 15023 $odd_or_even == 1 # only if we can use 1 field/line 15024 && $item_count > 3 # need several items 15025 && $first_term_length > 15026 2 * $max_length[0] - 2 # need long first term 15027 && $first_term_length > 15028 2 * $max_length[1] - 2 # need long first term 15029 ); 15030 15031 # or do we know from the type of list that the first term should 15032 # be placed alone? 15033 if ( !$use_separate_first_term ) { 15034 if ( $is_keyword_with_special_leading_term{$list_type} ) { 15035 $use_separate_first_term = 1; 15036 15037 # should the container be broken open? 15038 if ( $item_count < 3 ) { 15039 if ( $i_first_comma - $i_opening_paren < 4 ) { 15040 $$rdo_not_break_apart = 1; 15041 } 15042 } 15043 elsif ($first_term_length < 20 15044 && $i_first_comma - $i_opening_paren < 4 ) 15045 { 15046 my $columns = table_columns_available($i_first_comma); 15047 if ( $first_term_length < $columns ) { 15048 $$rdo_not_break_apart = 1; 15049 } 15050 } 15051 } 15052 } 15053 15054 # if so, 15055 if ($use_separate_first_term) { 15056 15057 # ..set a break and update starting values 15058 $use_separate_first_term = 1; 15059 set_forced_breakpoint($i_first_comma); 15060 $i_opening_paren = $i_first_comma; 15061 $i_first_comma = $$rcomma_index[1]; 15062 $item_count--; 15063 return if $comma_count == 1; 15064 shift @item_lengths; 15065 shift @i_term_begin; 15066 shift @i_term_end; 15067 shift @i_term_comma; 15068 } 15069 15070 # if not, update the metrics to include the first term 15071 else { 15072 if ( $first_term_length > $max_length[0] ) { 15073 $max_length[0] = $first_term_length; 15074 } 15075 } 15076 15077 # Field width parameters 15078 my $pair_width = ( $max_length[0] + $max_length[1] ); 15079 my $max_width = 15080 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; 15081 15082 # Number of free columns across the page width for laying out tables 15083 my $columns = table_columns_available($i_first_comma); 15084 15085 # Estimated maximum number of fields which fit this space 15086 # This will be our first guess 15087 my $number_of_fields_max = 15088 maximum_number_of_fields( $columns, $odd_or_even, $max_width, 15089 $pair_width ); 15090 my $number_of_fields = $number_of_fields_max; 15091 15092 # Find the best-looking number of fields 15093 # and make this our second guess if possible 15094 my ( $number_of_fields_best, $ri_ragged_break_list, 15095 $new_identifier_count ) 15096 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, 15097 $max_width ); 15098 15099 if ( $number_of_fields_best != 0 15100 && $number_of_fields_best < $number_of_fields_max ) 15101 { 15102 $number_of_fields = $number_of_fields_best; 15103 } 15104 15105 # ---------------------------------------------------------------------- 15106 # If we are crowded and the -lp option is being used, try to 15107 # undo some indentation 15108 # ---------------------------------------------------------------------- 15109 if ( 15110 $rOpts_line_up_parentheses 15111 && ( 15112 $number_of_fields == 0 15113 || ( $number_of_fields == 1 15114 && $number_of_fields != $number_of_fields_best ) 15115 ) 15116 ) 15117 { 15118 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); 15119 if ( $available_spaces > 0 ) { 15120 15121 my $spaces_wanted = $max_width - $columns; # for 1 field 15122 15123 if ( $number_of_fields_best == 0 ) { 15124 $number_of_fields_best = 15125 get_maximum_fields_wanted( \@item_lengths ); 15126 } 15127 15128 if ( $number_of_fields_best != 1 ) { 15129 my $spaces_wanted_2 = 15130 1 + $pair_width - $columns; # for 2 fields 15131 if ( $available_spaces > $spaces_wanted_2 ) { 15132 $spaces_wanted = $spaces_wanted_2; 15133 } 15134 } 15135 15136 if ( $spaces_wanted > 0 ) { 15137 my $deleted_spaces = 15138 reduce_lp_indentation( $i_first_comma, $spaces_wanted ); 15139 15140 # redo the math 15141 if ( $deleted_spaces > 0 ) { 15142 $columns = table_columns_available($i_first_comma); 15143 $number_of_fields_max = 15144 maximum_number_of_fields( $columns, $odd_or_even, 15145 $max_width, $pair_width ); 15146 $number_of_fields = $number_of_fields_max; 15147 15148 if ( $number_of_fields_best == 1 15149 && $number_of_fields >= 1 ) 15150 { 15151 $number_of_fields = $number_of_fields_best; 15152 } 15153 } 15154 } 15155 } 15156 } 15157 15158 # try for one column if two won't work 15159 if ( $number_of_fields <= 0 ) { 15160 $number_of_fields = int( $columns / $max_width ); 15161 } 15162 15163 # The user can place an upper bound on the number of fields, 15164 # which can be useful for doing maintenance on tables 15165 if ( $rOpts_maximum_fields_per_table 15166 && $number_of_fields > $rOpts_maximum_fields_per_table ) 15167 { 15168 $number_of_fields = $rOpts_maximum_fields_per_table; 15169 } 15170 15171 # How many columns (characters) and lines would this container take 15172 # if no additional whitespace were added? 15173 my $packed_columns = token_sequence_length( $i_opening_paren + 1, 15174 $i_effective_last_comma + 1 ); 15175 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero 15176 my $packed_lines = 1 + int( $packed_columns / $columns ); 15177 15178 # are we an item contained in an outer list? 15179 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; 15180 15181 if ( $number_of_fields <= 0 ) { 15182 15183# #--------------------------------------------------------------- 15184# # We're in trouble. We can't find a single field width that works. 15185# # There is no simple answer here; we may have a single long list 15186# # item, or many. 15187# #--------------------------------------------------------------- 15188# 15189# In many cases, it may be best to not force a break if there is just one 15190# comma, because the standard continuation break logic will do a better 15191# job without it. 15192# 15193# In the common case that all but one of the terms can fit 15194# on a single line, it may look better not to break open the 15195# containing parens. Consider, for example 15196# 15197# $color = 15198# join ( '/', 15199# sort { $color_value{$::a} <=> $color_value{$::b}; } 15200# keys %colors ); 15201# 15202# which will look like this with the container broken: 15203# 15204# $color = join ( 15205# '/', 15206# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors 15207# ); 15208# 15209# Here is an example of this rule for a long last term: 15210# 15211# log_message( 0, 256, 128, 15212# "Number of routes in adj-RIB-in to be considered: $peercount" ); 15213# 15214# And here is an example with a long first term: 15215# 15216# $s = sprintf( 15217# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 15218# $r, $pu, $ps, $cu, $cs, $tt 15219# ) 15220# if $style eq 'all'; 15221 15222 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; 15223 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; 15224 my $long_first_term = 15225 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; 15226 15227 # break at every comma ... 15228 if ( 15229 15230 # if requested by user or is best looking 15231 $number_of_fields_best == 1 15232 15233 # or if this is a sublist of a larger list 15234 || $in_hierarchical_list 15235 15236 # or if multiple commas and we dont have a long first or last 15237 # term 15238 || ( $comma_count > 1 15239 && !( $long_last_term || $long_first_term ) ) 15240 ) 15241 { 15242 foreach ( 0 .. $comma_count - 1 ) { 15243 set_forced_breakpoint( $$rcomma_index[$_] ); 15244 } 15245 } 15246 elsif ($long_last_term) { 15247 15248 set_forced_breakpoint($i_last_comma); 15249 $$rdo_not_break_apart = 1 unless $must_break_open; 15250 } 15251 elsif ($long_first_term) { 15252 15253 set_forced_breakpoint($i_first_comma); 15254 } 15255 else { 15256 15257 # let breaks be defined by default bond strength logic 15258 } 15259 return; 15260 } 15261 15262 # -------------------------------------------------------- 15263 # We have a tentative field count that seems to work. 15264 # How many lines will this require? 15265 # -------------------------------------------------------- 15266 my $formatted_lines = $item_count / ($number_of_fields); 15267 if ( $formatted_lines != int $formatted_lines ) { 15268 $formatted_lines = 1 + int $formatted_lines; 15269 } 15270 15271 # So far we've been trying to fill out to the right margin. But 15272 # compact tables are easier to read, so let's see if we can use fewer 15273 # fields without increasing the number of lines. 15274 $number_of_fields = 15275 compactify_table( $item_count, $number_of_fields, $formatted_lines, 15276 $odd_or_even ); 15277 15278 # How many spaces across the page will we fill? 15279 my $columns_per_line = 15280 ( int $number_of_fields / 2 ) * $pair_width + 15281 ( $number_of_fields % 2 ) * $max_width; 15282 15283 my $formatted_columns; 15284 15285 if ( $number_of_fields > 1 ) { 15286 $formatted_columns = 15287 ( $pair_width * ( int( $item_count / 2 ) ) + 15288 ( $item_count % 2 ) * $max_width ); 15289 } 15290 else { 15291 $formatted_columns = $max_width * $item_count; 15292 } 15293 if ( $formatted_columns < $packed_columns ) { 15294 $formatted_columns = $packed_columns; 15295 } 15296 15297 my $unused_columns = $formatted_columns - $packed_columns; 15298 15299 # set some empirical parameters to help decide if we should try to 15300 # align; high sparsity does not look good, especially with few lines 15301 my $sparsity = ($unused_columns) / ($formatted_columns); 15302 my $max_allowed_sparsity = 15303 ( $item_count < 3 ) ? 0.1 15304 : ( $packed_lines == 1 ) ? 0.15 15305 : ( $packed_lines == 2 ) ? 0.4 15306 : 0.7; 15307 15308 # Begin check for shortcut methods, which avoid treating a list 15309 # as a table for relatively small parenthesized lists. These 15310 # are usually easier to read if not formatted as tables. 15311 if ( 15312 $packed_lines <= 2 # probably can fit in 2 lines 15313 && $item_count < 9 # doesn't have too many items 15314 && $opening_environment eq 'BLOCK' # not a sub-container 15315 && $opening_token eq '(' # is paren list 15316 ) 15317 { 15318 15319 # Shortcut method 1: for -lp and just one comma: 15320 # This is a no-brainer, just break at the comma. 15321 if ( 15322 $rOpts_line_up_parentheses # -lp 15323 && $item_count == 2 # two items, one comma 15324 && !$must_break_open 15325 ) 15326 { 15327 my $i_break = $$rcomma_index[0]; 15328 set_forced_breakpoint($i_break); 15329 $$rdo_not_break_apart = 1; 15330 set_non_alignment_flags( $comma_count, $rcomma_index ); 15331 return; 15332 15333 } 15334 15335 # method 2 is for most small ragged lists which might look 15336 # best if not displayed as a table. 15337 if ( 15338 ( $number_of_fields == 2 && $item_count == 3 ) 15339 || ( 15340 $new_identifier_count > 0 # isn't all quotes 15341 && $sparsity > 0.15 15342 ) # would be fairly spaced gaps if aligned 15343 ) 15344 { 15345 15346 my $break_count = set_ragged_breakpoints( \@i_term_comma, 15347 $ri_ragged_break_list ); 15348 ++$break_count if ($use_separate_first_term); 15349 15350 # NOTE: we should really use the true break count here, 15351 # which can be greater if there are large terms and 15352 # little space, but usually this will work well enough. 15353 unless ($must_break_open) { 15354 15355 if ( $break_count <= 1 ) { 15356 $$rdo_not_break_apart = 1; 15357 } 15358 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 15359 { 15360 $$rdo_not_break_apart = 1; 15361 } 15362 } 15363 set_non_alignment_flags( $comma_count, $rcomma_index ); 15364 return; 15365 } 15366 15367 } # end shortcut methods 15368 15369 # debug stuff 15370 15371 FORMATTER_DEBUG_FLAG_SPARSE && do { 15372 print 15373"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; 15374 15375 }; 15376 15377 #--------------------------------------------------------------- 15378 # Compound List Rule 2: 15379 # If this list is too long for one line, and it is an item of a 15380 # larger list, then we must format it, regardless of sparsity 15381 # (ian.t). One reason that we have to do this is to trigger 15382 # Compound List Rule 1, above, which causes breaks at all commas of 15383 # all outer lists. In this way, the structure will be properly 15384 # displayed. 15385 #--------------------------------------------------------------- 15386 15387 # Decide if this list is too long for one line unless broken 15388 my $total_columns = table_columns_available($i_opening_paren); 15389 my $too_long = $packed_columns > $total_columns; 15390 15391 # For a paren list, include the length of the token just before the 15392 # '(' because this is likely a sub call, and we would have to 15393 # include the sub name on the same line as the list. This is still 15394 # imprecise, but not too bad. (steve.t) 15395 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { 15396 15397 $too_long = excess_line_length( $i_opening_minus, 15398 $i_effective_last_comma + 1 ) > 0; 15399 } 15400 15401 # FIXME: For an item after a '=>', try to include the length of the 15402 # thing before the '=>'. This is crude and should be improved by 15403 # actually looking back token by token. 15404 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { 15405 my $i_opening_minus = $i_opening_paren - 4; 15406 if ( $i_opening_minus >= 0 ) { 15407 $too_long = excess_line_length( $i_opening_minus, 15408 $i_effective_last_comma + 1 ) > 0; 15409 } 15410 } 15411 15412 # Always break lists contained in '[' and '{' if too long for 1 line, 15413 # and always break lists which are too long and part of a more complex 15414 # structure. 15415 my $must_break_open_container = $must_break_open 15416 || ( $too_long 15417 && ( $in_hierarchical_list || $opening_token ne '(' ) ); 15418 15419#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; 15420 15421 #--------------------------------------------------------------- 15422 # The main decision: 15423 # Now decide if we will align the data into aligned columns. Do not 15424 # attempt to align columns if this is a tiny table or it would be 15425 # too spaced. It seems that the more packed lines we have, the 15426 # sparser the list that can be allowed and still look ok. 15427 #--------------------------------------------------------------- 15428 15429 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) 15430 || ( $formatted_lines < 2 ) 15431 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) 15432 ) 15433 { 15434 15435 #--------------------------------------------------------------- 15436 # too sparse: would look ugly if aligned in a table; 15437 #--------------------------------------------------------------- 15438 15439 # use old breakpoints if this is a 'big' list 15440 # FIXME: goal is to improve set_ragged_breakpoints so that 15441 # this is not necessary. 15442 if ( $packed_lines > 2 && $item_count > 10 ) { 15443 write_logfile_entry("List sparse: using old breakpoints\n"); 15444 copy_old_breakpoints( $i_first_comma, $i_last_comma ); 15445 } 15446 15447 # let the continuation logic handle it if 2 lines 15448 else { 15449 15450 my $break_count = set_ragged_breakpoints( \@i_term_comma, 15451 $ri_ragged_break_list ); 15452 ++$break_count if ($use_separate_first_term); 15453 15454 unless ($must_break_open_container) { 15455 if ( $break_count <= 1 ) { 15456 $$rdo_not_break_apart = 1; 15457 } 15458 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 15459 { 15460 $$rdo_not_break_apart = 1; 15461 } 15462 } 15463 set_non_alignment_flags( $comma_count, $rcomma_index ); 15464 } 15465 return; 15466 } 15467 15468 #--------------------------------------------------------------- 15469 # go ahead and format as a table 15470 #--------------------------------------------------------------- 15471 write_logfile_entry( 15472 "List: auto formatting with $number_of_fields fields/row\n"); 15473 15474 my $j_first_break = 15475 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; 15476 15477 for ( 15478 my $j = $j_first_break ; 15479 $j < $comma_count ; 15480 $j += $number_of_fields 15481 ) 15482 { 15483 my $i = $$rcomma_index[$j]; 15484 set_forced_breakpoint($i); 15485 } 15486 return; 15487 } 15488} 15489 15490sub set_non_alignment_flags { 15491 15492 # set flag which indicates that these commas should not be 15493 # aligned 15494 my ( $comma_count, $rcomma_index ) = @_; 15495 foreach ( 0 .. $comma_count - 1 ) { 15496 $matching_token_to_go[ $$rcomma_index[$_] ] = 1; 15497 } 15498} 15499 15500sub study_list_complexity { 15501 15502 # Look for complex tables which should be formatted with one term per line. 15503 # Returns the following: 15504 # 15505 # \@i_ragged_break_list = list of good breakpoints to avoid lines 15506 # which are hard to read 15507 # $number_of_fields_best = suggested number of fields based on 15508 # complexity; = 0 if any number may be used. 15509 # 15510 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; 15511 my $item_count = @{$ri_term_begin}; 15512 my $complex_item_count = 0; 15513 my $number_of_fields_best = $rOpts_maximum_fields_per_table; 15514 my $i_max = @{$ritem_lengths} - 1; 15515 ##my @item_complexity; 15516 15517 my $i_last_last_break = -3; 15518 my $i_last_break = -2; 15519 my @i_ragged_break_list; 15520 15521 my $definitely_complex = 30; 15522 my $definitely_simple = 12; 15523 my $quote_count = 0; 15524 15525 for my $i ( 0 .. $i_max ) { 15526 my $ib = $ri_term_begin->[$i]; 15527 my $ie = $ri_term_end->[$i]; 15528 15529 # define complexity: start with the actual term length 15530 my $weighted_length = ( $ritem_lengths->[$i] - 2 ); 15531 15532 ##TBD: join types here and check for variations 15533 ##my $str=join "", @tokens_to_go[$ib..$ie]; 15534 15535 my $is_quote = 0; 15536 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { 15537 $is_quote = 1; 15538 $quote_count++; 15539 } 15540 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { 15541 $quote_count++; 15542 } 15543 15544 if ( $ib eq $ie ) { 15545 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { 15546 $complex_item_count++; 15547 $weighted_length *= 2; 15548 } 15549 else { 15550 } 15551 } 15552 else { 15553 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { 15554 $complex_item_count++; 15555 $weighted_length *= 2; 15556 } 15557 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { 15558 $weighted_length += 4; 15559 } 15560 } 15561 15562 # add weight for extra tokens. 15563 $weighted_length += 2 * ( $ie - $ib ); 15564 15565## my $BUB = join '', @tokens_to_go[$ib..$ie]; 15566## print "# COMPLEXITY:$weighted_length $BUB\n"; 15567 15568##push @item_complexity, $weighted_length; 15569 15570 # now mark a ragged break after this item it if it is 'long and 15571 # complex': 15572 if ( $weighted_length >= $definitely_complex ) { 15573 15574 # if we broke after the previous term 15575 # then break before it too 15576 if ( $i_last_break == $i - 1 15577 && $i > 1 15578 && $i_last_last_break != $i - 2 ) 15579 { 15580 15581 ## FIXME: don't strand a small term 15582 pop @i_ragged_break_list; 15583 push @i_ragged_break_list, $i - 2; 15584 push @i_ragged_break_list, $i - 1; 15585 } 15586 15587 push @i_ragged_break_list, $i; 15588 $i_last_last_break = $i_last_break; 15589 $i_last_break = $i; 15590 } 15591 15592 # don't break before a small last term -- it will 15593 # not look good on a line by itself. 15594 elsif ($i == $i_max 15595 && $i_last_break == $i - 1 15596 && $weighted_length <= $definitely_simple ) 15597 { 15598 pop @i_ragged_break_list; 15599 } 15600 } 15601 15602 my $identifier_count = $i_max + 1 - $quote_count; 15603 15604 # Need more tuning here.. 15605 if ( $max_width > 12 15606 && $complex_item_count > $item_count / 2 15607 && $number_of_fields_best != 2 ) 15608 { 15609 $number_of_fields_best = 1; 15610 } 15611 15612 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); 15613} 15614 15615sub get_maximum_fields_wanted { 15616 15617 # Not all tables look good with more than one field of items. 15618 # This routine looks at a table and decides if it should be 15619 # formatted with just one field or not. 15620 # This coding is still under development. 15621 my ($ritem_lengths) = @_; 15622 15623 my $number_of_fields_best = 0; 15624 15625 # For just a few items, we tentatively assume just 1 field. 15626 my $item_count = @{$ritem_lengths}; 15627 if ( $item_count <= 5 ) { 15628 $number_of_fields_best = 1; 15629 } 15630 15631 # For larger tables, look at it both ways and see what looks best 15632 else { 15633 15634 my $is_odd = 1; 15635 my @max_length = ( 0, 0 ); 15636 my @last_length_2 = ( undef, undef ); 15637 my @first_length_2 = ( undef, undef ); 15638 my $last_length = undef; 15639 my $total_variation_1 = 0; 15640 my $total_variation_2 = 0; 15641 my @total_variation_2 = ( 0, 0 ); 15642 for ( my $j = 0 ; $j < $item_count ; $j++ ) { 15643 15644 $is_odd = 1 - $is_odd; 15645 my $length = $ritem_lengths->[$j]; 15646 if ( $length > $max_length[$is_odd] ) { 15647 $max_length[$is_odd] = $length; 15648 } 15649 15650 if ( defined($last_length) ) { 15651 my $dl = abs( $length - $last_length ); 15652 $total_variation_1 += $dl; 15653 } 15654 $last_length = $length; 15655 15656 my $ll = $last_length_2[$is_odd]; 15657 if ( defined($ll) ) { 15658 my $dl = abs( $length - $ll ); 15659 $total_variation_2[$is_odd] += $dl; 15660 } 15661 else { 15662 $first_length_2[$is_odd] = $length; 15663 } 15664 $last_length_2[$is_odd] = $length; 15665 } 15666 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; 15667 15668 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; 15669 unless ( $total_variation_2 < $factor * $total_variation_1 ) { 15670 $number_of_fields_best = 1; 15671 } 15672 } 15673 return ($number_of_fields_best); 15674} 15675 15676sub table_columns_available { 15677 my $i_first_comma = shift; 15678 my $columns = 15679 $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma); 15680 15681 # Patch: the vertical formatter does not line up lines whose lengths 15682 # exactly equal the available line length because of allowances 15683 # that must be made for side comments. Therefore, the number of 15684 # available columns is reduced by 1 character. 15685 $columns -= 1; 15686 return $columns; 15687} 15688 15689sub maximum_number_of_fields { 15690 15691 # how many fields will fit in the available space? 15692 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; 15693 my $max_pairs = int( $columns / $pair_width ); 15694 my $number_of_fields = $max_pairs * 2; 15695 if ( $odd_or_even == 1 15696 && $max_pairs * $pair_width + $max_width <= $columns ) 15697 { 15698 $number_of_fields++; 15699 } 15700 return $number_of_fields; 15701} 15702 15703sub compactify_table { 15704 15705 # given a table with a certain number of fields and a certain number 15706 # of lines, see if reducing the number of fields will make it look 15707 # better. 15708 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; 15709 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { 15710 my $min_fields; 15711 15712 for ( 15713 $min_fields = $number_of_fields ; 15714 $min_fields >= $odd_or_even 15715 && $min_fields * $formatted_lines >= $item_count ; 15716 $min_fields -= $odd_or_even 15717 ) 15718 { 15719 $number_of_fields = $min_fields; 15720 } 15721 } 15722 return $number_of_fields; 15723} 15724 15725sub set_ragged_breakpoints { 15726 15727 # Set breakpoints in a list that cannot be formatted nicely as a 15728 # table. 15729 my ( $ri_term_comma, $ri_ragged_break_list ) = @_; 15730 15731 my $break_count = 0; 15732 foreach (@$ri_ragged_break_list) { 15733 my $j = $ri_term_comma->[$_]; 15734 if ($j) { 15735 set_forced_breakpoint($j); 15736 $break_count++; 15737 } 15738 } 15739 return $break_count; 15740} 15741 15742sub copy_old_breakpoints { 15743 my ( $i_first_comma, $i_last_comma ) = @_; 15744 for my $i ( $i_first_comma .. $i_last_comma ) { 15745 if ( $old_breakpoint_to_go[$i] ) { 15746 set_forced_breakpoint($i); 15747 } 15748 } 15749} 15750 15751sub set_nobreaks { 15752 my ( $i, $j ) = @_; 15753 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { 15754 15755 FORMATTER_DEBUG_FLAG_NOBREAK && do { 15756 my ( $a, $b, $c ) = caller(); 15757 print( 15758"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n" 15759 ); 15760 }; 15761 15762 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); 15763 } 15764 15765 # shouldn't happen; non-critical error 15766 else { 15767 FORMATTER_DEBUG_FLAG_NOBREAK && do { 15768 my ( $a, $b, $c ) = caller(); 15769 print( 15770"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n" 15771 ); 15772 }; 15773 } 15774} 15775 15776sub set_fake_breakpoint { 15777 15778 # Just bump up the breakpoint count as a signal that there are breaks. 15779 # This is useful if we have breaks but may want to postpone deciding where 15780 # to make them. 15781 $forced_breakpoint_count++; 15782} 15783 15784sub set_forced_breakpoint { 15785 my $i = shift; 15786 15787 return unless defined $i && $i >= 0; 15788 15789 # when called with certain tokens, use bond strengths to decide 15790 # if we break before or after it 15791 my $token = $tokens_to_go[$i]; 15792 15793 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { 15794 if ( $want_break_before{$token} && $i >= 0 ) { $i-- } 15795 } 15796 15797 # breaks are forced before 'if' and 'unless' 15798 elsif ( $is_if_unless{$token} ) { $i-- } 15799 15800 if ( $i >= 0 && $i <= $max_index_to_go ) { 15801 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; 15802 15803 FORMATTER_DEBUG_FLAG_FORCE && do { 15804 my ( $a, $b, $c ) = caller(); 15805 print 15806"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; 15807 }; 15808 15809 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { 15810 $forced_breakpoint_to_go[$i_nonblank] = 1; 15811 15812 if ( $i_nonblank > $index_max_forced_break ) { 15813 $index_max_forced_break = $i_nonblank; 15814 } 15815 $forced_breakpoint_count++; 15816 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = 15817 $i_nonblank; 15818 15819 # if we break at an opening container..break at the closing 15820 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { 15821 set_closing_breakpoint($i_nonblank); 15822 } 15823 } 15824 } 15825} 15826 15827sub clear_breakpoint_undo_stack { 15828 $forced_breakpoint_undo_count = 0; 15829} 15830 15831sub undo_forced_breakpoint_stack { 15832 15833 my $i_start = shift; 15834 if ( $i_start < 0 ) { 15835 $i_start = 0; 15836 my ( $a, $b, $c ) = caller(); 15837 warning( 15838"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " 15839 ); 15840 } 15841 15842 while ( $forced_breakpoint_undo_count > $i_start ) { 15843 my $i = 15844 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; 15845 if ( $i >= 0 && $i <= $max_index_to_go ) { 15846 $forced_breakpoint_to_go[$i] = 0; 15847 $forced_breakpoint_count--; 15848 15849 FORMATTER_DEBUG_FLAG_UNDOBP && do { 15850 my ( $a, $b, $c ) = caller(); 15851 print( 15852"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n" 15853 ); 15854 }; 15855 } 15856 15857 # shouldn't happen, but not a critical error 15858 else { 15859 FORMATTER_DEBUG_FLAG_UNDOBP && do { 15860 my ( $a, $b, $c ) = caller(); 15861 print( 15862"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go" 15863 ); 15864 }; 15865 } 15866 } 15867} 15868 15869{ # begin recombine_breakpoints 15870 15871 my %is_amp_amp; 15872 my %is_ternary; 15873 my %is_math_op; 15874 15875 BEGIN { 15876 15877 @_ = qw( && || ); 15878 @is_amp_amp{@_} = (1) x scalar(@_); 15879 15880 @_ = qw( ? : ); 15881 @is_ternary{@_} = (1) x scalar(@_); 15882 15883 @_ = qw( + - * / ); 15884 @is_math_op{@_} = (1) x scalar(@_); 15885 } 15886 15887 sub recombine_breakpoints { 15888 15889 # sub set_continuation_breaks is very liberal in setting line breaks 15890 # for long lines, always setting breaks at good breakpoints, even 15891 # when that creates small lines. Occasionally small line fragments 15892 # are produced which would look better if they were combined. 15893 # That's the task of this routine, recombine_breakpoints. 15894 # 15895 # $ri_beg = ref to array of BEGinning indexes of each line 15896 # $ri_end = ref to array of ENDing indexes of each line 15897 my ( $ri_beg, $ri_end ) = @_; 15898 15899 my $more_to_do = 1; 15900 15901 # We keep looping over all of the lines of this batch 15902 # until there are no more possible recombinations 15903 my $nmax_last = @$ri_end; 15904 while ($more_to_do) { 15905 my $n_best = 0; 15906 my $bs_best; 15907 my $n; 15908 my $nmax = @$ri_end - 1; 15909 15910 # safety check for infinite loop 15911 unless ( $nmax < $nmax_last ) { 15912 15913 # shouldn't happen because splice below decreases nmax on each pass: 15914 # but i get paranoid sometimes 15915 die "Program bug-infinite loop in recombine breakpoints\n"; 15916 } 15917 $nmax_last = $nmax; 15918 $more_to_do = 0; 15919 my $previous_outdentable_closing_paren; 15920 my $leading_amp_count = 0; 15921 my $this_line_is_semicolon_terminated; 15922 15923 # loop over all remaining lines in this batch 15924 for $n ( 1 .. $nmax ) { 15925 15926 #---------------------------------------------------------- 15927 # If we join the current pair of lines, 15928 # line $n-1 will become the left part of the joined line 15929 # line $n will become the right part of the joined line 15930 # 15931 # Here are Indexes of the endpoint tokens of the two lines: 15932 # 15933 # -----line $n-1--- | -----line $n----- 15934 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 15935 # ^ 15936 # | 15937 # We want to decide if we should remove the line break 15938 # betwen the tokens at $iend_1 and $ibeg_2 15939 # 15940 # We will apply a number of ad-hoc tests to see if joining 15941 # here will look ok. The code will just issue a 'next' 15942 # command if the join doesn't look good. If we get through 15943 # the gauntlet of tests, the lines will be recombined. 15944 #---------------------------------------------------------- 15945 # 15946 # beginning and ending tokens of the lines we are working on 15947 my $ibeg_1 = $$ri_beg[ $n - 1 ]; 15948 my $iend_1 = $$ri_end[ $n - 1 ]; 15949 my $iend_2 = $$ri_end[$n]; 15950 my $ibeg_2 = $$ri_beg[$n]; 15951 15952 my $ibeg_nmax = $$ri_beg[$nmax]; 15953 15954 # some beginning indexes of other lines, which may not exist 15955 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; 15956 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; 15957 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; 15958 15959 my $bs_tweak = 0; 15960 15961 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - 15962 # $nesting_depth_to_go[$ibeg_1] ); 15963 15964##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n"; 15965 15966 # If line $n is the last line, we set some flags and 15967 # do any special checks for it 15968 if ( $n == $nmax ) { 15969 15970 # a terminal '{' should stay where it is 15971 next if $types_to_go[$ibeg_2] eq '{'; 15972 15973 # set flag if statement $n ends in ';' 15974 $this_line_is_semicolon_terminated = 15975 $types_to_go[$iend_2] eq ';' 15976 15977 # with possible side comment 15978 || ( $types_to_go[$iend_2] eq '#' 15979 && $iend_2 - $ibeg_2 >= 2 15980 && $types_to_go[ $iend_2 - 2 ] eq ';' 15981 && $types_to_go[ $iend_2 - 1 ] eq 'b' ); 15982 } 15983 15984 #---------------------------------------------------------- 15985 # Section 1: examine token at $iend_1 (right end of first line 15986 # of pair) 15987 #---------------------------------------------------------- 15988 15989 # an isolated '}' may join with a ';' terminated segment 15990 if ( $types_to_go[$iend_1] eq '}' ) { 15991 15992 # Check for cases where combining a semicolon terminated 15993 # statement with a previous isolated closing paren will 15994 # allow the combined line to be outdented. This is 15995 # generally a good move. For example, we can join up 15996 # the last two lines here: 15997 # ( 15998 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 15999 # $size, $atime, $mtime, $ctime, $blksize, $blocks 16000 # ) 16001 # = stat($file); 16002 # 16003 # to get: 16004 # ( 16005 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 16006 # $size, $atime, $mtime, $ctime, $blksize, $blocks 16007 # ) = stat($file); 16008 # 16009 # which makes the parens line up. 16010 # 16011 # Another example, from Joe Matarazzo, probably looks best 16012 # with the 'or' clause appended to the trailing paren: 16013 # $self->some_method( 16014 # PARAM1 => 'foo', 16015 # PARAM2 => 'bar' 16016 # ) or die "Some_method didn't work"; 16017 # 16018 $previous_outdentable_closing_paren = 16019 $this_line_is_semicolon_terminated # ends in ';' 16020 && $ibeg_1 == $iend_1 # only one token on last line 16021 && $tokens_to_go[$iend_1] eq 16022 ')' # must be structural paren 16023 16024 # only &&, ||, and : if no others seen 16025 # (but note: our count made below could be wrong 16026 # due to intervening comments) 16027 && ( $leading_amp_count == 0 16028 || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ ) 16029 16030 # but leading colons probably line up with with a 16031 # previous colon or question (count could be wrong). 16032 && $types_to_go[$ibeg_2] ne ':' 16033 16034 # only one step in depth allowed. this line must not 16035 # begin with a ')' itself. 16036 && ( $nesting_depth_to_go[$iend_1] == 16037 $nesting_depth_to_go[$iend_2] + 1 ); 16038 16039 # YVES patch 2 of 2: 16040 # Allow cuddled eval chains, like this: 16041 # eval { 16042 # #STUFF; 16043 # 1; # return true 16044 # } or do { 16045 # #handle error 16046 # }; 16047 # This patch works together with a patch in 16048 # setting adjusted indentation (where the closing eval 16049 # brace is outdented if possible). 16050 # The problem is that an 'eval' block has continuation 16051 # indentation and it looks better to undo it in some 16052 # cases. If we do not use this patch we would get: 16053 # eval { 16054 # #STUFF; 16055 # 1; # return true 16056 # } 16057 # or do { 16058 # #handle error 16059 # }; 16060 # The alternative, for uncuddled style, is to create 16061 # a patch in set_adjusted_indentation which undoes 16062 # the indentation of a leading line like 'or do {'. 16063 # This doesn't work well with -icb through 16064 if ( 16065 $block_type_to_go[$iend_1] eq 'eval' 16066 && !$rOpts->{'line-up-parentheses'} 16067 && !$rOpts->{'indent-closing-brace'} 16068 && $tokens_to_go[$iend_2] eq '{' 16069 && ( 16070 ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ ) 16071 || ( $types_to_go[$ibeg_2] eq 'k' 16072 && $is_and_or{ $tokens_to_go[$ibeg_2] } ) 16073 || $is_if_unless{ $tokens_to_go[$ibeg_2] } 16074 ) 16075 ) 16076 { 16077 $previous_outdentable_closing_paren ||= 1; 16078 } 16079 16080 next 16081 unless ( 16082 $previous_outdentable_closing_paren 16083 16084 # handle '.' and '?' specially below 16085 || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ ) 16086 ); 16087 } 16088 16089 # YVES 16090 # honor breaks at opening brace 16091 # Added to prevent recombining something like this: 16092 # } || eval { package main; 16093 elsif ( $types_to_go[$iend_1] eq '{' ) { 16094 next if $forced_breakpoint_to_go[$iend_1]; 16095 } 16096 16097 # do not recombine lines with ending &&, ||, 16098 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) { 16099 next unless $want_break_before{ $types_to_go[$iend_1] }; 16100 } 16101 16102 # keep a terminal colon 16103 elsif ( $types_to_go[$iend_1] eq ':' ) { 16104 next unless $want_break_before{ $types_to_go[$iend_1] }; 16105 } 16106 16107 # Identify and recombine a broken ?/: chain 16108 elsif ( $types_to_go[$iend_1] eq '?' ) { 16109 16110 # Do not recombine different levels 16111 next 16112 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); 16113 16114 # do not recombine unless next line ends in : 16115 next unless $types_to_go[$iend_2] eq ':'; 16116 } 16117 16118 # for lines ending in a comma... 16119 elsif ( $types_to_go[$iend_1] eq ',' ) { 16120 16121 # Do not recombine at comma which is following the 16122 # input bias. 16123 # TODO: might be best to make a special flag 16124 next if ( $old_breakpoint_to_go[$iend_1] ); 16125 16126 # an isolated '},' may join with an identifier + ';' 16127 # this is useful for the class of a 'bless' statement (bless.t) 16128 if ( $types_to_go[$ibeg_1] eq '}' 16129 && $types_to_go[$ibeg_2] eq 'i' ) 16130 { 16131 next 16132 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) 16133 && ( $iend_2 == ( $ibeg_2 + 1 ) ) 16134 && $this_line_is_semicolon_terminated ); 16135 16136 # override breakpoint 16137 $forced_breakpoint_to_go[$iend_1] = 0; 16138 } 16139 16140 # but otherwise .. 16141 else { 16142 16143 # do not recombine after a comma unless this will leave 16144 # just 1 more line 16145 next unless ( $n + 1 >= $nmax ); 16146 16147 # do not recombine if there is a change in indentation depth 16148 next 16149 if ( 16150 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); 16151 16152 # do not recombine a "complex expression" after a 16153 # comma. "complex" means no parens. 16154 my $saw_paren; 16155 foreach my $ii ( $ibeg_2 .. $iend_2 ) { 16156 if ( $tokens_to_go[$ii] eq '(' ) { 16157 $saw_paren = 1; 16158 last; 16159 } 16160 } 16161 next if $saw_paren; 16162 } 16163 } 16164 16165 # opening paren.. 16166 elsif ( $types_to_go[$iend_1] eq '(' ) { 16167 16168 # No longer doing this 16169 } 16170 16171 elsif ( $types_to_go[$iend_1] eq ')' ) { 16172 16173 # No longer doing this 16174 } 16175 16176 # keep a terminal for-semicolon 16177 elsif ( $types_to_go[$iend_1] eq 'f' ) { 16178 next; 16179 } 16180 16181 # if '=' at end of line ... 16182 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) { 16183 16184 my $is_short_quote = 16185 ( $types_to_go[$ibeg_2] eq 'Q' 16186 && $ibeg_2 == $iend_2 16187 && length( $tokens_to_go[$ibeg_2] ) < 16188 $rOpts_short_concatenation_item_length ); 16189 my $is_ternary = 16190 ( $types_to_go[$ibeg_1] eq '?' 16191 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); 16192 16193 # always join an isolated '=', a short quote, or if this 16194 # will put ?/: at start of adjacent lines 16195 if ( $ibeg_1 != $iend_1 16196 && !$is_short_quote 16197 && !$is_ternary ) 16198 { 16199 next 16200 unless ( 16201 ( 16202 16203 # unless we can reduce this to two lines 16204 $nmax < $n + 2 16205 16206 # or three lines, the last with a leading semicolon 16207 || ( $nmax == $n + 2 16208 && $types_to_go[$ibeg_nmax] eq ';' ) 16209 16210 # or the next line ends with a here doc 16211 || $types_to_go[$iend_2] eq 'h' 16212 16213 # or the next line ends in an open paren or brace 16214 # and the break hasn't been forced [dima.t] 16215 || ( !$forced_breakpoint_to_go[$iend_1] 16216 && $types_to_go[$iend_2] eq '{' ) 16217 ) 16218 16219 # do not recombine if the two lines might align well 16220 # this is a very approximate test for this 16221 && ( $ibeg_3 >= 0 16222 && $types_to_go[$ibeg_2] ne 16223 $types_to_go[$ibeg_3] ) 16224 ); 16225 16226 # -lp users often prefer this: 16227 # my $title = function($env, $env, $sysarea, 16228 # "bubba Borrower Entry"); 16229 # so we will recombine if -lp is used we have ending 16230 # comma 16231 if ( !$rOpts_line_up_parentheses 16232 || $types_to_go[$iend_2] ne ',' ) 16233 { 16234 16235 # otherwise, scan the rhs line up to last token for 16236 # complexity. Note that we are not counting the last 16237 # token in case it is an opening paren. 16238 my $tv = 0; 16239 my $depth = $nesting_depth_to_go[$ibeg_2]; 16240 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { 16241 if ( $nesting_depth_to_go[$i] != $depth ) { 16242 $tv++; 16243 last if ( $tv > 1 ); 16244 } 16245 $depth = $nesting_depth_to_go[$i]; 16246 } 16247 16248 # ok to recombine if no level changes before last token 16249 if ( $tv > 0 ) { 16250 16251 # otherwise, do not recombine if more than two 16252 # level changes. 16253 next if ( $tv > 1 ); 16254 16255 # check total complexity of the two adjacent lines 16256 # that will occur if we do this join 16257 my $istop = 16258 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; 16259 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { 16260 if ( $nesting_depth_to_go[$i] != $depth ) { 16261 $tv++; 16262 last if ( $tv > 2 ); 16263 } 16264 $depth = $nesting_depth_to_go[$i]; 16265 } 16266 16267 # do not recombine if total is more than 2 level changes 16268 next if ( $tv > 2 ); 16269 } 16270 } 16271 } 16272 16273 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { 16274 $forced_breakpoint_to_go[$iend_1] = 0; 16275 } 16276 } 16277 16278 # for keywords.. 16279 elsif ( $types_to_go[$iend_1] eq 'k' ) { 16280 16281 # make major control keywords stand out 16282 # (recombine.t) 16283 next 16284 if ( 16285 16286 #/^(last|next|redo|return)$/ 16287 $is_last_next_redo_return{ $tokens_to_go[$iend_1] } 16288 16289 # but only if followed by multiple lines 16290 && $n < $nmax 16291 ); 16292 16293 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { 16294 next 16295 unless $want_break_before{ $tokens_to_go[$iend_1] }; 16296 } 16297 } 16298 16299 # handle trailing + - * / 16300 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) { 16301 16302 # combine lines if next line has single number 16303 # or a short term followed by same operator 16304 my $i_next_nonblank = $ibeg_2; 16305 my $i_next_next = $i_next_nonblank + 1; 16306 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); 16307 my $number_follows = $types_to_go[$i_next_nonblank] eq 'n' 16308 && ( 16309 $i_next_nonblank == $iend_2 16310 || ( $i_next_next == $iend_2 16311 && $is_math_op{ $types_to_go[$i_next_next] } ) 16312 || $types_to_go[$i_next_next] eq ';' 16313 ); 16314 16315 # find token before last operator of previous line 16316 my $iend_1_minus = $iend_1; 16317 $iend_1_minus-- 16318 if ( $iend_1_minus > $ibeg_1 ); 16319 $iend_1_minus-- 16320 if ( $types_to_go[$iend_1_minus] eq 'b' 16321 && $iend_1_minus > $ibeg_1 ); 16322 16323 my $short_term_follows = 16324 ( $types_to_go[$iend_2] eq $types_to_go[$iend_1] 16325 && $types_to_go[$iend_1_minus] =~ /^[in]$/ 16326 && $iend_2 <= $ibeg_2 + 2 16327 && length( $tokens_to_go[$ibeg_2] ) < 16328 $rOpts_short_concatenation_item_length ); 16329 16330 next 16331 unless ( $number_follows || $short_term_follows ); 16332 } 16333 16334 #---------------------------------------------------------- 16335 # Section 2: Now examine token at $ibeg_2 (left end of second 16336 # line of pair) 16337 #---------------------------------------------------------- 16338 16339 # join lines identified above as capable of 16340 # causing an outdented line with leading closing paren 16341 if ($previous_outdentable_closing_paren) { 16342 $forced_breakpoint_to_go[$iend_1] = 0; 16343 } 16344 16345 # do not recombine lines with leading : 16346 elsif ( $types_to_go[$ibeg_2] eq ':' ) { 16347 $leading_amp_count++; 16348 next if $want_break_before{ $types_to_go[$ibeg_2] }; 16349 } 16350 16351 # handle lines with leading &&, || 16352 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { 16353 16354 $leading_amp_count++; 16355 16356 # ok to recombine if it follows a ? or : 16357 # and is followed by an open paren.. 16358 my $ok = 16359 ( $is_ternary{ $types_to_go[$ibeg_1] } 16360 && $tokens_to_go[$iend_2] eq '(' ) 16361 16362 # or is followed by a ? or : at same depth 16363 # 16364 # We are looking for something like this. We can 16365 # recombine the && line with the line above to make the 16366 # structure more clear: 16367 # return 16368 # exists $G->{Attr}->{V} 16369 # && exists $G->{Attr}->{V}->{$u} 16370 # ? %{ $G->{Attr}->{V}->{$u} } 16371 # : (); 16372 # 16373 # We should probably leave something like this alone: 16374 # return 16375 # exists $G->{Attr}->{E} 16376 # && exists $G->{Attr}->{E}->{$u} 16377 # && exists $G->{Attr}->{E}->{$u}->{$v} 16378 # ? %{ $G->{Attr}->{E}->{$u}->{$v} } 16379 # : (); 16380 # so that we either have all of the &&'s (or ||'s) 16381 # on one line, as in the first example, or break at 16382 # each one as in the second example. However, it 16383 # sometimes makes things worse to check for this because 16384 # it prevents multiple recombinations. So this is not done. 16385 || ( $ibeg_3 >= 0 16386 && $is_ternary{ $types_to_go[$ibeg_3] } 16387 && $nesting_depth_to_go[$ibeg_3] == 16388 $nesting_depth_to_go[$ibeg_2] ); 16389 16390 next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] }; 16391 $forced_breakpoint_to_go[$iend_1] = 0; 16392 16393 # tweak the bond strength to give this joint priority 16394 # over ? and : 16395 $bs_tweak = 0.25; 16396 } 16397 16398 # Identify and recombine a broken ?/: chain 16399 elsif ( $types_to_go[$ibeg_2] eq '?' ) { 16400 16401 # Do not recombine different levels 16402 my $lev = $levels_to_go[$ibeg_2]; 16403 next if ( $lev ne $levels_to_go[$ibeg_1] ); 16404 16405 # Do not recombine a '?' if either next line or 16406 # previous line does not start with a ':'. The reasons 16407 # are that (1) no alignment of the ? will be possible 16408 # and (2) the expression is somewhat complex, so the 16409 # '?' is harder to see in the interior of the line. 16410 my $follows_colon = 16411 $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':'; 16412 my $precedes_colon = 16413 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; 16414 next unless ( $follows_colon || $precedes_colon ); 16415 16416 # we will always combining a ? line following a : line 16417 if ( !$follows_colon ) { 16418 16419 # ...otherwise recombine only if it looks like a chain. 16420 # we will just look at a few nearby lines to see if 16421 # this looks like a chain. 16422 my $local_count = 0; 16423 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { 16424 $local_count++ 16425 if $ii >= 0 16426 && $types_to_go[$ii] eq ':' 16427 && $levels_to_go[$ii] == $lev; 16428 } 16429 next unless ( $local_count > 1 ); 16430 } 16431 $forced_breakpoint_to_go[$iend_1] = 0; 16432 } 16433 16434 # do not recombine lines with leading '.' 16435 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) { 16436 my $i_next_nonblank = $ibeg_2 + 1; 16437 if ( $types_to_go[$i_next_nonblank] eq 'b' ) { 16438 $i_next_nonblank++; 16439 } 16440 16441 next 16442 unless ( 16443 16444 # ... unless there is just one and we can reduce 16445 # this to two lines if we do. For example, this 16446 # 16447 # 16448 # $bodyA .= 16449 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' 16450 # 16451 # looks better than this: 16452 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' 16453 # . '$args .= $pat;' 16454 16455 ( 16456 $n == 2 16457 && $n == $nmax 16458 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] 16459 ) 16460 16461 # ... or this would strand a short quote , like this 16462 # . "some long qoute" 16463 # . "\n"; 16464 || ( $types_to_go[$i_next_nonblank] eq 'Q' 16465 && $i_next_nonblank >= $iend_2 - 1 16466 && length( $tokens_to_go[$i_next_nonblank] ) < 16467 $rOpts_short_concatenation_item_length ) 16468 ); 16469 } 16470 16471 # handle leading keyword.. 16472 elsif ( $types_to_go[$ibeg_2] eq 'k' ) { 16473 16474 # handle leading "or" 16475 if ( $tokens_to_go[$ibeg_2] eq 'or' ) { 16476 next 16477 unless ( 16478 $this_line_is_semicolon_terminated 16479 && ( 16480 16481 # following 'if' or 'unless' or 'or' 16482 $types_to_go[$ibeg_1] eq 'k' 16483 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 16484 16485 # important: only combine a very simple or 16486 # statement because the step below may have 16487 # combined a trailing 'and' with this or, 16488 # and we do not want to then combine 16489 # everything together 16490 && ( $iend_2 - $ibeg_2 <= 7 ) 16491 ) 16492 ); 16493 } 16494 16495 # handle leading 'and' 16496 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { 16497 16498 # Decide if we will combine a single terminal 'and' 16499 # after an 'if' or 'unless'. 16500 16501 # This looks best with the 'and' on the same 16502 # line as the 'if': 16503 # 16504 # $a = 1 16505 # if $seconds and $nu < 2; 16506 # 16507 # But this looks better as shown: 16508 # 16509 # $a = 1 16510 # if !$this->{Parents}{$_} 16511 # or $this->{Parents}{$_} eq $_; 16512 # 16513 next 16514 unless ( 16515 $this_line_is_semicolon_terminated 16516 && ( 16517 16518 # following 'if' or 'unless' or 'or' 16519 $types_to_go[$ibeg_1] eq 'k' 16520 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } 16521 || $tokens_to_go[$ibeg_1] eq 'or' ) 16522 ) 16523 ); 16524 } 16525 16526 # handle leading "if" and "unless" 16527 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { 16528 16529 # FIXME: This is still experimental..may not be too useful 16530 next 16531 unless ( 16532 $this_line_is_semicolon_terminated 16533 16534 # previous line begins with 'and' or 'or' 16535 && $types_to_go[$ibeg_1] eq 'k' 16536 && $is_and_or{ $tokens_to_go[$ibeg_1] } 16537 16538 ); 16539 } 16540 16541 # handle all other leading keywords 16542 else { 16543 16544 # keywords look best at start of lines, 16545 # but combine things like "1 while" 16546 unless ( $is_assignment{ $types_to_go[$iend_1] } ) { 16547 next 16548 if ( ( $types_to_go[$iend_1] ne 'k' ) 16549 && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); 16550 } 16551 } 16552 } 16553 16554 # similar treatment of && and || as above for 'and' and 'or': 16555 # NOTE: This block of code is currently bypassed because 16556 # of a previous block but is retained for possible future use. 16557 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { 16558 16559 # maybe looking at something like: 16560 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; 16561 16562 next 16563 unless ( 16564 $this_line_is_semicolon_terminated 16565 16566 # previous line begins with an 'if' or 'unless' keyword 16567 && $types_to_go[$ibeg_1] eq 'k' 16568 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 16569 16570 ); 16571 } 16572 16573 # handle leading + - * / 16574 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) { 16575 my $i_next_nonblank = $ibeg_2 + 1; 16576 if ( $types_to_go[$i_next_nonblank] eq 'b' ) { 16577 $i_next_nonblank++; 16578 } 16579 16580 my $i_next_next = $i_next_nonblank + 1; 16581 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); 16582 16583 my $is_number = ( 16584 $types_to_go[$i_next_nonblank] eq 'n' 16585 && ( $i_next_nonblank >= $iend_2 - 1 16586 || $types_to_go[$i_next_next] eq ';' ) 16587 ); 16588 16589 my $iend_1_nonblank = 16590 $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1; 16591 my $iend_2_nonblank = 16592 $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2; 16593 16594 my $is_short_term = 16595 ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1] 16596 && $types_to_go[$iend_2_nonblank] =~ /^[in]$/ 16597 && $types_to_go[$iend_1_nonblank] =~ /^[in]$/ 16598 && $iend_2_nonblank <= $ibeg_2 + 2 16599 && length( $tokens_to_go[$iend_2_nonblank] ) < 16600 $rOpts_short_concatenation_item_length ); 16601 16602 # Combine these lines if this line is a single 16603 # number, or if it is a short term with same 16604 # operator as the previous line. For example, in 16605 # the following code we will combine all of the 16606 # short terms $A, $B, $C, $D, $E, $F, together 16607 # instead of leaving them one per line: 16608 # my $time = 16609 # $A * $B * $C * $D * $E * $F * 16610 # ( 2. * $eps * $sigma * $area ) * 16611 # ( 1. / $tcold**3 - 1. / $thot**3 ); 16612 # This can be important in math-intensive code. 16613 next 16614 unless ( 16615 $is_number 16616 || $is_short_term 16617 16618 # or if we can reduce this to two lines if we do. 16619 || ( $n == 2 16620 && $n == $nmax 16621 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] ) 16622 ); 16623 } 16624 16625 # handle line with leading = or similar 16626 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) { 16627 next unless $n == 1; 16628 next 16629 unless ( 16630 16631 # unless we can reduce this to two lines 16632 $nmax == 2 16633 16634 # or three lines, the last with a leading semicolon 16635 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) 16636 16637 # or the next line ends with a here doc 16638 || $types_to_go[$iend_2] eq 'h' 16639 ); 16640 } 16641 16642 #---------------------------------------------------------- 16643 # Section 3: 16644 # Combine the lines if we arrive here and it is possible 16645 #---------------------------------------------------------- 16646 16647 # honor hard breakpoints 16648 next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); 16649 16650 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; 16651 16652 # combined line cannot be too long 16653 next 16654 if excess_line_length( $ibeg_1, $iend_2 ) > 0; 16655 16656 # do not recombine if we would skip in indentation levels 16657 if ( $n < $nmax ) { 16658 my $if_next = $$ri_beg[ $n + 1 ]; 16659 next 16660 if ( 16661 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] 16662 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] 16663 16664 # but an isolated 'if (' is undesirable 16665 && !( 16666 $n == 1 16667 && $iend_1 - $ibeg_1 <= 2 16668 && $types_to_go[$ibeg_1] eq 'k' 16669 && $tokens_to_go[$ibeg_1] eq 'if' 16670 && $tokens_to_go[$iend_1] ne '(' 16671 ) 16672 ); 16673 } 16674 16675 # honor no-break's 16676 next if ( $bs == NO_BREAK ); 16677 16678 # remember the pair with the greatest bond strength 16679 if ( !$n_best ) { 16680 $n_best = $n; 16681 $bs_best = $bs; 16682 } 16683 else { 16684 16685 if ( $bs > $bs_best ) { 16686 $n_best = $n; 16687 $bs_best = $bs; 16688 } 16689 } 16690 } 16691 16692 # recombine the pair with the greatest bond strength 16693 if ($n_best) { 16694 splice @$ri_beg, $n_best, 1; 16695 splice @$ri_end, $n_best - 1, 1; 16696 16697 # keep going if we are still making progress 16698 $more_to_do++; 16699 } 16700 } 16701 return ( $ri_beg, $ri_end ); 16702 } 16703} # end recombine_breakpoints 16704 16705sub break_all_chain_tokens { 16706 16707 # scan the current breakpoints looking for breaks at certain "chain 16708 # operators" (. : && || + etc) which often occur repeatedly in a long 16709 # statement. If we see a break at any one, break at all similar tokens 16710 # within the same container. 16711 # 16712 my ( $ri_left, $ri_right ) = @_; 16713 16714 my %saw_chain_type; 16715 my %left_chain_type; 16716 my %right_chain_type; 16717 my %interior_chain_type; 16718 my $nmax = @$ri_right - 1; 16719 16720 # scan the left and right end tokens of all lines 16721 my $count = 0; 16722 for my $n ( 0 .. $nmax ) { 16723 my $il = $$ri_left[$n]; 16724 my $ir = $$ri_right[$n]; 16725 my $typel = $types_to_go[$il]; 16726 my $typer = $types_to_go[$ir]; 16727 $typel = '+' if ( $typel eq '-' ); # treat + and - the same 16728 $typer = '+' if ( $typer eq '-' ); 16729 $typel = '*' if ( $typel eq '/' ); # treat * and / the same 16730 $typer = '*' if ( $typer eq '/' ); 16731 my $tokenl = $tokens_to_go[$il]; 16732 my $tokenr = $tokens_to_go[$ir]; 16733 16734 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { 16735 next if ( $typel eq '?' ); 16736 push @{ $left_chain_type{$typel} }, $il; 16737 $saw_chain_type{$typel} = 1; 16738 $count++; 16739 } 16740 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { 16741 next if ( $typer eq '?' ); 16742 push @{ $right_chain_type{$typer} }, $ir; 16743 $saw_chain_type{$typer} = 1; 16744 $count++; 16745 } 16746 } 16747 return unless $count; 16748 16749 # now look for any interior tokens of the same types 16750 $count = 0; 16751 for my $n ( 0 .. $nmax ) { 16752 my $il = $$ri_left[$n]; 16753 my $ir = $$ri_right[$n]; 16754 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { 16755 my $type = $types_to_go[$i]; 16756 $type = '+' if ( $type eq '-' ); 16757 $type = '*' if ( $type eq '/' ); 16758 if ( $saw_chain_type{$type} ) { 16759 push @{ $interior_chain_type{$type} }, $i; 16760 $count++; 16761 } 16762 } 16763 } 16764 return unless $count; 16765 16766 # now make a list of all new break points 16767 my @insert_list; 16768 16769 # loop over all chain types 16770 foreach my $type ( keys %saw_chain_type ) { 16771 16772 # quit if just ONE continuation line with leading . For example-- 16773 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' 16774 # . $contents; 16775 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); 16776 16777 # loop over all interior chain tokens 16778 foreach my $itest ( @{ $interior_chain_type{$type} } ) { 16779 16780 # loop over all left end tokens of same type 16781 if ( $left_chain_type{$type} ) { 16782 next if $nobreak_to_go[ $itest - 1 ]; 16783 foreach my $i ( @{ $left_chain_type{$type} } ) { 16784 next unless in_same_container( $i, $itest ); 16785 push @insert_list, $itest - 1; 16786 16787 # Break at matching ? if this : is at a different level. 16788 # For example, the ? before $THRf_DEAD in the following 16789 # should get a break if its : gets a break. 16790 # 16791 # my $flags = 16792 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE 16793 # : ( $_ & 4 ) ? $THRf_R_DETACHED 16794 # : $THRf_R_JOINABLE; 16795 if ( $type eq ':' 16796 && $levels_to_go[$i] != $levels_to_go[$itest] ) 16797 { 16798 my $i_question = $mate_index_to_go[$itest]; 16799 if ( $i_question > 0 ) { 16800 push @insert_list, $i_question - 1; 16801 } 16802 } 16803 last; 16804 } 16805 } 16806 16807 # loop over all right end tokens of same type 16808 if ( $right_chain_type{$type} ) { 16809 next if $nobreak_to_go[$itest]; 16810 foreach my $i ( @{ $right_chain_type{$type} } ) { 16811 next unless in_same_container( $i, $itest ); 16812 push @insert_list, $itest; 16813 16814 # break at matching ? if this : is at a different level 16815 if ( $type eq ':' 16816 && $levels_to_go[$i] != $levels_to_go[$itest] ) 16817 { 16818 my $i_question = $mate_index_to_go[$itest]; 16819 if ( $i_question >= 0 ) { 16820 push @insert_list, $i_question; 16821 } 16822 } 16823 last; 16824 } 16825 } 16826 } 16827 } 16828 16829 # insert any new break points 16830 if (@insert_list) { 16831 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 16832 } 16833} 16834 16835sub break_equals { 16836 16837 # Look for assignment operators that could use a breakpoint. 16838 # For example, in the following snippet 16839 # 16840 # $HOME = $ENV{HOME} 16841 # || $ENV{LOGDIR} 16842 # || $pw[7] 16843 # || die "no home directory for user $<"; 16844 # 16845 # we could break at the = to get this, which is a little nicer: 16846 # $HOME = 16847 # $ENV{HOME} 16848 # || $ENV{LOGDIR} 16849 # || $pw[7] 16850 # || die "no home directory for user $<"; 16851 # 16852 # The logic here follows the logic in set_logical_padding, which 16853 # will add the padding in the second line to improve alignment. 16854 # 16855 my ( $ri_left, $ri_right ) = @_; 16856 my $nmax = @$ri_right - 1; 16857 return unless ( $nmax >= 2 ); 16858 16859 # scan the left ends of first two lines 16860 my $tokbeg = ""; 16861 my $depth_beg; 16862 for my $n ( 1 .. 2 ) { 16863 my $il = $$ri_left[$n]; 16864 my $typel = $types_to_go[$il]; 16865 my $tokenl = $tokens_to_go[$il]; 16866 16867 my $has_leading_op = ( $tokenl =~ /^\w/ ) 16868 ? $is_chain_operator{$tokenl} # + - * / : ? && || 16869 : $is_chain_operator{$typel}; # and, or 16870 return unless ($has_leading_op); 16871 if ( $n > 1 ) { 16872 return 16873 unless ( $tokenl eq $tokbeg 16874 && $nesting_depth_to_go[$il] eq $depth_beg ); 16875 } 16876 $tokbeg = $tokenl; 16877 $depth_beg = $nesting_depth_to_go[$il]; 16878 } 16879 16880 # now look for any interior tokens of the same types 16881 my $il = $$ri_left[0]; 16882 my $ir = $$ri_right[0]; 16883 16884 # now make a list of all new break points 16885 my @insert_list; 16886 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { 16887 my $type = $types_to_go[$i]; 16888 if ( $is_assignment{$type} 16889 && $nesting_depth_to_go[$i] eq $depth_beg ) 16890 { 16891 if ( $want_break_before{$type} ) { 16892 push @insert_list, $i - 1; 16893 } 16894 else { 16895 push @insert_list, $i; 16896 } 16897 } 16898 } 16899 16900 # Break after a 'return' followed by a chain of operators 16901 # return ( $^O !~ /win32|dos/i ) 16902 # && ( $^O ne 'VMS' ) 16903 # && ( $^O ne 'OS2' ) 16904 # && ( $^O ne 'MacOS' ); 16905 # To give: 16906 # return 16907 # ( $^O !~ /win32|dos/i ) 16908 # && ( $^O ne 'VMS' ) 16909 # && ( $^O ne 'OS2' ) 16910 # && ( $^O ne 'MacOS' ); 16911 my $i = 0; 16912 if ( $types_to_go[$i] eq 'k' 16913 && $tokens_to_go[$i] eq 'return' 16914 && $ir > $il 16915 && $nesting_depth_to_go[$i] eq $depth_beg ) 16916 { 16917 push @insert_list, $i; 16918 } 16919 16920 return unless (@insert_list); 16921 16922 # One final check... 16923 # scan second and thrid lines and be sure there are no assignments 16924 # we want to avoid breaking at an = to make something like this: 16925 # unless ( $icon = 16926 # $html_icons{"$type-$state"} 16927 # or $icon = $html_icons{$type} 16928 # or $icon = $html_icons{$state} ) 16929 for my $n ( 1 .. 2 ) { 16930 my $il = $$ri_left[$n]; 16931 my $ir = $$ri_right[$n]; 16932 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { 16933 my $type = $types_to_go[$i]; 16934 return 16935 if ( $is_assignment{$type} 16936 && $nesting_depth_to_go[$i] eq $depth_beg ); 16937 } 16938 } 16939 16940 # ok, insert any new break point 16941 if (@insert_list) { 16942 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 16943 } 16944} 16945 16946sub insert_final_breaks { 16947 16948 my ( $ri_left, $ri_right ) = @_; 16949 16950 my $nmax = @$ri_right - 1; 16951 16952 # scan the left and right end tokens of all lines 16953 my $count = 0; 16954 my $i_first_colon = -1; 16955 for my $n ( 0 .. $nmax ) { 16956 my $il = $$ri_left[$n]; 16957 my $ir = $$ri_right[$n]; 16958 my $typel = $types_to_go[$il]; 16959 my $typer = $types_to_go[$ir]; 16960 return if ( $typel eq '?' ); 16961 return if ( $typer eq '?' ); 16962 if ( $typel eq ':' ) { $i_first_colon = $il; last; } 16963 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } 16964 } 16965 16966 # For long ternary chains, 16967 # if the first : we see has its # ? is in the interior 16968 # of a preceding line, then see if there are any good 16969 # breakpoints before the ?. 16970 if ( $i_first_colon > 0 ) { 16971 my $i_question = $mate_index_to_go[$i_first_colon]; 16972 if ( $i_question > 0 ) { 16973 my @insert_list; 16974 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { 16975 my $token = $tokens_to_go[$ii]; 16976 my $type = $types_to_go[$ii]; 16977 16978 # For now, a good break is either a comma or a 'return'. 16979 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) 16980 && in_same_container( $ii, $i_question ) ) 16981 { 16982 push @insert_list, $ii; 16983 last; 16984 } 16985 } 16986 16987 # insert any new break points 16988 if (@insert_list) { 16989 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 16990 } 16991 } 16992 } 16993} 16994 16995sub in_same_container { 16996 16997 # check to see if tokens at i1 and i2 are in the 16998 # same container, and not separated by a comma, ? or : 16999 my ( $i1, $i2 ) = @_; 17000 my $type = $types_to_go[$i1]; 17001 my $depth = $nesting_depth_to_go[$i1]; 17002 return unless ( $nesting_depth_to_go[$i2] == $depth ); 17003 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } 17004 17005 ########################################################### 17006 # This is potentially a very slow routine and not critical. 17007 # For safety just give up for large differences. 17008 # See test file 'infinite_loop.txt' 17009 # TODO: replace this loop with a data structure 17010 ########################################################### 17011 return if ( $i2 - $i1 > 200 ); 17012 17013 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { 17014 next if ( $nesting_depth_to_go[$i] > $depth ); 17015 return if ( $nesting_depth_to_go[$i] < $depth ); 17016 17017 my $tok = $tokens_to_go[$i]; 17018 $tok = ',' if $tok eq '=>'; # treat => same as , 17019 17020 # Example: we would not want to break at any of these .'s 17021 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>" 17022 if ( $type ne ':' ) { 17023 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; 17024 } 17025 else { 17026 return if ( $tok =~ /^[\,]$/ ); 17027 } 17028 } 17029 return 1; 17030} 17031 17032sub set_continuation_breaks { 17033 17034 # Define an array of indexes for inserting newline characters to 17035 # keep the line lengths below the maximum desired length. There is 17036 # an implied break after the last token, so it need not be included. 17037 17038 # Method: 17039 # This routine is part of series of routines which adjust line 17040 # lengths. It is only called if a statement is longer than the 17041 # maximum line length, or if a preliminary scanning located 17042 # desirable break points. Sub scan_list has already looked at 17043 # these tokens and set breakpoints (in array 17044 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example 17045 # after commas, after opening parens, and before closing parens). 17046 # This routine will honor these breakpoints and also add additional 17047 # breakpoints as necessary to keep the line length below the maximum 17048 # requested. It bases its decision on where the 'bond strength' is 17049 # lowest. 17050 17051 # Output: returns references to the arrays: 17052 # @i_first 17053 # @i_last 17054 # which contain the indexes $i of the first and last tokens on each 17055 # line. 17056 17057 # In addition, the array: 17058 # $forced_breakpoint_to_go[$i] 17059 # may be updated to be =1 for any index $i after which there must be 17060 # a break. This signals later routines not to undo the breakpoint. 17061 17062 my $saw_good_break = shift; 17063 my @i_first = (); # the first index to output 17064 my @i_last = (); # the last index to output 17065 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s 17066 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } 17067 17068 set_bond_strengths(); 17069 17070 my $imin = 0; 17071 my $imax = $max_index_to_go; 17072 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 17073 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 17074 my $i_begin = $imin; # index for starting next iteration 17075 17076 my $leading_spaces = leading_spaces_to_go($imin); 17077 my $line_count = 0; 17078 my $last_break_strength = NO_BREAK; 17079 my $i_last_break = -1; 17080 my $max_bias = 0.001; 17081 my $tiny_bias = 0.0001; 17082 my $leading_alignment_token = ""; 17083 my $leading_alignment_type = ""; 17084 17085 # see if any ?/:'s are in order 17086 my $colons_in_order = 1; 17087 my $last_tok = ""; 17088 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; 17089 my $colon_count = @colon_list; 17090 foreach (@colon_list) { 17091 if ( $_ eq $last_tok ) { $colons_in_order = 0; last } 17092 $last_tok = $_; 17093 } 17094 17095 # This is a sufficient but not necessary condition for colon chain 17096 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); 17097 17098 #------------------------------------------------------- 17099 # BEGINNING of main loop to set continuation breakpoints 17100 # Keep iterating until we reach the end 17101 #------------------------------------------------------- 17102 while ( $i_begin <= $imax ) { 17103 my $lowest_strength = NO_BREAK; 17104 my $starting_sum = $lengths_to_go[$i_begin]; 17105 my $i_lowest = -1; 17106 my $i_test = -1; 17107 my $lowest_next_token = ''; 17108 my $lowest_next_type = 'b'; 17109 my $i_lowest_next_nonblank = -1; 17110 17111 #------------------------------------------------------- 17112 # BEGINNING of inner loop to find the best next breakpoint 17113 #------------------------------------------------------- 17114 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { 17115 my $type = $types_to_go[$i_test]; 17116 my $token = $tokens_to_go[$i_test]; 17117 my $next_type = $types_to_go[ $i_test + 1 ]; 17118 my $next_token = $tokens_to_go[ $i_test + 1 ]; 17119 my $i_next_nonblank = 17120 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); 17121 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 17122 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 17123 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 17124 my $strength = $bond_strength_to_go[$i_test]; 17125 my $must_break = 0; 17126 17127 # FIXME: TESTING: Might want to be able to break after these 17128 # force an immediate break at certain operators 17129 # with lower level than the start of the line 17130 if ( 17131 ( 17132 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ 17133 || ( $next_nonblank_type eq 'k' 17134 && $next_nonblank_token =~ /^(and|or)$/ ) 17135 ) 17136 && ( $nesting_depth_to_go[$i_begin] > 17137 $nesting_depth_to_go[$i_next_nonblank] ) 17138 ) 17139 { 17140 set_forced_breakpoint($i_next_nonblank); 17141 } 17142 17143 if ( 17144 17145 # Try to put a break where requested by scan_list 17146 $forced_breakpoint_to_go[$i_test] 17147 17148 # break between ) { in a continued line so that the '{' can 17149 # be outdented 17150 # See similar logic in scan_list which catches instances 17151 # where a line is just something like ') {' 17152 || ( $line_count 17153 && ( $token eq ')' ) 17154 && ( $next_nonblank_type eq '{' ) 17155 && ($next_nonblank_block_type) 17156 && !$rOpts->{'opening-brace-always-on-right'} ) 17157 17158 # There is an implied forced break at a terminal opening brace 17159 || ( ( $type eq '{' ) && ( $i_test == $imax ) ) 17160 ) 17161 { 17162 17163 # Forced breakpoints must sometimes be overridden, for example 17164 # because of a side comment causing a NO_BREAK. It is easier 17165 # to catch this here than when they are set. 17166 if ( $strength < NO_BREAK ) { 17167 $strength = $lowest_strength - $tiny_bias; 17168 $must_break = 1; 17169 } 17170 } 17171 17172 # quit if a break here would put a good terminal token on 17173 # the next line and we already have a possible break 17174 if ( 17175 !$must_break 17176 && ( $next_nonblank_type =~ /^[\;\,]$/ ) 17177 && ( 17178 ( 17179 $leading_spaces + 17180 $lengths_to_go[ $i_next_nonblank + 1 ] - 17181 $starting_sum 17182 ) > $rOpts_maximum_line_length 17183 ) 17184 ) 17185 { 17186 last if ( $i_lowest >= 0 ); 17187 } 17188 17189 # Avoid a break which would strand a single punctuation 17190 # token. For example, we do not want to strand a leading 17191 # '.' which is followed by a long quoted string. 17192 if ( 17193 !$must_break 17194 && ( $i_test == $i_begin ) 17195 && ( $i_test < $imax ) 17196 && ( $token eq $type ) 17197 && ( 17198 ( 17199 $leading_spaces + 17200 $lengths_to_go[ $i_test + 1 ] - 17201 $starting_sum 17202 ) <= $rOpts_maximum_line_length 17203 ) 17204 ) 17205 { 17206 $i_test++; 17207 17208 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { 17209 $i_test++; 17210 } 17211 redo; 17212 } 17213 17214 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) 17215 { 17216 17217 # break at previous best break if it would have produced 17218 # a leading alignment of certain common tokens, and it 17219 # is different from the latest candidate break 17220 last 17221 if ($leading_alignment_type); 17222 17223 # Force at least one breakpoint if old code had good 17224 # break It is only called if a breakpoint is required or 17225 # desired. This will probably need some adjustments 17226 # over time. A goal is to try to be sure that, if a new 17227 # side comment is introduced into formated text, then 17228 # the same breakpoints will occur. scbreak.t 17229 last 17230 if ( 17231 $i_test == $imax # we are at the end 17232 && !$forced_breakpoint_count # 17233 && $saw_good_break # old line had good break 17234 && $type =~ /^[#;\{]$/ # and this line ends in 17235 # ';' or side comment 17236 && $i_last_break < 0 # and we haven't made a break 17237 && $i_lowest > 0 # and we saw a possible break 17238 && $i_lowest < $imax - 1 # (but not just before this ;) 17239 && $strength - $lowest_strength < 0.5 * WEAK # and it's good 17240 ); 17241 17242 $lowest_strength = $strength; 17243 $i_lowest = $i_test; 17244 $lowest_next_token = $next_nonblank_token; 17245 $lowest_next_type = $next_nonblank_type; 17246 $i_lowest_next_nonblank = $i_next_nonblank; 17247 last if $must_break; 17248 17249 # set flags to remember if a break here will produce a 17250 # leading alignment of certain common tokens 17251 if ( $line_count > 0 17252 && $i_test < $imax 17253 && ( $lowest_strength - $last_break_strength <= $max_bias ) 17254 ) 17255 { 17256 my $i_last_end = $i_begin - 1; 17257 if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 } 17258 my $tok_beg = $tokens_to_go[$i_begin]; 17259 my $type_beg = $types_to_go[$i_begin]; 17260 if ( 17261 17262 # check for leading alignment of certain tokens 17263 ( 17264 $tok_beg eq $next_nonblank_token 17265 && $is_chain_operator{$tok_beg} 17266 && ( $type_beg eq 'k' 17267 || $type_beg eq $tok_beg ) 17268 && $nesting_depth_to_go[$i_begin] >= 17269 $nesting_depth_to_go[$i_next_nonblank] 17270 ) 17271 17272 || ( $tokens_to_go[$i_last_end] eq $token 17273 && $is_chain_operator{$token} 17274 && ( $type eq 'k' || $type eq $token ) 17275 && $nesting_depth_to_go[$i_last_end] >= 17276 $nesting_depth_to_go[$i_test] ) 17277 ) 17278 { 17279 $leading_alignment_token = $next_nonblank_token; 17280 $leading_alignment_type = $next_nonblank_type; 17281 } 17282 } 17283 } 17284 17285 my $too_long = 17286 ( $i_test >= $imax ) 17287 ? 1 17288 : ( 17289 ( 17290 $leading_spaces + 17291 $lengths_to_go[ $i_test + 2 ] - 17292 $starting_sum 17293 ) > $rOpts_maximum_line_length 17294 ); 17295 17296 FORMATTER_DEBUG_FLAG_BREAK 17297 && print 17298"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; 17299 17300 # allow one extra terminal token after exceeding line length 17301 # if it would strand this token. 17302 if ( $rOpts_fuzzy_line_length 17303 && $too_long 17304 && ( $i_lowest == $i_test ) 17305 && ( length($token) > 1 ) 17306 && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) 17307 { 17308 $too_long = 0; 17309 } 17310 17311 last 17312 if ( 17313 ( $i_test == $imax ) # we're done if no more tokens, 17314 || ( 17315 ( $i_lowest >= 0 ) # or no more space and we have a break 17316 && $too_long 17317 ) 17318 ); 17319 } 17320 17321 #------------------------------------------------------- 17322 # END of inner loop to find the best next breakpoint 17323 # Now decide exactly where to put the breakpoint 17324 #------------------------------------------------------- 17325 17326 # it's always ok to break at imax if no other break was found 17327 if ( $i_lowest < 0 ) { $i_lowest = $imax } 17328 17329 # semi-final index calculation 17330 my $i_next_nonblank = ( 17331 ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 17332 ? $i_lowest + 2 17333 : $i_lowest + 1 17334 ); 17335 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 17336 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 17337 17338 #------------------------------------------------------- 17339 # ?/: rule 1 : if a break here will separate a '?' on this 17340 # line from its closing ':', then break at the '?' instead. 17341 #------------------------------------------------------- 17342 my $i; 17343 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { 17344 next unless ( $tokens_to_go[$i] eq '?' ); 17345 17346 # do not break if probable sequence of ?/: statements 17347 next if ($is_colon_chain); 17348 17349 # do not break if statement is broken by side comment 17350 next 17351 if ( 17352 $tokens_to_go[$max_index_to_go] eq '#' 17353 && terminal_type( \@types_to_go, \@block_type_to_go, 0, 17354 $max_index_to_go ) !~ /^[\;\}]$/ 17355 ); 17356 17357 # no break needed if matching : is also on the line 17358 next 17359 if ( $mate_index_to_go[$i] >= 0 17360 && $mate_index_to_go[$i] <= $i_next_nonblank ); 17361 17362 $i_lowest = $i; 17363 if ( $want_break_before{'?'} ) { $i_lowest-- } 17364 last; 17365 } 17366 17367 #------------------------------------------------------- 17368 # END of inner loop to find the best next breakpoint: 17369 # Break the line after the token with index i=$i_lowest 17370 #------------------------------------------------------- 17371 17372 # final index calculation 17373 $i_next_nonblank = ( 17374 ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 17375 ? $i_lowest + 2 17376 : $i_lowest + 1 17377 ); 17378 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 17379 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 17380 17381 FORMATTER_DEBUG_FLAG_BREAK 17382 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; 17383 17384 #------------------------------------------------------- 17385 # ?/: rule 2 : if we break at a '?', then break at its ':' 17386 # 17387 # Note: this rule is also in sub scan_list to handle a break 17388 # at the start and end of a line (in case breaks are dictated 17389 # by side comments). 17390 #------------------------------------------------------- 17391 if ( $next_nonblank_type eq '?' ) { 17392 set_closing_breakpoint($i_next_nonblank); 17393 } 17394 elsif ( $types_to_go[$i_lowest] eq '?' ) { 17395 set_closing_breakpoint($i_lowest); 17396 } 17397 17398 #------------------------------------------------------- 17399 # ?/: rule 3 : if we break at a ':' then we save 17400 # its location for further work below. We may need to go 17401 # back and break at its '?'. 17402 #------------------------------------------------------- 17403 if ( $next_nonblank_type eq ':' ) { 17404 push @i_colon_breaks, $i_next_nonblank; 17405 } 17406 elsif ( $types_to_go[$i_lowest] eq ':' ) { 17407 push @i_colon_breaks, $i_lowest; 17408 } 17409 17410 # here we should set breaks for all '?'/':' pairs which are 17411 # separated by this line 17412 17413 $line_count++; 17414 17415 # save this line segment, after trimming blanks at the ends 17416 push( @i_first, 17417 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); 17418 push( @i_last, 17419 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); 17420 17421 # set a forced breakpoint at a container opening, if necessary, to 17422 # signal a break at a closing container. Excepting '(' for now. 17423 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ 17424 && !$forced_breakpoint_to_go[$i_lowest] ) 17425 { 17426 set_closing_breakpoint($i_lowest); 17427 } 17428 17429 # get ready to go again 17430 $i_begin = $i_lowest + 1; 17431 $last_break_strength = $lowest_strength; 17432 $i_last_break = $i_lowest; 17433 $leading_alignment_token = ""; 17434 $leading_alignment_type = ""; 17435 $lowest_next_token = ''; 17436 $lowest_next_type = 'b'; 17437 17438 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { 17439 $i_begin++; 17440 } 17441 17442 # update indentation size 17443 if ( $i_begin <= $imax ) { 17444 $leading_spaces = leading_spaces_to_go($i_begin); 17445 } 17446 } 17447 17448 #------------------------------------------------------- 17449 # END of main loop to set continuation breakpoints 17450 # Now go back and make any necessary corrections 17451 #------------------------------------------------------- 17452 17453 #------------------------------------------------------- 17454 # ?/: rule 4 -- if we broke at a ':', then break at 17455 # corresponding '?' unless this is a chain of ?: expressions 17456 #------------------------------------------------------- 17457 if (@i_colon_breaks) { 17458 17459 # using a simple method for deciding if we are in a ?/: chain -- 17460 # this is a chain if it has multiple ?/: pairs all in order; 17461 # otherwise not. 17462 # Note that if line starts in a ':' we count that above as a break 17463 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); 17464 17465 unless ($is_chain) { 17466 my @insert_list = (); 17467 foreach (@i_colon_breaks) { 17468 my $i_question = $mate_index_to_go[$_]; 17469 if ( $i_question >= 0 ) { 17470 if ( $want_break_before{'?'} ) { 17471 $i_question--; 17472 if ( $i_question > 0 17473 && $types_to_go[$i_question] eq 'b' ) 17474 { 17475 $i_question--; 17476 } 17477 } 17478 17479 if ( $i_question >= 0 ) { 17480 push @insert_list, $i_question; 17481 } 17482 } 17483 insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); 17484 } 17485 } 17486 } 17487 return ( \@i_first, \@i_last, $colon_count ); 17488} 17489 17490sub insert_additional_breaks { 17491 17492 # this routine will add line breaks at requested locations after 17493 # sub set_continuation_breaks has made preliminary breaks. 17494 17495 my ( $ri_break_list, $ri_first, $ri_last ) = @_; 17496 my $i_f; 17497 my $i_l; 17498 my $line_number = 0; 17499 my $i_break_left; 17500 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { 17501 17502 $i_f = $$ri_first[$line_number]; 17503 $i_l = $$ri_last[$line_number]; 17504 while ( $i_break_left >= $i_l ) { 17505 $line_number++; 17506 17507 # shouldn't happen unless caller passes bad indexes 17508 if ( $line_number >= @$ri_last ) { 17509 warning( 17510"Non-fatal program bug: couldn't set break at $i_break_left\n" 17511 ); 17512 report_definite_bug(); 17513 return; 17514 } 17515 $i_f = $$ri_first[$line_number]; 17516 $i_l = $$ri_last[$line_number]; 17517 } 17518 17519 my $i_break_right = $i_break_left + 1; 17520 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } 17521 17522 if ( $i_break_left >= $i_f 17523 && $i_break_left < $i_l 17524 && $i_break_right > $i_f 17525 && $i_break_right <= $i_l ) 17526 { 17527 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); 17528 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); 17529 } 17530 } 17531} 17532 17533sub set_closing_breakpoint { 17534 17535 # set a breakpoint at a matching closing token 17536 # at present, this is only used to break at a ':' which matches a '?' 17537 my $i_break = shift; 17538 17539 if ( $mate_index_to_go[$i_break] >= 0 ) { 17540 17541 # CAUTION: infinite recursion possible here: 17542 # set_closing_breakpoint calls set_forced_breakpoint, and 17543 # set_forced_breakpoint call set_closing_breakpoint 17544 # ( test files attrib.t, BasicLyx.pm.html). 17545 # Don't reduce the '2' in the statement below 17546 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { 17547 17548 # break before } ] and ), but sub set_forced_breakpoint will decide 17549 # to break before or after a ? and : 17550 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; 17551 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); 17552 } 17553 } 17554 else { 17555 my $type_sequence = $type_sequence_to_go[$i_break]; 17556 if ($type_sequence) { 17557 my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; 17558 $postponed_breakpoint{$type_sequence} = 1; 17559 } 17560 } 17561} 17562 17563# check to see if output line tabbing agrees with input line 17564# this can be very useful for debugging a script which has an extra 17565# or missing brace 17566sub compare_indentation_levels { 17567 17568 my ( $python_indentation_level, $structural_indentation_level ) = @_; 17569 if ( ( $python_indentation_level ne $structural_indentation_level ) ) { 17570 $last_tabbing_disagreement = $input_line_number; 17571 17572 if ($in_tabbing_disagreement) { 17573 } 17574 else { 17575 $tabbing_disagreement_count++; 17576 17577 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 17578 write_logfile_entry( 17579"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n" 17580 ); 17581 } 17582 $in_tabbing_disagreement = $input_line_number; 17583 $first_tabbing_disagreement = $in_tabbing_disagreement 17584 unless ($first_tabbing_disagreement); 17585 } 17586 } 17587 else { 17588 17589 if ($in_tabbing_disagreement) { 17590 17591 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 17592 write_logfile_entry( 17593"End indentation disagreement from input line $in_tabbing_disagreement\n" 17594 ); 17595 17596 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { 17597 write_logfile_entry( 17598 "No further tabbing disagreements will be noted\n"); 17599 } 17600 } 17601 $in_tabbing_disagreement = 0; 17602 } 17603 } 17604} 17605 17606##################################################################### 17607# 17608# the Perl::Tidy::IndentationItem class supplies items which contain 17609# how much whitespace should be used at the start of a line 17610# 17611##################################################################### 17612 17613package Perl::Tidy::IndentationItem; 17614 17615# Indexes for indentation items 17616use constant SPACES => 0; # total leading white spaces 17617use constant LEVEL => 1; # the indentation 'level' 17618use constant CI_LEVEL => 2; # the 'continuation level' 17619use constant AVAILABLE_SPACES => 3; # how many left spaces available 17620 # for this level 17621use constant CLOSED => 4; # index where we saw closing '}' 17622use constant COMMA_COUNT => 5; # how many commas at this level? 17623use constant SEQUENCE_NUMBER => 6; # output batch number 17624use constant INDEX => 7; # index in output batch list 17625use constant HAVE_CHILD => 8; # any dependents? 17626use constant RECOVERABLE_SPACES => 9; # how many spaces to the right 17627 # we would like to move to get 17628 # alignment (negative if left) 17629use constant ALIGN_PAREN => 10; # do we want to try to align 17630 # with an opening structure? 17631use constant MARKED => 11; # if visited by corrector logic 17632use constant STACK_DEPTH => 12; # indentation nesting depth 17633use constant STARTING_INDEX => 13; # first token index of this level 17634use constant ARROW_COUNT => 14; # how many =>'s 17635 17636sub new { 17637 17638 # Create an 'indentation_item' which describes one level of leading 17639 # whitespace when the '-lp' indentation is used. We return 17640 # a reference to an anonymous array of associated variables. 17641 # See above constants for storage scheme. 17642 my ( 17643 $class, $spaces, $level, 17644 $ci_level, $available_spaces, $index, 17645 $gnu_sequence_number, $align_paren, $stack_depth, 17646 $starting_index, 17647 ) = @_; 17648 my $closed = -1; 17649 my $arrow_count = 0; 17650 my $comma_count = 0; 17651 my $have_child = 0; 17652 my $want_right_spaces = 0; 17653 my $marked = 0; 17654 bless [ 17655 $spaces, $level, $ci_level, 17656 $available_spaces, $closed, $comma_count, 17657 $gnu_sequence_number, $index, $have_child, 17658 $want_right_spaces, $align_paren, $marked, 17659 $stack_depth, $starting_index, $arrow_count, 17660 ], $class; 17661} 17662 17663sub permanently_decrease_AVAILABLE_SPACES { 17664 17665 # make a permanent reduction in the available indentation spaces 17666 # at one indentation item. NOTE: if there are child nodes, their 17667 # total SPACES must be reduced by the caller. 17668 17669 my ( $item, $spaces_needed ) = @_; 17670 my $available_spaces = $item->get_AVAILABLE_SPACES(); 17671 my $deleted_spaces = 17672 ( $available_spaces > $spaces_needed ) 17673 ? $spaces_needed 17674 : $available_spaces; 17675 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 17676 $item->decrease_SPACES($deleted_spaces); 17677 $item->set_RECOVERABLE_SPACES(0); 17678 17679 return $deleted_spaces; 17680} 17681 17682sub tentatively_decrease_AVAILABLE_SPACES { 17683 17684 # We are asked to tentatively delete $spaces_needed of indentation 17685 # for a indentation item. We may want to undo this later. NOTE: if 17686 # there are child nodes, their total SPACES must be reduced by the 17687 # caller. 17688 my ( $item, $spaces_needed ) = @_; 17689 my $available_spaces = $item->get_AVAILABLE_SPACES(); 17690 my $deleted_spaces = 17691 ( $available_spaces > $spaces_needed ) 17692 ? $spaces_needed 17693 : $available_spaces; 17694 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 17695 $item->decrease_SPACES($deleted_spaces); 17696 $item->increase_RECOVERABLE_SPACES($deleted_spaces); 17697 return $deleted_spaces; 17698} 17699 17700sub get_STACK_DEPTH { 17701 my $self = shift; 17702 return $self->[STACK_DEPTH]; 17703} 17704 17705sub get_SPACES { 17706 my $self = shift; 17707 return $self->[SPACES]; 17708} 17709 17710sub get_MARKED { 17711 my $self = shift; 17712 return $self->[MARKED]; 17713} 17714 17715sub set_MARKED { 17716 my ( $self, $value ) = @_; 17717 if ( defined($value) ) { 17718 $self->[MARKED] = $value; 17719 } 17720 return $self->[MARKED]; 17721} 17722 17723sub get_AVAILABLE_SPACES { 17724 my $self = shift; 17725 return $self->[AVAILABLE_SPACES]; 17726} 17727 17728sub decrease_SPACES { 17729 my ( $self, $value ) = @_; 17730 if ( defined($value) ) { 17731 $self->[SPACES] -= $value; 17732 } 17733 return $self->[SPACES]; 17734} 17735 17736sub decrease_AVAILABLE_SPACES { 17737 my ( $self, $value ) = @_; 17738 if ( defined($value) ) { 17739 $self->[AVAILABLE_SPACES] -= $value; 17740 } 17741 return $self->[AVAILABLE_SPACES]; 17742} 17743 17744sub get_ALIGN_PAREN { 17745 my $self = shift; 17746 return $self->[ALIGN_PAREN]; 17747} 17748 17749sub get_RECOVERABLE_SPACES { 17750 my $self = shift; 17751 return $self->[RECOVERABLE_SPACES]; 17752} 17753 17754sub set_RECOVERABLE_SPACES { 17755 my ( $self, $value ) = @_; 17756 if ( defined($value) ) { 17757 $self->[RECOVERABLE_SPACES] = $value; 17758 } 17759 return $self->[RECOVERABLE_SPACES]; 17760} 17761 17762sub increase_RECOVERABLE_SPACES { 17763 my ( $self, $value ) = @_; 17764 if ( defined($value) ) { 17765 $self->[RECOVERABLE_SPACES] += $value; 17766 } 17767 return $self->[RECOVERABLE_SPACES]; 17768} 17769 17770sub get_CI_LEVEL { 17771 my $self = shift; 17772 return $self->[CI_LEVEL]; 17773} 17774 17775sub get_LEVEL { 17776 my $self = shift; 17777 return $self->[LEVEL]; 17778} 17779 17780sub get_SEQUENCE_NUMBER { 17781 my $self = shift; 17782 return $self->[SEQUENCE_NUMBER]; 17783} 17784 17785sub get_INDEX { 17786 my $self = shift; 17787 return $self->[INDEX]; 17788} 17789 17790sub get_STARTING_INDEX { 17791 my $self = shift; 17792 return $self->[STARTING_INDEX]; 17793} 17794 17795sub set_HAVE_CHILD { 17796 my ( $self, $value ) = @_; 17797 if ( defined($value) ) { 17798 $self->[HAVE_CHILD] = $value; 17799 } 17800 return $self->[HAVE_CHILD]; 17801} 17802 17803sub get_HAVE_CHILD { 17804 my $self = shift; 17805 return $self->[HAVE_CHILD]; 17806} 17807 17808sub set_ARROW_COUNT { 17809 my ( $self, $value ) = @_; 17810 if ( defined($value) ) { 17811 $self->[ARROW_COUNT] = $value; 17812 } 17813 return $self->[ARROW_COUNT]; 17814} 17815 17816sub get_ARROW_COUNT { 17817 my $self = shift; 17818 return $self->[ARROW_COUNT]; 17819} 17820 17821sub set_COMMA_COUNT { 17822 my ( $self, $value ) = @_; 17823 if ( defined($value) ) { 17824 $self->[COMMA_COUNT] = $value; 17825 } 17826 return $self->[COMMA_COUNT]; 17827} 17828 17829sub get_COMMA_COUNT { 17830 my $self = shift; 17831 return $self->[COMMA_COUNT]; 17832} 17833 17834sub set_CLOSED { 17835 my ( $self, $value ) = @_; 17836 if ( defined($value) ) { 17837 $self->[CLOSED] = $value; 17838 } 17839 return $self->[CLOSED]; 17840} 17841 17842sub get_CLOSED { 17843 my $self = shift; 17844 return $self->[CLOSED]; 17845} 17846 17847##################################################################### 17848# 17849# the Perl::Tidy::VerticalAligner::Line class supplies an object to 17850# contain a single output line 17851# 17852##################################################################### 17853 17854package Perl::Tidy::VerticalAligner::Line; 17855 17856{ 17857 17858 use strict; 17859 use Carp; 17860 17861 use constant JMAX => 0; 17862 use constant JMAX_ORIGINAL_LINE => 1; 17863 use constant RTOKENS => 2; 17864 use constant RFIELDS => 3; 17865 use constant RPATTERNS => 4; 17866 use constant INDENTATION => 5; 17867 use constant LEADING_SPACE_COUNT => 6; 17868 use constant OUTDENT_LONG_LINES => 7; 17869 use constant LIST_TYPE => 8; 17870 use constant IS_HANGING_SIDE_COMMENT => 9; 17871 use constant RALIGNMENTS => 10; 17872 use constant MAXIMUM_LINE_LENGTH => 11; 17873 use constant RVERTICAL_TIGHTNESS_FLAGS => 12; 17874 17875 my %_index_map; 17876 $_index_map{jmax} = JMAX; 17877 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; 17878 $_index_map{rtokens} = RTOKENS; 17879 $_index_map{rfields} = RFIELDS; 17880 $_index_map{rpatterns} = RPATTERNS; 17881 $_index_map{indentation} = INDENTATION; 17882 $_index_map{leading_space_count} = LEADING_SPACE_COUNT; 17883 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; 17884 $_index_map{list_type} = LIST_TYPE; 17885 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; 17886 $_index_map{ralignments} = RALIGNMENTS; 17887 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; 17888 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; 17889 17890 my @_default_data = (); 17891 $_default_data[JMAX] = undef; 17892 $_default_data[JMAX_ORIGINAL_LINE] = undef; 17893 $_default_data[RTOKENS] = undef; 17894 $_default_data[RFIELDS] = undef; 17895 $_default_data[RPATTERNS] = undef; 17896 $_default_data[INDENTATION] = undef; 17897 $_default_data[LEADING_SPACE_COUNT] = undef; 17898 $_default_data[OUTDENT_LONG_LINES] = undef; 17899 $_default_data[LIST_TYPE] = undef; 17900 $_default_data[IS_HANGING_SIDE_COMMENT] = undef; 17901 $_default_data[RALIGNMENTS] = []; 17902 $_default_data[MAXIMUM_LINE_LENGTH] = undef; 17903 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; 17904 17905 { 17906 17907 # methods to count object population 17908 my $_count = 0; 17909 sub get_count { $_count; } 17910 sub _increment_count { ++$_count } 17911 sub _decrement_count { --$_count } 17912 } 17913 17914 # Constructor may be called as a class method 17915 sub new { 17916 my ( $caller, %arg ) = @_; 17917 my $caller_is_obj = ref($caller); 17918 my $class = $caller_is_obj || $caller; 17919 no strict "refs"; 17920 my $self = bless [], $class; 17921 17922 $self->[RALIGNMENTS] = []; 17923 17924 my $index; 17925 foreach ( keys %_index_map ) { 17926 $index = $_index_map{$_}; 17927 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } 17928 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 17929 else { $self->[$index] = $_default_data[$index] } 17930 } 17931 17932 $self->_increment_count(); 17933 return $self; 17934 } 17935 17936 sub DESTROY { 17937 $_[0]->_decrement_count(); 17938 } 17939 17940 sub get_jmax { $_[0]->[JMAX] } 17941 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } 17942 sub get_rtokens { $_[0]->[RTOKENS] } 17943 sub get_rfields { $_[0]->[RFIELDS] } 17944 sub get_rpatterns { $_[0]->[RPATTERNS] } 17945 sub get_indentation { $_[0]->[INDENTATION] } 17946 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } 17947 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } 17948 sub get_list_type { $_[0]->[LIST_TYPE] } 17949 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } 17950 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } 17951 17952 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } 17953 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } 17954 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } 17955 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } 17956 17957 sub get_starting_column { 17958 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); 17959 } 17960 17961 sub increment_column { 17962 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); 17963 } 17964 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } 17965 17966 sub current_field_width { 17967 my $self = shift; 17968 my ($j) = @_; 17969 if ( $j == 0 ) { 17970 return $self->get_column($j); 17971 } 17972 else { 17973 return $self->get_column($j) - $self->get_column( $j - 1 ); 17974 } 17975 } 17976 17977 sub field_width_growth { 17978 my $self = shift; 17979 my $j = shift; 17980 return $self->get_column($j) - $self->get_starting_column($j); 17981 } 17982 17983 sub starting_field_width { 17984 my $self = shift; 17985 my $j = shift; 17986 if ( $j == 0 ) { 17987 return $self->get_starting_column($j); 17988 } 17989 else { 17990 return $self->get_starting_column($j) - 17991 $self->get_starting_column( $j - 1 ); 17992 } 17993 } 17994 17995 sub increase_field_width { 17996 17997 my $self = shift; 17998 my ( $j, $pad ) = @_; 17999 my $jmax = $self->get_jmax(); 18000 for my $k ( $j .. $jmax ) { 18001 $self->increment_column( $k, $pad ); 18002 } 18003 } 18004 18005 sub get_available_space_on_right { 18006 my $self = shift; 18007 my $jmax = $self->get_jmax(); 18008 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); 18009 } 18010 18011 sub set_jmax { $_[0]->[JMAX] = $_[1] } 18012 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } 18013 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } 18014 sub set_rfields { $_[0]->[RFIELDS] = $_[1] } 18015 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } 18016 sub set_indentation { $_[0]->[INDENTATION] = $_[1] } 18017 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } 18018 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } 18019 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } 18020 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } 18021 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } 18022 18023} 18024 18025##################################################################### 18026# 18027# the Perl::Tidy::VerticalAligner::Alignment class holds information 18028# on a single column being aligned 18029# 18030##################################################################### 18031package Perl::Tidy::VerticalAligner::Alignment; 18032 18033{ 18034 18035 use strict; 18036 18037 #use Carp; 18038 18039 # Symbolic array indexes 18040 use constant COLUMN => 0; # the current column number 18041 use constant STARTING_COLUMN => 1; # column number when created 18042 use constant MATCHING_TOKEN => 2; # what token we are matching 18043 use constant STARTING_LINE => 3; # the line index of creation 18044 use constant ENDING_LINE => 4; # the most recent line to use it 18045 use constant SAVED_COLUMN => 5; # the most recent line to use it 18046 use constant SERIAL_NUMBER => 6; # unique number for this alignment 18047 # (just its index in an array) 18048 18049 # Correspondence between variables and array indexes 18050 my %_index_map; 18051 $_index_map{column} = COLUMN; 18052 $_index_map{starting_column} = STARTING_COLUMN; 18053 $_index_map{matching_token} = MATCHING_TOKEN; 18054 $_index_map{starting_line} = STARTING_LINE; 18055 $_index_map{ending_line} = ENDING_LINE; 18056 $_index_map{saved_column} = SAVED_COLUMN; 18057 $_index_map{serial_number} = SERIAL_NUMBER; 18058 18059 my @_default_data = (); 18060 $_default_data[COLUMN] = undef; 18061 $_default_data[STARTING_COLUMN] = undef; 18062 $_default_data[MATCHING_TOKEN] = undef; 18063 $_default_data[STARTING_LINE] = undef; 18064 $_default_data[ENDING_LINE] = undef; 18065 $_default_data[SAVED_COLUMN] = undef; 18066 $_default_data[SERIAL_NUMBER] = undef; 18067 18068 # class population count 18069 { 18070 my $_count = 0; 18071 sub get_count { $_count; } 18072 sub _increment_count { ++$_count } 18073 sub _decrement_count { --$_count } 18074 } 18075 18076 # constructor 18077 sub new { 18078 my ( $caller, %arg ) = @_; 18079 my $caller_is_obj = ref($caller); 18080 my $class = $caller_is_obj || $caller; 18081 no strict "refs"; 18082 my $self = bless [], $class; 18083 18084 foreach ( keys %_index_map ) { 18085 my $index = $_index_map{$_}; 18086 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } 18087 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 18088 else { $self->[$index] = $_default_data[$index] } 18089 } 18090 $self->_increment_count(); 18091 return $self; 18092 } 18093 18094 sub DESTROY { 18095 $_[0]->_decrement_count(); 18096 } 18097 18098 sub get_column { return $_[0]->[COLUMN] } 18099 sub get_starting_column { return $_[0]->[STARTING_COLUMN] } 18100 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } 18101 sub get_starting_line { return $_[0]->[STARTING_LINE] } 18102 sub get_ending_line { return $_[0]->[ENDING_LINE] } 18103 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } 18104 18105 sub set_column { $_[0]->[COLUMN] = $_[1] } 18106 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } 18107 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } 18108 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } 18109 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } 18110 sub increment_column { $_[0]->[COLUMN] += $_[1] } 18111 18112 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } 18113 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } 18114 18115} 18116 18117package Perl::Tidy::VerticalAligner; 18118 18119# The Perl::Tidy::VerticalAligner package collects output lines and 18120# attempts to line up certain common tokens, such as => and #, which are 18121# identified by the calling routine. 18122# 18123# There are two main routines: append_line and flush. Append acts as a 18124# storage buffer, collecting lines into a group which can be vertically 18125# aligned. When alignment is no longer possible or desirable, it dumps 18126# the group to flush. 18127# 18128# append_line -----> flush 18129# 18130# collects writes 18131# vertical one 18132# groups group 18133 18134BEGIN { 18135 18136 # Caution: these debug flags produce a lot of output 18137 # They should all be 0 except when debugging small scripts 18138 18139 use constant VALIGN_DEBUG_FLAG_APPEND => 0; 18140 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; 18141 use constant VALIGN_DEBUG_FLAG_TERNARY => 0; 18142 18143 my $debug_warning = sub { 18144 print "VALIGN_DEBUGGING with key $_[0]\n"; 18145 }; 18146 18147 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); 18148 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); 18149 18150} 18151 18152use vars qw( 18153 $vertical_aligner_self 18154 $current_line 18155 $maximum_alignment_index 18156 $ralignment_list 18157 $maximum_jmax_seen 18158 $minimum_jmax_seen 18159 $previous_minimum_jmax_seen 18160 $previous_maximum_jmax_seen 18161 $maximum_line_index 18162 $group_level 18163 $group_type 18164 $group_maximum_gap 18165 $marginal_match 18166 $last_group_level_written 18167 $last_leading_space_count 18168 $extra_indent_ok 18169 $zero_count 18170 @group_lines 18171 $last_comment_column 18172 $last_side_comment_line_number 18173 $last_side_comment_length 18174 $last_side_comment_level 18175 $outdented_line_count 18176 $first_outdented_line_at 18177 $last_outdented_line_at 18178 $diagnostics_object 18179 $logger_object 18180 $file_writer_object 18181 @side_comment_history 18182 $comment_leading_space_count 18183 $is_matching_terminal_line 18184 18185 $cached_line_text 18186 $cached_line_type 18187 $cached_line_flag 18188 $cached_seqno 18189 $cached_line_valid 18190 $cached_line_leading_space_count 18191 $cached_seqno_string 18192 18193 $seqno_string 18194 $last_nonblank_seqno_string 18195 18196 $rOpts 18197 18198 $rOpts_maximum_line_length 18199 $rOpts_continuation_indentation 18200 $rOpts_indent_columns 18201 $rOpts_tabs 18202 $rOpts_entab_leading_whitespace 18203 $rOpts_valign 18204 18205 $rOpts_fixed_position_side_comment 18206 $rOpts_minimum_space_to_comment 18207 18208); 18209 18210sub initialize { 18211 18212 my $class; 18213 18214 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) 18215 = @_; 18216 18217 # variables describing the entire space group: 18218 $ralignment_list = []; 18219 $group_level = 0; 18220 $last_group_level_written = -1; 18221 $extra_indent_ok = 0; # can we move all lines to the right? 18222 $last_side_comment_length = 0; 18223 $maximum_jmax_seen = 0; 18224 $minimum_jmax_seen = 0; 18225 $previous_minimum_jmax_seen = 0; 18226 $previous_maximum_jmax_seen = 0; 18227 18228 # variables describing each line of the group 18229 @group_lines = (); # list of all lines in group 18230 18231 $outdented_line_count = 0; 18232 $first_outdented_line_at = 0; 18233 $last_outdented_line_at = 0; 18234 $last_side_comment_line_number = 0; 18235 $last_side_comment_level = -1; 18236 $is_matching_terminal_line = 0; 18237 18238 # most recent 3 side comments; [ line number, column ] 18239 $side_comment_history[0] = [ -300, 0 ]; 18240 $side_comment_history[1] = [ -200, 0 ]; 18241 $side_comment_history[2] = [ -100, 0 ]; 18242 18243 # write_leader_and_string cache: 18244 $cached_line_text = ""; 18245 $cached_line_type = 0; 18246 $cached_line_flag = 0; 18247 $cached_seqno = 0; 18248 $cached_line_valid = 0; 18249 $cached_line_leading_space_count = 0; 18250 $cached_seqno_string = ""; 18251 18252 # string of sequence numbers joined together 18253 $seqno_string = ""; 18254 $last_nonblank_seqno_string = ""; 18255 18256 # frequently used parameters 18257 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 18258 $rOpts_tabs = $rOpts->{'tabs'}; 18259 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; 18260 $rOpts_fixed_position_side_comment = 18261 $rOpts->{'fixed-position-side-comment'}; 18262 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; 18263 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 18264 $rOpts_valign = $rOpts->{'valign'}; 18265 18266 forget_side_comment(); 18267 18268 initialize_for_new_group(); 18269 18270 $vertical_aligner_self = {}; 18271 bless $vertical_aligner_self, $class; 18272 return $vertical_aligner_self; 18273} 18274 18275sub initialize_for_new_group { 18276 $maximum_line_index = -1; # lines in the current group 18277 $maximum_alignment_index = -1; # alignments in current group 18278 $zero_count = 0; # count consecutive lines without tokens 18279 $current_line = undef; # line being matched for alignment 18280 $group_maximum_gap = 0; # largest gap introduced 18281 $group_type = ""; 18282 $marginal_match = 0; 18283 $comment_leading_space_count = 0; 18284 $last_leading_space_count = 0; 18285} 18286 18287# interface to Perl::Tidy::Diagnostics routines 18288sub write_diagnostics { 18289 if ($diagnostics_object) { 18290 $diagnostics_object->write_diagnostics(@_); 18291 } 18292} 18293 18294# interface to Perl::Tidy::Logger routines 18295sub warning { 18296 if ($logger_object) { 18297 $logger_object->warning(@_); 18298 } 18299} 18300 18301sub write_logfile_entry { 18302 if ($logger_object) { 18303 $logger_object->write_logfile_entry(@_); 18304 } 18305} 18306 18307sub report_definite_bug { 18308 if ($logger_object) { 18309 $logger_object->report_definite_bug(); 18310 } 18311} 18312 18313sub get_SPACES { 18314 18315 # return the number of leading spaces associated with an indentation 18316 # variable $indentation is either a constant number of spaces or an 18317 # object with a get_SPACES method. 18318 my $indentation = shift; 18319 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 18320} 18321 18322sub get_RECOVERABLE_SPACES { 18323 18324 # return the number of spaces (+ means shift right, - means shift left) 18325 # that we would like to shift a group of lines with the same indentation 18326 # to get them to line up with their opening parens 18327 my $indentation = shift; 18328 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; 18329} 18330 18331sub get_STACK_DEPTH { 18332 18333 my $indentation = shift; 18334 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; 18335} 18336 18337sub make_alignment { 18338 my ( $col, $token ) = @_; 18339 18340 # make one new alignment at column $col which aligns token $token 18341 ++$maximum_alignment_index; 18342 my $alignment = new Perl::Tidy::VerticalAligner::Alignment( 18343 column => $col, 18344 starting_column => $col, 18345 matching_token => $token, 18346 starting_line => $maximum_line_index, 18347 ending_line => $maximum_line_index, 18348 serial_number => $maximum_alignment_index, 18349 ); 18350 $ralignment_list->[$maximum_alignment_index] = $alignment; 18351 return $alignment; 18352} 18353 18354sub dump_alignments { 18355 print 18356"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; 18357 for my $i ( 0 .. $maximum_alignment_index ) { 18358 my $column = $ralignment_list->[$i]->get_column(); 18359 my $starting_column = $ralignment_list->[$i]->get_starting_column(); 18360 my $matching_token = $ralignment_list->[$i]->get_matching_token(); 18361 my $starting_line = $ralignment_list->[$i]->get_starting_line(); 18362 my $ending_line = $ralignment_list->[$i]->get_ending_line(); 18363 print 18364"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; 18365 } 18366} 18367 18368sub save_alignment_columns { 18369 for my $i ( 0 .. $maximum_alignment_index ) { 18370 $ralignment_list->[$i]->save_column(); 18371 } 18372} 18373 18374sub restore_alignment_columns { 18375 for my $i ( 0 .. $maximum_alignment_index ) { 18376 $ralignment_list->[$i]->restore_column(); 18377 } 18378} 18379 18380sub forget_side_comment { 18381 $last_comment_column = 0; 18382} 18383 18384sub append_line { 18385 18386 # sub append is called to place one line in the current vertical group. 18387 # 18388 # The input parameters are: 18389 # $level = indentation level of this line 18390 # $rfields = reference to array of fields 18391 # $rpatterns = reference to array of patterns, one per field 18392 # $rtokens = reference to array of tokens starting fields 1,2,.. 18393 # 18394 # Here is an example of what this package does. In this example, 18395 # we are trying to line up both the '=>' and the '#'. 18396 # 18397 # '18' => 'grave', # \` 18398 # '19' => 'acute', # `' 18399 # '20' => 'caron', # \v 18400 # <-tabs-><f1-><--field 2 ---><-f3-> 18401 # | | | | 18402 # | | | | 18403 # col1 col2 col3 col4 18404 # 18405 # The calling routine has already broken the entire line into 3 fields as 18406 # indicated. (So the work of identifying promising common tokens has 18407 # already been done). 18408 # 18409 # In this example, there will be 2 tokens being matched: '=>' and '#'. 18410 # They are the leading parts of fields 2 and 3, but we do need to know 18411 # what they are so that we can dump a group of lines when these tokens 18412 # change. 18413 # 18414 # The fields contain the actual characters of each field. The patterns 18415 # are like the fields, but they contain mainly token types instead 18416 # of tokens, so they have fewer characters. They are used to be 18417 # sure we are matching fields of similar type. 18418 # 18419 # In this example, there will be 4 column indexes being adjusted. The 18420 # first one is always at zero. The interior columns are at the start of 18421 # the matching tokens, and the last one tracks the maximum line length. 18422 # 18423 # Basically, each time a new line comes in, it joins the current vertical 18424 # group if possible. Otherwise it causes the current group to be dumped 18425 # and a new group is started. 18426 # 18427 # For each new group member, the column locations are increased, as 18428 # necessary, to make room for the new fields. When the group is finally 18429 # output, these column numbers are used to compute the amount of spaces of 18430 # padding needed for each field. 18431 # 18432 # Programming note: the fields are assumed not to have any tab characters. 18433 # Tabs have been previously removed except for tabs in quoted strings and 18434 # side comments. Tabs in these fields can mess up the column counting. 18435 # The log file warns the user if there are any such tabs. 18436 18437 my ( 18438 $level, $level_end, 18439 $indentation, $rfields, 18440 $rtokens, $rpatterns, 18441 $is_forced_break, $outdent_long_lines, 18442 $is_terminal_ternary, $is_terminal_statement, 18443 $do_not_pad, $rvertical_tightness_flags, 18444 $level_jump, 18445 ) = @_; 18446 18447 # number of fields is $jmax 18448 # number of tokens between fields is $jmax-1 18449 my $jmax = $#{$rfields}; 18450 18451 my $leading_space_count = get_SPACES($indentation); 18452 18453 # set outdented flag to be sure we either align within statements or 18454 # across statement boundaries, but not both. 18455 my $is_outdented = $last_leading_space_count > $leading_space_count; 18456 $last_leading_space_count = $leading_space_count; 18457 18458 # Patch: undo for hanging side comment 18459 my $is_hanging_side_comment = 18460 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); 18461 $is_outdented = 0 if $is_hanging_side_comment; 18462 18463 VALIGN_DEBUG_FLAG_APPEND0 && do { 18464 print 18465"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; 18466 }; 18467 18468 # Validate cached line if necessary: If we can produce a container 18469 # with just 2 lines total by combining an existing cached opening 18470 # token with the closing token to follow, then we will mark both 18471 # cached flags as valid. 18472 if ($rvertical_tightness_flags) { 18473 if ( $maximum_line_index <= 0 18474 && $cached_line_type 18475 && $cached_seqno 18476 && $rvertical_tightness_flags->[2] 18477 && $rvertical_tightness_flags->[2] == $cached_seqno ) 18478 { 18479 $rvertical_tightness_flags->[3] ||= 1; 18480 $cached_line_valid ||= 1; 18481 } 18482 } 18483 18484 # do not join an opening block brace with an unbalanced line 18485 # unless requested with a flag value of 2 18486 if ( $cached_line_type == 3 18487 && $maximum_line_index < 0 18488 && $cached_line_flag < 2 18489 && $level_jump != 0 ) 18490 { 18491 $cached_line_valid = 0; 18492 } 18493 18494 # patch until new aligner is finished 18495 if ($do_not_pad) { my_flush() } 18496 18497 # shouldn't happen: 18498 if ( $level < 0 ) { $level = 0 } 18499 18500 # do not align code across indentation level changes 18501 # or if vertical alignment is turned off for debugging 18502 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { 18503 18504 # we are allowed to shift a group of lines to the right if its 18505 # level is greater than the previous and next group 18506 $extra_indent_ok = 18507 ( $level < $group_level && $last_group_level_written < $group_level ); 18508 18509 my_flush(); 18510 18511 # If we know that this line will get flushed out by itself because 18512 # of level changes, we can leave the extra_indent_ok flag set. 18513 # That way, if we get an external flush call, we will still be 18514 # able to do some -lp alignment if necessary. 18515 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); 18516 18517 $group_level = $level; 18518 18519 # wait until after the above flush to get the leading space 18520 # count because it may have been changed if the -icp flag is in 18521 # effect 18522 $leading_space_count = get_SPACES($indentation); 18523 18524 } 18525 18526 # -------------------------------------------------------------------- 18527 # Patch to collect outdentable block COMMENTS 18528 # -------------------------------------------------------------------- 18529 my $is_blank_line = ""; 18530 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); 18531 if ( $group_type eq 'COMMENT' ) { 18532 if ( 18533 ( 18534 $is_block_comment 18535 && $outdent_long_lines 18536 && $leading_space_count == $comment_leading_space_count 18537 ) 18538 || $is_blank_line 18539 ) 18540 { 18541 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 18542 return; 18543 } 18544 else { 18545 my_flush(); 18546 } 18547 } 18548 18549 # -------------------------------------------------------------------- 18550 # add dummy fields for terminal ternary 18551 # -------------------------------------------------------------------- 18552 my $j_terminal_match; 18553 if ( $is_terminal_ternary && $current_line ) { 18554 $j_terminal_match = 18555 fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); 18556 $jmax = @{$rfields} - 1; 18557 } 18558 18559 # -------------------------------------------------------------------- 18560 # add dummy fields for else statement 18561 # -------------------------------------------------------------------- 18562 if ( $rfields->[0] =~ /^else\s*$/ 18563 && $current_line 18564 && $level_jump == 0 ) 18565 { 18566 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); 18567 $jmax = @{$rfields} - 1; 18568 } 18569 18570 # -------------------------------------------------------------------- 18571 # Step 1. Handle simple line of code with no fields to match. 18572 # -------------------------------------------------------------------- 18573 if ( $jmax <= 0 ) { 18574 $zero_count++; 18575 18576 if ( $maximum_line_index >= 0 18577 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) 18578 { 18579 18580 # flush the current group if it has some aligned columns.. 18581 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } 18582 18583 # flush current group if we are just collecting side comments.. 18584 elsif ( 18585 18586 # ...and we haven't seen a comment lately 18587 ( $zero_count > 3 ) 18588 18589 # ..or if this new line doesn't fit to the left of the comments 18590 || ( ( $leading_space_count + length( $$rfields[0] ) ) > 18591 $group_lines[0]->get_column(0) ) 18592 ) 18593 { 18594 my_flush(); 18595 } 18596 } 18597 18598 # patch to start new COMMENT group if this comment may be outdented 18599 if ( $is_block_comment 18600 && $outdent_long_lines 18601 && $maximum_line_index < 0 ) 18602 { 18603 $group_type = 'COMMENT'; 18604 $comment_leading_space_count = $leading_space_count; 18605 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 18606 return; 18607 } 18608 18609 # just write this line directly if no current group, no side comment, 18610 # and no space recovery is needed. 18611 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) 18612 { 18613 write_leader_and_string( $leading_space_count, $$rfields[0], 0, 18614 $outdent_long_lines, $rvertical_tightness_flags ); 18615 return; 18616 } 18617 } 18618 else { 18619 $zero_count = 0; 18620 } 18621 18622 # programming check: (shouldn't happen) 18623 # an error here implies an incorrect call was made 18624 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { 18625 warning( 18626"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" 18627 ); 18628 report_definite_bug(); 18629 } 18630 18631 # -------------------------------------------------------------------- 18632 # create an object to hold this line 18633 # -------------------------------------------------------------------- 18634 my $new_line = new Perl::Tidy::VerticalAligner::Line( 18635 jmax => $jmax, 18636 jmax_original_line => $jmax, 18637 rtokens => $rtokens, 18638 rfields => $rfields, 18639 rpatterns => $rpatterns, 18640 indentation => $indentation, 18641 leading_space_count => $leading_space_count, 18642 outdent_long_lines => $outdent_long_lines, 18643 list_type => "", 18644 is_hanging_side_comment => $is_hanging_side_comment, 18645 maximum_line_length => $rOpts->{'maximum-line-length'}, 18646 rvertical_tightness_flags => $rvertical_tightness_flags, 18647 ); 18648 18649 # Initialize a global flag saying if the last line of the group should 18650 # match end of group and also terminate the group. There should be no 18651 # returns between here and where the flag is handled at the bottom. 18652 my $col_matching_terminal = 0; 18653 if ( defined($j_terminal_match) ) { 18654 18655 # remember the column of the terminal ? or { to match with 18656 $col_matching_terminal = $current_line->get_column($j_terminal_match); 18657 18658 # set global flag for sub decide_if_aligned 18659 $is_matching_terminal_line = 1; 18660 } 18661 18662 # -------------------------------------------------------------------- 18663 # It simplifies things to create a zero length side comment 18664 # if none exists. 18665 # -------------------------------------------------------------------- 18666 make_side_comment( $new_line, $level_end ); 18667 18668 # -------------------------------------------------------------------- 18669 # Decide if this is a simple list of items. 18670 # There are 3 list types: none, comma, comma-arrow. 18671 # We use this below to be less restrictive in deciding what to align. 18672 # -------------------------------------------------------------------- 18673 if ($is_forced_break) { 18674 decide_if_list($new_line); 18675 } 18676 18677 if ($current_line) { 18678 18679 # -------------------------------------------------------------------- 18680 # Allow hanging side comment to join current group, if any 18681 # This will help keep side comments aligned, because otherwise we 18682 # will have to start a new group, making alignment less likely. 18683 # -------------------------------------------------------------------- 18684 join_hanging_comment( $new_line, $current_line ) 18685 if $is_hanging_side_comment; 18686 18687 # -------------------------------------------------------------------- 18688 # If there is just one previous line, and it has more fields 18689 # than the new line, try to join fields together to get a match with 18690 # the new line. At the present time, only a single leading '=' is 18691 # allowed to be compressed out. This is useful in rare cases where 18692 # a table is forced to use old breakpoints because of side comments, 18693 # and the table starts out something like this: 18694 # my %MonthChars = ('0', 'Jan', # side comment 18695 # '1', 'Feb', 18696 # '2', 'Mar', 18697 # Eliminating the '=' field will allow the remaining fields to line up. 18698 # This situation does not occur if there are no side comments 18699 # because scan_list would put a break after the opening '('. 18700 # -------------------------------------------------------------------- 18701 eliminate_old_fields( $new_line, $current_line ); 18702 18703 # -------------------------------------------------------------------- 18704 # If the new line has more fields than the current group, 18705 # see if we can match the first fields and combine the remaining 18706 # fields of the new line. 18707 # -------------------------------------------------------------------- 18708 eliminate_new_fields( $new_line, $current_line ); 18709 18710 # -------------------------------------------------------------------- 18711 # Flush previous group unless all common tokens and patterns match.. 18712 # -------------------------------------------------------------------- 18713 check_match( $new_line, $current_line ); 18714 18715 # -------------------------------------------------------------------- 18716 # See if there is space for this line in the current group (if any) 18717 # -------------------------------------------------------------------- 18718 if ($current_line) { 18719 check_fit( $new_line, $current_line ); 18720 } 18721 } 18722 18723 # -------------------------------------------------------------------- 18724 # Append this line to the current group (or start new group) 18725 # -------------------------------------------------------------------- 18726 accept_line($new_line); 18727 18728 # Future update to allow this to vary: 18729 $current_line = $new_line if ( $maximum_line_index == 0 ); 18730 18731 # output this group if it ends in a terminal else or ternary line 18732 if ( defined($j_terminal_match) ) { 18733 18734 # if there is only one line in the group (maybe due to failure to match 18735 # perfectly with previous lines), then align the ? or { of this 18736 # terminal line with the previous one unless that would make the line 18737 # too long 18738 if ( $maximum_line_index == 0 ) { 18739 my $col_now = $current_line->get_column($j_terminal_match); 18740 my $pad = $col_matching_terminal - $col_now; 18741 my $padding_available = 18742 $current_line->get_available_space_on_right(); 18743 if ( $pad > 0 && $pad <= $padding_available ) { 18744 $current_line->increase_field_width( $j_terminal_match, $pad ); 18745 } 18746 } 18747 my_flush(); 18748 $is_matching_terminal_line = 0; 18749 } 18750 18751 # -------------------------------------------------------------------- 18752 # Step 8. Some old debugging stuff 18753 # -------------------------------------------------------------------- 18754 VALIGN_DEBUG_FLAG_APPEND && do { 18755 print "APPEND fields:"; 18756 dump_array(@$rfields); 18757 print "APPEND tokens:"; 18758 dump_array(@$rtokens); 18759 print "APPEND patterns:"; 18760 dump_array(@$rpatterns); 18761 dump_alignments(); 18762 }; 18763 18764 return; 18765} 18766 18767sub join_hanging_comment { 18768 18769 my $line = shift; 18770 my $jmax = $line->get_jmax(); 18771 return 0 unless $jmax == 1; # must be 2 fields 18772 my $rtokens = $line->get_rtokens(); 18773 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. 18774 my $rfields = $line->get_rfields(); 18775 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... 18776 my $old_line = shift; 18777 my $maximum_field_index = $old_line->get_jmax(); 18778 return 0 18779 unless $maximum_field_index > $jmax; # the current line has more fields 18780 my $rpatterns = $line->get_rpatterns(); 18781 18782 $line->set_is_hanging_side_comment(1); 18783 $jmax = $maximum_field_index; 18784 $line->set_jmax($jmax); 18785 $$rfields[$jmax] = $$rfields[1]; 18786 $$rtokens[ $jmax - 1 ] = $$rtokens[0]; 18787 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; 18788 for ( my $j = 1 ; $j < $jmax ; $j++ ) { 18789 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? 18790 $$rtokens[ $j - 1 ] = ""; 18791 $$rpatterns[ $j - 1 ] = ""; 18792 } 18793 return 1; 18794} 18795 18796sub eliminate_old_fields { 18797 18798 my $new_line = shift; 18799 my $jmax = $new_line->get_jmax(); 18800 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } 18801 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } 18802 18803 # there must be one previous line 18804 return unless ( $maximum_line_index == 0 ); 18805 18806 my $old_line = shift; 18807 my $maximum_field_index = $old_line->get_jmax(); 18808 18809 ############################################### 18810 # this line must have fewer fields 18811 return unless $maximum_field_index > $jmax; 18812 ############################################### 18813 18814 # Identify specific cases where field elimination is allowed: 18815 # case=1: both lines have comma-separated lists, and the first 18816 # line has an equals 18817 # case=2: both lines have leading equals 18818 18819 # case 1 is the default 18820 my $case = 1; 18821 18822 # See if case 2: both lines have leading '=' 18823 # We'll require smiliar leading patterns in this case 18824 my $old_rtokens = $old_line->get_rtokens(); 18825 my $rtokens = $new_line->get_rtokens(); 18826 my $rpatterns = $new_line->get_rpatterns(); 18827 my $old_rpatterns = $old_line->get_rpatterns(); 18828 if ( $rtokens->[0] =~ /^=\d*$/ 18829 && $old_rtokens->[0] eq $rtokens->[0] 18830 && $old_rpatterns->[0] eq $rpatterns->[0] ) 18831 { 18832 $case = 2; 18833 } 18834 18835 # not too many fewer fields in new line for case 1 18836 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); 18837 18838 # case 1 must have side comment 18839 my $old_rfields = $old_line->get_rfields(); 18840 return 18841 if ( $case == 1 18842 && length( $$old_rfields[$maximum_field_index] ) == 0 ); 18843 18844 my $rfields = $new_line->get_rfields(); 18845 18846 my $hid_equals = 0; 18847 18848 my @new_alignments = (); 18849 my @new_fields = (); 18850 my @new_matching_patterns = (); 18851 my @new_matching_tokens = (); 18852 18853 my $j = 0; 18854 my $k; 18855 my $current_field = ''; 18856 my $current_pattern = ''; 18857 18858 # loop over all old tokens 18859 my $in_match = 0; 18860 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { 18861 $current_field .= $$old_rfields[$k]; 18862 $current_pattern .= $$old_rpatterns[$k]; 18863 last if ( $j > $jmax - 1 ); 18864 18865 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { 18866 $in_match = 1; 18867 $new_fields[$j] = $current_field; 18868 $new_matching_patterns[$j] = $current_pattern; 18869 $current_field = ''; 18870 $current_pattern = ''; 18871 $new_matching_tokens[$j] = $$old_rtokens[$k]; 18872 $new_alignments[$j] = $old_line->get_alignment($k); 18873 $j++; 18874 } 18875 else { 18876 18877 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { 18878 last if ( $case == 2 ); # avoid problems with stuff 18879 # like: $a=$b=$c=$d; 18880 $hid_equals = 1; 18881 } 18882 last 18883 if ( $in_match && $case == 1 ) 18884 ; # disallow gaps in matching field types in case 1 18885 } 18886 } 18887 18888 # Modify the current state if we are successful. 18889 # We must exactly reach the ends of both lists for success. 18890 if ( ( $j == $jmax ) 18891 && ( $current_field eq '' ) 18892 && ( $case != 1 || $hid_equals ) ) 18893 { 18894 $k = $maximum_field_index; 18895 $current_field .= $$old_rfields[$k]; 18896 $current_pattern .= $$old_rpatterns[$k]; 18897 $new_fields[$j] = $current_field; 18898 $new_matching_patterns[$j] = $current_pattern; 18899 18900 $new_alignments[$j] = $old_line->get_alignment($k); 18901 $maximum_field_index = $j; 18902 18903 $old_line->set_alignments(@new_alignments); 18904 $old_line->set_jmax($jmax); 18905 $old_line->set_rtokens( \@new_matching_tokens ); 18906 $old_line->set_rfields( \@new_fields ); 18907 $old_line->set_rpatterns( \@$rpatterns ); 18908 } 18909} 18910 18911# create an empty side comment if none exists 18912sub make_side_comment { 18913 my $new_line = shift; 18914 my $level_end = shift; 18915 my $jmax = $new_line->get_jmax(); 18916 my $rtokens = $new_line->get_rtokens(); 18917 18918 # if line does not have a side comment... 18919 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { 18920 my $rfields = $new_line->get_rfields(); 18921 my $rpatterns = $new_line->get_rpatterns(); 18922 $$rtokens[$jmax] = '#'; 18923 $$rfields[ ++$jmax ] = ''; 18924 $$rpatterns[$jmax] = '#'; 18925 $new_line->set_jmax($jmax); 18926 $new_line->set_jmax_original_line($jmax); 18927 } 18928 18929 # line has a side comment.. 18930 else { 18931 18932 # don't remember old side comment location for very long 18933 my $line_number = $vertical_aligner_self->get_output_line_number(); 18934 my $rfields = $new_line->get_rfields(); 18935 if ( 18936 $line_number - $last_side_comment_line_number > 12 18937 18938 # and don't remember comment location across block level changes 18939 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) 18940 ) 18941 { 18942 forget_side_comment(); 18943 } 18944 $last_side_comment_line_number = $line_number; 18945 $last_side_comment_level = $level_end; 18946 } 18947} 18948 18949sub decide_if_list { 18950 18951 my $line = shift; 18952 18953 # A list will be taken to be a line with a forced break in which all 18954 # of the field separators are commas or comma-arrows (except for the 18955 # trailing #) 18956 18957 # List separator tokens are things like ',3' or '=>2', 18958 # where the trailing digit is the nesting depth. Allow braces 18959 # to allow nested list items. 18960 my $rtokens = $line->get_rtokens(); 18961 my $test_token = $$rtokens[0]; 18962 if ( $test_token =~ /^(\,|=>)/ ) { 18963 my $list_type = $test_token; 18964 my $jmax = $line->get_jmax(); 18965 18966 foreach ( 1 .. $jmax - 2 ) { 18967 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { 18968 $list_type = ""; 18969 last; 18970 } 18971 } 18972 $line->set_list_type($list_type); 18973 } 18974} 18975 18976sub eliminate_new_fields { 18977 18978 return unless ( $maximum_line_index >= 0 ); 18979 my ( $new_line, $old_line ) = @_; 18980 my $jmax = $new_line->get_jmax(); 18981 18982 my $old_rtokens = $old_line->get_rtokens(); 18983 my $rtokens = $new_line->get_rtokens(); 18984 my $is_assignment = 18985 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); 18986 18987 # must be monotonic variation 18988 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); 18989 18990 # must be more fields in the new line 18991 my $maximum_field_index = $old_line->get_jmax(); 18992 return unless ( $maximum_field_index < $jmax ); 18993 18994 unless ($is_assignment) { 18995 return 18996 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) 18997 ; # only if monotonic 18998 18999 # never combine fields of a comma list 19000 return 19001 unless ( $maximum_field_index > 1 ) 19002 && ( $new_line->get_list_type() !~ /^,/ ); 19003 } 19004 19005 my $rfields = $new_line->get_rfields(); 19006 my $rpatterns = $new_line->get_rpatterns(); 19007 my $old_rpatterns = $old_line->get_rpatterns(); 19008 19009 # loop over all OLD tokens except comment and check match 19010 my $match = 1; 19011 my $k; 19012 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { 19013 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) 19014 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) 19015 { 19016 $match = 0; 19017 last; 19018 } 19019 } 19020 19021 # first tokens agree, so combine extra new tokens 19022 if ($match) { 19023 for $k ( $maximum_field_index .. $jmax - 1 ) { 19024 19025 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; 19026 $$rfields[$k] = ""; 19027 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; 19028 $$rpatterns[$k] = ""; 19029 } 19030 19031 $$rtokens[ $maximum_field_index - 1 ] = '#'; 19032 $$rfields[$maximum_field_index] = $$rfields[$jmax]; 19033 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; 19034 $jmax = $maximum_field_index; 19035 } 19036 $new_line->set_jmax($jmax); 19037} 19038 19039sub fix_terminal_ternary { 19040 19041 # Add empty fields as necessary to align a ternary term 19042 # like this: 19043 # 19044 # my $leapyear = 19045 # $year % 4 ? 0 19046 # : $year % 100 ? 1 19047 # : $year % 400 ? 0 19048 # : 1; 19049 # 19050 # returns 1 if the terminal item should be indented 19051 19052 my ( $rfields, $rtokens, $rpatterns ) = @_; 19053 19054 my $jmax = @{$rfields} - 1; 19055 my $old_line = $group_lines[$maximum_line_index]; 19056 my $rfields_old = $old_line->get_rfields(); 19057 19058 my $rpatterns_old = $old_line->get_rpatterns(); 19059 my $rtokens_old = $old_line->get_rtokens(); 19060 my $maximum_field_index = $old_line->get_jmax(); 19061 19062 # look for the question mark after the : 19063 my ($jquestion); 19064 my $depth_question; 19065 my $pad = ""; 19066 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) { 19067 my $tok = $rtokens_old->[$j]; 19068 if ( $tok =~ /^\?(\d+)$/ ) { 19069 $depth_question = $1; 19070 19071 # depth must be correct 19072 next unless ( $depth_question eq $group_level ); 19073 19074 $jquestion = $j; 19075 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { 19076 $pad = " " x length($1); 19077 } 19078 else { 19079 return; # shouldn't happen 19080 } 19081 last; 19082 } 19083 } 19084 return unless ( defined($jquestion) ); # shouldn't happen 19085 19086 # Now splice the tokens and patterns of the previous line 19087 # into the else line to insure a match. Add empty fields 19088 # as necessary. 19089 my $jadd = $jquestion; 19090 19091 # Work on copies of the actual arrays in case we have 19092 # to return due to an error 19093 my @fields = @{$rfields}; 19094 my @patterns = @{$rpatterns}; 19095 my @tokens = @{$rtokens}; 19096 19097 VALIGN_DEBUG_FLAG_TERNARY && do { 19098 local $" = '><'; 19099 print "CURRENT FIELDS=<@{$rfields_old}>\n"; 19100 print "CURRENT TOKENS=<@{$rtokens_old}>\n"; 19101 print "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; 19102 print "UNMODIFIED FIELDS=<@{$rfields}>\n"; 19103 print "UNMODIFIED TOKENS=<@{$rtokens}>\n"; 19104 print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; 19105 }; 19106 19107 # handle cases of leading colon on this line 19108 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { 19109 19110 my ( $colon, $therest ) = ( $1, $2 ); 19111 19112 # Handle sub-case of first field with leading colon plus additional code 19113 # This is the usual situation as at the '1' below: 19114 # ... 19115 # : $year % 400 ? 0 19116 # : 1; 19117 if ($therest) { 19118 19119 # Split the first field after the leading colon and insert padding. 19120 # Note that this padding will remain even if the terminal value goes 19121 # out on a separate line. This does not seem to look to bad, so no 19122 # mechanism has been included to undo it. 19123 my $field1 = shift @fields; 19124 unshift @fields, ( $colon, $pad . $therest ); 19125 19126 # change the leading pattern from : to ? 19127 return unless ( $patterns[0] =~ s/^\:/?/ ); 19128 19129 # install leading tokens and patterns of existing line 19130 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 19131 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 19132 19133 # insert appropriate number of empty fields 19134 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 19135 } 19136 19137 # handle sub-case of first field just equal to leading colon. 19138 # This can happen for example in the example below where 19139 # the leading '(' would create a new alignment token 19140 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) 19141 # : ( $mname = $name . '->' ); 19142 else { 19143 19144 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen 19145 19146 # prepend a leading ? onto the second pattern 19147 $patterns[1] = "?b" . $patterns[1]; 19148 19149 # pad the second field 19150 $fields[1] = $pad . $fields[1]; 19151 19152 # install leading tokens and patterns of existing line, replacing 19153 # leading token and inserting appropriate number of empty fields 19154 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); 19155 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); 19156 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 19157 } 19158 } 19159 19160 # Handle case of no leading colon on this line. This will 19161 # be the case when -wba=':' is used. For example, 19162 # $year % 400 ? 0 : 19163 # 1; 19164 else { 19165 19166 # install leading tokens and patterns of existing line 19167 $patterns[0] = '?' . 'b' . $patterns[0]; 19168 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 19169 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 19170 19171 # insert appropriate number of empty fields 19172 $jadd = $jquestion + 1; 19173 $fields[0] = $pad . $fields[0]; 19174 splice( @fields, 0, 0, ('') x $jadd ) if $jadd; 19175 } 19176 19177 VALIGN_DEBUG_FLAG_TERNARY && do { 19178 local $" = '><'; 19179 print "MODIFIED TOKENS=<@tokens>\n"; 19180 print "MODIFIED PATTERNS=<@patterns>\n"; 19181 print "MODIFIED FIELDS=<@fields>\n"; 19182 }; 19183 19184 # all ok .. update the arrays 19185 @{$rfields} = @fields; 19186 @{$rtokens} = @tokens; 19187 @{$rpatterns} = @patterns; 19188 19189 # force a flush after this line 19190 return $jquestion; 19191} 19192 19193sub fix_terminal_else { 19194 19195 # Add empty fields as necessary to align a balanced terminal 19196 # else block to a previous if/elsif/unless block, 19197 # like this: 19198 # 19199 # if ( 1 || $x ) { print "ok 13\n"; } 19200 # else { print "not ok 13\n"; } 19201 # 19202 # returns 1 if the else block should be indented 19203 # 19204 my ( $rfields, $rtokens, $rpatterns ) = @_; 19205 my $jmax = @{$rfields} - 1; 19206 return unless ( $jmax > 0 ); 19207 19208 # check for balanced else block following if/elsif/unless 19209 my $rfields_old = $current_line->get_rfields(); 19210 19211 # TBD: add handling for 'case' 19212 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); 19213 19214 # look for the opening brace after the else, and extrace the depth 19215 my $tok_brace = $rtokens->[0]; 19216 my $depth_brace; 19217 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } 19218 19219 # probably: "else # side_comment" 19220 else { return } 19221 19222 my $rpatterns_old = $current_line->get_rpatterns(); 19223 my $rtokens_old = $current_line->get_rtokens(); 19224 my $maximum_field_index = $current_line->get_jmax(); 19225 19226 # be sure the previous if/elsif is followed by an opening paren 19227 my $jparen = 0; 19228 my $tok_paren = '(' . $depth_brace; 19229 my $tok_test = $rtokens_old->[$jparen]; 19230 return unless ( $tok_test eq $tok_paren ); # shouldn't happen 19231 19232 # Now find the opening block brace 19233 my ($jbrace); 19234 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) { 19235 my $tok = $rtokens_old->[$j]; 19236 if ( $tok eq $tok_brace ) { 19237 $jbrace = $j; 19238 last; 19239 } 19240 } 19241 return unless ( defined($jbrace) ); # shouldn't happen 19242 19243 # Now splice the tokens and patterns of the previous line 19244 # into the else line to insure a match. Add empty fields 19245 # as necessary. 19246 my $jadd = $jbrace - $jparen; 19247 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); 19248 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); 19249 splice( @{$rfields}, 1, 0, ('') x $jadd ); 19250 19251 # force a flush after this line if it does not follow a case 19252 return $jbrace 19253 unless ( $rfields_old->[0] =~ /^case\s*$/ ); 19254} 19255 19256{ # sub check_match 19257 my %is_good_alignment; 19258 19259 BEGIN { 19260 19261 # Vertically aligning on certain "good" tokens is usually okay 19262 # so we can be less restrictive in marginal cases. 19263 @_ = qw( { ? => = ); 19264 push @_, (','); 19265 @is_good_alignment{@_} = (1) x scalar(@_); 19266 } 19267 19268 sub check_match { 19269 19270 # See if the current line matches the current vertical alignment group. 19271 # If not, flush the current group. 19272 my $new_line = shift; 19273 my $old_line = shift; 19274 19275 # uses global variables: 19276 # $previous_minimum_jmax_seen 19277 # $maximum_jmax_seen 19278 # $maximum_line_index 19279 # $marginal_match 19280 my $jmax = $new_line->get_jmax(); 19281 my $maximum_field_index = $old_line->get_jmax(); 19282 19283 # flush if this line has too many fields 19284 if ( $jmax > $maximum_field_index ) { goto NO_MATCH } 19285 19286 # flush if adding this line would make a non-monotonic field count 19287 if ( 19288 ( $maximum_field_index > $jmax ) # this has too few fields 19289 && ( 19290 ( $previous_minimum_jmax_seen < 19291 $jmax ) # and wouldn't be monotonic 19292 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) 19293 ) 19294 ) 19295 { 19296 goto NO_MATCH; 19297 } 19298 19299 # otherwise see if this line matches the current group 19300 my $jmax_original_line = $new_line->get_jmax_original_line(); 19301 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 19302 my $rtokens = $new_line->get_rtokens(); 19303 my $rfields = $new_line->get_rfields(); 19304 my $rpatterns = $new_line->get_rpatterns(); 19305 my $list_type = $new_line->get_list_type(); 19306 19307 my $group_list_type = $old_line->get_list_type(); 19308 my $old_rpatterns = $old_line->get_rpatterns(); 19309 my $old_rtokens = $old_line->get_rtokens(); 19310 19311 my $jlimit = $jmax - 1; 19312 if ( $maximum_field_index > $jmax ) { 19313 $jlimit = $jmax_original_line; 19314 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); 19315 } 19316 19317 # handle comma-separated lists .. 19318 if ( $group_list_type && ( $list_type eq $group_list_type ) ) { 19319 for my $j ( 0 .. $jlimit ) { 19320 my $old_tok = $$old_rtokens[$j]; 19321 next unless $old_tok; 19322 my $new_tok = $$rtokens[$j]; 19323 next unless $new_tok; 19324 19325 # lists always match ... 19326 # unless they would align any '=>'s with ','s 19327 goto NO_MATCH 19328 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ 19329 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); 19330 } 19331 } 19332 19333 # do detailed check for everything else except hanging side comments 19334 elsif ( !$is_hanging_side_comment ) { 19335 19336 my $leading_space_count = $new_line->get_leading_space_count(); 19337 19338 my $max_pad = 0; 19339 my $min_pad = 0; 19340 my $saw_good_alignment; 19341 19342 for my $j ( 0 .. $jlimit ) { 19343 19344 my $old_tok = $$old_rtokens[$j]; 19345 my $new_tok = $$rtokens[$j]; 19346 19347 # Note on encoding used for alignment tokens: 19348 # ------------------------------------------- 19349 # Tokens are "decorated" with information which can help 19350 # prevent unwanted alignments. Consider for example the 19351 # following two lines: 19352 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); 19353 # local ( $i, $f ) = &'bdiv( $xn, $xd ); 19354 # There are three alignment tokens in each line, a comma, 19355 # an =, and a comma. In the first line these three tokens 19356 # are encoded as: 19357 # ,4+local-18 =3 ,4+split-7 19358 # and in the second line they are encoded as 19359 # ,4+local-18 =3 ,4+&'bdiv-8 19360 # Tokens always at least have token name and nesting 19361 # depth. So in this example the ='s are at depth 3 and 19362 # the ,'s are at depth 4. This prevents aligning tokens 19363 # of different depths. Commas contain additional 19364 # information, as follows: 19365 # , {depth} + {container name} - {spaces to opening paren} 19366 # This allows us to reject matching the rightmost commas 19367 # in the above two lines, since they are for different 19368 # function calls. This encoding is done in 19369 # 'sub send_lines_to_vertical_aligner'. 19370 19371 # Pick off actual token. 19372 # Everything up to the first digit is the actual token. 19373 my $alignment_token = $new_tok; 19374 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } 19375 19376 # see if the decorated tokens match 19377 my $tokens_match = $new_tok eq $old_tok 19378 19379 # Exception for matching terminal : of ternary statement.. 19380 # consider containers prefixed by ? and : a match 19381 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); 19382 19383 # No match if the alignment tokens differ... 19384 if ( !$tokens_match ) { 19385 19386 # ...Unless this is a side comment 19387 if ( 19388 $j == $jlimit 19389 19390 # and there is either at least one alignment token 19391 # or this is a single item following a list. This 19392 # latter rule is required for 'December' to join 19393 # the following list: 19394 # my (@months) = ( 19395 # '', 'January', 'February', 'March', 19396 # 'April', 'May', 'June', 'July', 19397 # 'August', 'September', 'October', 'November', 19398 # 'December' 19399 # ); 19400 # If it doesn't then the -lp formatting will fail. 19401 && ( $j > 0 || $old_tok =~ /^,/ ) 19402 ) 19403 { 19404 $marginal_match = 1 19405 if ( $marginal_match == 0 19406 && $maximum_line_index == 0 ); 19407 last; 19408 } 19409 19410 goto NO_MATCH; 19411 } 19412 19413 # Calculate amount of padding required to fit this in. 19414 # $pad is the number of spaces by which we must increase 19415 # the current field to squeeze in this field. 19416 my $pad = 19417 length( $$rfields[$j] ) - $old_line->current_field_width($j); 19418 if ( $j == 0 ) { $pad += $leading_space_count; } 19419 19420 # remember max pads to limit marginal cases 19421 if ( $alignment_token ne '#' ) { 19422 if ( $pad > $max_pad ) { $max_pad = $pad } 19423 if ( $pad < $min_pad ) { $min_pad = $pad } 19424 } 19425 if ( $is_good_alignment{$alignment_token} ) { 19426 $saw_good_alignment = 1; 19427 } 19428 19429 # If patterns don't match, we have to be careful... 19430 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { 19431 19432 # flag this as a marginal match since patterns differ 19433 $marginal_match = 1 19434 if ( $marginal_match == 0 && $maximum_line_index == 0 ); 19435 19436 # We have to be very careful about aligning commas 19437 # when the pattern's don't match, because it can be 19438 # worse to create an alignment where none is needed 19439 # than to omit one. Here's an example where the ','s 19440 # are not in named continers. The first line below 19441 # should not match the next two: 19442 # ( $a, $b ) = ( $b, $r ); 19443 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); 19444 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); 19445 if ( $alignment_token eq ',' ) { 19446 19447 # do not align commas unless they are in named containers 19448 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); 19449 } 19450 19451 # do not align parens unless patterns match; 19452 # large ugly spaces can occur in math expressions. 19453 elsif ( $alignment_token eq '(' ) { 19454 19455 # But we can allow a match if the parens don't 19456 # require any padding. 19457 if ( $pad != 0 ) { goto NO_MATCH } 19458 } 19459 19460 # Handle an '=' alignment with different patterns to 19461 # the left. 19462 elsif ( $alignment_token eq '=' ) { 19463 19464 # It is best to be a little restrictive when 19465 # aligning '=' tokens. Here is an example of 19466 # two lines that we will not align: 19467 # my $variable=6; 19468 # $bb=4; 19469 # The problem is that one is a 'my' declaration, 19470 # and the other isn't, so they're not very similar. 19471 # We will filter these out by comparing the first 19472 # letter of the pattern. This is crude, but works 19473 # well enough. 19474 if ( 19475 substr( $$old_rpatterns[$j], 0, 1 ) ne 19476 substr( $$rpatterns[$j], 0, 1 ) ) 19477 { 19478 goto NO_MATCH; 19479 } 19480 19481 # If we pass that test, we'll call it a marginal match. 19482 # Here is an example of a marginal match: 19483 # $done{$$op} = 1; 19484 # $op = compile_bblock($op); 19485 # The left tokens are both identifiers, but 19486 # one accesses a hash and the other doesn't. 19487 # We'll let this be a tentative match and undo 19488 # it later if we don't find more than 2 lines 19489 # in the group. 19490 elsif ( $maximum_line_index == 0 ) { 19491 $marginal_match = 19492 2; # =2 prevents being undone below 19493 } 19494 } 19495 } 19496 19497 # Don't let line with fewer fields increase column widths 19498 # ( align3.t ) 19499 if ( $maximum_field_index > $jmax ) { 19500 19501 # Exception: suspend this rule to allow last lines to join 19502 if ( $pad > 0 ) { goto NO_MATCH; } 19503 } 19504 } ## end for my $j ( 0 .. $jlimit) 19505 19506 # Turn off the "marginal match" flag in some cases... 19507 # A "marginal match" occurs when the alignment tokens agree 19508 # but there are differences in the other tokens (patterns). 19509 # If we leave the marginal match flag set, then the rule is that we 19510 # will align only if there are more than two lines in the group. 19511 # We will turn of the flag if we almost have a match 19512 # and either we have seen a good alignment token or we 19513 # just need a small pad (2 spaces) to fit. These rules are 19514 # the result of experimentation. Tokens which misaligned by just 19515 # one or two characters are annoying. On the other hand, 19516 # large gaps to less important alignment tokens are also annoying. 19517 if ( $marginal_match == 1 19518 && $jmax == $maximum_field_index 19519 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) 19520 ) 19521 { 19522 $marginal_match = 0; 19523 } 19524 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; 19525 } 19526 19527 # We have a match (even if marginal). 19528 # If the current line has fewer fields than the current group 19529 # but otherwise matches, copy the remaining group fields to 19530 # make it a perfect match. 19531 if ( $maximum_field_index > $jmax ) { 19532 my $comment = $$rfields[$jmax]; 19533 for $jmax ( $jlimit .. $maximum_field_index ) { 19534 $$rtokens[$jmax] = $$old_rtokens[$jmax]; 19535 $$rfields[ ++$jmax ] = ''; 19536 $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; 19537 } 19538 $$rfields[$jmax] = $comment; 19539 $new_line->set_jmax($jmax); 19540 } 19541 return; 19542 19543 NO_MATCH: 19544 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; 19545 my_flush(); 19546 return; 19547 } 19548} 19549 19550sub check_fit { 19551 19552 return unless ( $maximum_line_index >= 0 ); 19553 my $new_line = shift; 19554 my $old_line = shift; 19555 19556 my $jmax = $new_line->get_jmax(); 19557 my $leading_space_count = $new_line->get_leading_space_count(); 19558 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 19559 my $rtokens = $new_line->get_rtokens(); 19560 my $rfields = $new_line->get_rfields(); 19561 my $rpatterns = $new_line->get_rpatterns(); 19562 19563 my $group_list_type = $group_lines[0]->get_list_type(); 19564 19565 my $padding_so_far = 0; 19566 my $padding_available = $old_line->get_available_space_on_right(); 19567 19568 # save current columns in case this doesn't work 19569 save_alignment_columns(); 19570 19571 my ( $j, $pad, $eight ); 19572 my $maximum_field_index = $old_line->get_jmax(); 19573 for $j ( 0 .. $jmax ) { 19574 19575 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); 19576 19577 if ( $j == 0 ) { 19578 $pad += $leading_space_count; 19579 } 19580 19581 # remember largest gap of the group, excluding gap to side comment 19582 if ( $pad < 0 19583 && $group_maximum_gap < -$pad 19584 && $j > 0 19585 && $j < $jmax - 1 ) 19586 { 19587 $group_maximum_gap = -$pad; 19588 } 19589 19590 next if $pad < 0; 19591 19592 ## This patch helps sometimes, but it doesn't check to see if 19593 ## the line is too long even without the side comment. It needs 19594 ## to be reworked. 19595 ##don't let a long token with no trailing side comment push 19596 ##side comments out, or end a group. (sidecmt1.t) 19597 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0); 19598 19599 # This line will need space; lets see if we want to accept it.. 19600 if ( 19601 19602 # not if this won't fit 19603 ( $pad > $padding_available ) 19604 19605 # previously, there were upper bounds placed on padding here 19606 # (maximum_whitespace_columns), but they were not really helpful 19607 19608 ) 19609 { 19610 19611 # revert to starting state then flush; things didn't work out 19612 restore_alignment_columns(); 19613 my_flush(); 19614 last; 19615 } 19616 19617 # patch to avoid excessive gaps in previous lines, 19618 # due to a line of fewer fields. 19619 # return join( ".", 19620 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, 19621 # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); 19622 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); 19623 19624 # looks ok, squeeze this field in 19625 $old_line->increase_field_width( $j, $pad ); 19626 $padding_available -= $pad; 19627 19628 # remember largest gap of the group, excluding gap to side comment 19629 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { 19630 $group_maximum_gap = $pad; 19631 } 19632 } 19633} 19634 19635sub accept_line { 19636 19637 # The current line either starts a new alignment group or is 19638 # accepted into the current alignment group. 19639 my $new_line = shift; 19640 $group_lines[ ++$maximum_line_index ] = $new_line; 19641 19642 # initialize field lengths if starting new group 19643 if ( $maximum_line_index == 0 ) { 19644 19645 my $jmax = $new_line->get_jmax(); 19646 my $rfields = $new_line->get_rfields(); 19647 my $rtokens = $new_line->get_rtokens(); 19648 my $j; 19649 my $col = $new_line->get_leading_space_count(); 19650 19651 for $j ( 0 .. $jmax ) { 19652 $col += length( $$rfields[$j] ); 19653 19654 # create initial alignments for the new group 19655 my $token = ""; 19656 if ( $j < $jmax ) { $token = $$rtokens[$j] } 19657 my $alignment = make_alignment( $col, $token ); 19658 $new_line->set_alignment( $j, $alignment ); 19659 } 19660 19661 $maximum_jmax_seen = $jmax; 19662 $minimum_jmax_seen = $jmax; 19663 } 19664 19665 # use previous alignments otherwise 19666 else { 19667 my @new_alignments = 19668 $group_lines[ $maximum_line_index - 1 ]->get_alignments(); 19669 $new_line->set_alignments(@new_alignments); 19670 } 19671 19672 # remember group jmax extremes for next call to append_line 19673 $previous_minimum_jmax_seen = $minimum_jmax_seen; 19674 $previous_maximum_jmax_seen = $maximum_jmax_seen; 19675} 19676 19677sub dump_array { 19678 19679 # debug routine to dump array contents 19680 local $" = ')('; 19681 print "(@_)\n"; 19682} 19683 19684# flush() sends the current Perl::Tidy::VerticalAligner group down the 19685# pipeline to Perl::Tidy::FileWriter. 19686 19687# This is the external flush, which also empties the cache 19688sub flush { 19689 19690 if ( $maximum_line_index < 0 ) { 19691 if ($cached_line_type) { 19692 $seqno_string = $cached_seqno_string; 19693 entab_and_output( $cached_line_text, 19694 $cached_line_leading_space_count, 19695 $last_group_level_written ); 19696 $cached_line_type = 0; 19697 $cached_line_text = ""; 19698 $cached_seqno_string = ""; 19699 } 19700 } 19701 else { 19702 my_flush(); 19703 } 19704} 19705 19706# This is the internal flush, which leaves the cache intact 19707sub my_flush { 19708 19709 return if ( $maximum_line_index < 0 ); 19710 19711 # handle a group of comment lines 19712 if ( $group_type eq 'COMMENT' ) { 19713 19714 VALIGN_DEBUG_FLAG_APPEND0 && do { 19715 my ( $a, $b, $c ) = caller(); 19716 print 19717"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; 19718 19719 }; 19720 my $leading_space_count = $comment_leading_space_count; 19721 my $leading_string = get_leading_string($leading_space_count); 19722 19723 # zero leading space count if any lines are too long 19724 my $max_excess = 0; 19725 for my $i ( 0 .. $maximum_line_index ) { 19726 my $str = $group_lines[$i]; 19727 my $excess = 19728 length($str) + $leading_space_count - $rOpts_maximum_line_length; 19729 if ( $excess > $max_excess ) { 19730 $max_excess = $excess; 19731 } 19732 } 19733 19734 if ( $max_excess > 0 ) { 19735 $leading_space_count -= $max_excess; 19736 if ( $leading_space_count < 0 ) { $leading_space_count = 0 } 19737 $last_outdented_line_at = 19738 $file_writer_object->get_output_line_number(); 19739 unless ($outdented_line_count) { 19740 $first_outdented_line_at = $last_outdented_line_at; 19741 } 19742 $outdented_line_count += ( $maximum_line_index + 1 ); 19743 } 19744 19745 # write the group of lines 19746 my $outdent_long_lines = 0; 19747 for my $i ( 0 .. $maximum_line_index ) { 19748 write_leader_and_string( $leading_space_count, $group_lines[$i], 0, 19749 $outdent_long_lines, "" ); 19750 } 19751 } 19752 19753 # handle a group of code lines 19754 else { 19755 19756 VALIGN_DEBUG_FLAG_APPEND0 && do { 19757 my $group_list_type = $group_lines[0]->get_list_type(); 19758 my ( $a, $b, $c ) = caller(); 19759 my $maximum_field_index = $group_lines[0]->get_jmax(); 19760 print 19761"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; 19762 19763 }; 19764 19765 # some small groups are best left unaligned 19766 my $do_not_align = decide_if_aligned(); 19767 19768 # optimize side comment location 19769 $do_not_align = adjust_side_comment($do_not_align); 19770 19771 # recover spaces for -lp option if possible 19772 my $extra_leading_spaces = get_extra_leading_spaces(); 19773 19774 # all lines of this group have the same basic leading spacing 19775 my $group_leader_length = $group_lines[0]->get_leading_space_count(); 19776 19777 # add extra leading spaces if helpful 19778 my $min_ci_gap = improve_continuation_indentation( $do_not_align, 19779 $group_leader_length ); 19780 19781 # loop to output all lines 19782 for my $i ( 0 .. $maximum_line_index ) { 19783 my $line = $group_lines[$i]; 19784 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align, 19785 $group_leader_length, $extra_leading_spaces ); 19786 } 19787 } 19788 initialize_for_new_group(); 19789} 19790 19791sub decide_if_aligned { 19792 19793 # Do not try to align two lines which are not really similar 19794 return unless $maximum_line_index == 1; 19795 return if ($is_matching_terminal_line); 19796 19797 my $group_list_type = $group_lines[0]->get_list_type(); 19798 19799 my $do_not_align = ( 19800 19801 # always align lists 19802 !$group_list_type 19803 19804 && ( 19805 19806 # don't align if it was just a marginal match 19807 $marginal_match 19808 19809 # don't align two lines with big gap 19810 || $group_maximum_gap > 12 19811 19812 # or lines with differing number of alignment tokens 19813 # TODO: this could be improved. It occasionally rejects 19814 # good matches. 19815 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen 19816 ) 19817 ); 19818 19819 # But try to convert them into a simple comment group if the first line 19820 # a has side comment 19821 my $rfields = $group_lines[0]->get_rfields(); 19822 my $maximum_field_index = $group_lines[0]->get_jmax(); 19823 if ( $do_not_align 19824 && ( $maximum_line_index > 0 ) 19825 && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) 19826 { 19827 combine_fields(); 19828 $do_not_align = 0; 19829 } 19830 return $do_not_align; 19831} 19832 19833sub adjust_side_comment { 19834 19835 my $do_not_align = shift; 19836 19837 # let's see if we can move the side comment field out a little 19838 # to improve readability (the last field is always a side comment field) 19839 my $have_side_comment = 0; 19840 my $first_side_comment_line = -1; 19841 my $maximum_field_index = $group_lines[0]->get_jmax(); 19842 for my $i ( 0 .. $maximum_line_index ) { 19843 my $line = $group_lines[$i]; 19844 19845 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { 19846 $have_side_comment = 1; 19847 $first_side_comment_line = $i; 19848 last; 19849 } 19850 } 19851 19852 my $kmax = $maximum_field_index + 1; 19853 19854 if ($have_side_comment) { 19855 19856 my $line = $group_lines[0]; 19857 19858 # the maximum space without exceeding the line length: 19859 my $avail = $line->get_available_space_on_right(); 19860 19861 # try to use the previous comment column 19862 my $side_comment_column = $line->get_column( $kmax - 2 ); 19863 my $move = $last_comment_column - $side_comment_column; 19864 19865## my $sc_line0 = $side_comment_history[0]->[0]; 19866## my $sc_col0 = $side_comment_history[0]->[1]; 19867## my $sc_line1 = $side_comment_history[1]->[0]; 19868## my $sc_col1 = $side_comment_history[1]->[1]; 19869## my $sc_line2 = $side_comment_history[2]->[0]; 19870## my $sc_col2 = $side_comment_history[2]->[1]; 19871## 19872## # FUTURE UPDATES: 19873## # Be sure to ignore 'do not align' and '} # end comments' 19874## # Find first $move > 0 and $move <= $avail as follows: 19875## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 19876## # 2. try sc_col2 if (line-sc_line2) < 12 19877## # 3. try min possible space, plus up to 8, 19878## # 4. try min possible space 19879 19880 if ( $kmax > 0 && !$do_not_align ) { 19881 19882 # but if this doesn't work, give up and use the minimum space 19883 if ( $move > $avail ) { 19884 $move = $rOpts_minimum_space_to_comment - 1; 19885 } 19886 19887 # but we want some minimum space to the comment 19888 my $min_move = $rOpts_minimum_space_to_comment - 1; 19889 if ( $move >= 0 19890 && $last_side_comment_length > 0 19891 && ( $first_side_comment_line == 0 ) 19892 && $group_level == $last_group_level_written ) 19893 { 19894 $min_move = 0; 19895 } 19896 19897 if ( $move < $min_move ) { 19898 $move = $min_move; 19899 } 19900 19901 # prevously, an upper bound was placed on $move here, 19902 # (maximum_space_to_comment), but it was not helpful 19903 19904 # don't exceed the available space 19905 if ( $move > $avail ) { $move = $avail } 19906 19907 # we can only increase space, never decrease 19908 if ( $move > 0 ) { 19909 $line->increase_field_width( $maximum_field_index - 1, $move ); 19910 } 19911 19912 # remember this column for the next group 19913 $last_comment_column = $line->get_column( $kmax - 2 ); 19914 } 19915 else { 19916 19917 # try to at least line up the existing side comment location 19918 if ( $kmax > 0 && $move > 0 && $move < $avail ) { 19919 $line->increase_field_width( $maximum_field_index - 1, $move ); 19920 $do_not_align = 0; 19921 } 19922 19923 # reset side comment column if we can't align 19924 else { 19925 forget_side_comment(); 19926 } 19927 } 19928 } 19929 return $do_not_align; 19930} 19931 19932sub improve_continuation_indentation { 19933 my ( $do_not_align, $group_leader_length ) = @_; 19934 19935 # See if we can increase the continuation indentation 19936 # to move all continuation lines closer to the next field 19937 # (unless it is a comment). 19938 # 19939 # '$min_ci_gap'is the extra indentation that we may need to introduce. 19940 # We will only introduce this to fields which already have some ci. 19941 # Without this variable, we would occasionally get something like this 19942 # (Complex.pm): 19943 # 19944 # use overload '+' => \&plus, 19945 # '-' => \&minus, 19946 # '*' => \&multiply, 19947 # ... 19948 # 'tan' => \&tan, 19949 # 'atan2' => \&atan2, 19950 # 19951 # Whereas with this variable, we can shift variables over to get this: 19952 # 19953 # use overload '+' => \&plus, 19954 # '-' => \&minus, 19955 # '*' => \&multiply, 19956 # ... 19957 # 'tan' => \&tan, 19958 # 'atan2' => \&atan2, 19959 19960 ## BUB: Deactivated#################### 19961 # The trouble with this patch is that it may, for example, 19962 # move in some 'or's or ':'s, and leave some out, so that the 19963 # left edge alignment suffers. 19964 return 0; 19965 ########################################### 19966 19967 my $maximum_field_index = $group_lines[0]->get_jmax(); 19968 19969 my $min_ci_gap = $rOpts_maximum_line_length; 19970 if ( $maximum_field_index > 1 && !$do_not_align ) { 19971 19972 for my $i ( 0 .. $maximum_line_index ) { 19973 my $line = $group_lines[$i]; 19974 my $leading_space_count = $line->get_leading_space_count(); 19975 my $rfields = $line->get_rfields(); 19976 19977 my $gap = 19978 $line->get_column(0) - 19979 $leading_space_count - 19980 length( $$rfields[0] ); 19981 19982 if ( $leading_space_count > $group_leader_length ) { 19983 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } 19984 } 19985 } 19986 19987 if ( $min_ci_gap >= $rOpts_maximum_line_length ) { 19988 $min_ci_gap = 0; 19989 } 19990 } 19991 else { 19992 $min_ci_gap = 0; 19993 } 19994 return $min_ci_gap; 19995} 19996 19997sub write_vertically_aligned_line { 19998 19999 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, 20000 $extra_leading_spaces ) 20001 = @_; 20002 my $rfields = $line->get_rfields(); 20003 my $leading_space_count = $line->get_leading_space_count(); 20004 my $outdent_long_lines = $line->get_outdent_long_lines(); 20005 my $maximum_field_index = $line->get_jmax(); 20006 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); 20007 20008 # add any extra spaces 20009 if ( $leading_space_count > $group_leader_length ) { 20010 $leading_space_count += $min_ci_gap; 20011 } 20012 20013 my $str = $$rfields[0]; 20014 20015 # loop to concatenate all fields of this line and needed padding 20016 my $total_pad_count = 0; 20017 my ( $j, $pad ); 20018 for $j ( 1 .. $maximum_field_index ) { 20019 20020 # skip zero-length side comments 20021 last 20022 if ( ( $j == $maximum_field_index ) 20023 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) 20024 ); 20025 20026 # compute spaces of padding before this field 20027 my $col = $line->get_column( $j - 1 ); 20028 $pad = $col - ( length($str) + $leading_space_count ); 20029 20030 if ($do_not_align) { 20031 $pad = 20032 ( $j < $maximum_field_index ) 20033 ? 0 20034 : $rOpts_minimum_space_to_comment - 1; 20035 } 20036 20037 # if the -fpsc flag is set, move the side comment to the selected 20038 # column if and only if it is possible, ignoring constraints on 20039 # line length and minimum space to comment 20040 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) 20041 { 20042 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; 20043 if ( $newpad >= 0 ) { $pad = $newpad; } 20044 } 20045 20046 # accumulate the padding 20047 if ( $pad > 0 ) { $total_pad_count += $pad; } 20048 20049 # add this field 20050 if ( !defined $$rfields[$j] ) { 20051 write_diagnostics("UNDEFined field at j=$j\n"); 20052 } 20053 20054 # only add padding when we have a finite field; 20055 # this avoids extra terminal spaces if we have empty fields 20056 if ( length( $$rfields[$j] ) > 0 ) { 20057 $str .= ' ' x $total_pad_count; 20058 $total_pad_count = 0; 20059 $str .= $$rfields[$j]; 20060 } 20061 else { 20062 $total_pad_count = 0; 20063 } 20064 20065 # update side comment history buffer 20066 if ( $j == $maximum_field_index ) { 20067 my $lineno = $file_writer_object->get_output_line_number(); 20068 shift @side_comment_history; 20069 push @side_comment_history, [ $lineno, $col ]; 20070 } 20071 } 20072 20073 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); 20074 20075 # ship this line off 20076 write_leader_and_string( $leading_space_count + $extra_leading_spaces, 20077 $str, $side_comment_length, $outdent_long_lines, 20078 $rvertical_tightness_flags ); 20079} 20080 20081sub get_extra_leading_spaces { 20082 20083 #---------------------------------------------------------- 20084 # Define any extra indentation space (for the -lp option). 20085 # Here is why: 20086 # If a list has side comments, sub scan_list must dump the 20087 # list before it sees everything. When this happens, it sets 20088 # the indentation to the standard scheme, but notes how 20089 # many spaces it would have liked to use. We may be able 20090 # to recover that space here in the event that that all of the 20091 # lines of a list are back together again. 20092 #---------------------------------------------------------- 20093 20094 my $extra_leading_spaces = 0; 20095 if ($extra_indent_ok) { 20096 my $object = $group_lines[0]->get_indentation(); 20097 if ( ref($object) ) { 20098 my $extra_indentation_spaces_wanted = 20099 get_RECOVERABLE_SPACES($object); 20100 20101 # all indentation objects must be the same 20102 my $i; 20103 for $i ( 1 .. $maximum_line_index ) { 20104 if ( $object != $group_lines[$i]->get_indentation() ) { 20105 $extra_indentation_spaces_wanted = 0; 20106 last; 20107 } 20108 } 20109 20110 if ($extra_indentation_spaces_wanted) { 20111 20112 # the maximum space without exceeding the line length: 20113 my $avail = $group_lines[0]->get_available_space_on_right(); 20114 $extra_leading_spaces = 20115 ( $avail > $extra_indentation_spaces_wanted ) 20116 ? $extra_indentation_spaces_wanted 20117 : $avail; 20118 20119 # update the indentation object because with -icp the terminal 20120 # ');' will use the same adjustment. 20121 $object->permanently_decrease_AVAILABLE_SPACES( 20122 -$extra_leading_spaces ); 20123 } 20124 } 20125 } 20126 return $extra_leading_spaces; 20127} 20128 20129sub combine_fields { 20130 20131 # combine all fields except for the comment field ( sidecmt.t ) 20132 # Uses global variables: 20133 # @group_lines 20134 # $maximum_line_index 20135 my ( $j, $k ); 20136 my $maximum_field_index = $group_lines[0]->get_jmax(); 20137 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { 20138 my $line = $group_lines[$j]; 20139 my $rfields = $line->get_rfields(); 20140 foreach ( 1 .. $maximum_field_index - 1 ) { 20141 $$rfields[0] .= $$rfields[$_]; 20142 } 20143 $$rfields[1] = $$rfields[$maximum_field_index]; 20144 20145 $line->set_jmax(1); 20146 $line->set_column( 0, 0 ); 20147 $line->set_column( 1, 0 ); 20148 20149 } 20150 $maximum_field_index = 1; 20151 20152 for $j ( 0 .. $maximum_line_index ) { 20153 my $line = $group_lines[$j]; 20154 my $rfields = $line->get_rfields(); 20155 for $k ( 0 .. $maximum_field_index ) { 20156 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); 20157 if ( $k == 0 ) { 20158 $pad += $group_lines[$j]->get_leading_space_count(); 20159 } 20160 20161 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } 20162 20163 } 20164 } 20165} 20166 20167sub get_output_line_number { 20168 20169 # the output line number reported to a caller is the number of items 20170 # written plus the number of items in the buffer 20171 my $self = shift; 20172 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); 20173} 20174 20175sub write_leader_and_string { 20176 20177 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, 20178 $rvertical_tightness_flags ) 20179 = @_; 20180 20181 # handle outdenting of long lines: 20182 if ($outdent_long_lines) { 20183 my $excess = 20184 length($str) - 20185 $side_comment_length + 20186 $leading_space_count - 20187 $rOpts_maximum_line_length; 20188 if ( $excess > 0 ) { 20189 $leading_space_count = 0; 20190 $last_outdented_line_at = 20191 $file_writer_object->get_output_line_number(); 20192 20193 unless ($outdented_line_count) { 20194 $first_outdented_line_at = $last_outdented_line_at; 20195 } 20196 $outdented_line_count++; 20197 } 20198 } 20199 20200 # Make preliminary leading whitespace. It could get changed 20201 # later by entabbing, so we have to keep track of any changes 20202 # to the leading_space_count from here on. 20203 my $leading_string = 20204 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; 20205 20206 # Unpack any recombination data; it was packed by 20207 # sub send_lines_to_vertical_aligner. Contents: 20208 # 20209 # [0] type: 1=opening 2=closing 3=opening block brace 20210 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok 20211 # if closing: spaces of padding to use 20212 # [2] sequence number of container 20213 # [3] valid flag: do not append if this flag is false 20214 # 20215 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, 20216 $seqno_end ); 20217 if ($rvertical_tightness_flags) { 20218 ( 20219 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, 20220 $seqno_end 20221 ) = @{$rvertical_tightness_flags}; 20222 } 20223 20224 $seqno_string = $seqno_end; 20225 20226 # handle any cached line .. 20227 # either append this line to it or write it out 20228 if ( length($cached_line_text) ) { 20229 20230 if ( !$cached_line_valid ) { 20231 entab_and_output( $cached_line_text, 20232 $cached_line_leading_space_count, 20233 $last_group_level_written ); 20234 } 20235 20236 # handle cached line with opening container token 20237 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { 20238 20239 my $gap = $leading_space_count - length($cached_line_text); 20240 20241 # handle option of just one tight opening per line: 20242 if ( $cached_line_flag == 1 ) { 20243 if ( defined($open_or_close) && $open_or_close == 1 ) { 20244 $gap = -1; 20245 } 20246 } 20247 20248 if ( $gap >= 0 ) { 20249 $leading_string = $cached_line_text . ' ' x $gap; 20250 $leading_space_count = $cached_line_leading_space_count; 20251 $seqno_string = $cached_seqno_string . ':' . $seqno_beg; 20252 } 20253 else { 20254 entab_and_output( $cached_line_text, 20255 $cached_line_leading_space_count, 20256 $last_group_level_written ); 20257 } 20258 } 20259 20260 # handle cached line to place before this closing container token 20261 else { 20262 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; 20263 20264 if ( length($test_line) <= $rOpts_maximum_line_length ) { 20265 20266 $seqno_string = $cached_seqno_string . ':' . $seqno_beg; 20267 20268 # Patch to outdent closing tokens ending # in ');' 20269 # If we are joining a line like ');' to a previous stacked 20270 # set of closing tokens, then decide if we may outdent the 20271 # combined stack to the indentation of the ');'. Since we 20272 # should not normally outdent any of the other tokens more than 20273 # the indentation of the lines that contained them, we will 20274 # only do this if all of the corresponding opening 20275 # tokens were on the same line. This can happen with 20276 # -sot and -sct. For example, it is ok here: 20277 # __PACKAGE__->load_components( qw( 20278 # PK::Auto 20279 # Core 20280 # )); 20281 # 20282 # But, for example, we do not outdent in this example because 20283 # that would put the closing sub brace out farther than the 20284 # opening sub brace: 20285 # 20286 # perltidy -sot -sct 20287 # $c->Tk::bind( 20288 # '<Control-f>' => sub { 20289 # my ($c) = @_; 20290 # my $e = $c->XEvent; 20291 # itemsUnderArea $c; 20292 # } ); 20293 # 20294 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { 20295 20296 # The way to tell this is if the stacked sequence numbers 20297 # of this output line are the reverse of the stacked 20298 # sequence numbers of the previous non-blank line of 20299 # sequence numbers. So we can join if the previous 20300 # nonblank string of tokens is the mirror image. For 20301 # example if stack )}] is 13:8:6 then we are looking for a 20302 # leading stack like [{( which is 6:8:13 We only need to 20303 # check the two ends, because the intermediate tokens must 20304 # fall in order. Note on speed: having to split on colons 20305 # and eliminate multiple colons might appear to be slow, 20306 # but it's not an issue because we almost never come 20307 # through here. In a typical file we don't. 20308 $seqno_string =~ s/^:+//; 20309 $last_nonblank_seqno_string =~ s/^:+//; 20310 $seqno_string =~ s/:+/:/g; 20311 $last_nonblank_seqno_string =~ s/:+/:/g; 20312 20313 # how many spaces can we outdent? 20314 my $diff = 20315 $cached_line_leading_space_count - $leading_space_count; 20316 if ( $diff > 0 20317 && length($seqno_string) 20318 && length($last_nonblank_seqno_string) == 20319 length($seqno_string) ) 20320 { 20321 my @seqno_last = 20322 ( split ':', $last_nonblank_seqno_string ); 20323 my @seqno_now = ( split ':', $seqno_string ); 20324 if ( $seqno_now[-1] == $seqno_last[0] 20325 && $seqno_now[0] == $seqno_last[-1] ) 20326 { 20327 20328 # OK to outdent .. 20329 # for absolute safety, be sure we only remove 20330 # whitespace 20331 my $ws = substr( $test_line, 0, $diff ); 20332 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { 20333 20334 $test_line = substr( $test_line, $diff ); 20335 $cached_line_leading_space_count -= $diff; 20336 } 20337 20338 # shouldn't happen, but not critical: 20339 ##else { 20340 ## ERROR transferring indentation here 20341 ##} 20342 } 20343 } 20344 } 20345 20346 $str = $test_line; 20347 $leading_string = ""; 20348 $leading_space_count = $cached_line_leading_space_count; 20349 } 20350 else { 20351 entab_and_output( $cached_line_text, 20352 $cached_line_leading_space_count, 20353 $last_group_level_written ); 20354 } 20355 } 20356 } 20357 $cached_line_type = 0; 20358 $cached_line_text = ""; 20359 20360 # make the line to be written 20361 my $line = $leading_string . $str; 20362 20363 # write or cache this line 20364 if ( !$open_or_close || $side_comment_length > 0 ) { 20365 entab_and_output( $line, $leading_space_count, $group_level ); 20366 } 20367 else { 20368 $cached_line_text = $line; 20369 $cached_line_type = $open_or_close; 20370 $cached_line_flag = $tightness_flag; 20371 $cached_seqno = $seqno; 20372 $cached_line_valid = $valid; 20373 $cached_line_leading_space_count = $leading_space_count; 20374 $cached_seqno_string = $seqno_string; 20375 } 20376 20377 $last_group_level_written = $group_level; 20378 $last_side_comment_length = $side_comment_length; 20379 $extra_indent_ok = 0; 20380} 20381 20382sub entab_and_output { 20383 my ( $line, $leading_space_count, $level ) = @_; 20384 20385 # The line is currently correct if there is no tabbing (recommended!) 20386 # We may have to lop off some leading spaces and replace with tabs. 20387 if ( $leading_space_count > 0 ) { 20388 20389 # Nothing to do if no tabs 20390 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) 20391 || $rOpts_indent_columns <= 0 ) 20392 { 20393 20394 # nothing to do 20395 } 20396 20397 # Handle entab option 20398 elsif ($rOpts_entab_leading_whitespace) { 20399 my $space_count = 20400 $leading_space_count % $rOpts_entab_leading_whitespace; 20401 my $tab_count = 20402 int( $leading_space_count / $rOpts_entab_leading_whitespace ); 20403 my $leading_string = "\t" x $tab_count . ' ' x $space_count; 20404 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { 20405 substr( $line, 0, $leading_space_count ) = $leading_string; 20406 } 20407 else { 20408 20409 # REMOVE AFTER TESTING 20410 # shouldn't happen - program error counting whitespace 20411 # we'll skip entabbing 20412 warning( 20413"Error entabbing in entab_and_output: expected count=$leading_space_count\n" 20414 ); 20415 } 20416 } 20417 20418 # Handle option of one tab per level 20419 else { 20420 my $leading_string = ( "\t" x $level ); 20421 my $space_count = 20422 $leading_space_count - $level * $rOpts_indent_columns; 20423 20424 # shouldn't happen: 20425 if ( $space_count < 0 ) { 20426 warning( 20427"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n" 20428 ); 20429 $leading_string = ( ' ' x $leading_space_count ); 20430 } 20431 else { 20432 $leading_string .= ( ' ' x $space_count ); 20433 } 20434 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { 20435 substr( $line, 0, $leading_space_count ) = $leading_string; 20436 } 20437 else { 20438 20439 # REMOVE AFTER TESTING 20440 # shouldn't happen - program error counting whitespace 20441 # we'll skip entabbing 20442 warning( 20443"Error entabbing in entab_and_output: expected count=$leading_space_count\n" 20444 ); 20445 } 20446 } 20447 } 20448 $file_writer_object->write_code_line( $line . "\n" ); 20449 if ($seqno_string) { 20450 $last_nonblank_seqno_string = $seqno_string; 20451 } 20452} 20453 20454{ # begin get_leading_string 20455 20456 my @leading_string_cache; 20457 20458 sub get_leading_string { 20459 20460 # define the leading whitespace string for this line.. 20461 my $leading_whitespace_count = shift; 20462 20463 # Handle case of zero whitespace, which includes multi-line quotes 20464 # (which may have a finite level; this prevents tab problems) 20465 if ( $leading_whitespace_count <= 0 ) { 20466 return ""; 20467 } 20468 20469 # look for previous result 20470 elsif ( $leading_string_cache[$leading_whitespace_count] ) { 20471 return $leading_string_cache[$leading_whitespace_count]; 20472 } 20473 20474 # must compute a string for this number of spaces 20475 my $leading_string; 20476 20477 # Handle simple case of no tabs 20478 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) 20479 || $rOpts_indent_columns <= 0 ) 20480 { 20481 $leading_string = ( ' ' x $leading_whitespace_count ); 20482 } 20483 20484 # Handle entab option 20485 elsif ($rOpts_entab_leading_whitespace) { 20486 my $space_count = 20487 $leading_whitespace_count % $rOpts_entab_leading_whitespace; 20488 my $tab_count = int( 20489 $leading_whitespace_count / $rOpts_entab_leading_whitespace ); 20490 $leading_string = "\t" x $tab_count . ' ' x $space_count; 20491 } 20492 20493 # Handle option of one tab per level 20494 else { 20495 $leading_string = ( "\t" x $group_level ); 20496 my $space_count = 20497 $leading_whitespace_count - $group_level * $rOpts_indent_columns; 20498 20499 # shouldn't happen: 20500 if ( $space_count < 0 ) { 20501 warning( 20502"Error in append_line: for level=$group_level count=$leading_whitespace_count\n" 20503 ); 20504 $leading_string = ( ' ' x $leading_whitespace_count ); 20505 } 20506 else { 20507 $leading_string .= ( ' ' x $space_count ); 20508 } 20509 } 20510 $leading_string_cache[$leading_whitespace_count] = $leading_string; 20511 return $leading_string; 20512 } 20513} # end get_leading_string 20514 20515sub report_anything_unusual { 20516 my $self = shift; 20517 if ( $outdented_line_count > 0 ) { 20518 write_logfile_entry( 20519 "$outdented_line_count long lines were outdented:\n"); 20520 write_logfile_entry( 20521 " First at output line $first_outdented_line_at\n"); 20522 20523 if ( $outdented_line_count > 1 ) { 20524 write_logfile_entry( 20525 " Last at output line $last_outdented_line_at\n"); 20526 } 20527 write_logfile_entry( 20528 " use -noll to prevent outdenting, -l=n to increase line length\n" 20529 ); 20530 write_logfile_entry("\n"); 20531 } 20532} 20533 20534##################################################################### 20535# 20536# the Perl::Tidy::FileWriter class writes the output file 20537# 20538##################################################################### 20539 20540package Perl::Tidy::FileWriter; 20541 20542# Maximum number of little messages; probably need not be changed. 20543use constant MAX_NAG_MESSAGES => 6; 20544 20545sub write_logfile_entry { 20546 my $self = shift; 20547 my $logger_object = $self->{_logger_object}; 20548 if ($logger_object) { 20549 $logger_object->write_logfile_entry(@_); 20550 } 20551} 20552 20553sub new { 20554 my $class = shift; 20555 my ( $line_sink_object, $rOpts, $logger_object ) = @_; 20556 20557 bless { 20558 _line_sink_object => $line_sink_object, 20559 _logger_object => $logger_object, 20560 _rOpts => $rOpts, 20561 _output_line_number => 1, 20562 _consecutive_blank_lines => 0, 20563 _consecutive_nonblank_lines => 0, 20564 _first_line_length_error => 0, 20565 _max_line_length_error => 0, 20566 _last_line_length_error => 0, 20567 _first_line_length_error_at => 0, 20568 _max_line_length_error_at => 0, 20569 _last_line_length_error_at => 0, 20570 _line_length_error_count => 0, 20571 _max_output_line_length => 0, 20572 _max_output_line_length_at => 0, 20573 }, $class; 20574} 20575 20576sub tee_on { 20577 my $self = shift; 20578 $self->{_line_sink_object}->tee_on(); 20579} 20580 20581sub tee_off { 20582 my $self = shift; 20583 $self->{_line_sink_object}->tee_off(); 20584} 20585 20586sub get_output_line_number { 20587 my $self = shift; 20588 return $self->{_output_line_number}; 20589} 20590 20591sub decrement_output_line_number { 20592 my $self = shift; 20593 $self->{_output_line_number}--; 20594} 20595 20596sub get_consecutive_nonblank_lines { 20597 my $self = shift; 20598 return $self->{_consecutive_nonblank_lines}; 20599} 20600 20601sub reset_consecutive_blank_lines { 20602 my $self = shift; 20603 $self->{_consecutive_blank_lines} = 0; 20604} 20605 20606sub want_blank_line { 20607 my $self = shift; 20608 unless ( $self->{_consecutive_blank_lines} ) { 20609 $self->write_blank_code_line(); 20610 } 20611} 20612 20613sub write_blank_code_line { 20614 my $self = shift; 20615 my $forced = shift; 20616 my $rOpts = $self->{_rOpts}; 20617 return 20618 if (!$forced 20619 && $self->{_consecutive_blank_lines} >= 20620 $rOpts->{'maximum-consecutive-blank-lines'} ); 20621 $self->{_consecutive_blank_lines}++; 20622 $self->{_consecutive_nonblank_lines} = 0; 20623 $self->write_line("\n"); 20624} 20625 20626sub write_code_line { 20627 my $self = shift; 20628 my $a = shift; 20629 20630 if ( $a =~ /^\s*$/ ) { 20631 my $rOpts = $self->{_rOpts}; 20632 return 20633 if ( $self->{_consecutive_blank_lines} >= 20634 $rOpts->{'maximum-consecutive-blank-lines'} ); 20635 $self->{_consecutive_blank_lines}++; 20636 $self->{_consecutive_nonblank_lines} = 0; 20637 } 20638 else { 20639 $self->{_consecutive_blank_lines} = 0; 20640 $self->{_consecutive_nonblank_lines}++; 20641 } 20642 $self->write_line($a); 20643} 20644 20645sub write_line { 20646 my $self = shift; 20647 my $a = shift; 20648 20649 # TODO: go through and see if the test is necessary here 20650 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } 20651 20652 $self->{_line_sink_object}->write_line($a); 20653 20654 # This calculation of excess line length ignores any internal tabs 20655 my $rOpts = $self->{_rOpts}; 20656 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; 20657 if ( $a =~ /^\t+/g ) { 20658 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); 20659 } 20660 20661 # Note that we just incremented output line number to future value 20662 # so we must subtract 1 for current line number 20663 if ( length($a) > 1 + $self->{_max_output_line_length} ) { 20664 $self->{_max_output_line_length} = length($a) - 1; 20665 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; 20666 } 20667 20668 if ( $exceed > 0 ) { 20669 my $output_line_number = $self->{_output_line_number}; 20670 $self->{_last_line_length_error} = $exceed; 20671 $self->{_last_line_length_error_at} = $output_line_number - 1; 20672 if ( $self->{_line_length_error_count} == 0 ) { 20673 $self->{_first_line_length_error} = $exceed; 20674 $self->{_first_line_length_error_at} = $output_line_number - 1; 20675 } 20676 20677 if ( 20678 $self->{_last_line_length_error} > $self->{_max_line_length_error} ) 20679 { 20680 $self->{_max_line_length_error} = $exceed; 20681 $self->{_max_line_length_error_at} = $output_line_number - 1; 20682 } 20683 20684 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { 20685 $self->write_logfile_entry( 20686 "Line length exceeded by $exceed characters\n"); 20687 } 20688 $self->{_line_length_error_count}++; 20689 } 20690 20691} 20692 20693sub report_line_length_errors { 20694 my $self = shift; 20695 my $rOpts = $self->{_rOpts}; 20696 my $line_length_error_count = $self->{_line_length_error_count}; 20697 if ( $line_length_error_count == 0 ) { 20698 $self->write_logfile_entry( 20699 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); 20700 my $max_output_line_length = $self->{_max_output_line_length}; 20701 my $max_output_line_length_at = $self->{_max_output_line_length_at}; 20702 $self->write_logfile_entry( 20703" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" 20704 ); 20705 20706 } 20707 else { 20708 20709 my $word = ( $line_length_error_count > 1 ) ? "s" : ""; 20710 $self->write_logfile_entry( 20711"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" 20712 ); 20713 20714 $word = ( $line_length_error_count > 1 ) ? "First" : ""; 20715 my $first_line_length_error = $self->{_first_line_length_error}; 20716 my $first_line_length_error_at = $self->{_first_line_length_error_at}; 20717 $self->write_logfile_entry( 20718" $word at line $first_line_length_error_at by $first_line_length_error characters\n" 20719 ); 20720 20721 if ( $line_length_error_count > 1 ) { 20722 my $max_line_length_error = $self->{_max_line_length_error}; 20723 my $max_line_length_error_at = $self->{_max_line_length_error_at}; 20724 my $last_line_length_error = $self->{_last_line_length_error}; 20725 my $last_line_length_error_at = $self->{_last_line_length_error_at}; 20726 $self->write_logfile_entry( 20727" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" 20728 ); 20729 $self->write_logfile_entry( 20730" Last at line $last_line_length_error_at by $last_line_length_error characters\n" 20731 ); 20732 } 20733 } 20734} 20735 20736##################################################################### 20737# 20738# The Perl::Tidy::Debugger class shows line tokenization 20739# 20740##################################################################### 20741 20742package Perl::Tidy::Debugger; 20743 20744sub new { 20745 20746 my ( $class, $filename ) = @_; 20747 20748 bless { 20749 _debug_file => $filename, 20750 _debug_file_opened => 0, 20751 _fh => undef, 20752 }, $class; 20753} 20754 20755sub really_open_debug_file { 20756 20757 my $self = shift; 20758 my $debug_file = $self->{_debug_file}; 20759 my $fh; 20760 unless ( $fh = IO::File->new("> $debug_file") ) { 20761 warn("can't open $debug_file: $!\n"); 20762 } 20763 $self->{_debug_file_opened} = 1; 20764 $self->{_fh} = $fh; 20765 print $fh 20766 "Use -dump-token-types (-dtt) to get a list of token type codes\n"; 20767} 20768 20769sub close_debug_file { 20770 20771 my $self = shift; 20772 my $fh = $self->{_fh}; 20773 if ( $self->{_debug_file_opened} ) { 20774 20775 eval { $self->{_fh}->close() }; 20776 } 20777} 20778 20779sub write_debug_entry { 20780 20781 # This is a debug dump routine which may be modified as necessary 20782 # to dump tokens on a line-by-line basis. The output will be written 20783 # to the .DEBUG file when the -D flag is entered. 20784 my $self = shift; 20785 my $line_of_tokens = shift; 20786 20787 my $input_line = $line_of_tokens->{_line_text}; 20788 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 20789 my $rtokens = $line_of_tokens->{_rtokens}; 20790 my $rlevels = $line_of_tokens->{_rlevels}; 20791 my $rslevels = $line_of_tokens->{_rslevels}; 20792 my $rblock_type = $line_of_tokens->{_rblock_type}; 20793 my $input_line_number = $line_of_tokens->{_line_number}; 20794 my $line_type = $line_of_tokens->{_line_type}; 20795 20796 my ( $j, $num ); 20797 20798 my $token_str = "$input_line_number: "; 20799 my $reconstructed_original = "$input_line_number: "; 20800 my $block_str = "$input_line_number: "; 20801 20802 #$token_str .= "$line_type: "; 20803 #$reconstructed_original .= "$line_type: "; 20804 20805 my $pattern = ""; 20806 my @next_char = ( '"', '"' ); 20807 my $i_next = 0; 20808 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } 20809 my $fh = $self->{_fh}; 20810 20811 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 20812 20813 # testing patterns 20814 if ( $$rtoken_type[$j] eq 'k' ) { 20815 $pattern .= $$rtokens[$j]; 20816 } 20817 else { 20818 $pattern .= $$rtoken_type[$j]; 20819 } 20820 $reconstructed_original .= $$rtokens[$j]; 20821 $block_str .= "($$rblock_type[$j])"; 20822 $num = length( $$rtokens[$j] ); 20823 my $type_str = $$rtoken_type[$j]; 20824 20825 # be sure there are no blank tokens (shouldn't happen) 20826 # This can only happen if a programming error has been made 20827 # because all valid tokens are non-blank 20828 if ( $type_str eq ' ' ) { 20829 print $fh "BLANK TOKEN on the next line\n"; 20830 $type_str = $next_char[$i_next]; 20831 $i_next = 1 - $i_next; 20832 } 20833 20834 if ( length($type_str) == 1 ) { 20835 $type_str = $type_str x $num; 20836 } 20837 $token_str .= $type_str; 20838 } 20839 20840 # Write what you want here ... 20841 # print $fh "$input_line\n"; 20842 # print $fh "$pattern\n"; 20843 print $fh "$reconstructed_original\n"; 20844 print $fh "$token_str\n"; 20845 20846 #print $fh "$block_str\n"; 20847} 20848 20849##################################################################### 20850# 20851# The Perl::Tidy::LineBuffer class supplies a 'get_line()' 20852# method for returning the next line to be parsed, as well as a 20853# 'peek_ahead()' method 20854# 20855# The input parameter is an object with a 'get_line()' method 20856# which returns the next line to be parsed 20857# 20858##################################################################### 20859 20860package Perl::Tidy::LineBuffer; 20861 20862sub new { 20863 20864 my $class = shift; 20865 my $line_source_object = shift; 20866 20867 return bless { 20868 _line_source_object => $line_source_object, 20869 _rlookahead_buffer => [], 20870 }, $class; 20871} 20872 20873sub peek_ahead { 20874 my $self = shift; 20875 my $buffer_index = shift; 20876 my $line = undef; 20877 my $line_source_object = $self->{_line_source_object}; 20878 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 20879 if ( $buffer_index < scalar(@$rlookahead_buffer) ) { 20880 $line = $$rlookahead_buffer[$buffer_index]; 20881 } 20882 else { 20883 $line = $line_source_object->get_line(); 20884 push( @$rlookahead_buffer, $line ); 20885 } 20886 return $line; 20887} 20888 20889sub get_line { 20890 my $self = shift; 20891 my $line = undef; 20892 my $line_source_object = $self->{_line_source_object}; 20893 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 20894 20895 if ( scalar(@$rlookahead_buffer) ) { 20896 $line = shift @$rlookahead_buffer; 20897 } 20898 else { 20899 $line = $line_source_object->get_line(); 20900 } 20901 return $line; 20902} 20903 20904######################################################################## 20905# 20906# the Perl::Tidy::Tokenizer package is essentially a filter which 20907# reads lines of perl source code from a source object and provides 20908# corresponding tokenized lines through its get_line() method. Lines 20909# flow from the source_object to the caller like this: 20910# 20911# source_object --> LineBuffer_object --> Tokenizer --> calling routine 20912# get_line() get_line() get_line() line_of_tokens 20913# 20914# The source object can be any object with a get_line() method which 20915# supplies one line (a character string) perl call. 20916# The LineBuffer object is created by the Tokenizer. 20917# The Tokenizer returns a reference to a data structure 'line_of_tokens' 20918# containing one tokenized line for each call to its get_line() method. 20919# 20920# WARNING: This is not a real class yet. Only one tokenizer my be used. 20921# 20922######################################################################## 20923 20924package Perl::Tidy::Tokenizer; 20925 20926BEGIN { 20927 20928 # Caution: these debug flags produce a lot of output 20929 # They should all be 0 except when debugging small scripts 20930 20931 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; 20932 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; 20933 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; 20934 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; 20935 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; 20936 20937 my $debug_warning = sub { 20938 print "TOKENIZER_DEBUGGING with key $_[0]\n"; 20939 }; 20940 20941 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); 20942 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); 20943 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); 20944 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); 20945 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); 20946 20947} 20948 20949use Carp; 20950 20951# PACKAGE VARIABLES for for processing an entire FILE. 20952use vars qw{ 20953 $tokenizer_self 20954 20955 $last_nonblank_token 20956 $last_nonblank_type 20957 $last_nonblank_block_type 20958 $statement_type 20959 $in_attribute_list 20960 $current_package 20961 $context 20962 20963 %is_constant 20964 %is_user_function 20965 %user_function_prototype 20966 %is_block_function 20967 %is_block_list_function 20968 %saw_function_definition 20969 20970 $brace_depth 20971 $paren_depth 20972 $square_bracket_depth 20973 20974 @current_depth 20975 @total_depth 20976 $total_depth 20977 @nesting_sequence_number 20978 @current_sequence_number 20979 @paren_type 20980 @paren_semicolon_count 20981 @paren_structural_type 20982 @brace_type 20983 @brace_structural_type 20984 @brace_statement_type 20985 @brace_context 20986 @brace_package 20987 @square_bracket_type 20988 @square_bracket_structural_type 20989 @depth_array 20990 @nested_ternary_flag 20991 @starting_line_of_current_depth 20992}; 20993 20994# GLOBAL CONSTANTS for routines in this package 20995use vars qw{ 20996 %is_indirect_object_taker 20997 %is_block_operator 20998 %expecting_operator_token 20999 %expecting_operator_types 21000 %expecting_term_types 21001 %expecting_term_token 21002 %is_digraph 21003 %is_file_test_operator 21004 %is_trigraph 21005 %is_valid_token_type 21006 %is_keyword 21007 %is_code_block_token 21008 %really_want_term 21009 @opening_brace_names 21010 @closing_brace_names 21011 %is_keyword_taking_list 21012 %is_q_qq_qw_qx_qr_s_y_tr_m 21013}; 21014 21015# possible values of operator_expected() 21016use constant TERM => -1; 21017use constant UNKNOWN => 0; 21018use constant OPERATOR => 1; 21019 21020# possible values of context 21021use constant SCALAR_CONTEXT => -1; 21022use constant UNKNOWN_CONTEXT => 0; 21023use constant LIST_CONTEXT => 1; 21024 21025# Maximum number of little messages; probably need not be changed. 21026use constant MAX_NAG_MESSAGES => 6; 21027 21028{ 21029 21030 # methods to count instances 21031 my $_count = 0; 21032 sub get_count { $_count; } 21033 sub _increment_count { ++$_count } 21034 sub _decrement_count { --$_count } 21035} 21036 21037sub DESTROY { 21038 $_[0]->_decrement_count(); 21039} 21040 21041sub new { 21042 21043 my $class = shift; 21044 21045 # Note: 'tabs' and 'indent_columns' are temporary and should be 21046 # removed asap 21047 my %defaults = ( 21048 source_object => undef, 21049 debugger_object => undef, 21050 diagnostics_object => undef, 21051 logger_object => undef, 21052 starting_level => undef, 21053 indent_columns => 4, 21054 tabs => 0, 21055 entab_leading_space => undef, 21056 look_for_hash_bang => 0, 21057 trim_qw => 1, 21058 look_for_autoloader => 1, 21059 look_for_selfloader => 1, 21060 starting_line_number => 1, 21061 ); 21062 my %args = ( %defaults, @_ ); 21063 21064 # we are given an object with a get_line() method to supply source lines 21065 my $source_object = $args{source_object}; 21066 21067 # we create another object with a get_line() and peek_ahead() method 21068 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); 21069 21070 # Tokenizer state data is as follows: 21071 # _rhere_target_list reference to list of here-doc targets 21072 # _here_doc_target the target string for a here document 21073 # _here_quote_character the type of here-doc quoting (" ' ` or none) 21074 # to determine if interpolation is done 21075 # _quote_target character we seek if chasing a quote 21076 # _line_start_quote line where we started looking for a long quote 21077 # _in_here_doc flag indicating if we are in a here-doc 21078 # _in_pod flag set if we are in pod documentation 21079 # _in_error flag set if we saw severe error (binary in script) 21080 # _in_data flag set if we are in __DATA__ section 21081 # _in_end flag set if we are in __END__ section 21082 # _in_format flag set if we are in a format description 21083 # _in_attribute_list flag telling if we are looking for attributes 21084 # _in_quote flag telling if we are chasing a quote 21085 # _starting_level indentation level of first line 21086 # _input_tabstr string denoting one indentation level of input file 21087 # _know_input_tabstr flag indicating if we know _input_tabstr 21088 # _line_buffer_object object with get_line() method to supply source code 21089 # _diagnostics_object place to write debugging information 21090 # _unexpected_error_count error count used to limit output 21091 # _lower_case_labels_at line numbers where lower case labels seen 21092 $tokenizer_self = { 21093 _rhere_target_list => [], 21094 _in_here_doc => 0, 21095 _here_doc_target => "", 21096 _here_quote_character => "", 21097 _in_data => 0, 21098 _in_end => 0, 21099 _in_format => 0, 21100 _in_error => 0, 21101 _in_pod => 0, 21102 _in_attribute_list => 0, 21103 _in_quote => 0, 21104 _quote_target => "", 21105 _line_start_quote => -1, 21106 _starting_level => $args{starting_level}, 21107 _know_starting_level => defined( $args{starting_level} ), 21108 _tabs => $args{tabs}, 21109 _entab_leading_space => $args{entab_leading_space}, 21110 _indent_columns => $args{indent_columns}, 21111 _look_for_hash_bang => $args{look_for_hash_bang}, 21112 _trim_qw => $args{trim_qw}, 21113 _input_tabstr => "", 21114 _know_input_tabstr => -1, 21115 _last_line_number => $args{starting_line_number} - 1, 21116 _saw_perl_dash_P => 0, 21117 _saw_perl_dash_w => 0, 21118 _saw_use_strict => 0, 21119 _saw_v_string => 0, 21120 _look_for_autoloader => $args{look_for_autoloader}, 21121 _look_for_selfloader => $args{look_for_selfloader}, 21122 _saw_autoloader => 0, 21123 _saw_selfloader => 0, 21124 _saw_hash_bang => 0, 21125 _saw_end => 0, 21126 _saw_data => 0, 21127 _saw_negative_indentation => 0, 21128 _started_tokenizing => 0, 21129 _line_buffer_object => $line_buffer_object, 21130 _debugger_object => $args{debugger_object}, 21131 _diagnostics_object => $args{diagnostics_object}, 21132 _logger_object => $args{logger_object}, 21133 _unexpected_error_count => 0, 21134 _started_looking_for_here_target_at => 0, 21135 _nearly_matched_here_target_at => undef, 21136 _line_text => "", 21137 _rlower_case_labels_at => undef, 21138 }; 21139 21140 prepare_for_a_new_file(); 21141 find_starting_indentation_level(); 21142 21143 bless $tokenizer_self, $class; 21144 21145 # This is not a full class yet, so die if an attempt is made to 21146 # create more than one object. 21147 21148 if ( _increment_count() > 1 ) { 21149 confess 21150"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 21151 } 21152 21153 return $tokenizer_self; 21154 21155} 21156 21157# interface to Perl::Tidy::Logger routines 21158sub warning { 21159 my $logger_object = $tokenizer_self->{_logger_object}; 21160 if ($logger_object) { 21161 $logger_object->warning(@_); 21162 } 21163} 21164 21165sub complain { 21166 my $logger_object = $tokenizer_self->{_logger_object}; 21167 if ($logger_object) { 21168 $logger_object->complain(@_); 21169 } 21170} 21171 21172sub write_logfile_entry { 21173 my $logger_object = $tokenizer_self->{_logger_object}; 21174 if ($logger_object) { 21175 $logger_object->write_logfile_entry(@_); 21176 } 21177} 21178 21179sub interrupt_logfile { 21180 my $logger_object = $tokenizer_self->{_logger_object}; 21181 if ($logger_object) { 21182 $logger_object->interrupt_logfile(); 21183 } 21184} 21185 21186sub resume_logfile { 21187 my $logger_object = $tokenizer_self->{_logger_object}; 21188 if ($logger_object) { 21189 $logger_object->resume_logfile(); 21190 } 21191} 21192 21193sub increment_brace_error { 21194 my $logger_object = $tokenizer_self->{_logger_object}; 21195 if ($logger_object) { 21196 $logger_object->increment_brace_error(); 21197 } 21198} 21199 21200sub report_definite_bug { 21201 my $logger_object = $tokenizer_self->{_logger_object}; 21202 if ($logger_object) { 21203 $logger_object->report_definite_bug(); 21204 } 21205} 21206 21207sub brace_warning { 21208 my $logger_object = $tokenizer_self->{_logger_object}; 21209 if ($logger_object) { 21210 $logger_object->brace_warning(@_); 21211 } 21212} 21213 21214sub get_saw_brace_error { 21215 my $logger_object = $tokenizer_self->{_logger_object}; 21216 if ($logger_object) { 21217 $logger_object->get_saw_brace_error(); 21218 } 21219 else { 21220 0; 21221 } 21222} 21223 21224# interface to Perl::Tidy::Diagnostics routines 21225sub write_diagnostics { 21226 if ( $tokenizer_self->{_diagnostics_object} ) { 21227 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); 21228 } 21229} 21230 21231sub report_tokenization_errors { 21232 21233 my $self = shift; 21234 21235 my $level = get_indentation_level(); 21236 if ( $level != $tokenizer_self->{_starting_level} ) { 21237 warning("final indentation level: $level\n"); 21238 } 21239 21240 check_final_nesting_depths(); 21241 21242 if ( $tokenizer_self->{_look_for_hash_bang} 21243 && !$tokenizer_self->{_saw_hash_bang} ) 21244 { 21245 warning( 21246 "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); 21247 } 21248 21249 if ( $tokenizer_self->{_in_format} ) { 21250 warning("hit EOF while in format description\n"); 21251 } 21252 21253 if ( $tokenizer_self->{_in_pod} ) { 21254 21255 # Just write log entry if this is after __END__ or __DATA__ 21256 # because this happens to often, and it is not likely to be 21257 # a parsing error. 21258 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 21259 write_logfile_entry( 21260"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 21261 ); 21262 } 21263 21264 else { 21265 complain( 21266"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 21267 ); 21268 } 21269 21270 } 21271 21272 if ( $tokenizer_self->{_in_here_doc} ) { 21273 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 21274 my $started_looking_for_here_target_at = 21275 $tokenizer_self->{_started_looking_for_here_target_at}; 21276 if ($here_doc_target) { 21277 warning( 21278"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" 21279 ); 21280 } 21281 else { 21282 warning( 21283"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" 21284 ); 21285 } 21286 my $nearly_matched_here_target_at = 21287 $tokenizer_self->{_nearly_matched_here_target_at}; 21288 if ($nearly_matched_here_target_at) { 21289 warning( 21290"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" 21291 ); 21292 } 21293 } 21294 21295 if ( $tokenizer_self->{_in_quote} ) { 21296 my $line_start_quote = $tokenizer_self->{_line_start_quote}; 21297 my $quote_target = $tokenizer_self->{_quote_target}; 21298 my $what = 21299 ( $tokenizer_self->{_in_attribute_list} ) 21300 ? "attribute list" 21301 : "quote/pattern"; 21302 warning( 21303"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" 21304 ); 21305 } 21306 21307 unless ( $tokenizer_self->{_saw_perl_dash_w} ) { 21308 if ( $] < 5.006 ) { 21309 write_logfile_entry("Suggest including '-w parameter'\n"); 21310 } 21311 else { 21312 write_logfile_entry("Suggest including 'use warnings;'\n"); 21313 } 21314 } 21315 21316 if ( $tokenizer_self->{_saw_perl_dash_P} ) { 21317 write_logfile_entry("Use of -P parameter for defines is discouraged\n"); 21318 } 21319 21320 unless ( $tokenizer_self->{_saw_use_strict} ) { 21321 write_logfile_entry("Suggest including 'use strict;'\n"); 21322 } 21323 21324 # it is suggested that lables have at least one upper case character 21325 # for legibility and to avoid code breakage as new keywords are introduced 21326 if ( $tokenizer_self->{_rlower_case_labels_at} ) { 21327 my @lower_case_labels_at = 21328 @{ $tokenizer_self->{_rlower_case_labels_at} }; 21329 write_logfile_entry( 21330 "Suggest using upper case characters in label(s)\n"); 21331 local $" = ')('; 21332 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); 21333 } 21334} 21335 21336sub report_v_string { 21337 21338 # warn if this version can't handle v-strings 21339 my $tok = shift; 21340 unless ( $tokenizer_self->{_saw_v_string} ) { 21341 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; 21342 } 21343 if ( $] < 5.006 ) { 21344 warning( 21345"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" 21346 ); 21347 } 21348} 21349 21350sub get_input_line_number { 21351 return $tokenizer_self->{_last_line_number}; 21352} 21353 21354# returns the next tokenized line 21355sub get_line { 21356 21357 my $self = shift; 21358 21359 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, 21360 # $square_bracket_depth, $paren_depth 21361 21362 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); 21363 $tokenizer_self->{_line_text} = $input_line; 21364 21365 return undef unless ($input_line); 21366 21367 my $input_line_number = ++$tokenizer_self->{_last_line_number}; 21368 21369 # Find and remove what characters terminate this line, including any 21370 # control r 21371 my $input_line_separator = ""; 21372 if ( chomp($input_line) ) { $input_line_separator = $/ } 21373 21374 # TODO: what other characters should be included here? 21375 if ( $input_line =~ s/((\r|\035|\032)+)$// ) { 21376 $input_line_separator = $2 . $input_line_separator; 21377 } 21378 21379 # for backwards compatability we keep the line text terminated with 21380 # a newline character 21381 $input_line .= "\n"; 21382 $tokenizer_self->{_line_text} = $input_line; # update 21383 21384 # create a data structure describing this line which will be 21385 # returned to the caller. 21386 21387 # _line_type codes are: 21388 # SYSTEM - system-specific code before hash-bang line 21389 # CODE - line of perl code (including comments) 21390 # POD_START - line starting pod, such as '=head' 21391 # POD - pod documentation text 21392 # POD_END - last line of pod section, '=cut' 21393 # HERE - text of here-document 21394 # HERE_END - last line of here-doc (target word) 21395 # FORMAT - format section 21396 # FORMAT_END - last line of format section, '.' 21397 # DATA_START - __DATA__ line 21398 # DATA - unidentified text following __DATA__ 21399 # END_START - __END__ line 21400 # END - unidentified text following __END__ 21401 # ERROR - we are in big trouble, probably not a perl script 21402 21403 # Other variables: 21404 # _curly_brace_depth - depth of curly braces at start of line 21405 # _square_bracket_depth - depth of square brackets at start of line 21406 # _paren_depth - depth of parens at start of line 21407 # _starting_in_quote - this line continues a multi-line quote 21408 # (so don't trim leading blanks!) 21409 # _ending_in_quote - this line ends in a multi-line quote 21410 # (so don't trim trailing blanks!) 21411 my $line_of_tokens = { 21412 _line_type => 'EOF', 21413 _line_text => $input_line, 21414 _line_number => $input_line_number, 21415 _rtoken_type => undef, 21416 _rtokens => undef, 21417 _rlevels => undef, 21418 _rslevels => undef, 21419 _rblock_type => undef, 21420 _rcontainer_type => undef, 21421 _rcontainer_environment => undef, 21422 _rtype_sequence => undef, 21423 _rnesting_tokens => undef, 21424 _rci_levels => undef, 21425 _rnesting_blocks => undef, 21426 _python_indentation_level => -1, ## 0, 21427 _starting_in_quote => 0, # to be set by subroutine 21428 _ending_in_quote => 0, 21429 _curly_brace_depth => $brace_depth, 21430 _square_bracket_depth => $square_bracket_depth, 21431 _paren_depth => $paren_depth, 21432 _quote_character => '', 21433 }; 21434 21435 # must print line unchanged if we are in a here document 21436 if ( $tokenizer_self->{_in_here_doc} ) { 21437 21438 $line_of_tokens->{_line_type} = 'HERE'; 21439 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 21440 my $here_quote_character = $tokenizer_self->{_here_quote_character}; 21441 my $candidate_target = $input_line; 21442 chomp $candidate_target; 21443 if ( $candidate_target eq $here_doc_target ) { 21444 $tokenizer_self->{_nearly_matched_here_target_at} = undef; 21445 $line_of_tokens->{_line_type} = 'HERE_END'; 21446 write_logfile_entry("Exiting HERE document $here_doc_target\n"); 21447 21448 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 21449 if (@$rhere_target_list) { # there can be multiple here targets 21450 ( $here_doc_target, $here_quote_character ) = 21451 @{ shift @$rhere_target_list }; 21452 $tokenizer_self->{_here_doc_target} = $here_doc_target; 21453 $tokenizer_self->{_here_quote_character} = 21454 $here_quote_character; 21455 write_logfile_entry( 21456 "Entering HERE document $here_doc_target\n"); 21457 $tokenizer_self->{_nearly_matched_here_target_at} = undef; 21458 $tokenizer_self->{_started_looking_for_here_target_at} = 21459 $input_line_number; 21460 } 21461 else { 21462 $tokenizer_self->{_in_here_doc} = 0; 21463 $tokenizer_self->{_here_doc_target} = ""; 21464 $tokenizer_self->{_here_quote_character} = ""; 21465 } 21466 } 21467 21468 # check for error of extra whitespace 21469 # note for PERL6: leading whitespace is allowed 21470 else { 21471 $candidate_target =~ s/\s*$//; 21472 $candidate_target =~ s/^\s*//; 21473 if ( $candidate_target eq $here_doc_target ) { 21474 $tokenizer_self->{_nearly_matched_here_target_at} = 21475 $input_line_number; 21476 } 21477 } 21478 return $line_of_tokens; 21479 } 21480 21481 # must print line unchanged if we are in a format section 21482 elsif ( $tokenizer_self->{_in_format} ) { 21483 21484 if ( $input_line =~ /^\.[\s#]*$/ ) { 21485 write_logfile_entry("Exiting format section\n"); 21486 $tokenizer_self->{_in_format} = 0; 21487 $line_of_tokens->{_line_type} = 'FORMAT_END'; 21488 } 21489 else { 21490 $line_of_tokens->{_line_type} = 'FORMAT'; 21491 } 21492 return $line_of_tokens; 21493 } 21494 21495 # must print line unchanged if we are in pod documentation 21496 elsif ( $tokenizer_self->{_in_pod} ) { 21497 21498 $line_of_tokens->{_line_type} = 'POD'; 21499 if ( $input_line =~ /^=cut/ ) { 21500 $line_of_tokens->{_line_type} = 'POD_END'; 21501 write_logfile_entry("Exiting POD section\n"); 21502 $tokenizer_self->{_in_pod} = 0; 21503 } 21504 if ( $input_line =~ /^\#\!.*perl\b/ ) { 21505 warning( 21506 "Hash-bang in pod can cause older versions of perl to fail! \n" 21507 ); 21508 } 21509 21510 return $line_of_tokens; 21511 } 21512 21513 # must print line unchanged if we have seen a severe error (i.e., we 21514 # are seeing illegal tokens and connot continue. Syntax errors do 21515 # not pass this route). Calling routine can decide what to do, but 21516 # the default can be to just pass all lines as if they were after __END__ 21517 elsif ( $tokenizer_self->{_in_error} ) { 21518 $line_of_tokens->{_line_type} = 'ERROR'; 21519 return $line_of_tokens; 21520 } 21521 21522 # print line unchanged if we are __DATA__ section 21523 elsif ( $tokenizer_self->{_in_data} ) { 21524 21525 # ...but look for POD 21526 # Note that the _in_data and _in_end flags remain set 21527 # so that we return to that state after seeing the 21528 # end of a pod section 21529 if ( $input_line =~ /^=(?!cut)/ ) { 21530 $line_of_tokens->{_line_type} = 'POD_START'; 21531 write_logfile_entry("Entering POD section\n"); 21532 $tokenizer_self->{_in_pod} = 1; 21533 return $line_of_tokens; 21534 } 21535 else { 21536 $line_of_tokens->{_line_type} = 'DATA'; 21537 return $line_of_tokens; 21538 } 21539 } 21540 21541 # print line unchanged if we are in __END__ section 21542 elsif ( $tokenizer_self->{_in_end} ) { 21543 21544 # ...but look for POD 21545 # Note that the _in_data and _in_end flags remain set 21546 # so that we return to that state after seeing the 21547 # end of a pod section 21548 if ( $input_line =~ /^=(?!cut)/ ) { 21549 $line_of_tokens->{_line_type} = 'POD_START'; 21550 write_logfile_entry("Entering POD section\n"); 21551 $tokenizer_self->{_in_pod} = 1; 21552 return $line_of_tokens; 21553 } 21554 else { 21555 $line_of_tokens->{_line_type} = 'END'; 21556 return $line_of_tokens; 21557 } 21558 } 21559 21560 # check for a hash-bang line if we haven't seen one 21561 if ( !$tokenizer_self->{_saw_hash_bang} ) { 21562 if ( $input_line =~ /^\#\!.*perl\b/ ) { 21563 $tokenizer_self->{_saw_hash_bang} = $input_line_number; 21564 21565 # check for -w and -P flags 21566 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { 21567 $tokenizer_self->{_saw_perl_dash_P} = 1; 21568 } 21569 21570 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { 21571 $tokenizer_self->{_saw_perl_dash_w} = 1; 21572 } 21573 21574 if ( ( $input_line_number > 1 ) 21575 && ( !$tokenizer_self->{_look_for_hash_bang} ) ) 21576 { 21577 21578 # this is helpful for VMS systems; we may have accidentally 21579 # tokenized some DCL commands 21580 if ( $tokenizer_self->{_started_tokenizing} ) { 21581 warning( 21582"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" 21583 ); 21584 } 21585 else { 21586 complain("Useless hash-bang after line 1\n"); 21587 } 21588 } 21589 21590 # Report the leading hash-bang as a system line 21591 # This will prevent -dac from deleting it 21592 else { 21593 $line_of_tokens->{_line_type} = 'SYSTEM'; 21594 return $line_of_tokens; 21595 } 21596 } 21597 } 21598 21599 # wait for a hash-bang before parsing if the user invoked us with -x 21600 if ( $tokenizer_self->{_look_for_hash_bang} 21601 && !$tokenizer_self->{_saw_hash_bang} ) 21602 { 21603 $line_of_tokens->{_line_type} = 'SYSTEM'; 21604 return $line_of_tokens; 21605 } 21606 21607 # a first line of the form ': #' will be marked as SYSTEM 21608 # since lines of this form may be used by tcsh 21609 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { 21610 $line_of_tokens->{_line_type} = 'SYSTEM'; 21611 return $line_of_tokens; 21612 } 21613 21614 # now we know that it is ok to tokenize the line... 21615 # the line tokenizer will modify any of these private variables: 21616 # _rhere_target_list 21617 # _in_data 21618 # _in_end 21619 # _in_format 21620 # _in_error 21621 # _in_pod 21622 # _in_quote 21623 my $ending_in_quote_last = $tokenizer_self->{_in_quote}; 21624 tokenize_this_line($line_of_tokens); 21625 21626 # Now finish defining the return structure and return it 21627 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; 21628 21629 # handle severe error (binary data in script) 21630 if ( $tokenizer_self->{_in_error} ) { 21631 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages 21632 warning("Giving up after error\n"); 21633 $line_of_tokens->{_line_type} = 'ERROR'; 21634 reset_indentation_level(0); # avoid error messages 21635 return $line_of_tokens; 21636 } 21637 21638 # handle start of pod documentation 21639 if ( $tokenizer_self->{_in_pod} ) { 21640 21641 # This gets tricky..above a __DATA__ or __END__ section, perl 21642 # accepts '=cut' as the start of pod section. But afterwards, 21643 # only pod utilities see it and they may ignore an =cut without 21644 # leading =head. In any case, this isn't good. 21645 if ( $input_line =~ /^=cut\b/ ) { 21646 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 21647 complain("=cut while not in pod ignored\n"); 21648 $tokenizer_self->{_in_pod} = 0; 21649 $line_of_tokens->{_line_type} = 'POD_END'; 21650 } 21651 else { 21652 $line_of_tokens->{_line_type} = 'POD_START'; 21653 complain( 21654"=cut starts a pod section .. this can fool pod utilities.\n" 21655 ); 21656 write_logfile_entry("Entering POD section\n"); 21657 } 21658 } 21659 21660 else { 21661 $line_of_tokens->{_line_type} = 'POD_START'; 21662 write_logfile_entry("Entering POD section\n"); 21663 } 21664 21665 return $line_of_tokens; 21666 } 21667 21668 # update indentation levels for log messages 21669 if ( $input_line !~ /^\s*$/ ) { 21670 my $rlevels = $line_of_tokens->{_rlevels}; 21671 my $structural_indentation_level = $$rlevels[0]; 21672 my ( $python_indentation_level, $msg ) = 21673 find_indentation_level( $input_line, $structural_indentation_level ); 21674 if ($msg) { write_logfile_entry("$msg") } 21675 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) { 21676 $line_of_tokens->{_python_indentation_level} = 21677 $python_indentation_level; 21678 } 21679 } 21680 21681 # see if this line contains here doc targets 21682 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 21683 if (@$rhere_target_list) { 21684 21685 my ( $here_doc_target, $here_quote_character ) = 21686 @{ shift @$rhere_target_list }; 21687 $tokenizer_self->{_in_here_doc} = 1; 21688 $tokenizer_self->{_here_doc_target} = $here_doc_target; 21689 $tokenizer_self->{_here_quote_character} = $here_quote_character; 21690 write_logfile_entry("Entering HERE document $here_doc_target\n"); 21691 $tokenizer_self->{_started_looking_for_here_target_at} = 21692 $input_line_number; 21693 } 21694 21695 # NOTE: __END__ and __DATA__ statements are written unformatted 21696 # because they can theoretically contain additional characters 21697 # which are not tokenized (and cannot be read with <DATA> either!). 21698 if ( $tokenizer_self->{_in_data} ) { 21699 $line_of_tokens->{_line_type} = 'DATA_START'; 21700 write_logfile_entry("Starting __DATA__ section\n"); 21701 $tokenizer_self->{_saw_data} = 1; 21702 21703 # keep parsing after __DATA__ if use SelfLoader was seen 21704 if ( $tokenizer_self->{_saw_selfloader} ) { 21705 $tokenizer_self->{_in_data} = 0; 21706 write_logfile_entry( 21707 "SelfLoader seen, continuing; -nlsl deactivates\n"); 21708 } 21709 21710 return $line_of_tokens; 21711 } 21712 21713 elsif ( $tokenizer_self->{_in_end} ) { 21714 $line_of_tokens->{_line_type} = 'END_START'; 21715 write_logfile_entry("Starting __END__ section\n"); 21716 $tokenizer_self->{_saw_end} = 1; 21717 21718 # keep parsing after __END__ if use AutoLoader was seen 21719 if ( $tokenizer_self->{_saw_autoloader} ) { 21720 $tokenizer_self->{_in_end} = 0; 21721 write_logfile_entry( 21722 "AutoLoader seen, continuing; -nlal deactivates\n"); 21723 } 21724 return $line_of_tokens; 21725 } 21726 21727 # now, finally, we know that this line is type 'CODE' 21728 $line_of_tokens->{_line_type} = 'CODE'; 21729 21730 # remember if we have seen any real code 21731 if ( !$tokenizer_self->{_started_tokenizing} 21732 && $input_line !~ /^\s*$/ 21733 && $input_line !~ /^\s*#/ ) 21734 { 21735 $tokenizer_self->{_started_tokenizing} = 1; 21736 } 21737 21738 if ( $tokenizer_self->{_debugger_object} ) { 21739 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); 21740 } 21741 21742 # Note: if keyword 'format' occurs in this line code, it is still CODE 21743 # (keyword 'format' need not start a line) 21744 if ( $tokenizer_self->{_in_format} ) { 21745 write_logfile_entry("Entering format section\n"); 21746 } 21747 21748 if ( $tokenizer_self->{_in_quote} 21749 and ( $tokenizer_self->{_line_start_quote} < 0 ) ) 21750 { 21751 21752 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { 21753 if ( 21754 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) 21755 { 21756 $tokenizer_self->{_line_start_quote} = $input_line_number; 21757 write_logfile_entry( 21758 "Start multi-line quote or pattern ending in $quote_target\n"); 21759 } 21760 } 21761 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) 21762 and !$tokenizer_self->{_in_quote} ) 21763 { 21764 $tokenizer_self->{_line_start_quote} = -1; 21765 write_logfile_entry("End of multi-line quote or pattern\n"); 21766 } 21767 21768 # we are returning a line of CODE 21769 return $line_of_tokens; 21770} 21771 21772sub find_starting_indentation_level { 21773 21774 # USES GLOBAL VARIABLES: $tokenizer_self 21775 my $starting_level = 0; 21776 my $know_input_tabstr = -1; # flag for find_indentation_level 21777 21778 # use value if given as parameter 21779 if ( $tokenizer_self->{_know_starting_level} ) { 21780 $starting_level = $tokenizer_self->{_starting_level}; 21781 } 21782 21783 # if we know there is a hash_bang line, the level must be zero 21784 elsif ( $tokenizer_self->{_look_for_hash_bang} ) { 21785 $tokenizer_self->{_know_starting_level} = 1; 21786 } 21787 21788 # otherwise figure it out from the input file 21789 else { 21790 my $line; 21791 my $i = 0; 21792 my $structural_indentation_level = -1; # flag for find_indentation_level 21793 21794 # keep looking at lines until we find a hash bang or piece of code 21795 my $msg = ""; 21796 while ( $line = 21797 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 21798 { 21799 21800 # if first line is #! then assume starting level is zero 21801 if ( $i == 1 && $line =~ /^\#\!/ ) { 21802 $starting_level = 0; 21803 last; 21804 } 21805 next if ( $line =~ /^\s*#/ ); # skip past comments 21806 next if ( $line =~ /^\s*$/ ); # skip past blank lines 21807 ( $starting_level, $msg ) = 21808 find_indentation_level( $line, $structural_indentation_level ); 21809 if ($msg) { write_logfile_entry("$msg") } 21810 last; 21811 } 21812 $msg = "Line $i implies starting-indentation-level = $starting_level\n"; 21813 21814 if ( $starting_level > 0 ) { 21815 21816 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 21817 if ( $input_tabstr eq "\t" ) { 21818 $msg .= "by guessing input tabbing uses 1 tab per level\n"; 21819 } 21820 else { 21821 my $cols = length($input_tabstr); 21822 $msg .= 21823 "by guessing input tabbing uses $cols blanks per level\n"; 21824 } 21825 } 21826 write_logfile_entry("$msg"); 21827 } 21828 $tokenizer_self->{_starting_level} = $starting_level; 21829 reset_indentation_level($starting_level); 21830} 21831 21832# Find indentation level given a input line. At the same time, try to 21833# figure out the input tabbing scheme. 21834# 21835# There are two types of calls: 21836# 21837# Type 1: $structural_indentation_level < 0 21838# In this case we have to guess $input_tabstr to figure out the level. 21839# 21840# Type 2: $structural_indentation_level >= 0 21841# In this case the level of this line is known, and this routine can 21842# update the tabbing string, if still unknown, to make the level correct. 21843 21844sub find_indentation_level { 21845 my ( $line, $structural_indentation_level ) = @_; 21846 21847 # USES GLOBAL VARIABLES: $tokenizer_self 21848 my $level = 0; 21849 my $msg = ""; 21850 21851 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; 21852 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 21853 21854 # find leading whitespace 21855 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : ""; 21856 21857 # make first guess at input tabbing scheme if necessary 21858 if ( $know_input_tabstr < 0 ) { 21859 21860 $know_input_tabstr = 0; 21861 21862 # When -et=n is used for the output formatting, we will assume that 21863 # tabs in the input formatting were also produced with -et=n. This may 21864 # not be true, but it is the best guess because it will keep leading 21865 # whitespace unchanged on repeated formatting on small pieces of code 21866 # when -et=n is used. Thanks to Sam Kington for this patch. 21867 if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) { 21868 $leading_whitespace =~ s{^ (\t*) } 21869 { " " x (length($1) * $tabsize) }xe; 21870 $input_tabstr = " " x $tokenizer_self->{_indent_columns}; 21871 } 21872 elsif ( $tokenizer_self->{_tabs} ) { 21873 $input_tabstr = "\t"; 21874 if ( length($leading_whitespace) > 0 ) { 21875 if ( $leading_whitespace !~ /\t/ ) { 21876 21877 my $cols = $tokenizer_self->{_indent_columns}; 21878 21879 if ( length($leading_whitespace) < $cols ) { 21880 $cols = length($leading_whitespace); 21881 } 21882 $input_tabstr = " " x $cols; 21883 } 21884 } 21885 } 21886 else { 21887 $input_tabstr = " " x $tokenizer_self->{_indent_columns}; 21888 21889 if ( length($leading_whitespace) > 0 ) { 21890 if ( $leading_whitespace =~ /^\t/ ) { 21891 $input_tabstr = "\t"; 21892 } 21893 } 21894 } 21895 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 21896 $tokenizer_self->{_input_tabstr} = $input_tabstr; 21897 } 21898 21899 # determine the input tabbing scheme if possible 21900 if ( ( $know_input_tabstr == 0 ) 21901 && ( length($leading_whitespace) > 0 ) 21902 && ( $structural_indentation_level > 0 ) ) 21903 { 21904 my $saved_input_tabstr = $input_tabstr; 21905 21906 # check for common case of one tab per indentation level 21907 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 21908 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 21909 $input_tabstr = "\t"; 21910 $msg = "Guessing old indentation was tab character\n"; 21911 } 21912 } 21913 21914 else { 21915 21916 # detab any tabs based on 8 blanks per tab 21917 my $entabbed = ""; 21918 if ( $leading_whitespace =~ s/^\t+/ /g ) { 21919 $entabbed = "entabbed"; 21920 } 21921 21922 # now compute tabbing from number of spaces 21923 my $columns = 21924 length($leading_whitespace) / $structural_indentation_level; 21925 if ( $columns == int $columns ) { 21926 $msg = 21927 "Guessing old indentation was $columns $entabbed spaces\n"; 21928 } 21929 else { 21930 $columns = int $columns; 21931 $msg = 21932"old indentation is unclear, using $columns $entabbed spaces\n"; 21933 } 21934 $input_tabstr = " " x $columns; 21935 } 21936 $know_input_tabstr = 1; 21937 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 21938 $tokenizer_self->{_input_tabstr} = $input_tabstr; 21939 21940 # see if mistakes were made 21941 if ( ( $tokenizer_self->{_starting_level} > 0 ) 21942 && !$tokenizer_self->{_know_starting_level} ) 21943 { 21944 21945 if ( $input_tabstr ne $saved_input_tabstr ) { 21946 complain( 21947"I made a bad starting level guess; rerun with a value for -sil \n" 21948 ); 21949 } 21950 } 21951 } 21952 21953 # use current guess at input tabbing to get input indentation level 21954 # 21955 # Patch to handle a common case of entabbed leading whitespace 21956 # If the leading whitespace equals 4 spaces and we also have 21957 # tabs, detab the input whitespace assuming 8 spaces per tab. 21958 if ( length($input_tabstr) == 4 ) { 21959 $leading_whitespace =~ s/^\t+/ /g; 21960 } 21961 21962 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { 21963 my $pos = 0; 21964 21965 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) 21966 { 21967 $pos += $len_tab; 21968 $level++; 21969 } 21970 } 21971 return ( $level, $msg ); 21972} 21973 21974# This is a currently unused debug routine 21975sub dump_functions { 21976 21977 my $fh = *STDOUT; 21978 my ( $pkg, $sub ); 21979 foreach $pkg ( keys %is_user_function ) { 21980 print $fh "\nnon-constant subs in package $pkg\n"; 21981 21982 foreach $sub ( keys %{ $is_user_function{$pkg} } ) { 21983 my $msg = ""; 21984 if ( $is_block_list_function{$pkg}{$sub} ) { 21985 $msg = 'block_list'; 21986 } 21987 21988 if ( $is_block_function{$pkg}{$sub} ) { 21989 $msg = 'block'; 21990 } 21991 print $fh "$sub $msg\n"; 21992 } 21993 } 21994 21995 foreach $pkg ( keys %is_constant ) { 21996 print $fh "\nconstants and constant subs in package $pkg\n"; 21997 21998 foreach $sub ( keys %{ $is_constant{$pkg} } ) { 21999 print $fh "$sub\n"; 22000 } 22001 } 22002} 22003 22004sub ones_count { 22005 22006 # count number of 1's in a string of 1's and 0's 22007 # example: ones_count("010101010101") gives 6 22008 return ( my $cis = $_[0] ) =~ tr/1/0/; 22009} 22010 22011sub prepare_for_a_new_file { 22012 22013 # previous tokens needed to determine what to expect next 22014 $last_nonblank_token = ';'; # the only possible starting state which 22015 $last_nonblank_type = ';'; # will make a leading brace a code block 22016 $last_nonblank_block_type = ''; 22017 22018 # scalars for remembering statement types across multiple lines 22019 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' 22020 $in_attribute_list = 0; 22021 22022 # scalars for remembering where we are in the file 22023 $current_package = "main"; 22024 $context = UNKNOWN_CONTEXT; 22025 22026 # hashes used to remember function information 22027 %is_constant = (); # user-defined constants 22028 %is_user_function = (); # user-defined functions 22029 %user_function_prototype = (); # their prototypes 22030 %is_block_function = (); 22031 %is_block_list_function = (); 22032 %saw_function_definition = (); 22033 22034 # variables used to track depths of various containers 22035 # and report nesting errors 22036 $paren_depth = 0; 22037 $brace_depth = 0; 22038 $square_bracket_depth = 0; 22039 @current_depth[ 0 .. $#closing_brace_names ] = 22040 (0) x scalar @closing_brace_names; 22041 $total_depth = 0; 22042 @total_depth = (); 22043 @nesting_sequence_number[ 0 .. $#closing_brace_names ] = 22044 ( 0 .. $#closing_brace_names ); 22045 @current_sequence_number = (); 22046 $paren_type[$paren_depth] = ''; 22047 $paren_semicolon_count[$paren_depth] = 0; 22048 $paren_structural_type[$brace_depth] = ''; 22049 $brace_type[$brace_depth] = ';'; # identify opening brace as code block 22050 $brace_structural_type[$brace_depth] = ''; 22051 $brace_statement_type[$brace_depth] = ""; 22052 $brace_context[$brace_depth] = UNKNOWN_CONTEXT; 22053 $brace_package[$paren_depth] = $current_package; 22054 $square_bracket_type[$square_bracket_depth] = ''; 22055 $square_bracket_structural_type[$square_bracket_depth] = ''; 22056 22057 initialize_tokenizer_state(); 22058} 22059 22060{ # begin tokenize_this_line 22061 22062 use constant BRACE => 0; 22063 use constant SQUARE_BRACKET => 1; 22064 use constant PAREN => 2; 22065 use constant QUESTION_COLON => 3; 22066 22067 # TV1: scalars for processing one LINE. 22068 # Re-initialized on each entry to sub tokenize_this_line. 22069 my ( 22070 $block_type, $container_type, $expecting, 22071 $i, $i_tok, $input_line, 22072 $input_line_number, $last_nonblank_i, $max_token_index, 22073 $next_tok, $next_type, $peeked_ahead, 22074 $prototype, $rhere_target_list, $rtoken_map, 22075 $rtoken_type, $rtokens, $tok, 22076 $type, $type_sequence, $indent_flag, 22077 ); 22078 22079 # TV2: refs to ARRAYS for processing one LINE 22080 # Re-initialized on each call. 22081 my $routput_token_list = []; # stack of output token indexes 22082 my $routput_token_type = []; # token types 22083 my $routput_block_type = []; # types of code block 22084 my $routput_container_type = []; # paren types, such as if, elsif, .. 22085 my $routput_type_sequence = []; # nesting sequential number 22086 my $routput_indent_flag = []; # 22087 22088 # TV3: SCALARS for quote variables. These are initialized with a 22089 # subroutine call and continually updated as lines are processed. 22090 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, 22091 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); 22092 22093 # TV4: SCALARS for multi-line identifiers and 22094 # statements. These are initialized with a subroutine call 22095 # and continually updated as lines are processed. 22096 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); 22097 22098 # TV5: SCALARS for tracking indentation level. 22099 # Initialized once and continually updated as lines are 22100 # processed. 22101 my ( 22102 $nesting_token_string, $nesting_type_string, 22103 $nesting_block_string, $nesting_block_flag, 22104 $nesting_list_string, $nesting_list_flag, 22105 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 22106 $in_statement_continuation, $level_in_tokenizer, 22107 $slevel_in_tokenizer, $rslevel_stack, 22108 ); 22109 22110 # TV6: SCALARS for remembering several previous 22111 # tokens. Initialized once and continually updated as 22112 # lines are processed. 22113 my ( 22114 $last_nonblank_container_type, $last_nonblank_type_sequence, 22115 $last_last_nonblank_token, $last_last_nonblank_type, 22116 $last_last_nonblank_block_type, $last_last_nonblank_container_type, 22117 $last_last_nonblank_type_sequence, $last_nonblank_prototype, 22118 ); 22119 22120 # ---------------------------------------------------------------- 22121 # beginning of tokenizer variable access and manipulation routines 22122 # ---------------------------------------------------------------- 22123 22124 sub initialize_tokenizer_state { 22125 22126 # TV1: initialized on each call 22127 # TV2: initialized on each call 22128 # TV3: 22129 $in_quote = 0; 22130 $quote_type = 'Q'; 22131 $quote_character = ""; 22132 $quote_pos = 0; 22133 $quote_depth = 0; 22134 $quoted_string_1 = ""; 22135 $quoted_string_2 = ""; 22136 $allowed_quote_modifiers = ""; 22137 22138 # TV4: 22139 $id_scan_state = ''; 22140 $identifier = ''; 22141 $want_paren = ""; 22142 $indented_if_level = 0; 22143 22144 # TV5: 22145 $nesting_token_string = ""; 22146 $nesting_type_string = ""; 22147 $nesting_block_string = '1'; # initially in a block 22148 $nesting_block_flag = 1; 22149 $nesting_list_string = '0'; # initially not in a list 22150 $nesting_list_flag = 0; # initially not in a list 22151 $ci_string_in_tokenizer = ""; 22152 $continuation_string_in_tokenizer = "0"; 22153 $in_statement_continuation = 0; 22154 $level_in_tokenizer = 0; 22155 $slevel_in_tokenizer = 0; 22156 $rslevel_stack = []; 22157 22158 # TV6: 22159 $last_nonblank_container_type = ''; 22160 $last_nonblank_type_sequence = ''; 22161 $last_last_nonblank_token = ';'; 22162 $last_last_nonblank_type = ';'; 22163 $last_last_nonblank_block_type = ''; 22164 $last_last_nonblank_container_type = ''; 22165 $last_last_nonblank_type_sequence = ''; 22166 $last_nonblank_prototype = ""; 22167 } 22168 22169 sub save_tokenizer_state { 22170 22171 my $rTV1 = [ 22172 $block_type, $container_type, $expecting, 22173 $i, $i_tok, $input_line, 22174 $input_line_number, $last_nonblank_i, $max_token_index, 22175 $next_tok, $next_type, $peeked_ahead, 22176 $prototype, $rhere_target_list, $rtoken_map, 22177 $rtoken_type, $rtokens, $tok, 22178 $type, $type_sequence, $indent_flag, 22179 ]; 22180 22181 my $rTV2 = [ 22182 $routput_token_list, $routput_token_type, 22183 $routput_block_type, $routput_container_type, 22184 $routput_type_sequence, $routput_indent_flag, 22185 ]; 22186 22187 my $rTV3 = [ 22188 $in_quote, $quote_type, 22189 $quote_character, $quote_pos, 22190 $quote_depth, $quoted_string_1, 22191 $quoted_string_2, $allowed_quote_modifiers, 22192 ]; 22193 22194 my $rTV4 = 22195 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; 22196 22197 my $rTV5 = [ 22198 $nesting_token_string, $nesting_type_string, 22199 $nesting_block_string, $nesting_block_flag, 22200 $nesting_list_string, $nesting_list_flag, 22201 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 22202 $in_statement_continuation, $level_in_tokenizer, 22203 $slevel_in_tokenizer, $rslevel_stack, 22204 ]; 22205 22206 my $rTV6 = [ 22207 $last_nonblank_container_type, 22208 $last_nonblank_type_sequence, 22209 $last_last_nonblank_token, 22210 $last_last_nonblank_type, 22211 $last_last_nonblank_block_type, 22212 $last_last_nonblank_container_type, 22213 $last_last_nonblank_type_sequence, 22214 $last_nonblank_prototype, 22215 ]; 22216 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; 22217 } 22218 22219 sub restore_tokenizer_state { 22220 my ($rstate) = @_; 22221 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; 22222 ( 22223 $block_type, $container_type, $expecting, 22224 $i, $i_tok, $input_line, 22225 $input_line_number, $last_nonblank_i, $max_token_index, 22226 $next_tok, $next_type, $peeked_ahead, 22227 $prototype, $rhere_target_list, $rtoken_map, 22228 $rtoken_type, $rtokens, $tok, 22229 $type, $type_sequence, $indent_flag, 22230 ) = @{$rTV1}; 22231 22232 ( 22233 $routput_token_list, $routput_token_type, 22234 $routput_block_type, $routput_container_type, 22235 $routput_type_sequence, $routput_type_sequence, 22236 ) = @{$rTV2}; 22237 22238 ( 22239 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, 22240 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, 22241 ) = @{$rTV3}; 22242 22243 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = 22244 @{$rTV4}; 22245 22246 ( 22247 $nesting_token_string, $nesting_type_string, 22248 $nesting_block_string, $nesting_block_flag, 22249 $nesting_list_string, $nesting_list_flag, 22250 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 22251 $in_statement_continuation, $level_in_tokenizer, 22252 $slevel_in_tokenizer, $rslevel_stack, 22253 ) = @{$rTV5}; 22254 22255 ( 22256 $last_nonblank_container_type, 22257 $last_nonblank_type_sequence, 22258 $last_last_nonblank_token, 22259 $last_last_nonblank_type, 22260 $last_last_nonblank_block_type, 22261 $last_last_nonblank_container_type, 22262 $last_last_nonblank_type_sequence, 22263 $last_nonblank_prototype, 22264 ) = @{$rTV6}; 22265 } 22266 22267 sub get_indentation_level { 22268 22269 # patch to avoid reporting error if indented if is not terminated 22270 if ($indented_if_level) { return $level_in_tokenizer - 1 } 22271 return $level_in_tokenizer; 22272 } 22273 22274 sub reset_indentation_level { 22275 $level_in_tokenizer = $_[0]; 22276 $slevel_in_tokenizer = $_[0]; 22277 push @{$rslevel_stack}, $slevel_in_tokenizer; 22278 } 22279 22280 sub peeked_ahead { 22281 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead; 22282 } 22283 22284 # ------------------------------------------------------------ 22285 # end of tokenizer variable access and manipulation routines 22286 # ------------------------------------------------------------ 22287 22288 # ------------------------------------------------------------ 22289 # beginning of various scanner interface routines 22290 # ------------------------------------------------------------ 22291 sub scan_replacement_text { 22292 22293 # check for here-docs in replacement text invoked by 22294 # a substitution operator with executable modifier 'e'. 22295 # 22296 # given: 22297 # $replacement_text 22298 # return: 22299 # $rht = reference to any here-doc targets 22300 my ($replacement_text) = @_; 22301 22302 # quick check 22303 return undef unless ( $replacement_text =~ /<</ ); 22304 22305 write_logfile_entry("scanning replacement text for here-doc targets\n"); 22306 22307 # save the logger object for error messages 22308 my $logger_object = $tokenizer_self->{_logger_object}; 22309 22310 # localize all package variables 22311 local ( 22312 $tokenizer_self, $last_nonblank_token, 22313 $last_nonblank_type, $last_nonblank_block_type, 22314 $statement_type, $in_attribute_list, 22315 $current_package, $context, 22316 %is_constant, %is_user_function, 22317 %user_function_prototype, %is_block_function, 22318 %is_block_list_function, %saw_function_definition, 22319 $brace_depth, $paren_depth, 22320 $square_bracket_depth, @current_depth, 22321 @total_depth, $total_depth, 22322 @nesting_sequence_number, @current_sequence_number, 22323 @paren_type, @paren_semicolon_count, 22324 @paren_structural_type, @brace_type, 22325 @brace_structural_type, @brace_statement_type, 22326 @brace_context, @brace_package, 22327 @square_bracket_type, @square_bracket_structural_type, 22328 @depth_array, @starting_line_of_current_depth, 22329 @nested_ternary_flag, 22330 ); 22331 22332 # save all lexical variables 22333 my $rstate = save_tokenizer_state(); 22334 _decrement_count(); # avoid error check for multiple tokenizers 22335 22336 # make a new tokenizer 22337 my $rOpts = {}; 22338 my $rpending_logfile_message; 22339 my $source_object = 22340 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, 22341 $rpending_logfile_message ); 22342 my $tokenizer = Perl::Tidy::Tokenizer->new( 22343 source_object => $source_object, 22344 logger_object => $logger_object, 22345 starting_line_number => $input_line_number, 22346 ); 22347 22348 # scan the replacement text 22349 1 while ( $tokenizer->get_line() ); 22350 22351 # remove any here doc targets 22352 my $rht = undef; 22353 if ( $tokenizer_self->{_in_here_doc} ) { 22354 $rht = []; 22355 push @{$rht}, 22356 [ 22357 $tokenizer_self->{_here_doc_target}, 22358 $tokenizer_self->{_here_quote_character} 22359 ]; 22360 if ( $tokenizer_self->{_rhere_target_list} ) { 22361 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; 22362 $tokenizer_self->{_rhere_target_list} = undef; 22363 } 22364 $tokenizer_self->{_in_here_doc} = undef; 22365 } 22366 22367 # now its safe to report errors 22368 $tokenizer->report_tokenization_errors(); 22369 22370 # restore all tokenizer lexical variables 22371 restore_tokenizer_state($rstate); 22372 22373 # return the here doc targets 22374 return $rht; 22375 } 22376 22377 sub scan_bare_identifier { 22378 ( $i, $tok, $type, $prototype ) = 22379 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, 22380 $rtoken_map, $max_token_index ); 22381 } 22382 22383 sub scan_identifier { 22384 ( $i, $tok, $type, $id_scan_state, $identifier ) = 22385 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, 22386 $max_token_index, $expecting ); 22387 } 22388 22389 sub scan_id { 22390 ( $i, $tok, $type, $id_scan_state ) = 22391 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, 22392 $id_scan_state, $max_token_index ); 22393 } 22394 22395 sub scan_number { 22396 my $number; 22397 ( $i, $type, $number ) = 22398 scan_number_do( $input_line, $i, $rtoken_map, $type, 22399 $max_token_index ); 22400 return $number; 22401 } 22402 22403 # a sub to warn if token found where term expected 22404 sub error_if_expecting_TERM { 22405 if ( $expecting == TERM ) { 22406 if ( $really_want_term{$last_nonblank_type} ) { 22407 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map, 22408 $rtoken_type, $input_line ); 22409 1; 22410 } 22411 } 22412 } 22413 22414 # a sub to warn if token found where operator expected 22415 sub error_if_expecting_OPERATOR { 22416 if ( $expecting == OPERATOR ) { 22417 my $thing = defined $_[0] ? $_[0] : $tok; 22418 unexpected( $thing, "operator", $i_tok, $last_nonblank_i, 22419 $rtoken_map, $rtoken_type, $input_line ); 22420 if ( $i_tok == 0 ) { 22421 interrupt_logfile(); 22422 warning("Missing ';' above?\n"); 22423 resume_logfile(); 22424 } 22425 1; 22426 } 22427 } 22428 22429 # ------------------------------------------------------------ 22430 # end scanner interfaces 22431 # ------------------------------------------------------------ 22432 22433 my %is_for_foreach; 22434 @_ = qw(for foreach); 22435 @is_for_foreach{@_} = (1) x scalar(@_); 22436 22437 my %is_my_our; 22438 @_ = qw(my our); 22439 @is_my_our{@_} = (1) x scalar(@_); 22440 22441 # These keywords may introduce blocks after parenthesized expressions, 22442 # in the form: 22443 # keyword ( .... ) { BLOCK } 22444 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' 22445 my %is_blocktype_with_paren; 22446 @_ = qw(if elsif unless while until for foreach switch case given when); 22447 @is_blocktype_with_paren{@_} = (1) x scalar(@_); 22448 22449 # ------------------------------------------------------------ 22450 # begin hash of code for handling most token types 22451 # ------------------------------------------------------------ 22452 my $tokenization_code = { 22453 22454 # no special code for these types yet, but syntax checks 22455 # could be added 22456 22457## '!' => undef, 22458## '!=' => undef, 22459## '!~' => undef, 22460## '%=' => undef, 22461## '&&=' => undef, 22462## '&=' => undef, 22463## '+=' => undef, 22464## '-=' => undef, 22465## '..' => undef, 22466## '..' => undef, 22467## '...' => undef, 22468## '.=' => undef, 22469## '<<=' => undef, 22470## '<=' => undef, 22471## '<=>' => undef, 22472## '<>' => undef, 22473## '=' => undef, 22474## '==' => undef, 22475## '=~' => undef, 22476## '>=' => undef, 22477## '>>' => undef, 22478## '>>=' => undef, 22479## '\\' => undef, 22480## '^=' => undef, 22481## '|=' => undef, 22482## '||=' => undef, 22483## '//=' => undef, 22484## '~' => undef, 22485## '~~' => undef, 22486## '!~~' => undef, 22487 22488 '>' => sub { 22489 error_if_expecting_TERM() 22490 if ( $expecting == TERM ); 22491 }, 22492 '|' => sub { 22493 error_if_expecting_TERM() 22494 if ( $expecting == TERM ); 22495 }, 22496 '$' => sub { 22497 22498 # start looking for a scalar 22499 error_if_expecting_OPERATOR("Scalar") 22500 if ( $expecting == OPERATOR ); 22501 scan_identifier(); 22502 22503 if ( $identifier eq '$^W' ) { 22504 $tokenizer_self->{_saw_perl_dash_w} = 1; 22505 } 22506 22507 # Check for indentifier in indirect object slot 22508 # (vorboard.pl, sort.t). Something like: 22509 # /^(print|printf|sort|exec|system)$/ 22510 if ( 22511 $is_indirect_object_taker{$last_nonblank_token} 22512 22513 || ( ( $last_nonblank_token eq '(' ) 22514 && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) 22515 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object 22516 ) 22517 { 22518 $type = 'Z'; 22519 } 22520 }, 22521 '(' => sub { 22522 22523 ++$paren_depth; 22524 $paren_semicolon_count[$paren_depth] = 0; 22525 if ($want_paren) { 22526 $container_type = $want_paren; 22527 $want_paren = ""; 22528 } 22529 else { 22530 $container_type = $last_nonblank_token; 22531 22532 # We can check for a syntax error here of unexpected '(', 22533 # but this is going to get messy... 22534 if ( 22535 $expecting == OPERATOR 22536 22537 # be sure this is not a method call of the form 22538 # &method(...), $method->(..), &{method}(...), 22539 # $ref[2](list) is ok & short for $ref[2]->(list) 22540 # NOTE: at present, braces in something like &{ xxx } 22541 # are not marked as a block, we might have a method call 22542 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ 22543 22544 ) 22545 { 22546 22547 # ref: camel 3 p 703. 22548 if ( $last_last_nonblank_token eq 'do' ) { 22549 complain( 22550"do SUBROUTINE is deprecated; consider & or -> notation\n" 22551 ); 22552 } 22553 else { 22554 22555 # if this is an empty list, (), then it is not an 22556 # error; for example, we might have a constant pi and 22557 # invoke it with pi() or just pi; 22558 my ( $next_nonblank_token, $i_next ) = 22559 find_next_nonblank_token( $i, $rtokens, 22560 $max_token_index ); 22561 if ( $next_nonblank_token ne ')' ) { 22562 my $hint; 22563 error_if_expecting_OPERATOR('('); 22564 22565 if ( $last_nonblank_type eq 'C' ) { 22566 $hint = 22567 "$last_nonblank_token has a void prototype\n"; 22568 } 22569 elsif ( $last_nonblank_type eq 'i' ) { 22570 if ( $i_tok > 0 22571 && $last_nonblank_token =~ /^\$/ ) 22572 { 22573 $hint = 22574"Do you mean '$last_nonblank_token->(' ?\n"; 22575 } 22576 } 22577 if ($hint) { 22578 interrupt_logfile(); 22579 warning($hint); 22580 resume_logfile(); 22581 } 22582 } ## end if ( $next_nonblank_token... 22583 } ## end else [ if ( $last_last_nonblank_token... 22584 } ## end if ( $expecting == OPERATOR... 22585 } 22586 $paren_type[$paren_depth] = $container_type; 22587 ( $type_sequence, $indent_flag ) = 22588 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); 22589 22590 # propagate types down through nested parens 22591 # for example: the second paren in 'if ((' would be structural 22592 # since the first is. 22593 22594 if ( $last_nonblank_token eq '(' ) { 22595 $type = $last_nonblank_type; 22596 } 22597 22598 # We exclude parens as structural after a ',' because it 22599 # causes subtle problems with continuation indentation for 22600 # something like this, where the first 'or' will not get 22601 # indented. 22602 # 22603 # assert( 22604 # __LINE__, 22605 # ( not defined $check ) 22606 # or ref $check 22607 # or $check eq "new" 22608 # or $check eq "old", 22609 # ); 22610 # 22611 # Likewise, we exclude parens where a statement can start 22612 # because of problems with continuation indentation, like 22613 # these: 22614 # 22615 # ($firstline =~ /^#\!.*perl/) 22616 # and (print $File::Find::name, "\n") 22617 # and (return 1); 22618 # 22619 # (ref($usage_fref) =~ /CODE/) 22620 # ? &$usage_fref 22621 # : (&blast_usage, &blast_params, &blast_general_params); 22622 22623 else { 22624 $type = '{'; 22625 } 22626 22627 if ( $last_nonblank_type eq ')' ) { 22628 warning( 22629 "Syntax error? found token '$last_nonblank_type' then '('\n" 22630 ); 22631 } 22632 $paren_structural_type[$paren_depth] = $type; 22633 22634 }, 22635 ')' => sub { 22636 ( $type_sequence, $indent_flag ) = 22637 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); 22638 22639 if ( $paren_structural_type[$paren_depth] eq '{' ) { 22640 $type = '}'; 22641 } 22642 22643 $container_type = $paren_type[$paren_depth]; 22644 22645 # /^(for|foreach)$/ 22646 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { 22647 my $num_sc = $paren_semicolon_count[$paren_depth]; 22648 if ( $num_sc > 0 && $num_sc != 2 ) { 22649 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); 22650 } 22651 } 22652 22653 if ( $paren_depth > 0 ) { $paren_depth-- } 22654 }, 22655 ',' => sub { 22656 if ( $last_nonblank_type eq ',' ) { 22657 complain("Repeated ','s \n"); 22658 } 22659 22660 # patch for operator_expected: note if we are in the list (use.t) 22661 if ( $statement_type eq 'use' ) { $statement_type = '_use' } 22662## FIXME: need to move this elsewhere, perhaps check after a '(' 22663## elsif ($last_nonblank_token eq '(') { 22664## warning("Leading ','s illegal in some versions of perl\n"); 22665## } 22666 }, 22667 ';' => sub { 22668 $context = UNKNOWN_CONTEXT; 22669 $statement_type = ''; 22670 22671 # /^(for|foreach)$/ 22672 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) 22673 { # mark ; in for loop 22674 22675 # Be careful: we do not want a semicolon such as the 22676 # following to be included: 22677 # 22678 # for (sort {strcoll($a,$b);} keys %investments) { 22679 22680 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] 22681 && $square_bracket_depth == 22682 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) 22683 { 22684 22685 $type = 'f'; 22686 $paren_semicolon_count[$paren_depth]++; 22687 } 22688 } 22689 22690 }, 22691 '"' => sub { 22692 error_if_expecting_OPERATOR("String") 22693 if ( $expecting == OPERATOR ); 22694 $in_quote = 1; 22695 $type = 'Q'; 22696 $allowed_quote_modifiers = ""; 22697 }, 22698 "'" => sub { 22699 error_if_expecting_OPERATOR("String") 22700 if ( $expecting == OPERATOR ); 22701 $in_quote = 1; 22702 $type = 'Q'; 22703 $allowed_quote_modifiers = ""; 22704 }, 22705 '`' => sub { 22706 error_if_expecting_OPERATOR("String") 22707 if ( $expecting == OPERATOR ); 22708 $in_quote = 1; 22709 $type = 'Q'; 22710 $allowed_quote_modifiers = ""; 22711 }, 22712 '/' => sub { 22713 my $is_pattern; 22714 22715 if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. 22716 my $msg; 22717 ( $is_pattern, $msg ) = 22718 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, 22719 $max_token_index ); 22720 22721 if ($msg) { 22722 write_diagnostics("DIVIDE:$msg\n"); 22723 write_logfile_entry($msg); 22724 } 22725 } 22726 else { $is_pattern = ( $expecting == TERM ) } 22727 22728 if ($is_pattern) { 22729 $in_quote = 1; 22730 $type = 'Q'; 22731 $allowed_quote_modifiers = '[cgimosxp]'; 22732 } 22733 else { # not a pattern; check for a /= token 22734 22735 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= 22736 $i++; 22737 $tok = '/='; 22738 $type = $tok; 22739 } 22740 22741 #DEBUG - collecting info on what tokens follow a divide 22742 # for development of guessing algorithm 22743 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { 22744 # #write_diagnostics( "DIVIDE? $input_line\n" ); 22745 #} 22746 } 22747 }, 22748 '{' => sub { 22749 22750 # if we just saw a ')', we will label this block with 22751 # its type. We need to do this to allow sub 22752 # code_block_type to determine if this brace starts a 22753 # code block or anonymous hash. (The type of a paren 22754 # pair is the preceding token, such as 'if', 'else', 22755 # etc). 22756 $container_type = ""; 22757 22758 # ATTRS: for a '{' following an attribute list, reset 22759 # things to look like we just saw the sub name 22760 if ( $statement_type =~ /^sub/ ) { 22761 $last_nonblank_token = $statement_type; 22762 $last_nonblank_type = 'i'; 22763 $statement_type = ""; 22764 } 22765 22766 # patch for SWITCH/CASE: hide these keywords from an immediately 22767 # following opening brace 22768 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) 22769 && $statement_type eq $last_nonblank_token ) 22770 { 22771 $last_nonblank_token = ";"; 22772 } 22773 22774 elsif ( $last_nonblank_token eq ')' ) { 22775 $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; 22776 22777 # defensive move in case of a nesting error (pbug.t) 22778 # in which this ')' had no previous '(' 22779 # this nesting error will have been caught 22780 if ( !defined($last_nonblank_token) ) { 22781 $last_nonblank_token = 'if'; 22782 } 22783 22784 # check for syntax error here; 22785 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { 22786 my $list = join( ' ', sort keys %is_blocktype_with_paren ); 22787 warning( 22788 "syntax error at ') {', didn't see one of: $list\n"); 22789 } 22790 } 22791 22792 # patch for paren-less for/foreach glitch, part 2. 22793 # see note below under 'qw' 22794 elsif ($last_nonblank_token eq 'qw' 22795 && $is_for_foreach{$want_paren} ) 22796 { 22797 $last_nonblank_token = $want_paren; 22798 if ( $last_last_nonblank_token eq $want_paren ) { 22799 warning( 22800"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" 22801 ); 22802 22803 } 22804 $want_paren = ""; 22805 } 22806 22807 # now identify which of the three possible types of 22808 # curly braces we have: hash index container, anonymous 22809 # hash reference, or code block. 22810 22811 # non-structural (hash index) curly brace pair 22812 # get marked 'L' and 'R' 22813 if ( is_non_structural_brace() ) { 22814 $type = 'L'; 22815 22816 # patch for SWITCH/CASE: 22817 # allow paren-less identifier after 'when' 22818 # if the brace is preceded by a space 22819 if ( $statement_type eq 'when' 22820 && $last_nonblank_type eq 'i' 22821 && $last_last_nonblank_type eq 'k' 22822 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) 22823 { 22824 $type = '{'; 22825 $block_type = $statement_type; 22826 } 22827 } 22828 22829 # code and anonymous hash have the same type, '{', but are 22830 # distinguished by 'block_type', 22831 # which will be blank for an anonymous hash 22832 else { 22833 22834 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, 22835 $max_token_index ); 22836 22837 # patch to promote bareword type to function taking block 22838 if ( $block_type 22839 && $last_nonblank_type eq 'w' 22840 && $last_nonblank_i >= 0 ) 22841 { 22842 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { 22843 $routput_token_type->[$last_nonblank_i] = 'G'; 22844 } 22845 } 22846 22847 # patch for SWITCH/CASE: if we find a stray opening block brace 22848 # where we might accept a 'case' or 'when' block, then take it 22849 if ( $statement_type eq 'case' 22850 || $statement_type eq 'when' ) 22851 { 22852 if ( !$block_type || $block_type eq '}' ) { 22853 $block_type = $statement_type; 22854 } 22855 } 22856 } 22857 $brace_type[ ++$brace_depth ] = $block_type; 22858 $brace_package[$brace_depth] = $current_package; 22859 ( $type_sequence, $indent_flag ) = 22860 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); 22861 $brace_structural_type[$brace_depth] = $type; 22862 $brace_context[$brace_depth] = $context; 22863 $brace_statement_type[$brace_depth] = $statement_type; 22864 }, 22865 '}' => sub { 22866 $block_type = $brace_type[$brace_depth]; 22867 if ($block_type) { $statement_type = '' } 22868 if ( defined( $brace_package[$brace_depth] ) ) { 22869 $current_package = $brace_package[$brace_depth]; 22870 } 22871 22872 # can happen on brace error (caught elsewhere) 22873 else { 22874 } 22875 ( $type_sequence, $indent_flag ) = 22876 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); 22877 22878 if ( $brace_structural_type[$brace_depth] eq 'L' ) { 22879 $type = 'R'; 22880 } 22881 22882 # propagate type information for 'do' and 'eval' blocks. 22883 # This is necessary to enable us to know if an operator 22884 # or term is expected next 22885 if ( $is_block_operator{ $brace_type[$brace_depth] } ) { 22886 $tok = $brace_type[$brace_depth]; 22887 } 22888 22889 $context = $brace_context[$brace_depth]; 22890 $statement_type = $brace_statement_type[$brace_depth]; 22891 if ( $brace_depth > 0 ) { $brace_depth--; } 22892 }, 22893 '&' => sub { # maybe sub call? start looking 22894 22895 # We have to check for sub call unless we are sure we 22896 # are expecting an operator. This example from s2p 22897 # got mistaken as a q operator in an early version: 22898 # print BODY &q(<<'EOT'); 22899 if ( $expecting != OPERATOR ) { 22900 scan_identifier(); 22901 } 22902 else { 22903 } 22904 }, 22905 '<' => sub { # angle operator or less than? 22906 22907 if ( $expecting != OPERATOR ) { 22908 ( $i, $type ) = 22909 find_angle_operator_termination( $input_line, $i, $rtoken_map, 22910 $expecting, $max_token_index ); 22911 22912 if ( $type eq '<' && $expecting == TERM ) { 22913 error_if_expecting_TERM(); 22914 interrupt_logfile(); 22915 warning("Unterminated <> operator?\n"); 22916 resume_logfile(); 22917 } 22918 } 22919 else { 22920 } 22921 }, 22922 '?' => sub { # ?: conditional or starting pattern? 22923 22924 my $is_pattern; 22925 22926 if ( $expecting == UNKNOWN ) { 22927 22928 my $msg; 22929 ( $is_pattern, $msg ) = 22930 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, 22931 $max_token_index ); 22932 22933 if ($msg) { write_logfile_entry($msg) } 22934 } 22935 else { $is_pattern = ( $expecting == TERM ) } 22936 22937 if ($is_pattern) { 22938 $in_quote = 1; 22939 $type = 'Q'; 22940 $allowed_quote_modifiers = '[cgimosxp]'; 22941 } 22942 else { 22943 ( $type_sequence, $indent_flag ) = 22944 increase_nesting_depth( QUESTION_COLON, 22945 $$rtoken_map[$i_tok] ); 22946 } 22947 }, 22948 '*' => sub { # typeglob, or multiply? 22949 22950 if ( $expecting == TERM ) { 22951 scan_identifier(); 22952 } 22953 else { 22954 22955 if ( $$rtokens[ $i + 1 ] eq '=' ) { 22956 $tok = '*='; 22957 $type = $tok; 22958 $i++; 22959 } 22960 elsif ( $$rtokens[ $i + 1 ] eq '*' ) { 22961 $tok = '**'; 22962 $type = $tok; 22963 $i++; 22964 if ( $$rtokens[ $i + 1 ] eq '=' ) { 22965 $tok = '**='; 22966 $type = $tok; 22967 $i++; 22968 } 22969 } 22970 } 22971 }, 22972 '.' => sub { # what kind of . ? 22973 22974 if ( $expecting != OPERATOR ) { 22975 scan_number(); 22976 if ( $type eq '.' ) { 22977 error_if_expecting_TERM() 22978 if ( $expecting == TERM ); 22979 } 22980 } 22981 else { 22982 } 22983 }, 22984 ':' => sub { 22985 22986 # if this is the first nonblank character, call it a label 22987 # since perl seems to just swallow it 22988 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { 22989 $type = 'J'; 22990 } 22991 22992 # ATTRS: check for a ':' which introduces an attribute list 22993 # (this might eventually get its own token type) 22994 elsif ( $statement_type =~ /^sub/ ) { 22995 $type = 'A'; 22996 $in_attribute_list = 1; 22997 } 22998 22999 # check for scalar attribute, such as 23000 # my $foo : shared = 1; 23001 elsif ($is_my_our{$statement_type} 23002 && $current_depth[QUESTION_COLON] == 0 ) 23003 { 23004 $type = 'A'; 23005 $in_attribute_list = 1; 23006 } 23007 23008 # otherwise, it should be part of a ?/: operator 23009 else { 23010 ( $type_sequence, $indent_flag ) = 23011 decrease_nesting_depth( QUESTION_COLON, 23012 $$rtoken_map[$i_tok] ); 23013 if ( $last_nonblank_token eq '?' ) { 23014 warning("Syntax error near ? :\n"); 23015 } 23016 } 23017 }, 23018 '+' => sub { # what kind of plus? 23019 23020 if ( $expecting == TERM ) { 23021 my $number = scan_number(); 23022 23023 # unary plus is safest assumption if not a number 23024 if ( !defined($number) ) { $type = 'p'; } 23025 } 23026 elsif ( $expecting == OPERATOR ) { 23027 } 23028 else { 23029 if ( $next_type eq 'w' ) { $type = 'p' } 23030 } 23031 }, 23032 '@' => sub { 23033 23034 error_if_expecting_OPERATOR("Array") 23035 if ( $expecting == OPERATOR ); 23036 scan_identifier(); 23037 }, 23038 '%' => sub { # hash or modulo? 23039 23040 # first guess is hash if no following blank 23041 if ( $expecting == UNKNOWN ) { 23042 if ( $next_type ne 'b' ) { $expecting = TERM } 23043 } 23044 if ( $expecting == TERM ) { 23045 scan_identifier(); 23046 } 23047 }, 23048 '[' => sub { 23049 $square_bracket_type[ ++$square_bracket_depth ] = 23050 $last_nonblank_token; 23051 ( $type_sequence, $indent_flag ) = 23052 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); 23053 23054 # It may seem odd, but structural square brackets have 23055 # type '{' and '}'. This simplifies the indentation logic. 23056 if ( !is_non_structural_brace() ) { 23057 $type = '{'; 23058 } 23059 $square_bracket_structural_type[$square_bracket_depth] = $type; 23060 }, 23061 ']' => sub { 23062 ( $type_sequence, $indent_flag ) = 23063 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); 23064 23065 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) 23066 { 23067 $type = '}'; 23068 } 23069 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } 23070 }, 23071 '-' => sub { # what kind of minus? 23072 23073 if ( ( $expecting != OPERATOR ) 23074 && $is_file_test_operator{$next_tok} ) 23075 { 23076 my ( $next_nonblank_token, $i_next ) = 23077 find_next_nonblank_token( $i + 1, $rtokens, 23078 $max_token_index ); 23079 23080 # check for a quoted word like "-w=>xx"; 23081 # it is sufficient to just check for a following '=' 23082 if ( $next_nonblank_token eq '=' ) { 23083 $type = 'm'; 23084 } 23085 else { 23086 $i++; 23087 $tok .= $next_tok; 23088 $type = 'F'; 23089 } 23090 } 23091 elsif ( $expecting == TERM ) { 23092 my $number = scan_number(); 23093 23094 # maybe part of bareword token? unary is safest 23095 if ( !defined($number) ) { $type = 'm'; } 23096 23097 } 23098 elsif ( $expecting == OPERATOR ) { 23099 } 23100 else { 23101 23102 if ( $next_type eq 'w' ) { 23103 $type = 'm'; 23104 } 23105 } 23106 }, 23107 23108 '^' => sub { 23109 23110 # check for special variables like ${^WARNING_BITS} 23111 if ( $expecting == TERM ) { 23112 23113 # FIXME: this should work but will not catch errors 23114 # because we also have to be sure that previous token is 23115 # a type character ($,@,%). 23116 if ( $last_nonblank_token eq '{' 23117 && ( $next_tok =~ /^[A-Za-z_]/ ) ) 23118 { 23119 23120 if ( $next_tok eq 'W' ) { 23121 $tokenizer_self->{_saw_perl_dash_w} = 1; 23122 } 23123 $tok = $tok . $next_tok; 23124 $i = $i + 1; 23125 $type = 'w'; 23126 } 23127 23128 else { 23129 unless ( error_if_expecting_TERM() ) { 23130 23131 # Something like this is valid but strange: 23132 # undef ^I; 23133 complain("The '^' seems unusual here\n"); 23134 } 23135 } 23136 } 23137 }, 23138 23139 '::' => sub { # probably a sub call 23140 scan_bare_identifier(); 23141 }, 23142 '<<' => sub { # maybe a here-doc? 23143 return 23144 unless ( $i < $max_token_index ) 23145 ; # here-doc not possible if end of line 23146 23147 if ( $expecting != OPERATOR ) { 23148 my ( $found_target, $here_doc_target, $here_quote_character, 23149 $saw_error ); 23150 ( 23151 $found_target, $here_doc_target, $here_quote_character, $i, 23152 $saw_error 23153 ) 23154 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, 23155 $max_token_index ); 23156 23157 if ($found_target) { 23158 push @{$rhere_target_list}, 23159 [ $here_doc_target, $here_quote_character ]; 23160 $type = 'h'; 23161 if ( length($here_doc_target) > 80 ) { 23162 my $truncated = substr( $here_doc_target, 0, 80 ); 23163 complain("Long here-target: '$truncated' ...\n"); 23164 } 23165 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { 23166 complain( 23167 "Unconventional here-target: '$here_doc_target'\n" 23168 ); 23169 } 23170 } 23171 elsif ( $expecting == TERM ) { 23172 unless ($saw_error) { 23173 23174 # shouldn't happen.. 23175 warning("Program bug; didn't find here doc target\n"); 23176 report_definite_bug(); 23177 } 23178 } 23179 } 23180 else { 23181 } 23182 }, 23183 '->' => sub { 23184 23185 # if -> points to a bare word, we must scan for an identifier, 23186 # otherwise something like ->y would look like the y operator 23187 scan_identifier(); 23188 }, 23189 23190 # type = 'pp' for pre-increment, '++' for post-increment 23191 '++' => sub { 23192 if ( $expecting == TERM ) { $type = 'pp' } 23193 elsif ( $expecting == UNKNOWN ) { 23194 my ( $next_nonblank_token, $i_next ) = 23195 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 23196 if ( $next_nonblank_token eq '$' ) { $type = 'pp' } 23197 } 23198 }, 23199 23200 '=>' => sub { 23201 if ( $last_nonblank_type eq $tok ) { 23202 complain("Repeated '=>'s \n"); 23203 } 23204 23205 # patch for operator_expected: note if we are in the list (use.t) 23206 # TODO: make version numbers a new token type 23207 if ( $statement_type eq 'use' ) { $statement_type = '_use' } 23208 }, 23209 23210 # type = 'mm' for pre-decrement, '--' for post-decrement 23211 '--' => sub { 23212 23213 if ( $expecting == TERM ) { $type = 'mm' } 23214 elsif ( $expecting == UNKNOWN ) { 23215 my ( $next_nonblank_token, $i_next ) = 23216 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 23217 if ( $next_nonblank_token eq '$' ) { $type = 'mm' } 23218 } 23219 }, 23220 23221 '&&' => sub { 23222 error_if_expecting_TERM() 23223 if ( $expecting == TERM ); 23224 }, 23225 23226 '||' => sub { 23227 error_if_expecting_TERM() 23228 if ( $expecting == TERM ); 23229 }, 23230 23231 '//' => sub { 23232 error_if_expecting_TERM() 23233 if ( $expecting == TERM ); 23234 }, 23235 }; 23236 23237 # ------------------------------------------------------------ 23238 # end hash of code for handling individual token types 23239 # ------------------------------------------------------------ 23240 23241 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); 23242 23243 # These block types terminate statements and do not need a trailing 23244 # semicolon 23245 # patched for SWITCH/CASE/ 23246 my %is_zero_continuation_block_type; 23247 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; 23248 if elsif else unless while until for foreach switch case given when); 23249 @is_zero_continuation_block_type{@_} = (1) x scalar(@_); 23250 23251 my %is_not_zero_continuation_block_type; 23252 @_ = qw(sort grep map do eval); 23253 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); 23254 23255 my %is_logical_container; 23256 @_ = qw(if elsif unless while and or err not && ! || for foreach); 23257 @is_logical_container{@_} = (1) x scalar(@_); 23258 23259 my %is_binary_type; 23260 @_ = qw(|| &&); 23261 @is_binary_type{@_} = (1) x scalar(@_); 23262 23263 my %is_binary_keyword; 23264 @_ = qw(and or err eq ne cmp); 23265 @is_binary_keyword{@_} = (1) x scalar(@_); 23266 23267 # 'L' is token for opening { at hash key 23268 my %is_opening_type; 23269 @_ = qw" L { ( [ "; 23270 @is_opening_type{@_} = (1) x scalar(@_); 23271 23272 # 'R' is token for closing } at hash key 23273 my %is_closing_type; 23274 @_ = qw" R } ) ] "; 23275 @is_closing_type{@_} = (1) x scalar(@_); 23276 23277 my %is_redo_last_next_goto; 23278 @_ = qw(redo last next goto); 23279 @is_redo_last_next_goto{@_} = (1) x scalar(@_); 23280 23281 my %is_use_require; 23282 @_ = qw(use require); 23283 @is_use_require{@_} = (1) x scalar(@_); 23284 23285 my %is_sub_package; 23286 @_ = qw(sub package); 23287 @is_sub_package{@_} = (1) x scalar(@_); 23288 23289 # This hash holds the hash key in $tokenizer_self for these keywords: 23290 my %is_format_END_DATA = ( 23291 'format' => '_in_format', 23292 '__END__' => '_in_end', 23293 '__DATA__' => '_in_data', 23294 ); 23295 23296 # ref: camel 3 p 147, 23297 # but perl may accept undocumented flags 23298 # perl 5.10 adds 'p' (preserve) 23299 my %quote_modifiers = ( 23300 's' => '[cegimosxp]', 23301 'y' => '[cds]', 23302 'tr' => '[cds]', 23303 'm' => '[cgimosxp]', 23304 'qr' => '[imosxp]', 23305 'q' => "", 23306 'qq' => "", 23307 'qw' => "", 23308 'qx' => "", 23309 ); 23310 23311 # table showing how many quoted things to look for after quote operator.. 23312 # s, y, tr have 2 (pattern and replacement) 23313 # others have 1 (pattern only) 23314 my %quote_items = ( 23315 's' => 2, 23316 'y' => 2, 23317 'tr' => 2, 23318 'm' => 1, 23319 'qr' => 1, 23320 'q' => 1, 23321 'qq' => 1, 23322 'qw' => 1, 23323 'qx' => 1, 23324 ); 23325 23326 sub tokenize_this_line { 23327 23328 # This routine breaks a line of perl code into tokens which are of use in 23329 # indentation and reformatting. One of my goals has been to define tokens 23330 # such that a newline may be inserted between any pair of tokens without 23331 # changing or invalidating the program. This version comes close to this, 23332 # although there are necessarily a few exceptions which must be caught by 23333 # the formatter. Many of these involve the treatment of bare words. 23334 # 23335 # The tokens and their types are returned in arrays. See previous 23336 # routine for their names. 23337 # 23338 # See also the array "valid_token_types" in the BEGIN section for an 23339 # up-to-date list. 23340 # 23341 # To simplify things, token types are either a single character, or they 23342 # are identical to the tokens themselves. 23343 # 23344 # As a debugging aid, the -D flag creates a file containing a side-by-side 23345 # comparison of the input string and its tokenization for each line of a file. 23346 # This is an invaluable debugging aid. 23347 # 23348 # In addition to tokens, and some associated quantities, the tokenizer 23349 # also returns flags indication any special line types. These include 23350 # quotes, here_docs, formats. 23351 # 23352 # ----------------------------------------------------------------------- 23353 # 23354 # How to add NEW_TOKENS: 23355 # 23356 # New token types will undoubtedly be needed in the future both to keep up 23357 # with changes in perl and to help adapt the tokenizer to other applications. 23358 # 23359 # Here are some notes on the minimal steps. I wrote these notes while 23360 # adding the 'v' token type for v-strings, which are things like version 23361 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You 23362 # can use your editor to search for the string "NEW_TOKENS" to find the 23363 # appropriate sections to change): 23364 # 23365 # *. Try to talk somebody else into doing it! If not, .. 23366 # 23367 # *. Make a backup of your current version in case things don't work out! 23368 # 23369 # *. Think of a new, unused character for the token type, and add to 23370 # the array @valid_token_types in the BEGIN section of this package. 23371 # For example, I used 'v' for v-strings. 23372 # 23373 # *. Implement coding to recognize the $type of the token in this routine. 23374 # This is the hardest part, and is best done by immitating or modifying 23375 # some of the existing coding. For example, to recognize v-strings, I 23376 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with 23377 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. 23378 # 23379 # *. Update sub operator_expected. This update is critically important but 23380 # the coding is trivial. Look at the comments in that routine for help. 23381 # For v-strings, which should behave like numbers, I just added 'v' to the 23382 # regex used to handle numbers and strings (types 'n' and 'Q'). 23383 # 23384 # *. Implement a 'bond strength' rule in sub set_bond_strengths in 23385 # Perl::Tidy::Formatter for breaking lines around this token type. You can 23386 # skip this step and take the default at first, then adjust later to get 23387 # desired results. For adding type 'v', I looked at sub bond_strength and 23388 # saw that number type 'n' was using default strengths, so I didn't do 23389 # anything. I may tune it up someday if I don't like the way line 23390 # breaks with v-strings look. 23391 # 23392 # *. Implement a 'whitespace' rule in sub set_white_space_flag in 23393 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine 23394 # and saw that type 'n' used spaces on both sides, so I just added 'v' 23395 # to the array @spaces_both_sides. 23396 # 23397 # *. Update HtmlWriter package so that users can colorize the token as 23398 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in 23399 # that package. For v-strings, I initially chose to use a default color 23400 # equal to the default for numbers, but it might be nice to change that 23401 # eventually. 23402 # 23403 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. 23404 # 23405 # *. Run lots and lots of debug tests. Start with special files designed 23406 # to test the new token type. Run with the -D flag to create a .DEBUG 23407 # file which shows the tokenization. When these work ok, test as many old 23408 # scripts as possible. Start with all of the '.t' files in the 'test' 23409 # directory of the distribution file. Compare .tdy output with previous 23410 # version and updated version to see the differences. Then include as 23411 # many more files as possible. My own technique has been to collect a huge 23412 # number of perl scripts (thousands!) into one directory and run perltidy 23413 # *, then run diff between the output of the previous version and the 23414 # current version. 23415 # 23416 # *. For another example, search for the smartmatch operator '~~' 23417 # with your editor to see where updates were made for it. 23418 # 23419 # ----------------------------------------------------------------------- 23420 23421 my $line_of_tokens = shift; 23422 my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; 23423 23424 # patch while coding change is underway 23425 # make callers private data to allow access 23426 # $tokenizer_self = $caller_tokenizer_self; 23427 23428 # extract line number for use in error messages 23429 $input_line_number = $line_of_tokens->{_line_number}; 23430 23431 # reinitialize for multi-line quote 23432 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; 23433 23434 # check for pod documentation 23435 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { 23436 23437 # must not be in multi-line quote 23438 # and must not be in an eqn 23439 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) 23440 { 23441 $tokenizer_self->{_in_pod} = 1; 23442 return; 23443 } 23444 } 23445 23446 $input_line = $untrimmed_input_line; 23447 23448 chomp $input_line; 23449 23450 # trim start of this line unless we are continuing a quoted line 23451 # do not trim end because we might end in a quote (test: deken4.pl) 23452 # Perl::Tidy::Formatter will delete needless trailing blanks 23453 unless ( $in_quote && ( $quote_type eq 'Q' ) ) { 23454 $input_line =~ s/^\s*//; # trim left end 23455 } 23456 23457 # update the copy of the line for use in error messages 23458 # This must be exactly what we give the pre_tokenizer 23459 $tokenizer_self->{_line_text} = $input_line; 23460 23461 # re-initialize for the main loop 23462 $routput_token_list = []; # stack of output token indexes 23463 $routput_token_type = []; # token types 23464 $routput_block_type = []; # types of code block 23465 $routput_container_type = []; # paren types, such as if, elsif, .. 23466 $routput_type_sequence = []; # nesting sequential number 23467 23468 $rhere_target_list = []; 23469 23470 $tok = $last_nonblank_token; 23471 $type = $last_nonblank_type; 23472 $prototype = $last_nonblank_prototype; 23473 $last_nonblank_i = -1; 23474 $block_type = $last_nonblank_block_type; 23475 $container_type = $last_nonblank_container_type; 23476 $type_sequence = $last_nonblank_type_sequence; 23477 $indent_flag = 0; 23478 $peeked_ahead = 0; 23479 23480 # tokenization is done in two stages.. 23481 # stage 1 is a very simple pre-tokenization 23482 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens 23483 23484 # a little optimization for a full-line comment 23485 if ( !$in_quote && ( $input_line =~ /^#/ ) ) { 23486 $max_tokens_wanted = 1 # no use tokenizing a comment 23487 } 23488 23489 # start by breaking the line into pre-tokens 23490 ( $rtokens, $rtoken_map, $rtoken_type ) = 23491 pre_tokenize( $input_line, $max_tokens_wanted ); 23492 23493 $max_token_index = scalar(@$rtokens) - 1; 23494 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic 23495 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced 23496 push( @$rtoken_type, 'b', 'b', 'b' ); 23497 23498 # initialize for main loop 23499 for $i ( 0 .. $max_token_index + 3 ) { 23500 $routput_token_type->[$i] = ""; 23501 $routput_block_type->[$i] = ""; 23502 $routput_container_type->[$i] = ""; 23503 $routput_type_sequence->[$i] = ""; 23504 $routput_indent_flag->[$i] = 0; 23505 } 23506 $i = -1; 23507 $i_tok = -1; 23508 23509 # ------------------------------------------------------------ 23510 # begin main tokenization loop 23511 # ------------------------------------------------------------ 23512 23513 # we are looking at each pre-token of one line and combining them 23514 # into tokens 23515 while ( ++$i <= $max_token_index ) { 23516 23517 if ($in_quote) { # continue looking for end of a quote 23518 $type = $quote_type; 23519 23520 unless ( @{$routput_token_list} ) 23521 { # initialize if continuation line 23522 push( @{$routput_token_list}, $i ); 23523 $routput_token_type->[$i] = $type; 23524 23525 } 23526 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); 23527 23528 # scan for the end of the quote or pattern 23529 ( 23530 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 23531 $quoted_string_1, $quoted_string_2 23532 ) 23533 = do_quote( 23534 $i, $in_quote, $quote_character, 23535 $quote_pos, $quote_depth, $quoted_string_1, 23536 $quoted_string_2, $rtokens, $rtoken_map, 23537 $max_token_index 23538 ); 23539 23540 # all done if we didn't find it 23541 last if ($in_quote); 23542 23543 # save pattern and replacement text for rescanning 23544 my $qs1 = $quoted_string_1; 23545 my $qs2 = $quoted_string_2; 23546 23547 # re-initialize for next search 23548 $quote_character = ''; 23549 $quote_pos = 0; 23550 $quote_type = 'Q'; 23551 $quoted_string_1 = ""; 23552 $quoted_string_2 = ""; 23553 last if ( ++$i > $max_token_index ); 23554 23555 # look for any modifiers 23556 if ($allowed_quote_modifiers) { 23557 23558 # check for exact quote modifiers 23559 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { 23560 my $str = $$rtokens[$i]; 23561 my $saw_modifier_e; 23562 while ( $str =~ /\G$allowed_quote_modifiers/gc ) { 23563 my $pos = pos($str); 23564 my $char = substr( $str, $pos - 1, 1 ); 23565 $saw_modifier_e ||= ( $char eq 'e' ); 23566 } 23567 23568 # For an 'e' quote modifier we must scan the replacement 23569 # text for here-doc targets. 23570 if ($saw_modifier_e) { 23571 23572 my $rht = scan_replacement_text($qs1); 23573 23574 # Change type from 'Q' to 'h' for quotes with 23575 # here-doc targets so that the formatter (see sub 23576 # print_line_of_tokens) will not make any line 23577 # breaks after this point. 23578 if ($rht) { 23579 push @{$rhere_target_list}, @{$rht}; 23580 $type = 'h'; 23581 if ( $i_tok < 0 ) { 23582 my $ilast = $routput_token_list->[-1]; 23583 $routput_token_type->[$ilast] = $type; 23584 } 23585 } 23586 } 23587 23588 if ( defined( pos($str) ) ) { 23589 23590 # matched 23591 if ( pos($str) == length($str) ) { 23592 last if ( ++$i > $max_token_index ); 23593 } 23594 23595 # Looks like a joined quote modifier 23596 # and keyword, maybe something like 23597 # s/xxx/yyy/gefor @k=... 23598 # Example is "galgen.pl". Would have to split 23599 # the word and insert a new token in the 23600 # pre-token list. This is so rare that I haven't 23601 # done it. Will just issue a warning citation. 23602 23603 # This error might also be triggered if my quote 23604 # modifier characters are incomplete 23605 else { 23606 warning(<<EOM); 23607 23608Partial match to quote modifier $allowed_quote_modifiers at word: '$str' 23609Please put a space between quote modifiers and trailing keywords. 23610EOM 23611 23612 # print "token $$rtokens[$i]\n"; 23613 # my $num = length($str) - pos($str); 23614 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); 23615 # print "continuing with new token $$rtokens[$i]\n"; 23616 23617 # skipping past this token does least damage 23618 last if ( ++$i > $max_token_index ); 23619 } 23620 } 23621 else { 23622 23623 # example file: rokicki4.pl 23624 # This error might also be triggered if my quote 23625 # modifier characters are incomplete 23626 write_logfile_entry( 23627"Note: found word $str at quote modifier location\n" 23628 ); 23629 } 23630 } 23631 23632 # re-initialize 23633 $allowed_quote_modifiers = ""; 23634 } 23635 } 23636 23637 unless ( $tok =~ /^\s*$/ ) { 23638 23639 # try to catch some common errors 23640 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { 23641 23642 if ( $last_nonblank_token eq 'eq' ) { 23643 complain("Should 'eq' be '==' here ?\n"); 23644 } 23645 elsif ( $last_nonblank_token eq 'ne' ) { 23646 complain("Should 'ne' be '!=' here ?\n"); 23647 } 23648 } 23649 23650 $last_last_nonblank_token = $last_nonblank_token; 23651 $last_last_nonblank_type = $last_nonblank_type; 23652 $last_last_nonblank_block_type = $last_nonblank_block_type; 23653 $last_last_nonblank_container_type = 23654 $last_nonblank_container_type; 23655 $last_last_nonblank_type_sequence = 23656 $last_nonblank_type_sequence; 23657 $last_nonblank_token = $tok; 23658 $last_nonblank_type = $type; 23659 $last_nonblank_prototype = $prototype; 23660 $last_nonblank_block_type = $block_type; 23661 $last_nonblank_container_type = $container_type; 23662 $last_nonblank_type_sequence = $type_sequence; 23663 $last_nonblank_i = $i_tok; 23664 } 23665 23666 # store previous token type 23667 if ( $i_tok >= 0 ) { 23668 $routput_token_type->[$i_tok] = $type; 23669 $routput_block_type->[$i_tok] = $block_type; 23670 $routput_container_type->[$i_tok] = $container_type; 23671 $routput_type_sequence->[$i_tok] = $type_sequence; 23672 $routput_indent_flag->[$i_tok] = $indent_flag; 23673 } 23674 my $pre_tok = $$rtokens[$i]; # get the next pre-token 23675 my $pre_type = $$rtoken_type[$i]; # and type 23676 $tok = $pre_tok; 23677 $type = $pre_type; # to be modified as necessary 23678 $block_type = ""; # blank for all tokens except code block braces 23679 $container_type = ""; # blank for all tokens except some parens 23680 $type_sequence = ""; # blank for all tokens except ?/: 23681 $indent_flag = 0; 23682 $prototype = ""; # blank for all tokens except user defined subs 23683 $i_tok = $i; 23684 23685 # this pre-token will start an output token 23686 push( @{$routput_token_list}, $i_tok ); 23687 23688 # continue gathering identifier if necessary 23689 # but do not start on blanks and comments 23690 if ( $id_scan_state && $pre_type !~ /[b#]/ ) { 23691 23692 if ( $id_scan_state =~ /^(sub|package)/ ) { 23693 scan_id(); 23694 } 23695 else { 23696 scan_identifier(); 23697 } 23698 23699 last if ($id_scan_state); 23700 next if ( ( $i > 0 ) || $type ); 23701 23702 # didn't find any token; start over 23703 $type = $pre_type; 23704 $tok = $pre_tok; 23705 } 23706 23707 # handle whitespace tokens.. 23708 next if ( $type eq 'b' ); 23709 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; 23710 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; 23711 23712 # Build larger tokens where possible, since we are not in a quote. 23713 # 23714 # First try to assemble digraphs. The following tokens are 23715 # excluded and handled specially: 23716 # '/=' is excluded because the / might start a pattern. 23717 # 'x=' is excluded since it might be $x=, with $ on previous line 23718 # '**' and *= might be typeglobs of punctuation variables 23719 # I have allowed tokens starting with <, such as <=, 23720 # because I don't think these could be valid angle operators. 23721 # test file: storrs4.pl 23722 my $test_tok = $tok . $$rtokens[ $i + 1 ]; 23723 my $combine_ok = $is_digraph{$test_tok}; 23724 23725 # check for special cases which cannot be combined 23726 if ($combine_ok) { 23727 23728 # '//' must be defined_or operator if an operator is expected. 23729 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) 23730 # could be migrated here for clarity 23731 if ( $test_tok eq '//' ) { 23732 my $next_type = $$rtokens[ $i + 1 ]; 23733 my $expecting = 23734 operator_expected( $prev_type, $tok, $next_type ); 23735 $combine_ok = 0 unless ( $expecting == OPERATOR ); 23736 } 23737 } 23738 23739 if ( 23740 $combine_ok 23741 && ( $test_tok ne '/=' ) # might be pattern 23742 && ( $test_tok ne 'x=' ) # might be $x 23743 && ( $test_tok ne '**' ) # typeglob? 23744 && ( $test_tok ne '*=' ) # typeglob? 23745 ) 23746 { 23747 $tok = $test_tok; 23748 $i++; 23749 23750 # Now try to assemble trigraphs. Note that all possible 23751 # perl trigraphs can be constructed by appending a character 23752 # to a digraph. 23753 $test_tok = $tok . $$rtokens[ $i + 1 ]; 23754 23755 if ( $is_trigraph{$test_tok} ) { 23756 $tok = $test_tok; 23757 $i++; 23758 } 23759 } 23760 23761 $type = $tok; 23762 $next_tok = $$rtokens[ $i + 1 ]; 23763 $next_type = $$rtoken_type[ $i + 1 ]; 23764 23765 TOKENIZER_DEBUG_FLAG_TOKENIZE && do { 23766 local $" = ')('; 23767 my @debug_list = ( 23768 $last_nonblank_token, $tok, 23769 $next_tok, $brace_depth, 23770 $brace_type[$brace_depth], $paren_depth, 23771 $paren_type[$paren_depth] 23772 ); 23773 print "TOKENIZE:(@debug_list)\n"; 23774 }; 23775 23776 # turn off attribute list on first non-blank, non-bareword 23777 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } 23778 23779 ############################################################### 23780 # We have the next token, $tok. 23781 # Now we have to examine this token and decide what it is 23782 # and define its $type 23783 # 23784 # section 1: bare words 23785 ############################################################### 23786 23787 if ( $pre_type eq 'w' ) { 23788 $expecting = operator_expected( $prev_type, $tok, $next_type ); 23789 my ( $next_nonblank_token, $i_next ) = 23790 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 23791 23792 # ATTRS: handle sub and variable attributes 23793 if ($in_attribute_list) { 23794 23795 # treat bare word followed by open paren like qw( 23796 if ( $next_nonblank_token eq '(' ) { 23797 $in_quote = $quote_items{'q'}; 23798 $allowed_quote_modifiers = $quote_modifiers{'q'}; 23799 $type = 'q'; 23800 $quote_type = 'q'; 23801 next; 23802 } 23803 23804 # handle bareword not followed by open paren 23805 else { 23806 $type = 'w'; 23807 next; 23808 } 23809 } 23810 23811 # quote a word followed by => operator 23812 if ( $next_nonblank_token eq '=' ) { 23813 23814 if ( $$rtokens[ $i_next + 1 ] eq '>' ) { 23815 if ( $is_constant{$current_package}{$tok} ) { 23816 $type = 'C'; 23817 } 23818 elsif ( $is_user_function{$current_package}{$tok} ) { 23819 $type = 'U'; 23820 $prototype = 23821 $user_function_prototype{$current_package}{$tok}; 23822 } 23823 elsif ( $tok =~ /^v\d+$/ ) { 23824 $type = 'v'; 23825 report_v_string($tok); 23826 } 23827 else { $type = 'w' } 23828 23829 next; 23830 } 23831 } 23832 23833 # quote a bare word within braces..like xxx->{s}; note that we 23834 # must be sure this is not a structural brace, to avoid 23835 # mistaking {s} in the following for a quoted bare word: 23836 # for(@[){s}bla}BLA} 23837 # Also treat q in something like var{-q} as a bare word, not qoute operator 23838 ##if ( ( $last_nonblank_type eq 'L' ) 23839 ## && ( $next_nonblank_token eq '}' ) ) 23840 if ( 23841 $next_nonblank_token eq '}' 23842 && ( 23843 $last_nonblank_type eq 'L' 23844 || ( $last_nonblank_type eq 'm' 23845 && $last_last_nonblank_type eq 'L' ) 23846 ) 23847 ) 23848 { 23849 $type = 'w'; 23850 next; 23851 } 23852 23853 # a bare word immediately followed by :: is not a keyword; 23854 # use $tok_kw when testing for keywords to avoid a mistake 23855 my $tok_kw = $tok; 23856 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) 23857 { 23858 $tok_kw .= '::'; 23859 } 23860 23861 # handle operator x (now we know it isn't $x=) 23862 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { 23863 if ( $tok eq 'x' ) { 23864 23865 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= 23866 $tok = 'x='; 23867 $type = $tok; 23868 $i++; 23869 } 23870 else { 23871 $type = 'x'; 23872 } 23873 } 23874 23875 # FIXME: Patch: mark something like x4 as an integer for now 23876 # It gets fixed downstream. This is easier than 23877 # splitting the pretoken. 23878 else { 23879 $type = 'n'; 23880 } 23881 } 23882 23883 elsif ( ( $tok eq 'strict' ) 23884 and ( $last_nonblank_token eq 'use' ) ) 23885 { 23886 $tokenizer_self->{_saw_use_strict} = 1; 23887 scan_bare_identifier(); 23888 } 23889 23890 elsif ( ( $tok eq 'warnings' ) 23891 and ( $last_nonblank_token eq 'use' ) ) 23892 { 23893 $tokenizer_self->{_saw_perl_dash_w} = 1; 23894 23895 # scan as identifier, so that we pick up something like: 23896 # use warnings::register 23897 scan_bare_identifier(); 23898 } 23899 23900 elsif ( 23901 $tok eq 'AutoLoader' 23902 && $tokenizer_self->{_look_for_autoloader} 23903 && ( 23904 $last_nonblank_token eq 'use' 23905 23906 # these regexes are from AutoSplit.pm, which we want 23907 # to mimic 23908 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ 23909 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ 23910 ) 23911 ) 23912 { 23913 write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); 23914 $tokenizer_self->{_saw_autoloader} = 1; 23915 $tokenizer_self->{_look_for_autoloader} = 0; 23916 scan_bare_identifier(); 23917 } 23918 23919 elsif ( 23920 $tok eq 'SelfLoader' 23921 && $tokenizer_self->{_look_for_selfloader} 23922 && ( $last_nonblank_token eq 'use' 23923 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ 23924 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) 23925 ) 23926 { 23927 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); 23928 $tokenizer_self->{_saw_selfloader} = 1; 23929 $tokenizer_self->{_look_for_selfloader} = 0; 23930 scan_bare_identifier(); 23931 } 23932 23933 elsif ( ( $tok eq 'constant' ) 23934 and ( $last_nonblank_token eq 'use' ) ) 23935 { 23936 scan_bare_identifier(); 23937 my ( $next_nonblank_token, $i_next ) = 23938 find_next_nonblank_token( $i, $rtokens, 23939 $max_token_index ); 23940 23941 if ($next_nonblank_token) { 23942 23943 if ( $is_keyword{$next_nonblank_token} ) { 23944 warning( 23945"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" 23946 ); 23947 } 23948 23949 # FIXME: could check for error in which next token is 23950 # not a word (number, punctuation, ..) 23951 else { 23952 $is_constant{$current_package} 23953 {$next_nonblank_token} = 1; 23954 } 23955 } 23956 } 23957 23958 # various quote operators 23959 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { 23960 if ( $expecting == OPERATOR ) { 23961 23962 # patch for paren-less for/foreach glitch, part 1 23963 # perl will accept this construct as valid: 23964 # 23965 # foreach my $key qw\Uno Due Tres Quadro\ { 23966 # print "Set $key\n"; 23967 # } 23968 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) 23969 { 23970 error_if_expecting_OPERATOR(); 23971 } 23972 } 23973 $in_quote = $quote_items{$tok}; 23974 $allowed_quote_modifiers = $quote_modifiers{$tok}; 23975 23976 # All quote types are 'Q' except possibly qw quotes. 23977 # qw quotes are special in that they may generally be trimmed 23978 # of leading and trailing whitespace. So they are given a 23979 # separate type, 'q', unless requested otherwise. 23980 $type = 23981 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) 23982 ? 'q' 23983 : 'Q'; 23984 $quote_type = $type; 23985 } 23986 23987 # check for a statement label 23988 elsif ( 23989 ( $next_nonblank_token eq ':' ) 23990 && ( $$rtokens[ $i_next + 1 ] ne ':' ) 23991 && ( $i_next <= $max_token_index ) # colon on same line 23992 && label_ok() 23993 ) 23994 { 23995 if ( $tok !~ /[A-Z]/ ) { 23996 push @{ $tokenizer_self->{_rlower_case_labels_at} }, 23997 $input_line_number; 23998 } 23999 $type = 'J'; 24000 $tok .= ':'; 24001 $i = $i_next; 24002 next; 24003 } 24004 24005 # 'sub' || 'package' 24006 elsif ( $is_sub_package{$tok_kw} ) { 24007 error_if_expecting_OPERATOR() 24008 if ( $expecting == OPERATOR ); 24009 scan_id(); 24010 } 24011 24012 # Note on token types for format, __DATA__, __END__: 24013 # It simplifies things to give these type ';', so that when we 24014 # start rescanning we will be expecting a token of type TERM. 24015 # We will switch to type 'k' before outputting the tokens. 24016 elsif ( $is_format_END_DATA{$tok_kw} ) { 24017 $type = ';'; # make tokenizer look for TERM next 24018 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; 24019 last; 24020 } 24021 24022 elsif ( $is_keyword{$tok_kw} ) { 24023 $type = 'k'; 24024 24025 # Since for and foreach may not be followed immediately 24026 # by an opening paren, we have to remember which keyword 24027 # is associated with the next '(' 24028 if ( $is_for_foreach{$tok} ) { 24029 if ( new_statement_ok() ) { 24030 $want_paren = $tok; 24031 } 24032 } 24033 24034 # recognize 'use' statements, which are special 24035 elsif ( $is_use_require{$tok} ) { 24036 $statement_type = $tok; 24037 error_if_expecting_OPERATOR() 24038 if ( $expecting == OPERATOR ); 24039 } 24040 24041 # remember my and our to check for trailing ": shared" 24042 elsif ( $is_my_our{$tok} ) { 24043 $statement_type = $tok; 24044 } 24045 24046 # Check for misplaced 'elsif' and 'else', but allow isolated 24047 # else or elsif blocks to be formatted. This is indicated 24048 # by a last noblank token of ';' 24049 elsif ( $tok eq 'elsif' ) { 24050 if ( $last_nonblank_token ne ';' 24051 && $last_nonblank_block_type !~ 24052 /^(if|elsif|unless)$/ ) 24053 { 24054 warning( 24055"expecting '$tok' to follow one of 'if|elsif|unless'\n" 24056 ); 24057 } 24058 } 24059 elsif ( $tok eq 'else' ) { 24060 24061 # patched for SWITCH/CASE 24062 if ( $last_nonblank_token ne ';' 24063 && $last_nonblank_block_type !~ 24064 /^(if|elsif|unless|case|when)$/ ) 24065 { 24066 warning( 24067"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" 24068 ); 24069 } 24070 } 24071 elsif ( $tok eq 'continue' ) { 24072 if ( $last_nonblank_token ne ';' 24073 && $last_nonblank_block_type !~ 24074 /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) 24075 { 24076 24077 # note: ';' '{' and '}' in list above 24078 # because continues can follow bare blocks; 24079 # ':' is labeled block 24080 # 24081 ############################################ 24082 # NOTE: This check has been deactivated because 24083 # continue has an alternative usage for given/when 24084 # blocks in perl 5.10 24085 ## warning("'$tok' should follow a block\n"); 24086 ############################################ 24087 } 24088 } 24089 24090 # patch for SWITCH/CASE if 'case' and 'when are 24091 # treated as keywords. 24092 elsif ( $tok eq 'when' || $tok eq 'case' ) { 24093 $statement_type = $tok; # next '{' is block 24094 } 24095 24096 # indent trailing if/unless/while/until 24097 # outdenting will be handled by later indentation loop 24098 if ( $tok =~ /^(if|unless|while|until)$/ 24099 && $next_nonblank_token ne '(' ) 24100 { 24101 $indent_flag = 1; 24102 } 24103 } 24104 24105 # check for inline label following 24106 # /^(redo|last|next|goto)$/ 24107 elsif (( $last_nonblank_type eq 'k' ) 24108 && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) 24109 { 24110 $type = 'j'; 24111 next; 24112 } 24113 24114 # something else -- 24115 else { 24116 24117 scan_bare_identifier(); 24118 if ( $type eq 'w' ) { 24119 24120 if ( $expecting == OPERATOR ) { 24121 24122 # don't complain about possible indirect object 24123 # notation. 24124 # For example: 24125 # package main; 24126 # sub new($) { ... } 24127 # $b = new A::; # calls A::new 24128 # $c = new A; # same thing but suspicious 24129 # This will call A::new but we have a 'new' in 24130 # main:: which looks like a constant. 24131 # 24132 if ( $last_nonblank_type eq 'C' ) { 24133 if ( $tok !~ /::$/ ) { 24134 complain(<<EOM); 24135Expecting operator after '$last_nonblank_token' but found bare word '$tok' 24136 Maybe indirectet object notation? 24137EOM 24138 } 24139 } 24140 else { 24141 error_if_expecting_OPERATOR("bareword"); 24142 } 24143 } 24144 24145 # mark bare words immediately followed by a paren as 24146 # functions 24147 $next_tok = $$rtokens[ $i + 1 ]; 24148 if ( $next_tok eq '(' ) { 24149 $type = 'U'; 24150 } 24151 24152 # underscore after file test operator is file handle 24153 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { 24154 $type = 'Z'; 24155 } 24156 24157 # patch for SWITCH/CASE if 'case' and 'when are 24158 # not treated as keywords: 24159 if ( 24160 ( 24161 $tok eq 'case' 24162 && $brace_type[$brace_depth] eq 'switch' 24163 ) 24164 || ( $tok eq 'when' 24165 && $brace_type[$brace_depth] eq 'given' ) 24166 ) 24167 { 24168 $statement_type = $tok; # next '{' is block 24169 $type = 'k'; # for keyword syntax coloring 24170 } 24171 24172 # patch for SWITCH/CASE if switch and given not keywords 24173 # Switch is not a perl 5 keyword, but we will gamble 24174 # and mark switch followed by paren as a keyword. This 24175 # is only necessary to get html syntax coloring nice, 24176 # and does not commit this as being a switch/case. 24177 if ( $next_nonblank_token eq '(' 24178 && ( $tok eq 'switch' || $tok eq 'given' ) ) 24179 { 24180 $type = 'k'; # for keyword syntax coloring 24181 } 24182 } 24183 } 24184 } 24185 24186 ############################################################### 24187 # section 2: strings of digits 24188 ############################################################### 24189 elsif ( $pre_type eq 'd' ) { 24190 $expecting = operator_expected( $prev_type, $tok, $next_type ); 24191 error_if_expecting_OPERATOR("Number") 24192 if ( $expecting == OPERATOR ); 24193 my $number = scan_number(); 24194 if ( !defined($number) ) { 24195 24196 # shouldn't happen - we should always get a number 24197 warning("non-number beginning with digit--program bug\n"); 24198 report_definite_bug(); 24199 } 24200 } 24201 24202 ############################################################### 24203 # section 3: all other tokens 24204 ############################################################### 24205 24206 else { 24207 last if ( $tok eq '#' ); 24208 my $code = $tokenization_code->{$tok}; 24209 if ($code) { 24210 $expecting = 24211 operator_expected( $prev_type, $tok, $next_type ); 24212 $code->(); 24213 redo if $in_quote; 24214 } 24215 } 24216 } 24217 24218 # ----------------------------- 24219 # end of main tokenization loop 24220 # ----------------------------- 24221 24222 if ( $i_tok >= 0 ) { 24223 $routput_token_type->[$i_tok] = $type; 24224 $routput_block_type->[$i_tok] = $block_type; 24225 $routput_container_type->[$i_tok] = $container_type; 24226 $routput_type_sequence->[$i_tok] = $type_sequence; 24227 $routput_indent_flag->[$i_tok] = $indent_flag; 24228 } 24229 24230 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { 24231 $last_last_nonblank_token = $last_nonblank_token; 24232 $last_last_nonblank_type = $last_nonblank_type; 24233 $last_last_nonblank_block_type = $last_nonblank_block_type; 24234 $last_last_nonblank_container_type = $last_nonblank_container_type; 24235 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; 24236 $last_nonblank_token = $tok; 24237 $last_nonblank_type = $type; 24238 $last_nonblank_block_type = $block_type; 24239 $last_nonblank_container_type = $container_type; 24240 $last_nonblank_type_sequence = $type_sequence; 24241 $last_nonblank_prototype = $prototype; 24242 } 24243 24244 # reset indentation level if necessary at a sub or package 24245 # in an attempt to recover from a nesting error 24246 if ( $level_in_tokenizer < 0 ) { 24247 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { 24248 reset_indentation_level(0); 24249 brace_warning("resetting level to 0 at $1 $2\n"); 24250 } 24251 } 24252 24253 # all done tokenizing this line ... 24254 # now prepare the final list of tokens and types 24255 24256 my @token_type = (); # stack of output token types 24257 my @block_type = (); # stack of output code block types 24258 my @container_type = (); # stack of output code container types 24259 my @type_sequence = (); # stack of output type sequence numbers 24260 my @tokens = (); # output tokens 24261 my @levels = (); # structural brace levels of output tokens 24262 my @slevels = (); # secondary nesting levels of output tokens 24263 my @nesting_tokens = (); # string of tokens leading to this depth 24264 my @nesting_types = (); # string of token types leading to this depth 24265 my @nesting_blocks = (); # string of block types leading to this depth 24266 my @nesting_lists = (); # string of list types leading to this depth 24267 my @ci_string = (); # string needed to compute continuation indentation 24268 my @container_environment = (); # BLOCK or LIST 24269 my $container_environment = ''; 24270 my $im = -1; # previous $i value 24271 my $num; 24272 my $ci_string_sum = ones_count($ci_string_in_tokenizer); 24273 24274# Computing Token Indentation 24275# 24276# The final section of the tokenizer forms tokens and also computes 24277# parameters needed to find indentation. It is much easier to do it 24278# in the tokenizer than elsewhere. Here is a brief description of how 24279# indentation is computed. Perl::Tidy computes indentation as the sum 24280# of 2 terms: 24281# 24282# (1) structural indentation, such as if/else/elsif blocks 24283# (2) continuation indentation, such as long parameter call lists. 24284# 24285# These are occasionally called primary and secondary indentation. 24286# 24287# Structural indentation is introduced by tokens of type '{', although 24288# the actual tokens might be '{', '(', or '['. Structural indentation 24289# is of two types: BLOCK and non-BLOCK. Default structural indentation 24290# is 4 characters if the standard indentation scheme is used. 24291# 24292# Continuation indentation is introduced whenever a line at BLOCK level 24293# is broken before its termination. Default continuation indentation 24294# is 2 characters in the standard indentation scheme. 24295# 24296# Both types of indentation may be nested arbitrarily deep and 24297# interlaced. The distinction between the two is somewhat arbitrary. 24298# 24299# For each token, we will define two variables which would apply if 24300# the current statement were broken just before that token, so that 24301# that token started a new line: 24302# 24303# $level = the structural indentation level, 24304# $ci_level = the continuation indentation level 24305# 24306# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), 24307# assuming defaults. However, in some special cases it is customary 24308# to modify $ci_level from this strict value. 24309# 24310# The total structural indentation is easy to compute by adding and 24311# subtracting 1 from a saved value as types '{' and '}' are seen. The 24312# running value of this variable is $level_in_tokenizer. 24313# 24314# The total continuation is much more difficult to compute, and requires 24315# several variables. These veriables are: 24316# 24317# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for 24318# each indentation level, if there are intervening open secondary 24319# structures just prior to that level. 24320# $continuation_string_in_tokenizer = a string of 1's and 0's indicating 24321# if the last token at that level is "continued", meaning that it 24322# is not the first token of an expression. 24323# $nesting_block_string = a string of 1's and 0's indicating, for each 24324# indentation level, if the level is of type BLOCK or not. 24325# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string 24326# $nesting_list_string = a string of 1's and 0's indicating, for each 24327# indentation level, if it is is appropriate for list formatting. 24328# If so, continuation indentation is used to indent long list items. 24329# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string 24330# @{$rslevel_stack} = a stack of total nesting depths at each 24331# structural indentation level, where "total nesting depth" means 24332# the nesting depth that would occur if every nesting token -- '{', '[', 24333# and '(' -- , regardless of context, is used to compute a nesting 24334# depth. 24335 24336 #my $nesting_block_flag = ($nesting_block_string =~ /1$/); 24337 #my $nesting_list_flag = ($nesting_list_string =~ /1$/); 24338 24339 my ( $ci_string_i, $level_i, $nesting_block_string_i, 24340 $nesting_list_string_i, $nesting_token_string_i, 24341 $nesting_type_string_i, ); 24342 24343 foreach $i ( @{$routput_token_list} ) 24344 { # scan the list of pre-tokens indexes 24345 24346 # self-checking for valid token types 24347 my $type = $routput_token_type->[$i]; 24348 my $forced_indentation_flag = $routput_indent_flag->[$i]; 24349 24350 # See if we should undo the $forced_indentation_flag. 24351 # Forced indentation after 'if', 'unless', 'while' and 'until' 24352 # expressions without trailing parens is optional and doesn't 24353 # always look good. It is usually okay for a trailing logical 24354 # expression, but if the expression is a function call, code block, 24355 # or some kind of list it puts in an unwanted extra indentation 24356 # level which is hard to remove. 24357 # 24358 # Example where extra indentation looks ok: 24359 # return 1 24360 # if $det_a < 0 and $det_b > 0 24361 # or $det_a > 0 and $det_b < 0; 24362 # 24363 # Example where extra indentation is not needed because 24364 # the eval brace also provides indentation: 24365 # print "not " if defined eval { 24366 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; 24367 # }; 24368 # 24369 # The following rule works fairly well: 24370 # Undo the flag if the end of this line, or start of the next 24371 # line, is an opening container token or a comma. 24372 # This almost always works, but if not after another pass it will 24373 # be stable. 24374 if ( $forced_indentation_flag && $type eq 'k' ) { 24375 my $ixlast = -1; 24376 my $ilast = $routput_token_list->[$ixlast]; 24377 my $toklast = $routput_token_type->[$ilast]; 24378 if ( $toklast eq '#' ) { 24379 $ixlast--; 24380 $ilast = $routput_token_list->[$ixlast]; 24381 $toklast = $routput_token_type->[$ilast]; 24382 } 24383 if ( $toklast eq 'b' ) { 24384 $ixlast--; 24385 $ilast = $routput_token_list->[$ixlast]; 24386 $toklast = $routput_token_type->[$ilast]; 24387 } 24388 if ( $toklast =~ /^[\{,]$/ ) { 24389 $forced_indentation_flag = 0; 24390 } 24391 else { 24392 ( $toklast, my $i_next ) = 24393 find_next_nonblank_token( $max_token_index, $rtokens, 24394 $max_token_index ); 24395 if ( $toklast =~ /^[\{,]$/ ) { 24396 $forced_indentation_flag = 0; 24397 } 24398 } 24399 } 24400 24401 # if we are already in an indented if, see if we should outdent 24402 if ($indented_if_level) { 24403 24404 # don't try to nest trailing if's - shouldn't happen 24405 if ( $type eq 'k' ) { 24406 $forced_indentation_flag = 0; 24407 } 24408 24409 # check for the normal case - outdenting at next ';' 24410 elsif ( $type eq ';' ) { 24411 if ( $level_in_tokenizer == $indented_if_level ) { 24412 $forced_indentation_flag = -1; 24413 $indented_if_level = 0; 24414 } 24415 } 24416 24417 # handle case of missing semicolon 24418 elsif ( $type eq '}' ) { 24419 if ( $level_in_tokenizer == $indented_if_level ) { 24420 $indented_if_level = 0; 24421 24422 # TBD: This could be a subroutine call 24423 $level_in_tokenizer--; 24424 if ( @{$rslevel_stack} > 1 ) { 24425 pop( @{$rslevel_stack} ); 24426 } 24427 if ( length($nesting_block_string) > 1 ) 24428 { # true for valid script 24429 chop $nesting_block_string; 24430 chop $nesting_list_string; 24431 } 24432 24433 } 24434 } 24435 } 24436 24437 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken 24438 $level_i = $level_in_tokenizer; 24439 24440 # This can happen by running perltidy on non-scripts 24441 # although it could also be bug introduced by programming change. 24442 # Perl silently accepts a 032 (^Z) and takes it as the end 24443 if ( !$is_valid_token_type{$type} ) { 24444 my $val = ord($type); 24445 warning( 24446 "unexpected character decimal $val ($type) in script\n"); 24447 $tokenizer_self->{_in_error} = 1; 24448 } 24449 24450 # ---------------------------------------------------------------- 24451 # TOKEN TYPE PATCHES 24452 # output __END__, __DATA__, and format as type 'k' instead of ';' 24453 # to make html colors correct, etc. 24454 my $fix_type = $type; 24455 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } 24456 24457 # output anonymous 'sub' as keyword 24458 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } 24459 24460 # ----------------------------------------------------------------- 24461 24462 $nesting_token_string_i = $nesting_token_string; 24463 $nesting_type_string_i = $nesting_type_string; 24464 $nesting_block_string_i = $nesting_block_string; 24465 $nesting_list_string_i = $nesting_list_string; 24466 24467 # set primary indentation levels based on structural braces 24468 # Note: these are set so that the leading braces have a HIGHER 24469 # level than their CONTENTS, which is convenient for indentation 24470 # Also, define continuation indentation for each token. 24471 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) 24472 { 24473 24474 # use environment before updating 24475 $container_environment = 24476 $nesting_block_flag ? 'BLOCK' 24477 : $nesting_list_flag ? 'LIST' 24478 : ""; 24479 24480 # if the difference between total nesting levels is not 1, 24481 # there are intervening non-structural nesting types between 24482 # this '{' and the previous unclosed '{' 24483 my $intervening_secondary_structure = 0; 24484 if ( @{$rslevel_stack} ) { 24485 $intervening_secondary_structure = 24486 $slevel_in_tokenizer - $rslevel_stack->[-1]; 24487 } 24488 24489 # Continuation Indentation 24490 # 24491 # Having tried setting continuation indentation both in the formatter and 24492 # in the tokenizer, I can say that setting it in the tokenizer is much, 24493 # much easier. The formatter already has too much to do, and can't 24494 # make decisions on line breaks without knowing what 'ci' will be at 24495 # arbitrary locations. 24496 # 24497 # But a problem with setting the continuation indentation (ci) here 24498 # in the tokenizer is that we do not know where line breaks will actually 24499 # be. As a result, we don't know if we should propagate continuation 24500 # indentation to higher levels of structure. 24501 # 24502 # For nesting of only structural indentation, we never need to do this. 24503 # For example, in a long if statement, like this 24504 # 24505 # if ( !$output_block_type[$i] 24506 # && ($in_statement_continuation) ) 24507 # { <--outdented 24508 # do_something(); 24509 # } 24510 # 24511 # the second line has ci but we do normally give the lines within the BLOCK 24512 # any ci. This would be true if we had blocks nested arbitrarily deeply. 24513 # 24514 # But consider something like this, where we have created a break after 24515 # an opening paren on line 1, and the paren is not (currently) a 24516 # structural indentation token: 24517 # 24518 # my $file = $menubar->Menubutton( 24519 # qw/-text File -underline 0 -menuitems/ => [ 24520 # [ 24521 # Cascade => '~View', 24522 # -menuitems => [ 24523 # ... 24524 # 24525 # The second line has ci, so it would seem reasonable to propagate it 24526 # down, giving the third line 1 ci + 1 indentation. This suggests the 24527 # following rule, which is currently used to propagating ci down: if there 24528 # are any non-structural opening parens (or brackets, or braces), before 24529 # an opening structural brace, then ci is propagated down, and otherwise 24530 # not. The variable $intervening_secondary_structure contains this 24531 # information for the current token, and the string 24532 # "$ci_string_in_tokenizer" is a stack of previous values of this 24533 # variable. 24534 24535 # save the current states 24536 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); 24537 $level_in_tokenizer++; 24538 24539 if ($forced_indentation_flag) { 24540 24541 # break BEFORE '?' when there is forced indentation 24542 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } 24543 if ( $type eq 'k' ) { 24544 $indented_if_level = $level_in_tokenizer; 24545 } 24546 } 24547 24548 if ( $routput_block_type->[$i] ) { 24549 $nesting_block_flag = 1; 24550 $nesting_block_string .= '1'; 24551 } 24552 else { 24553 $nesting_block_flag = 0; 24554 $nesting_block_string .= '0'; 24555 } 24556 24557 # we will use continuation indentation within containers 24558 # which are not blocks and not logical expressions 24559 my $bit = 0; 24560 if ( !$routput_block_type->[$i] ) { 24561 24562 # propagate flag down at nested open parens 24563 if ( $routput_container_type->[$i] eq '(' ) { 24564 $bit = 1 if $nesting_list_flag; 24565 } 24566 24567 # use list continuation if not a logical grouping 24568 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ 24569 else { 24570 $bit = 1 24571 unless 24572 $is_logical_container{ $routput_container_type->[$i] 24573 }; 24574 } 24575 } 24576 $nesting_list_string .= $bit; 24577 $nesting_list_flag = $bit; 24578 24579 $ci_string_in_tokenizer .= 24580 ( $intervening_secondary_structure != 0 ) ? '1' : '0'; 24581 $ci_string_sum = ones_count($ci_string_in_tokenizer); 24582 $continuation_string_in_tokenizer .= 24583 ( $in_statement_continuation > 0 ) ? '1' : '0'; 24584 24585 # Sometimes we want to give an opening brace continuation indentation, 24586 # and sometimes not. For code blocks, we don't do it, so that the leading 24587 # '{' gets outdented, like this: 24588 # 24589 # if ( !$output_block_type[$i] 24590 # && ($in_statement_continuation) ) 24591 # { <--outdented 24592 # 24593 # For other types, we will give them continuation indentation. For example, 24594 # here is how a list looks with the opening paren indented: 24595 # 24596 # @LoL = 24597 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], 24598 # [ "homer", "marge", "bart" ], ); 24599 # 24600 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) 24601 24602 my $total_ci = $ci_string_sum; 24603 if ( 24604 !$routput_block_type->[$i] # patch: skip for BLOCK 24605 && ($in_statement_continuation) 24606 && !( $forced_indentation_flag && $type eq ':' ) 24607 ) 24608 { 24609 $total_ci += $in_statement_continuation 24610 unless ( $ci_string_in_tokenizer =~ /1$/ ); 24611 } 24612 24613 $ci_string_i = $total_ci; 24614 $in_statement_continuation = 0; 24615 } 24616 24617 elsif ($type eq '}' 24618 || $type eq 'R' 24619 || $forced_indentation_flag < 0 ) 24620 { 24621 24622 # only a nesting error in the script would prevent popping here 24623 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } 24624 24625 $level_i = --$level_in_tokenizer; 24626 24627 # restore previous level values 24628 if ( length($nesting_block_string) > 1 ) 24629 { # true for valid script 24630 chop $nesting_block_string; 24631 $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); 24632 chop $nesting_list_string; 24633 $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); 24634 24635 chop $ci_string_in_tokenizer; 24636 $ci_string_sum = ones_count($ci_string_in_tokenizer); 24637 24638 $in_statement_continuation = 24639 chop $continuation_string_in_tokenizer; 24640 24641 # zero continuation flag at terminal BLOCK '}' which 24642 # ends a statement. 24643 if ( $routput_block_type->[$i] ) { 24644 24645 # ...These include non-anonymous subs 24646 # note: could be sub ::abc { or sub 'abc 24647 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { 24648 24649 # note: older versions of perl require the /gc modifier 24650 # here or else the \G does not work. 24651 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) 24652 { 24653 $in_statement_continuation = 0; 24654 } 24655 } 24656 24657# ...and include all block types except user subs with 24658# block prototypes and these: (sort|grep|map|do|eval) 24659# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ 24660 elsif ( 24661 $is_zero_continuation_block_type{ 24662 $routput_block_type->[$i] } ) 24663 { 24664 $in_statement_continuation = 0; 24665 } 24666 24667 # ..but these are not terminal types: 24668 # /^(sort|grep|map|do|eval)$/ ) 24669 elsif ( 24670 $is_not_zero_continuation_block_type{ 24671 $routput_block_type->[$i] } ) 24672 { 24673 } 24674 24675 # ..and a block introduced by a label 24676 # /^\w+\s*:$/gc ) { 24677 elsif ( $routput_block_type->[$i] =~ /:$/ ) { 24678 $in_statement_continuation = 0; 24679 } 24680 24681 # user function with block prototype 24682 else { 24683 $in_statement_continuation = 0; 24684 } 24685 } 24686 24687 # If we are in a list, then 24688 # we must set continuatoin indentation at the closing 24689 # paren of something like this (paren after $check): 24690 # assert( 24691 # __LINE__, 24692 # ( not defined $check ) 24693 # or ref $check 24694 # or $check eq "new" 24695 # or $check eq "old", 24696 # ); 24697 elsif ( $tok eq ')' ) { 24698 $in_statement_continuation = 1 24699 if $routput_container_type->[$i] =~ /^[;,\{\}]$/; 24700 } 24701 24702 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } 24703 } 24704 24705 # use environment after updating 24706 $container_environment = 24707 $nesting_block_flag ? 'BLOCK' 24708 : $nesting_list_flag ? 'LIST' 24709 : ""; 24710 $ci_string_i = $ci_string_sum + $in_statement_continuation; 24711 $nesting_block_string_i = $nesting_block_string; 24712 $nesting_list_string_i = $nesting_list_string; 24713 } 24714 24715 # not a structural indentation type.. 24716 else { 24717 24718 $container_environment = 24719 $nesting_block_flag ? 'BLOCK' 24720 : $nesting_list_flag ? 'LIST' 24721 : ""; 24722 24723 # zero the continuation indentation at certain tokens so 24724 # that they will be at the same level as its container. For 24725 # commas, this simplifies the -lp indentation logic, which 24726 # counts commas. For ?: it makes them stand out. 24727 if ($nesting_list_flag) { 24728 if ( $type =~ /^[,\?\:]$/ ) { 24729 $in_statement_continuation = 0; 24730 } 24731 } 24732 24733 # be sure binary operators get continuation indentation 24734 if ( 24735 $container_environment 24736 && ( $type eq 'k' && $is_binary_keyword{$tok} 24737 || $is_binary_type{$type} ) 24738 ) 24739 { 24740 $in_statement_continuation = 1; 24741 } 24742 24743 # continuation indentation is sum of any open ci from previous 24744 # levels plus the current level 24745 $ci_string_i = $ci_string_sum + $in_statement_continuation; 24746 24747 # update continuation flag ... 24748 # if this isn't a blank or comment.. 24749 if ( $type ne 'b' && $type ne '#' ) { 24750 24751 # and we are in a BLOCK 24752 if ($nesting_block_flag) { 24753 24754 # the next token after a ';' and label starts a new stmt 24755 if ( $type eq ';' || $type eq 'J' ) { 24756 $in_statement_continuation = 0; 24757 } 24758 24759 # otherwise, we are continuing the current statement 24760 else { 24761 $in_statement_continuation = 1; 24762 } 24763 } 24764 24765 # if we are not in a BLOCK.. 24766 else { 24767 24768 # do not use continuation indentation if not list 24769 # environment (could be within if/elsif clause) 24770 if ( !$nesting_list_flag ) { 24771 $in_statement_continuation = 0; 24772 } 24773 24774 # otherwise, the next token after a ',' starts a new term 24775 elsif ( $type eq ',' ) { 24776 $in_statement_continuation = 0; 24777 } 24778 24779 # otherwise, we are continuing the current term 24780 else { 24781 $in_statement_continuation = 1; 24782 } 24783 } 24784 } 24785 } 24786 24787 if ( $level_in_tokenizer < 0 ) { 24788 unless ( $tokenizer_self->{_saw_negative_indentation} ) { 24789 $tokenizer_self->{_saw_negative_indentation} = 1; 24790 warning("Starting negative indentation\n"); 24791 } 24792 } 24793 24794 # set secondary nesting levels based on all continment token types 24795 # Note: these are set so that the nesting depth is the depth 24796 # of the PREVIOUS TOKEN, which is convenient for setting 24797 # the stength of token bonds 24798 my $slevel_i = $slevel_in_tokenizer; 24799 24800 # /^[L\{\(\[]$/ 24801 if ( $is_opening_type{$type} ) { 24802 $slevel_in_tokenizer++; 24803 $nesting_token_string .= $tok; 24804 $nesting_type_string .= $type; 24805 } 24806 24807 # /^[R\}\)\]]$/ 24808 elsif ( $is_closing_type{$type} ) { 24809 $slevel_in_tokenizer--; 24810 my $char = chop $nesting_token_string; 24811 24812 if ( $char ne $matching_start_token{$tok} ) { 24813 $nesting_token_string .= $char . $tok; 24814 $nesting_type_string .= $type; 24815 } 24816 else { 24817 chop $nesting_type_string; 24818 } 24819 } 24820 24821 push( @block_type, $routput_block_type->[$i] ); 24822 push( @ci_string, $ci_string_i ); 24823 push( @container_environment, $container_environment ); 24824 push( @container_type, $routput_container_type->[$i] ); 24825 push( @levels, $level_i ); 24826 push( @nesting_tokens, $nesting_token_string_i ); 24827 push( @nesting_types, $nesting_type_string_i ); 24828 push( @slevels, $slevel_i ); 24829 push( @token_type, $fix_type ); 24830 push( @type_sequence, $routput_type_sequence->[$i] ); 24831 push( @nesting_blocks, $nesting_block_string ); 24832 push( @nesting_lists, $nesting_list_string ); 24833 24834 # now form the previous token 24835 if ( $im >= 0 ) { 24836 $num = 24837 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters 24838 24839 if ( $num > 0 ) { 24840 push( @tokens, 24841 substr( $input_line, $$rtoken_map[$im], $num ) ); 24842 } 24843 } 24844 $im = $i; 24845 } 24846 24847 $num = length($input_line) - $$rtoken_map[$im]; # make the last token 24848 if ( $num > 0 ) { 24849 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); 24850 } 24851 24852 $tokenizer_self->{_in_attribute_list} = $in_attribute_list; 24853 $tokenizer_self->{_in_quote} = $in_quote; 24854 $tokenizer_self->{_quote_target} = 24855 $in_quote ? matching_end_token($quote_character) : ""; 24856 $tokenizer_self->{_rhere_target_list} = $rhere_target_list; 24857 24858 $line_of_tokens->{_rtoken_type} = \@token_type; 24859 $line_of_tokens->{_rtokens} = \@tokens; 24860 $line_of_tokens->{_rblock_type} = \@block_type; 24861 $line_of_tokens->{_rcontainer_type} = \@container_type; 24862 $line_of_tokens->{_rcontainer_environment} = \@container_environment; 24863 $line_of_tokens->{_rtype_sequence} = \@type_sequence; 24864 $line_of_tokens->{_rlevels} = \@levels; 24865 $line_of_tokens->{_rslevels} = \@slevels; 24866 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; 24867 $line_of_tokens->{_rci_levels} = \@ci_string; 24868 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; 24869 24870 return; 24871 } 24872} # end tokenize_this_line 24873 24874#########i############################################################# 24875# Tokenizer routines which assist in identifying token types 24876####################################################################### 24877 24878sub operator_expected { 24879 24880 # Many perl symbols have two or more meanings. For example, '<<' 24881 # can be a shift operator or a here-doc operator. The 24882 # interpretation of these symbols depends on the current state of 24883 # the tokenizer, which may either be expecting a term or an 24884 # operator. For this example, a << would be a shift if an operator 24885 # is expected, and a here-doc if a term is expected. This routine 24886 # is called to make this decision for any current token. It returns 24887 # one of three possible values: 24888 # 24889 # OPERATOR - operator expected (or at least, not a term) 24890 # UNKNOWN - can't tell 24891 # TERM - a term is expected (or at least, not an operator) 24892 # 24893 # The decision is based on what has been seen so far. This 24894 # information is stored in the "$last_nonblank_type" and 24895 # "$last_nonblank_token" variables. For example, if the 24896 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas 24897 # if $last_nonblank_type is 'n' (numeric), we are expecting an 24898 # OPERATOR. 24899 # 24900 # If a UNKNOWN is returned, the calling routine must guess. A major 24901 # goal of this tokenizer is to minimize the possiblity of returning 24902 # UNKNOWN, because a wrong guess can spoil the formatting of a 24903 # script. 24904 # 24905 # adding NEW_TOKENS: it is critically important that this routine be 24906 # updated to allow it to determine if an operator or term is to be 24907 # expected after the new token. Doing this simply involves adding 24908 # the new token character to one of the regexes in this routine or 24909 # to one of the hash lists 24910 # that it uses, which are initialized in the BEGIN section. 24911 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, 24912 # $statement_type 24913 24914 my ( $prev_type, $tok, $next_type ) = @_; 24915 24916 my $op_expected = UNKNOWN; 24917 24918#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; 24919 24920# Note: function prototype is available for token type 'U' for future 24921# program development. It contains the leading and trailing parens, 24922# and no blanks. It might be used to eliminate token type 'C', for 24923# example (prototype = '()'). Thus: 24924# if ($last_nonblank_type eq 'U') { 24925# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; 24926# } 24927 24928 # A possible filehandle (or object) requires some care... 24929 if ( $last_nonblank_type eq 'Z' ) { 24930 24931 # angle.t 24932 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { 24933 $op_expected = UNKNOWN; 24934 } 24935 24936 # For possible file handle like "$a", Perl uses weird parsing rules. 24937 # For example: 24938 # print $a/2,"/hi"; - division 24939 # print $a / 2,"/hi"; - division 24940 # print $a/ 2,"/hi"; - division 24941 # print $a /2,"/hi"; - pattern (and error)! 24942 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { 24943 $op_expected = TERM; 24944 } 24945 24946 # Note when an operation is being done where a 24947 # filehandle might be expected, since a change in whitespace 24948 # could change the interpretation of the statement. 24949 else { 24950 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { 24951 complain("operator in print statement not recommended\n"); 24952 $op_expected = OPERATOR; 24953 } 24954 } 24955 } 24956 24957 # handle something after 'do' and 'eval' 24958 elsif ( $is_block_operator{$last_nonblank_token} ) { 24959 24960 # something like $a = eval "expression"; 24961 # ^ 24962 if ( $last_nonblank_type eq 'k' ) { 24963 $op_expected = TERM; # expression or list mode following keyword 24964 } 24965 24966 # something like $a = do { BLOCK } / 2; 24967 # ^ 24968 else { 24969 $op_expected = OPERATOR; # block mode following } 24970 } 24971 } 24972 24973 # handle bare word.. 24974 elsif ( $last_nonblank_type eq 'w' ) { 24975 24976 # unfortunately, we can't tell what type of token to expect next 24977 # after most bare words 24978 $op_expected = UNKNOWN; 24979 } 24980 24981 # operator, but not term possible after these types 24982 # Note: moved ')' from type to token because parens in list context 24983 # get marked as '{' '}' now. This is a minor glitch in the following: 24984 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); 24985 # 24986 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) 24987 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) 24988 { 24989 $op_expected = OPERATOR; 24990 24991 # in a 'use' statement, numbers and v-strings are not true 24992 # numbers, so to avoid incorrect error messages, we will 24993 # mark them as unknown for now (use.t) 24994 # TODO: it would be much nicer to create a new token V for VERSION 24995 # number in a use statement. Then this could be a check on type V 24996 # and related patches which change $statement_type for '=>' 24997 # and ',' could be removed. Further, it would clean things up to 24998 # scan the 'use' statement with a separate subroutine. 24999 if ( ( $statement_type eq 'use' ) 25000 && ( $last_nonblank_type =~ /^[nv]$/ ) ) 25001 { 25002 $op_expected = UNKNOWN; 25003 } 25004 } 25005 25006 # no operator after many keywords, such as "die", "warn", etc 25007 elsif ( $expecting_term_token{$last_nonblank_token} ) { 25008 25009 # patch for dor.t (defined or). 25010 # perl functions which may be unary operators 25011 # TODO: This list is incomplete, and these should be put 25012 # into a hash. 25013 if ( $tok eq '/' 25014 && $next_type eq '/' 25015 && $last_nonblank_type eq 'k' 25016 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) 25017 { 25018 $op_expected = OPERATOR; 25019 } 25020 else { 25021 $op_expected = TERM; 25022 } 25023 } 25024 25025 # no operator after things like + - ** (i.e., other operators) 25026 elsif ( $expecting_term_types{$last_nonblank_type} ) { 25027 $op_expected = TERM; 25028 } 25029 25030 # a few operators, like "time", have an empty prototype () and so 25031 # take no parameters but produce a value to operate on 25032 elsif ( $expecting_operator_token{$last_nonblank_token} ) { 25033 $op_expected = OPERATOR; 25034 } 25035 25036 # post-increment and decrement produce values to be operated on 25037 elsif ( $expecting_operator_types{$last_nonblank_type} ) { 25038 $op_expected = OPERATOR; 25039 } 25040 25041 # no value to operate on after sub block 25042 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } 25043 25044 # a right brace here indicates the end of a simple block. 25045 # all non-structural right braces have type 'R' 25046 # all braces associated with block operator keywords have been given those 25047 # keywords as "last_nonblank_token" and caught above. 25048 # (This statement is order dependent, and must come after checking 25049 # $last_nonblank_token). 25050 elsif ( $last_nonblank_type eq '}' ) { 25051 25052 # patch for dor.t (defined or). 25053 if ( $tok eq '/' 25054 && $next_type eq '/' 25055 && $last_nonblank_token eq ']' ) 25056 { 25057 $op_expected = OPERATOR; 25058 } 25059 else { 25060 $op_expected = TERM; 25061 } 25062 } 25063 25064 # something else..what did I forget? 25065 else { 25066 25067 # collecting diagnostics on unknown operator types..see what was missed 25068 $op_expected = UNKNOWN; 25069 write_diagnostics( 25070"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" 25071 ); 25072 } 25073 25074 TOKENIZER_DEBUG_FLAG_EXPECT && do { 25075 print 25076"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; 25077 }; 25078 return $op_expected; 25079} 25080 25081sub new_statement_ok { 25082 25083 # return true if the current token can start a new statement 25084 # USES GLOBAL VARIABLES: $last_nonblank_type 25085 25086 return label_ok() # a label would be ok here 25087 25088 || $last_nonblank_type eq 'J'; # or we follow a label 25089 25090} 25091 25092sub label_ok { 25093 25094 # Decide if a bare word followed by a colon here is a label 25095 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, 25096 # $brace_depth, @brace_type 25097 25098 # if it follows an opening or closing code block curly brace.. 25099 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) 25100 && $last_nonblank_type eq $last_nonblank_token ) 25101 { 25102 25103 # it is a label if and only if the curly encloses a code block 25104 return $brace_type[$brace_depth]; 25105 } 25106 25107 # otherwise, it is a label if and only if it follows a ';' 25108 # (real or fake) 25109 else { 25110 return ( $last_nonblank_type eq ';' ); 25111 } 25112} 25113 25114sub code_block_type { 25115 25116 # Decide if this is a block of code, and its type. 25117 # Must be called only when $type = $token = '{' 25118 # The problem is to distinguish between the start of a block of code 25119 # and the start of an anonymous hash reference 25120 # Returns "" if not code block, otherwise returns 'last_nonblank_token' 25121 # to indicate the type of code block. (For example, 'last_nonblank_token' 25122 # might be 'if' for an if block, 'else' for an else block, etc). 25123 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, 25124 # $last_nonblank_block_type, $brace_depth, @brace_type 25125 25126 # handle case of multiple '{'s 25127 25128# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; 25129 25130 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; 25131 if ( $last_nonblank_token eq '{' 25132 && $last_nonblank_type eq $last_nonblank_token ) 25133 { 25134 25135 # opening brace where a statement may appear is probably 25136 # a code block but might be and anonymous hash reference 25137 if ( $brace_type[$brace_depth] ) { 25138 return decide_if_code_block( $i, $rtokens, $rtoken_type, 25139 $max_token_index ); 25140 } 25141 25142 # cannot start a code block within an anonymous hash 25143 else { 25144 return ""; 25145 } 25146 } 25147 25148 elsif ( $last_nonblank_token eq ';' ) { 25149 25150 # an opening brace where a statement may appear is probably 25151 # a code block but might be and anonymous hash reference 25152 return decide_if_code_block( $i, $rtokens, $rtoken_type, 25153 $max_token_index ); 25154 } 25155 25156 # handle case of '}{' 25157 elsif ($last_nonblank_token eq '}' 25158 && $last_nonblank_type eq $last_nonblank_token ) 25159 { 25160 25161 # a } { situation ... 25162 # could be hash reference after code block..(blktype1.t) 25163 if ($last_nonblank_block_type) { 25164 return decide_if_code_block( $i, $rtokens, $rtoken_type, 25165 $max_token_index ); 25166 } 25167 25168 # must be a block if it follows a closing hash reference 25169 else { 25170 return $last_nonblank_token; 25171 } 25172 } 25173 25174 # NOTE: braces after type characters start code blocks, but for 25175 # simplicity these are not identified as such. See also 25176 # sub is_non_structural_brace. 25177 # elsif ( $last_nonblank_type eq 't' ) { 25178 # return $last_nonblank_token; 25179 # } 25180 25181 # brace after label: 25182 elsif ( $last_nonblank_type eq 'J' ) { 25183 return $last_nonblank_token; 25184 } 25185 25186# otherwise, look at previous token. This must be a code block if 25187# it follows any of these: 25188# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ 25189 elsif ( $is_code_block_token{$last_nonblank_token} ) { 25190 25191 # Bug Patch: Note that the opening brace after the 'if' in the following 25192 # snippet is an anonymous hash ref and not a code block! 25193 # print 'hi' if { x => 1, }->{x}; 25194 # We can identify this situation because the last nonblank type 25195 # will be a keyword (instead of a closing peren) 25196 if ( $last_nonblank_token =~ /^(if|unless)$/ 25197 && $last_nonblank_type eq 'k' ) 25198 { 25199 return ""; 25200 } 25201 else { 25202 return $last_nonblank_token; 25203 } 25204 } 25205 25206 # or a sub definition 25207 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) 25208 && $last_nonblank_token =~ /^sub\b/ ) 25209 { 25210 return $last_nonblank_token; 25211 } 25212 25213 # user-defined subs with block parameters (like grep/map/eval) 25214 elsif ( $last_nonblank_type eq 'G' ) { 25215 return $last_nonblank_token; 25216 } 25217 25218 # check bareword 25219 elsif ( $last_nonblank_type eq 'w' ) { 25220 return decide_if_code_block( $i, $rtokens, $rtoken_type, 25221 $max_token_index ); 25222 } 25223 25224 # anything else must be anonymous hash reference 25225 else { 25226 return ""; 25227 } 25228} 25229 25230sub decide_if_code_block { 25231 25232 # USES GLOBAL VARIABLES: $last_nonblank_token 25233 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; 25234 my ( $next_nonblank_token, $i_next ) = 25235 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 25236 25237 # we are at a '{' where a statement may appear. 25238 # We must decide if this brace starts an anonymous hash or a code 25239 # block. 25240 # return "" if anonymous hash, and $last_nonblank_token otherwise 25241 25242 # initialize to be code BLOCK 25243 my $code_block_type = $last_nonblank_token; 25244 25245 # Check for the common case of an empty anonymous hash reference: 25246 # Maybe something like sub { { } } 25247 if ( $next_nonblank_token eq '}' ) { 25248 $code_block_type = ""; 25249 } 25250 25251 else { 25252 25253 # To guess if this '{' is an anonymous hash reference, look ahead 25254 # and test as follows: 25255 # 25256 # it is a hash reference if next come: 25257 # - a string or digit followed by a comma or => 25258 # - bareword followed by => 25259 # otherwise it is a code block 25260 # 25261 # Examples of anonymous hash ref: 25262 # {'aa',}; 25263 # {1,2} 25264 # 25265 # Examples of code blocks: 25266 # {1; print "hello\n", 1;} 25267 # {$a,1}; 25268 25269 # We are only going to look ahead one more (nonblank/comment) line. 25270 # Strange formatting could cause a bad guess, but that's unlikely. 25271 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; 25272 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; 25273 my ( $rpre_tokens, $rpre_types ) = 25274 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but 25275 # generous, and prevents 25276 # wasting lots of 25277 # time in mangled files 25278 if ( defined($rpre_types) && @$rpre_types ) { 25279 push @pre_types, @$rpre_types; 25280 push @pre_tokens, @$rpre_tokens; 25281 } 25282 25283 # put a sentinal token to simplify stopping the search 25284 push @pre_types, '}'; 25285 25286 my $jbeg = 0; 25287 $jbeg = 1 if $pre_types[0] eq 'b'; 25288 25289 # first look for one of these 25290 # - bareword 25291 # - bareword with leading - 25292 # - digit 25293 # - quoted string 25294 my $j = $jbeg; 25295 if ( $pre_types[$j] =~ /^[\'\"]/ ) { 25296 25297 # find the closing quote; don't worry about escapes 25298 my $quote_mark = $pre_types[$j]; 25299 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { 25300 if ( $pre_types[$k] eq $quote_mark ) { 25301 $j = $k + 1; 25302 my $next = $pre_types[$j]; 25303 last; 25304 } 25305 } 25306 } 25307 elsif ( $pre_types[$j] eq 'd' ) { 25308 $j++; 25309 } 25310 elsif ( $pre_types[$j] eq 'w' ) { 25311 unless ( $is_keyword{ $pre_tokens[$j] } ) { 25312 $j++; 25313 } 25314 } 25315 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { 25316 $j++; 25317 } 25318 if ( $j > $jbeg ) { 25319 25320 $j++ if $pre_types[$j] eq 'b'; 25321 25322 # it's a hash ref if a comma or => follow next 25323 if ( $pre_types[$j] eq ',' 25324 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) 25325 { 25326 $code_block_type = ""; 25327 } 25328 } 25329 } 25330 25331 return $code_block_type; 25332} 25333 25334sub unexpected { 25335 25336 # report unexpected token type and show where it is 25337 # USES GLOBAL VARIABLES: $tokenizer_self 25338 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, 25339 $rpretoken_type, $input_line ) 25340 = @_; 25341 25342 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { 25343 my $msg = "found $found where $expecting expected"; 25344 my $pos = $$rpretoken_map[$i_tok]; 25345 interrupt_logfile(); 25346 my $input_line_number = $tokenizer_self->{_last_line_number}; 25347 my ( $offset, $numbered_line, $underline ) = 25348 make_numbered_line( $input_line_number, $input_line, $pos ); 25349 $underline = write_on_underline( $underline, $pos - $offset, '^' ); 25350 25351 my $trailer = ""; 25352 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { 25353 my $pos_prev = $$rpretoken_map[$last_nonblank_i]; 25354 my $num; 25355 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { 25356 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; 25357 } 25358 else { 25359 $num = $pos - $pos_prev; 25360 } 25361 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } 25362 25363 $underline = 25364 write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); 25365 $trailer = " (previous token underlined)"; 25366 } 25367 warning( $numbered_line . "\n" ); 25368 warning( $underline . "\n" ); 25369 warning( $msg . $trailer . "\n" ); 25370 resume_logfile(); 25371 } 25372} 25373 25374sub is_non_structural_brace { 25375 25376 # Decide if a brace or bracket is structural or non-structural 25377 # by looking at the previous token and type 25378 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token 25379 25380 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. 25381 # Tentatively deactivated because it caused the wrong operator expectation 25382 # for this code: 25383 # $user = @vars[1] / 100; 25384 # Must update sub operator_expected before re-implementing. 25385 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { 25386 # return 0; 25387 # } 25388 25389 # NOTE: braces after type characters start code blocks, but for 25390 # simplicity these are not identified as such. See also 25391 # sub code_block_type 25392 # if ($last_nonblank_type eq 't') {return 0} 25393 25394 # otherwise, it is non-structural if it is decorated 25395 # by type information. 25396 # For example, the '{' here is non-structural: ${xxx} 25397 ( 25398 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ 25399 25400 # or if we follow a hash or array closing curly brace or bracket 25401 # For example, the second '{' in this is non-structural: $a{'x'}{'y'} 25402 # because the first '}' would have been given type 'R' 25403 || $last_nonblank_type =~ /^([R\]])$/ 25404 ); 25405} 25406 25407#########i############################################################# 25408# Tokenizer routines for tracking container nesting depths 25409####################################################################### 25410 25411# The following routines keep track of nesting depths of the nesting 25412# types, ( [ { and ?. This is necessary for determining the indentation 25413# level, and also for debugging programs. Not only do they keep track of 25414# nesting depths of the individual brace types, but they check that each 25415# of the other brace types is balanced within matching pairs. For 25416# example, if the program sees this sequence: 25417# 25418# { ( ( ) } 25419# 25420# then it can determine that there is an extra left paren somewhere 25421# between the { and the }. And so on with every other possible 25422# combination of outer and inner brace types. For another 25423# example: 25424# 25425# ( [ ..... ] ] ) 25426# 25427# which has an extra ] within the parens. 25428# 25429# The brace types have indexes 0 .. 3 which are indexes into 25430# the matrices. 25431# 25432# The pair ? : are treated as just another nesting type, with ? acting 25433# as the opening brace and : acting as the closing brace. 25434# 25435# The matrix 25436# 25437# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; 25438# 25439# saves the nesting depth of brace type $b (where $b is either of the other 25440# nesting types) when brace type $a enters a new depth. When this depth 25441# decreases, a check is made that the current depth of brace types $b is 25442# unchanged, or otherwise there must have been an error. This can 25443# be very useful for localizing errors, particularly when perl runs to 25444# the end of a large file (such as this one) and announces that there 25445# is a problem somewhere. 25446# 25447# A numerical sequence number is maintained for every nesting type, 25448# so that each matching pair can be uniquely identified in a simple 25449# way. 25450 25451sub increase_nesting_depth { 25452 my ( $aa, $pos ) = @_; 25453 25454 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, 25455 # @current_sequence_number, @depth_array, @starting_line_of_current_depth 25456 my $bb; 25457 $current_depth[$aa]++; 25458 $total_depth++; 25459 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; 25460 my $input_line_number = $tokenizer_self->{_last_line_number}; 25461 my $input_line = $tokenizer_self->{_line_text}; 25462 25463 # Sequence numbers increment by number of items. This keeps 25464 # a unique set of numbers but still allows the relative location 25465 # of any type to be determined. 25466 $nesting_sequence_number[$aa] += scalar(@closing_brace_names); 25467 my $seqno = $nesting_sequence_number[$aa]; 25468 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; 25469 25470 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = 25471 [ $input_line_number, $input_line, $pos ]; 25472 25473 for $bb ( 0 .. $#closing_brace_names ) { 25474 next if ( $bb == $aa ); 25475 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; 25476 } 25477 25478 # set a flag for indenting a nested ternary statement 25479 my $indent = 0; 25480 if ( $aa == QUESTION_COLON ) { 25481 $nested_ternary_flag[ $current_depth[$aa] ] = 0; 25482 if ( $current_depth[$aa] > 1 ) { 25483 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { 25484 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; 25485 if ( $pdepth == $total_depth - 1 ) { 25486 $indent = 1; 25487 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; 25488 } 25489 } 25490 } 25491 } 25492 return ( $seqno, $indent ); 25493} 25494 25495sub decrease_nesting_depth { 25496 25497 my ( $aa, $pos ) = @_; 25498 25499 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, 25500 # @current_sequence_number, @depth_array, @starting_line_of_current_depth 25501 my $bb; 25502 my $seqno = 0; 25503 my $input_line_number = $tokenizer_self->{_last_line_number}; 25504 my $input_line = $tokenizer_self->{_line_text}; 25505 25506 my $outdent = 0; 25507 $total_depth--; 25508 if ( $current_depth[$aa] > 0 ) { 25509 25510 # set a flag for un-indenting after seeing a nested ternary statement 25511 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; 25512 if ( $aa == QUESTION_COLON ) { 25513 $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; 25514 } 25515 25516 # check that any brace types $bb contained within are balanced 25517 for $bb ( 0 .. $#closing_brace_names ) { 25518 next if ( $bb == $aa ); 25519 25520 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == 25521 $current_depth[$bb] ) 25522 { 25523 my $diff = 25524 $current_depth[$bb] - 25525 $depth_array[$aa][$bb][ $current_depth[$aa] ]; 25526 25527 # don't whine too many times 25528 my $saw_brace_error = get_saw_brace_error(); 25529 if ( 25530 $saw_brace_error <= MAX_NAG_MESSAGES 25531 25532 # if too many closing types have occured, we probably 25533 # already caught this error 25534 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) 25535 ) 25536 { 25537 interrupt_logfile(); 25538 my $rsl = 25539 $starting_line_of_current_depth[$aa] 25540 [ $current_depth[$aa] ]; 25541 my $sl = $$rsl[0]; 25542 my $rel = [ $input_line_number, $input_line, $pos ]; 25543 my $el = $$rel[0]; 25544 my ($ess); 25545 25546 if ( $diff == 1 || $diff == -1 ) { 25547 $ess = ''; 25548 } 25549 else { 25550 $ess = 's'; 25551 } 25552 my $bname = 25553 ( $diff > 0 ) 25554 ? $opening_brace_names[$bb] 25555 : $closing_brace_names[$bb]; 25556 write_error_indicator_pair( @$rsl, '^' ); 25557 my $msg = <<"EOM"; 25558Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el 25559EOM 25560 25561 if ( $diff > 0 ) { 25562 my $rml = 25563 $starting_line_of_current_depth[$bb] 25564 [ $current_depth[$bb] ]; 25565 my $ml = $$rml[0]; 25566 $msg .= 25567" The most recent un-matched $bname is on line $ml\n"; 25568 write_error_indicator_pair( @$rml, '^' ); 25569 } 25570 write_error_indicator_pair( @$rel, '^' ); 25571 warning($msg); 25572 resume_logfile(); 25573 } 25574 increment_brace_error(); 25575 } 25576 } 25577 $current_depth[$aa]--; 25578 } 25579 else { 25580 25581 my $saw_brace_error = get_saw_brace_error(); 25582 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { 25583 my $msg = <<"EOM"; 25584There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number 25585EOM 25586 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); 25587 } 25588 increment_brace_error(); 25589 } 25590 return ( $seqno, $outdent ); 25591} 25592 25593sub check_final_nesting_depths { 25594 my ($aa); 25595 25596 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth 25597 25598 for $aa ( 0 .. $#closing_brace_names ) { 25599 25600 if ( $current_depth[$aa] ) { 25601 my $rsl = 25602 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; 25603 my $sl = $$rsl[0]; 25604 my $msg = <<"EOM"; 25605Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] 25606The most recent un-matched $opening_brace_names[$aa] is on line $sl 25607EOM 25608 indicate_error( $msg, @$rsl, '^' ); 25609 increment_brace_error(); 25610 } 25611 } 25612} 25613 25614#########i############################################################# 25615# Tokenizer routines for looking ahead in input stream 25616####################################################################### 25617 25618sub peek_ahead_for_n_nonblank_pre_tokens { 25619 25620 # returns next n pretokens if they exist 25621 # returns undef's if hits eof without seeing any pretokens 25622 # USES GLOBAL VARIABLES: $tokenizer_self 25623 my $max_pretokens = shift; 25624 my $line; 25625 my $i = 0; 25626 my ( $rpre_tokens, $rmap, $rpre_types ); 25627 25628 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 25629 { 25630 $line =~ s/^\s*//; # trim leading blanks 25631 next if ( length($line) <= 0 ); # skip blank 25632 next if ( $line =~ /^#/ ); # skip comment 25633 ( $rpre_tokens, $rmap, $rpre_types ) = 25634 pre_tokenize( $line, $max_pretokens ); 25635 last; 25636 } 25637 return ( $rpre_tokens, $rpre_types ); 25638} 25639 25640# look ahead for next non-blank, non-comment line of code 25641sub peek_ahead_for_nonblank_token { 25642 25643 # USES GLOBAL VARIABLES: $tokenizer_self 25644 my ( $rtokens, $max_token_index ) = @_; 25645 my $line; 25646 my $i = 0; 25647 25648 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 25649 { 25650 $line =~ s/^\s*//; # trim leading blanks 25651 next if ( length($line) <= 0 ); # skip blank 25652 next if ( $line =~ /^#/ ); # skip comment 25653 my ( $rtok, $rmap, $rtype ) = 25654 pre_tokenize( $line, 2 ); # only need 2 pre-tokens 25655 my $j = $max_token_index + 1; 25656 my $tok; 25657 25658 foreach $tok (@$rtok) { 25659 last if ( $tok =~ "\n" ); 25660 $$rtokens[ ++$j ] = $tok; 25661 } 25662 last; 25663 } 25664 return $rtokens; 25665} 25666 25667#########i############################################################# 25668# Tokenizer guessing routines for ambiguous situations 25669####################################################################### 25670 25671sub guess_if_pattern_or_conditional { 25672 25673 # this routine is called when we have encountered a ? following an 25674 # unknown bareword, and we must decide if it starts a pattern or not 25675 # input parameters: 25676 # $i - token index of the ? starting possible pattern 25677 # output parameters: 25678 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern 25679 # msg = a warning or diagnostic message 25680 # USES GLOBAL VARIABLES: $last_nonblank_token 25681 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 25682 my $is_pattern = 0; 25683 my $msg = "guessing that ? after $last_nonblank_token starts a "; 25684 25685 if ( $i >= $max_token_index ) { 25686 $msg .= "conditional (no end to pattern found on the line)\n"; 25687 } 25688 else { 25689 my $ibeg = $i; 25690 $i = $ibeg + 1; 25691 my $next_token = $$rtokens[$i]; # first token after ? 25692 25693 # look for a possible ending ? on this line.. 25694 my $in_quote = 1; 25695 my $quote_depth = 0; 25696 my $quote_character = ''; 25697 my $quote_pos = 0; 25698 my $quoted_string; 25699 ( 25700 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 25701 $quoted_string 25702 ) 25703 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 25704 $quote_pos, $quote_depth, $max_token_index ); 25705 25706 if ($in_quote) { 25707 25708 # we didn't find an ending ? on this line, 25709 # so we bias towards conditional 25710 $is_pattern = 0; 25711 $msg .= "conditional (no ending ? on this line)\n"; 25712 25713 # we found an ending ?, so we bias towards a pattern 25714 } 25715 else { 25716 25717 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { 25718 $is_pattern = 1; 25719 $msg .= "pattern (found ending ? and pattern expected)\n"; 25720 } 25721 else { 25722 $msg .= "pattern (uncertain, but found ending ?)\n"; 25723 } 25724 } 25725 } 25726 return ( $is_pattern, $msg ); 25727} 25728 25729sub guess_if_pattern_or_division { 25730 25731 # this routine is called when we have encountered a / following an 25732 # unknown bareword, and we must decide if it starts a pattern or is a 25733 # division 25734 # input parameters: 25735 # $i - token index of the / starting possible pattern 25736 # output parameters: 25737 # $is_pattern = 0 if probably division, =1 if probably a pattern 25738 # msg = a warning or diagnostic message 25739 # USES GLOBAL VARIABLES: $last_nonblank_token 25740 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 25741 my $is_pattern = 0; 25742 my $msg = "guessing that / after $last_nonblank_token starts a "; 25743 25744 if ( $i >= $max_token_index ) { 25745 "division (no end to pattern found on the line)\n"; 25746 } 25747 else { 25748 my $ibeg = $i; 25749 my $divide_expected = 25750 numerator_expected( $i, $rtokens, $max_token_index ); 25751 $i = $ibeg + 1; 25752 my $next_token = $$rtokens[$i]; # first token after slash 25753 25754 # look for a possible ending / on this line.. 25755 my $in_quote = 1; 25756 my $quote_depth = 0; 25757 my $quote_character = ''; 25758 my $quote_pos = 0; 25759 my $quoted_string; 25760 ( 25761 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 25762 $quoted_string 25763 ) 25764 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 25765 $quote_pos, $quote_depth, $max_token_index ); 25766 25767 if ($in_quote) { 25768 25769 # we didn't find an ending / on this line, 25770 # so we bias towards division 25771 if ( $divide_expected >= 0 ) { 25772 $is_pattern = 0; 25773 $msg .= "division (no ending / on this line)\n"; 25774 } 25775 else { 25776 $msg = "multi-line pattern (division not possible)\n"; 25777 $is_pattern = 1; 25778 } 25779 25780 } 25781 25782 # we found an ending /, so we bias towards a pattern 25783 else { 25784 25785 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { 25786 25787 if ( $divide_expected >= 0 ) { 25788 25789 if ( $i - $ibeg > 60 ) { 25790 $msg .= "division (matching / too distant)\n"; 25791 $is_pattern = 0; 25792 } 25793 else { 25794 $msg .= "pattern (but division possible too)\n"; 25795 $is_pattern = 1; 25796 } 25797 } 25798 else { 25799 $is_pattern = 1; 25800 $msg .= "pattern (division not possible)\n"; 25801 } 25802 } 25803 else { 25804 25805 if ( $divide_expected >= 0 ) { 25806 $is_pattern = 0; 25807 $msg .= "division (pattern not possible)\n"; 25808 } 25809 else { 25810 $is_pattern = 1; 25811 $msg .= 25812 "pattern (uncertain, but division would not work here)\n"; 25813 } 25814 } 25815 } 25816 } 25817 return ( $is_pattern, $msg ); 25818} 25819 25820# try to resolve here-doc vs. shift by looking ahead for 25821# non-code or the end token (currently only looks for end token) 25822# returns 1 if it is probably a here doc, 0 if not 25823sub guess_if_here_doc { 25824 25825 # This is how many lines we will search for a target as part of the 25826 # guessing strategy. It is a constant because there is probably 25827 # little reason to change it. 25828 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package 25829 # %is_constant, 25830 use constant HERE_DOC_WINDOW => 40; 25831 25832 my $next_token = shift; 25833 my $here_doc_expected = 0; 25834 my $line; 25835 my $k = 0; 25836 my $msg = "checking <<"; 25837 25838 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) 25839 { 25840 chomp $line; 25841 25842 if ( $line =~ /^$next_token$/ ) { 25843 $msg .= " -- found target $next_token ahead $k lines\n"; 25844 $here_doc_expected = 1; # got it 25845 last; 25846 } 25847 last if ( $k >= HERE_DOC_WINDOW ); 25848 } 25849 25850 unless ($here_doc_expected) { 25851 25852 if ( !defined($line) ) { 25853 $here_doc_expected = -1; # hit eof without seeing target 25854 $msg .= " -- must be shift; target $next_token not in file\n"; 25855 25856 } 25857 else { # still unsure..taking a wild guess 25858 25859 if ( !$is_constant{$current_package}{$next_token} ) { 25860 $here_doc_expected = 1; 25861 $msg .= 25862 " -- guessing it's a here-doc ($next_token not a constant)\n"; 25863 } 25864 else { 25865 $msg .= 25866 " -- guessing it's a shift ($next_token is a constant)\n"; 25867 } 25868 } 25869 } 25870 write_logfile_entry($msg); 25871 return $here_doc_expected; 25872} 25873 25874#########i############################################################# 25875# Tokenizer Routines for scanning identifiers and related items 25876####################################################################### 25877 25878sub scan_bare_identifier_do { 25879 25880 # this routine is called to scan a token starting with an alphanumeric 25881 # variable or package separator, :: or '. 25882 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, 25883 # $last_nonblank_type,@paren_type, $paren_depth 25884 25885 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, 25886 $max_token_index ) 25887 = @_; 25888 my $i_begin = $i; 25889 my $package = undef; 25890 25891 my $i_beg = $i; 25892 25893 # we have to back up one pretoken at a :: since each : is one pretoken 25894 if ( $tok eq '::' ) { $i_beg-- } 25895 if ( $tok eq '->' ) { $i_beg-- } 25896 my $pos_beg = $$rtoken_map[$i_beg]; 25897 pos($input_line) = $pos_beg; 25898 25899 # Examples: 25900 # A::B::C 25901 # A:: 25902 # ::A 25903 # A'B 25904 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { 25905 25906 my $pos = pos($input_line); 25907 my $numc = $pos - $pos_beg; 25908 $tok = substr( $input_line, $pos_beg, $numc ); 25909 25910 # type 'w' includes anything without leading type info 25911 # ($,%,@,*) including something like abc::def::ghi 25912 $type = 'w'; 25913 25914 my $sub_name = ""; 25915 if ( defined($2) ) { $sub_name = $2; } 25916 if ( defined($1) ) { 25917 $package = $1; 25918 25919 # patch: don't allow isolated package name which just ends 25920 # in the old style package separator (single quote). Example: 25921 # use CGI':all'; 25922 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { 25923 $pos--; 25924 } 25925 25926 $package =~ s/\'/::/g; 25927 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 25928 $package =~ s/::$//; 25929 } 25930 else { 25931 $package = $current_package; 25932 25933 if ( $is_keyword{$tok} ) { 25934 $type = 'k'; 25935 } 25936 } 25937 25938 # if it is a bareword.. 25939 if ( $type eq 'w' ) { 25940 25941 # check for v-string with leading 'v' type character 25942 # (This seems to have presidence over filehandle, type 'Y') 25943 if ( $tok =~ /^v\d[_\d]*$/ ) { 25944 25945 # we only have the first part - something like 'v101' - 25946 # look for more 25947 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { 25948 $pos = pos($input_line); 25949 $numc = $pos - $pos_beg; 25950 $tok = substr( $input_line, $pos_beg, $numc ); 25951 } 25952 $type = 'v'; 25953 25954 # warn if this version can't handle v-strings 25955 report_v_string($tok); 25956 } 25957 25958 elsif ( $is_constant{$package}{$sub_name} ) { 25959 $type = 'C'; 25960 } 25961 25962 # bareword after sort has implied empty prototype; for example: 25963 # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); 25964 # This has priority over whatever the user has specified. 25965 elsif ($last_nonblank_token eq 'sort' 25966 && $last_nonblank_type eq 'k' ) 25967 { 25968 $type = 'Z'; 25969 } 25970 25971 # Note: strangely, perl does not seem to really let you create 25972 # functions which act like eval and do, in the sense that eval 25973 # and do may have operators following the final }, but any operators 25974 # that you create with prototype (&) apparently do not allow 25975 # trailing operators, only terms. This seems strange. 25976 # If this ever changes, here is the update 25977 # to make perltidy behave accordingly: 25978 25979 # elsif ( $is_block_function{$package}{$tok} ) { 25980 # $tok='eval'; # patch to do braces like eval - doesn't work 25981 # $type = 'k'; 25982 #} 25983 # FIXME: This could become a separate type to allow for different 25984 # future behavior: 25985 elsif ( $is_block_function{$package}{$sub_name} ) { 25986 $type = 'G'; 25987 } 25988 25989 elsif ( $is_block_list_function{$package}{$sub_name} ) { 25990 $type = 'G'; 25991 } 25992 elsif ( $is_user_function{$package}{$sub_name} ) { 25993 $type = 'U'; 25994 $prototype = $user_function_prototype{$package}{$sub_name}; 25995 } 25996 25997 # check for indirect object 25998 elsif ( 25999 26000 # added 2001-03-27: must not be followed immediately by '(' 26001 # see fhandle.t 26002 ( $input_line !~ m/\G\(/gc ) 26003 26004 # and 26005 && ( 26006 26007 # preceded by keyword like 'print', 'printf' and friends 26008 $is_indirect_object_taker{$last_nonblank_token} 26009 26010 # or preceded by something like 'print(' or 'printf(' 26011 || ( 26012 ( $last_nonblank_token eq '(' ) 26013 && $is_indirect_object_taker{ $paren_type[$paren_depth] 26014 } 26015 26016 ) 26017 ) 26018 ) 26019 { 26020 26021 # may not be indirect object unless followed by a space 26022 if ( $input_line =~ m/\G\s+/gc ) { 26023 $type = 'Y'; 26024 26025 # Abandon Hope ... 26026 # Perl's indirect object notation is a very bad 26027 # thing and can cause subtle bugs, especially for 26028 # beginning programmers. And I haven't even been 26029 # able to figure out a sane warning scheme which 26030 # doesn't get in the way of good scripts. 26031 26032 # Complain if a filehandle has any lower case 26033 # letters. This is suggested good practice. 26034 # Use 'sub_name' because something like 26035 # main::MYHANDLE is ok for filehandle 26036 if ( $sub_name =~ /[a-z]/ ) { 26037 26038 # could be bug caused by older perltidy if 26039 # followed by '(' 26040 if ( $input_line =~ m/\G\s*\(/gc ) { 26041 complain( 26042"Caution: unknown word '$tok' in indirect object slot\n" 26043 ); 26044 } 26045 } 26046 } 26047 26048 # bareword not followed by a space -- may not be filehandle 26049 # (may be function call defined in a 'use' statement) 26050 else { 26051 $type = 'Z'; 26052 } 26053 } 26054 } 26055 26056 # Now we must convert back from character position 26057 # to pre_token index. 26058 # I don't think an error flag can occur here ..but who knows 26059 my $error; 26060 ( $i, $error ) = 26061 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 26062 if ($error) { 26063 warning("scan_bare_identifier: Possibly invalid tokenization\n"); 26064 } 26065 } 26066 26067 # no match but line not blank - could be syntax error 26068 # perl will take '::' alone without complaint 26069 else { 26070 $type = 'w'; 26071 26072 # change this warning to log message if it becomes annoying 26073 warning("didn't find identifier after leading ::\n"); 26074 } 26075 return ( $i, $tok, $type, $prototype ); 26076} 26077 26078sub scan_id_do { 26079 26080# This is the new scanner and will eventually replace scan_identifier. 26081# Only type 'sub' and 'package' are implemented. 26082# Token types $ * % @ & -> are not yet implemented. 26083# 26084# Scan identifier following a type token. 26085# The type of call depends on $id_scan_state: $id_scan_state = '' 26086# for starting call, in which case $tok must be the token defining 26087# the type. 26088# 26089# If the type token is the last nonblank token on the line, a value 26090# of $id_scan_state = $tok is returned, indicating that further 26091# calls must be made to get the identifier. If the type token is 26092# not the last nonblank token on the line, the identifier is 26093# scanned and handled and a value of '' is returned. 26094# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, 26095# $statement_type, $tokenizer_self 26096 26097 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, 26098 $max_token_index ) 26099 = @_; 26100 my $type = ''; 26101 my ( $i_beg, $pos_beg ); 26102 26103 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 26104 #my ($a,$b,$c) = caller; 26105 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; 26106 26107 # on re-entry, start scanning at first token on the line 26108 if ($id_scan_state) { 26109 $i_beg = $i; 26110 $type = ''; 26111 } 26112 26113 # on initial entry, start scanning just after type token 26114 else { 26115 $i_beg = $i + 1; 26116 $id_scan_state = $tok; 26117 $type = 't'; 26118 } 26119 26120 # find $i_beg = index of next nonblank token, 26121 # and handle empty lines 26122 my $blank_line = 0; 26123 my $next_nonblank_token = $$rtokens[$i_beg]; 26124 if ( $i_beg > $max_token_index ) { 26125 $blank_line = 1; 26126 } 26127 else { 26128 26129 # only a '#' immediately after a '$' is not a comment 26130 if ( $next_nonblank_token eq '#' ) { 26131 unless ( $tok eq '$' ) { 26132 $blank_line = 1; 26133 } 26134 } 26135 26136 if ( $next_nonblank_token =~ /^\s/ ) { 26137 ( $next_nonblank_token, $i_beg ) = 26138 find_next_nonblank_token_on_this_line( $i_beg, $rtokens, 26139 $max_token_index ); 26140 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { 26141 $blank_line = 1; 26142 } 26143 } 26144 } 26145 26146 # handle non-blank line; identifier, if any, must follow 26147 unless ($blank_line) { 26148 26149 if ( $id_scan_state eq 'sub' ) { 26150 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( 26151 $input_line, $i, $i_beg, 26152 $tok, $type, $rtokens, 26153 $rtoken_map, $id_scan_state, $max_token_index 26154 ); 26155 } 26156 26157 elsif ( $id_scan_state eq 'package' ) { 26158 ( $i, $tok, $type ) = 26159 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, 26160 $rtoken_map, $max_token_index ); 26161 $id_scan_state = ''; 26162 } 26163 26164 else { 26165 warning("invalid token in scan_id: $tok\n"); 26166 $id_scan_state = ''; 26167 } 26168 } 26169 26170 if ( $id_scan_state && ( !defined($type) || !$type ) ) { 26171 26172 # shouldn't happen: 26173 warning( 26174"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" 26175 ); 26176 report_definite_bug(); 26177 } 26178 26179 TOKENIZER_DEBUG_FLAG_NSCAN && do { 26180 print 26181 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 26182 }; 26183 return ( $i, $tok, $type, $id_scan_state ); 26184} 26185 26186sub check_prototype { 26187 my ( $proto, $package, $subname ) = @_; 26188 return unless ( defined($package) && defined($subname) ); 26189 if ( defined($proto) ) { 26190 $proto =~ s/^\s*\(\s*//; 26191 $proto =~ s/\s*\)$//; 26192 if ($proto) { 26193 $is_user_function{$package}{$subname} = 1; 26194 $user_function_prototype{$package}{$subname} = "($proto)"; 26195 26196 # prototypes containing '&' must be treated specially.. 26197 if ( $proto =~ /\&/ ) { 26198 26199 # right curly braces of prototypes ending in 26200 # '&' may be followed by an operator 26201 if ( $proto =~ /\&$/ ) { 26202 $is_block_function{$package}{$subname} = 1; 26203 } 26204 26205 # right curly braces of prototypes NOT ending in 26206 # '&' may NOT be followed by an operator 26207 elsif ( $proto !~ /\&$/ ) { 26208 $is_block_list_function{$package}{$subname} = 1; 26209 } 26210 } 26211 } 26212 else { 26213 $is_constant{$package}{$subname} = 1; 26214 } 26215 } 26216 else { 26217 $is_user_function{$package}{$subname} = 1; 26218 } 26219} 26220 26221sub do_scan_package { 26222 26223 # do_scan_package parses a package name 26224 # it is called with $i_beg equal to the index of the first nonblank 26225 # token following a 'package' token. 26226 # USES GLOBAL VARIABLES: $current_package, 26227 26228 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, 26229 $max_token_index ) 26230 = @_; 26231 my $package = undef; 26232 my $pos_beg = $$rtoken_map[$i_beg]; 26233 pos($input_line) = $pos_beg; 26234 26235 # handle non-blank line; package name, if any, must follow 26236 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { 26237 $package = $1; 26238 $package = ( defined($1) && $1 ) ? $1 : 'main'; 26239 $package =~ s/\'/::/g; 26240 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 26241 $package =~ s/::$//; 26242 my $pos = pos($input_line); 26243 my $numc = $pos - $pos_beg; 26244 $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); 26245 $type = 'i'; 26246 26247 # Now we must convert back from character position 26248 # to pre_token index. 26249 # I don't think an error flag can occur here ..but ? 26250 my $error; 26251 ( $i, $error ) = 26252 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 26253 if ($error) { warning("Possibly invalid package\n") } 26254 $current_package = $package; 26255 26256 # check for error 26257 my ( $next_nonblank_token, $i_next ) = 26258 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 26259 if ( $next_nonblank_token !~ /^[;\}]$/ ) { 26260 warning( 26261 "Unexpected '$next_nonblank_token' after package name '$tok'\n" 26262 ); 26263 } 26264 } 26265 26266 # no match but line not blank -- 26267 # could be a label with name package, like package: , for example. 26268 else { 26269 $type = 'k'; 26270 } 26271 26272 return ( $i, $tok, $type ); 26273} 26274 26275sub scan_identifier_do { 26276 26277 # This routine assembles tokens into identifiers. It maintains a 26278 # scan state, id_scan_state. It updates id_scan_state based upon 26279 # current id_scan_state and token, and returns an updated 26280 # id_scan_state and the next index after the identifier. 26281 # USES GLOBAL VARIABLES: $context, $last_nonblank_token, 26282 # $last_nonblank_type 26283 26284 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, 26285 $expecting ) 26286 = @_; 26287 my $i_begin = $i; 26288 my $type = ''; 26289 my $tok_begin = $$rtokens[$i_begin]; 26290 if ( $tok_begin eq ':' ) { $tok_begin = '::' } 26291 my $id_scan_state_begin = $id_scan_state; 26292 my $identifier_begin = $identifier; 26293 my $tok = $tok_begin; 26294 my $message = ""; 26295 26296 # these flags will be used to help figure out the type: 26297 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); 26298 my $saw_type; 26299 26300 # allow old package separator (') except in 'use' statement 26301 my $allow_tick = ( $last_nonblank_token ne 'use' ); 26302 26303 # get started by defining a type and a state if necessary 26304 unless ($id_scan_state) { 26305 $context = UNKNOWN_CONTEXT; 26306 26307 # fixup for digraph 26308 if ( $tok eq '>' ) { 26309 $tok = '->'; 26310 $tok_begin = $tok; 26311 } 26312 $identifier = $tok; 26313 26314 if ( $tok eq '$' || $tok eq '*' ) { 26315 $id_scan_state = '$'; 26316 $context = SCALAR_CONTEXT; 26317 } 26318 elsif ( $tok eq '%' || $tok eq '@' ) { 26319 $id_scan_state = '$'; 26320 $context = LIST_CONTEXT; 26321 } 26322 elsif ( $tok eq '&' ) { 26323 $id_scan_state = '&'; 26324 } 26325 elsif ( $tok eq 'sub' or $tok eq 'package' ) { 26326 $saw_alpha = 0; # 'sub' is considered type info here 26327 $id_scan_state = '$'; 26328 $identifier .= ' '; # need a space to separate sub from sub name 26329 } 26330 elsif ( $tok eq '::' ) { 26331 $id_scan_state = 'A'; 26332 } 26333 elsif ( $tok =~ /^[A-Za-z_]/ ) { 26334 $id_scan_state = ':'; 26335 } 26336 elsif ( $tok eq '->' ) { 26337 $id_scan_state = '$'; 26338 } 26339 else { 26340 26341 # shouldn't happen 26342 my ( $a, $b, $c ) = caller; 26343 warning("Program Bug: scan_identifier given bad token = $tok \n"); 26344 warning(" called from sub $a line: $c\n"); 26345 report_definite_bug(); 26346 } 26347 $saw_type = !$saw_alpha; 26348 } 26349 else { 26350 $i--; 26351 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); 26352 } 26353 26354 # now loop to gather the identifier 26355 my $i_save = $i; 26356 26357 while ( $i < $max_token_index ) { 26358 $i_save = $i unless ( $tok =~ /^\s*$/ ); 26359 $tok = $$rtokens[ ++$i ]; 26360 26361 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { 26362 $tok = '::'; 26363 $i++; 26364 } 26365 26366 if ( $id_scan_state eq '$' ) { # starting variable name 26367 26368 if ( $tok eq '$' ) { 26369 26370 $identifier .= $tok; 26371 26372 # we've got a punctuation variable if end of line (punct.t) 26373 if ( $i == $max_token_index ) { 26374 $type = 'i'; 26375 $id_scan_state = ''; 26376 last; 26377 } 26378 } 26379 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. 26380 $saw_alpha = 1; 26381 $id_scan_state = ':'; # now need :: 26382 $identifier .= $tok; 26383 } 26384 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 26385 $saw_alpha = 1; 26386 $id_scan_state = ':'; # now need :: 26387 $identifier .= $tok; 26388 26389 # Perl will accept leading digits in identifiers, 26390 # although they may not always produce useful results. 26391 # Something like $main::0 is ok. But this also works: 26392 # 26393 # sub howdy::123::bubba{ print "bubba $54321!\n" } 26394 # howdy::123::bubba(); 26395 # 26396 } 26397 elsif ( $tok =~ /^[0-9]/ ) { # numeric 26398 $saw_alpha = 1; 26399 $id_scan_state = ':'; # now need :: 26400 $identifier .= $tok; 26401 } 26402 elsif ( $tok eq '::' ) { 26403 $id_scan_state = 'A'; 26404 $identifier .= $tok; 26405 } 26406 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array 26407 $identifier .= $tok; # keep same state, a $ could follow 26408 } 26409 elsif ( $tok eq '{' ) { 26410 26411 # check for something like ${#} or ${�} 26412 if ( $identifier eq '$' 26413 && $i + 2 <= $max_token_index 26414 && $$rtokens[ $i + 2 ] eq '}' 26415 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ ) 26416 { 26417 my $next2 = $$rtokens[ $i + 2 ]; 26418 my $next1 = $$rtokens[ $i + 1 ]; 26419 $identifier .= $tok . $next1 . $next2; 26420 $i += 2; 26421 $id_scan_state = ''; 26422 last; 26423 } 26424 26425 # skip something like ${xxx} or ->{ 26426 $id_scan_state = ''; 26427 26428 # if this is the first token of a line, any tokens for this 26429 # identifier have already been accumulated 26430 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } 26431 $i = $i_save; 26432 last; 26433 } 26434 26435 # space ok after leading $ % * & @ 26436 elsif ( $tok =~ /^\s*$/ ) { 26437 26438 if ( $identifier =~ /^[\$\%\*\&\@]/ ) { 26439 26440 if ( length($identifier) > 1 ) { 26441 $id_scan_state = ''; 26442 $i = $i_save; 26443 $type = 'i'; # probably punctuation variable 26444 last; 26445 } 26446 else { 26447 26448 # spaces after $'s are common, and space after @ 26449 # is harmless, so only complain about space 26450 # after other type characters. Space after $ and 26451 # @ will be removed in formatting. Report space 26452 # after % and * because they might indicate a 26453 # parsing error. In other words '% ' might be a 26454 # modulo operator. Delete this warning if it 26455 # gets annoying. 26456 if ( $identifier !~ /^[\@\$]$/ ) { 26457 $message = 26458 "Space in identifier, following $identifier\n"; 26459 } 26460 } 26461 } 26462 26463 # else: 26464 # space after '->' is ok 26465 } 26466 elsif ( $tok eq '^' ) { 26467 26468 # check for some special variables like $^W 26469 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 26470 $identifier .= $tok; 26471 $id_scan_state = 'A'; 26472 26473 # Perl accepts '$^]' or '@^]', but 26474 # there must not be a space before the ']'. 26475 my $next1 = $$rtokens[ $i + 1 ]; 26476 if ( $next1 eq ']' ) { 26477 $i++; 26478 $identifier .= $next1; 26479 $id_scan_state = ""; 26480 last; 26481 } 26482 } 26483 else { 26484 $id_scan_state = ''; 26485 } 26486 } 26487 else { # something else 26488 26489 # check for various punctuation variables 26490 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 26491 $identifier .= $tok; 26492 } 26493 26494 elsif ( $identifier eq '$#' ) { 26495 26496 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } 26497 26498 # perl seems to allow just these: $#: $#- $#+ 26499 elsif ( $tok =~ /^[\:\-\+]$/ ) { 26500 $type = 'i'; 26501 $identifier .= $tok; 26502 } 26503 else { 26504 $i = $i_save; 26505 write_logfile_entry( 'Use of $# is deprecated' . "\n" ); 26506 } 26507 } 26508 elsif ( $identifier eq '$$' ) { 26509 26510 # perl does not allow references to punctuation 26511 # variables without braces. For example, this 26512 # won't work: 26513 # $:=\4; 26514 # $a = $$:; 26515 # You would have to use 26516 # $a = ${$:}; 26517 26518 $i = $i_save; 26519 if ( $tok eq '{' ) { $type = 't' } 26520 else { $type = 'i' } 26521 } 26522 elsif ( $identifier eq '->' ) { 26523 $i = $i_save; 26524 } 26525 else { 26526 $i = $i_save; 26527 if ( length($identifier) == 1 ) { $identifier = ''; } 26528 } 26529 $id_scan_state = ''; 26530 last; 26531 } 26532 } 26533 elsif ( $id_scan_state eq '&' ) { # starting sub call? 26534 26535 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. 26536 $id_scan_state = ':'; # now need :: 26537 $saw_alpha = 1; 26538 $identifier .= $tok; 26539 } 26540 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 26541 $id_scan_state = ':'; # now need :: 26542 $saw_alpha = 1; 26543 $identifier .= $tok; 26544 } 26545 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26546 $id_scan_state = ':'; # now need :: 26547 $saw_alpha = 1; 26548 $identifier .= $tok; 26549 } 26550 elsif ( $tok =~ /^\s*$/ ) { # allow space 26551 } 26552 elsif ( $tok eq '::' ) { # leading :: 26553 $id_scan_state = 'A'; # accept alpha next 26554 $identifier .= $tok; 26555 } 26556 elsif ( $tok eq '{' ) { 26557 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } 26558 $i = $i_save; 26559 $id_scan_state = ''; 26560 last; 26561 } 26562 else { 26563 26564 # punctuation variable? 26565 # testfile: cunningham4.pl 26566 # 26567 # We have to be careful here. If we are in an unknown state, 26568 # we will reject the punctuation variable. In the following 26569 # example the '&' is a binary opeator but we are in an unknown 26570 # state because there is no sigil on 'Prima', so we don't 26571 # know what it is. But it is a bad guess that 26572 # '&~' is a punction variable. 26573 # $self->{text}->{colorMap}->[ 26574 # Prima::PodView::COLOR_CODE_FOREGROUND 26575 # & ~tb::COLOR_INDEX ] = 26576 # $sec->{ColorCode} 26577 if ( $identifier eq '&' && $expecting ) { 26578 $identifier .= $tok; 26579 } 26580 else { 26581 $identifier = ''; 26582 $i = $i_save; 26583 $type = '&'; 26584 } 26585 $id_scan_state = ''; 26586 last; 26587 } 26588 } 26589 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) 26590 26591 if ( $tok =~ /^[A-Za-z_]/ ) { # found it 26592 $identifier .= $tok; 26593 $id_scan_state = ':'; # now need :: 26594 $saw_alpha = 1; 26595 } 26596 elsif ( $tok eq "'" && $allow_tick ) { 26597 $identifier .= $tok; 26598 $id_scan_state = ':'; # now need :: 26599 $saw_alpha = 1; 26600 } 26601 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26602 $identifier .= $tok; 26603 $id_scan_state = ':'; # now need :: 26604 $saw_alpha = 1; 26605 } 26606 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 26607 $id_scan_state = '('; 26608 $identifier .= $tok; 26609 } 26610 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 26611 $id_scan_state = ')'; 26612 $identifier .= $tok; 26613 } 26614 else { 26615 $id_scan_state = ''; 26616 $i = $i_save; 26617 last; 26618 } 26619 } 26620 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha 26621 26622 if ( $tok eq '::' ) { # got it 26623 $identifier .= $tok; 26624 $id_scan_state = 'A'; # now require alpha 26625 } 26626 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here 26627 $identifier .= $tok; 26628 $id_scan_state = ':'; # now need :: 26629 $saw_alpha = 1; 26630 } 26631 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26632 $identifier .= $tok; 26633 $id_scan_state = ':'; # now need :: 26634 $saw_alpha = 1; 26635 } 26636 elsif ( $tok eq "'" && $allow_tick ) { # tick 26637 26638 if ( $is_keyword{$identifier} ) { 26639 $id_scan_state = ''; # that's all 26640 $i = $i_save; 26641 } 26642 else { 26643 $identifier .= $tok; 26644 } 26645 } 26646 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 26647 $id_scan_state = '('; 26648 $identifier .= $tok; 26649 } 26650 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 26651 $id_scan_state = ')'; 26652 $identifier .= $tok; 26653 } 26654 else { 26655 $id_scan_state = ''; # that's all 26656 $i = $i_save; 26657 last; 26658 } 26659 } 26660 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype 26661 26662 if ( $tok eq '(' ) { # got it 26663 $identifier .= $tok; 26664 $id_scan_state = ')'; # now find the end of it 26665 } 26666 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going 26667 $identifier .= $tok; 26668 } 26669 else { 26670 $id_scan_state = ''; # that's all - no prototype 26671 $i = $i_save; 26672 last; 26673 } 26674 } 26675 elsif ( $id_scan_state eq ')' ) { # looking for ) to end 26676 26677 if ( $tok eq ')' ) { # got it 26678 $identifier .= $tok; 26679 $id_scan_state = ''; # all done 26680 last; 26681 } 26682 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { 26683 $identifier .= $tok; 26684 } 26685 else { # probable error in script, but keep going 26686 warning("Unexpected '$tok' while seeking end of prototype\n"); 26687 $identifier .= $tok; 26688 } 26689 } 26690 else { # can get here due to error in initialization 26691 $id_scan_state = ''; 26692 $i = $i_save; 26693 last; 26694 } 26695 } 26696 26697 if ( $id_scan_state eq ')' ) { 26698 warning("Hit end of line while seeking ) to end prototype\n"); 26699 } 26700 26701 # once we enter the actual identifier, it may not extend beyond 26702 # the end of the current line 26703 if ( $id_scan_state =~ /^[A\:\(\)]/ ) { 26704 $id_scan_state = ''; 26705 } 26706 if ( $i < 0 ) { $i = 0 } 26707 26708 unless ($type) { 26709 26710 if ($saw_type) { 26711 26712 if ($saw_alpha) { 26713 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { 26714 $type = 'w'; 26715 } 26716 else { $type = 'i' } 26717 } 26718 elsif ( $identifier eq '->' ) { 26719 $type = '->'; 26720 } 26721 elsif ( 26722 ( length($identifier) > 1 ) 26723 26724 # In something like '@$=' we have an identifier '@$' 26725 # In something like '$${' we have type '$$' (and only 26726 # part of an identifier) 26727 && !( $identifier =~ /\$$/ && $tok eq '{' ) 26728 && ( $identifier !~ /^(sub |package )$/ ) 26729 ) 26730 { 26731 $type = 'i'; 26732 } 26733 else { $type = 't' } 26734 } 26735 elsif ($saw_alpha) { 26736 26737 # type 'w' includes anything without leading type info 26738 # ($,%,@,*) including something like abc::def::ghi 26739 $type = 'w'; 26740 } 26741 else { 26742 $type = ''; 26743 } # this can happen on a restart 26744 } 26745 26746 if ($identifier) { 26747 $tok = $identifier; 26748 if ($message) { write_logfile_entry($message) } 26749 } 26750 else { 26751 $tok = $tok_begin; 26752 $i = $i_begin; 26753 } 26754 26755 TOKENIZER_DEBUG_FLAG_SCAN_ID && do { 26756 my ( $a, $b, $c ) = caller; 26757 print 26758"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; 26759 print 26760"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; 26761 }; 26762 return ( $i, $tok, $type, $id_scan_state, $identifier ); 26763} 26764 26765{ 26766 26767 # saved package and subnames in case prototype is on separate line 26768 my ( $package_saved, $subname_saved ); 26769 26770 sub do_scan_sub { 26771 26772 # do_scan_sub parses a sub name and prototype 26773 # it is called with $i_beg equal to the index of the first nonblank 26774 # token following a 'sub' token. 26775 26776 # TODO: add future error checks to be sure we have a valid 26777 # sub name. For example, 'sub &doit' is wrong. Also, be sure 26778 # a name is given if and only if a non-anonymous sub is 26779 # appropriate. 26780 # USES GLOBAL VARS: $current_package, $last_nonblank_token, 26781 # $in_attribute_list, %saw_function_definition, 26782 # $statement_type 26783 26784 my ( 26785 $input_line, $i, $i_beg, 26786 $tok, $type, $rtokens, 26787 $rtoken_map, $id_scan_state, $max_token_index 26788 ) = @_; 26789 $id_scan_state = ""; # normally we get everything in one call 26790 my $subname = undef; 26791 my $package = undef; 26792 my $proto = undef; 26793 my $attrs = undef; 26794 my $match; 26795 26796 my $pos_beg = $$rtoken_map[$i_beg]; 26797 pos($input_line) = $pos_beg; 26798 26799 # sub NAME PROTO ATTRS 26800 if ( 26801 $input_line =~ m/\G\s* 26802 ((?:\w*(?:'|::))*) # package - something that ends in :: or ' 26803 (\w+) # NAME - required 26804 (\s*\([^){]*\))? # PROTO - something in parens 26805 (\s*:)? # ATTRS - leading : of attribute list 26806 /gcx 26807 ) 26808 { 26809 $match = 1; 26810 $subname = $2; 26811 $proto = $3; 26812 $attrs = $4; 26813 26814 $package = ( defined($1) && $1 ) ? $1 : $current_package; 26815 $package =~ s/\'/::/g; 26816 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 26817 $package =~ s/::$//; 26818 my $pos = pos($input_line); 26819 my $numc = $pos - $pos_beg; 26820 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); 26821 $type = 'i'; 26822 } 26823 26824 # Look for prototype/attributes not preceded on this line by subname; 26825 # This might be an anonymous sub with attributes, 26826 # or a prototype on a separate line from its sub name 26827 elsif ( 26828 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO 26829 (\s*:)? # ATTRS leading ':' 26830 /gcx 26831 && ( $1 || $2 ) 26832 ) 26833 { 26834 $match = 1; 26835 $proto = $1; 26836 $attrs = $2; 26837 26838 # Handle prototype on separate line from subname 26839 if ($subname_saved) { 26840 $package = $package_saved; 26841 $subname = $subname_saved; 26842 $tok = $last_nonblank_token; 26843 } 26844 $type = 'i'; 26845 } 26846 26847 if ($match) { 26848 26849 # ATTRS: if there are attributes, back up and let the ':' be 26850 # found later by the scanner. 26851 my $pos = pos($input_line); 26852 if ($attrs) { 26853 $pos -= length($attrs); 26854 } 26855 26856 my $next_nonblank_token = $tok; 26857 26858 # catch case of line with leading ATTR ':' after anonymous sub 26859 if ( $pos == $pos_beg && $tok eq ':' ) { 26860 $type = 'A'; 26861 $in_attribute_list = 1; 26862 } 26863 26864 # We must convert back from character position 26865 # to pre_token index. 26866 else { 26867 26868 # I don't think an error flag can occur here ..but ? 26869 my $error; 26870 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, 26871 $max_token_index ); 26872 if ($error) { warning("Possibly invalid sub\n") } 26873 26874 # check for multiple definitions of a sub 26875 ( $next_nonblank_token, my $i_next ) = 26876 find_next_nonblank_token_on_this_line( $i, $rtokens, 26877 $max_token_index ); 26878 } 26879 26880 if ( $next_nonblank_token =~ /^(\s*|#)$/ ) 26881 { # skip blank or side comment 26882 my ( $rpre_tokens, $rpre_types ) = 26883 peek_ahead_for_n_nonblank_pre_tokens(1); 26884 if ( defined($rpre_tokens) && @$rpre_tokens ) { 26885 $next_nonblank_token = $rpre_tokens->[0]; 26886 } 26887 else { 26888 $next_nonblank_token = '}'; 26889 } 26890 } 26891 $package_saved = ""; 26892 $subname_saved = ""; 26893 if ( $next_nonblank_token eq '{' ) { 26894 if ($subname) { 26895 26896 # Check for multiple definitions of a sub, but 26897 # it is ok to have multiple sub BEGIN, etc, 26898 # so we do not complain if name is all caps 26899 if ( $saw_function_definition{$package}{$subname} 26900 && $subname !~ /^[A-Z]+$/ ) 26901 { 26902 my $lno = $saw_function_definition{$package}{$subname}; 26903 warning( 26904"already saw definition of 'sub $subname' in package '$package' at line $lno\n" 26905 ); 26906 } 26907 $saw_function_definition{$package}{$subname} = 26908 $tokenizer_self->{_last_line_number}; 26909 } 26910 } 26911 elsif ( $next_nonblank_token eq ';' ) { 26912 } 26913 elsif ( $next_nonblank_token eq '}' ) { 26914 } 26915 26916 # ATTRS - if an attribute list follows, remember the name 26917 # of the sub so the next opening brace can be labeled. 26918 # Setting 'statement_type' causes any ':'s to introduce 26919 # attributes. 26920 elsif ( $next_nonblank_token eq ':' ) { 26921 $statement_type = $tok; 26922 } 26923 26924 # see if PROTO follows on another line: 26925 elsif ( $next_nonblank_token eq '(' ) { 26926 if ( $attrs || $proto ) { 26927 warning( 26928"unexpected '(' after definition or declaration of sub '$subname'\n" 26929 ); 26930 } 26931 else { 26932 $id_scan_state = 'sub'; # we must come back to get proto 26933 $statement_type = $tok; 26934 $package_saved = $package; 26935 $subname_saved = $subname; 26936 } 26937 } 26938 elsif ($next_nonblank_token) { # EOF technically ok 26939 warning( 26940"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" 26941 ); 26942 } 26943 check_prototype( $proto, $package, $subname ); 26944 } 26945 26946 # no match but line not blank 26947 else { 26948 } 26949 return ( $i, $tok, $type, $id_scan_state ); 26950 } 26951} 26952 26953#########i############################################################### 26954# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS 26955######################################################################### 26956 26957sub find_next_nonblank_token { 26958 my ( $i, $rtokens, $max_token_index ) = @_; 26959 26960 if ( $i >= $max_token_index ) { 26961 if ( !peeked_ahead() ) { 26962 peeked_ahead(1); 26963 $rtokens = 26964 peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); 26965 } 26966 } 26967 my $next_nonblank_token = $$rtokens[ ++$i ]; 26968 26969 if ( $next_nonblank_token =~ /^\s*$/ ) { 26970 $next_nonblank_token = $$rtokens[ ++$i ]; 26971 } 26972 return ( $next_nonblank_token, $i ); 26973} 26974 26975sub numerator_expected { 26976 26977 # this is a filter for a possible numerator, in support of guessing 26978 # for the / pattern delimiter token. 26979 # returns - 26980 # 1 - yes 26981 # 0 - can't tell 26982 # -1 - no 26983 # Note: I am using the convention that variables ending in 26984 # _expected have these 3 possible values. 26985 my ( $i, $rtokens, $max_token_index ) = @_; 26986 my $next_token = $$rtokens[ $i + 1 ]; 26987 if ( $next_token eq '=' ) { $i++; } # handle /= 26988 my ( $next_nonblank_token, $i_next ) = 26989 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 26990 26991 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { 26992 1; 26993 } 26994 else { 26995 26996 if ( $next_nonblank_token =~ /^\s*$/ ) { 26997 0; 26998 } 26999 else { 27000 -1; 27001 } 27002 } 27003} 27004 27005sub pattern_expected { 27006 27007 # This is the start of a filter for a possible pattern. 27008 # It looks at the token after a possbible pattern and tries to 27009 # determine if that token could end a pattern. 27010 # returns - 27011 # 1 - yes 27012 # 0 - can't tell 27013 # -1 - no 27014 my ( $i, $rtokens, $max_token_index ) = @_; 27015 my $next_token = $$rtokens[ $i + 1 ]; 27016 if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } # skip possible modifier 27017 my ( $next_nonblank_token, $i_next ) = 27018 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 27019 27020 # list of tokens which may follow a pattern 27021 # (can probably be expanded) 27022 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) 27023 { 27024 1; 27025 } 27026 else { 27027 27028 if ( $next_nonblank_token =~ /^\s*$/ ) { 27029 0; 27030 } 27031 else { 27032 -1; 27033 } 27034 } 27035} 27036 27037sub find_next_nonblank_token_on_this_line { 27038 my ( $i, $rtokens, $max_token_index ) = @_; 27039 my $next_nonblank_token; 27040 27041 if ( $i < $max_token_index ) { 27042 $next_nonblank_token = $$rtokens[ ++$i ]; 27043 27044 if ( $next_nonblank_token =~ /^\s*$/ ) { 27045 27046 if ( $i < $max_token_index ) { 27047 $next_nonblank_token = $$rtokens[ ++$i ]; 27048 } 27049 } 27050 } 27051 else { 27052 $next_nonblank_token = ""; 27053 } 27054 return ( $next_nonblank_token, $i ); 27055} 27056 27057sub find_angle_operator_termination { 27058 27059 # We are looking at a '<' and want to know if it is an angle operator. 27060 # We are to return: 27061 # $i = pretoken index of ending '>' if found, current $i otherwise 27062 # $type = 'Q' if found, '>' otherwise 27063 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; 27064 my $i = $i_beg; 27065 my $type = '<'; 27066 pos($input_line) = 1 + $$rtoken_map[$i]; 27067 27068 my $filter; 27069 27070 # we just have to find the next '>' if a term is expected 27071 if ( $expecting == TERM ) { $filter = '[\>]' } 27072 27073 # we have to guess if we don't know what is expected 27074 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } 27075 27076 # shouldn't happen - we shouldn't be here if operator is expected 27077 else { warning("Program Bug in find_angle_operator_termination\n") } 27078 27079 # To illustrate what we might be looking at, in case we are 27080 # guessing, here are some examples of valid angle operators 27081 # (or file globs): 27082 # <tmp_imp/*> 27083 # <FH> 27084 # <$fh> 27085 # <*.c *.h> 27086 # <_> 27087 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) 27088 # <${PREFIX}*img*.$IMAGE_TYPE> 27089 # <img*.$IMAGE_TYPE> 27090 # <Timg*.$IMAGE_TYPE> 27091 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> 27092 # 27093 # Here are some examples of lines which do not have angle operators: 27094 # return undef unless $self->[2]++ < $#{$self->[1]}; 27095 # < 2 || @$t > 27096 # 27097 # the following line from dlister.pl caused trouble: 27098 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; 27099 # 27100 # If the '<' starts an angle operator, it must end on this line and 27101 # it must not have certain characters like ';' and '=' in it. I use 27102 # this to limit the testing. This filter should be improved if 27103 # possible. 27104 27105 if ( $input_line =~ /($filter)/g ) { 27106 27107 if ( $1 eq '>' ) { 27108 27109 # We MAY have found an angle operator termination if we get 27110 # here, but we need to do more to be sure we haven't been 27111 # fooled. 27112 my $pos = pos($input_line); 27113 27114 my $pos_beg = $$rtoken_map[$i]; 27115 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); 27116 27117 # Reject if the closing '>' follows a '-' as in: 27118 # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { } 27119 if ( $expecting eq UNKNOWN ) { 27120 my $check = substr( $input_line, $pos - 2, 1 ); 27121 if ( $check eq '-' ) { 27122 return ( $i, $type ); 27123 } 27124 } 27125 27126 ######################################debug##### 27127 #write_diagnostics( "ANGLE? :$str\n"); 27128 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; 27129 ######################################debug##### 27130 $type = 'Q'; 27131 my $error; 27132 ( $i, $error ) = 27133 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 27134 27135 # It may be possible that a quote ends midway in a pretoken. 27136 # If this happens, it may be necessary to split the pretoken. 27137 if ($error) { 27138 warning( 27139 "Possible tokinization error..please check this line\n"); 27140 report_possible_bug(); 27141 } 27142 27143 # Now let's see where we stand.... 27144 # OK if math op not possible 27145 if ( $expecting == TERM ) { 27146 } 27147 27148 # OK if there are no more than 2 pre-tokens inside 27149 # (not possible to write 2 token math between < and >) 27150 # This catches most common cases 27151 elsif ( $i <= $i_beg + 3 ) { 27152 write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); 27153 } 27154 27155 # Not sure.. 27156 else { 27157 27158 # Let's try a Brace Test: any braces inside must balance 27159 my $br = 0; 27160 while ( $str =~ /\{/g ) { $br++ } 27161 while ( $str =~ /\}/g ) { $br-- } 27162 my $sb = 0; 27163 while ( $str =~ /\[/g ) { $sb++ } 27164 while ( $str =~ /\]/g ) { $sb-- } 27165 my $pr = 0; 27166 while ( $str =~ /\(/g ) { $pr++ } 27167 while ( $str =~ /\)/g ) { $pr-- } 27168 27169 # if braces do not balance - not angle operator 27170 if ( $br || $sb || $pr ) { 27171 $i = $i_beg; 27172 $type = '<'; 27173 write_diagnostics( 27174 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); 27175 } 27176 27177 # we should keep doing more checks here...to be continued 27178 # Tentatively accepting this as a valid angle operator. 27179 # There are lots more things that can be checked. 27180 else { 27181 write_diagnostics( 27182 "ANGLE-Guessing yes: $str expecting=$expecting\n"); 27183 write_logfile_entry("Guessing angle operator here: $str\n"); 27184 } 27185 } 27186 } 27187 27188 # didn't find ending > 27189 else { 27190 if ( $expecting == TERM ) { 27191 warning("No ending > for angle operator\n"); 27192 } 27193 } 27194 } 27195 return ( $i, $type ); 27196} 27197 27198sub scan_number_do { 27199 27200 # scan a number in any of the formats that Perl accepts 27201 # Underbars (_) are allowed in decimal numbers. 27202 # input parameters - 27203 # $input_line - the string to scan 27204 # $i - pre_token index to start scanning 27205 # $rtoken_map - reference to the pre_token map giving starting 27206 # character position in $input_line of token $i 27207 # output parameters - 27208 # $i - last pre_token index of the number just scanned 27209 # number - the number (characters); or undef if not a number 27210 27211 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; 27212 my $pos_beg = $$rtoken_map[$i]; 27213 my $pos; 27214 my $i_begin = $i; 27215 my $number = undef; 27216 my $type = $input_type; 27217 27218 my $first_char = substr( $input_line, $pos_beg, 1 ); 27219 27220 # Look for bad starting characters; Shouldn't happen.. 27221 if ( $first_char !~ /[\d\.\+\-Ee]/ ) { 27222 warning("Program bug - scan_number given character $first_char\n"); 27223 report_definite_bug(); 27224 return ( $i, $type, $number ); 27225 } 27226 27227 # handle v-string without leading 'v' character ('Two Dot' rule) 27228 # (vstring.t) 27229 # TODO: v-strings may contain underscores 27230 pos($input_line) = $pos_beg; 27231 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { 27232 $pos = pos($input_line); 27233 my $numc = $pos - $pos_beg; 27234 $number = substr( $input_line, $pos_beg, $numc ); 27235 $type = 'v'; 27236 report_v_string($number); 27237 } 27238 27239 # handle octal, hex, binary 27240 if ( !defined($number) ) { 27241 pos($input_line) = $pos_beg; 27242 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) 27243 { 27244 $pos = pos($input_line); 27245 my $numc = $pos - $pos_beg; 27246 $number = substr( $input_line, $pos_beg, $numc ); 27247 $type = 'n'; 27248 } 27249 } 27250 27251 # handle decimal 27252 if ( !defined($number) ) { 27253 pos($input_line) = $pos_beg; 27254 27255 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { 27256 $pos = pos($input_line); 27257 27258 # watch out for things like 0..40 which would give 0. by this; 27259 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) 27260 && ( substr( $input_line, $pos, 1 ) eq '.' ) ) 27261 { 27262 $pos--; 27263 } 27264 my $numc = $pos - $pos_beg; 27265 $number = substr( $input_line, $pos_beg, $numc ); 27266 $type = 'n'; 27267 } 27268 } 27269 27270 # filter out non-numbers like e + - . e2 .e3 +e6 27271 # the rule: at least one digit, and any 'e' must be preceded by a digit 27272 if ( 27273 $number !~ /\d/ # no digits 27274 || ( $number =~ /^(.*)[eE]/ 27275 && $1 !~ /\d/ ) # or no digits before the 'e' 27276 ) 27277 { 27278 $number = undef; 27279 $type = $input_type; 27280 return ( $i, $type, $number ); 27281 } 27282 27283 # Found a number; now we must convert back from character position 27284 # to pre_token index. An error here implies user syntax error. 27285 # An example would be an invalid octal number like '009'. 27286 my $error; 27287 ( $i, $error ) = 27288 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 27289 if ($error) { warning("Possibly invalid number\n") } 27290 27291 return ( $i, $type, $number ); 27292} 27293 27294sub inverse_pretoken_map { 27295 27296 # Starting with the current pre_token index $i, scan forward until 27297 # finding the index of the next pre_token whose position is $pos. 27298 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; 27299 my $error = 0; 27300 27301 while ( ++$i <= $max_token_index ) { 27302 27303 if ( $pos <= $$rtoken_map[$i] ) { 27304 27305 # Let the calling routine handle errors in which we do not 27306 # land on a pre-token boundary. It can happen by running 27307 # perltidy on some non-perl scripts, for example. 27308 if ( $pos < $$rtoken_map[$i] ) { $error = 1 } 27309 $i--; 27310 last; 27311 } 27312 } 27313 return ( $i, $error ); 27314} 27315 27316sub find_here_doc { 27317 27318 # find the target of a here document, if any 27319 # input parameters: 27320 # $i - token index of the second < of << 27321 # ($i must be less than the last token index if this is called) 27322 # output parameters: 27323 # $found_target = 0 didn't find target; =1 found target 27324 # HERE_TARGET - the target string (may be empty string) 27325 # $i - unchanged if not here doc, 27326 # or index of the last token of the here target 27327 # $saw_error - flag noting unbalanced quote on here target 27328 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 27329 my $ibeg = $i; 27330 my $found_target = 0; 27331 my $here_doc_target = ''; 27332 my $here_quote_character = ''; 27333 my $saw_error = 0; 27334 my ( $next_nonblank_token, $i_next_nonblank, $next_token ); 27335 $next_token = $$rtokens[ $i + 1 ]; 27336 27337 # perl allows a backslash before the target string (heredoc.t) 27338 my $backslash = 0; 27339 if ( $next_token eq '\\' ) { 27340 $backslash = 1; 27341 $next_token = $$rtokens[ $i + 2 ]; 27342 } 27343 27344 ( $next_nonblank_token, $i_next_nonblank ) = 27345 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); 27346 27347 if ( $next_nonblank_token =~ /[\'\"\`]/ ) { 27348 27349 my $in_quote = 1; 27350 my $quote_depth = 0; 27351 my $quote_pos = 0; 27352 my $quoted_string; 27353 27354 ( 27355 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, 27356 $quoted_string 27357 ) 27358 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, 27359 $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); 27360 27361 if ($in_quote) { # didn't find end of quote, so no target found 27362 $i = $ibeg; 27363 if ( $expecting == TERM ) { 27364 warning( 27365"Did not find here-doc string terminator ($here_quote_character) before end of line \n" 27366 ); 27367 $saw_error = 1; 27368 } 27369 } 27370 else { # found ending quote 27371 my $j; 27372 $found_target = 1; 27373 27374 my $tokj; 27375 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { 27376 $tokj = $$rtokens[$j]; 27377 27378 # we have to remove any backslash before the quote character 27379 # so that the here-doc-target exactly matches this string 27380 next 27381 if ( $tokj eq "\\" 27382 && $j < $i - 1 27383 && $$rtokens[ $j + 1 ] eq $here_quote_character ); 27384 $here_doc_target .= $tokj; 27385 } 27386 } 27387 } 27388 27389 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { 27390 $found_target = 1; 27391 write_logfile_entry( 27392 "found blank here-target after <<; suggest using \"\"\n"); 27393 $i = $ibeg; 27394 } 27395 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << 27396 27397 my $here_doc_expected; 27398 if ( $expecting == UNKNOWN ) { 27399 $here_doc_expected = guess_if_here_doc($next_token); 27400 } 27401 else { 27402 $here_doc_expected = 1; 27403 } 27404 27405 if ($here_doc_expected) { 27406 $found_target = 1; 27407 $here_doc_target = $next_token; 27408 $i = $ibeg + 1; 27409 } 27410 27411 } 27412 else { 27413 27414 if ( $expecting == TERM ) { 27415 $found_target = 1; 27416 write_logfile_entry("Note: bare here-doc operator <<\n"); 27417 } 27418 else { 27419 $i = $ibeg; 27420 } 27421 } 27422 27423 # patch to neglect any prepended backslash 27424 if ( $found_target && $backslash ) { $i++ } 27425 27426 return ( $found_target, $here_doc_target, $here_quote_character, $i, 27427 $saw_error ); 27428} 27429 27430sub do_quote { 27431 27432 # follow (or continue following) quoted string(s) 27433 # $in_quote return code: 27434 # 0 - ok, found end 27435 # 1 - still must find end of quote whose target is $quote_character 27436 # 2 - still looking for end of first of two quotes 27437 # 27438 # Returns updated strings: 27439 # $quoted_string_1 = quoted string seen while in_quote=1 27440 # $quoted_string_2 = quoted string seen while in_quote=2 27441 my ( 27442 $i, $in_quote, $quote_character, 27443 $quote_pos, $quote_depth, $quoted_string_1, 27444 $quoted_string_2, $rtokens, $rtoken_map, 27445 $max_token_index 27446 ) = @_; 27447 27448 my $in_quote_starting = $in_quote; 27449 27450 my $quoted_string; 27451 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow 27452 my $ibeg = $i; 27453 ( 27454 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 27455 $quoted_string 27456 ) 27457 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, 27458 $quote_pos, $quote_depth, $max_token_index ); 27459 $quoted_string_2 .= $quoted_string; 27460 if ( $in_quote == 1 ) { 27461 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } 27462 $quote_character = ''; 27463 } 27464 else { 27465 $quoted_string_2 .= "\n"; 27466 } 27467 } 27468 27469 if ( $in_quote == 1 ) { # one (more) quote to follow 27470 my $ibeg = $i; 27471 ( 27472 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 27473 $quoted_string 27474 ) 27475 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 27476 $quote_pos, $quote_depth, $max_token_index ); 27477 $quoted_string_1 .= $quoted_string; 27478 if ( $in_quote == 1 ) { 27479 $quoted_string_1 .= "\n"; 27480 } 27481 } 27482 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 27483 $quoted_string_1, $quoted_string_2 ); 27484} 27485 27486sub follow_quoted_string { 27487 27488 # scan for a specific token, skipping escaped characters 27489 # if the quote character is blank, use the first non-blank character 27490 # input parameters: 27491 # $rtokens = reference to the array of tokens 27492 # $i = the token index of the first character to search 27493 # $in_quote = number of quoted strings being followed 27494 # $beginning_tok = the starting quote character 27495 # $quote_pos = index to check next for alphanumeric delimiter 27496 # output parameters: 27497 # $i = the token index of the ending quote character 27498 # $in_quote = decremented if found end, unchanged if not 27499 # $beginning_tok = the starting quote character 27500 # $quote_pos = index to check next for alphanumeric delimiter 27501 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. 27502 # $quoted_string = the text of the quote (without quotation tokens) 27503 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, 27504 $max_token_index ) 27505 = @_; 27506 my ( $tok, $end_tok ); 27507 my $i = $i_beg - 1; 27508 my $quoted_string = ""; 27509 27510 TOKENIZER_DEBUG_FLAG_QUOTE && do { 27511 print 27512"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; 27513 }; 27514 27515 # get the corresponding end token 27516 if ( $beginning_tok !~ /^\s*$/ ) { 27517 $end_tok = matching_end_token($beginning_tok); 27518 } 27519 27520 # a blank token means we must find and use the first non-blank one 27521 else { 27522 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> 27523 27524 while ( $i < $max_token_index ) { 27525 $tok = $$rtokens[ ++$i ]; 27526 27527 if ( $tok !~ /^\s*$/ ) { 27528 27529 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { 27530 $i = $max_token_index; 27531 } 27532 else { 27533 27534 if ( length($tok) > 1 ) { 27535 if ( $quote_pos <= 0 ) { $quote_pos = 1 } 27536 $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); 27537 } 27538 else { 27539 $beginning_tok = $tok; 27540 $quote_pos = 0; 27541 } 27542 $end_tok = matching_end_token($beginning_tok); 27543 $quote_depth = 1; 27544 last; 27545 } 27546 } 27547 else { 27548 $allow_quote_comments = 1; 27549 } 27550 } 27551 } 27552 27553 # There are two different loops which search for the ending quote 27554 # character. In the rare case of an alphanumeric quote delimiter, we 27555 # have to look through alphanumeric tokens character-by-character, since 27556 # the pre-tokenization process combines multiple alphanumeric 27557 # characters, whereas for a non-alphanumeric delimiter, only tokens of 27558 # length 1 can match. 27559 27560 ################################################################### 27561 # Case 1 (rare): loop for case of alphanumeric quote delimiter.. 27562 # "quote_pos" is the position the current word to begin searching 27563 ################################################################### 27564 if ( $beginning_tok =~ /\w/ ) { 27565 27566 # Note this because it is not recommended practice except 27567 # for obfuscated perl contests 27568 if ( $in_quote == 1 ) { 27569 write_logfile_entry( 27570 "Note: alphanumeric quote delimiter ($beginning_tok) \n"); 27571 } 27572 27573 while ( $i < $max_token_index ) { 27574 27575 if ( $quote_pos == 0 || ( $i < 0 ) ) { 27576 $tok = $$rtokens[ ++$i ]; 27577 27578 if ( $tok eq '\\' ) { 27579 27580 # retain backslash unless it hides the end token 27581 $quoted_string .= $tok 27582 unless $$rtokens[ $i + 1 ] eq $end_tok; 27583 $quote_pos++; 27584 last if ( $i >= $max_token_index ); 27585 $tok = $$rtokens[ ++$i ]; 27586 } 27587 } 27588 my $old_pos = $quote_pos; 27589 27590 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) 27591 { 27592 27593 } 27594 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); 27595 27596 if ( $quote_pos > 0 ) { 27597 27598 $quoted_string .= 27599 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); 27600 27601 $quote_depth--; 27602 27603 if ( $quote_depth == 0 ) { 27604 $in_quote--; 27605 last; 27606 } 27607 } 27608 else { 27609 $quoted_string .= substr( $tok, $old_pos ); 27610 } 27611 } 27612 } 27613 27614 ######################################################################## 27615 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. 27616 ######################################################################## 27617 else { 27618 27619 while ( $i < $max_token_index ) { 27620 $tok = $$rtokens[ ++$i ]; 27621 27622 if ( $tok eq $end_tok ) { 27623 $quote_depth--; 27624 27625 if ( $quote_depth == 0 ) { 27626 $in_quote--; 27627 last; 27628 } 27629 } 27630 elsif ( $tok eq $beginning_tok ) { 27631 $quote_depth++; 27632 } 27633 elsif ( $tok eq '\\' ) { 27634 27635 # retain backslash unless it hides the beginning or end token 27636 $tok = $$rtokens[ ++$i ]; 27637 $quoted_string .= '\\' 27638 unless ( $tok eq $end_tok || $tok eq $beginning_tok ); 27639 } 27640 $quoted_string .= $tok; 27641 } 27642 } 27643 if ( $i > $max_token_index ) { $i = $max_token_index } 27644 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, 27645 $quoted_string ); 27646} 27647 27648sub indicate_error { 27649 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; 27650 interrupt_logfile(); 27651 warning($msg); 27652 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); 27653 resume_logfile(); 27654} 27655 27656sub write_error_indicator_pair { 27657 my ( $line_number, $input_line, $pos, $carrat ) = @_; 27658 my ( $offset, $numbered_line, $underline ) = 27659 make_numbered_line( $line_number, $input_line, $pos ); 27660 $underline = write_on_underline( $underline, $pos - $offset, $carrat ); 27661 warning( $numbered_line . "\n" ); 27662 $underline =~ s/\s*$//; 27663 warning( $underline . "\n" ); 27664} 27665 27666sub make_numbered_line { 27667 27668 # Given an input line, its line number, and a character position of 27669 # interest, create a string not longer than 80 characters of the form 27670 # $lineno: sub_string 27671 # such that the sub_string of $str contains the position of interest 27672 # 27673 # Here is an example of what we want, in this case we add trailing 27674 # '...' because the line is long. 27675 # 27676 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 27677 # 27678 # Here is another example, this time in which we used leading '...' 27679 # because of excessive length: 27680 # 27681 # 2: ... er of the World Wide Web Consortium's 27682 # 27683 # input parameters are: 27684 # $lineno = line number 27685 # $str = the text of the line 27686 # $pos = position of interest (the error) : 0 = first character 27687 # 27688 # We return : 27689 # - $offset = an offset which corrects the position in case we only 27690 # display part of a line, such that $pos-$offset is the effective 27691 # position from the start of the displayed line. 27692 # - $numbered_line = the numbered line as above, 27693 # - $underline = a blank 'underline' which is all spaces with the same 27694 # number of characters as the numbered line. 27695 27696 my ( $lineno, $str, $pos ) = @_; 27697 my $offset = ( $pos < 60 ) ? 0 : $pos - 40; 27698 my $excess = length($str) - $offset - 68; 27699 my $numc = ( $excess > 0 ) ? 68 : undef; 27700 27701 if ( defined($numc) ) { 27702 if ( $offset == 0 ) { 27703 $str = substr( $str, $offset, $numc - 4 ) . " ..."; 27704 } 27705 else { 27706 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; 27707 } 27708 } 27709 else { 27710 27711 if ( $offset == 0 ) { 27712 } 27713 else { 27714 $str = "... " . substr( $str, $offset + 4 ); 27715 } 27716 } 27717 27718 my $numbered_line = sprintf( "%d: ", $lineno ); 27719 $offset -= length($numbered_line); 27720 $numbered_line .= $str; 27721 my $underline = " " x length($numbered_line); 27722 return ( $offset, $numbered_line, $underline ); 27723} 27724 27725sub write_on_underline { 27726 27727 # The "underline" is a string that shows where an error is; it starts 27728 # out as a string of blanks with the same length as the numbered line of 27729 # code above it, and we have to add marking to show where an error is. 27730 # In the example below, we want to write the string '--^' just below 27731 # the line of bad code: 27732 # 27733 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 27734 # ---^ 27735 # We are given the current underline string, plus a position and a 27736 # string to write on it. 27737 # 27738 # In the above example, there will be 2 calls to do this: 27739 # First call: $pos=19, pos_chr=^ 27740 # Second call: $pos=16, pos_chr=--- 27741 # 27742 # This is a trivial thing to do with substr, but there is some 27743 # checking to do. 27744 27745 my ( $underline, $pos, $pos_chr ) = @_; 27746 27747 # check for error..shouldn't happen 27748 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { 27749 return $underline; 27750 } 27751 my $excess = length($pos_chr) + $pos - length($underline); 27752 if ( $excess > 0 ) { 27753 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); 27754 } 27755 substr( $underline, $pos, length($pos_chr) ) = $pos_chr; 27756 return ($underline); 27757} 27758 27759sub pre_tokenize { 27760 27761 # Break a string, $str, into a sequence of preliminary tokens. We 27762 # are interested in these types of tokens: 27763 # words (type='w'), example: 'max_tokens_wanted' 27764 # digits (type = 'd'), example: '0755' 27765 # whitespace (type = 'b'), example: ' ' 27766 # any other single character (i.e. punct; type = the character itself). 27767 # We cannot do better than this yet because we might be in a quoted 27768 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all 27769 # tokens. 27770 my ( $str, $max_tokens_wanted ) = @_; 27771 27772 # we return references to these 3 arrays: 27773 my @tokens = (); # array of the tokens themselves 27774 my @token_map = (0); # string position of start of each token 27775 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct 27776 27777 do { 27778 27779 # whitespace 27780 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } 27781 27782 # numbers 27783 # note that this must come before words! 27784 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } 27785 27786 # words 27787 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } 27788 27789 # single-character punctuation 27790 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } 27791 27792 # that's all.. 27793 else { 27794 return ( \@tokens, \@token_map, \@type ); 27795 } 27796 27797 push @tokens, $1; 27798 push @token_map, pos($str); 27799 27800 } while ( --$max_tokens_wanted != 0 ); 27801 27802 return ( \@tokens, \@token_map, \@type ); 27803} 27804 27805sub show_tokens { 27806 27807 # this is an old debug routine 27808 my ( $rtokens, $rtoken_map ) = @_; 27809 my $num = scalar(@$rtokens); 27810 my $i; 27811 27812 for ( $i = 0 ; $i < $num ; $i++ ) { 27813 my $len = length( $$rtokens[$i] ); 27814 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; 27815 } 27816} 27817 27818sub matching_end_token { 27819 27820 # find closing character for a pattern 27821 my $beginning_token = shift; 27822 27823 if ( $beginning_token eq '{' ) { 27824 '}'; 27825 } 27826 elsif ( $beginning_token eq '[' ) { 27827 ']'; 27828 } 27829 elsif ( $beginning_token eq '<' ) { 27830 '>'; 27831 } 27832 elsif ( $beginning_token eq '(' ) { 27833 ')'; 27834 } 27835 else { 27836 $beginning_token; 27837 } 27838} 27839 27840sub dump_token_types { 27841 my $class = shift; 27842 my $fh = shift; 27843 27844 # This should be the latest list of token types in use 27845 # adding NEW_TOKENS: add a comment here 27846 print $fh <<'END_OF_LIST'; 27847 27848Here is a list of the token types currently used for lines of type 'CODE'. 27849For the following tokens, the "type" of a token is just the token itself. 27850 27851.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 27852( ) <= >= == =~ !~ != ++ -- /= x= 27853... **= <<= >>= &&= ||= //= <=> 27854, + - / * | % ! x ~ = \ ? : . < > ^ & 27855 27856The following additional token types are defined: 27857 27858 type meaning 27859 b blank (white space) 27860 { indent: opening structural curly brace or square bracket or paren 27861 (code block, anonymous hash reference, or anonymous array reference) 27862 } outdent: right structural curly brace or square bracket or paren 27863 [ left non-structural square bracket (enclosing an array index) 27864 ] right non-structural square bracket 27865 ( left non-structural paren (all but a list right of an =) 27866 ) right non-structural parena 27867 L left non-structural curly brace (enclosing a key) 27868 R right non-structural curly brace 27869 ; terminal semicolon 27870 f indicates a semicolon in a "for" statement 27871 h here_doc operator << 27872 # a comment 27873 Q indicates a quote or pattern 27874 q indicates a qw quote block 27875 k a perl keyword 27876 C user-defined constant or constant function (with void prototype = ()) 27877 U user-defined function taking parameters 27878 G user-defined function taking block parameter (like grep/map/eval) 27879 M (unused, but reserved for subroutine definition name) 27880 P (unused, but -html uses it to label pod text) 27881 t type indicater such as %,$,@,*,&,sub 27882 w bare word (perhaps a subroutine call) 27883 i identifier of some type (with leading %, $, @, *, &, sub, -> ) 27884 n a number 27885 v a v-string 27886 F a file test operator (like -e) 27887 Y File handle 27888 Z identifier in indirect object slot: may be file handle, object 27889 J LABEL: code block label 27890 j LABEL after next, last, redo, goto 27891 p unary + 27892 m unary - 27893 pp pre-increment operator ++ 27894 mm pre-decrement operator -- 27895 A : used as attribute separator 27896 27897 Here are the '_line_type' codes used internally: 27898 SYSTEM - system-specific code before hash-bang line 27899 CODE - line of perl code (including comments) 27900 POD_START - line starting pod, such as '=head' 27901 POD - pod documentation text 27902 POD_END - last line of pod section, '=cut' 27903 HERE - text of here-document 27904 HERE_END - last line of here-doc (target word) 27905 FORMAT - format section 27906 FORMAT_END - last line of format section, '.' 27907 DATA_START - __DATA__ line 27908 DATA - unidentified text following __DATA__ 27909 END_START - __END__ line 27910 END - unidentified text following __END__ 27911 ERROR - we are in big trouble, probably not a perl script 27912END_OF_LIST 27913} 27914 27915BEGIN { 27916 27917 # These names are used in error messages 27918 @opening_brace_names = qw# '{' '[' '(' '?' #; 27919 @closing_brace_names = qw# '}' ']' ')' ':' #; 27920 27921 my @digraphs = qw( 27922 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 27923 <= >= == =~ !~ != ++ -- /= x= ~~ 27924 ); 27925 @is_digraph{@digraphs} = (1) x scalar(@digraphs); 27926 27927 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); 27928 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); 27929 27930 # make a hash of all valid token types for self-checking the tokenizer 27931 # (adding NEW_TOKENS : select a new character and add to this list) 27932 my @valid_token_types = qw# 27933 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v 27934 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & 27935 #; 27936 push( @valid_token_types, @digraphs ); 27937 push( @valid_token_types, @trigraphs ); 27938 push( @valid_token_types, '#' ); 27939 push( @valid_token_types, ',' ); 27940 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); 27941 27942 # a list of file test letters, as in -e (Table 3-4 of 'camel 3') 27943 my @file_test_operators = 27944 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); 27945 @is_file_test_operator{@file_test_operators} = 27946 (1) x scalar(@file_test_operators); 27947 27948 # these functions have prototypes of the form (&), so when they are 27949 # followed by a block, that block MAY BE followed by an operator. 27950 @_ = qw( do eval ); 27951 @is_block_operator{@_} = (1) x scalar(@_); 27952 27953 # these functions allow an identifier in the indirect object slot 27954 @_ = qw( print printf sort exec system say); 27955 @is_indirect_object_taker{@_} = (1) x scalar(@_); 27956 27957 # These tokens may precede a code block 27958 # patched for SWITCH/CASE 27959 @_ = 27960 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else 27961 unless do while until eval for foreach map grep sort 27962 switch case given when); 27963 @is_code_block_token{@_} = (1) x scalar(@_); 27964 27965 # I'll build the list of keywords incrementally 27966 my @Keywords = (); 27967 27968 # keywords and tokens after which a value or pattern is expected, 27969 # but not an operator. In other words, these should consume terms 27970 # to their right, or at least they are not expected to be followed 27971 # immediately by operators. 27972 my @value_requestor = qw( 27973 AUTOLOAD 27974 BEGIN 27975 CHECK 27976 DESTROY 27977 END 27978 EQ 27979 GE 27980 GT 27981 INIT 27982 LE 27983 LT 27984 NE 27985 UNITCHECK 27986 abs 27987 accept 27988 alarm 27989 and 27990 atan2 27991 bind 27992 binmode 27993 bless 27994 break 27995 caller 27996 chdir 27997 chmod 27998 chomp 27999 chop 28000 chown 28001 chr 28002 chroot 28003 close 28004 closedir 28005 cmp 28006 connect 28007 continue 28008 cos 28009 crypt 28010 dbmclose 28011 dbmopen 28012 defined 28013 delete 28014 die 28015 dump 28016 each 28017 else 28018 elsif 28019 eof 28020 eq 28021 exec 28022 exists 28023 exit 28024 exp 28025 fcntl 28026 fileno 28027 flock 28028 for 28029 foreach 28030 formline 28031 ge 28032 getc 28033 getgrgid 28034 getgrnam 28035 gethostbyaddr 28036 gethostbyname 28037 getnetbyaddr 28038 getnetbyname 28039 getpeername 28040 getpgrp 28041 getpriority 28042 getprotobyname 28043 getprotobynumber 28044 getpwnam 28045 getpwuid 28046 getservbyname 28047 getservbyport 28048 getsockname 28049 getsockopt 28050 glob 28051 gmtime 28052 goto 28053 grep 28054 gt 28055 hex 28056 if 28057 index 28058 int 28059 ioctl 28060 join 28061 keys 28062 kill 28063 last 28064 lc 28065 lcfirst 28066 le 28067 length 28068 link 28069 listen 28070 local 28071 localtime 28072 lock 28073 log 28074 lstat 28075 lt 28076 map 28077 mkdir 28078 msgctl 28079 msgget 28080 msgrcv 28081 msgsnd 28082 my 28083 ne 28084 next 28085 no 28086 not 28087 oct 28088 open 28089 opendir 28090 or 28091 ord 28092 our 28093 pack 28094 pipe 28095 pop 28096 pos 28097 print 28098 printf 28099 prototype 28100 push 28101 quotemeta 28102 rand 28103 read 28104 readdir 28105 readlink 28106 readline 28107 readpipe 28108 recv 28109 redo 28110 ref 28111 rename 28112 require 28113 reset 28114 return 28115 reverse 28116 rewinddir 28117 rindex 28118 rmdir 28119 scalar 28120 seek 28121 seekdir 28122 select 28123 semctl 28124 semget 28125 semop 28126 send 28127 sethostent 28128 setnetent 28129 setpgrp 28130 setpriority 28131 setprotoent 28132 setservent 28133 setsockopt 28134 shift 28135 shmctl 28136 shmget 28137 shmread 28138 shmwrite 28139 shutdown 28140 sin 28141 sleep 28142 socket 28143 socketpair 28144 sort 28145 splice 28146 split 28147 sprintf 28148 sqrt 28149 srand 28150 stat 28151 study 28152 substr 28153 symlink 28154 syscall 28155 sysopen 28156 sysread 28157 sysseek 28158 system 28159 syswrite 28160 tell 28161 telldir 28162 tie 28163 tied 28164 truncate 28165 uc 28166 ucfirst 28167 umask 28168 undef 28169 unless 28170 unlink 28171 unpack 28172 unshift 28173 untie 28174 until 28175 use 28176 utime 28177 values 28178 vec 28179 waitpid 28180 warn 28181 while 28182 write 28183 xor 28184 28185 switch 28186 case 28187 given 28188 when 28189 err 28190 say 28191 ); 28192 28193 # patched above for SWITCH/CASE given/when err say 28194 # 'err' is a fairly safe addition. 28195 # TODO: 'default' still needed if appropriate 28196 # 'use feature' seen, but perltidy works ok without it. 28197 # Concerned that 'default' could break code. 28198 push( @Keywords, @value_requestor ); 28199 28200 # These are treated the same but are not keywords: 28201 my @extra_vr = qw( 28202 constant 28203 vars 28204 ); 28205 push( @value_requestor, @extra_vr ); 28206 28207 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); 28208 28209 # this list contains keywords which do not look for arguments, 28210 # so that they might be followed by an operator, or at least 28211 # not a term. 28212 my @operator_requestor = qw( 28213 endgrent 28214 endhostent 28215 endnetent 28216 endprotoent 28217 endpwent 28218 endservent 28219 fork 28220 getgrent 28221 gethostent 28222 getlogin 28223 getnetent 28224 getppid 28225 getprotoent 28226 getpwent 28227 getservent 28228 setgrent 28229 setpwent 28230 time 28231 times 28232 wait 28233 wantarray 28234 ); 28235 28236 push( @Keywords, @operator_requestor ); 28237 28238 # These are treated the same but are not considered keywords: 28239 my @extra_or = qw( 28240 STDERR 28241 STDIN 28242 STDOUT 28243 ); 28244 28245 push( @operator_requestor, @extra_or ); 28246 28247 @expecting_operator_token{@operator_requestor} = 28248 (1) x scalar(@operator_requestor); 28249 28250 # these token TYPES expect trailing operator but not a term 28251 # note: ++ and -- are post-increment and decrement, 'C' = constant 28252 my @operator_requestor_types = qw( ++ -- C <> q ); 28253 @expecting_operator_types{@operator_requestor_types} = 28254 (1) x scalar(@operator_requestor_types); 28255 28256 # these token TYPES consume values (terms) 28257 # note: pp and mm are pre-increment and decrement 28258 # f=semicolon in for, F=file test operator 28259 my @value_requestor_type = qw# 28260 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x 28261 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= 28262 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ 28263 f F pp mm Y p m U J G j >> << ^ t 28264 #; 28265 push( @value_requestor_type, ',' ) 28266 ; # (perl doesn't like a ',' in a qw block) 28267 @expecting_term_types{@value_requestor_type} = 28268 (1) x scalar(@value_requestor_type); 28269 28270 # Note: the following valid token types are not assigned here to 28271 # hashes requesting to be followed by values or terms, but are 28272 # instead currently hard-coded into sub operator_expected: 28273 # ) -> :: Q R Z ] b h i k n v w } # 28274 28275 # For simple syntax checking, it is nice to have a list of operators which 28276 # will really be unhappy if not followed by a term. This includes most 28277 # of the above... 28278 %really_want_term = %expecting_term_types; 28279 28280 # with these exceptions... 28281 delete $really_want_term{'U'}; # user sub, depends on prototype 28282 delete $really_want_term{'F'}; # file test works on $_ if no following term 28283 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; 28284 # let perl do it 28285 28286 @_ = qw(q qq qw qx qr s y tr m); 28287 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_); 28288 28289 # These keywords are handled specially in the tokenizer code: 28290 my @special_keywords = qw( 28291 do 28292 eval 28293 format 28294 m 28295 package 28296 q 28297 qq 28298 qr 28299 qw 28300 qx 28301 s 28302 sub 28303 tr 28304 y 28305 ); 28306 push( @Keywords, @special_keywords ); 28307 28308 # Keywords after which list formatting may be used 28309 # WARNING: do not include |map|grep|eval or perl may die on 28310 # syntax errors (map1.t). 28311 my @keyword_taking_list = qw( 28312 and 28313 chmod 28314 chomp 28315 chop 28316 chown 28317 dbmopen 28318 die 28319 elsif 28320 exec 28321 fcntl 28322 for 28323 foreach 28324 formline 28325 getsockopt 28326 if 28327 index 28328 ioctl 28329 join 28330 kill 28331 local 28332 msgctl 28333 msgrcv 28334 msgsnd 28335 my 28336 open 28337 or 28338 our 28339 pack 28340 print 28341 printf 28342 push 28343 read 28344 readpipe 28345 recv 28346 return 28347 reverse 28348 rindex 28349 seek 28350 select 28351 semctl 28352 semget 28353 send 28354 setpriority 28355 setsockopt 28356 shmctl 28357 shmget 28358 shmread 28359 shmwrite 28360 socket 28361 socketpair 28362 sort 28363 splice 28364 split 28365 sprintf 28366 substr 28367 syscall 28368 sysopen 28369 sysread 28370 sysseek 28371 system 28372 syswrite 28373 tie 28374 unless 28375 unlink 28376 unpack 28377 unshift 28378 until 28379 vec 28380 warn 28381 while 28382 ); 28383 @is_keyword_taking_list{@keyword_taking_list} = 28384 (1) x scalar(@keyword_taking_list); 28385 28386 # These are not used in any way yet 28387 # my @unused_keywords = qw( 28388 # CORE 28389 # __FILE__ 28390 # __LINE__ 28391 # __PACKAGE__ 28392 # ); 28393 28394 # The list of keywords was extracted from function 'keyword' in 28395 # perl file toke.c version 5.005.03, using this utility, plus a 28396 # little editing: (file getkwd.pl): 28397 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } 28398 # Add 'get' prefix where necessary, then split into the above lists. 28399 # This list should be updated as necessary. 28400 # The list should not contain these special variables: 28401 # ARGV DATA ENV SIG STDERR STDIN STDOUT 28402 # __DATA__ __END__ 28403 28404 @is_keyword{@Keywords} = (1) x scalar(@Keywords); 28405} 284061; 28407__END__ 28408 28409#line 28750 28410