1#!/usr/bin/env perl 2############################################################ 3# Local Nuasis Revision $Id: perltidy,v 1.1 2003/06/05 01:53:21 hbo Exp $ 4# 5# perltidy - a perl script indenter and formatter 6# 7# Copyright (c) 2000, 2001 by Steven L. Hancock 8# Distributed under the GPL license agreement; see file COPYING 9# 10# This program is free software; you can redistribute it and/or modify 11# it under the terms of the GNU General Public License as published by 12# the Free Software Foundation; either version 2 of the License, or 13# (at your option) any later version. 14# 15# This program is distributed in the hope that it will be useful, 16# but WITHOUT ANY WARRANTY; without even the implied warranty of 17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18# GNU General Public License for more details. 19# 20# You should have received a copy of the GNU General Public License 21# along with this program; if not, write to the Free Software 22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23# 24# For brief instructions instructions, try 'perltidy -h'. 25# For more complete documentation, try 'man perltidy'. 26# 27# This script is an example of the default style. It was formatted with: 28# 29# perltidy perltidy 30# 31# Code Contributions: 32# Michael Cartmell supplied code for adaptation to VMS and helped with 33# v-strings. 34# Hugh S. Myers supplied sub streamhandle and the supporting code to 35# create a PerlTidy module which can operate on strings, arrays, etc. 36# Many others have supplied key ideas, suggestions, and bug reports; 37# see the ChangeLog file. 38# 39############################################################ 40 41package PerlTidy; 42use 5.004; # need IO::File from 5.004 or later 43BEGIN { $^W = 1; } # turn on warnings 44use strict; 45use Exporter; 46use Carp; 47 48use vars qw{ 49 $VERSION 50 @ISA 51 @EXPORT 52 $missing_io_scalar 53 $missing_io_scalararray 54}; 55 56@EXPORT = qw( &perltidy ); 57 58eval "use diagnostics"; 59{ eval "use IO::Scalar"; $missing_io_scalar = $@; } 60{ eval "use IO::ScalarArray"; $missing_io_scalararray = $@; } 61use IO::File; 62 63BEGIN { 64 ($VERSION=q($Id: perltidy,v 1.1 2003/06/05 01:53:21 hbo Exp $)) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker 65} 66 67# Preloaded methods go here. 68sub streamhandle { 69 my $ref = ref( my $filename = shift ); 70 my $mode = shift; 71 my $New; 72 my $fh; 73 74 if ( $ref eq 'ARRAY' ) { 75 die $missing_io_scalararray if $missing_io_scalararray; 76 $New = sub { IO::ScalarArray->new(@_) }; 77 } 78 elsif ( $ref eq 'SCALAR' ) { 79 die $missing_io_scalar if $missing_io_scalar; 80 $New = sub { IO::Scalar->new(@_) }; 81 } 82 elsif ( $filename eq '-' ) { 83 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } 84 } 85 else { 86 $New = sub { IO::File->new(@_) }; 87 } 88 $fh = $New->( $filename, $mode ) 89 or warn "Couldn't open file:$filename in mode:$mode : $!\n"; 90 return $fh, ( $ref or $filename ); 91} 92 93=pod 94 95POD NOTE: Documentation is contained in separately supplied .pod files; 96pod is used here only for long comments. 97 98Here is a map of the flow of data from the input source to the output 99line sink: 100 101LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> 102 input groups output 103 lines tokens lines of lines lines 104 lines 105 106The names correspond to the package names responsible for the unit processes. 107 108The overall process is controlled by the "main" package. 109 110LineSource is the stream of input lines 111 112Tokenizer analyzes a line and breaks it into tokens, peeking ahead 113if necessary. A token is any section of the input line which should be 114manipulated as a single entity during formatting. For example, a single 115',' character is a token, and so is an entire side comment. It handles 116the complexities of Perl syntax, such as distinguishing between '<<' as 117a shift operator and as a here-document, or distinguishing between '/' 118as a divide symbol and as a pattern delimiter. 119 120Formatter inserts and deletes whitespace between tokens, and breaks 121sequences of tokens at appropriate points as output lines. It bases its 122decisions on the default rules as modified by any command-line options. 123 124VerticalAligner collects groups of lines together and tries to line up 125certain tokens, such as '=>', '#', and '=' by adding whitespace. 126 127FileWriter simply writes lines to the output stream. 128 129The Logger package, not shown, records significant events and warning 130messages. It writes a .LOG file, which may be saved with a 131'-log' or a '-g' flag. 132 133Some comments in this file refer to separate test files, most of which 134are in the test directory which can be downloaded in addition to the 135basic perltidy distribution. 136 137=cut 138 139sub perltidy { 140 141 my %defaults = ( 142 source => undef, 143 destination => undef, 144 145 # .. more to be added 146 ); 147 148 my %input_hash = @_; 149 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { 150 local $" = ')('; 151 confess 152 "unknown parameters in call to PerlTidy::perltidy: (@bad_keys)\n"; 153 } 154 155 %input_hash = ( %defaults, %input_hash ); 156 my $source_array = $input_hash{'source'}; 157 my $destination_array = $input_hash{'destination'}; 158 159 # VMS file names are restricted to a 40.40 format, so 160 # we append _tdy instead of .tdy, etc. 161 my $dot; 162 my $dot_pattern; 163 if ( $^O eq 'VMS' ) { 164 $dot = '_'; 165 $dot_pattern = '_'; 166 } 167 else { 168 $dot = '.'; 169 $dot_pattern = '\.'; # must escape for use in regex 170 } 171 172 # handle command line options 173 my ( $rOpts, $config_file, $rraw_options, $pending_complaint, $saw_extrude ) 174 = process_command_line(); 175 PerlTidy::Formatter::check_options($rOpts); 176 if ( $rOpts->{'html'} ) { 177 PerlTidy::HtmlWriter->check_options($rOpts); 178 } 179 180 # create a diagnostics object if requested 181 my $diagnostics_object = undef; 182 if ( $rOpts->{'DIAGNOSTICS'} ) { 183 $diagnostics_object = PerlTidy::Diagnostics->new(); 184 } 185 186 # no filenames should be given if input is from an array 187 if ($source_array) { 188 if ( @ARGV > 0 ) { 189 die 190"You may not specify any filenames when a source array is given\n"; 191 } 192 193 # we'll stuff the source array into ARGV 194 unshift ( @ARGV, $source_array ); 195 } 196 197 # use stdin by default if no source array and no args 198 else { 199 unshift ( @ARGV, '-' ) unless @ARGV; 200 } 201 202 # loop to process all files in argument list 203 my $number_of_files = @ARGV; 204 my $input_file; 205 my $formatter = undef; 206 my $tokenizer = undef; 207 208 # Set a flag here for any system which does not have a shell to 209 # expand wildcard filenames like '*.pl'. In theory it should also 210 # be ok to set the flag for any system, but I prefer not to do so 211 # out of robustness concerns. 212 my $use_glob = ( $^O =~ /^(MSWin32|msdos|dos|win32)$/ ); 213 214 while ( $input_file = shift @ARGV ) { 215 my $fileroot; 216 217 #--------------------------------------------------------------- 218 # determine the input file name 219 #--------------------------------------------------------------- 220 if ( $input_file eq '-' ) { # '-' indicates input from STDIN 221 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc 222 } 223 elsif ($source_array) { 224 $fileroot = "perltidy"; 225 } 226 else { 227 $fileroot = $input_file; 228 unless ( -e $input_file ) { 229 230 # file doesn't exist, maybe we have a wildcard 231 if ($use_glob) { 232 233 # be sure files exist, because glob('p.q') always 234 # returns 'p.q' even if 'p.q' doesn't exist. 235 my @files = grep { -e $_ } glob($input_file); 236 if (@files) { 237 unshift @ARGV, @files; 238 next; 239 } 240 } 241 242 print "skipping file: $input_file: does not exist\n"; 243 next; 244 } 245 246 unless ( -f $input_file ) { 247 print "skipping: $input_file: not a regular file\n"; 248 next; 249 } 250 251 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { 252 print 253 "skipping file: $input_file: Non-text (override with -f)\n"; 254 next; 255 } 256 } 257 258 # Skip files with same extension as the output files 259 # because this can lead to a messy situation 260 # with files like script.tdy.tdy.tdy ... when you rerun 261 # perltidy over and over with wildcard input 262 my $output_extension = $rOpts->{'html'} ? "html" : "tdy"; 263 if ( defined( $rOpts->{'output-file-extension'} ) ) { 264 $output_extension = $rOpts->{'output-file-extension'}; 265 } 266 267 if ( 268 !$source_array 269 && ( $input_file =~ 270 /($dot_pattern)($output_extension|LOG|DEBUG|ERR|TEE|TMPI|TMPO)$/ 271 ) 272 || ( $input_file eq 'DIAGNOSTICS' ) 273 ) 274 { 275 print "skipping file: $input_file: wrong extension\n"; 276 next; 277 } 278 279 # the 'source_object' supplies a method to read the input file 280 my $source_object = PerlTidy::LineSource->new( $input_file, $rOpts ); 281 next unless ($source_object); 282 283 # register this file name with the Diagnostics package 284 $diagnostics_object->set_input_file($input_file) if $diagnostics_object; 285 286 #--------------------------------------------------------------- 287 # determine the output file name 288 #--------------------------------------------------------------- 289 my $output_file = undef; 290 291 if ( $rOpts->{'outfile'} ) { 292 293 if ( $number_of_files <= 1 ) { 294 295 if ( $rOpts->{'standard-output'} ) { 296 die "You may not use -o and -st together\n"; 297 } 298 elsif ($destination_array) { 299 die 300"You may not specify a destination array and -o together\n"; 301 } 302 $output_file = $rOpts->{outfile}; 303 304 # make sure user gives a file name after -o 305 if ( $output_file =~ /^-/ ) { 306 die "You must specify a valid filename after -o\n"; 307 } 308 } 309 else { 310 die "You may not use -o with more than one input file\n"; 311 } 312 } 313 elsif ( $rOpts->{'standard-output'} ) { 314 if ($destination_array) { 315 die 316 "You may not specify a destination array and -st together\n"; 317 } 318 $output_file = '-'; 319 320 if ( $number_of_files <= 1 ) { 321 } 322 else { 323 die "You may not use -st with more than one input file\n"; 324 } 325 } 326 elsif ($destination_array) { 327 $output_file = $destination_array; 328 } 329 elsif ($source_array) { # source but no destination goes to stdout 330 $output_file = '-'; 331 } 332 elsif ( $input_file eq '-' ) { 333 $output_file = $input_file; 334 } 335 else { 336 $output_file = $fileroot . $dot . $output_extension; 337 } 338 339 # the 'sink_object' knows how to write the output file 340 my $tee_file = $fileroot . $dot . "TEE"; 341 my $sink_object = 342 PerlTidy::LineSink->new( $output_file, $tee_file, $rOpts ); 343 344 #--------------------------------------------------------------- 345 # initialize the error logger 346 #--------------------------------------------------------------- 347 my $warning_file = $fileroot . $dot . "ERR"; 348 my $log_file = $fileroot . $dot . "LOG"; 349 350 my $logger_object = 351 PerlTidy::Logger->new( $rOpts, $log_file, $warning_file, 352 $saw_extrude ); 353 write_logfile_header( $rOpts, $logger_object, $config_file, 354 $rraw_options ); 355 if ($pending_complaint) { 356 $logger_object->complain($pending_complaint); 357 } 358 359 #--------------------------------------------------------------- 360 # initialize the debug object, if any 361 #--------------------------------------------------------------- 362 my $debugger_object = undef; 363 if ( $rOpts->{DEBUG} ) { 364 $debugger_object = 365 PerlTidy::Debugger->new( $fileroot . $dot . "DEBUG" ); 366 } 367 368 # we have to delete any old formatter because, for safety, 369 # formatter will check to see that there is only one. 370 $formatter = undef; 371 372 #--------------------------------------------------------------- 373 # create a formatter for this file : html writer or pretty printer 374 #--------------------------------------------------------------- 375 if ( $rOpts->{'html'} ) { 376 $formatter = PerlTidy::HtmlWriter->new( $fileroot, $output_file ); 377 } 378 379 else { 380 $formatter = PerlTidy::Formatter->new( 381 logger_object => $logger_object, 382 diagnostics_object => $diagnostics_object, 383 sink_object => $sink_object, 384 ); 385 } 386 387 #--------------------------------------------------------------- 388 # create the tokenizer for this file 389 #--------------------------------------------------------------- 390 $tokenizer = undef; 391 $tokenizer = PerlTidy::Tokenizer->new( 392 source_object => $source_object, 393 logger_object => $logger_object, 394 debugger_object => $debugger_object, 395 diagnostics_object => $diagnostics_object, 396 starting_level => $rOpts->{'starting-indentation-level'}, 397 tabs => $rOpts->{'tabs'}, 398 indent_columns => $rOpts->{'indent-columns'}, 399 look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, 400 look_for_autoloader => $rOpts->{'look-for-autoloader'}, 401 look_for_selfloader => $rOpts->{'look-for-selfloader'}, 402 trim_qw => $rOpts->{'trim-qw'}, 403 ); 404 405 #--------------------------------------------------------------- 406 # now we can do it 407 #--------------------------------------------------------------- 408 process_this_file( $tokenizer, $formatter ); 409 410 #--------------------------------------------------------------- 411 # clean up and report errors 412 #--------------------------------------------------------------- 413 $source_object->close_input_file(); 414 $sink_object->close_output_file() if $sink_object; 415 $debugger_object->close_debug_file() if $debugger_object; 416 417 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes 418 if ($output_file) { 419 chmod 0755, $output_file; 420 if ( $logger_object && $rOpts->{'check-syntax'} ) { 421 my $ifname = $source_object->get_input_file_copy_name(); 422 my $ofname = $sink_object->get_output_file_copy(); 423 $infile_syntax_ok = 424 check_syntax( $ifname, $ofname, $logger_object, $rOpts ); 425 } 426 } 427 $source_object->unlink_copy(); 428 $sink_object->unlink_copy(); 429 $logger_object->finish( $infile_syntax_ok, $formatter ) 430 if $logger_object; 431 432 } # end of loop to process all files 433 434} # end of main program 435 436sub write_logfile_header { 437 my ( $rOpts, $logger_object, $config_file, $rraw_options ) = @_; 438 $logger_object->write_logfile_entry( 439"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" 440 ); 441 my $options_string = join ( ' ', @$rraw_options ); 442 443 if ($config_file) { 444 $logger_object->write_logfile_entry( 445 "Found Configuration File >>> $config_file \n"); 446 } 447 $logger_object->write_logfile_entry( 448 "Configuration and command line parameters for this run:\n"); 449 $logger_object->write_logfile_entry("$options_string\n"); 450 451 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { 452 $rOpts->{'logfile'} = 1; # force logfile to be saved 453 $logger_object->write_logfile_entry( 454 "Final parameter set for this run\n"); 455 $logger_object->write_logfile_entry( 456 "------------------------------------\n"); 457 458 foreach my $i ( keys %{$rOpts} ) { 459 $logger_object->write_logfile_entry( '--' . "$i=$rOpts->{$i}\n" ); 460 } 461 $logger_object->write_logfile_entry( 462 "------------------------------------\n"); 463 } 464 $logger_object->write_logfile_entry( 465 "To find error messages search for 'WARNING' with your editor\n"); 466} 467 468sub process_command_line { 469 use Getopt::Long; 470 471 ###################################################################### 472 # Note: a few options are not documented in the man page and usage 473 # message. This is because these are experimental or debug options and 474 # may or may not be retained in future versions. 475 # 476 # Here are the undocumented flags as far as I know. Any of them 477 # may disappear at any time. They are mainly for fine-tuning 478 # and debugging. 479 # 480 # xsc --> maximum-space-to-comment # for spacing side comments 481 # fll --> fuzzy-line-length # trivial parameter 482 # iob --> ignore-old-line-breaks # do not follow breaks in old script 483 # tdy --> tidy-output # This is an internal flag 484 # chk --> check-multiline-quotes # check for old bug; to be deleted 485 # mci --> maximum-continuation-indentation # need for -lp 486 # scl --> short-concatenation-item-length # helps break at '.' 487 # bob --> break-after-opening-brace # this is the default 488 # bsj --> big-space-jump # used by vertical aligner 489 # recombine # for debugging line breaks 490 # I --> DIAGNOSTICS # for debugging 491 # maximum-whitespace-columns 492 ###################################################################### 493 494 # here is a summary of the Getopt codes: 495 # <none> does not take an argument 496 # =s takes a mandatory string 497 # :s takes an optional string (DO NOT USE - filenames will get eaten up) 498 # =i takes a mandatory integer 499 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) 500 # ! does not take an argument and may be negated 501 # i.e., -foo and -nofoo are allowed 502 # a double dash signals the end of the options list 503 # 504 #--------------------------------------------------------------- 505 # Define the option string passed to GetOptions. 506 #--------------------------------------------------------------- 507 508 my @option_string = (); 509 my %expansion = (); 510 my $rexpansion = \%expansion; 511 512 # These options are parsed directly by perltidy: 513 # help h 514 # version v 515 # However, they are included in the option set so that they will 516 # be seen in the options dump. 517 518 # These long option names have no abbreviations or are treated specially 519 @option_string = qw( 520 html! 521 maximum-whitespace-columns=i 522 noprofile 523 npro 524 recombine! 525 ); 526 527 # routine to install and check options 528 my $add_option = sub { 529 my ( $long_name, $short_name, $flag ) = @_; 530 push @option_string, $long_name . $flag; 531 if ($short_name) { 532 if ( $expansion{$short_name} ) { 533 my $existing_name = $expansion{$short_name}[0]; 534 die 535"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; 536 } 537 $expansion{$short_name} = [$long_name]; 538 if ( $flag eq '!' ) { 539 my $nshort_name = 'n' . $short_name; 540 my $nolong_name = 'no' . $long_name; 541 if ( $expansion{$nshort_name} ) { 542 my $existing_name = $expansion{$nshort_name}[0]; 543 die 544"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; 545 } 546 $expansion{$nshort_name} = [$nolong_name]; 547 } 548 } 549 }; 550 551 # Install long option names which have a simple abbreviation. 552 # Options with code '!' get standard negation ('no' for long names, 553 # 'n' for abbreviations) 554 $add_option->( 'DEBUG', 'D', '!' ); 555 $add_option->( 'DIAGNOSTICS', 'I', '!' ); 556 $add_option->( 'add-newlines', 'anl', '!' ); 557 $add_option->( 'add-semicolons', 'asc', '!' ); 558 $add_option->( 'add-whitespace', 'aws', '!' ); 559 $add_option->( 'big-space-jump', 'bsj', '=i' ); 560 $add_option->( 'blanks-before-blocks', 'bbb', '!' ); 561 $add_option->( 'blanks-before-comments', 'bbc', '!' ); 562 $add_option->( 'blanks-before-subs', 'bbs', '!' ); 563 $add_option->( 'block-brace-tightness', 'bbt', '=i' ); 564 $add_option->( 'brace-left-and-indent', 'bli', '!' ); 565 $add_option->( 'brace-tightness', 'bt', '=i' ); 566 $add_option->( 'break-after-comma-arrows', 'baa', '!' ); 567 $add_option->( 'break-after-opening-brace', 'bob', '!' ); 568 $add_option->( 'check-multiline-quotes', 'chk', '!' ); 569 $add_option->( 'check-syntax', 'syn', '!' ); 570 $add_option->( 'continuation-indentation', 'ci', '=i' ); 571 $add_option->( 'closing-side-comments', 'csc', '!' ); 572 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); 573 $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); 574 $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); 575 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); 576 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); 577 $add_option->( 'cuddled-else', 'ce', '!' ); 578 $add_option->( 'delete-block-comments', 'dbc', '!' ); 579 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); 580 $add_option->( 'delete-old-newlines', 'dnl', '!' ); 581 $add_option->( 'delete-old-whitespace', 'dws', '!' ); 582 $add_option->( 'delete-pod', 'dp', '!' ); 583 $add_option->( 'delete-semicolons', 'dsm', '!' ); 584 $add_option->( 'delete-side-comments', 'dsc', '!' ); 585 $add_option->( 'dump-defaults', 'ddf', '!' ); 586 $add_option->( 'dump-long-names', 'dln', '!' ); 587 $add_option->( 'dump-options', 'dop', '!' ); 588 $add_option->( 'dump-short-names', 'dsn', '!' ); 589 $add_option->( 'dump-token-types', 'dtt', '!' ); 590 $add_option->( 'dump-want-left-space', 'dwls', '!' ); 591 $add_option->( 'dump-want-right-space', 'dwrs', '!' ); 592 $add_option->( 'force-read-binary', 'f', '!' ); 593 $add_option->( 'fuzzy-line-length', 'fll', '!' ); 594 $add_option->( 'hanging-side-comments', 'hsc', '!' ); 595 $add_option->( 'help', 'h', '' ); 596 $add_option->( 'ignore-old-line-breaks', 'iob', '!' ); 597 $add_option->( 'indent-block-comments', 'ibc', '!' ); 598 $add_option->( 'indent-closing-brace', 'icb', '!' ); 599 $add_option->( 'indent-closing-paren', 'icp', '!' ); 600 $add_option->( 'indent-columns', 'i', '=i' ); 601 $add_option->( 'line-up-parentheses', 'lp', '!' ); 602 $add_option->( 'logfile', 'log', '!' ); 603 $add_option->( 'logfile-gap', 'g', ':i' ); 604 $add_option->( 'long-block-line-count', 'lbl', '=i' ); 605 $add_option->( 'look-for-autoloader', 'lal', '!' ); 606 $add_option->( 'look-for-hash-bang', 'x', '!' ); 607 $add_option->( 'look-for-selfloader', 'lsl', '!' ); 608 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); 609 $add_option->( 'maximum-continuation-indentation', 'mci', '=i' ); 610 $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); 611 $add_option->( 'maximum-line-length', 'l', '=i' ); 612 $add_option->( 'maximum-space-to-comment', 'xsc', '=i' ); 613 $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); 614 $add_option->( 'nowant-left-space', 'nwls', '=s' ); 615 $add_option->( 'nowant-right-space', 'nwrs', '=s' ); 616 $add_option->( 'opening-brace-always-on-right', 'bar', '' ); 617 $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); 618 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); 619 $add_option->( 'outdent-labels', 'ola', '!' ); 620 $add_option->( 'outdent-keywords', 'okw', '!' ); 621 $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); 622 $add_option->( 'outdent-long-quotes', 'olq', '!' ); 623 $add_option->( 'outdent-long-comments', 'olc', '!' ); 624 $add_option->( 'outfile', 'o', '=s' ); 625 $add_option->( 'output-file-extension', 'oext', '=s' ); 626 $add_option->( 'paren-tightness', 'pt', '=i' ); 627 $add_option->( 'pass-version-line', 'pvl', '!' ); 628 $add_option->( 'profile', 'pro', '=s' ); 629 $add_option->( 'quiet', 'q', '!' ); 630 $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); 631 $add_option->( 'show-options', 'opt', '!' ); 632 $add_option->( 'space-for-semicolon', 'sfs', '!' ); 633 $add_option->( 'space-terminal-semicolon', 'sts', '!' ); 634 $add_option->( 'static-side-comments', 'ssc', '!' ); 635 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); 636 $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); 637 $add_option->( 'standard-error-output', 'se', '!' ); 638 $add_option->( 'standard-output', 'st', '!' ); 639 $add_option->( 'starting-indentation-level', 'sil', '=i' ); 640 $add_option->( 'static-block-comments', 'sbc', '!' ); 641 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); 642 $add_option->( 'swallow-optional-blank-lines', 'sob', '!' ); 643 $add_option->( 'tabs', 't', '!' ); 644 $add_option->( 'tee-block-comments', 'tbc', '!' ); 645 $add_option->( 'tee-pod', 'tp', '!' ); 646 $add_option->( 'tee-side-comments', 'tsc', '!' ); 647 $add_option->( 'tidy-output', 'tdy', '!' ); 648 $add_option->( 'trim-qw', 'tqw', '!' ); 649 $add_option->( 'version', 'v', '' ); 650 $add_option->( 'want-break-after', 'wba', '=s' ); 651 $add_option->( 'want-break-before', 'wbb', '=s' ); 652 $add_option->( 'want-left-space', 'wls', '=s' ); 653 $add_option->( 'want-right-space', 'wrs', '=s' ); 654 $add_option->( 'warning-output', 'w', '!' ); 655 656 # The PerlTidy::HtmlWriter will add its own options to the string 657 PerlTidy::HtmlWriter->make_getopt_long_names( \@option_string ); 658 659 #--------------------------------------------------------------- 660 # Assign default values to the above options here, except 661 # for 'outfile' and 'help'. 662 # These settings should approximate the perlstyle(1) suggestions. 663 #--------------------------------------------------------------- 664 my @defaults = qw( 665 add-newlines 666 add-semicolons 667 add-whitespace 668 big-space-jump=24 669 blanks-before-blocks 670 blanks-before-comments 671 blanks-before-subs 672 block-brace-tightness=0 673 brace-tightness=1 674 break-after-opening-brace 675 check-syntax 676 closing-side-comment-interval=6 677 closing-side-comment-maximum-text=20 678 continuation-indentation=2 679 delete-old-newlines 680 delete-semicolons 681 fuzzy-line-length 682 hanging-side-comments 683 indent-block-comments 684 indent-columns=4 685 long-block-line-count=8 686 look-for-autoloader 687 look-for-selfloader 688 maximum-consecutive-blank-lines=1 689 maximum-continuation-indentation=40 690 maximum-fields-per-table=40 691 maximum-line-length=80 692 maximum-space-to-comment=32 693 maximum-whitespace-columns=32 694 minimum-space-to-comment=4 695 nobrace-left-and-indent 696 nobreak-after-comma-arrows 697 nocuddled-else 698 nodelete-old-whitespace 699 nohtml 700 noignore-old-line-breaks 701 noindent-closing-brace 702 noindent-closing-paren 703 nologfile 704 noquiet 705 noshow-options 706 nostatic-side-comments 707 noswallow-optional-blank-lines 708 notabs 709 nowarning-output 710 outdent-long-quotes 711 paren-tightness=1 712 pass-version-line 713 recombine 714 short-concatenation-item-length=8 715 space-for-semicolon 716 square-bracket-tightness=1 717 static-block-comments 718 tidy-output 719 trim-qw 720 ); 721 722 #--------------------------------------------------------------- 723 # set the defaults by passing the above list through GetOptions 724 #--------------------------------------------------------------- 725 my %Opts = (); 726 { 727 local @ARGV; 728 my $i; 729 730 for $i (@defaults) { push @ARGV, "--" . $i } 731 732 if ( !GetOptions( \%Opts, @option_string ) ) { 733 die "Programming Bug: error in setting default options"; 734 } 735 } 736 737 #--------------------------------------------------------------- 738 # Define abbreviations which will be expanded into the above primitives. 739 # These may be defined recursively. 740 #--------------------------------------------------------------- 741 %expansion = ( 742 %expansion, 743 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], 744 'fnl' => [qw(freeze-newlines)], 745 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], 746 'fws' => [qw(freeze-whitespace)], 747 'indent-only' => [qw(freeze-newlines freeze-whitespace)], 748 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 749 'nooutdent-long-lines' => 750 [qw(nooutdent-long-quotes nooutdent-long-comments)], 751 'noll' => [qw(nooutdent-long-lines)], 752 'io' => [qw(indent-only)], 753 'delete-all-comments' => 754 [qw(delete-block-comments delete-side-comments delete-pod)], 755 'nodelete-all-comments' => 756 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], 757 'dac' => [qw(delete-all-comments)], 758 'ndac' => [qw(nodelete-all-comments)], 759 'gnu' => [qw(gnu-style)], 760 'tee-all-comments' => 761 [qw(tee-block-comments tee-side-comments tee-pod)], 762 'notee-all-comments' => 763 [qw(notee-block-comments notee-side-comments notee-pod)], 764 'tac' => [qw(tee-all-comments)], 765 'ntac' => [qw(notee-all-comments)], 766 'nhtml' => [qw(nohtml)], 767 768 # 'mangle' originally deleted pod and comments, but to keep it 769 # reversible, it no longer does. But if you really want to 770 # delete them, just use: 771 # -mangle -dac 772 773 # An interesting use for 'mangle' is to do this: 774 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new 775 # which will form as many one-line blocks as possible 776 777 'mangle' => [ 778 qw( 779 check-syntax 780 delete-old-newlines 781 delete-old-whitespace 782 delete-semicolons 783 indent-columns=0 784 maximum-consecutive-blank-lines=0 785 maximum-line-length=100000 786 noadd-newlines 787 noadd-semicolons 788 noadd-whitespace 789 noblanks-before-blocks 790 noblanks-before-subs 791 notabs 792 ) 793 ], 794 795 # 'extrude' originally deleted pod and comments, but to keep it 796 # reversible, it no longer does. But if you really want to 797 # delete them, just use 798 # extrude -dac 799 # 800 # An interesting use for 'extrude' is to do this: 801 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new 802 # which will break up all one-line blocks. 803 804 'extrude' => [ 805 qw( 806 check-syntax 807 ci=0 808 delete-old-newlines 809 delete-old-whitespace 810 delete-semicolons 811 indent-columns=0 812 maximum-consecutive-blank-lines=0 813 maximum-line-length=1 814 noadd-semicolons 815 noadd-whitespace 816 noblanks-before-blocks 817 noblanks-before-subs 818 nofuzzy-line-length 819 notabs 820 ) 821 ], 822 823 # this style tries to follow the GNU Coding Standards (which do 824 # not really apply to perl but which are followed by some perl 825 # programmers). 826 'gnu-style' => [ 827 qw( 828 lp bl noll pt=2 bt=2 sbt=2 icp 829 ) 830 ], 831 832 # Additional styles can be added here 833 ); 834 835 PerlTidy::HtmlWriter->make_abbreviated_names( \%expansion ); 836 837 # Uncomment next line to dump all expansions for debugging: 838 # dump_short_names(\%expansion); 839 840 my $word; 841 my @raw_options = (); 842 my $config_file = ""; 843 my $saw_ignore_profile = 0; 844 my $saw_extrude = 0; 845 my $i; 846 847 #--------------------------------------------------------------- 848 # Take a first look at the command-line parameters. Do as many 849 # immediate dumps as possible, which can avoid confusion if the 850 # perltidyrc file has an error. 851 #--------------------------------------------------------------- 852 foreach $i (@ARGV) { 853 854 if ( $i =~ /-(npro|noprofile)$/ ) { 855 $saw_ignore_profile = 1; 856 } 857 elsif ( $i =~ /-(pro|profile)=(.+)/ ) { 858 if ($config_file) { 859 print STDERR 860"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; 861 } 862 $config_file = $2; 863 } 864 elsif ( $i =~ /-(pro|profile)=?$/ ) { 865 print STDERR 866 "usage: -pro=filename or --profile=filename, no spaces\n"; 867 exit 1; 868 } 869 elsif ( $i =~ /-extrude$/ ) { 870 $saw_extrude = 1; 871 } 872 elsif ( $i =~ /-(help|h)$/ ) { 873 usage(); 874 exit 1; 875 } 876 elsif ( $i =~ /-(version|v)$/ ) { 877 show_version(); 878 exit 1; 879 } 880 elsif ( $i =~ /-(dump-defaults|ddf)$/ ) { 881 dump_defaults(@defaults); 882 exit 1; 883 } 884 elsif ( $i =~ /-(dump-long-names|dln)$/ ) { 885 dump_long_names(@option_string); 886 exit 1; 887 } 888 elsif ( $i =~ /-(dump-short-names|dsn)$/ ) { 889 dump_short_names( \%expansion ); 890 exit 1; 891 } 892 elsif ( $i =~ /-(dump-token-types|dtt)$/ ) { 893 PerlTidy::Tokenizer->dump_token_types(*STDOUT); 894 exit 1; 895 } 896 } 897 898 #--------------------------------------------------------------- 899 # read any .perltidyrc configuration file 900 #--------------------------------------------------------------- 901 unless ($saw_ignore_profile) { 902 903 $config_file = find_config_file() unless $config_file; 904 my $rconfig_list; 905 ( $config_file, $rconfig_list ) = 906 read_config_file( $config_file, \%expansion ); 907 908 # process any .perltidyrc parameters right now so we can localize errors 909 if (@$rconfig_list) { 910 local @ARGV = @$rconfig_list; 911 912 expand_command_abbreviations( \%expansion, \@raw_options, 913 $config_file ); 914 915 if ( !GetOptions( \%Opts, @option_string ) ) { 916 die 917"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; 918 } 919 920 # Undo any options which cause premature exit. They are not 921 # appropriate for a config file, and it could be hard to 922 # diagnose the cause of the premature exit. 923 foreach ( 924 qw{ 925 dump-defaults 926 dump-long-names 927 dump-options 928 dump-short-names 929 dump-token-types 930 dump-want-left-space 931 dump-want-right-space 932 help 933 stylesheet 934 version 935 } 936 ) 937 { 938 if ( defined( $Opts{$_} ) ) { 939 delete $Opts{$_}; 940 print STDERR "ignoring --$_ in config file: $config_file\n"; 941 } 942 } 943 } 944 } 945 946 #--------------------------------------------------------------- 947 # now process the command line parameters 948 #--------------------------------------------------------------- 949 expand_command_abbreviations( \%expansion, \@raw_options, $config_file ); 950 951 if ( !GetOptions( \%Opts, @option_string ) ) { 952 die "Error on command line; for help try 'perltidy -h'\n"; 953 } 954 955 if ( $Opts{'dump-options'} ) { 956 dump_options( \%Opts ); 957 exit 1; 958 } 959 960 #--------------------------------------------------------------- 961 # Now we have to handle any interactions among the options.. 962 #--------------------------------------------------------------- 963 964 # In quiet mode, there is no log file and hence no way to report 965 # results of syntax check, so don't do it. 966 if ( $Opts{'quiet'} ) { 967 $Opts{'check-syntax'} = 0; 968 } 969 970 # either html output or tidy output, not both 971 if ( $Opts{'html'} ) { 972 $Opts{'tidy-output'} = 0; 973 } 974 975 # can't check syntax if no output 976 if ( !$Opts{'tidy-output'} ) { 977 $Opts{'check-syntax'} = 0; 978 } 979 980 # It's really a bad idea to check syntax as root unless you wrote 981 # the script yourself. 982 my $pending_complaint = ""; 983 984 # everybody is root in windows 95/98, so we can't complain about it 985 unless ( $^O =~ /^(MSWin32|msdos|dos|win32)$/ ) { 986 987 if ( $< == 0 && $Opts{'check-syntax'} ) { 988 $Opts{'check-syntax'} = 0; 989 $pending_complaint = 990"Syntax check deactivated for safety; you shouldn't run this as root\n"; 991 } 992 } 993 994 # see if user set a non-negative logfile-gap 995 if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) { 996 997 # a zero gap will be taken as a 1 998 if ( $Opts{'logfile-gap'} == 0 ) { 999 $Opts{'logfile-gap'} = 1; 1000 } 1001 1002 # setting a non-negative logfile gap causes logfile to be saved 1003 $Opts{'logfile'} = 1; 1004 } 1005 1006 # not setting logfile gap, or setting it negative, causes default of 50 1007 else { 1008 $Opts{'logfile-gap'} = 50; 1009 } 1010 1011 # set short-cut flag when only indentation is to be done. 1012 # Note that the user may or may not have already set the 1013 # indent-only flag. 1014 if ( !$Opts{'add-whitespace'} 1015 && !$Opts{'delete-old-whitespace'} 1016 && !$Opts{'add-newlines'} 1017 && !$Opts{'delete-old-newlines'} ) 1018 { 1019 $Opts{'indent-only'} = 1; 1020 } 1021 1022 # -nbob implies -bli 1023 if ( !$Opts{'break-after-opening-brace'} ) { 1024 $Opts{'brace-left-and-indent'} = 1; 1025 } 1026 1027 # -bli flag implies -bl 1028 if ( $Opts{'brace-left-and-indent'} ) { 1029 $Opts{'opening-brace-on-new-line'} = 1; 1030 } 1031 1032 if ( $Opts{'opening-brace-always-on-right'} 1033 && $Opts{'opening-brace-on-new-line'} ) 1034 { 1035 print STDERR <<EOM; 1036 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and 1037 'opening-brace-on-new-line' (-bl). Ignoring -bl. 1038EOM 1039 $Opts{'opening-brace-on-new-line'} = 0; 1040 } 1041 1042 # it simplifies things if -bl is 0 rather than undefined 1043 if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) { 1044 $Opts{'opening-brace-on-new-line'} = 0; 1045 } 1046 1047 # -sbl defaults to -bl if not defined 1048 if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) { 1049 $Opts{'opening-sub-brace-on-new-line'} = 1050 $Opts{'opening-brace-on-new-line'}; 1051 } 1052 1053 # set shortcut flag if no blanks to be written 1054 unless ( $Opts{'maximum-consecutive-blank-lines'} ) { 1055 $Opts{'swallow-optional-blank-lines'} = 1; 1056 } 1057 return ( \%Opts, $config_file, \@raw_options, $pending_complaint, 1058 $saw_extrude ); 1059 1060} # end of process_command_line 1061 1062sub expand_command_abbreviations { 1063 1064 # go through @ARGV and expand any abbreviations 1065 1066 my ( $rexpansion, $rraw_options, $config_file ) = @_; 1067 my ($word); 1068 1069 # set a pass limit to prevent an infinite loop; 1070 # 10 should be plenty, but it may be increased to allow deeply 1071 # nested expansions. 1072 my $max_passes = 10; 1073 my @new_argv = (); 1074 1075 # keep looping until all expansions have been converted into actual 1076 # dash parameters.. 1077 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { 1078 my @new_argv = (); 1079 my $abbrev_count = 0; 1080 1081 # loop over each item in @ARGV.. 1082 foreach $word (@ARGV) { 1083 1084 # if it is a dash flag (instead of a file name).. 1085 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { 1086 1087 # save the raw input for debug output in case of circular refs 1088 if ( $pass_count == 0 ) { 1089 push ( @$rraw_options, $word ); 1090 } 1091 1092 my $abr = $1; 1093 my $flags = $2; 1094 1095 # if we see this dash item in the expansion hash.. 1096 if ( $rexpansion->{$abr} ) { 1097 $abbrev_count++; 1098 1099 # stuff all of the words that it expands to into the 1100 # new arg list for the next pass 1101 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { 1102 push ( @new_argv, '--' . $abbrev . $flags ); 1103 } 1104 } 1105 1106 # not in expansion hash, must be actual long name 1107 else { 1108 push ( @new_argv, $word ); 1109 } 1110 } 1111 1112 # not a dash item, so just save it for the next pass 1113 else { 1114 push ( @new_argv, $word ); 1115 } 1116 } # end of this pass 1117 1118 # update parameter list @ARGV to the new one 1119 @ARGV = @new_argv; 1120 last unless ( $abbrev_count > 0 ); 1121 1122 # make sure we are not in an infinite loop 1123 if ( $pass_count == $max_passes ) { 1124 print STDERR 1125"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; 1126 print STDERR "Here are the raw options\n"; 1127 local $" = ')('; 1128 print STDERR "(@$rraw_options)\n"; 1129 my $num = @new_argv; 1130 1131 if ( $num < 50 ) { 1132 print STDERR "After $max_passes passes here is ARGV\n"; 1133 print STDERR "(@new_argv)\n"; 1134 } 1135 else { 1136 print STDERR "After $max_passes passes ARGV has $num entries\n"; 1137 } 1138 1139 if ($config_file) { 1140 die <<"DIE"; 1141Please check your configuration file $config_file for circular-references. 1142To deactivate it, use -npro. 1143DIE 1144 } 1145 else { 1146 die <<'DIE'; 1147Program bug - circular-references in the %expansion hash, probably due to 1148a recent program change. 1149DIE 1150 } 1151 } # end of check for circular references 1152 } # end of loop over all passes 1153} 1154 1155# Debug routine -- this will dump the expansion hash 1156sub dump_short_names { 1157 my $rexpansion = shift; 1158 print STDOUT <<EOM; 1159List of short names. This list shows how all abbreviations are 1160translated into other abbreviations and, eventually, into long names. 1161New abbreviations may be defined in a .perltidyrc file. 1162For a list of all long names, use perltidy --dump-long-names (-dln). 1163-------------------------------------------------------------------------- 1164EOM 1165 foreach my $abbrev ( sort keys %$rexpansion ) { 1166 my @list = @{ $$rexpansion{$abbrev} }; 1167 print STDOUT "$abbrev --> @list\n"; 1168 } 1169} 1170 1171sub find_config_file { 1172 1173 my $config_file; 1174 1175 # look in current directory first 1176 if ( -e ".perltidyrc" ) { 1177 $config_file = ".perltidyrc"; 1178 } 1179 1180 # then the home directory 1181 elsif ( defined( $ENV{HOME} ) && -e "$ENV{HOME}/.perltidyrc" ) { 1182 $config_file = "$ENV{HOME}/.perltidyrc"; 1183 } 1184 1185 # then look for a system-wide definition 1186 elsif ( -e "/usr/local/etc/perltidyrc" ) { 1187 $config_file = "/usr/local/etc/perltidyrc"; 1188 } 1189 elsif ( -e "/etc/perltidyrc" ) { 1190 $config_file = "/etc/perltidyrc"; 1191 } 1192 return $config_file; 1193} 1194 1195sub read_config_file { 1196 1197 my @config_list = (); 1198 1199 my ( $config_file, $rexpansion ) = @_; 1200 1201 my $name = undef; 1202 my $line_no; 1203 if ( defined($config_file) && -e $config_file ) { 1204 1205 unless ( open CONFIG, "<$config_file" ) { 1206 warn "cannot open config file $config_file: $!\n"; 1207 $config_file = ""; 1208 } 1209 else { 1210 1211 while (<CONFIG>) { 1212 $line_no++; 1213 chomp; 1214 next if /^\s*#/; # skip full-line comment 1215 $_ = strip_comment( $_, $config_file, $line_no ); 1216 s/^\s*(.*?)\s*$/$1/; # trim both ends 1217 next unless $_; 1218 1219 # look for something of the general form 1220 # newname { body } 1221 # or just 1222 # body 1223 1224 if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { 1225 my ( $newname, $body, $curly ) = ( $2, $3, $4 ); 1226 1227 # handle a new alias definition 1228 if ($newname) { 1229 if ($name) { 1230 die 1231"No '}' seen after $name and before $newname in config file $config_file line $.\n"; 1232 } 1233 $name = $newname; 1234 1235 if ( ${$rexpansion}{$name} ) { 1236 local $" = ')('; 1237 my @names = sort keys %$rexpansion; 1238 print 1239"Here is a list of all installed aliases\n(@names)\n"; 1240 die 1241"Attempting to redefine alias ($name) in config file $config_file line $.\n"; 1242 } 1243 ${$rexpansion}{$name} = []; 1244 } 1245 1246 # now do the body 1247 if ($body) { 1248 1249 my $rbody_parts = 1250 parse_body( $body, $config_file, $line_no ); 1251 1252 if ($name) { 1253 1254 # remove leading dashes if this is an alias 1255 foreach (@$rbody_parts) { s/^\-+//; } 1256 push @{ ${$rexpansion}{$name} }, @$rbody_parts; 1257 } 1258 1259 else { 1260 push ( @config_list, @$rbody_parts ); 1261 } 1262 } 1263 1264 if ($curly) { 1265 unless ($name) { 1266 die 1267"Unexpected '}' seen in config file $config_file line $.\n"; 1268 } 1269 $name = undef; 1270 } 1271 } 1272 } 1273 close CONFIG; 1274 } 1275 } 1276 else { 1277 $config_file = ""; 1278 } 1279 return ( $config_file, \@config_list ); 1280} 1281 1282sub strip_comment { 1283 1284 my ( $instr, $config_file, $line_no ) = @_; 1285 1286 # nothing to do if no comments 1287 if ( $instr !~ /#/ ) { 1288 return $instr; 1289 } 1290 1291 # use simple method of no quotes 1292 elsif ( $instr !~ /['"]/ ) { 1293 $instr =~ s/\s*\#.*$//; # simple trim 1294 return $instr; 1295 } 1296 1297 # handle comments and quotes 1298 my $outstr = ""; 1299 my $quote_char = ""; 1300 while (1) { 1301 1302 # looking for ending quote character 1303 if ($quote_char) { 1304 if ( $instr =~ /\G($quote_char)/gc ) { 1305 $quote_char = ""; 1306 $outstr .= $1; 1307 } 1308 elsif ( $instr =~ /\G(.)/gc ) { 1309 $outstr .= $1; 1310 } 1311 1312 # error..we reached the end without seeing the ending quote char 1313 else { 1314 die <<EOM; 1315Error reading file $config_file at line number $line_no. 1316Did not see ending quote character <$quote_char> in this text: 1317$instr 1318Please fix this line or use -npro to avoid reading this file 1319EOM 1320 last; 1321 } 1322 } 1323 1324 # accumulating characters and looking for start of a quoted string 1325 else { 1326 if ( $instr =~ /\G([\"\'])/gc ) { 1327 $outstr .= $1; 1328 $quote_char = $1; 1329 } 1330 elsif ( $instr =~ /\G#/gc ) { 1331 last; 1332 } 1333 elsif ( $instr =~ /\G(.)/gc ) { 1334 $outstr .= $1; 1335 } 1336 else { 1337 last; 1338 } 1339 } 1340 } 1341 return $outstr; 1342} 1343 1344sub parse_body { 1345 1346=pod 1347 1348Parse a command string containing multiple string with possible 1349quotes, into individual commands. It might look like this, for example: 1350 1351 -wba=" + - " -some-thing -wbb='. && ||' 1352 1353There is no need, at present, to handle escaped quote characters. 1354(They are not perltidy tokens, so needn't be in strings). 1355 1356=cut 1357 1358 my ( $body, $config_file, $line_no ) = @_; 1359 my @body_parts = (); 1360 my $quote_char = ""; 1361 my $part = ""; 1362 while (1) { 1363 1364 # looking for ending quote character 1365 if ($quote_char) { 1366 if ( $body =~ /\G($quote_char)/gc ) { 1367 $quote_char = ""; 1368 } 1369 elsif ( $body =~ /\G(.)/gc ) { 1370 $part .= $1; 1371 } 1372 1373 # error..we reached the end without seeing the ending quote char 1374 else { 1375 if ($part) { push @body_parts, $part; } 1376 die <<EOM; 1377Error reading file $config_file at line number $line_no. 1378Did not see ending quote character <$quote_char> in this text: 1379$body 1380Please fix this line or use -npro to avoid reading this file 1381EOM 1382 last; 1383 } 1384 } 1385 1386 # accumulating characters and looking for start of a quoted string 1387 else { 1388 if ( $body =~ /\G([\"\'])/gc ) { 1389 $quote_char = $1; 1390 } 1391 elsif ( $body =~ /\G(\s+)/gc ) { 1392 push @body_parts, $part; 1393 $part = ""; 1394 } 1395 elsif ( $body =~ /\G(.)/gc ) { 1396 $part .= $1; 1397 } 1398 else { 1399 if ($part) { push @body_parts, $part; } 1400 last; 1401 } 1402 } 1403 } 1404 return ( \@body_parts ); # sound's ghoulish! 1405} 1406 1407sub dump_long_names { 1408 1409 my @names = sort @_; 1410 print STDOUT <<EOM; 1411# Command line long names (passed to GetOptions) 1412#--------------------------------------------------------------- 1413# here is a summary of the Getopt codes: 1414# <none> does not take an argument 1415# =s takes a mandatory string 1416# :s takes an optional string 1417# =i takes a mandatory integer 1418# :i takes an optional integer 1419# ! does not take an argument and may be negated 1420# i.e., -foo and -nofoo are allowed 1421# a double dash signals the end of the options list 1422# 1423#--------------------------------------------------------------- 1424EOM 1425 1426 foreach (@names) { print STDOUT "$_\n" } 1427} 1428 1429sub dump_defaults { 1430 my @defaults = sort @_; 1431 print STDOUT "Default command line options:\n"; 1432 foreach (@_) { print STDOUT "$_\n" } 1433} 1434 1435sub dump_options { 1436 my ($rOpts) = @_; 1437 local $" = "\n"; 1438 print STDOUT "Final parameter set for this run\n"; 1439 foreach my $i ( sort keys %{$rOpts} ) { 1440 print STDOUT "$i=$rOpts->{$i}\n"; 1441 } 1442} 1443 1444sub show_version { 1445 print <<"EOM"; 1446This is perltidy, v$VERSION 1447 1448Copyright 2000-2001, Steven L. Hancock 1449 1450PerlTidy is free software and may be copied under the terms of the GNU 1451General Public License, which is included in the distribution files. 1452 1453Complete documentation for perltidy can be found using 'man perltidy' 1454or on the internet at http://perltidy.sourceforge.net. 1455EOM 1456} 1457 1458sub usage { 1459 1460 print STDOUT <<EOF; 1461This is perltidy version $VERSION, a perl script indenter. Usage: 1462 1463 perltidy [ options ] file1 file2 file3 ... 1464 (output goes to file1.tdy, file2.tdy, file3.tdy, ...) 1465 perltidy [ options ] file1 -o outfile 1466 perltidy [ options ] file1 -st >outfile 1467 perltidy [ options ] <infile >outfile 1468 1469Options have short and long forms. Short forms are shown; see 1470man pages for long forms. Note: '=s' indicates a required string, 1471and '=n' indicates a required integer. 1472 1473I/O control 1474 -h show this help 1475 -o=file name of the output file (only if single input file) 1476 -q deactivate error messages (for running under editor) 1477 -w include non-critical warning messages in the .ERR error output 1478 -syn run perl -c to check syntax (default under unix systems) 1479 -log save .LOG file, which has useful diagnostics 1480 -f force perltidy to read a binary file 1481 -g like -log but writes more detailed .LOG file, for debugging scripts 1482 -opt write the set of options actually used to a .LOG file 1483 -npro ignore .perltidyrc configuration command file 1484 -pro=file read configuration commands from file instead of .perltidyrc 1485 -st send output to standard output, STDOUT 1486 -se send error output to standard error output, STDERR 1487 -v display version number to standard output and quit 1488 1489Basic Options: 1490 -i=n use n columns per indentation level (default n=4) 1491 -t tabs: use one tab character per indentation level, not recommeded 1492 -nt no tabs: use n spaces per indentation level (default) 1493 -io "indent only": just do indentation, no other formatting. 1494 -sil=n set starting indentation level to n; use if auto detection fails 1495 1496Whitespace Control 1497 -fws freeze whitespace; this disables all whitespace changes 1498 and disables the following switches: 1499 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) 1500 -bbt same as -bt but for code block braces; same as -bt if not given 1501 -pt=n paren tightness (n=0, 1 or 2) 1502 -sbt=n square bracket tightness (n=0, 1, or 2) 1503 -ci=n sets continuation indentation=n, default is n=2 spaces 1504 -lp line up parentheses, brackets, and non-BLOCK braces 1505 -ibc indent block comments; this is the default 1506 -sfs add space before semicolon in for( ; ; ) 1507 -msc=n minimum spaces to side comment, default 4 1508 -aws allow perltidy to add whitespace (default) 1509 -dws delete all old non-essential whitespace 1510 -icb indent closing brace of a code block 1511 -icp indent closing paren, square-bracket, or brace of non code block 1512 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' 1513 -wrs=s want space right of tokens in string; 1514 -sts put space before terminal semicolon of a statement 1515 1516Line Break Control 1517 -fnl freeze newlines; this disables all line break changes 1518 and disables the following switches: 1519 -anl add newlines; ok to introduce new line breaks 1520 -bbs add blank line before subs and packages 1521 -bbc add blank line before block comments 1522 -bbb add blank line between major blocks 1523 -sob swallow optional blank lines 1524 -ce cuddled else; use this style: '} else {' 1525 -dnl delete old newlines (default) 1526 -mbl=n maximum consecutive blank lines (default=1) 1527 -l=n maximum line length; default n=80 1528 -bl opening brace on new line 1529 -sbl opening sub brace on new line. value of -bl is used if not given. 1530 -bli opening brace on new line and indented 1531 -bar opening brace always on right, even for long clauses 1532 -wba=s want break after tokens in string; i.e. wba=': .' 1533 -wbb=s want break before tokens in string 1534 1535Delete selected text 1536 -dac delete all comments AND pod 1537 -dbc delete block comments 1538 -dsc delete side comments 1539 -dp delete pod 1540 1541Send selected text to a '.TEE' file 1542 -tac tee all comments AND pod 1543 -tbc tee block comments 1544 -tsc tee side comments 1545 -tp tee pod 1546 1547Combinations of other parameters 1548 -gnu attempt to follow GNU Coding Standards as applied to perl 1549 -mangle remove as many newlines as possible (but keep comments and pods) 1550 -extrude insert as many newlines as possible 1551 1552Other controls 1553 -mft=n maximum fields per table; default n=40 1554 -x do not format lines before hash-bang line (i.e., for VMS) 1555 -asc allows perltidy to add a ';' when missing (default) 1556 -dsm allows perltidy to delete an unnecessary ';' (default) 1557 1558 -olq outdent long quoted strings (default) 1559 -olc outdent a long block comment line 1560 -ola outdent statement labels 1561 -okw outdent control keywords (redo, next, last, goto, return) 1562 -okwl=s specify alternative keywords for -okw command 1563 1564 -csc add or update closing side comments after closing BLOCK brace 1565 -dcsc delete closing side comments created by a -csc command 1566 -cscp=s change closing side comment prefix to be other than '## end' 1567 -cscl=s change closing side comment to apply to selected list of blocks 1568 -csci=n minimum number of lines needed to apply a -csc tag, default n=6 1569 -csct=n maximum number of columns of appended text, default n=20 1570 -cscw causes warning if old side comment is overwritten with -csc 1571 1572 -sbc use 'static block comments' identified by leading '##' (default) 1573 -sbcp=s change static block comment identifier to be other than '##' 1574 1575 -ssc use 'static side comments' identified by leading '##' (default) 1576 -sscp=s change static side comment identifier to be other than '##' 1577 1578 -dop dump options used in this run to standard output and quit 1579 -ddf dump default options to standard output and quit 1580 -dsn dump all option short names to standard output and quit 1581 -dln dump option long names to standard output and quit 1582 -dtt dump all token types to standard output and quit 1583 1584 -html write an html file (see 'man perl2web' for many options) 1585 Note: when -html is used, no indentation or formatting are done. 1586 Hint: try perltidy -html -css=mystyle.css filename.pl 1587 and edit mystyle.css to change the appearance of filename.html. 1588 -nnn gives line numbers 1589 -pre only writes out <pre>..</pre> code section 1590 1591A prefix of "n" negates short form toggle switches, and a prefix of "no" 1592negates the long forms. For example, -nt or --notabs mean to indent with 1593spaces rather than tabs. Do not bundle switches together. 1594 1595If you are unable to see this entire text, try "perltidy -h | more" 1596For more detailed information, and additional options, try "man perltidy", 1597or go to the perltidy home page at http://perltidy.sourceforge.net 1598EOF 1599 1600} 1601 1602sub process_this_file { 1603 1604 my ( $truth, $beauty ) = @_; 1605 1606 # loop to process each line of this file 1607 while ( my $line_of_tokens = $truth->get_line() ) { 1608 $beauty->write_line($line_of_tokens); 1609 } 1610 1611 # finish up 1612 $beauty->finish_formatting(); 1613 $truth->report_tokenization_errors(); 1614} 1615 1616sub check_syntax { 1617 1618 # Use 'perl -c' to make sure that we did not create bad syntax 1619 # This is a very good independent check for programming errors 1620 # 1621 # Given names of the input and output files, ($ifname, $ofname), 1622 # we do the following: 1623 # - check syntax of the input file 1624 # - if bad, all done (could be an incomplete code snippet) 1625 # - if infile syntax ok, then check syntax of the output file; 1626 # - if outfile syntax bad, issue warning; this implies a code bug! 1627 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good 1628 1629 my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; 1630 my $infile_syntax_ok = 0; 1631 my $line_of_dashes = '-' x 42 . "\n"; 1632 1633 # invoke perl with -x if requested 1634 my $dash_x = $rOpts->{'look-for-hash-bang'} ? "-x" : ""; 1635 1636 # this shouldn't happen unless perltidy.TMPI couldn't be made 1637 if ( $ifname eq '-' ) { 1638 $logger_object->write_logfile_entry( 1639 "Cannot run perl -c on STDIN and STDOUT\n"); 1640 return $infile_syntax_ok; 1641 } 1642 1643 $logger_object->write_logfile_entry( 1644 "checking input file syntax with perl -c...\n"); 1645 $logger_object->write_logfile_entry($line_of_dashes); 1646 1647 # Not all operating systems/shells support redirection of the standard 1648 # error output. 1649 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; 1650 my $flags = "-c -T $dash_x"; 1651 1652 my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); 1653 $logger_object->write_logfile_entry("$perl_output\n"); 1654 1655 if ( $perl_output =~ /syntax\s*OK/ ) { 1656 $infile_syntax_ok = 1; 1657 $logger_object->write_logfile_entry($line_of_dashes); 1658 $logger_object->write_logfile_entry( 1659 "checking output file syntax with perl -c...\n"); 1660 $logger_object->write_logfile_entry($line_of_dashes); 1661 1662 my $perl_output = 1663 do_syntax_check( $ofname, $flags, $error_redirection ); 1664 $logger_object->write_logfile_entry("$perl_output\n"); 1665 1666 unless ( $perl_output =~ /syntax\s*OK/ ) { 1667 $logger_object->write_logfile_entry($line_of_dashes); 1668 $logger_object->warning( 1669"The output file has a syntax error when tested with perl -c $dash_x $ofname !\n" 1670 ); 1671 $logger_object->warning( 1672 "This implies an error in perltidy; the file $ofname is bad\n"); 1673 $logger_object->report_definite_bug(); 1674 1675 # the perl version number will be helpful for diagnosing the problem 1676 $logger_object->write_logfile_entry( 1677 `perl -v $dash_x $ofname $error_redirection` . "\n" ); 1678 } 1679 } 1680 else { 1681 1682 # Only warn of perl -c syntax errors. Other messages, 1683 # such as missing modules, are too common. They can be 1684 # seen by running with perltidy -w 1685 $logger_object->complain("A syntax check using perl -c gives: \n"); 1686 $logger_object->complain($line_of_dashes); 1687 $logger_object->complain("$perl_output\n"); 1688 $logger_object->complain($line_of_dashes); 1689 $infile_syntax_ok = -1; 1690 $logger_object->write_logfile_entry($line_of_dashes); 1691 $logger_object->write_logfile_entry( 1692"The output file will not be checked because of input file problems\n" 1693 ); 1694 } 1695 return $infile_syntax_ok; 1696} 1697 1698sub do_syntax_check { 1699 my ( $fname, $flags, $error_redirection ) = @_; 1700 return `perl $flags $fname $error_redirection`; 1701} 1702 1703##################################################################### 1704# 1705# the PerlTidy::LineSource class supplies an object with a 'get_line()' method 1706# which returns the next line to be parsed 1707# 1708##################################################################### 1709 1710package PerlTidy::LineSource; 1711 1712sub new { 1713 1714 my $class = shift; 1715 my $input_file = shift; 1716 my $rOpts = shift; 1717 my $fh; 1718 my $input_file_copy = undef; 1719 my $fh_copy; 1720 1721 unless ( ( $fh, $input_file ) = PerlTidy::streamhandle( $input_file, 'r' ) ) 1722 { 1723 return undef; 1724 } 1725 else { 1726 1727 # in order to check output syntax when standard output is used, we have 1728 # to make a copy of the file 1729 if ( $input_file eq '-' && $rOpts->{'check-syntax'} ) { 1730 $input_file_copy = "perltidy.TMPI"; 1731 $fh_copy = IO::File->new(">$input_file_copy") 1732 or die ( 1733 "Couldn't open $input_file_copy: $!\n 1734 It is needed to check syntax; deactivate with -nsyn" 1735 ); 1736 } 1737 1738 return bless { 1739 _fh => $fh, 1740 _fh_copy => $fh_copy, 1741 _filename => $input_file, 1742 _input_file_copy => $input_file_copy, 1743 }, $class; 1744 } 1745} 1746 1747sub get_input_file_copy_name { 1748 my $self = shift; 1749 my $ifname = $self->{_input_file_copy}; 1750 unless ($ifname) { 1751 $ifname = $self->{_filename}; 1752 } 1753 return $ifname; 1754} 1755 1756sub close_input_file { 1757 my $self = shift; 1758 $self->{_fh}->close(); 1759 $self->{_fh_copy}->close() if $self->{_fh_copy}; 1760} 1761 1762sub unlink_copy { 1763 my $self = shift; 1764 unlink $self->{_input_file_copy} if $self->{_input_file_copy}; 1765 my $fname = $self->{_input_file_copy}; 1766} 1767 1768sub get_line { 1769 my $self = shift; 1770 my $line = undef; 1771 my $fh = $self->{_fh}; 1772 my $fh_copy = $self->{_fh_copy}; 1773 $line = $fh->getline(); 1774 if ( $line && $fh_copy ) { $fh_copy->print($line); } 1775 return $line; 1776} 1777 1778##################################################################### 1779# 1780# the PerlTidy::LineSink class supplies a write_line method for 1781# actual file writing 1782# 1783##################################################################### 1784 1785package PerlTidy::LineSink; 1786 1787sub new { 1788 1789 my ( $class, $output_file, $tee_file, $rOpts ) = @_; 1790 my $fh = undef; 1791 my $fh_copy = undef; 1792 my $fh_tee = undef; 1793 my $output_file_copy = ""; 1794 my $output_file_open = 0; 1795 1796 if ( $rOpts->{'tidy-output'} ) { 1797 ( $fh, $output_file ) = PerlTidy::streamhandle( $output_file, 'w' ); 1798 if ( $output_file eq '-' ) { $output_file_copy = "perltidy.TMPO"; } 1799 $output_file_open = 1; 1800 } 1801 1802 # in order to check output syntax when standard output is used, we have to 1803 # make a copy of the file 1804 if ($output_file_copy) { 1805 if ( $rOpts->{'check-syntax'} ) { 1806 $fh_copy = IO::File->new(">$output_file_copy") 1807 or die ( 1808 "couldn't open $output_file_copy: $!\n 1809 which is needed for to check syntax; deactivate with -nsyn" 1810 ); 1811 } 1812 else { 1813 $output_file_copy = ""; 1814 } 1815 } 1816 1817 bless { 1818 _fh => $fh, 1819 _fh_copy => $fh_copy, 1820 _fh_tee => $fh_tee, 1821 _output_file => $output_file, 1822 _output_file_open => $output_file_open, 1823 _output_file_copy => $output_file_copy, 1824 _tee_flag => 0, 1825 _tee_file => $tee_file, 1826 _tee_file_opened => 0, 1827 }, $class; 1828} 1829 1830sub write_line { 1831 1832 my $self = shift; 1833 my $fh = $self->{_fh}; 1834 my $fh_copy = $self->{_fh_copy}; 1835 1836 my $output_file_open = $self->{_output_file_open}; 1837 1838 $fh->print( $_[0] ) if ( $self->{_output_file_open} ); 1839 print $fh_copy $_[0] if ( $self->{_output_file_copy} ); 1840 1841 if ( $self->{_tee_flag} ) { 1842 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } 1843 my $fh_tee = $self->{_fh_tee}; 1844 print $fh_tee $_[0]; 1845 } 1846} 1847 1848sub get_output_file_copy { 1849 my $self = shift; 1850 my $ofname = $self->{_output_file_copy}; 1851 unless ($ofname) { 1852 $ofname = $self->{_output_file}; 1853 } 1854 return $ofname; 1855} 1856 1857sub tee_on { 1858 my $self = shift; 1859 $self->{_tee_flag} = 1; 1860} 1861 1862sub tee_off { 1863 my $self = shift; 1864 $self->{_tee_flag} = 0; 1865} 1866 1867sub really_open_tee_file { 1868 my $self = shift; 1869 my $tee_file = $self->{_tee_file}; 1870 my $fh_tee; 1871 $fh_tee = IO::File->new(">$tee_file") 1872 or die ("couldn't open TEE file $tee_file: $!\n"); 1873 $self->{_tee_file_opened} = 1; 1874 $self->{_fh_tee} = $fh_tee; 1875} 1876 1877sub close_output_file { 1878 my $self = shift; 1879 $self->{_fh}->close() if $self->{_output_file_open}; 1880 close $self->{_fh_copy} if ( $self->{_output_file_copy} ); 1881 $self->close_tee_file(); 1882} 1883 1884sub close_tee_file { 1885 my $self = shift; 1886 1887 if ( $self->{_tee_file_opened} ) { 1888 close $self->{_fh_tee}; 1889 $self->{_tee_file_opened} = 0; 1890 } 1891} 1892 1893sub unlink_copy { 1894 my $self = shift; 1895 unlink( $self->{_output_file_copy} ) if $self->{_output_file_copy}; 1896} 1897 1898##################################################################### 1899# 1900# The PerlTidy::Diagnostics class writes the DIAGNOSTICS file, which is 1901# useful for program development. 1902# 1903# Only one such file is created regardless of the number of input 1904# files processed. This allows the results of processing many files 1905# to be summarized in a single file. 1906# 1907##################################################################### 1908 1909package PerlTidy::Diagnostics; 1910 1911sub new { 1912 1913 my $class = shift; 1914 bless { 1915 _write_diagnostics_count => 0, 1916 _last_diagnostic_file => "", 1917 _input_file => "", 1918 _fh => undef, 1919 }, $class; 1920} 1921 1922sub set_input_file { 1923 my $self = shift; 1924 $self->{_input_file} = $_[0]; 1925} 1926 1927# This is a diagnostic routine which is useful for program development. 1928# Output from debug messages go to a file named DIAGNOSTICS, where 1929# they are labeled by file and line. This allows many files to be 1930# scanned at once for some particular condition of interest. 1931sub write_diagnostics { 1932 my $self = shift; 1933 1934 unless ( $self->{_write_diagnostics_count} ) { 1935 open DIAGNOSTICS, ">DIAGNOSTICS" 1936 or death("couldn't open DIAGNOSTICS: $!\n"); 1937 } 1938 1939 my $last_diagnostic_file = $self->{_last_diagnostic_file}; 1940 my $input_file = $self->{_input_file}; 1941 if ( $last_diagnostic_file ne $input_file ) { 1942 print DIAGNOSTICS "\nFILE:$input_file\n"; 1943 } 1944 $self->{_last_diagnostic_file} = $input_file; 1945 my $input_line_number = PerlTidy::Tokenizer::get_input_line_number(); 1946 print DIAGNOSTICS "$input_line_number:\t@_"; 1947 $self->{_write_diagnostics_count}++; 1948} 1949 1950##################################################################### 1951# 1952# The PerlTidy::Logger class writes the .LOG and .ERR files 1953# 1954##################################################################### 1955 1956package PerlTidy::Logger; 1957 1958sub new { 1959 my $class = shift; 1960 my $fh; 1961 my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; 1962 $fh = IO::File->new(">$log_file") 1963 or die ("couldn't open log file $log_file: $!\n"); 1964 1965 # remove any old error output file 1966 if ( -e $warning_file ) { unlink($warning_file) } 1967 1968 bless { 1969 _log_file => $log_file, 1970 _fh => $fh, 1971 _fh_warnings => undef, 1972 _rOpts => $rOpts, 1973 _fh_warnings => undef, 1974 _last_input_line_written => 0, 1975 _at_end_of_file => 0, 1976 _use_prefix => 1, 1977 _block_log_output => 0, 1978 _line_of_tokens => undef, 1979 _output_line_number => undef, 1980 _wrote_line_information_string => 0, 1981 _wrote_column_headings => 0, 1982 _warning_file => $warning_file, 1983 _warning_count => 0, 1984 _complaint_count => 0, 1985 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure 1986 _saw_brace_error => 0, 1987 _saw_extrude => $saw_extrude, 1988 }, $class; 1989} 1990 1991sub close_log_file { 1992 my $self = shift; 1993 close $self->{_fh}; 1994 close $self->{_fh_warnings} if ( $self->{_warning_count} ); 1995} 1996 1997sub get_warning_count { 1998 my $self = shift; 1999 return $self->{_warning_count}; 2000} 2001 2002sub get_use_prefix { 2003 my $self = shift; 2004 return $self->{_use_prefix}; 2005} 2006 2007sub block_log_output { 2008 my $self = shift; 2009 $self->{_block_log_output} = 1; 2010} 2011 2012sub unblock_log_output { 2013 my $self = shift; 2014 $self->{_block_log_output} = 0; 2015} 2016 2017sub interrupt_logfile { 2018 my $self = shift; 2019 $self->{_use_prefix} = 0; 2020 $self->warning("\n"); 2021 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); 2022} 2023 2024sub resume_logfile { 2025 my $self = shift; 2026 $self->write_logfile_entry( '#' x 60 . "\n" ); 2027 $self->{_use_prefix} = 1; 2028} 2029 2030sub we_are_at_the_last_line { 2031 my $self = shift; 2032 unless ( $self->{_wrote_line_information_string} ) { 2033 $self->write_logfile_entry("Last line\n\n"); 2034 } 2035 $self->{_at_end_of_file} = 1; 2036} 2037 2038# record some stuff in case we go down in flames 2039sub black_box { 2040 my $self = shift; 2041 my ( $line_of_tokens, $output_line_number ) = @_; 2042 my $input_line = $line_of_tokens->{_line_text}; 2043 my $input_line_number = $line_of_tokens->{_line_number}; 2044 2045 # save line information in case we have to write a logfile message 2046 $self->{_line_of_tokens} = $line_of_tokens; 2047 $self->{_output_line_number} = $output_line_number; 2048 $self->{_wrote_line_information_string} = 0; 2049 2050 my $last_input_line_written = $self->{_last_input_line_written}; 2051 my $rOpts = $self->{_rOpts}; 2052 if ( 2053 ( 2054 ( $input_line_number - $last_input_line_written ) >= 2055 $rOpts->{'logfile-gap'} 2056 ) 2057 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) 2058 ) 2059 { 2060 my $rlevels = $line_of_tokens->{_rlevels}; 2061 my $structural_indentation_level = $$rlevels[0]; 2062 $self->{_last_input_line_written} = $input_line_number; 2063 ( my $out_str = $input_line ) =~ s/^\s*//; 2064 chomp $out_str; 2065 2066 $out_str = ( '.' x $structural_indentation_level ) . $out_str; 2067 2068 if ( length($out_str) > 35 ) { 2069 $out_str = substr( $out_str, 0, 35 ) . " ...."; 2070 } 2071 $self->logfile_output( "", "$out_str\n" ); 2072 } 2073} 2074 2075sub write_logfile_entry { 2076 my $self = shift; 2077 2078 # add leading >>> to avoid confusing error mesages and code 2079 $self->logfile_output( ">>>", "@_" ); 2080} 2081 2082sub write_column_headings { 2083 my $self = shift; 2084 2085 $self->{_wrote_column_headings} = 1; 2086 my $fh = $self->{_fh}; 2087 print $fh <<EOM; 2088The nesting depths in the table below are at the start of the lines. 2089The indicated output line numbers are not always exact. 2090ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. 2091 2092in:out indent c b nesting code + messages; (messages begin with >>>) 2093lines levels i k (code begins with one '.' per indent level) 2094------ ----- - - -------- ------------------------------------------- 2095EOM 2096} 2097 2098sub make_line_information_string { 2099 2100 # make columns of information when a logfile message needs to go out 2101 my $self = shift; 2102 my $line_of_tokens = $self->{_line_of_tokens}; 2103 my $output_line_number = $self->{_output_line_number}; 2104 my $brace_depth = $line_of_tokens->{_curly_brace_depth}; 2105 my $paren_depth = $line_of_tokens->{_paren_depth}; 2106 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; 2107 my $input_line_number = $line_of_tokens->{_line_number}; 2108 my $python_indentation_level = $line_of_tokens->{_python_indentation_level}; 2109 my $rlevels = $line_of_tokens->{_rlevels}; 2110 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 2111 my $rci_levels = $line_of_tokens->{_rci_levels}; 2112 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 2113 2114 my $structural_indentation_level = $$rlevels[0]; 2115 my $line_information_string = ""; 2116 2117 if ($input_line_number) { 2118 $self->write_column_headings() unless $self->{_wrote_column_headings}; 2119 2120 # keep logfile columns aligned for scripts up to 999 lines; 2121 # for longer scripts it doesn't really matter 2122 my $extra_space = ""; 2123 $extra_space .= ( $input_line_number < 10 ) ? " " 2124 : ( $input_line_number < 100 ) ? " " 2125 : ""; 2126 $extra_space .= ( $output_line_number < 10 ) ? " " 2127 : ( $output_line_number < 100 ) ? " " 2128 : ""; 2129 2130 # there are 2 possible nesting strings: 2131 # the original which looks like this: (0 [1 {2 2132 # the new one, which looks like this: {{[ 2133 # the new one is easier to read, and shows the order, but 2134 # could be arbitrarily long, so we use it unless it is too long 2135 my $nesting_string = 2136 "($paren_depth [$square_bracket_depth {$brace_depth"; 2137 my $nesting_string_new = $$rnesting_tokens[0]; 2138 2139 my $ci_level = $$rci_levels[0]; 2140 if ( $ci_level > 9 ) { $ci_level = '*' } 2141 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; 2142 2143 if ( length($nesting_string_new) <= 8 ) { 2144 $nesting_string = 2145 $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); 2146 } 2147 2148 $line_information_string = 2149"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; 2150 } 2151 return $line_information_string; 2152} 2153 2154sub logfile_output { 2155 my $self = shift; 2156 my ( $prompt, $msg ) = @_; 2157 return if ( $self->{_block_log_output} ); 2158 2159 my $fh = $self->{_fh}; 2160 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { 2161 print $fh "$msg"; 2162 } 2163 else { 2164 my $line_information_string = $self->make_line_information_string(); 2165 $self->{_wrote_line_information_string} = 1; 2166 2167 if ($line_information_string) { 2168 print $fh "$line_information_string $prompt$msg"; 2169 } 2170 else { 2171 print $fh "$msg"; 2172 } 2173 } 2174} 2175 2176sub get_saw_brace_error { 2177 my $self = shift; 2178 return $self->{_saw_brace_error}; 2179} 2180 2181sub increment_brace_error { 2182 my $self = shift; 2183 $self->{_saw_brace_error}++; 2184} 2185 2186sub brace_warning { 2187 my $self = shift; 2188 use constant BRACE_WARNING_LIMIT => 10; 2189 my $saw_brace_error = $self->{_saw_brace_error}; 2190 2191 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { 2192 $self->warning(@_); 2193 } 2194 $saw_brace_error++; 2195 $self->{_saw_brace_error} = $saw_brace_error; 2196 2197 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { 2198 $self->warning("No further warnings of this type will be given\n"); 2199 } 2200} 2201 2202sub complain { 2203 2204 # handle non-critical warning messages based on input flag 2205 my $self = shift; 2206 my $rOpts = $self->{_rOpts}; 2207 2208 # these appear in .ERR output only if -w flag is used 2209 if ( $rOpts->{'warning-output'} ) { 2210 $self->warning(@_); 2211 } 2212 2213 # otherwise, they go to the .LOG file 2214 else { 2215 $self->{_complaint_count}++; 2216 $self->write_logfile_entry(@_); 2217 } 2218} 2219 2220sub warning { 2221 2222 # report errors to .ERR file (or stdout) 2223 my $self = shift; 2224 use constant WARNING_LIMIT => 50; 2225 2226 my $rOpts = $self->{_rOpts}; 2227 unless ( $rOpts->{'quiet'} ) { 2228 2229 my $warning_count = $self->{_warning_count}; 2230 unless ($warning_count) { 2231 my $warning_file = $self->{_warning_file}; 2232 my $fh_warnings; 2233 if ( $rOpts->{'standard-error-output'} ) { 2234 $fh_warnings = *STDERR; 2235 } 2236 else { 2237 $fh_warnings = IO::File->new(">$warning_file") 2238 or death("couldn't open $warning_file: $!\n"); 2239 print STDERR "Please see file $warning_file!\n"; 2240 } 2241 $self->{_fh_warnings} = $fh_warnings; 2242 } 2243 2244 my $fh_warnings = $self->{_fh_warnings}; 2245 if ( $warning_count < WARNING_LIMIT ) { 2246 if ( $self->get_use_prefix() > 0 ) { 2247 my $input_line_number = 2248 PerlTidy::Tokenizer::get_input_line_number(); 2249 print $fh_warnings "$input_line_number:\t@_"; 2250 $self->write_logfile_entry("WARNING: @_"); 2251 } 2252 else { 2253 print $fh_warnings @_; 2254 $self->write_logfile_entry(@_); 2255 } 2256 } 2257 $warning_count++; 2258 $self->{_warning_count} = $warning_count; 2259 2260 if ( $warning_count == WARNING_LIMIT ) { 2261 print $fh_warnings "No further warnings will be given"; 2262 } 2263 } 2264} 2265 2266# programming bug codes: 2267# -1 = no bug 2268# 0 = maybe, not sure. 2269# 1 = definitely 2270sub report_possible_bug { 2271 my $self = shift; 2272 my $saw_code_bug = $self->{_saw_code_bug}; 2273 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; 2274} 2275 2276sub report_definite_bug { 2277 my $self = shift; 2278 $self->{_saw_code_bug} = 1; 2279} 2280 2281sub ask_user_for_bug_report { 2282 my $self = shift; 2283 2284 my ( $infile_syntax_ok, $formatter ) = @_; 2285 my $saw_code_bug = $self->{_saw_code_bug}; 2286 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { 2287 $self->warning(<<EOM); 2288 2289You may have encountered a code bug in perltidy. If you think so, and 2290the problem is not listed in the BUGS file at 2291http://perltidy.sourceforge.net, please report it so that it can be 2292corrected. Include the smallest possible script which has the problem, 2293along with the .LOG file. See the manual pages for contact information. 2294Thank you! 2295EOM 2296 2297 } 2298 elsif ( $saw_code_bug == 1 ) { 2299 if ( $self->{_saw_extrude} ) { 2300 $self->warning(<<EOM); 2301You may have encountered a bug in perltidy. However, since you are 2302using the -extrude option, the problem may be with perl itself, which 2303has occasional parsing problems with this type of file. If you believe 2304that the problem is with perltidy, and the problem is not listed in the 2305BUGS file at http://perltidy.sourceforge.net, please report it so that 2306it can be corrected. Include the smallest possible script which has the 2307problem, along with the .LOG file. See the manual pages for contact 2308information. 2309Thank you! 2310EOM 2311 } 2312 else { 2313 $self->warning(<<EOM); 2314 2315Oops, you seem to have encountered a bug in perltidy. Please check the 2316BUGS file at http://perltidy.sourceforge.net. If the problem is not 2317listed there, please report it so that it can be corrected. Include the 2318smallest possible script which produces this message, along with the 2319.LOG file if appropriate. See the manual pages for contact information. 2320Your efforts are appreciated. 2321Thank you! 2322EOM 2323 my $added_semicolon_count = $formatter->get_added_semicolon_count(); 2324 if ( $added_semicolon_count > 0 ) { 2325 $self->warning(<<EOM); 2326 2327The log file shows that perltidy added $added_semicolon_count semicolons. 2328Please rerun with -nasc to see if that is the cause of the syntax error. Even 2329if that is the problem, please report it so that it can be fixed. 2330EOM 2331 2332 } 2333 } 2334 } 2335} 2336 2337sub finish { 2338 2339 # called after all formatting to summarize errors 2340 my $self = shift; 2341 my ( $infile_syntax_ok, $formatter ) = @_; 2342 2343 my $rOpts = $self->{_rOpts}; 2344 my $warning_count = $self->{_warning_count}; 2345 my $saw_code_bug = $self->{_saw_code_bug}; 2346 2347 my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) 2348 || $saw_code_bug == 1 2349 || $rOpts->{'logfile'}; 2350 my $log_file = $self->{_log_file}; 2351 if ($warning_count) { 2352 if ($save_logfile) { 2353 $self->block_log_output(); # avoid echoing this to the logfile 2354 $self->warning( 2355 "The logfile $log_file may contain useful information\n"); 2356 $self->unblock_log_output(); 2357 } 2358 2359 if ( $self->{_complaint_count} > 0 ) { 2360 $self->warning( 2361"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" 2362 ); 2363 } 2364 2365 if ( $self->{_saw_brace_error} 2366 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) 2367 { 2368 $self->warning("To save a full .LOG file rerun with -g\n"); 2369 } 2370 } 2371 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); 2372 $self->close_log_file(); 2373 2374 # delete the log file unless it is needed or wanted 2375 unlink($log_file) unless ($save_logfile); 2376} 2377 2378##################################################################### 2379# 2380# The PerlTidy::HtmlWriter class writes a copy of the input stream in html 2381# 2382##################################################################### 2383 2384package PerlTidy::HtmlWriter; 2385 2386# class variables 2387use vars qw{ 2388 %html_color 2389 %html_bold 2390 %html_italic 2391 %token_short_names 2392 %short_to_long_names 2393 $rOpts 2394 $css_filename 2395 $css_linkname 2396 $missing_html_entities 2397}; 2398 2399# replace unsafe characters with HTML entity representation if HTML::Entities 2400# is available 2401{ eval "use HTML::Entities"; $missing_html_entities = $@; } 2402 2403sub new { 2404 2405 my ( $class, $input_file, $html_file ) = @_; 2406 2407 my $html_file_opened = 0; 2408 my $html_fh; 2409 unless ( $html_fh = IO::File->new("> $html_file") ) { 2410 warn("can't open $html_file: $!\n"); 2411 return undef; 2412 } 2413 $html_file_opened = 1; 2414 2415 unless ( $rOpts->{'html-pre-only'} ) { 2416 $html_fh->print( <<"HTML_START"); 2417<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> 2418<HTML> 2419<HEAD> 2420HTML_START 2421 2422 # use css linked to another file 2423 if ( $rOpts->{'html-linked-style-sheet'} ) { 2424 $html_fh->print( 2425 qq(<link rel=stylesheet href="$css_linkname" type="text/css">)); 2426 $html_fh->print( <<"ENDCSS"); 2427<TITLE>$input_file</TITLE> 2428</HEAD> 2429<BODY> 2430ENDCSS 2431 } 2432 2433 # use css embedded in this file 2434 elsif ( !$rOpts->{'nohtml-style-sheets'} ) { 2435 $html_fh->print( <<'ENDCSS'); 2436<STYLE TYPE="text/css"> 2437<!-- 2438ENDCSS 2439 write_style_sheet_data($html_fh); 2440 $html_fh->print( <<"ENDCSS"); 2441--> 2442</STYLE> 2443<TITLE>$input_file</TITLE> 2444</HEAD> 2445<BODY> 2446ENDCSS 2447 } 2448 2449 # no css used 2450 else { 2451 2452 $html_fh->print( <<"HTML_START"); 2453<TITLE>$input_file</TITLE> 2454</HEAD> 2455<BODY BGCOLOR=\"$rOpts->{'html-color-background'}\" TEXT=\"$rOpts->{'html-color-punctuation'}\"> 2456HTML_START 2457 } 2458 } 2459 2460 $html_fh->print( <<"END_PRE"); 2461<!-- filename: $input_file --> 2462<PRE> 2463END_PRE 2464 2465 bless { 2466 _html_file => $html_file, 2467 _html_file_opened => $html_file_opened, 2468 _html_fh => $html_fh, 2469 }, $class; 2470} 2471 2472BEGIN { 2473 2474 # This is the official list of tokens which may be identified by the 2475 # user. Long names are used as getopt keys. Short names are 2476 # convenient short abbreviations for specifying input. Short names 2477 # somewhat resemble token type characters, but are often different 2478 # because they may only be alphanumeric, to allow command line 2479 # input. Also, note that because of case insensitivity of html, 2480 # this table must be in a single case only (I've chosen to use all 2481 # lower case). 2482 # When adding NEW_TOKENS: update this hash table 2483 # short names => long names 2484 %short_to_long_names = ( 2485 'n' => 'numeric', 2486 'p' => 'paren', 2487 'q' => 'quote', 2488 's' => 'structure', 2489 'c' => 'comment', 2490 'v' => 'v-string', 2491 'cm' => 'comma', 2492 'w' => 'bareword', 2493 'co' => 'colon', 2494 'pu' => 'punctuation', 2495 'i' => 'identifier', 2496 'j' => 'label', 2497 'h' => 'here-doc-target', 2498 'hh' => 'here-doc-text', 2499 'k' => 'keyword', 2500 'sc' => 'semicolon', 2501 'm' => 'subroutine', 2502 'pd' => 'pod-text', 2503 ); 2504 2505 # Now we have to map actual token types into one of the above short 2506 # names; any token types not mapped will get 'punctuation' 2507 # properties. 2508 2509 # The values of this hash table correspond to the keys of the 2510 # previous hash table. 2511 # The keys of this hash table are token types and can be seen 2512 # by running with --dump-token-types (-dtt). 2513 2514 # When adding NEW_TOKENS: update this hash table 2515 # $type => $short_name 2516 %token_short_names = ( 2517 '#' => 'c', 2518 'n' => 'n', 2519 'v' => 'v', 2520 'k' => 'k', 2521 'F' => 'k', 2522 'Q' => 'q', 2523 'q' => 'q', 2524 'J' => 'j', 2525 'j' => 'j', 2526 'h' => 'h', 2527 'H' => 'hh', 2528 'w' => 'w', 2529 ',' => 'cm', 2530 '=>' => 'cm', 2531 ';' => 'sc', 2532 ':' => 'co', 2533 'f' => 'sc', 2534 '(' => 'p', 2535 ')' => 'p', 2536 'M' => 'm', 2537 'P' => 'pd', 2538 ); 2539 2540 # These token types will all be called identifiers for now 2541 # FIXME: need to separate user defined modules as separate type 2542 my @identifier = qw" i t U C Y Z G :: "; 2543 @token_short_names{@identifier} = ('i') x scalar(@identifier); 2544 2545 # These token types will be called 'structure' 2546 my @structure = qw" { } "; 2547 @token_short_names{@structure} = ('s') x scalar(@structure); 2548 2549 # OLD NOTES: save for reference 2550 # Any of these could be added later if it would be useful. 2551 # For now, they will by default become punctuation 2552 # my @list = qw" L R [ ] "; 2553 # @token_long_names{@list} = ('non-structure') x scalar(@list); 2554 # 2555 # my @list = qw" 2556 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm 2557 # "; 2558 # @token_long_names{@list} = ('math') x scalar(@list); 2559 # 2560 # my @list = qw" & &= ~ ~= ^ ^= | |= "; 2561 # @token_long_names{@list} = ('bit') x scalar(@list); 2562 # 2563 # my @list = qw" == != < > <= <=> "; 2564 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); 2565 # 2566 # my @list = qw" && || ! &&= ||= "; 2567 # @token_long_names{@list} = ('logical') x scalar(@list); 2568 # 2569 # my @list = qw" . .= =~ !~ x x= "; 2570 # @token_long_names{@list} = ('string-operators') x scalar(@list); 2571 # 2572 # # Incomplete.. 2573 # my @list = qw" .. -> <> ... \ ? "; 2574 # @token_long_names{@list} = ('misc-operators') x scalar(@list); 2575 2576} 2577 2578sub make_getopt_long_names { 2579 my $class = shift; 2580 my ($rgetopt_names) = @_; 2581 while ( my ( $short_name, $name ) = each %short_to_long_names ) { 2582 push @$rgetopt_names, "html-color-$name=s"; 2583 push @$rgetopt_names, "html-italic-$name!"; 2584 push @$rgetopt_names, "html-bold-$name!"; 2585 } 2586 push @$rgetopt_names, "html-color-background=s"; 2587 push @$rgetopt_names, "html-linked-style-sheet=s"; 2588 push @$rgetopt_names, "nohtml-style-sheets"; 2589 push @$rgetopt_names, "html-pre-only"; 2590 push @$rgetopt_names, "html-line-numbers"; 2591 push @$rgetopt_names, "stylesheet"; 2592} 2593 2594sub make_abbreviated_names { 2595 2596 # We're appending things like this to the expansion list: 2597 # 'hcc' => [qw(html-color-comment)], 2598 # 'hck' => [qw(html-color-keyword)], 2599 # etc 2600 my $class = shift; 2601 my ($rexpansion) = @_; 2602 2603 # abbreviations for color/bold/italic properties 2604 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 2605 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; 2606 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; 2607 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; 2608 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; 2609 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; 2610 } 2611 2612 # abbreviations for all other html options 2613 ${$rexpansion}{"hcbg"} = ["html-color-background"]; 2614 ${$rexpansion}{"pre"} = ["html-pre-only"]; 2615 ${$rexpansion}{"nnn"} = ["html-line-numbers"]; 2616 ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; 2617 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; 2618 ${$rexpansion}{"ss"} = ["stylesheet"]; 2619} 2620 2621sub check_options { 2622 2623 # This will be called once after options have been parsed 2624 my $class = shift; 2625 $rOpts = shift; 2626 2627 # X11 color names for default settings that seemed to look ok 2628 # (these color names are only used for programming clarity; the hex 2629 # numbers are actually written) 2630 use constant ForestGreen => "#228B22"; 2631 use constant SaddleBrown => "#8B4513"; 2632 use constant IndianRed3 => "#CD5555"; 2633 use constant DeepSkyBlue4 => "#00688B"; 2634 use constant MediumOrchid3 => "#B452CD"; 2635 use constant black => "#000000"; 2636 use constant white => "#FFFFFF"; 2637 use constant red => "#FF0000"; 2638 2639 # set default color, bold, italic properties 2640 # anything not listed here will be given the default (punctuation) color -- 2641 # these types currently not listed and get default: ws pu s sc cm co p 2642 # When adding NEW_TOKENS: add an entry here if you don't want defaults 2643 2644 # set_default_properties( $short_name, default_color, bold?, italic? ); 2645 set_default_properties( 'c', ForestGreen, 0, 0 ); 2646 set_default_properties( 'pd', ForestGreen, 0, 1 ); 2647 set_default_properties( 'k', SaddleBrown, 1, 0 ); 2648 set_default_properties( 'q', IndianRed3, 0, 0 ); 2649 set_default_properties( 'hh', IndianRed3, 0, 1 ); 2650 set_default_properties( 'h', IndianRed3, 1, 0 ); 2651 set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); 2652 set_default_properties( 'w', black, 0, 0 ); 2653 set_default_properties( 'n', MediumOrchid3, 0, 0 ); 2654 set_default_properties( 'v', MediumOrchid3, 0, 0 ); 2655 set_default_properties( 'j', black, 1, 0 ); 2656 set_default_properties( 'm', red, 1, 0 ); 2657 2658 set_default_color( 'html-color-background', white ); 2659 set_default_color( 'html-color-punctuation', black ); 2660 2661 # setup property lookup tables for tokens based on their short names 2662 # every token type has a short name, and will use these tables 2663 # to do the html markup 2664 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 2665 $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; 2666 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; 2667 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; 2668 } 2669 2670 # write style sheet to STDOUT and die if requested 2671 if ( defined( $rOpts->{'stylesheet'} ) ) { 2672 write_style_sheet_file('-'); 2673 exit; 2674 } 2675 2676 # make sure user gives a file name after -css 2677 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { 2678 $css_linkname = $rOpts->{'html-linked-style-sheet'}; 2679 if ( $css_linkname =~ /^-/ ) { 2680 die "You must specify a valid filename after -css\n"; 2681 } 2682 } 2683 2684 # check for conflict 2685 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { 2686 $rOpts->{'nohtml-style-sheets'} = 0; 2687 warning("You can't specify both -css and -nss; -nss ignored\n"); 2688 } 2689 2690 # write a style sheet file if necessary 2691 if ($css_linkname) { 2692 2693 # if the selected filename exists, don't write, because user may 2694 # have done some work by hand to create it; use backup name instead 2695 # Also, this will avoid a potential disaster in which the user 2696 # forgets to specify the style sheet, like this: 2697 # perltidy -html -css myfile1.pl myfile2.pl 2698 # This would cause myfile1.pl to parsed as the style sheet by GetOpts 2699 2700 my $css_filename = $css_linkname; 2701 if ( -e $css_filename ) { 2702 } 2703 else { 2704 2705 write_style_sheet_file($css_filename); 2706 } 2707 } 2708} 2709 2710sub write_style_sheet_file { 2711 2712 my $css_filename = shift; 2713 my $fh; 2714 unless ( $fh = IO::File->new("> $css_filename") ) { 2715 die "can't open $css_filename: $!\n"; 2716 } 2717 write_style_sheet_data($fh); 2718 $fh->close; 2719} 2720 2721sub write_style_sheet_data { 2722 2723 # write the style sheet data to an open file handle 2724 my $fh = shift; 2725 2726 my $bg_color = $rOpts->{'html-color-background'}; 2727 my $text_color = $rOpts->{'html-color-punctuation'}; 2728 2729 $fh->print(<<"EOM"); 2730/* default style sheet generated by perltidy */ 2731body {background: $bg_color; color: $text_color} 2732pre { color: $text_color; 2733 background: $bg_color; 2734 font-family: courier; 2735 } 2736 2737EOM 2738 2739 foreach my $short_name ( sort keys %short_to_long_names ) { 2740 my $long_name = $short_to_long_names{$short_name}; 2741 2742 my $abbrev = '.' . $short_name; 2743 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment 2744 my $color = $html_color{$short_name}; 2745 if ( !defined($color) ) { $color = $text_color } 2746 $fh->print("$abbrev \{ color: $color;"); 2747 2748 if ( $html_bold{$short_name} ) { 2749 $fh->print(" font-weight:bold;"); 2750 } 2751 2752 if ( $html_italic{$short_name} ) { 2753 $fh->print(" font-style:italic;"); 2754 } 2755 $fh->print("} /* $long_name */\n"); 2756 } 2757} 2758 2759sub set_default_color { 2760 2761 # make sure that options hash $rOpts->{$key} contains a valid color 2762 my ( $key, $color ) = @_; 2763 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } 2764 $rOpts->{$key} = check_RGB($color); 2765} 2766 2767sub check_RGB { 2768 2769 # if color is a 6 digit hex RGB value, prepend a #, otherwise 2770 # assume that it is a valid ascii color name 2771 my ($color) = @_; 2772 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } 2773 return $color; 2774} 2775 2776sub set_default_properties { 2777 my ( $short_name, $color, $bold, $italic ) = @_; 2778 2779 set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); 2780 my $key; 2781 $key = "html-bold-$short_to_long_names{$short_name}"; 2782 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; 2783 $key = "html-italic-$short_to_long_names{$short_name}"; 2784 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; 2785} 2786 2787sub close_html_file { 2788 my $self = shift; 2789 return unless $self->{_html_file_opened}; 2790 my $html_fh = $self->{_html_fh}; 2791 $html_fh->print( <<"PRE_END"); 2792</PRE> 2793PRE_END 2794 unless ( $rOpts->{'html-pre-only'} ) { 2795 $html_fh->print( <<"HTML_END"); 2796</BODY> 2797</HTML> 2798HTML_END 2799 } 2800 $html_fh->close(); 2801} 2802 2803sub markup_tokens { 2804 my $self = shift; 2805 my ( $rtokens, $rtoken_type ) = @_; 2806 my ( @colored_tokens, $j, $string, $type, $token ); 2807 2808 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 2809 $type = $$rtoken_type[$j]; 2810 $token = $$rtokens[$j]; 2811 2812 #------------------------------------------------------- 2813 # Patch : intercept a sub name here and split it 2814 # into keyword 'sub' and sub name 2815 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { 2816 $token = $self->markup_html_element( $1, 'k' ); 2817 push @colored_tokens, $token; 2818 $token = $2; 2819 $type = 'M'; 2820 } 2821 2822 # Patch : intercept a package name here and split it 2823 # into keyword 'package' and name 2824 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { 2825 $token = $self->markup_html_element( $1, 'k' ); 2826 push @colored_tokens, $token; 2827 $token = $2; 2828 $type = 'i'; 2829 } 2830 2831 #------------------------------------------------------- 2832 2833 $token = $self->markup_html_element( $token, $type ); 2834 push @colored_tokens, $token; 2835 } 2836 return \@colored_tokens; 2837} 2838 2839sub markup_html_element { 2840 my $self = shift; 2841 my ( $token, $type ) = @_; 2842 2843 return $token if ( $type eq 'b' ); # skip a blank 2844 return $token if ( $token =~ /^\s*$/ ); 2845 2846 if ($missing_html_entities) { 2847 $token =~ s/\&/&/g; 2848 $token =~ s/\</</g; 2849 $token =~ s/\>/>/g; 2850 $token =~ s/\"/"/g; 2851 } 2852 else { 2853 encode_entities($token); 2854 } 2855 2856 # get the short abbreviation for this token type 2857 my $short_name = $token_short_names{$type}; 2858 if ( !defined($short_name) ) { 2859 $short_name = "pu"; # punctuation is default 2860 } 2861 2862 # handle style sheets.. 2863 if ( !$rOpts->{'nohtml-style-sheets'} ) { 2864 if ( $short_name ne 'pu' ) { 2865 $token = qq(<SPAN CLASS="$short_name">) . $token . "</SPAN>"; 2866 } 2867 } 2868 2869 # handle no style sheets.. 2870 else { 2871 my $color = $html_color{$short_name}; 2872 2873 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { 2874 $token = qq(<FONT COLOR="$color">) . $token . "</FONT>"; 2875 } 2876 if ( $html_italic{$short_name} ) { $token = "<I>$token</I>" } 2877 if ( $html_bold{$short_name} ) { $token = "<B>$token</B>" } 2878 } 2879 return $token; 2880} 2881 2882sub finish_formatting { 2883 2884 # called after last line 2885 my $self = shift; 2886 $self->close_html_file(); 2887 return; 2888} 2889 2890sub write_line { 2891 2892 my $self = shift; 2893 return unless $self->{_html_file_opened}; 2894 my $html_fh = $self->{_html_fh}; 2895 my ($line_of_tokens) = @_; 2896 my $line_type = $line_of_tokens->{_line_type}; 2897 my $input_line = $line_of_tokens->{_line_text}; 2898 my $line_number = $line_of_tokens->{_line_number}; 2899 chomp $input_line; 2900 2901 # markup line of code.. 2902 my $html_line; 2903 if ( $line_type eq 'CODE' ) { 2904 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 2905 my $rtokens = $line_of_tokens->{_rtokens}; 2906 2907 if ( $input_line =~ /(^\s*)/ ) { 2908 $html_line = $1; 2909 } 2910 else { 2911 $html_line = ""; 2912 } 2913 my $rcolored_tokens = $self->markup_tokens( $rtokens, $rtoken_type ); 2914 $html_line .= join '', @$rcolored_tokens; 2915 } 2916 2917 # markup line of non-code.. 2918 else { 2919 my $line_character; 2920 if ( $line_type eq 'HERE' ) { $line_character = 'H' } 2921 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } 2922 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } 2923 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } 2924 elsif ( $line_type =~ /^POD/ ) { $line_character = 'P' } 2925 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } 2926 elsif ( $line_type eq 'END_START' ) { $line_character = 'k' } 2927 elsif ( $line_type eq 'DATA_START' ) { $line_character = 'k' } 2928 else { $line_character = 'Q' } 2929 $html_line = $self->markup_html_element( $input_line, $line_character ); 2930 } 2931 2932 # add the line number if requested 2933 if ( $rOpts->{'html-line-numbers'} ) { 2934 my $extra_space .= ( $line_number < 10 ) ? " " 2935 : ( $line_number < 100 ) ? " " 2936 : ( $line_number < 1000 ) ? " " 2937 : ""; 2938 $html_line = $extra_space . $line_number . " " . $html_line; 2939 } 2940 2941 # write the line 2942 $html_fh->print("$html_line\n"); 2943} 2944 2945##################################################################### 2946# 2947# The PerlTidy::Formatter package adds indentation, whitespace, and line breaks 2948# to the token stream 2949# 2950# WARNING: This is not a real class yet. Only one Formatter my be used. 2951# 2952##################################################################### 2953 2954package PerlTidy::Formatter; 2955 2956BEGIN { 2957 2958 # Caution: these debug flags produce a lot of output 2959 # They should all be 0 except when debugging small scripts 2960 use constant FORMATTER_DEBUG_FLAG_BOND => 0; 2961 use constant FORMATTER_DEBUG_FLAG_BREAK => 0; 2962 use constant FORMATTER_DEBUG_FLAG_CI => 0; 2963 use constant FORMATTER_DEBUG_FLAG_EQUALS => 0; 2964 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; 2965 use constant FORMATTER_DEBUG_FLAG_FORCE => 0; 2966 use constant FORMATTER_DEBUG_FLAG_LIST => 0; 2967 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; 2968 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; 2969 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; 2970 use constant FORMATTER_DEBUG_FLAG_STORE => 0; 2971 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; 2972 use constant FORMATTER_DEBUG_FLAG_WHITE => 0; 2973 2974 my $debug_warning = sub { 2975 print "FORMATTER_DEBUGGING with key $_[0]\n"; 2976 }; 2977 2978 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); 2979 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); 2980 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); 2981 FORMATTER_DEBUG_FLAG_EQUALS && $debug_warning->('EQUALS'); 2982 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); 2983 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); 2984 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); 2985 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); 2986 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); 2987 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); 2988 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); 2989 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); 2990 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); 2991} 2992 2993use Carp; 2994use vars qw{ 2995 2996 @gnu_stack 2997 $max_gnu_stack_index 2998 $gnu_position_predictor 2999 $line_start_index_to_go 3000 $last_indentation_written 3001 $last_unadjusted_indentation 3002 3003 $saw_VERSION_in_this_file 3004 3005 @gnu_item_list 3006 $max_gnu_item_index 3007 $gnu_sequence_number 3008 $last_output_indentation 3009 3010 @block_type_to_go 3011 @type_sequence_to_go 3012 @container_environment_to_go 3013 @bond_strength_to_go 3014 @forced_breakpoint_to_go 3015 @lengths_to_go 3016 @levels_to_go 3017 @leading_spaces_to_go 3018 @reduced_spaces_to_go 3019 @matching_token_to_go 3020 @mate_index_to_go 3021 @nesting_blocks_to_go 3022 @ci_levels_to_go 3023 @nesting_depth_to_go 3024 @nobreak_to_go 3025 @old_breakpoint_to_go 3026 @tokens_to_go 3027 @types_to_go 3028 3029 %saved_opening_indentation 3030 3031 $max_index_to_go 3032 $old_line_count_in_batch 3033 $last_nonblank_index_to_go 3034 $last_nonblank_type_to_go 3035 $last_nonblank_token_to_go 3036 $last_last_nonblank_index_to_go 3037 $last_last_nonblank_type_to_go 3038 $last_last_nonblank_token_to_go 3039 @nonblank_lines_at_depth 3040 3041 $forced_breakpoint_count 3042 $forced_breakpoint_undo_count 3043 @forced_breakpoint_undo_stack 3044 %postponed_breakpoint 3045 3046 $tabbing 3047 $tabstr 3048 $embedded_tab_count 3049 $first_embedded_tab_at 3050 $last_embedded_tab_at 3051 $deleted_semicolon_count 3052 $first_deleted_semicolon_at 3053 $last_deleted_semicolon_at 3054 $added_semicolon_count 3055 $first_added_semicolon_at 3056 $last_added_semicolon_at 3057 $saw_negative_indentation 3058 $first_tabbing_disagreement 3059 $last_tabbing_disagreement 3060 $in_tabbing_disagreement 3061 $tabbing_disagreement_count 3062 $input_line_tabbing 3063 3064 $last_line_leading_type 3065 $last_line_leading_level 3066 $last_last_line_leading_level 3067 3068 %block_leading_text 3069 %block_opening_line_number 3070 $csc_new_statement_ok 3071 $accumulating_text_for_block 3072 $leading_block_text 3073 $leading_block_text_level 3074 $leading_block_text_length_exceeded 3075 $leading_block_text_line_number 3076 $closing_side_comment_prefix_pattern 3077 $closing_side_comment_list_pattern 3078 3079 $last_nonblank_token 3080 $last_nonblank_type 3081 $last_last_nonblank_token 3082 $last_last_nonblank_type 3083 $last_nonblank_block_type 3084 $last_output_level 3085 $do_follower_pattern 3086 $if_brace_follower_pattern 3087 %space_before_paren 3088 $brace_follower_pattern 3089 $looking_for_else 3090 $other_brace_follower_pattern 3091 $else_brace_follower_pattern 3092 $anon_sub_brace_follower_pattern 3093 $anon_sub_1_brace_follower_pattern 3094 3095 @has_broken_sublist 3096 @dont_align 3097 @want_comma_break 3098 3099 $index_start_one_line_block 3100 $semicolons_before_block_self_destruct 3101 $index_max_forced_break 3102 $input_line_number 3103 $diagnostics_object 3104 $vertical_aligner_object 3105 $logger_object 3106 $file_writer_object 3107 $formatter_self 3108 @ci_stack 3109 $last_line_had_side_comment 3110 %want_break_before 3111 %outdent_keyword 3112 $static_block_comment_pattern 3113 $static_side_comment_pattern 3114 3115 $rOpts_add_whitespace 3116 $rOpts_continuation_indentation 3117 $rOpts_cuddled_else 3118 $rOpts_delete_old_whitespace 3119 $rOpts_fuzzy_line_length 3120 $rOpts_indent_columns 3121 $rOpts_line_up_parentheses 3122 $rOpts_maximum_line_length 3123 $rOpts_brace_left_and_indent 3124 3125 %is_vertical_alignment_type 3126 %tightness 3127 %matching_token 3128 $rOpts 3129 %right_bond_strength 3130 %left_bond_strength 3131 %binary_ws_rules 3132 %want_left_space 3133 %want_right_space 3134 %is_digraph 3135 %is_trigraph 3136 $bli_pattern 3137}; 3138 3139sub make_regex { 3140 3141 # Given a string, make the corresponding regex with qr. 3142 # Versions of perl before 5.005 do not have qr, 3143 # so we will just return the string, which will work 3144 # but not be optimized. 3145 BEGIN { 3146 if ( $] < 5.005 ) { 3147 sub qr { $_[0] } 3148 } 3149 } 3150 qr($_[0]); 3151} 3152 3153BEGIN { 3154 3155 # block types for which -bli is active 3156 $bli_pattern = '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 3157 my @digraphs = qw( 3158 .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <> 3159 <= >= == =~ !~ != ++ -- /= x= 3160 ); 3161 @is_digraph{@digraphs} = (1) x scalar(@digraphs); 3162 3163 my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> ); 3164 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); 3165 3166} 3167 3168# whitespace codes 3169use constant WS_YES => 1; 3170use constant WS_OPTIONAL => 0; 3171use constant WS_NO => -1; 3172 3173# Token bond strengths. 3174use constant NO_BREAK => 10000; 3175use constant VERY_STRONG => 100; 3176use constant STRONG => 2.1; 3177use constant NOMINAL => 1.1; 3178use constant WEAK => 0.8; 3179use constant VERY_WEAK => 0.55; 3180 3181# values for testing indexes in output array 3182use constant UNDEFINED_INDEX => -1; 3183 3184# Maximum number of little messages; probably need not be changed. 3185use constant MAX_NAG_MESSAGES => 6; 3186 3187# increment between sequence numbers for each type 3188# For example, ?: pairs might have numbers 7,11,15,... 3189use constant TYPE_SEQUENCE_INCREMENT => 4; 3190 3191{ 3192 3193 # methods to count instances 3194 my $_count = 0; 3195 sub get_count { $_count; } 3196 sub _increment_count { ++$_count } 3197 sub _decrement_count { --$_count } 3198} 3199 3200# interface to PerlTidy::Logger routines 3201sub warning { 3202 if ($logger_object) { 3203 $logger_object->warning(@_); 3204 } 3205} 3206 3207sub complain { 3208 if ($logger_object) { 3209 $logger_object->complain(@_); 3210 } 3211} 3212 3213sub write_logfile_entry { 3214 if ($logger_object) { 3215 $logger_object->write_logfile_entry(@_); 3216 } 3217} 3218 3219sub black_box { 3220 if ($logger_object) { 3221 $logger_object->black_box(@_); 3222 } 3223} 3224 3225sub report_definite_bug { 3226 if ($logger_object) { 3227 $logger_object->report_definite_bug(); 3228 } 3229} 3230 3231sub get_saw_brace_error { 3232 if ($logger_object) { 3233 $logger_object->get_saw_brace_error(); 3234 } 3235} 3236 3237sub we_are_at_the_last_line { 3238 if ($logger_object) { 3239 $logger_object->we_are_at_the_last_line(); 3240 } 3241} 3242 3243# interface to PerlTidy::Diagnostics routine 3244sub write_diagnostics { 3245 3246 if ($diagnostics_object) { 3247 $diagnostics_object->write_diagnostics(@_); 3248 } 3249} 3250 3251sub get_added_semicolon_count { 3252 my $self = shift; 3253 return $added_semicolon_count; 3254} 3255 3256sub DESTROY { 3257 $_[0]->_decrement_count(); 3258} 3259 3260sub new { 3261 3262 my $class = shift; 3263 3264 # we are given an object with a write_line() method to take lines 3265 my %defaults = ( 3266 sink_object => undef, 3267 diagnostics_object => undef, 3268 logger_object => undef, 3269 ); 3270 my %args = ( %defaults, @_ ); 3271 3272 $logger_object = $args{logger_object}; 3273 $diagnostics_object = $args{diagnostics_object}; 3274 3275 # FIXME: we create another object with a get_line() and peek_ahead() method 3276 my $sink_object = $args{sink_object}; 3277 $file_writer_object = 3278 PerlTidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); 3279 3280 # initialize the leading whitespace stack to negative levels 3281 # so that we can never run off the end of the stack 3282 $gnu_position_predictor = 0; # where the current token is predicted to be 3283 $max_gnu_stack_index = 0; 3284 $max_gnu_item_index = -1; 3285 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); 3286 @gnu_item_list = (); 3287 $gnu_sequence_number = 0; # this will count the output batches 3288 $last_output_indentation = 0; 3289 $last_indentation_written = 0; 3290 $last_unadjusted_indentation = 0; 3291 3292 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; 3293 3294 @block_type_to_go = (); 3295 @type_sequence_to_go = (); 3296 @container_environment_to_go = (); 3297 @bond_strength_to_go = (); 3298 @forced_breakpoint_to_go = (); 3299 @lengths_to_go = (); # line length to start of ith token 3300 @levels_to_go = (); 3301 @matching_token_to_go = (); 3302 @mate_index_to_go = (); 3303 @nesting_blocks_to_go = (); 3304 @ci_levels_to_go = (); 3305 @nesting_depth_to_go = (0); 3306 @nobreak_to_go = (); 3307 @old_breakpoint_to_go = (); 3308 @tokens_to_go = (); 3309 @types_to_go = (); 3310 @leading_spaces_to_go = (); 3311 @reduced_spaces_to_go = (); 3312 3313 @dont_align = (); 3314 @has_broken_sublist = (); 3315 @want_comma_break = (); 3316 3317 @ci_stack = (""); 3318 $saw_negative_indentation = 0; 3319 $first_tabbing_disagreement = 0; 3320 $last_tabbing_disagreement = 0; 3321 $tabbing_disagreement_count = 0; 3322 $in_tabbing_disagreement = 0; 3323 $input_line_tabbing = undef; 3324 3325 $last_last_line_leading_level = 0; 3326 $last_line_leading_level = 0; 3327 $last_line_leading_type = '#'; 3328 3329 $last_nonblank_token = ';'; 3330 $last_nonblank_type = ';'; 3331 $last_last_nonblank_token = ';'; 3332 $last_last_nonblank_type = ';'; 3333 $last_nonblank_block_type = ""; 3334 $last_output_level = 0; 3335 $looking_for_else = 0; 3336 $embedded_tab_count = 0; 3337 $first_embedded_tab_at = 0; 3338 $last_embedded_tab_at = 0; 3339 $deleted_semicolon_count = 0; 3340 $first_deleted_semicolon_at = 0; 3341 $last_deleted_semicolon_at = 0; 3342 $added_semicolon_count = 0; 3343 $first_added_semicolon_at = 0; 3344 $last_added_semicolon_at = 0; 3345 $last_line_had_side_comment = 0; 3346 %postponed_breakpoint = (); 3347 3348 # variables for adding side comments 3349 %block_leading_text = (); 3350 %block_opening_line_number = (); 3351 $csc_new_statement_ok = 1; 3352 3353 %saved_opening_indentation = (); 3354 3355 reset_block_text_accumulator(); 3356 3357 prepare_for_new_input_lines(); 3358 3359 $vertical_aligner_object = 3360 PerlTidy::VerticalAligner->initialize( $rOpts, $file_writer_object, 3361 $logger_object, $diagnostics_object ); 3362 3363 if ( $rOpts->{'tabs'} ) { 3364 write_logfile_entry("Indentation will be with a tab character\n"); 3365 } 3366 else { 3367 write_logfile_entry( 3368 "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); 3369 } 3370 3371 # This is the start of a formatter referent. 3372 # I'll populate it someday when I figure out an easy, automated 3373 # way. 3374 $formatter_self = {}; 3375 3376 bless $formatter_self, $class; 3377 3378 # Safety check..this is not a class yet 3379 if ( _increment_count() > 1 ) { 3380 confess 3381"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 3382 } 3383 3384 return $formatter_self; 3385 3386} 3387 3388sub prepare_for_new_input_lines { 3389 3390 $gnu_sequence_number++; # increment output batch counter 3391 $line_start_index_to_go = 0; 3392 $max_gnu_item_index = UNDEFINED_INDEX; 3393 $index_max_forced_break = UNDEFINED_INDEX; 3394 $max_index_to_go = UNDEFINED_INDEX; 3395 $last_nonblank_index_to_go = UNDEFINED_INDEX; 3396 $last_nonblank_type_to_go = ''; 3397 $last_nonblank_token_to_go = ''; 3398 $last_last_nonblank_index_to_go = UNDEFINED_INDEX; 3399 $last_last_nonblank_type_to_go = ''; 3400 $last_last_nonblank_token_to_go = ''; 3401 $forced_breakpoint_count = 0; 3402 $forced_breakpoint_undo_count = 0; 3403 $brace_follower_pattern = undef; 3404 $lengths_to_go[0] = 0; 3405 $old_line_count_in_batch = 1; 3406 3407 destroy_one_line_block(); 3408} 3409 3410sub write_line { 3411 3412 my $self = shift; 3413 my ($line_of_tokens) = @_; 3414 3415 my $line_type = $line_of_tokens->{_line_type}; 3416 my $input_line = $line_of_tokens->{_line_text}; 3417 my $want_blank_line_next = 0; 3418 3419 # handle line of code.. 3420 if ( $line_type eq 'CODE' ) { 3421 3422 # let logger see all non-blank lines of code 3423 if ( $input_line !~ /^\s*$/ ) { 3424 my $output_line_number = 3425 $vertical_aligner_object->get_output_line_number(); 3426 black_box( $line_of_tokens, $output_line_number ); 3427 } 3428 print_line_of_tokens($line_of_tokens); 3429 } 3430 3431 # handle line of non-code.. 3432 else { 3433 3434 # set special flags 3435 my $skip_line = 0; 3436 my $tee_line = 0; 3437 if ( $line_type =~ /^POD/ ) { 3438 3439 # pod docs should have a preceding blank line 3440 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } 3441 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } 3442 if ( !$skip_line && $line_type eq 'POD_START' ) { 3443 want_blank_line(); 3444 } 3445 3446 # patch to put a blank line after =cut 3447 # (required by podchecker) 3448 if ( $line_type eq 'POD_END' ) { 3449 $file_writer_object->reset_consecutive_blank_lines(); 3450 $want_blank_line_next = 1; 3451 } 3452 } 3453 3454 # write unindented non-code line 3455 if ( !$skip_line ) { 3456 if ($tee_line) { $file_writer_object->tee_on() } 3457 write_unindented_line($input_line); 3458 if ($tee_line) { $file_writer_object->tee_off() } 3459 if ($want_blank_line_next) { want_blank_line(); } 3460 } 3461 } 3462} 3463 3464sub create_one_line_block { 3465 $index_start_one_line_block = $_[0]; 3466 $semicolons_before_block_self_destruct = $_[1]; 3467} 3468 3469sub destroy_one_line_block { 3470 $index_start_one_line_block = UNDEFINED_INDEX; 3471 $semicolons_before_block_self_destruct = 0; 3472} 3473 3474sub leading_spaces_to_go { 3475 3476 # return the number of indentation spaces for a token in the output stream; 3477 # these were previously stored by 'set_leading_whitespace'. 3478 3479 return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); 3480 3481} 3482 3483sub get_SPACES { 3484 3485 # return the number of leading spaces associated with an indentation 3486 # variable $indentation is either a constant number of spaces or an object 3487 # with a get_SPACES method. 3488 my $indentation = shift; 3489 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 3490} 3491 3492sub get_AVAILABLE_SPACES_to_go { 3493 3494 my $item = $leading_spaces_to_go[ $_[0] ]; 3495 3496 # return the number of available leading spaces associated with an 3497 # indentation variable. $indentation is either a constant number of 3498 # spaces or an object with a get_AVAILABLE_SPACES method. 3499 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; 3500} 3501 3502sub new_lp_indentation_item { 3503 3504 # this is an interface to the IndentationItem class 3505 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; 3506 3507 # A negative level implies not to store the item in the item_list 3508 my $index = 0; 3509 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } 3510 3511 my $item = PerlTidy::IndentationItem->new( 3512 $spaces, $level, 3513 $ci_level, $available_spaces, 3514 $index, $gnu_sequence_number, 3515 $align_paren, $max_gnu_stack_index, 3516 $line_start_index_to_go, 3517 ); 3518 3519 if ( $level >= 0 ) { 3520 $gnu_item_list[$max_gnu_item_index] = $item; 3521 } 3522 3523 return $item; 3524} 3525 3526sub set_leading_whitespace { 3527 3528 # This routine defines leading whitespace 3529 # given: the level and continuation_level of a token, 3530 # define: space count of leading string which would apply if it 3531 # were the first token of a new line. 3532 3533 my ( $level, $ci_level, $in_continued_quote ) = @_; 3534 3535 # patch for -bli, which adds one continuation indentation for 3536 # opening braces 3537 if ( $rOpts_brace_left_and_indent 3538 && $max_index_to_go == 0 3539 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) 3540 { 3541 $ci_level++; 3542 } 3543 3544 # patch to avoid trouble when input file has negative indentation. 3545 # other logic should catch this error. 3546 if ( $level < 0 ) { $level = 0 } 3547 3548 #------------------------------------------- 3549 # handle the standard indentation scheme 3550 #------------------------------------------- 3551 unless ($rOpts_line_up_parentheses) { 3552 my $space_count = $ci_level * $rOpts_continuation_indentation + $level * 3553 $rOpts_indent_columns; 3554 my $ci_spaces = 3555 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; 3556 3557 if ($in_continued_quote) { 3558 $space_count = 0; 3559 $ci_spaces = 0; 3560 } 3561 $leading_spaces_to_go[$max_index_to_go] = $space_count; 3562 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; 3563 return; 3564 } 3565 3566 #------------------------------------------------------------- 3567 # handle case of -lp indentation.. 3568 #------------------------------------------------------------- 3569 3570 # The continued_quote flag means that this is the first token of a 3571 # line, and it is the continuation of some kind of multi-line quote 3572 # or pattern. It requires special treatment because it must have no 3573 # added leading whitespace. So we create a special indentation item 3574 # which is not in the stack. 3575 if ($in_continued_quote) { 3576 my $space_count = 0; 3577 my $available_space = 0; 3578 $level = -1; # flag to prevent storing in item_list 3579 $leading_spaces_to_go[$max_index_to_go] = 3580 $reduced_spaces_to_go[$max_index_to_go] = 3581 new_lp_indentation_item( $space_count, $level, $ci_level, 3582 $available_space, 0 ); 3583 return; 3584 } 3585 3586 # get the top state from the stack 3587 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 3588 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 3589 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 3590 3591 my $type = $types_to_go[$max_index_to_go]; 3592 my $token = $tokens_to_go[$max_index_to_go]; 3593 3594 # If we come to an opening paren far to the right of the '=', then 3595 # backup starting index to the '=' to avoid going beyond the right margin 3596 #if ( $token eq '(' ) { 3597 if ( $type =~ /^[\{\(\[]$/ ) { 3598 my $last_equals = $gnu_stack[$max_gnu_stack_index]->get_LAST_EQUALS(); 3599 3600 # if we have an '=' after the previous assumed breakpoint 3601 if ( $last_equals > $line_start_index_to_go ) { 3602 3603 # and we are far to the right 3604 if ( $gnu_position_predictor > $rOpts_maximum_line_length / 2 ) { 3605 3606 # ok, make the switch -- note that we do not set a real 3607 # breakpoint here because we may not really need one; sub 3608 # scan_list will do that if necessary 3609 $line_start_index_to_go = $last_equals; 3610 if ( $types_to_go[ $line_start_index_to_go + 1 ] eq 'b' ) { 3611 $line_start_index_to_go++; 3612 } 3613 3614 # and update the position predictor 3615 $gnu_position_predictor = 3616 total_line_length( $line_start_index_to_go, 3617 $max_index_to_go - 1 ); 3618 } 3619 } 3620 } 3621 3622 # Check for decreasing depth .. 3623 # Note that one token may have both decreasing and then increasing 3624 # depth. For example, (level, ci) can go from (1,1) to (2,0). So, 3625 # in this example we would first go back to (1,0) then up to (2,0) 3626 # in a single call. 3627 if ( $level < $current_level || $ci_level < $current_ci_level ) { 3628 3629 # loop to find the first entry at or completely below this level 3630 my ( $lev, $ci_lev ); 3631 while (1) { 3632 if ($max_gnu_stack_index) { 3633 3634 # save index of token which closes this level 3635 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); 3636 3637 # Undo any extra indentation if we saw no commas 3638 my $available_spaces = 3639 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); 3640 my $comma_count = 3641 $gnu_stack[$max_gnu_stack_index]->get_COMMA_COUNT(); 3642 3643 if ( $comma_count <= 0 && $available_spaces > 0 ) { 3644 3645 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); 3646 my $seqno = 3647 $gnu_stack[$max_gnu_stack_index]->get_SEQUENCE_NUMBER(); 3648 3649 # Be sure this item was created in this batch. This 3650 # should be true because we delete any available 3651 # space from open items at the end of each batch. 3652 if ( $gnu_sequence_number != $seqno 3653 || $i > $max_gnu_item_index ) 3654 { 3655 warning( 3656"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" 3657 ); 3658 report_definite_bug(); 3659 } 3660 3661 else { 3662 $gnu_item_list[$i] 3663 ->permanently_decrease_AVAILABLE_SPACES( 3664 $available_spaces); 3665 3666 my $j; 3667 for ( $j = $i + 1 ; $j <= $max_gnu_item_index ; $j++ ) { 3668 $gnu_item_list[$j] 3669 ->decrease_SPACES($available_spaces); 3670 } 3671 } 3672 } 3673 3674 # go down one level 3675 --$max_gnu_stack_index; 3676 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 3677 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 3678 3679 # stop when we reach a level at or below the current level 3680 if ( $lev <= $level && $ci_lev <= $ci_level ) { 3681 $space_count = 3682 $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 3683 $current_level = $lev; 3684 $current_ci_level = $ci_lev; 3685 last; 3686 } 3687 } 3688 3689 # reached bottom of stack .. should never happen because 3690 # only negative levels can get here, and $level was forced 3691 # to be positive above. 3692 else { 3693 warning( 3694"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" 3695 ); 3696 report_definite_bug(); 3697 last; 3698 } 3699 } 3700 } 3701 3702 # handle increasing depth 3703 if ( $level > $current_level || $ci_level > $current_ci_level ) { 3704 3705 # Compute the standard incremental whitespace. This will be 3706 # the minimum incremental whitespace that will be used. This 3707 # choice results in a smooth transition between the gnu-style 3708 # and the standard style. 3709 my $standard_increment = 3710 ( $level - $current_level ) * $rOpts_indent_columns + 3711 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; 3712 3713 # Now we have to define how much extra incremental space 3714 # ("$available_space") we want. This extra space will be 3715 # reduced as necessary when long lines are encountered or when 3716 # it becomes clear that we do not have a good list. 3717 my $available_space = 0; 3718 my $align_paren = 0; 3719 my $excess = 0; 3720 3721 # initialization on empty stack.. 3722 if ( $max_gnu_stack_index == 0 ) { 3723 $space_count = $level * $rOpts_indent_columns; 3724 } 3725 3726 # if this is a BLOCK, add the standard increment 3727 elsif ($last_nonblank_block_type) { 3728 $space_count += $standard_increment; 3729 } 3730 3731 # if last nonblank token was not structural indentation, 3732 # just use standard increment 3733 elsif ( $last_nonblank_type ne '{' ) { 3734 $space_count += $standard_increment; 3735 } 3736 3737 # otherwise use the space to the first non-blank level change token 3738 else { 3739 3740 ( $space_count, $available_space, $excess ) = 3741 get_gnu_indentation( $standard_increment, 3742 $gnu_position_predictor ); 3743 $align_paren = 1; 3744 } 3745 3746 # update state, but not on a blank token 3747 if ( $types_to_go[$max_index_to_go] ne 'b' ) { 3748 3749 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); 3750 3751 ++$max_gnu_stack_index; 3752 $gnu_stack[$max_gnu_stack_index] = 3753 new_lp_indentation_item( $space_count, $level, $ci_level, 3754 $available_space, $align_paren ); 3755 3756 #TESTING-LP 3757 ################################################################## 3758 #$gnu_stack[$max_gnu_stack_index]->set_RECOVERABLE_SPACES($excess); 3759 ################################################################## 3760 } 3761 } 3762 3763 # Count commas and look for non-list characters. Once we see a 3764 # non-list character, we give up and don't look for any more commas. 3765 if ( $type eq ',' ) { 3766 3767 my $comma_count = $gnu_stack[$max_gnu_stack_index]->get_COMMA_COUNT(); 3768 if ( $comma_count >= 0 ) { 3769 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT( ++$comma_count ); 3770 } 3771 } 3772 elsif ( $type eq '=' ) { 3773 $gnu_stack[$max_gnu_stack_index]->set_LAST_EQUALS($max_index_to_go); 3774 } 3775 3776=pod 3777 3778 # Filter out non-lists. Note that '=' in this filter could be in 3779 # any of the = operators. Actually, we might be in a list if we see 3780 # ? and : following a comma or comma arrow, but it probably won't 3781 # format well any way with the extra gnu spaces, because it will 3782 # probably be a long line that breaks after the ':', so it isn't 3783 # worth worrying about. Example: 3784 3785 my $ftp = $pkg->SUPER::new( 3786 Timeout => defined $arg{Timeout} ? $arg{Timeout} : 120, 3787 PeerAddr => $peer, 3788 ); 3789 3790 NEEDS FURTHER EVALUATION 3791 # BUB: TESTING: commented out: needs more evaluation 3792 #elsif ( $type =~ /(^[\?\:\;\<\>\~]$)|[=]/ && $type !~ /^=>$/ ) { 3793 # $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT(-1); 3794 #} 3795 3796=cut 3797 3798 # this token might start a new line 3799 # if this is a non-blank.. 3800 if ( $type ne 'b' ) { 3801 3802 # '.' ':', '?' '||' '&&' added (mci.t) 3803 #if ( $last_nonblank_type =~ /^([\,\{])$/ 3804 3805 # and if .. 3806 if ( 3807 3808 # previous character was one of these: 3809 $last_nonblank_type =~ /^([\:\?\,])$/ 3810 3811 # previous character was opening and this does not close it 3812 || ( $last_nonblank_type eq '{' && $type ne '}' ) 3813 3814 # or this token is one of these: 3815 || $type =~ /^([\.]|\|\||\&\&)$/ 3816 3817 # or this is a closing structure 3818 || ( $type eq '}' && $token eq $type ) 3819 3820 # or previous token was a keyword 3821 || ( 3822 $last_nonblank_type eq 'k' 3823 3824 # in this set: 3825 && ( 3826 $last_nonblank_token =~ 3827 /^(if|unless|and|or|last|next|redo|return)$/ 3828 3829 && $type ne '{' 3830 ) 3831 ) 3832 3833 # or this is after an assignment after a closing structure 3834 || ( 3835 $last_nonblank_type =~ /=/ 3836 && $last_nonblank_type !~ /(==|!=|>=|<=|=~|=>)/ 3837 && ( 3838 $last_last_nonblank_type =~ /^[\}\)\]]$/ 3839 3840 # and it is significantly to the right 3841 || $gnu_position_predictor > $rOpts_maximum_line_length / 2 3842 ) 3843 ) 3844 ) 3845 { 3846 check_for_long_gnu_style_lines(); 3847 $line_start_index_to_go = $max_index_to_go; 3848 3849 # back up 1 token if we want to break before that type 3850 # otherwise, we may strand tokens like '?' or ':' on a line 3851 if ( $line_start_index_to_go > 0 ) { 3852 if ( $last_nonblank_type eq 'k' ) { 3853 if ( $last_nonblank_token =~ /^(and|or)$/ ) { 3854 $line_start_index_to_go--; 3855 } 3856 } 3857 elsif ( $want_break_before{$last_nonblank_type} ) { 3858 $line_start_index_to_go--; 3859 } 3860 } 3861 } 3862 } 3863 3864 # remember the predicted position of this token on the output line 3865 if ( $max_index_to_go > $line_start_index_to_go ) { 3866 $gnu_position_predictor = 3867 total_line_length( $line_start_index_to_go, $max_index_to_go ); 3868 } 3869 else { 3870 $gnu_position_predictor = $space_count + 3871 token_sequence_length( $max_index_to_go, $max_index_to_go ); 3872 } 3873 3874 # store the indentation object for this token 3875 # this allows us to manipulate the leading whitespace 3876 # (in case we have to reduce indentation to fit a line) without 3877 # having to change any token values 3878 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; 3879 $reduced_spaces_to_go[$max_index_to_go] = 3880 ( $max_gnu_stack_index > 0 && $ci_level ) 3881 ? $gnu_stack[ $max_gnu_stack_index - 1 ] 3882 : $gnu_stack[$max_gnu_stack_index]; 3883 return; 3884} 3885 3886sub get_gnu_indentation { 3887 3888 # define the next indentation space count for the case that gnu-style 3889 # indentation will be used for this token. 3890 3891 # basically, we want to indent to just after the last token on the 3892 # previous line, but we have to put some limits on this 3893 3894 my ( $standard_increment, $gnu_indentation ) = @_; 3895 3896 # total indentation we already have: 3897 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 3898 3899 # avoid excessive space if requested... 3900 my $excess = 0; 3901 if ( $rOpts->{'maximum-continuation-indentation'} > 0 ) { 3902 $excess = 3903 ( $gnu_indentation - $space_count ) - 3904 $rOpts->{'maximum-continuation-indentation'}; 3905 if ( $excess > 0 ) { 3906 3907 #TESTING: - FUTURE LOGIC MAY SKIP THIS 3908 $gnu_indentation -= $excess; 3909 } 3910 else { $excess = 0 } 3911 } 3912 3913 # always add an increment of at least the standard amount 3914 my $min_gnu_indentation = $space_count + $standard_increment; 3915 3916 # how many spaces do we have extra? 3917 my $available_space = $gnu_indentation - $min_gnu_indentation; 3918 3919 #TESTING-LP : FUTURE UPDATE 3920 #$available_space -= $excess; 3921 $excess = 0; 3922 3923 #---------------------------------------------------------------- 3924 # for TESTING only: this should make the indentation identical to 3925 # the standard scheme, even though a stack is used. This is a good 3926 # way to test the proper functioning of the stack. 3927 my $TESTING = 0; # 0 or 1 3928 3929 #---------------------------------------------------------------- 3930 3931 # maintain at least the minimum incremental spacing 3932 if ( $TESTING || $available_space < 0 ) { 3933 $gnu_indentation = $min_gnu_indentation; 3934 $available_space = 0; 3935 $excess = 0; 3936 } 3937 3938 return ( $gnu_indentation, $available_space, $excess ); 3939} 3940 3941sub check_for_long_gnu_style_lines { 3942 3943 # look at the current estimated maximum line length, and 3944 # remove some whitespace if it exceeds the desired maximum 3945 3946 # this is only for the '-lp' style 3947 return unless ($rOpts_line_up_parentheses); 3948 3949 # nothing can be done if no stack items defined for this line 3950 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 3951 3952 # see if we have exceeded the maximum desired line length 3953 # keep 2 extra free because they are needed in some cases 3954 # (result of trial-and-error testing) 3955 my $spaces_needed = 3956 $gnu_position_predictor - $rOpts_maximum_line_length + 2; 3957 3958 return if ( $spaces_needed < 0 ); 3959 3960 # We are over the limit, so try to remove a requested number of 3961 # spaces from leading whitespace. We are only allowed to remove 3962 # from whitespace items created on this batch, since others have 3963 # already been used and cannot be undone. 3964 my @candidates = (); 3965 my $i; 3966 3967 # loop over all whitespace items created for the current batch 3968 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 3969 my $item = $gnu_item_list[$i]; 3970 3971 # item must still be open to be a candidate (otherwise it 3972 # cannot influence the current token) 3973 next if ( $item->get_CLOSED() >= 0 ); 3974 3975 my $available_spaces = $item->get_AVAILABLE_SPACES(); 3976 3977 if ( $available_spaces > 0 ) { 3978 push ( @candidates, [ $i, $available_spaces ] ); 3979 } 3980 } 3981 3982 return unless (@candidates); 3983 3984 # sort by available whitespace so that we can remove whitespace 3985 # from the maximum available first 3986 @candidates = sort { $b->[1] <=> $a->[1] } @candidates; 3987 3988 # keep removing whitespace until we are done or have no more 3989 my $candidate; 3990 foreach $candidate (@candidates) { 3991 my ( $i, $available_spaces ) = @{$candidate}; 3992 my $deleted_spaces = 3993 ( $available_spaces > $spaces_needed ) 3994 ? $spaces_needed 3995 : $available_spaces; 3996 3997 # remove the incremental space from this item 3998 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); 3999 4000 my $i_debug = $i; 4001 4002 # update the leading whitespace of this item and all items 4003 # that came after it 4004 for ( ; $i <= $max_gnu_item_index ; $i++ ) { 4005 4006 my $old_spaces = $gnu_item_list[$i]->get_SPACES(); 4007 if ( $old_spaces > $deleted_spaces ) { 4008 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); 4009 } 4010 4011 # shouldn't happen except for code bug: 4012 else { 4013 my $level = $gnu_item_list[$i_debug]->get_LEVEL(); 4014 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); 4015 my $old_level = $gnu_item_list[$i]->get_LEVEL(); 4016 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); 4017 warning( 4018"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" 4019 ); 4020 report_definite_bug(); 4021 } 4022 } 4023 $gnu_position_predictor -= $deleted_spaces; 4024 $spaces_needed -= $deleted_spaces; 4025 last unless ( $spaces_needed > 0 ); 4026 } 4027} 4028 4029sub finish_lp_batch { 4030 4031 # This routine is called once after each each output stream batch is 4032 # finished to undo indentation for all incomplete -lp 4033 # indentation levels. It is too risky to leave a level open, 4034 # because then we can't backtrack in case of a long line to follow. 4035 # This means that comments and blank lines will disrupt this 4036 # indentation style. But the vertical aligner may be able to 4037 # get the space back if there are side comments. 4038 4039 # this is only for the 'lp' style 4040 return unless ($rOpts_line_up_parentheses); 4041 4042 # nothing can be done if no stack items defined for this line 4043 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 4044 4045 # loop over all whitespace items created for the current batch 4046 my $i; 4047 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 4048 my $item = $gnu_item_list[$i]; 4049 4050 # only look for open items 4051 next if ( $item->get_CLOSED() >= 0 ); 4052 4053 # forget index of any '=' 4054 $item->set_LAST_EQUALS(-1); 4055 4056 # Tentatively remove all of the available space 4057 # (The vertical aligner will try to get it back later) 4058 my $available_spaces = $item->get_AVAILABLE_SPACES(); 4059 if ( $available_spaces > 0 ) { 4060 4061 # delete incremental space for this item 4062 $gnu_item_list[$i] 4063 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); 4064 4065 # Reduce the total indentation space of any nodes that follow 4066 # Note that any such nodes must necessarily be dependents 4067 # of this node. 4068 foreach my $j ( $i + 1 .. $max_gnu_item_index ) { 4069 $gnu_item_list[$j]->decrease_SPACES($available_spaces); 4070 } 4071 } 4072 } 4073 return; 4074} 4075 4076sub reduce_lp_indentation { 4077 4078 # reduce the leading whitespace at token $i if possible by $spaces_needed 4079 # (a large value of $spaces_needed will remove all excess space) 4080 # NOTE: to be called from scan_list only for a sequence of tokens 4081 # contained between opening and closing parens/braces/brackets 4082 4083 my ( $i, $spaces_needed ) = @_; 4084 my $deleted_spaces = 0; 4085 4086 # this is only for the -lp style 4087 if ($rOpts_line_up_parentheses) { 4088 4089 # it is only safe to undo indentation if there are no children. 4090 my $item = $leading_spaces_to_go[$i]; 4091 if ( !$item->get_HAVE_CHILD() ) { 4092 4093 # we'll remove these spaces, but mark them as recoverable 4094 $deleted_spaces = 4095 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_needed); 4096 } 4097 } 4098 4099 return $deleted_spaces; 4100} 4101 4102sub token_sequence_length { 4103 4104 # return length of tokens ($ifirst .. $ilast) including first & last 4105 # returns 0 if $ifirst > $ilast 4106 my $ifirst = shift; 4107 my $ilast = shift; 4108 return 0 if ( $ilast < 0 || $ifirst > $ilast ); 4109 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 ); 4110 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst]; 4111} 4112 4113sub total_line_length { 4114 4115 # return length of a line of tokens ($ifirst .. $ilast) 4116 my $ifirst = shift; 4117 my $ilast = shift; 4118 if ( $ifirst < 0 ) { $ifirst = 0 } 4119 4120 return leading_spaces_to_go($ifirst) + 4121 token_sequence_length( $ifirst, $ilast ); 4122} 4123 4124sub excess_line_length { 4125 4126 # return number of characters by which a line of tokens ($ifirst..$ilast) 4127 # exceeds the allowable line length. 4128 my $ifirst = shift; 4129 my $ilast = shift; 4130 if ( $ifirst < 0 ) { $ifirst = 0 } 4131 return leading_spaces_to_go($ifirst) + 4132 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; 4133} 4134 4135sub finish_formatting { 4136 4137 # flush buffer and write any informative messages 4138 my $self = shift; 4139 4140 flush(); 4141 $file_writer_object->decrement_output_line_number() 4142 ; # fix up line number since it was incremented 4143 we_are_at_the_last_line(); 4144 if ( $added_semicolon_count > 0 ) { 4145 my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; 4146 my $what = 4147 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; 4148 write_logfile_entry("$added_semicolon_count $what added:\n"); 4149 write_logfile_entry( 4150 " $first at input line $first_added_semicolon_at\n"); 4151 4152 if ( $added_semicolon_count > 1 ) { 4153 write_logfile_entry( 4154 " Last at input line $last_added_semicolon_at\n"); 4155 } 4156 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); 4157 write_logfile_entry("\n"); 4158 } 4159 4160 if ( $deleted_semicolon_count > 0 ) { 4161 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; 4162 my $what = 4163 ( $deleted_semicolon_count > 1 ) 4164 ? "semicolons were" 4165 : "semicolon was"; 4166 write_logfile_entry( 4167 "$deleted_semicolon_count unnecessary $what deleted:\n"); 4168 write_logfile_entry( 4169 " $first at input line $first_deleted_semicolon_at\n"); 4170 4171 if ( $deleted_semicolon_count > 1 ) { 4172 write_logfile_entry( 4173 " Last at input line $last_deleted_semicolon_at\n"); 4174 } 4175 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); 4176 write_logfile_entry("\n"); 4177 } 4178 4179 if ( $embedded_tab_count > 0 ) { 4180 my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; 4181 my $what = 4182 ( $embedded_tab_count > 1 ) 4183 ? "quotes or patterns" 4184 : "quote or pattern"; 4185 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); 4186 write_logfile_entry( 4187"This means the display of this script could vary with device or software\n" 4188 ); 4189 write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); 4190 4191 if ( $embedded_tab_count > 1 ) { 4192 write_logfile_entry( 4193 " Last at input line $last_embedded_tab_at\n"); 4194 } 4195 write_logfile_entry("\n"); 4196 } 4197 4198 if ($first_tabbing_disagreement) { 4199 write_logfile_entry( 4200"First indentation disagreement seen at input line $first_tabbing_disagreement\n" 4201 ); 4202 } 4203 4204 if ($in_tabbing_disagreement) { 4205 write_logfile_entry( 4206"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" 4207 ); 4208 } 4209 else { 4210 4211 if ($last_tabbing_disagreement) { 4212 4213 write_logfile_entry( 4214"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" 4215 ); 4216 } 4217 else { 4218 write_logfile_entry("No indentation disagreement seen\n"); 4219 } 4220 } 4221 write_logfile_entry("\n"); 4222 4223 $vertical_aligner_object->report_anything_unusual(); 4224 4225 $file_writer_object->report_line_length_errors(); 4226} 4227 4228sub check_options { 4229 4230 # This routine is called to check the Opts hash after it is defined 4231 4232 ($rOpts) = @_; 4233 my ( $tabbing_string, $tab_msg ); 4234 my @list; # working storage 4235 4236 make_static_block_comment_pattern(); 4237 make_static_side_comment_pattern(); 4238 make_closing_side_comment_prefix(); 4239 make_closing_side_comment_list_pattern(); 4240 4241 # The -lp indentation logic requires that perltidy examine large 4242 # blocks of code between flushing. When the user takes control 4243 # of line breaks, perltidy never sees large enough buffers to 4244 # use the -lp style. There's no way around this. 4245 if ( $rOpts->{'line-up-parentheses'} ) { 4246 4247 if ( $rOpts->{'indent-only'} 4248 || !$rOpts->{'add-newlines'} 4249 || !$rOpts->{'delete-old-newlines'} ) 4250 { 4251 print STDERR <<EOM; 4252Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp 4253EOM 4254 $rOpts->{'line-up-parentheses'} = 0; 4255 } 4256 } 4257 4258 # At present, tabs are not compatable with the line-up-parentheses style 4259 # (it would be possible to entab the total leading whitespace 4260 # just prior to writing the line, if desired). 4261 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { 4262 print STDERR <<EOM; 4263Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t. 4264EOM 4265 $rOpts->{'tabs'} = 0; 4266 } 4267 4268 # Likewise, tabs are not compatable with outdenting.. 4269 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { 4270 print STDERR <<EOM; 4271Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t. 4272EOM 4273 $rOpts->{'tabs'} = 0; 4274 } 4275 4276 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { 4277 print STDERR <<EOM; 4278Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t. 4279EOM 4280 $rOpts->{'tabs'} = 0; 4281 } 4282 4283 if ( $rOpts->{'tabs'} ) { 4284 $tabstr = "\t"; 4285 } 4286 else { 4287 $tabstr = " " x $rOpts->{'indent-columns'}; 4288 } 4289 4290 if ( !$rOpts->{'space-for-semicolon'} ) { 4291 $want_left_space{'f'} = -1; 4292 } 4293 4294 if ( $rOpts->{'space-terminal-semicolon'} ) { 4295 $want_left_space{';'} = 1; 4296 } 4297 4298 # implement outdenting preferences for keywords 4299 %outdent_keyword = (); 4300 4301 # load defaults 4302 @list = qw(next last redo goto return); 4303 4304 # override defaults if requested 4305 if ( $rOpts->{'outdent-keyword-list'} ) { 4306 $rOpts->{'outdent-keyword-list'} =~ s/^\s*//; 4307 $rOpts->{'outdent-keyword-list'} =~ s/\s*$//; 4308 @list = split /\s+/, $rOpts->{'outdent-keyword-list'}; 4309 } 4310 4311 # FUTURE: if not a keyword, assume that it is an identifier 4312 foreach my $i (@list) { 4313 if ( $PerlTidy::Tokenizer::is_keyword{$i} ) { 4314 $outdent_keyword{$i} = 1; 4315 } 4316 else { 4317 print STDERR "ignoring '$i' in -okwl list; not a perl keyword"; 4318 } 4319 } 4320 4321 # implement user whitespace preferences 4322 if ( $rOpts->{'want-left-space'} ) { 4323 @list = split /\s/, $rOpts->{'want-left-space'}; 4324 @want_left_space{@list} = (1) x scalar(@list); 4325 } 4326 4327 if ( $rOpts->{'want-right-space'} ) { 4328 @list = split /\s/, $rOpts->{'want-right-space'}; 4329 @want_right_space{@list} = (1) x scalar(@list); 4330 } 4331 if ( $rOpts->{'nowant-left-space'} ) { 4332 @list = split /\s/, $rOpts->{'nowant-left-space'}; 4333 @want_left_space{@list} = (-1) x scalar(@list); 4334 } 4335 4336 if ( $rOpts->{'nowant-right-space'} ) { 4337 @list = split /\s/, $rOpts->{'nowant-right-space'}; 4338 @want_right_space{@list} = (-1) x scalar(@list); 4339 } 4340 if ( $rOpts->{'dump-want-left-space'} ) { 4341 dump_want_left_space(*STDOUT); 4342 exit 1; 4343 } 4344 4345 if ( $rOpts->{'dump-want-right-space'} ) { 4346 dump_want_right_space(*STDOUT); 4347 exit 1; 4348 } 4349 4350 # implement user break preferences 4351 if ( $rOpts->{'want-break-after'} ) { 4352 @list = split /\s/, $rOpts->{'want-break-after'}; 4353 foreach my $tok (@list) { 4354 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: 4355 my $lbs = $left_bond_strength{$tok}; 4356 my $rbs = $right_bond_strength{$tok}; 4357 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { 4358 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 4359 ( $lbs, $rbs ); 4360 } 4361 } 4362 } 4363 4364 if ( $rOpts->{'want-break-before'} ) { 4365 @list = split /\s/, $rOpts->{'want-break-before'}; 4366 foreach my $tok (@list) { 4367 my $lbs = $left_bond_strength{$tok}; 4368 my $rbs = $right_bond_strength{$tok}; 4369 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { 4370 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 4371 ( $lbs, $rbs ); 4372 } 4373 } 4374 } 4375 4376 # make note if breaks are before certain key types 4377 %want_break_before = (); 4378 foreach my $tok ( '.', ',', ':', '?', '&&', '||' ) { 4379 $want_break_before{$tok} = 4380 $left_bond_strength{$tok} < $right_bond_strength{$tok}; 4381 } 4382 4383 # Coordinate ?/: breaks, which must be similar 4384 if ( !$want_break_before{':'} ) { 4385 $want_break_before{'?'} = $want_break_before{':'}; 4386 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; 4387 $left_bond_strength{'?'} = NO_BREAK; 4388 } 4389 4390 # Define here tokens which may follow the closing brace of a do statement 4391 # on the same line, as in: 4392 # } while ( $something); 4393 $do_follower_pattern = make_regex('^(until|while|unless|if|;|,)$'); 4394 4395 # These tokens may follow the closing brace of an if or elsif block. 4396 # In other words, for cuddled else we want code to look like: 4397 # } elsif ( $something) { 4398 # } else { 4399 if ( $rOpts->{'cuddled-else'} ) { 4400 $if_brace_follower_pattern = make_regex('^(else|elsif)$'); 4401 } 4402 else { $if_brace_follower_pattern = undef; } 4403 4404 # nothing can follow the closing curly of an else { } block: 4405 $else_brace_follower_pattern = make_regex('^$'); 4406 4407 # what can follow a multi-line anonymous sub definition closing curly: 4408 $anon_sub_brace_follower_pattern = 4409 make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\)|)$'); 4410 4411 # what can follow a one-line anonynomous sub closing curly: 4412 # one-line anonumous subs also have ']' here... 4413 # see tk3.t and PP.pm 4414 $anon_sub_1_brace_follower_pattern = 4415 make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\]|\)|)$'); 4416 4417 # What can follow a closing curly of a short block 4418 # which is not an if/elsif/else/do/sort/map/grep/eval/sub 4419 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' 4420 $other_brace_follower_pattern = 4421 make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\)|)$'); 4422 4423 # TESTING -- deactivated 20010614 because -bl didn't 4424 # work well with it. 4425 #if ( $rOpts->{'opening-brace-on-new-line'} ) { 4426 # $left_bond_strength{'{'} = WEAK; 4427 # $right_bond_strength{'{'} = VERY_STRONG; 4428 #} 4429 #else { 4430 $right_bond_strength{'{'} = WEAK; 4431 $left_bond_strength{'{'} = VERY_STRONG; 4432 4433 #} 4434 4435 # make -l=0 equal to -l=infinite 4436 if ( !$rOpts->{'maximum-line-length'} ) { 4437 $rOpts->{'maximum-line-length'} = 1000000; 4438 } 4439 4440 # make -lbl=0 equal to -lbl=infinite 4441 if ( !$rOpts->{'long-block-line-count'} ) { 4442 $rOpts->{'long-block-line-count'} = 1000000; 4443 } 4444 4445 # hashes used to simplify setting whitespace 4446 %tightness = ( 4447 '{' => $rOpts->{'brace-tightness'}, 4448 '}' => $rOpts->{'brace-tightness'}, 4449 '(' => $rOpts->{'paren-tightness'}, 4450 ')' => $rOpts->{'paren-tightness'}, 4451 '[' => $rOpts->{'square-bracket-tightness'}, 4452 ']' => $rOpts->{'square-bracket-tightness'}, 4453 ); 4454 %matching_token = ( 4455 '{' => '}', 4456 '(' => ')', 4457 '[' => ']', 4458 '?' => ':', 4459 ); 4460 4461 # frequently used parameters 4462 $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; 4463 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; 4464 $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; 4465 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; 4466 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; 4467 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 4468 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; 4469 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 4470 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; 4471 4472} 4473 4474sub make_static_block_comment_pattern { 4475 4476 # create the pattern used to identify static block comments 4477 $static_block_comment_pattern = '^(\s*)##'; 4478 4479 # allow the user to change it 4480 if ( $rOpts->{'static-block-comment-prefix'} ) { 4481 my $prefix = $rOpts->{'static-block-comment-prefix'}; 4482 $prefix =~ s/^\s*//; 4483 if ( $prefix !~ /^#/ ) { 4484 die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n"; 4485 4486 } 4487 my $pattern = '^(\s*)' . $prefix; 4488 eval "'##'=~/$pattern/"; 4489 if ($@) { 4490 die 4491"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; 4492 } 4493 $static_block_comment_pattern = $pattern; 4494 } 4495} 4496 4497sub make_closing_side_comment_list_pattern { 4498 4499 # turn any input list into a regex for recognizing selected block types 4500 $closing_side_comment_list_pattern = '^\w+'; 4501 if ( defined( $rOpts->{'closing-side-comment-list'} ) 4502 && $rOpts->{'closing-side-comment-list'} ) 4503 { 4504 $closing_side_comment_list_pattern = 4505 make_block_pattern( $rOpts->{'closing-side-comment-list'} ); 4506 } 4507} 4508 4509sub make_block_pattern { 4510 4511=pod 4512 4513 given a string of block-type keywords, return a regex to match them 4514 The only tricky part is that labels are indicated with a single ':' 4515 and the 'sub' token text may have additional text after it (name of sub). 4516 4517 Example: 4518 4519 input string: "if else elsif unless while for foreach do : sub"; 4520 pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 4521 4522=cut 4523 4524 my ( $string, $abbrev ) = @_; 4525 $string =~ s/^\s*//; 4526 $string =~ s/\s$//; 4527 my @list = split /\s+/, $string; 4528 my $saw_sub = 0; 4529 my @words = (); 4530 for my $i (@list) { 4531 if ( $i eq 'sub' ) { $saw_sub = 1 } 4532 elsif ( $i eq ':' ) { 4533 push @words, '\w+:'; 4534 } 4535 elsif ( $i =~ /^\w/ ) { 4536 push @words, $i; 4537 } 4538 else { 4539 print STDERR "unrecognized block type $i after -cscl, ignoring\n"; 4540 } 4541 } 4542 my $pattern = '(' . join ( '|', @words ) . ')$'; 4543 if ($saw_sub) { 4544 $pattern = '(' . $pattern . '|sub)'; 4545 } 4546 $pattern = '^' . $pattern; 4547 4548 return $pattern; 4549} 4550 4551sub make_static_side_comment_pattern { 4552 4553 # create the pattern used to identify static side comments 4554 $static_side_comment_pattern = '^##'; 4555 4556 # allow the user to change it 4557 if ( $rOpts->{'static-side-comment-prefix'} ) { 4558 my $prefix = $rOpts->{'static-side-comment-prefix'}; 4559 $prefix =~ s/^\s*//; 4560 my $pattern = '^' . $prefix; 4561 eval "'##'=~/$pattern/"; 4562 if ($@) { 4563 die 4564"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; 4565 } 4566 $static_side_comment_pattern = $pattern; 4567 } 4568} 4569 4570sub make_closing_side_comment_prefix { 4571 4572 # Be sure we have a valid closing side comment prefix 4573 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; 4574 my $csc_prefix_pattern; 4575 if ( !defined($csc_prefix) ) { 4576 $csc_prefix = '## end'; 4577 $csc_prefix_pattern = '^##\s+end'; 4578 } 4579 else { 4580 my $test_csc_prefix = $csc_prefix; 4581 if ( $test_csc_prefix !~ /^#/ ) { 4582 $test_csc_prefix = '#' . $test_csc_prefix; 4583 } 4584 4585 # make a regex to recognize the prefix 4586 my $test_csc_prefix_pattern = $test_csc_prefix; 4587 4588 # escape any special characters 4589 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; 4590 4591 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; 4592 4593 # allow exact number of intermediate spaces to vary 4594 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; 4595 4596 # make sure we have a good pattern 4597 # if we fail this we probably have an error in escaping 4598 # characters. 4599 eval "'##'=~/$test_csc_prefix_pattern/"; 4600 if ($@) { 4601 4602 # shouldn't happen..must have screwed up escaping, above 4603 report_definite_bug(); 4604 print STDERR 4605"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; 4606 4607 # just warn and keep going with defaults 4608 print STDERR "Please consider using a simpler -cscp prefix\n"; 4609 print STDERR "Using default -cscp instead; please check output\n"; 4610 } 4611 else { 4612 $csc_prefix = $test_csc_prefix; 4613 $csc_prefix_pattern = $test_csc_prefix_pattern; 4614 } 4615 } 4616 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; 4617 $closing_side_comment_prefix_pattern = $csc_prefix_pattern; 4618} 4619 4620sub dump_want_left_space { 4621 my $fh = shift; 4622 local $" = "\n"; 4623 print $fh <<EOM; 4624These values are the main control of whitespace to the left of a token type; 4625They may be altered with the -wls parameter. 4626For a list of token types, use perltidy --dump-token-types (-dtt) 4627 1 means the token wants a space to its left 4628-1 means the token does not want a space to its left 4629--------------------------------------------------------------- 4630EOM 4631 foreach my $i ( sort keys %want_left_space ) { 4632 print $fh "$i\t$want_left_space{$i}\n"; 4633 } 4634} 4635 4636sub dump_want_right_space { 4637 my $fh = shift; 4638 local $" = "\n"; 4639 print $fh <<EOM; 4640These values are the main control of whitespace to the right of a token type; 4641They may be altered with the -wrs parameter. 4642For a list of token types, use perltidy --dump-token-types (-dtt) 4643 1 means the token wants a space to its right 4644-1 means the token does not want a space to its right 4645--------------------------------------------------------------- 4646EOM 4647 foreach my $i ( sort keys %want_right_space ) { 4648 print $fh "$i\t$want_right_space{$i}\n"; 4649 } 4650} 4651 4652sub is_essential_whitespace { 4653 4654 # Essential whitespace means whitespace which cannot be safely deleted. 4655 # We are given three tokens and their types: 4656 # ($tokenl, $typel) is the token to the left of the space in question 4657 # ($tokenr, $typer) is the token to the right of the space in question 4658 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl 4659 # 4660 # This is a slow routine but is needed too often except when -mangle 4661 # is used. 4662 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; 4663 4664 # never combine two bare words or numbers 4665 my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) ) 4666 4667 # do not combine a number with a concatination dot 4668 # example: pom.caputo: 4669 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); 4670 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) 4671 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) 4672 4673 # do not join a minus with a bare word, because you might form 4674 # a file test operator. Example from Complex.pm: 4675 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. 4676 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) 4677 4678 # and something like this could become ambiguous without space 4679 # after the '-': 4680 # use constant III=>1; 4681 # $a = $b - III; 4682 # and even this: 4683 # $a = - III; 4684 || ( ( $tokenl eq '-' ) 4685 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) 4686 4687 # '= -' should not become =- or you will get a warning 4688 # about reversed -= 4689 # || ($tokenr eq '-') 4690 4691 # keep a space between a quote and a bareword to prevent the 4692 # bareword from becomming a quote modifier. 4693 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 4694 4695 # perl is very fussy about spaces before << 4696 || ( $tokenr =~ /^\<\</ ) 4697 4698 # avoid combining tokens to create new meanings. Example: 4699 # $a+ +$b must not become $a++$b 4700 || ( $is_digraph{ $tokenl . $tokenr } ) 4701 || ( $is_trigraph{ $tokenl . $tokenr } ) 4702 4703 # another example: do not combine these two &'s: 4704 # allow_options & &OPT_EXECCGI 4705 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) 4706 4707 # don't combine $$ or $# with any alphanumeric 4708 # (testfile mangle.t with --mangle) 4709 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) 4710 4711 # retain any space after possible filehandle 4712 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) 4713 || ( $typel eq 'Z' || $typell eq 'Z' ) 4714 4715 # keep any space between filehandle and paren: 4716 # file mangle.t with --mangle: 4717 || ( $typel eq 'Y' && $tokenr eq '(' ) 4718 4719 # retain any space after here doc operator ( hereerr.t) 4720 || ( $typel eq 'h' ) 4721 4722 # FIXME: this needs some further work; extrude.t has test cases 4723 # it is safest to retain any space after start of ? : operator 4724 # because of perl's quirky parser. 4725 # ie, this line will fail if you remove the space after the '?': 4726 # $b=join $comma ? ',' : ':', @_; # ok 4727 # $b=join $comma ?',' : ':', @_; # error! 4728 # but this is ok :) 4729 # $b=join $comma?',' : ':', @_; # not a problem! 4730 ## || ($typel eq '?') 4731 4732 # be careful with a space around ++ and --, to avoid ambiguity as to 4733 # which token it applies 4734 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) 4735 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) 4736 4737 # need space after foreach my; for example, this will fail in 4738 # older versions of Perl: 4739 # foreach my$ft(@filetypes)... 4740 || ( $tokenl eq 'my' 4741 && $tokenll =~ /^(for|foreach)$/ 4742 && $tokenr =~ /^\$/ ) 4743 4744 # must have space between grep and left paren; "grep(" will fail 4745 || ( $tokenr eq '(' && $tokenl =~ /^(sort|grep|map)$/ ) 4746 4747 # don't stick numbers next to left parens, as in: 4748 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) 4749 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) 4750 4751 # don't join something like: for bla::bla:: abc 4752 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 4753 || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) ) 4754 ; # the value of this long logic sequence is the result we want 4755 return $result; 4756} 4757 4758sub set_white_space_flag { 4759 4760=pod 4761 4762 This routine examines each pair of nonblank tokens and 4763 sets values for array @white_space_flag. 4764 4765 $white_space_flag[$j] is a flag indicating whether a white space 4766 BEFORE token $j is needed, with the following values: 4767 4768 -1 do not want a space before token $j 4769 0 optional space or $j is a whitespace 4770 1 want a space before token $j 4771 4772 4773 The values for the first token will be defined based 4774 upon the contents of the "to_go" output array. 4775 4776 Note: retain debug print statements because they are usually 4777 required after adding new token types. 4778 4779=cut 4780 4781 BEGIN { 4782 4783 # initialize these global hashes, which control the use of 4784 # whitespace around tokens: 4785 # 4786 # %binary_ws_rules 4787 # %want_left_space 4788 # %want_right_space 4789 # %space_before_paren 4790 # 4791 # Many token types are identical to the tokens themselves. 4792 # See the tokenizer for a complete list. Here are some special types: 4793 # k = perl keyword 4794 # f = semicolon in for statement 4795 # m = unary minus 4796 # p = unary plus 4797 # Note that :: is excluded since it should be contained in an identifier 4798 # Note that '->' is excluded because it never gets space 4799 # parentheses and brackets are excluded since they are handled specially 4800 # curly braces are included but may be overridden by logic, such as 4801 # newline logic. 4802 4803 # NEW_TOKENS: create a whitespace rule here. This can be as 4804 # simple as adding your new letter to @spaces_both_sides, for 4805 # example. 4806 4807 my @spaces_both_sides = qw" 4808 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || => += -= 4809 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= 4810 &&= ||= <=> k f w F n C Y U G v 4811 "; 4812 4813 my @spaces_left_side = qw" 4814 t ! ~ m p { \ h pp mm Z j 4815 "; 4816 push ( @spaces_left_side, '#' ); # avoids warning message 4817 4818 my @spaces_right_side = qw" 4819 ; } ) ] R J ++ -- **= 4820 "; 4821 push ( @spaces_right_side, ',' ); # avoids warning message 4822 my @space_before_paren = qw( 4823 my local and or eq ne if else elsif until unless while 4824 for foreach push return shift unshift pop join split die 4825 ); 4826 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); 4827 @want_right_space{@spaces_both_sides} = 4828 (1) x scalar(@spaces_both_sides); 4829 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); 4830 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); 4831 @want_left_space{@spaces_right_side} = 4832 (-1) x scalar(@spaces_right_side); 4833 @want_right_space{@spaces_right_side} = 4834 (1) x scalar(@spaces_right_side); 4835 @space_before_paren{@space_before_paren} = 4836 (1) x scalar(@space_before_paren); 4837 $want_left_space{'L'} = WS_NO; 4838 $want_left_space{'->'} = WS_NO; 4839 $want_right_space{'->'} = WS_NO; 4840 $want_left_space{'**'} = WS_NO; 4841 $want_right_space{'**'} = WS_NO; 4842 4843 # hash type information must stay tightly bound 4844 # as in : ${xxxx} 4845 $binary_ws_rules{'i'}{'L'} = WS_NO; 4846 $binary_ws_rules{'i'}{'{'} = WS_YES; 4847 $binary_ws_rules{'k'}{'{'} = WS_YES; 4848 $binary_ws_rules{'U'}{'{'} = WS_YES; 4849 $binary_ws_rules{'i'}{'['} = WS_NO; 4850 $binary_ws_rules{'R'}{'L'} = WS_NO; 4851 $binary_ws_rules{'R'}{'{'} = WS_NO; 4852 $binary_ws_rules{'t'}{'L'} = WS_NO; 4853 $binary_ws_rules{'t'}{'{'} = WS_NO; 4854 $binary_ws_rules{'}'}{'L'} = WS_NO; 4855 $binary_ws_rules{'}'}{'{'} = WS_NO; 4856 $binary_ws_rules{'$'}{'L'} = WS_NO; 4857 $binary_ws_rules{'$'}{'{'} = WS_NO; 4858 $binary_ws_rules{'@'}{'L'} = WS_NO; 4859 $binary_ws_rules{'@'}{'{'} = WS_NO; 4860 $binary_ws_rules{'='}{'L'} = WS_YES; 4861 4862 # the following includes ') {' 4863 # as in : if ( xxx ) { yyy } 4864 $binary_ws_rules{']'}{'L'} = WS_NO; 4865 $binary_ws_rules{']'}{'{'} = WS_NO; 4866 $binary_ws_rules{')'}{'{'} = WS_YES; 4867 $binary_ws_rules{')'}{'['} = WS_NO; 4868 $binary_ws_rules{']'}{'['} = WS_NO; 4869 $binary_ws_rules{']'}{'{'} = WS_NO; 4870 $binary_ws_rules{'}'}{'['} = WS_NO; 4871 $binary_ws_rules{'R'}{'['} = WS_NO; 4872 4873 $binary_ws_rules{']'}{'++'} = WS_NO; 4874 $binary_ws_rules{']'}{'--'} = WS_NO; 4875 $binary_ws_rules{')'}{'++'} = WS_NO; 4876 $binary_ws_rules{')'}{'--'} = WS_NO; 4877 4878 $binary_ws_rules{'R'}{'++'} = WS_NO; 4879 $binary_ws_rules{'R'}{'--'} = WS_NO; 4880 4881 $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label 4882 $binary_ws_rules{'w'}{':'} = WS_NO; 4883 $binary_ws_rules{'i'}{'Q'} = WS_YES; 4884 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' 4885 4886 # FIXME: we need to split 'i' into variables and functions 4887 # and have no space for functions but space for variables. For now, 4888 # I have a special patch in the special rules below 4889 $binary_ws_rules{'i'}{'('} = WS_NO; 4890 4891 $binary_ws_rules{'w'}{'('} = WS_NO; 4892 $binary_ws_rules{'w'}{'{'} = WS_YES; 4893 } 4894 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; 4895 my ( $last_token, $last_type, $last_block_type, $token, $type, 4896 $block_type ); 4897 my (@white_space_flag); 4898 my $j_tight_closing_paren = -1; 4899 4900 if ( $max_index_to_go >= 0 ) { 4901 $token = $tokens_to_go[$max_index_to_go]; 4902 $type = $types_to_go[$max_index_to_go]; 4903 $block_type = $block_type_to_go[$max_index_to_go]; 4904 } 4905 else { 4906 $token = ' '; 4907 $type = 'b'; 4908 $block_type = ''; 4909 } 4910 4911 # loop over all tokens 4912 my ( $j, $ws ); 4913 4914 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 4915 4916 if ( $$rtoken_type[$j] eq 'b' ) { 4917 $white_space_flag[$j] = WS_OPTIONAL; 4918 next; 4919 } 4920 4921 # set a default value, to be changed as needed 4922 $ws = undef; 4923 $last_token = $token; 4924 $last_type = $type; 4925 $last_block_type = $block_type; 4926 $token = $$rtokens[$j]; 4927 $type = $$rtoken_type[$j]; 4928 $block_type = $$rblock_type[$j]; 4929 4930 #--------------------------------------------------------------- 4931 # section 1: 4932 # handle space on the inside of opening braces 4933 #--------------------------------------------------------------- 4934 if ( ( $last_type =~ /^[L\{\(\[]$/ ) ) { 4935 4936 $j_tight_closing_paren = -1; 4937 4938 # let's keep empty matched braces together: () {} [] 4939 # except for BLOCKS 4940 if ( $token eq $matching_token{$last_token} ) { 4941 if ($block_type) { 4942 $ws = WS_YES; 4943 } 4944 else { 4945 $ws = WS_NO; 4946 } 4947 } 4948 else { 4949 4950 # we're considering the right of an opening brace 4951 # tightness = 0 means always pad inside with space 4952 # tightness = 1 means pad inside if "complex" 4953 # tightness = 2 means never pad inside with space 4954 4955 my $tightness; 4956 if ( $last_type eq '{' 4957 && $last_token eq '{' 4958 && $last_block_type ) 4959 { 4960 $tightness = $rOpts->{'block-brace-tightness'}; 4961 } 4962 else { $tightness = $tightness{$last_token} } 4963 4964 if ( $tightness <= 0 ) { 4965 $ws = WS_YES; 4966 } 4967 elsif ( $tightness > 1 ) { 4968 $ws = WS_NO; 4969 } 4970 else { 4971 my $j_next = 4972 ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; 4973 my $tok_next = $$rtokens[$j_next]; 4974 my $type_next = $$rtoken_type[$j_next]; 4975 4976 # for tightness = 1, if there is just one token 4977 # within the matching pair, we will keep it tight 4978 if ( 4979 $tok_next eq $matching_token{$last_token} 4980 4981 # but watch out for this: [ [ ] (misc.t) 4982 && $last_token ne $token 4983 ) 4984 { 4985 4986 # remember where to put the space for the closing paren 4987 $j_tight_closing_paren = $j_next; 4988 $ws = WS_NO; 4989 } 4990 else { 4991 $ws = WS_YES; 4992 } 4993 } 4994 } 4995 } # done with opening braces and brackets 4996 my $ws_1 = $ws; # for debugging 4997 4998 #--------------------------------------------------------------- 4999 # section 2: 5000 # handle space on inside of closing brace pairs 5001 #--------------------------------------------------------------- 5002 if ( $type =~ /[\}\)\]R]/ ) { 5003 5004 if ( $j == $j_tight_closing_paren ) { 5005 5006 $j_tight_closing_paren = -1; 5007 $ws = WS_NO; 5008 } 5009 else { 5010 5011 if ( !defined($ws) ) { 5012 5013 my $tightness; 5014 if ( $type eq '}' && $token eq '}' && $block_type ) { 5015 $tightness = $rOpts->{'block-brace-tightness'}; 5016 } 5017 else { $tightness = $tightness{$token} } 5018 5019 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; 5020 } 5021 } 5022 } 5023 5024 my $ws_2 = $ws; # for debugging 5025 5026 #--------------------------------------------------------------- 5027 # section 3: 5028 # use the binary table 5029 #--------------------------------------------------------------- 5030 if ( !defined($ws) ) { 5031 $ws = $binary_ws_rules{$last_type}{$type}; 5032 } 5033 my $ws_3 = $ws; # for debugging 5034 5035 #--------------------------------------------------------------- 5036 # section 4: 5037 # some special cases 5038 #--------------------------------------------------------------- 5039 if ( $token eq '(' ) { 5040 5041 # This will have to be tweaked as tokenization changes. 5042 # We want a space after certain block types: 5043 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); 5044 # 5045 # But not others: 5046 # &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } ); 5047 # At present, the & block is not marked as a code block, so 5048 # this works: 5049 if ( $last_type eq '}' ) { 5050 if ( $last_block_type =~ /^(sort|map|grep)$/ ) { 5051 $ws = WS_YES; 5052 } 5053 else { 5054 $ws = WS_NO; 5055 } 5056 } 5057 5058 # ----------------------------------------------------- 5059 # added 'w' and 'i' checks for TESTING gnu-style update 5060 # something like: 5061 # myfun( &myfun( ->myfun( 5062 # ----------------------------------------------------- 5063 if ( ( $last_type =~ /^[wkU]$/ ) 5064 || ( $last_type eq 'i' && $last_token =~ /^(\&|->)/ ) ) 5065 { 5066 5067 # Do not introduce new space between keyword or function 5068 # and ( except in special cases) because this can 5069 # introduce errors in some cases ( prnterr1.t ) 5070 unless ( $space_before_paren{$last_token} ) { 5071 $ws = WS_NO; 5072 } 5073 } 5074 5075 # space between something like $i and ( in 5076 # for $i ( 0 .. 20 ) { 5077 # FIXME: eventually, type 'i' needs to be split into multiple 5078 # token types so this can be a hardwired rule. 5079 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { 5080 $ws = WS_YES; 5081 } 5082 } 5083 5084 # keep space between 'sub' and '{' for anonymous sub definition 5085 if ( $type eq '{' ) { 5086 if ( $last_token eq 'sub' ) { 5087 $ws = WS_YES; 5088 } 5089 5090 # this is needed to avoid no space in '){' 5091 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } 5092 5093 # avoid any space before the brace or bracket in something like 5094 # @opts{'a','b',...} 5095 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { 5096 $ws = WS_NO; 5097 } 5098 } 5099 5100 elsif ( $type eq 'i' ) { 5101 5102 # never a space before -> 5103 if ( $token =~ /^\-\>/ ) { 5104 $ws = WS_NO; 5105 } 5106 } 5107 5108 # retain any space between '-' and bare word 5109 elsif ( $type eq 'w' || $type eq 'C' ) { 5110 $ws = WS_OPTIONAL if $last_type eq '-'; 5111 } 5112 5113 # always space before side comment 5114 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } 5115 5116 # always preserver whatever space was used after a possible 5117 # filehandle or here doc operator 5118 if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) { 5119 $ws = WS_OPTIONAL; 5120 } 5121 5122 my $ws_4 = $ws; 5123 5124 #--------------------------------------------------------------- 5125 # section 5: 5126 # default rules not covered above 5127 #--------------------------------------------------------------- 5128 # if we fall through to here, 5129 # look at the pre-defined hash tables for the two tokens, and 5130 # if (they are equal) use the common value 5131 # if (either is zero or undef) use the other 5132 # if (either is -1) use it 5133 # That is, 5134 # left vs right 5135 # 1 vs 1 --> 1 5136 # 0 vs 0 --> 0 5137 # -1 vs -1 --> -1 5138 # 0 vs -1 --> -1 5139 # 0 vs 1 --> 1 5140 # 1 vs 0 --> 1 5141 # -1 vs 0 --> -1 5142 # -1 vs 1 --> -1 5143 # 1 vs -1 --> -1 5144 if ( !defined($ws) ) { 5145 my $wl = $want_left_space{$type}; 5146 my $wr = $want_right_space{$last_type}; 5147 if ( !defined($wl) ) { $wl = 0 } 5148 if ( !defined($wr) ) { $wr = 0 } 5149 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; 5150 } 5151 5152 if ( !defined($ws) ) { 5153 $ws = 0; 5154 write_diagnostics( 5155 "WS flag is undefined for tokens $last_token $token\n"); 5156 } 5157 5158 if ( ( $ws == 0 ) 5159 && $j > 0 5160 && $j < $jmax 5161 && ( $last_type !~ /^[Zh]$/ ) ) 5162 { 5163 5164 # If this happens, we have a non-fatal but undesirable 5165 # hole in the above rules which should be patched. 5166 write_diagnostics( 5167 "WS flag is zero for tokens $last_token $token\n"); 5168 } 5169 $white_space_flag[$j] = $ws; 5170 5171 FORMATTER_DEBUG_FLAG_WHITE && do { 5172 my $str = substr( $last_token, 0, 15 ); 5173 $str .= ' ' x ( 16 - length($str) ); 5174 if ( !defined($ws_1) ) { $ws_1 = "*" } 5175 if ( !defined($ws_2) ) { $ws_2 = "*" } 5176 if ( !defined($ws_3) ) { $ws_3 = "*" } 5177 if ( !defined($ws_4) ) { $ws_4 = "*" } 5178 print 5179"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; 5180 }; 5181 } 5182 return \@white_space_flag; 5183} 5184 5185{ # begin closure print_line_of_tokens 5186 5187 my $rtoken_type; 5188 my $rtokens; 5189 my $rlevels; 5190 my $rslevels; 5191 my $rblock_type; 5192 my $rcontainer_type; 5193 my $rcontainer_environment; 5194 my $rtype_sequence; 5195 my $input_line; 5196 my $rnesting_tokens; 5197 my $rci_levels; 5198 my $rnesting_blocks; 5199 5200 my $in_quote; 5201 my $python_indentation_level; 5202 5203 # These local token variables are stored by store_token_to_go: 5204 my $block_type; 5205 my $ci_level; 5206 my $container_environment; 5207 my $container_type; 5208 my $in_continued_quote; 5209 my $level; 5210 my $nesting_blocks; 5211 my $no_internal_newlines; 5212 my $slevel; 5213 my $token; 5214 my $type; 5215 my $type_sequence; 5216 5217 # routine to pull the jth token from the line of tokens 5218 sub extract_token { 5219 my $j = shift; 5220 $token = $$rtokens[$j]; 5221 $type = $$rtoken_type[$j]; 5222 $block_type = $$rblock_type[$j]; 5223 $container_type = $$rcontainer_type[$j]; 5224 $container_environment = $$rcontainer_environment[$j]; 5225 $type_sequence = $$rtype_sequence[$j]; 5226 $level = $$rlevels[$j]; 5227 $slevel = $$rslevels[$j]; 5228 $nesting_blocks = $$rnesting_blocks[$j]; 5229 $ci_level = $$rci_levels[$j]; 5230 } 5231 5232 # routines to save and restore the current token 5233 { 5234 5235 # Saved values 5236 my $saved_block_type; 5237 my $saved_ci_level; 5238 my $saved_container_environment; 5239 my $saved_container_type; 5240 my $saved_in_continued_quote; 5241 my $saved_level; 5242 my $saved_nesting_blocks; 5243 my $saved_no_internal_newlines; 5244 my $saved_slevel; 5245 my $saved_token; 5246 my $saved_type; 5247 my $saved_type_sequence; 5248 5249 sub save_current_token { 5250 $saved_block_type = $block_type; 5251 $saved_ci_level = $ci_level; 5252 $saved_container_environment = $container_environment; 5253 $saved_container_type = $container_type; 5254 $saved_in_continued_quote = $in_continued_quote; 5255 $saved_level = $level; 5256 $saved_nesting_blocks = $nesting_blocks; 5257 $saved_no_internal_newlines = $no_internal_newlines; 5258 $saved_slevel = $slevel; 5259 $saved_token = $token; 5260 $saved_type = $type; 5261 $saved_type_sequence = $type_sequence; 5262 } 5263 5264 sub restore_current_token { 5265 $block_type = $saved_block_type; 5266 $ci_level = $saved_ci_level; 5267 $container_environment = $saved_container_environment; 5268 $container_type = $saved_container_type; 5269 $in_continued_quote = $saved_in_continued_quote; 5270 $level = $saved_level; 5271 $nesting_blocks = $saved_nesting_blocks; 5272 $no_internal_newlines = $saved_no_internal_newlines; 5273 $slevel = $saved_slevel; 5274 $token = $saved_token; 5275 $type = $saved_type; 5276 $type_sequence = $saved_type_sequence; 5277 } 5278 } 5279 5280 # Routine to place the current token into the output stream. 5281 # Called once per output token. 5282 sub store_token_to_go { 5283 5284 my $flag = $no_internal_newlines; 5285 if ( $_[0] ) { $flag = 1 } 5286 5287 $tokens_to_go[ ++$max_index_to_go ] = $token; 5288 $types_to_go[$max_index_to_go] = $type; 5289 $nobreak_to_go[$max_index_to_go] = $flag; 5290 $old_breakpoint_to_go[$max_index_to_go] = 0; 5291 $forced_breakpoint_to_go[$max_index_to_go] = 0; 5292 $block_type_to_go[$max_index_to_go] = $block_type; 5293 $type_sequence_to_go[$max_index_to_go] = $type_sequence; 5294 $container_environment_to_go[$max_index_to_go] = $container_environment; 5295 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; 5296 $ci_levels_to_go[$max_index_to_go] = $ci_level; 5297 $mate_index_to_go[$max_index_to_go] = -1; 5298 5299 if ( $type ne 'b' ) { 5300 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; 5301 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; 5302 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; 5303 $last_nonblank_index_to_go = $max_index_to_go; 5304 $last_nonblank_type_to_go = $type; 5305 $last_nonblank_token_to_go = $token; 5306 } 5307 5308 $levels_to_go[$max_index_to_go] = $level; 5309 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; 5310 $lengths_to_go[ $max_index_to_go + 1 ] = 5311 $lengths_to_go[$max_index_to_go] + length($token); 5312 5313 # Define the indentation that this token would have if it started 5314 # a new line. We have to do this now because we need to know this 5315 # when considering one-line blocks. 5316 set_leading_whitespace( $level, $ci_level, $in_continued_quote ); 5317 5318 FORMATTER_DEBUG_FLAG_STORE && do { 5319 my ( $a, $b, $c ) = caller(); 5320 print 5321"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; 5322 }; 5323 }; 5324 5325 sub insert_new_token_to_go { 5326 5327 # insert a new token into the output stream. use same level as 5328 # previous token; assumes a character at max_index_to_go. 5329 save_current_token(); 5330 ( $token, $type, $slevel, $no_internal_newlines ) = @_; 5331 5332 if ( $max_index_to_go == UNDEFINED_INDEX ) { 5333 warning("code bug: bad call to insert_new_token_to_go\n"); 5334 } 5335 $level = $levels_to_go[$max_index_to_go]; 5336 5337 # FIXME: it seems to be necessary to use the next, rather than 5338 # previous, value of this variable when creating a new blank (align.t) 5339 #my $slevel = $nesting_depth_to_go[$max_index_to_go]; 5340 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 5341 $ci_level = $ci_levels_to_go[$max_index_to_go]; 5342 $container_environment = $container_environment_to_go[$max_index_to_go]; 5343 $in_continued_quote = 0; 5344 $block_type = ""; 5345 $type_sequence = ""; 5346 store_token_to_go(); 5347 restore_current_token(); 5348 return; 5349 } 5350 5351 sub print_line_of_tokens { 5352 5353 my $line_of_tokens = shift; 5354 5355 # this routine is called once per input line to process all of the 5356 # tokens on that line. Each token is sent to sub store_token_to_go, 5357 # which stores them in an output buffer which is dumped whenever 5358 # appropriate. 5359 5360 # extract input line number for error messages 5361 $input_line_number = $line_of_tokens->{_line_number}; 5362 5363 $rtoken_type = $line_of_tokens->{_rtoken_type}; 5364 $rtokens = $line_of_tokens->{_rtokens}; 5365 $rlevels = $line_of_tokens->{_rlevels}; 5366 $rslevels = $line_of_tokens->{_rslevels}; 5367 $rblock_type = $line_of_tokens->{_rblock_type}; 5368 $rcontainer_type = $line_of_tokens->{_rcontainer_type}; 5369 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; 5370 $rtype_sequence = $line_of_tokens->{_rtype_sequence}; 5371 $input_line = $line_of_tokens->{_line_text}; 5372 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 5373 $rci_levels = $line_of_tokens->{_rci_levels}; 5374 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 5375 5376 $in_continued_quote = $line_of_tokens->{_starting_in_quote}; 5377 $in_quote = $line_of_tokens->{_ending_in_quote}; 5378 $python_indentation_level = 5379 $line_of_tokens->{_python_indentation_level}; 5380 5381 my $j; 5382 my $j_next; 5383 my $jmax; 5384 my $next_nonblank_token; 5385 my $next_nonblank_token_type; 5386 my $rwhite_space_flag; 5387 5388 $jmax = @$rtokens - 1; 5389 $block_type = ""; 5390 $container_type = ""; 5391 $container_environment = ""; 5392 $type_sequence = ""; 5393 $no_internal_newlines = 1 - $rOpts->{'add-newlines'}; 5394 5395 # Handle a continued quote.. 5396 if ($in_continued_quote) { 5397 5398 # A line which is entirely a quote or pattern must go out 5399 # verbatim. Note: the \n is contained in $input_line. 5400 if ( $jmax <= 0 ) { 5401 if ( ( $input_line =~ "\t" ) ) { 5402 note_embedded_tab(); 5403 } 5404 write_unindented_line("$input_line"); 5405 $last_line_had_side_comment = 0; 5406 return; 5407 } 5408 5409 # prior to version 20010406, perltidy had a bug which placed 5410 # continuation indentation before the last line of some multiline 5411 # quotes and patterns -- exactly the lines passing this way. 5412 # To help find affected lines in scripts run with these 5413 # versions, run with '-chk', and it will warn of any quotes or 5414 # patterns which might have been modified by these early 5415 # versions. 5416 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { 5417 warning( 5418"-chk: please check this line for extra leading whitespace\n" 5419 ); 5420 } 5421 } 5422 5423 # delete trailing blank tokens 5424 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } 5425 5426 # Handle a blank line.. 5427 if ( $jmax < 0 ) { 5428 5429 # For the 'swallow-optional-blank-lines' option, we delete all 5430 # old blank lines and let the blank line rules generate any 5431 # needed blanks. 5432 if ( !$rOpts->{'swallow-optional-blank-lines'} ) { 5433 flush(); 5434 $file_writer_object->write_blank_code_line(); 5435 $last_line_leading_type = 'b'; 5436 } 5437 $last_line_had_side_comment = 0; 5438 return; 5439 } 5440 5441 # see if this is a static block comment (starts with ##) 5442 my $is_static_block_comment = 0; 5443 my $is_static_block_comment_without_leading_space = 0; 5444 if ( $jmax == 0 5445 && $$rtoken_type[0] eq '#' 5446 && $rOpts->{'static-block-comments'} 5447 && $input_line =~ /$static_block_comment_pattern/o ) 5448 { 5449 $is_static_block_comment = 1; 5450 $is_static_block_comment_without_leading_space = 5451 ( length($1) <= 0 ); 5452 } 5453 5454 # create a hanging side comment if appropriate 5455 if ( 5456 $jmax == 0 5457 && $$rtoken_type[0] eq '#' # only token is a comment 5458 && $last_line_had_side_comment # last line had side comment 5459 && $input_line =~ /^\s/ # there is some leading space 5460 && !$is_static_block_comment # do not make static comment hanging 5461 && $rOpts->{'hanging-side-comments'} # user is allowing this 5462 ) 5463 { 5464 5465 # We will insert an empty qw string at the start of the token list 5466 # to force this comment to be a side comment. The vertical aligner 5467 # should then line it up with the previous side comment. 5468 unshift @$rtoken_type, 'q'; 5469 unshift @$rtokens, ''; 5470 unshift @$rlevels, $$rlevels[0]; 5471 unshift @$rslevels, $$rslevels[0]; 5472 unshift @$rblock_type, ''; 5473 unshift @$rcontainer_type, ''; 5474 unshift @$rcontainer_environment, ''; 5475 unshift @$rtype_sequence, ''; 5476 unshift @$rnesting_tokens, $$rnesting_tokens[0]; 5477 unshift @$rci_levels, $$rci_levels[0]; 5478 unshift @$rnesting_blocks, $$rnesting_blocks[0]; 5479 $jmax = 1; 5480 } 5481 5482 # remember if this line has a side comment 5483 $last_line_had_side_comment = 5484 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); 5485 5486 # Handle a block (full-line) comment.. 5487 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { 5488 5489 if ( $rOpts->{'delete-block-comments'} ) { return } 5490 5491 if ( $rOpts->{'tee-block-comments'} ) { 5492 $file_writer_object->tee_on(); 5493 } 5494 flush(); 5495 5496 # output a blank line before block comments 5497 if ( 5498 $last_line_leading_type !~ /^[#b]$/ 5499 && $rOpts->{'blanks-before-comments'} # only if allowed 5500 && ! 5501 $is_static_block_comment # never before static block comments 5502 ) 5503 { 5504 $file_writer_object->write_blank_code_line(); 5505 $last_line_leading_type = 'b'; 5506 } 5507 5508 if ( $rOpts->{'indent-block-comments'} 5509 && !$is_static_block_comment_without_leading_space ) 5510 { 5511 5512 extract_token(0); 5513 store_token_to_go(); 5514 flush(); 5515 } 5516 else { 5517 $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); 5518 $last_line_leading_type = '#'; 5519 } 5520 if ( $rOpts->{'tee-block-comments'} ) { 5521 $file_writer_object->tee_off(); 5522 } 5523 return; 5524 } 5525 5526 # compare input/output indentation except for continuation lines 5527 # (because they have an unknown amount of initial blank space) 5528 # and lines which are quotes (because they may have been outdented) 5529 # Note: this test is placed here because we know the continuation flag 5530 # at this point, which allows us to avoid non-meaningful checks. 5531 my $structural_indentation_level = $$rlevels[0]; 5532 compare_indentation_levels( $python_indentation_level, 5533 $structural_indentation_level ) 5534 unless ( ( $$rci_levels[0] > 0 ) 5535 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) 5536 ); 5537 5538=pod 5539 5540 Patch needed for MakeMaker. Do not break a statement 5541 in which $VERSION may be calculated. See MakeMaker.pm; 5542 this is based on the coding in it. 5543 The first line of a file that matches this will be eval'd: 5544 /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 5545 Examples: 5546 *VERSION = \'1.01'; 5547 ( $VERSION ) = '$Revision: 1.1 $ ' =~ /\$Revision:\s+([^\s]+)/; 5548 We will pass such a line straight through (by changing it 5549 to a quoted string) unless -npvl is used 5550 5551 But note: this means that the formatter will not see every token, 5552 which complicates things. For example, loops which look at 5553 block sequence numbers may see a closing sequence number but not 5554 the corresponding opening sequence number (sidecmt.t). Example: 5555 5556 my $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf 5557 "%d."."%02d" x $#r, @r }; 5558 5559 Here, the opening brace of 'do {' will not be seen, while the closing 5560 '}' will be seen as an individual token. 5561 5562=cut 5563 5564 my $is_VERSION_statement = 0; 5565 5566 if ( 5567 !$saw_VERSION_in_this_file 5568 && $input_line =~ /VERSION/ # quick check to reject most lines 5569 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 5570 ) 5571 { 5572 $saw_VERSION_in_this_file = 1; 5573 $is_VERSION_statement = 1; 5574 write_logfile_entry("passing VERSION line; -npvl deactivates\n"); 5575 } 5576 5577 # take care of indentation-only 5578 # also write a line which is entirely a 'qw' list 5579 if ( $is_VERSION_statement 5580 || $rOpts->{'indent-only'} 5581 || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) ) 5582 { 5583 flush(); 5584 $input_line =~ s/^\s*//; # trim left end 5585 5586 unless ( $rOpts->{'indent-only'} ) { 5587 $input_line =~ s/\s*$//; # trim right end 5588 } 5589 5590 extract_token(0); 5591 $token = $input_line; 5592 $type = 'q'; 5593 $block_type = ""; 5594 $container_type = ""; 5595 $container_environment = ""; 5596 $type_sequence = ""; 5597 store_token_to_go(); 5598 output_line_to_go(); 5599 return; 5600 } 5601 5602 push ( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding 5603 push ( @$rtoken_type, 'b', 'b' ); 5604 ($rwhite_space_flag) = 5605 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); 5606 5607 # find input tabbing to allow checks for tabbing disagreement 5608 $input_line_tabbing = ""; 5609 if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } 5610 5611 # if the buffer hasn't been flushed, add a leading space if 5612 # necessary to keep essential whitespace. This is really only 5613 # necessary if we are squeezing out all ws. 5614 if ( $max_index_to_go >= 0 ) { 5615 5616 $old_line_count_in_batch++; 5617 5618 if ( 5619 is_essential_whitespace( 5620 $last_last_nonblank_token, 5621 $last_last_nonblank_type, 5622 $tokens_to_go[$max_index_to_go], 5623 $types_to_go[$max_index_to_go], 5624 $$rtokens[0], 5625 $$rtoken_type[0] 5626 ) 5627 ) 5628 { 5629 my $slevel = $$rslevels[0]; 5630 insert_new_token_to_go( ' ', 'b', $slevel, 5631 $no_internal_newlines ); 5632 } 5633 } 5634 5635 # If we just saw the end of an elsif block, write nag message 5636 # if we do not see another elseif or an else. 5637 if ($looking_for_else) { 5638 5639 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { 5640 write_logfile_entry("(No else block)\n"); 5641 } 5642 $looking_for_else = 0; 5643 } 5644 5645 # This is a good place to kill incomplete one-line blocks 5646 if ( ( $semicolons_before_block_self_destruct == 0 ) 5647 && ( $max_index_to_go >= 0 ) 5648 && ( $types_to_go[$max_index_to_go] eq ';' ) 5649 && ( $$rtokens[0] ne '}' ) ) 5650 { 5651 destroy_one_line_block(); 5652 output_line_to_go(); 5653 } 5654 5655 # loop to process the tokens one-by-one 5656 $type = 'b'; 5657 $token = ""; 5658 5659 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 5660 5661 # pull out the local values for this token 5662 extract_token($j); 5663 5664 if ( $type eq '#' ) { 5665 5666 # trim trailing whitespace 5667 # (there is no option at present to prevent this) 5668 $token =~ s/\s*$//; 5669 5670 if ( 5671 $rOpts->{'delete-side-comments'} 5672 5673 # delete closing side comments if necessary 5674 || ( $rOpts->{'delete-closing-side-comments'} 5675 && $token =~ /$closing_side_comment_prefix_pattern/o 5676 && $last_nonblank_block_type =~ 5677 /$closing_side_comment_list_pattern/o ) 5678 ) 5679 { 5680 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 5681 unstore_token_to_go(); 5682 } 5683 last; 5684 } 5685 } 5686 5687 # If we are continuing after seeing a right curly brace, flush 5688 # buffer unless we see what we are looking for, as in 5689 # } else ... 5690 if ( $brace_follower_pattern && $type ne 'b' ) { 5691 5692 unless ( $token =~ /$brace_follower_pattern/ ) { 5693 output_line_to_go(); 5694 } 5695 $brace_follower_pattern = undef; 5696 } 5697 5698 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; 5699 $next_nonblank_token = $$rtokens[$j_next]; 5700 $next_nonblank_token_type = $$rtoken_type[$j_next]; 5701 5702 #-------------------------------------------------------- 5703 # Start of patch section 5704 #-------------------------------------------------------- 5705 5706 # Modify certain tokens here for whitespace 5707 # The following is not yet done, but could be: 5708 # sub (x x x) 5709 # These become type 'i', space and all. 5710 if ( $type eq 'i' or $type eq 't' ) { 5711 5712 # change "$ var" to "$var" etc 5713 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { 5714 $token =~ s/\s*//g; 5715 } 5716 5717 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } 5718 } 5719 5720 # change 'LABEL :' to 'LABEL:' 5721 elsif ( $type eq 'J' ) { $token =~ s/\s+//g } 5722 5723 # patch to add space to something like "x10" 5724 # This avoids having to split this token in the pre-tokenizer 5725 elsif ( $type eq 'n' ) { 5726 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } 5727 } 5728 5729 elsif ( $type eq 'Q' ) { 5730 note_embedded_tab() if ( $token =~ "\t" ); 5731 } 5732 5733 # trim blanks from right of qw quotes 5734 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) 5735 elsif ( $type eq 'q' ) { 5736 $token =~ s/\s*$//; 5737 note_embedded_tab() if ( $token =~ "\t" ); 5738 } 5739 5740 #-------------------------------------------------------- 5741 # End of patch section 5742 #-------------------------------------------------------- 5743 5744 # insert any needed whitespace 5745 if ( ( $type ne 'b' ) 5746 && ( $max_index_to_go >= 0 ) 5747 && ( $types_to_go[$max_index_to_go] ne 'b' ) 5748 && $rOpts_add_whitespace ) 5749 { 5750 my $ws = $$rwhite_space_flag[$j]; 5751 5752 if ( $ws == 1 ) { 5753 insert_new_token_to_go( ' ', 'b', $slevel, 5754 $no_internal_newlines ); 5755 } 5756 } 5757 5758 # Do not allow breaks which would promote a side comment to a 5759 # block comment. In order to allow a break before an opening 5760 # or closing BLOCK, followed by a side comment, those sections 5761 # of code will handle this flag separately. 5762 my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); 5763 my $is_opening_BLOCK = 5764 ( $type eq '{' 5765 && $token eq '{' 5766 && $block_type 5767 && $block_type ne 't' ); 5768 my $is_closing_BLOCK = 5769 ( $type eq '}' 5770 && $token eq '}' 5771 && $block_type 5772 && $block_type ne 't' ); 5773 5774 if ( $side_comment_follows 5775 && !$is_opening_BLOCK 5776 && !$is_closing_BLOCK ) 5777 { 5778 $no_internal_newlines = 1; 5779 } 5780 5781 # We're only going to handle breaking for code BLOCKS at this 5782 # (top) level. Other indentation breaks will be handled by 5783 # sub scan_list, which is better suited to dealing with them. 5784 if ($is_opening_BLOCK) { 5785 5786 # Tentatively output this token. This is required before 5787 # calling starting_one_line_block. We may have to unstore 5788 # it, though, if we have to break before it. 5789 store_token_to_go($side_comment_follows); 5790 5791 # Look ahead to see if we might form a one-line block 5792 my $too_long = 5793 starting_one_line_block( $j, $jmax, $level, $slevel, 5794 $ci_level, $rtokens, $rtoken_type, $rblock_type ); 5795 clear_breakpoint_undo_stack(); 5796 5797 # to simplify the logic below, set a flag to indicate if 5798 # this opening brace is far from the keyword which introduces it 5799 my $keyword_on_same_line = 1; 5800 if ( ( $max_index_to_go >= 0 ) 5801 && ( $last_nonblank_type eq ')' ) ) 5802 { 5803 if ( $block_type =~ /^(if|else|elsif)$/ 5804 && ( $tokens_to_go[0] eq '}' ) 5805 && $rOpts_cuddled_else ) 5806 { 5807 $keyword_on_same_line = 1; 5808 } 5809 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) 5810 { 5811 $keyword_on_same_line = 0; 5812 } 5813 } 5814 5815 # decide if user requested break before '{' 5816 my $want_break = 5817 5818 # use -bl flag if not a sub block of any type 5819 $block_type !~ /^sub/ 5820 ? $rOpts->{'opening-brace-on-new-line'} 5821 5822 # use -sbl flag unless this is an anonymous sub block 5823 : $block_type !~ /^sub\W*$/ 5824 ? $rOpts->{'opening-sub-brace-on-new-line'} 5825 5826 # do not break for anonymous subs 5827 : 0; 5828 5829 # Break before an opening '{' ... 5830 if ( 5831 5832 # if requested 5833 $want_break 5834 5835 # and we were unable to start looking for a block, 5836 # or the -nbob is in use 5837 && ( $index_start_one_line_block == UNDEFINED_INDEX 5838 || !$rOpts->{'break-after-opening-brace'} ) 5839 5840 # or if it will not be on same line as its keyword, so that 5841 # it will be outdented (eval.t, overload.t), and the user 5842 # has not insisted on keeping it on the right 5843 || ( !$keyword_on_same_line 5844 && !$rOpts->{'opening-brace-always-on-right'} ) 5845 5846 ) 5847 { 5848 5849 # but only if allowed 5850 unless ($no_internal_newlines) { 5851 5852 # since we already stored this token, we must unstore it 5853 unstore_token_to_go(); 5854 5855 # then output the line 5856 output_line_to_go(); 5857 5858 # and now store this token at the start of a new line 5859 store_token_to_go($side_comment_follows); 5860 } 5861 } 5862 5863 # Now update for side comment 5864 if ($side_comment_follows) { $no_internal_newlines = 1 } 5865 5866 # now output this line 5867 unless ( !$rOpts->{'break-after-opening-brace'} 5868 && $block_type =~ /$bli_pattern/o 5869 && $max_index_to_go == 0 ) 5870 { 5871 unless ($no_internal_newlines) { 5872 output_line_to_go(); 5873 } 5874 } 5875 } 5876 5877 elsif ($is_closing_BLOCK) { 5878 5879 # If there is a pending one-line block .. 5880 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 5881 5882 # we have to terminate it if.. 5883 if ( 5884 5885 # it is too long (final length may be different from 5886 # initial estimate). note: must allow 1 space for this token 5887 excess_line_length( $index_start_one_line_block, 5888 $max_index_to_go ) >= 0 5889 5890 # or if it has too many semicolons 5891 || ( $semicolons_before_block_self_destruct == 0 5892 && $last_nonblank_type ne ';' ) 5893 ) 5894 { 5895 destroy_one_line_block(); 5896 } 5897 } 5898 5899 # put a break before this closing curly brace if appropriate 5900 unless ( $no_internal_newlines 5901 || $index_start_one_line_block != UNDEFINED_INDEX ) 5902 { 5903 5904 # add missing semicolon if ... 5905 # there are some tokens 5906 if ( 5907 ( $max_index_to_go > 0 ) 5908 && 5909 5910 # and we don't have one 5911 ( $last_nonblank_type ne ';' ) 5912 5913 # patch until some block type issues are fixed: 5914 # Do not add semi-colon for block types '{', '}', and ';' 5915 # because we cannot be sure yet that this 5916 # is a block and not an anonomyous hash 5917 # (blktype.t, blktype1.t) 5918 && ( $block_type !~ /^[\{\};]$/ ) 5919 5920 # and we are allowed to do so. 5921 && $rOpts->{'add-semicolons'} 5922 ) 5923 { 5924 5925 save_current_token(); 5926 $token = ';'; 5927 $type = ';'; 5928 $level = $levels_to_go[$max_index_to_go]; 5929 $slevel = $nesting_depth_to_go[$max_index_to_go]; 5930 $nesting_blocks = 5931 $nesting_blocks_to_go[$max_index_to_go]; 5932 $ci_level = $ci_levels_to_go[$max_index_to_go]; 5933 $block_type = ""; 5934 $container_type = ""; 5935 $container_environment = ""; 5936 $type_sequence = ""; 5937 5938 # Note - we remove any blank AFTER extracting its 5939 # parameters such as level, etc, above 5940 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 5941 unstore_token_to_go(); 5942 } 5943 store_token_to_go(); 5944 5945 note_added_semicolon(); 5946 restore_current_token(); 5947 } 5948 5949 # then write out everything before this closing curly brace 5950 output_line_to_go(); 5951 } 5952 5953 # Now update for side comment 5954 if ($side_comment_follows) { $no_internal_newlines = 1 } 5955 5956 # store the closing curly brace 5957 store_token_to_go(); 5958 5959 # ok, we just stored a closing curly brace. Often, but 5960 # not always, we want to end the line immediately. 5961 # So now we have to check for special cases. 5962 5963 # if this '}' successfully ends a one-line block.. 5964 my $is_one_line_block = 0; 5965 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 5966 5967 $is_one_line_block = 1; 5968 5969 # we have to actually make it by removing tentative 5970 # breaks that were set within it 5971 undo_forced_breakpoint_stack(0); 5972 set_nobreaks( $index_start_one_line_block, 5973 $max_index_to_go - 1 ); 5974 5975 # then re-initialize for the next one-line block 5976 destroy_one_line_block(); 5977 5978 # then decide if we want to break after the '}' .. 5979 # We will keep going to allow certain brace followers as in: 5980 # do { $ifclosed = 1; last } unless $losing; 5981 # 5982 # But make a line break if the curly ends a significant block: 5983 if ( $block_type =~ /^(until|while|for|if|elsif|else)$/ ) { 5984 output_line_to_go() unless ($no_internal_newlines); 5985 } 5986 } 5987 5988 # set string indicating what we need to look for brace follower 5989 # tokens 5990 if ( $block_type eq 'do' ) { 5991 $brace_follower_pattern = $do_follower_pattern; 5992 } 5993 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { 5994 $brace_follower_pattern = $if_brace_follower_pattern; 5995 } 5996 elsif ( $block_type eq 'else' ) { 5997 $brace_follower_pattern = $else_brace_follower_pattern; 5998 } 5999 6000 # added eval for borris.t 6001 elsif ( $block_type =~ /^(sort|map|grep|eval)$/ ) { 6002 $brace_follower_pattern = ""; 6003 } 6004 6005 # anonymous sub 6006 elsif ( $block_type =~ /^sub\W*$/ ) { 6007 6008 if ($is_one_line_block) { 6009 $brace_follower_pattern = 6010 $anon_sub_1_brace_follower_pattern; 6011 } 6012 else { 6013 $brace_follower_pattern = 6014 $anon_sub_brace_follower_pattern; 6015 } 6016 } 6017 6018 # None of the above: 6019 # include here everything you would allow to follow a short block 6020 # which is not an if/elsif/else/do/sort/map/grep/eval 6021 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' 6022 # break1.t 6023 else { 6024 $brace_follower_pattern = $other_brace_follower_pattern; 6025 } 6026 6027 # See if an elsif block is followed by another elsif or else; 6028 # complain if not. 6029 if ( $block_type eq 'elsif' ) { 6030 6031 if ( $next_nonblank_token_type eq 'b' ) { # end of line? 6032 $looking_for_else = 1; # ok, check on next line 6033 } 6034 else { 6035 6036 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { 6037 write_logfile_entry("No else block :(\n"); 6038 } 6039 } 6040 } 6041 6042 # Note: continue blocks are always un-cuddled for now, but 6043 # this is the place to allow cuddled continues 6044 6045 # keep going after these block types: map,sort,grep 6046 # added eval for borris.t 6047 if ( $block_type =~ /^(sort|grep|map|eval)$/ ) { 6048 6049 # keep going 6050 } 6051 6052 # if no more tokens, postpone decision until re-entring 6053 elsif ( ( $next_nonblank_token_type eq 'b' ) 6054 && $rOpts->{'add-newlines'} ) 6055 { 6056 unless ($brace_follower_pattern) { 6057 output_line_to_go() unless ($no_internal_newlines); 6058 } 6059 } 6060 6061 elsif ($brace_follower_pattern) { 6062 6063 unless ( $next_nonblank_token =~ /$brace_follower_pattern/ ) 6064 { 6065 output_line_to_go() unless ($no_internal_newlines); 6066 } 6067 $brace_follower_pattern = undef; 6068 } 6069 6070 else { 6071 output_line_to_go() unless ($no_internal_newlines); 6072 } 6073 6074 } # end treatment of closing block token 6075 6076 # handle semicolon 6077 elsif ( $type eq ';' ) { 6078 6079 # kill one-line blocks with too many semicolons 6080 $semicolons_before_block_self_destruct--; 6081 if ( 6082 ( $semicolons_before_block_self_destruct < 0 ) 6083 || ( $semicolons_before_block_self_destruct == 0 6084 && $next_nonblank_token_type !~ /^[b\}]$/ ) 6085 ) 6086 { 6087 destroy_one_line_block(); 6088 } 6089 6090 if ( 6091 ( $last_nonblank_token eq '}' ) 6092 && ( $last_nonblank_block_type =~ 6093 /^(if|else|elsif|unless|while|for|foreach)$/ ) 6094 ) 6095 { 6096 6097 if ( 6098 $rOpts->{'delete-semicolons'} 6099 6100 # don't delete ; before a # because it would promote it 6101 # to a block comment 6102 && ( $next_nonblank_token_type ne '#' ) 6103 ) 6104 { 6105 note_deleted_semicolon(); 6106 output_line_to_go() 6107 unless ( $no_internal_newlines 6108 || $index_start_one_line_block != UNDEFINED_INDEX ); 6109 next; 6110 } 6111 else { 6112 write_logfile_entry("Extra ';'\n"); 6113 } 6114 } 6115 store_token_to_go(); 6116 6117 output_line_to_go() 6118 unless ( $no_internal_newlines 6119 || ( $next_nonblank_token eq '}' ) ); 6120 6121 } 6122 6123 # handle here_doc target string 6124 elsif ( $type eq 'h' ) { 6125 $no_internal_newlines = 6126 1; # no newlines after seeing here-target 6127 destroy_one_line_block(); 6128 store_token_to_go(); 6129 } 6130 6131 # handle all other token types 6132 else { 6133 6134 # if this is a blank... 6135 if ( $type eq 'b' ) { 6136 6137 # make it just one character 6138 $token = ' ' if $rOpts_add_whitespace; 6139 6140 # delete it if unwanted by whitespace rules 6141 # or we are deleting all whitespace 6142 my $ws = $$rwhite_space_flag[ $j + 1 ]; 6143 if ( ( defined($ws) && $ws == -1 ) 6144 || $rOpts_delete_old_whitespace ) 6145 { 6146 6147 # unless it might make a syntax error 6148 next 6149 unless is_essential_whitespace( 6150 $last_last_nonblank_token, 6151 $last_last_nonblank_type, 6152 $tokens_to_go[$max_index_to_go], 6153 $types_to_go[$max_index_to_go], 6154 $$rtokens[ $j + 1 ], 6155 $$rtoken_type[ $j + 1 ] 6156 ); 6157 } 6158 } 6159 store_token_to_go(); 6160 } 6161 6162 # remember two previous nonblank OUTPUT tokens 6163 if ( $type ne '#' && $type ne 'b' ) { 6164 $last_last_nonblank_token = $last_nonblank_token; 6165 $last_last_nonblank_type = $last_nonblank_type; 6166 $last_nonblank_token = $token; 6167 $last_nonblank_type = $type; 6168 $last_nonblank_block_type = $block_type; 6169 } 6170 6171 # unset the continued-quote flag since it only applies to the 6172 # first token, and we want to resume normal formatting if 6173 # there are additional tokens on the line 6174 $in_continued_quote = 0; 6175 6176 } # end of loop over all tokens in this 'line_of_tokens' 6177 6178 # we have to flush .. 6179 if ( 6180 6181 # if there is a side comment 6182 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) 6183 6184 # if this line which ends in a quote 6185 || $in_quote 6186 6187 # to keep a label on one line if that is how it is now 6188 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) ) 6189 6190 # if we are instructed to keep all old line breaks 6191 || !$rOpts->{'delete-old-newlines'} 6192 ) 6193 { 6194 destroy_one_line_block(); 6195 output_line_to_go(); 6196 } 6197 6198 # mark old line breakpoints in current output stream 6199 if ( $max_index_to_go >= 0 && !$rOpts->{'ignore-old-line-breaks'} ) { 6200 $old_breakpoint_to_go[$max_index_to_go] = 1; 6201 } 6202 } 6203} # end closure print_line_of_tokens 6204 6205sub note_added_semicolon { 6206 $last_added_semicolon_at = $input_line_number; 6207 if ( $added_semicolon_count == 0 ) { 6208 $first_added_semicolon_at = $last_added_semicolon_at; 6209 } 6210 $added_semicolon_count++; 6211 write_logfile_entry("Added ';' here\n"); 6212} 6213 6214sub note_deleted_semicolon { 6215 $last_deleted_semicolon_at = $input_line_number; 6216 if ( $deleted_semicolon_count == 0 ) { 6217 $first_deleted_semicolon_at = $last_deleted_semicolon_at; 6218 } 6219 $deleted_semicolon_count++; 6220 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) 6221} 6222 6223sub note_embedded_tab { 6224 $embedded_tab_count++; 6225 $last_embedded_tab_at = $input_line_number; 6226 if ( !$first_embedded_tab_at ) { 6227 $first_embedded_tab_at = $last_embedded_tab_at; 6228 } 6229 6230 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { 6231 write_logfile_entry("Embedded tabs in quote or pattern\n"); 6232 } 6233} 6234 6235sub starting_one_line_block { 6236 6237 # after seeing an opening curly brace, look for the closing brace 6238 # and see if the entire block will fit on a line. This routine is 6239 # not always right because it uses the old whitespace, so a check 6240 # is made later (at the closing brace) to make sure we really 6241 # have a one-line block. We have to do this preliminary check, 6242 # though, because otherwise we would always break at a semicolon 6243 # within a one-line block if the block contains multiple statements. 6244 6245 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, 6246 $rblock_type ) 6247 = @_; 6248 6249 # kill any current block - we can only go 1 deep 6250 destroy_one_line_block(); 6251 6252 # return value: 6253 # 1=distance from start of block to opening brace exceeds line length 6254 # 0=otherwise 6255 6256 my $i_start = 0; 6257 6258 # shouldn't happen: there must have been a prior call to 6259 # store_token_to_go to put the opening brace in the output stream 6260 if ( $max_index_to_go < 0 ) { 6261 warning("program bug: store_token_to_go called incorrectly\n"); 6262 report_definite_bug(); 6263 } 6264 else { 6265 6266 # cannot use one-line blocks with cuddled else else/elsif lines 6267 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { 6268 return 0; 6269 } 6270 } 6271 6272 my $block_type = $$rblock_type[$j]; 6273 6274 # find the starting keyword for this block (such as 'if', 'else', ...) 6275 6276 if ( $block_type =~ /^[\{\}\;\:]$/ ) { 6277 $i_start = $max_index_to_go; 6278 } 6279 6280 elsif ( $last_last_nonblank_token_to_go eq ')' ) { 6281 6282 # For something like "if (xxx) {", the keyword "if" will be 6283 # just after the most recent break. This will be 0 unless 6284 # we have just killed a one-line block and are starting another. 6285 # (doif.t) 6286 $i_start = $index_max_forced_break + 1; 6287 if ( $types_to_go[$i_start] eq 'b' ) { 6288 $i_start++; 6289 } 6290 6291 unless ( $tokens_to_go[$i_start] eq $block_type ) { 6292 return 0; 6293 } 6294 } 6295 6296 # the previous nonblank token should start these block types 6297 elsif ( ( $last_last_nonblank_token_to_go eq $block_type ) 6298 || ( $block_type =~ /^sub/ 6299 && $last_last_nonblank_token_to_go =~ /^sub/ ) ) 6300 { 6301 $i_start = $last_last_nonblank_index_to_go; 6302 } 6303 6304 else { 6305 return 1; 6306 } 6307 6308 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; 6309 6310 my $i; 6311 6312 # see if length is too long to even start 6313 if ( $pos > $rOpts_maximum_line_length ) { 6314 return 1; 6315 } 6316 6317 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { 6318 6319 # old whitespace could be arbitrarily large, so don't use it 6320 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } 6321 else { $pos += length( $$rtokens[$i] ) } 6322 6323 # Return false result if we exceed the maximum line length, 6324 if ( $pos > $rOpts_maximum_line_length ) { 6325 return 0; 6326 } 6327 6328 # or encounter another opening brace before finding the closing brace. 6329 elsif ( $$rtokens[$i] eq '{' 6330 && $$rtoken_type[$i] eq '{' 6331 && $$rblock_type[$i] ) 6332 { 6333 return 0; 6334 } 6335 6336 # if we find our closing brace.. 6337 elsif ( $$rtokens[$i] eq '}' 6338 && $$rtoken_type[$i] eq '}' 6339 && $$rblock_type[$i] ) 6340 { 6341 6342 # be sure any trailing comment also fits on the line 6343 my $i_nonblank = 6344 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; 6345 6346 if ( $$rtoken_type[$i_nonblank] eq '#' ) { 6347 $pos += length( $$rtokens[$i_nonblank] ); 6348 6349 if ( $i_nonblank > $i + 1 ) { 6350 $pos += length( $$rtokens[ $i + 1 ] ); 6351 } 6352 6353 if ( $pos > $rOpts_maximum_line_length ) { 6354 return 0; 6355 } 6356 } 6357 6358 # ok, it's a one-line block 6359 create_one_line_block( $i_start, 20 ); 6360 return 0; 6361 } 6362 6363 # just keep going for other characters 6364 else { 6365 } 6366 } 6367 6368 # Allow certain types of new one-line blocks to form by joining 6369 # input lines. These can be safely done, but for other block types, 6370 # we keep old one-line blocks but do not form new ones. It is not 6371 # always a good idea to make as many one-line blocks as possible, 6372 # so other types are not done. The user can always use -mangle. 6373 if ( $block_type =~ /^(eval|map|grep|sort)/ ) { 6374 create_one_line_block( $i_start, 1 ); 6375 } 6376 6377 return 0; 6378} 6379 6380sub unstore_token_to_go { 6381 6382 # remove most recent token from output stream 6383 if ( $max_index_to_go > 0 ) { 6384 $max_index_to_go--; 6385 } 6386 else { 6387 $max_index_to_go = UNDEFINED_INDEX; 6388 } 6389 6390} 6391 6392sub want_blank_line { 6393 flush(); 6394 $file_writer_object->want_blank_line(); 6395} 6396 6397sub write_unindented_line { 6398 flush(); 6399 $file_writer_object->write_line( $_[0] ); 6400} 6401 6402sub correct_lp_indentation { 6403 6404 # When the -lp option is used, we need to make a last pass through 6405 # each line to correct the indentation positions in case they differ 6406 # from the predictions. This is necessary because perltidy uses a 6407 # predictor/corrector method for aligning with opening parens. The 6408 # predictor is usually good, but sometimes stumbles. The corrector 6409 # tries to patch things up once the actual opening paren locations 6410 # are known. 6411 my ( $ri_first, $ri_last ) = @_; 6412 my $do_not_pad = 0; 6413 6414=pod 6415 6416 Note on flag '$do_not_pad': 6417 We want to avoid a situation like this, where the aligner inserts 6418 whitespace before the '=' to align it with a previous '=', because 6419 otherwise the parens might become mis-aligned in a situation like 6420 this, where the '=' has become aligned with the previous line, 6421 pushing the opening '(' forward beyond where we want it. 6422 6423 $mkFloor::currentRoom = ''; 6424 $mkFloor::c_entry = $c->Entry( 6425 -width => '10', 6426 -relief => 'sunken', 6427 ... 6428 ); 6429 6430 We leave it to the aligner to decide how to do this. 6431 6432=cut 6433 6434 # looking at each line of this batch.. 6435 my $max_line = @$ri_first - 1; 6436 my ( $ibeg, $iend ); 6437 for my $line ( 0 .. $max_line ) { 6438 $ibeg = $$ri_first[$line]; 6439 $iend = $$ri_last[$line]; 6440 6441 # looking at each token in this output line.. 6442 my $i; 6443 foreach $i ( $ibeg .. $iend ) { 6444 6445 # looking for next unvisited indentation item 6446 my $indentation = $leading_spaces_to_go[$i]; 6447 if ( !$indentation->get_MARKED() ) { 6448 $indentation->set_MARKED(1); 6449 6450 # looking for indentation item for which we are aligning 6451 # with parens, braces, and brackets 6452 next unless ( $indentation->get_ALIGN_PAREN() ); 6453 6454 if ( $line == 1 && $i == $ibeg ) { 6455 $do_not_pad = 1; 6456 } 6457 6458 # Ok, let's see what the error is and try to fix it 6459 my $actual_pos; 6460 my $predicted_pos = $indentation->get_SPACES(); 6461 if ( $i > $ibeg ) { 6462 6463 # token is mid-line - use length to previous token 6464 $actual_pos = total_line_length( $ibeg, $i - 1 ); 6465 } 6466 elsif ( $line > 0 ) { 6467 6468 # handle case where token starts a new line; 6469 # use length of previous line 6470 my $ibegm = $$ri_first[ $line - 1 ]; 6471 my $iendm = $$ri_last[ $line - 1 ]; 6472 $actual_pos = total_line_length( $ibegm, $iendm ); 6473 6474 # follow -pt style 6475 ++$actual_pos if $types_to_go[ $iendm + 1 ] eq 'b'; 6476 } 6477 else { 6478 6479 # token is first character of first line of batch 6480 $actual_pos = $predicted_pos; 6481 } 6482 6483 my $move_right = $actual_pos - $predicted_pos; 6484 6485 # done if no error to correct (gnu2.t) 6486 # next unless ($move_right); 6487 if ( $move_right == 0 ) { 6488 $indentation->set_RECOVERABLE_SPACES($move_right); 6489 next; 6490 } 6491 6492 # if we have not seen closure for this indentation in 6493 # this batch, we can only pass on a request to the 6494 # vertical aligner 6495 my $closing_index = $indentation->get_CLOSED(); 6496 6497 if ( $closing_index < 0 ) { 6498 $indentation->set_RECOVERABLE_SPACES($move_right); 6499 next; 6500 } 6501 6502 # If necessary, look ahead to see if there is really any 6503 # leading whitespace dependent on this whitespace, and 6504 # also find the longest line using this whitespace. 6505 # Since it is always safe to move left if there are no 6506 # dependents, we only need to do this if we may have 6507 # dependent nodes or need to move right. 6508 6509 my $right_margin = 0; 6510 my $have_child = $indentation->get_HAVE_CHILD(); 6511 if ( $have_child || $move_right > 0 ) { 6512 $have_child = 0; 6513 my $max_length = 0; 6514 if ( $i == $ibeg ) { 6515 $max_length = total_line_length( $ibeg, $iend ); 6516 } 6517 6518 # look ahead at the rest of the lines of this batch.. 6519 my $line_t; 6520 foreach $line_t ( $line + 1 .. $max_line ) { 6521 my $ibeg_t = $$ri_first[$line_t]; 6522 my $iend_t = $$ri_last[$line_t]; 6523 last if ( $closing_index <= $ibeg_t ); 6524 6525 # If this is a dependent we will not move the text; 6526 # it is very rare to get here and if we do it implies 6527 # a short complicated statement that will probably 6528 # look ok unchanged. 6529 my $indentation_t = $leading_spaces_to_go[$ibeg_t]; 6530 if ( $indentation_t != $indentation ) { 6531 $have_child = 1; 6532 last; 6533 } 6534 my $length_t = total_line_length( $ibeg_t, $iend_t ); 6535 6536 if ( $length_t > $max_length ) { 6537 $max_length = $length_t; 6538 } 6539 } 6540 $right_margin = $rOpts_maximum_line_length - $max_length; 6541 if ( $right_margin < 0 ) { $right_margin = 0 } 6542 } 6543 6544 # This is a simple approximate test for vertical alignment: 6545 # if we broke just after an opening paren, brace, bracket, 6546 # then we are probably vertically aligned. We could 6547 # set an exact flag in sub scan_list, but this is good 6548 # enough. 6549 my $is_vertically_aligned = ( $i == $ibeg ); 6550 6551 # Make the move if possible .. 6552 # Must not be any dependent indentation.. 6553 if ( 6554 !$have_child 6555 6556 # we can always move left, but we can only move right if 6557 # we are sure it will not spoil vertical alignment, 6558 # which is implied by ending commas 6559 && ( $move_right < 0 || !$is_vertically_aligned ) 6560 ) 6561 { 6562 my $move = 6563 ( $move_right <= $right_margin ) 6564 ? $move_right 6565 : $right_margin; 6566 $indentation->permanently_decrease_AVAILABLE_SPACES( 6567 -$move ); 6568 } 6569 6570 # Otherwise, record what we want and the vertical aligner 6571 # will try to recover it. 6572 else { 6573 $indentation->set_RECOVERABLE_SPACES($move_right); 6574 } 6575 } 6576 } 6577 } 6578 return $do_not_pad; 6579} 6580 6581# flush is called to output any tokens in the pipeline, so that 6582# an alternate source of lines can be written in the correct order 6583 6584sub flush { 6585 destroy_one_line_block(); 6586 output_line_to_go(); 6587 PerlTidy::VerticalAligner::flush(); 6588} 6589 6590# output_line_to_go sends one logical line of tokens on down the 6591# pipeline to the VerticalAligner package, breaking the line into continuation 6592# lines as necessary. The line of tokens is ready to go in the "to_go" 6593# arrays. 6594 6595sub output_line_to_go { 6596 6597 # debug stuff; this routine can be called from many points 6598 FORMATTER_DEBUG_FLAG_OUTPUT && do { 6599 my ( $a, $b, $c ) = caller; 6600 write_diagnostics( 6601"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" 6602 ); 6603 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; 6604 write_diagnostics("$output_str\n"); 6605 }; 6606 6607 # just set a tentative breakpoint if we might be in a one-line block 6608 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 6609 set_forced_breakpoint($max_index_to_go); 6610 return; 6611 } 6612 6613 my $cscw_block_comment; 6614 $cscw_block_comment = add_closing_side_comment() 6615 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); 6616 6617 match_opening_and_closing_tokens(); 6618 6619 # tell the -lp option we are outputting a batch so it can close 6620 # any unfinished items in its stack 6621 finish_lp_batch(); 6622 6623 my $imin = 0; 6624 my $imax = $max_index_to_go; 6625 6626 # trim any blank tokens 6627 if ( $max_index_to_go >= 0 ) { 6628 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 6629 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 6630 } 6631 6632 # anything left to write? 6633 if ( $imin <= $imax ) { 6634 6635 # add a blank line before certain key types 6636 if ( $last_line_leading_type !~ /^[#b]/ ) { 6637 my $want_blank = 0; 6638 my $leading_token = $tokens_to_go[$imin]; 6639 my $leading_type = $types_to_go[$imin]; 6640 6641 # blank lines before subs except declarations and one-liners 6642 # MCONVERSION LOCATION - for sub tokenization change 6643 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { 6644 $want_blank = ( $rOpts->{'blanks-before-subs'} ) 6645 && ( 6646 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 6647 $imax ) !~ /^[\;\}]$/ 6648 ); 6649 } 6650 6651 # break before all package declarations 6652 # MCONVERSION LOCATION - for tokenizaton change 6653 elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) { 6654 $want_blank = ( $rOpts->{'blanks-before-subs'} ); 6655 } 6656 6657 # break before certain key blocks except one-liners 6658 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { 6659 $want_blank = ( $rOpts->{'blanks-before-subs'} ) 6660 && ( 6661 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 6662 $imax ) ne '}' 6663 ); 6664 } 6665 6666 # Break before certain block types if we haven't had a break at this 6667 # level for a while. This is the difficult decision.. 6668 elsif ( $leading_token =~ /^(unless|if|while|until|for|foreach)$/ 6669 && $leading_type eq 'k' 6670 && $last_line_leading_level >= 0 ) 6671 { 6672 my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; 6673 if ( !defined($lc) ) { $lc = 0 } 6674 6675 $want_blank = 6676 $rOpts->{'blanks-before-blocks'} 6677 && $lc >= $rOpts->{'long-block-line-count'} 6678 && $file_writer_object->get_consecutive_nonblank_lines() >= 6679 $rOpts->{'long-block-line-count'} 6680 && ( 6681 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 6682 $imax ) ne '}' 6683 ); 6684 } 6685 6686 if ($want_blank) { 6687 6688 # future: send blank line down normal path to VerticalAligner 6689 PerlTidy::VerticalAligner::flush(); 6690 $file_writer_object->write_blank_code_line(); 6691 } 6692 } 6693 6694 # update blank line variables and count number of consecutive 6695 # non-blank, non-comment lines at this level 6696 $last_last_line_leading_level = $last_line_leading_level; 6697 $last_line_leading_level = $levels_to_go[$imin]; 6698 $last_line_leading_type = $types_to_go[$imin]; 6699 if ( $last_line_leading_level == $last_last_line_leading_level 6700 && $last_line_leading_level >= 0 6701 && $last_line_leading_type ne 'b' 6702 && $last_line_leading_type ne '#' 6703 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) 6704 { 6705 $nonblank_lines_at_depth[$last_line_leading_level]++; 6706 } 6707 else { 6708 $nonblank_lines_at_depth[$last_line_leading_level] = 1; 6709 } 6710 6711 FORMATTER_DEBUG_FLAG_FLUSH && do { 6712 my ( $package, $file, $line ) = caller; 6713 print 6714"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; 6715 }; 6716 6717 # add a couple of extra terminal blank tokens 6718 pad_array_to_go(); 6719 6720 # set all forced breakpoints for good list formatting 6721 my $saw_good_break = 0; 6722 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; 6723 if ( 6724 $max_index_to_go > 0 6725 && ( $is_long_line 6726 || $old_line_count_in_batch > 1 6727 || is_unbalanced_batch() ) 6728 ) 6729 { 6730 $saw_good_break = scan_list(); 6731 } 6732 6733 # let $ri_first and $ri_last be references to lists of 6734 # first and last tokens of line fragments to output.. 6735 my ( $ri_first, $ri_last ); 6736 6737 # write a single line if.. 6738 if ( 6739 6740 # we aren't allowed to add any newlines 6741 !$rOpts->{'add-newlines'} 6742 6743 # or, we don't already have an interior breakpoint 6744 # and we didn't see a good breakpoint 6745 || ( 6746 !$forced_breakpoint_count 6747 && !$saw_good_break 6748 6749 # and this line is 'short' 6750 && !$is_long_line 6751 ) 6752 ) 6753 { 6754 @$ri_first = ($imin); 6755 @$ri_last = ($imax); 6756 } 6757 6758 # otherwise use multiple lines 6759 else { 6760 6761 ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break); 6762 6763 # now we do a correction step to clean this up a bit 6764 # (The only time we would not do this is for debugging) 6765 if ( $rOpts->{'recombine'} ) { 6766 ( $ri_first, $ri_last ) = 6767 recombine_breakpoints( $ri_first, $ri_last ); 6768 } 6769 } 6770 6771 # do corrector step if -lp option is used 6772 my $do_not_pad = 0; 6773 if ($rOpts_line_up_parentheses) { 6774 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); 6775 } 6776 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); 6777 } 6778 prepare_for_new_input_lines(); 6779 6780 # output any new -cscw block comment 6781 if ($cscw_block_comment) { 6782 flush(); 6783 $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); 6784 } 6785} 6786 6787sub reset_block_text_accumulator { 6788 $accumulating_text_for_block = ""; 6789 $leading_block_text = ""; 6790 $leading_block_text_level = 0; 6791 $leading_block_text_length_exceeded = 0; 6792 $leading_block_text_line_number = 0; 6793} 6794 6795sub set_block_text_accumulator { 6796 my $i = shift; 6797 $accumulating_text_for_block = $tokens_to_go[$i]; 6798 $leading_block_text = ""; 6799 $leading_block_text_level = $levels_to_go[$i]; 6800 $leading_block_text_line_number = 6801 $vertical_aligner_object->get_output_line_number(); 6802 $leading_block_text_length_exceeded = 0; 6803} 6804 6805sub accumulate_block_text { 6806 my $i = shift; 6807 6808 # accumulate leading text, but ignore any side comments 6809 if ( $accumulating_text_for_block 6810 && !$leading_block_text_length_exceeded 6811 && $types_to_go[$i] ne '#' ) 6812 { 6813 6814 # do not add more characters than allowed 6815 if ( 6816 length($leading_block_text) < 6817 $rOpts->{'closing-side-comment-maximum-text'} ) 6818 { 6819 6820 # add an extra space at each newline 6821 if ( $i == 0 ) { 6822 $leading_block_text .= ' '; 6823 } 6824 6825 # add the token text 6826 $leading_block_text .= $tokens_to_go[$i]; 6827 } 6828 6829 # show that text was truncated if necessary 6830 elsif ( $types_to_go[$i] ne 'b' ) { 6831 $leading_block_text_length_exceeded = 1; 6832 $leading_block_text .= '...'; 6833 } 6834 } 6835} 6836 6837sub accumulate_csc_text { 6838 6839 # called once per output buffer when -csc is used. Accumulates 6840 # the text placed after certain closing block braces. 6841 # Defines and returns the following for this buffer: 6842 6843 my $block_leading_text = ""; # the leading text of the last '}' 6844 my $i_block_leading_text = -1; # index of token owning block_leading_text 6845 my $block_line_count = 100; # how many lines the block spans 6846 my $terminal_type = 'b'; # type of last nonblank token 6847 my $i_terminal = 0; # index of last nonblank token 6848 6849 for my $i ( 0 .. $max_index_to_go ) { 6850 my $type = $types_to_go[$i]; 6851 my $block_type = $block_type_to_go[$i]; 6852 my $token = $tokens_to_go[$i]; 6853 6854 # remember last nonblank token type 6855 if ( $type ne '#' && $type ne 'b' ) { 6856 $terminal_type = $type; 6857 $i_terminal = $i; 6858 } 6859 6860 my $type_sequence = $type_sequence_to_go[$i]; 6861 if ( $block_type && $type_sequence ) { 6862 6863 if ( $token eq '}' ) { 6864 6865 if ( defined( $block_leading_text{$type_sequence} ) ) { 6866 $block_leading_text = $block_leading_text{$type_sequence}; 6867 $i_block_leading_text = $i; 6868 delete $block_leading_text{$type_sequence}; 6869 } 6870 6871 # if we run into a '}' then we probably started accumulating 6872 # at something like a trailing 'if' clause..no harm done. 6873 if ( $accumulating_text_for_block 6874 && $levels_to_go[$i] <= $leading_block_text_level ) 6875 { 6876 my $lev = $levels_to_go[$i]; 6877 reset_block_text_accumulator(); 6878 } 6879 6880 if ( defined( $block_opening_line_number{$type_sequence} ) ) { 6881 my $output_line_number = 6882 $vertical_aligner_object->get_output_line_number(); 6883 $block_line_count = $output_line_number - 6884 $block_opening_line_number{$type_sequence} + 1; 6885 delete $block_opening_line_number{$type_sequence}; 6886 } 6887 else { 6888 6889 # This can happen in the unusual case where a $VERSION line 6890 # has been quoted to keep MakeMaker happy. It is not a 6891 # significant problem. 6892 # warning( 6893 # "Note: block opening line undefined for this line\n"); 6894 } 6895 } 6896 6897 elsif ( $token eq '{' ) { 6898 6899 my $line_number = 6900 $vertical_aligner_object->get_output_line_number(); 6901 $block_opening_line_number{$type_sequence} = $line_number; 6902 6903 if ( $accumulating_text_for_block 6904 && $levels_to_go[$i] == $leading_block_text_level ) 6905 { 6906 if ( $accumulating_text_for_block eq $block_type ) { 6907 $block_leading_text{$type_sequence} = 6908 $leading_block_text; 6909 $block_opening_line_number{$type_sequence} = 6910 $leading_block_text_line_number; 6911 reset_block_text_accumulator(); 6912 } 6913 else { 6914 6915 # shouldn't happen, but not a serious error. 6916 # We were accumulating -csc text for block type 6917 # $accumulating_text_for_block and unexpectedly 6918 # encountered a '{' for block type $block_type. 6919 } 6920 } 6921 6922 } 6923 } 6924 6925 if ( $type eq 'k' 6926 && $csc_new_statement_ok 6927 && $token =~ /^(if|elsif|unless|while|until|for|foreach)$/ 6928 && $token =~ /$closing_side_comment_list_pattern/o ) 6929 { 6930 set_block_text_accumulator($i); 6931 } 6932 else { 6933 6934 # note: ignoring type 'q' because of tricks being played with 'q' 6935 # for hanging side comments and $VERSION 6936 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { 6937 $csc_new_statement_ok = 6938 ( $block_type || $type eq 'J' || $type eq ';' ); 6939 } 6940 if ( $type eq ';' 6941 && $accumulating_text_for_block 6942 && $levels_to_go[$i] == $leading_block_text_level ) 6943 { 6944 reset_block_text_accumulator(); 6945 } 6946 else { 6947 accumulate_block_text($i); 6948 } 6949 } 6950 } 6951 return ( $terminal_type, $i_terminal, $i_block_leading_text, 6952 $block_leading_text, $block_line_count ); 6953} 6954 6955sub add_closing_side_comment { 6956 6957 # add closing side comments after closing block braces if -csc used 6958 my $cscw_block_comment; 6959 6960 #--------------------------------------------------------------- 6961 # Step 1: loop through all tokens of this line to accumulate 6962 # the text needed to create the closing side comments. Also see 6963 # how the line ends. 6964 #--------------------------------------------------------------- 6965 6966 my ( $terminal_type, $i_terminal, $i_block_leading_text, 6967 $block_leading_text, $block_line_count ) 6968 = accumulate_csc_text(); 6969 6970 #--------------------------------------------------------------- 6971 # Step 2: make the closing side comment if this ends a block 6972 #--------------------------------------------------------------- 6973 6974 # if this line might end in a block closure.. 6975 if ( 6976 $terminal_type eq '}' 6977 6978 # .. and the block is not too short 6979 && ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) 6980 6981 # .. and if this is one of the types of interest 6982 && $block_type_to_go[$i_terminal] =~ 6983 /$closing_side_comment_list_pattern/o 6984 6985 # ..and the corresponding opening brace must is not in this batch 6986 # (because we do not need to tag one-line blocks, although this 6987 # should also be caught with a positive -csci value) 6988 && $mate_index_to_go[$i_terminal] < 0 6989 6990 # ..and either 6991 && ( 6992 6993 # this is the last token (line doesnt have a side comment) 6994 $i_terminal eq $max_index_to_go 6995 6996 # or the old side comment is a closing side comment 6997 || $tokens_to_go[$max_index_to_go] =~ 6998 /$closing_side_comment_prefix_pattern/o 6999 ) 7000 ) 7001 { 7002 7003 # then make the closing side comment text 7004 my $token = 7005"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; 7006 7007 # append any extra descriptive text collected above 7008 if ( $i_block_leading_text == $i_terminal ) { 7009 $token .= $block_leading_text; 7010 } 7011 $token =~ s/\s*$//; # trim any trailing whitespace 7012 7013 # handle case of existing closing side comment 7014 if ( $i_terminal != $max_index_to_go ) { 7015 7016 # warn if requested and tokens differ significantly 7017 if ( $rOpts->{'closing-side-comment-warnings'} ) { 7018 my $old_csc = $tokens_to_go[$max_index_to_go]; 7019 my $new_csc = $token; 7020 $new_csc =~ s/\.\.\.\s*$//; # trim trailing '...' 7021 $old_csc =~ s/\.\.\.\s*$//; 7022 $new_csc =~ s/\s+//g; # trim all whitespace 7023 $old_csc =~ s/\s+//g; 7024 7025 # no problem if old comment is contained in new comment 7026 if ( length($new_csc) > length($old_csc) ) { 7027 $new_csc = substr( $new_csc, 0, length($old_csc) ); 7028 } 7029 7030 # any remaining difference? 7031 if ( $new_csc ne $old_csc ) { 7032 warning( 7033"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" 7034 ); 7035 7036 # save the old side comment in a new trailing block comment 7037 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; 7038 $year += 1900; 7039 $month += 1; 7040 $cscw_block_comment = 7041"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; 7042 } 7043 } 7044 7045 # switch to the new csc 7046 $tokens_to_go[$max_index_to_go] = $token; 7047 } 7048 7049 # handle case of NO existing closing side comment 7050 else { 7051 7052 # insert the new side comment into the output token stream 7053 my $type = '#'; 7054 my $block_type = ''; 7055 my $type_sequence = ''; 7056 my $container_environment = 7057 $container_environment_to_go[$max_index_to_go]; 7058 my $level = $levels_to_go[$max_index_to_go]; 7059 my $slevel = $nesting_depth_to_go[$max_index_to_go]; 7060 my $no_internal_newlines = 0; 7061 7062 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 7063 my $ci_level = $ci_levels_to_go[$max_index_to_go]; 7064 my $in_continued_quote = 0; 7065 7066 # first insert a blank token 7067 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); 7068 7069 # then the side comment 7070 insert_new_token_to_go( $token, $type, $slevel, 7071 $no_internal_newlines ); 7072 } 7073 } 7074 return $cscw_block_comment; 7075} 7076 7077sub send_lines_to_vertical_aligner { 7078 7079 my ( $ri_first, $ri_last, $do_not_pad ) = @_; 7080 7081 my $rindentation_list = [0]; # ref to indentations for each line 7082 7083 set_vertical_alignment_markers( $ri_first, $ri_last ); 7084 7085 # flush if necessary to avoid unwanted alignment 7086 my $must_flush = 0; 7087 if ( @$ri_first > 1 ) { 7088 7089 # flush before a long if statement 7090 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { 7091 $must_flush = 1; 7092 } 7093 } 7094 if ($must_flush) { 7095 PerlTidy::VerticalAligner::flush(); 7096 } 7097 7098## TESTING - in preparation for future update to adjust ci_levels in 7099## certain cases for better alignment. 7100## my @levs = @levels_to_go[ @$ri_first[0..@$ri_first-1]]; 7101## my @cilevs = @ci_levels_to_go[ @$ri_first[0..@$ri_first-1]]; 7102## print "BUB: levs are:(@levs) ci=(@cilevs)\n"; 7103 7104 # loop to prepare each line for shipment 7105 for my $n ( 0 .. @$ri_first - 1 ) { 7106 my $ibeg = $$ri_first[$n]; 7107 my $iend = $$ri_last[$n]; 7108 7109 my @patterns = (); 7110 my @tokens = (); 7111 my @fields = (); 7112 my $i_start = $ibeg; 7113 my $i; 7114 7115 my $j = 0; # field index 7116 7117 $patterns[0] = ""; 7118 for $i ( $ibeg .. $iend ) { 7119 7120 # if we find a new synchronization token, we are done with 7121 # a field 7122 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { 7123 7124 # make separators in different nesting depths unique 7125 # by appending the nesting depth digit. 7126 my $tok = $matching_token_to_go[$i]; 7127 if ( $tok ne '#' ) { 7128 $tok .= "$nesting_depth_to_go[$i]"; 7129 } 7130 7131 # concatenate the text of the consecutive tokens to form 7132 # the field 7133 push ( @fields, 7134 join ( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); 7135 7136 # store the alignment token for this field 7137 push ( @tokens, $tok ); 7138 7139 # get ready for the next batch 7140 $i_start = $i; 7141 $j++; 7142 $patterns[$j] = ""; 7143 } 7144 7145 # continue accumulating tokens 7146 # handle non-keywords.. 7147 if ( $types_to_go[$i] ne 'k' ) { 7148 my $type = $types_to_go[$i]; 7149 7150 # Mark most things before arrows as a quote to 7151 # get them to line up. Testfile: mixed.pl. 7152 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { 7153 my $next_type = $types_to_go[ $i + 1 ]; 7154 my $i_next_nonblank = 7155 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 7156 7157 if ( $types_to_go[$i_next_nonblank] eq '=>' ) { 7158 $type = 'Q'; 7159 } 7160 } 7161 7162 # minor patch to make numbers and quotes align 7163 if ( $type eq 'n' ) { $type = 'Q' } 7164 7165 $patterns[$j] .= $type; 7166 } 7167 7168 # for keywords we have to use the actual text 7169 else { 7170 7171 # map certain keywords to the same 'if' class to align 7172 # long if/elsif sequences. my testfile: elsif.pl 7173 my $tok = $tokens_to_go[$i]; 7174 if ( $n == 0 ) { 7175 if ( $tok eq 'elsif' ) { $tok = 'if' } 7176 if ( $tok eq 'else' ) { $tok = 'if' } 7177 if ( $tok eq 'unless' ) { $tok = 'if' } 7178 } 7179 $patterns[$j] .= $tok; 7180 } 7181 } 7182 7183 # done with this line .. join text of tokens to make the last field 7184 push ( @fields, join ( '', @tokens_to_go[ $i_start .. $iend ] ) ); 7185 7186 my ( $indentation, $lev, $level_end, $is_semicolon_terminated, 7187 $is_outdented_line ) 7188 = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns, 7189 $ri_first, $ri_last, $rindentation_list ); 7190 7191 # we will allow outdenting of long lines.. 7192 my $outdent_long_lines = ( 7193 7194 # which are long quotes, if allowed 7195 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) 7196 7197 # which are long block comments, if allowed 7198 || ( 7199 $types_to_go[$ibeg] eq '#' 7200 && $rOpts->{'outdent-long-comments'} 7201 7202 # but not if this is a static block comment 7203 && !( 7204 $rOpts->{'static-block-comments'} 7205 && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o 7206 ) 7207 ) 7208 ); 7209 7210 # flush an outdented line to avoid any unwanted vertical alignment 7211 PerlTidy::VerticalAligner::flush() if ($is_outdented_line); 7212 7213 # send this new line down the pipe 7214 PerlTidy::VerticalAligner::append_line( 7215 $lev, $level_end, 7216 $indentation, \@fields, 7217 \@tokens, \@patterns, 7218 $forced_breakpoint_to_go[$iend], $outdent_long_lines, 7219 $is_semicolon_terminated, $do_not_pad 7220 ); 7221 7222 # flush an outdented line to avoid any unwanted vertical alignment 7223 PerlTidy::VerticalAligner::flush() if ($is_outdented_line); 7224 7225 $do_not_pad = 0; 7226 7227 } # end of loop to output each line 7228 7229 # remember indentation of lines containing opening containers for 7230 # later use by sub set_adjusted_indentation 7231 save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); 7232} 7233 7234{ # begin closure unmatched_indexes 7235 7236 # closure to keep track of unbalanced containers. 7237 # arrays shared by the routines in this block: 7238 my @unmatched_opening_indexes_in_this_batch; 7239 my @unmatched_closing_indexes_in_this_batch; 7240 7241 sub is_unbalanced_batch { 7242 @unmatched_opening_indexes_in_this_batch + 7243 @unmatched_closing_indexes_in_this_batch; 7244 } 7245 7246 sub match_opening_and_closing_tokens { 7247 7248 # Match up indexes of opening and closing braces, etc, in this batch. 7249 # This has to be done after all tokens are stored because unstoring 7250 # of tokens would otherwise cause trouble. 7251 7252 @unmatched_opening_indexes_in_this_batch = (); 7253 @unmatched_closing_indexes_in_this_batch = (); 7254 7255 my ( $i, $i_mate, $token ); 7256 foreach $i ( 0 .. $max_index_to_go ) { 7257 if ( $type_sequence_to_go[$i] ) { 7258 $token = $tokens_to_go[$i]; 7259 if ( $token =~ /^[\(\[\{\?]$/ ) { 7260 push @unmatched_opening_indexes_in_this_batch, $i; 7261 } 7262 elsif ( $token =~ /^[\)\]\}\:]$/ ) { 7263 7264 $i_mate = pop @unmatched_opening_indexes_in_this_batch; 7265 if ( defined($i_mate) && $i_mate >= 0 ) { 7266 if ( $type_sequence_to_go[$i_mate] == 7267 $type_sequence_to_go[$i] ) 7268 { 7269 $mate_index_to_go[$i] = $i_mate; 7270 $mate_index_to_go[$i_mate] = $i; 7271 } 7272 else { 7273 push @unmatched_opening_indexes_in_this_batch, 7274 $i_mate; 7275 push @unmatched_closing_indexes_in_this_batch, $i; 7276 } 7277 } 7278 else { 7279 push @unmatched_closing_indexes_in_this_batch, $i; 7280 } 7281 } 7282 } 7283 } 7284 } 7285 7286 sub save_opening_indentation { 7287 7288 # This should be called after each batch of tokens is output. It 7289 # saves indentations of lines of all unmatched opening tokens. 7290 # These will be used by sub get_opening_indentation. 7291 7292 my ( $ri_first, $ri_last, $rindentation_list ) = @_; 7293 7294 # we no longer need indentations of any saved indentations which 7295 # are unmatched closing tokens in this batch, because we will 7296 # never encounter them again. So we can delete them to keep 7297 # the hash size down. 7298 foreach (@unmatched_closing_indexes_in_this_batch) { 7299 my $seqno = $type_sequence_to_go[$_]; 7300 delete $saved_opening_indentation{$seqno}; 7301 } 7302 7303 # we need to save indentations of any unmatched opening tokens 7304 # in this batch because we may need them in a subsequent batch. 7305 foreach (@unmatched_opening_indexes_in_this_batch) { 7306 my $seqno = $type_sequence_to_go[$_]; 7307 $saved_opening_indentation{$seqno} = [ 7308 lookup_opening_indentation( 7309 $_, $ri_first, 7310 $ri_last, $rindentation_list 7311 ) 7312 ]; 7313 } 7314 } 7315} # end closure unmatched_indexes 7316 7317sub get_opening_indentation { 7318 7319 # get the indentation of the line which output the opening token 7320 # corresponding to a given closing token in the current output batch. 7321 # 7322 # given: 7323 # $i_closing - index in this line of a closing token ')' '}' or ']' 7324 # 7325 # $ri_first - reference to list of the first index $i for each output 7326 # line in this batch 7327 # $ri_last - reference to list of the last index $i for each output line 7328 # in this batch 7329 # $rindentation_list - reference to a list containing the indentation 7330 # used for each line. 7331 # 7332 # return: 7333 # -the indentation of the line which contained the opening token 7334 # which matches the token at index $i_opening 7335 # -and its offset (number of columns) from the start of the line 7336 # 7337 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; 7338 7339 # first, see if the opening token is in the current batch 7340 my $i_opening = $mate_index_to_go[$i_closing]; 7341 my ( $indent, $offset ); 7342 if ( $i_opening >= 0 ) { 7343 7344 # it is..look up the indentation 7345 ( $indent, $offset ) = 7346 lookup_opening_indentation( $i_opening, $ri_first, $ri_last, 7347 $rindentation_list ); 7348 } 7349 7350 # if not, it should have been stored in the hash by a previous batch 7351 else { 7352 my $seqno = $type_sequence_to_go[$i_closing]; 7353 if ($seqno) { 7354 if ( $saved_opening_indentation{$seqno} ) { 7355 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} }; 7356 } 7357 } 7358 } 7359 return ( $indent, $offset ); 7360} 7361 7362sub lookup_opening_indentation { 7363 7364 # get the indentation of the line in the current output batch 7365 # which output a selected opening token 7366 # 7367 # given: 7368 # $i_opening - index of an opening token in the current output batch 7369 # whose line indentation we need 7370 # $ri_first - reference to list of the first index $i for each output 7371 # line in this batch 7372 # $ri_last - reference to list of the last index $i for each output line 7373 # in this batch 7374 # $rindentation_list - reference to a list containing the indentation 7375 # used for each line. (NOTE: the first slot in 7376 # this list is the last returned line number, and this is 7377 # followed by the list of indentations). 7378 # 7379 # return 7380 # -the indentation of the line which contained token $i_opening 7381 # -and its offset (number of columns) from the start of the line 7382 7383 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; 7384 7385 my $nline = $rindentation_list->[0]; # line number of previous lookup 7386 7387 # reset line location if necessary 7388 $nline = 0 if ( $i_opening < $ri_start->[$nline] ); 7389 7390 # find the correct line 7391 my $nmax = $#{$ri_last}; 7392 unless ( $i_opening > $ri_last->[$nmax] ) { 7393 while ( $i_opening > $ri_last->[$nline] ) { $nline++; } 7394 } 7395 7396 # error - token index is out of bounds - shouldn't happen 7397 else { 7398 warning( 7399"non-fatal program bug in lookup_opening_indentation - index out of range\n" 7400 ); 7401 report_definite_bug(); 7402 $nline = $nmax; 7403 } 7404 7405 $rindentation_list->[0] = 7406 $nline; # save line number to start looking next call 7407 my $ibeg = $ri_start->[$nline]; 7408 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; 7409 return ( $rindentation_list->[ $nline + 1 ], $offset ); 7410} 7411 7412sub set_adjusted_indentation { 7413 7414 # This routine has the final say regarding the actual indentation of 7415 # a line. It starts with the basic indentation which has been 7416 # defined for the leading token, and then takes into account any 7417 # options that the user has set regarding special indenting and 7418 # outdenting. 7419 7420 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, 7421 $rindentation_list ) 7422 = @_; 7423 7424 # we need to know the last token of this line 7425 my ( $terminal_type, $i_terminal ) = 7426 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); 7427 7428 my $is_outdented_line = 0; 7429 7430 my $is_semicolon_terminated = 7431 $terminal_type eq ';' 7432 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; 7433 7434 # Most lines are indented according to the initial token. 7435 # But it is common to outdent to the level just after the 7436 # terminal token in certain cases... 7437 # adjust_indentation flag: 7438 # 0 - do not adjust 7439 # 1 - outdent 7440 # -1 - indent 7441 my $adjust_indentation = 0; 7442 7443 my ( $opening_indentation, $opening_offset ); 7444 7445 # if we are at a closing token of some type.. 7446 if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) { 7447 7448 # get the indentation of the line containing the corresponding 7449 # opening token 7450 ( $opening_indentation, $opening_offset ) = 7451 get_opening_indentation( $ibeg, $ri_first, $ri_last, 7452 $rindentation_list ); 7453 7454 # First set the default behavior: 7455 # default behavior is to outdent closing lines 7456 # of the form: "); }; ]; )->xxx;" 7457 if ( 7458 $is_semicolon_terminated 7459 7460 # and 'cuddled parens' of the form: ")->pack(" 7461 || ( 7462 $terminal_type eq '(' 7463 && $types_to_go[$ibeg] eq ')' 7464 && ( $nesting_depth_to_go[$iend] + 1 == 7465 $nesting_depth_to_go[$ibeg] ) 7466 ) 7467 ) 7468 { 7469 $adjust_indentation = 1; 7470 } 7471 7472 # TESTING: outdent something like '),' 7473 if ( 7474 $terminal_type eq ',' 7475 7476 # allow just one character before the comma 7477 && $i_terminal == $ibeg + 1 7478 7479 # requre LIST environment; otherwise, we may outdent too much -- 7480 # this can happen in calls without parentheses (overload.t); 7481 && $container_environment_to_go[$i_terminal] eq 'LIST' 7482 ) 7483 { 7484 $adjust_indentation = 1; 7485 } 7486 7487 # undo continuation indentation of a terminal closing token if 7488 # it is the last token before a level decrease. This will allow 7489 # a closing token to line up with its opening counterpart, and 7490 # avoids a indentation jump larger than 1 level. 7491 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ 7492 && $i_terminal == $ibeg ) 7493 { 7494 my $ci = $ci_levels_to_go[$ibeg]; 7495 my $lev = $levels_to_go[$ibeg]; 7496 my $next_type = $types_to_go[ $ibeg + 1 ]; 7497 my $i_next_nonblank = 7498 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); 7499 if ( $i_next_nonblank <= $max_index_to_go 7500 && $levels_to_go[$i_next_nonblank] < $lev ) 7501 { 7502 $adjust_indentation = 1; 7503 } 7504 } 7505 7506 # Now modify default behavior according to user request: 7507 # handle option to indent non-blocks of the form ); }; ]; 7508 if ( !$block_type_to_go[$ibeg] ) { 7509 if ( $rOpts->{'indent-closing-paren'} 7510 && $is_semicolon_terminated 7511 && $i_terminal == $ibeg + 1 ) 7512 { 7513 $adjust_indentation = -1; 7514 } 7515 } 7516 7517 # handle option to indent blocks 7518 else { 7519 if ( 7520 $rOpts->{'indent-closing-brace'} 7521 && ( 7522 $i_terminal == $ibeg # isolated terminal '}' 7523 || $is_semicolon_terminated 7524 ) 7525 ) # } xxxx ; 7526 { 7527 $adjust_indentation = -1; 7528 } 7529 } 7530 } 7531 7532 # if at ');', '};', '>;', and '];' of a terminal qw quote 7533 elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^[\)\}\]\>];$/ ) { 7534 if ( !$rOpts->{'indent-closing-paren'} ) { 7535 $adjust_indentation = 1; 7536 } 7537 else { 7538 $adjust_indentation = -1; 7539 } 7540 } 7541 7542 # Handle variation in indentation styles... 7543 # Select the indentation object to define leading 7544 # whitespace. If we are outdenting something like '} } );' 7545 # then we want to use one level below the last token 7546 # ($i_terminal) in order to get it to fully outdent through 7547 # all levels. 7548 my $indentation; 7549 my $lev; 7550 my $level_end = $levels_to_go[$iend]; 7551 7552 if ( $adjust_indentation == 0 ) { 7553 $indentation = $leading_spaces_to_go[$ibeg]; 7554 $lev = $levels_to_go[$ibeg]; 7555 } 7556 elsif ( $adjust_indentation == 1 ) { 7557 $indentation = $reduced_spaces_to_go[$i_terminal]; 7558 $lev = $levels_to_go[$i_terminal]; 7559 } 7560 else { 7561 7562 # There are two ways to handle -icb and -icp... 7563 # One way is to use the indentation of the previous line: 7564 # $indentation = $last_indentation_written; 7565 7566 # The other way is to use the indentation that the previous line 7567 # would have had if it hadn't been adjusted: 7568 $indentation = $last_unadjusted_indentation; 7569 7570 # Current method: use the minimum of the two. This avoids inconsistent 7571 # indentation. 7572 if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) ) 7573 { 7574 $indentation = $last_indentation_written; 7575 } 7576 7577 # use previous indentation but use own level 7578 # to cause list to be flushed properly 7579 $lev = $levels_to_go[$ibeg]; 7580 } 7581 7582 # remember indentation except for multi-line quotes, which get 7583 # no indentation 7584 unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) { 7585 $last_indentation_written = $indentation; 7586 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; 7587 } 7588 7589 # be sure lines with leading closing tokens are not outdented more 7590 # than the line which contained the corresponding opening token. 7591 if ( defined($opening_indentation) ) { 7592 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { 7593 $indentation = $opening_indentation; 7594 } 7595 } 7596 7597 # remember the indentation of each line of this batch 7598 push @{$rindentation_list}, $indentation; 7599 7600 # outdent lines with certain leading tokens... 7601 if ( 7602 7603 # must be first word of this batch 7604 $ibeg == 0 7605 7606 # and be certain leading keywords if requested 7607 && ( $rOpts->{'outdent-keywords'} 7608 && $types_to_go[$ibeg] eq 'k' 7609 && $outdent_keyword{ $tokens_to_go[$ibeg] } ) 7610 7611 # or labels if requested 7612 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) 7613 ) 7614 { 7615 my $space_count = leading_spaces_to_go($ibeg); 7616 if ( $space_count > 0 ) { 7617 $space_count -= $rOpts_continuation_indentation; 7618 $is_outdented_line = 1; 7619 if ( $space_count < 0 ) { $space_count = 0 } 7620 7621 if ($rOpts_line_up_parentheses) { 7622 $indentation = 7623 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); 7624 } 7625 else { 7626 $indentation = $space_count; 7627 } 7628 } 7629 } 7630 7631 return ( $indentation, $lev, $level_end, $is_semicolon_terminated, 7632 $is_outdented_line ); 7633} 7634 7635BEGIN { 7636 @_ = qw#{ ? : => = += -= =~ *= /= && || #; 7637 @is_vertical_alignment_type{@_} = (1) x scalar(@_); 7638} 7639 7640sub set_vertical_alignment_markers { 7641 7642 # Look at the tokens in this output batch and define the array 7643 # 'matching_token_to_go' which marks tokens at which we would 7644 # accept vertical alignment. 7645 7646 # nothing to do if we aren't allowed to change whitespace 7647 if ( !$rOpts_add_whitespace ) { 7648 for my $i ( 0 .. $max_index_to_go ) { 7649 $matching_token_to_go[$i] = ''; 7650 } 7651 return; 7652 } 7653 7654 my ( $ri_first, $ri_last ) = @_; 7655 7656 # look at each line of this batch.. 7657 my $last_vertical_alignment_before_index; 7658 my $vert_last_nonblank_type; 7659 my $vert_last_nonblank_block_type; 7660 my $max_line = @$ri_first - 1; 7661 my ( $i, $type, $token, $block_type, $last_nonblank_token, 7662 $alignment_type ); 7663 my ( $ibeg, $iend ); 7664 for my $line ( 0 .. $max_line ) { 7665 $ibeg = $$ri_first[$line]; 7666 $iend = $$ri_last[$line]; 7667 $last_vertical_alignment_before_index = -1; 7668 $vert_last_nonblank_type = ''; 7669 $vert_last_nonblank_block_type = ''; 7670 7671 # look at each token in this output line.. 7672 foreach $i ( $ibeg .. $iend ) { 7673 $alignment_type = ''; 7674 $type = $types_to_go[$i]; 7675 $block_type = $block_type_to_go[$i]; 7676 $token = $tokens_to_go[$i]; 7677 7678 #-------------------------------------------------------- 7679 # First see if we want to align BEFORE this token 7680 #-------------------------------------------------------- 7681 7682 # The first possible token that we can alignment_type before 7683 # is index 2 because: 1) it doesn't normally make sense to 7684 # alignment_type before the first token and 2) the second 7685 # token must be a blank if we are to alignment_type before 7686 # the third 7687 if ( $i < $ibeg + 2 ) { 7688 } 7689 7690 # TESTING : this causes too many bad side effects 7691 #elsif ( $type =~ /^(\[|L)$/ ) { 7692 # $alignment_type = $type; 7693 #} 7694 7695 # must follow a blank token 7696 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { 7697 } 7698 7699 # align a side comment -- 7700 elsif ( $type eq '#' ) { 7701 7702 unless ( 7703 7704 # it is a static side comment 7705 ( 7706 $rOpts->{'static-side-comments'} 7707 && $token =~ /$static_side_comment_pattern/o 7708 ) 7709 7710 # or a closing side comment 7711 || ( $vert_last_nonblank_block_type 7712 && $token =~ /$closing_side_comment_prefix_pattern/o ) 7713 ) 7714 { 7715 $alignment_type = $type; 7716 } ## Example of a static side comment 7717 } 7718 7719 # otherwise, do not alignment_type two in a row to create a 7720 # blank field 7721 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { 7722 } 7723 7724 # alignment_type before one of these keywords 7725 # (within a line, since $i>1) 7726 elsif ( $type eq 'k' ) { 7727 if ( $token =~ /^(if|unless|and|or|eq|ne)$/ ) { 7728 $alignment_type = $token; 7729 } 7730 } 7731 7732 # We have to be very careful about alignment before opening parens. 7733 # It is ok to line up sequences like this: 7734 # if ( $something eq "simple" ) { &handle_simple } 7735 # elsif ( $something eq "hard" ) { &handle_hard } 7736 elsif ( $type eq '(' ) { 7737 if ( ( $i == $ibeg + 2 ) 7738 && $tokens_to_go[$ibeg] =~ /^(if|elsif)/ ) 7739 { 7740 $alignment_type = $type; 7741 } 7742 } 7743 7744 # alignment_type before one of these types.. 7745 # Note: add '.' after new vertical aligner is operational 7746 elsif ( $is_vertical_alignment_type{$type} ) { 7747 $alignment_type = $token; 7748 7749 # be sure the alignment tokens are unique 7750 # This didn't work well: reason not determined 7751 # if ($token ne $type) {$alignment_type .= $type} 7752 } 7753 7754 # NOTE: This is deactivated until the new vertical aligner 7755 # is finished because it causes the previous if/elsif alignment 7756 # to fail 7757 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) { 7758 # $alignment_type = $type; 7759 #} 7760 7761 if ($alignment_type) { 7762 $last_vertical_alignment_before_index = $i; 7763 } 7764 7765 #-------------------------------------------------------- 7766 # Next see if we want to align AFTER the previous nonblank 7767 #-------------------------------------------------------- 7768 7769 # We want to line up ',' and interior ';' tokens, with the added 7770 # space AFTER these tokens. (Note: interior ';' is included 7771 # because it may occur in short blocks). 7772 if ( 7773 7774 # we haven't already set it 7775 !$alignment_type 7776 7777 # and its not the first token of the line 7778 && ( $i > $ibeg ) 7779 7780 # and it follows a blank 7781 && $types_to_go[ $i - 1 ] eq 'b' 7782 7783 # and previous token IS one of these: 7784 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) 7785 7786 # and it's NOT one of these 7787 && ( $type !~ /^[b\#\)\]\}]$/ ) 7788 7789 # then go ahead and align 7790 ) 7791 7792 { 7793 $alignment_type = $vert_last_nonblank_type; 7794 } 7795 7796 #-------------------------------------------------------- 7797 # then store the value 7798 #-------------------------------------------------------- 7799 $matching_token_to_go[$i] = $alignment_type; 7800 if ( $type ne 'b' ) { 7801 $vert_last_nonblank_type = $type; 7802 $vert_last_nonblank_block_type = $block_type; 7803 } 7804 } 7805 } 7806} 7807 7808sub terminal_type { 7809 7810 # returns type of last token on this line (terminal token), as follows: 7811 # returns # for a full-line comment 7812 # returns ' ' for a blank line 7813 # otherwise returns final token type 7814 7815 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; 7816 7817 # check for full-line comment.. 7818 if ( $$rtype[$ibeg] eq '#' ) { 7819 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; 7820 } 7821 else { 7822 7823 # start at end and walk bakwards.. 7824 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { 7825 7826 # skip past any side comment and blanks 7827 next if ( $$rtype[$i] eq 'b' ); 7828 next if ( $$rtype[$i] eq '#' ); 7829 7830 # found it..make sure it is a BLOCK termination, 7831 # but hide a terminal } after sort/grep/map because it is not 7832 # necessarily the end of the line. (terminal.t) 7833 my $terminal_type = $$rtype[$i]; 7834 if ( 7835 $terminal_type eq '}' 7836 && ( !$$rblock_type[$i] 7837 || ( $$rblock_type[$i] =~ /^(sort|grep|map|do|eval)$/ ) ) 7838 ) 7839 { 7840 $terminal_type = 'b'; 7841 } 7842 return wantarray ? ( $terminal_type, $i ) : $terminal_type; 7843 } 7844 7845 # empty line 7846 return wantarray ? ( ' ', $ibeg ) : ' '; 7847 } 7848} 7849 7850sub set_bond_strengths { 7851 7852 BEGIN { 7853 7854 ############################################################### 7855 # NOTE: NO_BREAK's set here are HINTS which may not be honored; 7856 # essential NO_BREAKS's must be enforced in section 2, below. 7857 ############################################################### 7858 7859 # adding NEW_TOKENS: add a left and right bond strength by 7860 # mimmicking what is done for an existing token type. You 7861 # can skip this step at first and take the default, then 7862 # tweak later to get desired results. 7863 7864 # The bond strengths should roughly follow precenence order where 7865 # possible. If you make changes, please check the results very 7866 # carefully on a variety of scripts. 7867 7868 # no break around possible filehandle 7869 $left_bond_strength{'Z'} = NO_BREAK; 7870 $right_bond_strength{'Z'} = NO_BREAK; 7871 7872 # never put a bare word on a new line: 7873 # example print (STDERR, "bla"); will fail with break after ( 7874 $left_bond_strength{'w'} = NO_BREAK; 7875 7876 # blanks always have infinite strength to force breaks after real tokens 7877 $right_bond_strength{'b'} = NO_BREAK; 7878 7879 # try not to break on exponentation 7880 @_ = qw" ** .. ... <=> "; 7881 @left_bond_strength{@_} = (STRONG) x scalar(@_); 7882 @right_bond_strength{@_} = (STRONG) x scalar(@_); 7883 7884 # The comma-arrow has very low precedence but not a good break point 7885 $left_bond_strength{'=>'} = NO_BREAK; 7886 $right_bond_strength{'=>'} = NOMINAL; 7887 7888 # ok to break after label 7889 $left_bond_strength{'J'} = NO_BREAK; 7890 $right_bond_strength{'J'} = NOMINAL; 7891 $left_bond_strength{'j'} = STRONG; 7892 $right_bond_strength{'j'} = STRONG; 7893 7894 $left_bond_strength{'->'} = STRONG; 7895 $right_bond_strength{'->'} = VERY_STRONG; 7896 7897 # breaking AFTER these is just ok: 7898 @_ = qw" % + - * / x "; 7899 @left_bond_strength{@_} = (STRONG) x scalar(@_); 7900 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 7901 7902 # breaking BEFORE these is just ok: 7903 @_ = qw" >> << "; 7904 @right_bond_strength{@_} = (STRONG) x scalar(@_); 7905 @left_bond_strength{@_} = (NOMINAL) x scalar(@_); 7906 7907 # I prefer breaking before the string concatenation operator 7908 # because it can be hard to see at the end of a line 7909 # swap these to break after a '.' 7910 # this could be a future option 7911 $right_bond_strength{'.'} = STRONG; 7912 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; 7913 7914 @_ = qw"} ] ) "; 7915 @left_bond_strength{@_} = (STRONG) x scalar(@_); 7916 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 7917 7918 # make these a little weaker than nominal so that they get 7919 # favored for end-of-line characters 7920 @_ = qw"!= == =~ !~"; 7921 @left_bond_strength{@_} = (STRONG) x scalar(@_); 7922 @right_bond_strength{@_} = ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); 7923 7924 # break AFTER these 7925 @_ = qw" < > | & >= <="; 7926 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); 7927 @right_bond_strength{@_} = ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); 7928 7929 # breaking either before or after a quote is ok 7930 # but bias for breaking before a quote 7931 $left_bond_strength{'Q'} = NOMINAL; 7932 $right_bond_strength{'Q'} = NOMINAL + 0.02; 7933 $left_bond_strength{'q'} = NOMINAL; 7934 $right_bond_strength{'q'} = NOMINAL; 7935 7936 # starting a line with a keyword is usually ok 7937 $left_bond_strength{'k'} = NOMINAL; 7938 7939 # we usually want to bond a keyword strongly to what immediately 7940 # follows, rather than leaving it stranded at the end of a line 7941 $right_bond_strength{'k'} = STRONG; 7942 7943 $left_bond_strength{'G'} = NOMINAL; 7944 $right_bond_strength{'G'} = STRONG; 7945 7946 # it is very good to break AFTER various assignment operators 7947 @_ = qw( 7948 = **= += *= &= <<= &&= 7949 -= /= |= >>= ||= 7950 .= %= ^= 7951 x= 7952 ); 7953 @left_bond_strength{@_} = (STRONG) x scalar(@_); 7954 @right_bond_strength{@_} = 7955 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); 7956 7957 # break BEFORE '&&' and '||' 7958 # set strength of '||' to same as '=' so that chains like 7959 # $a = $b || $c || $d will break before the first '||' 7960 $right_bond_strength{'||'} = NOMINAL; 7961 $left_bond_strength{'||'} = $right_bond_strength{'='}; 7962 7963 # set strength of && a little higher than || 7964 $right_bond_strength{'&&'} = NOMINAL; 7965 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; 7966 7967 $left_bond_strength{';'} = VERY_STRONG; 7968 $right_bond_strength{';'} = VERY_WEAK; 7969 $left_bond_strength{'f'} = VERY_STRONG; 7970 7971 # make right strength of for ';' a little less than '=' 7972 # to make for contents break after the ';' to avoid this: 7973 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += 7974 # $number_of_fields ) 7975 # and make it weaker than ',' too 7976 $right_bond_strength{'f'} = VERY_WEAK - 0.001; 7977 7978 # The strengths of ?/: should be somewhere between 7979 # an '=' and a quote (NOMINAL), 7980 # make strength of ':' slightly less than '?' to help 7981 # break long chains of ? : after the colons 7982 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; 7983 $right_bond_strength{':'} = NO_BREAK; 7984 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; 7985 $right_bond_strength{'?'} = NO_BREAK; 7986 7987 $left_bond_strength{','} = VERY_STRONG; 7988 $right_bond_strength{','} = VERY_WEAK; 7989 } 7990 7991 # patch-its always ok to break at end of line 7992 $nobreak_to_go[$max_index_to_go] = 0; 7993 7994 # adding a small 'bias' to strengths is a simple way to make a line 7995 # break at the first of a sequence of identical terms. For example, 7996 # to force long string of conditional operators to break with 7997 # each line ending in a ':', we can add a small number to the bond 7998 # strength of each ':' 7999 my $colon_bias = 0; 8000 my $amp_bias = 0; 8001 my $bar_bias = 0; 8002 my $and_bias = 0; 8003 my $or_bias = 0; 8004 my $dot_bias = 0; 8005 my $f_bias = 0; 8006 my $code_bias = -.01; 8007 my $type = 'b'; 8008 my $token = ' '; 8009 my $last_type; 8010 my $last_nonblank_type = $type; 8011 my $last_nonblank_token = $token; 8012 my $delta_bias = 0.0001; 8013 8014 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, 8015 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, ); 8016 8017 # preliminary loop to compute bond strengths 8018 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { 8019 $last_type = $type; 8020 if ( $type ne 'b' ) { 8021 $last_nonblank_type = $type; 8022 $last_nonblank_token = $token; 8023 } 8024 $type = $types_to_go[$i]; 8025 8026 # strength on both sides of a blank is the same 8027 if ( $type eq 'b' && $last_type ne 'b' ) { 8028 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; 8029 next; 8030 } 8031 8032 $token = $tokens_to_go[$i]; 8033 $block_type = $block_type_to_go[$i]; 8034 $i_next = $i + 1; 8035 $next_type = $types_to_go[$i_next]; 8036 $next_token = $tokens_to_go[$i_next]; 8037 $total_nesting_depth = $nesting_depth_to_go[$i_next]; 8038 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 8039 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 8040 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 8041 8042 # Some token chemistry... The decision about where to break a 8043 # line depends upon a "bond strength" between tokens. The LOWER 8044 # the bond strength, the MORE likely a break. The strength 8045 # values are based on trial-and-error, and need to be tweaked 8046 # occasionally to get desired results. Things to keep in mind 8047 # are: 8048 # 1. relative strengths are important. small differences 8049 # in strengths can make big formatting differences. 8050 # 2. each indentation level adds one unit of bond strength 8051 # 3. a value of NO_BREAK makes an unbreakable bond 8052 # 4. a value of VERY_WEAK is the strength of a ',' 8053 # 5. values below NOMINAL are considered ok break points 8054 # 6. values above NOMINAL are considered poor break points 8055 # We are computing the strength of the bond between the current 8056 # token and the NEXT token. 8057 my $bond_str = VERY_STRONG; # a default, high strength 8058 8059 #--------------------------------------------------------------- 8060 # section 1: 8061 # use minimum of left and right bond strengths if defined; 8062 # digraphs and trigraphs like to break on their left 8063 #--------------------------------------------------------------- 8064 my $bsr = $right_bond_strength{$type}; 8065 8066 if ( !defined($bsr) ) { 8067 8068 if ( $is_digraph{$type} || $is_trigraph{$type} ) { 8069 $bsr = STRONG; 8070 } 8071 else { 8072 $bsr = VERY_STRONG; 8073 } 8074 } 8075 8076 if ( $token eq 'and' or $token eq 'or' ) { 8077 $bsr = NOMINAL; 8078 } 8079 elsif ( $token eq 'ne' or $token eq 'eq' ) { 8080 $bsr = NOMINAL; 8081 } 8082 my $bsl = $left_bond_strength{$next_nonblank_type}; 8083 8084 # set terminal bond strength to the nominal value 8085 # this will cause good preceding breaks to be retained 8086 if ( $i_next_nonblank > $max_index_to_go ) { 8087 $bsl = NOMINAL; 8088 } 8089 8090 if ( !defined($bsl) ) { 8091 8092 if ( $is_digraph{$next_nonblank_type} 8093 || $is_trigraph{$next_nonblank_type} ) 8094 { 8095 $bsl = WEAK; 8096 } 8097 else { 8098 $bsl = VERY_STRONG; 8099 } 8100 } 8101 8102 # make or, and slightly weaker than a ',' 8103 if ( $next_nonblank_token eq 'or' ) { 8104 $bsl = VERY_WEAK - 0.02; 8105 } 8106 if ( $next_nonblank_token eq 'and' ) { 8107 $bsl = VERY_WEAK - 0.01; 8108 } 8109 elsif ( $next_nonblank_token eq 'ne' or $next_nonblank_token eq 'eq' ) { 8110 $bsl = NOMINAL; 8111 } 8112 elsif ( $next_nonblank_token =~ /^(lt|gt|le|ge)$/ ) { 8113 $bsl = 0.9 * NOMINAL + 0.1 * STRONG; 8114 } 8115 8116 # Note: it might seem that we would want to keep a NO_BREAK if 8117 # either token has this value. This didn't work, because in an 8118 # arrow list, it prevents the comma from separating from the 8119 # following bare word (which is probably quoted by its arrow). 8120 # So necessary NO_BREAK's have to be handled as special cases 8121 # in the final section. 8122 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; 8123 my $bond_str_1 = $bond_str; 8124 8125 #--------------------------------------------------------------- 8126 # section 2: 8127 # special cases 8128 #--------------------------------------------------------------- 8129 8130 # allow long lines before final { in an if statement, as in: 8131 # if (.......... 8132 # ..........) 8133 # { 8134 # 8135 # Otherwise, the line before the { tends to be too short. 8136 if ( $type eq ')' ) { 8137 if ( $next_nonblank_type eq '{' ) { 8138 $bond_str = VERY_WEAK + 0.03; 8139 } 8140 } 8141 8142 elsif ( $type eq '(' ) { 8143 if ( $next_nonblank_type eq '{' ) { 8144 $bond_str = NOMINAL; 8145 } 8146 } 8147 8148 # break on something like '} (', but keep this stronger than a ',' 8149 # example is in 'howe.pl' 8150 elsif ( $type eq 'R' or $type eq '}' ) { 8151 if ( $next_nonblank_type eq '(' ) { 8152 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; 8153 } 8154 } 8155 8156 #----------------------------------------------------------------- 8157 # adjust bond strength bias 8158 #----------------------------------------------------------------- 8159 8160 elsif ( $type eq 'f' ) { 8161 $bond_str += $f_bias; 8162 $f_bias += $delta_bias; 8163 } 8164 8165 # in long ?: conditionals, bias toward just one set per line (colon.t) 8166 elsif ( $type eq ':' ) { 8167 if ( !$want_break_before{$type} ) { 8168 $bond_str += $colon_bias; 8169 $colon_bias += $delta_bias; 8170 } 8171 } 8172 8173 if ( $next_nonblank_type eq ':' 8174 && $want_break_before{$next_nonblank_type} ) 8175 { 8176 $bond_str += $colon_bias; 8177 $colon_bias += $delta_bias; 8178 } 8179 8180 # if leading '.' is used, align all but 'short' quotes; 8181 # the idea is to not place something like "\n" on a single line. 8182 elsif ( $next_nonblank_type eq '.' ) { 8183 if ( $want_break_before{'.'} ) { 8184 unless ( 8185 $last_nonblank_type eq '.' 8186 && ( 8187 length($token) <= 8188 $rOpts->{'short-concatenation-item-length'} ) 8189 && ( $token !~ /^[\)\]\}]$/ ) 8190 ) 8191 { 8192 $dot_bias += $delta_bias; 8193 } 8194 $bond_str += $dot_bias; 8195 } 8196 } 8197 elsif ( $next_nonblank_type eq '&&' ) { 8198 $bond_str += $amp_bias; 8199 $amp_bias += $delta_bias; 8200 } 8201 elsif ( $next_nonblank_type eq '||' ) { 8202 $bond_str += $bar_bias; 8203 $bar_bias += $delta_bias; 8204 } 8205 elsif ( $next_nonblank_type eq 'k' ) { 8206 8207 if ( $next_nonblank_token eq 'and' ) { 8208 $bond_str += $and_bias; 8209 $and_bias += $delta_bias; 8210 } 8211 elsif ( $next_nonblank_token eq 'or' ) { 8212 $bond_str += $or_bias; 8213 $or_bias += $delta_bias; 8214 } 8215 } 8216 8217 # keep matrix and hash indices together 8218 # but make them a little below STRONG to allow breaking open 8219 # something like {'some-word'}{'some-very-long-word'} at the }{ 8220 # (bracebrk.t) 8221 if ( ( $type eq ']' or $type eq 'R' ) 8222 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) ) 8223 { 8224 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; 8225 } 8226 8227 # increase strength to the point where a break in the following 8228 # will be after the opening paren rather than at the arrow: 8229 # $a->$b($c); 8230 if ( ( $type eq 'i' ) 8231 && ( $next_nonblank_type eq 'i' ) 8232 && ( $next_nonblank_token =~ /^->/ ) ) 8233 { 8234 $bond_str = 1.45 * STRONG; 8235 } 8236 8237 # map1.t -- a quirk in perl 8238 if ( $token eq '(' 8239 && $next_nonblank_type eq 'i' 8240 && $last_nonblank_type eq 'k' 8241 && $last_nonblank_token =~ /^(sort|map|grep)$/ ) 8242 { 8243 $bond_str = NO_BREAK; 8244 } 8245 8246 # extrude.t: do not break before paren at: 8247 # -l pid_filename( 8248 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { 8249 $bond_str = NO_BREAK; 8250 } 8251 8252 # good to break after end of code blocks 8253 if ( $type eq '}' && $block_type ) { 8254 8255 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; 8256 $code_bias += $delta_bias; 8257 } 8258 8259 if ( $type eq 'k' ) { 8260 8261 # allow certain control keywords to stand out 8262 if ( ( $next_nonblank_type eq 'k' ) 8263 && ( $token =~ /^(last|next|redo|return)$/ ) ) 8264 { 8265 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; 8266 } 8267 8268 # Don't break after keyword my. This is a quick fix for a 8269 # rare problem with perl. An example is this line from file 8270 # Container.pm: 8271 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) 8272 8273 if ( $token eq 'my' ) { 8274 $bond_str = NO_BREAK; 8275 } 8276 8277 } 8278 8279 # good to break before 'if', 'unless', etc 8280 if ( $if_brace_follower_pattern 8281 && ( $next_nonblank_token =~ /$if_brace_follower_pattern/ ) ) 8282 { 8283 $bond_str = VERY_WEAK; 8284 } 8285 8286 if ( $next_nonblank_type eq 'k' ) { 8287 8288 # keywords like 'unless' 'if' make good breaks 8289 if ( $do_follower_pattern 8290 && $next_nonblank_token =~ /$do_follower_pattern/ ) 8291 { 8292 $bond_str = VERY_WEAK / 1.05; 8293 } 8294 8295 } 8296 8297 # try not to break before a comma-arrow 8298 elsif ( $next_nonblank_type eq '=>' ) { 8299 if ( $bond_str < STRONG ) { $bond_str = STRONG } 8300 } 8301 8302 if ( $type eq 'C' or $type eq 'U' ) { 8303 8304 # use strict requires that bare word and => not be separated 8305 if ( $next_nonblank_type eq '=>' ) { 8306 $bond_str = NO_BREAK; 8307 } 8308 8309 } 8310 8311 # use strict requires that bare word within braces not start new line 8312 elsif ( $type eq 'L' ) { 8313 8314 if ( $next_nonblank_type eq 'w' ) { 8315 $bond_str = NO_BREAK; 8316 } 8317 } 8318 8319 elsif ( $type eq 'w' ) { 8320 8321 if ( $next_nonblank_type eq 'R' ) { 8322 $bond_str = NO_BREAK; 8323 } 8324 8325 # use strict requires that bare word and => not be separated 8326 if ( $next_nonblank_type eq '=>' ) { 8327 $bond_str = NO_BREAK; 8328 } 8329 } 8330 8331 # in fact, use strict hates bare words on any new line. For example, 8332 # a break before the underscore here provokes the wrath of use strict: 8333 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { 8334 elsif ( $type eq 'F' ) { 8335 $bond_str = NO_BREAK; 8336 } 8337 8338 # use strict does not allow separating type info from trailing { } 8339 # testfile is readmail.pl 8340 elsif ( $type eq 't' or $type eq 'i' ) { 8341 8342 if ( $next_nonblank_type eq 'L' ) { 8343 $bond_str = NO_BREAK; 8344 } 8345 } 8346 8347 # Do not break between a possible filehandle and a ? or / 8348 # and do not introduce a break after it if there is no blank (extrude.t) 8349 elsif ( $type eq 'Z' ) { 8350 8351 # dont break.. 8352 if ( 8353 8354 # if there is no blank and we do not want one. Examples: 8355 # print $x++ # do not break after $x 8356 # print HTML"HELLO" # break ok after HTML 8357 ( 8358 $next_type ne 'b' 8359 && defined( $want_left_space{$next_type} ) 8360 && $want_left_space{$next_type} == WS_NO 8361 ) 8362 8363 # or we might be followed by the start of a quote 8364 || $next_nonblank_type =~ /^[\/\?]$/ 8365 ) 8366 { 8367 $bond_str = NO_BREAK; 8368 } 8369 } 8370 8371 # Do not break before a possible file handle 8372 #if ( ( $type eq 'Z' ) || ( $next_nonblank_type eq 'Z' ) ) { 8373 if ( $next_nonblank_type eq 'Z' ) { 8374 $bond_str = NO_BREAK; 8375 } 8376 8377 # patch to put cuddled elses back together when on multiple 8378 # lines, as in: } \n else \n { \n 8379 if ($rOpts_cuddled_else) { 8380 8381 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) 8382 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) 8383 { 8384 $bond_str = NO_BREAK; 8385 } 8386 } 8387 8388 # keep '}' together with ';' 8389 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { 8390 $bond_str = NO_BREAK; 8391 } 8392 8393 # never break between sub name and opening paren 8394 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { 8395 $bond_str = NO_BREAK; 8396 } 8397 8398 #--------------------------------------------------------------- 8399 # section 3: 8400 # now take nesting depth into account 8401 #--------------------------------------------------------------- 8402 # final strength incorporates the bond strength and nesting depth 8403 my $strength; 8404 8405 if ( defined($bond_str) && !$nobreak_to_go[$i] ) { 8406 if ( $total_nesting_depth > 0 ) { 8407 $strength = $bond_str + $total_nesting_depth; 8408 } 8409 else { 8410 $strength = $bond_str; 8411 } 8412 } 8413 else { 8414 $strength = NO_BREAK; 8415 } 8416 8417 # always break after side comment 8418 if ( $type eq '#' ) { $strength = 0 } 8419 8420 $bond_strength_to_go[$i] = $strength; 8421 8422 FORMATTER_DEBUG_FLAG_BOND && do { 8423 my $str = substr( $token, 0, 15 ); 8424 $str .= ' ' x ( 16 - length($str) ); 8425 print 8426"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; 8427 }; 8428 } 8429} 8430 8431sub pad_array_to_go { 8432 8433 # to simplify coding in scan_list and set_bond_strengths, it helps 8434 # to create some extra blank tokens at the end of the arrays 8435 $tokens_to_go[ $max_index_to_go + 1 ] = ''; 8436 $tokens_to_go[ $max_index_to_go + 2 ] = ''; 8437 $types_to_go[ $max_index_to_go + 1 ] = 'b'; 8438 $types_to_go[ $max_index_to_go + 2 ] = 'b'; 8439 $nesting_depth_to_go[ $max_index_to_go + 1 ] = 8440 $nesting_depth_to_go[$max_index_to_go]; 8441 8442 if ( $types_to_go[$max_index_to_go] =~ /^[R\}\)\]]$/ ) { 8443 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { 8444 8445 # shouldn't happen: 8446 unless ( get_saw_brace_error() ) { 8447 warning( 8448"Program bug in scan_list: hit nesting error which should have been caught\n" 8449 ); 8450 report_definite_bug(); 8451 } 8452 } 8453 else { 8454 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; 8455 } 8456 } 8457 elsif ( $types_to_go[$max_index_to_go] =~ /^[L\{\(\[]$/ ) { 8458 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; 8459 } 8460} 8461 8462{ # begin closure scan_list 8463 8464 my ( 8465 $block_type, $current_depth, 8466 $depth, $good_old_breakpoint, 8467 $i, $i_last_nonblank_token, 8468 $last_colon_sequence_number, $last_nonblank_token, 8469 $last_nonblank_type, $last_old_breakpoint_count, 8470 $minimum_depth, $next_nonblank_block_type, 8471 $next_nonblank_token, $next_nonblank_type, 8472 $old_breakpoint_count, $starting_breakpoint_count, 8473 $starting_depth, $token, 8474 $type, $type_sequence, 8475 ); 8476 8477 my ( 8478 @breakpoint_stack, @breakpoint_undo_stack, 8479 @comma_index, @container_type, 8480 @identifier_count_stack, @index_before_arrow, 8481 @interrupted_list, @item_count_stack, 8482 @last_comma_index, @last_equals_index, 8483 @last_nonblank_type, @longest_term, 8484 @max_length, @maximum_nesting_depth, 8485 @old_breakpoint_count_stack, @opening_structure_index_stack, 8486 @rand_or_list, 8487 ); 8488 8489 # routine to define essential variables when we go 'up' to 8490 # a new depth 8491 sub check_for_new_minimum_depth { 8492 my $depth = shift; 8493 if ( $depth < $minimum_depth ) { 8494 8495 $minimum_depth = $depth; 8496 8497 # these arrays need not retain values between calls 8498 $breakpoint_stack[$depth] = $starting_breakpoint_count; 8499 $container_type[$depth] = ""; 8500 $identifier_count_stack[$depth] = 0; 8501 $index_before_arrow[$depth] = -1; 8502 $interrupted_list[$depth] = 1; 8503 $item_count_stack[$depth] = 0; 8504 $last_nonblank_type[$depth] = ""; 8505 $longest_term[$depth] = 0; 8506 $maximum_nesting_depth[$depth] = $depth; 8507 $opening_structure_index_stack[$depth] = -1; 8508 8509 # these arrays must retain values between calls 8510 if ( !defined( $has_broken_sublist[$depth] ) ) { 8511 $dont_align[$depth] = 0; 8512 $has_broken_sublist[$depth] = 0; 8513 $want_comma_break[$depth] = 0; 8514 } 8515 } 8516 } 8517 8518 # routine to decide which commas to break at within a container; 8519 # returns: 8520 # $bp_count = number of comma breakpoints set 8521 # $do_not_break_apart = a flag indicating if container need not 8522 # be broken open 8523 sub set_comma_breakpoints { 8524 8525 my $dd = shift; 8526 my $bp_count = 0; 8527 my $do_not_break_apart = 0; 8528 if ( $item_count_stack[$dd] && !$dont_align[$dd] ) { 8529 8530 my $fbc = $forced_breakpoint_count; 8531 set_comma_breakpoints_do( 8532 $dd, 8533 $opening_structure_index_stack[$dd], 8534 $i, 8535 $item_count_stack[$dd], 8536 $identifier_count_stack[$dd], 8537 $comma_index[$dd], 8538 $max_length[$dd], 8539 $next_nonblank_type, 8540 $container_type[$dd], 8541 $interrupted_list[$dd], 8542 $maximum_nesting_depth[$dd], 8543 \$do_not_break_apart, 8544 ); 8545 $bp_count = $forced_breakpoint_count - $fbc; 8546 8547 # always open comma lists not preceded by keywords, 8548 # barewords, identifiers (that is, anything that doesn't 8549 # look like a function call) 8550 if ($do_not_break_apart) { 8551 $do_not_break_apart = 0 8552 if ( $last_nonblank_type[$dd] !~ /[kwiU]/ ); 8553 } 8554 } 8555 return ( $bp_count, $do_not_break_apart ); 8556 }; 8557 8558 my %is_logical_container; 8559 8560 BEGIN { 8561 @_ = qw# if elsif unless while and or not && | || ? : ! #; 8562 @is_logical_container{@_} = (1) x scalar(@_); 8563 } 8564 8565 sub set_logical_breakpoints { 8566 my $dd = shift; 8567 my $i_opening = $opening_structure_index_stack[$dd]; 8568 if ( defined( $rand_or_list[$dd] ) 8569 && $item_count_stack[$dd] == 0 8570 && $is_logical_container{ $container_type[$dd] } ) 8571 { 8572 while ( my $j = pop ( @{ $rand_or_list[$dd] } ) ) { 8573 set_forced_breakpoint($j); 8574 } 8575 } 8576 } 8577 8578 sub update_longest_term { 8579 my $ii = shift; 8580 my $depth = shift; 8581 my $i_comma = $last_comma_index[$depth]; 8582 if ( defined($i_comma) ) { 8583 my $length = token_sequence_length( $i_comma + 1, $ii ); 8584 if ( $length > $longest_term[$depth] ) { 8585 $longest_term[$depth] = $length; 8586 } 8587 } 8588 else { 8589 my $i_opening = $opening_structure_index_stack[$depth]; 8590 if ( $i_opening >= 0 ) { 8591 my $length = token_sequence_length( $i_opening + 1, $ii ); 8592 $longest_term[$depth] = $length; 8593 } 8594 } 8595 } 8596 8597 sub is_unbreakable_container { 8598 8599 # never break one of these types (map1.t) 8600 my $dd = shift; 8601 $container_type[$dd] =~ /^(sort|map|grep)$/; 8602 } 8603 8604 sub scan_list { 8605 8606 # This routine is responsible for setting line breaks for all lists, 8607 # so that hierarchical structure can be displayed and so that list 8608 # items can be vertically aligned. The output of this routine is 8609 # stored in the array @forced_breakpoint_to_go, which is used to set 8610 # final breakpoints. 8611 8612 $starting_depth = $nesting_depth_to_go[0]; 8613 8614 $block_type = ' '; 8615 $current_depth = $starting_depth; 8616 $good_old_breakpoint = 0; 8617 $i = -1; 8618 $last_colon_sequence_number = -1; 8619 $last_nonblank_token = ';'; 8620 $last_nonblank_type = ';'; 8621 $last_old_breakpoint_count = 0; 8622 $minimum_depth = $current_depth + 1; # forces update in check below 8623 $old_breakpoint_count = 0; 8624 $starting_breakpoint_count = $forced_breakpoint_count; 8625 $token = ';'; 8626 $type = ';'; 8627 $type_sequence = ''; 8628 8629 @breakpoint_stack = (); 8630 @breakpoint_undo_stack = (); 8631 @comma_index = (); 8632 @container_type = (); 8633 @identifier_count_stack = (); 8634 @index_before_arrow = (); 8635 @interrupted_list = (); 8636 @item_count_stack = (); 8637 @last_comma_index = (); 8638 @last_equals_index = (); 8639 @last_nonblank_type = (); 8640 @longest_term = (); 8641 @max_length = (); 8642 @maximum_nesting_depth = (); 8643 @old_breakpoint_count_stack = (); 8644 @opening_structure_index_stack = (); 8645 @rand_or_list = (); 8646 8647 check_for_new_minimum_depth($current_depth); 8648 8649 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; 8650 my $want_previous_breakpoint = 0; 8651 8652 # loop over all tokens in this batch 8653 while ( ++$i <= $max_index_to_go ) { 8654 if ( $type ne 'b' ) { 8655 $i_last_nonblank_token = $i - 1; 8656 $last_nonblank_type = $type; 8657 $last_nonblank_token = $token; 8658 } 8659 $type = $types_to_go[$i]; 8660 $block_type = $block_type_to_go[$i]; 8661 $token = $tokens_to_go[$i]; 8662 $type_sequence = $type_sequence_to_go[$i]; 8663 my $next_type = $types_to_go[ $i + 1 ]; 8664 my $next_token = $tokens_to_go[ $i + 1 ]; 8665 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 8666 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 8667 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 8668 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 8669 8670 # set break if flag was set 8671 if ($want_previous_breakpoint) { 8672 set_forced_breakpoint( $i - 1 ); 8673 $want_previous_breakpoint = 0; 8674 } 8675 8676 $last_old_breakpoint_count = $old_breakpoint_count; 8677 if ( $old_breakpoint_to_go[$i] ) { 8678 $old_breakpoint_count++; 8679 8680 # if old line broke before certain key types, we take that 8681 # as a cue that the user wants to break there 8682 if ( 8683 ( 8684 $next_nonblank_type =~ /^(\&\&|\|\|)$/ 8685 && $want_break_before{$next_nonblank_type} 8686 ) 8687 || ( $next_nonblank_type eq 'k' 8688 && $next_nonblank_token =~ /^(if|unless|and|or)$/ ) 8689 ) 8690 { 8691 $good_old_breakpoint++; 8692 } 8693 8694 # Break before certain keywords if user broke there and 8695 # this is a 'safe' break point. The idea is to retain 8696 # any preferred breaks for sequential list operations, 8697 # like a schwartzian transform. 8698 if ( 8699 $next_nonblank_type eq 'k' 8700 && $next_nonblank_token =~ /^(sort|grep|map|eval)$/ 8701 && ( $type =~ /^[=\)\]\}Riw]$/ 8702 || $type eq 'k' && $token =~ /^(sort|grep|map|eval)$/ ) 8703 ) 8704 { 8705 8706 # we actually have to set this break next time through 8707 # the loop because if we are at a closing token (such 8708 # as '}') which forms a one-line block, this break might 8709 # get undone. 8710 $want_previous_breakpoint = 1; 8711 } 8712 } 8713 next if ( $type eq 'b' ); 8714 $depth = $nesting_depth_to_go[ $i + 1 ]; 8715 8716 # safety check - be sure we always break after a comment 8717 # Shouldn't happen .. an error here probably means that the 8718 # nobreak flag did not get turned off correctly during 8719 # formatting. 8720 if ( $type eq '#' ) { 8721 if ( $i != $max_index_to_go ) { 8722 warning( 8723"Non-fatal program bug: backup logic needed to break after a comment\n" 8724 ); 8725 report_definite_bug(); 8726 $nobreak_to_go[$i] = 0; 8727 set_forced_breakpoint($i); 8728 } 8729 } 8730 8731 # Force breakpoints at certain tokens in long lines. 8732 # Note that such breakpoints will be undone later if these tokens 8733 # are fully contained within parens on a line. 8734 if ( $is_long_line 8735 && ( ( $type eq 'k' && $token =~ /^(if|unless)$/ ) ) ) 8736 { 8737 set_forced_breakpoint( $i - 1 ) unless $i == 0; 8738 } 8739 8740 # remember locations of '||' and '&&' for possible breaks if we decide 8741 # this is a long logical expression. 8742 if ( $type eq '||' ) { push @{ $rand_or_list[$depth] }, $i } 8743 if ( $type eq '&&' ) { push @{ $rand_or_list[$depth] }, $i } 8744 if ( $type eq 'k' && $token eq 'and' ) { 8745 push @{ $rand_or_list[$depth] }, $i; 8746 } 8747 8748 # break immediately at 'or's which are probably not in a logical 8749 # block -- but we will break in logical breaks below so that 8750 # they do not add to the forced_breakpoint_count 8751 if ( $type eq 'k' && $token eq 'or' ) { 8752 if ( $is_logical_container{ $container_type[$depth] } ) { 8753 push @{ $rand_or_list[$depth] }, $i; 8754 } 8755 else { 8756 if ($is_long_line) { set_forced_breakpoint($i) } 8757 } 8758 } 8759 8760 if ($type_sequence) { 8761 8762 # handle any postponed closing breakpoints 8763 if ( $token =~ /^[\)\]\}\:]$/ ) { 8764 if ( $token eq ':' ) { 8765 $last_colon_sequence_number = $type_sequence; 8766 } 8767 if ( defined( $postponed_breakpoint{$type_sequence} ) ) { 8768 my $inc = ( $token eq ':' ) ? 0 : 1; 8769 set_forced_breakpoint( $i - $inc ); 8770 delete $postponed_breakpoint{$type_sequence}; 8771 } 8772 } 8773 8774 # set breaks at ?/: if they will get separated (and are not a ?/: 8775 # chain), or if the '?' is at the end of the line 8776 elsif ( $token eq '?' ) { 8777 my $i_colon = $mate_index_to_go[$i]; 8778 if ( 8779 $i_colon <= 0 # the ':' is not in this batch 8780 || $i == 0 # this '?' is the first token of the line 8781 || $i == 8782 $max_index_to_go # or this '?' is the last token 8783 ) 8784 { 8785 8786 # don't break at a '?' if preceded by ':' on this 8787 # line of previous ?/: pair on this line. This is 8788 # an attempt to preserve a chain of ?/: expressions 8789 # (elsif2.t). And don't break if this has a side comment. 8790 set_forced_breakpoint($i) 8791 unless ( 8792 $type_sequence == ( 8793 $last_colon_sequence_number + 8794 TYPE_SEQUENCE_INCREMENT 8795 ) 8796 || $tokens_to_go[$max_index_to_go] eq '#' 8797 ); 8798 set_closing_breakpoint($i); 8799 } 8800 } 8801 } 8802 8803 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; 8804 8805 #------------------------------------------------------------ 8806 # Handle Increasing Depth.. 8807 # 8808 # prepare for a new list when depth increases 8809 # token $i is a '(','{', or '[' 8810 #------------------------------------------------------------ 8811 if ( $depth > $current_depth ) { 8812 8813 $breakpoint_stack[$depth] = $forced_breakpoint_count; 8814 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; 8815 $has_broken_sublist[$depth] = 0; 8816 $identifier_count_stack[$depth] = 0; 8817 $index_before_arrow[$depth] = -1; 8818 $interrupted_list[$depth] = 0; 8819 $item_count_stack[$depth] = 0; 8820 $last_comma_index[$depth] = undef; 8821 $last_equals_index[$depth] = undef; 8822 $last_nonblank_type[$depth] = $last_nonblank_type; 8823 $longest_term[$depth] = 0; 8824 $maximum_nesting_depth[$depth] = $depth; 8825 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; 8826 $opening_structure_index_stack[$depth] = $i; 8827 $rand_or_list[$depth] = []; 8828 $want_comma_break[$depth] = 0; 8829 8830 # we want to remember keywords my, local, our 8831 $container_type[$depth] = 8832 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) 8833 ? $last_nonblank_token 8834 : ""; 8835 8836 # if line ends here then signal closing token to break 8837 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) 8838 { 8839 set_closing_breakpoint($i); 8840 } 8841 8842 # Not all lists of values should be vertically aligned.. 8843 $dont_align[$depth] = 8844 8845 # code BLOCKS are handled at a higher level 8846 ( $block_type ne "" ) 8847 8848 # certain paren lists 8849 || ( $type eq '(' ) && ( 8850 8851 # it does not usually look good to align a list of 8852 # identifiers in a parameter list, as in: 8853 # my($var1, $var2, ...) 8854 # (This test should probably be refined, for now I'm just 8855 # testing for any keyword) 8856 ( $last_nonblank_type eq 'k' ) 8857 8858 # a trailing '(' usually indicates a non-list 8859 || ( $next_nonblank_type eq '(' ) 8860 ); 8861 8862 # patch to outdent opening brace of long if/for/.. 8863 # statements (like this one). See similar coding in 8864 # set_continuation breaks. We have also catch it here for 8865 # short line fragments which otherwise will not go through 8866 # set_continuation_breaks. 8867 if ( 8868 $block_type 8869 8870 # if we have the ')' but not its '(' in this batch.. 8871 && ( $last_nonblank_token eq ')' ) 8872 && $mate_index_to_go[$i_last_nonblank_token] < 0 8873 8874 # and user wants brace to left 8875 && !$rOpts->{'opening-brace-always-on-right'} 8876 8877 && ( $type eq '{' ) # should be true 8878 && ( $token eq '{' ) # should be true 8879 ) 8880 { 8881 set_forced_breakpoint( $i - 1 ); 8882 } 8883 } 8884 8885 #------------------------------------------------------------ 8886 # Handle Decreasing Depth.. 8887 # 8888 # finish off any old list when depth decreases 8889 # token $i is a ')','}', or ']' 8890 #------------------------------------------------------------ 8891 elsif ( $depth < $current_depth ) { 8892 8893 check_for_new_minimum_depth($depth); 8894 8895 # remember how deep we have been 8896 if ( $maximum_nesting_depth[$depth] < 8897 $maximum_nesting_depth[$current_depth] ) 8898 { 8899 $maximum_nesting_depth[$depth] = 8900 $maximum_nesting_depth[$current_depth]; 8901 } 8902 8903 # get final term length if necessary 8904 update_longest_term( $i - 1, $current_depth ); 8905 8906 # Patch to break between ') {' if the paren list is broken. 8907 # There is similar logic in set_continuation_breaks for 8908 # non-broken lists. 8909 if ( $token eq ')' 8910 && $next_nonblank_block_type 8911 && $interrupted_list[$current_depth] 8912 && $next_nonblank_type eq '{' 8913 && !$rOpts->{'opening-brace-always-on-right'} ) 8914 { 8915 set_forced_breakpoint($i); 8916 } 8917 8918 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; 8919 8920 # set breaks at commas if necessary 8921 my ( $bp_count, $do_not_break_apart ) = 8922 set_comma_breakpoints($current_depth); 8923 8924 my $i_opening = $opening_structure_index_stack[$current_depth]; 8925 my $saw_opening_structure = ( $i_opening >= 0 ); 8926 8927 # this term is long if we had to break at interior commas.. 8928 my $is_long_term = $bp_count > 0; 8929 8930 # ..or if the length between opening and closing parens exceeds 8931 # allowed line length 8932 if ( !$is_long_term && $saw_opening_structure ) { 8933 my $i_opening_minus = find_token_starting_list($i_opening); 8934 8935 # Note: we have to allow for one extra space after a 8936 # closing token so that we do not strand a comma or 8937 # semicolon, hence the '>=' here (oneline.t) 8938 $is_long_term = 8939 excess_line_length( $i_opening_minus, $i ) >= 0; 8940 } 8941 8942 # We've set breaks after all comma-arrows. Now we have to 8943 # undo them if this can be a one-line block 8944 # (the only breakpoints set will be due to comma-arrows) 8945 if ( 8946 !$rOpts->{'break-after-comma-arrows'} 8947 8948 # if the opening structure is in this batch 8949 && $saw_opening_structure 8950 8951 # and on the same old line 8952 && ( $old_breakpoint_count_stack[$current_depth] == 8953 $last_old_breakpoint_count ) 8954 8955 # and we made some breakpoints between the opening and closing 8956 && ( $breakpoint_undo_stack[$current_depth] < 8957 $forced_breakpoint_undo_count ) 8958 8959 # and this block is short enough to fit on one line 8960 # Note: use < because need 1 more space for possible comma 8961 && !$is_long_term 8962 8963 ) 8964 { 8965 undo_forced_breakpoint_stack( 8966 $breakpoint_undo_stack[$current_depth] ); 8967 } 8968 8969 # now see if we have any comma breakpoints left 8970 my $has_comma_breakpoints = 8971 ( $breakpoint_stack[$current_depth] != 8972 $forced_breakpoint_count ); 8973 8974 # update broken-sublist flag of the outer container 8975 $has_broken_sublist[$depth] = $has_broken_sublist[$depth] 8976 || $has_broken_sublist[$current_depth] 8977 || $is_long_term 8978 || $has_comma_breakpoints; 8979 8980=pod 8981 8982Having come to the closing ')', '}', or ']', now we have to decide if we 8983should 'open up' the structure by placing breaks at the opening and 8984closing containers. This is a tricky decision. Here are some of the 8985basic considerations: 8986 8987-If this is a BLOCK container, then any breakpoints will have already 8988been set (and according to user preferences), so we need do nothing here. 8989 8990-If we have a comma-separated list for which we can align the list items, 8991then we need to do so because otherwise the vertical aligner cannot 8992currently do the alignment. 8993 8994-If this container does itself contain a container which has been broken 8995open, then it should be broken open to properly show the structure. 8996 8997-If there is nothing to align, and no other reason to break apart, 8998then do not do it. 8999 9000We will not break open the parens of a long but 'simple' logical expression. 9001For example: 9002 9003This is an example of a simple logical expression and its formatting: 9004 9005 if ( $bigwasteofspace1 && $bigwasteofspace2 9006 || $bigwasteofspace3 && $bigwasteofspace4 ) 9007 9008Most people would prefer this than the 'spacey' version: 9009 9010 if ( 9011 $bigwasteofspace1 && $bigwasteofspace2 9012 || $bigwasteofspace3 && $bigwasteofspace4 9013 ) 9014 9015To illustrate the rules for breaking logical expressions, consider: 9016 9017 FULLY DENSE: 9018 if ( $opt_excl 9019 and ( exists $ids_excl_uc{$id_uc} 9020 or grep $id_uc =~ /$_/, @ids_excl_uc )) 9021 9022This is on the verge of being difficult to read. The current default is to 9023open it up like this: 9024 9025 DEFAULT: 9026 if ( 9027 $opt_excl 9028 and ( exists $ids_excl_uc{$id_uc} 9029 or grep $id_uc =~ /$_/, @ids_excl_uc ) 9030 ) 9031 9032This is a compromise which tries to avoid being too dense and to spacey. 9033A more spaced version would be: 9034 9035 SPACEY: 9036 if ( 9037 $opt_excl 9038 and ( 9039 exists $ids_excl_uc{$id_uc} 9040 or grep $id_uc =~ /$_/, @ids_excl_uc 9041 ) 9042 ) 9043 9044Some people might prefer the spacey version -- an option could be added. The 9045innermost expression contains a long block '( exists $ids_... ')'. 9046 9047Here is how the logic goes: We will force a break at the 'or' that the 9048innermost expression contains, but we will not break apart its opening an 9049closing containers because (1) it contains no multi-line sub-containers itself, 9050and (2) there is no alignment to be gained by breaking it open like this 9051 9052 and ( 9053 exists $ids_excl_uc{$id_uc} 9054 or grep $id_uc =~ /$_/, @ids_excl_uc 9055 ) 9056 9057(although this looks perfectly ok and might be good for long expressions). The 9058outer 'if' container, though, contains a broken sub-container, so it will be 9059broken open to avoid too much density. Also, since it contains no 'or's, there 9060will be a forced break at its 'and'. 9061 9062=cut 9063 9064 # set some flags telling something about this container.. 9065 my $is_simple_logical_expression = 0; 9066 if ( $item_count_stack[$current_depth] == 0 9067 && $saw_opening_structure 9068 && $tokens_to_go[$i_opening] eq '(' 9069 && $is_logical_container{ $container_type[$current_depth] } 9070 ) 9071 { 9072 9073 # This seems to be a simple logical expression with no existing 9074 # breakpoints. Set a flag to prevent opening it up. 9075 if ( !$has_comma_breakpoints ) { 9076 $is_simple_logical_expression = 1; 9077 } 9078 9079 # This seems to be a simple logical expression with breakpoints 9080 # (broken sublists, for example). Break at all 'or's and '||'s. 9081 else { 9082 set_logical_breakpoints($current_depth); 9083 } 9084 } 9085 9086 if ( 9087 9088 # breaks for code BLOCKS are handled at a higher level 9089 !$block_type 9090 9091 # we do not need to break at the top level of an 'if' 9092 # type expression 9093 && !$is_simple_logical_expression 9094 9095 # otherwise, we require one of these reasons for breaking: 9096 && ( 9097 9098 # - this term has forced line breaks 9099 $has_comma_breakpoints 9100 9101 # - the opening container is separated from this batch 9102 # for some reason (comment, blank line, code block) 9103 # - this is a non-paren container spanning multiple lines 9104 || !$saw_opening_structure 9105 9106 # - this is a long block contained in another breakable 9107 # container 9108 || ( $is_long_term 9109 && $container_environment_to_go[$i_opening] ne 9110 'BLOCK' ) 9111 ) 9112 ) 9113 { 9114 9115 # For -lp option, we should put a breakpoint before the token 9116 # which has been identified as starting this indentation level. 9117 # This is necessary for proper alignment. 9118 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) 9119 { 9120 my $item = $leading_spaces_to_go[ $i_opening + 1 ]; 9121 if ( defined($item) ) { 9122 my $i_start_2 = $item->get_STARTING_INDEX(); 9123 if ( 9124 defined($i_start_2) 9125 9126 # we are breaking after an opening brace, paren, 9127 # so don't break before it too 9128 && $i_start_2 ne $i_opening 9129 ) 9130 { 9131 9132 # Only break for breakpoints at the same indentation 9133 # level as the opening paren 9134 my $test1 = $leading_spaces_to_go[$i_opening]; 9135 my $test2 = $leading_spaces_to_go[$i_start_2]; 9136 if ( $test2 == $test1 ) { 9137 set_forced_breakpoint( $i_start_2 - 1 ); 9138 } 9139 } 9140 } 9141 } 9142 9143 # break after opening structure. 9144 # note: break before closing structure will be automatic 9145 if ( $minimum_depth <= $current_depth ) { 9146 9147 set_forced_breakpoint($i_opening) 9148 unless ( $do_not_break_apart 9149 || is_unbreakable_container($current_depth) ); 9150 9151 # break before opening structure if preeced by another 9152 # closing structure and a comma. This is normally 9153 # done by the previous closing brace, but not 9154 # if it was a one-line block. 9155 if ( $i_opening > 2 ) { 9156 my $i_prev = 9157 ( $types_to_go[ $i_opening - 1 ] eq 'b' ) 9158 ? $i_opening - 2 9159 : $i_opening - 1; 9160 9161 if ( $types_to_go[$i_prev] eq ',' 9162 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) 9163 { 9164 set_forced_breakpoint($i_prev); 9165 } 9166 9167 # also break before something like ':(' or '?(' 9168 # if appropriate. 9169 elsif ( 9170 $types_to_go[$i_prev] =~ /^([\:\?]|&&|\|\|)$/ ) 9171 { 9172 my $token_prev = $tokens_to_go[$i_prev]; 9173 if ( $want_break_before{$token_prev} ) { 9174 set_forced_breakpoint($i_prev); 9175 } 9176 } 9177 } 9178 } 9179 9180 # break after comma following closing structure 9181 if ( $next_type eq ',' ) { 9182 set_forced_breakpoint( $i + 1 ); 9183 } 9184 9185 # break before an '=' following closing structure 9186 if ( 9187 $next_nonblank_type eq '=' 9188 && ( $breakpoint_stack[$current_depth] != 9189 $forced_breakpoint_count ) 9190 ) 9191 { 9192 set_forced_breakpoint($i); 9193 } 9194 9195 # break at any comma before the opening structure Added 9196 # for -lp, but seems to be good in general. It isn't 9197 # obvious how far back to look; the '5' below seems to 9198 # work well and will catch the comma in something like 9199 # push @list, myfunc( $param, $param, .. 9200 9201 my $icomma = $last_comma_index[$depth]; 9202 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { 9203 unless ( $forced_breakpoint_to_go[$icomma] ) { 9204 set_forced_breakpoint($icomma); 9205 } 9206 } 9207 9208 # With the -lp option we will break at an '=' preceding 9209 # an open paren by a significant number of spaces. 9210 # This can help save space across the page. The 9211 # constants in this are somewhat arbitrary. But note 9212 # the patch above, which disables this for a list 9213 # which is more than 1 level deep. 9214 9215=pod 9216 9217 Here is an example of what this is trying to accomplish: 9218 9219 my $result_string = 9220 $candidates->result_as_string( 9221 'Type' => 'All', 9222 'Category' => 'Failed' 9223 ); 9224 9225 Without the break at the '=', the result would be: 9226 9227 my $result_string = $candidates->result_as_string( 9228 'Type' => 'All', 9229 'Category' => 'Failed' 9230 ); 9231 9232 which is not as nice. 9233 9234=cut 9235 9236 if ($rOpts_line_up_parentheses) { 9237 9238 if ( $token eq ')' ) { 9239 my $iequals = $last_equals_index[$depth]; 9240 9241 # We do not need to break at any '=' if there is 9242 # room for the call parameters. 9243 my $recoverable = 0; 9244 my $icomma = $last_comma_index[$current_depth]; 9245 if ( defined($icomma) ) { # should always be defined 9246 9247 # method 1: for comma lists this 9248 # information is available in the 9249 # indentation. 9250 $recoverable = 9251 $leading_spaces_to_go[$icomma] 9252 ->get_RECOVERABLE_SPACES() > 0 9253 9254 # method 2: for comma-arrow lists 9255 # we need to use an alternate method 9256 || $longest_term[$current_depth] > 9257 table_columns_available($icomma); 9258 9259 } 9260 9261 FORMATTER_DEBUG_FLAG_EQUALS && do { 9262 if ( defined($iequals) ) { 9263 print 9264"EQUALS: considering = break iequals=$iequals rec=$recoverable\n"; 9265 } 9266 else { 9267 print 9268 "EQUALS: at = break but '=' undefined\n"; 9269 } 9270 }; 9271 9272 if ( 9273 defined($iequals) 9274 9275 && ( $recoverable > 0 ) 9276 9277 # more than 10 spaces from the '=' to the '(' 9278 && ( 9279 token_sequence_length( $iequals, 9280 $i_opening ) > 10 9281 ) 9282 9283 # and the '=' is more than 8 spaces 9284 # from line start 9285 && ( token_sequence_length( 0, $iequals ) > 8 ) 9286 ) 9287 { 9288 unless ( $forced_breakpoint_to_go[$iequals] ) { 9289 set_forced_breakpoint($iequals); 9290 } 9291 } 9292 } 9293 9294 # Do not allow break on '=' of a structure 9295 # containing this one because the space cannot 9296 # be recovered at the present time (gnu1.t). We 9297 # might be left with a big gap. 9298 if ( $depth >= 1 ) { 9299 $last_equals_index[ $depth - 1 ] = undef; 9300 FORMATTER_DEBUG_FLAG_EQUALS && do { 9301 print 9302"EQUALS: undefining iequals after breaking a container at depth $depth\n"; 9303 }; 9304 } 9305 } # end -lp logic 9306 } # end logic to open up a container 9307 9308 # Handle long container which does not get opened up 9309 elsif ($is_long_term) { 9310 9311 # must set fake breakpoint to alert outer containers that 9312 # they are complex 9313 set_fake_breakpoint(); 9314 9315 # avoid possible -lp problems (see note above) 9316 if ( $depth >= 1 ) { 9317 $last_equals_index[ $depth - 1 ] = undef; 9318 FORMATTER_DEBUG_FLAG_EQUALS && do { 9319 print 9320"EQUALS: undefining iequals after long block for depth $depth\n"; 9321 }; 9322 } 9323 } 9324 } 9325 9326 #------------------------------------------------------------ 9327 # Handle this token 9328 #------------------------------------------------------------ 9329 9330 $current_depth = $depth; 9331 9332 # handle comma-arrow 9333 if ( $type eq '=>' ) { 9334 next if ( $last_nonblank_type eq '=>' ); 9335 $want_comma_break[$depth] = 1; 9336 $index_before_arrow[$depth] = $i_last_nonblank_token; 9337 next; 9338 } 9339 9340 # remember location of any of the assignment operators 9341 if ( $type =~ /=/ && $type !~ /(==|!=|>=|<=|=~|=>)/ ) { 9342 $last_equals_index[$depth] = $i; 9343 FORMATTER_DEBUG_FLAG_EQUALS && do { 9344 print "EQUALS: found equals at i=$i for depth=$depth\n"; 9345 }; 9346 } 9347 9348 # Turn off alignment if we are sure that this is not a list 9349 # environment. To be safe, we will do this if we see certain 9350 # non-list tokens, such as ';', and also the environment is 9351 # not a list. Note that '=' could be in any of the = operators 9352 # (lextest.t). We can't just use the reported environment 9353 # because it can be incorrect in some cases. 9354 9355 if ( $type =~ /(^[\;\<\>\~]$)|[=]/ 9356 && $container_environment_to_go[$i] ne 'LIST' ) 9357 { 9358 $dont_align[$depth] = 1; 9359 $want_comma_break[$depth] = 0; 9360 $index_before_arrow[$depth] = -1; 9361 } 9362 9363 # now just handle any commas 9364 next unless ( $type eq ',' ); 9365 9366 # keep track of longest item between commas 9367 update_longest_term( $i, $depth ); 9368 9369 $last_comma_index[$depth] = $i; 9370 9371 # break here if this comma follows a '=>' 9372 # but not if there is a side comment after the comma 9373 if ( $want_comma_break[$depth] ) { 9374 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 9375 9376 # break before the previous token if it looks safe 9377 # Example of something that we will not try to break before: 9378 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, 9379 my $ibreak = $index_before_arrow[$depth] - 1; 9380 if ( $ibreak > 0 9381 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) 9382 { 9383 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } 9384 if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) { 9385 set_forced_breakpoint($ibreak); 9386 } 9387 } 9388 9389 $want_comma_break[$depth] = 0; 9390 $index_before_arrow[$depth] = -1; 9391 9392 # items after '=>' may be long, so breaking at a preceding 9393 # '=' might leave a gap when -lp is used. 9394 if ( $depth >= 1 ) { 9395 $last_equals_index[ $depth - 1 ] = undef; 9396 FORMATTER_DEBUG_FLAG_EQUALS && do { 9397 print 9398"EQUALS: comma-arrow: undef equals at depth above $depth\n"; 9399 }; 9400 } 9401 9402 # handle list which mixes '=>'s and ','s: 9403 # treat any list items so far as an interrupted list 9404 $interrupted_list[$depth] = 1; 9405 next; 9406 } 9407 9408 # skip past these commas if we are not supposed to format them 9409 next if ( $dont_align[$depth] ); 9410 9411 # break after all commas above starting depth 9412 ## FIXME: re-check this 9413 if ( $depth < $starting_depth ) { 9414 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 9415 next; 9416 } 9417 9418 # add this comma to the list.. 9419 my $item_count = $item_count_stack[$depth]; 9420 if ( $item_count == 0 ) { 9421 9422 # but do not form a list with no opening structure 9423 # for example: 9424 9425 # open INFILE_COPY, ">$input_file_copy" 9426 # or die ("very long message"); 9427 9428 if ( ( $opening_structure_index_stack[$depth] < 0 ) 9429 && $container_environment_to_go[$i] eq 'BLOCK' ) 9430 { 9431 $dont_align[$depth] = 1; 9432 next; 9433 } 9434 9435 $max_length[$depth][0] = 0; 9436 $max_length[$depth][1] = 0; 9437 } 9438 9439 # save max length of list items to calculate page layout 9440 my $i_prev = 9441 ( $item_count > 0 ) 9442 ? $comma_index[$depth][ $item_count - 1 ] 9443 : $opening_structure_index_stack[$depth]; 9444 9445 if ( !defined($i_prev) ) { $i_prev = -1 } 9446 my $length = token_sequence_length( $i_prev + 1, $i ); 9447 9448 if ( $length > $max_length[$depth][ $item_count % 2 ] ) { 9449 $max_length[$depth][ $item_count % 2 ] = $length; 9450 } 9451 9452 $comma_index[$depth][$item_count] = $i; 9453 ++$item_count_stack[$depth]; 9454 if ( $last_nonblank_type =~ /^[iR\]]$/ ) { 9455 $identifier_count_stack[$depth]++; 9456 } 9457 } 9458 9459 #------------------------------------------- 9460 # end of loop over all tokens in this batch 9461 #------------------------------------------- 9462 9463 # set breaks for any unfinished lists .. 9464 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { 9465 9466 $interrupted_list[$dd] = 1; 9467 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); 9468 set_comma_breakpoints($dd); 9469 set_logical_breakpoints($dd) if $good_old_breakpoint > 0; 9470 9471 # break open container... 9472 my $i_opening = $opening_structure_index_stack[$dd]; 9473 set_forced_breakpoint($i_opening) 9474 unless ( 9475 is_unbreakable_container($dd) 9476 9477 # Avoid a break which would place an isolated ' or " 9478 # on a line 9479 || ( $type eq 'Q' 9480 && $i_opening >= $max_index_to_go - 2 9481 && $token =~ /^['"]$/ ) 9482 ); 9483 } 9484 9485 # Return a flag indicating if the input file had some breakpoints, 9486 # and they were all good. If not all of the breakpoints were good, 9487 # we take that as a bad omen and do not set the flag. This flag 9488 # will be used to force a break in a line shorter than the allowed 9489 # line length. 9490 my $saw_good_breakpoint = 9491 ( $old_breakpoint_count > 0 9492 && $old_breakpoint_count == $good_old_breakpoint ); 9493 9494 return $saw_good_breakpoint; 9495 } 9496} # end closure scan_list 9497 9498sub find_token_starting_list { 9499 9500 # When testing to see if a block will fit on one line, some 9501 # previous token(s) may also need to be on the line; particularly 9502 # if this is a sub call. So we will look back at least one 9503 # token. NOTE: This isn't perfect, but not critical, because 9504 # if we mis-identify a block, it will be wrapped and therefore 9505 # fixed the next time it is formatted. 9506 my $i_opening_paren = shift; 9507 my $i_opening_minus = $i_opening_paren; 9508 my $im1 = $i_opening_paren - 1; 9509 my $im2 = $i_opening_paren - 2; 9510 my $im3 = $i_opening_paren - 3; 9511 my $typem1 = $types_to_go[$im1]; 9512 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; 9513 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { 9514 $i_opening_minus = $i_opening_paren; 9515 } 9516 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { 9517 $i_opening_minus = $im1 if $im1 >= 0; 9518 9519 # walk back to improve length estimate 9520 for ( my $j = $im1 ; $j >= 0 ; $j-- ) { 9521 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); 9522 $i_opening_minus = $j; 9523 } 9524 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } 9525 } 9526 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } 9527 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { 9528 $i_opening_minus = $im2; 9529 } 9530 return $i_opening_minus; 9531} 9532 9533sub set_comma_breakpoints_do { 9534 9535 # Given a list with some commas, set breakpoints at some 9536 # of the commas to allow nice alignment if possible. This 9537 # list is an example: 9538 my ( 9539 $depth, $i_opening_paren, $i_closing_paren, 9540 $item_count, $identifier_count, $rcomma_index, 9541 $rmax_length, $next_nonblank_type, $list_type, 9542 $interrupted, $maximum_nesting_depth, $rdo_not_break_apart, 9543 ) 9544 = @_; 9545 9546 # nothing to do if no commas seen 9547 return if ( $item_count < 1 ); 9548 9549 #--------------------------------------------------------------- 9550 # Compound List Rule 1: 9551 # Break at every comma for a list containing a broken sublist. 9552 # This has higher priority than the Interrupted List Rule. 9553 #--------------------------------------------------------------- 9554 if ( $has_broken_sublist[$depth] ) { 9555 for ( my $j = 0 ; $j < $item_count ; $j++ ) { 9556 my $i = $$rcomma_index[$j]; 9557 set_forced_breakpoint($i); 9558 } 9559 return; 9560 } 9561 9562 my $i_first_comma = $$rcomma_index[0]; 9563 my $i_last_comma = $$rcomma_index[ $item_count - 1 ]; 9564 9565 #my ( $a, $b, $c ) = caller(); 9566 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count 9567 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; 9568 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; 9569 9570 #--------------------------------------------------------------- 9571 # Interrupted List Rule: 9572 # A list is is forced to use old breakpoints if it was interrupted 9573 # by side comments or blank lines. 9574 #--------------------------------------------------------------- 9575 if ( $interrupted || $i_opening_paren < 0 ) { 9576 write_logfile_entry("list broken: using old breakpoints\n") 9577 unless ( $item_count < 6 ); 9578 copy_old_breakpoints( $i_first_comma, $i_last_comma ); 9579 return; 9580 } 9581 9582 my $opening_token = $tokens_to_go[$i_opening_paren]; 9583 my $opening_environment = $container_environment_to_go[$i_opening_paren]; 9584 9585 #--------------------------------------------------------------- 9586 # Looks like a list of items. We have to look at it and size it up. 9587 #--------------------------------------------------------------- 9588 9589 return if ( $i_first_comma < 1 ); 9590 if ( $i_last_comma >= $max_index_to_go ) { 9591 $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; 9592 return if ( $item_count <= 2 ); # not much of a list 9593 } 9594 9595 #------------------------------------------------------------------- 9596 # Return if this will fit on one line 9597 #------------------------------------------------------------------- 9598 9599 my $i_opening_minus = find_token_starting_list($i_opening_paren); 9600 return unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; 9601 9602 #------------------------------------------------------------------- 9603 # Now we know that this block spans multiple lines; we have to set 9604 # at least one breakpoint -- real or fake -- as a signal to break 9605 # open any outer containers. 9606 #------------------------------------------------------------------- 9607 set_fake_breakpoint(); 9608 9609 # now we have to make a distinction between the comma count and item count, 9610 # because the item count will be one greater than the comma count if 9611 # the last item is not terminated with a comma 9612 my $comma_count = $item_count; 9613 my $i_b = 9614 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) 9615 ? $i_last_comma + 1 9616 : $i_last_comma; 9617 my $i_e = 9618 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) 9619 ? $i_closing_paren - 2 9620 : $i_closing_paren - 1; 9621 my $i_effective_last_comma = $i_last_comma; 9622 9623 my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); 9624 if ( $last_item_length > 0 ) { 9625 9626 # add 2 to length because other lengths include a comma and a blank 9627 $last_item_length += 2; 9628 my $i_odd = $item_count % 2; 9629 if ( $last_item_length > $$rmax_length[$i_odd] ) { 9630 $$rmax_length[$i_odd] = $last_item_length; 9631 } 9632 $item_count++; 9633 $i_effective_last_comma = $i_e + 1; 9634 } 9635 9636 # be sure we do not extend beyond the current list length 9637 if ( $i_effective_last_comma >= $max_index_to_go ) { 9638 $i_effective_last_comma = $max_index_to_go - 1; 9639 } 9640 9641 # Field width parameters 9642 my $pair_width = ( $$rmax_length[0] + $$rmax_length[1] ); 9643 my $max_width = 9644 ( $$rmax_length[0] > $$rmax_length[1] ) 9645 ? $$rmax_length[0] 9646 : $$rmax_length[1]; 9647 9648 # Number of free columns across the page width for laying out tables 9649 my $columns = table_columns_available($i_first_comma); 9650 9651 # Specify if the list must have an even number of fields or not. It 9652 # is generally safest in perl to assume an even number, because the 9653 # list items might be a hash list. But if we can be sure that it is 9654 # not a hash, then we can allow an odd number for more flexibility. 9655 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count 9656 if ( $list_type =~ /^(my|local|our)$/ || $next_nonblank_type eq '=' ) { 9657 $odd_or_even = 1; 9658 } 9659 if ( $identifier_count == $comma_count ) { $odd_or_even = 1 } # seems safe 9660 9661 # Number of fields which fit this space 9662 my $number_of_fields = 9663 maximum_number_of_fields( $columns, $odd_or_even, $max_width, 9664 $pair_width ); 9665 9666 # ---------------------------------------------------------------------- 9667 # If we are crowded and the -lp option is being used, try to 9668 # undo some indentation 9669 # ---------------------------------------------------------------------- 9670 9671 if ( $number_of_fields < 2 ) { 9672 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); 9673 if ( $available_spaces > 0 ) { 9674 9675 # asking for an extra space for safety 9676 my $spaces_wanted = 1 + $pair_width - $columns; # for 2 fields 9677 if ( $spaces_wanted > $available_spaces 9678 || $rOpts->{'maximum-fields-per-table'} == 1 ) 9679 { 9680 $spaces_wanted = 1 + $max_width - $columns; # for 1 field 9681 } 9682 9683 # ask for space if needed 9684 if ( $spaces_wanted > 0 ) { 9685 reduce_lp_indentation( $i_first_comma, $spaces_wanted ); 9686 9687 # redo the math 9688 $columns = table_columns_available($i_first_comma); 9689 $number_of_fields = 9690 maximum_number_of_fields( $columns, $odd_or_even, $max_width, 9691 $pair_width ); 9692 } 9693 } 9694 } 9695 9696 # try for one column if two won't work 9697 if ( $number_of_fields <= 0 ) { 9698 $number_of_fields = int( $columns / $max_width ); 9699 } 9700 9701 #print "LISTX: fields=$number_of_fields columns=$columns max_width==$max_width w0=${$rmax_length}[0] w1=${$rmax_length}[1] width=$pair_width\n"; 9702 9703 # The user can place an upper bound on the number of fields, 9704 # which can be useful for doing maintenance on tables 9705 if ( $number_of_fields > $rOpts->{'maximum-fields-per-table'} ) { 9706 $number_of_fields = int $rOpts->{'maximum-fields-per-table'}; 9707 } 9708 9709 # How many columns (characters) and lines would this container take 9710 # if no additional whitespace were added? 9711 my $packed_columns = 9712 token_sequence_length( $i_opening_paren + 1, 9713 $i_effective_last_comma + 1 ); 9714 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero 9715 my $packed_lines = 1 + int( $packed_columns / $columns ); 9716 9717 # are we an item contained in an outer list? 9718 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; 9719 9720 if ( $number_of_fields <= 0 ) { 9721 9722=pod 9723 9724 #--------------------------------------------------------------- 9725 # We're in trouble. We can't find a single field width that works. 9726 # There is no simple answer here; we may have a single long list 9727 # item, or many. 9728 #--------------------------------------------------------------- 9729 9730 In many cases, it may be best to not force a break if there is just one 9731 comma, because the standard continuation break logic will do a better 9732 job without it. 9733 9734 In the common case that all but one of the terms can fit 9735 on a single line, it may look better not to break open the 9736 containing parens. Consider, for example 9737 9738 $color = 9739 join ( '/', 9740 sort { $color_value{$::a} <=> $color_value{$::b}; } 9741 keys %colors ); 9742 9743 which will look like this with the container broken: 9744 9745 $color = join ( 9746 '/', 9747 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors 9748 ); 9749 9750 Here is an example of this rule for a long last term: 9751 9752 log_message( 0, 256, 128, 9753 "Number of routes in adj-RIB-in to be considered: $peercount" ); 9754 9755 And here is an example with a long first term: 9756 9757 $s = sprintf( 9758"%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 9759 $r, $pu, $ps, $cu, $cs, $tt 9760 ) 9761 if $style eq 'all'; 9762 9763=cut 9764 9765 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; 9766 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; 9767 my $long_first_term = 9768 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; 9769 9770 # break at every comma ... 9771 if ( 9772 9773 # if user requested 9774 $rOpts->{'maximum-fields-per-table'} == 1 9775 9776 # or if this is a sublist of a larger list 9777 || $in_hierarchical_list 9778 9779 # or if multiple commas and we dont have a long first or last term 9780 || ( $comma_count > 1 && !( $long_last_term || $long_first_term ) ) 9781 ) 9782 { 9783 for ( my $j = 0 ; $j < $comma_count ; $j++ ) { 9784 my $i = $$rcomma_index[$j]; 9785 set_forced_breakpoint($i); 9786 } 9787 } 9788 elsif ($long_last_term) { 9789 9790 set_forced_breakpoint($i_last_comma); 9791 $$rdo_not_break_apart = 1; 9792 } 9793 elsif ($long_first_term) { 9794 9795 set_forced_breakpoint($i_first_comma); 9796 $$rdo_not_break_apart = 1; 9797 } 9798 else { 9799 9800 # let breaks be defined by default bond strength logic 9801 } 9802 return; 9803 } 9804 9805 # -------------------------------------------------------- 9806 # We have a tentative field count that seems to work. 9807 # How many lines will this require? 9808 # -------------------------------------------------------- 9809 my $formatted_lines = $item_count / ($number_of_fields); 9810 if ( $formatted_lines != int $formatted_lines ) { 9811 $formatted_lines = 1 + int $formatted_lines; 9812 } 9813 9814 # So far we've been trying to fill out to the right margin. But 9815 # compact tables are easier to read, so let's see if we can use fewer 9816 # fields without increasing the number of lines. 9817 $number_of_fields = 9818 compactify_table( $item_count, $number_of_fields, $formatted_lines, 9819 $odd_or_even ); 9820 9821 # How many spaces across the page will we fill? 9822 my $columns_per_line = 9823 ( int $number_of_fields / 2 ) * $pair_width + ( $number_of_fields % 2 ) * 9824 $max_width; 9825 9826 my $formatted_columns; 9827 9828 if ( $number_of_fields > 1 ) { 9829 $formatted_columns = 9830 ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) * 9831 $max_width ); 9832 } 9833 else { 9834 $formatted_columns = $max_width * $item_count; 9835 } 9836 9837 my $unused_columns = $formatted_columns - $packed_columns; 9838 9839 # set some empirical parameters to help decide if we should try to 9840 # align; high sparsity does not look good, especially with few lines 9841 my $sparsity = ($unused_columns) / ($formatted_columns); 9842 my $max_allowed_sparsity = 9843 ( $item_count < 3 ) ? 0.1 9844 : ( $packed_lines == 1 ) ? 0.15 9845 : ( $packed_lines == 2 ) ? 0.4 9846 : 0.7; 9847 9848 # Shortcut method 1: for 2 lines, just one comma: 9849 if ( 9850 $packed_lines <= 2 # probably can fit in 2 lines 9851 && $item_count == 2 # two items, one comma 9852 && $rOpts_line_up_parentheses # -lp 9853 && $opening_environment eq 'BLOCK' # not a sub-container 9854 && $opening_token eq '(' # is paren list 9855 ) 9856 { 9857 my $i_break = $$rcomma_index[0]; 9858 set_forced_breakpoint($i_break); 9859 $$rdo_not_break_apart = 1; 9860 return; 9861 } 9862 9863 # Shortcut method 2: for relatively simple 2 liner function calls 9864 # which usually look better without aligning commas and opening 9865 # up the container 9866 if ( 9867 $packed_lines <= 2 # probably can fit in 2 lines 9868 && ( $identifier_count > 0.5 * $item_count ) # isn't all quotes 9869 && $sparsity > 0.15 # would be fairly spaced gaps if aligned 9870 && $item_count < 9 # doesn't have too many items 9871 && $opening_environment eq 'BLOCK' # not a sub-container 9872 && $opening_token eq '(' # is paren list 9873 && $maximum_nesting_depth <= $depth # has no sublist 9874 ) 9875 { 9876 9877 # let breaks be defined by bond strength logic 9878 $$rdo_not_break_apart = 1; 9879 return; 9880 } 9881 9882 # debug stuff 9883 FORMATTER_DEBUG_FLAG_SPARSE && do { 9884 9885 print 9886"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; 9887 9888 }; 9889 9890 #--------------------------------------------------------------- 9891 # Compound List Rule 2: 9892 # If this list is too long for one line, and it is an item of a 9893 # larger list, then we must format it, regardless of sparsity 9894 # (ian.t). One reason that we have to do this is to trigger 9895 # Compound List Rule 1, above, which causes breaks at all commas of 9896 # all outer lists. In this way, the structure will be properly 9897 # displayed. 9898 #--------------------------------------------------------------- 9899 9900 # Decide if this list is too long for one line unless broken 9901 my $total_columns = table_columns_available($i_opening_paren); 9902 my $too_long = $packed_columns > $total_columns; 9903 9904 # For a paren list, include the length of the token just before the 9905 # '(' because this is likely a sub call, and we would have to 9906 # include the sub name on the same line as the list. This is still 9907 # imprecise, but not too bad. (steve.t) 9908 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { 9909 9910 $too_long = 9911 excess_line_length( $i_opening_minus, $i_effective_last_comma + 1 ) > 9912 0; 9913 } 9914 9915 # FIXME: For an item after a '=>', try to include the length of the thing 9916 # before the '=>'. This is crude and should be improved by actually 9917 # looking back token by token. 9918 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { 9919 my $i_opening_minus = $i_opening_paren - 4; 9920 if ( $i_opening_minus >= 0 ) { 9921 $too_long = 9922 excess_line_length( $i_opening_minus, 9923 $i_effective_last_comma + 1 ) > 0; 9924 } 9925 } 9926 9927 # EXPERIMENTAL: for -lp, assume that we can get back any needed spaces 9928 # by outdenting ... NOT 9929 # if ($too_long && $opening_token eq '(') { 9930 # my $available_spaces = get_AVAILABLE_SPACES_to_go($i_opening_paren); 9931 # if ( $available_spaces > 0 ) { 9932 # $too_long = ( $packed_columns > ( $columns + $available_spaces ) ); 9933 # } 9934 # } 9935 9936 # Always break lists contained in '[' and '{' if too long for 1 line, 9937 # and always break lists which are too long and part of a more complex 9938 # structure. 9939 9940 my $must_format = 9941 ( $too_long && ( $in_hierarchical_list || $opening_token ne '(' ) ); 9942 9943 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_format too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; 9944 9945 #--------------------------------------------------------------- 9946 # The main decision: 9947 # Now decide if we will align the data into aligned columns. Do not 9948 # attempt to align columns if this is a tiny table or it would be 9949 # too spaced. It seems that the more packed lines we have, the 9950 # sparser the list that can be allowed and still look ok. 9951 #--------------------------------------------------------------- 9952 9953 if ( 9954 !$must_format 9955 && ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) 9956 || ( $formatted_lines < 2 ) 9957 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) 9958 ) 9959 ) 9960 { 9961 9962 #--------------------------------------------------------------- 9963 # too sparse: would look ugly if aligned in a table; 9964 #--------------------------------------------------------------- 9965 9966 # use old breakpoints if this is more than 2 lines 9967 # (otherwise, the list breakup will be at the mercy of the 9968 # standard continuation line break algorithm). 9969 if ( $packed_lines > 2 ) { 9970 write_logfile_entry("List sparse: using old breakpoints\n"); 9971 copy_old_breakpoints( $i_first_comma, $i_last_comma ); 9972 } 9973 9974 # let the continuation logic handle it if 2 lines 9975 else { 9976 $$rdo_not_break_apart = 1; 9977 } 9978 return; 9979 } 9980 9981 #--------------------------------------------------------------- 9982 # looks ok, so go ahead and format the table 9983 #--------------------------------------------------------------- 9984 write_logfile_entry( 9985 "List: auto formatting with $number_of_fields fields/row\n"); 9986 my $j; 9987 9988 for ( $j = $number_of_fields - 1 ; 9989 $j < $comma_count ; $j += $number_of_fields ) 9990 { 9991 my $i = $$rcomma_index[$j]; 9992 set_forced_breakpoint($i); 9993 } 9994 9995 # Save list diagnostics during development 9996 FORMATTER_DEBUG_FLAG_LIST && do { 9997 my $pkl = sprintf( "%.1f", $packed_lines ); 9998 my $fml = sprintf( "%.1f", $formatted_lines ); 9999 write_diagnostics(<<"EOM"); 10000List:items=$item_count commas=$comma_count ids=$identifier_count cols=$columns fmt_lines=$fml pkd_lines=$pkl brks=$forced_breakpoint_count 10001 fmt_cols=$formatted_columns pk_cols=$packed_columns unusd=$unused_columns 10002EOM 10003 }; 10004 10005 return; 10006} 10007 10008sub table_columns_available { 10009 my $i_first_comma = shift; 10010 my $columns = 10011 $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma); 10012 10013 # Patch: the vertical formatter does not line up lines whose lengths 10014 # exactly equal the available line length because of allowances 10015 # that must be made for side comments. Therefore, the number of 10016 # available columns is reduced by 1 character. 10017 $columns -= 1; 10018 return $columns; 10019} 10020 10021sub maximum_number_of_fields { 10022 10023 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; 10024 my $max_pairs = int( $columns / $pair_width ); 10025 my $number_of_fields = $max_pairs * 2; 10026 if ( $odd_or_even == 1 10027 && $max_pairs * $pair_width + $max_width <= $columns ) 10028 { 10029 $number_of_fields++; 10030 } 10031 return $number_of_fields; 10032} 10033 10034sub compactify_table { 10035 10036 # given a table with a certain number of fields and a certain number 10037 # of lines, see if reducing the number of fields will make it look 10038 # better. 10039 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; 10040 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { 10041 my $min_fields; 10042 10043 for ( $min_fields = $number_of_fields ; 10044 $min_fields >= $odd_or_even 10045 && $min_fields * $formatted_lines >= $item_count ; 10046 $min_fields -= $odd_or_even ) 10047 { 10048 $number_of_fields = $min_fields; 10049 } 10050 } 10051 return $number_of_fields; 10052} 10053 10054sub copy_old_breakpoints { 10055 my ( $i_first_comma, $i_last_comma ) = @_; 10056 for my $i ( $i_first_comma .. $i_last_comma ) { 10057 if ( $old_breakpoint_to_go[$i] ) { 10058 set_forced_breakpoint($i); 10059 } 10060 } 10061} 10062 10063sub set_nobreaks { 10064 my ( $i, $j ) = @_; 10065 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { 10066 10067 FORMATTER_DEBUG_FLAG_NOBREAK && do { 10068 my ( $a, $b, $c ) = caller(); 10069 print( 10070"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n" 10071 ); 10072 }; 10073 10074 my $k; 10075 for ( $k = $i ; $k <= $j ; $k++ ) { 10076 $nobreak_to_go[$k] = 1; 10077 } 10078 } 10079 10080 # shouldn't happen; non-critical error 10081 else { 10082 FORMATTER_DEBUG_FLAG_NOBREAK && do { 10083 my ( $a, $b, $c ) = caller(); 10084 print( 10085"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n" 10086 ); 10087 }; 10088 } 10089} 10090 10091sub set_fake_breakpoint { 10092 10093 # Just bump up the breakpoint count as a signal that there are breaks. 10094 # This is useful if we have breaks but may want to postpone deciding where 10095 # to make them. 10096 $forced_breakpoint_count++; 10097} 10098 10099sub set_forced_breakpoint { 10100 my $i = shift; 10101 10102 # when called with certain tokens, use bond strengths to decide 10103 # if we break before or after it 10104 my $token = $tokens_to_go[$i]; 10105 if ( $token =~ /^([\,\:\?]|&&|\|\|)$/ ) { 10106 if ( $want_break_before{$token} && $i >= 0 ) { $i-- } 10107 } 10108 10109 # breaks are forced before 'or' and 'and' for now: 10110 if ( $token eq 'and' || $token eq 'or' ) { $i-- } 10111 10112 if ( $i >= 0 && $i <= $max_index_to_go ) { 10113 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; 10114 10115 FORMATTER_DEBUG_FLAG_FORCE && do { 10116 my ( $a, $b, $c ) = caller(); 10117 print 10118"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; 10119 }; 10120 10121 # Note: I thought it would be best not to set these if the nobreak 10122 # flag is set (since it has priority), but things really looked 10123 # better without doing this check. The reason is that the really 10124 # complex lines which would trigger this should really be split 10125 # up. ( break.t ) 10126 # if ( $i_nonblank >= 0 && !$nobreak_to_go[$i_nonblank]) { 10127 # So just do this: 10128 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { 10129 $forced_breakpoint_to_go[$i_nonblank] = 1; 10130 10131 if ( $i_nonblank > $index_max_forced_break ) { 10132 $index_max_forced_break = $i_nonblank; 10133 } 10134 $forced_breakpoint_count++; 10135 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = 10136 $i_nonblank; 10137 10138 # if we break at an opening container..break at the closing 10139 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { 10140 set_closing_breakpoint($i_nonblank); 10141 } 10142 } 10143 } 10144} 10145 10146sub clear_breakpoint_undo_stack { 10147 $forced_breakpoint_undo_count = 0; 10148} 10149 10150sub undo_forced_breakpoint_stack { 10151 10152 my $i_start = shift; 10153 if ( $i_start < 0 ) { 10154 $i_start = 0; 10155 my ( $a, $b, $c ) = caller(); 10156 warning( 10157"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " 10158 ); 10159 } 10160 10161 while ( $forced_breakpoint_undo_count > $i_start ) { 10162 my $i = 10163 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; 10164 if ( $i >= 0 && $i <= $max_index_to_go ) { 10165 $forced_breakpoint_to_go[$i] = 0; 10166 $forced_breakpoint_count--; 10167 10168 FORMATTER_DEBUG_FLAG_UNDOBP && do { 10169 my ( $a, $b, $c ) = caller(); 10170 print( 10171"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n" 10172 ); 10173 }; 10174 } 10175 10176 # shouldn't happen, but not a critical error 10177 else { 10178 FORMATTER_DEBUG_FLAG_UNDOBP && do { 10179 my ( $a, $b, $c ) = caller(); 10180 print( 10181"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go" 10182 ); 10183 }; 10184 } 10185 } 10186} 10187 10188sub recombine_breakpoints { 10189 10190 # sub set_continuation_breaks is very liberal in setting line breaks 10191 # for long lines, always setting breaks at good breakpoints, even 10192 # when that creates small lines. Occasionally small line fragments 10193 # are produced which would look better if they were combined. 10194 # That's the task of this routine, recombine_breakpoints. 10195 my ( $ri_first, $ri_last ) = @_; 10196 my $more_to_do = 1; 10197 10198 # Keep looping until there are no more possible recombinations 10199 my $nmax_last = @$ri_last; 10200 while ($more_to_do) { 10201 my $n_best = 0; 10202 my $bs_best; 10203 my $n; 10204 my $nmax = @$ri_last - 1; 10205 10206 # safety check.. 10207 unless ( $nmax < $nmax_last ) { 10208 10209 # shouldn't happen because splice below decreases nmax on each pass: 10210 # but i get paranoid sometimes 10211 die "Program bug-infinite loop in recombine breakpoints\n"; 10212 } 10213 $nmax_last = $nmax; 10214 $more_to_do = 0; 10215 10216 # loop over all remaining lines... 10217 for $n ( 1 .. $nmax ) { 10218 10219 #---------------------------------------------------------- 10220 # Indexes of the endpoints of the two lines are: 10221 # 10222 # ---left---- | ---right--- 10223 # $if $imid | $imidr $il 10224 # 10225 # We want to decide if we should join tokens $imid to $imidr 10226 #---------------------------------------------------------- 10227 my $if = $$ri_first[ $n - 1 ]; 10228 my $il = $$ri_last[$n]; 10229 my $imid = $$ri_last[ $n - 1 ]; 10230 my $imidr = $$ri_first[$n]; 10231 10232 #print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n"; 10233 10234 #---------------------------------------------------------- 10235 # Start of special recombination rules 10236 # These are ad-hoc rules which have been found to work ok. 10237 # Skip to next pair to avoid re-combination. 10238 #---------------------------------------------------------- 10239 10240 # a terminal '{' should stay where it is 10241 next if ( $n == $nmax && $types_to_go[$imidr] eq '{' ); 10242 10243 #---------------------------------------------------------- 10244 # examine token at $imid (right end of first line of pair) 10245 #---------------------------------------------------------- 10246 10247 # an isolated '}' may join with a ';' terminated segment 10248 if ( $types_to_go[$imid] eq '}' ) { 10249 next unless ( 10250 10251 # join } and ; 10252 ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) ) 10253 10254 # handle '.' below 10255 || ( $types_to_go[$imidr] eq '.' ) 10256 ); 10257 } 10258 10259 # for lines ending in a comma... 10260 elsif ( $types_to_go[$imid] eq ',' ) { 10261 10262 # an isolated '},' may join with an identifier + ';' 10263 # this is useful for the class of a 'bless' statement (bless.t) 10264 if ( $types_to_go[$if] eq '}' && $types_to_go[$imidr] eq 'i' ) { 10265 next 10266 unless ( ( $if == ( $imid - 1 ) ) 10267 && ( $il == ( $imidr + 1 ) ) 10268 && ( $types_to_go[$il] eq ';' ) ); 10269 10270 # override breakpoint 10271 $forced_breakpoint_to_go[$imid] = 0; 10272 } 10273 10274 # but otherwise, do not recombine unless this will leave 10275 # just 1 more line 10276 else { 10277 next unless ( $n + 1 >= $nmax ); 10278 } 10279 } 10280 10281 # opening paren.. 10282 elsif ( $types_to_go[$imid] eq '(' ) { 10283 10284 # No longer doing this 10285 } 10286 10287 elsif ( $types_to_go[$imid] eq ')' ) { 10288 10289 # No longer doing this 10290 } 10291 10292 # keep a terminal colon 10293 elsif ( $types_to_go[$imid] eq ':' ) { 10294 next; 10295 } 10296 10297 # keep a terminal for-semicolon 10298 elsif ( $types_to_go[$imid] eq 'f' ) { 10299 next; 10300 } 10301 10302 # if '=' at end of line ... 10303 elsif ( $types_to_go[$imid] eq '=' ) { 10304 my $is_math = ( 10305 ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ ) 10306 10307 # note no '$' in pattern because -> can start long identifier 10308 && !grep { $_ =~ /^(->|=>|[\,])/ } 10309 @types_to_go[ $imidr .. $il ] 10310 ); 10311 10312 # retain the break after the '=' unless ... 10313 next unless ( 10314 10315 # '=' is followed by a number and looks like math 10316 ( $types_to_go[$imidr] eq 'n' && $is_math ) 10317 10318 # or followed by a scalar and looks like math 10319 || ( ( $types_to_go[$imidr] eq 'i' ) 10320 && ( $tokens_to_go[$imidr] =~ /^\$/ ) 10321 && $is_math ) 10322 10323 # or followed by a single "short" token ('12' is arbitrary) 10324 || ( $il == $imidr 10325 && token_sequence_length( $imidr, $imidr ) < 12 ) 10326 10327 ); 10328 } 10329 10330 # for keywords.. 10331 elsif ( $types_to_go[$imid] eq 'k' ) { 10332 10333 # make major control keywords stand out 10334 # (recombine.t) 10335 next if ( $tokens_to_go[$imid] =~ /^(last|next|redo|return)$/ ); 10336 } 10337 10338 #---------------------------------------------------------- 10339 # examine token at $imidr (left end of second line of pair) 10340 #---------------------------------------------------------- 10341 10342 # do not recombine lines with leading &&, ||, or : 10343 if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) { 10344 next; 10345 } 10346 10347 # Identify and recombine a broken ?/: chain 10348 elsif ( $types_to_go[$imidr] eq '?' ) { 10349 10350 # indexes of line first tokens -- 10351 # mm - line before previous line 10352 # f - previous line 10353 # <-- this line 10354 # ff - next line 10355 # fff - line after next 10356 my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1; 10357 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1; 10358 my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1; 10359 my $seqno = $type_sequence_to_go[$imidr]; 10360 10361 my $f_ok = 10362 ( $tokens_to_go[$if] eq ':' 10363 && $type_sequence_to_go[$if] == 10364 $seqno - TYPE_SEQUENCE_INCREMENT ); 10365 my $mm_ok = 10366 ( $imm >= 0 10367 && $tokens_to_go[$imm] eq ':' 10368 && $type_sequence_to_go[$imm] == 10369 $seqno - 2 * TYPE_SEQUENCE_INCREMENT ); 10370 10371 my $ff_ok = 10372 ( $iff > 0 10373 && $tokens_to_go[$iff] eq ':' 10374 && $type_sequence_to_go[$iff] == $seqno ); 10375 my $fff_ok = 10376 ( $ifff > 0 10377 && $tokens_to_go[$ifff] eq ':' 10378 && $type_sequence_to_go[$ifff] == 10379 $seqno + TYPE_SEQUENCE_INCREMENT ); 10380 10381 # we require that this '?' be part of a correct sequence 10382 # of 3 in a row or else no recombination is done. 10383 next unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) ); 10384 $forced_breakpoint_to_go[$imid] = 0; 10385 } 10386 10387 # do not recombine lines with leading '.' 10388 elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) { 10389 my $i_next_nonblank = $imidr + 1; 10390 if ( $types_to_go[$i_next_nonblank] eq 'b' ) { 10391 $i_next_nonblank++; 10392 } 10393 10394=pod 10395 ... unless there is just one and we can reduce this to 10396 two lines if we do. For example, this : 10397 10398 $bodyA .= 10399 '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' 10400 10401 looks better than this: 10402 $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' 10403 . '$args .= $pat;' 10404 10405 10406 ... or this would strand a short token on the last line, like this 10407 . "some long qoute" 10408 . "\n"; 10409 10410=cut 10411 10412 next unless ( 10413 10414 ( 10415 $n == 2 10416 && $n == $nmax 10417 && $types_to_go[$if] ne $types_to_go[$imidr] 10418 ) 10419 10420 || ( 10421 ( 10422 ( 10423 $n == $nmax 10424 && token_sequence_length( $i_next_nonblank, 10425 $il ) < 10426 $rOpts->{'short-concatenation-item-length'} + 1 10427 ) || $types_to_go[$i_next_nonblank] eq 'Q' 10428 ) 10429 && $i_next_nonblank <= $il 10430 && length( $tokens_to_go[$i_next_nonblank] ) < 10431 $rOpts->{'short-concatenation-item-length'} 10432 ) 10433 ); 10434 } 10435 10436 # handle leading keyword.. 10437 elsif ( $types_to_go[$imidr] eq 'k' ) { 10438 10439 # handle leading "and" and "or" 10440 if ( $tokens_to_go[$imidr] =~ /^(and|or)$/ ) { 10441 10442 # Decide if we will combine a single terminal 'and' and 10443 # 'or' after an 'if' or 'unless'. We should consider the 10444 # possible vertical alignment, and visual clutter. 10445 10446=pod 10447 10448 This looks best with the 'and' on the same line as the 'if': 10449 10450 $a = 1 10451 if $seconds and $nu < 2; 10452 10453 But this looks better as shown: 10454 10455 $a = 1 10456 if !$this->{Parents}{$_} 10457 or $this->{Parents}{$_} eq $_; 10458 10459 Eventually, it would be nice to look for similarities (such as 'this' or 10460 'Parents'), but for now I'm using a simple rule that says that the 10461 resulting line length must not be more than half the maximum line length 10462 (making it 80/2 = 40 characters by default). 10463 10464=cut 10465 10466 next unless ( 10467 $n == $nmax # if this is the last line 10468 && $types_to_go[$il] eq ';' # ending in ';' 10469 && $types_to_go[$if] eq 'k' # after an 'if' or 'unless' 10470 && $tokens_to_go[$if] =~ /^(if|unless)$/ 10471 10472 # and if this doesn't make a long last line 10473 && total_line_length( $if, $il ) <= 10474 $rOpts_maximum_line_length / 2 10475 ); 10476 10477 # override breakpoint 10478 $forced_breakpoint_to_go[$imid] = 0; 10479 } 10480 10481 # handle leading "if" and "unless" 10482 elsif ( $tokens_to_go[$imidr] =~ /^(if|unless)$/ ) { 10483 10484=pod 10485 10486FIXME: This is experimental..may not be too useful 10487 10488=cut 10489 10490 next unless ( 10491 $n == $nmax # if this is the last line 10492 && $types_to_go[$il] eq ';' # ending in ';' 10493 && $types_to_go[$if] eq 'k' # after an 'if' or 'unless' 10494 && $tokens_to_go[$if] =~ /^(and|or)$/ 10495 10496 # and if this doesn't make a long last line 10497 && total_line_length( $if, $il ) <= 10498 $rOpts_maximum_line_length / 2 10499 ); 10500 10501 # override breakpoint 10502 $forced_breakpoint_to_go[$imid] = 0; 10503 } 10504 10505 # handle all other leading keywords 10506 else { 10507 10508 # keywords look best at start of lines, 10509 # but combine things like "1 while" 10510 next 10511 if ( ( $types_to_go[$imid] ne 'k' ) 10512 && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) ); 10513 } 10514 } 10515 10516 # similar treatment of && and || as above for 'and' and 'or': 10517 elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) { 10518 10519 # maybe looking at something like: 10520 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; 10521 10522 next unless ( 10523 $n == $nmax # if this is the last line 10524 && $types_to_go[$il] eq ';' # ending in ';' 10525 && $types_to_go[$if] eq 'k' # after an 'if' or 'unless' 10526 && $tokens_to_go[$if] =~ /^(if|unless)$/ 10527 10528 # and if this doesn't make a long last line 10529 && total_line_length( $if, $il ) <= 10530 $rOpts_maximum_line_length / 2 10531 ); 10532 10533 # override breakpoint 10534 $forced_breakpoint_to_go[$imid] = 0; 10535 } 10536 10537 # honor hard breakpoints 10538 next if ( $forced_breakpoint_to_go[$imid] > 0 ); 10539 10540 #---------------------------------------------------------- 10541 # end of special recombination rules 10542 #---------------------------------------------------------- 10543 10544 my $bs = $bond_strength_to_go[$imid]; 10545 10546 # combined line cannot be too long 10547 next 10548 if excess_line_length( $if, $il ) > 0; 10549 10550 # do not recombine if we would skip in indentation levels 10551 if ( $n < $nmax ) { 10552 my $if_next = $$ri_first[ $n + 1 ]; 10553 next 10554 if ( 10555 $levels_to_go[$if] < $levels_to_go[$imidr] 10556 && $levels_to_go[$imidr] < $levels_to_go[$if_next] 10557 10558 # but an isolated 'if (' is undesirable 10559 && !( 10560 $n == 1 10561 && $imid - $if <= 2 10562 && $types_to_go[$if] eq 'k' 10563 && $tokens_to_go[$if] eq 'if' 10564 && $tokens_to_go[$imid] ne '(' 10565 ) 10566 10567 # 10568 ); 10569 } 10570 10571 # honor no-break's 10572 next if ( $bs == NO_BREAK ); 10573 10574 # remember the pair with the greatest bond strength 10575 if ( !$n_best ) { 10576 $n_best = $n; 10577 $bs_best = $bs; 10578 } 10579 else { 10580 10581 if ( $bs > $bs_best ) { 10582 $n_best = $n; 10583 $bs_best = $bs; 10584 } 10585 10586 # we have 2 or more candidates, so need another pass 10587 $more_to_do++; 10588 } 10589 } 10590 10591 # recombine the pair with the greatest bond strength 10592 if ($n_best) { 10593 splice @$ri_first, $n_best, 1; 10594 splice @$ri_last, $n_best - 1, 1; 10595 } 10596 } 10597 return ( $ri_first, $ri_last ); 10598} 10599 10600sub set_continuation_breaks { 10601 10602 # Define an array of indexes for inserting newline characters to 10603 # keep the line lengths below the maximum desired length. There is 10604 # an implied break after the last token, so it need not be included. 10605 # We'll break at points where the bond strength is lowest. 10606 my $saw_good_break = shift; 10607 my @i_first = (); # the first index to output 10608 my @i_last = (); # the last index to output 10609 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s 10610 if ( $tokens_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } 10611 10612 set_bond_strengths(); 10613 10614 my $imin = 0; 10615 my $imax = $max_index_to_go; 10616 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 10617 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 10618 my $i_begin = $imin; 10619 10620 my $leading_spaces = leading_spaces_to_go($imin); 10621 my $line_count = 0; 10622 my $last_break_strength = NO_BREAK; 10623 my $i_last_break = -1; 10624 my $max_bias = 0.001; 10625 my $leading_alignment_token = ""; 10626 my $leading_alignment_type = ""; 10627 10628 # see if any ?/:'s are in order 10629 my $colons_in_order = 1; 10630 my $last_tok = ""; 10631 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; 10632 foreach my $tok (@colon_list) { 10633 if ( $tok eq $last_tok ) { $colons_in_order = 0; last } 10634 $last_tok = $tok; 10635 } 10636 10637 # This is a sufficient but not necessary condition for colon chain 10638 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); 10639 10640 while ( $i_begin <= $imax ) { 10641 my $lowest_strength = NO_BREAK; 10642 my $starting_sum = $lengths_to_go[$i_begin]; 10643 my $i_lowest = -1; 10644 my $i_test = -1; 10645 my $lowest_next_token = ''; 10646 my $lowest_next_type = 'b'; 10647 my $i_lowest_next_nonblank = -1; 10648 10649 # loop to find next break point 10650 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { 10651 my $type = $types_to_go[$i_test]; 10652 my $token = $tokens_to_go[$i_test]; 10653 my $next_type = $types_to_go[ $i_test + 1 ]; 10654 my $next_token = $tokens_to_go[ $i_test + 1 ]; 10655 my $i_next_nonblank = 10656 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); 10657 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 10658 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 10659 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 10660 my $strength = $bond_strength_to_go[$i_test]; 10661 my $must_break = 0; 10662 10663 if ( 10664 10665 # Try to put a break where requested by scan_list 10666 $forced_breakpoint_to_go[$i_test] 10667 10668 # break between ) { in a continued line so that the '{' can 10669 # be outdented 10670 # See similar logic in scan_list which catches instances 10671 # where a line is just something like ') {' 10672 || ( $line_count 10673 && ( $token eq ')' ) 10674 && ( $next_nonblank_type eq '{' ) 10675 && ($next_nonblank_block_type) 10676 && !$rOpts->{'opening-brace-always-on-right'} ) 10677 10678 # There is an implied forced break at a terminal opening brace 10679 || ( ( $type eq '{' ) && ( $i_test == $imax ) ) 10680 10681 ) 10682 { 10683 10684 # Forced breakpoints must sometimes be overridden because of a 10685 # side comment causing a NO_BREAK. It is easier to catch this 10686 # here than when they are set. 10687 if ( $strength < NO_BREAK ) { 10688 $strength = $lowest_strength / 2; 10689 $must_break = 1; 10690 } 10691 } 10692 10693 # quit if a break here would put a good terminal token on 10694 # the next line and we already have a possible break 10695 if ( 10696 !$must_break 10697 && ( $next_nonblank_type =~ /^[\;\,]$/ ) 10698 && ( 10699 ( 10700 $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ] 10701 - $starting_sum 10702 ) > $rOpts_maximum_line_length 10703 ) 10704 ) 10705 { 10706 last if ( $i_lowest >= 0 ); 10707 } 10708 10709 # Avoid a break which would strand a single punctuation 10710 # token. For example, we do not want to strand a leading 10711 # '.' which is followed by a long quoted string. 10712 if ( 10713 !$must_break 10714 && ( $i_test == $i_begin ) 10715 && ( $i_test < $imax ) 10716 && ( $token eq $type ) 10717 && ( 10718 ( 10719 $leading_spaces + $lengths_to_go[ $i_test + 1 ] - 10720 $starting_sum 10721 ) <= $rOpts_maximum_line_length 10722 ) 10723 ) 10724 { 10725 $i_test++; 10726 10727 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { 10728 $i_test++; 10729 } 10730 redo; 10731 } 10732 10733 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) 10734 { 10735 10736 # break at previous best break if it would have produced 10737 # a leading alignment of certain common tokens, and it 10738 # is different from the latest candidate break 10739 last 10740 if ( 10741 $leading_alignment_type 10742 && ( $next_nonblank_type ne $leading_alignment_type 10743 || $next_nonblank_token ne $leading_alignment_token ) 10744 ); 10745 10746 # Force at least one breakpoint if old code had good break 10747 # It is only called if a breakpoint is required or desired. 10748 #print "at tok=$token next=$next_nonblank_token str=$strength i=$i_test i-last=$i_last_break i_low= $i_lowest str= $lowest_strength\n"; 10749 last if ( 10750 $i_test == $imax # we are at the end 10751 && !$forced_breakpoint_count # 10752 && $saw_good_break # old line had good break 10753 && $type eq ';' # and this line ends in a ; 10754 && $i_last_break < 0 # and we haven't made a break 10755 && $i_lowest > 0 # and we saw a possible break 10756 && $i_lowest < $imax - 1 # (but not just before this ;) 10757 && $strength - $lowest_strength < 0.5 * WEAK # and it's good 10758 ); 10759 10760 $lowest_strength = $strength; 10761 $i_lowest = $i_test; 10762 $lowest_next_token = $next_nonblank_token; 10763 $lowest_next_type = $next_nonblank_type; 10764 $i_lowest_next_nonblank = $i_next_nonblank; 10765 last if $must_break; 10766 10767 # set flags to remember if a break here will produce a 10768 # leading alignment of certain common tokens 10769 if ( 10770 $line_count > 0 10771 && $i_test < $imax 10772 && ( $lowest_strength - $last_break_strength <= $max_bias ) 10773 && ( $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/ 10774 && $types_to_go[$i_begin] eq $next_nonblank_type ) 10775 || ( $tokens_to_go[$i_begin] =~ /^(and|or)$/ 10776 && $tokens_to_go[$i_begin] eq $next_nonblank_token ) 10777 ) 10778 { 10779 $leading_alignment_token = $next_nonblank_token; 10780 $leading_alignment_type = $next_nonblank_type; 10781 } 10782 } 10783 10784 my $too_long = ( $i_test >= $imax ) 10785 ? 1 10786 : ( 10787 ( 10788 $leading_spaces + $lengths_to_go[ $i_test + 2 ] - 10789 $starting_sum 10790 ) > $rOpts_maximum_line_length 10791 ); 10792 10793 FORMATTER_DEBUG_FLAG_BREAK 10794 && print 10795"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; 10796 10797 # allow one extra terminal token after exceeding line length 10798 # if it would strand this token. 10799 if ( $rOpts_fuzzy_line_length 10800 && $too_long 10801 && ( $i_lowest == $i_test ) 10802 && ( length($token) > 1 ) 10803 && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) 10804 { 10805 $too_long = 0; 10806 } 10807 10808 last if ( 10809 ( $i_test == $imax ) # we're done if no more tokens, 10810 || ( 10811 ( $i_lowest >= 0 ) # or no more space and we have a break 10812 && $too_long 10813 ) 10814 ); 10815 } 10816 10817 # it's always ok to break at imax if no other break was found 10818 if ( $i_lowest < 0 ) { $i_lowest = $imax } 10819 10820 # semi-final index calculation 10821 my $i_next_nonblank = 10822 ( ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 10823 ? $i_lowest + 2 10824 : $i_lowest + 1 ); 10825 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 10826 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 10827 10828 #------------------------------------------------------- 10829 # ?/: rule 1 : if a break here will separate a '?' on this 10830 # line from its closing ':', then break at the '?' instead. 10831 #------------------------------------------------------- 10832 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { 10833 next unless ( $tokens_to_go[$i] eq '?' ); 10834 10835 # do not break if probable sequence of ?/: statements 10836 next if ($is_colon_chain); 10837 10838 # do not break if statement is broken by side comment 10839 next 10840 if ( 10841 $tokens_to_go[$max_index_to_go] eq '#' 10842 && terminal_type( \@types_to_go, \@block_type_to_go, 0, 10843 $max_index_to_go ) !~ /^[\;\}]$/ 10844 ); 10845 10846 # no break needed if matching : is also on the line 10847 next 10848 if ( $mate_index_to_go[$i] >= 0 10849 && $mate_index_to_go[$i] <= $i_next_nonblank ); 10850 10851 $i_lowest = $i; 10852 if ( $want_break_before{'?'} ) { $i_lowest-- } 10853 last; 10854 } 10855 10856 # final index calculation 10857 $i_next_nonblank = 10858 ( ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 10859 ? $i_lowest + 2 10860 : $i_lowest + 1 ); 10861 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 10862 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 10863 10864 FORMATTER_DEBUG_FLAG_BREAK 10865 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; 10866 10867 #------------------------------------------------------- 10868 # ?/: rule 2 : if we break at a '?', then break at its ':' 10869 # 10870 # Note: this rule is also in sub scan_list to handle a break 10871 # at the start and end of a line (in case breaks are dictated 10872 # by side comments). 10873 #------------------------------------------------------- 10874 if ( $next_nonblank_type eq '?' ) { 10875 set_closing_breakpoint($i_next_nonblank); 10876 } 10877 elsif ( $types_to_go[$i_lowest] eq '?' ) { 10878 set_closing_breakpoint($i_lowest); 10879 } 10880 10881 #------------------------------------------------------- 10882 # ?/: rule 3 : if we break at a ':' then we save 10883 # its location for further work below. We may need to go 10884 # back and break at its '?'. 10885 #------------------------------------------------------- 10886 if ( $next_nonblank_type eq ':' ) { 10887 push @i_colon_breaks, $i_next_nonblank; 10888 } 10889 elsif ( $types_to_go[$i_lowest] eq ':' ) { 10890 push @i_colon_breaks, $i_lowest; 10891 } 10892 10893 # here we should set breaks for all '?'/':' pairs which are 10894 # separated by this line 10895 10896 $line_count++; 10897 10898 # save this line segment, after trimming blanks at the ends 10899 push ( @i_first, 10900 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); 10901 push ( @i_last, 10902 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); 10903 10904 # set a forced breakpoint at a container opening, if necessary, to 10905 # signal a break at a closing container. Excepting '(' for now. 10906 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ 10907 && !$forced_breakpoint_to_go[$i_lowest] ) 10908 { 10909 set_closing_breakpoint($i_lowest); 10910 } 10911 10912 # get ready to go again 10913 $i_begin = $i_lowest + 1; 10914 $last_break_strength = $lowest_strength; 10915 $i_last_break = $i_lowest; 10916 $leading_alignment_token = ""; 10917 $leading_alignment_type = ""; 10918 $lowest_next_token = ''; 10919 $lowest_next_type = 'b'; 10920 10921 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { 10922 $i_begin++; 10923 } 10924 10925 # update indentation size 10926 if ( $i_begin <= $imax ) { 10927 $leading_spaces = leading_spaces_to_go($i_begin); 10928 } 10929 } 10930 10931 #------------------------------------------------------- 10932 # ?/: rule 4 -- if we broke at a ':', then break at 10933 # corresponding '?' unless this is a chain of ?: expressions 10934 #------------------------------------------------------- 10935 if (@i_colon_breaks) { 10936 10937 # using a simple method for deciding if we are in a ?/: chain -- 10938 # this is a chain if it has multiple ?/: pairs all in order; 10939 # otherwise not. 10940 # Note that if line starts in a ':' we count that above as a break 10941 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); 10942 10943 unless ($is_chain) { 10944 my @insert_list = (); 10945 foreach my $i (@i_colon_breaks) { 10946 my $i_question = $mate_index_to_go[$i]; 10947 if ( $i_question >= 0 ) { 10948 if ( $want_break_before{'?'} ) { $i_question-- } 10949 if ( $i_question >= 0 ) { 10950 push @insert_list, $i_question; 10951 } 10952 } 10953 insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); 10954 } 10955 } 10956 } 10957 return \@i_first, \@i_last; 10958} 10959 10960sub insert_additional_breaks { 10961 10962 # this routine will add line breaks at requested locations after 10963 # sub set_continuation_breaks has made preliminary breaks. 10964 10965 my ( $ri_break_list, $ri_first, $ri_last ) = @_; 10966 my $i_f; 10967 my $i_l; 10968 my $line_number = 0; 10969 foreach my $i_break_left ( sort @$ri_break_list ) { 10970 10971 $i_f = $$ri_first[$line_number]; 10972 $i_l = $$ri_last[$line_number]; 10973 while ( $i_break_left >= $i_l ) { 10974 $line_number++; 10975 10976 # shouldn't happen unless caller passes bad indexes 10977 if ( $line_number >= @$ri_last ) { 10978 warning( 10979"Non-fatal program bug: couldn't set break at $i_break_left\n" 10980 ); 10981 report_definite_bug(); 10982 return; 10983 } 10984 $i_f = $$ri_first[$line_number]; 10985 $i_l = $$ri_last[$line_number]; 10986 } 10987 10988 my $i_break_right = $i_break_left + 1; 10989 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } 10990 10991 if ( $i_break_left >= $i_f 10992 && $i_break_left < $i_l 10993 && $i_break_right > $i_f 10994 && $i_break_right <= $i_l ) 10995 { 10996 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); 10997 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); 10998 } 10999 } 11000} 11001 11002sub set_closing_breakpoint { 11003 11004 # set a breakpoint at a matching closing token 11005 # at present, this is only used to break at a ':' which matches a '?' 11006 my $i_break = shift; 11007 11008 if ( $mate_index_to_go[$i_break] >= 0 ) { 11009 11010 # watch out for break between something like '()' 11011 # which can occur under certain error conditions. 11012 # -- infinte recursion will occur (attrib.t) 11013 if ( $mate_index_to_go[$i_break] > $i_break + 1 ) { 11014 11015 # break before } ] and ), but sub set_forced_breakpoint will decide 11016 # to break before or after a ? and : 11017 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; 11018 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); 11019 } 11020 } 11021 else { 11022 my $type_sequence = $type_sequence_to_go[$i_break]; 11023 if ($type_sequence) { 11024 my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; 11025 $postponed_breakpoint{$type_sequence} = 1; 11026 } 11027 } 11028} 11029 11030# check to see if output line tabbing agrees with input line 11031# this can be very useful for debugging a script which has an extra 11032# or missing brace 11033sub compare_indentation_levels { 11034 11035 my ( $python_indentation_level, $structural_indentation_level ) = @_; 11036 if ( ( $python_indentation_level ne $structural_indentation_level ) 11037 && ( PerlTidy::Tokenizer::know_input_tabstr() ) ) 11038 { 11039 $last_tabbing_disagreement = $input_line_number; 11040 11041 if ($in_tabbing_disagreement) { 11042 } 11043 else { 11044 $tabbing_disagreement_count++; 11045 11046 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 11047 write_logfile_entry( 11048"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n" 11049 ); 11050 } 11051 $in_tabbing_disagreement = $input_line_number; 11052 $first_tabbing_disagreement = $in_tabbing_disagreement 11053 unless ($first_tabbing_disagreement); 11054 } 11055 } 11056 else { 11057 11058 if ($in_tabbing_disagreement) { 11059 11060 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 11061 write_logfile_entry( 11062"End indentation disagreement from input line $in_tabbing_disagreement\n" 11063 ); 11064 11065 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { 11066 write_logfile_entry( 11067 "No further tabbing disagreements will be noted\n"); 11068 } 11069 } 11070 $in_tabbing_disagreement = 0; 11071 } 11072 } 11073 $input_line_tabbing = undef; # deactivate test for this input line 11074} 11075 11076##################################################################### 11077# 11078# the PerlTidy::IndentationItem class supplies items which contain 11079# how much whitespace should be used at the start of a line 11080# 11081##################################################################### 11082 11083package PerlTidy::IndentationItem; 11084 11085# Indexes for indentation items 11086use constant SPACES => 0; # total leading white spaces 11087use constant LEVEL => 1; # the indentation 'level' 11088use constant CI_LEVEL => 2; # the 'continuation level' 11089use constant AVAILABLE_SPACES => 3; # how many left spaces available 11090 # for this level 11091use constant CLOSED => 4; # index where we saw closing '}' 11092use constant COMMA_COUNT => 5; # how many commas at this level? 11093use constant SEQUENCE_NUMBER => 6; # output batch number 11094use constant INDEX => 7; # index in output batch list 11095use constant HAVE_CHILD => 8; # any dependents? 11096use constant RECOVERABLE_SPACES => 9; # how many spaces to the right 11097 # we would like to move to get 11098 # alignment (negative if left) 11099use constant ALIGN_PAREN => 10; # do we want to try to align 11100 # with an opening structure? 11101use constant MARKED => 11; # if visited by corrector logic 11102use constant STACK_DEPTH => 12; # indentation nesting depth 11103use constant STARTING_INDEX => 13; # first token index of this level 11104use constant LAST_EQUALS => 14; # index of last '=' in this batch 11105 11106sub new { 11107 11108 # Create an 'indentation_item' which describes one level of leading 11109 # whitespace when the '-lp' indentation is used. We return 11110 # a reference to an anonymous array of associated variables. 11111 # See constants _xxx for storage scheme. 11112 my ( 11113 $class, $spaces, $level, 11114 $ci_level, $available_spaces, $index, 11115 $gnu_sequence_number, $align_paren, $stack_depth, 11116 $starting_index, 11117 ) 11118 = @_; 11119 my $closed = -1; 11120 my $last_equals = -1; 11121 my $comma_count = 0; 11122 my $have_child = 0; 11123 my $want_right_spaces = 0; 11124 my $marked = 0; 11125 bless [ 11126 $spaces, $level, $ci_level, 11127 $available_spaces, $closed, $comma_count, 11128 $gnu_sequence_number, $index, $have_child, 11129 $want_right_spaces, $align_paren, $marked, 11130 $stack_depth, $starting_index, $last_equals, 11131 ], $class; 11132} 11133 11134sub permanently_decrease_AVAILABLE_SPACES { 11135 11136 # make a permanent reduction in the available indentation spaces 11137 # at one indentation item. NOTE: if there are child nodes, their 11138 # total SPACES must be reduced by the caller. 11139 11140 my ( $item, $spaces_needed ) = @_; 11141 my $available_spaces = $item->get_AVAILABLE_SPACES(); 11142 my $deleted_spaces = 11143 ( $available_spaces > $spaces_needed ) 11144 ? $spaces_needed 11145 : $available_spaces; 11146 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 11147 $item->decrease_SPACES($deleted_spaces); 11148 $item->set_RECOVERABLE_SPACES(0); 11149 11150 return $deleted_spaces; 11151} 11152 11153sub tentatively_decrease_AVAILABLE_SPACES { 11154 11155 # We are asked to tentatively delete $spaces_needed of indentation 11156 # for a indentation item. We may want to undo this later. NOTE: if 11157 # there are child nodes, their total SPACES must be reduced by the 11158 # caller. 11159 my ( $item, $spaces_needed ) = @_; 11160 my $available_spaces = $item->get_AVAILABLE_SPACES(); 11161 my $deleted_spaces = 11162 ( $available_spaces > $spaces_needed ) 11163 ? $spaces_needed 11164 : $available_spaces; 11165 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 11166 $item->decrease_SPACES($deleted_spaces); 11167 $item->increase_RECOVERABLE_SPACES($deleted_spaces); 11168 return $deleted_spaces; 11169} 11170 11171sub get_STACK_DEPTH { 11172 my $self = shift; 11173 return $self->[STACK_DEPTH]; 11174} 11175 11176sub get_SPACES { 11177 my $self = shift; 11178 return $self->[SPACES]; 11179} 11180 11181sub get_MARKED { 11182 my $self = shift; 11183 return $self->[MARKED]; 11184} 11185 11186sub set_MARKED { 11187 my ( $self, $value ) = @_; 11188 if ( defined($value) ) { 11189 $self->[MARKED] = $value; 11190 } 11191 return $self->[MARKED]; 11192} 11193 11194sub get_AVAILABLE_SPACES { 11195 my $self = shift; 11196 return $self->[AVAILABLE_SPACES]; 11197} 11198 11199sub decrease_SPACES { 11200 my ( $self, $value ) = @_; 11201 if ( defined($value) ) { 11202 $self->[SPACES] -= $value; 11203 } 11204 return $self->[SPACES]; 11205} 11206 11207sub decrease_AVAILABLE_SPACES { 11208 my ( $self, $value ) = @_; 11209 if ( defined($value) ) { 11210 $self->[AVAILABLE_SPACES] -= $value; 11211 } 11212 return $self->[AVAILABLE_SPACES]; 11213} 11214 11215sub get_ALIGN_PAREN { 11216 my $self = shift; 11217 return $self->[ALIGN_PAREN]; 11218} 11219 11220sub get_RECOVERABLE_SPACES { 11221 my $self = shift; 11222 return $self->[RECOVERABLE_SPACES]; 11223} 11224 11225sub set_RECOVERABLE_SPACES { 11226 my ( $self, $value ) = @_; 11227 if ( defined($value) ) { 11228 $self->[RECOVERABLE_SPACES] = $value; 11229 } 11230 return $self->[RECOVERABLE_SPACES]; 11231} 11232 11233sub increase_RECOVERABLE_SPACES { 11234 my ( $self, $value ) = @_; 11235 if ( defined($value) ) { 11236 $self->[RECOVERABLE_SPACES] += $value; 11237 } 11238 return $self->[RECOVERABLE_SPACES]; 11239} 11240 11241sub get_CI_LEVEL { 11242 my $self = shift; 11243 return $self->[CI_LEVEL]; 11244} 11245 11246sub get_LEVEL { 11247 my $self = shift; 11248 return $self->[LEVEL]; 11249} 11250 11251sub get_SEQUENCE_NUMBER { 11252 my $self = shift; 11253 return $self->[SEQUENCE_NUMBER]; 11254} 11255 11256sub get_INDEX { 11257 my $self = shift; 11258 return $self->[INDEX]; 11259} 11260 11261sub get_STARTING_INDEX { 11262 my $self = shift; 11263 return $self->[STARTING_INDEX]; 11264} 11265 11266sub set_HAVE_CHILD { 11267 my ( $self, $value ) = @_; 11268 if ( defined($value) ) { 11269 $self->[HAVE_CHILD] = $value; 11270 } 11271 return $self->[HAVE_CHILD]; 11272} 11273 11274sub get_HAVE_CHILD { 11275 my $self = shift; 11276 return $self->[HAVE_CHILD]; 11277} 11278 11279sub set_LAST_EQUALS { 11280 my ( $self, $value ) = @_; 11281 if ( defined($value) ) { 11282 $self->[LAST_EQUALS] = $value; 11283 } 11284 return $self->[LAST_EQUALS]; 11285} 11286 11287sub get_LAST_EQUALS { 11288 my $self = shift; 11289 return $self->[LAST_EQUALS]; 11290} 11291 11292sub set_COMMA_COUNT { 11293 my ( $self, $value ) = @_; 11294 if ( defined($value) ) { 11295 $self->[COMMA_COUNT] = $value; 11296 } 11297 return $self->[COMMA_COUNT]; 11298} 11299 11300sub get_COMMA_COUNT { 11301 my $self = shift; 11302 return $self->[COMMA_COUNT]; 11303} 11304 11305sub set_CLOSED { 11306 my ( $self, $value ) = @_; 11307 if ( defined($value) ) { 11308 $self->[CLOSED] = $value; 11309 } 11310 return $self->[CLOSED]; 11311} 11312 11313sub get_CLOSED { 11314 my $self = shift; 11315 return $self->[CLOSED]; 11316} 11317 11318##################################################################### 11319# 11320# the PerlTidy::VerticalAligner::Line class supplies an object to 11321# contain a single output line 11322# 11323##################################################################### 11324 11325package PerlTidy::VerticalAligner::Line; 11326 11327{ 11328 11329 use strict; 11330 use Carp; 11331 11332 use constant JMAX => 0; 11333 use constant JMAX_ORIGINAL_LINE => 1; 11334 use constant RTOKENS => 2; 11335 use constant RFIELDS => 3; 11336 use constant RPATTERNS => 4; 11337 use constant INDENTATION => 5; 11338 use constant LEADING_SPACE_COUNT => 6; 11339 use constant OUTDENT_LONG_LINES => 7; 11340 use constant LIST_TYPE => 8; 11341 use constant IS_HANGING_SIDE_COMMENT => 9; 11342 use constant RALIGNMENTS => 10; 11343 use constant MAXIMUM_LINE_LENGTH => 11; 11344 11345 my %_index_map; 11346 $_index_map{jmax} = JMAX; 11347 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; 11348 $_index_map{rtokens} = RTOKENS; 11349 $_index_map{rfields} = RFIELDS; 11350 $_index_map{rpatterns} = RPATTERNS; 11351 $_index_map{indentation} = INDENTATION; 11352 $_index_map{leading_space_count} = LEADING_SPACE_COUNT; 11353 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; 11354 $_index_map{list_type} = LIST_TYPE; 11355 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; 11356 $_index_map{ralignments} = RALIGNMENTS; 11357 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; 11358 11359 my @_default_data = (); 11360 $_default_data[JMAX] = undef; 11361 $_default_data[JMAX_ORIGINAL_LINE] = undef; 11362 $_default_data[RTOKENS] = undef; 11363 $_default_data[RFIELDS] = undef; 11364 $_default_data[RPATTERNS] = undef; 11365 $_default_data[INDENTATION] = undef; 11366 $_default_data[LEADING_SPACE_COUNT] = undef; 11367 $_default_data[OUTDENT_LONG_LINES] = undef; 11368 $_default_data[LIST_TYPE] = undef; 11369 $_default_data[IS_HANGING_SIDE_COMMENT] = undef; 11370 $_default_data[RALIGNMENTS] = []; 11371 $_default_data[MAXIMUM_LINE_LENGTH] = undef; 11372 11373 { 11374 11375 # methods to count object population 11376 my $_count = 0; 11377 sub get_count { $_count; } 11378 sub _increment_count { ++$_count } 11379 sub _decrement_count { --$_count } 11380 } 11381 11382 # Constructor may be called as a class method 11383 sub new { 11384 my ( $caller, %arg ) = @_; 11385 my $caller_is_obj = ref($caller); 11386 my $class = $caller_is_obj || $caller; 11387 no strict "refs"; 11388 my $self = bless [], $class; 11389 11390 $self->[RALIGNMENTS] = []; 11391 11392 foreach my $member ( keys %_index_map ) { 11393 my $index = $_index_map{$member}; 11394 if ( exists $arg{$member} ) { $self->[$index] = $arg{$member} } 11395 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 11396 else { $self->[$index] = $_default_data[$index] } 11397 } 11398 11399 $self->_increment_count(); 11400 return $self; 11401 } 11402 11403 sub DESTROY { 11404 $_[0]->_decrement_count(); 11405 } 11406 11407 sub get_jmax { $_[0]->[JMAX] } 11408 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } 11409 sub get_rtokens { $_[0]->[RTOKENS] } 11410 sub get_rfields { $_[0]->[RFIELDS] } 11411 sub get_rpatterns { $_[0]->[RPATTERNS] } 11412 sub get_indentation { $_[0]->[INDENTATION] } 11413 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } 11414 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } 11415 sub get_list_type { $_[0]->[LIST_TYPE] } 11416 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } 11417 11418 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } 11419 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } 11420 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } 11421 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } 11422 11423 sub get_starting_column { 11424 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); 11425 } 11426 11427 sub increment_column { 11428 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); 11429 } 11430 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } 11431 11432 sub current_field_width { 11433 my $self = shift; 11434 my ($j) = @_; 11435 if ( $j == 0 ) { 11436 return $self->get_column($j); 11437 } 11438 else { 11439 return $self->get_column($j) - $self->get_column( $j - 1 ); 11440 } 11441 } 11442 11443 sub field_width_growth { 11444 my $self = shift; 11445 my $j = shift; 11446 return $self->get_column($j) - $self->get_starting_column($j); 11447 } 11448 11449 sub starting_field_width { 11450 my $self = shift; 11451 my $j = shift; 11452 if ( $j == 0 ) { 11453 return $self->get_starting_column($j); 11454 } 11455 else { 11456 return $self->get_starting_column($j) - 11457 $self->get_starting_column( $j - 1 ); 11458 } 11459 } 11460 11461 sub increase_field_width { 11462 11463 my $self = shift; 11464 my ( $j, $pad ) = @_; 11465 my $jmax = $self->get_jmax(); 11466 for my $k ( $j .. $jmax ) { 11467 $self->increment_column( $k, $pad ); 11468 } 11469 } 11470 11471 sub get_available_space_on_right { 11472 my $self = shift; 11473 my $jmax = $self->get_jmax(); 11474 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); 11475 } 11476 11477 sub set_jmax { $_[0]->[JMAX] = $_[1] } 11478 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } 11479 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } 11480 sub set_rfields { $_[0]->[RFIELDS] = $_[1] } 11481 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } 11482 sub set_indentation { $_[0]->[INDENTATION] = $_[1] } 11483 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } 11484 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } 11485 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } 11486 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } 11487 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } 11488 11489} 11490 11491##################################################################### 11492# 11493# the PerlTidy::VerticalAligner::Alignment class holds information 11494# on a single column being aligned 11495# 11496##################################################################### 11497package PerlTidy::VerticalAligner::Alignment; 11498 11499{ 11500 11501 use strict; 11502 11503 #use Carp; 11504 11505 # Symbolic array indexes 11506 use constant COLUMN => 0; # the current column number 11507 use constant STARTING_COLUMN => 1; # column number when created 11508 use constant MATCHING_TOKEN => 2; # what token we are matching 11509 use constant STARTING_LINE => 3; # the line index of creation 11510 use constant ENDING_LINE => 4; # the most recent line to use it 11511 use constant SAVED_COLUMN => 5; # the most recent line to use it 11512 use constant SERIAL_NUMBER => 6; # unique number for this alignment 11513 # (just its index in an array) 11514 11515 # Correspondence between variables and array indexes 11516 my %_index_map; 11517 $_index_map{column} = COLUMN; 11518 $_index_map{starting_column} = STARTING_COLUMN; 11519 $_index_map{matching_token} = MATCHING_TOKEN; 11520 $_index_map{starting_line} = STARTING_LINE; 11521 $_index_map{ending_line} = ENDING_LINE; 11522 $_index_map{saved_column} = SAVED_COLUMN; 11523 $_index_map{serial_number} = SERIAL_NUMBER; 11524 11525 my @_default_data = (); 11526 $_default_data[COLUMN] = undef; 11527 $_default_data[STARTING_COLUMN] = undef; 11528 $_default_data[MATCHING_TOKEN] = undef; 11529 $_default_data[STARTING_LINE] = undef; 11530 $_default_data[ENDING_LINE] = undef; 11531 $_default_data[SAVED_COLUMN] = undef; 11532 $_default_data[SERIAL_NUMBER] = undef; 11533 11534 # class population count 11535 { 11536 my $_count = 0; 11537 sub get_count { $_count; } 11538 sub _increment_count { ++$_count } 11539 sub _decrement_count { --$_count } 11540 } 11541 11542 # constructor 11543 sub new { 11544 my ( $caller, %arg ) = @_; 11545 my $caller_is_obj = ref($caller); 11546 my $class = $caller_is_obj || $caller; 11547 no strict "refs"; 11548 my $self = bless [], $class; 11549 11550 foreach my $member ( keys %_index_map ) { 11551 my $index = $_index_map{$member}; 11552 if ( exists $arg{$member} ) { $self->[$index] = $arg{$member} } 11553 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 11554 else { $self->[$index] = $_default_data[$index] } 11555 } 11556 $self->_increment_count(); 11557 return $self; 11558 } 11559 11560 sub DESTROY { 11561 $_[0]->_decrement_count(); 11562 } 11563 11564 sub get_column { return $_[0]->[COLUMN] } 11565 sub get_starting_column { return $_[0]->[STARTING_COLUMN] } 11566 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } 11567 sub get_starting_line { return $_[0]->[STARTING_LINE] } 11568 sub get_ending_line { return $_[0]->[ENDING_LINE] } 11569 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } 11570 11571 sub set_column { $_[0]->[COLUMN] = $_[1] } 11572 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } 11573 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } 11574 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } 11575 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } 11576 sub increment_column { $_[0]->[COLUMN] += $_[1] } 11577 11578 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } 11579 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } 11580 11581} 11582 11583package PerlTidy::VerticalAligner; 11584 11585=pod 11586 11587The PerlTidy::VerticalAligner package collects output lines and attempts to line 11588up certain common tokens, such as => and #, which are identified 11589by the calling routine. 11590 11591There are two main routines: append_line and flush. Append acts as a storage 11592buffer, collecting lines into a group which can be vertically aligned. 11593When alignment is no longer possible or desirable, it dumps the group 11594to flush. 11595 11596 append_line -----> flush 11597 11598 collects writes 11599 vertical one 11600 groups group 11601 11602=cut 11603 11604BEGIN { 11605 11606 # Caution: these debug flags produce a lot of output 11607 # They should all be 0 except when debugging small scripts 11608 11609 use constant VALIGN_DEBUG_FLAG_APPEND => 0; 11610 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; 11611 11612 my $debug_warning = sub { 11613 print "VALIGN_DEBUGGING with key $_[0]\n"; 11614 }; 11615 11616 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); 11617 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); 11618 11619} 11620 11621use vars qw( 11622 $vertical_aligner_self 11623 $current_line 11624 $maximum_alignment_index 11625 $ralignment_list 11626 $maximum_jmax_seen 11627 $minimum_jmax_seen 11628 $previous_minimum_jmax_seen 11629 $previous_maximum_jmax_seen 11630 $maximum_line_index 11631 $group_level 11632 $last_group_level_written 11633 $extra_indent_ok 11634 $zero_count 11635 @group_lines 11636 $last_comment_column 11637 $last_side_comment_line_number 11638 $last_side_comment_length 11639 $last_side_comment_level 11640 $outdented_line_count 11641 $first_outdented_line_at 11642 $last_outdented_line_at 11643 $diagnostics_object 11644 $logger_object 11645 $file_writer_object 11646 $rOpts 11647 11648 $rOpts_maximum_line_length 11649 $rOpts_continuation_indentation 11650 $rOpts_indent_columns 11651 $rOpts_tabs 11652 11653 $rOpts_maximum_whitespace_columns 11654 $rOpts_big_space_jump 11655 $rOpts_minimum_space_to_comment 11656 $rOpts_maximum_space_to_comment 11657 11658); 11659 11660sub initialize { 11661 11662 my $class; 11663 11664 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) 11665 = @_; 11666 11667 # variables describing the entire space group: 11668 11669 $ralignment_list = []; 11670 $group_level = 0; 11671 $last_group_level_written = -1; 11672 $extra_indent_ok = 0; # can we move all lines to the right? 11673 $last_side_comment_length = 0; 11674 $maximum_jmax_seen = 0; 11675 $minimum_jmax_seen = 0; 11676 $previous_minimum_jmax_seen = 0; 11677 $previous_maximum_jmax_seen = 0; 11678 11679 # variables describing each line of the group 11680 @group_lines = (); # list of all lines in group 11681 11682 $outdented_line_count = 0; 11683 $first_outdented_line_at = 0; 11684 $last_outdented_line_at = 0; 11685 $last_side_comment_line_number = 0; 11686 $last_side_comment_level = -1; 11687 11688 # frequently used parameters 11689 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 11690 $rOpts_tabs = $rOpts->{'tabs'}; 11691 $rOpts_maximum_whitespace_columns = $rOpts->{'maximum-whitespace-columns'}; 11692 $rOpts_big_space_jump = $rOpts->{'big-space-jump'}; 11693 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; 11694 $rOpts_maximum_space_to_comment = $rOpts->{'maximum-space-to-comment'}; 11695 11696 forget_side_comment(); 11697 11698 initialize_for_new_group(); 11699 11700 $vertical_aligner_self = {}; 11701 bless $vertical_aligner_self, $class; 11702 return $vertical_aligner_self; 11703} 11704 11705sub initialize_for_new_group { 11706 $maximum_line_index = -1; # lines in the current group 11707 $maximum_alignment_index = -1; # alignments in current group 11708 $zero_count = 0; # count consecutive lines without tokens 11709 $current_line = undef; # line being matched for alignment 11710} 11711 11712# interface to PerlTidy::Diagnostics routines 11713sub write_diagnostics { 11714 if ($diagnostics_object) { 11715 $diagnostics_object->write_diagnostics(@_); 11716 } 11717} 11718 11719# interface to PerlTidy::Logger routines 11720sub warning { 11721 if ($logger_object) { 11722 $logger_object->warning(@_); 11723 } 11724} 11725 11726sub write_logfile_entry { 11727 if ($logger_object) { 11728 $logger_object->write_logfile_entry(@_); 11729 } 11730} 11731 11732sub report_definite_bug { 11733 if ($logger_object) { 11734 $logger_object->report_definite_bug(); 11735 } 11736} 11737 11738sub get_SPACES { 11739 11740 # return the number of leading spaces associated with an indentation 11741 # variable $indentation is either a constant number of spaces or an 11742 # object with a get_SPACES method. 11743 my $indentation = shift; 11744 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 11745} 11746 11747sub get_RECOVERABLE_SPACES { 11748 11749 # return the number of spaces (+ means shift right, - means shift left) 11750 # that we would like to shift a group of lines with the same indentation 11751 # to get them to line up with their opening parens 11752 my $indentation = shift; 11753 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; 11754} 11755 11756sub get_STACK_DEPTH { 11757 11758 my $indentation = shift; 11759 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; 11760} 11761 11762sub make_alignment { 11763 my ( $col, $token ) = @_; 11764 11765 # make one new alignment at column $col which aligns token $token 11766 ++$maximum_alignment_index; 11767 my $alignment = new PerlTidy::VerticalAligner::Alignment( 11768 column => $col, 11769 starting_column => $col, 11770 matching_token => $token, 11771 starting_line => $maximum_line_index, 11772 ending_line => $maximum_line_index, 11773 serial_number => $maximum_alignment_index, 11774 ); 11775 $ralignment_list->[$maximum_alignment_index] = $alignment; 11776 return $alignment; 11777} 11778 11779sub dump_alignments { 11780 print 11781"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; 11782 for my $i ( 0 .. $maximum_alignment_index ) { 11783 my $column = $ralignment_list->[$i]->get_column(); 11784 my $starting_column = $ralignment_list->[$i]->get_starting_column(); 11785 my $matching_token = $ralignment_list->[$i]->get_matching_token(); 11786 my $starting_line = $ralignment_list->[$i]->get_starting_line(); 11787 my $ending_line = $ralignment_list->[$i]->get_ending_line(); 11788 print 11789"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; 11790 } 11791} 11792 11793sub save_alignment_columns { 11794 for my $i ( 0 .. $maximum_alignment_index ) { 11795 $ralignment_list->[$i]->save_column(); 11796 } 11797} 11798 11799sub restore_alignment_columns { 11800 for my $i ( 0 .. $maximum_alignment_index ) { 11801 $ralignment_list->[$i]->restore_column(); 11802 } 11803} 11804 11805sub forget_side_comment { 11806 $last_comment_column = 0; 11807} 11808 11809sub append_line { 11810 11811=pod 11812 11813sub append is called to place one line in the current vertical group. 11814 11815The input parameters are: 11816 $level = indentation level of this line 11817 $rfields = reference to array of fields 11818 $rpatterns = reference to array of patterns, one per field 11819 $rtokens = reference to array of tokens starting fields 1,2,.. 11820 11821Here is an example of what this package does. In this example, 11822we are trying to line up both the '=>' and the '#'. 11823 11824 '18' => 'grave', # \` 11825 '19' => 'acute', # `' 11826 '20' => 'caron', # \v 11827<-tabs-><f1-><--field 2 ---><-f3-> 11828| | | | 11829| | | | 11830col1 col2 col3 col4 11831 11832The calling routine has already broken the entire line into 3 fields as 11833indicated. (So the work of identifying promising common tokens has 11834already been done). 11835 11836In this example, there will be 2 tokens being matched: '=>' and '#'. 11837They are the leading parts of fields 2 and 3, but we do need to know 11838what they are so that we can dump a group of lines when these tokens 11839change. 11840 11841The fields contain the actual characters of each field. The patterns 11842are like the fields, but they contain mainly token types instead 11843of tokens, so they have fewer characters. They are used to be 11844sure we are matching fields of similar type. 11845 11846In this example, there will be 4 column indexes being adjusted. The 11847first one is always at zero. The interior columns are at the start of 11848the matching tokens, and the last one tracks the maximum line length. 11849 11850Basically, each time a new line comes in, it joins the current vertical 11851group if possible. Otherwise it causes the current group to be dumped 11852and a new group is started. 11853 11854For each new group member, the column locations are increased, as 11855necessary, to make room for the new fields. When the group is finally 11856output, these column numbers are used to compute the amount of spaces of 11857padding needed for each field. 11858 11859Programming note: the fields are assumed not to have any tab characters. 11860Tabs have been previously removed except for tabs in quoted strings and 11861side comments. Tabs in these fields can mess up the column counting. 11862The log file warns the user if there are any such tabs. 11863 11864=cut 11865 11866 my ( 11867 $level, $level_end, $indentation, 11868 $rfields, $rtokens, $rpatterns, 11869 $is_forced_break, $outdent_long_lines, $is_terminal_statement, 11870 $do_not_pad 11871 ) 11872 = @_; 11873 11874 my $leading_space_count = get_SPACES($indentation); 11875 11876 # number of fields is $jmax 11877 # number of tokens between fields is $jmax-1 11878 my $jmax = $#{$rfields}; 11879 $previous_minimum_jmax_seen = $minimum_jmax_seen; 11880 $previous_maximum_jmax_seen = $maximum_jmax_seen; 11881 11882 VALIGN_DEBUG_FLAG_APPEND0 && do { 11883 print 11884"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; 11885 }; 11886 11887 # patch until new aligner is finished 11888 if ($do_not_pad) { flush() } 11889 11890 # shouldn't happen: 11891 if ( $level < 0 ) { $level = 0 } 11892 11893 # do not align code across indentation level changes 11894 if ( $level != $group_level ) { 11895 11896 # we are allowed to shift a group of lines to the right if its 11897 # level is greater than the previous and next group 11898 $extra_indent_ok = 11899 ( $level < $group_level && $last_group_level_written < $group_level ); 11900 11901 flush(); 11902 11903 # If we know that this line will get flushed out by itself because 11904 # of level changes, we can leave the extra_indent_ok flag set. 11905 # That way, if we get an external flush call, we will still be 11906 # able to do some -lp alignment if necessary. 11907 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); 11908 11909 $group_level = $level; 11910 11911 # wait until after the above flush to get the leading space 11912 # count because it may have been changed if the -icp flag is in 11913 # effect 11914 $leading_space_count = get_SPACES($indentation); 11915 11916 } 11917 11918 # -------------------------------------------------------------------- 11919 # Step 1. Handle simple line of code with no fields to match. 11920 # -------------------------------------------------------------------- 11921 if ( $jmax <= 0 ) { 11922 $zero_count++; 11923 11924 if ( $maximum_line_index >= 0 ) { 11925 11926 # flush the current group if it has some aligned columns.. 11927 if ( $group_lines[0]->get_jmax() > 1 ) { flush() } 11928 11929 # flush current group if we are just collecting side comments.. 11930 elsif ( 11931 11932 # ...and we haven't seen a comment lately 11933 ( $zero_count > 3 ) 11934 11935 # ..or if this new line doesn't fit to the left of the comments 11936 || ( ( $leading_space_count + length( $$rfields[0] ) ) > 11937 $group_lines[0]->get_column(0) ) 11938 ) 11939 { 11940 flush(); 11941 } 11942 } 11943 11944 # just write this line directly if no current group, no side comment, 11945 # and no space recovery is needed. 11946 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) 11947 { 11948 write_leader_and_string( $leading_space_count, $$rfields[0], 0, 11949 $outdent_long_lines ); 11950 return; 11951 } 11952 } 11953 else { 11954 $zero_count = 0; 11955 } 11956 11957 # programming check: (shouldn't happen) 11958 # an error here implies an incorrect call was made 11959 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { 11960 warning( 11961"Program bug in PerlTidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" 11962 ); 11963 report_definite_bug(); 11964 } 11965 11966 # -------------------------------------------------------------------- 11967 # create an object to hold this line 11968 # -------------------------------------------------------------------- 11969 my $is_hanging_side_comment = 0; 11970 my $new_line = new PerlTidy::VerticalAligner::Line( 11971 jmax => $jmax, 11972 jmax_original_line => $jmax, 11973 rtokens => $rtokens, 11974 rfields => $rfields, 11975 rpatterns => $rpatterns, 11976 indentation => $indentation, 11977 leading_space_count => $leading_space_count, 11978 outdent_long_lines => $outdent_long_lines, 11979 list_type => "", 11980 is_hanging_side_comment => $is_hanging_side_comment, 11981 maximum_line_length => $rOpts->{'maximum-line-length'}, 11982 ); 11983 11984 # -------------------------------------------------------------------- 11985 # It simplifies things to create a zero length side comment 11986 # if none exists. 11987 # -------------------------------------------------------------------- 11988 make_side_comment( $new_line, $level_end ); 11989 11990 # -------------------------------------------------------------------- 11991 # Decide if this is a simple list of items. 11992 # There are 3 list types: none, comma, comma-arrow. 11993 # We use this below to be less restrictive in deciding what to align. 11994 # -------------------------------------------------------------------- 11995 if ($is_forced_break) { 11996 decide_if_list($new_line); 11997 } 11998 11999 if ($current_line) { 12000 12001 # -------------------------------------------------------------------- 12002 # Allow hanging side comment to join current group, if any 12003 # This will help keep side comments aligned, because otherwise we 12004 # will have to start a new group, making alignment less likely. 12005 # -------------------------------------------------------------------- 12006 $is_hanging_side_comment = 12007 hanging_comment_check( $new_line, $current_line ); 12008 12009 # -------------------------------------------------------------------- 12010 # If there is just one previous line, and it has more fields 12011 # than the new line, try to join fields together to get a match with 12012 # the new line. At the present time, only a single leading '=' is 12013 # allowed to be compressed out. This is useful in rare cases where 12014 # a table is forced to use old breakpoints because of side comments, 12015 # and the table starts out something like this: 12016 # my %MonthChars = ('0', 'Jan', # side comment 12017 # '1', 'Feb', 12018 # '2', 'Mar', 12019 # Eliminating the '=' field will allow the remaining fields to line up. 12020 # This situation does not occur if there are no side comments 12021 # because scan_list would put a break after the opening '('. 12022 # -------------------------------------------------------------------- 12023 eliminate_old_fields( $new_line, $current_line ); 12024 12025 # -------------------------------------------------------------------- 12026 # If the new line has more fields than the current group, 12027 # see if we can match the first fields and combine the remaining 12028 # fields of the new line. 12029 # -------------------------------------------------------------------- 12030 eliminate_new_fields( $new_line, $current_line ); 12031 12032 # -------------------------------------------------------------------- 12033 # Flush previous group unless all common tokens and patterns match.. 12034 # -------------------------------------------------------------------- 12035 check_match( $new_line, $current_line ); 12036 12037 # -------------------------------------------------------------------- 12038 # See if there is space for this line in the current group (if any) 12039 # -------------------------------------------------------------------- 12040 if ($current_line) { 12041 check_fit( $new_line, $current_line ); 12042 } 12043 } 12044 12045 # -------------------------------------------------------------------- 12046 # Append this line to the current group (or start new group) 12047 # -------------------------------------------------------------------- 12048 accept_line($new_line); 12049 12050 # Future update to allow this to vary: 12051 $current_line = $new_line if ( $maximum_line_index == 0 ); 12052 12053 # -------------------------------------------------------------------- 12054 # Step 8. Some old debugging stuff 12055 # -------------------------------------------------------------------- 12056 VALIGN_DEBUG_FLAG_APPEND && do { 12057 print "APPEND fields:"; 12058 dump_array(@$rfields); 12059 print "APPEND tokens:"; 12060 dump_array(@$rtokens); 12061 print "APPEND patterns:"; 12062 dump_array(@$rpatterns); 12063 dump_alignments(); 12064 }; 12065} 12066 12067sub hanging_comment_check { 12068 12069 my $line = shift; 12070 my $jmax = $line->get_jmax(); 12071 return 0 unless $jmax == 1; # must be 2 fields 12072 my $rtokens = $line->get_rtokens(); 12073 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. 12074 my $rfields = $line->get_rfields(); 12075 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... 12076 my $old_line = shift; 12077 my $maximum_field_index = $old_line->get_jmax(); 12078 return 0 12079 unless $maximum_field_index > $jmax; # the current line has more fields 12080 my $rpatterns = $line->get_rpatterns(); 12081 12082 $line->set_is_hanging_side_comment(1); 12083 $jmax = $maximum_field_index; 12084 $line->set_jmax($jmax); 12085 $$rfields[$jmax] = $$rfields[1]; 12086 $$rtokens[ $jmax - 1 ] = $$rtokens[0]; 12087 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; 12088 for ( my $j = 1 ; $j < $jmax ; $j++ ) { 12089 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? 12090 $$rtokens[ $j - 1 ] = ""; 12091 $$rpatterns[ $j - 1 ] = ""; 12092 } 12093 return 1; 12094} 12095 12096sub eliminate_old_fields { 12097 12098 my $new_line = shift; 12099 my $jmax = $new_line->get_jmax(); 12100 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } 12101 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } 12102 12103 # there must be one previous line 12104 return unless ( $maximum_line_index == 0 ); 12105 12106 my $old_line = shift; 12107 my $maximum_field_index = $old_line->get_jmax(); 12108 12109 # this line must have fewer fields 12110 return unless $maximum_field_index > $jmax; 12111 12112 # be reasonable, not too few 12113 return unless ( $maximum_field_index - 2 <= $jmax ); 12114 12115 # must have side comment 12116 my $old_rfields = $old_line->get_rfields(); 12117 return unless ( length( $$old_rfields[$maximum_field_index] ) > 0 ); 12118 12119 my $rtokens = $new_line->get_rtokens(); 12120 my $rfields = $new_line->get_rfields(); 12121 my $rpatterns = $new_line->get_rpatterns(); 12122 12123 my $old_rtokens = $old_line->get_rtokens(); 12124 my $old_rpatterns = $old_line->get_rpatterns(); 12125 12126 my $hid_equals = 0; 12127 12128 my @new_alignments = (); 12129 my @new_fields = (); 12130 my @new_matching_patterns = (); 12131 my @new_matching_tokens = (); 12132 12133 my $j = 0; 12134 my $k; 12135 my $current_field = ''; 12136 my $current_pattern = ''; 12137 12138 # loop over all old tokens 12139 my $in_match = 0; 12140 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { 12141 $current_field .= $$old_rfields[$k]; 12142 $current_pattern .= $$old_rpatterns[$k]; 12143 last if ( $j > $jmax - 1 ); 12144 12145 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { 12146 $in_match = 1; 12147 $new_fields[$j] = $current_field; 12148 $new_matching_patterns[$j] = $current_pattern; 12149 $current_field = ''; 12150 $current_pattern = ''; 12151 $new_matching_tokens[$j] = $$old_rtokens[$k]; 12152 $new_alignments[$j] = $old_line->get_alignment($k); 12153 $j++; 12154 } 12155 else { 12156 12157 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { 12158 $hid_equals = 1; 12159 } 12160 last if $in_match; # disallow gaps in matching field types 12161 } 12162 } 12163 12164 # Modify the current state if we are successful. 12165 # We must exactly reach the ends of both lists for success. 12166 if ( ( $j == $jmax ) && ( $current_field eq '' ) && $hid_equals ) { 12167 $k = $maximum_field_index; 12168 $current_field .= $$old_rfields[$k]; 12169 $current_pattern .= $$old_rpatterns[$k]; 12170 $new_fields[$j] = $current_field; 12171 $new_matching_patterns[$j] = $current_pattern; 12172 12173 $new_alignments[$j] = $old_line->get_alignment($k); 12174 $maximum_field_index = $j; 12175 12176 $old_line->set_alignments(@new_alignments); 12177 $old_line->set_jmax($jmax); 12178 $old_line->set_rtokens( \@new_matching_tokens ); 12179 $old_line->set_rfields( \@new_fields ); 12180 $old_line->set_rpatterns( \@$rpatterns ); 12181 } 12182} 12183 12184# create an empty side comment if none exists 12185sub make_side_comment { 12186 my $new_line = shift; 12187 my $level_end = shift; 12188 my $jmax = $new_line->get_jmax(); 12189 my $rtokens = $new_line->get_rtokens(); 12190 12191 # if line does not have a side comment... 12192 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { 12193 my $rfields = $new_line->get_rfields(); 12194 my $rpatterns = $new_line->get_rpatterns(); 12195 $$rtokens[$jmax] = '#'; 12196 $$rfields[ ++$jmax ] = ''; 12197 $$rpatterns[$jmax] = '#'; 12198 $new_line->set_jmax($jmax); 12199 $new_line->set_jmax_original_line($jmax); 12200 } 12201 12202 # line has a side comment.. 12203 else { 12204 12205 # don't remember old side comment location for very long 12206 # and don't remember across level changes 12207 my $line_number = $vertical_aligner_self->get_output_line_number(); 12208 if ( $level_end < $last_side_comment_level 12209 || $line_number - $last_side_comment_line_number > 12 ) 12210 { 12211 forget_side_comment(); 12212 } 12213 $last_side_comment_line_number = $line_number; 12214 $last_side_comment_level = $level_end; 12215 } 12216} 12217 12218sub decide_if_list { 12219 12220 my $line = shift; 12221 12222 # A list will be taken to be a line with a forced break in which all 12223 # of the field separators are commas or comma-arrows (except for the 12224 # trailing #) 12225 12226 # List separator tokens are things like ',3' or '=>2', 12227 # where the trailing digit is the nesting depth. Allow braces 12228 # to allow nested list items. 12229 my $rtokens = $line->get_rtokens(); 12230 my $test_token = $$rtokens[0]; 12231 if ( $test_token =~ /^(\,|=>)/ ) { 12232 my $list_type = $test_token; 12233 my $jmax = $line->get_jmax(); 12234 12235 foreach my $i ( 1 .. $jmax - 2 ) { 12236 12237 if ( $$rtokens[$i] !~ /^(\,|=>|\{)/ ) { 12238 $list_type = ""; 12239 last; 12240 } 12241 } 12242 $line->set_list_type($list_type); 12243 } 12244} 12245 12246sub eliminate_new_fields { 12247 12248 return unless ( $maximum_line_index >= 0 ); 12249 my $new_line = shift; 12250 my $jmax = $new_line->get_jmax(); 12251 12252 # must be monotonic variation 12253 return unless ( $previous_maximum_jmax_seen <= $jmax ); 12254 12255 # must be more fields in the new line 12256 my $old_line = shift; 12257 my $maximum_field_index = $old_line->get_jmax(); 12258 return unless ( $maximum_field_index < $jmax ); 12259 12260 return 12261 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) 12262 ; # only if monotonic 12263 12264 # never combine fields of a comma list 12265 return 12266 unless ( $maximum_field_index > 1 ) 12267 && ( $new_line->get_list_type() !~ /^,/ ); 12268 12269 my $rtokens = $new_line->get_rtokens(); 12270 my $rfields = $new_line->get_rfields(); 12271 my $rpatterns = $new_line->get_rpatterns(); 12272 my $old_rpatterns = $old_line->get_rpatterns(); 12273 my $old_rtokens = $old_line->get_rtokens(); 12274 12275 # loop over all old tokens except comment 12276 my $match = 1; 12277 my $k; 12278 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { 12279 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) 12280 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) 12281 { 12282 $match = 0; 12283 last; 12284 } 12285 } 12286 12287 # first tokens agree, so combine new tokens 12288 if ($match) { 12289 for $k ( $maximum_field_index .. $jmax - 1 ) { 12290 12291 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; 12292 $$rfields[$k] = ""; 12293 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; 12294 $$rpatterns[$k] = ""; 12295 } 12296 12297 $$rtokens[ $maximum_field_index - 1 ] = '#'; 12298 $$rfields[$maximum_field_index] = $$rfields[$jmax]; 12299 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; 12300 $jmax = $maximum_field_index; 12301 } 12302 $new_line->set_jmax($jmax); 12303} 12304 12305sub check_match { 12306 12307 my $new_line = shift; 12308 my $old_line = shift; 12309 12310 my $jmax = $new_line->get_jmax(); 12311 my $maximum_field_index = $old_line->get_jmax(); 12312 12313 # flush if this line has too many fields 12314 if ( $jmax > $maximum_field_index ) { flush(); return } 12315 12316 # flush if adding this line would make a non-monotonic field count 12317 if ( 12318 ( $maximum_field_index > $jmax ) # this has too few fields 12319 && ( 12320 ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic 12321 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) 12322 ) 12323 ) 12324 { 12325 flush(); 12326 return; 12327 } 12328 12329 # otherwise append this line if everything matches 12330 my $jmax_original_line = $new_line->get_jmax_original_line(); 12331 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 12332 my $rtokens = $new_line->get_rtokens(); 12333 my $rfields = $new_line->get_rfields(); 12334 my $rpatterns = $new_line->get_rpatterns(); 12335 my $list_type = $new_line->get_list_type(); 12336 12337 my $group_list_type = $old_line->get_list_type(); 12338 my $old_rpatterns = $old_line->get_rpatterns(); 12339 my $old_rtokens = $old_line->get_rtokens(); 12340 12341 my $jlimit = $jmax - 1; 12342 if ( $maximum_field_index > $jmax ) { 12343 $jlimit = $jmax_original_line; 12344 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); 12345 } 12346 12347 my $everything_matches = 1; 12348 12349 # common list types always match 12350 unless ( ( $group_list_type && ( $list_type eq $group_list_type ) ) 12351 || $is_hanging_side_comment ) 12352 { 12353 12354 my $leading_space_count = $new_line->get_leading_space_count(); 12355 for my $j ( 0 .. $jlimit ) { 12356 my $match = 1; 12357 if ( 12358 ( $j < $jlimit ) 12359 && ( ( $$old_rtokens[$j] ne $$rtokens[$j] ) 12360 || ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) ) 12361 ) 12362 { 12363 $match = 0; 12364 } 12365 12366 # Don't let line with fewer fields increase column widths 12367 # ( align3.t ) 12368 if ( $maximum_field_index > $jmax ) { 12369 my $pad = 12370 length( $$rfields[$j] ) - $old_line->current_field_width($j); 12371 12372 if ( $j == 0 ) { 12373 $pad += $leading_space_count; 12374 } 12375 if ( $pad > 0 ) { $match = 0 } 12376 } 12377 12378 unless ($match) { 12379 $everything_matches = 0; 12380 last; 12381 } 12382 } 12383 } 12384 12385 if ( $maximum_field_index > $jmax ) { 12386 12387 if ($everything_matches) { 12388 12389 my $comment = $$rfields[$jmax]; 12390 for $jmax ( $jlimit .. $maximum_field_index ) { 12391 $$rtokens[$jmax] = $$old_rtokens[$jmax]; 12392 $$rfields[ ++$jmax ] = ''; 12393 $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; 12394 } 12395 $$rfields[$jmax] = $comment; 12396 $new_line->set_jmax($jmax); 12397 } 12398 } 12399 12400 flush() unless ($everything_matches); 12401} 12402 12403sub check_fit { 12404 12405 return unless ( $maximum_line_index >= 0 ); 12406 my $new_line = shift; 12407 my $old_line = shift; 12408 12409 my $jmax = $new_line->get_jmax(); 12410 my $leading_space_count = $new_line->get_leading_space_count(); 12411 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 12412 my $rtokens = $new_line->get_rtokens(); 12413 my $rfields = $new_line->get_rfields(); 12414 my $rpatterns = $new_line->get_rpatterns(); 12415 12416 my $group_list_type = $group_lines[0]->get_list_type(); 12417 12418 my $padding_so_far = 0; 12419 my $padding_available = $old_line->get_available_space_on_right(); 12420 12421 # save current columns in case this doesn't work 12422 save_alignment_columns(); 12423 12424 my ( $j, $pad, $eight, $big_jump ); 12425 for $j ( 0 .. $jmax ) { 12426 12427 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); 12428 12429 if ( $j == 0 ) { 12430 $pad += $leading_space_count; 12431 } 12432 12433 # sudden increase or decrease in whitespace space looks bad 12434 # (unless this is a list or side comment). The number was 12435 # initially '8', but that seems too small and it is now a parameter 12436 # ( eight.t ) 12437 # my $eight = $group_list_type ? 12 : 8; 12438 $big_jump = 0; 12439 $eight = 12440 $group_list_type 12441 ? 1.5 * $rOpts_big_space_jump 12442 : $rOpts_big_space_jump; 12443 12444 if ( !$is_hanging_side_comment && $j < $jmax - 1 ) { 12445 12446 # a sudden increase of over '8' spaces is too much 12447 if ( $pad > $eight ) { $big_jump = 1 } 12448 12449 # a sudden decrease of over '8' spaces to a new low is too much 12450 elsif ( $pad < 0 ) { 12451 my $test_pad = 12452 length( $$rfields[$j] ) - $old_line->starting_field_width($j); 12453 12454 if ( $j == 0 ) { 12455 $test_pad += $leading_space_count; 12456 } 12457 if ( $test_pad < -$eight ) { $big_jump = 1 } 12458 } 12459 } 12460 12461 next if !$big_jump && $pad < 0; 12462 12463 # This line will need space; lets see if we want to accept it.. 12464 if ( 12465 12466 # not if padding increases too much 12467 $big_jump 12468 12469 # not if this won't fit 12470 || ( $pad > $padding_available ) 12471 12472 # or, with the exception of space to side comments, .. 12473 || ( 12474 $j < $jmax - 1 12475 12476 # causes too many consecutive columns of whitespace 12477 && ( 12478 ( 12479 $pad + $old_line->field_width_growth($j) > 12480 $rOpts_maximum_whitespace_columns 12481 ) 12482 12483 ) 12484 ) 12485 ) 12486 { 12487 12488 # revert to starting state then flush; things didn't work out 12489 restore_alignment_columns(); 12490 flush(); 12491 last; 12492 } 12493 12494 # looks ok, squeeze this field in 12495 $old_line->increase_field_width( $j, $pad ); 12496 $padding_available -= $pad; 12497 } 12498} 12499 12500sub accept_line { 12501 12502 my $new_line = shift; 12503 $group_lines[ ++$maximum_line_index ] = $new_line; 12504 12505 # initialize field lengths if starting new group 12506 if ( $maximum_line_index == 0 ) { 12507 12508 my $jmax = $new_line->get_jmax(); 12509 my $rfields = $new_line->get_rfields(); 12510 my $rtokens = $new_line->get_rtokens(); 12511 my $j; 12512 my $col = $new_line->get_leading_space_count(); 12513 12514 for $j ( 0 .. $jmax ) { 12515 $col += length( $$rfields[$j] ); 12516 12517 # create initial alignments for the new group 12518 my $token = ""; 12519 if ( $j < $jmax ) { $token = $$rtokens[$j] } 12520 my $alignment = make_alignment( $col, $token ); 12521 $new_line->set_alignment( $j, $alignment ); 12522 } 12523 12524 $maximum_jmax_seen = $jmax; 12525 $minimum_jmax_seen = $jmax; 12526 } 12527 12528 # use previous alignments otherwise 12529 else { 12530 my @new_alignments = 12531 $group_lines[ $maximum_line_index - 1 ]->get_alignments(); 12532 $new_line->set_alignments(@new_alignments); 12533 } 12534} 12535 12536sub dump_array { 12537 12538 # debug routine to dump array contents 12539 local $" = ')('; 12540 print "(@_)\n"; 12541} 12542 12543=pod 12544 12545flush() sends the current PerlTidy::VerticalAligner group down the pipeline to PerlTidy::FileWriter. 12546 12547=cut 12548 12549sub flush { 12550 12551 return unless ( $maximum_line_index >= 0 ); 12552 12553 VALIGN_DEBUG_FLAG_APPEND0 && do { 12554 my $group_list_type = $group_lines[0]->get_list_type(); 12555 my ( $a, $b, $c ) = caller(); 12556 my $maximum_field_index = $group_lines[0]->get_jmax(); 12557 print 12558"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; 12559 12560 }; 12561 12562 # some small groups are best left unaligned 12563 my $do_not_align = decide_if_aligned(); 12564 12565 # optimize side comment location 12566 $do_not_align = adjust_side_comment($do_not_align); 12567 12568 # recover spaces for -lp option if possible 12569 my $extra_leading_spaces = get_extra_leading_spaces(); 12570 12571 # all lines of this group have the same basic leading spacing 12572 my $group_leader_length = $group_lines[0]->get_leading_space_count(); 12573 12574 # add extra leading spaces if helpful 12575 my $min_ci_gap = 12576 improve_continuation_indentation( $do_not_align, $group_leader_length ); 12577 12578 # loop to output all lines 12579 for my $i ( 0 .. $maximum_line_index ) { 12580 my $line = $group_lines[$i]; 12581 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align, 12582 $group_leader_length, $extra_leading_spaces ); 12583 } 12584 12585 initialize_for_new_group(); 12586} 12587 12588sub decide_if_aligned { 12589 12590 # Do not try to align two lines which are not really similar 12591 my $group_list_type = $group_lines[0]->get_list_type(); 12592 my $do_not_align = 12593 ( $maximum_line_index < 2 12594 && !$group_list_type 12595 && ( $previous_maximum_jmax_seen != $previous_minimum_jmax_seen ) ); 12596 12597 # But try to convert them into a simple comment group if the first line 12598 # a has side comment 12599 my $rfields = $group_lines[0]->get_rfields(); 12600 my $maximum_field_index = $group_lines[0]->get_jmax(); 12601 if ( $do_not_align 12602 && ( $maximum_line_index > 0 ) 12603 && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) 12604 { 12605 combine_fields(); 12606 $do_not_align = 0; 12607 } 12608 return $do_not_align; 12609} 12610 12611sub adjust_side_comment { 12612 12613 my $do_not_align = shift; 12614 12615 # let's see if we can move the side comment field out a little 12616 # to improve readability (the last field is always a side comment field) 12617 my $have_side_comment = 0; 12618 my $first_side_comment_line = -1; 12619 my $maximum_field_index = $group_lines[0]->get_jmax(); 12620 for my $i ( 0 .. $maximum_line_index ) { 12621 my $line = $group_lines[$i]; 12622 12623 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { 12624 $have_side_comment = 1; 12625 $first_side_comment_line = $i; 12626 last; 12627 } 12628 } 12629 12630 my $kmax = $maximum_field_index + 1; 12631 12632 if ($have_side_comment) { 12633 12634 my $line = $group_lines[0]; 12635 12636 # the maximum space without exceeding the line length: 12637 my $avail = $line->get_available_space_on_right(); 12638 12639 # try to use the previous comment column 12640 my $move = $last_comment_column - $line->get_column( $kmax - 2 ); 12641 12642 if ( $kmax > 0 && !$do_not_align ) { 12643 12644 # but if this doesn't work, give up and use the minimum space 12645 if ( $move > $avail ) { 12646 $move = $rOpts_minimum_space_to_comment - 1; 12647 } 12648 12649 # but we want some minimum space to the comment 12650 my $min_move = $rOpts_minimum_space_to_comment - 1; 12651 if ( $move >= 0 12652 && $last_side_comment_length > 0 12653 && ( $first_side_comment_line == 0 ) 12654 && $group_level == $last_group_level_written ) 12655 { 12656 $min_move = 0; 12657 } 12658 12659 if ( $move < $min_move ) { 12660 $move = $min_move; 12661 } 12662 12663 # if this causes too much space, give up and use the minimum space 12664 if ( $move > $rOpts_maximum_space_to_comment - 1 ) { 12665 $move = $rOpts_minimum_space_to_comment - 1; 12666 } 12667 12668 # don't exceed the available space 12669 if ( $move > $avail ) { $move = $avail } 12670 12671 # we can only increase space, never decrease 12672 if ( $move > 0 ) { 12673 $line->increase_field_width( $maximum_field_index - 1, $move ); 12674 } 12675 12676 # remember this column for the next group 12677 $last_comment_column = $line->get_column( $kmax - 2 ); 12678 } 12679 12680 else { 12681 12682 # try to at least line up the existing side comment location 12683 if ( $kmax > 0 && $move > 0 && $move < $avail ) { 12684 $line->increase_field_width( $maximum_field_index - 1, $move ); 12685 $do_not_align = 0; 12686 } 12687 12688 # reset side comment column if we can't align 12689 else { 12690 forget_side_comment(); 12691 } 12692 } 12693 } 12694 return $do_not_align; 12695} 12696 12697sub improve_continuation_indentation { 12698 my ( $do_not_align, $group_leader_length ) = @_; 12699 12700=pod 12701 12702See if we can increase the continuation indentation 12703to move all continuation lines closer to the next field 12704(unless it is a comment). 12705 12706'$min_ci_gap'is the extra indentation that we may need to introduce. 12707We will only introduce this to fields which already have some ci. 12708Without this variable, we would occasionally get something like this 12709(Complex.pm): 12710 12711use overload '+' => \&plus, 12712 '-' => \&minus, 12713 '*' => \&multiply, 12714 ... 12715 'tan' => \&tan, 12716 'atan2' => \&atan2, 12717 12718Whereas with this variable, we can shift variables over to get this: 12719 12720use overload '+' => \&plus, 12721 '-' => \&minus, 12722 '*' => \&multiply, 12723 ... 12724 'tan' => \&tan, 12725 'atan2' => \&atan2, 12726 12727=cut 12728 12729 my $maximum_field_index = $group_lines[0]->get_jmax(); 12730 12731 my $min_ci_gap = $rOpts->{'maximum-line-length'}; 12732 if ( $maximum_field_index > 1 && !$do_not_align ) { 12733 12734 for my $i ( 0 .. $maximum_line_index ) { 12735 my $line = $group_lines[$i]; 12736 my $leading_space_count = $line->get_leading_space_count(); 12737 my $rfields = $line->get_rfields(); 12738 12739 my $gap = $line->get_column(0) - $leading_space_count - 12740 length( $$rfields[0] ); 12741 12742 if ( $leading_space_count > $group_leader_length ) { 12743 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } 12744 } 12745 } 12746 12747 if ( $min_ci_gap >= $rOpts->{'maximum-line-length'} ) { 12748 $min_ci_gap = 0; 12749 } 12750 } 12751 else { 12752 $min_ci_gap = 0; 12753 } 12754 return $min_ci_gap; 12755} 12756 12757sub write_vertically_aligned_line { 12758 12759 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, 12760 $extra_leading_spaces ) 12761 = @_; 12762 my $rfields = $line->get_rfields(); 12763 my $leading_space_count = $line->get_leading_space_count(); 12764 my $outdent_long_lines = $line->get_outdent_long_lines(); 12765 my $maximum_field_index = $line->get_jmax(); 12766 12767 # add any extra spaces 12768 if ( $leading_space_count > $group_leader_length ) { 12769 $leading_space_count += $min_ci_gap; 12770 } 12771 12772 my $str = $$rfields[0]; 12773 12774 # loop to concatenate all fields of this line and needed padding 12775 my $total_pad_count = 0; 12776 my ( $j, $pad ); 12777 for $j ( 1 .. $maximum_field_index ) { 12778 12779 # skip zero-length side comments 12780 last 12781 if ( ( $j == $maximum_field_index ) 12782 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) 12783 ); 12784 12785 # compute spaces of padding before this field 12786 $pad = 12787 $line->get_column( $j - 1 ) - ( length($str) + $leading_space_count ); 12788 12789 if ($do_not_align) { 12790 $pad = 12791 ( $j < $maximum_field_index ) 12792 ? 0 12793 : $rOpts_minimum_space_to_comment - 1; 12794 } 12795 12796 # accumulate the padding 12797 if ( $pad > 0 ) { $total_pad_count += $pad; } 12798 12799 # add this field 12800 if ( !defined $$rfields[$j] ) { 12801 write_diagnostics("UNDEFined field at j=$j\n"); 12802 } 12803 12804 # only add padding when we have a finite field; 12805 # this avoids extra terminal spaces if we have empty fields 12806 if ( length( $$rfields[$j] ) > 0 ) { 12807 $str .= ' ' x $total_pad_count; 12808 $total_pad_count = 0; 12809 $str .= $$rfields[$j]; 12810 } 12811 } 12812 12813 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); 12814 12815 # ship this line off 12816 write_leader_and_string( $leading_space_count + $extra_leading_spaces, $str, 12817 $side_comment_length, $outdent_long_lines ); 12818} 12819 12820sub get_extra_leading_spaces { 12821 12822 #---------------------------------------------------------- 12823 # Define any extra indentation space (for the -lp option). 12824 # Here is why: 12825 # If a list has side comments, sub scan_list must dump the 12826 # list before it sees everything. When this happens, it sets 12827 # the indentation to the standard scheme, but notes how 12828 # many spaces it would have liked to use. We may be able 12829 # to recover that space here in the event that that all of the 12830 # lines of a list are back together again. 12831 #---------------------------------------------------------- 12832 12833 my $extra_leading_spaces = 0; 12834 if ($extra_indent_ok) { 12835 my $object = $group_lines[0]->get_indentation(); 12836 if ( ref($object) ) { 12837 my $extra_indentation_spaces_wanted = 12838 get_RECOVERABLE_SPACES($object); 12839 12840 # all indentation objects must be the same 12841 my $i; 12842 for $i ( 1 .. $maximum_line_index ) { 12843 if ( $object != $group_lines[$i]->get_indentation() ) { 12844 $extra_indentation_spaces_wanted = 0; 12845 last; 12846 } 12847 } 12848 12849 if ($extra_indentation_spaces_wanted) { 12850 12851 # the maximum space without exceeding the line length: 12852 my $avail = $group_lines[0]->get_available_space_on_right(); 12853 $extra_leading_spaces = 12854 ( $avail > $extra_indentation_spaces_wanted ) 12855 ? $extra_indentation_spaces_wanted 12856 : $avail; 12857 12858 # update the indentation object because with -icp the terminal 12859 # ');' will use the same adjustment. 12860 $object->permanently_decrease_AVAILABLE_SPACES( 12861 -$extra_leading_spaces ); 12862 } 12863 } 12864 } 12865 return $extra_leading_spaces; 12866} 12867 12868sub combine_fields { 12869 12870 # combine all fields except for the comment field ( sidecmt.t ) 12871 my ( $j, $k ); 12872 my $maximum_field_index = $group_lines[0]->get_jmax(); 12873 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { 12874 my $line = $group_lines[$j]; 12875 my $rfields = $line->get_rfields(); 12876 for ( $k = 1 ; $k < $maximum_field_index ; $k++ ) { 12877 $$rfields[0] .= $$rfields[$k]; 12878 } 12879 $$rfields[1] = $$rfields[$maximum_field_index]; 12880 12881 $line->set_jmax(1); 12882 $line->set_column( 0, 0 ); 12883 $line->set_column( 1, 0 ); 12884 12885 } 12886 $maximum_field_index = 1; 12887 12888 for $j ( 0 .. $maximum_line_index ) { 12889 my $line = $group_lines[$j]; 12890 my $rfields = $line->get_rfields(); 12891 for $k ( 0 .. $maximum_field_index ) { 12892 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); 12893 if ( $k == 0 ) { 12894 $pad += $group_lines[$j]->get_leading_space_count(); 12895 } 12896 12897 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } 12898 12899 } 12900 } 12901} 12902 12903sub get_output_line_number { 12904 12905 # the output line number reported to a caller is the number of items 12906 # written plus the number of items in the buffer 12907 my $self = shift; 12908 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); 12909} 12910 12911sub write_leader_and_string { 12912 12913 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines ) 12914 = @_; 12915 12916 my $leading_string = get_leading_string($leading_space_count); 12917 12918 # reduce continuation indentation if it will make the line fit 12919 # in the available page width. Do not include side comment length 12920 # when considering the excess. 12921 my $excess = 12922 length($str) - $side_comment_length + $leading_space_count - 12923 $rOpts->{'maximum-line-length'}; 12924 12925 # handle long lines: 12926 if ( $excess > 0 ) { 12927 12928 # TODO: consider re-implementing this logic 12929 # Reduce continuation indentation if that solves the problem 12930 #if ( length($spaces) >= $excess ) { 12931 # $spaces = substr( $spaces, 0, length($spaces) - $excess ); 12932 12933 #} 12934 12935 # Otherwise, outdent if permitted 12936 #else { 12937 12938 if ($outdent_long_lines) { 12939 $leading_string = ""; 12940 $last_outdented_line_at = 12941 $file_writer_object->get_output_line_number(); 12942 12943 unless ($outdented_line_count) { 12944 $first_outdented_line_at = $last_outdented_line_at; 12945 } 12946 $outdented_line_count++; 12947 } 12948 12949 #} 12950 } 12951 12952 $file_writer_object->write_code_line( $leading_string . $str . "\n" ); 12953 $last_group_level_written = $group_level; 12954 $last_side_comment_length = $side_comment_length; 12955 $extra_indent_ok = 0; 12956} 12957 12958{ # begin closure get_leading_string 12959 12960 my @leading_string_cache; 12961 12962 sub get_leading_string { 12963 12964 # define the leading whitespace string for this line.. 12965 my $leading_whitespace_count = shift; 12966 12967 # Handle case of zero whitespace, which includes multi-line quotes 12968 # (which may have a finite level; this prevents tab problems) 12969 if ( $leading_whitespace_count <= 0 ) { 12970 return ""; 12971 } 12972 12973 # look for previous result 12974 elsif ( $leading_string_cache[$leading_whitespace_count] ) { 12975 return $leading_string_cache[$leading_whitespace_count]; 12976 } 12977 12978 # must compute a string for this number of spaces 12979 my $leading_string; 12980 12981 # Handle simple case of no tabs 12982 if ( !$rOpts_tabs || $rOpts_indent_columns <= 0 ) { 12983 $leading_string = ( ' ' x $leading_whitespace_count ); 12984 } 12985 12986 # Handle tabs 12987 else { 12988 $leading_string = ( "\t" x $group_level ); 12989 my $space_count = 12990 $leading_whitespace_count - $group_level * $rOpts_indent_columns; 12991 12992 # shouldn't happen: 12993 if ( $space_count < 0 ) { 12994 warning( 12995"Error in append_line: for level=$group_level count=$leading_whitespace_count\n" 12996 ); 12997 $leading_string = ( ' ' x $leading_whitespace_count ); 12998 } 12999 else { 13000 $leading_string .= ( ' ' x $space_count ); 13001 } 13002 } 13003 $leading_string_cache[$leading_whitespace_count] = $leading_string; 13004 return $leading_string; 13005 } 13006} # end closure get_leading_string 13007 13008sub report_anything_unusual { 13009 my $self = shift; 13010 if ( $outdented_line_count > 0 ) { 13011 write_logfile_entry( 13012 "$outdented_line_count long lines were outdented:\n"); 13013 write_logfile_entry( 13014 " First at output line $first_outdented_line_at\n"); 13015 13016 if ( $outdented_line_count > 1 ) { 13017 write_logfile_entry( 13018 " Last at output line $last_outdented_line_at\n"); 13019 } 13020 write_logfile_entry( 13021 " use -noll to prevent outdenting, -l=n to increase line length\n" 13022 ); 13023 write_logfile_entry("\n"); 13024 } 13025} 13026 13027##################################################################### 13028# 13029# the PerlTidy::FileWriter class writes the output file 13030# 13031##################################################################### 13032 13033package PerlTidy::FileWriter; 13034 13035# Maximum number of little messages; probably need not be changed. 13036use constant MAX_NAG_MESSAGES => 6; 13037 13038sub write_logfile_entry { 13039 my $self = shift; 13040 my $logger_object = $self->{_logger_object}; 13041 if ($logger_object) { 13042 $logger_object->write_logfile_entry(@_); 13043 } 13044} 13045 13046sub new { 13047 my $class = shift; 13048 my ( $line_sink_object, $rOpts, $logger_object ) = @_; 13049 13050 bless { 13051 _line_sink_object => $line_sink_object, 13052 _logger_object => $logger_object, 13053 _rOpts => $rOpts, 13054 _output_line_number => 1, 13055 _consecutive_blank_lines => 0, 13056 _consecutive_nonblank_lines => 0, 13057 _first_line_length_error => 0, 13058 _max_line_length_error => 0, 13059 _last_line_length_error => 0, 13060 _first_line_length_error_at => 0, 13061 _max_line_length_error_at => 0, 13062 _last_line_length_error_at => 0, 13063 _line_length_error_count => 0, 13064 _max_output_line_length => 0, 13065 _max_output_line_length_at => 0, 13066 }, $class; 13067} 13068 13069sub tee_on { 13070 my $self = shift; 13071 $self->{_line_sink_object}->tee_on(); 13072} 13073 13074sub tee_off { 13075 my $self = shift; 13076 $self->{_line_sink_object}->tee_off(); 13077} 13078 13079sub get_output_line_number { 13080 my $self = shift; 13081 return $self->{_output_line_number}; 13082} 13083 13084sub decrement_output_line_number { 13085 my $self = shift; 13086 $self->{_output_line_number}--; 13087} 13088 13089sub get_consecutive_nonblank_lines { 13090 my $self = shift; 13091 return $self->{_consecutive_nonblank_lines}; 13092} 13093 13094sub reset_consecutive_blank_lines { 13095 my $self = shift; 13096 $self->{_consecutive_blank_lines} = 0; 13097} 13098 13099sub want_blank_line { 13100 my $self = shift; 13101 unless ( $self->{_consecutive_blank_lines} ) { 13102 $self->write_blank_code_line(); 13103 } 13104} 13105 13106sub write_blank_code_line { 13107 my $self = shift; 13108 my $rOpts = $self->{_rOpts}; 13109 return 13110 if ( $self->{_consecutive_blank_lines} >= 13111 $rOpts->{'maximum-consecutive-blank-lines'} ); 13112 $self->{_consecutive_blank_lines}++; 13113 $self->{_consecutive_nonblank_lines} = 0; 13114 $self->write_line("\n"); 13115} 13116 13117sub write_code_line { 13118 my $self = shift; 13119 my $a = shift; 13120 13121 if ( $a =~ /^\s*$/ ) { 13122 my $rOpts = $self->{_rOpts}; 13123 return 13124 if ( $self->{_consecutive_blank_lines} >= 13125 $rOpts->{'maximum-consecutive-blank-lines'} ); 13126 $self->{_consecutive_blank_lines}++; 13127 $self->{_consecutive_nonblank_lines} = 0; 13128 } 13129 else { 13130 $self->{_consecutive_blank_lines} = 0; 13131 $self->{_consecutive_nonblank_lines}++; 13132 } 13133 $self->write_line($a); 13134} 13135 13136sub write_line { 13137 my $self = shift; 13138 my $a = shift; 13139 $self->{_line_sink_object}->write_line($a); 13140 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } 13141 13142 # This calculation of excess line length ignores any internal tabs 13143 my $rOpts = $self->{_rOpts}; 13144 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; 13145 if ( $a =~ /^\t+/g ) { 13146 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); 13147 } 13148 13149 # Note that we just incremented output line number to future value 13150 # so we must subtract 1 for current line number 13151 if ( length($a) > 1 + $self->{_max_output_line_length} ) { 13152 $self->{_max_output_line_length} = length($a) - 1; 13153 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; 13154 } 13155 13156 if ( $exceed > 0 ) { 13157 my $output_line_number = $self->{_output_line_number}; 13158 $self->{_last_line_length_error} = $exceed; 13159 $self->{_last_line_length_error_at} = $output_line_number - 1; 13160 if ( $self->{_line_length_error_count} == 0 ) { 13161 $self->{_first_line_length_error} = $exceed; 13162 $self->{_first_line_length_error_at} = $output_line_number - 1; 13163 } 13164 13165 if ( 13166 $self->{_last_line_length_error} > $self->{_max_line_length_error} ) 13167 { 13168 $self->{_max_line_length_error} = $exceed; 13169 $self->{_max_line_length_error_at} = $output_line_number - 1; 13170 } 13171 13172 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { 13173 $self->write_logfile_entry( 13174 "Line length exceeded by $exceed characters\n"); 13175 } 13176 $self->{_line_length_error_count}++; 13177 } 13178 13179} 13180 13181sub report_line_length_errors { 13182 my $self = shift; 13183 my $rOpts = $self->{_rOpts}; 13184 my $line_length_error_count = $self->{_line_length_error_count}; 13185 if ( $line_length_error_count == 0 ) { 13186 $self->write_logfile_entry( 13187 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); 13188 my $max_output_line_length = $self->{_max_output_line_length}; 13189 my $max_output_line_length_at = $self->{_max_output_line_length_at}; 13190 $self->write_logfile_entry( 13191" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" 13192 ); 13193 13194 } 13195 else { 13196 13197 my $word = ( $line_length_error_count > 1 ) ? "s" : ""; 13198 $self->write_logfile_entry( 13199"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" 13200 ); 13201 13202 $word = ( $line_length_error_count > 1 ) ? "First" : ""; 13203 my $first_line_length_error = $self->{_first_line_length_error}; 13204 my $first_line_length_error_at = $self->{_first_line_length_error_at}; 13205 $self->write_logfile_entry( 13206" $word at line $first_line_length_error_at by $first_line_length_error characters\n" 13207 ); 13208 13209 if ( $line_length_error_count > 1 ) { 13210 my $max_line_length_error = $self->{_max_line_length_error}; 13211 my $max_line_length_error_at = $self->{_max_line_length_error_at}; 13212 my $last_line_length_error = $self->{_last_line_length_error}; 13213 my $last_line_length_error_at = $self->{_last_line_length_error_at}; 13214 $self->write_logfile_entry( 13215" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" 13216 ); 13217 $self->write_logfile_entry( 13218" Last at line $last_line_length_error_at by $last_line_length_error characters\n" 13219 ); 13220 } 13221 } 13222} 13223 13224##################################################################### 13225# 13226# The PerlTidy::Debugger class shows line tokenization 13227# 13228##################################################################### 13229 13230package PerlTidy::Debugger; 13231 13232sub new { 13233 13234 my ( $class, $filename ) = @_; 13235 13236 bless { 13237 _debug_file => $filename, 13238 _debug_file_opened => 0, 13239 _fh => undef, 13240 }, $class; 13241} 13242 13243sub really_open_debug_file { 13244 13245 my $self = shift; 13246 my $debug_file = $self->{_debug_file}; 13247 my $fh; 13248 unless ( $fh = IO::File->new("> $debug_file") ) { 13249 warn("can't open $debug_file: $!\n"); 13250 } 13251 $self->{_debug_file_opened} = 1; 13252 $self->{_fh} = $fh; 13253 print $fh 13254 "Use -dump-token-types (-dtt) to get a list of token type codes\n"; 13255} 13256 13257sub close_debug_file { 13258 13259 my $self = shift; 13260 my $fh = $self->{_fh}; 13261 if ( $self->{_debug_file_opened} ) { 13262 13263 close $self->{_fh}; 13264 } 13265} 13266 13267sub write_debug_entry { 13268 13269 # This is a debug dump routine which may be modified as necessary 13270 # to dump tokens on a line-by-line basis. The output will be written 13271 # to the .DEBUG file when the -D flag is entered. 13272 my $self = shift; 13273 my $line_of_tokens = shift; 13274 13275 my $input_line = $line_of_tokens->{_line_text}; 13276 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 13277 my $rtokens = $line_of_tokens->{_rtokens}; 13278 my $rlevels = $line_of_tokens->{_rlevels}; 13279 my $rslevels = $line_of_tokens->{_rslevels}; 13280 my $rblock_type = $line_of_tokens->{_rblock_type}; 13281 my $input_line_number = $line_of_tokens->{_line_number}; 13282 my $line_type = $line_of_tokens->{_line_type}; 13283 13284 my ( $j, $num ); 13285 13286 my $token_str = "$input_line_number: "; 13287 my $reconstructed_original = "$input_line_number: "; 13288 my $block_str = "$input_line_number: "; 13289 13290 #$token_str .= "$line_type: "; 13291 #$reconstructed_original .= "$line_type: "; 13292 13293 my $pattern = ""; 13294 my @next_char = ( '"', '"' ); 13295 my $i_next = 0; 13296 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } 13297 my $fh = $self->{_fh}; 13298 13299 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 13300 13301 # testing patterns 13302 if ( $$rtoken_type[$j] eq 'k' ) { 13303 $pattern .= $$rtokens[$j]; 13304 } 13305 else { 13306 $pattern .= $$rtoken_type[$j]; 13307 } 13308 $reconstructed_original .= $$rtokens[$j]; 13309 $block_str .= "($$rblock_type[$j])"; 13310 $num = length( $$rtokens[$j] ); 13311 my $type_str = $$rtoken_type[$j]; 13312 13313 # be sure there are no blank tokens (shouldn't happen) 13314 # This can only happen if a programming error has been made 13315 # because all valid tokens are non-blank 13316 if ( $type_str eq ' ' ) { 13317 print $fh "BLANK TOKEN on the next line\n"; 13318 $type_str = $next_char[$i_next]; 13319 $i_next = 1 - $i_next; 13320 } 13321 13322 if ( length($type_str) == 1 ) { 13323 $type_str = $type_str x $num; 13324 } 13325 $token_str .= $type_str; 13326 } 13327 13328 # Write what you want here ... 13329 # print $fh "$input_line\n"; 13330 # print $fh "$pattern\n"; 13331 print $fh "$reconstructed_original\n"; 13332 print $fh "$token_str\n"; 13333 13334 #print $fh "$block_str\n"; 13335} 13336 13337##################################################################### 13338# 13339# The PerlTidy::LineBuffer class supplies a 'get_line()' 13340# method for returning the next line to be parsed, as well as a 13341# 'peek_ahead()' method 13342# 13343# The input parameter is an object with a 'get_line()' method 13344# which returns the next line to be parsed 13345# 13346##################################################################### 13347 13348package PerlTidy::LineBuffer; 13349 13350sub new { 13351 13352 my $class = shift; 13353 my $line_source_object = shift; 13354 13355 return bless { 13356 _line_source_object => $line_source_object, 13357 _rlookahead_buffer => [], 13358 }, $class; 13359} 13360 13361sub peek_ahead { 13362 my $self = shift; 13363 my $buffer_index = shift; 13364 my $line = undef; 13365 my $input_file_copy = $self->{_input_file_copy}; 13366 my $line_source_object = $self->{_line_source_object}; 13367 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 13368 if ( $buffer_index < scalar(@$rlookahead_buffer) ) { 13369 $line = $$rlookahead_buffer[$buffer_index]; 13370 } 13371 else { 13372 $line = $line_source_object->get_line(); 13373 push ( @$rlookahead_buffer, $line ); 13374 } 13375 return $line; 13376} 13377 13378sub get_line { 13379 my $self = shift; 13380 my $line = undef; 13381 my $line_source_object = $self->{_line_source_object}; 13382 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 13383 13384 if ( scalar(@$rlookahead_buffer) ) { 13385 $line = shift @$rlookahead_buffer; 13386 } 13387 else { 13388 $line = $line_source_object->get_line(); 13389 } 13390 return $line; 13391} 13392 13393######################################################################## 13394# 13395# the PerlTidy::Tokenizer package is essentially a filter which 13396# reads lines of perl source code from a source object and provides 13397# corresponding tokenized lines through its get_line() method. Lines 13398# flow from the source_object to the caller like this: 13399# 13400# source_object --> LineBuffer_object --> Tokenizer --> calling routine 13401# get_line() get_line() get_line() line_of_tokens 13402# 13403# The source object can be any object with a get_line() method which 13404# supplies one line (a character string) perl call. 13405# The LineBuffer object is created by the Tokenizer. 13406# The Tokenizer returns a reference to a data structure 'line_of_tokens' 13407# containing one tokenized line for each call to its get_line() method. 13408# 13409# WARNING: This is not a real class yet. Only one tokenizer my be used. 13410# 13411######################################################################## 13412 13413package PerlTidy::Tokenizer; 13414 13415BEGIN { 13416 13417 # Caution: these debug flags produce a lot of output 13418 # They should all be 0 except when debugging small scripts 13419 13420 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; 13421 use constant TOKENIZER_DEBUG_FLAG_GUESS => 0; 13422 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; 13423 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; 13424 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; 13425 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; 13426 13427 my $debug_warning = sub { 13428 print "TOKENIZER_DEBUGGING with key $_[0]\n"; 13429 }; 13430 13431 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); 13432 TOKENIZER_DEBUG_FLAG_GUESS && $debug_warning->('GUESS'); 13433 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); 13434 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); 13435 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); 13436 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); 13437 13438} 13439 13440sub make_regex { 13441 13442 # Given a string, make the corresponding regex with qr. 13443 # Versions of perl before 5.005 do not have qr, 13444 # so we will just return the string, which will work 13445 # but not be optimized. 13446 BEGIN { 13447 if ( $] < 5.005 ) { 13448 sub qr { $_[0] } 13449 } 13450 } 13451 qr($_[0]); 13452} 13453 13454use Carp; 13455use vars qw{ 13456 $tokenizer_self 13457 $level_in_tokenizer 13458 $slevel_in_tokenizer 13459 $nesting_token_string 13460 $nesting_type_string 13461 $nesting_block_string 13462 $nesting_list_string 13463 $saw_negative_indentation 13464 $id_scan_state 13465 $last_nonblank_token 13466 $last_nonblank_type 13467 $last_nonblank_block_type 13468 $last_nonblank_container_type 13469 $last_nonblank_type_sequence 13470 $last_last_nonblank_token 13471 $last_last_nonblank_type 13472 $last_last_nonblank_block_type 13473 $last_last_nonblank_container_type 13474 $last_last_nonblank_type_sequence 13475 $last_nonblank_prototype 13476 $statement_type 13477 $identifier 13478 $in_quote 13479 $quote_type 13480 $quote_character 13481 $quote_pos 13482 $quote_depth 13483 $allowed_quote_modifiers 13484 $paren_depth 13485 @paren_type 13486 @paren_semicolon_count 13487 @paren_structural_type 13488 $brace_depth 13489 @brace_type 13490 @brace_structural_type 13491 @brace_context 13492 @brace_package 13493 $square_bracket_depth 13494 @square_bracket_type 13495 @square_bracket_structural_type 13496 @depth_array 13497 @starting_line_of_current_depth 13498 @current_depth 13499 @current_sequence_number 13500 @nesting_sequence_number 13501 @lower_case_labels_at 13502 $saw_v_string 13503 %is_constant 13504 %is_user_function 13505 %user_function_prototype 13506 %saw_function_definition 13507 $max_token_index 13508 $peeked_ahead 13509 $current_package 13510 $unexpected_error_count 13511 $input_line 13512 $input_line_number 13513 $rpretokens 13514 $rpretoken_map 13515 $rpretoken_type 13516 $want_paren 13517 $context 13518 @slevel_stack 13519 $ci_string_in_tokenizer 13520 $continuation_string_in_tokenizer 13521 $in_statement_continuation 13522 $started_looking_for_here_target_at 13523 $nearly_matched_here_target_at 13524 13525 $indirect_object_taker 13526 $block_list_operator 13527 $block_operator 13528 %expecting_operator_token 13529 %expecting_operator_types 13530 %expecting_term_types 13531 %expecting_term_token 13532 %is_block_function 13533 %is_block_list_function 13534 %is_digraph 13535 %is_file_test_operator 13536 %is_trigraph 13537 %is_valid_token_type 13538 %is_keyword 13539 %really_want_term 13540 @opening_brace_names 13541 @closing_brace_names 13542 %is_keyword_taking_list 13543}; 13544 13545# possible values of operator_expected() 13546use constant TERM => -1; 13547use constant UNKNOWN => 0; 13548use constant OPERATOR => 1; 13549 13550# possible values of context 13551use constant SCALAR_CONTEXT => -1; 13552use constant UNKNOWN_CONTEXT => 0; 13553use constant LIST_CONTEXT => 1; 13554 13555# Maximum number of little messages; probably need not be changed. 13556use constant MAX_NAG_MESSAGES => 6; 13557 13558{ 13559 13560 # methods to count instances 13561 my $_count = 0; 13562 sub get_count { $_count; } 13563 sub _increment_count { ++$_count } 13564 sub _decrement_count { --$_count } 13565} 13566 13567sub DESTROY { 13568 $_[0]->_decrement_count(); 13569} 13570 13571sub new { 13572 13573 my $class = shift; 13574 13575 # Note: 'tabs' and 'indent_columns' are temporary and should be 13576 # removed asap 13577 my %defaults = ( 13578 source_object => undef, 13579 debugger_object => undef, 13580 diagnostics_object => undef, 13581 logger_object => undef, 13582 starting_level => undef, 13583 indent_columns => 4, 13584 tabs => 0, 13585 look_for_hash_bang => 0, 13586 trim_qw => 1, 13587 look_for_autoloader => 1, 13588 look_for_selfloader => 1, 13589 ); 13590 my %args = ( %defaults, @_ ); 13591 13592 # we are given an object with a get_line() method to supply source lines 13593 my $source_object = $args{source_object}; 13594 13595 # we create another object with a get_line() and peek_ahead() method 13596 my $line_buffer_object = PerlTidy::LineBuffer->new($source_object); 13597 13598 # Tokenizer state data is as follows: 13599 # _rhere_target_list reference to list of here-doc targets 13600 # _here_doc_target the target string for a here document 13601 # _here_quote_character the type of here-doc quoting (" ' ` or none) 13602 # to determine if interpolation is done 13603 # _quote_target character we seek if chasing a quote 13604 # _line_start_quote line where we started looking for a long quote 13605 # _in_here_doc flag indicating if we are in a here-doc 13606 # _in_pod flag set if we are in pod documentation 13607 # _in_error flag set if we saw severe error (binary in script) 13608 # _in_data flag set if we are in __DATA__ section 13609 # _in_end flag set if we are in __END__ section 13610 # _in_format flag set if we are in a format description 13611 # _in_quote flag telling if we are chasing a quote 13612 # _starting_level indentation level of first line 13613 # _input_tabstr string denoting one indentation level of input file 13614 # _know_input_tabstr flag indicating if we know _input_tabstr 13615 # _line_buffer_object object with get_line() method to supply source code 13616 # _diagnostics_object place to write debugging information 13617 $tokenizer_self = { 13618 _rhere_target_list => undef, 13619 _in_here_doc => 0, 13620 _here_doc_target => "", 13621 _here_quote_character => "", 13622 _in_data => 0, 13623 _in_end => 0, 13624 _in_format => 0, 13625 _in_error => 0, 13626 _in_pod => 0, 13627 _in_quote => 0, 13628 _quote_target => "", 13629 _line_start_quote => -1, 13630 _starting_level => $args{starting_level}, 13631 _know_starting_level => defined( $args{starting_level} ), 13632 _tabs => $args{tabs}, 13633 _indent_columns => $args{indent_columns}, 13634 _look_for_hash_bang => $args{look_for_hash_bang}, 13635 _trim_qw => $args{trim_qw}, 13636 _input_tabstr => "", 13637 _know_input_tabstr => -1, 13638 _last_line_number => 0, 13639 _saw_perl_dash_P => 0, 13640 _saw_perl_dash_w => 0, 13641 _saw_use_strict => 0, 13642 _look_for_autoloader => $args{look_for_autoloader}, 13643 _look_for_selfloader => $args{look_for_selfloader}, 13644 _saw_autoloader => 0, 13645 _saw_selfloader => 0, 13646 _saw_hash_bang => 0, 13647 _saw_end => 0, 13648 _saw_data => 0, 13649 _saw_lc_filehandle => 0, 13650 _started_tokenizing => 0, 13651 _line_buffer_object => $line_buffer_object, 13652 _debugger_object => $args{debugger_object}, 13653 _diagnostics_object => $args{diagnostics_object}, 13654 _logger_object => $args{logger_object}, 13655 }; 13656 13657 prepare_for_a_new_file(); 13658 find_starting_indentation_level(); 13659 13660 bless $tokenizer_self, $class; 13661 13662 # This is not a full class yet, so die if an attempt is made to 13663 # create more than one object. 13664 13665 if ( _increment_count() > 1 ) { 13666 confess 13667"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 13668 } 13669 13670 return $tokenizer_self; 13671 13672} 13673 13674# interface to PerlTidy::Logger routines 13675sub warning { 13676 my $logger_object = $tokenizer_self->{_logger_object}; 13677 if ($logger_object) { 13678 $logger_object->warning(@_); 13679 } 13680} 13681 13682sub complain { 13683 my $logger_object = $tokenizer_self->{_logger_object}; 13684 if ($logger_object) { 13685 $logger_object->complain(@_); 13686 } 13687} 13688 13689sub write_logfile_entry { 13690 my $logger_object = $tokenizer_self->{_logger_object}; 13691 if ($logger_object) { 13692 $logger_object->write_logfile_entry(@_); 13693 } 13694} 13695 13696sub interrupt_logfile { 13697 my $logger_object = $tokenizer_self->{_logger_object}; 13698 if ($logger_object) { 13699 $logger_object->interrupt_logfile(); 13700 } 13701} 13702 13703sub resume_logfile { 13704 my $logger_object = $tokenizer_self->{_logger_object}; 13705 if ($logger_object) { 13706 $logger_object->resume_logfile(); 13707 } 13708} 13709 13710sub increment_brace_error { 13711 my $logger_object = $tokenizer_self->{_logger_object}; 13712 if ($logger_object) { 13713 $logger_object->increment_brace_error(); 13714 } 13715} 13716 13717sub report_definite_bug { 13718 my $logger_object = $tokenizer_self->{_logger_object}; 13719 if ($logger_object) { 13720 $logger_object->report_definite_bug(); 13721 } 13722} 13723 13724sub brace_warning { 13725 my $logger_object = $tokenizer_self->{_logger_object}; 13726 if ($logger_object) { 13727 $logger_object->brace_warning(@_); 13728 } 13729} 13730 13731sub get_saw_brace_error { 13732 my $logger_object = $tokenizer_self->{_logger_object}; 13733 if ($logger_object) { 13734 $logger_object->get_saw_brace_error(); 13735 } 13736 else { 13737 0; 13738 } 13739} 13740 13741# interface to PerlTidy::Diagnostics routines 13742sub write_diagnostics { 13743 if ( $tokenizer_self->{_diagnostics_object} ) { 13744 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); 13745 } 13746} 13747 13748sub report_tokenization_errors { 13749 13750 my $self = shift; 13751 13752 my $level = get_indentation_level(); 13753 if ( $level != $tokenizer_self->{_starting_level} ) { 13754 warning("final indentation level: $level\n"); 13755 } 13756 13757 check_final_nesting_depths(); 13758 13759 if ( $tokenizer_self->{_look_for_hash_bang} 13760 && !$tokenizer_self->{_saw_hash_bang} ) 13761 { 13762 warning( 13763 "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); 13764 } 13765 13766 if ( $tokenizer_self->{_in_format} ) { 13767 warning("hit EOF while in format description\n"); 13768 } 13769 13770 # this check may be removed after a year or so 13771 if ( $tokenizer_self->{_saw_lc_filehandle} ) { 13772 13773 warning( <<'EOM' ); 13774------------------------------------------------------------------ 13775PLEASE NOTE: If you get this message, it is because perltidy noticed 13776possible ambiguous syntax at one or more places in your script, as 13777noted above. The problem is with statements accepting indirect objects, 13778such as print and printf statements of the form 13779 13780 print bareword ( $etc 13781 13782Perltidy needs your help in deciding if 'bareword' is a filehandle or a 13783function call. The problem is the space between 'bareword' and '('. If 13784'bareword' is a function call, you should remove the trailing space. If 13785'bareword' is a filehandle, you should avoid the opening paren or else 13786globally capitalize 'bareword' to be BAREWORD. So the above line 13787would be: 13788 13789 print bareword( $etc # function 13790or 13791 print bareword @list # filehandle 13792or 13793 print BAREWORD ( $etc # filehandle 13794 13795If you want to keep the line as it is, and are sure it is correct, 13796you can use -w=0 to prevent this message. 13797------------------------------------------------------------------ 13798EOM 13799 13800 #It is very possible that this is syntax is unambiguous to perl, which 13801 #may know what 'bareword' is by parsing any 'use' and 'require' modules 13802 #that you have referenced, but perltidy does not currently parse other 13803 #modules, so it has not seen any function prototypes that they might 13804 #contain. To prevent this message, use -w=0. 13805 13806 } 13807 13808 if ( $tokenizer_self->{_in_pod} ) { 13809 13810 # Just write log entry if this is after __END__ or __DATA__ 13811 # because this happens to often, and it is not likely to be 13812 # a parsing error. 13813 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 13814 write_logfile_entry( 13815"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 13816 ); 13817 } 13818 13819 else { 13820 complain( 13821"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 13822 ); 13823 } 13824 13825 } 13826 13827 if ( $tokenizer_self->{_in_here_doc} ) { 13828 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 13829 if ($here_doc_target) { 13830 warning( 13831"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" 13832 ); 13833 } 13834 else { 13835 warning( 13836"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" 13837 ); 13838 } 13839 if ($nearly_matched_here_target_at) { 13840 warning( 13841"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" 13842 ); 13843 } 13844 } 13845 13846 if ( $tokenizer_self->{_in_quote} ) { 13847 my $line_start_quote = $tokenizer_self->{_line_start_quote}; 13848 my $quote_target = $tokenizer_self->{_quote_target}; 13849 warning( 13850"hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n" 13851 ); 13852 } 13853 13854 unless ( $tokenizer_self->{_saw_perl_dash_w} ) { 13855 if ( $] < 5.006 ) { 13856 write_logfile_entry("Suggest including '-w parameter'\n"); 13857 } 13858 else { 13859 write_logfile_entry("Suggest including 'use warnings;'\n"); 13860 } 13861 } 13862 13863 if ( $tokenizer_self->{_saw_perl_dash_P} ) { 13864 write_logfile_entry("Use of -P parameter for defines is discouraged\n"); 13865 } 13866 13867 unless ( $tokenizer_self->{_saw_use_strict} ) { 13868 write_logfile_entry("Suggest including 'use strict;'\n"); 13869 } 13870 13871 # it is suggested that lables have at least one upper case character 13872 # for legibility and to avoid code breakage as new keywords are introduced 13873 if (@lower_case_labels_at) { 13874 my $num = @lower_case_labels_at; 13875 write_logfile_entry( 13876 "Suggest using upper case characters in label(s)\n"); 13877 local $" = ')('; 13878 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); 13879 } 13880} 13881 13882sub report_v_string { 13883 13884 # warn if this version can't handle v-strings 13885 my $tok = shift; 13886 $saw_v_string = $input_line_number; 13887 if ( $] < 5.006 ) { 13888 warning( 13889"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" 13890 ); 13891 } 13892} 13893 13894sub know_input_tabstr { 13895 return ( $tokenizer_self->{_know_input_tabstr} == 1 ); 13896} 13897 13898sub get_input_line_number { 13899 return $tokenizer_self->{_last_line_number}; 13900} 13901 13902# returns the next tokenized line 13903sub get_line { 13904 13905 my $self = shift; 13906 13907 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); 13908 13909 return undef unless ($input_line); 13910 13911 $tokenizer_self->{_last_line_number}++; 13912 13913 # remove any control m; otherwise here-target's may not match; 13914 # trimming trailing white space would work too, but that would 13915 # change the original line 13916 $input_line =~ s/(\r|\035)*$//gi; 13917 13918 my $input_line_number = $tokenizer_self->{_last_line_number}; 13919 13920 # create a data structure describing this line which will be 13921 # returned to the caller. 13922 13923 # _line_type codes are: 13924 # SYSTEM - system-specific code before hash-bang line 13925 # CODE - line of perl code (including comments) 13926 # POD_START - line starting pod, such as '=head' 13927 # POD - pod documentation text 13928 # POD_END - last line of pod section, '=cut' 13929 # HERE - text of here-document 13930 # HERE_END - last line of here-doc (target word) 13931 # FORMAT - format section 13932 # FORMAT_END - last line of format section, '.' 13933 # DATA_START - __DATA__ line 13934 # DATA - unidentified text following __DATA__ 13935 # END_START - __END__ line 13936 # END - unidentified text following __END__ 13937 # ERROR - we are in big trouble, probably not a perl script 13938 13939 # Other variables: 13940 # _curly_brace_depth - depth of curly braces at start of line 13941 # _square_bracket_depth - depth of square brackets at start of line 13942 # _paren_depth - depth of parens at start of line 13943 # _starting_in_quote - this line continues a multi-line quote 13944 # (so don't trim leading blanks!) 13945 # _ending_in_quote - this line ends in a multi-line quote 13946 # (so don't trim trailing blanks!) 13947 13948 my $line_of_tokens = { 13949 _line_type => 'EOF', 13950 _line_text => $input_line, 13951 _line_number => $input_line_number, 13952 _rtoken_type => undef, 13953 _rtokens => undef, 13954 _rlevels => undef, 13955 _rslevels => undef, 13956 _rblock_type => undef, 13957 _rcontainer_type => undef, 13958 _rcontainer_environment => undef, 13959 _rtype_sequence => undef, 13960 _rnesting_tokens => undef, 13961 _rci_levels => undef, 13962 _rnesting_blocks => undef, 13963 _python_indentation_level => 0, 13964 _starting_in_quote => 13965 ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ), 13966 _ending_in_quote => 0, 13967 _curly_brace_depth => $brace_depth, 13968 _square_bracket_depth => $square_bracket_depth, 13969 _paren_depth => $paren_depth, 13970 _quote_character => '', 13971 }; 13972 13973 # must print line unchanged if we are in a here document 13974 if ( $tokenizer_self->{_in_here_doc} ) { 13975 13976 $line_of_tokens->{_line_type} = 'HERE'; 13977 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 13978 my $here_quote_character = $tokenizer_self->{_here_quote_character}; 13979 my $candidate_target = $input_line; 13980 chomp $candidate_target; 13981 if ( $candidate_target eq $here_doc_target ) { 13982 $nearly_matched_here_target_at = undef; 13983 $line_of_tokens->{_line_type} = 'HERE_END'; 13984 write_logfile_entry("Exiting HERE document $here_doc_target\n"); 13985 13986 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 13987 if (@$rhere_target_list) { # there can be multiple here targets 13988 ( $here_doc_target, $here_quote_character ) = 13989 @{ shift @$rhere_target_list }; 13990 $tokenizer_self->{_here_doc_target} = $here_doc_target; 13991 $tokenizer_self->{_here_quote_character} = 13992 $here_quote_character; 13993 write_logfile_entry( 13994 "Entering HERE document $here_doc_target\n"); 13995 $nearly_matched_here_target_at = undef; 13996 $started_looking_for_here_target_at = $input_line_number; 13997 } 13998 else { 13999 $tokenizer_self->{_in_here_doc} = 0; 14000 $tokenizer_self->{_here_doc_target} = ""; 14001 $tokenizer_self->{_here_quote_character} = ""; 14002 } 14003 } 14004 14005 # check for error of extra whitespace 14006 else { 14007 $candidate_target =~ s/\s*$//; 14008 $candidate_target =~ s/^\s*//; 14009 if ( $candidate_target eq $here_doc_target ) { 14010 $nearly_matched_here_target_at = $input_line_number; 14011 } 14012 } 14013 return $line_of_tokens; 14014 } 14015 14016 # must print line unchanged if we are in a format section 14017 elsif ( $tokenizer_self->{_in_format} ) { 14018 14019 if ( $input_line =~ /^\.[\s#]*$/ ) { 14020 write_logfile_entry("Exiting format section\n"); 14021 $tokenizer_self->{_in_format} = 0; 14022 $line_of_tokens->{_line_type} = 'FORMAT_END'; 14023 } 14024 else { 14025 $line_of_tokens->{_line_type} = 'FORMAT'; 14026 } 14027 return $line_of_tokens; 14028 } 14029 14030 # must print line unchanged if we are in pod documentation 14031 elsif ( $tokenizer_self->{_in_pod} ) { 14032 14033 $line_of_tokens->{_line_type} = 'POD'; 14034 if ( $input_line =~ /^=cut/ ) { 14035 $line_of_tokens->{_line_type} = 'POD_END'; 14036 write_logfile_entry("Exiting POD section\n"); 14037 $tokenizer_self->{_in_pod} = 0; 14038 } 14039 if ( $input_line =~ /^\#\!.*perl\b/ ) { 14040 warning("Hash-bang in pod can cause perl to fail! \n"); 14041 } 14042 14043 return $line_of_tokens; 14044 } 14045 14046 # must print line unchanged if we have seen a severe error (i.e., we 14047 # are seeing illegal tokens and connot continue. Syntax errors do 14048 # not pass this route). Calling routine can decide what to do, but 14049 # the default can be to just pass all lines as if they were after __END__ 14050 elsif ( $tokenizer_self->{_in_error} ) { 14051 $line_of_tokens->{_line_type} = 'ERROR'; 14052 return $line_of_tokens; 14053 } 14054 14055 # print line unchanged if we are __DATA__ section 14056 elsif ( $tokenizer_self->{_in_data} ) { 14057 14058 # ...but look for POD 14059 # Note that the _in_data and _in_end flags remain set 14060 # so that we return to that state after seeing the 14061 # end of a pod section 14062 if ( $input_line =~ /^=(?!cut)/ ) { 14063 $line_of_tokens->{_line_type} = 'POD_START'; 14064 write_logfile_entry("Entering POD section\n"); 14065 $tokenizer_self->{_in_pod} = 1; 14066 return $line_of_tokens; 14067 } 14068 else { 14069 $line_of_tokens->{_line_type} = 'DATA'; 14070 return $line_of_tokens; 14071 } 14072 } 14073 14074 # print line unchanged if we are in __END__ section 14075 elsif ( $tokenizer_self->{_in_end} ) { 14076 14077 # ...but look for POD 14078 # Note that the _in_data and _in_end flags remain set 14079 # so that we return to that state after seeing the 14080 # end of a pod section 14081 if ( $input_line =~ /^=(?!cut)/ ) { 14082 $line_of_tokens->{_line_type} = 'POD_START'; 14083 write_logfile_entry("Entering POD section\n"); 14084 $tokenizer_self->{_in_pod} = 1; 14085 return $line_of_tokens; 14086 } 14087 else { 14088 $line_of_tokens->{_line_type} = 'END'; 14089 return $line_of_tokens; 14090 } 14091 } 14092 14093 # check for a hash-bang line if we haven't seen one 14094 if ( !$tokenizer_self->{_saw_hash_bang} ) { 14095 if ( $input_line =~ /^\#\!.*perl\b/ ) { 14096 $tokenizer_self->{_saw_hash_bang} = $input_line_number; 14097 14098 # check for -w and -P flags 14099 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { 14100 $tokenizer_self->{_saw_perl_dash_P} = 1; 14101 } 14102 14103 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { 14104 $tokenizer_self->{_saw_perl_dash_w} = 1; 14105 } 14106 14107 if ( ( $input_line_number > 1 ) 14108 && ( !$tokenizer_self->{_look_for_hash_bang} ) ) 14109 { 14110 14111 # this is helpful for VMS systems; we may have accidentally 14112 # tokenized some DCL commands 14113 if ( $tokenizer_self->{_started_tokenizing} ) { 14114 warning( 14115"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" 14116 ); 14117 } 14118 else { 14119 complain("Useless hash-bang after line 1\n"); 14120 } 14121 } 14122 14123 # Report the leading hash-bang as a system line 14124 # This will prevent -dac from deleting it 14125 else { 14126 $line_of_tokens->{_line_type} = 'SYSTEM'; 14127 return $line_of_tokens; 14128 } 14129 } 14130 } 14131 14132 # wait for a hash-bang before parsing if the user invoked us with -x 14133 if ( $tokenizer_self->{_look_for_hash_bang} 14134 && !$tokenizer_self->{_saw_hash_bang} ) 14135 { 14136 $line_of_tokens->{_line_type} = 'SYSTEM'; 14137 return $line_of_tokens; 14138 } 14139 14140 # now we know that it is ok to tokenize the line... 14141 # the line tokenizer will modify any of these private variables: 14142 # _rhere_target_list 14143 # _in_data 14144 # _in_end 14145 # _in_format 14146 # _in_error 14147 # _in_pod 14148 # _in_quote 14149 my $ending_in_quote_last = $tokenizer_self->{_in_quote}; 14150 tokenize_this_line($line_of_tokens); 14151 14152 # Now finish defining the return structure and return it 14153 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; 14154 14155 # handle severe error (binary data in script) 14156 if ( $tokenizer_self->{_in_error} ) { 14157 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages 14158 warning("Giving up after error\n"); 14159 $line_of_tokens->{_line_type} = 'ERROR'; 14160 reset_indentation_level(0); # avoid error messages 14161 return $line_of_tokens; 14162 } 14163 14164 # handle start of pod documentation 14165 if ( $tokenizer_self->{_in_pod} ) { 14166 14167 # This gets tricky..above a __DATA__ or __END__ section, perl 14168 # accepts '=cut' as the start of pod section. But afterwards, 14169 # only pod utilities see it and they may ignore an =cut without 14170 # leading =head. In any case, this isn't good. 14171 if ( $input_line =~ /^=cut\b/ ) { 14172 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 14173 complain("=cut while not in pod ignored\n"); 14174 $tokenizer_self->{_in_pod} = 0; 14175 $line_of_tokens->{_line_type} = 'POD_STOP'; 14176 } 14177 else { 14178 $line_of_tokens->{_line_type} = 'POD_END'; 14179 complain( 14180"=cut starts a pod section .. this can fool pod utilities.\n" 14181 ); 14182 write_logfile_entry("Entering POD section\n"); 14183 } 14184 } 14185 14186 else { 14187 $line_of_tokens->{_line_type} = 'POD_START'; 14188 write_logfile_entry("Entering POD section\n"); 14189 } 14190 14191 return $line_of_tokens; 14192 } 14193 14194 # update indentation levels for log messages 14195 if ( $input_line !~ /^\s*$/ ) { 14196 my $rlevels = $line_of_tokens->{_rlevels}; 14197 my $structural_indentation_level = $$rlevels[0]; 14198 my ( $python_indentation_level, $msg ) = 14199 find_indentation_level( $input_line, $structural_indentation_level ); 14200 if ($msg) { write_logfile_entry("$msg") } 14201 $line_of_tokens->{_python_indentation_level} = 14202 $python_indentation_level; 14203 } 14204 14205 # see if this line contains here doc targets 14206 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 14207 if (@$rhere_target_list) { 14208 14209 #my $here_doc_target = shift @$rhere_target_list; 14210 my ( $here_doc_target, $here_quote_character ) = 14211 @{ shift @$rhere_target_list }; 14212 $tokenizer_self->{_in_here_doc} = 1; 14213 $tokenizer_self->{_here_doc_target} = $here_doc_target; 14214 $tokenizer_self->{_here_quote_character} = $here_quote_character; 14215 write_logfile_entry("Entering HERE document $here_doc_target\n"); 14216 $started_looking_for_here_target_at = $input_line_number; 14217 } 14218 14219 # NOTE: __END__ and __DATA__ statements are written unformatted 14220 # because they can theoretically contain additional characters 14221 # which are not tokenized (and cannot be read with <DATA> either!). 14222 if ( $tokenizer_self->{_in_data} ) { 14223 $line_of_tokens->{_line_type} = 'DATA_START'; 14224 write_logfile_entry("Starting __DATA__ section\n"); 14225 $tokenizer_self->{_saw_data} = 1; 14226 14227 # keep parsing after __DATA__ if use SelfLoader was seen 14228 if ( $tokenizer_self->{_saw_selfloader} ) { 14229 $tokenizer_self->{_in_data} = 0; 14230 write_logfile_entry( 14231 "SelfLoader seen, continuing; -nlsl deactivates\n"); 14232 } 14233 14234 return $line_of_tokens; 14235 } 14236 14237 elsif ( $tokenizer_self->{_in_end} ) { 14238 $line_of_tokens->{_line_type} = 'END_START'; 14239 write_logfile_entry("Starting __END__ section\n"); 14240 $tokenizer_self->{_saw_end} = 1; 14241 14242 # keep parsing after __END__ if use AutoLoader was seen 14243 if ( $tokenizer_self->{_saw_autoloader} ) { 14244 $tokenizer_self->{_in_end} = 0; 14245 write_logfile_entry( 14246 "AutoLoader seen, continuing; -nlal deactivates\n"); 14247 } 14248 return $line_of_tokens; 14249 } 14250 14251 # now, finally, we know that this line is type 'CODE' 14252 $line_of_tokens->{_line_type} = 'CODE'; 14253 14254 # remember if we have seen any real code 14255 if ( !$tokenizer_self->{_started_tokenizing} 14256 && $input_line !~ /^\s*$/ 14257 && $input_line !~ /^\s*#/ ) 14258 { 14259 $tokenizer_self->{_started_tokenizing} = 1; 14260 } 14261 14262 if ( $tokenizer_self->{_debugger_object} ) { 14263 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); 14264 } 14265 14266 # Note: if keyword 'format' occurs in this line code, it is still CODE 14267 # (keyword 'format' need not start a line) 14268 if ( $tokenizer_self->{_in_format} ) { 14269 write_logfile_entry("Entering format section\n"); 14270 } 14271 14272 if ( $tokenizer_self->{_in_quote} 14273 and ( $tokenizer_self->{_line_start_quote} < 0 ) ) 14274 { 14275 14276 if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { 14277 $tokenizer_self->{_line_start_quote} = $input_line_number; 14278 $tokenizer_self->{_quote_target} = $quote_target; 14279 write_logfile_entry( 14280 "Start multi-line quote or pattern ending in $quote_target\n"); 14281 } 14282 } 14283 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) 14284 and !$tokenizer_self->{_in_quote} ) 14285 { 14286 $tokenizer_self->{_line_start_quote} = -1; 14287 write_logfile_entry("End of multi-line quote or pattern\n"); 14288 } 14289 14290 # we are returning a line of CODE 14291 return $line_of_tokens; 14292} 14293 14294sub find_starting_indentation_level { 14295 14296 my $starting_level = 0; 14297 my $know_input_tabstr = -1; # flag for find_indentation_level 14298 14299 # use value if given as parameter 14300 if ( $tokenizer_self->{_know_starting_level} ) { 14301 $starting_level = $tokenizer_self->{_starting_level}; 14302 } 14303 14304 # if we know there is a hash_bang line, the level must be zero 14305 elsif ( $tokenizer_self->{_look_for_hash_bang} ) { 14306 $tokenizer_self->{_know_starting_level} = 1; 14307 } 14308 14309 # otherwise figure it out from the input file 14310 else { 14311 my $line; 14312 my $i = 0; 14313 my $structural_indentation_level = -1; # flag for find_indentation_level 14314 14315 my $msg = ""; 14316 while ( $line = 14317 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 14318 { 14319 14320 # if first line is #! then assume starting level is zero 14321 if ( $i == 1 && $line =~ /^\#\!/ ) { 14322 $starting_level = 0; 14323 last; 14324 } 14325 next if ( $line =~ /^\s*#/ ); # must not be comment 14326 next if ( $line =~ /^\s*$/ ); # must not be blank 14327 ( $starting_level, $msg ) = 14328 find_indentation_level( $line, $structural_indentation_level ); 14329 if ($msg) { write_logfile_entry("$msg") } 14330 last; 14331 } 14332 $msg = "Line $i implies starting-indentation-level = $starting_level\n"; 14333 14334 if ( $starting_level > 0 ) { 14335 14336 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 14337 if ( $input_tabstr eq "\t" ) { 14338 $msg .= "by guessing input tabbing uses 1 tab per level\n"; 14339 } 14340 else { 14341 my $cols = length($input_tabstr); 14342 $msg .= 14343 "by guessing input tabbing uses $cols blanks per level\n"; 14344 } 14345 } 14346 write_logfile_entry("$msg"); 14347 } 14348 $tokenizer_self->{_starting_level} = $starting_level; 14349 reset_indentation_level($starting_level); 14350} 14351 14352=pod 14353 14354Find indentation level given a input line. At the same time, try to 14355figure out the input tabbing scheme. 14356 14357There are two types of calls: 14358 14359Type 1: $structural_indentation_level < 0 14360 In this case we have to guess $input_tabstr to figure out the level. 14361 14362Type 2: $structural_indentation_level >= 0 14363 In this case the level of this line is known, and this routine can 14364 update the tabbing string, if still unknown, to make the level correct. 14365 14366=cut 14367 14368sub find_indentation_level { 14369 my ( $line, $structural_indentation_level ) = @_; 14370 my $level = 0; 14371 my $msg = ""; 14372 14373 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; 14374 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 14375 14376 # find leading whitespace 14377 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : ""; 14378 14379 # make first guess at input tabbing scheme if necessary 14380 if ( $know_input_tabstr < 0 ) { 14381 14382 $know_input_tabstr = 0; 14383 14384 if ( $tokenizer_self->{_tabs} ) { 14385 $input_tabstr = "\t"; 14386 if ( length($leading_whitespace) > 0 ) { 14387 if ( $leading_whitespace !~ /\t/ ) { 14388 14389 my $cols = $tokenizer_self->{_indent_columns}; 14390 14391 if ( length($leading_whitespace) < $cols ) { 14392 $cols = length($leading_whitespace); 14393 } 14394 $input_tabstr = " " x $cols; 14395 } 14396 } 14397 } 14398 else { 14399 $input_tabstr = " " x $tokenizer_self->{_indent_columns}; 14400 14401 if ( length($leading_whitespace) > 0 ) { 14402 if ( $leading_whitespace =~ /^\t/ ) { 14403 $input_tabstr = "\t"; 14404 } 14405 } 14406 } 14407 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 14408 $tokenizer_self->{_input_tabstr} = $input_tabstr; 14409 } 14410 14411 # determine the input tabbing scheme if possible 14412 if ( ( $know_input_tabstr == 0 ) 14413 && ( length($leading_whitespace) > 0 ) 14414 && ( $structural_indentation_level > 0 ) ) 14415 { 14416 my $saved_input_tabstr = $input_tabstr; 14417 14418 # check for common case of one tab per indentation level 14419 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 14420 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 14421 $input_tabstr = "\t"; 14422 $msg = "Guessing old indentation was tab character\n"; 14423 } 14424 } 14425 14426 else { 14427 14428 # detab any tabs based on 8 blanks per tab 14429 my $entabbed = ""; 14430 if ( $leading_whitespace =~ s/^\t+/ /g ) { 14431 $entabbed = "entabbed"; 14432 } 14433 14434 # now compute tabbing from number of spaces 14435 my $columns = 14436 length($leading_whitespace) / $structural_indentation_level; 14437 if ( $columns == int $columns ) { 14438 $msg = 14439 "Guessing old indentation was $columns $entabbed spaces\n"; 14440 } 14441 else { 14442 $columns = int $columns; 14443 $msg = 14444"old indentation is unclear, using $columns $entabbed spaces\n"; 14445 } 14446 $input_tabstr = " " x $columns; 14447 } 14448 $know_input_tabstr = 1; 14449 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 14450 $tokenizer_self->{_input_tabstr} = $input_tabstr; 14451 14452 # see if mistakes were made 14453 if ( ( $tokenizer_self->{_starting_level} > 0 ) 14454 && !$tokenizer_self->{_know_starting_level} ) 14455 { 14456 14457 if ( $input_tabstr ne $saved_input_tabstr ) { 14458 complain( 14459"I made a bad starting level guess; rerun with a value for -sil \n" 14460 ); 14461 } 14462 } 14463 } 14464 14465 # use current guess at input tabbing to get input indentation level 14466 # 14467 # Patch to handle a common case of entabbed leading whitespace 14468 # If the leading whitespace equals 4 spaces and we also have 14469 # tabs, detab the input whitespace assuming 8 spaces per tab. 14470 if ( length($input_tabstr) == 4 ) { 14471 $leading_whitespace =~ s/^\t+/ /g; 14472 } 14473 14474 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { 14475 my $pos = 0; 14476 14477 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) 14478 { 14479 $pos += $len_tab; 14480 $level++; 14481 } 14482 } 14483 return ( $level, $msg ); 14484} 14485 14486sub dump_token_types { 14487 my $class = shift; 14488 my $fh = shift; 14489 14490 # This should be the latest list of token types in use 14491 # adding NEW_TOKENS: add a comment here 14492 print $fh <<'END_OF_LIST'; 14493 14494Here is a list of the token types currently used. 14495For the following tokens, the "type" of a token is just the token itself. 14496 14497.. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <> 14498( ) <= >= == =~ !~ != ++ -- /= x= 14499... **= <<= >>= &&= ||= <=> 14500, + - / * | % ! x ~ = \ ? : . < > ^ & 14501 14502The following additional token types are defined: 14503 14504 type meaning 14505 b blank (white space) 14506 { indent: opening structural curly brace or square bracket or paren 14507 (code block, anonymous hash reference, or anonymous array reference) 14508 } outdent: right structural curly brace or square bracket or paren 14509 [ left non-structural square bracket (enclosing an array index) 14510 ] right non-structural square bracket 14511 ( left non-structural paren (all but a list right of an =) 14512 ) right non-structural parena 14513 L left non-structural curly brace (enclosing a key) 14514 R right non-structural curly brace 14515 ; terminal semicolon 14516 f indicates a semicolon in a "for" statement 14517 h here_doc operator << 14518 # a comment 14519 Q indicates a quote or pattern 14520 q indicates a qw quote block 14521 k a perl keyword 14522 C user-defined constant or constant function (with void prototype = ()) 14523 U user-defined function taking parameters 14524 G user-defined function taking block parameter (like grep/map/eval) 14525 M (unused, but reserved for subroutine definition name) 14526 P (unused, but -html uses it to label pod text) 14527 t type indicater such as %,$,@,*,&,sub 14528 w bare word (perhaps a subroutine call) 14529 i identifier of some type (with leading %, $, @, *, &, sub ) 14530 n a number 14531 v a v-string 14532 F a file test operator (like -e) 14533 Y File handle 14534 Z identifier in indirect object slot: may be file handle, object 14535 J LABEL: code block label 14536 j LABEL after next, last, redo, goto 14537 p unary + 14538 m unary - 14539 pp pre-increment operator ++ 14540 mm pre-decrement operator -- 14541END_OF_LIST 14542} 14543 14544# This is a currently unused debug routine 14545sub dump_functions { 14546 14547 my $fh = *STDOUT; 14548 my ( $pkg, $sub ); 14549 foreach $pkg ( keys %is_user_function ) { 14550 print $fh "\nnon-constant subs in package $pkg\n"; 14551 14552 foreach $sub ( keys %{ $is_user_function{$pkg} } ) { 14553 my $msg = ""; 14554 if ( $is_block_list_function{$pkg}{$sub} ) { 14555 $msg = 'block_list'; 14556 } 14557 14558 if ( $is_block_function{$pkg}{$sub} ) { 14559 $msg = 'block'; 14560 } 14561 print $fh "$sub $msg\n"; 14562 } 14563 } 14564 14565 foreach $pkg ( keys %is_constant ) { 14566 print $fh "\nconstants and constant subs in package $pkg\n"; 14567 14568 foreach $sub ( keys %{ $is_constant{$pkg} } ) { 14569 print $fh "$sub\n"; 14570 } 14571 } 14572} 14573 14574sub prepare_for_a_new_file { 14575 $saw_negative_indentation = 0; 14576 $id_scan_state = ''; 14577 $statement_type = ''; # currently either '' or 'use' 14578 $last_nonblank_token = ';'; # the only possible starting state which 14579 $last_nonblank_type = ';'; # will make a leading brace a code block 14580 $last_nonblank_block_type = ''; 14581 $last_nonblank_container_type = ''; 14582 $last_nonblank_type_sequence = ''; 14583 $last_last_nonblank_token = ';'; 14584 $last_last_nonblank_type = ';'; 14585 $last_last_nonblank_block_type = ''; 14586 $last_last_nonblank_container_type = ''; 14587 $last_last_nonblank_type_sequence = ''; 14588 $last_nonblank_prototype = ""; 14589 $identifier = ''; 14590 $in_quote = 0; # flag telling if we are chasing a quote, and what kind 14591 $quote_type = 'Q'; 14592 $quote_character = ""; # character we seek if chasing a quote 14593 $quote_pos = 0; # next character index to check for case of alphanum char 14594 $quote_depth = 0; 14595 $allowed_quote_modifiers = ""; 14596 $paren_depth = 0; 14597 $brace_depth = 0; 14598 $square_bracket_depth = 0; 14599 $current_package = "main"; 14600 @current_depth[ 0 .. $#closing_brace_names ] = 14601 (0) x scalar @closing_brace_names; 14602 @nesting_sequence_number[ 0 .. $#closing_brace_names ] = 14603 ( 0 .. $#closing_brace_names ); 14604 @current_sequence_number = (); 14605 14606 $paren_type[$paren_depth] = ''; 14607 $paren_semicolon_count[$paren_depth] = 0; 14608 $brace_type[$brace_depth] = ';'; # identify opening brace as code block 14609 $brace_structural_type[$brace_depth] = ''; 14610 $brace_context[$brace_depth] = UNKNOWN_CONTEXT; 14611 $paren_structural_type[$brace_depth] = ''; 14612 $square_bracket_type[$square_bracket_depth] = ''; 14613 $square_bracket_structural_type[$square_bracket_depth] = ''; 14614 $brace_package[$paren_depth] = $current_package; 14615 %is_constant = (); # user-defined constants 14616 %is_user_function = (); # user-defined functions 14617 %user_function_prototype = (); # their prototypes 14618 %is_block_function = (); 14619 %is_block_list_function = (); 14620 %saw_function_definition = (); 14621 $unexpected_error_count = 0; 14622 $want_paren = ""; 14623 $context = UNKNOWN_CONTEXT; 14624 @slevel_stack = (); 14625 $ci_string_in_tokenizer = ""; 14626 $continuation_string_in_tokenizer = "0"; 14627 $in_statement_continuation = 0; 14628 @lower_case_labels_at = (); 14629 $saw_v_string = 0; # for warning of v-strings on older perl 14630 $nesting_token_string = ""; 14631 $nesting_type_string = ""; 14632 $nesting_block_string = '1'; # initially in a block 14633 $nesting_list_string = '0'; # initially not in a list 14634 $nearly_matched_here_target_at = undef; 14635} 14636 14637sub get_quote_target { 14638 return matching_end_token($quote_character); 14639} 14640 14641sub get_indentation_level { 14642 return $level_in_tokenizer; 14643} 14644 14645sub reset_indentation_level { 14646 $level_in_tokenizer = $_[0]; 14647 $slevel_in_tokenizer = $_[0]; 14648 push @slevel_stack, $slevel_in_tokenizer; 14649} 14650 14651{ # begin closure tokenize_this_line 14652 14653 use constant BRACE => 0; 14654 use constant SQUARE_BRACKET => 1; 14655 use constant PAREN => 2; 14656 use constant QUESTION_COLON => 3; 14657 14658 my ( 14659 $block_type, $container_type, $correct_expecting, 14660 $expecting, $here_doc_target, $here_quote_character, 14661 $i, $i_tok, $last_nonblank_i, 14662 $next_tok, $next_type, $prototype, 14663 $rtoken_map, $rtoken_type, $rtokens, 14664 $tok, $type, $type_sequence, 14665 ); 14666 14667 my @here_target_list = (); # list of here-doc target strings 14668 14669 # ------------------------------------------------------------ 14670 # beginning of various scanner interfaces to simplify coding 14671 # ------------------------------------------------------------ 14672 sub scan_bare_identifier { 14673 ( $i, $tok, $type, $prototype ) = 14674 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, 14675 $rtoken_map ); 14676 } 14677 14678 sub scan_identifier { 14679 ( $i, $tok, $type, $id_scan_state, $identifier ) = 14680 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens ); 14681 } 14682 14683 sub scan_id { 14684 ( $i, $tok, $type, $id_scan_state ) = 14685 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, 14686 $id_scan_state ); 14687 } 14688 14689 my $number; 14690 14691 sub scan_number { 14692 ( $i, $type, $number ) = 14693 scan_number_do( $input_line, $i, $rtoken_map, $type ); 14694 } 14695 14696 # a sub to warn if token found where term expected 14697 sub error_if_expecting_TERM { 14698 if ( $expecting == TERM ) { 14699 if ( $really_want_term{$last_nonblank_type} ) { 14700 unexpected( $tok, "term", $i_tok, $last_nonblank_i ); 14701 1; 14702 } 14703 } 14704 } 14705 14706 # a sub to warn if token found where operator expected 14707 sub error_if_expecting_OPERATOR { 14708 if ( $expecting == OPERATOR ) { 14709 my $thing = defined $_[0] ? $_[0] : $tok; 14710 unexpected( $thing, "operator", $i_tok, $last_nonblank_i ); 14711 if ( $i_tok == 0 ) { 14712 interrupt_logfile(); 14713 warning("Missing ';' above?\n"); 14714 resume_logfile(); 14715 } 14716 1; 14717 } 14718 } 14719 14720 # ------------------------------------------------------------ 14721 # end scanner interfaces 14722 # ------------------------------------------------------------ 14723 14724 # ------------------------------------------------------------ 14725 # begin hash of code for handling most token types 14726 # ------------------------------------------------------------ 14727 my $tokenization_code = { 14728 '>' => sub { 14729 error_if_expecting_TERM() 14730 if ( $expecting == TERM ); 14731 }, 14732 '|' => sub { 14733 error_if_expecting_TERM() 14734 if ( $expecting == TERM ); 14735 }, 14736 '$' => sub { 14737 14738 # start looking for a scalar 14739 error_if_expecting_OPERATOR("Scalar") 14740 if ( $expecting == OPERATOR ); 14741 scan_identifier(); 14742 14743 if ( $identifier eq '$^W' ) { 14744 $tokenizer_self->{_saw_perl_dash_w} = 1; 14745 } 14746 14747 # Check for indentifier in indirect object slot 14748 # (vorboard.pl, sort.t) 14749 # /^(print|printf|sort|exec|system)$/ ) 14750 if ( 14751 ( $last_nonblank_token =~ /$indirect_object_taker/ ) 14752 14753 || ( ( $last_nonblank_token eq '(' ) 14754 && ( $paren_type[$paren_depth] =~ /$indirect_object_taker/ ) 14755 ) || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object 14756 ) 14757 { 14758 $type = 'Z'; 14759 } 14760 }, 14761 '(' => sub { 14762 14763 ++$paren_depth; 14764 $paren_semicolon_count[$paren_depth] = 0; 14765 if ($want_paren) { 14766 $container_type = $want_paren; 14767 $want_paren = ""; 14768 } 14769 else { 14770 $container_type = $last_nonblank_token; 14771 } 14772 $paren_type[$paren_depth] = $container_type; 14773 $type_sequence = increase_nesting_depth( PAREN, $i_tok ); 14774 14775 #------------------------------------------------------------- 14776 # FUTURE UPDATE: give almost all parens full indentation. 14777 # parens are trouble because the user is free to add almost 14778 # as many as desired, even if they are not necessary. And 14779 # they can be misleading, because perl flattens lists. 14780 #$type = '{'; 14781 #------------------------------------------------------------- 14782 14783 #print "at '(' i=$i last_type=$last_nonblank_type last_tok=$last_nonblank_token\n"; 14784 14785 if ( $context == LIST_CONTEXT && $last_nonblank_token eq '=' ) { 14786 $type = '{'; 14787 } 14788 14789 # propagate types down through nested parens 14790 # for example: the second paren in 'if ((' would be structural 14791 # since the first is. 14792 14793 elsif ( $last_nonblank_token eq '(' ) { 14794 $type = $last_nonblank_type; 14795 } 14796 14797 # make most lists structural, including 14798 # sub call parameter lists and &&, ||, ! 14799 14800=pod 14801 NOTE: type ',' is not here because it causes subtle 14802 problems with continuation indentation for something 14803 like this, where the first 'or' will not get indented. 14804 14805 assert( 14806 __LINE__, 14807 ( not defined $check ) 14808 or ref $check 14809 or $check eq "new" 14810 or $check eq "old", 14811 ); 14812=cut 14813 14814 # TESTING: added '.' and '=' and F 14815 elsif ( $last_nonblank_type =~ /^([xwiZUF!=\?:\.]|\|\||\&\&)$/ ) { 14816 $type = '{'; 14817 } 14818 14819 ############################################ 14820 # TESTING: 14821 ##elsif ( $last_nonblank_type !~ /^[,]$/ ) { 14822 ## $type = '{'; 14823 ##} 14824 ############################################ 14825 14826 # paren after keyword.. 14827 # map1.t 14828 elsif ( $last_nonblank_type eq 'k' ) { $type = '{' } 14829 14830 if ( $last_nonblank_type eq ')' ) { 14831 warning( 14832 "Syntax error? found token '$last_nonblank_type' then '('\n" 14833 ); 14834 } 14835 $paren_structural_type[$paren_depth] = $type; 14836 14837 }, 14838 ')' => sub { 14839 $type_sequence = decrease_nesting_depth( PAREN, $i_tok ); 14840 14841 if ( $paren_structural_type[$paren_depth] eq '{' ) { 14842 $type = '}'; 14843 } 14844 14845 $container_type = $paren_type[$paren_depth]; 14846 if ( $paren_type[$paren_depth] =~ /^(for|foreach)$/ ) { 14847 my $num_sc = $paren_semicolon_count[$paren_depth]; 14848 if ( $num_sc > 0 && $num_sc != 2 ) { 14849 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); 14850 } 14851 } 14852 14853 if ( $paren_depth > 0 ) { $paren_depth-- } 14854 }, 14855 ',' => sub { 14856 if ( $last_nonblank_type eq ',' ) { 14857 complain("Repeated ','s \n"); 14858 } 14859## FIXME: need to move this elsewhere, perhaps check after a '(' 14860## elsif ($last_nonblank_token eq '(') { 14861## warning("Leading ','s illegal in some versions of perl\n"); 14862## } 14863 }, 14864 ';' => sub { 14865 $context = UNKNOWN_CONTEXT; 14866 $statement_type = ''; 14867 14868 if ( $paren_type[$paren_depth] =~ /^(for|foreach)$/ ) 14869 { # mark ; in for loop 14870 14871 # Be careful: we do not want a semicolon such as the 14872 # following to be included: 14873 # 14874 # for (sort {strcoll($a,$b);} keys %investments) { 14875 14876 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] 14877 && $square_bracket_depth == 14878 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) 14879 { 14880 14881 $type = 'f'; 14882 $paren_semicolon_count[$paren_depth]++; 14883 } 14884 } 14885 14886 }, 14887 '"' => sub { 14888 error_if_expecting_OPERATOR("String") 14889 if ( $expecting == OPERATOR ); 14890 $in_quote = 1; 14891 $type = 'Q'; 14892 $allowed_quote_modifiers = ""; 14893 }, 14894 "'" => sub { 14895 error_if_expecting_OPERATOR("String") 14896 if ( $expecting == OPERATOR ); 14897 $in_quote = 1; 14898 $type = 'Q'; 14899 $allowed_quote_modifiers = ""; 14900 }, 14901 '`' => sub { 14902 error_if_expecting_OPERATOR("String") 14903 if ( $expecting == OPERATOR ); 14904 $in_quote = 1; 14905 $type = 'Q'; 14906 $allowed_quote_modifiers = ""; 14907 }, 14908 '/' => sub { 14909 my $is_pattern; 14910 14911 if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. 14912 my $msg; 14913 ( $is_pattern, $msg ) = 14914 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map ); 14915 14916 if ($msg) { 14917 write_diagnostics("DIVIDE:$msg\n"); 14918 write_logfile_entry($msg); 14919 } 14920 } 14921 else { $is_pattern = ( $expecting == TERM ) } 14922 14923 if ($is_pattern) { 14924 $in_quote = 1; 14925 $type = 'Q'; 14926 $allowed_quote_modifiers = '[cgimosx]'; 14927 } 14928 else { # not a pattern; check for a /= token 14929 14930 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= 14931 $i++; 14932 $tok = '/='; 14933 $type = $tok; 14934 } 14935 14936 #DEBUG - collecting info on what tokens follow a divide 14937 # for development of guessing algorithm 14938 #if ( numerator_expected( $i, $rtokens ) < 0 ) { 14939 # #write_diagnostics( "DIVIDE? $input_line\n" ); 14940 #} 14941 } 14942 }, 14943 '{' => sub { 14944 14945 # if we just saw a ')', we will label this block with 14946 # its type. We need to do this to allow sub 14947 # code_block_type to determine if this brace starts a 14948 # code block or anonymous hash. (The type of a paren 14949 # pair is the preceding token, such as 'if', 'else', 14950 # etc). 14951 $container_type = ""; 14952 if ( $last_nonblank_token eq ')' ) { 14953 $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; 14954 14955 # defensive move in case of a nesting error (pbug.t) 14956 # in which this ')' had no previous '(' 14957 # this nesting error will have been caught 14958 if ( !defined($last_nonblank_token) ) { 14959 $last_nonblank_token = 'if'; 14960 } 14961 14962 # check for syntax error here; 14963 # expecting: (if|elsif|while|until|for|foreach) 14964 # Delete this if it is too redundant 14965 #unless ( $is_keyword{$last_nonblank_token} ) { 14966 unless ( $last_nonblank_token =~ 14967 /^(if|elsif|unless|while|until|for|foreach)$/ ) 14968 { 14969 warning( 14970"syntax error at ') {', didn't see (if|elsif|unless|while|until|for|foreach)\n" 14971 ); 14972 } 14973 } 14974 14975 # now identify which of the three possible types of 14976 # curly braces we have: hash index container, anonymous 14977 # hash reference, or code block. 14978 14979 # non-structural (hash index) curly brace pair 14980 # get marked 'L' and 'R' 14981 if ( is_non_structural_brace() ) { 14982 $type = 'L'; 14983 } 14984 14985 # code and anonymous hash have the same type, '{', but are 14986 # distinguished by 'block_type', 14987 # which will be blank for an anonymous hash 14988 else { 14989 $block_type = code_block_type(); 14990 } 14991 $brace_type[ ++$brace_depth ] = $block_type; 14992 $brace_package[$brace_depth] = $current_package; 14993 $type_sequence = increase_nesting_depth( BRACE, $i_tok ); 14994 $brace_structural_type[$brace_depth] = $type; 14995 $brace_context[$brace_depth] = $context; 14996 }, 14997 '}' => sub { 14998 $block_type = $brace_type[$brace_depth]; 14999 if ($block_type) { $statement_type = '' } 15000 15001 if ( defined( $brace_package[$brace_depth] ) ) { 15002 $current_package = $brace_package[$brace_depth]; 15003 } 15004 15005 # can happen on brace error (caught elsewhere) 15006 else { 15007 } 15008 $type_sequence = decrease_nesting_depth( BRACE, $i_tok ); 15009 15010 if ( $brace_structural_type[$brace_depth] eq 'L' ) { 15011 $type = 'R'; 15012 } 15013 15014 # propagate type information for 'do' and 'eval' blocks. 15015 # This is necessary to enable us to know if an operator 15016 # or term is expected next 15017 if ( $brace_type[$brace_depth] =~ /$block_operator/ ) { 15018 $tok = $brace_type[$brace_depth]; 15019 } 15020 15021 $context = $brace_context[$brace_depth]; 15022 if ( $brace_depth > 0 ) { $brace_depth--; } 15023 }, 15024 '&' => sub { # maybe sub call? start looking 15025 15026 # We have to check for sub call unless we are sure we 15027 # are expecting an operator. This example from s2p 15028 # got mistaken as a q operator in an early version: 15029 # print BODY &q(<<'EOT'); 15030 if ( $expecting != OPERATOR ) { 15031 scan_identifier(); 15032 } 15033 else { 15034 } 15035 }, 15036 '<' => sub { # angle operator or less than? 15037 15038 if ( $expecting != OPERATOR ) { 15039 ( $i, $type ) = 15040 find_angle_operator_termination( $input_line, $i, $rtoken_map, 15041 $expecting ); 15042 15043 } 15044 else { 15045 } 15046 }, 15047 '?' => sub { # ?: conditional or starting pattern? 15048 15049 my $is_pattern; 15050 15051 if ( $expecting == UNKNOWN ) { 15052 15053 my $msg; 15054 ( $is_pattern, $msg ) = 15055 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map ); 15056 15057 if ($msg) { write_logfile_entry($msg) } 15058 } 15059 else { $is_pattern = ( $expecting == TERM ) } 15060 15061 if ($is_pattern) { 15062 $in_quote = 1; 15063 $type = 'Q'; 15064 $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this 15065 } 15066 else { 15067 15068 $type_sequence = 15069 increase_nesting_depth( QUESTION_COLON, $i_tok ); 15070 } 15071 }, 15072 '*' => sub { # typeglob, or multiply? 15073 15074 if ( $expecting == TERM ) { 15075 scan_identifier(); 15076 } 15077 else { 15078 15079 if ( $$rtokens[ $i + 1 ] eq '=' ) { 15080 $tok = '*='; 15081 $type = $tok; 15082 $i++; 15083 } 15084 elsif ( $$rtokens[ $i + 1 ] eq '*' ) { 15085 $tok = '**'; 15086 $type = $tok; 15087 $i++; 15088 if ( $$rtokens[ $i + 1 ] eq '=' ) { 15089 $tok = '**='; 15090 $type = $tok; 15091 $i++; 15092 } 15093 } 15094 } 15095 }, 15096 '.' => sub { # what kind of . ? 15097 15098 if ( $expecting != OPERATOR ) { 15099 scan_number(); 15100 if ( $type eq '.' ) { 15101 error_if_expecting_TERM() 15102 if ( $expecting == TERM ); 15103 } 15104 } 15105 else { 15106 } 15107 }, 15108 ':' => sub { 15109 15110 $type_sequence = decrease_nesting_depth( QUESTION_COLON, $i_tok ); 15111 if ( $last_nonblank_token eq '?' ) { 15112 warning("Syntax error near ? :\n"); 15113 } 15114 }, 15115 '+' => sub { # what kind of plus? 15116 15117 if ( $expecting == TERM ) { 15118 scan_number(); 15119 15120 # unary plus is safest assumption if not a number 15121 if ( !defined($number) ) { $type = 'p'; } 15122 } 15123 elsif ( $expecting == OPERATOR ) { 15124 } 15125 else { 15126 if ( $next_type eq 'w' ) { $type = 'p' } 15127 } 15128 }, 15129 '@' => sub { 15130 15131 error_if_expecting_OPERATOR("Array") 15132 if ( $expecting == OPERATOR ); 15133 scan_identifier(); 15134 }, 15135 '%' => sub { # hash or modulo? 15136 15137 # first guess is hash if no following blank 15138 if ( $expecting == UNKNOWN ) { 15139 if ( $next_type ne 'b' ) { $expecting = TERM } 15140 } 15141 if ( $expecting == TERM ) { 15142 scan_identifier(); 15143 } 15144 }, 15145 '[' => sub { 15146 $square_bracket_type[ ++$square_bracket_depth ] = 15147 $last_nonblank_token; 15148 $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok ); 15149 15150 # It may seem odd, but structural square brackets have 15151 # type '{' and '}'. This simplifies the indentation logic. 15152 if ( !is_non_structural_brace() ) { 15153 $type = '{'; 15154 } 15155 $square_bracket_structural_type[$square_bracket_depth] = $type; 15156 }, 15157 ']' => sub { 15158 $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok ); 15159 15160 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) 15161 { 15162 $type = '}'; 15163 } 15164 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } 15165 }, 15166 '-' => sub { # what kind of minus? 15167 15168 if ( ( $expecting != OPERATOR ) 15169 && $is_file_test_operator{$next_tok} ) 15170 { 15171 $i++; 15172 $tok .= $next_tok; 15173 $type = 'F'; 15174 } 15175 elsif ( $expecting == TERM ) { 15176 scan_number(); 15177 15178 # maybe part of bareword token? unary is safest 15179 if ( !defined($number) ) { $type = 'm'; } 15180 15181 } 15182 elsif ( $expecting == OPERATOR ) { 15183 } 15184 else { 15185 15186 if ( $next_type eq 'w' ) { 15187 $type = 'm'; 15188 } 15189 } 15190 }, 15191 15192 '^' => sub { 15193 15194 # check for special variables like ${^WARNING_BITS} 15195 if ( $expecting == TERM ) { 15196 15197 # FIXME: this should work but will not catch errors 15198 # because we also have to be sure that previous token is 15199 # a type character ($,@,%). 15200 if ( $last_nonblank_token eq '{' 15201 && ( $next_tok =~ /^[A-Za-z_]/ ) ) 15202 { 15203 15204 if ( $next_tok eq 'W' ) { 15205 $tokenizer_self->{_saw_perl_dash_w} = 1; 15206 } 15207 $tok = $tok . $next_tok; 15208 $i = $i + 1; 15209 $type = 'w'; 15210 } 15211 15212 else { 15213 unless ( error_if_expecting_TERM() ) { 15214 15215 # Something like this is valid but strange: 15216 # undef ^I; 15217 complain("The '^' seems unusual here\n"); 15218 } 15219 } 15220 } 15221 }, 15222 15223 '::' => sub { # probably a sub call 15224 scan_bare_identifier(); 15225 }, 15226 '<<' => sub { # maybe a here-doc? 15227 return 15228 unless ( $i < $max_token_index ) 15229 ; # here-doc not possible if end of line 15230 15231 if ( $expecting != OPERATOR ) { 15232 my ($found_target); 15233 ( $found_target, $here_doc_target, $here_quote_character, $i ) = 15234 find_here_doc( $expecting, $i, $rtokens, $rtoken_map ); 15235 15236 if ($found_target) { 15237 push @here_target_list, 15238 [ $here_doc_target, $here_quote_character ]; 15239 $type = 'h'; 15240 if ( length($here_doc_target) > 80 ) { 15241 my $truncated = substr( $here_doc_target, 0, 80 ); 15242 complain("Long here-target: '$truncated' ...\n"); 15243 } 15244 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { 15245 complain( 15246 "Unconventional here-target: '$here_doc_target'\n" 15247 ); 15248 } 15249 } 15250 elsif ( $expecting == TERM ) { 15251 15252 # shouldn't happen.. 15253 warning("Program bug; didn't find here doc target\n"); 15254 report_definite_bug(); 15255 } 15256 } 15257 else { 15258 } 15259 }, 15260 '->' => sub { 15261 15262 # if -> points to a bare word, we must scan for an identifier, 15263 # otherwise something like ->y would look like the y operator 15264 scan_identifier(); 15265 }, 15266 15267 # type = 'pp' for pre-increment, '++' for post-increment 15268 '++' => sub { 15269 if ( $expecting == TERM ) { $type = 'pp' } 15270 }, 15271 15272 '=>' => sub { 15273 if ( $last_nonblank_type eq $tok ) { 15274 complain("Repeated '=>'s \n"); 15275 } 15276 }, 15277 15278 # type = 'mm' for pre-decrement, '--' for post-decrement 15279 '--' => sub { 15280 15281 if ( $expecting == TERM ) { $type = 'mm' } 15282 }, 15283 15284 '&&' => sub { 15285 error_if_expecting_TERM() 15286 if ( $expecting == TERM ); 15287 }, 15288 15289 '||' => sub { 15290 error_if_expecting_TERM() 15291 if ( $expecting == TERM ); 15292 }, 15293 }; 15294 15295 # ------------------------------------------------------------ 15296 # end hash of code for handling individual token types 15297 # ------------------------------------------------------------ 15298 15299 sub tokenize_this_line { 15300 15301=pod 15302 15303This routine breaks a line of perl code into tokens which are of use in 15304indentation and reformatting. One of my goals has been to define tokens 15305such that a newline may be inserted between any pair of tokens without 15306changing or invalidating the program. This version comes close to this, 15307although there are necessarily a few exceptions which must be caught by 15308the formatter. Many of these involve the treatment of bare words. 15309 15310The tokens and their types are returned in arrays. See previous 15311routine for their names. 15312 15313See also the array "valid_token_types" in the BEGIN section for an 15314up-to-date list. 15315 15316To simplify things, token types are either a single character, or they 15317are identical to the tokens themselves. 15318 15319As a debugging aid, the -D flag creates a file containing a side-by-side 15320comparison of the input string and its tokenization for each line of a file. 15321This is an invaluable debugging aid. 15322 15323In addition to tokens, and some associated quantities, the tokenizer 15324also returns flags indication any special line types. These include 15325quotes, here_docs, formats. 15326 15327----------------------------------------------------------------------- 15328 15329How to add NEW_TOKENS: 15330 15331New token types will undoubtedly be needed in the future both to keep up 15332with changes in perl and to help adapt the tokenizer to other applications. 15333 15334Here are some notes on the minimal steps. I wrote these notes while 15335adding the 'v' token type for v-strings, which are things like version 15336numbers 5.6.0, and ip addresses, and will use that as an example. ( You 15337can use your editor to search for the string "NEW_TOKENS" to find the 15338appropriate sections to change): 15339 15340*. Try to talk somebody else into doing it! If not, .. 15341 15342*. Make a backup of your current version in case things don't work out! 15343 15344*. Think of a new, unused character for the token type, and add to 15345the array @valid_token_types in the BEGIN section of this package. 15346For example, I used 'v' for v-strings. 15347 15348*. Implement coding to recognize the $type of the token in this routine. 15349This is the hardest part, and is best done by immitating or modifying 15350some of the existing coding. For example, to recognize v-strings, I 15351patched 'sub scan_bare_identifier' to recognize v-strings beginning with 15352'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. 15353 15354*. Update sub operator_expected. This update is critically important but 15355the coding is trivial. Look at the comments in that routine for help. 15356For v-strings, which should behave like numbers, I just added 'v' to the 15357regex used to handle numbers and strings (types 'n' and 'Q'). 15358 15359*. Implement a 'bond strength' rule in sub set_bond_strengths in 15360PerlTidy::Formatter for breaking lines around this token type. You can 15361skip this step and take the default at first, then adjust later to get 15362desired results. For adding type 'v', I looked at sub bond_strength and 15363saw that number type 'n' was using default strengths, so I didn't do 15364anything. I may tune it up someday if I don't like the way line 15365breaks with v-strings look. 15366 15367*. Implement a 'whitespace' rule in sub set_white_space_flag in 15368PerlTidy::Formatter. For adding type 'v', I looked at this routine 15369and saw that type 'n' used spaces on both sides, so I just added 'v' 15370to the array @spaces_both_sides. 15371 15372*. Update HtmlWriter package so that users can colorize the token as 15373desired. This is quite easy; see comments identified by 'NEW_TOKENS' in 15374that package. For v-strings, I initially chose to use a default color 15375equal to the default for numbers, but it might be nice to change that 15376eventually. 15377 15378*. Update comments in PerlTidy::Tokenizer::dump_token_types. 15379 15380*. Run lots and lots of debug tests. Start with special files designed 15381to test the new token type. Run with the -D flag to create a .DEBUG 15382file which shows the tokenization. When these work ok, test as many old 15383scripts as possible. Start with all of the '.t' files in the 'test' 15384directory of the distribution file. Compare .tdy output with previous 15385version and updated version to see the differences. Then include as 15386many more files as possible. My own technique has been to collect a huge 15387number of perl scripts (thousands!) into one directory and run perltidy 15388*, then run diff between the output of the previous version and the 15389current version. 15390 15391----------------------------------------------------------------------- 15392 15393=cut 15394 15395 my $line_of_tokens = shift; 15396 my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; 15397 15398 # patch while coding change is underway 15399 # make callers private data to allow access 15400 # $tokenizer_self = $caller_tokenizer_self; 15401 15402 # extract line number for use in error messages 15403 $input_line_number = $line_of_tokens->{_line_number}; 15404 15405 # check for pod documentation 15406 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { 15407 15408 # must not be in multi-line quote 15409 # and must not be in an eqn 15410 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) 15411 { 15412 $tokenizer_self->{_in_pod} = 1; 15413 return; 15414 } 15415 } 15416 15417 $input_line = $untrimmed_input_line; 15418 15419 chomp $input_line; 15420 15421 # trim start of this line unless we are continuing a quoted line 15422 # do not trim end because we might end in a quote (test: deken4.pl) 15423 # PerlTidy::Formatter will delete needless trailing blanks 15424 unless ( $in_quote && ( $quote_type eq 'Q' ) ) { 15425 $input_line =~ s/^\s*//; # trim left end 15426 } 15427 15428 # initialize for the main loop 15429 my @output_token_list = (); # stack of output token indexes 15430 my @output_token_type = (); # token types 15431 my @output_block_type = (); # types of code block 15432 my @output_container_type = (); # paren types, such as if, elsif, .. 15433 my @output_type_sequence = (); # nesting sequential number 15434 15435 $tok = $last_nonblank_token; 15436 $type = $last_nonblank_type; 15437 $prototype = $last_nonblank_prototype; 15438 $last_nonblank_i = -1; 15439 $block_type = $last_nonblank_block_type; 15440 $container_type = $last_nonblank_container_type; 15441 $type_sequence = $last_nonblank_type_sequence; 15442 @here_target_list = (); # list of here-doc target strings 15443 15444 $peeked_ahead = 0; 15445 15446 # tokenization is done in two stages.. 15447 # stage 1 is a very simple pre-tokenization 15448 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens 15449 15450 # a little optimization for a full-line comment 15451 if ( !$in_quote && ( $input_line =~ /^#/ ) ) { 15452 $max_tokens_wanted = 1 # no use tokenizing a comment 15453 } 15454 15455 # start by breaking the line into pre-tokens 15456 ( $rpretokens, $rpretoken_map, $rpretoken_type ) = 15457 pre_tokenize( $input_line, $max_tokens_wanted ); 15458 15459 $max_token_index = scalar(@$rpretokens) - 1; 15460 push ( @$rpretokens, ' ', ' ', ' ' ) 15461 ; # extra whitespace simplifies logic 15462 push ( @$rpretoken_map, 0, 0, 0 ); # shouldn't be referenced 15463 push ( @$rpretoken_type, 'b', 'b', 'b' ); 15464 15465 # temporary copies while coding change is underway 15466 ( $rtokens, $rtoken_map, $rtoken_type ) = 15467 ( $rpretokens, $rpretoken_map, $rpretoken_type ); 15468 15469 # initialize for main loop 15470 for $i ( 0 .. $max_token_index + 3 ) { 15471 $output_token_type[$i] = ""; 15472 $output_block_type[$i] = ""; 15473 $output_container_type[$i] = ""; 15474 $output_type_sequence[$i] = ""; 15475 } 15476 $i = -1; 15477 $i_tok = -1; 15478 15479 # ------------------------------------------------------------ 15480 # begin main tokenization loop 15481 # ------------------------------------------------------------ 15482 15483 # we are looking at each pre-token of one line and combining them 15484 # into tokens 15485 while ( ++$i <= $max_token_index ) { 15486 15487 if ($in_quote) { # continue looking for end of a quote 15488 $type = $quote_type; 15489 15490 unless (@output_token_list) { # initialize if continuation line 15491 push ( @output_token_list, $i ); 15492 $output_token_type[$i] = $type; 15493 15494 } 15495 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); 15496 15497 # scan for the end of the quote or pattern 15498 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = 15499 do_quote( $i, $in_quote, $quote_character, $quote_pos, 15500 $quote_depth, $rtokens, $rtoken_map ); 15501 15502 # all done if we didn't find it 15503 last if ($in_quote); 15504 15505 # re-initialize for next search 15506 $quote_character = ''; 15507 $quote_pos = 0; 15508 $quote_type = 'Q'; 15509 last if ( ++$i > $max_token_index ); 15510 15511 # look for any modifiers 15512 if ($allowed_quote_modifiers) { 15513 15514 # check for exact quote modifiers 15515 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { 15516 my $str = $$rtokens[$i]; 15517 while ( $str =~ /\G$allowed_quote_modifiers/gc ) { } 15518 15519 if ( defined( pos($str) ) ) { 15520 15521 # matched 15522 if ( pos($str) == length($str) ) { 15523 last if ( ++$i > $max_token_index ); 15524 } 15525 15526 # Looks like a joined quote modifier 15527 # and keyword, maybe something like 15528 # s/xxx/yyy/gefor @k=... 15529 # Example is "galgen.pl". Would have to split 15530 # the word and insert a new token in the 15531 # pre-token list. This is so rare that I haven't 15532 # done it. Will just issue a warning citation. 15533 15534 # This error might also be triggered if my quote 15535 # modifier characters are incomplete 15536 else { 15537 warning(<<EOM); 15538 15539Partial match to quote modifier $allowed_quote_modifiers at word: '$str' 15540Please put a space between quote modifiers and trailing keywords. 15541EOM 15542 15543 # print "token $$rtokens[$i]\n"; 15544 # my $num = length($str) - pos($str); 15545 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); 15546 # print "continuing with new token $$rtokens[$i]\n"; 15547 15548 # skipping past this token does least damage 15549 last if ( ++$i > $max_token_index ); 15550 } 15551 } 15552 else { 15553 15554 # example file: rokicki4.pl 15555 # This error might also be triggered if my quote 15556 # modifier characters are incomplete 15557 write_logfile_entry( 15558"Note: found word $str at quote modifier location\n" 15559 ); 15560 } 15561 } 15562 15563 # re-initialize 15564 $allowed_quote_modifiers = ""; 15565 } 15566 } 15567 15568 unless ( $tok =~ /^\s*$/ ) { 15569 15570 # try to catch some common errors 15571 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { 15572 15573 if ( $last_nonblank_token eq 'eq' ) { 15574 complain("Should 'eq' be '==' here ?\n"); 15575 } 15576 elsif ( $last_nonblank_token eq 'ne' ) { 15577 complain("Should 'ne' be '!=' here ?\n"); 15578 } 15579 } 15580 $last_last_nonblank_token = $last_nonblank_token; 15581 $last_last_nonblank_type = $last_nonblank_type; 15582 $last_last_nonblank_block_type = $last_nonblank_block_type; 15583 $last_last_nonblank_container_type = 15584 $last_nonblank_container_type; 15585 $last_last_nonblank_type_sequence = 15586 $last_nonblank_type_sequence; 15587 $last_nonblank_token = $tok; 15588 $last_nonblank_type = $type; 15589 $last_nonblank_prototype = $prototype; 15590 $last_nonblank_block_type = $block_type; 15591 $last_nonblank_container_type = $container_type; 15592 $last_nonblank_type_sequence = $type_sequence; 15593 $last_nonblank_i = $i_tok; 15594 } 15595 15596 # store previous token type 15597 if ( $i_tok >= 0 ) { 15598 $output_token_type[$i_tok] = $type; 15599 $output_block_type[$i_tok] = $block_type; 15600 $output_container_type[$i_tok] = $container_type; 15601 $output_type_sequence[$i_tok] = $type_sequence; 15602 } 15603 my $pre_tok = $$rtokens[$i]; # get the next pre-token 15604 my $pre_type = $$rtoken_type[$i]; # and type 15605 $tok = $pre_tok; 15606 $type = $pre_type; # to be modified as necessary 15607 $block_type = ""; # blank for all tokens except code block braces 15608 $container_type = ""; # blank for all tokens except some parens 15609 $type_sequence = ""; # blank for all tokens except ?/: 15610 $prototype = ""; # blank for all tokens except user defined subs 15611 $i_tok = $i; 15612 15613 # this pre-token will start an output token 15614 push ( @output_token_list, $i_tok ); 15615 15616 # continue gathering identifier if necessary 15617 # but do not start on blanks and comments 15618 if ( $id_scan_state && $pre_type !~ /[b#]/ ) { 15619 15620 if ( $id_scan_state =~ /(sub|package)/ ) { 15621 scan_id(); 15622 } 15623 else { 15624 scan_identifier(); 15625 } 15626 15627 last if ($id_scan_state); 15628 next if ( ( $i > 0 ) || $type ); 15629 15630 # didn't find any token; start over 15631 $type = $pre_type; 15632 $tok = $pre_tok; 15633 } 15634 15635 # handle whitespace tokens.. 15636 next if ( $type eq 'b' ); 15637 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; 15638 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; 15639 15640 # Build larger tokens where possible, since we are not in a quote. 15641 # 15642 # First try to assemble digraphs. The following tokens are 15643 # excluded and handled specially: 15644 # '/=' is excluded because the / might start a pattern. 15645 # 'x=' is excluded since it might be $x=, with $ on previous line 15646 # '**' and *= might be typeglobs of punctuation variables 15647 # I have allowed tokens starting with <, such as <=, 15648 # because I don't think these could be valid angle operators. 15649 # test file: storrs4.pl 15650 my $test_tok = $tok . $$rtokens[ $i + 1 ]; 15651 15652 if ( 15653 $is_digraph{$test_tok} 15654 && ( $test_tok ne '/=' ) # might be pattern 15655 && ( $test_tok ne 'x=' ) # might be $x 15656 && ( $test_tok ne '**' ) # typeglob? 15657 && ( $test_tok ne '*=' ) # typeglob? 15658 ) 15659 { 15660 $tok = $test_tok; 15661 $i++; 15662 15663 # Now try to assemble trigraphs. Note that all possible 15664 # perl trigraphs can be constructed by appending a character 15665 # to a digraph. 15666 $test_tok = $tok . $$rtokens[ $i + 1 ]; 15667 15668 if ( $is_trigraph{$test_tok} ) { 15669 $tok = $test_tok; 15670 $i++; 15671 } 15672 } 15673 $type = $tok; 15674 $next_tok = $$rtokens[ $i + 1 ]; 15675 $next_type = $$rtoken_type[ $i + 1 ]; 15676 $expecting = operator_expected( $prev_type, $tok, $next_type ); 15677 15678 # This debug mode forces perltidy to rely exclusively on its 15679 # guessing algorithms. This is a simple way to test them. 15680 TOKENIZER_DEBUG_FLAG_GUESS && do { 15681 $correct_expecting = $expecting; 15682 $expecting = UNKNOWN; 15683 }; 15684 15685 TOKENIZER_DEBUG_FLAG_TOKENIZE && do { 15686 local $" = ')('; 15687 my @debug_list = ( 15688 $last_nonblank_token, $tok, 15689 $next_tok, $brace_depth, 15690 $brace_type[$brace_depth], $paren_depth, 15691 $paren_type[$paren_depth] 15692 ); 15693 print "TOKENIZE:(@debug_list)\n"; 15694 }; 15695 15696 ############################################################### 15697 # We have the next token, $tok. 15698 # Now we have to examine this token and decide what it is 15699 # and define its $type 15700 # 15701 # section 1: bare words 15702 ############################################################### 15703 15704 if ( $pre_type eq 'w' ) { 15705 my ( $next_nonblank_token, $i_next ) = 15706 find_next_nonblank_token( $i, $rtokens ); 15707 15708 # quote a word followed by => operator 15709 if ( $next_nonblank_token eq '=' ) { 15710 15711 if ( $$rtokens[ $i_next + 1 ] eq '>' ) { 15712 if ( $is_constant{$current_package}{$tok} ) { 15713 $type = 'C'; 15714 } 15715 elsif ( $is_user_function{$current_package}{$tok} ) { 15716 $type = 'U'; 15717 $prototype = 15718 $user_function_prototype{$current_package}{$tok}; 15719 } 15720 elsif ( $tok =~ /^v\d+$/ ) { 15721 $type = 'v'; 15722 unless ($saw_v_string) { report_v_string($tok) } 15723 } 15724 else { $type = 'w' } 15725 15726 next; 15727 } 15728 } 15729 15730 # quote a bare word within braces..like xxx->{s}; note that we 15731 # must be sure this is not a structural brace, to avoid 15732 # mistaking {s} in the following for a quoted bare word: 15733 # for(@[){s}bla}BLA} 15734 if ( ( $last_nonblank_type eq 'L' ) 15735 && ( $next_nonblank_token eq '}' ) ) 15736 { 15737 $type = 'w'; 15738 next; 15739 } 15740 15741 # handle operator x (now we know it isn't $x=) 15742 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { 15743 if ( $tok eq 'x' ) { 15744 15745 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= 15746 $tok = 'x='; 15747 $type = $tok; 15748 $i++; 15749 } 15750 else { 15751 $type = 'x'; 15752 } 15753 } 15754 15755 # FIXME: Patch: mark something like x4 as an integer for now 15756 # It gets fixed downstream. This is easier than 15757 # splitting the pretoken. 15758 else { 15759 $type = 'n'; 15760 } 15761 } 15762 15763 elsif ( ( $tok eq 'strict' ) 15764 and ( $last_nonblank_token eq 'use' ) ) 15765 { 15766 $tokenizer_self->{_saw_use_strict} = 1; 15767 scan_bare_identifier(); 15768 } 15769 15770 elsif ( ( $tok eq 'warnings' ) 15771 and ( $last_nonblank_token eq 'use' ) ) 15772 { 15773 $tokenizer_self->{_saw_perl_dash_w} = 1; 15774 15775 # scan as identifier, so that we pick up something like: 15776 # use warnings::register 15777 scan_bare_identifier(); 15778 } 15779 15780 elsif ( 15781 $tok eq 'AutoLoader' 15782 && $tokenizer_self->{_look_for_autoloader} 15783 && ( 15784 $last_nonblank_token eq 'use' 15785 15786 # these regexes are from AutoSplit.pm, which we want 15787 # to mimic 15788 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ 15789 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ 15790 ) 15791 ) 15792 { 15793 write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); 15794 $tokenizer_self->{_saw_autoloader} = 1; 15795 $tokenizer_self->{_look_for_autoloader} = 0; 15796 scan_bare_identifier(); 15797 } 15798 15799 elsif ( 15800 $tok eq 'SelfLoader' 15801 && $tokenizer_self->{_look_for_selfloader} 15802 && ( $last_nonblank_token eq 'use' 15803 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ 15804 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) 15805 ) 15806 { 15807 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); 15808 $tokenizer_self->{_saw_selfloader} = 1; 15809 $tokenizer_self->{_look_for_selfloader} = 0; 15810 scan_bare_identifier(); 15811 } 15812 15813 elsif ( ( $tok eq 'constant' ) 15814 and ( $last_nonblank_token eq 'use' ) ) 15815 { 15816 scan_bare_identifier(); 15817 my ( $next_nonblank_token, $i_next ) = 15818 find_next_nonblank_token( $i, $rtokens ); 15819 15820 if ($next_nonblank_token) { 15821 15822 if ( $is_keyword{$next_nonblank_token} ) { 15823 warning( 15824"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" 15825 ); 15826 } 15827 15828 # FIXME: could check for error in which next token is not 15829 # a word (number, punctuation, ..) 15830 else { 15831 $is_constant{$current_package} 15832 {$next_nonblank_token} = 1; 15833 } 15834 } 15835 } 15836 15837 elsif ( $tok eq 's' ) { 15838 error_if_expecting_OPERATOR() 15839 if ( $expecting == OPERATOR ); 15840 $in_quote = 2; # starting first of two patterns/quotes 15841 15842 # NOTE: camel 3 says egimosx, but 'c' is accepeted by perl 15843 $allowed_quote_modifiers = '[cegimosx]'; 15844 $type = 'Q'; 15845 } 15846 15847 elsif ( $tok =~ /^((y)|(tr))$/ ) { # built-in functions 15848 error_if_expecting_OPERATOR() 15849 if ( $expecting == OPERATOR ); 15850 $in_quote = 2; # starting first of two patterns/quotes 15851 $allowed_quote_modifiers = '[cds]'; 15852 $type = 'Q'; 15853 } 15854 15855 elsif ( $tok eq 'm' ) { # match operator 15856 error_if_expecting_OPERATOR() 15857 if ( $expecting == OPERATOR ); 15858 $in_quote = 1; 15859 $allowed_quote_modifiers = '[cgimosx]'; 15860 $type = 'Q'; 15861 } 15862 15863 elsif ( $tok =~ /^(q|qq|qw|qx)$/ ) { # various quote operators 15864 error_if_expecting_OPERATOR() 15865 if ( $expecting == OPERATOR ); 15866 $in_quote = 1; 15867 $allowed_quote_modifiers = ""; 15868 15869 # All quote types are 'Q' except possibly qw quotes. 15870 # qw quotes are special in that they may generally be trimmed 15871 # of leading and trailing whitespace. So they are given a 15872 # separate type, 'q', unless requested otherwise. 15873 $type = 15874 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) 15875 ? 'q' 15876 : 'Q'; 15877 $quote_type = $type; 15878 } 15879 15880 elsif ( $tok =~ /^(qr)$/ ) { 15881 error_if_expecting_OPERATOR() 15882 if ( $expecting == OPERATOR ); 15883 $in_quote = 1; 15884 $allowed_quote_modifiers = '[imosx]'; # camel 3 p 147 15885 $type = 'Q'; 15886 $quote_type = $type; 15887 } 15888 15889 # check for a statement label 15890 elsif ( ( $next_nonblank_token eq ':' ) 15891 && ( $$rtokens[ $i_next + 1 ] ne ':' ) 15892 && label_ok() ) 15893 { 15894 if ( $tok !~ /A-Z/ ) { 15895 push @lower_case_labels_at, $input_line_number; 15896 } 15897 $type = 'J'; 15898 $tok .= ':'; 15899 $i = $i_next; 15900 next; 15901 } 15902 15903 elsif ( $tok =~ /^(sub|package)$/ ) { 15904 error_if_expecting_OPERATOR() 15905 if ( $expecting == OPERATOR ); 15906 scan_id(); 15907 } 15908 15909 # Note on token types for format, __DATA__, __END__: 15910 # It simplifies things to give these type ';', so that when we 15911 # start rescanning we will be expecting a token of type TERM. 15912 # We will switch to type 'k' before outputting the tokens. 15913 elsif ( $tok eq 'format' ) { 15914 $type = ';'; # make tokenizer look for TERM next 15915 $tokenizer_self->{_in_format} = 1; 15916 last; 15917 } 15918 15919 elsif ( $tok eq '__DATA__' ) { 15920 $tokenizer_self->{_in_data} = $tok; 15921 $type = ';'; # make tokenizer look for TERM next 15922 last; 15923 } 15924 15925 elsif ( $tok eq '__END__' ) { 15926 $tokenizer_self->{_in_end} = $tok; 15927 $type = ';'; # make tokenizer look for TERM next 15928 last; 15929 } 15930 15931 elsif ( $is_keyword{$tok} ) { 15932 $type = 'k'; 15933 15934 # Since for and foreach may not be followed immediately 15935 # by an opening paren, we have to remember which keyword 15936 # is associated with the next '(' 15937 if ( $tok =~ /^(for|foreach)$/ ) { 15938 if ( new_statement_ok() ) { 15939 $want_paren = $tok; 15940 } 15941 } 15942 15943 # recognize 'use' statements, which are special 15944 elsif ( $tok =~ /^(use|require)$/ ) { 15945 $statement_type = $tok; 15946 error_if_expecting_OPERATOR() 15947 if ( $expecting == OPERATOR ); 15948 } 15949 } 15950 15951 # check for inline label 15952 elsif ( ( $last_nonblank_type eq 'k' ) 15953 && ( $last_nonblank_token =~ /^(redo|last|next|goto)$/ ) ) 15954 { 15955 $type = 'j'; 15956 next; 15957 } 15958 15959 # something else -- 15960 else { 15961 15962 scan_bare_identifier(); 15963 if ( $type eq 'w' ) { 15964 error_if_expecting_OPERATOR("bareword") 15965 if ( $expecting == OPERATOR ); 15966 15967 # mark bare words immediately followed by a paren as 15968 # functions 15969 $next_tok = $$rtokens[ $i + 1 ]; 15970 if ( $next_tok eq '(' ) { 15971 $type = 'U'; 15972 } 15973 15974 # mark bare words following a file test operator as 15975 # something that will expect an operator next. 15976 # patch 072901: unless followed immediately by a paren, 15977 # in which case it must be a function call (pid.t) 15978 if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) { 15979 $type = 'C'; 15980 } 15981 } 15982 } 15983 } 15984 15985 ############################################################### 15986 # section 2: strings of digits 15987 ############################################################### 15988 elsif ( $pre_type eq 'd' ) { 15989 error_if_expecting_OPERATOR("Number") 15990 if ( $expecting == OPERATOR ); 15991 scan_number(); 15992 if ( !defined($number) ) { 15993 15994 # shouldn't happen - we should always get a number 15995 warning("non-number beginning with digit--program bug\n"); 15996 report_definite_bug(); 15997 } 15998 } 15999 16000 ############################################################### 16001 # section 3: all other tokens 16002 ############################################################### 16003 16004 else { 16005 last if ( $tok eq '#' ); 16006 my $code = $tokenization_code->{$tok}; 16007 $code->() if $code; 16008 redo if $in_quote; 16009 } 16010 } 16011 16012 # ----------------------------- 16013 # end of main tokenization loop 16014 # ----------------------------- 16015 16016 if ( $i_tok >= 0 ) { 16017 $output_token_type[$i_tok] = $type; 16018 $output_block_type[$i_tok] = $block_type; 16019 $output_container_type[$i_tok] = $container_type; 16020 $output_type_sequence[$i_tok] = $type_sequence; 16021 } 16022 16023 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { 16024 $last_last_nonblank_token = $last_nonblank_token; 16025 $last_last_nonblank_type = $last_nonblank_type; 16026 $last_last_nonblank_block_type = $last_nonblank_block_type; 16027 $last_last_nonblank_container_type = $last_nonblank_container_type; 16028 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; 16029 $last_nonblank_token = $tok; 16030 $last_nonblank_type = $type; 16031 $last_nonblank_block_type = $block_type; 16032 $last_nonblank_container_type = $container_type; 16033 $last_nonblank_type_sequence = $type_sequence; 16034 $last_nonblank_prototype = $prototype; 16035 } 16036 16037 # reset indentation level if necessary at a sub or package 16038 # in an attempt to recover from a nesting error 16039 if ( $level_in_tokenizer < 0 ) { 16040 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { 16041 reset_indentation_level(0); 16042 brace_warning("resetting level to 0 at $1 $2\n"); 16043 } 16044 } 16045 16046 # all done tokenizing this line ... 16047 # now prepare the final list of tokens and types 16048 16049 my @token_type = (); # stack of output token types 16050 my @block_type = (); # stack of output code block types 16051 my @container_type = (); # stack of output code container types 16052 my @type_sequence = (); # stack of output type sequence numbers 16053 my @tokens = (); # output tokens 16054 my @levels = (); # structural brace levels of output tokens 16055 my @slevels = (); # secondary nesting levels of output tokens 16056 my @nesting_tokens = (); # string of tokens leading to this depth 16057 my @nesting_types = (); # string of token types leading to this depth 16058 my @nesting_blocks = (); # string of block types leading to this depth 16059 my @nesting_lists = (); # string of list types leading to this depth 16060 my @ci_string = (); # string needed to compute continuation indentation 16061 my @container_environment = (); # BLOCK or LIST 16062 my $container_environment = ''; 16063 my $im = -1; # previous $i value 16064 my $num; 16065 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); 16066 my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; 16067 16068=head1 Computing Token Indentation 16069 16070 The final section of the tokenizer forms tokens and also computes 16071 parameters needed to find indentation. It is much easier to do it 16072 in the tokenizer than elsewhere. Here is a brief description of how 16073 indentation is computed. PerlTidy computes indentation as the sum 16074 of 2 terms: 16075 16076 (1) structural indentation, such as if/else/elsif blocks 16077 (2) continuation indentation, such as long parameter call lists. 16078 16079 These are occasionally called primary and secondary indentation. 16080 16081 Structural indentation is introduced by tokens of type '{', although 16082 the actual tokens might be '{', '(', or '['. Structural indentation 16083 is of two types: BLOCK and non-BLOCK. Default structural indentation 16084 is 4 characters if the standard indentation scheme is used. 16085 16086 Continuation indentation is introduced whenever a line at BLOCK level 16087 is broken before its termination. Default continuation indentation 16088 is 2 characters in the standard indentation scheme. 16089 16090 Both types of indentation may be nested arbitrarily deep and 16091 interlaced. The distinction between the two is somewhat arbitrary. 16092 16093 For each token, we will define two variables which would apply if 16094 the current statement were broken just before that token, so that 16095 that token started a new line: 16096 16097 $level = the structural indentation level, 16098 $ci_level = the continuation indentation level 16099 16100 The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), 16101 assuming defaults. However, in some special cases it is customary 16102 to modify $ci_level from this strict value. 16103 16104 The total structural indentation is easy to compute by adding and 16105 subtracting 1 from a saved value as types '{' and '}' are seen. The 16106 running value of this variable is $level_in_tokenizer. 16107 16108 The total continuation is much more difficult to compute, and requires 16109 several variables. These veriables are: 16110 16111 $ci_string_in_tokenizer = a string of 1's and 0's indicating, for 16112 each indentation level, if there are intervening open secondary 16113 structures just prior to that level. 16114 $continuation_string_in_tokenizer = a string of 1's and 0's indicating 16115 if the last token at that level is "continued", meaning that it 16116 is not the first token of an expression. 16117 $nesting_block_string = a string of 1's and 0's indicating, for each 16118 indentation level, if the level is of type BLOCK or not. 16119 $nesting_list_string = a string of 1's and 0's indicating, for each 16120 indentation level, if it is is appropriate for list formatting. 16121 If so, continuation indentation is used to indent long list items. 16122 @slevel_stack = a stack of total nesting depths at each 16123 structural indentation level, where "total nesting depth" means 16124 the nesting depth that would occur if every nesting token -- '{', '[', 16125 and '(' -- , regardless of context, is used to compute a nesting 16126 depth. 16127 16128=cut 16129 16130 my $level_i; 16131 my $nesting_token_string_i; 16132 my $ci_string_i; 16133 my $nesting_type_string_i; 16134 my $nesting_block_string_i; 16135 my $nesting_list_string_i; 16136 16137 foreach $i (@output_token_list) { # scan the list of pre-tokens indexes 16138 16139 # self-checking for valid token types 16140 my $type = $output_token_type[$i]; 16141 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken 16142 $level_i = $level_in_tokenizer; 16143 16144 # This can happen by running perltidy on non-scripts 16145 # although it could also be bug introduced by programming change. 16146 # Perl silently accepts a 032 (^Z) and takes it as the end 16147 if ( !$is_valid_token_type{$type} ) { 16148 my $val = ord($type); 16149 warning( 16150 "unexpected character decimal $val ($type) in script\n"); 16151 $tokenizer_self->{_in_error} = 1; 16152 } 16153 16154 # ------------------------------------------------------------------------ 16155 # TOKEN TYPE PATCHES 16156 # output __END__, __DATA__, and format as type 'k' instead of ';' 16157 # to make html colors correct, etc. 16158 my $fix_type = $type; 16159 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } 16160 16161 # output anonymous 'sub' as keyword 16162 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } 16163 16164 # ------------------------------------------------------------------------ 16165 16166 $nesting_token_string_i = $nesting_token_string; 16167 $nesting_type_string_i = $nesting_type_string; 16168 $nesting_block_string_i = $nesting_block_string; 16169 $nesting_list_string_i = $nesting_list_string; 16170 16171 # set primary indentation levels based on structural braces 16172 # Note: these are set so that the leading braces have a HIGHER 16173 # level than their CONTENTS, which is convenient for indentation 16174 # Also, define continuation indentation for each token. 16175 if ( $type eq '{' || $type eq 'L' ) { 16176 16177 # use environment before updating 16178 $container_environment = 16179 $nesting_block_string =~ /1$/ ? 'BLOCK' 16180 : $nesting_list_string =~ /1$/ ? 'LIST' 16181 : ""; 16182 16183 # if the difference between total nesting levels is not 1, 16184 # there are intervening non-structural nesting types between 16185 # this '{' and the previous unclosed '{' 16186 my $intervening_secondary_structure = 0; 16187 if (@slevel_stack) { 16188 $intervening_secondary_structure = 16189 $slevel_in_tokenizer - $slevel_stack[$#slevel_stack]; 16190 } 16191 16192=head1 Continuation Indentation 16193 16194Having tried setting continuation indentation both in the formatter and 16195in the tokenizer, I can say that setting it in the tokenizer is much, 16196much easier. The formatter already has too much to do, and can't 16197make decisions on line breaks without knowing what 'ci' will be at 16198arbitrary locations. 16199 16200But a problem with setting the continuation indentation (ci) here 16201in the tokenizer is that we do not know where line breaks will actually 16202be. As a result, we don't know if we should propagate continuation 16203indentation to higher levels of structure. 16204 16205For nesting of only structural indentation, we never need to do this. 16206For example, in a long if statement, like this 16207 16208 if ( !$output_block_type[$i] 16209 && ($in_statement_continuation) ) 16210 { <--outdented 16211 do_something(); 16212 } 16213 16214the second line has ci but we do normally give the lines within the BLOCK 16215any ci. This would be true if we had blocks nested arbitrarily deeply. 16216 16217But consider something like this, where we have created a break after 16218an opening paren on line 1, and the paren is not (currently) a 16219structural indentation token: 16220 16221my $file = $menubar->Menubutton( 16222 qw/-text File -underline 0 -menuitems/ => [ 16223 [ 16224 Cascade => '~View', 16225 -menuitems => [ 16226 ... 16227 16228The second line has ci, so it would seem reasonable to propagate it 16229down, giving the third line 1 ci + 1 indentation. This suggests the 16230following rule, which is currently used to propagating ci down: if there 16231are any non-structural opening parens (or brackets, or braces), before 16232an opening structural brace, then ci is propagated down, and otherwise 16233not. The variable $intervening_secondary_structure contains this 16234information for the current token, and the string 16235"$ci_string_in_tokenizer" is a stack of previous values of this 16236variable. 16237 16238If no breaks are made just after a secondary structure, this method 16239will give ci where it really isn't required. For example, 16240 16241 my $str = join ( " ", map { 16242 /\n/ ? do { my $n = $_; $n =~ tr/\n/ /; $n } 16243 : $_; 16244 } @_ ) . "\015\012"; 16245 16246Here, there is no break after the first '(', so the second line gets 16247ci + one indent, but it would look ok without the ci. However, the 16248extra ci does no harm. 16249 16250This logic works well, but it is still incomplete. A current problem is 16251that the ci logic does not propagate down hierarchically through 16252consecutive non-structural bracing. More work needs to be done to 16253improve the formatting in this case. The next step in development along 16254these lines will be to define parens following a comma, in LIST context, 16255to be structural. Here is an example of two levels of non-structural 16256indentation, but only single continuation-indentation 16257 16258 $deps = control_fields( 16259 ( "Pre-Depends", "Depends", "Recommends", "Suggests", 16260 "Conflicts", "Provides" ) 16261 ); 16262 16263=cut 16264 16265 # save the current states 16266 push ( @slevel_stack, 1 + $slevel_in_tokenizer ); 16267 $level_in_tokenizer++; 16268 16269 $nesting_block_string .= $output_block_type[$i] ? '1' : '0'; 16270 16271 # we will use continuation indentation within containers 16272 # which are not blocks and not logical expressions 16273 my $bit = 0; 16274 if ( !$output_block_type[$i] ) { 16275 16276 # propagate flag down at nested open parens 16277 if ( $output_container_type[$i] eq '(' ) { 16278 $bit = 1 if ( $nesting_list_string =~ /1$/ ); 16279 } 16280 16281 # use list continuation if not a logical grouping 16282 else { 16283 $bit = 1 16284 if ( $output_container_type[$i] !~ 16285/^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ 16286 ); 16287 } 16288 } 16289 $nesting_list_string .= $bit; 16290 16291 $ci_string_in_tokenizer .= 16292 ( $intervening_secondary_structure != 0 ) ? '1' : '0'; 16293 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; 16294 $continuation_string_in_tokenizer .= 16295 ( $in_statement_continuation > 0 ) ? '1' : '0'; 16296 16297=pod 16298 16299 Sometimes we want to give an opening brace continuation indentation, 16300 and sometimes not. For code blocks, we don't do it, so that the leading 16301 '{' gets outdented, like this: 16302 16303 if ( !$output_block_type[$i] 16304 && ($in_statement_continuation) ) 16305 { <--outdented 16306 16307 For other types, we will give them continuation indentation. For example, 16308 here is how a list looks with the opening paren indented: 16309 16310 @LoL = 16311 ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], 16312 [ "homer", "marge", "bart" ], ); 16313 16314 This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) 16315 16316=cut 16317 16318 my $total_ci = $ci_string_sum; 16319 if ( 16320 !$output_block_type[$i] # patch: skip for BLOCK 16321 && ($in_statement_continuation) 16322 ) 16323 { 16324 $total_ci += $in_statement_continuation 16325 unless ( $ci_string_in_tokenizer =~ /1$/ ); 16326 } 16327 16328 $ci_string_i = $total_ci; 16329 $in_statement_continuation = 0; 16330 } 16331 16332 elsif ( $type eq '}' || $type eq 'R' ) { 16333 16334 # only a nesting error in the script would prevent popping here 16335 if ( @slevel_stack > 1 ) { pop (@slevel_stack); } 16336 16337 $level_i = --$level_in_tokenizer; 16338 16339 # restore previous level values 16340 if ( length($nesting_block_string) > 1 ) 16341 { # true for valid script 16342 chop $nesting_block_string; 16343 chop $nesting_list_string; 16344 16345 chop $ci_string_in_tokenizer; 16346 $ci_string_sum = 16347 ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; 16348 16349 $in_statement_continuation = 16350 chop $continuation_string_in_tokenizer; 16351 16352 # zero continuation flag at terminal BLOCK '}' which 16353 # ends a statement. 16354 if ( $output_block_type[$i] ) { 16355 16356 # ...These include non-anonymous subs 16357 # note: could be sub ::abc { or sub 'abc 16358 if ( $output_block_type[$i] =~ m/^sub\s*/gc ) { 16359 16360 # note: older versions of perl require the /gc modifier 16361 # here or else the \G does not work. 16362 if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) { 16363 $in_statement_continuation = 0; 16364 } 16365 } 16366 16367 # ...and include all block types except user subs with 16368 # block prototypes and these: (sort|grep|map|do|eval) 16369 elsif ( $output_block_type[$i] =~ 16370/^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ 16371 ) 16372 { 16373 $in_statement_continuation = 0; 16374 } 16375 16376 # ..and a block introduced by a label 16377 elsif ( $output_block_type[$i] =~ /^\w+\s*:$/gc ) { 16378 $in_statement_continuation = 0; 16379 } 16380 16381 # ..but these are not terminal types 16382 elsif ( $output_block_type[$i] =~ 16383 /^(sort|grep|map|do|eval)$/ ) 16384 { 16385 } 16386 16387 # ..nor user function with block prototype 16388 else { 16389 } 16390 } 16391 16392=pod 16393 If we are in a list, then 16394 we must set continuatoin indentation at the closing 16395 paren of something like this (paren after $check): 16396 assert( 16397 __LINE__, 16398 ( not defined $check ) 16399 or ref $check 16400 or $check eq "new" 16401 or $check eq "old", 16402 ); 16403=cut 16404 16405 #OLD: elsif ( $nesting_list_string =~ /1$/ ) { 16406 # $in_statement_continuation=1; 16407 #} 16408 16409 } 16410 16411 # use environment after updating 16412 $container_environment = 16413 $nesting_block_string =~ /1$/ ? 'BLOCK' 16414 : $nesting_list_string =~ /1$/ ? 'LIST' 16415 : ""; 16416 $ci_string_i = $ci_string_sum + $in_statement_continuation; 16417 $nesting_block_string_i = $nesting_block_string; 16418 $nesting_list_string_i = $nesting_list_string; 16419 } 16420 16421 # not a structural indentation type.. 16422 else { 16423 16424 $container_environment = 16425 $nesting_block_string =~ /1$/ ? 'BLOCK' 16426 : $nesting_list_string =~ /1$/ ? 'LIST' 16427 : ""; 16428 16429 # zero the continuation indentation at certain tokens so 16430 # that they will be at the same level as its container. For 16431 # commas, this simplifies the -lp indentation logic, which 16432 # counts commas. For ?: it makes them stand out. 16433 if ( $nesting_list_string =~ /1$/ ) { 16434 if ( $type =~ /^[,\?\:]$/ ) { 16435 $in_statement_continuation = 0; 16436 } 16437 } 16438 16439 # continuation indentation is sum of any open ci from previous 16440 # levels plus the current level 16441 $ci_string_i = $ci_string_sum + $in_statement_continuation; 16442 16443 # update continuation flag ... 16444 # if this isn't a blank or comment.. 16445 if ( $type !~ /^[b#]$/ ) { 16446 16447 # and we are in a BLOCK 16448 if ( $nesting_block_string =~ /1$/ ) { 16449 16450 # the next token after a ';' and label starts a new stmt 16451 if ( $type =~ /^[;J]$/ ) { 16452 $in_statement_continuation = 0; 16453 } 16454 16455 # otherwise, we are continuing the current statement 16456 else { 16457 $in_statement_continuation = 1; 16458 } 16459 } 16460 16461 # if we are not in a BLOCK.. 16462 else { 16463 16464 # do not use continuation indentation if not list 16465 # environment (could be within if/elsif clause) 16466 if ( $nesting_list_string =~ /0$/ ) { 16467 $in_statement_continuation = 0; 16468 } 16469 16470 # otherwise, the next token after a ',' starts a new term 16471 elsif ( $type =~ /^[,]$/ ) { 16472 $in_statement_continuation = 0; 16473 } 16474 16475 # otherwise, we are continuing the current term 16476 else { 16477 $in_statement_continuation = 1; 16478 } 16479 } 16480 } 16481 } 16482 16483 if ( $level_in_tokenizer < 0 ) { 16484 unless ($saw_negative_indentation) { 16485 $saw_negative_indentation = 1; 16486 warning("Starting negative indentation\n"); 16487 } 16488 } 16489 16490 # set secondary nesting levels based on all continment token types 16491 # Note: these are set so that the nesting depth is the depth 16492 # of the PREVIOUS TOKEN, which is convenient for setting 16493 # the stength of token bonds 16494 my $slevel_i = $slevel_in_tokenizer; 16495 if ( $type =~ /^[L\{\(\[]$/ ) { 16496 $slevel_in_tokenizer++; 16497 $nesting_token_string .= $tok; 16498 $nesting_type_string .= $type; 16499 } 16500 elsif ( $type =~ /^[R\}\)\]]$/ ) { 16501 $slevel_in_tokenizer--; 16502 my $char = chop $nesting_token_string; 16503 16504 if ( $char ne $matching_start_token{$tok} ) { 16505 $nesting_token_string .= $char . $tok; 16506 $nesting_type_string .= $type; 16507 } 16508 else { 16509 chop $nesting_type_string; 16510 } 16511 } 16512 16513 push ( @block_type, $output_block_type[$i] ); 16514 push ( @ci_string, $ci_string_i ); 16515 push ( @container_environment, $container_environment ); 16516 push ( @container_type, $output_container_type[$i] ); 16517 push ( @levels, $level_i ); 16518 push ( @nesting_tokens, $nesting_token_string_i ); 16519 push ( @nesting_types, $nesting_type_string_i ); 16520 push ( @slevels, $slevel_i ); 16521 push ( @token_type, $fix_type ); 16522 push ( @type_sequence, $output_type_sequence[$i] ); 16523 push ( @nesting_blocks, $nesting_block_string ); 16524 push ( @nesting_lists, $nesting_list_string ); 16525 16526 # now form the previous token 16527 if ( $im >= 0 ) { 16528 $num = 16529 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters 16530 16531 if ( $num > 0 ) { 16532 push ( @tokens, 16533 substr( $input_line, $$rtoken_map[$im], $num ) ); 16534 } 16535 } 16536 $im = $i; 16537 } 16538 16539 $num = length($input_line) - $$rtoken_map[$im]; # make the last token 16540 if ( $num > 0 ) { 16541 push ( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); 16542 } 16543 16544 $tokenizer_self->{_in_quote} = $in_quote; 16545 $tokenizer_self->{_rhere_target_list} = \@here_target_list; 16546 16547 $line_of_tokens->{_rtoken_type} = \@token_type; 16548 $line_of_tokens->{_rtokens} = \@tokens; 16549 $line_of_tokens->{_rblock_type} = \@block_type; 16550 $line_of_tokens->{_rcontainer_type} = \@container_type; 16551 $line_of_tokens->{_rcontainer_environment} = \@container_environment; 16552 $line_of_tokens->{_rtype_sequence} = \@type_sequence; 16553 $line_of_tokens->{_rlevels} = \@levels; 16554 $line_of_tokens->{_rslevels} = \@slevels; 16555 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; 16556 $line_of_tokens->{_rci_levels} = \@ci_string; 16557 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; 16558 16559 return; 16560 } 16561} # end closure tokenize_this_line 16562 16563sub new_statement_ok { 16564 16565 # return true if the current token can start a new statement 16566 16567 return label_ok() # a label would be ok here 16568 16569 || $last_nonblank_type eq 'J'; # or we follow a label 16570 16571} 16572 16573sub label_ok { 16574 16575 # Decide if a bare word followed by a colon here is a label 16576 16577 # if it follows an opening or closing code block curly brace.. 16578 if ( $last_nonblank_token =~ /^[\{\}]$/ 16579 && $last_nonblank_type eq $last_nonblank_token ) 16580 { 16581 16582 # it is a label if and only if the curly encloses a code block 16583 return $brace_type[$brace_depth]; 16584 } 16585 16586 # otherwise, it is a label if and only if it follows a ';' 16587 else { 16588 return ( $last_nonblank_token eq ';' ); 16589 } 16590} 16591 16592sub code_block_type { 16593 16594 # Decide if this is a block of code, and its type. 16595 # Must be called only when $type = $token = '{' 16596 # The problem is to distinguish between the start of a block of code 16597 # and the start of an anonymous hash reference 16598 # Returns "" if not code block, otherwise returns 'last_nonblank_token' 16599 # to indicate the type of code block. (For example, 'last_nonblank_token' 16600 # might be 'if' for an if block, 'else' for an else block, etc). 16601 16602 # handle case of multiple '{'s 16603 16604 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; 16605 16606 if ( $last_nonblank_token eq '{' 16607 && $last_nonblank_type eq $last_nonblank_token ) 16608 { 16609 16610 # opening brace where a statement may appear is probably 16611 # a code block but might be and anonymous hash reference 16612 if ( $brace_type[$brace_depth] ) { 16613 return decide_if_code_block(); 16614 } 16615 16616 # cannot start a code block within an anonymous hash 16617 else { 16618 return ""; 16619 } 16620 } 16621 16622 elsif ( $last_nonblank_token eq ';' ) { 16623 16624 # an opening brace where a statement may appear is probably 16625 # a code block but might be and anonymous hash reference 16626 return decide_if_code_block(); 16627 } 16628 16629 # handle case of '}{' 16630 elsif ( $last_nonblank_token eq '}' 16631 && $last_nonblank_type eq $last_nonblank_token ) 16632 { 16633 16634 # a } { situation ... 16635 # could be hash reference after code block..(blktype1.t) 16636 if ($last_nonblank_block_type) { 16637 return decide_if_code_block(); 16638 } 16639 16640 # must be a block if it follows a closing hash reference 16641 else { 16642 return $last_nonblank_token; 16643 } 16644 } 16645 16646 # NOTE: braces after type characters start code blocks, but for 16647 # simplicity these are not identified as such. See also 16648 # sub is_non_structural_brace. 16649 # elsif ( $last_nonblank_type eq 't' ) { 16650 # return $last_nonblank_token; 16651 # } 16652 16653 # brace after label: 16654 elsif ( $last_nonblank_type eq 'J' ) { 16655 return $last_nonblank_token; 16656 } 16657 16658 # otherwise, look at previous token. This must be a code block if 16659 # it follows any of these: 16660 elsif ( $last_nonblank_token =~ 16661/^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ 16662 ) 16663 { 16664 return $last_nonblank_token; 16665 } 16666 16667 # or a sub definition 16668 elsif ( $last_nonblank_type =~ /^[ti]$/ 16669 && $last_nonblank_token =~ /^sub\b/ ) 16670 { 16671 return $last_nonblank_token; 16672 } 16673 16674 # user-defined subs with block parameters (like grep/map/eval) 16675 elsif ( $last_nonblank_type eq 'G' ) { 16676 return $last_nonblank_token; 16677 } 16678 16679 # anything else must be anonymous hash reference 16680 else { 16681 return ""; 16682 } 16683} 16684 16685sub decide_if_code_block { 16686 16687 # we are at a '{' where a statement may appear. 16688 # We must decide if this brace starts an anonymous hash or a code 16689 # block. 16690 16691 # return "" if anonymous hash, and $last_nonblank_token otherwise 16692 16693 # FIXME: coding incomplete 16694 return $last_nonblank_token; 16695} 16696 16697sub unexpected { 16698 16699 # report unexpected token type and show where it is 16700 my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_; 16701 $unexpected_error_count++; 16702 if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) { 16703 my $msg = "found $found where $expecting expected"; 16704 my $pos = $$rpretoken_map[$i_tok]; 16705 interrupt_logfile(); 16706 my ( $offset, $numbered_line, $underline ) = 16707 make_numbered_line( $input_line_number, $input_line, $pos ); 16708 $underline = write_on_underline( $underline, $pos - $offset, '^' ); 16709 16710 my $trailer = ""; 16711 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { 16712 my $pos_prev = $$rpretoken_map[$last_nonblank_i]; 16713 my $num; 16714 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { 16715 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; 16716 } 16717 else { 16718 $num = $pos - $pos_prev; 16719 } 16720 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } 16721 16722 $underline = 16723 write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); 16724 $trailer = " (previous token underlined)"; 16725 } 16726 warning( $numbered_line . "\n" ); 16727 warning( $underline . "\n" ); 16728 warning( $msg . $trailer . "\n" ); 16729 resume_logfile(); 16730 } 16731} 16732 16733sub indicate_error { 16734 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; 16735 interrupt_logfile(); 16736 warning($msg); 16737 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); 16738 resume_logfile(); 16739} 16740 16741sub write_error_indicator_pair { 16742 my ( $line_number, $input_line, $pos, $carrat ) = @_; 16743 my ( $offset, $numbered_line, $underline ) = 16744 make_numbered_line( $line_number, $input_line, $pos ); 16745 $underline = write_on_underline( $underline, $pos - $offset, $carrat ); 16746 warning( $numbered_line . "\n" ); 16747 $underline =~ s/\s*$//; 16748 warning( $underline . "\n" ); 16749} 16750 16751sub make_numbered_line { 16752 16753=pod 16754 16755 Given an input line, its line number, and a character position of interest, 16756 create a string not longer than 80 characters of the form 16757 $lineno: sub_string 16758 such that the sub_string of $str contains the position of interest 16759 16760 Here is an example of what we want, in this case we add trailing '...' 16761 because the line is long. 16762 167632: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 16764 16765 Here is another example, this time in which we used leading '...' 16766 because of excessive length: 16767 167682: ... er of the World Wide Web Consortium's 16769 16770 input parameters are: 16771 $lineno = line number 16772 $str = the text of the line 16773 $pos = position of interest (the error) : 0 = first character 16774 16775 We return : 16776 - $offset = an offset which corrects the position in case we only 16777 display part of a line, such that $pos-$offset is the effective 16778 position from the start of the displayed line. 16779 - $numbered_line = the numbered line as above, 16780 - $underline = a blank 'underline' which is all spaces with the same 16781 number of characters as the numbered line. 16782 16783=cut 16784 16785 my ( $lineno, $str, $pos ) = @_; 16786 my $offset = ( $pos < 60 ) ? 0 : $pos - 40; 16787 my $excess = length($str) - $offset - 68; 16788 my $numc = ( $excess > 0 ) ? 68 : undef; 16789 16790 if ( defined($numc) ) { 16791 if ( $offset == 0 ) { 16792 $str = substr( $str, $offset, $numc - 4 ) . " ..."; 16793 } 16794 else { 16795 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; 16796 } 16797 } 16798 else { 16799 16800 if ( $offset == 0 ) { 16801 } 16802 else { 16803 $str = "... " . substr( $str, $offset + 4 ); 16804 } 16805 } 16806 16807 my $numbered_line = sprintf( "%d: ", $lineno ); 16808 $offset -= length($numbered_line); 16809 $numbered_line .= $str; 16810 my $underline = " " x length($numbered_line); 16811 return ( $offset, $numbered_line, $underline ); 16812} 16813 16814sub write_on_underline { 16815 16816=pod 16817 16818The "underline" is a string that shows where an error is; it starts 16819out as a string of blanks with the same length as the numbered line of 16820code above it, and we have to add marking to show where an error is. 16821In the example below, we want to write the string '--^' just below 16822the line of bad code: 16823 168242: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 16825 ---^ 16826We are given the current underline string, plus a position and a 16827string to write on it. 16828 16829In the above example, there will be 2 calls to do this: 16830First call: $pos=19, pos_chr=^ 16831Second call: $pos=16, pos_chr=--- 16832 16833This is a trivial thing to do with substr, but there is some 16834checking to do. 16835 16836=cut 16837 16838 my ( $underline, $pos, $pos_chr ) = @_; 16839 16840 # check for error..shouldn't happen 16841 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { 16842 return $underline; 16843 } 16844 my $excess = length($pos_chr) + $pos - length($underline); 16845 if ( $excess > 0 ) { 16846 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); 16847 } 16848 substr( $underline, $pos, length($pos_chr) ) = $pos_chr; 16849 return ($underline); 16850} 16851 16852sub is_non_structural_brace { 16853 16854 # Decide if a brace or bracket is structural or non-structural 16855 # by looking at the previous token and type 16856 16857 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. 16858 # Tentatively deactivated because it caused the wrong operator expectation 16859 # for this code: 16860 # $user = @vars[1] / 100; 16861 # Must update sub operator_expected before re-implementing. 16862 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { 16863 # return 0; 16864 # } 16865 16866 # NOTE: braces after type characters start code blocks, but for 16867 # simplicity these are not identified as such. See also 16868 # sub code_block_type 16869 # if ($last_nonblank_type eq 't') {return 0} 16870 16871 # otherwise, it is non-structural if it is decorated 16872 # by type information. 16873 # For example, the '{' here is non-structural: ${xxx} 16874 ( 16875 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ 16876 16877 # or if we follow a hash or array closing curly brace or bracket 16878 # For example, the second '{' in this is non-structural: $a{'x'}{'y'} 16879 # because the first '}' would have been given type 'R' 16880 || $last_nonblank_type =~ /^([R\]])$/ 16881 ); 16882} 16883 16884sub operator_expected { 16885 16886=pod 16887 16888Many perl symbols have two or more meanings. For example, '<<' 16889can be a shift operator or a here-doc operator. The 16890interpretation of these symbols depends on the current state of 16891the tokenizer, which may either be expecting a term or an 16892operator. For this example, a << would be a shift if an operator 16893is expected, and a here-doc if a term is expected. This routine 16894is called to make this decision for any current token. It returns 16895one of three possible values: 16896 16897 OPERATOR - operator expected (or at least, not a term) 16898 UNKNOWN - can't tell 16899 TERM - a term is expected (or at least, not an operator) 16900 16901The decision is based on what has been seen so far. This information 16902is stored in the "$last_nonblank_type" and "$last_nonblank_token" variables. 16903For example, if the $last_nonblank_type is '=~', then we are expecting 16904a TERM, whereas if $last_nonblank_type is 'n' (numeric), we are 16905expecting an OPERATOR. 16906 16907If a UNKNOWN is returned, the calling routine must guess. A major goal 16908of this tokenizer is to minimize the possiblity of returning 16909UNKNOWN, because a wrong guess can spoil the formatting of a script. 16910 16911adding NEW_TOKENS: it is critically important that this routine be updated 16912to allow it to determine if an operator or term is to be expected 16913after the new token. Doing this simply involves adding the new token 16914character to one of the regexes in this routine or to one of the hash lists 16915that it uses, which are initialized in the BEGIN section. 16916 16917=cut 16918 16919 my ( $prev_type, $tok, $next_type ) = @_; 16920 my $op_expected = UNKNOWN; 16921 16922 # Note: function prototype is available for token type 'U' for future 16923 # program development. It contains the leading and trailing parens, 16924 # and no blanks. It might be used to eliminate token type 'C', for 16925 # example (prototype = '()'). Thus: 16926 # if ($last_nonblank_type eq 'U') { 16927 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; 16928 # } 16929 16930 # A possible filehandle (or object) requires some care... 16931 if ( $last_nonblank_type eq 'Z' ) { 16932 16933 # angle.t 16934 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { 16935 $op_expected = UNKNOWN; 16936 } 16937 16938 # For possible file handle like "$a", Perl uses weird parsing rules. 16939 # For example: 16940 # print $a/2,"/hi"; - division 16941 # print $a / 2,"/hi"; - division 16942 # print $a/ 2,"/hi"; - division 16943 # print $a /2,"/hi"; - pattern (and error)! 16944 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { 16945 $op_expected = TERM; 16946 } 16947 16948 # Note when an operation is being done where a 16949 # filehandle might be expected, since a change in whitespace 16950 # could change the interpretation of the statement. 16951 else { 16952 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { 16953 complain("operator in print statement not recommended\n"); 16954 $op_expected = OPERATOR; 16955 } 16956 } 16957 } 16958 16959 # handle something after 'do' and 'eval' 16960 elsif ( $last_nonblank_token =~ /$block_operator/ ) { 16961 16962 # something like $a = eval "expression"; 16963 # ^ 16964 if ( $last_nonblank_type eq 'k' ) { 16965 $op_expected = TERM; # expression or list mode following keyword 16966 } 16967 16968 # something like $a = do { BLOCK } / 2; 16969 # ^ 16970 else { 16971 $op_expected = OPERATOR; # block mode following } 16972 } 16973 } 16974 16975 # handle bare word.. 16976 elsif ( $last_nonblank_type eq 'w' ) { 16977 16978 # unfortunately, we can't tell what type of token to expect next 16979 # after most bare words 16980 $op_expected = UNKNOWN; 16981 } 16982 16983 # operator, but not term possible after these types 16984 # Note: moved ')' from type to token because parens in list context 16985 # get marked as '{' '}' now. This is a minor glitch in the following: 16986 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); 16987 # 16988 elsif ( ( $last_nonblank_type =~ /^[\]RnviQh]$/ ) 16989 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) 16990 { 16991 $op_expected = OPERATOR; 16992 16993 # in a 'use' statement, numbers and v-strings are not really 16994 # numbers, so to avoid incorrect error messages, we will 16995 # mark them as unknown for now (use.t) 16996 if ( ( $statement_type eq 'use' ) 16997 && ( $last_nonblank_type =~ /^[nv]$/ ) ) 16998 { 16999 $op_expected = UNKNOWN; 17000 } 17001 } 17002 17003 # no operator after many keywords, such as "die", "warn", etc 17004 elsif ( $expecting_term_token{$last_nonblank_token} ) { 17005 $op_expected = TERM; 17006 } 17007 17008 # no operator after things like + - ** (i.e., other operators) 17009 elsif ( $expecting_term_types{$last_nonblank_type} ) { 17010 $op_expected = TERM; 17011 } 17012 17013 # a few operators, like "time", have an empty prototype () and so 17014 # take no parameters but produce a value to operate on 17015 elsif ( $expecting_operator_token{$last_nonblank_token} ) { 17016 $op_expected = OPERATOR; 17017 } 17018 17019 # post-increment and decrement produce values to be operated on 17020 elsif ( $expecting_operator_types{$last_nonblank_type} ) { 17021 $op_expected = OPERATOR; 17022 } 17023 17024 # no value to operate on after sub block 17025 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } 17026 17027 # a right brace here indicates the end of a simple block. 17028 # all non-structural right braces have type 'R' 17029 # all braces associated with block operator keywords have been given those 17030 # keywords as "last_nonblank_token" and caught above. 17031 # (This statement is order dependent, and must come after checking 17032 # $last_nonblank_token). 17033 elsif ( $last_nonblank_type eq '}' ) { 17034 $op_expected = TERM; 17035 } 17036 17037 # something else..what did I forget? 17038 else { 17039 17040 # collecting diagnostics on unknown operator types..see what was missed 17041 $op_expected = UNKNOWN; 17042 write_diagnostics( 17043"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" 17044 ); 17045 } 17046 17047 TOKENIZER_DEBUG_FLAG_EXPECT && do { 17048 print 17049"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; 17050 }; 17051 return $op_expected; 17052} 17053 17054=pod 17055 17056The following routines keep track of nesting depths of the nesting 17057types, ( [ { and ?. This is necessary for determining the indentation 17058level, and also for debugging programs. Not only do they keep track of 17059nesting depths of the individual brace types, but they check that each 17060of the other brace types is balanced within matching pairs. For 17061example, if the program sees this sequence: 17062 17063 { ( ( ) } 17064 17065then it can determine that there is an extra left paren somewhere 17066between the { and the }. And so on with every other possible 17067combination of outer and inner brace types. For another 17068example: 17069 17070 ( [ ..... ] ] ) 17071 17072which has an extra ] within the parens. 17073 17074The brace types have indexes 0 .. 3 which are indexes into 17075the matrices. 17076 17077The pair ? : are treated as just another nesting type, with ? acting 17078as the opening brace and : acting as the closing brace. 17079 17080The matrix 17081 17082 $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; 17083 17084saves the nesting depth of brace type $b (where $b is either of the other 17085nesting types) when brace type $a enters a new depth. When this depth 17086decreases, a check is made that the current depth of brace types $b is 17087unchanged, or otherwise there must have been an error. This can 17088be very useful for localizing errors, particularly when perl runs to 17089the end of a large file (such as this one) and announces that there 17090is a problem somewhere. 17091 17092A numerical sequence number is maintained for every nesting type, 17093so that each matching pair can be uniquely identified in a simple 17094way. 17095 17096=cut 17097 17098sub increase_nesting_depth { 17099 my ( $a, $i_tok ) = @_; 17100 my $b; 17101 $current_depth[$a]++; 17102 17103 # Sequence numbers increment by number of items. This keeps 17104 # a unique set of numbers but still allows the relative location 17105 # of any type to be determined. 17106 $nesting_sequence_number[$a] += scalar(@closing_brace_names); 17107 my $seqno = $nesting_sequence_number[$a]; 17108 $current_sequence_number[$a][ $current_depth[$a] ] = $seqno; 17109 17110 my $pos = $$rpretoken_map[$i_tok]; 17111 $starting_line_of_current_depth[$a][ $current_depth[$a] ] = 17112 [ $input_line_number, $input_line, $pos ]; 17113 17114 for $b ( 0 .. $#closing_brace_names ) { 17115 next if ( $b == $a ); 17116 $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; 17117 } 17118 return $seqno; 17119} 17120 17121sub decrease_nesting_depth { 17122 17123 my ( $a, $i_tok ) = @_; 17124 my $pos = $$rpretoken_map[$i_tok]; 17125 my $b; 17126 my $seqno = 0; 17127 17128 if ( $current_depth[$a] > 0 ) { 17129 17130 $seqno = $current_sequence_number[$a][ $current_depth[$a] ]; 17131 17132 # check that any brace types $b contained within are balanced 17133 for $b ( 0 .. $#closing_brace_names ) { 17134 next if ( $b == $a ); 17135 17136 unless ( $depth_array[$a][$b][ $current_depth[$a] ] == 17137 $current_depth[$b] ) 17138 { 17139 my $diff = $current_depth[$b] - 17140 $depth_array[$a][$b][ $current_depth[$a] ]; 17141 17142 # don't whine too many times 17143 my $saw_brace_error = get_saw_brace_error(); 17144 if ( 17145 $saw_brace_error <= MAX_NAG_MESSAGES 17146 17147 # if too many closing types have occured, we probably 17148 # already caught this error 17149 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) 17150 ) 17151 { 17152 interrupt_logfile(); 17153 my $rsl = 17154 $starting_line_of_current_depth[$a][ $current_depth[$a] ]; 17155 my $sl = $$rsl[0]; 17156 my $rel = [ $input_line_number, $input_line, $pos ]; 17157 my $el = $$rel[0]; 17158 my ($ess); 17159 17160 if ( $diff == 1 || $diff == -1 ) { 17161 $ess = ''; 17162 } 17163 else { 17164 $ess = 's'; 17165 } 17166 my $bname = 17167 ( $diff > 0 ) 17168 ? $opening_brace_names[$b] 17169 : $closing_brace_names[$b]; 17170 write_error_indicator_pair( @$rsl, '^' ); 17171 my $msg = <<"EOM"; 17172Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el 17173EOM 17174 17175 if ( $diff > 0 ) { 17176 my $rml = 17177 $starting_line_of_current_depth[$b] 17178 [ $current_depth[$b] ]; 17179 my $ml = $$rml[0]; 17180 $msg .= 17181" The most recent un-matched $bname is on line $ml\n"; 17182 write_error_indicator_pair( @$rml, '^' ); 17183 } 17184 write_error_indicator_pair( @$rel, '^' ); 17185 warning($msg); 17186 resume_logfile(); 17187 } 17188 increment_brace_error(); 17189 } 17190 } 17191 $current_depth[$a]--; 17192 } 17193 else { 17194 17195 my $saw_brace_error = get_saw_brace_error(); 17196 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { 17197 my $msg = <<"EOM"; 17198There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number 17199EOM 17200 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); 17201 } 17202 increment_brace_error(); 17203 } 17204 return $seqno; 17205} 17206 17207sub check_final_nesting_depths { 17208 my ($a); 17209 17210 for $a ( 0 .. $#closing_brace_names ) { 17211 17212 if ( $current_depth[$a] ) { 17213 my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ]; 17214 my $sl = $$rsl[0]; 17215 my $msg = <<"EOM"; 17216Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a] 17217The most recent un-matched $opening_brace_names[$a] is on line $sl 17218EOM 17219 indicate_error( $msg, @$rsl, '^' ); 17220 increment_brace_error(); 17221 } 17222 } 17223} 17224 17225sub numerator_expected { 17226 17227 # this is a filter for a possible numerator, in support of guessing 17228 # for the / pattern delimiter token. 17229 # returns - 17230 # 1 - yes 17231 # 0 - can't tell 17232 # -1 - no 17233 # Note: I am using the convention that variables ending in 17234 # _expected have these 3 possible values. 17235 my ( $i, $rtokens ) = @_; 17236 my $next_token = $$rtokens[ $i + 1 ]; 17237 if ( $next_token eq '=' ) { $i++; } # handle /= 17238 my ( $next_nonblank_token, $i_next ) = 17239 find_next_nonblank_token( $i, $rtokens ); 17240 17241 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { 17242 1; 17243 } 17244 else { 17245 17246 if ( $next_nonblank_token =~ /^\s*$/ ) { 17247 0; 17248 } 17249 else { 17250 -1; 17251 } 17252 } 17253} 17254 17255sub pattern_expected { 17256 17257 # This is the start of a filter for a possible pattern. 17258 # It looks at the token after a possbible pattern and tries to 17259 # determine if that token could end a pattern. 17260 # returns - 17261 # 1 - yes 17262 # 0 - can't tell 17263 # -1 - no 17264 my ( $i, $rtokens ) = @_; 17265 my $next_token = $$rtokens[ $i + 1 ]; 17266 if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier 17267 my ( $next_nonblank_token, $i_next ) = 17268 find_next_nonblank_token( $i, $rtokens ); 17269 17270 # list of tokens which may follow a pattern 17271 # (can probably be expanded) 17272 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) 17273 { 17274 1; 17275 } 17276 else { 17277 17278 if ( $next_nonblank_token =~ /^\s*$/ ) { 17279 0; 17280 } 17281 else { 17282 -1; 17283 } 17284 } 17285} 17286 17287sub find_next_nonblank_token_on_this_line { 17288 my ( $i, $rtokens ) = @_; 17289 my $next_nonblank_token; 17290 17291 if ( $i < $max_token_index ) { 17292 $next_nonblank_token = $$rtokens[ ++$i ]; 17293 17294 if ( $next_nonblank_token =~ /^\s*$/ ) { 17295 17296 if ( $i < $max_token_index ) { 17297 $next_nonblank_token = $$rtokens[ ++$i ]; 17298 } 17299 } 17300 } 17301 else { 17302 $next_nonblank_token = ""; 17303 } 17304 return ( $next_nonblank_token, $i ); 17305} 17306 17307sub find_next_nonblank_token { 17308 my ( $i, $rtokens ) = @_; 17309 17310 if ( $i >= $max_token_index ) { 17311 17312 if ( !$peeked_ahead ) { 17313 $peeked_ahead = 1; 17314 $rtokens = peek_ahead_for_nonblank_token($rtokens); 17315 } 17316 } 17317 my $next_nonblank_token = $$rtokens[ ++$i ]; 17318 17319 if ( $next_nonblank_token =~ /^\s*$/ ) { 17320 $next_nonblank_token = $$rtokens[ ++$i ]; 17321 } 17322 return ( $next_nonblank_token, $i ); 17323} 17324 17325sub peek_ahead_for_n_nonblank_pre_tokens { 17326 17327 # returns next n pretokens if they exist 17328 # returns undef's if hits eof without seeing any pretokens 17329 my $max_pretokens = shift; 17330 my $line; 17331 my $i = 0; 17332 my ( $rpre_tokens, $rmap, $rpre_types ); 17333 17334 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 17335 { 17336 $line =~ s/^\s*//; # trim leading blanks 17337 next if ( length($line) <= 0 ); # skip blank 17338 next if ( $line =~ /^#/ ); # skip comment 17339 ( $rpre_tokens, $rmap, $rpre_types ) = 17340 pre_tokenize( $line, $max_pretokens ); 17341 last; 17342 } 17343 return ( $rpre_tokens, $rpre_types ); 17344} 17345 17346# look ahead for next non-blank, non-comment line of code 17347sub peek_ahead_for_nonblank_token { 17348 my $rtokens = shift; 17349 my $line; 17350 my $i = 0; 17351 17352 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 17353 { 17354 $line =~ s/^\s*//; # trim leading blanks 17355 next if ( length($line) <= 0 ); # skip blank 17356 next if ( $line =~ /^#/ ); # skip comment 17357 my ( $rtok, $rmap, $rtype ) = 17358 pre_tokenize( $line, 2 ); # only need 2 pre-tokens 17359 my $j = $max_token_index + 1; 17360 my $tok; 17361 17362 #write_logfile_entry("peeking at line:$line") 17363 # if $rOpts->{'DEBUG'}; 17364 foreach $tok (@$rtok) { 17365 last if ( $tok =~ "\n" ); 17366 $$rtokens[ ++$j ] = $tok; 17367 } 17368 last; 17369 } 17370 return $rtokens; 17371} 17372 17373sub pre_tokenize { 17374 17375 # Break a string, $str, into a sequence of preliminary tokens. We 17376 # are only interested in these types of tokens: identifier strings, 17377 # digits, spaces, and other characters. We cannot do better than 17378 # this yet because we might be in a quoted string or pattern. 17379 # Caller sets $max_tokens_wanted to 0 to get all tokens. 17380 my ( $str, $max_tokens_wanted ) = @_; 17381 my @tokens = (); 17382 my @token_map = (); 17383 my @type = (); 17384 my $i = 0; 17385 $token_map[0] = 0; 17386 17387 while (1) { 17388 17389 # whitespace 17390 if ( $str =~ /\G(\s+)/gc ) { $type[$i] = 'b'; } 17391 17392 # numbers 17393 # note that this must come before identifiers 17394 elsif ( $str =~ /\G(\d+)/gc ) { $type[$i] = 'd'; } 17395 17396 # identifiers 17397 elsif ( $str =~ /\G(\w+)/gc ) { $type[$i] = 'w'; } 17398 17399 # punctuation 17400 elsif ( $str =~ /\G(\W)/gc ) { $type[$i] = $1; } 17401 17402 else { last; } 17403 17404 $tokens[$i] = $1; 17405 $token_map[ ++$i ] = pos($str); 17406 last if ( $i == $max_tokens_wanted ); 17407 } 17408 return ( \@tokens, \@token_map, \@type ); 17409} 17410 17411sub show_tokens { 17412 17413 # this is an old debug routine 17414 my ( $rtokens, $rtoken_map ) = @_; 17415 my $num = scalar(@$rtokens); 17416 my $i; 17417 17418 for ( $i = 0 ; $i < $num ; $i++ ) { 17419 my $len = length( $$rtokens[$i] ); 17420 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; 17421 } 17422} 17423 17424sub find_angle_operator_termination { 17425 17426 # We are looking at a '<' and want to know if it is an angle operator. 17427 # We are to return: 17428 # $i = pretoken index of ending '>' if found, current $i otherwise 17429 # $type = 'Q' if found, '>' otherwise 17430 my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_; 17431 my $i = $i_beg; 17432 my $type = '<'; 17433 pos($input_line) = 1 + $$rtoken_map[$i]; 17434 17435 my $filter; 17436 17437 # we just have to find the next '>' if a term is expected 17438 if ( $expecting == TERM ) { $filter = '[\>]' } 17439 17440 # we have to guess if we don't know what is expected 17441 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } 17442 17443 # shouldn't happen - we shouldn't be here if operator is expected 17444 else { warning("Program Bug in find_angle_operator_termination\n") } 17445 17446 # To illustrate what we might be looking at, in case we are 17447 # guessing, here are some examples of valid angle operators 17448 # (or file globs): 17449 # <tmp_imp/*> 17450 # <FH> 17451 # <$fh> 17452 # <*.c *.h> 17453 # <_> 17454 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) 17455 # <${PREFIX}*img*.$IMAGE_TYPE> 17456 # <img*.$IMAGE_TYPE> 17457 # <Timg*.$IMAGE_TYPE> 17458 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> 17459 # 17460 # Here are some examples of lines which do not have angle operators: 17461 # return undef unless $self->[2]++ < $#{$self->[1]}; 17462 # < 2 || @$t > 17463 # 17464 # the following line from dlister.pl caused trouble: 17465 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; 17466 # 17467 # If the '<' starts an angle operator, it must end on this line and 17468 # it must not have certain characters like ';' and '=' in it. I use 17469 # this to limit the testing. This filter should be improved if 17470 # possible. 17471 17472 if ( $input_line =~ /($filter)/g ) { 17473 17474 if ( $1 eq '>' ) { 17475 17476 # We MAY have found an angle operator termination if we get 17477 # here, but we need to do more to be sure we haven't been 17478 # fooled. 17479 my $pos = pos($input_line); 17480 17481 ######################################debug##### 17482 my $pos_beg = $$rtoken_map[$i]; 17483 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); 17484 17485 #write_diagnostics( "ANGLE? :$str\n"); 17486 #print "ANGLE: found $1 at pos=$pos\n"; 17487 ######################################debug##### 17488 $type = 'Q'; 17489 my $error; 17490 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 17491 17492 # It may be possible that a quote ends midway in a pretoken. 17493 # If this happens, it may be necessary to split the pretoken. 17494 if ($error) { 17495 warning( 17496 "Possible tokinization error..please check this line\n"); 17497 report_possible_bug(); 17498 } 17499 17500 # Now let's see where we stand.... 17501 # OK if math op not possible 17502 if ( $expecting == TERM ) { 17503 } 17504 17505 # OK if there are no more than 2 pre-tokens inside 17506 # (not possible to write 2 token math between < and >) 17507 # This catches most common cases 17508 elsif ( $i <= $i_beg + 3 ) { 17509 write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); 17510 } 17511 17512 # Not sure.. 17513 else { 17514 17515 # Let's try a Brace Test: any braces inside must balance 17516 my $br = 0; 17517 while ( $str =~ /\{/g ) { $br++ } 17518 while ( $str =~ /\}/g ) { $br-- } 17519 my $sb = 0; 17520 while ( $str =~ /\[/g ) { $sb++ } 17521 while ( $str =~ /\]/g ) { $sb-- } 17522 my $pr = 0; 17523 while ( $str =~ /\(/g ) { $pr++ } 17524 while ( $str =~ /\)/g ) { $pr-- } 17525 17526 # if braces do not balance - not angle operator 17527 if ( $br || $sb || $pr ) { 17528 $i = $i_beg; 17529 $type = '<'; 17530 write_diagnostics( 17531 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); 17532 } 17533 17534 # we should keep doing more checks here...to be continued 17535 # Tentatively accepting this as a valid angle operator. 17536 # There are lots more things that can be checked. 17537 else { 17538 write_diagnostics( 17539 "ANGLE-Guessing yes: $str expecting=$expecting\n"); 17540 write_logfile_entry("Guessing angle operator here: $str\n"); 17541 } 17542 } 17543 } 17544 17545 # didn't find ending > 17546 else { 17547 if ( $expecting == TERM ) { 17548 warning("No ending > for angle operator\n"); 17549 } 17550 } 17551 } 17552 return ( $i, $type ); 17553} 17554 17555sub inverse_pretoken_map { 17556 17557 # Starting with the current pre_token index $i, scan forward until 17558 # finding the index of the next pre_token whose position is $pos. 17559 my ( $i, $pos, $rtoken_map ) = @_; 17560 my $error = 0; 17561 17562 while ( ++$i <= $max_token_index ) { 17563 17564 if ( $pos <= $$rtoken_map[$i] ) { 17565 17566 # Let the calling routine handle errors in which we do not 17567 # land on a pre-token boundary. It can happen by running 17568 # perltidy on some non-perl scripts, for example. 17569 if ( $pos < $$rtoken_map[$i] ) { $error = 1 } 17570 $i--; 17571 last; 17572 } 17573 } 17574 return ( $i, $error ); 17575} 17576 17577sub guess_if_pattern_or_conditional { 17578 17579 # this routine is called when we have encountered a ? following an 17580 # unknown bareword, and we must decide if it starts a pattern or not 17581 # input parameters: 17582 # $i - token index of the ? starting possible pattern 17583 # output parameters: 17584 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern 17585 # msg = a warning or diagnostic message 17586 my ( $i, $rtokens, $rtoken_map ) = @_; 17587 my $is_pattern = 0; 17588 my $msg = "guessing that ? after $last_nonblank_token starts a "; 17589 17590 if ( $i >= $max_token_index ) { 17591 $msg .= "conditional (no end to pattern found on the line)\n"; 17592 } 17593 else { 17594 my $ibeg = $i; 17595 $i = $ibeg + 1; 17596 my $next_token = $$rtokens[$i]; # first token after ? 17597 17598 # look for a possible ending ? on this line.. 17599 my $in_quote = 1; 17600 my $quote_depth = 0; 17601 my $quote_character = ''; 17602 my $quote_pos = 0; 17603 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = 17604 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 17605 $quote_pos, $quote_depth ); 17606 17607 if ($in_quote) { 17608 17609 # we didn't find an ending ? on this line, 17610 # so we bias towards conditional 17611 $is_pattern = 0; 17612 $msg .= "conditional (no ending ? on this line)\n"; 17613 17614 # we found an ending ?, so we bias towards a pattern 17615 } 17616 else { 17617 17618 if ( pattern_expected( $i, $rtokens ) >= 0 ) { 17619 $is_pattern = 1; 17620 $msg .= "pattern (found ending ? and pattern expected)\n"; 17621 } 17622 else { 17623 $msg .= "pattern (uncertain, but found ending ?)\n"; 17624 } 17625 } 17626 } 17627 return ( $is_pattern, $msg ); 17628} 17629 17630sub guess_if_pattern_or_division { 17631 17632 # this routine is called when we have encountered a / following an 17633 # unknown bareword, and we must decide if it starts a pattern or is a 17634 # division 17635 # input parameters: 17636 # $i - token index of the / starting possible pattern 17637 # output parameters: 17638 # $is_pattern = 0 if probably division, =1 if probably a pattern 17639 # msg = a warning or diagnostic message 17640 my ( $i, $rtokens, $rtoken_map ) = @_; 17641 my $is_pattern = 0; 17642 my $msg = "guessing that / after $last_nonblank_token starts a "; 17643 17644 if ( $i >= $max_token_index ) { 17645 "division (no end to pattern found on the line)\n"; 17646 } 17647 else { 17648 my $ibeg = $i; 17649 my $divide_expected = numerator_expected( $i, $rtokens ); 17650 $i = $ibeg + 1; 17651 my $next_token = $$rtokens[$i]; # first token after slash 17652 17653 # look for a possible ending / on this line.. 17654 my $in_quote = 1; 17655 my $quote_depth = 0; 17656 my $quote_character = ''; 17657 my $quote_pos = 0; 17658 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = 17659 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 17660 $quote_pos, $quote_depth ); 17661 17662 if ($in_quote) { 17663 17664 # we didn't find an ending / on this line, 17665 # so we bias towards division 17666 if ( $divide_expected >= 0 ) { 17667 $is_pattern = 0; 17668 $msg .= "division (no ending / on this line)\n"; 17669 } 17670 else { 17671 $msg = "multi-line pattern (division not possible)\n"; 17672 $is_pattern = 1; 17673 } 17674 17675 } 17676 17677 # we found an ending /, so we bias towards a pattern 17678 else { 17679 17680 if ( pattern_expected( $i, $rtokens ) >= 0 ) { 17681 17682 if ( $divide_expected >= 0 ) { 17683 17684 if ( $i - $ibeg > 60 ) { 17685 $msg .= "division (matching / too distant)\n"; 17686 $is_pattern = 0; 17687 } 17688 else { 17689 $msg .= "pattern (but division possible too)\n"; 17690 $is_pattern = 1; 17691 } 17692 } 17693 else { 17694 $is_pattern = 1; 17695 $msg .= "pattern (division not possible)\n"; 17696 } 17697 } 17698 else { 17699 17700 if ( $divide_expected >= 0 ) { 17701 $is_pattern = 0; 17702 $msg .= "division (pattern not possible)\n"; 17703 } 17704 else { 17705 $is_pattern = 1; 17706 $msg .= 17707 "pattern (uncertain, but division would not work here)\n"; 17708 } 17709 } 17710 } 17711 } 17712 return ( $is_pattern, $msg ); 17713} 17714 17715sub find_here_doc { 17716 17717 # find the target of a here document, if any 17718 # input parameters: 17719 # $i - token index of the second < of << 17720 # ($i must be less than the last token index if this is called) 17721 # output parameters: 17722 # $found_target = 0 didn't find target; =1 found target 17723 # HERE_TARGET - the target string (may be empty string) 17724 # $i - unchanged if not here doc, 17725 # or index of the last token of the here target 17726 my ( $expecting, $i, $rtokens, $rtoken_map ) = @_; 17727 my $ibeg = $i; 17728 my $found_target = 0; 17729 my $here_doc_target = ''; 17730 my $here_quote_character = ''; 17731 my ( $next_nonblank_token, $i_next_nonblank, $next_token ); 17732 $next_token = $$rtokens[ $i + 1 ]; 17733 17734 # perl allows a backslash before the target string (heredoc.t) 17735 my $backslash = 0; 17736 if ( $next_token eq '\\' ) { 17737 $backslash = 1; 17738 $next_token = $$rtokens[ $i + 2 ]; 17739 } 17740 17741 ( $next_nonblank_token, $i_next_nonblank ) = 17742 find_next_nonblank_token_on_this_line( $i, $rtokens ); 17743 17744 if ( $next_nonblank_token =~ /[\'\"\`]/ ) { 17745 17746 my $in_quote = 1; 17747 my $quote_depth = 0; 17748 my $quote_pos = 0; 17749 17750 ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) = 17751 follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, 17752 $here_quote_character, $quote_pos, $quote_depth ); 17753 17754 if ($in_quote) { # didn't find end of quote, so no target found 17755 $i = $ibeg; 17756 } 17757 else { # found ending quote 17758 my $j; 17759 $found_target = 1; 17760 17761 my $tokj; 17762 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { 17763 $tokj = $$rtokens[$j]; 17764 17765 # we have to remove any backslash before the quote character 17766 # so that the here-doc-target exactly matches this string 17767 next 17768 if ( $tokj eq "\\" 17769 && $j < $i - 1 17770 && $$rtokens[ $j + 1 ] eq $here_quote_character ); 17771 $here_doc_target .= $tokj; 17772 } 17773 } 17774 } 17775 17776 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { 17777 $found_target = 1; 17778 write_logfile_entry( 17779 "found blank here-target after <<; suggest using \"\"\n"); 17780 $i = $ibeg; 17781 } 17782 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << 17783 17784 my $here_doc_expected; 17785 if ( $expecting == UNKNOWN ) { 17786 $here_doc_expected = guess_if_here_doc($next_token); 17787 } 17788 else { 17789 $here_doc_expected = 1; 17790 } 17791 17792 if ($here_doc_expected) { 17793 $found_target = 1; 17794 $here_doc_target = $next_token; 17795 $i = $ibeg + 1; 17796 } 17797 17798 } 17799 else { 17800 17801 if ( $expecting == TERM ) { 17802 $found_target = 1; 17803 write_logfile_entry("Note: bare here-doc operator <<\n"); 17804 } 17805 else { 17806 $i = $ibeg; 17807 } 17808 } 17809 17810 # patch to neglect any prepended backslash 17811 if ( $found_target && $backslash ) { $i++ } 17812 17813 return ( $found_target, $here_doc_target, $here_quote_character, $i ); 17814} 17815 17816# try to resolve here-doc vs. shift by looking ahead for 17817# non-code or the end token (currently only looks for end token) 17818# returns 1 if it is probably a here doc, 0 if not 17819sub guess_if_here_doc { 17820 17821 # This is how many lines we will search for a target as part of the 17822 # guessing strategy. It is a constant because there is probably 17823 # little reason to change it. 17824 use constant HERE_DOC_WINDOW => 40; 17825 17826 my $next_token = shift; 17827 my $here_doc_expected = 0; 17828 my $line; 17829 my $k = 0; 17830 my $msg = "checking <<"; 17831 17832 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) 17833 { 17834 chomp $line; 17835 17836 if ( $line =~ /^$next_token$/ ) { 17837 $msg .= " -- found target $next_token ahead $k lines\n"; 17838 $here_doc_expected = 1; # got it 17839 last; 17840 } 17841 last if ( $k >= HERE_DOC_WINDOW ); 17842 } 17843 17844 unless ($here_doc_expected) { 17845 17846 if ( !defined($line) ) { 17847 $here_doc_expected = -1; # hit eof without seeing target 17848 $msg .= " -- must be shift; target $next_token not in file\n"; 17849 17850 } 17851 else { # still unsure..taking a wild guess 17852 17853 if ( !$is_constant{$current_package}{$next_token} ) { 17854 $here_doc_expected = 1; 17855 $msg .= 17856 " -- guessing it's a here-doc ($next_token not a constant)\n"; 17857 } 17858 else { 17859 $msg .= " -- guessing it's a shift\n"; 17860 $msg .= 17861 " -- guessing it's a shift ($next_token is a constant)\n"; 17862 } 17863 } 17864 } 17865 write_logfile_entry($msg); 17866 return $here_doc_expected; 17867} 17868 17869sub do_quote { 17870 17871 # follow (or continue following) quoted string or pattern 17872 # $in_quote return code: 17873 # 0 - ok, found end 17874 # 1 - still must find end of quote whose target is $quote_character 17875 # 2 - still looking for end of first of two quotes 17876 my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens, 17877 $rtoken_map ) 17878 = @_; 17879 17880 if ( $in_quote == 2 ) { # two quotes/patterns to follow 17881 my $ibeg = $i; 17882 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = 17883 follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, 17884 $quote_pos, $quote_depth ); 17885 17886 if ( $in_quote == 1 ) { 17887 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } 17888 $quote_character = ''; 17889 } 17890 } 17891 17892 if ( $in_quote == 1 ) { # one (more) quote to follow 17893 my $ibeg = $i; 17894 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = 17895 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 17896 $quote_pos, $quote_depth ); 17897 } 17898 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ); 17899} 17900 17901sub scan_number_do { 17902 17903=pod 17904 17905 scan a number in any of the formats that Perl accepts 17906 Underbars (_) are allowed in decimal numbers. 17907 input parameters - 17908 $input_line - the string to scan 17909 $i - pre_token index to start scanning 17910 $rtoken_map - reference to the pre_token map giving starting 17911 character position in $input_line of token $i 17912 output parameters - 17913 $i - last pre_token index of the number just scanned 17914 number - the number (characters); or undef if not a number 17915 17916=cut 17917 17918 my ( $input_line, $i, $rtoken_map, $input_type ) = @_; 17919 my $pos_beg = $$rtoken_map[$i]; 17920 my $pos; 17921 my $i_begin = $i; 17922 my $number = undef; 17923 my $type = $input_type; 17924 17925 my $first_char = substr( $input_line, $pos_beg, 1 ); 17926 17927 # Look for bad starting characters; Shouldn't happen.. 17928 if ( $first_char !~ /[\d\.\+\-Ee]/ ) { 17929 warning("Program bug - scan_number given character $first_char\n"); 17930 report_definite_bug(); 17931 return ( $i, $type, $number ); 17932 } 17933 17934 # handle v-string without leading 'v' character ('Two Dot' rule) 17935 # (vstring.t) 17936 pos($input_line) = $pos_beg; 17937 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { 17938 $pos = pos($input_line); 17939 my $numc = $pos - $pos_beg; 17940 $number = substr( $input_line, $pos_beg, $numc ); 17941 $type = 'v'; 17942 unless ($saw_v_string) { report_v_string($number) } 17943 } 17944 17945 # handle octal, hex, binary 17946 if ( !defined($number) ) { 17947 pos($input_line) = $pos_beg; 17948 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) 17949 { 17950 $pos = pos($input_line); 17951 my $numc = $pos - $pos_beg; 17952 $number = substr( $input_line, $pos_beg, $numc ); 17953 $type = 'n'; 17954 } 17955 } 17956 17957 # handle decimal 17958 if ( !defined($number) ) { 17959 pos($input_line) = $pos_beg; 17960 17961 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { 17962 $pos = pos($input_line); 17963 17964 # watch out for things like 0..40 which would give 0. by this; 17965 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) 17966 && ( substr( $input_line, $pos, 1 ) eq '.' ) ) 17967 { 17968 $pos--; 17969 } 17970 my $numc = $pos - $pos_beg; 17971 $number = substr( $input_line, $pos_beg, $numc ); 17972 $type = 'n'; 17973 } 17974 } 17975 17976 # filter out non-numbers like e + - . e2 .e3 +e6 17977 # the rule: at least one digit, and any 'e' must be preceded by a digit 17978 if ( $number !~ /\d+[eE]?/ ) { 17979 $number = undef; 17980 $type = $input_type; 17981 return ( $i, $type, $number ); 17982 } 17983 17984 # Found a number; now we must convert back from character position 17985 # to pre_token index. An error here implies user syntax error. 17986 # An example would be an invalid octal number like '009'. 17987 my $error; 17988 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 17989 if ($error) { warning("Possibly invalid number\n") } 17990 17991 return ( $i, $type, $number ); 17992} 17993 17994sub scan_bare_identifier_do { 17995 17996 # this routine is called to scan a token starting with an alphanumeric 17997 # variable or package separator, :: or '. 17998 17999 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_; 18000 my $i_begin = $i; 18001 my $package = undef; 18002 18003 my $i_beg = $i; 18004 18005 # we have to back up one pretoken at a :: since each : is one pretoken 18006 if ( $tok eq '::' ) { $i_beg-- } 18007 if ( $tok eq '->' ) { $i_beg-- } 18008 my $pos_beg = $$rtoken_map[$i_beg]; 18009 pos($input_line) = $pos_beg; 18010 18011 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:->)?(\w+)/gc ) { 18012 18013 my $pos = pos($input_line); 18014 my $numc = $pos - $pos_beg; 18015 $tok = substr( $input_line, $pos_beg, $numc ); 18016 18017 # type 'w' includes anything without leading type info 18018 # ($,%,@,*) including something like abc::def::ghi 18019 $type = 'w'; 18020 18021 if ( defined($1) ) { 18022 $package = $1; 18023 $package =~ s/\'/::/g; 18024 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 18025 $package =~ s/::$//; 18026 } 18027 else { 18028 $package = $current_package; 18029 18030 if ( $is_keyword{$tok} ) { 18031 $type = 'k'; 18032 } 18033 } 18034 my $sub_name = $2; 18035 18036 # if it is a bareword.. 18037 if ( $type eq 'w' ) { 18038 18039 # check for v-string with leading 'v' type character 18040 # (This seems to have presidence over filehandle, type 'Y') 18041 if ( $tok =~ /^v\d+$/ ) { 18042 18043 # we only have the first part - something like 'v101' - 18044 # look for more 18045 if ( $input_line =~ m/\G(\.\d+)+/gc ) { 18046 $pos = pos($input_line); 18047 $numc = $pos - $pos_beg; 18048 $tok = substr( $input_line, $pos_beg, $numc ); 18049 } 18050 $type = 'v'; 18051 18052 # warn if this version can't handle v-strings 18053 unless ($saw_v_string) { report_v_string($tok) } 18054 } 18055 18056 elsif ( $is_constant{$package}{$sub_name} ) { 18057 $type = 'C'; 18058 } 18059 18060 # bareword after sort has implied empty prototype; for example: 18061 # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); 18062 # This has priority over whatever the user has specified. 18063 elsif ( $last_nonblank_token eq 'sort' 18064 && $last_nonblank_type eq 'k' ) 18065 { 18066 $type = 'Z'; 18067 } 18068 18069 # Note: strangely, perl does not seem to really let you create 18070 # functions which act like eval and do, in the sense that eval 18071 # and do may have operators following the final }, but any operators 18072 # that you create with prototype (&) apparently do not allow 18073 # trailing operators, only terms. This seems strange. 18074 # If this ever changes, here is the update 18075 # to make perltidy behave accordingly: 18076 18077 # elsif ( $is_block_function{$package}{$tok} ) { 18078 # $tok='eval'; # patch to do braces like eval - doesn't work 18079 # $type = 'k'; 18080 #} 18081 # FIXME: This should become a separate type to allow for different 18082 # future behavior: 18083 elsif ( $is_block_function{$package}{$sub_name} ) { 18084 $type = 'G'; 18085 } 18086 18087 elsif ( $is_block_list_function{$package}{$sub_name} ) { 18088 $type = 'G'; 18089 } 18090 elsif ( $is_user_function{$package}{$sub_name} ) { 18091 $type = 'U'; 18092 $prototype = $user_function_prototype{$package}{$sub_name}; 18093 } 18094 18095 # check for indirect object 18096 elsif ( 18097 18098 # added 2001-03-27: must not be followed immediately by '(' 18099 # see fhandle.t 18100 ( $input_line !~ m/\G\(/gc ) 18101 18102 # and 18103 && ( 18104 18105 # preceded by keyword like 'print', 'printf' and friends 18106 ( $last_nonblank_token =~ /$indirect_object_taker/ ) 18107 18108 # or preceded by something like 'print(' or 'printf(' 18109 || ( 18110 ( $last_nonblank_token eq '(' ) 18111 && ( $paren_type[$paren_depth] =~ 18112 /$indirect_object_taker/ ) 18113 ) 18114 ) 18115 ) 18116 { 18117 18118 # may not be indirect object unless followed by a space 18119 if ( $input_line =~ m/\G\s+/gc ) { 18120 $type = 'Y'; 18121 18122 # Abandon Hope ... 18123 # Perl's indirect object notation is a very bad 18124 # thing and can cause subtle bugs, especially for 18125 # beginning programmers. And I haven't even been 18126 # able to figure out a sane warning scheme which 18127 # doesn't get in the way of good scripts. 18128 18129 # Complain if a filehandle has any lower case 18130 # letters. This is suggested good practice, but the 18131 # main reason for this warning is that prior to 18132 # release 20010328, perltidy incorrectly parsed a 18133 # function call after a print/printf, with the 18134 # result that a space got added before the opening 18135 # paren, thereby converting the function name to a 18136 # filehandle according to perl's weird rules. This 18137 # will not usually generate a syntax error, so this 18138 # is a potentially serious bug. By warning 18139 # of filehandles with any lower case letters, 18140 # followed by opening parens, we will help the user 18141 # find almost all of these older errors. 18142 # use 'sub_name' because something like 18143 # main::MYHANDLE is ok for filehandle 18144 if ( $sub_name =~ /[a-z]/ ) { 18145 18146 # could be bug caused by older perltidy if 18147 # followed by '(' 18148 if ( $input_line =~ m/\G\s*\(/gc ) { 18149 complain( 18150"Caution: unknown word '$tok' in indirect object slot\n" 18151 ); 18152 } 18153 } 18154 } 18155 18156 # bareword not followed by a space -- may not be filehandle 18157 # (may be function call defined in a 'use' statement) 18158 else { 18159 $type = 'Z'; 18160 } 18161 } 18162 } 18163 18164 # Now we must convert back from character position 18165 # to pre_token index. 18166 # I don't think an error flag can occur here ..but who knows 18167 my $error; 18168 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 18169 if ($error) { 18170 warning("scan_bare_identifier: Possibly invalid tokenization\n"); 18171 } 18172 } 18173 18174 # no match but line not blank - could be syntax error 18175 # perl will take '::' alone without complaint 18176 else { 18177 $type = 'w'; 18178 18179 # change this warning to log message if it becomes annoying 18180 warning("didn't find identifier after leading ::\n"); 18181 } 18182 return ( $i, $tok, $type, $prototype ); 18183} 18184 18185sub scan_id_do { 18186 18187=pod 18188 18189This is the new scanner and will eventually replace scan_identifier. 18190Only type 'sub' and 'package' are implemented. 18191Token types $ * % @ & -> are not yet implemented. 18192 18193Scan identifier following a type token. 18194The type of call depends on $id_scan_state: $id_scan_state = '' 18195for starting call, in which case $tok must be the token defining 18196the type. 18197 18198If the type token is the last nonblank token on the line, a value 18199of $id_scan_state = $tok is returned, indicating that further 18200calls must be made to get the identifier. If the type token is 18201not the last nonblank token on the line, the identifier is 18202scanned and handled and a value of '' is returned. 18203 18204=cut 18205 18206 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_; 18207 my $type = ''; 18208 my ( $i_beg, $pos_beg ); 18209 18210 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 18211 #my ($a,$b,$c) = caller; 18212 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; 18213 18214 # on re-entry, start scanning at first token on the line 18215 if ($id_scan_state) { 18216 $i_beg = $i; 18217 $type = ''; 18218 } 18219 18220 # on initial entry, start scanning just after type token 18221 else { 18222 $i_beg = $i + 1; 18223 $id_scan_state = $tok; 18224 $type = 't'; 18225 } 18226 18227 # find $i_beg = index of next nonblank token, 18228 # and handle empty lines 18229 my $blank_line = 0; 18230 my $next_nonblank_token = $$rtokens[$i_beg]; 18231 if ( $i_beg > $max_token_index ) { 18232 $blank_line = 1; 18233 } 18234 else { 18235 18236 # only a '#' immediately after a '$' is not a comment 18237 if ( $next_nonblank_token eq '#' ) { 18238 unless ( $tok eq '$' ) { 18239 $blank_line = 1; 18240 } 18241 } 18242 18243 if ( $next_nonblank_token =~ /^\s/ ) { 18244 ( $next_nonblank_token, $i_beg ) = 18245 find_next_nonblank_token_on_this_line( $i_beg, $rtokens ); 18246 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { 18247 $blank_line = 1; 18248 } 18249 } 18250 } 18251 18252 # handle non-blank line; identifier, if any, must follow 18253 unless ($blank_line) { 18254 18255 if ( $id_scan_state eq 'sub' ) { 18256 ( $i, $tok, $type ) = 18257 do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens, 18258 $rtoken_map ); 18259 } 18260 18261 elsif ( $id_scan_state eq 'package' ) { 18262 ( $i, $tok, $type ) = 18263 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, 18264 $rtoken_map ); 18265 } 18266 18267 else { 18268 warning("invalid token in scan_id: $tok\n"); 18269 } 18270 $id_scan_state = ''; 18271 } 18272 18273 if ( $id_scan_state && ( !defined($type) || !$type ) ) { 18274 18275 # shouldn't happen: 18276 warning( 18277"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" 18278 ); 18279 report_definite_bug(); 18280 } 18281 18282 TOKENIZER_DEBUG_FLAG_NSCAN && do { 18283 print 18284 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 18285 }; 18286 return ( $i, $tok, $type, $id_scan_state ); 18287} 18288 18289sub do_scan_sub { 18290 18291 # do_scan_sub parses a sub name and prototype 18292 # it is called with $i_beg equal to the index of the first nonblank 18293 # token following a 'sub' token. 18294 18295 # TODO: add future error checks to be sure we have a valid 18296 # sub name. For example, 'sub &doit' is wrong. Also, be sure 18297 # a name is given if and only if a non-anonymous sub is 18298 # appropriate. 18299 18300 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_; 18301 my $subname = undef; 18302 my $package = undef; 18303 my $proto = undef; 18304 my $attrs = undef; 18305 18306 my $pos_beg = $$rtoken_map[$i_beg]; 18307 pos($input_line) = $pos_beg; 18308 18309 # sub NAME PROTO ATTRS BLOCK 18310 #if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*)(\w+)(\s*\([^){]*\))?/gc ) { 18311 if ( 18312 $input_line =~ m/\G\s* 18313 ((?:\w*(?:'|::))*) # package - something that ends in :: or ' 18314 (\w+) # NAME - required 18315 (\s*\([^){]*\))? # PROTO - something in parens 18316 (\s*:(\s*(\w+))+)? # ATTRS - leading : followed by one or more words 18317 /gcx 18318 ) 18319 { 18320 $subname = $2; 18321 $proto = $3; 18322 $attrs = $4; 18323 18324 if ($attrs) { 18325 18326 # unused for now 18327 18328 } 18329 $package = ( defined($1) && $1 ) ? $1 : $current_package; 18330 $package =~ s/\'/::/g; 18331 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 18332 $package =~ s/::$//; 18333 my $pos = pos($input_line); 18334 my $numc = $pos - $pos_beg; 18335 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); 18336 $type = 'i'; 18337 18338 # We must convert back from character position 18339 # to pre_token index. 18340 # I don't think an error flag can occur here ..but ? 18341 my $error; 18342 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 18343 if ($error) { warning("Possibly invalid sub\n") } 18344 18345 # TESTING: check for multiple definitions of a sub 18346 my ( $next_nonblank_token, $i_next ) = 18347 find_next_nonblank_token_on_this_line( $i, $rtokens ); 18348 18349 if ( $next_nonblank_token =~ /^(\s*|#)$/ ) 18350 { # skip blank or side comment 18351 my ( $rpre_tokens, $rpre_types ) = 18352 peek_ahead_for_n_nonblank_pre_tokens(1); 18353 if ( defined($rpre_tokens) && @$rpre_tokens ) { 18354 $next_nonblank_token = $rpre_tokens->[0]; 18355 } 18356 else { 18357 $next_nonblank_token = '}'; 18358 } 18359 } 18360 18361 if ( $next_nonblank_token eq '{' ) { 18362 if ( $saw_function_definition{$package}{$subname} ) { 18363 my $lno = $saw_function_definition{$package}{$subname}; 18364 warning( 18365"already saw definition of 'sub $subname' in package '$package' at line $lno\n" 18366 ); 18367 } 18368 $saw_function_definition{$package}{$subname} = $input_line_number; 18369 } 18370 elsif ( $next_nonblank_token eq ';' ) { 18371 } 18372 elsif ( $next_nonblank_token eq '}' ) { 18373 } 18374 elsif ($next_nonblank_token) { # EOF technically ok 18375 warning( 18376"expecting ';' or '{' after definition or declaration of sub $subname but saw ($next_nonblank_token)\n" 18377 ); 18378 18379 } 18380 18381 if ( defined($proto) ) { 18382 $proto =~ s/^\s*\(\s*//; 18383 $proto =~ s/\s*\)$//; 18384 if ($proto) { 18385 $is_user_function{$package}{$subname} = 1; 18386 $user_function_prototype{$package}{$subname} = "($proto)"; 18387 18388 # prototypes containing '&' must be treated specially.. 18389 if ( $proto =~ /\&/ ) { 18390 18391 # right curly braces of prototypes ending in 18392 # '&' may be followed by an operator 18393 if ( $proto =~ /\&$/ ) { 18394 $is_block_function{$package}{$subname} = 1; 18395 } 18396 18397 # right curly braces of prototypes NOT ending in 18398 # '&' may NOT be followed by an operator 18399 elsif ( $proto !~ /\&$/ ) { 18400 $is_block_list_function{$package}{$subname} = 1; 18401 } 18402 } 18403 } 18404 else { 18405 $is_constant{$package}{$subname} = 1; 18406 } 18407 } 18408 else { 18409 $is_user_function{$package}{$subname} = 1; 18410 } 18411 18412 } 18413 18414 # look for prototype following an anonymous sub so they don't get 18415 # stranded. ( sub.t ) 18416 #elsif ( $input_line =~ m/\G\s*\([^){]*\)/gc ) 18417 # sub PROTO ATTRS BLOCK 18418 elsif ( 18419 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO 18420 (\s*:(\s*(\w+))+)? # ATTRS 18421 /gcx 18422 && ( $1 || $2 ) 18423 ) 18424 { 18425 18426 # remove this after testing 18427 if ($2) { write_diagnostics("Found anonymous sub ATTRS $2 \n"); } 18428 my $pos = pos($input_line); 18429 my $error; 18430 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 18431 if ($error) { warning("Possibly invalid sub\n") } 18432 } 18433 18434 # no match but line not blank 18435 else { 18436 } 18437 return ( $i, $tok, $type ); 18438} 18439 18440sub do_scan_package { 18441 18442 # do_scan_package parses a package name 18443 # it is called with $i_beg equal to the index of the first nonblank 18444 # token following a 'package' token. 18445 18446 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_; 18447 my $package = undef; 18448 my $pos_beg = $$rtoken_map[$i_beg]; 18449 pos($input_line) = $pos_beg; 18450 18451 # handle non-blank line; package name, if any, must follow 18452 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { 18453 $package = $1; 18454 $package = ( defined($1) && $1 ) ? $1 : 'main'; 18455 $package =~ s/\'/::/g; 18456 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 18457 $package =~ s/::$//; 18458 my $pos = pos($input_line); 18459 my $numc = $pos - $pos_beg; 18460 $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); 18461 $type = 'i'; 18462 18463 # Now we must convert back from character position 18464 # to pre_token index. 18465 # I don't think an error flag can occur here ..but ? 18466 my $error; 18467 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); 18468 if ($error) { warning("Possibly invalid package\n") } 18469 $current_package = $package; 18470 18471 # check for error 18472 my ( $next_nonblank_token, $i_next ) = 18473 find_next_nonblank_token( $i, $rtokens ); 18474 if ( $next_nonblank_token !~ /^[;\}]$/ ) { 18475 warning( 18476 "Unexpected '$next_nonblank_token' after package name '$tok'\n" 18477 ); 18478 } 18479 } 18480 18481 # no match but line not blank -- 18482 # could be a label with name package, like package: , for example. 18483 else { 18484 $type = 'k'; 18485 } 18486 18487 return ( $i, $tok, $type ); 18488} 18489 18490sub scan_identifier_do { 18491 18492=pod 18493 18494NOTE: This develomental scanner WILL BE REPLACED by the newer version 18495"scan_id". The reason is that scan_id will be regex based, which makes 18496maintainence much easier, and probably improves the speed. 18497 18498This routine assembles tokens into identifiers. 18499It maintains a scan state, id_scan_state. It updates 18500id_scan_state based upon current id_scan_state and token, and returns an 18501updated id_scan_state and the next index after the identifier. 18502 18503=cut 18504 18505 my ( $i, $id_scan_state, $identifier, $rtokens ) = @_; 18506 my $i_begin = $i; 18507 my $type = ''; 18508 my $tok_begin = $$rtokens[$i_begin]; 18509 if ( $tok_begin eq ':' ) { $tok_begin = '::' } 18510 my $id_scan_state_begin = $id_scan_state; 18511 my $identifier_begin = $identifier; 18512 my $tok = $tok_begin; 18513 my $message = ""; 18514 18515 # these flags will be used to help figure out the type: 18516 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); 18517 my $saw_type; 18518 18519 # allow old package separator (') except in 'use' statement 18520 my $allow_tick = ( $last_nonblank_token ne 'use' ); 18521 18522 # get started by defining a type and a state if necessary 18523 unless ($id_scan_state) { 18524 $context = UNKNOWN_CONTEXT; 18525 18526 # fixup for digraph 18527 if ( $tok eq '>' ) { 18528 $tok = '->'; 18529 $tok_begin = $tok; 18530 } 18531 $identifier = $tok; 18532 18533 if ( $tok eq '$' || $tok eq '*' ) { 18534 $id_scan_state = '$'; 18535 $context = SCALAR_CONTEXT; 18536 } 18537 elsif ( $tok eq '%' || $tok eq '@' ) { 18538 $id_scan_state = '$'; 18539 $context = LIST_CONTEXT; 18540 } 18541 elsif ( $tok eq '&' ) { 18542 $id_scan_state = '&'; 18543 } 18544 elsif ( $tok eq 'sub' or $tok eq 'package' ) { 18545 $saw_alpha = 0; # 'sub' is considered type info here 18546 $id_scan_state = '$'; 18547 $identifier .= ' '; # need a space to separate sub from sub name 18548 } 18549 elsif ( $tok eq '::' ) { 18550 $id_scan_state = 'A'; 18551 } 18552 elsif ( $tok =~ /^[A-Za-z_]/ ) { 18553 $id_scan_state = ':'; 18554 } 18555 elsif ( $tok eq '->' ) { 18556 $id_scan_state = '$'; 18557 } 18558 else { 18559 18560 # shouldn't happen 18561 my ( $a, $b, $c ) = caller; 18562 warning("Program Bug: scan_identifier given bad token = $tok \n"); 18563 warning(" called from sub $a line: $c\n"); 18564 report_definite_bug(); 18565 } 18566 $saw_type = !$saw_alpha; 18567 } 18568 else { 18569 $i--; 18570 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); 18571 } 18572 18573 # now loop to gather the identifier 18574 my $i_save = $i; 18575 18576 while ( $i < $max_token_index ) { 18577 $i_save = $i unless ( $tok =~ /^\s*$/ ); 18578 $tok = $$rtokens[ ++$i ]; 18579 18580 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { 18581 $tok = '::'; 18582 $i++; 18583 } 18584 18585 if ( $id_scan_state eq '$' ) { # starting variable name 18586 18587 if ( $tok eq '$' ) { 18588 18589 $identifier .= $tok; 18590 18591 # we've got a punctuation variable if end of line (punct.t) 18592 if ( $i == $max_token_index ) { 18593 $type = 'i'; 18594 $id_scan_state = ''; 18595 last; 18596 } 18597 } 18598 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. 18599 $saw_alpha = 1; 18600 $id_scan_state = ':'; # now need :: 18601 $identifier .= $tok; 18602 } 18603 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 18604 $saw_alpha = 1; 18605 $id_scan_state = ':'; # now need :: 18606 $identifier .= $tok; 18607 18608 # Perl will accept leading digits in identifiers, 18609 # although they may not always produce useful results. 18610 # Something like $main::0 is ok. But this also works: 18611 # 18612 # sub howdy::123::bubba{ print "bubba $54321!\n" } 18613 # howdy::123::bubba(); 18614 # 18615 } 18616 elsif ( $tok =~ /^[0-9]/ ) { # numeric 18617 $saw_alpha = 1; 18618 $id_scan_state = ':'; # now need :: 18619 $identifier .= $tok; 18620 } 18621 elsif ( $tok eq '::' ) { 18622 $id_scan_state = 'A'; 18623 $identifier .= $tok; 18624 } 18625 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array 18626 $identifier .= $tok; # keep same state, a $ could follow 18627 } 18628 elsif ( $tok eq '{' ) { # skip something like ${xxx} or ->{ 18629 $id_scan_state = ''; 18630 18631 # if this is the first token of a line, any tokens for this 18632 # identifier have already been accumulated 18633 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } 18634 $i = $i_save; 18635 last; 18636 } 18637 18638 # space ok after leading $ % * & @ 18639 elsif ( $tok =~ /^\s*$/ ) { 18640 18641 if ( $identifier =~ /^[\$\%\*\&\@]/ ) { 18642 18643 if ( length($identifier) > 1 ) { 18644 $id_scan_state = ''; 18645 $i = $i_save; 18646 $type = 'i'; # probably punctuation variable 18647 last; 18648 } 18649 else { 18650 18651 # spaces after $'s are common, and space after @ 18652 # is harmless, so only complain about space 18653 # after other type characters. Space after $ and 18654 # @ will be removed in formatting. Report space 18655 # after % and * because they might indicate a 18656 # parsing error. In other words '% ' might be a 18657 # modulo operator. Delete this warning if it 18658 # gets annoying. 18659 if ( $identifier !~ /^[\@\$]$/ ) { 18660 $message = 18661 "Space in identifier, following $identifier\n"; 18662 } 18663 } 18664 } 18665 18666 # else: 18667 # space after '->' is ok 18668 } 18669 elsif ( $tok eq '^' ) { 18670 18671 # check for some special variables like $^W 18672 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 18673 $identifier .= $tok; 18674 $id_scan_state = 'A'; 18675 } 18676 else { 18677 $id_scan_state = ''; 18678 } 18679 } 18680 else { # something else 18681 18682 # check for various punctuation variables 18683 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 18684 $identifier .= $tok; 18685 } 18686 18687 elsif ( $identifier eq '$#' ) { 18688 18689 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } 18690 18691 # perl seems to allow just these: $#: $#- $#+ 18692 elsif ( $tok =~ /^[\:\-\+]$/ ) { 18693 $type = 'i'; 18694 $identifier .= $tok; 18695 } 18696 else { 18697 $i = $i_save; 18698 write_logfile_entry( 'Use of $# is deprecated' . "\n" ); 18699 } 18700 } 18701 elsif ( $identifier eq '$$' ) { 18702 18703 # perl does not allow references to punctuation 18704 # variables without braces. For example, this 18705 # won't work: 18706 # $:=\4; 18707 # $a = $$:; 18708 # You would have to use 18709 # $a = ${$:}; 18710 18711 $i = $i_save; 18712 if ( $tok eq '{' ) { $type = 't' } 18713 else { $type = 'i' } 18714 } 18715 elsif ( $identifier eq '->' ) { 18716 $i = $i_save; 18717 } 18718 else { 18719 $i = $i_save; 18720 if ( length($identifier) == 1 ) { $identifier = ''; } 18721 } 18722 $id_scan_state = ''; 18723 last; 18724 } 18725 } 18726 elsif ( $id_scan_state eq '&' ) { # starting sub call? 18727 18728 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. 18729 $id_scan_state = ':'; # now need :: 18730 $saw_alpha = 1; 18731 $identifier .= $tok; 18732 } 18733 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 18734 $id_scan_state = ':'; # now need :: 18735 $saw_alpha = 1; 18736 $identifier .= $tok; 18737 } 18738 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 18739 $id_scan_state = ':'; # now need :: 18740 $saw_alpha = 1; 18741 $identifier .= $tok; 18742 } 18743 elsif ( $tok =~ /^\s*$/ ) { # allow space 18744 } 18745 elsif ( $tok eq '::' ) { # leading :: 18746 $id_scan_state = 'A'; # accept alpha next 18747 $identifier .= $tok; 18748 } 18749 elsif ( $tok eq '{' ) { 18750 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } 18751 $i = $i_save; 18752 $id_scan_state = ''; 18753 last; 18754 } 18755 else { 18756 18757 # punctuation variable? 18758 # testfile: cunningham4.pl 18759 if ( $identifier eq '&' ) { 18760 $identifier .= $tok; 18761 } 18762 else { 18763 $identifier = ''; 18764 $i = $i_save; 18765 $type = '&'; 18766 } 18767 $id_scan_state = ''; 18768 last; 18769 } 18770 } 18771 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) 18772 18773 if ( $tok =~ /^[A-Za-z_]/ ) { # found it 18774 $identifier .= $tok; 18775 $id_scan_state = ':'; # now need :: 18776 $saw_alpha = 1; 18777 } 18778 elsif ( $tok eq "'" && $allow_tick ) { 18779 $identifier .= $tok; 18780 $id_scan_state = ':'; # now need :: 18781 $saw_alpha = 1; 18782 } 18783 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 18784 $identifier .= $tok; 18785 $id_scan_state = ':'; # now need :: 18786 $saw_alpha = 1; 18787 } 18788 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 18789 $id_scan_state = '('; 18790 $identifier .= $tok; 18791 } 18792 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 18793 $id_scan_state = ')'; 18794 $identifier .= $tok; 18795 } 18796 else { 18797 $id_scan_state = ''; 18798 $i = $i_save; 18799 last; 18800 } 18801 } 18802 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha 18803 18804 if ( $tok eq '::' ) { # got it 18805 $identifier .= $tok; 18806 $id_scan_state = 'A'; # now require alpha 18807 } 18808 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here 18809 $identifier .= $tok; 18810 $id_scan_state = ':'; # now need :: 18811 $saw_alpha = 1; 18812 } 18813 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 18814 $identifier .= $tok; 18815 $id_scan_state = ':'; # now need :: 18816 $saw_alpha = 1; 18817 } 18818 elsif ( $tok eq "'" && $allow_tick ) { # tick 18819 18820 if ( $is_keyword{$identifier} ) { 18821 $id_scan_state = ''; # that's all 18822 $i = $i_save; 18823 } 18824 else { 18825 $identifier .= $tok; 18826 } 18827 } 18828 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 18829 $id_scan_state = '('; 18830 $identifier .= $tok; 18831 } 18832 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 18833 $id_scan_state = ')'; 18834 $identifier .= $tok; 18835 } 18836 else { 18837 $id_scan_state = ''; # that's all 18838 $i = $i_save; 18839 last; 18840 } 18841 } 18842 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype 18843 18844 if ( $tok eq '(' ) { # got it 18845 $identifier .= $tok; 18846 $id_scan_state = ')'; # now find the end of it 18847 } 18848 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going 18849 $identifier .= $tok; 18850 } 18851 else { 18852 $id_scan_state = ''; # that's all - no prototype 18853 $i = $i_save; 18854 last; 18855 } 18856 } 18857 elsif ( $id_scan_state eq ')' ) { # looking for ) to end 18858 18859 if ( $tok eq ')' ) { # got it 18860 $identifier .= $tok; 18861 $id_scan_state = ''; # all done 18862 last; 18863 } 18864 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { 18865 $identifier .= $tok; 18866 } 18867 else { # probable error in script, but keep going 18868 warning("Unexpected '$tok' while seeking end of prototype\n"); 18869 $identifier .= $tok; 18870 } 18871 } 18872 else { # can get here due to error in initialization 18873 $id_scan_state = ''; 18874 $i = $i_save; 18875 last; 18876 } 18877 } 18878 18879 if ( $id_scan_state eq ')' ) { 18880 warning("Hit end of line while seeking ) to end prototype\n"); 18881 } 18882 18883 # once we enter the actual identifier, it may not extend beyond 18884 # the end of the current line 18885 if ( $id_scan_state =~ /^[A\:\(\)]/ ) { 18886 $id_scan_state = ''; 18887 } 18888 if ( $i < 0 ) { $i = 0 } 18889 18890 unless ($type) { 18891 18892 if ($saw_type) { 18893 18894 if ($saw_alpha) { 18895 $type = 'i'; 18896 } 18897 elsif ( $identifier eq '->' ) { 18898 $type = '->'; 18899 } 18900 elsif ( ( length($identifier) > 1 ) 18901 && ( $identifier !~ /\$$/ ) 18902 && ( $identifier !~ /^(sub |package )$/ ) ) 18903 { 18904 $type = 'i'; 18905 } 18906 else { $type = 't' } 18907 } 18908 elsif ($saw_alpha) { 18909 18910 # type 'w' includes anything without leading type info 18911 # ($,%,@,*) including something like abc::def::ghi 18912 $type = 'w'; 18913 } 18914 else { 18915 $type = ''; 18916 } # this can happen on a restart 18917 } 18918 18919 if ($identifier) { 18920 $tok = $identifier; 18921 if ($message) { write_logfile_entry($message) } 18922 } 18923 else { 18924 $tok = $tok_begin; 18925 $i = $i_begin; 18926 } 18927 18928 TOKENIZER_DEBUG_FLAG_SCAN_ID && do { 18929 my ( $a, $b, $c ) = caller; 18930 print 18931"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; 18932 print 18933"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; 18934 }; 18935 return ( $i, $tok, $type, $id_scan_state, $identifier ); 18936} 18937 18938sub follow_quoted_string { 18939 18940 # scan for a specific token, skipping escaped characters 18941 # if the quote character is blank, use the first non-blank character 18942 # input parameters: 18943 # $rtokens = reference to the array of tokens 18944 # $i = the token index of the first character to search 18945 # $in_quote = number of quoted strings being followed 18946 # $beginning_tok = the starting quote character 18947 # $quote_pos = index to check next for alphanumeric delimiter 18948 # output parameters: 18949 # $i = the token index of the ending quote character 18950 # $in_quote = decremented if found end, unchanged if not 18951 # $beginning_tok = the starting quote character 18952 # $quote_pos = index to check next for alphanumeric delimiter 18953 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. 18954 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth ) 18955 = @_; 18956 my ( $tok, $end_tok ); 18957 my $i = $i_beg - 1; 18958 18959 TOKENIZER_DEBUG_FLAG_QUOTE && do { 18960 print 18961"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; 18962 }; 18963 18964 # get the corresponding end token 18965 if ( $beginning_tok !~ /^\s*$/ ) { 18966 $end_tok = matching_end_token($beginning_tok); 18967 } 18968 18969 # a blank token means we must find and use the first non-blank one 18970 else { 18971 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> 18972 18973 while ( $i < $max_token_index ) { 18974 $tok = $$rtokens[ ++$i ]; 18975 18976 if ( $tok !~ /^\s*$/ ) { 18977 18978 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { 18979 $i = $max_token_index; 18980 } 18981 else { 18982 18983 if ( length($tok) > 1 ) { 18984 if ( $quote_pos <= 0 ) { $quote_pos = 1 } 18985 $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); 18986 } 18987 else { 18988 $beginning_tok = $tok; 18989 $quote_pos = 0; 18990 } 18991 $end_tok = matching_end_token($beginning_tok); 18992 $quote_depth = 1; 18993 last; 18994 } 18995 } 18996 else { 18997 $allow_quote_comments = 1; 18998 } 18999 } 19000 } 19001 19002 # There are two different loops which search for the ending quote 19003 # character. In the rare case of an alphanumeric quote delimiter, we 19004 # have to look through alphanumeric tokens character-by-character, since 19005 # the pre-tokenization process combines multiple alphanumeric 19006 # characters, whereas for a non-alphanumeric delimiter, only tokens of 19007 # length 1 can match. 19008 19009 # loop for case of alphanumeric quote delimiter.. 19010 # "quote_pos" is the position the current word to begin searching 19011 if ( $beginning_tok =~ /\w/ ) { 19012 19013 # Note this because it is not recommended practice except 19014 # for obfuscated perl contests 19015 if ( $in_quote == 1 ) { 19016 write_logfile_entry( 19017 "Note: alphanumeric quote delimiter ($beginning_tok) \n"); 19018 } 19019 19020 while ( $i < $max_token_index ) { 19021 19022 if ( $quote_pos == 0 || ( $i < 0 ) ) { 19023 $tok = $$rtokens[ ++$i ]; 19024 19025 if ( $tok eq '\\' ) { 19026 19027 $quote_pos++; 19028 last if ( $i >= $max_token_index ); 19029 $tok = $$rtokens[ ++$i ]; 19030 19031 } 19032 } 19033 my $old_pos = $quote_pos; 19034 19035 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) 19036 { 19037 19038 } 19039 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); 19040 19041 if ( $quote_pos > 0 ) { 19042 19043 $quote_depth--; 19044 19045 if ( $quote_depth == 0 ) { 19046 $in_quote--; 19047 last; 19048 } 19049 } 19050 } 19051 19052 # loop for case of a non-alphanumeric quote delimiter.. 19053 } 19054 else { 19055 19056 while ( $i < $max_token_index ) { 19057 $tok = $$rtokens[ ++$i ]; 19058 19059 if ( $tok eq $end_tok ) { 19060 $quote_depth--; 19061 19062 if ( $quote_depth == 0 ) { 19063 $in_quote--; 19064 last; 19065 } 19066 } 19067 elsif ( $tok eq $beginning_tok ) { 19068 $quote_depth++; 19069 } 19070 elsif ( $tok eq '\\' ) { 19071 $i++; 19072 } 19073 } 19074 } 19075 if ( $i > $max_token_index ) { $i = $max_token_index } 19076 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth ); 19077} 19078 19079sub matching_end_token { 19080 19081 # find closing character for a pattern 19082 my $beginning_token = shift; 19083 19084 if ( $beginning_token eq '{' ) { 19085 '}'; 19086 } 19087 elsif ( $beginning_token eq '[' ) { 19088 ']'; 19089 } 19090 elsif ( $beginning_token eq '<' ) { 19091 '>'; 19092 } 19093 elsif ( $beginning_token eq '(' ) { 19094 ')'; 19095 } 19096 else { 19097 $beginning_token; 19098 } 19099} 19100 19101BEGIN { 19102 19103 # These names are used in error messages 19104 @opening_brace_names = qw# '{' '[' '(' '?' #; 19105 @closing_brace_names = qw# '}' ']' ')' ':' #; 19106 19107 my @digraphs = qw( 19108 .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <> 19109 <= >= == =~ !~ != ++ -- /= x= 19110 ); 19111 @is_digraph{@digraphs} = (1) x scalar(@digraphs); 19112 19113 my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> ); 19114 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); 19115 19116 # make a hash of all valid token types for self-checking the tokenizer 19117 # (adding NEW_TOKENS : select a new character and add to this list) 19118 my @valid_token_types = qw# 19119 b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v 19120 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & 19121 #; 19122 push ( @valid_token_types, @digraphs ); 19123 push ( @valid_token_types, @trigraphs ); 19124 push ( @valid_token_types, '#' ); 19125 push ( @valid_token_types, ',' ); 19126 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); 19127 19128 # a list of file test letters, as in -e (Table 3-4 of 'camel 3') 19129 my @file_test_operators = 19130 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); 19131 @is_file_test_operator{@file_test_operators} = 19132 (1) x scalar(@file_test_operators); 19133 19134 # these functions have prototypes of the form (&@), so when they are 19135 # followed by a block, that block MAY NOT be followed by an 19136 # operator. 19137 $block_list_operator = make_regex('^(sort|grep|map)$'); 19138 19139 # these functions have prototypes of the form (&), so when they are 19140 # followed by a block, that block MAY BE followed by an operator. 19141 $block_operator = make_regex('^(do|eval)$'); 19142 19143 # these functions allow an identifier in the indirect object slot 19144 $indirect_object_taker = make_regex('^(print|printf|sort|exec|system)$'); 19145 19146 # I'll build the list of keywords incrementally 19147 my @Keywords = (); 19148 19149 # keywords and tokens after which a value or pattern is expected, 19150 # but not an operator. In other words, these should consume terms 19151 # to their right, or at least they are not expected to be followed 19152 # immediately by operators. 19153 # --added srand 20-mar-01 19154 my @value_requestor = qw( 19155 AUTOLOAD 19156 BEGIN 19157 CHECK 19158 DESTROY 19159 END 19160 EQ 19161 GE 19162 GT 19163 INIT 19164 LE 19165 LT 19166 NE 19167 abs 19168 accept 19169 alarm 19170 and 19171 atan2 19172 bind 19173 binmode 19174 bless 19175 caller 19176 chdir 19177 chmod 19178 chomp 19179 chop 19180 chown 19181 chr 19182 chroot 19183 close 19184 closedir 19185 cmp 19186 connect 19187 continue 19188 cos 19189 crypt 19190 dbmclose 19191 dbmopen 19192 defined 19193 delete 19194 die 19195 dump 19196 each 19197 else 19198 elsif 19199 eof 19200 eq 19201 exec 19202 exists 19203 exit 19204 exp 19205 fcntl 19206 fileno 19207 flock 19208 for 19209 foreach 19210 formline 19211 ge 19212 getc 19213 getgrgid 19214 getgrnam 19215 gethostbyaddr 19216 gethostbyname 19217 getnetbyaddr 19218 getnetbyname 19219 getpeername 19220 getpgrp 19221 getpriority 19222 getprotobyname 19223 getprotobynumber 19224 getpwnam 19225 getpwuid 19226 getservbyname 19227 getservbyport 19228 getsockname 19229 getsockopt 19230 glob 19231 gmtime 19232 goto 19233 grep 19234 gt 19235 hex 19236 if 19237 index 19238 int 19239 ioctl 19240 join 19241 keys 19242 kill 19243 last 19244 lc 19245 lcfirst 19246 le 19247 length 19248 link 19249 listen 19250 local 19251 localtime 19252 lock 19253 log 19254 lstat 19255 lt 19256 map 19257 mkdir 19258 msgctl 19259 msgget 19260 msgrcv 19261 msgsnd 19262 my 19263 ne 19264 next 19265 no 19266 not 19267 oct 19268 open 19269 opendir 19270 or 19271 ord 19272 our 19273 pack 19274 pipe 19275 pop 19276 pos 19277 print 19278 printf 19279 prototype 19280 push 19281 quotemeta 19282 rand 19283 read 19284 readdir 19285 readlink 19286 readline 19287 readpipe 19288 recv 19289 redo 19290 ref 19291 rename 19292 require 19293 reset 19294 return 19295 reverse 19296 rewinddir 19297 rindex 19298 rmdir 19299 scalar 19300 seek 19301 seekdir 19302 select 19303 semctl 19304 semget 19305 semop 19306 send 19307 sethostent 19308 setnetent 19309 setpgrp 19310 setpriority 19311 setprotoent 19312 setservent 19313 setsockopt 19314 shift 19315 shmctl 19316 shmget 19317 shmread 19318 shmwrite 19319 shutdown 19320 sin 19321 sleep 19322 socket 19323 socketpair 19324 sort 19325 splice 19326 split 19327 sprintf 19328 sqrt 19329 srand 19330 stat 19331 study 19332 substr 19333 symlink 19334 syscall 19335 sysopen 19336 sysread 19337 sysseek 19338 system 19339 syswrite 19340 tell 19341 telldir 19342 tie 19343 tied 19344 truncate 19345 uc 19346 ucfirst 19347 umask 19348 undef 19349 unless 19350 unlink 19351 unpack 19352 unshift 19353 untie 19354 until 19355 use 19356 utime 19357 values 19358 vec 19359 waitpid 19360 warn 19361 while 19362 write 19363 xor 19364 ); 19365 19366 push ( @Keywords, @value_requestor ); 19367 19368 # These are treated the same but are not keywords: 19369 my @extra_vr = qw( 19370 constant 19371 switch 19372 vars 19373 ); 19374 push ( @value_requestor, @extra_vr ); 19375 19376 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); 19377 19378 # this list contains keywords which do not look for arguments, 19379 # so that they might be followed by an operator, or at least 19380 # not a term. 19381 my @operator_requestor = qw( 19382 endgrent 19383 endhostent 19384 endnetent 19385 endprotoent 19386 endpwent 19387 endservent 19388 fork 19389 getgrent 19390 gethostent 19391 getlogin 19392 getnetent 19393 getppid 19394 getprotoent 19395 getpwent 19396 getservent 19397 setgrent 19398 setpwent 19399 time 19400 times 19401 wait 19402 wantarray 19403 ); 19404 19405 push ( @Keywords, @operator_requestor ); 19406 19407 # These are treated the same but are not considered keywords: 19408 my @extra_or = qw( 19409 STDERR 19410 STDIN 19411 STDOUT 19412 ); 19413 19414 push ( @operator_requestor, @extra_or ); 19415 19416 @expecting_operator_token{@operator_requestor} = 19417 (1) x scalar(@operator_requestor); 19418 19419 # these token TYPES expect trailing operator but not a term 19420 # note: ++ and -- are post-increment and decrement, 'C' = constant 19421 my @operator_requestor_types = qw( ++ -- C ); 19422 @expecting_operator_types{@operator_requestor_types} = 19423 (1) x scalar(@operator_requestor_types); 19424 19425 # these token TYPES consume values (terms) 19426 # note: pp and mm are pre-increment and decrement 19427 # f=semicolon in for, F=file test operator 19428 my @value_requestor_type = qw# 19429 L { ( [ ~ !~ =~ ; . .. ... : && ! || = + - x 19430 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= 19431 <= >= == != => \ > < % * / ? & | ** <=> 19432 f F pp mm Y p m U J G 19433 #; 19434 push ( @value_requestor_type, ',' ) 19435 ; # (perl doesn't like a ',' in a qw block) 19436 @expecting_term_types{@value_requestor_type} = 19437 (1) x scalar(@value_requestor_type); 19438 19439 # For simple syntax checking, it is nice to have a list of operators which 19440 # will really be unhappy if not followed by a term. This includes most 19441 # of the above... 19442 %really_want_term = %expecting_term_types; 19443 19444 # with these exceptions... 19445 delete $really_want_term{'U'}; # user sub, depends on prototype 19446 delete $really_want_term{'F'}; # file test works on $_ if no following term 19447 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; 19448 # let perl do it 19449 19450 # These keywords are handled specially in the tokenizer code: 19451 my @special_keywords = qw( 19452 do 19453 eval 19454 format 19455 m 19456 package 19457 q 19458 qq 19459 qr 19460 qw 19461 qx 19462 s 19463 sub 19464 tr 19465 y 19466 ); 19467 push ( @Keywords, @special_keywords ); 19468 19469 # Keywords after which list formatting may be used 19470 # WARNING: do not include |map|grep|eval or perl may die on 19471 # syntax errors (map1.t). 19472 my @keyword_taking_list = qw( 19473 and 19474 chmod 19475 chomp 19476 chop 19477 chown 19478 dbmopen 19479 die 19480 elsif 19481 exec 19482 fcntl 19483 for 19484 foreach 19485 formline 19486 getsockopt 19487 if 19488 index 19489 ioctl 19490 join 19491 kill 19492 local 19493 msgctl 19494 msgrcv 19495 msgsnd 19496 my 19497 open 19498 or 19499 our 19500 pack 19501 print 19502 printf 19503 push 19504 read 19505 readpipe 19506 recv 19507 return 19508 reverse 19509 rindex 19510 seek 19511 select 19512 semctl 19513 semget 19514 send 19515 setpriority 19516 setsockopt 19517 shmctl 19518 shmget 19519 shmread 19520 shmwrite 19521 socket 19522 socketpair 19523 sort 19524 splice 19525 split 19526 sprintf 19527 substr 19528 syscall 19529 sysopen 19530 sysread 19531 sysseek 19532 system 19533 syswrite 19534 tie 19535 unless 19536 unlink 19537 unpack 19538 unshift 19539 until 19540 vec 19541 warn 19542 while 19543 ); 19544 @is_keyword_taking_list{@keyword_taking_list} = 19545 (1) x scalar(@keyword_taking_list); 19546 19547 # These are not used in any way yet 19548 # my @unused_keywords = qw( 19549 # CORE 19550 # __FILE__ 19551 # __LINE__ 19552 # __PACKAGE__ 19553 # ); 19554 19555=pod 19556 19557 The list of keywords was extracted from function 'keyword' in perl file 19558 toke.c version 5.005.03, using this utility, plus a little editing: 19559 (file getkwd.pl): 19560 while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } 19561 Add 'get' prefix where necessary, then split into the above lists. 19562 19563 This list should be updated as necessary. 19564 The list should not contain these special variables: 19565 ARGV DATA ENV SIG STDERR STDIN STDOUT 19566 __DATA__ __END__ 19567 19568=cut 19569 19570 @is_keyword{@Keywords} = (1) x scalar(@Keywords); 19571} 195721; 19573 19574package main; 19575PerlTidy::perltidy(); 19576