1#line 1
2#
3############################################################
4#
5#    perltidy - a perl script indenter and formatter
6#
7#    Copyright (c) 2000-2009 by Steve Hancock
8#    Distributed under the GPL license agreement; see file COPYING
9#
10#    This program is free software; you can redistribute it and/or modify
11#    it under the terms of the GNU General Public License as published by
12#    the Free Software Foundation; either version 2 of the License, or
13#    (at your option) any later version.
14#
15#    This program is distributed in the hope that it will be useful,
16#    but WITHOUT ANY WARRANTY; without even the implied warranty of
17#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18#    GNU General Public License for more details.
19#
20#    You should have received a copy of the GNU General Public License
21#    along with this program; if not, write to the Free Software
22#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23#
24#    For brief instructions instructions, try 'perltidy -h'.
25#    For more complete documentation, try 'man perltidy'
26#    or visit http://perltidy.sourceforge.net
27#
28#    This script is an example of the default style.  It was formatted with:
29#
30#      perltidy Tidy.pm
31#
32#    Code Contributions: See ChangeLog.html for a complete history.
33#      Michael Cartmell supplied code for adaptation to VMS and helped with
34#        v-strings.
35#      Hugh S. Myers supplied sub streamhandle and the supporting code to
36#        create a Perl::Tidy module which can operate on strings, arrays, etc.
37#      Yves Orton supplied coding to help detect Windows versions.
38#      Axel Rose supplied a patch for MacPerl.
39#      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
40#      Dan Tyrell contributed a patch for binary I/O.
41#      Ueli Hugenschmidt contributed a patch for -fpsc
42#      Sam Kington supplied a patch to identify the initial indentation of
43#      entabbed code.
44#      jonathan swartz supplied patches for:
45#      * .../ pattern, which looks upwards from directory
46#      * --notidy, to be used in directories where we want to avoid
47#        accidentally tidying
48#      * prefilter and postfilter
49#      * iterations option
50#
51#      Many others have supplied key ideas, suggestions, and bug reports;
52#        see the CHANGES file.
53#
54############################################################
55
56package Perl::Tidy;
57use 5.004;    # need IO::File from 5.004 or later
58BEGIN { $^W = 1; }    # turn on warnings
59
60use strict;
61use Exporter;
62use Carp;
63$|++;
64
65use vars qw{
66  $VERSION
67  @ISA
68  @EXPORT
69  $missing_file_spec
70};
71
72@ISA    = qw( Exporter );
73@EXPORT = qw( &perltidy );
74
75use Cwd;
76use IO::File;
77use File::Basename;
78
79BEGIN {
80    ( $VERSION = q($Id: Tidy.pm,v 1.74 2010/12/17 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
81}
82
83sub streamhandle {
84
85    # given filename and mode (r or w), create an object which:
86    #   has a 'getline' method if mode='r', and
87    #   has a 'print' method if mode='w'.
88    # The objects also need a 'close' method.
89    #
90    # How the object is made:
91    #
92    # if $filename is:     Make object using:
93    # ----------------     -----------------
94    # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
95    # string               IO::File
96    # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
97    # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
98    # object               object
99    #                      (check for 'print' method for 'w' mode)
100    #                      (check for 'getline' method for 'r' mode)
101    my $ref = ref( my $filename = shift );
102    my $mode = shift;
103    my $New;
104    my $fh;
105
106    # handle a reference
107    if ($ref) {
108        if ( $ref eq 'ARRAY' ) {
109            $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
110        }
111        elsif ( $ref eq 'SCALAR' ) {
112            $New = sub { Perl::Tidy::IOScalar->new(@_) };
113        }
114        else {
115
116            # Accept an object with a getline method for reading. Note:
117            # IO::File is built-in and does not respond to the defined
118            # operator.  If this causes trouble, the check can be
119            # skipped and we can just let it crash if there is no
120            # getline.
121            if ( $mode =~ /[rR]/ ) {
122                if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
123                    $New = sub { $filename };
124                }
125                else {
126                    $New = sub { undef };
127                    confess <<EOM;
128------------------------------------------------------------------------
129No 'getline' method is defined for object of class $ref
130Please check your call to Perl::Tidy::perltidy.  Trace follows.
131------------------------------------------------------------------------
132EOM
133                }
134            }
135
136            # Accept an object with a print method for writing.
137            # See note above about IO::File
138            if ( $mode =~ /[wW]/ ) {
139                if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
140                    $New = sub { $filename };
141                }
142                else {
143                    $New = sub { undef };
144                    confess <<EOM;
145------------------------------------------------------------------------
146No 'print' method is defined for object of class $ref
147Please check your call to Perl::Tidy::perltidy. Trace follows.
148------------------------------------------------------------------------
149EOM
150                }
151            }
152        }
153    }
154
155    # handle a string
156    else {
157        if ( $filename eq '-' ) {
158            $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
159        }
160        else {
161            $New = sub { IO::File->new(@_) };
162        }
163    }
164    $fh = $New->( $filename, $mode )
165      or warn "Couldn't open file:$filename in mode:$mode : $!\n";
166    return $fh, ( $ref or $filename );
167}
168
169sub find_input_line_ending {
170
171    # Peek at a file and return first line ending character.
172    # Quietly return undef in case of any trouble.
173    my ($input_file) = @_;
174    my $ending;
175
176    # silently ignore input from object or stdin
177    if ( ref($input_file) || $input_file eq '-' ) {
178        return $ending;
179    }
180    open( INFILE, $input_file ) || return $ending;
181
182    binmode INFILE;
183    my $buf;
184    read( INFILE, $buf, 1024 );
185    close INFILE;
186    if ( $buf && $buf =~ /([\012\015]+)/ ) {
187        my $test = $1;
188
189        # dos
190        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
191
192        # mac
193        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
194
195        # unix
196        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
197
198        # unknown
199        else { }
200    }
201
202    # no ending seen
203    else { }
204
205    return $ending;
206}
207
208sub catfile {
209
210    # concatenate a path and file basename
211    # returns undef in case of error
212
213    BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
214
215    # use File::Spec if we can
216    unless ($missing_file_spec) {
217        return File::Spec->catfile(@_);
218    }
219
220    # Perl 5.004 systems may not have File::Spec so we'll make
221    # a simple try.  We assume File::Basename is available.
222    # return undef if not successful.
223    my $name      = pop @_;
224    my $path      = join '/', @_;
225    my $test_file = $path . $name;
226    my ( $test_name, $test_path ) = fileparse($test_file);
227    return $test_file if ( $test_name eq $name );
228    return undef if ( $^O eq 'VMS' );
229
230    # this should work at least for Windows and Unix:
231    $test_file = $path . '/' . $name;
232    ( $test_name, $test_path ) = fileparse($test_file);
233    return $test_file if ( $test_name eq $name );
234    return undef;
235}
236
237sub make_temporary_filename {
238
239    # Make a temporary filename.
240    #
241    # The POSIX tmpnam() function tends to be unreliable for non-unix
242    # systems (at least for the win32 systems that I've tested), so use
243    # a pre-defined name.  A slight disadvantage of this is that two
244    # perltidy runs in the same working directory may conflict.
245    # However, the chance of that is small and managable by the user.
246    # An alternative would be to check for the file's existance and use,
247    # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
248    # keep it simple.
249    my $name = "perltidy.TMP";
250    if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
251        return $name;
252    }
253    eval "use POSIX qw(tmpnam)";
254    if ($@) { return $name }
255    use IO::File;
256
257    # just make a couple of tries before giving up and using the default
258    for ( 0 .. 1 ) {
259        my $tmpname = tmpnam();
260        my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
261        if ($fh) {
262            $fh->close();
263            return ($tmpname);
264            last;
265        }
266    }
267    return ($name);
268}
269
270# Here is a map of the flow of data from the input source to the output
271# line sink:
272#
273# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
274#       input                         groups                 output
275#       lines   tokens      lines       of          lines    lines
276#                                      lines
277#
278# The names correspond to the package names responsible for the unit processes.
279#
280# The overall process is controlled by the "main" package.
281#
282# LineSource is the stream of input lines
283#
284# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
285# if necessary.  A token is any section of the input line which should be
286# manipulated as a single entity during formatting.  For example, a single
287# ',' character is a token, and so is an entire side comment.  It handles
288# the complexities of Perl syntax, such as distinguishing between '<<' as
289# a shift operator and as a here-document, or distinguishing between '/'
290# as a divide symbol and as a pattern delimiter.
291#
292# Formatter inserts and deletes whitespace between tokens, and breaks
293# sequences of tokens at appropriate points as output lines.  It bases its
294# decisions on the default rules as modified by any command-line options.
295#
296# VerticalAligner collects groups of lines together and tries to line up
297# certain tokens, such as '=>', '#', and '=' by adding whitespace.
298#
299# FileWriter simply writes lines to the output stream.
300#
301# The Logger package, not shown, records significant events and warning
302# messages.  It writes a .LOG file, which may be saved with a
303# '-log' or a '-g' flag.
304
305{
306
307    # variables needed by interrupt handler:
308    my $tokenizer;
309    my $input_file;
310
311    # this routine may be called to give a status report if interrupted.  If a
312    # parameter is given, it will call exit with that parameter.  This is no
313    # longer used because it works under Unix but not under Windows.
314    sub interrupt_handler {
315
316        my $exit_flag = shift;
317        print STDERR "perltidy interrupted";
318        if ($tokenizer) {
319            my $input_line_number =
320              Perl::Tidy::Tokenizer::get_input_line_number();
321            print STDERR " at line $input_line_number";
322        }
323        if ($input_file) {
324
325            if   ( ref $input_file ) { print STDERR " of reference to:" }
326            else                     { print STDERR " of file:" }
327            print STDERR " $input_file";
328        }
329        print STDERR "\n";
330        exit $exit_flag if defined($exit_flag);
331    }
332
333    sub perltidy {
334
335        my %defaults = (
336            argv                  => undef,
337            destination           => undef,
338            formatter             => undef,
339            logfile               => undef,
340            errorfile             => undef,
341            perltidyrc            => undef,
342            source                => undef,
343            stderr                => undef,
344            dump_options          => undef,
345            dump_options_type     => undef,
346            dump_getopt_flags     => undef,
347            dump_options_category => undef,
348            dump_options_range    => undef,
349            dump_abbreviations    => undef,
350            prefilter             => undef,
351            postfilter            => undef,
352        );
353
354        # don't overwrite callers ARGV
355        local @ARGV = @ARGV;
356
357        my %input_hash = @_;
358
359        if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
360            local $" = ')(';
361            my @good_keys = sort keys %defaults;
362            @bad_keys = sort @bad_keys;
363            confess <<EOM;
364------------------------------------------------------------------------
365Unknown perltidy parameter : (@bad_keys)
366perltidy only understands : (@good_keys)
367------------------------------------------------------------------------
368
369EOM
370        }
371
372        my $get_hash_ref = sub {
373            my ($key) = @_;
374            my $hash_ref = $input_hash{$key};
375            if ( defined($hash_ref) ) {
376                unless ( ref($hash_ref) eq 'HASH' ) {
377                    my $what = ref($hash_ref);
378                    my $but_is =
379                      $what ? "but is ref to $what" : "but is not a reference";
380                    croak <<EOM;
381------------------------------------------------------------------------
382error in call to perltidy:
383-$key must be reference to HASH $but_is
384------------------------------------------------------------------------
385EOM
386                }
387            }
388            return $hash_ref;
389        };
390
391        %input_hash = ( %defaults, %input_hash );
392        my $argv               = $input_hash{'argv'};
393        my $destination_stream = $input_hash{'destination'};
394        my $errorfile_stream   = $input_hash{'errorfile'};
395        my $logfile_stream     = $input_hash{'logfile'};
396        my $perltidyrc_stream  = $input_hash{'perltidyrc'};
397        my $source_stream      = $input_hash{'source'};
398        my $stderr_stream      = $input_hash{'stderr'};
399        my $user_formatter     = $input_hash{'formatter'};
400        my $prefilter          = $input_hash{'prefilter'};
401        my $postfilter         = $input_hash{'postfilter'};
402
403        # various dump parameters
404        my $dump_options_type     = $input_hash{'dump_options_type'};
405        my $dump_options          = $get_hash_ref->('dump_options');
406        my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
407        my $dump_options_category = $get_hash_ref->('dump_options_category');
408        my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
409        my $dump_options_range    = $get_hash_ref->('dump_options_range');
410
411        # validate dump_options_type
412        if ( defined($dump_options) ) {
413            unless ( defined($dump_options_type) ) {
414                $dump_options_type = 'perltidyrc';
415            }
416            unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
417                croak <<EOM;
418------------------------------------------------------------------------
419Please check value of -dump_options_type in call to perltidy;
420saw: '$dump_options_type'
421expecting: 'perltidyrc' or 'full'
422------------------------------------------------------------------------
423EOM
424
425            }
426        }
427        else {
428            $dump_options_type = "";
429        }
430
431        if ($user_formatter) {
432
433            # if the user defines a formatter, there is no output stream,
434            # but we need a null stream to keep coding simple
435            $destination_stream = Perl::Tidy::DevNull->new();
436        }
437
438        # see if ARGV is overridden
439        if ( defined($argv) ) {
440
441            my $rargv = ref $argv;
442            if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
443
444            # ref to ARRAY
445            if ($rargv) {
446                if ( $rargv eq 'ARRAY' ) {
447                    @ARGV = @$argv;
448                }
449                else {
450                    croak <<EOM;
451------------------------------------------------------------------------
452Please check value of -argv in call to perltidy;
453it must be a string or ref to ARRAY but is: $rargv
454------------------------------------------------------------------------
455EOM
456                }
457            }
458
459            # string
460            else {
461                my ( $rargv, $msg ) = parse_args($argv);
462                if ($msg) {
463                    die <<EOM;
464Error parsing this string passed to to perltidy with 'argv':
465$msg
466EOM
467                }
468                @ARGV = @{$rargv};
469            }
470        }
471
472        # redirect STDERR if requested
473        if ($stderr_stream) {
474            my ( $fh_stderr, $stderr_file ) =
475              Perl::Tidy::streamhandle( $stderr_stream, 'w' );
476            if ($fh_stderr) { *STDERR = $fh_stderr }
477            else {
478                croak <<EOM;
479------------------------------------------------------------------------
480Unable to redirect STDERR to $stderr_stream
481Please check value of -stderr in call to perltidy
482------------------------------------------------------------------------
483EOM
484            }
485        }
486
487        my $rpending_complaint;
488        $$rpending_complaint = "";
489        my $rpending_logfile_message;
490        $$rpending_logfile_message = "";
491
492        my ( $is_Windows, $Windows_type ) =
493          look_for_Windows($rpending_complaint);
494
495        # VMS file names are restricted to a 40.40 format, so we append _tdy
496        # instead of .tdy, etc. (but see also sub check_vms_filename)
497        my $dot;
498        my $dot_pattern;
499        if ( $^O eq 'VMS' ) {
500            $dot         = '_';
501            $dot_pattern = '_';
502        }
503        else {
504            $dot         = '.';
505            $dot_pattern = '\.';    # must escape for use in regex
506        }
507
508        # handle command line options
509        my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
510            $rexpansion, $roption_category, $roption_range )
511          = process_command_line(
512            $perltidyrc_stream,  $is_Windows, $Windows_type,
513            $rpending_complaint, $dump_options_type,
514          );
515
516        # return or exit immediately after all dumps
517        my $quit_now = 0;
518
519        # Getopt parameters and their flags
520        if ( defined($dump_getopt_flags) ) {
521            $quit_now = 1;
522            foreach my $op ( @{$roption_string} ) {
523                my $opt  = $op;
524                my $flag = "";
525
526                # Examples:
527                #  some-option=s
528                #  some-option=i
529                #  some-option:i
530                #  some-option!
531                if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
532                    $opt  = $1;
533                    $flag = $2;
534                }
535                $dump_getopt_flags->{$opt} = $flag;
536            }
537        }
538
539        if ( defined($dump_options_category) ) {
540            $quit_now = 1;
541            %{$dump_options_category} = %{$roption_category};
542        }
543
544        if ( defined($dump_options_range) ) {
545            $quit_now = 1;
546            %{$dump_options_range} = %{$roption_range};
547        }
548
549        if ( defined($dump_abbreviations) ) {
550            $quit_now = 1;
551            %{$dump_abbreviations} = %{$rexpansion};
552        }
553
554        if ( defined($dump_options) ) {
555            $quit_now = 1;
556            %{$dump_options} = %{$rOpts};
557        }
558
559        return if ($quit_now);
560
561        # make printable string of options for this run as possible diagnostic
562        my $readable_options = readable_options( $rOpts, $roption_string );
563
564        # dump from command line
565        if ( $rOpts->{'dump-options'} ) {
566            print STDOUT $readable_options;
567            exit 1;
568        }
569
570        check_options( $rOpts, $is_Windows, $Windows_type,
571            $rpending_complaint );
572
573        if ($user_formatter) {
574            $rOpts->{'format'} = 'user';
575        }
576
577        # there must be one entry here for every possible format
578        my %default_file_extension = (
579            tidy => 'tdy',
580            html => 'html',
581            user => '',
582        );
583
584        # be sure we have a valid output format
585        unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
586            my $formats = join ' ',
587              sort map { "'" . $_ . "'" } keys %default_file_extension;
588            my $fmt = $rOpts->{'format'};
589            die "-format='$fmt' but must be one of: $formats\n";
590        }
591
592        my $output_extension =
593          make_extension( $rOpts->{'output-file-extension'},
594            $default_file_extension{ $rOpts->{'format'} }, $dot );
595
596        my $backup_extension =
597          make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
598
599        my $html_toc_extension =
600          make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
601
602        my $html_src_extension =
603          make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
604
605        # check for -b option;
606        my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
607          && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
608          && @ARGV > 0;    # silently ignore if standard input;
609                           # this allows -b to be in a .perltidyrc file
610                           # without error messages when running from an editor
611
612        # turn off -b with warnings in case of conflicts with other options
613        if ($in_place_modify) {
614            if ( $rOpts->{'standard-output'} ) {
615                warn "Ignoring -b; you may not use -b and -st together\n";
616                $in_place_modify = 0;
617            }
618            if ($destination_stream) {
619                warn
620"Ignoring -b; you may not specify a destination array and -b together\n";
621                $in_place_modify = 0;
622            }
623            if ($source_stream) {
624                warn
625"Ignoring -b; you may not specify a source array and -b together\n";
626                $in_place_modify = 0;
627            }
628            if ( $rOpts->{'outfile'} ) {
629                warn "Ignoring -b; you may not use -b and -o together\n";
630                $in_place_modify = 0;
631            }
632            if ( defined( $rOpts->{'output-path'} ) ) {
633                warn "Ignoring -b; you may not use -b and -opath together\n";
634                $in_place_modify = 0;
635            }
636        }
637
638        Perl::Tidy::Formatter::check_options($rOpts);
639        if ( $rOpts->{'format'} eq 'html' ) {
640            Perl::Tidy::HtmlWriter->check_options($rOpts);
641        }
642
643        # make the pattern of file extensions that we shouldn't touch
644        my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
645        if ($output_extension) {
646            my $ext = quotemeta($output_extension);
647            $forbidden_file_extensions .= "|$ext";
648        }
649        if ( $in_place_modify && $backup_extension ) {
650            my $ext = quotemeta($backup_extension);
651            $forbidden_file_extensions .= "|$ext";
652        }
653        $forbidden_file_extensions .= ')$';
654
655        # Create a diagnostics object if requested;
656        # This is only useful for code development
657        my $diagnostics_object = undef;
658        if ( $rOpts->{'DIAGNOSTICS'} ) {
659            $diagnostics_object = Perl::Tidy::Diagnostics->new();
660        }
661
662        # no filenames should be given if input is from an array
663        if ($source_stream) {
664            if ( @ARGV > 0 ) {
665                die
666"You may not specify any filenames when a source array is given\n";
667            }
668
669            # we'll stuff the source array into ARGV
670            unshift( @ARGV, $source_stream );
671
672            # No special treatment for source stream which is a filename.
673            # This will enable checks for binary files and other bad stuff.
674            $source_stream = undef unless ref($source_stream);
675        }
676
677        # use stdin by default if no source array and no args
678        else {
679            unshift( @ARGV, '-' ) unless @ARGV;
680        }
681
682        # loop to process all files in argument list
683        my $number_of_files = @ARGV;
684        my $formatter       = undef;
685        $tokenizer = undef;
686        while ( $input_file = shift @ARGV ) {
687            my $fileroot;
688            my $input_file_permissions;
689
690            #---------------------------------------------------------------
691            # determine the input file name
692            #---------------------------------------------------------------
693            if ($source_stream) {
694                $fileroot = "perltidy";
695            }
696            elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
697                $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
698                $in_place_modify = 0;
699            }
700            else {
701                $fileroot = $input_file;
702                unless ( -e $input_file ) {
703
704                    # file doesn't exist - check for a file glob
705                    if ( $input_file =~ /([\?\*\[\{])/ ) {
706
707                        # Windows shell may not remove quotes, so do it
708                        my $input_file = $input_file;
709                        if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
710                        if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
711                        my $pattern = fileglob_to_re($input_file);
712                        ##eval "/$pattern/";
713                        if ( !$@ && opendir( DIR, './' ) ) {
714                            my @files =
715                              grep { /$pattern/ && !-d $_ } readdir(DIR);
716                            closedir(DIR);
717                            if (@files) {
718                                unshift @ARGV, @files;
719                                next;
720                            }
721                        }
722                    }
723                    print "skipping file: '$input_file': no matches found\n";
724                    next;
725                }
726
727                unless ( -f $input_file ) {
728                    print "skipping file: $input_file: not a regular file\n";
729                    next;
730                }
731
732                unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
733                    print
734"skipping file: $input_file: Non-text (override with -f)\n";
735                    next;
736                }
737
738                # we should have a valid filename now
739                $fileroot               = $input_file;
740                $input_file_permissions = ( stat $input_file )[2] & 07777;
741
742                if ( $^O eq 'VMS' ) {
743                    ( $fileroot, $dot ) = check_vms_filename($fileroot);
744                }
745
746                # add option to change path here
747                if ( defined( $rOpts->{'output-path'} ) ) {
748
749                    my ( $base, $old_path ) = fileparse($fileroot);
750                    my $new_path = $rOpts->{'output-path'};
751                    unless ( -d $new_path ) {
752                        unless ( mkdir $new_path, 0777 ) {
753                            die "unable to create directory $new_path: $!\n";
754                        }
755                    }
756                    my $path = $new_path;
757                    $fileroot = catfile( $path, $base );
758                    unless ($fileroot) {
759                        die <<EOM;
760------------------------------------------------------------------------
761Problem combining $new_path and $base to make a filename; check -opath
762------------------------------------------------------------------------
763EOM
764                    }
765                }
766            }
767
768            # Skip files with same extension as the output files because
769            # this can lead to a messy situation with files like
770            # script.tdy.tdy.tdy ... or worse problems ...  when you
771            # rerun perltidy over and over with wildcard input.
772            if (
773                !$source_stream
774                && (   $input_file =~ /$forbidden_file_extensions/o
775                    || $input_file eq 'DIAGNOSTICS' )
776              )
777            {
778                print "skipping file: $input_file: wrong extension\n";
779                next;
780            }
781
782            # the 'source_object' supplies a method to read the input file
783            my $source_object =
784              Perl::Tidy::LineSource->new( $input_file, $rOpts,
785                $rpending_logfile_message );
786            next unless ($source_object);
787
788            # Prefilters and postfilters: The prefilter is a code reference
789            # that will be applied to the source before tidying, and the
790            # postfilter is a code reference to the result before outputting.
791            if ($prefilter) {
792                my $buf = '';
793                while ( my $line = $source_object->get_line() ) {
794                    $buf .= $line;
795                }
796                $buf = $prefilter->($buf);
797
798                $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
799                    $rpending_logfile_message );
800            }
801
802            # register this file name with the Diagnostics package
803            $diagnostics_object->set_input_file($input_file)
804              if $diagnostics_object;
805
806            #---------------------------------------------------------------
807            # determine the output file name
808            #---------------------------------------------------------------
809            my $output_file = undef;
810            my $actual_output_extension;
811
812            if ( $rOpts->{'outfile'} ) {
813
814                if ( $number_of_files <= 1 ) {
815
816                    if ( $rOpts->{'standard-output'} ) {
817                        die "You may not use -o and -st together\n";
818                    }
819                    elsif ($destination_stream) {
820                        die
821"You may not specify a destination array and -o together\n";
822                    }
823                    elsif ( defined( $rOpts->{'output-path'} ) ) {
824                        die "You may not specify -o and -opath together\n";
825                    }
826                    elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
827                        die "You may not specify -o and -oext together\n";
828                    }
829                    $output_file = $rOpts->{outfile};
830
831                    # make sure user gives a file name after -o
832                    if ( $output_file =~ /^-/ ) {
833                        die "You must specify a valid filename after -o\n";
834                    }
835
836                    # do not overwrite input file with -o
837                    if ( defined($input_file_permissions)
838                        && ( $output_file eq $input_file ) )
839                    {
840                        die
841                          "Use 'perltidy -b $input_file' to modify in-place\n";
842                    }
843                }
844                else {
845                    die "You may not use -o with more than one input file\n";
846                }
847            }
848            elsif ( $rOpts->{'standard-output'} ) {
849                if ($destination_stream) {
850                    die
851"You may not specify a destination array and -st together\n";
852                }
853                $output_file = '-';
854
855                if ( $number_of_files <= 1 ) {
856                }
857                else {
858                    die "You may not use -st with more than one input file\n";
859                }
860            }
861            elsif ($destination_stream) {
862                $output_file = $destination_stream;
863            }
864            elsif ($source_stream) {  # source but no destination goes to stdout
865                $output_file = '-';
866            }
867            elsif ( $input_file eq '-' ) {
868                $output_file = '-';
869            }
870            else {
871                if ($in_place_modify) {
872                    $output_file = IO::File->new_tmpfile()
873                      or die "cannot open temp file for -b option: $!\n";
874                }
875                else {
876                    $actual_output_extension = $output_extension;
877                    $output_file             = $fileroot . $output_extension;
878                }
879            }
880
881            # the 'sink_object' knows how to write the output file
882            my $tee_file = $fileroot . $dot . "TEE";
883
884            my $line_separator = $rOpts->{'output-line-ending'};
885            if ( $rOpts->{'preserve-line-endings'} ) {
886                $line_separator = find_input_line_ending($input_file);
887            }
888
889            # Eventually all I/O may be done with binmode, but for now it is
890            # only done when a user requests a particular line separator
891            # through the -ple or -ole flags
892            my $binmode = 0;
893            if   ( defined($line_separator) ) { $binmode        = 1 }
894            else                              { $line_separator = "\n" }
895
896            my ( $sink_object, $postfilter_buffer );
897            if ($postfilter) {
898                $sink_object =
899                  Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
900                    $line_separator, $rOpts, $rpending_logfile_message,
901                    $binmode );
902            }
903            else {
904                $sink_object =
905                  Perl::Tidy::LineSink->new( $output_file, $tee_file,
906                    $line_separator, $rOpts, $rpending_logfile_message,
907                    $binmode );
908            }
909
910            #---------------------------------------------------------------
911            # initialize the error logger
912            #---------------------------------------------------------------
913            my $warning_file = $fileroot . $dot . "ERR";
914            if ($errorfile_stream) { $warning_file = $errorfile_stream }
915            my $log_file = $fileroot . $dot . "LOG";
916            if ($logfile_stream) { $log_file = $logfile_stream }
917
918            my $logger_object =
919              Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
920                $saw_extrude );
921            write_logfile_header(
922                $rOpts,        $logger_object, $config_file,
923                $rraw_options, $Windows_type,  $readable_options,
924            );
925            if ($$rpending_logfile_message) {
926                $logger_object->write_logfile_entry($$rpending_logfile_message);
927            }
928            if ($$rpending_complaint) {
929                $logger_object->complain($$rpending_complaint);
930            }
931
932            #---------------------------------------------------------------
933            # initialize the debug object, if any
934            #---------------------------------------------------------------
935            my $debugger_object = undef;
936            if ( $rOpts->{DEBUG} ) {
937                $debugger_object =
938                  Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
939            }
940
941            # loop over iterations
942            my $max_iterations    = $rOpts->{'iterations'};
943            my $sink_object_final = $sink_object;
944            for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
945                my $temp_buffer;
946
947                # local copies of some debugging objects which get deleted
948                # after first iteration, but will reappear after this loop
949                my $debugger_object    = $debugger_object;
950                my $logger_object      = $logger_object;
951                my $diagnostics_object = $diagnostics_object;
952
953                # output to temp buffer until last iteration
954                if ( $iter < $max_iterations ) {
955                    $sink_object =
956                      Perl::Tidy::LineSink->new( \$temp_buffer, $tee_file,
957                        $line_separator, $rOpts, $rpending_logfile_message,
958                        $binmode );
959                }
960                else {
961                    $sink_object = $sink_object_final;
962
963                    # terminate some debugging output after first pass
964                    # to avoid needless output.
965                    $debugger_object    = undef;
966                    $logger_object      = undef;
967                    $diagnostics_object = undef;
968                }
969
970              #---------------------------------------------------------------
971              # create a formatter for this file : html writer or pretty printer
972              #---------------------------------------------------------------
973
974                # we have to delete any old formatter because, for safety,
975                # the formatter will check to see that there is only one.
976                $formatter = undef;
977
978                if ($user_formatter) {
979                    $formatter = $user_formatter;
980                }
981                elsif ( $rOpts->{'format'} eq 'html' ) {
982                    $formatter =
983                      Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
984                        $actual_output_extension, $html_toc_extension,
985                        $html_src_extension );
986                }
987                elsif ( $rOpts->{'format'} eq 'tidy' ) {
988                    $formatter = Perl::Tidy::Formatter->new(
989                        logger_object      => $logger_object,
990                        diagnostics_object => $diagnostics_object,
991                        sink_object        => $sink_object,
992                    );
993                }
994                else {
995                    die "I don't know how to do -format=$rOpts->{'format'}\n";
996                }
997
998                unless ($formatter) {
999                    die
1000                      "Unable to continue with $rOpts->{'format'} formatting\n";
1001                }
1002
1003                #---------------------------------------------------------------
1004                # create the tokenizer for this file
1005                #---------------------------------------------------------------
1006                $tokenizer = undef;    # must destroy old tokenizer
1007                $tokenizer = Perl::Tidy::Tokenizer->new(
1008                    source_object      => $source_object,
1009                    logger_object      => $logger_object,
1010                    debugger_object    => $debugger_object,
1011                    diagnostics_object => $diagnostics_object,
1012                    starting_level => $rOpts->{'starting-indentation-level'},
1013                    tabs           => $rOpts->{'tabs'},
1014                    entab_leading_space => $rOpts->{'entab-leading-whitespace'},
1015                    indent_columns      => $rOpts->{'indent-columns'},
1016                    look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1017                    look_for_autoloader => $rOpts->{'look-for-autoloader'},
1018                    look_for_selfloader => $rOpts->{'look-for-selfloader'},
1019                    trim_qw             => $rOpts->{'trim-qw'},
1020                );
1021
1022                #---------------------------------------------------------------
1023                # now we can do it
1024                #---------------------------------------------------------------
1025                process_this_file( $tokenizer, $formatter );
1026
1027                #---------------------------------------------------------------
1028                # close the input source and report errors
1029                #---------------------------------------------------------------
1030                $source_object->close_input_file();
1031
1032                # line source for next iteration (if any) comes from the current
1033                # temporary buffer
1034                if ( $iter < $max_iterations ) {
1035                    $source_object =
1036                      Perl::Tidy::LineSource->new( \$temp_buffer, $rOpts,
1037                        $rpending_logfile_message );
1038                }
1039
1040            }    # end loop over iterations
1041
1042            # get file names to use for syntax check
1043            my $ifname = $source_object->get_input_file_copy_name();
1044            my $ofname = $sink_object->get_output_file_copy();
1045
1046            #---------------------------------------------------------------
1047            # handle the -b option (backup and modify in-place)
1048            #---------------------------------------------------------------
1049            if ($in_place_modify) {
1050                unless ( -f $input_file ) {
1051
1052                    # oh, oh, no real file to backup ..
1053                    # shouldn't happen because of numerous preliminary checks
1054                    die print
1055"problem with -b backing up input file '$input_file': not a file\n";
1056                }
1057                my $backup_name = $input_file . $backup_extension;
1058                if ( -f $backup_name ) {
1059                    unlink($backup_name)
1060                      or die
1061"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1062                }
1063                rename( $input_file, $backup_name )
1064                  or die
1065"problem renaming $input_file to $backup_name for -b option: $!\n";
1066                $ifname = $backup_name;
1067
1068                seek( $output_file, 0, 0 )
1069                  or die "unable to rewind tmp file for -b option: $!\n";
1070
1071                my $fout = IO::File->new("> $input_file")
1072                  or die
1073"problem opening $input_file for write for -b option; check directory permissions: $!\n";
1074                binmode $fout;
1075                my $line;
1076                while ( $line = $output_file->getline() ) {
1077                    $fout->print($line);
1078                }
1079                $fout->close();
1080                $output_file = $input_file;
1081                $ofname      = $input_file;
1082            }
1083
1084            #---------------------------------------------------------------
1085            # clean up and report errors
1086            #---------------------------------------------------------------
1087            $sink_object->close_output_file()    if $sink_object;
1088            $debugger_object->close_debug_file() if $debugger_object;
1089
1090            if ($postfilter) {
1091                my $new_sink =
1092                  Perl::Tidy::LineSink->new( $output_file, $tee_file,
1093                    $line_separator, $rOpts, $rpending_logfile_message,
1094                    $binmode );
1095                my $buf = $postfilter->($postfilter_buffer);
1096                foreach my $line ( split( "\n", $buf ) ) {
1097                    $new_sink->write_line($line);
1098                }
1099            }
1100
1101            my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1102            if ($output_file) {
1103
1104                if ($input_file_permissions) {
1105
1106                    # give output script same permissions as input script, but
1107                    # make it user-writable or else we can't run perltidy again.
1108                    # Thus we retain whatever executable flags were set.
1109                    if ( $rOpts->{'format'} eq 'tidy' ) {
1110                        chmod( $input_file_permissions | 0600, $output_file );
1111                    }
1112
1113                    # else use default permissions for html and any other format
1114
1115                }
1116                if ( $logger_object && $rOpts->{'check-syntax'} ) {
1117                    $infile_syntax_ok =
1118                      check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1119                }
1120            }
1121
1122            $logger_object->finish( $infile_syntax_ok, $formatter )
1123              if $logger_object;
1124        }    # end of loop to process all files
1125    }    # end of main program
1126}
1127
1128sub fileglob_to_re {
1129
1130    # modified (corrected) from version in find2perl
1131    my $x = shift;
1132    $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1133    $x =~ s#\*#.*#g;               # '*' -> '.*'
1134    $x =~ s#\?#.#g;                # '?' -> '.'
1135    "^$x\\z";                      # match whole word
1136}
1137
1138sub make_extension {
1139
1140    # Make a file extension, including any leading '.' if necessary
1141    # The '.' may actually be an '_' under VMS
1142    my ( $extension, $default, $dot ) = @_;
1143
1144    # Use the default if none specified
1145    $extension = $default unless ($extension);
1146
1147    # Only extensions with these leading characters get a '.'
1148    # This rule gives the user some freedom
1149    if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1150        $extension = $dot . $extension;
1151    }
1152    return $extension;
1153}
1154
1155sub write_logfile_header {
1156    my (
1157        $rOpts,        $logger_object, $config_file,
1158        $rraw_options, $Windows_type,  $readable_options
1159    ) = @_;
1160    $logger_object->write_logfile_entry(
1161"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1162    );
1163    if ($Windows_type) {
1164        $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1165    }
1166    my $options_string = join( ' ', @$rraw_options );
1167
1168    if ($config_file) {
1169        $logger_object->write_logfile_entry(
1170            "Found Configuration File >>> $config_file \n");
1171    }
1172    $logger_object->write_logfile_entry(
1173        "Configuration and command line parameters for this run:\n");
1174    $logger_object->write_logfile_entry("$options_string\n");
1175
1176    if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1177        $rOpts->{'logfile'} = 1;    # force logfile to be saved
1178        $logger_object->write_logfile_entry(
1179            "Final parameter set for this run\n");
1180        $logger_object->write_logfile_entry(
1181            "------------------------------------\n");
1182
1183        $logger_object->write_logfile_entry($readable_options);
1184
1185        $logger_object->write_logfile_entry(
1186            "------------------------------------\n");
1187    }
1188    $logger_object->write_logfile_entry(
1189        "To find error messages search for 'WARNING' with your editor\n");
1190}
1191
1192sub generate_options {
1193
1194    ######################################################################
1195    # Generate and return references to:
1196    #  @option_string - the list of options to be passed to Getopt::Long
1197    #  @defaults - the list of default options
1198    #  %expansion - a hash showing how all abbreviations are expanded
1199    #  %category - a hash giving the general category of each option
1200    #  %option_range - a hash giving the valid ranges of certain options
1201
1202    # Note: a few options are not documented in the man page and usage
1203    # message. This is because these are experimental or debug options and
1204    # may or may not be retained in future versions.
1205    #
1206    # Here are the undocumented flags as far as I know.  Any of them
1207    # may disappear at any time.  They are mainly for fine-tuning
1208    # and debugging.
1209    #
1210    # fll --> fuzzy-line-length           # a trivial parameter which gets
1211    #                                       turned off for the extrude option
1212    #                                       which is mainly for debugging
1213    # chk --> check-multiline-quotes      # check for old bug; to be deleted
1214    # scl --> short-concatenation-item-length   # helps break at '.'
1215    # recombine                           # for debugging line breaks
1216    # valign                              # for debugging vertical alignment
1217    # I   --> DIAGNOSTICS                 # for debugging
1218    ######################################################################
1219
1220    # here is a summary of the Getopt codes:
1221    # <none> does not take an argument
1222    # =s takes a mandatory string
1223    # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1224    # =i takes a mandatory integer
1225    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1226    # ! does not take an argument and may be negated
1227    #  i.e., -foo and -nofoo are allowed
1228    # a double dash signals the end of the options list
1229    #
1230    #---------------------------------------------------------------
1231    # Define the option string passed to GetOptions.
1232    #---------------------------------------------------------------
1233
1234    my @option_string   = ();
1235    my %expansion       = ();
1236    my %option_category = ();
1237    my %option_range    = ();
1238    my $rexpansion      = \%expansion;
1239
1240    # names of categories in manual
1241    # leading integers will allow sorting
1242    my @category_name = (
1243        '0. I/O control',
1244        '1. Basic formatting options',
1245        '2. Code indentation control',
1246        '3. Whitespace control',
1247        '4. Comment controls',
1248        '5. Linebreak controls',
1249        '6. Controlling list formatting',
1250        '7. Retaining or ignoring existing line breaks',
1251        '8. Blank line control',
1252        '9. Other controls',
1253        '10. HTML options',
1254        '11. pod2html options',
1255        '12. Controlling HTML properties',
1256        '13. Debugging',
1257    );
1258
1259    #  These options are parsed directly by perltidy:
1260    #    help h
1261    #    version v
1262    #  However, they are included in the option set so that they will
1263    #  be seen in the options dump.
1264
1265    # These long option names have no abbreviations or are treated specially
1266    @option_string = qw(
1267      html!
1268      noprofile
1269      no-profile
1270      npro
1271      recombine!
1272      valign!
1273      notidy
1274    );
1275
1276    my $category = 13;    # Debugging
1277    foreach (@option_string) {
1278        my $opt = $_;     # must avoid changing the actual flag
1279        $opt =~ s/!$//;
1280        $option_category{$opt} = $category_name[$category];
1281    }
1282
1283    $category = 11;                                       # HTML
1284    $option_category{html} = $category_name[$category];
1285
1286    # routine to install and check options
1287    my $add_option = sub {
1288        my ( $long_name, $short_name, $flag ) = @_;
1289        push @option_string, $long_name . $flag;
1290        $option_category{$long_name} = $category_name[$category];
1291        if ($short_name) {
1292            if ( $expansion{$short_name} ) {
1293                my $existing_name = $expansion{$short_name}[0];
1294                die
1295"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1296            }
1297            $expansion{$short_name} = [$long_name];
1298            if ( $flag eq '!' ) {
1299                my $nshort_name = 'n' . $short_name;
1300                my $nolong_name = 'no' . $long_name;
1301                if ( $expansion{$nshort_name} ) {
1302                    my $existing_name = $expansion{$nshort_name}[0];
1303                    die
1304"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1305                }
1306                $expansion{$nshort_name} = [$nolong_name];
1307            }
1308        }
1309    };
1310
1311    # Install long option names which have a simple abbreviation.
1312    # Options with code '!' get standard negation ('no' for long names,
1313    # 'n' for abbreviations).  Categories follow the manual.
1314
1315    ###########################
1316    $category = 0;    # I/O_Control
1317    ###########################
1318    $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1319    $add_option->( 'backup-file-extension',      'bext',  '=s' );
1320    $add_option->( 'force-read-binary',          'f',     '!' );
1321    $add_option->( 'format',                     'fmt',   '=s' );
1322    $add_option->( 'iterations',                 'it',    '=i' );
1323    $add_option->( 'logfile',                    'log',   '!' );
1324    $add_option->( 'logfile-gap',                'g',     ':i' );
1325    $add_option->( 'outfile',                    'o',     '=s' );
1326    $add_option->( 'output-file-extension',      'oext',  '=s' );
1327    $add_option->( 'output-path',                'opath', '=s' );
1328    $add_option->( 'profile',                    'pro',   '=s' );
1329    $add_option->( 'quiet',                      'q',     '!' );
1330    $add_option->( 'standard-error-output',      'se',    '!' );
1331    $add_option->( 'standard-output',            'st',    '!' );
1332    $add_option->( 'warning-output',             'w',     '!' );
1333
1334    # options which are both toggle switches and values moved here
1335    # to hide from tidyview (which does not show category 0 flags):
1336    # -ole moved here from category 1
1337    # -sil moved here from category 2
1338    $add_option->( 'output-line-ending',         'ole', '=s' );
1339    $add_option->( 'starting-indentation-level', 'sil', '=i' );
1340
1341    ########################################
1342    $category = 1;    # Basic formatting options
1343    ########################################
1344    $add_option->( 'check-syntax',             'syn',  '!' );
1345    $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1346    $add_option->( 'indent-columns',           'i',    '=i' );
1347    $add_option->( 'maximum-line-length',      'l',    '=i' );
1348    $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1349    $add_option->( 'preserve-line-endings',    'ple',  '!' );
1350    $add_option->( 'tabs',                     't',    '!' );
1351
1352    ########################################
1353    $category = 2;    # Code indentation control
1354    ########################################
1355    $add_option->( 'continuation-indentation',           'ci',   '=i' );
1356    $add_option->( 'line-up-parentheses',                'lp',   '!' );
1357    $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1358    $add_option->( 'outdent-keywords',                   'okw',  '!' );
1359    $add_option->( 'outdent-labels',                     'ola',  '!' );
1360    $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1361    $add_option->( 'indent-closing-brace',               'icb',  '!' );
1362    $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1363    $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1364    $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1365    $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1366    $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1367    $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1368
1369    ########################################
1370    $category = 3;    # Whitespace control
1371    ########################################
1372    $add_option->( 'add-semicolons',                            'asc',   '!' );
1373    $add_option->( 'add-whitespace',                            'aws',   '!' );
1374    $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1375    $add_option->( 'brace-tightness',                           'bt',    '=i' );
1376    $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1377    $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1378    $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1379    $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1380    $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1381    $add_option->( 'paren-tightness',                           'pt',    '=i' );
1382    $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1383    $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1384    $add_option->( 'space-function-paren',                      'sfp',   '!' );
1385    $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1386    $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1387    $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1388    $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1389    $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1390    $add_option->( 'trim-qw',                                   'tqw',   '!' );
1391    $add_option->( 'want-left-space',                           'wls',   '=s' );
1392    $add_option->( 'want-right-space',                          'wrs',   '=s' );
1393
1394    ########################################
1395    $category = 4;    # Comment controls
1396    ########################################
1397    $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1398    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1399    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1400    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1401    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1402    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1403    $add_option->( 'closing-side-comments',             'csc',  '!' );
1404    $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1405    $add_option->( 'format-skipping',                   'fs',   '!' );
1406    $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1407    $add_option->( 'format-skipping-end',               'fse',  '=s' );
1408    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1409    $add_option->( 'indent-block-comments',             'ibc',  '!' );
1410    $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1411    $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1412    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1413    $add_option->( 'outdent-long-comments',             'olc',  '!' );
1414    $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1415    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1416    $add_option->( 'static-block-comments',             'sbc',  '!' );
1417    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1418    $add_option->( 'static-side-comments',              'ssc',  '!' );
1419
1420    ########################################
1421    $category = 5;    # Linebreak controls
1422    ########################################
1423    $add_option->( 'add-newlines',                            'anl',   '!' );
1424    $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1425    $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1426    $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1427    $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1428    $add_option->( 'cuddled-else',                            'ce',    '!' );
1429    $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1430    $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1431    $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1432    $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1433    $add_option->( 'opening-paren-right',                     'opr',   '!' );
1434    $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1435    $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1436    $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1437    $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1438    $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1439    $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1440    $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1441    $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1442    $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1443    $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1444    $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1445    $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1446    $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1447    $add_option->( 'want-break-after',                        'wba',   '=s' );
1448    $add_option->( 'want-break-before',                       'wbb',   '=s' );
1449    $add_option->( 'break-after-all-operators',               'baao',  '!' );
1450    $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1451    $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1452
1453    ########################################
1454    $category = 6;    # Controlling list formatting
1455    ########################################
1456    $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1457    $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1458    $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1459
1460    ########################################
1461    $category = 7;    # Retaining or ignoring existing line breaks
1462    ########################################
1463    $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1464    $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1465    $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1466    $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1467
1468    ########################################
1469    $category = 8;    # Blank line control
1470    ########################################
1471    $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1472    $add_option->( 'blanks-before-comments',          'bbc', '!' );
1473    $add_option->( 'blanks-before-subs',              'bbs', '!' );
1474    $add_option->( 'long-block-line-count',           'lbl', '=i' );
1475    $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1476    $add_option->( 'keep-old-blank-lines',            'kbl', '=i' );
1477
1478    ########################################
1479    $category = 9;    # Other controls
1480    ########################################
1481    $add_option->( 'delete-block-comments',        'dbc',  '!' );
1482    $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1483    $add_option->( 'delete-pod',                   'dp',   '!' );
1484    $add_option->( 'delete-side-comments',         'dsc',  '!' );
1485    $add_option->( 'tee-block-comments',           'tbc',  '!' );
1486    $add_option->( 'tee-pod',                      'tp',   '!' );
1487    $add_option->( 'tee-side-comments',            'tsc',  '!' );
1488    $add_option->( 'look-for-autoloader',          'lal',  '!' );
1489    $add_option->( 'look-for-hash-bang',           'x',    '!' );
1490    $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1491    $add_option->( 'pass-version-line',            'pvl',  '!' );
1492
1493    ########################################
1494    $category = 13;    # Debugging
1495    ########################################
1496    $add_option->( 'DEBUG',                           'D',    '!' );
1497    $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1498    $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1499    $add_option->( 'dump-defaults',                   'ddf',  '!' );
1500    $add_option->( 'dump-long-names',                 'dln',  '!' );
1501    $add_option->( 'dump-options',                    'dop',  '!' );
1502    $add_option->( 'dump-profile',                    'dpro', '!' );
1503    $add_option->( 'dump-short-names',                'dsn',  '!' );
1504    $add_option->( 'dump-token-types',                'dtt',  '!' );
1505    $add_option->( 'dump-want-left-space',            'dwls', '!' );
1506    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1507    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1508    $add_option->( 'help',                            'h',    '' );
1509    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1510    $add_option->( 'show-options',                    'opt',  '!' );
1511    $add_option->( 'version',                         'v',    '' );
1512
1513    #---------------------------------------------------------------------
1514
1515    # The Perl::Tidy::HtmlWriter will add its own options to the string
1516    Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1517
1518    ########################################
1519    # Set categories 10, 11, 12
1520    ########################################
1521    # Based on their known order
1522    $category = 12;    # HTML properties
1523    foreach my $opt (@option_string) {
1524        my $long_name = $opt;
1525        $long_name =~ s/(!|=.*|:.*)$//;
1526        unless ( defined( $option_category{$long_name} ) ) {
1527            if ( $long_name =~ /^html-linked/ ) {
1528                $category = 10;    # HTML options
1529            }
1530            elsif ( $long_name =~ /^pod2html/ ) {
1531                $category = 11;    # Pod2html
1532            }
1533            $option_category{$long_name} = $category_name[$category];
1534        }
1535    }
1536
1537    #---------------------------------------------------------------
1538    # Assign valid ranges to certain options
1539    #---------------------------------------------------------------
1540    # In the future, these may be used to make preliminary checks
1541    # hash keys are long names
1542    # If key or value is undefined:
1543    #   strings may have any value
1544    #   integer ranges are >=0
1545    # If value is defined:
1546    #   value is [qw(any valid words)] for strings
1547    #   value is [min, max] for integers
1548    #   if min is undefined, there is no lower limit
1549    #   if max is undefined, there is no upper limit
1550    # Parameters not listed here have defaults
1551    %option_range = (
1552        'format'             => [ 'tidy', 'html', 'user' ],
1553        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1554
1555        'block-brace-tightness'    => [ 0, 2 ],
1556        'brace-tightness'          => [ 0, 2 ],
1557        'paren-tightness'          => [ 0, 2 ],
1558        'square-bracket-tightness' => [ 0, 2 ],
1559
1560        'block-brace-vertical-tightness'            => [ 0, 2 ],
1561        'brace-vertical-tightness'                  => [ 0, 2 ],
1562        'brace-vertical-tightness-closing'          => [ 0, 2 ],
1563        'paren-vertical-tightness'                  => [ 0, 2 ],
1564        'paren-vertical-tightness-closing'          => [ 0, 2 ],
1565        'square-bracket-vertical-tightness'         => [ 0, 2 ],
1566        'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1567        'vertical-tightness'                        => [ 0, 2 ],
1568        'vertical-tightness-closing'                => [ 0, 2 ],
1569
1570        'closing-brace-indentation'          => [ 0, 3 ],
1571        'closing-paren-indentation'          => [ 0, 3 ],
1572        'closing-square-bracket-indentation' => [ 0, 3 ],
1573        'closing-token-indentation'          => [ 0, 3 ],
1574
1575        'closing-side-comment-else-flag' => [ 0, 2 ],
1576        'comma-arrow-breakpoints'        => [ 0, 3 ],
1577    );
1578
1579    # Note: we could actually allow negative ci if someone really wants it:
1580    # $option_range{'continuation-indentation'} = [ undef, undef ];
1581
1582    #---------------------------------------------------------------
1583    # Assign default values to the above options here, except
1584    # for 'outfile' and 'help'.
1585    # These settings should approximate the perlstyle(1) suggestions.
1586    #---------------------------------------------------------------
1587    my @defaults = qw(
1588      add-newlines
1589      add-semicolons
1590      add-whitespace
1591      blanks-before-blocks
1592      blanks-before-comments
1593      blanks-before-subs
1594      block-brace-tightness=0
1595      block-brace-vertical-tightness=0
1596      brace-tightness=1
1597      brace-vertical-tightness-closing=0
1598      brace-vertical-tightness=0
1599      break-at-old-logical-breakpoints
1600      break-at-old-ternary-breakpoints
1601      break-at-old-keyword-breakpoints
1602      comma-arrow-breakpoints=1
1603      nocheck-syntax
1604      closing-side-comment-interval=6
1605      closing-side-comment-maximum-text=20
1606      closing-side-comment-else-flag=0
1607      closing-side-comments-balanced
1608      closing-paren-indentation=0
1609      closing-brace-indentation=0
1610      closing-square-bracket-indentation=0
1611      continuation-indentation=2
1612      delete-old-newlines
1613      delete-semicolons
1614      fuzzy-line-length
1615      hanging-side-comments
1616      indent-block-comments
1617      indent-columns=4
1618      iterations=1
1619      keep-old-blank-lines=1
1620      long-block-line-count=8
1621      look-for-autoloader
1622      look-for-selfloader
1623      maximum-consecutive-blank-lines=1
1624      maximum-fields-per-table=0
1625      maximum-line-length=80
1626      minimum-space-to-comment=4
1627      nobrace-left-and-indent
1628      nocuddled-else
1629      nodelete-old-whitespace
1630      nohtml
1631      nologfile
1632      noquiet
1633      noshow-options
1634      nostatic-side-comments
1635      notabs
1636      nowarning-output
1637      outdent-labels
1638      outdent-long-quotes
1639      outdent-long-comments
1640      paren-tightness=1
1641      paren-vertical-tightness-closing=0
1642      paren-vertical-tightness=0
1643      pass-version-line
1644      recombine
1645      valign
1646      short-concatenation-item-length=8
1647      space-for-semicolon
1648      square-bracket-tightness=1
1649      square-bracket-vertical-tightness-closing=0
1650      square-bracket-vertical-tightness=0
1651      static-block-comments
1652      trim-qw
1653      format=tidy
1654      backup-file-extension=bak
1655      format-skipping
1656
1657      pod2html
1658      html-table-of-contents
1659      html-entities
1660    );
1661
1662    push @defaults, "perl-syntax-check-flags=-c -T";
1663
1664    #---------------------------------------------------------------
1665    # Define abbreviations which will be expanded into the above primitives.
1666    # These may be defined recursively.
1667    #---------------------------------------------------------------
1668    %expansion = (
1669        %expansion,
1670        'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1671        'fnl'               => [qw(freeze-newlines)],
1672        'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1673        'fws'               => [qw(freeze-whitespace)],
1674        'freeze-blank-lines' =>
1675          [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1676        'fbl'                => [qw(freeze-blank-lines)],
1677        'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1678        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1679        'nooutdent-long-lines' =>
1680          [qw(nooutdent-long-quotes nooutdent-long-comments)],
1681        'noll' => [qw(nooutdent-long-lines)],
1682        'io'   => [qw(indent-only)],
1683        'delete-all-comments' =>
1684          [qw(delete-block-comments delete-side-comments delete-pod)],
1685        'nodelete-all-comments' =>
1686          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1687        'dac'  => [qw(delete-all-comments)],
1688        'ndac' => [qw(nodelete-all-comments)],
1689        'gnu'  => [qw(gnu-style)],
1690        'pbp'  => [qw(perl-best-practices)],
1691        'tee-all-comments' =>
1692          [qw(tee-block-comments tee-side-comments tee-pod)],
1693        'notee-all-comments' =>
1694          [qw(notee-block-comments notee-side-comments notee-pod)],
1695        'tac'   => [qw(tee-all-comments)],
1696        'ntac'  => [qw(notee-all-comments)],
1697        'html'  => [qw(format=html)],
1698        'nhtml' => [qw(format=tidy)],
1699        'tidy'  => [qw(format=tidy)],
1700
1701        'swallow-optional-blank-lines'   => [qw(kbl=0)],
1702        'noswallow-optional-blank-lines' => [qw(kbl=1)],
1703        'sob'                            => [qw(kbl=0)],
1704        'nsob'                           => [qw(kbl=1)],
1705
1706        'break-after-comma-arrows'   => [qw(cab=0)],
1707        'nobreak-after-comma-arrows' => [qw(cab=1)],
1708        'baa'                        => [qw(cab=0)],
1709        'nbaa'                       => [qw(cab=1)],
1710
1711        'break-at-old-trinary-breakpoints' => [qw(bot)],
1712
1713        'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1714        'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1715        'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1716        'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1717        'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1718
1719        'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1720        'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1721        'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1722        'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1723        'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1724
1725        'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1726        'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1727        'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1728
1729        'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1730        'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1731        'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1732
1733        'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1734        'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1735        'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1736
1737        'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1738        'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1739        'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1740
1741        'otr'                   => [qw(opr ohbr osbr)],
1742        'opening-token-right'   => [qw(opr ohbr osbr)],
1743        'notr'                  => [qw(nopr nohbr nosbr)],
1744        'noopening-token-right' => [qw(nopr nohbr nosbr)],
1745
1746        'sot'                    => [qw(sop sohb sosb)],
1747        'nsot'                   => [qw(nsop nsohb nsosb)],
1748        'stack-opening-tokens'   => [qw(sop sohb sosb)],
1749        'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1750
1751        'sct'                    => [qw(scp schb scsb)],
1752        'stack-closing-tokens'   => => [qw(scp schb scsb)],
1753        'nsct'                   => [qw(nscp nschb nscsb)],
1754        'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1755
1756        # 'mangle' originally deleted pod and comments, but to keep it
1757        # reversible, it no longer does.  But if you really want to
1758        # delete them, just use:
1759        #   -mangle -dac
1760
1761        # An interesting use for 'mangle' is to do this:
1762        #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1763        # which will form as many one-line blocks as possible
1764
1765        'mangle' => [
1766            qw(
1767              check-syntax
1768              keep-old-blank-lines=0
1769              delete-old-newlines
1770              delete-old-whitespace
1771              delete-semicolons
1772              indent-columns=0
1773              maximum-consecutive-blank-lines=0
1774              maximum-line-length=100000
1775              noadd-newlines
1776              noadd-semicolons
1777              noadd-whitespace
1778              noblanks-before-blocks
1779              noblanks-before-subs
1780              notabs
1781              )
1782        ],
1783
1784        # 'extrude' originally deleted pod and comments, but to keep it
1785        # reversible, it no longer does.  But if you really want to
1786        # delete them, just use
1787        #   extrude -dac
1788        #
1789        # An interesting use for 'extrude' is to do this:
1790        #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1791        # which will break up all one-line blocks.
1792
1793        'extrude' => [
1794            qw(
1795              check-syntax
1796              ci=0
1797              delete-old-newlines
1798              delete-old-whitespace
1799              delete-semicolons
1800              indent-columns=0
1801              maximum-consecutive-blank-lines=0
1802              maximum-line-length=1
1803              noadd-semicolons
1804              noadd-whitespace
1805              noblanks-before-blocks
1806              noblanks-before-subs
1807              nofuzzy-line-length
1808              notabs
1809              norecombine
1810              )
1811        ],
1812
1813        # this style tries to follow the GNU Coding Standards (which do
1814        # not really apply to perl but which are followed by some perl
1815        # programmers).
1816        'gnu-style' => [
1817            qw(
1818              lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1819              )
1820        ],
1821
1822        # Style suggested in Damian Conway's Perl Best Practices
1823        'perl-best-practices' => [
1824            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1825q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1826        ],
1827
1828        # Additional styles can be added here
1829    );
1830
1831    Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1832
1833    # Uncomment next line to dump all expansions for debugging:
1834    # dump_short_names(\%expansion);
1835    return (
1836        \@option_string,   \@defaults, \%expansion,
1837        \%option_category, \%option_range
1838    );
1839
1840}    # end of generate_options
1841
1842sub process_command_line {
1843
1844    my (
1845        $perltidyrc_stream,  $is_Windows, $Windows_type,
1846        $rpending_complaint, $dump_options_type
1847    ) = @_;
1848
1849    use Getopt::Long;
1850
1851    my (
1852        $roption_string,   $rdefaults, $rexpansion,
1853        $roption_category, $roption_range
1854    ) = generate_options();
1855
1856    #---------------------------------------------------------------
1857    # set the defaults by passing the above list through GetOptions
1858    #---------------------------------------------------------------
1859    my %Opts = ();
1860    {
1861        local @ARGV;
1862        my $i;
1863
1864        # do not load the defaults if we are just dumping perltidyrc
1865        unless ( $dump_options_type eq 'perltidyrc' ) {
1866            for $i (@$rdefaults) { push @ARGV, "--" . $i }
1867        }
1868
1869        # Patch to save users Getopt::Long configuration
1870        # and set to Getopt::Long defaults.  Use eval to avoid
1871        # breaking old versions of Perl without these routines.
1872        my $glc;
1873        eval { $glc = Getopt::Long::Configure() };
1874        unless ($@) {
1875            eval { Getopt::Long::ConfigDefaults() };
1876        }
1877        else { $glc = undef }
1878
1879        if ( !GetOptions( \%Opts, @$roption_string ) ) {
1880            die "Programming Bug: error in setting default options";
1881        }
1882
1883        # Patch to put the previous Getopt::Long configuration back
1884        eval { Getopt::Long::Configure($glc) } if defined $glc;
1885    }
1886
1887    my $word;
1888    my @raw_options        = ();
1889    my $config_file        = "";
1890    my $saw_ignore_profile = 0;
1891    my $saw_extrude        = 0;
1892    my $saw_dump_profile   = 0;
1893    my $i;
1894
1895    #---------------------------------------------------------------
1896    # Take a first look at the command-line parameters.  Do as many
1897    # immediate dumps as possible, which can avoid confusion if the
1898    # perltidyrc file has an error.
1899    #---------------------------------------------------------------
1900    foreach $i (@ARGV) {
1901
1902        $i =~ s/^--/-/;
1903        if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1904            $saw_ignore_profile = 1;
1905        }
1906
1907        # note: this must come before -pro and -profile, below:
1908        elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1909            $saw_dump_profile = 1;
1910        }
1911        elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1912            if ($config_file) {
1913                warn
1914"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1915            }
1916            $config_file = $2;
1917
1918            # resolve <dir>/.../<file>, meaning look upwards from directory
1919            if ( defined($config_file) ) {
1920                if ( my ( $start_dir, $search_file ) =
1921                    ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
1922                {
1923                    $start_dir = '.' if !$start_dir;
1924                    $start_dir = Cwd::realpath($start_dir);
1925                    if ( my $found_file =
1926                        find_file_upwards( $start_dir, $search_file ) )
1927                    {
1928                        $config_file = $found_file;
1929                    }
1930                }
1931            }
1932            unless ( -e $config_file ) {
1933                warn "cannot find file given with -pro=$config_file: $!\n";
1934                $config_file = "";
1935            }
1936        }
1937        elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1938            die "usage: -pro=filename or --profile=filename, no spaces\n";
1939        }
1940        elsif ( $i =~ /^-extrude$/ ) {
1941            $saw_extrude = 1;
1942        }
1943        elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1944            usage();
1945            exit 1;
1946        }
1947        elsif ( $i =~ /^-(version|v)$/ ) {
1948            show_version();
1949            exit 1;
1950        }
1951        elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1952            dump_defaults(@$rdefaults);
1953            exit 1;
1954        }
1955        elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1956            dump_long_names(@$roption_string);
1957            exit 1;
1958        }
1959        elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1960            dump_short_names($rexpansion);
1961            exit 1;
1962        }
1963        elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1964            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1965            exit 1;
1966        }
1967    }
1968
1969    if ( $saw_dump_profile && $saw_ignore_profile ) {
1970        warn "No profile to dump because of -npro\n";
1971        exit 1;
1972    }
1973
1974    #---------------------------------------------------------------
1975    # read any .perltidyrc configuration file
1976    #---------------------------------------------------------------
1977    unless ($saw_ignore_profile) {
1978
1979        # resolve possible conflict between $perltidyrc_stream passed
1980        # as call parameter to perltidy and -pro=filename on command
1981        # line.
1982        if ($perltidyrc_stream) {
1983            if ($config_file) {
1984                warn <<EOM;
1985 Conflict: a perltidyrc configuration file was specified both as this
1986 perltidy call parameter: $perltidyrc_stream
1987 and with this -profile=$config_file.
1988 Using -profile=$config_file.
1989EOM
1990            }
1991            else {
1992                $config_file = $perltidyrc_stream;
1993            }
1994        }
1995
1996        # look for a config file if we don't have one yet
1997        my $rconfig_file_chatter;
1998        $$rconfig_file_chatter = "";
1999        $config_file =
2000          find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2001            $rpending_complaint )
2002          unless $config_file;
2003
2004        # open any config file
2005        my $fh_config;
2006        if ($config_file) {
2007            ( $fh_config, $config_file ) =
2008              Perl::Tidy::streamhandle( $config_file, 'r' );
2009            unless ($fh_config) {
2010                $$rconfig_file_chatter .=
2011                  "# $config_file exists but cannot be opened\n";
2012            }
2013        }
2014
2015        if ($saw_dump_profile) {
2016            if ($saw_dump_profile) {
2017                dump_config_file( $fh_config, $config_file,
2018                    $rconfig_file_chatter );
2019                exit 1;
2020            }
2021        }
2022
2023        if ($fh_config) {
2024
2025            my ( $rconfig_list, $death_message ) =
2026              read_config_file( $fh_config, $config_file, $rexpansion );
2027            die $death_message if ($death_message);
2028
2029            # process any .perltidyrc parameters right now so we can
2030            # localize errors
2031            if (@$rconfig_list) {
2032                local @ARGV = @$rconfig_list;
2033
2034                expand_command_abbreviations( $rexpansion, \@raw_options,
2035                    $config_file );
2036
2037                if ( !GetOptions( \%Opts, @$roption_string ) ) {
2038                    die
2039"Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
2040                }
2041
2042                # Anything left in this local @ARGV is an error and must be
2043                # invalid bare words from the configuration file.  We cannot
2044                # check this earlier because bare words may have been valid
2045                # values for parameters.  We had to wait for GetOptions to have
2046                # a look at @ARGV.
2047                if (@ARGV) {
2048                    my $count = @ARGV;
2049                    my $str   = "\'" . pop(@ARGV) . "\'";
2050                    while ( my $param = pop(@ARGV) ) {
2051                        if ( length($str) < 70 ) {
2052                            $str .= ", '$param'";
2053                        }
2054                        else {
2055                            $str .= ", ...";
2056                            last;
2057                        }
2058                    }
2059                    die <<EOM;
2060There are $count unrecognized values in the configuration file '$config_file':
2061$str
2062Use leading dashes for parameters.  Use -npro to ignore this file.
2063EOM
2064                }
2065
2066                # Undo any options which cause premature exit.  They are not
2067                # appropriate for a config file, and it could be hard to
2068                # diagnose the cause of the premature exit.
2069                foreach (
2070                    qw{
2071                    dump-defaults
2072                    dump-long-names
2073                    dump-options
2074                    dump-profile
2075                    dump-short-names
2076                    dump-token-types
2077                    dump-want-left-space
2078                    dump-want-right-space
2079                    help
2080                    stylesheet
2081                    version
2082                    }
2083                  )
2084                {
2085
2086                    if ( defined( $Opts{$_} ) ) {
2087                        delete $Opts{$_};
2088                        warn "ignoring --$_ in config file: $config_file\n";
2089                    }
2090                }
2091            }
2092        }
2093    }
2094
2095    #---------------------------------------------------------------
2096    # now process the command line parameters
2097    #---------------------------------------------------------------
2098    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2099
2100    if ( !GetOptions( \%Opts, @$roption_string ) ) {
2101        die "Error on command line; for help try 'perltidy -h'\n";
2102    }
2103
2104    return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
2105        $rexpansion, $roption_category, $roption_range );
2106}    # end of process_command_line
2107
2108sub check_options {
2109
2110    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2111
2112    #---------------------------------------------------------------
2113    # check and handle any interactions among the basic options..
2114    #---------------------------------------------------------------
2115
2116    # Since -vt, -vtc, and -cti are abbreviations, but under
2117    # msdos, an unquoted input parameter like vtc=1 will be
2118    # seen as 2 parameters, vtc and 1, so the abbreviations
2119    # won't be seen.  Therefore, we will catch them here if
2120    # they get through.
2121
2122    if ( defined $rOpts->{'vertical-tightness'} ) {
2123        my $vt = $rOpts->{'vertical-tightness'};
2124        $rOpts->{'paren-vertical-tightness'}          = $vt;
2125        $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2126        $rOpts->{'brace-vertical-tightness'}          = $vt;
2127    }
2128
2129    if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2130        my $vtc = $rOpts->{'vertical-tightness-closing'};
2131        $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2132        $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2133        $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2134    }
2135
2136    if ( defined $rOpts->{'closing-token-indentation'} ) {
2137        my $cti = $rOpts->{'closing-token-indentation'};
2138        $rOpts->{'closing-square-bracket-indentation'} = $cti;
2139        $rOpts->{'closing-brace-indentation'}          = $cti;
2140        $rOpts->{'closing-paren-indentation'}          = $cti;
2141    }
2142
2143    # In quiet mode, there is no log file and hence no way to report
2144    # results of syntax check, so don't do it.
2145    if ( $rOpts->{'quiet'} ) {
2146        $rOpts->{'check-syntax'} = 0;
2147    }
2148
2149    # can't check syntax if no output
2150    if ( $rOpts->{'format'} ne 'tidy' ) {
2151        $rOpts->{'check-syntax'} = 0;
2152    }
2153
2154    # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2155    # wide variety of nasty problems on these systems, because they cannot
2156    # reliably run backticks.  Don't even think about changing this!
2157    if (   $rOpts->{'check-syntax'}
2158        && $is_Windows
2159        && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2160    {
2161        $rOpts->{'check-syntax'} = 0;
2162    }
2163
2164    # It's really a bad idea to check syntax as root unless you wrote
2165    # the script yourself.  FIXME: not sure if this works with VMS
2166    unless ($is_Windows) {
2167
2168        if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2169            $rOpts->{'check-syntax'} = 0;
2170            $$rpending_complaint .=
2171"Syntax check deactivated for safety; you shouldn't run this as root\n";
2172        }
2173    }
2174
2175    # check iteration count and quietly fix if necessary:
2176    # - iterations option only applies to code beautification mode
2177    # - it shouldn't be nessary to use more than about 2 iterations
2178    if ( $rOpts->{'format'} ne 'tidy' ) {
2179        $rOpts->{'iterations'} = 1;
2180    }
2181    elsif ( defined( $rOpts->{'iterations'} ) ) {
2182        if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2183        elsif ( $rOpts->{'iterations'} > 5 )  { $rOpts->{'iterations'} = 5 }
2184    }
2185    else {
2186        $rOpts->{'iterations'} = 1;
2187    }
2188
2189    # see if user set a non-negative logfile-gap
2190    if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2191
2192        # a zero gap will be taken as a 1
2193        if ( $rOpts->{'logfile-gap'} == 0 ) {
2194            $rOpts->{'logfile-gap'} = 1;
2195        }
2196
2197        # setting a non-negative logfile gap causes logfile to be saved
2198        $rOpts->{'logfile'} = 1;
2199    }
2200
2201    # not setting logfile gap, or setting it negative, causes default of 50
2202    else {
2203        $rOpts->{'logfile-gap'} = 50;
2204    }
2205
2206    # set short-cut flag when only indentation is to be done.
2207    # Note that the user may or may not have already set the
2208    # indent-only flag.
2209    if (   !$rOpts->{'add-whitespace'}
2210        && !$rOpts->{'delete-old-whitespace'}
2211        && !$rOpts->{'add-newlines'}
2212        && !$rOpts->{'delete-old-newlines'} )
2213    {
2214        $rOpts->{'indent-only'} = 1;
2215    }
2216
2217    # -isbc implies -ibc
2218    if ( $rOpts->{'indent-spaced-block-comments'} ) {
2219        $rOpts->{'indent-block-comments'} = 1;
2220    }
2221
2222    # -bli flag implies -bl
2223    if ( $rOpts->{'brace-left-and-indent'} ) {
2224        $rOpts->{'opening-brace-on-new-line'} = 1;
2225    }
2226
2227    if (   $rOpts->{'opening-brace-always-on-right'}
2228        && $rOpts->{'opening-brace-on-new-line'} )
2229    {
2230        warn <<EOM;
2231 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2232  'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2233EOM
2234        $rOpts->{'opening-brace-on-new-line'} = 0;
2235    }
2236
2237    # it simplifies things if -bl is 0 rather than undefined
2238    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2239        $rOpts->{'opening-brace-on-new-line'} = 0;
2240    }
2241
2242    # -sbl defaults to -bl if not defined
2243    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2244        $rOpts->{'opening-sub-brace-on-new-line'} =
2245          $rOpts->{'opening-brace-on-new-line'};
2246    }
2247
2248    if ( $rOpts->{'entab-leading-whitespace'} ) {
2249        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2250            warn "-et=n must use a positive integer; ignoring -et\n";
2251            $rOpts->{'entab-leading-whitespace'} = undef;
2252        }
2253
2254        # entab leading whitespace has priority over the older 'tabs' option
2255        if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2256    }
2257}
2258
2259sub find_file_upwards {
2260    my ( $search_dir, $search_file ) = @_;
2261
2262    $search_dir  =~ s{/+$}{};
2263    $search_file =~ s{^/+}{};
2264
2265    while (1) {
2266        my $try_path = "$search_dir/$search_file";
2267        if ( -f $try_path ) {
2268            return $try_path;
2269        }
2270        elsif ( $search_dir eq '/' ) {
2271            return undef;
2272        }
2273        else {
2274            $search_dir = dirname($search_dir);
2275        }
2276    }
2277}
2278
2279sub expand_command_abbreviations {
2280
2281    # go through @ARGV and expand any abbreviations
2282
2283    my ( $rexpansion, $rraw_options, $config_file ) = @_;
2284    my ($word);
2285
2286    # set a pass limit to prevent an infinite loop;
2287    # 10 should be plenty, but it may be increased to allow deeply
2288    # nested expansions.
2289    my $max_passes = 10;
2290    my @new_argv   = ();
2291
2292    # keep looping until all expansions have been converted into actual
2293    # dash parameters..
2294    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2295        my @new_argv     = ();
2296        my $abbrev_count = 0;
2297
2298        # loop over each item in @ARGV..
2299        foreach $word (@ARGV) {
2300
2301            # convert any leading 'no-' to just 'no'
2302            if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2303
2304            # if it is a dash flag (instead of a file name)..
2305            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2306
2307                my $abr   = $1;
2308                my $flags = $2;
2309
2310                # save the raw input for debug output in case of circular refs
2311                if ( $pass_count == 0 ) {
2312                    push( @$rraw_options, $word );
2313                }
2314
2315                # recombine abbreviation and flag, if necessary,
2316                # to allow abbreviations with arguments such as '-vt=1'
2317                if ( $rexpansion->{ $abr . $flags } ) {
2318                    $abr   = $abr . $flags;
2319                    $flags = "";
2320                }
2321
2322                # if we see this dash item in the expansion hash..
2323                if ( $rexpansion->{$abr} ) {
2324                    $abbrev_count++;
2325
2326                    # stuff all of the words that it expands to into the
2327                    # new arg list for the next pass
2328                    foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2329                        next unless $abbrev;    # for safety; shouldn't happen
2330                        push( @new_argv, '--' . $abbrev . $flags );
2331                    }
2332                }
2333
2334                # not in expansion hash, must be actual long name
2335                else {
2336                    push( @new_argv, $word );
2337                }
2338            }
2339
2340            # not a dash item, so just save it for the next pass
2341            else {
2342                push( @new_argv, $word );
2343            }
2344        }    # end of this pass
2345
2346        # update parameter list @ARGV to the new one
2347        @ARGV = @new_argv;
2348        last unless ( $abbrev_count > 0 );
2349
2350        # make sure we are not in an infinite loop
2351        if ( $pass_count == $max_passes ) {
2352            print STDERR
2353"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2354            print STDERR "Here are the raw options\n";
2355            local $" = ')(';
2356            print STDERR "(@$rraw_options)\n";
2357            my $num = @new_argv;
2358
2359            if ( $num < 50 ) {
2360                print STDERR "After $max_passes passes here is ARGV\n";
2361                print STDERR "(@new_argv)\n";
2362            }
2363            else {
2364                print STDERR "After $max_passes passes ARGV has $num entries\n";
2365            }
2366
2367            if ($config_file) {
2368                die <<"DIE";
2369Please check your configuration file $config_file for circular-references.
2370To deactivate it, use -npro.
2371DIE
2372            }
2373            else {
2374                die <<'DIE';
2375Program bug - circular-references in the %expansion hash, probably due to
2376a recent program change.
2377DIE
2378            }
2379        }    # end of check for circular references
2380    }    # end of loop over all passes
2381}
2382
2383# Debug routine -- this will dump the expansion hash
2384sub dump_short_names {
2385    my $rexpansion = shift;
2386    print STDOUT <<EOM;
2387List of short names.  This list shows how all abbreviations are
2388translated into other abbreviations and, eventually, into long names.
2389New abbreviations may be defined in a .perltidyrc file.
2390For a list of all long names, use perltidy --dump-long-names (-dln).
2391--------------------------------------------------------------------------
2392EOM
2393    foreach my $abbrev ( sort keys %$rexpansion ) {
2394        my @list = @{ $$rexpansion{$abbrev} };
2395        print STDOUT "$abbrev --> @list\n";
2396    }
2397}
2398
2399sub check_vms_filename {
2400
2401    # given a valid filename (the perltidy input file)
2402    # create a modified filename and separator character
2403    # suitable for VMS.
2404    #
2405    # Contributed by Michael Cartmell
2406    #
2407    my ( $base, $path ) = fileparse( $_[0] );
2408
2409    # remove explicit ; version
2410    $base =~ s/;-?\d*$//
2411
2412      # remove explicit . version ie two dots in filename NB ^ escapes a dot
2413      or $base =~ s/(          # begin capture $1
2414                  (?:^|[^^])\. # match a dot not preceded by a caret
2415                  (?:          # followed by nothing
2416                    |          # or
2417                    .*[^^]     # anything ending in a non caret
2418                  )
2419                )              # end capture $1
2420                \.-?\d*$       # match . version number
2421              /$1/x;
2422
2423    # normalise filename, if there are no unescaped dots then append one
2424    $base .= '.' unless $base =~ /(?:^|[^^])\./;
2425
2426    # if we don't already have an extension then we just append the extention
2427    my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2428    return ( $path . $base, $separator );
2429}
2430
2431sub Win_OS_Type {
2432
2433    # TODO: are these more standard names?
2434    # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2435
2436    # Returns a string that determines what MS OS we are on.
2437    # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2438    # Returns blank string if not an MS system.
2439    # Original code contributed by: Yves Orton
2440    # We need to know this to decide where to look for config files
2441
2442    my $rpending_complaint = shift;
2443    my $os                 = "";
2444    return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2445
2446    # Systems built from Perl source may not have Win32.pm
2447    # But probably have Win32::GetOSVersion() anyway so the
2448    # following line is not 'required':
2449    # return $os unless eval('require Win32');
2450
2451    # Use the standard API call to determine the version
2452    my ( $undef, $major, $minor, $build, $id );
2453    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2454
2455    #
2456    #    NAME                   ID   MAJOR  MINOR
2457    #    Windows NT 4           2      4       0
2458    #    Windows 2000           2      5       0
2459    #    Windows XP             2      5       1
2460    #    Windows Server 2003    2      5       2
2461
2462    return "win32s" unless $id;    # If id==0 then its a win32s box.
2463    $os = {                        # Magic numbers from MSDN
2464                                   # documentation of GetOSVersion
2465        1 => {
2466            0  => "95",
2467            10 => "98",
2468            90 => "Me"
2469        },
2470        2 => {
2471            0  => "2000",          # or NT 4, see below
2472            1  => "XP/.Net",
2473            2  => "Win2003",
2474            51 => "NT3.51"
2475        }
2476    }->{$id}->{$minor};
2477
2478    # If $os is undefined, the above code is out of date.  Suggested updates
2479    # are welcome.
2480    unless ( defined $os ) {
2481        $os = "";
2482        $$rpending_complaint .= <<EOS;
2483Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2484We won't be able to look for a system-wide config file.
2485EOS
2486    }
2487
2488    # Unfortunately the logic used for the various versions isnt so clever..
2489    # so we have to handle an outside case.
2490    return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2491}
2492
2493sub is_unix {
2494    return
2495         ( $^O !~ /win32|dos/i )
2496      && ( $^O ne 'VMS' )
2497      && ( $^O ne 'OS2' )
2498      && ( $^O ne 'MacOS' );
2499}
2500
2501sub look_for_Windows {
2502
2503    # determine Windows sub-type and location of
2504    # system-wide configuration files
2505    my $rpending_complaint = shift;
2506    my $is_Windows         = ( $^O =~ /win32|dos/i );
2507    my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2508    return ( $is_Windows, $Windows_type );
2509}
2510
2511sub find_config_file {
2512
2513    # look for a .perltidyrc configuration file
2514    # For Windows also look for a file named perltidy.ini
2515    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2516        $rpending_complaint ) = @_;
2517
2518    $$rconfig_file_chatter .= "# Config file search...system reported as:";
2519    if ($is_Windows) {
2520        $$rconfig_file_chatter .= "Windows $Windows_type\n";
2521    }
2522    else {
2523        $$rconfig_file_chatter .= " $^O\n";
2524    }
2525
2526    # sub to check file existance and record all tests
2527    my $exists_config_file = sub {
2528        my $config_file = shift;
2529        return 0 unless $config_file;
2530        $$rconfig_file_chatter .= "# Testing: $config_file\n";
2531        return -f $config_file;
2532    };
2533
2534    my $config_file;
2535
2536    # look in current directory first
2537    $config_file = ".perltidyrc";
2538    return $config_file if $exists_config_file->($config_file);
2539    if ($is_Windows) {
2540        $config_file = "perltidy.ini";
2541        return $config_file if $exists_config_file->($config_file);
2542    }
2543
2544    # Default environment vars.
2545    my @envs = qw(PERLTIDY HOME);
2546
2547    # Check the NT/2k/XP locations, first a local machine def, then a
2548    # network def
2549    push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2550
2551    # Now go through the enviornment ...
2552    foreach my $var (@envs) {
2553        $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2554        if ( defined( $ENV{$var} ) ) {
2555            $$rconfig_file_chatter .= " = $ENV{$var}\n";
2556
2557            # test ENV{ PERLTIDY } as file:
2558            if ( $var eq 'PERLTIDY' ) {
2559                $config_file = "$ENV{$var}";
2560                return $config_file if $exists_config_file->($config_file);
2561            }
2562
2563            # test ENV as directory:
2564            $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2565            return $config_file if $exists_config_file->($config_file);
2566
2567            if ($is_Windows) {
2568                $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2569                return $config_file if $exists_config_file->($config_file);
2570            }
2571        }
2572        else {
2573            $$rconfig_file_chatter .= "\n";
2574        }
2575    }
2576
2577    # then look for a system-wide definition
2578    # where to look varies with OS
2579    if ($is_Windows) {
2580
2581        if ($Windows_type) {
2582            my ( $os, $system, $allusers ) =
2583              Win_Config_Locs( $rpending_complaint, $Windows_type );
2584
2585            # Check All Users directory, if there is one.
2586            # i.e. C:\Documents and Settings\User\perltidy.ini
2587            if ($allusers) {
2588
2589                $config_file = catfile( $allusers, ".perltidyrc" );
2590                return $config_file if $exists_config_file->($config_file);
2591
2592                $config_file = catfile( $allusers, "perltidy.ini" );
2593                return $config_file if $exists_config_file->($config_file);
2594            }
2595
2596            # Check system directory.
2597            # retain old code in case someone has been able to create
2598            # a file with a leading period.
2599            $config_file = catfile( $system, ".perltidyrc" );
2600            return $config_file if $exists_config_file->($config_file);
2601
2602            $config_file = catfile( $system, "perltidy.ini" );
2603            return $config_file if $exists_config_file->($config_file);
2604        }
2605    }
2606
2607    # Place to add customization code for other systems
2608    elsif ( $^O eq 'OS2' ) {
2609    }
2610    elsif ( $^O eq 'MacOS' ) {
2611    }
2612    elsif ( $^O eq 'VMS' ) {
2613    }
2614
2615    # Assume some kind of Unix
2616    else {
2617
2618        $config_file = "/usr/local/etc/perltidyrc";
2619        return $config_file if $exists_config_file->($config_file);
2620
2621        $config_file = "/etc/perltidyrc";
2622        return $config_file if $exists_config_file->($config_file);
2623    }
2624
2625    # Couldn't find a config file
2626    return;
2627}
2628
2629sub Win_Config_Locs {
2630
2631    # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2632    # or undef if its not a win32 OS.  In list context returns OS, System
2633    # Directory, and All Users Directory.  All Users will be empty on a
2634    # 9x/Me box.  Contributed by: Yves Orton.
2635
2636    my $rpending_complaint = shift;
2637    my $os = (@_) ? shift : Win_OS_Type();
2638    return unless $os;
2639
2640    my $system   = "";
2641    my $allusers = "";
2642
2643    if ( $os =~ /9[58]|Me/ ) {
2644        $system = "C:/Windows";
2645    }
2646    elsif ( $os =~ /NT|XP|200?/ ) {
2647        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2648        $allusers =
2649          ( $os =~ /NT/ )
2650          ? "C:/WinNT/profiles/All Users/"
2651          : "C:/Documents and Settings/All Users/";
2652    }
2653    else {
2654
2655        # This currently would only happen on a win32s computer.  I dont have
2656        # one to test, so I am unsure how to proceed.  Suggestions welcome!
2657        $$rpending_complaint .=
2658"I dont know a sensible place to look for config files on an $os system.\n";
2659        return;
2660    }
2661    return wantarray ? ( $os, $system, $allusers ) : $os;
2662}
2663
2664sub dump_config_file {
2665    my $fh                   = shift;
2666    my $config_file          = shift;
2667    my $rconfig_file_chatter = shift;
2668    print STDOUT "$$rconfig_file_chatter";
2669    if ($fh) {
2670        print STDOUT "# Dump of file: '$config_file'\n";
2671        while ( my $line = $fh->getline() ) { print STDOUT $line }
2672        eval { $fh->close() };
2673    }
2674    else {
2675        print STDOUT "# ...no config file found\n";
2676    }
2677}
2678
2679sub read_config_file {
2680
2681    my ( $fh, $config_file, $rexpansion ) = @_;
2682    my @config_list = ();
2683
2684    # file is bad if non-empty $death_message is returned
2685    my $death_message = "";
2686
2687    my $name = undef;
2688    my $line_no;
2689    while ( my $line = $fh->getline() ) {
2690        $line_no++;
2691        chomp $line;
2692        next if $line =~ /^\s*#/;    # skip full-line comment
2693        ( $line, $death_message ) =
2694          strip_comment( $line, $config_file, $line_no );
2695        last if ($death_message);
2696        $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
2697        next unless $line;
2698
2699        # look for something of the general form
2700        #    newname { body }
2701        # or just
2702        #    body
2703
2704        if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2705            my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2706
2707            # handle a new alias definition
2708            if ($newname) {
2709                if ($name) {
2710                    $death_message =
2711"No '}' seen after $name and before $newname in config file $config_file line $.\n";
2712                    last;
2713                }
2714                $name = $newname;
2715
2716                if ( ${$rexpansion}{$name} ) {
2717                    local $" = ')(';
2718                    my @names = sort keys %$rexpansion;
2719                    $death_message =
2720                        "Here is a list of all installed aliases\n(@names)\n"
2721                      . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2722                    last;
2723                }
2724                ${$rexpansion}{$name} = [];
2725            }
2726
2727            # now do the body
2728            if ($body) {
2729
2730                my ( $rbody_parts, $msg ) = parse_args($body);
2731                if ($msg) {
2732                    $death_message = <<EOM;
2733Error reading file '$config_file' at line number $line_no.
2734$msg
2735Please fix this line or use -npro to avoid reading this file
2736EOM
2737                    last;
2738                }
2739
2740                if ($name) {
2741
2742                    # remove leading dashes if this is an alias
2743                    foreach (@$rbody_parts) { s/^\-+//; }
2744                    push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2745                }
2746                else {
2747                    push( @config_list, @$rbody_parts );
2748                }
2749            }
2750
2751            if ($curly) {
2752                unless ($name) {
2753                    $death_message =
2754"Unexpected '}' seen in config file $config_file line $.\n";
2755                    last;
2756                }
2757                $name = undef;
2758            }
2759        }
2760    }
2761    eval { $fh->close() };
2762    return ( \@config_list, $death_message );
2763}
2764
2765sub strip_comment {
2766
2767    my ( $instr, $config_file, $line_no ) = @_;
2768    my $msg = "";
2769
2770    # nothing to do if no comments
2771    if ( $instr !~ /#/ ) {
2772        return ( $instr, $msg );
2773    }
2774
2775    # use simple method of no quotes
2776    elsif ( $instr !~ /['"]/ ) {
2777        $instr =~ s/\s*\#.*$//;    # simple trim
2778        return ( $instr, $msg );
2779    }
2780
2781    # handle comments and quotes
2782    my $outstr     = "";
2783    my $quote_char = "";
2784    while (1) {
2785
2786        # looking for ending quote character
2787        if ($quote_char) {
2788            if ( $instr =~ /\G($quote_char)/gc ) {
2789                $quote_char = "";
2790                $outstr .= $1;
2791            }
2792            elsif ( $instr =~ /\G(.)/gc ) {
2793                $outstr .= $1;
2794            }
2795
2796            # error..we reached the end without seeing the ending quote char
2797            else {
2798                $msg = <<EOM;
2799Error reading file $config_file at line number $line_no.
2800Did not see ending quote character <$quote_char> in this text:
2801$instr
2802Please fix this line or use -npro to avoid reading this file
2803EOM
2804                last;
2805            }
2806        }
2807
2808        # accumulating characters and looking for start of a quoted string
2809        else {
2810            if ( $instr =~ /\G([\"\'])/gc ) {
2811                $outstr .= $1;
2812                $quote_char = $1;
2813            }
2814            elsif ( $instr =~ /\G#/gc ) {
2815                last;
2816            }
2817            elsif ( $instr =~ /\G(.)/gc ) {
2818                $outstr .= $1;
2819            }
2820            else {
2821                last;
2822            }
2823        }
2824    }
2825    return ( $outstr, $msg );
2826}
2827
2828sub parse_args {
2829
2830    # Parse a command string containing multiple string with possible
2831    # quotes, into individual commands.  It might look like this, for example:
2832    #
2833    #    -wba=" + - "  -some-thing -wbb='. && ||'
2834    #
2835    # There is no need, at present, to handle escaped quote characters.
2836    # (They are not perltidy tokens, so needn't be in strings).
2837
2838    my ($body)     = @_;
2839    my @body_parts = ();
2840    my $quote_char = "";
2841    my $part       = "";
2842    my $msg        = "";
2843    while (1) {
2844
2845        # looking for ending quote character
2846        if ($quote_char) {
2847            if ( $body =~ /\G($quote_char)/gc ) {
2848                $quote_char = "";
2849            }
2850            elsif ( $body =~ /\G(.)/gc ) {
2851                $part .= $1;
2852            }
2853
2854            # error..we reached the end without seeing the ending quote char
2855            else {
2856                if ( length($part) ) { push @body_parts, $part; }
2857                $msg = <<EOM;
2858Did not see ending quote character <$quote_char> in this text:
2859$body
2860EOM
2861                last;
2862            }
2863        }
2864
2865        # accumulating characters and looking for start of a quoted string
2866        else {
2867            if ( $body =~ /\G([\"\'])/gc ) {
2868                $quote_char = $1;
2869            }
2870            elsif ( $body =~ /\G(\s+)/gc ) {
2871                if ( length($part) ) { push @body_parts, $part; }
2872                $part = "";
2873            }
2874            elsif ( $body =~ /\G(.)/gc ) {
2875                $part .= $1;
2876            }
2877            else {
2878                if ( length($part) ) { push @body_parts, $part; }
2879                last;
2880            }
2881        }
2882    }
2883    return ( \@body_parts, $msg );
2884}
2885
2886sub dump_long_names {
2887
2888    my @names = sort @_;
2889    print STDOUT <<EOM;
2890# Command line long names (passed to GetOptions)
2891#---------------------------------------------------------------
2892# here is a summary of the Getopt codes:
2893# <none> does not take an argument
2894# =s takes a mandatory string
2895# :s takes an optional string
2896# =i takes a mandatory integer
2897# :i takes an optional integer
2898# ! does not take an argument and may be negated
2899#  i.e., -foo and -nofoo are allowed
2900# a double dash signals the end of the options list
2901#
2902#---------------------------------------------------------------
2903EOM
2904
2905    foreach (@names) { print STDOUT "$_\n" }
2906}
2907
2908sub dump_defaults {
2909    my @defaults = sort @_;
2910    print STDOUT "Default command line options:\n";
2911    foreach (@_) { print STDOUT "$_\n" }
2912}
2913
2914sub readable_options {
2915
2916    # return options for this run as a string which could be
2917    # put in a perltidyrc file
2918    my ( $rOpts, $roption_string ) = @_;
2919    my %Getopt_flags;
2920    my $rGetopt_flags    = \%Getopt_flags;
2921    my $readable_options = "# Final parameter set for this run.\n";
2922    $readable_options .=
2923      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
2924    foreach my $opt ( @{$roption_string} ) {
2925        my $flag = "";
2926        if ( $opt =~ /(.*)(!|=.*)$/ ) {
2927            $opt  = $1;
2928            $flag = $2;
2929        }
2930        if ( defined( $rOpts->{$opt} ) ) {
2931            $rGetopt_flags->{$opt} = $flag;
2932        }
2933    }
2934    foreach my $key ( sort keys %{$rOpts} ) {
2935        my $flag   = $rGetopt_flags->{$key};
2936        my $value  = $rOpts->{$key};
2937        my $prefix = '--';
2938        my $suffix = "";
2939        if ($flag) {
2940            if ( $flag =~ /^=/ ) {
2941                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2942                $suffix = "=" . $value;
2943            }
2944            elsif ( $flag =~ /^!/ ) {
2945                $prefix .= "no" unless ($value);
2946            }
2947            else {
2948
2949                # shouldn't happen
2950                $readable_options .=
2951                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2952            }
2953        }
2954        $readable_options .= $prefix . $key . $suffix . "\n";
2955    }
2956    return $readable_options;
2957}
2958
2959sub show_version {
2960    print <<"EOM";
2961This is perltidy, v$VERSION
2962
2963Copyright 2000-2010, Steve Hancock
2964
2965Perltidy is free software and may be copied under the terms of the GNU
2966General Public License, which is included in the distribution files.
2967
2968Complete documentation for perltidy can be found using 'man perltidy'
2969or on the internet at http://perltidy.sourceforge.net.
2970EOM
2971}
2972
2973sub usage {
2974
2975    print STDOUT <<EOF;
2976This is perltidy version $VERSION, a perl script indenter.  Usage:
2977
2978    perltidy [ options ] file1 file2 file3 ...
2979            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2980    perltidy [ options ] file1 -o outfile
2981    perltidy [ options ] file1 -st >outfile
2982    perltidy [ options ] <infile >outfile
2983
2984Options have short and long forms. Short forms are shown; see
2985man pages for long forms.  Note: '=s' indicates a required string,
2986and '=n' indicates a required integer.
2987
2988I/O control
2989 -h      show this help
2990 -o=file name of the output file (only if single input file)
2991 -oext=s change output extension from 'tdy' to s
2992 -opath=path  change path to be 'path' for output files
2993 -b      backup original to .bak and modify file in-place
2994 -bext=s change default backup extension from 'bak' to s
2995 -q      deactivate error messages (for running under editor)
2996 -w      include non-critical warning messages in the .ERR error output
2997 -syn    run perl -c to check syntax (default under unix systems)
2998 -log    save .LOG file, which has useful diagnostics
2999 -f      force perltidy to read a binary file
3000 -g      like -log but writes more detailed .LOG file, for debugging scripts
3001 -opt    write the set of options actually used to a .LOG file
3002 -npro   ignore .perltidyrc configuration command file
3003 -pro=file   read configuration commands from file instead of .perltidyrc
3004 -st     send output to standard output, STDOUT
3005 -se     send error output to standard error output, STDERR
3006 -v      display version number to standard output and quit
3007
3008Basic Options:
3009 -i=n    use n columns per indentation level (default n=4)
3010 -t      tabs: use one tab character per indentation level, not recommeded
3011 -nt     no tabs: use n spaces per indentation level (default)
3012 -et=n   entab leading whitespace n spaces per tab; not recommended
3013 -io     "indent only": just do indentation, no other formatting.
3014 -sil=n  set starting indentation level to n;  use if auto detection fails
3015 -ole=s  specify output line ending (s=dos or win, mac, unix)
3016 -ple    keep output line endings same as input (input must be filename)
3017
3018Whitespace Control
3019 -fws    freeze whitespace; this disables all whitespace changes
3020           and disables the following switches:
3021 -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
3022 -bbt    same as -bt but for code block braces; same as -bt if not given
3023 -bbvt   block braces vertically tight; use with -bl or -bli
3024 -bbvtl=s  make -bbvt to apply to selected list of block types
3025 -pt=n   paren tightness (n=0, 1 or 2)
3026 -sbt=n  square bracket tightness (n=0, 1, or 2)
3027 -bvt=n  brace vertical tightness,
3028         n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3029 -pvt=n  paren vertical tightness (see -bvt for n)
3030 -sbvt=n square bracket vertical tightness (see -bvt for n)
3031 -bvtc=n closing brace vertical tightness:
3032         n=(0=open, 1=sometimes close, 2=always close)
3033 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3034 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3035 -ci=n   sets continuation indentation=n,  default is n=2 spaces
3036 -lp     line up parentheses, brackets, and non-BLOCK braces
3037 -sfs    add space before semicolon in for( ; ; )
3038 -aws    allow perltidy to add whitespace (default)
3039 -dws    delete all old non-essential whitespace
3040 -icb    indent closing brace of a code block
3041 -cti=n  closing indentation of paren, square bracket, or non-block brace:
3042         n=0 none, =1 align with opening, =2 one full indentation level
3043 -icp    equivalent to -cti=2
3044 -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
3045 -wrs=s  want space right of tokens in string;
3046 -sts    put space before terminal semicolon of a statement
3047 -sak=s  put space between keywords given in s and '(';
3048 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3049
3050Line Break Control
3051 -fnl    freeze newlines; this disables all line break changes
3052            and disables the following switches:
3053 -anl    add newlines;  ok to introduce new line breaks
3054 -bbs    add blank line before subs and packages
3055 -bbc    add blank line before block comments
3056 -bbb    add blank line between major blocks
3057 -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
3058 -mbl=n  maximum consecutive blank lines to output (default=1)
3059 -ce     cuddled else; use this style: '} else {'
3060 -dnl    delete old newlines (default)
3061 -l=n    maximum line length;  default n=80
3062 -bl     opening brace on new line
3063 -sbl    opening sub brace on new line.  value of -bl is used if not given.
3064 -bli    opening brace on new line and indented
3065 -bar    opening brace always on right, even for long clauses
3066 -vt=n   vertical tightness (requires -lp); n controls break after opening
3067         token: 0=never  1=no break if next line balanced   2=no break
3068 -vtc=n  vertical tightness of closing container; n controls if closing
3069         token starts new line: 0=always  1=not unless list  1=never
3070 -wba=s  want break after tokens in string; i.e. wba=': .'
3071 -wbb=s  want break before tokens in string
3072
3073Following Old Breakpoints
3074 -kis    keep interior semicolons.  Allows multiple statements per line.
3075 -boc    break at old comma breaks: turns off all automatic list formatting
3076 -bol    break at old logical breakpoints: or, and, ||, && (default)
3077 -bok    break at old list keyword breakpoints such as map, sort (default)
3078 -bot    break at old conditional (ternary ?:) operator breakpoints (default)
3079 -cab=n  break at commas after a comma-arrow (=>):
3080         n=0 break at all commas after =>
3081         n=1 stable: break unless this breaks an existing one-line container
3082         n=2 break only if a one-line container cannot be formed
3083         n=3 do not treat commas after => specially at all
3084
3085Comment controls
3086 -ibc    indent block comments (default)
3087 -isbc   indent spaced block comments; may indent unless no leading space
3088 -msc=n  minimum desired spaces to side comment, default 4
3089 -fpsc=n fix position for side comments; default 0;
3090 -csc    add or update closing side comments after closing BLOCK brace
3091 -dcsc   delete closing side comments created by a -csc command
3092 -cscp=s change closing side comment prefix to be other than '## end'
3093 -cscl=s change closing side comment to apply to selected list of blocks
3094 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3095 -csct=n maximum number of columns of appended text, default n=20
3096 -cscw   causes warning if old side comment is overwritten with -csc
3097
3098 -sbc    use 'static block comments' identified by leading '##' (default)
3099 -sbcp=s change static block comment identifier to be other than '##'
3100 -osbc   outdent static block comments
3101
3102 -ssc    use 'static side comments' identified by leading '##' (default)
3103 -sscp=s change static side comment identifier to be other than '##'
3104
3105Delete selected text
3106 -dac    delete all comments AND pod
3107 -dbc    delete block comments
3108 -dsc    delete side comments
3109 -dp     delete pod
3110
3111Send selected text to a '.TEE' file
3112 -tac    tee all comments AND pod
3113 -tbc    tee block comments
3114 -tsc    tee side comments
3115 -tp     tee pod
3116
3117Outdenting
3118 -olq    outdent long quoted strings (default)
3119 -olc    outdent a long block comment line
3120 -ola    outdent statement labels
3121 -okw    outdent control keywords (redo, next, last, goto, return)
3122 -okwl=s specify alternative keywords for -okw command
3123
3124Other controls
3125 -mft=n  maximum fields per table; default n=40
3126 -x      do not format lines before hash-bang line (i.e., for VMS)
3127 -asc    allows perltidy to add a ';' when missing (default)
3128 -dsm    allows perltidy to delete an unnecessary ';'  (default)
3129
3130Combinations of other parameters
3131 -gnu     attempt to follow GNU Coding Standards as applied to perl
3132 -mangle  remove as many newlines as possible (but keep comments and pods)
3133 -extrude  insert as many newlines as possible
3134
3135Dump and die, debugging
3136 -dop    dump options used in this run to standard output and quit
3137 -ddf    dump default options to standard output and quit
3138 -dsn    dump all option short names to standard output and quit
3139 -dln    dump option long names to standard output and quit
3140 -dpro   dump whatever configuration file is in effect to standard output
3141 -dtt    dump all token types to standard output and quit
3142
3143HTML
3144 -html write an html file (see 'man perl2web' for many options)
3145       Note: when -html is used, no indentation or formatting are done.
3146       Hint: try perltidy -html -css=mystyle.css filename.pl
3147       and edit mystyle.css to change the appearance of filename.html.
3148       -nnn gives line numbers
3149       -pre only writes out <pre>..</pre> code section
3150       -toc places a table of contents to subs at the top (default)
3151       -pod passes pod text through pod2html (default)
3152       -frm write html as a frame (3 files)
3153       -text=s extra extension for table of contents if -frm, default='toc'
3154       -sext=s extra extension for file content if -frm, default='src'
3155
3156A prefix of "n" negates short form toggle switches, and a prefix of "no"
3157negates the long forms.  For example, -nasc means don't add missing
3158semicolons.
3159
3160If you are unable to see this entire text, try "perltidy -h | more"
3161For more detailed information, and additional options, try "man perltidy",
3162or go to the perltidy home page at http://perltidy.sourceforge.net
3163EOF
3164
3165}
3166
3167sub process_this_file {
3168
3169    my ( $truth, $beauty ) = @_;
3170
3171    # loop to process each line of this file
3172    while ( my $line_of_tokens = $truth->get_line() ) {
3173        $beauty->write_line($line_of_tokens);
3174    }
3175
3176    # finish up
3177    eval { $beauty->finish_formatting() };
3178    $truth->report_tokenization_errors();
3179}
3180
3181sub check_syntax {
3182
3183    # Use 'perl -c' to make sure that we did not create bad syntax
3184    # This is a very good independent check for programming errors
3185    #
3186    # Given names of the input and output files, ($ifname, $ofname),
3187    # we do the following:
3188    # - check syntax of the input file
3189    # - if bad, all done (could be an incomplete code snippet)
3190    # - if infile syntax ok, then check syntax of the output file;
3191    #   - if outfile syntax bad, issue warning; this implies a code bug!
3192    # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3193
3194    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
3195    my $infile_syntax_ok = 0;
3196    my $line_of_dashes   = '-' x 42 . "\n";
3197
3198    my $flags = $rOpts->{'perl-syntax-check-flags'};
3199
3200    # be sure we invoke perl with -c
3201    # note: perl will accept repeated flags like '-c -c'.  It is safest
3202    # to append another -c than try to find an interior bundled c, as
3203    # in -Tc, because such a 'c' might be in a quoted string, for example.
3204    if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3205
3206    # be sure we invoke perl with -x if requested
3207    # same comments about repeated parameters applies
3208    if ( $rOpts->{'look-for-hash-bang'} ) {
3209        if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3210    }
3211
3212    # this shouldn't happen unless a termporary file couldn't be made
3213    if ( $ifname eq '-' ) {
3214        $logger_object->write_logfile_entry(
3215            "Cannot run perl -c on STDIN and STDOUT\n");
3216        return $infile_syntax_ok;
3217    }
3218
3219    $logger_object->write_logfile_entry(
3220        "checking input file syntax with perl $flags\n");
3221    $logger_object->write_logfile_entry($line_of_dashes);
3222
3223    # Not all operating systems/shells support redirection of the standard
3224    # error output.
3225    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3226
3227    my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3228    $logger_object->write_logfile_entry("$perl_output\n");
3229
3230    if ( $perl_output =~ /syntax\s*OK/ ) {
3231        $infile_syntax_ok = 1;
3232        $logger_object->write_logfile_entry($line_of_dashes);
3233        $logger_object->write_logfile_entry(
3234            "checking output file syntax with perl $flags ...\n");
3235        $logger_object->write_logfile_entry($line_of_dashes);
3236
3237        my $perl_output =
3238          do_syntax_check( $ofname, $flags, $error_redirection );
3239        $logger_object->write_logfile_entry("$perl_output\n");
3240
3241        unless ( $perl_output =~ /syntax\s*OK/ ) {
3242            $logger_object->write_logfile_entry($line_of_dashes);
3243            $logger_object->warning(
3244"The output file has a syntax error when tested with perl $flags $ofname !\n"
3245            );
3246            $logger_object->warning(
3247                "This implies an error in perltidy; the file $ofname is bad\n");
3248            $logger_object->report_definite_bug();
3249
3250            # the perl version number will be helpful for diagnosing the problem
3251            $logger_object->write_logfile_entry(
3252                qx/perl -v $error_redirection/ . "\n" );
3253        }
3254    }
3255    else {
3256
3257        # Only warn of perl -c syntax errors.  Other messages,
3258        # such as missing modules, are too common.  They can be
3259        # seen by running with perltidy -w
3260        $logger_object->complain("A syntax check using perl $flags gives: \n");
3261        $logger_object->complain($line_of_dashes);
3262        $logger_object->complain("$perl_output\n");
3263        $logger_object->complain($line_of_dashes);
3264        $infile_syntax_ok = -1;
3265        $logger_object->write_logfile_entry($line_of_dashes);
3266        $logger_object->write_logfile_entry(
3267"The output file will not be checked because of input file problems\n"
3268        );
3269    }
3270    return $infile_syntax_ok;
3271}
3272
3273sub do_syntax_check {
3274    my ( $fname, $flags, $error_redirection ) = @_;
3275
3276    # We have to quote the filename in case it has unusual characters
3277    # or spaces.  Example: this filename #CM11.pm# gives trouble.
3278    $fname = '"' . $fname . '"';
3279
3280    # Under VMS something like -T will become -t (and an error) so we
3281    # will put quotes around the flags.  Double quotes seem to work on
3282    # Unix/Windows/VMS, but this may not work on all systems.  (Single
3283    # quotes do not work under Windows).  It could become necessary to
3284    # put double quotes around each flag, such as:  -"c"  -"T"
3285    # We may eventually need some system-dependent coding here.
3286    $flags = '"' . $flags . '"';
3287
3288    # now wish for luck...
3289    return qx/perl $flags $fname $error_redirection/;
3290}
3291
3292#####################################################################
3293#
3294# This is a stripped down version of IO::Scalar
3295# Given a reference to a scalar, it supplies either:
3296# a getline method which reads lines (mode='r'), or
3297# a print method which reads lines (mode='w')
3298#
3299#####################################################################
3300package Perl::Tidy::IOScalar;
3301use Carp;
3302
3303sub new {
3304    my ( $package, $rscalar, $mode ) = @_;
3305    my $ref = ref $rscalar;
3306    if ( $ref ne 'SCALAR' ) {
3307        confess <<EOM;
3308------------------------------------------------------------------------
3309expecting ref to SCALAR but got ref to ($ref); trace follows:
3310------------------------------------------------------------------------
3311EOM
3312
3313    }
3314    if ( $mode eq 'w' ) {
3315        $$rscalar = "";
3316        return bless [ $rscalar, $mode ], $package;
3317    }
3318    elsif ( $mode eq 'r' ) {
3319
3320        # Convert a scalar to an array.
3321        # This avoids looking for "\n" on each call to getline
3322        my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3323        my $i_next = 0;
3324        return bless [ \@array, $mode, $i_next ], $package;
3325    }
3326    else {
3327        confess <<EOM;
3328------------------------------------------------------------------------
3329expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3330------------------------------------------------------------------------
3331EOM
3332    }
3333}
3334
3335sub getline {
3336    my $self = shift;
3337    my $mode = $self->[1];
3338    if ( $mode ne 'r' ) {
3339        confess <<EOM;
3340------------------------------------------------------------------------
3341getline call requires mode = 'r' but mode = ($mode); trace follows:
3342------------------------------------------------------------------------
3343EOM
3344    }
3345    my $i = $self->[2]++;
3346    ##my $line = $self->[0]->[$i];
3347    return $self->[0]->[$i];
3348}
3349
3350sub print {
3351    my $self = shift;
3352    my $mode = $self->[1];
3353    if ( $mode ne 'w' ) {
3354        confess <<EOM;
3355------------------------------------------------------------------------
3356print call requires mode = 'w' but mode = ($mode); trace follows:
3357------------------------------------------------------------------------
3358EOM
3359    }
3360    ${ $self->[0] } .= $_[0];
3361}
3362sub close { return }
3363
3364#####################################################################
3365#
3366# This is a stripped down version of IO::ScalarArray
3367# Given a reference to an array, it supplies either:
3368# a getline method which reads lines (mode='r'), or
3369# a print method which reads lines (mode='w')
3370#
3371# NOTE: this routine assumes that that there aren't any embedded
3372# newlines within any of the array elements.  There are no checks
3373# for that.
3374#
3375#####################################################################
3376package Perl::Tidy::IOScalarArray;
3377use Carp;
3378
3379sub new {
3380    my ( $package, $rarray, $mode ) = @_;
3381    my $ref = ref $rarray;
3382    if ( $ref ne 'ARRAY' ) {
3383        confess <<EOM;
3384------------------------------------------------------------------------
3385expecting ref to ARRAY but got ref to ($ref); trace follows:
3386------------------------------------------------------------------------
3387EOM
3388
3389    }
3390    if ( $mode eq 'w' ) {
3391        @$rarray = ();
3392        return bless [ $rarray, $mode ], $package;
3393    }
3394    elsif ( $mode eq 'r' ) {
3395        my $i_next = 0;
3396        return bless [ $rarray, $mode, $i_next ], $package;
3397    }
3398    else {
3399        confess <<EOM;
3400------------------------------------------------------------------------
3401expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3402------------------------------------------------------------------------
3403EOM
3404    }
3405}
3406
3407sub getline {
3408    my $self = shift;
3409    my $mode = $self->[1];
3410    if ( $mode ne 'r' ) {
3411        confess <<EOM;
3412------------------------------------------------------------------------
3413getline requires mode = 'r' but mode = ($mode); trace follows:
3414------------------------------------------------------------------------
3415EOM
3416    }
3417    my $i = $self->[2]++;
3418    return $self->[0]->[$i];
3419}
3420
3421sub print {
3422    my $self = shift;
3423    my $mode = $self->[1];
3424    if ( $mode ne 'w' ) {
3425        confess <<EOM;
3426------------------------------------------------------------------------
3427print requires mode = 'w' but mode = ($mode); trace follows:
3428------------------------------------------------------------------------
3429EOM
3430    }
3431    push @{ $self->[0] }, $_[0];
3432}
3433sub close { return }
3434
3435#####################################################################
3436#
3437# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3438# which returns the next line to be parsed
3439#
3440#####################################################################
3441
3442package Perl::Tidy::LineSource;
3443
3444sub new {
3445
3446    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3447    my $input_file_copy = undef;
3448    my $fh_copy;
3449
3450    my $input_line_ending;
3451    if ( $rOpts->{'preserve-line-endings'} ) {
3452        $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3453    }
3454
3455    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3456    return undef unless $fh;
3457
3458    # in order to check output syntax when standard output is used,
3459    # or when it is an object, we have to make a copy of the file
3460    if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3461    {
3462
3463        # Turning off syntax check when input output is used.
3464        # The reason is that temporary files cause problems on
3465        # on many systems.
3466        $rOpts->{'check-syntax'} = 0;
3467        $input_file_copy = '-';
3468
3469        $$rpending_logfile_message .= <<EOM;
3470Note: --syntax check will be skipped because standard input is used
3471EOM
3472
3473    }
3474
3475    return bless {
3476        _fh                => $fh,
3477        _fh_copy           => $fh_copy,
3478        _filename          => $input_file,
3479        _input_file_copy   => $input_file_copy,
3480        _input_line_ending => $input_line_ending,
3481        _rinput_buffer     => [],
3482        _started           => 0,
3483    }, $class;
3484}
3485
3486sub get_input_file_copy_name {
3487    my $self   = shift;
3488    my $ifname = $self->{_input_file_copy};
3489    unless ($ifname) {
3490        $ifname = $self->{_filename};
3491    }
3492    return $ifname;
3493}
3494
3495sub close_input_file {
3496    my $self = shift;
3497    eval { $self->{_fh}->close() };
3498    eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3499}
3500
3501sub get_line {
3502    my $self          = shift;
3503    my $line          = undef;
3504    my $fh            = $self->{_fh};
3505    my $fh_copy       = $self->{_fh_copy};
3506    my $rinput_buffer = $self->{_rinput_buffer};
3507
3508    if ( scalar(@$rinput_buffer) ) {
3509        $line = shift @$rinput_buffer;
3510    }
3511    else {
3512        $line = $fh->getline();
3513
3514        # patch to read raw mac files under unix, dos
3515        # see if the first line has embedded \r's
3516        if ( $line && !$self->{_started} ) {
3517            if ( $line =~ /[\015][^\015\012]/ ) {
3518
3519                # found one -- break the line up and store in a buffer
3520                @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3521                my $count = @$rinput_buffer;
3522                $line = shift @$rinput_buffer;
3523            }
3524            $self->{_started}++;
3525        }
3526    }
3527    if ( $line && $fh_copy ) { $fh_copy->print($line); }
3528    return $line;
3529}
3530
3531#####################################################################
3532#
3533# the Perl::Tidy::LineSink class supplies a write_line method for
3534# actual file writing
3535#
3536#####################################################################
3537
3538package Perl::Tidy::LineSink;
3539
3540sub new {
3541
3542    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3543        $rpending_logfile_message, $binmode )
3544      = @_;
3545    my $fh               = undef;
3546    my $fh_copy          = undef;
3547    my $fh_tee           = undef;
3548    my $output_file_copy = "";
3549    my $output_file_open = 0;
3550
3551    if ( $rOpts->{'format'} eq 'tidy' ) {
3552        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3553        unless ($fh) { die "Cannot write to output stream\n"; }
3554        $output_file_open = 1;
3555        if ($binmode) {
3556            if ( ref($fh) eq 'IO::File' ) {
3557                binmode $fh;
3558            }
3559            if ( $output_file eq '-' ) { binmode STDOUT }
3560        }
3561    }
3562
3563    # in order to check output syntax when standard output is used,
3564    # or when it is an object, we have to make a copy of the file
3565    if ( $output_file eq '-' || ref $output_file ) {
3566        if ( $rOpts->{'check-syntax'} ) {
3567
3568            # Turning off syntax check when standard output is used.
3569            # The reason is that temporary files cause problems on
3570            # on many systems.
3571            $rOpts->{'check-syntax'} = 0;
3572            $output_file_copy = '-';
3573            $$rpending_logfile_message .= <<EOM;
3574Note: --syntax check will be skipped because standard output is used
3575EOM
3576
3577        }
3578    }
3579
3580    bless {
3581        _fh               => $fh,
3582        _fh_copy          => $fh_copy,
3583        _fh_tee           => $fh_tee,
3584        _output_file      => $output_file,
3585        _output_file_open => $output_file_open,
3586        _output_file_copy => $output_file_copy,
3587        _tee_flag         => 0,
3588        _tee_file         => $tee_file,
3589        _tee_file_opened  => 0,
3590        _line_separator   => $line_separator,
3591        _binmode          => $binmode,
3592    }, $class;
3593}
3594
3595sub write_line {
3596
3597    my $self    = shift;
3598    my $fh      = $self->{_fh};
3599    my $fh_copy = $self->{_fh_copy};
3600
3601    my $output_file_open = $self->{_output_file_open};
3602    chomp $_[0];
3603    $_[0] .= $self->{_line_separator};
3604
3605    $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3606    print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3607
3608    if ( $self->{_tee_flag} ) {
3609        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3610        my $fh_tee = $self->{_fh_tee};
3611        print $fh_tee $_[0];
3612    }
3613}
3614
3615sub get_output_file_copy {
3616    my $self   = shift;
3617    my $ofname = $self->{_output_file_copy};
3618    unless ($ofname) {
3619        $ofname = $self->{_output_file};
3620    }
3621    return $ofname;
3622}
3623
3624sub tee_on {
3625    my $self = shift;
3626    $self->{_tee_flag} = 1;
3627}
3628
3629sub tee_off {
3630    my $self = shift;
3631    $self->{_tee_flag} = 0;
3632}
3633
3634sub really_open_tee_file {
3635    my $self     = shift;
3636    my $tee_file = $self->{_tee_file};
3637    my $fh_tee;
3638    $fh_tee = IO::File->new(">$tee_file")
3639      or die("couldn't open TEE file $tee_file: $!\n");
3640    binmode $fh_tee if $self->{_binmode};
3641    $self->{_tee_file_opened} = 1;
3642    $self->{_fh_tee}          = $fh_tee;
3643}
3644
3645sub close_output_file {
3646    my $self = shift;
3647    eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3648    eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3649    $self->close_tee_file();
3650}
3651
3652sub close_tee_file {
3653    my $self = shift;
3654
3655    if ( $self->{_tee_file_opened} ) {
3656        eval { $self->{_fh_tee}->close() };
3657        $self->{_tee_file_opened} = 0;
3658    }
3659}
3660
3661#####################################################################
3662#
3663# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3664# useful for program development.
3665#
3666# Only one such file is created regardless of the number of input
3667# files processed.  This allows the results of processing many files
3668# to be summarized in a single file.
3669#
3670#####################################################################
3671
3672package Perl::Tidy::Diagnostics;
3673
3674sub new {
3675
3676    my $class = shift;
3677    bless {
3678        _write_diagnostics_count => 0,
3679        _last_diagnostic_file    => "",
3680        _input_file              => "",
3681        _fh                      => undef,
3682    }, $class;
3683}
3684
3685sub set_input_file {
3686    my $self = shift;
3687    $self->{_input_file} = $_[0];
3688}
3689
3690# This is a diagnostic routine which is useful for program development.
3691# Output from debug messages go to a file named DIAGNOSTICS, where
3692# they are labeled by file and line.  This allows many files to be
3693# scanned at once for some particular condition of interest.
3694sub write_diagnostics {
3695    my $self = shift;
3696
3697    unless ( $self->{_write_diagnostics_count} ) {
3698        open DIAGNOSTICS, ">DIAGNOSTICS"
3699          or death("couldn't open DIAGNOSTICS: $!\n");
3700    }
3701
3702    my $last_diagnostic_file = $self->{_last_diagnostic_file};
3703    my $input_file           = $self->{_input_file};
3704    if ( $last_diagnostic_file ne $input_file ) {
3705        print DIAGNOSTICS "\nFILE:$input_file\n";
3706    }
3707    $self->{_last_diagnostic_file} = $input_file;
3708    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3709    print DIAGNOSTICS "$input_line_number:\t@_";
3710    $self->{_write_diagnostics_count}++;
3711}
3712
3713#####################################################################
3714#
3715# The Perl::Tidy::Logger class writes the .LOG and .ERR files
3716#
3717#####################################################################
3718
3719package Perl::Tidy::Logger;
3720
3721sub new {
3722    my $class = shift;
3723    my $fh;
3724    my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3725
3726    # remove any old error output file
3727    unless ( ref($warning_file) ) {
3728        if ( -e $warning_file ) { unlink($warning_file) }
3729    }
3730
3731    bless {
3732        _log_file                      => $log_file,
3733        _fh_warnings                   => undef,
3734        _rOpts                         => $rOpts,
3735        _fh_warnings                   => undef,
3736        _last_input_line_written       => 0,
3737        _at_end_of_file                => 0,
3738        _use_prefix                    => 1,
3739        _block_log_output              => 0,
3740        _line_of_tokens                => undef,
3741        _output_line_number            => undef,
3742        _wrote_line_information_string => 0,
3743        _wrote_column_headings         => 0,
3744        _warning_file                  => $warning_file,
3745        _warning_count                 => 0,
3746        _complaint_count               => 0,
3747        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3748        _saw_brace_error => 0,
3749        _saw_extrude     => $saw_extrude,
3750        _output_array    => [],
3751    }, $class;
3752}
3753
3754sub close_log_file {
3755
3756    my $self = shift;
3757    if ( $self->{_fh_warnings} ) {
3758        eval { $self->{_fh_warnings}->close() };
3759        $self->{_fh_warnings} = undef;
3760    }
3761}
3762
3763sub get_warning_count {
3764    my $self = shift;
3765    return $self->{_warning_count};
3766}
3767
3768sub get_use_prefix {
3769    my $self = shift;
3770    return $self->{_use_prefix};
3771}
3772
3773sub block_log_output {
3774    my $self = shift;
3775    $self->{_block_log_output} = 1;
3776}
3777
3778sub unblock_log_output {
3779    my $self = shift;
3780    $self->{_block_log_output} = 0;
3781}
3782
3783sub interrupt_logfile {
3784    my $self = shift;
3785    $self->{_use_prefix} = 0;
3786    $self->warning("\n");
3787    $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3788}
3789
3790sub resume_logfile {
3791    my $self = shift;
3792    $self->write_logfile_entry( '#' x 60 . "\n" );
3793    $self->{_use_prefix} = 1;
3794}
3795
3796sub we_are_at_the_last_line {
3797    my $self = shift;
3798    unless ( $self->{_wrote_line_information_string} ) {
3799        $self->write_logfile_entry("Last line\n\n");
3800    }
3801    $self->{_at_end_of_file} = 1;
3802}
3803
3804# record some stuff in case we go down in flames
3805sub black_box {
3806    my $self = shift;
3807    my ( $line_of_tokens, $output_line_number ) = @_;
3808    my $input_line        = $line_of_tokens->{_line_text};
3809    my $input_line_number = $line_of_tokens->{_line_number};
3810
3811    # save line information in case we have to write a logfile message
3812    $self->{_line_of_tokens}                = $line_of_tokens;
3813    $self->{_output_line_number}            = $output_line_number;
3814    $self->{_wrote_line_information_string} = 0;
3815
3816    my $last_input_line_written = $self->{_last_input_line_written};
3817    my $rOpts                   = $self->{_rOpts};
3818    if (
3819        (
3820            ( $input_line_number - $last_input_line_written ) >=
3821            $rOpts->{'logfile-gap'}
3822        )
3823        || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3824      )
3825    {
3826        my $rlevels                      = $line_of_tokens->{_rlevels};
3827        my $structural_indentation_level = $$rlevels[0];
3828        $self->{_last_input_line_written} = $input_line_number;
3829        ( my $out_str = $input_line ) =~ s/^\s*//;
3830        chomp $out_str;
3831
3832        $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3833
3834        if ( length($out_str) > 35 ) {
3835            $out_str = substr( $out_str, 0, 35 ) . " ....";
3836        }
3837        $self->logfile_output( "", "$out_str\n" );
3838    }
3839}
3840
3841sub write_logfile_entry {
3842    my $self = shift;
3843
3844    # add leading >>> to avoid confusing error mesages and code
3845    $self->logfile_output( ">>>", "@_" );
3846}
3847
3848sub write_column_headings {
3849    my $self = shift;
3850
3851    $self->{_wrote_column_headings} = 1;
3852    my $routput_array = $self->{_output_array};
3853    push @{$routput_array}, <<EOM;
3854The nesting depths in the table below are at the start of the lines.
3855The indicated output line numbers are not always exact.
3856ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3857
3858in:out indent c b  nesting   code + messages; (messages begin with >>>)
3859lines  levels i k            (code begins with one '.' per indent level)
3860------  ----- - - --------   -------------------------------------------
3861EOM
3862}
3863
3864sub make_line_information_string {
3865
3866    # make columns of information when a logfile message needs to go out
3867    my $self                    = shift;
3868    my $line_of_tokens          = $self->{_line_of_tokens};
3869    my $input_line_number       = $line_of_tokens->{_line_number};
3870    my $line_information_string = "";
3871    if ($input_line_number) {
3872
3873        my $output_line_number   = $self->{_output_line_number};
3874        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
3875        my $paren_depth          = $line_of_tokens->{_paren_depth};
3876        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3877        my $python_indentation_level =
3878          $line_of_tokens->{_python_indentation_level};
3879        my $rlevels         = $line_of_tokens->{_rlevels};
3880        my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3881        my $rci_levels      = $line_of_tokens->{_rci_levels};
3882        my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3883
3884        my $structural_indentation_level = $$rlevels[0];
3885
3886        $self->write_column_headings() unless $self->{_wrote_column_headings};
3887
3888        # keep logfile columns aligned for scripts up to 999 lines;
3889        # for longer scripts it doesn't really matter
3890        my $extra_space = "";
3891        $extra_space .=
3892            ( $input_line_number < 10 )  ? "  "
3893          : ( $input_line_number < 100 ) ? " "
3894          :                                "";
3895        $extra_space .=
3896            ( $output_line_number < 10 )  ? "  "
3897          : ( $output_line_number < 100 ) ? " "
3898          :                                 "";
3899
3900        # there are 2 possible nesting strings:
3901        # the original which looks like this:  (0 [1 {2
3902        # the new one, which looks like this:  {{[
3903        # the new one is easier to read, and shows the order, but
3904        # could be arbitrarily long, so we use it unless it is too long
3905        my $nesting_string =
3906          "($paren_depth [$square_bracket_depth {$brace_depth";
3907        my $nesting_string_new = $$rnesting_tokens[0];
3908
3909        my $ci_level = $$rci_levels[0];
3910        if ( $ci_level > 9 ) { $ci_level = '*' }
3911        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3912
3913        if ( length($nesting_string_new) <= 8 ) {
3914            $nesting_string =
3915              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3916        }
3917        if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3918        $line_information_string =
3919"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3920    }
3921    return $line_information_string;
3922}
3923
3924sub logfile_output {
3925    my $self = shift;
3926    my ( $prompt, $msg ) = @_;
3927    return if ( $self->{_block_log_output} );
3928
3929    my $routput_array = $self->{_output_array};
3930    if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3931        push @{$routput_array}, "$msg";
3932    }
3933    else {
3934        my $line_information_string = $self->make_line_information_string();
3935        $self->{_wrote_line_information_string} = 1;
3936
3937        if ($line_information_string) {
3938            push @{$routput_array}, "$line_information_string   $prompt$msg";
3939        }
3940        else {
3941            push @{$routput_array}, "$msg";
3942        }
3943    }
3944}
3945
3946sub get_saw_brace_error {
3947    my $self = shift;
3948    return $self->{_saw_brace_error};
3949}
3950
3951sub increment_brace_error {
3952    my $self = shift;
3953    $self->{_saw_brace_error}++;
3954}
3955
3956sub brace_warning {
3957    my $self = shift;
3958    use constant BRACE_WARNING_LIMIT => 10;
3959    my $saw_brace_error = $self->{_saw_brace_error};
3960
3961    if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3962        $self->warning(@_);
3963    }
3964    $saw_brace_error++;
3965    $self->{_saw_brace_error} = $saw_brace_error;
3966
3967    if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3968        $self->warning("No further warnings of this type will be given\n");
3969    }
3970}
3971
3972sub complain {
3973
3974    # handle non-critical warning messages based on input flag
3975    my $self  = shift;
3976    my $rOpts = $self->{_rOpts};
3977
3978    # these appear in .ERR output only if -w flag is used
3979    if ( $rOpts->{'warning-output'} ) {
3980        $self->warning(@_);
3981    }
3982
3983    # otherwise, they go to the .LOG file
3984    else {
3985        $self->{_complaint_count}++;
3986        $self->write_logfile_entry(@_);
3987    }
3988}
3989
3990sub warning {
3991
3992    # report errors to .ERR file (or stdout)
3993    my $self = shift;
3994    use constant WARNING_LIMIT => 50;
3995
3996    my $rOpts = $self->{_rOpts};
3997    unless ( $rOpts->{'quiet'} ) {
3998
3999        my $warning_count = $self->{_warning_count};
4000        unless ($warning_count) {
4001            my $warning_file = $self->{_warning_file};
4002            my $fh_warnings;
4003            if ( $rOpts->{'standard-error-output'} ) {
4004                $fh_warnings = *STDERR;
4005            }
4006            else {
4007                ( $fh_warnings, my $filename ) =
4008                  Perl::Tidy::streamhandle( $warning_file, 'w' );
4009                $fh_warnings or die("couldn't open $filename $!\n");
4010                warn "## Please see file $filename\n";
4011            }
4012            $self->{_fh_warnings} = $fh_warnings;
4013        }
4014
4015        my $fh_warnings = $self->{_fh_warnings};
4016        if ( $warning_count < WARNING_LIMIT ) {
4017            if ( $self->get_use_prefix() > 0 ) {
4018                my $input_line_number =
4019                  Perl::Tidy::Tokenizer::get_input_line_number();
4020                $fh_warnings->print("$input_line_number:\t@_");
4021                $self->write_logfile_entry("WARNING: @_");
4022            }
4023            else {
4024                $fh_warnings->print(@_);
4025                $self->write_logfile_entry(@_);
4026            }
4027        }
4028        $warning_count++;
4029        $self->{_warning_count} = $warning_count;
4030
4031        if ( $warning_count == WARNING_LIMIT ) {
4032            $fh_warnings->print("No further warnings will be given\n");
4033        }
4034    }
4035}
4036
4037# programming bug codes:
4038#   -1 = no bug
4039#    0 = maybe, not sure.
4040#    1 = definitely
4041sub report_possible_bug {
4042    my $self         = shift;
4043    my $saw_code_bug = $self->{_saw_code_bug};
4044    $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4045}
4046
4047sub report_definite_bug {
4048    my $self = shift;
4049    $self->{_saw_code_bug} = 1;
4050}
4051
4052sub ask_user_for_bug_report {
4053    my $self = shift;
4054
4055    my ( $infile_syntax_ok, $formatter ) = @_;
4056    my $saw_code_bug = $self->{_saw_code_bug};
4057    if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4058        $self->warning(<<EOM);
4059
4060You may have encountered a code bug in perltidy.  If you think so, and
4061the problem is not listed in the BUGS file at
4062http://perltidy.sourceforge.net, please report it so that it can be
4063corrected.  Include the smallest possible script which has the problem,
4064along with the .LOG file. See the manual pages for contact information.
4065Thank you!
4066EOM
4067
4068    }
4069    elsif ( $saw_code_bug == 1 ) {
4070        if ( $self->{_saw_extrude} ) {
4071            $self->warning(<<EOM);
4072
4073You may have encountered a bug in perltidy.  However, since you are using the
4074-extrude option, the problem may be with perl or one of its modules, which have
4075occasional problems with this type of file.  If you believe that the
4076problem is with perltidy, and the problem is not listed in the BUGS file at
4077http://perltidy.sourceforge.net, please report it so that it can be corrected.
4078Include the smallest possible script which has the problem, along with the .LOG
4079file. See the manual pages for contact information.
4080Thank you!
4081EOM
4082        }
4083        else {
4084            $self->warning(<<EOM);
4085
4086Oops, you seem to have encountered a bug in perltidy.  Please check the
4087BUGS file at http://perltidy.sourceforge.net.  If the problem is not
4088listed there, please report it so that it can be corrected.  Include the
4089smallest possible script which produces this message, along with the
4090.LOG file if appropriate.  See the manual pages for contact information.
4091Your efforts are appreciated.
4092Thank you!
4093EOM
4094            my $added_semicolon_count = 0;
4095            eval {
4096                $added_semicolon_count =
4097                  $formatter->get_added_semicolon_count();
4098            };
4099            if ( $added_semicolon_count > 0 ) {
4100                $self->warning(<<EOM);
4101
4102The log file shows that perltidy added $added_semicolon_count semicolons.
4103Please rerun with -nasc to see if that is the cause of the syntax error.  Even
4104if that is the problem, please report it so that it can be fixed.
4105EOM
4106
4107            }
4108        }
4109    }
4110}
4111
4112sub finish {
4113
4114    # called after all formatting to summarize errors
4115    my $self = shift;
4116    my ( $infile_syntax_ok, $formatter ) = @_;
4117
4118    my $rOpts         = $self->{_rOpts};
4119    my $warning_count = $self->{_warning_count};
4120    my $saw_code_bug  = $self->{_saw_code_bug};
4121
4122    my $save_logfile =
4123         ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4124      || $saw_code_bug == 1
4125      || $rOpts->{'logfile'};
4126    my $log_file = $self->{_log_file};
4127    if ($warning_count) {
4128        if ($save_logfile) {
4129            $self->block_log_output();    # avoid echoing this to the logfile
4130            $self->warning(
4131                "The logfile $log_file may contain useful information\n");
4132            $self->unblock_log_output();
4133        }
4134
4135        if ( $self->{_complaint_count} > 0 ) {
4136            $self->warning(
4137"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4138            );
4139        }
4140
4141        if ( $self->{_saw_brace_error}
4142            && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
4143        {
4144            $self->warning("To save a full .LOG file rerun with -g\n");
4145        }
4146    }
4147    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4148
4149    if ($save_logfile) {
4150        my $log_file = $self->{_log_file};
4151        my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4152        if ($fh) {
4153            my $routput_array = $self->{_output_array};
4154            foreach ( @{$routput_array} ) { $fh->print($_) }
4155            eval { $fh->close() };
4156        }
4157    }
4158}
4159
4160#####################################################################
4161#
4162# The Perl::Tidy::DevNull class supplies a dummy print method
4163#
4164#####################################################################
4165
4166package Perl::Tidy::DevNull;
4167sub new { return bless {}, $_[0] }
4168sub print { return }
4169sub close { return }
4170
4171#####################################################################
4172#
4173# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4174#
4175#####################################################################
4176
4177package Perl::Tidy::HtmlWriter;
4178
4179use File::Basename;
4180
4181# class variables
4182use vars qw{
4183  %html_color
4184  %html_bold
4185  %html_italic
4186  %token_short_names
4187  %short_to_long_names
4188  $rOpts
4189  $css_filename
4190  $css_linkname
4191  $missing_html_entities
4192};
4193
4194# replace unsafe characters with HTML entity representation if HTML::Entities
4195# is available
4196{ eval "use HTML::Entities"; $missing_html_entities = $@; }
4197
4198sub new {
4199
4200    my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4201        $html_src_extension )
4202      = @_;
4203
4204    my $html_file_opened = 0;
4205    my $html_fh;
4206    ( $html_fh, my $html_filename ) =
4207      Perl::Tidy::streamhandle( $html_file, 'w' );
4208    unless ($html_fh) {
4209        warn("can't open $html_file: $!\n");
4210        return undef;
4211    }
4212    $html_file_opened = 1;
4213
4214    if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4215        $input_file = "NONAME";
4216    }
4217
4218    # write the table of contents to a string
4219    my $toc_string;
4220    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4221
4222    my $html_pre_fh;
4223    my @pre_string_stack;
4224    if ( $rOpts->{'html-pre-only'} ) {
4225
4226        # pre section goes directly to the output stream
4227        $html_pre_fh = $html_fh;
4228        $html_pre_fh->print( <<"PRE_END");
4229<pre>
4230PRE_END
4231    }
4232    else {
4233
4234        # pre section go out to a temporary string
4235        my $pre_string;
4236        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4237        push @pre_string_stack, \$pre_string;
4238    }
4239
4240    # pod text gets diverted if the 'pod2html' is used
4241    my $html_pod_fh;
4242    my $pod_string;
4243    if ( $rOpts->{'pod2html'} ) {
4244        if ( $rOpts->{'html-pre-only'} ) {
4245            undef $rOpts->{'pod2html'};
4246        }
4247        else {
4248            eval "use Pod::Html";
4249            if ($@) {
4250                warn
4251"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4252                undef $rOpts->{'pod2html'};
4253            }
4254            else {
4255                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4256            }
4257        }
4258    }
4259
4260    my $toc_filename;
4261    my $src_filename;
4262    if ( $rOpts->{'frames'} ) {
4263        unless ($extension) {
4264            warn
4265"cannot use frames without a specified output extension; ignoring -frm\n";
4266            undef $rOpts->{'frames'};
4267        }
4268        else {
4269            $toc_filename = $input_file . $html_toc_extension . $extension;
4270            $src_filename = $input_file . $html_src_extension . $extension;
4271        }
4272    }
4273
4274    # ----------------------------------------------------------
4275    # Output is now directed as follows:
4276    # html_toc_fh <-- table of contents items
4277    # html_pre_fh <-- the <pre> section of formatted code, except:
4278    # html_pod_fh <-- pod goes here with the pod2html option
4279    # ----------------------------------------------------------
4280
4281    my $title = $rOpts->{'title'};
4282    unless ($title) {
4283        ( $title, my $path ) = fileparse($input_file);
4284    }
4285    my $toc_item_count = 0;
4286    my $in_toc_package = "";
4287    my $last_level     = 0;
4288    bless {
4289        _input_file        => $input_file,          # name of input file
4290        _title             => $title,               # title, unescaped
4291        _html_file         => $html_file,           # name of .html output file
4292        _toc_filename      => $toc_filename,        # for frames option
4293        _src_filename      => $src_filename,        # for frames option
4294        _html_file_opened  => $html_file_opened,    # a flag
4295        _html_fh           => $html_fh,             # the output stream
4296        _html_pre_fh       => $html_pre_fh,         # pre section goes here
4297        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4298        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4299        _rpod_string       => \$pod_string,         # string holding pod
4300        _pod_cut_count     => 0,                    # how many =cut's?
4301        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4302        _rtoc_string       => \$toc_string,         # string holding toc
4303        _rtoc_item_count   => \$toc_item_count,     # how many toc items
4304        _rin_toc_package   => \$in_toc_package,     # package name
4305        _rtoc_name_count   => {},                   # hash to track unique names
4306        _rpackage_stack    => [],                   # stack to check for package
4307                                                    # name changes
4308        _rlast_level       => \$last_level,         # brace indentation level
4309    }, $class;
4310}
4311
4312sub add_toc_item {
4313
4314    # Add an item to the html table of contents.
4315    # This is called even if no table of contents is written,
4316    # because we still want to put the anchors in the <pre> text.
4317    # We are given an anchor name and its type; types are:
4318    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4319    # There must be an 'EOF' call at the end to wrap things up.
4320    my $self = shift;
4321    my ( $name, $type ) = @_;
4322    my $html_toc_fh     = $self->{_html_toc_fh};
4323    my $html_pre_fh     = $self->{_html_pre_fh};
4324    my $rtoc_name_count = $self->{_rtoc_name_count};
4325    my $rtoc_item_count = $self->{_rtoc_item_count};
4326    my $rlast_level     = $self->{_rlast_level};
4327    my $rin_toc_package = $self->{_rin_toc_package};
4328    my $rpackage_stack  = $self->{_rpackage_stack};
4329
4330    # packages contain sublists of subs, so to avoid errors all package
4331    # items are written and finished with the following routines
4332    my $end_package_list = sub {
4333        if ($$rin_toc_package) {
4334            $html_toc_fh->print("</ul>\n</li>\n");
4335            $$rin_toc_package = "";
4336        }
4337    };
4338
4339    my $start_package_list = sub {
4340        my ( $unique_name, $package ) = @_;
4341        if ($$rin_toc_package) { $end_package_list->() }
4342        $html_toc_fh->print(<<EOM);
4343<li><a href=\"#$unique_name\">package $package</a>
4344<ul>
4345EOM
4346        $$rin_toc_package = $package;
4347    };
4348
4349    # start the table of contents on the first item
4350    unless ($$rtoc_item_count) {
4351
4352        # but just quit if we hit EOF without any other entries
4353        # in this case, there will be no toc
4354        return if ( $type eq 'EOF' );
4355        $html_toc_fh->print( <<"TOC_END");
4356<!-- BEGIN CODE INDEX --><a name="code-index"></a>
4357<ul>
4358TOC_END
4359    }
4360    $$rtoc_item_count++;
4361
4362    # make a unique anchor name for this location:
4363    #   - packages get a 'package-' prefix
4364    #   - subs use their names
4365    my $unique_name = $name;
4366    if ( $type eq 'package' ) { $unique_name = "package-$name" }
4367
4368    # append '-1', '-2', etc if necessary to make unique; this will
4369    # be unique because subs and packages cannot have a '-'
4370    if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4371        $unique_name .= "-$count";
4372    }
4373
4374    #   - all names get terminal '-' if pod2html is used, to avoid
4375    #     conflicts with anchor names created by pod2html
4376    if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4377
4378    # start/stop lists of subs
4379    if ( $type eq 'sub' ) {
4380        my $package = $rpackage_stack->[$$rlast_level];
4381        unless ($package) { $package = 'main' }
4382
4383        # if we're already in a package/sub list, be sure its the right
4384        # package or else close it
4385        if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4386            $end_package_list->();
4387        }
4388
4389        # start a package/sub list if necessary
4390        unless ($$rin_toc_package) {
4391            $start_package_list->( $unique_name, $package );
4392        }
4393    }
4394
4395    # now write an entry in the toc for this item
4396    if ( $type eq 'package' ) {
4397        $start_package_list->( $unique_name, $name );
4398    }
4399    elsif ( $type eq 'sub' ) {
4400        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4401    }
4402    else {
4403        $end_package_list->();
4404        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4405    }
4406
4407    # write the anchor in the <pre> section
4408    $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4409
4410    # end the table of contents, if any, on the end of file
4411    if ( $type eq 'EOF' ) {
4412        $html_toc_fh->print( <<"TOC_END");
4413</ul>
4414<!-- END CODE INDEX -->
4415TOC_END
4416    }
4417}
4418
4419BEGIN {
4420
4421    # This is the official list of tokens which may be identified by the
4422    # user.  Long names are used as getopt keys.  Short names are
4423    # convenient short abbreviations for specifying input.  Short names
4424    # somewhat resemble token type characters, but are often different
4425    # because they may only be alphanumeric, to allow command line
4426    # input.  Also, note that because of case insensitivity of html,
4427    # this table must be in a single case only (I've chosen to use all
4428    # lower case).
4429    # When adding NEW_TOKENS: update this hash table
4430    # short names => long names
4431    %short_to_long_names = (
4432        'n'  => 'numeric',
4433        'p'  => 'paren',
4434        'q'  => 'quote',
4435        's'  => 'structure',
4436        'c'  => 'comment',
4437        'v'  => 'v-string',
4438        'cm' => 'comma',
4439        'w'  => 'bareword',
4440        'co' => 'colon',
4441        'pu' => 'punctuation',
4442        'i'  => 'identifier',
4443        'j'  => 'label',
4444        'h'  => 'here-doc-target',
4445        'hh' => 'here-doc-text',
4446        'k'  => 'keyword',
4447        'sc' => 'semicolon',
4448        'm'  => 'subroutine',
4449        'pd' => 'pod-text',
4450    );
4451
4452    # Now we have to map actual token types into one of the above short
4453    # names; any token types not mapped will get 'punctuation'
4454    # properties.
4455
4456    # The values of this hash table correspond to the keys of the
4457    # previous hash table.
4458    # The keys of this hash table are token types and can be seen
4459    # by running with --dump-token-types (-dtt).
4460
4461    # When adding NEW_TOKENS: update this hash table
4462    # $type => $short_name
4463    %token_short_names = (
4464        '#'  => 'c',
4465        'n'  => 'n',
4466        'v'  => 'v',
4467        'k'  => 'k',
4468        'F'  => 'k',
4469        'Q'  => 'q',
4470        'q'  => 'q',
4471        'J'  => 'j',
4472        'j'  => 'j',
4473        'h'  => 'h',
4474        'H'  => 'hh',
4475        'w'  => 'w',
4476        ','  => 'cm',
4477        '=>' => 'cm',
4478        ';'  => 'sc',
4479        ':'  => 'co',
4480        'f'  => 'sc',
4481        '('  => 'p',
4482        ')'  => 'p',
4483        'M'  => 'm',
4484        'P'  => 'pd',
4485        'A'  => 'co',
4486    );
4487
4488    # These token types will all be called identifiers for now
4489    # FIXME: need to separate user defined modules as separate type
4490    my @identifier = qw" i t U C Y Z G :: ";
4491    @token_short_names{@identifier} = ('i') x scalar(@identifier);
4492
4493    # These token types will be called 'structure'
4494    my @structure = qw" { } ";
4495    @token_short_names{@structure} = ('s') x scalar(@structure);
4496
4497    # OLD NOTES: save for reference
4498    # Any of these could be added later if it would be useful.
4499    # For now, they will by default become punctuation
4500    #    my @list = qw" L R [ ] ";
4501    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4502    #
4503    #    my @list = qw"
4504    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4505    #      ";
4506    #    @token_long_names{@list} = ('math') x scalar(@list);
4507    #
4508    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4509    #    @token_long_names{@list} = ('bit') x scalar(@list);
4510    #
4511    #    my @list = qw" == != < > <= <=> ";
4512    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4513    #
4514    #    my @list = qw" && || ! &&= ||= //= ";
4515    #    @token_long_names{@list} = ('logical') x scalar(@list);
4516    #
4517    #    my @list = qw" . .= =~ !~ x x= ";
4518    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4519    #
4520    #    # Incomplete..
4521    #    my @list = qw" .. -> <> ... \ ? ";
4522    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4523
4524}
4525
4526sub make_getopt_long_names {
4527    my $class = shift;
4528    my ($rgetopt_names) = @_;
4529    while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4530        push @$rgetopt_names, "html-color-$name=s";
4531        push @$rgetopt_names, "html-italic-$name!";
4532        push @$rgetopt_names, "html-bold-$name!";
4533    }
4534    push @$rgetopt_names, "html-color-background=s";
4535    push @$rgetopt_names, "html-linked-style-sheet=s";
4536    push @$rgetopt_names, "nohtml-style-sheets";
4537    push @$rgetopt_names, "html-pre-only";
4538    push @$rgetopt_names, "html-line-numbers";
4539    push @$rgetopt_names, "html-entities!";
4540    push @$rgetopt_names, "stylesheet";
4541    push @$rgetopt_names, "html-table-of-contents!";
4542    push @$rgetopt_names, "pod2html!";
4543    push @$rgetopt_names, "frames!";
4544    push @$rgetopt_names, "html-toc-extension=s";
4545    push @$rgetopt_names, "html-src-extension=s";
4546
4547    # Pod::Html parameters:
4548    push @$rgetopt_names, "backlink=s";
4549    push @$rgetopt_names, "cachedir=s";
4550    push @$rgetopt_names, "htmlroot=s";
4551    push @$rgetopt_names, "libpods=s";
4552    push @$rgetopt_names, "podpath=s";
4553    push @$rgetopt_names, "podroot=s";
4554    push @$rgetopt_names, "title=s";
4555
4556    # Pod::Html parameters with leading 'pod' which will be removed
4557    # before the call to Pod::Html
4558    push @$rgetopt_names, "podquiet!";
4559    push @$rgetopt_names, "podverbose!";
4560    push @$rgetopt_names, "podrecurse!";
4561    push @$rgetopt_names, "podflush";
4562    push @$rgetopt_names, "podheader!";
4563    push @$rgetopt_names, "podindex!";
4564}
4565
4566sub make_abbreviated_names {
4567
4568    # We're appending things like this to the expansion list:
4569    #      'hcc'    => [qw(html-color-comment)],
4570    #      'hck'    => [qw(html-color-keyword)],
4571    #  etc
4572    my $class = shift;
4573    my ($rexpansion) = @_;
4574
4575    # abbreviations for color/bold/italic properties
4576    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4577        ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4578        ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4579        ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4580        ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4581        ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4582    }
4583
4584    # abbreviations for all other html options
4585    ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4586    ${$rexpansion}{"pre"}   = ["html-pre-only"];
4587    ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4588    ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4589    ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4590    ${$rexpansion}{"hent"}  = ["html-entities"];
4591    ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4592    ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4593    ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4594    ${$rexpansion}{"ss"}    = ["stylesheet"];
4595    ${$rexpansion}{"pod"}   = ["pod2html"];
4596    ${$rexpansion}{"npod"}  = ["nopod2html"];
4597    ${$rexpansion}{"frm"}   = ["frames"];
4598    ${$rexpansion}{"nfrm"}  = ["noframes"];
4599    ${$rexpansion}{"text"}  = ["html-toc-extension"];
4600    ${$rexpansion}{"sext"}  = ["html-src-extension"];
4601}
4602
4603sub check_options {
4604
4605    # This will be called once after options have been parsed
4606    my $class = shift;
4607    $rOpts = shift;
4608
4609    # X11 color names for default settings that seemed to look ok
4610    # (these color names are only used for programming clarity; the hex
4611    # numbers are actually written)
4612    use constant ForestGreen   => "#228B22";
4613    use constant SaddleBrown   => "#8B4513";
4614    use constant magenta4      => "#8B008B";
4615    use constant IndianRed3    => "#CD5555";
4616    use constant DeepSkyBlue4  => "#00688B";
4617    use constant MediumOrchid3 => "#B452CD";
4618    use constant black         => "#000000";
4619    use constant white         => "#FFFFFF";
4620    use constant red           => "#FF0000";
4621
4622    # set default color, bold, italic properties
4623    # anything not listed here will be given the default (punctuation) color --
4624    # these types currently not listed and get default: ws pu s sc cm co p
4625    # When adding NEW_TOKENS: add an entry here if you don't want defaults
4626
4627    # set_default_properties( $short_name, default_color, bold?, italic? );
4628    set_default_properties( 'c',  ForestGreen,   0, 0 );
4629    set_default_properties( 'pd', ForestGreen,   0, 1 );
4630    set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4631    set_default_properties( 'q',  IndianRed3,    0, 0 );
4632    set_default_properties( 'hh', IndianRed3,    0, 1 );
4633    set_default_properties( 'h',  IndianRed3,    1, 0 );
4634    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4635    set_default_properties( 'w',  black,         0, 0 );
4636    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4637    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4638    set_default_properties( 'j',  IndianRed3,    1, 0 );
4639    set_default_properties( 'm',  red,           1, 0 );
4640
4641    set_default_color( 'html-color-background',  white );
4642    set_default_color( 'html-color-punctuation', black );
4643
4644    # setup property lookup tables for tokens based on their short names
4645    # every token type has a short name, and will use these tables
4646    # to do the html markup
4647    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4648        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4649        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4650        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4651    }
4652
4653    # write style sheet to STDOUT and die if requested
4654    if ( defined( $rOpts->{'stylesheet'} ) ) {
4655        write_style_sheet_file('-');
4656        exit 1;
4657    }
4658
4659    # make sure user gives a file name after -css
4660    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4661        $css_linkname = $rOpts->{'html-linked-style-sheet'};
4662        if ( $css_linkname =~ /^-/ ) {
4663            die "You must specify a valid filename after -css\n";
4664        }
4665    }
4666
4667    # check for conflict
4668    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4669        $rOpts->{'nohtml-style-sheets'} = 0;
4670        warning("You can't specify both -css and -nss; -nss ignored\n");
4671    }
4672
4673    # write a style sheet file if necessary
4674    if ($css_linkname) {
4675
4676        # if the selected filename exists, don't write, because user may
4677        # have done some work by hand to create it; use backup name instead
4678        # Also, this will avoid a potential disaster in which the user
4679        # forgets to specify the style sheet, like this:
4680        #    perltidy -html -css myfile1.pl myfile2.pl
4681        # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4682        my $css_filename = $css_linkname;
4683        unless ( -e $css_filename ) {
4684            write_style_sheet_file($css_filename);
4685        }
4686    }
4687    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4688}
4689
4690sub write_style_sheet_file {
4691
4692    my $css_filename = shift;
4693    my $fh;
4694    unless ( $fh = IO::File->new("> $css_filename") ) {
4695        die "can't open $css_filename: $!\n";
4696    }
4697    write_style_sheet_data($fh);
4698    eval { $fh->close };
4699}
4700
4701sub write_style_sheet_data {
4702
4703    # write the style sheet data to an open file handle
4704    my $fh = shift;
4705
4706    my $bg_color   = $rOpts->{'html-color-background'};
4707    my $text_color = $rOpts->{'html-color-punctuation'};
4708
4709    # pre-bgcolor is new, and may not be defined
4710    my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4711    $pre_bg_color = $bg_color unless $pre_bg_color;
4712
4713    $fh->print(<<"EOM");
4714/* default style sheet generated by perltidy */
4715body {background: $bg_color; color: $text_color}
4716pre { color: $text_color;
4717      background: $pre_bg_color;
4718      font-family: courier;
4719    }
4720
4721EOM
4722
4723    foreach my $short_name ( sort keys %short_to_long_names ) {
4724        my $long_name = $short_to_long_names{$short_name};
4725
4726        my $abbrev = '.' . $short_name;
4727        if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4728        my $color = $html_color{$short_name};
4729        if ( !defined($color) ) { $color = $text_color }
4730        $fh->print("$abbrev \{ color: $color;");
4731
4732        if ( $html_bold{$short_name} ) {
4733            $fh->print(" font-weight:bold;");
4734        }
4735
4736        if ( $html_italic{$short_name} ) {
4737            $fh->print(" font-style:italic;");
4738        }
4739        $fh->print("} /* $long_name */\n");
4740    }
4741}
4742
4743sub set_default_color {
4744
4745    # make sure that options hash $rOpts->{$key} contains a valid color
4746    my ( $key, $color ) = @_;
4747    if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4748    $rOpts->{$key} = check_RGB($color);
4749}
4750
4751sub check_RGB {
4752
4753    # if color is a 6 digit hex RGB value, prepend a #, otherwise
4754    # assume that it is a valid ascii color name
4755    my ($color) = @_;
4756    if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4757    return $color;
4758}
4759
4760sub set_default_properties {
4761    my ( $short_name, $color, $bold, $italic ) = @_;
4762
4763    set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4764    my $key;
4765    $key = "html-bold-$short_to_long_names{$short_name}";
4766    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4767    $key = "html-italic-$short_to_long_names{$short_name}";
4768    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4769}
4770
4771sub pod_to_html {
4772
4773    # Use Pod::Html to process the pod and make the page
4774    # then merge the perltidy code sections into it.
4775    # return 1 if success, 0 otherwise
4776    my $self = shift;
4777    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4778    my $input_file   = $self->{_input_file};
4779    my $title        = $self->{_title};
4780    my $success_flag = 0;
4781
4782    # don't try to use pod2html if no pod
4783    unless ($pod_string) {
4784        return $success_flag;
4785    }
4786
4787    # Pod::Html requires a real temporary filename
4788    # If we are making a frame, we have a name available
4789    # Otherwise, we have to fine one
4790    my $tmpfile;
4791    if ( $rOpts->{'frames'} ) {
4792        $tmpfile = $self->{_toc_filename};
4793    }
4794    else {
4795        $tmpfile = Perl::Tidy::make_temporary_filename();
4796    }
4797    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4798    unless ($fh_tmp) {
4799        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4800        return $success_flag;
4801    }
4802
4803    #------------------------------------------------------------------
4804    # Warning: a temporary file is open; we have to clean up if
4805    # things go bad.  From here on all returns should be by going to
4806    # RETURN so that the temporary file gets unlinked.
4807    #------------------------------------------------------------------
4808
4809    # write the pod text to the temporary file
4810    $fh_tmp->print($pod_string);
4811    $fh_tmp->close();
4812
4813    # Hand off the pod to pod2html.
4814    # Note that we can use the same temporary filename for input and output
4815    # because of the way pod2html works.
4816    {
4817
4818        my @args;
4819        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4820        my $kw;
4821
4822        # Flags with string args:
4823        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4824        # "podpath=s", "podroot=s"
4825        # Note: -css=s is handled by perltidy itself
4826        foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4827            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4828        }
4829
4830        # Toggle switches; these have extra leading 'pod'
4831        # "header!", "index!", "recurse!", "quiet!", "verbose!"
4832        foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4833            my $kwd = $kw;    # allows us to strip 'pod'
4834            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4835            elsif ( defined( $rOpts->{$kw} ) ) {
4836                $kwd =~ s/^pod//;
4837                push @args, "--no$kwd";
4838            }
4839        }
4840
4841        # "flush",
4842        $kw = 'podflush';
4843        if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4844
4845        # Must clean up if pod2html dies (it can);
4846        # Be careful not to overwrite callers __DIE__ routine
4847        local $SIG{__DIE__} = sub {
4848            print $_[0];
4849            unlink $tmpfile if -e $tmpfile;
4850            exit 1;
4851        };
4852
4853        pod2html(@args);
4854    }
4855    $fh_tmp = IO::File->new( $tmpfile, 'r' );
4856    unless ($fh_tmp) {
4857
4858        # this error shouldn't happen ... we just used this filename
4859        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4860        goto RETURN;
4861    }
4862
4863    my $html_fh = $self->{_html_fh};
4864    my @toc;
4865    my $in_toc;
4866    my $no_print;
4867
4868    # This routine will write the html selectively and store the toc
4869    my $html_print = sub {
4870        foreach (@_) {
4871            $html_fh->print($_) unless ($no_print);
4872            if ($in_toc) { push @toc, $_ }
4873        }
4874    };
4875
4876    # loop over lines of html output from pod2html and merge in
4877    # the necessary perltidy html sections
4878    my ( $saw_body, $saw_index, $saw_body_end );
4879    while ( my $line = $fh_tmp->getline() ) {
4880
4881        if ( $line =~ /^\s*<html>\s*$/i ) {
4882            my $date = localtime;
4883            $html_print->("<!-- Generated by perltidy on $date -->\n");
4884            $html_print->($line);
4885        }
4886
4887        # Copy the perltidy css, if any, after <body> tag
4888        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4889            $saw_body = 1;
4890            $html_print->($css_string) if $css_string;
4891            $html_print->($line);
4892
4893            # add a top anchor and heading
4894            $html_print->("<a name=\"-top-\"></a>\n");
4895            $title = escape_html($title);
4896            $html_print->("<h1>$title</h1>\n");
4897        }
4898        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4899            $in_toc = 1;
4900
4901            # when frames are used, an extra table of contents in the
4902            # contents panel is confusing, so don't print it
4903            $no_print = $rOpts->{'frames'}
4904              || !$rOpts->{'html-table-of-contents'};
4905            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4906            $html_print->($line);
4907        }
4908
4909        # Copy the perltidy toc, if any, after the Pod::Html toc
4910        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4911            $saw_index = 1;
4912            $html_print->($line);
4913            if ($toc_string) {
4914                $html_print->("<hr />\n") if $rOpts->{'frames'};
4915                $html_print->("<h2>Code Index:</h2>\n");
4916                my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4917                $html_print->(@toc);
4918            }
4919            $in_toc   = 0;
4920            $no_print = 0;
4921        }
4922
4923        # Copy one perltidy section after each marker
4924        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4925            $line = $2;
4926            $html_print->($1) if $1;
4927
4928            # Intermingle code and pod sections if we saw multiple =cut's.
4929            if ( $self->{_pod_cut_count} > 1 ) {
4930                my $rpre_string = shift(@$rpre_string_stack);
4931                if ($$rpre_string) {
4932                    $html_print->('<pre>');
4933                    $html_print->($$rpre_string);
4934                    $html_print->('</pre>');
4935                }
4936                else {
4937
4938                    # shouldn't happen: we stored a string before writing
4939                    # each marker.
4940                    warn
4941"Problem merging html stream with pod2html; order may be wrong\n";
4942                }
4943                $html_print->($line);
4944            }
4945
4946            # If didn't see multiple =cut lines, we'll put the pod out first
4947            # and then the code, because it's less confusing.
4948            else {
4949
4950                # since we are not intermixing code and pod, we don't need
4951                # or want any <hr> lines which separated pod and code
4952                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4953            }
4954        }
4955
4956        # Copy any remaining code section before the </body> tag
4957        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4958            $saw_body_end = 1;
4959            if (@$rpre_string_stack) {
4960                unless ( $self->{_pod_cut_count} > 1 ) {
4961                    $html_print->('<hr />');
4962                }
4963                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4964                    $html_print->('<pre>');
4965                    $html_print->($$rpre_string);
4966                    $html_print->('</pre>');
4967                }
4968            }
4969            $html_print->($line);
4970        }
4971        else {
4972            $html_print->($line);
4973        }
4974    }
4975
4976    $success_flag = 1;
4977    unless ($saw_body) {
4978        warn "Did not see <body> in pod2html output\n";
4979        $success_flag = 0;
4980    }
4981    unless ($saw_body_end) {
4982        warn "Did not see </body> in pod2html output\n";
4983        $success_flag = 0;
4984    }
4985    unless ($saw_index) {
4986        warn "Did not find INDEX END in pod2html output\n";
4987        $success_flag = 0;
4988    }
4989
4990  RETURN:
4991    eval { $html_fh->close() };
4992
4993    # note that we have to unlink tmpfile before making frames
4994    # because the tmpfile may be one of the names used for frames
4995    unlink $tmpfile if -e $tmpfile;
4996    if ( $success_flag && $rOpts->{'frames'} ) {
4997        $self->make_frame( \@toc );
4998    }
4999    return $success_flag;
5000}
5001
5002sub make_frame {
5003
5004    # Make a frame with table of contents in the left panel
5005    # and the text in the right panel.
5006    # On entry:
5007    #  $html_filename contains the no-frames html output
5008    #  $rtoc is a reference to an array with the table of contents
5009    my $self          = shift;
5010    my ($rtoc)        = @_;
5011    my $input_file    = $self->{_input_file};
5012    my $html_filename = $self->{_html_file};
5013    my $toc_filename  = $self->{_toc_filename};
5014    my $src_filename  = $self->{_src_filename};
5015    my $title         = $self->{_title};
5016    $title = escape_html($title);
5017
5018    # FUTURE input parameter:
5019    my $top_basename = "";
5020
5021    # We need to produce 3 html files:
5022    # 1. - the table of contents
5023    # 2. - the contents (source code) itself
5024    # 3. - the frame which contains them
5025
5026    # get basenames for relative links
5027    my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5028    my ( $src_basename, $src_path ) = fileparse($src_filename);
5029
5030    # 1. Make the table of contents panel, with appropriate changes
5031    # to the anchor names
5032    my $src_frame_name = 'SRC';
5033    my $first_anchor =
5034      write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5035        $src_frame_name );
5036
5037    # 2. The current .html filename is renamed to be the contents panel
5038    rename( $html_filename, $src_filename )
5039      or die "Cannot rename $html_filename to $src_filename:$!\n";
5040
5041    # 3. Then use the original html filename for the frame
5042    write_frame_html(
5043        $title,        $html_filename, $top_basename,
5044        $toc_basename, $src_basename,  $src_frame_name
5045    );
5046}
5047
5048sub write_toc_html {
5049
5050    # write a separate html table of contents file for frames
5051    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5052    my $fh = IO::File->new( $toc_filename, 'w' )
5053      or die "Cannot open $toc_filename:$!\n";
5054    $fh->print(<<EOM);
5055<html>
5056<head>
5057<title>$title</title>
5058</head>
5059<body>
5060<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5061EOM
5062
5063    my $first_anchor =
5064      change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5065    $fh->print( join "", @$rtoc );
5066
5067    $fh->print(<<EOM);
5068</body>
5069</html>
5070EOM
5071
5072}
5073
5074sub write_frame_html {
5075
5076    # write an html file to be the table of contents frame
5077    my (
5078        $title,        $frame_filename, $top_basename,
5079        $toc_basename, $src_basename,   $src_frame_name
5080    ) = @_;
5081
5082    my $fh = IO::File->new( $frame_filename, 'w' )
5083      or die "Cannot open $toc_basename:$!\n";
5084
5085    $fh->print(<<EOM);
5086<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5087    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5088<?xml version="1.0" encoding="iso-8859-1" ?>
5089<html xmlns="http://www.w3.org/1999/xhtml">
5090<head>
5091<title>$title</title>
5092</head>
5093EOM
5094
5095    # two left panels, one right, if master index file
5096    if ($top_basename) {
5097        $fh->print(<<EOM);
5098<frameset cols="20%,80%">
5099<frameset rows="30%,70%">
5100<frame src = "$top_basename" />
5101<frame src = "$toc_basename" />
5102</frameset>
5103EOM
5104    }
5105
5106    # one left panels, one right, if no master index file
5107    else {
5108        $fh->print(<<EOM);
5109<frameset cols="20%,*">
5110<frame src = "$toc_basename" />
5111EOM
5112    }
5113    $fh->print(<<EOM);
5114<frame src = "$src_basename" name = "$src_frame_name" />
5115<noframes>
5116<body>
5117<p>If you see this message, you are using a non-frame-capable web client.</p>
5118<p>This document contains:</p>
5119<ul>
5120<li><a href="$toc_basename">A table of contents</a></li>
5121<li><a href="$src_basename">The source code</a></li>
5122</ul>
5123</body>
5124</noframes>
5125</frameset>
5126</html>
5127EOM
5128}
5129
5130sub change_anchor_names {
5131
5132    # add a filename and target to anchors
5133    # also return the first anchor
5134    my ( $rlines, $filename, $target ) = @_;
5135    my $first_anchor;
5136    foreach my $line (@$rlines) {
5137
5138        #  We're looking for lines like this:
5139        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5140        #  ----  -       --------  -----------------
5141        #  $1              $4            $5
5142        if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5143            my $pre  = $1;
5144            my $name = $4;
5145            my $post = $5;
5146            my $href = "$filename#$name";
5147            $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5148            unless ($first_anchor) { $first_anchor = $href }
5149        }
5150    }
5151    return $first_anchor;
5152}
5153
5154sub close_html_file {
5155    my $self = shift;
5156    return unless $self->{_html_file_opened};
5157
5158    my $html_fh     = $self->{_html_fh};
5159    my $rtoc_string = $self->{_rtoc_string};
5160
5161    # There are 3 basic paths to html output...
5162
5163    # ---------------------------------
5164    # Path 1: finish up if in -pre mode
5165    # ---------------------------------
5166    if ( $rOpts->{'html-pre-only'} ) {
5167        $html_fh->print( <<"PRE_END");
5168</pre>
5169PRE_END
5170        eval { $html_fh->close() };
5171        return;
5172    }
5173
5174    # Finish the index
5175    $self->add_toc_item( 'EOF', 'EOF' );
5176
5177    my $rpre_string_stack = $self->{_rpre_string_stack};
5178
5179    # Patch to darken the <pre> background color in case of pod2html and
5180    # interleaved code/documentation.  Otherwise, the distinction
5181    # between code and documentation is blurred.
5182    if (   $rOpts->{pod2html}
5183        && $self->{_pod_cut_count} >= 1
5184        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5185    {
5186        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5187    }
5188
5189    # put the css or its link into a string, if used
5190    my $css_string;
5191    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5192
5193    # use css linked to another file
5194    if ( $rOpts->{'html-linked-style-sheet'} ) {
5195        $fh_css->print(
5196            qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5197        );
5198    }
5199
5200    # use css embedded in this file
5201    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5202        $fh_css->print( <<'ENDCSS');
5203<style type="text/css">
5204<!--
5205ENDCSS
5206        write_style_sheet_data($fh_css);
5207        $fh_css->print( <<"ENDCSS");
5208-->
5209</style>
5210ENDCSS
5211    }
5212
5213    # -----------------------------------------------------------
5214    # path 2: use pod2html if requested
5215    #         If we fail for some reason, continue on to path 3
5216    # -----------------------------------------------------------
5217    if ( $rOpts->{'pod2html'} ) {
5218        my $rpod_string = $self->{_rpod_string};
5219        $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5220            $rpre_string_stack )
5221          && return;
5222    }
5223
5224    # --------------------------------------------------
5225    # path 3: write code in html, with pod only in italics
5226    # --------------------------------------------------
5227    my $input_file = $self->{_input_file};
5228    my $title      = escape_html($input_file);
5229    my $date       = localtime;
5230    $html_fh->print( <<"HTML_START");
5231<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5232   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5233<!-- Generated by perltidy on $date -->
5234<html xmlns="http://www.w3.org/1999/xhtml">
5235<head>
5236<title>$title</title>
5237HTML_START
5238
5239    # output the css, if used
5240    if ($css_string) {
5241        $html_fh->print($css_string);
5242        $html_fh->print( <<"ENDCSS");
5243</head>
5244<body>
5245ENDCSS
5246    }
5247    else {
5248
5249        $html_fh->print( <<"HTML_START");
5250</head>
5251<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5252HTML_START
5253    }
5254
5255    $html_fh->print("<a name=\"-top-\"></a>\n");
5256    $html_fh->print( <<"EOM");
5257<h1>$title</h1>
5258EOM
5259
5260    # copy the table of contents
5261    if (   $$rtoc_string
5262        && !$rOpts->{'frames'}
5263        && $rOpts->{'html-table-of-contents'} )
5264    {
5265        $html_fh->print($$rtoc_string);
5266    }
5267
5268    # copy the pre section(s)
5269    my $fname_comment = $input_file;
5270    $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5271    $html_fh->print( <<"END_PRE");
5272<hr />
5273<!-- contents of filename: $fname_comment -->
5274<pre>
5275END_PRE
5276
5277    foreach my $rpre_string (@$rpre_string_stack) {
5278        $html_fh->print($$rpre_string);
5279    }
5280
5281    # and finish the html page
5282    $html_fh->print( <<"HTML_END");
5283</pre>
5284</body>
5285</html>
5286HTML_END
5287    eval { $html_fh->close() };    # could be object without close method
5288
5289    if ( $rOpts->{'frames'} ) {
5290        my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5291        $self->make_frame( \@toc );
5292    }
5293}
5294
5295sub markup_tokens {
5296    my $self = shift;
5297    my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5298    my ( @colored_tokens, $j, $string, $type, $token, $level );
5299    my $rlast_level    = $self->{_rlast_level};
5300    my $rpackage_stack = $self->{_rpackage_stack};
5301
5302    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5303        $type  = $$rtoken_type[$j];
5304        $token = $$rtokens[$j];
5305        $level = $$rlevels[$j];
5306        $level = 0 if ( $level < 0 );
5307
5308        #-------------------------------------------------------
5309        # Update the package stack.  The package stack is needed to keep
5310        # the toc correct because some packages may be declared within
5311        # blocks and go out of scope when we leave the block.
5312        #-------------------------------------------------------
5313        if ( $level > $$rlast_level ) {
5314            unless ( $rpackage_stack->[ $level - 1 ] ) {
5315                $rpackage_stack->[ $level - 1 ] = 'main';
5316            }
5317            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5318        }
5319        elsif ( $level < $$rlast_level ) {
5320            my $package = $rpackage_stack->[$level];
5321            unless ($package) { $package = 'main' }
5322
5323            # if we change packages due to a nesting change, we
5324            # have to make an entry in the toc
5325            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5326                $self->add_toc_item( $package, 'package' );
5327            }
5328        }
5329        $$rlast_level = $level;
5330
5331        #-------------------------------------------------------
5332        # Intercept a sub name here; split it
5333        # into keyword 'sub' and sub name; and add an
5334        # entry in the toc
5335        #-------------------------------------------------------
5336        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5337            $token = $self->markup_html_element( $1, 'k' );
5338            push @colored_tokens, $token;
5339            $token = $2;
5340            $type  = 'M';
5341
5342            # but don't include sub declarations in the toc;
5343            # these wlll have leading token types 'i;'
5344            my $signature = join "", @$rtoken_type;
5345            unless ( $signature =~ /^i;/ ) {
5346                my $subname = $token;
5347                $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5348                $self->add_toc_item( $subname, 'sub' );
5349            }
5350        }
5351
5352        #-------------------------------------------------------
5353        # Intercept a package name here; split it
5354        # into keyword 'package' and name; add to the toc,
5355        # and update the package stack
5356        #-------------------------------------------------------
5357        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5358            $token = $self->markup_html_element( $1, 'k' );
5359            push @colored_tokens, $token;
5360            $token = $2;
5361            $type  = 'i';
5362            $self->add_toc_item( "$token", 'package' );
5363            $rpackage_stack->[$level] = $token;
5364        }
5365
5366        $token = $self->markup_html_element( $token, $type );
5367        push @colored_tokens, $token;
5368    }
5369    return ( \@colored_tokens );
5370}
5371
5372sub markup_html_element {
5373    my $self = shift;
5374    my ( $token, $type ) = @_;
5375
5376    return $token if ( $type eq 'b' );    # skip a blank token
5377    return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5378    $token = escape_html($token);
5379
5380    # get the short abbreviation for this token type
5381    my $short_name = $token_short_names{$type};
5382    if ( !defined($short_name) ) {
5383        $short_name = "pu";                    # punctuation is default
5384    }
5385
5386    # handle style sheets..
5387    if ( !$rOpts->{'nohtml-style-sheets'} ) {
5388        if ( $short_name ne 'pu' ) {
5389            $token = qq(<span class="$short_name">) . $token . "</span>";
5390        }
5391    }
5392
5393    # handle no style sheets..
5394    else {
5395        my $color = $html_color{$short_name};
5396
5397        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5398            $token = qq(<font color="$color">) . $token . "</font>";
5399        }
5400        if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5401        if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5402    }
5403    return $token;
5404}
5405
5406sub escape_html {
5407
5408    my $token = shift;
5409    if ($missing_html_entities) {
5410        $token =~ s/\&/&amp;/g;
5411        $token =~ s/\</&lt;/g;
5412        $token =~ s/\>/&gt;/g;
5413        $token =~ s/\"/&quot;/g;
5414    }
5415    else {
5416        HTML::Entities::encode_entities($token);
5417    }
5418    return $token;
5419}
5420
5421sub finish_formatting {
5422
5423    # called after last line
5424    my $self = shift;
5425    $self->close_html_file();
5426    return;
5427}
5428
5429sub write_line {
5430
5431    my $self = shift;
5432    return unless $self->{_html_file_opened};
5433    my $html_pre_fh      = $self->{_html_pre_fh};
5434    my ($line_of_tokens) = @_;
5435    my $line_type        = $line_of_tokens->{_line_type};
5436    my $input_line       = $line_of_tokens->{_line_text};
5437    my $line_number      = $line_of_tokens->{_line_number};
5438    chomp $input_line;
5439
5440    # markup line of code..
5441    my $html_line;
5442    if ( $line_type eq 'CODE' ) {
5443        my $rtoken_type = $line_of_tokens->{_rtoken_type};
5444        my $rtokens     = $line_of_tokens->{_rtokens};
5445        my $rlevels     = $line_of_tokens->{_rlevels};
5446
5447        if ( $input_line =~ /(^\s*)/ ) {
5448            $html_line = $1;
5449        }
5450        else {
5451            $html_line = "";
5452        }
5453        my ($rcolored_tokens) =
5454          $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5455        $html_line .= join '', @$rcolored_tokens;
5456    }
5457
5458    # markup line of non-code..
5459    else {
5460        my $line_character;
5461        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5462        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5463        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5464        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5465        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5466        elsif ( $line_type eq 'END_START' ) {
5467            $line_character = 'k';
5468            $self->add_toc_item( '__END__', '__END__' );
5469        }
5470        elsif ( $line_type eq 'DATA_START' ) {
5471            $line_character = 'k';
5472            $self->add_toc_item( '__DATA__', '__DATA__' );
5473        }
5474        elsif ( $line_type =~ /^POD/ ) {
5475            $line_character = 'P';
5476            if ( $rOpts->{'pod2html'} ) {
5477                my $html_pod_fh = $self->{_html_pod_fh};
5478                if ( $line_type eq 'POD_START' ) {
5479
5480                    my $rpre_string_stack = $self->{_rpre_string_stack};
5481                    my $rpre_string       = $rpre_string_stack->[-1];
5482
5483                    # if we have written any non-blank lines to the
5484                    # current pre section, start writing to a new output
5485                    # string
5486                    if ( $$rpre_string =~ /\S/ ) {
5487                        my $pre_string;
5488                        $html_pre_fh =
5489                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5490                        $self->{_html_pre_fh} = $html_pre_fh;
5491                        push @$rpre_string_stack, \$pre_string;
5492
5493                        # leave a marker in the pod stream so we know
5494                        # where to put the pre section we just
5495                        # finished.
5496                        my $for_html = '=for html';    # don't confuse pod utils
5497                        $html_pod_fh->print(<<EOM);
5498
5499$for_html
5500<!-- pERLTIDY sECTION -->
5501
5502EOM
5503                    }
5504
5505                    # otherwise, just clear the current string and start
5506                    # over
5507                    else {
5508                        $$rpre_string = "";
5509                        $html_pod_fh->print("\n");
5510                    }
5511                }
5512                $html_pod_fh->print( $input_line . "\n" );
5513                if ( $line_type eq 'POD_END' ) {
5514                    $self->{_pod_cut_count}++;
5515                    $html_pod_fh->print("\n");
5516                }
5517                return;
5518            }
5519        }
5520        else { $line_character = 'Q' }
5521        $html_line = $self->markup_html_element( $input_line, $line_character );
5522    }
5523
5524    # add the line number if requested
5525    if ( $rOpts->{'html-line-numbers'} ) {
5526        my $extra_space .=
5527            ( $line_number < 10 )   ? "   "
5528          : ( $line_number < 100 )  ? "  "
5529          : ( $line_number < 1000 ) ? " "
5530          :                           "";
5531        $html_line = $extra_space . $line_number . " " . $html_line;
5532    }
5533
5534    # write the line
5535    $html_pre_fh->print("$html_line\n");
5536}
5537
5538#####################################################################
5539#
5540# The Perl::Tidy::Formatter package adds indentation, whitespace, and
5541# line breaks to the token stream
5542#
5543# WARNING: This is not a real class for speed reasons.  Only one
5544# Formatter may be used.
5545#
5546#####################################################################
5547
5548package Perl::Tidy::Formatter;
5549
5550BEGIN {
5551
5552    # Caution: these debug flags produce a lot of output
5553    # They should all be 0 except when debugging small scripts
5554    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
5555    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
5556    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
5557    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
5558    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
5559    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
5560    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5561    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
5562    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
5563    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
5564    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
5565    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
5566
5567    my $debug_warning = sub {
5568        print "FORMATTER_DEBUGGING with key $_[0]\n";
5569    };
5570
5571    FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
5572    FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
5573    FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
5574    FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
5575    FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
5576    FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
5577    FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5578    FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
5579    FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
5580    FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
5581    FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
5582    FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
5583}
5584
5585use Carp;
5586use vars qw{
5587
5588  @gnu_stack
5589  $max_gnu_stack_index
5590  $gnu_position_predictor
5591  $line_start_index_to_go
5592  $last_indentation_written
5593  $last_unadjusted_indentation
5594  $last_leading_token
5595
5596  $saw_VERSION_in_this_file
5597  $saw_END_or_DATA_
5598
5599  @gnu_item_list
5600  $max_gnu_item_index
5601  $gnu_sequence_number
5602  $last_output_indentation
5603  %last_gnu_equals
5604  %gnu_comma_count
5605  %gnu_arrow_count
5606
5607  @block_type_to_go
5608  @type_sequence_to_go
5609  @container_environment_to_go
5610  @bond_strength_to_go
5611  @forced_breakpoint_to_go
5612  @lengths_to_go
5613  @levels_to_go
5614  @leading_spaces_to_go
5615  @reduced_spaces_to_go
5616  @matching_token_to_go
5617  @mate_index_to_go
5618  @nesting_blocks_to_go
5619  @ci_levels_to_go
5620  @nesting_depth_to_go
5621  @nobreak_to_go
5622  @old_breakpoint_to_go
5623  @tokens_to_go
5624  @types_to_go
5625
5626  %saved_opening_indentation
5627
5628  $max_index_to_go
5629  $comma_count_in_batch
5630  $old_line_count_in_batch
5631  $last_nonblank_index_to_go
5632  $last_nonblank_type_to_go
5633  $last_nonblank_token_to_go
5634  $last_last_nonblank_index_to_go
5635  $last_last_nonblank_type_to_go
5636  $last_last_nonblank_token_to_go
5637  @nonblank_lines_at_depth
5638  $starting_in_quote
5639  $ending_in_quote
5640
5641  $in_format_skipping_section
5642  $format_skipping_pattern_begin
5643  $format_skipping_pattern_end
5644
5645  $forced_breakpoint_count
5646  $forced_breakpoint_undo_count
5647  @forced_breakpoint_undo_stack
5648  %postponed_breakpoint
5649
5650  $tabbing
5651  $embedded_tab_count
5652  $first_embedded_tab_at
5653  $last_embedded_tab_at
5654  $deleted_semicolon_count
5655  $first_deleted_semicolon_at
5656  $last_deleted_semicolon_at
5657  $added_semicolon_count
5658  $first_added_semicolon_at
5659  $last_added_semicolon_at
5660  $first_tabbing_disagreement
5661  $last_tabbing_disagreement
5662  $in_tabbing_disagreement
5663  $tabbing_disagreement_count
5664  $input_line_tabbing
5665
5666  $last_line_type
5667  $last_line_leading_type
5668  $last_line_leading_level
5669  $last_last_line_leading_level
5670
5671  %block_leading_text
5672  %block_opening_line_number
5673  $csc_new_statement_ok
5674  $accumulating_text_for_block
5675  $leading_block_text
5676  $rleading_block_if_elsif_text
5677  $leading_block_text_level
5678  $leading_block_text_length_exceeded
5679  $leading_block_text_line_length
5680  $leading_block_text_line_number
5681  $closing_side_comment_prefix_pattern
5682  $closing_side_comment_list_pattern
5683
5684  $last_nonblank_token
5685  $last_nonblank_type
5686  $last_last_nonblank_token
5687  $last_last_nonblank_type
5688  $last_nonblank_block_type
5689  $last_output_level
5690  %is_do_follower
5691  %is_if_brace_follower
5692  %space_after_keyword
5693  $rbrace_follower
5694  $looking_for_else
5695  %is_last_next_redo_return
5696  %is_other_brace_follower
5697  %is_else_brace_follower
5698  %is_anon_sub_brace_follower
5699  %is_anon_sub_1_brace_follower
5700  %is_sort_map_grep
5701  %is_sort_map_grep_eval
5702  %is_sort_map_grep_eval_do
5703  %is_block_without_semicolon
5704  %is_if_unless
5705  %is_and_or
5706  %is_assignment
5707  %is_chain_operator
5708  %is_if_unless_and_or_last_next_redo_return
5709  %is_until_while_for_if_elsif_else
5710
5711  @has_broken_sublist
5712  @dont_align
5713  @want_comma_break
5714
5715  $is_static_block_comment
5716  $index_start_one_line_block
5717  $semicolons_before_block_self_destruct
5718  $index_max_forced_break
5719  $input_line_number
5720  $diagnostics_object
5721  $vertical_aligner_object
5722  $logger_object
5723  $file_writer_object
5724  $formatter_self
5725  @ci_stack
5726  $last_line_had_side_comment
5727  %want_break_before
5728  %outdent_keyword
5729  $static_block_comment_pattern
5730  $static_side_comment_pattern
5731  %opening_vertical_tightness
5732  %closing_vertical_tightness
5733  %closing_token_indentation
5734
5735  %opening_token_right
5736  %stack_opening_token
5737  %stack_closing_token
5738
5739  $block_brace_vertical_tightness_pattern
5740
5741  $rOpts_add_newlines
5742  $rOpts_add_whitespace
5743  $rOpts_block_brace_tightness
5744  $rOpts_block_brace_vertical_tightness
5745  $rOpts_brace_left_and_indent
5746  $rOpts_comma_arrow_breakpoints
5747  $rOpts_break_at_old_keyword_breakpoints
5748  $rOpts_break_at_old_comma_breakpoints
5749  $rOpts_break_at_old_logical_breakpoints
5750  $rOpts_break_at_old_ternary_breakpoints
5751  $rOpts_closing_side_comment_else_flag
5752  $rOpts_closing_side_comment_maximum_text
5753  $rOpts_continuation_indentation
5754  $rOpts_cuddled_else
5755  $rOpts_delete_old_whitespace
5756  $rOpts_fuzzy_line_length
5757  $rOpts_indent_columns
5758  $rOpts_line_up_parentheses
5759  $rOpts_maximum_fields_per_table
5760  $rOpts_maximum_line_length
5761  $rOpts_short_concatenation_item_length
5762  $rOpts_keep_old_blank_lines
5763  $rOpts_ignore_old_breakpoints
5764  $rOpts_format_skipping
5765  $rOpts_space_function_paren
5766  $rOpts_space_keyword_paren
5767  $rOpts_keep_interior_semicolons
5768
5769  $half_maximum_line_length
5770
5771  %is_opening_type
5772  %is_closing_type
5773  %is_keyword_returning_list
5774  %tightness
5775  %matching_token
5776  $rOpts
5777  %right_bond_strength
5778  %left_bond_strength
5779  %binary_ws_rules
5780  %want_left_space
5781  %want_right_space
5782  %is_digraph
5783  %is_trigraph
5784  $bli_pattern
5785  $bli_list_string
5786  %is_closing_type
5787  %is_opening_type
5788  %is_closing_token
5789  %is_opening_token
5790};
5791
5792BEGIN {
5793
5794    # default list of block types for which -bli would apply
5795    $bli_list_string = 'if else elsif unless while for foreach do : sub';
5796
5797    @_ = qw(
5798      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5799      <= >= == =~ !~ != ++ -- /= x=
5800    );
5801    @is_digraph{@_} = (1) x scalar(@_);
5802
5803    @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5804    @is_trigraph{@_} = (1) x scalar(@_);
5805
5806    @_ = qw(
5807      = **= += *= &= <<= &&=
5808      -= /= |= >>= ||= //=
5809      .= %= ^=
5810      x=
5811    );
5812    @is_assignment{@_} = (1) x scalar(@_);
5813
5814    @_ = qw(
5815      grep
5816      keys
5817      map
5818      reverse
5819      sort
5820      split
5821    );
5822    @is_keyword_returning_list{@_} = (1) x scalar(@_);
5823
5824    @_ = qw(is if unless and or err last next redo return);
5825    @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5826
5827    # always break after a closing curly of these block types:
5828    @_ = qw(until while for if elsif else);
5829    @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5830
5831    @_ = qw(last next redo return);
5832    @is_last_next_redo_return{@_} = (1) x scalar(@_);
5833
5834    @_ = qw(sort map grep);
5835    @is_sort_map_grep{@_} = (1) x scalar(@_);
5836
5837    @_ = qw(sort map grep eval);
5838    @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5839
5840    @_ = qw(sort map grep eval do);
5841    @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5842
5843    @_ = qw(if unless);
5844    @is_if_unless{@_} = (1) x scalar(@_);
5845
5846    @_ = qw(and or err);
5847    @is_and_or{@_} = (1) x scalar(@_);
5848
5849    # Identify certain operators which often occur in chains.
5850    # Note: the minus (-) causes a side effect of padding of the first line in
5851    # something like this (by sub set_logical_padding):
5852    #    Checkbutton => 'Transmission checked',
5853    #   -variable    => \$TRANS
5854    # This usually improves appearance so it seems ok.
5855    @_ = qw(&& || and or : ? . + - * /);
5856    @is_chain_operator{@_} = (1) x scalar(@_);
5857
5858    # We can remove semicolons after blocks preceded by these keywords
5859    @_ =
5860      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
5861      unless while until for foreach);
5862    @is_block_without_semicolon{@_} = (1) x scalar(@_);
5863
5864    # 'L' is token for opening { at hash key
5865    @_ = qw" L { ( [ ";
5866    @is_opening_type{@_} = (1) x scalar(@_);
5867
5868    # 'R' is token for closing } at hash key
5869    @_ = qw" R } ) ] ";
5870    @is_closing_type{@_} = (1) x scalar(@_);
5871
5872    @_ = qw" { ( [ ";
5873    @is_opening_token{@_} = (1) x scalar(@_);
5874
5875    @_ = qw" } ) ] ";
5876    @is_closing_token{@_} = (1) x scalar(@_);
5877}
5878
5879# whitespace codes
5880use constant WS_YES      => 1;
5881use constant WS_OPTIONAL => 0;
5882use constant WS_NO       => -1;
5883
5884# Token bond strengths.
5885use constant NO_BREAK    => 10000;
5886use constant VERY_STRONG => 100;
5887use constant STRONG      => 2.1;
5888use constant NOMINAL     => 1.1;
5889use constant WEAK        => 0.8;
5890use constant VERY_WEAK   => 0.55;
5891
5892# values for testing indexes in output array
5893use constant UNDEFINED_INDEX => -1;
5894
5895# Maximum number of little messages; probably need not be changed.
5896use constant MAX_NAG_MESSAGES => 6;
5897
5898# increment between sequence numbers for each type
5899# For example, ?: pairs might have numbers 7,11,15,...
5900use constant TYPE_SEQUENCE_INCREMENT => 4;
5901
5902{
5903
5904    # methods to count instances
5905    my $_count = 0;
5906    sub get_count        { $_count; }
5907    sub _increment_count { ++$_count }
5908    sub _decrement_count { --$_count }
5909}
5910
5911sub trim {
5912
5913    # trim leading and trailing whitespace from a string
5914    $_[0] =~ s/\s+$//;
5915    $_[0] =~ s/^\s+//;
5916    return $_[0];
5917}
5918
5919sub split_words {
5920
5921    # given a string containing words separated by whitespace,
5922    # return the list of words
5923    my ($str) = @_;
5924    return unless $str;
5925    $str =~ s/\s+$//;
5926    $str =~ s/^\s+//;
5927    return split( /\s+/, $str );
5928}
5929
5930# interface to Perl::Tidy::Logger routines
5931sub warning {
5932    if ($logger_object) {
5933        $logger_object->warning(@_);
5934    }
5935}
5936
5937sub complain {
5938    if ($logger_object) {
5939        $logger_object->complain(@_);
5940    }
5941}
5942
5943sub write_logfile_entry {
5944    if ($logger_object) {
5945        $logger_object->write_logfile_entry(@_);
5946    }
5947}
5948
5949sub black_box {
5950    if ($logger_object) {
5951        $logger_object->black_box(@_);
5952    }
5953}
5954
5955sub report_definite_bug {
5956    if ($logger_object) {
5957        $logger_object->report_definite_bug();
5958    }
5959}
5960
5961sub get_saw_brace_error {
5962    if ($logger_object) {
5963        $logger_object->get_saw_brace_error();
5964    }
5965}
5966
5967sub we_are_at_the_last_line {
5968    if ($logger_object) {
5969        $logger_object->we_are_at_the_last_line();
5970    }
5971}
5972
5973# interface to Perl::Tidy::Diagnostics routine
5974sub write_diagnostics {
5975
5976    if ($diagnostics_object) {
5977        $diagnostics_object->write_diagnostics(@_);
5978    }
5979}
5980
5981sub get_added_semicolon_count {
5982    my $self = shift;
5983    return $added_semicolon_count;
5984}
5985
5986sub DESTROY {
5987    $_[0]->_decrement_count();
5988}
5989
5990sub new {
5991
5992    my $class = shift;
5993
5994    # we are given an object with a write_line() method to take lines
5995    my %defaults = (
5996        sink_object        => undef,
5997        diagnostics_object => undef,
5998        logger_object      => undef,
5999    );
6000    my %args = ( %defaults, @_ );
6001
6002    $logger_object      = $args{logger_object};
6003    $diagnostics_object = $args{diagnostics_object};
6004
6005    # we create another object with a get_line() and peek_ahead() method
6006    my $sink_object = $args{sink_object};
6007    $file_writer_object =
6008      Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6009
6010    # initialize the leading whitespace stack to negative levels
6011    # so that we can never run off the end of the stack
6012    $gnu_position_predictor = 0;    # where the current token is predicted to be
6013    $max_gnu_stack_index    = 0;
6014    $max_gnu_item_index     = -1;
6015    $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6016    @gnu_item_list               = ();
6017    $last_output_indentation     = 0;
6018    $last_indentation_written    = 0;
6019    $last_unadjusted_indentation = 0;
6020    $last_leading_token          = "";
6021
6022    $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6023    $saw_END_or_DATA_         = 0;
6024
6025    @block_type_to_go            = ();
6026    @type_sequence_to_go         = ();
6027    @container_environment_to_go = ();
6028    @bond_strength_to_go         = ();
6029    @forced_breakpoint_to_go     = ();
6030    @lengths_to_go               = ();    # line length to start of ith token
6031    @levels_to_go                = ();
6032    @matching_token_to_go        = ();
6033    @mate_index_to_go            = ();
6034    @nesting_blocks_to_go        = ();
6035    @ci_levels_to_go             = ();
6036    @nesting_depth_to_go         = (0);
6037    @nobreak_to_go               = ();
6038    @old_breakpoint_to_go        = ();
6039    @tokens_to_go                = ();
6040    @types_to_go                 = ();
6041    @leading_spaces_to_go        = ();
6042    @reduced_spaces_to_go        = ();
6043
6044    @dont_align         = ();
6045    @has_broken_sublist = ();
6046    @want_comma_break   = ();
6047
6048    @ci_stack                   = ("");
6049    $first_tabbing_disagreement = 0;
6050    $last_tabbing_disagreement  = 0;
6051    $tabbing_disagreement_count = 0;
6052    $in_tabbing_disagreement    = 0;
6053    $input_line_tabbing         = undef;
6054
6055    $last_line_type               = "";
6056    $last_last_line_leading_level = 0;
6057    $last_line_leading_level      = 0;
6058    $last_line_leading_type       = '#';
6059
6060    $last_nonblank_token        = ';';
6061    $last_nonblank_type         = ';';
6062    $last_last_nonblank_token   = ';';
6063    $last_last_nonblank_type    = ';';
6064    $last_nonblank_block_type   = "";
6065    $last_output_level          = 0;
6066    $looking_for_else           = 0;
6067    $embedded_tab_count         = 0;
6068    $first_embedded_tab_at      = 0;
6069    $last_embedded_tab_at       = 0;
6070    $deleted_semicolon_count    = 0;
6071    $first_deleted_semicolon_at = 0;
6072    $last_deleted_semicolon_at  = 0;
6073    $added_semicolon_count      = 0;
6074    $first_added_semicolon_at   = 0;
6075    $last_added_semicolon_at    = 0;
6076    $last_line_had_side_comment = 0;
6077    $is_static_block_comment    = 0;
6078    %postponed_breakpoint       = ();
6079
6080    # variables for adding side comments
6081    %block_leading_text        = ();
6082    %block_opening_line_number = ();
6083    $csc_new_statement_ok      = 1;
6084
6085    %saved_opening_indentation  = ();
6086    $in_format_skipping_section = 0;
6087
6088    reset_block_text_accumulator();
6089
6090    prepare_for_new_input_lines();
6091
6092    $vertical_aligner_object =
6093      Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6094        $logger_object, $diagnostics_object );
6095
6096    if ( $rOpts->{'entab-leading-whitespace'} ) {
6097        write_logfile_entry(
6098"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6099        );
6100    }
6101    elsif ( $rOpts->{'tabs'} ) {
6102        write_logfile_entry("Indentation will be with a tab character\n");
6103    }
6104    else {
6105        write_logfile_entry(
6106            "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6107    }
6108
6109    # This was the start of a formatter referent, but object-oriented
6110    # coding has turned out to be too slow here.
6111    $formatter_self = {};
6112
6113    bless $formatter_self, $class;
6114
6115    # Safety check..this is not a class yet
6116    if ( _increment_count() > 1 ) {
6117        confess
6118"Attempt to create more than 1 object in $class, which is not a true class yet\n";
6119    }
6120    return $formatter_self;
6121}
6122
6123sub prepare_for_new_input_lines {
6124
6125    $gnu_sequence_number++;    # increment output batch counter
6126    %last_gnu_equals                = ();
6127    %gnu_comma_count                = ();
6128    %gnu_arrow_count                = ();
6129    $line_start_index_to_go         = 0;
6130    $max_gnu_item_index             = UNDEFINED_INDEX;
6131    $index_max_forced_break         = UNDEFINED_INDEX;
6132    $max_index_to_go                = UNDEFINED_INDEX;
6133    $last_nonblank_index_to_go      = UNDEFINED_INDEX;
6134    $last_nonblank_type_to_go       = '';
6135    $last_nonblank_token_to_go      = '';
6136    $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6137    $last_last_nonblank_type_to_go  = '';
6138    $last_last_nonblank_token_to_go = '';
6139    $forced_breakpoint_count        = 0;
6140    $forced_breakpoint_undo_count   = 0;
6141    $rbrace_follower                = undef;
6142    $lengths_to_go[0]               = 0;
6143    $old_line_count_in_batch        = 1;
6144    $comma_count_in_batch           = 0;
6145    $starting_in_quote              = 0;
6146
6147    destroy_one_line_block();
6148}
6149
6150sub write_line {
6151
6152    my $self = shift;
6153    my ($line_of_tokens) = @_;
6154
6155    my $line_type  = $line_of_tokens->{_line_type};
6156    my $input_line = $line_of_tokens->{_line_text};
6157
6158    if ( $rOpts->{notidy} ) {
6159        write_unindented_line($input_line);
6160        $last_line_type = $line_type;
6161        return;
6162    }
6163
6164    # _line_type codes are:
6165    #   SYSTEM         - system-specific code before hash-bang line
6166    #   CODE           - line of perl code (including comments)
6167    #   POD_START      - line starting pod, such as '=head'
6168    #   POD            - pod documentation text
6169    #   POD_END        - last line of pod section, '=cut'
6170    #   HERE           - text of here-document
6171    #   HERE_END       - last line of here-doc (target word)
6172    #   FORMAT         - format section
6173    #   FORMAT_END     - last line of format section, '.'
6174    #   DATA_START     - __DATA__ line
6175    #   DATA           - unidentified text following __DATA__
6176    #   END_START      - __END__ line
6177    #   END            - unidentified text following __END__
6178    #   ERROR          - we are in big trouble, probably not a perl script
6179
6180    # put a blank line after an =cut which comes before __END__ and __DATA__
6181    # (required by podchecker)
6182    if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6183        $file_writer_object->reset_consecutive_blank_lines();
6184        if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6185    }
6186
6187    # handle line of code..
6188    if ( $line_type eq 'CODE' ) {
6189
6190        # let logger see all non-blank lines of code
6191        if ( $input_line !~ /^\s*$/ ) {
6192            my $output_line_number =
6193              $vertical_aligner_object->get_output_line_number();
6194            black_box( $line_of_tokens, $output_line_number );
6195        }
6196        print_line_of_tokens($line_of_tokens);
6197    }
6198
6199    # handle line of non-code..
6200    else {
6201
6202        # set special flags
6203        my $skip_line = 0;
6204        my $tee_line  = 0;
6205        if ( $line_type =~ /^POD/ ) {
6206
6207            # Pod docs should have a preceding blank line.  But be
6208            # very careful in __END__ and __DATA__ sections, because:
6209            #   1. the user may be using this section for any purpose whatsoever
6210            #   2. the blank counters are not active there
6211            # It should be safe to request a blank line between an
6212            # __END__ or __DATA__ and an immediately following '=head'
6213            # type line, (types END_START and DATA_START), but not for
6214            # any other lines of type END or DATA.
6215            if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6216            if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
6217            if (  !$skip_line
6218                && $line_type eq 'POD_START'
6219                && $last_line_type !~ /^(END|DATA)$/ )
6220            {
6221                want_blank_line();
6222            }
6223        }
6224
6225        # leave the blank counters in a predictable state
6226        # after __END__ or __DATA__
6227        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6228            $file_writer_object->reset_consecutive_blank_lines();
6229            $saw_END_or_DATA_ = 1;
6230        }
6231
6232        # write unindented non-code line
6233        if ( !$skip_line ) {
6234            if ($tee_line) { $file_writer_object->tee_on() }
6235            write_unindented_line($input_line);
6236            if ($tee_line) { $file_writer_object->tee_off() }
6237        }
6238    }
6239    $last_line_type = $line_type;
6240}
6241
6242sub create_one_line_block {
6243    $index_start_one_line_block            = $_[0];
6244    $semicolons_before_block_self_destruct = $_[1];
6245}
6246
6247sub destroy_one_line_block {
6248    $index_start_one_line_block            = UNDEFINED_INDEX;
6249    $semicolons_before_block_self_destruct = 0;
6250}
6251
6252sub leading_spaces_to_go {
6253
6254    # return the number of indentation spaces for a token in the output stream;
6255    # these were previously stored by 'set_leading_whitespace'.
6256
6257    return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6258
6259}
6260
6261sub get_SPACES {
6262
6263    # return the number of leading spaces associated with an indentation
6264    # variable $indentation is either a constant number of spaces or an object
6265    # with a get_SPACES method.
6266    my $indentation = shift;
6267    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6268}
6269
6270sub get_RECOVERABLE_SPACES {
6271
6272    # return the number of spaces (+ means shift right, - means shift left)
6273    # that we would like to shift a group of lines with the same indentation
6274    # to get them to line up with their opening parens
6275    my $indentation = shift;
6276    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6277}
6278
6279sub get_AVAILABLE_SPACES_to_go {
6280
6281    my $item = $leading_spaces_to_go[ $_[0] ];
6282
6283    # return the number of available leading spaces associated with an
6284    # indentation variable.  $indentation is either a constant number of
6285    # spaces or an object with a get_AVAILABLE_SPACES method.
6286    return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6287}
6288
6289sub new_lp_indentation_item {
6290
6291    # this is an interface to the IndentationItem class
6292    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6293
6294    # A negative level implies not to store the item in the item_list
6295    my $index = 0;
6296    if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6297
6298    my $item = Perl::Tidy::IndentationItem->new(
6299        $spaces,      $level,
6300        $ci_level,    $available_spaces,
6301        $index,       $gnu_sequence_number,
6302        $align_paren, $max_gnu_stack_index,
6303        $line_start_index_to_go,
6304    );
6305
6306    if ( $level >= 0 ) {
6307        $gnu_item_list[$max_gnu_item_index] = $item;
6308    }
6309
6310    return $item;
6311}
6312
6313sub set_leading_whitespace {
6314
6315    # This routine defines leading whitespace
6316    # given: the level and continuation_level of a token,
6317    # define: space count of leading string which would apply if it
6318    # were the first token of a new line.
6319
6320    my ( $level, $ci_level, $in_continued_quote ) = @_;
6321
6322    # modify for -bli, which adds one continuation indentation for
6323    # opening braces
6324    if (   $rOpts_brace_left_and_indent
6325        && $max_index_to_go == 0
6326        && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6327    {
6328        $ci_level++;
6329    }
6330
6331    # patch to avoid trouble when input file has negative indentation.
6332    # other logic should catch this error.
6333    if ( $level < 0 ) { $level = 0 }
6334
6335    #-------------------------------------------
6336    # handle the standard indentation scheme
6337    #-------------------------------------------
6338    unless ($rOpts_line_up_parentheses) {
6339        my $space_count =
6340          $ci_level * $rOpts_continuation_indentation +
6341          $level * $rOpts_indent_columns;
6342        my $ci_spaces =
6343          ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6344
6345        if ($in_continued_quote) {
6346            $space_count = 0;
6347            $ci_spaces   = 0;
6348        }
6349        $leading_spaces_to_go[$max_index_to_go] = $space_count;
6350        $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6351        return;
6352    }
6353
6354    #-------------------------------------------------------------
6355    # handle case of -lp indentation..
6356    #-------------------------------------------------------------
6357
6358    # The continued_quote flag means that this is the first token of a
6359    # line, and it is the continuation of some kind of multi-line quote
6360    # or pattern.  It requires special treatment because it must have no
6361    # added leading whitespace. So we create a special indentation item
6362    # which is not in the stack.
6363    if ($in_continued_quote) {
6364        my $space_count     = 0;
6365        my $available_space = 0;
6366        $level = -1;    # flag to prevent storing in item_list
6367        $leading_spaces_to_go[$max_index_to_go] =
6368          $reduced_spaces_to_go[$max_index_to_go] =
6369          new_lp_indentation_item( $space_count, $level, $ci_level,
6370            $available_space, 0 );
6371        return;
6372    }
6373
6374    # get the top state from the stack
6375    my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6376    my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6377    my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6378
6379    my $type        = $types_to_go[$max_index_to_go];
6380    my $token       = $tokens_to_go[$max_index_to_go];
6381    my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6382
6383    if ( $type eq '{' || $type eq '(' ) {
6384
6385        $gnu_comma_count{ $total_depth + 1 } = 0;
6386        $gnu_arrow_count{ $total_depth + 1 } = 0;
6387
6388        # If we come to an opening token after an '=' token of some type,
6389        # see if it would be helpful to 'break' after the '=' to save space
6390        my $last_equals = $last_gnu_equals{$total_depth};
6391        if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6392
6393            # find the position if we break at the '='
6394            my $i_test = $last_equals;
6395            if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6396
6397            # TESTING
6398            ##my $too_close = ($i_test==$max_index_to_go-1);
6399
6400            my $test_position = total_line_length( $i_test, $max_index_to_go );
6401
6402            if (
6403
6404                # the equals is not just before an open paren (testing)
6405                ##!$too_close &&
6406
6407                # if we are beyond the midpoint
6408                $gnu_position_predictor > $half_maximum_line_length
6409
6410                # or we are beyont the 1/4 point and there was an old
6411                # break at the equals
6412                || (
6413                    $gnu_position_predictor > $half_maximum_line_length / 2
6414                    && (
6415                        $old_breakpoint_to_go[$last_equals]
6416                        || (   $last_equals > 0
6417                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
6418                        || (   $last_equals > 1
6419                            && $types_to_go[ $last_equals - 1 ] eq 'b'
6420                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
6421                    )
6422                )
6423              )
6424            {
6425
6426                # then make the switch -- note that we do not set a real
6427                # breakpoint here because we may not really need one; sub
6428                # scan_list will do that if necessary
6429                $line_start_index_to_go = $i_test + 1;
6430                $gnu_position_predictor = $test_position;
6431            }
6432        }
6433    }
6434
6435    # Check for decreasing depth ..
6436    # Note that one token may have both decreasing and then increasing
6437    # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6438    # in this example we would first go back to (1,0) then up to (2,0)
6439    # in a single call.
6440    if ( $level < $current_level || $ci_level < $current_ci_level ) {
6441
6442        # loop to find the first entry at or completely below this level
6443        my ( $lev, $ci_lev );
6444        while (1) {
6445            if ($max_gnu_stack_index) {
6446
6447                # save index of token which closes this level
6448                $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6449
6450                # Undo any extra indentation if we saw no commas
6451                my $available_spaces =
6452                  $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6453
6454                my $comma_count = 0;
6455                my $arrow_count = 0;
6456                if ( $type eq '}' || $type eq ')' ) {
6457                    $comma_count = $gnu_comma_count{$total_depth};
6458                    $arrow_count = $gnu_arrow_count{$total_depth};
6459                    $comma_count = 0 unless $comma_count;
6460                    $arrow_count = 0 unless $arrow_count;
6461                }
6462                $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6463                $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6464
6465                if ( $available_spaces > 0 ) {
6466
6467                    if ( $comma_count <= 0 || $arrow_count > 0 ) {
6468
6469                        my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6470                        my $seqno =
6471                          $gnu_stack[$max_gnu_stack_index]
6472                          ->get_SEQUENCE_NUMBER();
6473
6474                        # Be sure this item was created in this batch.  This
6475                        # should be true because we delete any available
6476                        # space from open items at the end of each batch.
6477                        if (   $gnu_sequence_number != $seqno
6478                            || $i > $max_gnu_item_index )
6479                        {
6480                            warning(
6481"Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6482                            );
6483                            report_definite_bug();
6484                        }
6485
6486                        else {
6487                            if ( $arrow_count == 0 ) {
6488                                $gnu_item_list[$i]
6489                                  ->permanently_decrease_AVAILABLE_SPACES(
6490                                    $available_spaces);
6491                            }
6492                            else {
6493                                $gnu_item_list[$i]
6494                                  ->tentatively_decrease_AVAILABLE_SPACES(
6495                                    $available_spaces);
6496                            }
6497
6498                            my $j;
6499                            for (
6500                                $j = $i + 1 ;
6501                                $j <= $max_gnu_item_index ;
6502                                $j++
6503                              )
6504                            {
6505                                $gnu_item_list[$j]
6506                                  ->decrease_SPACES($available_spaces);
6507                            }
6508                        }
6509                    }
6510                }
6511
6512                # go down one level
6513                --$max_gnu_stack_index;
6514                $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6515                $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6516
6517                # stop when we reach a level at or below the current level
6518                if ( $lev <= $level && $ci_lev <= $ci_level ) {
6519                    $space_count =
6520                      $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6521                    $current_level    = $lev;
6522                    $current_ci_level = $ci_lev;
6523                    last;
6524                }
6525            }
6526
6527            # reached bottom of stack .. should never happen because
6528            # only negative levels can get here, and $level was forced
6529            # to be positive above.
6530            else {
6531                warning(
6532"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6533                );
6534                report_definite_bug();
6535                last;
6536            }
6537        }
6538    }
6539
6540    # handle increasing depth
6541    if ( $level > $current_level || $ci_level > $current_ci_level ) {
6542
6543        # Compute the standard incremental whitespace.  This will be
6544        # the minimum incremental whitespace that will be used.  This
6545        # choice results in a smooth transition between the gnu-style
6546        # and the standard style.
6547        my $standard_increment =
6548          ( $level - $current_level ) * $rOpts_indent_columns +
6549          ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6550
6551        # Now we have to define how much extra incremental space
6552        # ("$available_space") we want.  This extra space will be
6553        # reduced as necessary when long lines are encountered or when
6554        # it becomes clear that we do not have a good list.
6555        my $available_space = 0;
6556        my $align_paren     = 0;
6557        my $excess          = 0;
6558
6559        # initialization on empty stack..
6560        if ( $max_gnu_stack_index == 0 ) {
6561            $space_count = $level * $rOpts_indent_columns;
6562        }
6563
6564        # if this is a BLOCK, add the standard increment
6565        elsif ($last_nonblank_block_type) {
6566            $space_count += $standard_increment;
6567        }
6568
6569        # if last nonblank token was not structural indentation,
6570        # just use standard increment
6571        elsif ( $last_nonblank_type ne '{' ) {
6572            $space_count += $standard_increment;
6573        }
6574
6575        # otherwise use the space to the first non-blank level change token
6576        else {
6577
6578            $space_count = $gnu_position_predictor;
6579
6580            my $min_gnu_indentation =
6581              $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6582
6583            $available_space = $space_count - $min_gnu_indentation;
6584            if ( $available_space >= $standard_increment ) {
6585                $min_gnu_indentation += $standard_increment;
6586            }
6587            elsif ( $available_space > 1 ) {
6588                $min_gnu_indentation += $available_space + 1;
6589            }
6590            elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6591                if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6592                    $min_gnu_indentation += 2;
6593                }
6594                else {
6595                    $min_gnu_indentation += 1;
6596                }
6597            }
6598            else {
6599                $min_gnu_indentation += $standard_increment;
6600            }
6601            $available_space = $space_count - $min_gnu_indentation;
6602
6603            if ( $available_space < 0 ) {
6604                $space_count     = $min_gnu_indentation;
6605                $available_space = 0;
6606            }
6607            $align_paren = 1;
6608        }
6609
6610        # update state, but not on a blank token
6611        if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6612
6613            $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6614
6615            ++$max_gnu_stack_index;
6616            $gnu_stack[$max_gnu_stack_index] =
6617              new_lp_indentation_item( $space_count, $level, $ci_level,
6618                $available_space, $align_paren );
6619
6620            # If the opening paren is beyond the half-line length, then
6621            # we will use the minimum (standard) indentation.  This will
6622            # help avoid problems associated with running out of space
6623            # near the end of a line.  As a result, in deeply nested
6624            # lists, there will be some indentations which are limited
6625            # to this minimum standard indentation. But the most deeply
6626            # nested container will still probably be able to shift its
6627            # parameters to the right for proper alignment, so in most
6628            # cases this will not be noticable.
6629            if (   $available_space > 0
6630                && $space_count > $half_maximum_line_length )
6631            {
6632                $gnu_stack[$max_gnu_stack_index]
6633                  ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6634            }
6635        }
6636    }
6637
6638    # Count commas and look for non-list characters.  Once we see a
6639    # non-list character, we give up and don't look for any more commas.
6640    if ( $type eq '=>' ) {
6641        $gnu_arrow_count{$total_depth}++;
6642
6643        # tentatively treating '=>' like '=' for estimating breaks
6644        # TODO: this could use some experimentation
6645        $last_gnu_equals{$total_depth} = $max_index_to_go;
6646    }
6647
6648    elsif ( $type eq ',' ) {
6649        $gnu_comma_count{$total_depth}++;
6650    }
6651
6652    elsif ( $is_assignment{$type} ) {
6653        $last_gnu_equals{$total_depth} = $max_index_to_go;
6654    }
6655
6656    # this token might start a new line
6657    # if this is a non-blank..
6658    if ( $type ne 'b' ) {
6659
6660        # and if ..
6661        if (
6662
6663            # this is the first nonblank token of the line
6664            $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6665
6666            # or previous character was one of these:
6667            || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6668
6669            # or previous character was opening and this does not close it
6670            || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6671            || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6672
6673            # or this token is one of these:
6674            || $type =~ /^([\.]|\|\||\&\&)$/
6675
6676            # or this is a closing structure
6677            || (   $last_nonblank_type_to_go eq '}'
6678                && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6679
6680            # or previous token was keyword 'return'
6681            || ( $last_nonblank_type_to_go eq 'k'
6682                && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6683
6684            # or starting a new line at certain keywords is fine
6685            || (   $type eq 'k'
6686                && $is_if_unless_and_or_last_next_redo_return{$token} )
6687
6688            # or this is after an assignment after a closing structure
6689            || (
6690                $is_assignment{$last_nonblank_type_to_go}
6691                && (
6692                    $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6693
6694                    # and it is significantly to the right
6695                    || $gnu_position_predictor > $half_maximum_line_length
6696                )
6697            )
6698          )
6699        {
6700            check_for_long_gnu_style_lines();
6701            $line_start_index_to_go = $max_index_to_go;
6702
6703            # back up 1 token if we want to break before that type
6704            # otherwise, we may strand tokens like '?' or ':' on a line
6705            if ( $line_start_index_to_go > 0 ) {
6706                if ( $last_nonblank_type_to_go eq 'k' ) {
6707
6708                    if ( $want_break_before{$last_nonblank_token_to_go} ) {
6709                        $line_start_index_to_go--;
6710                    }
6711                }
6712                elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6713                    $line_start_index_to_go--;
6714                }
6715            }
6716        }
6717    }
6718
6719    # remember the predicted position of this token on the output line
6720    if ( $max_index_to_go > $line_start_index_to_go ) {
6721        $gnu_position_predictor =
6722          total_line_length( $line_start_index_to_go, $max_index_to_go );
6723    }
6724    else {
6725        $gnu_position_predictor = $space_count +
6726          token_sequence_length( $max_index_to_go, $max_index_to_go );
6727    }
6728
6729    # store the indentation object for this token
6730    # this allows us to manipulate the leading whitespace
6731    # (in case we have to reduce indentation to fit a line) without
6732    # having to change any token values
6733    $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6734    $reduced_spaces_to_go[$max_index_to_go] =
6735      ( $max_gnu_stack_index > 0 && $ci_level )
6736      ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6737      : $gnu_stack[$max_gnu_stack_index];
6738    return;
6739}
6740
6741sub check_for_long_gnu_style_lines {
6742
6743    # look at the current estimated maximum line length, and
6744    # remove some whitespace if it exceeds the desired maximum
6745
6746    # this is only for the '-lp' style
6747    return unless ($rOpts_line_up_parentheses);
6748
6749    # nothing can be done if no stack items defined for this line
6750    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6751
6752    # see if we have exceeded the maximum desired line length
6753    # keep 2 extra free because they are needed in some cases
6754    # (result of trial-and-error testing)
6755    my $spaces_needed =
6756      $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6757
6758    return if ( $spaces_needed <= 0 );
6759
6760    # We are over the limit, so try to remove a requested number of
6761    # spaces from leading whitespace.  We are only allowed to remove
6762    # from whitespace items created on this batch, since others have
6763    # already been used and cannot be undone.
6764    my @candidates = ();
6765    my $i;
6766
6767    # loop over all whitespace items created for the current batch
6768    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6769        my $item = $gnu_item_list[$i];
6770
6771        # item must still be open to be a candidate (otherwise it
6772        # cannot influence the current token)
6773        next if ( $item->get_CLOSED() >= 0 );
6774
6775        my $available_spaces = $item->get_AVAILABLE_SPACES();
6776
6777        if ( $available_spaces > 0 ) {
6778            push( @candidates, [ $i, $available_spaces ] );
6779        }
6780    }
6781
6782    return unless (@candidates);
6783
6784    # sort by available whitespace so that we can remove whitespace
6785    # from the maximum available first
6786    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6787
6788    # keep removing whitespace until we are done or have no more
6789    my $candidate;
6790    foreach $candidate (@candidates) {
6791        my ( $i, $available_spaces ) = @{$candidate};
6792        my $deleted_spaces =
6793          ( $available_spaces > $spaces_needed )
6794          ? $spaces_needed
6795          : $available_spaces;
6796
6797        # remove the incremental space from this item
6798        $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6799
6800        my $i_debug = $i;
6801
6802        # update the leading whitespace of this item and all items
6803        # that came after it
6804        for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6805
6806            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6807            if ( $old_spaces >= $deleted_spaces ) {
6808                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6809            }
6810
6811            # shouldn't happen except for code bug:
6812            else {
6813                my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6814                my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6815                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6816                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6817                warning(
6818"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6819                );
6820                report_definite_bug();
6821            }
6822        }
6823        $gnu_position_predictor -= $deleted_spaces;
6824        $spaces_needed          -= $deleted_spaces;
6825        last unless ( $spaces_needed > 0 );
6826    }
6827}
6828
6829sub finish_lp_batch {
6830
6831    # This routine is called once after each each output stream batch is
6832    # finished to undo indentation for all incomplete -lp
6833    # indentation levels.  It is too risky to leave a level open,
6834    # because then we can't backtrack in case of a long line to follow.
6835    # This means that comments and blank lines will disrupt this
6836    # indentation style.  But the vertical aligner may be able to
6837    # get the space back if there are side comments.
6838
6839    # this is only for the 'lp' style
6840    return unless ($rOpts_line_up_parentheses);
6841
6842    # nothing can be done if no stack items defined for this line
6843    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6844
6845    # loop over all whitespace items created for the current batch
6846    my $i;
6847    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6848        my $item = $gnu_item_list[$i];
6849
6850        # only look for open items
6851        next if ( $item->get_CLOSED() >= 0 );
6852
6853        # Tentatively remove all of the available space
6854        # (The vertical aligner will try to get it back later)
6855        my $available_spaces = $item->get_AVAILABLE_SPACES();
6856        if ( $available_spaces > 0 ) {
6857
6858            # delete incremental space for this item
6859            $gnu_item_list[$i]
6860              ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6861
6862            # Reduce the total indentation space of any nodes that follow
6863            # Note that any such nodes must necessarily be dependents
6864            # of this node.
6865            foreach ( $i + 1 .. $max_gnu_item_index ) {
6866                $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6867            }
6868        }
6869    }
6870    return;
6871}
6872
6873sub reduce_lp_indentation {
6874
6875    # reduce the leading whitespace at token $i if possible by $spaces_needed
6876    # (a large value of $spaces_needed will remove all excess space)
6877    # NOTE: to be called from scan_list only for a sequence of tokens
6878    # contained between opening and closing parens/braces/brackets
6879
6880    my ( $i, $spaces_wanted ) = @_;
6881    my $deleted_spaces = 0;
6882
6883    my $item             = $leading_spaces_to_go[$i];
6884    my $available_spaces = $item->get_AVAILABLE_SPACES();
6885
6886    if (
6887        $available_spaces > 0
6888        && ( ( $spaces_wanted <= $available_spaces )
6889            || !$item->get_HAVE_CHILD() )
6890      )
6891    {
6892
6893        # we'll remove these spaces, but mark them as recoverable
6894        $deleted_spaces =
6895          $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6896    }
6897
6898    return $deleted_spaces;
6899}
6900
6901sub token_sequence_length {
6902
6903    # return length of tokens ($ifirst .. $ilast) including first & last
6904    # returns 0 if $ifirst > $ilast
6905    my $ifirst = shift;
6906    my $ilast  = shift;
6907    return 0 if ( $ilast < 0 || $ifirst > $ilast );
6908    return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6909    return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6910}
6911
6912sub total_line_length {
6913
6914    # return length of a line of tokens ($ifirst .. $ilast)
6915    my $ifirst = shift;
6916    my $ilast  = shift;
6917    if ( $ifirst < 0 ) { $ifirst = 0 }
6918
6919    return leading_spaces_to_go($ifirst) +
6920      token_sequence_length( $ifirst, $ilast );
6921}
6922
6923sub excess_line_length {
6924
6925    # return number of characters by which a line of tokens ($ifirst..$ilast)
6926    # exceeds the allowable line length.
6927    my $ifirst = shift;
6928    my $ilast  = shift;
6929    if ( $ifirst < 0 ) { $ifirst = 0 }
6930    return leading_spaces_to_go($ifirst) +
6931      token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6932}
6933
6934sub finish_formatting {
6935
6936    # flush buffer and write any informative messages
6937    my $self = shift;
6938
6939    flush();
6940    $file_writer_object->decrement_output_line_number()
6941      ;    # fix up line number since it was incremented
6942    we_are_at_the_last_line();
6943    if ( $added_semicolon_count > 0 ) {
6944        my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6945        my $what =
6946          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6947        write_logfile_entry("$added_semicolon_count $what added:\n");
6948        write_logfile_entry(
6949            "  $first at input line $first_added_semicolon_at\n");
6950
6951        if ( $added_semicolon_count > 1 ) {
6952            write_logfile_entry(
6953                "   Last at input line $last_added_semicolon_at\n");
6954        }
6955        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6956        write_logfile_entry("\n");
6957    }
6958
6959    if ( $deleted_semicolon_count > 0 ) {
6960        my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6961        my $what =
6962          ( $deleted_semicolon_count > 1 )
6963          ? "semicolons were"
6964          : "semicolon was";
6965        write_logfile_entry(
6966            "$deleted_semicolon_count unnecessary $what deleted:\n");
6967        write_logfile_entry(
6968            "  $first at input line $first_deleted_semicolon_at\n");
6969
6970        if ( $deleted_semicolon_count > 1 ) {
6971            write_logfile_entry(
6972                "   Last at input line $last_deleted_semicolon_at\n");
6973        }
6974        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6975        write_logfile_entry("\n");
6976    }
6977
6978    if ( $embedded_tab_count > 0 ) {
6979        my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6980        my $what =
6981          ( $embedded_tab_count > 1 )
6982          ? "quotes or patterns"
6983          : "quote or pattern";
6984        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6985        write_logfile_entry(
6986"This means the display of this script could vary with device or software\n"
6987        );
6988        write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6989
6990        if ( $embedded_tab_count > 1 ) {
6991            write_logfile_entry(
6992                "   Last at input line $last_embedded_tab_at\n");
6993        }
6994        write_logfile_entry("\n");
6995    }
6996
6997    if ($first_tabbing_disagreement) {
6998        write_logfile_entry(
6999"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7000        );
7001    }
7002
7003    if ($in_tabbing_disagreement) {
7004        write_logfile_entry(
7005"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7006        );
7007    }
7008    else {
7009
7010        if ($last_tabbing_disagreement) {
7011
7012            write_logfile_entry(
7013"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7014            );
7015        }
7016        else {
7017            write_logfile_entry("No indentation disagreement seen\n");
7018        }
7019    }
7020    write_logfile_entry("\n");
7021
7022    $vertical_aligner_object->report_anything_unusual();
7023
7024    $file_writer_object->report_line_length_errors();
7025}
7026
7027sub check_options {
7028
7029    # This routine is called to check the Opts hash after it is defined
7030
7031    ($rOpts) = @_;
7032    my ( $tabbing_string, $tab_msg );
7033
7034    make_static_block_comment_pattern();
7035    make_static_side_comment_pattern();
7036    make_closing_side_comment_prefix();
7037    make_closing_side_comment_list_pattern();
7038    $format_skipping_pattern_begin =
7039      make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7040    $format_skipping_pattern_end =
7041      make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7042
7043    # If closing side comments ARE selected, then we can safely
7044    # delete old closing side comments unless closing side comment
7045    # warnings are requested.  This is a good idea because it will
7046    # eliminate any old csc's which fall below the line count threshold.
7047    # We cannot do this if warnings are turned on, though, because we
7048    # might delete some text which has been added.  So that must
7049    # be handled when comments are created.
7050    if ( $rOpts->{'closing-side-comments'} ) {
7051        if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7052            $rOpts->{'delete-closing-side-comments'} = 1;
7053        }
7054    }
7055
7056    # If closing side comments ARE NOT selected, but warnings ARE
7057    # selected and we ARE DELETING csc's, then we will pretend to be
7058    # adding with a huge interval.  This will force the comments to be
7059    # generated for comparison with the old comments, but not added.
7060    elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7061        if ( $rOpts->{'delete-closing-side-comments'} ) {
7062            $rOpts->{'delete-closing-side-comments'}  = 0;
7063            $rOpts->{'closing-side-comments'}         = 1;
7064            $rOpts->{'closing-side-comment-interval'} = 100000000;
7065        }
7066    }
7067
7068    make_bli_pattern();
7069    make_block_brace_vertical_tightness_pattern();
7070
7071    if ( $rOpts->{'line-up-parentheses'} ) {
7072
7073        if (   $rOpts->{'indent-only'}
7074            || !$rOpts->{'add-newlines'}
7075            || !$rOpts->{'delete-old-newlines'} )
7076        {
7077            warn <<EOM;
7078-----------------------------------------------------------------------
7079Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7080
7081The -lp indentation logic requires that perltidy be able to coordinate
7082arbitrarily large numbers of line breakpoints.  This isn't possible
7083with these flags. Sometimes an acceptable workaround is to use -wocb=3
7084-----------------------------------------------------------------------
7085EOM
7086            $rOpts->{'line-up-parentheses'} = 0;
7087        }
7088    }
7089
7090    # At present, tabs are not compatable with the line-up-parentheses style
7091    # (it would be possible to entab the total leading whitespace
7092    # just prior to writing the line, if desired).
7093    if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7094        warn <<EOM;
7095Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
7096EOM
7097        $rOpts->{'tabs'} = 0;
7098    }
7099
7100    # Likewise, tabs are not compatable with outdenting..
7101    if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7102        warn <<EOM;
7103Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7104EOM
7105        $rOpts->{'tabs'} = 0;
7106    }
7107
7108    if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7109        warn <<EOM;
7110Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
7111EOM
7112        $rOpts->{'tabs'} = 0;
7113    }
7114
7115    if ( !$rOpts->{'space-for-semicolon'} ) {
7116        $want_left_space{'f'} = -1;
7117    }
7118
7119    if ( $rOpts->{'space-terminal-semicolon'} ) {
7120        $want_left_space{';'} = 1;
7121    }
7122
7123    # implement outdenting preferences for keywords
7124    %outdent_keyword = ();
7125    unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7126        @_ = qw(next last redo goto return);    # defaults
7127    }
7128
7129    # FUTURE: if not a keyword, assume that it is an identifier
7130    foreach (@_) {
7131        if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7132            $outdent_keyword{$_} = 1;
7133        }
7134        else {
7135            warn "ignoring '$_' in -okwl list; not a perl keyword";
7136        }
7137    }
7138
7139    # implement user whitespace preferences
7140    if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7141        @want_left_space{@_} = (1) x scalar(@_);
7142    }
7143
7144    if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7145        @want_right_space{@_} = (1) x scalar(@_);
7146    }
7147
7148    if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7149        @want_left_space{@_} = (-1) x scalar(@_);
7150    }
7151
7152    if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7153        @want_right_space{@_} = (-1) x scalar(@_);
7154    }
7155    if ( $rOpts->{'dump-want-left-space'} ) {
7156        dump_want_left_space(*STDOUT);
7157        exit 1;
7158    }
7159
7160    if ( $rOpts->{'dump-want-right-space'} ) {
7161        dump_want_right_space(*STDOUT);
7162        exit 1;
7163    }
7164
7165    # default keywords for which space is introduced before an opening paren
7166    # (at present, including them messes up vertical alignment)
7167    @_ = qw(my local our and or err eq ne if else elsif until
7168      unless while for foreach return switch case given when);
7169    @space_after_keyword{@_} = (1) x scalar(@_);
7170
7171    # allow user to modify these defaults
7172    if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7173        @space_after_keyword{@_} = (1) x scalar(@_);
7174    }
7175
7176    if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7177        @space_after_keyword{@_} = (0) x scalar(@_);
7178    }
7179
7180    # implement user break preferences
7181    my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7182      = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7183      . : ? && || and or err xor
7184    );
7185
7186    my $break_after = sub {
7187        foreach my $tok (@_) {
7188            if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7189            my $lbs = $left_bond_strength{$tok};
7190            my $rbs = $right_bond_strength{$tok};
7191            if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7192                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7193                  ( $lbs, $rbs );
7194            }
7195        }
7196    };
7197
7198    my $break_before = sub {
7199        foreach my $tok (@_) {
7200            my $lbs = $left_bond_strength{$tok};
7201            my $rbs = $right_bond_strength{$tok};
7202            if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7203                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7204                  ( $lbs, $rbs );
7205            }
7206        }
7207    };
7208
7209    $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7210    $break_before->(@all_operators)
7211      if ( $rOpts->{'break-before-all-operators'} );
7212
7213    $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7214    $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7215
7216    # make note if breaks are before certain key types
7217    %want_break_before = ();
7218    foreach my $tok ( @all_operators, ',' ) {
7219        $want_break_before{$tok} =
7220          $left_bond_strength{$tok} < $right_bond_strength{$tok};
7221    }
7222
7223    # Coordinate ?/: breaks, which must be similar
7224    if ( !$want_break_before{':'} ) {
7225        $want_break_before{'?'}   = $want_break_before{':'};
7226        $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7227        $left_bond_strength{'?'}  = NO_BREAK;
7228    }
7229
7230    # Define here tokens which may follow the closing brace of a do statement
7231    # on the same line, as in:
7232    #   } while ( $something);
7233    @_ = qw(until while unless if ; : );
7234    push @_, ',';
7235    @is_do_follower{@_} = (1) x scalar(@_);
7236
7237    # These tokens may follow the closing brace of an if or elsif block.
7238    # In other words, for cuddled else we want code to look like:
7239    #   } elsif ( $something) {
7240    #   } else {
7241    if ( $rOpts->{'cuddled-else'} ) {
7242        @_ = qw(else elsif);
7243        @is_if_brace_follower{@_} = (1) x scalar(@_);
7244    }
7245    else {
7246        %is_if_brace_follower = ();
7247    }
7248
7249    # nothing can follow the closing curly of an else { } block:
7250    %is_else_brace_follower = ();
7251
7252    # what can follow a multi-line anonymous sub definition closing curly:
7253    @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7254    push @_, ',';
7255    @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7256
7257    # what can follow a one-line anonynomous sub closing curly:
7258    # one-line anonumous subs also have ']' here...
7259    # see tk3.t and PP.pm
7260    @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7261    push @_, ',';
7262    @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7263
7264    # What can follow a closing curly of a block
7265    # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7266    # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7267    @_ = qw#  ; : => or and  && || ) #;
7268    push @_, ',';
7269
7270    # allow cuddled continue if cuddled else is specified
7271    if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7272
7273    @is_other_brace_follower{@_} = (1) x scalar(@_);
7274
7275    $right_bond_strength{'{'} = WEAK;
7276    $left_bond_strength{'{'}  = VERY_STRONG;
7277
7278    # make -l=0  equal to -l=infinite
7279    if ( !$rOpts->{'maximum-line-length'} ) {
7280        $rOpts->{'maximum-line-length'} = 1000000;
7281    }
7282
7283    # make -lbl=0  equal to -lbl=infinite
7284    if ( !$rOpts->{'long-block-line-count'} ) {
7285        $rOpts->{'long-block-line-count'} = 1000000;
7286    }
7287
7288    my $ole = $rOpts->{'output-line-ending'};
7289    if ($ole) {
7290        my %endings = (
7291            dos  => "\015\012",
7292            win  => "\015\012",
7293            mac  => "\015",
7294            unix => "\012",
7295        );
7296        $ole = lc $ole;
7297        unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7298            my $str = join " ", keys %endings;
7299            die <<EOM;
7300Unrecognized line ending '$ole'; expecting one of: $str
7301EOM
7302        }
7303        if ( $rOpts->{'preserve-line-endings'} ) {
7304            warn "Ignoring -ple; conflicts with -ole\n";
7305            $rOpts->{'preserve-line-endings'} = undef;
7306        }
7307    }
7308
7309    # hashes used to simplify setting whitespace
7310    %tightness = (
7311        '{' => $rOpts->{'brace-tightness'},
7312        '}' => $rOpts->{'brace-tightness'},
7313        '(' => $rOpts->{'paren-tightness'},
7314        ')' => $rOpts->{'paren-tightness'},
7315        '[' => $rOpts->{'square-bracket-tightness'},
7316        ']' => $rOpts->{'square-bracket-tightness'},
7317    );
7318    %matching_token = (
7319        '{' => '}',
7320        '(' => ')',
7321        '[' => ']',
7322        '?' => ':',
7323    );
7324
7325    # frequently used parameters
7326    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7327    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7328    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7329    $rOpts_block_brace_vertical_tightness =
7330      $rOpts->{'block-brace-vertical-tightness'};
7331    $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7332    $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7333    $rOpts_break_at_old_ternary_breakpoints =
7334      $rOpts->{'break-at-old-ternary-breakpoints'};
7335    $rOpts_break_at_old_comma_breakpoints =
7336      $rOpts->{'break-at-old-comma-breakpoints'};
7337    $rOpts_break_at_old_keyword_breakpoints =
7338      $rOpts->{'break-at-old-keyword-breakpoints'};
7339    $rOpts_break_at_old_logical_breakpoints =
7340      $rOpts->{'break-at-old-logical-breakpoints'};
7341    $rOpts_closing_side_comment_else_flag =
7342      $rOpts->{'closing-side-comment-else-flag'};
7343    $rOpts_closing_side_comment_maximum_text =
7344      $rOpts->{'closing-side-comment-maximum-text'};
7345    $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7346    $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7347    $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7348    $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7349    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7350    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7351    $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7352    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7353    $rOpts_short_concatenation_item_length =
7354      $rOpts->{'short-concatenation-item-length'};
7355    $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
7356    $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7357    $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7358    $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7359    $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7360    $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7361    $half_maximum_line_length       = $rOpts_maximum_line_length / 2;
7362
7363    # Note that both opening and closing tokens can access the opening
7364    # and closing flags of their container types.
7365    %opening_vertical_tightness = (
7366        '(' => $rOpts->{'paren-vertical-tightness'},
7367        '{' => $rOpts->{'brace-vertical-tightness'},
7368        '[' => $rOpts->{'square-bracket-vertical-tightness'},
7369        ')' => $rOpts->{'paren-vertical-tightness'},
7370        '}' => $rOpts->{'brace-vertical-tightness'},
7371        ']' => $rOpts->{'square-bracket-vertical-tightness'},
7372    );
7373
7374    %closing_vertical_tightness = (
7375        '(' => $rOpts->{'paren-vertical-tightness-closing'},
7376        '{' => $rOpts->{'brace-vertical-tightness-closing'},
7377        '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7378        ')' => $rOpts->{'paren-vertical-tightness-closing'},
7379        '}' => $rOpts->{'brace-vertical-tightness-closing'},
7380        ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7381    );
7382
7383    # assume flag for '>' same as ')' for closing qw quotes
7384    %closing_token_indentation = (
7385        ')' => $rOpts->{'closing-paren-indentation'},
7386        '}' => $rOpts->{'closing-brace-indentation'},
7387        ']' => $rOpts->{'closing-square-bracket-indentation'},
7388        '>' => $rOpts->{'closing-paren-indentation'},
7389    );
7390
7391    %opening_token_right = (
7392        '(' => $rOpts->{'opening-paren-right'},
7393        '{' => $rOpts->{'opening-hash-brace-right'},
7394        '[' => $rOpts->{'opening-square-bracket-right'},
7395    );
7396
7397    %stack_opening_token = (
7398        '(' => $rOpts->{'stack-opening-paren'},
7399        '{' => $rOpts->{'stack-opening-hash-brace'},
7400        '[' => $rOpts->{'stack-opening-square-bracket'},
7401    );
7402
7403    %stack_closing_token = (
7404        ')' => $rOpts->{'stack-closing-paren'},
7405        '}' => $rOpts->{'stack-closing-hash-brace'},
7406        ']' => $rOpts->{'stack-closing-square-bracket'},
7407    );
7408}
7409
7410sub make_static_block_comment_pattern {
7411
7412    # create the pattern used to identify static block comments
7413    $static_block_comment_pattern = '^\s*##';
7414
7415    # allow the user to change it
7416    if ( $rOpts->{'static-block-comment-prefix'} ) {
7417        my $prefix = $rOpts->{'static-block-comment-prefix'};
7418        $prefix =~ s/^\s*//;
7419        my $pattern = $prefix;
7420
7421        # user may give leading caret to force matching left comments only
7422        if ( $prefix !~ /^\^#/ ) {
7423            if ( $prefix !~ /^#/ ) {
7424                die
7425"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7426            }
7427            $pattern = '^\s*' . $prefix;
7428        }
7429        eval "'##'=~/$pattern/";
7430        if ($@) {
7431            die
7432"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7433        }
7434        $static_block_comment_pattern = $pattern;
7435    }
7436}
7437
7438sub make_format_skipping_pattern {
7439    my ( $opt_name, $default ) = @_;
7440    my $param = $rOpts->{$opt_name};
7441    unless ($param) { $param = $default }
7442    $param =~ s/^\s*//;
7443    if ( $param !~ /^#/ ) {
7444        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7445    }
7446    my $pattern = '^' . $param . '\s';
7447    eval "'#'=~/$pattern/";
7448    if ($@) {
7449        die
7450"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7451    }
7452    return $pattern;
7453}
7454
7455sub make_closing_side_comment_list_pattern {
7456
7457    # turn any input list into a regex for recognizing selected block types
7458    $closing_side_comment_list_pattern = '^\w+';
7459    if ( defined( $rOpts->{'closing-side-comment-list'} )
7460        && $rOpts->{'closing-side-comment-list'} )
7461    {
7462        $closing_side_comment_list_pattern =
7463          make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7464    }
7465}
7466
7467sub make_bli_pattern {
7468
7469    if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7470        && $rOpts->{'brace-left-and-indent-list'} )
7471    {
7472        $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7473    }
7474
7475    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7476}
7477
7478sub make_block_brace_vertical_tightness_pattern {
7479
7480    # turn any input list into a regex for recognizing selected block types
7481    $block_brace_vertical_tightness_pattern =
7482      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7483
7484    if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7485        && $rOpts->{'block-brace-vertical-tightness-list'} )
7486    {
7487        $block_brace_vertical_tightness_pattern =
7488          make_block_pattern( '-bbvtl',
7489            $rOpts->{'block-brace-vertical-tightness-list'} );
7490    }
7491}
7492
7493sub make_block_pattern {
7494
7495    #  given a string of block-type keywords, return a regex to match them
7496    #  The only tricky part is that labels are indicated with a single ':'
7497    #  and the 'sub' token text may have additional text after it (name of
7498    #  sub).
7499    #
7500    #  Example:
7501    #
7502    #   input string: "if else elsif unless while for foreach do : sub";
7503    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7504
7505    my ( $abbrev, $string ) = @_;
7506    my @list  = split_words($string);
7507    my @words = ();
7508    my %seen;
7509    for my $i (@list) {
7510        next if $seen{$i};
7511        $seen{$i} = 1;
7512        if ( $i eq 'sub' ) {
7513        }
7514        elsif ( $i eq ':' ) {
7515            push @words, '\w+:';
7516        }
7517        elsif ( $i =~ /^\w/ ) {
7518            push @words, $i;
7519        }
7520        else {
7521            warn "unrecognized block type $i after $abbrev, ignoring\n";
7522        }
7523    }
7524    my $pattern = '(' . join( '|', @words ) . ')$';
7525    if ( $seen{'sub'} ) {
7526        $pattern = '(' . $pattern . '|sub)';
7527    }
7528    $pattern = '^' . $pattern;
7529    return $pattern;
7530}
7531
7532sub make_static_side_comment_pattern {
7533
7534    # create the pattern used to identify static side comments
7535    $static_side_comment_pattern = '^##';
7536
7537    # allow the user to change it
7538    if ( $rOpts->{'static-side-comment-prefix'} ) {
7539        my $prefix = $rOpts->{'static-side-comment-prefix'};
7540        $prefix =~ s/^\s*//;
7541        my $pattern = '^' . $prefix;
7542        eval "'##'=~/$pattern/";
7543        if ($@) {
7544            die
7545"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7546        }
7547        $static_side_comment_pattern = $pattern;
7548    }
7549}
7550
7551sub make_closing_side_comment_prefix {
7552
7553    # Be sure we have a valid closing side comment prefix
7554    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7555    my $csc_prefix_pattern;
7556    if ( !defined($csc_prefix) ) {
7557        $csc_prefix         = '## end';
7558        $csc_prefix_pattern = '^##\s+end';
7559    }
7560    else {
7561        my $test_csc_prefix = $csc_prefix;
7562        if ( $test_csc_prefix !~ /^#/ ) {
7563            $test_csc_prefix = '#' . $test_csc_prefix;
7564        }
7565
7566        # make a regex to recognize the prefix
7567        my $test_csc_prefix_pattern = $test_csc_prefix;
7568
7569        # escape any special characters
7570        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7571
7572        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7573
7574        # allow exact number of intermediate spaces to vary
7575        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7576
7577        # make sure we have a good pattern
7578        # if we fail this we probably have an error in escaping
7579        # characters.
7580        eval "'##'=~/$test_csc_prefix_pattern/";
7581        if ($@) {
7582
7583            # shouldn't happen..must have screwed up escaping, above
7584            report_definite_bug();
7585            warn
7586"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7587
7588            # just warn and keep going with defaults
7589            warn "Please consider using a simpler -cscp prefix\n";
7590            warn "Using default -cscp instead; please check output\n";
7591        }
7592        else {
7593            $csc_prefix         = $test_csc_prefix;
7594            $csc_prefix_pattern = $test_csc_prefix_pattern;
7595        }
7596    }
7597    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7598    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7599}
7600
7601sub dump_want_left_space {
7602    my $fh = shift;
7603    local $" = "\n";
7604    print $fh <<EOM;
7605These values are the main control of whitespace to the left of a token type;
7606They may be altered with the -wls parameter.
7607For a list of token types, use perltidy --dump-token-types (-dtt)
7608 1 means the token wants a space to its left
7609-1 means the token does not want a space to its left
7610------------------------------------------------------------------------
7611EOM
7612    foreach ( sort keys %want_left_space ) {
7613        print $fh "$_\t$want_left_space{$_}\n";
7614    }
7615}
7616
7617sub dump_want_right_space {
7618    my $fh = shift;
7619    local $" = "\n";
7620    print $fh <<EOM;
7621These values are the main control of whitespace to the right of a token type;
7622They may be altered with the -wrs parameter.
7623For a list of token types, use perltidy --dump-token-types (-dtt)
7624 1 means the token wants a space to its right
7625-1 means the token does not want a space to its right
7626------------------------------------------------------------------------
7627EOM
7628    foreach ( sort keys %want_right_space ) {
7629        print $fh "$_\t$want_right_space{$_}\n";
7630    }
7631}
7632
7633{    # begin is_essential_whitespace
7634
7635    my %is_sort_grep_map;
7636    my %is_for_foreach;
7637
7638    BEGIN {
7639
7640        @_ = qw(sort grep map);
7641        @is_sort_grep_map{@_} = (1) x scalar(@_);
7642
7643        @_ = qw(for foreach);
7644        @is_for_foreach{@_} = (1) x scalar(@_);
7645
7646    }
7647
7648    sub is_essential_whitespace {
7649
7650        # Essential whitespace means whitespace which cannot be safely deleted
7651        # without risking the introduction of a syntax error.
7652        # We are given three tokens and their types:
7653        # ($tokenl, $typel) is the token to the left of the space in question
7654        # ($tokenr, $typer) is the token to the right of the space in question
7655        # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7656        #
7657        # This is a slow routine but is not needed too often except when -mangle
7658        # is used.
7659        #
7660        # Note: This routine should almost never need to be changed.  It is
7661        # for avoiding syntax problems rather than for formatting.
7662        my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7663
7664        my $result =
7665
7666          # never combine two bare words or numbers
7667          # examples:  and ::ok(1)
7668          #            return ::spw(...)
7669          #            for bla::bla:: abc
7670          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7671          #            $input eq"quit" to make $inputeq"quit"
7672          #            my $size=-s::SINK if $file;  <==OK but we won't do it
7673          # don't join something like: for bla::bla:: abc
7674          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7675          ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7676
7677          # do not combine a number with a concatination dot
7678          # example: pom.caputo:
7679          # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7680          || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7681          || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7682
7683          # do not join a minus with a bare word, because you might form
7684          # a file test operator.  Example from Complex.pm:
7685          # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7686          || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7687
7688          # and something like this could become ambiguous without space
7689          # after the '-':
7690          #   use constant III=>1;
7691          #   $a = $b - III;
7692          # and even this:
7693          #   $a = - III;
7694          || ( ( $tokenl eq '-' )
7695            && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7696
7697          # '= -' should not become =- or you will get a warning
7698          # about reversed -=
7699          # || ($tokenr eq '-')
7700
7701          # keep a space between a quote and a bareword to prevent the
7702          # bareword from becomming a quote modifier.
7703          || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7704
7705          # keep a space between a token ending in '$' and any word;
7706          # this caused trouble:  "die @$ if $@"
7707          || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7708            && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7709
7710          # perl is very fussy about spaces before <<
7711          || ( $tokenr =~ /^\<\</ )
7712
7713          # avoid combining tokens to create new meanings. Example:
7714          #     $a+ +$b must not become $a++$b
7715          || ( $is_digraph{ $tokenl . $tokenr } )
7716          || ( $is_trigraph{ $tokenl . $tokenr } )
7717
7718          # another example: do not combine these two &'s:
7719          #     allow_options & &OPT_EXECCGI
7720          || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7721
7722          # don't combine $$ or $# with any alphanumeric
7723          # (testfile mangle.t with --mangle)
7724          || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7725
7726          # retain any space after possible filehandle
7727          # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7728          || ( $typel eq 'Z' )
7729
7730          # Perl is sensitive to whitespace after the + here:
7731          #  $b = xvals $a + 0.1 * yvals $a;
7732          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7733
7734          # keep paren separate in 'use Foo::Bar ()'
7735          || ( $tokenr eq '('
7736            && $typel   eq 'w'
7737            && $typell  eq 'k'
7738            && $tokenll eq 'use' )
7739
7740          # keep any space between filehandle and paren:
7741          # file mangle.t with --mangle:
7742          || ( $typel eq 'Y' && $tokenr eq '(' )
7743
7744          # retain any space after here doc operator ( hereerr.t)
7745          || ( $typel eq 'h' )
7746
7747          # be careful with a space around ++ and --, to avoid ambiguity as to
7748          # which token it applies
7749          || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7750          || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7751
7752          # need space after foreach my; for example, this will fail in
7753          # older versions of Perl:
7754          # foreach my$ft(@filetypes)...
7755          || (
7756            $tokenl eq 'my'
7757
7758            #  /^(for|foreach)$/
7759            && $is_for_foreach{$tokenll}
7760            && $tokenr =~ /^\$/
7761          )
7762
7763          # must have space between grep and left paren; "grep(" will fail
7764          || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7765
7766          # don't stick numbers next to left parens, as in:
7767          #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7768          || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7769
7770          # We must be sure that a space between a ? and a quoted string
7771          # remains if the space before the ? remains.  [Loca.pm, lockarea]
7772          # ie,
7773          #    $b=join $comma ? ',' : ':', @_;  # ok
7774          #    $b=join $comma?',' : ':', @_;    # ok!
7775          #    $b=join $comma ?',' : ':', @_;   # error!
7776          # Not really required:
7777          ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
7778
7779          # do not remove space between an '&' and a bare word because
7780          # it may turn into a function evaluation, like here
7781          # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
7782          #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
7783          || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7784
7785          ;    # the value of this long logic sequence is the result we want
7786        return $result;
7787    }
7788}
7789
7790sub set_white_space_flag {
7791
7792    #    This routine examines each pair of nonblank tokens and
7793    #    sets values for array @white_space_flag.
7794    #
7795    #    $white_space_flag[$j] is a flag indicating whether a white space
7796    #    BEFORE token $j is needed, with the following values:
7797    #
7798    #            -1 do not want a space before token $j
7799    #             0 optional space or $j is a whitespace
7800    #             1 want a space before token $j
7801    #
7802    #
7803    #   The values for the first token will be defined based
7804    #   upon the contents of the "to_go" output array.
7805    #
7806    #   Note: retain debug print statements because they are usually
7807    #   required after adding new token types.
7808
7809    BEGIN {
7810
7811        # initialize these global hashes, which control the use of
7812        # whitespace around tokens:
7813        #
7814        # %binary_ws_rules
7815        # %want_left_space
7816        # %want_right_space
7817        # %space_after_keyword
7818        #
7819        # Many token types are identical to the tokens themselves.
7820        # See the tokenizer for a complete list. Here are some special types:
7821        #   k = perl keyword
7822        #   f = semicolon in for statement
7823        #   m = unary minus
7824        #   p = unary plus
7825        # Note that :: is excluded since it should be contained in an identifier
7826        # Note that '->' is excluded because it never gets space
7827        # parentheses and brackets are excluded since they are handled specially
7828        # curly braces are included but may be overridden by logic, such as
7829        # newline logic.
7830
7831        # NEW_TOKENS: create a whitespace rule here.  This can be as
7832        # simple as adding your new letter to @spaces_both_sides, for
7833        # example.
7834
7835        @_ = qw" L { ( [ ";
7836        @is_opening_type{@_} = (1) x scalar(@_);
7837
7838        @_ = qw" R } ) ] ";
7839        @is_closing_type{@_} = (1) x scalar(@_);
7840
7841        my @spaces_both_sides = qw"
7842          + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7843          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7844          &&= ||= //= <=> A k f w F n C Y U G v
7845          ";
7846
7847        my @spaces_left_side = qw"
7848          t ! ~ m p { \ h pp mm Z j
7849          ";
7850        push( @spaces_left_side, '#' );    # avoids warning message
7851
7852        my @spaces_right_side = qw"
7853          ; } ) ] R J ++ -- **=
7854          ";
7855        push( @spaces_right_side, ',' );    # avoids warning message
7856        @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7857        @want_right_space{@spaces_both_sides} =
7858          (1) x scalar(@spaces_both_sides);
7859        @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7860        @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7861        @want_left_space{@spaces_right_side} =
7862          (-1) x scalar(@spaces_right_side);
7863        @want_right_space{@spaces_right_side} =
7864          (1) x scalar(@spaces_right_side);
7865        $want_left_space{'L'}   = WS_NO;
7866        $want_left_space{'->'}  = WS_NO;
7867        $want_right_space{'->'} = WS_NO;
7868        $want_left_space{'**'}  = WS_NO;
7869        $want_right_space{'**'} = WS_NO;
7870
7871        # hash type information must stay tightly bound
7872        # as in :  ${xxxx}
7873        $binary_ws_rules{'i'}{'L'} = WS_NO;
7874        $binary_ws_rules{'i'}{'{'} = WS_YES;
7875        $binary_ws_rules{'k'}{'{'} = WS_YES;
7876        $binary_ws_rules{'U'}{'{'} = WS_YES;
7877        $binary_ws_rules{'i'}{'['} = WS_NO;
7878        $binary_ws_rules{'R'}{'L'} = WS_NO;
7879        $binary_ws_rules{'R'}{'{'} = WS_NO;
7880        $binary_ws_rules{'t'}{'L'} = WS_NO;
7881        $binary_ws_rules{'t'}{'{'} = WS_NO;
7882        $binary_ws_rules{'}'}{'L'} = WS_NO;
7883        $binary_ws_rules{'}'}{'{'} = WS_NO;
7884        $binary_ws_rules{'$'}{'L'} = WS_NO;
7885        $binary_ws_rules{'$'}{'{'} = WS_NO;
7886        $binary_ws_rules{'@'}{'L'} = WS_NO;
7887        $binary_ws_rules{'@'}{'{'} = WS_NO;
7888        $binary_ws_rules{'='}{'L'} = WS_YES;
7889
7890        # the following includes ') {'
7891        # as in :    if ( xxx ) { yyy }
7892        $binary_ws_rules{']'}{'L'} = WS_NO;
7893        $binary_ws_rules{']'}{'{'} = WS_NO;
7894        $binary_ws_rules{')'}{'{'} = WS_YES;
7895        $binary_ws_rules{')'}{'['} = WS_NO;
7896        $binary_ws_rules{']'}{'['} = WS_NO;
7897        $binary_ws_rules{']'}{'{'} = WS_NO;
7898        $binary_ws_rules{'}'}{'['} = WS_NO;
7899        $binary_ws_rules{'R'}{'['} = WS_NO;
7900
7901        $binary_ws_rules{']'}{'++'} = WS_NO;
7902        $binary_ws_rules{']'}{'--'} = WS_NO;
7903        $binary_ws_rules{')'}{'++'} = WS_NO;
7904        $binary_ws_rules{')'}{'--'} = WS_NO;
7905
7906        $binary_ws_rules{'R'}{'++'} = WS_NO;
7907        $binary_ws_rules{'R'}{'--'} = WS_NO;
7908
7909        ########################################################
7910        # should no longer be necessary (see niek.pl)
7911        ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7912        ##$binary_ws_rules{'w'}{':'} = WS_NO;
7913        ########################################################
7914        $binary_ws_rules{'i'}{'Q'} = WS_YES;
7915        $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7916
7917        # FIXME: we need to split 'i' into variables and functions
7918        # and have no space for functions but space for variables.  For now,
7919        # I have a special patch in the special rules below
7920        $binary_ws_rules{'i'}{'('} = WS_NO;
7921
7922        $binary_ws_rules{'w'}{'('} = WS_NO;
7923        $binary_ws_rules{'w'}{'{'} = WS_YES;
7924    }
7925    my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7926    my ( $last_token, $last_type, $last_block_type, $token, $type,
7927        $block_type );
7928    my (@white_space_flag);
7929    my $j_tight_closing_paren = -1;
7930
7931    if ( $max_index_to_go >= 0 ) {
7932        $token      = $tokens_to_go[$max_index_to_go];
7933        $type       = $types_to_go[$max_index_to_go];
7934        $block_type = $block_type_to_go[$max_index_to_go];
7935    }
7936    else {
7937        $token      = ' ';
7938        $type       = 'b';
7939        $block_type = '';
7940    }
7941
7942    # loop over all tokens
7943    my ( $j, $ws );
7944
7945    for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7946
7947        if ( $$rtoken_type[$j] eq 'b' ) {
7948            $white_space_flag[$j] = WS_OPTIONAL;
7949            next;
7950        }
7951
7952        # set a default value, to be changed as needed
7953        $ws              = undef;
7954        $last_token      = $token;
7955        $last_type       = $type;
7956        $last_block_type = $block_type;
7957        $token           = $$rtokens[$j];
7958        $type            = $$rtoken_type[$j];
7959        $block_type      = $$rblock_type[$j];
7960
7961        #---------------------------------------------------------------
7962        # section 1:
7963        # handle space on the inside of opening braces
7964        #---------------------------------------------------------------
7965
7966        #    /^[L\{\(\[]$/
7967        if ( $is_opening_type{$last_type} ) {
7968
7969            $j_tight_closing_paren = -1;
7970
7971            # let's keep empty matched braces together: () {} []
7972            # except for BLOCKS
7973            if ( $token eq $matching_token{$last_token} ) {
7974                if ($block_type) {
7975                    $ws = WS_YES;
7976                }
7977                else {
7978                    $ws = WS_NO;
7979                }
7980            }
7981            else {
7982
7983                # we're considering the right of an opening brace
7984                # tightness = 0 means always pad inside with space
7985                # tightness = 1 means pad inside if "complex"
7986                # tightness = 2 means never pad inside with space
7987
7988                my $tightness;
7989                if (   $last_type eq '{'
7990                    && $last_token eq '{'
7991                    && $last_block_type )
7992                {
7993                    $tightness = $rOpts_block_brace_tightness;
7994                }
7995                else { $tightness = $tightness{$last_token} }
7996
7997    #=================================================================
7998    # Patch for fabrice_bug.pl
7999    # We must always avoid spaces around a bare word beginning with ^ as in:
8000    #    my $before = ${^PREMATCH};
8001    # Because all of the following cause an error in perl:
8002    #    my $before = ${ ^PREMATCH };
8003    #    my $before = ${ ^PREMATCH};
8004    #    my $before = ${^PREMATCH };
8005    # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
8006    # Note that here we must set tightness=1 and not 2 so that the closing space
8007    # is also avoided (via the $j_tight_closing_paren flag in coding)
8008                if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8009
8010              #=================================================================
8011
8012                if ( $tightness <= 0 ) {
8013                    $ws = WS_YES;
8014                }
8015                elsif ( $tightness > 1 ) {
8016                    $ws = WS_NO;
8017                }
8018                else {
8019
8020                    # Patch to count '-foo' as single token so that
8021                    # each of  $a{-foo} and $a{foo} and $a{'foo'} do
8022                    # not get spaces with default formatting.
8023                    my $j_here = $j;
8024                    ++$j_here
8025                      if ( $token eq '-'
8026                        && $last_token eq '{'
8027                        && $$rtoken_type[ $j + 1 ] eq 'w' );
8028
8029                    # $j_next is where a closing token should be if
8030                    # the container has a single token
8031                    my $j_next =
8032                      ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8033                      ? $j_here + 2
8034                      : $j_here + 1;
8035                    my $tok_next  = $$rtokens[$j_next];
8036                    my $type_next = $$rtoken_type[$j_next];
8037
8038                    # for tightness = 1, if there is just one token
8039                    # within the matching pair, we will keep it tight
8040                    if (
8041                        $tok_next eq $matching_token{$last_token}
8042
8043                        # but watch out for this: [ [ ]    (misc.t)
8044                        && $last_token ne $token
8045                      )
8046                    {
8047
8048                        # remember where to put the space for the closing paren
8049                        $j_tight_closing_paren = $j_next;
8050                        $ws                    = WS_NO;
8051                    }
8052                    else {
8053                        $ws = WS_YES;
8054                    }
8055                }
8056            }
8057        }    # done with opening braces and brackets
8058        my $ws_1 = $ws
8059          if FORMATTER_DEBUG_FLAG_WHITE;
8060
8061        #---------------------------------------------------------------
8062        # section 2:
8063        # handle space on inside of closing brace pairs
8064        #---------------------------------------------------------------
8065
8066        #   /[\}\)\]R]/
8067        if ( $is_closing_type{$type} ) {
8068
8069            if ( $j == $j_tight_closing_paren ) {
8070
8071                $j_tight_closing_paren = -1;
8072                $ws                    = WS_NO;
8073            }
8074            else {
8075
8076                if ( !defined($ws) ) {
8077
8078                    my $tightness;
8079                    if ( $type eq '}' && $token eq '}' && $block_type ) {
8080                        $tightness = $rOpts_block_brace_tightness;
8081                    }
8082                    else { $tightness = $tightness{$token} }
8083
8084                    $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8085                }
8086            }
8087        }
8088
8089        my $ws_2 = $ws
8090          if FORMATTER_DEBUG_FLAG_WHITE;
8091
8092        #---------------------------------------------------------------
8093        # section 3:
8094        # use the binary table
8095        #---------------------------------------------------------------
8096        if ( !defined($ws) ) {
8097            $ws = $binary_ws_rules{$last_type}{$type};
8098        }
8099        my $ws_3 = $ws
8100          if FORMATTER_DEBUG_FLAG_WHITE;
8101
8102        #---------------------------------------------------------------
8103        # section 4:
8104        # some special cases
8105        #---------------------------------------------------------------
8106        if ( $token eq '(' ) {
8107
8108            # This will have to be tweaked as tokenization changes.
8109            # We usually want a space at '} (', for example:
8110            #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8111            #
8112            # But not others:
8113            #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8114            # At present, the above & block is marked as type L/R so this case
8115            # won't go through here.
8116            if ( $last_type eq '}' ) { $ws = WS_YES }
8117
8118            # NOTE: some older versions of Perl had occasional problems if
8119            # spaces are introduced between keywords or functions and opening
8120            # parens.  So the default is not to do this except is certain
8121            # cases.  The current Perl seems to tolerate spaces.
8122
8123            # Space between keyword and '('
8124            elsif ( $last_type eq 'k' ) {
8125                $ws = WS_NO
8126                  unless ( $rOpts_space_keyword_paren
8127                    || $space_after_keyword{$last_token} );
8128            }
8129
8130            # Space between function and '('
8131            # -----------------------------------------------------
8132            # 'w' and 'i' checks for something like:
8133            #   myfun(    &myfun(   ->myfun(
8134            # -----------------------------------------------------
8135            elsif (( $last_type =~ /^[wUG]$/ )
8136                || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8137            {
8138                $ws = WS_NO unless ($rOpts_space_function_paren);
8139            }
8140
8141            # space between something like $i and ( in
8142            # for $i ( 0 .. 20 ) {
8143            # FIXME: eventually, type 'i' needs to be split into multiple
8144            # token types so this can be a hardwired rule.
8145            elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8146                $ws = WS_YES;
8147            }
8148
8149            # allow constant function followed by '()' to retain no space
8150            elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8151                $ws = WS_NO;
8152            }
8153        }
8154
8155        # patch for SWITCH/CASE: make space at ']{' optional
8156        # since the '{' might begin a case or when block
8157        elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8158            $ws = WS_OPTIONAL;
8159        }
8160
8161        # keep space between 'sub' and '{' for anonymous sub definition
8162        if ( $type eq '{' ) {
8163            if ( $last_token eq 'sub' ) {
8164                $ws = WS_YES;
8165            }
8166
8167            # this is needed to avoid no space in '){'
8168            if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8169
8170            # avoid any space before the brace or bracket in something like
8171            #  @opts{'a','b',...}
8172            if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8173                $ws = WS_NO;
8174            }
8175        }
8176
8177        elsif ( $type eq 'i' ) {
8178
8179            # never a space before ->
8180            if ( $token =~ /^\-\>/ ) {
8181                $ws = WS_NO;
8182            }
8183        }
8184
8185        # retain any space between '-' and bare word
8186        elsif ( $type eq 'w' || $type eq 'C' ) {
8187            $ws = WS_OPTIONAL if $last_type eq '-';
8188
8189            # never a space before ->
8190            if ( $token =~ /^\-\>/ ) {
8191                $ws = WS_NO;
8192            }
8193        }
8194
8195        # retain any space between '-' and bare word
8196        # example: avoid space between 'USER' and '-' here:
8197        #   $myhash{USER-NAME}='steve';
8198        elsif ( $type eq 'm' || $type eq '-' ) {
8199            $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8200        }
8201
8202        # always space before side comment
8203        elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8204
8205        # always preserver whatever space was used after a possible
8206        # filehandle (except _) or here doc operator
8207        if (
8208            $type ne '#'
8209            && ( ( $last_type eq 'Z' && $last_token ne '_' )
8210                || $last_type eq 'h' )
8211          )
8212        {
8213            $ws = WS_OPTIONAL;
8214        }
8215
8216        my $ws_4 = $ws
8217          if FORMATTER_DEBUG_FLAG_WHITE;
8218
8219        #---------------------------------------------------------------
8220        # section 5:
8221        # default rules not covered above
8222        #---------------------------------------------------------------
8223        # if we fall through to here,
8224        # look at the pre-defined hash tables for the two tokens, and
8225        # if (they are equal) use the common value
8226        # if (either is zero or undef) use the other
8227        # if (either is -1) use it
8228        # That is,
8229        # left  vs right
8230        #  1    vs    1     -->  1
8231        #  0    vs    0     -->  0
8232        # -1    vs   -1     --> -1
8233        #
8234        #  0    vs   -1     --> -1
8235        #  0    vs    1     -->  1
8236        #  1    vs    0     -->  1
8237        # -1    vs    0     --> -1
8238        #
8239        # -1    vs    1     --> -1
8240        #  1    vs   -1     --> -1
8241        if ( !defined($ws) ) {
8242            my $wl = $want_left_space{$type};
8243            my $wr = $want_right_space{$last_type};
8244            if ( !defined($wl) ) { $wl = 0 }
8245            if ( !defined($wr) ) { $wr = 0 }
8246            $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8247        }
8248
8249        if ( !defined($ws) ) {
8250            $ws = 0;
8251            write_diagnostics(
8252                "WS flag is undefined for tokens $last_token $token\n");
8253        }
8254
8255        # Treat newline as a whitespace. Otherwise, we might combine
8256        # 'Send' and '-recipients' here according to the above rules:
8257        #    my $msg = new Fax::Send
8258        #      -recipients => $to,
8259        #      -data => $data;
8260        if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8261
8262        if (   ( $ws == 0 )
8263            && $j > 0
8264            && $j < $jmax
8265            && ( $last_type !~ /^[Zh]$/ ) )
8266        {
8267
8268            # If this happens, we have a non-fatal but undesirable
8269            # hole in the above rules which should be patched.
8270            write_diagnostics(
8271                "WS flag is zero for tokens $last_token $token\n");
8272        }
8273        $white_space_flag[$j] = $ws;
8274
8275        FORMATTER_DEBUG_FLAG_WHITE && do {
8276            my $str = substr( $last_token, 0, 15 );
8277            $str .= ' ' x ( 16 - length($str) );
8278            if ( !defined($ws_1) ) { $ws_1 = "*" }
8279            if ( !defined($ws_2) ) { $ws_2 = "*" }
8280            if ( !defined($ws_3) ) { $ws_3 = "*" }
8281            if ( !defined($ws_4) ) { $ws_4 = "*" }
8282            print
8283"WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8284        };
8285    }
8286    return \@white_space_flag;
8287}
8288
8289{    # begin print_line_of_tokens
8290
8291    my $rtoken_type;
8292    my $rtokens;
8293    my $rlevels;
8294    my $rslevels;
8295    my $rblock_type;
8296    my $rcontainer_type;
8297    my $rcontainer_environment;
8298    my $rtype_sequence;
8299    my $input_line;
8300    my $rnesting_tokens;
8301    my $rci_levels;
8302    my $rnesting_blocks;
8303
8304    my $in_quote;
8305    my $python_indentation_level;
8306
8307    # These local token variables are stored by store_token_to_go:
8308    my $block_type;
8309    my $ci_level;
8310    my $container_environment;
8311    my $container_type;
8312    my $in_continued_quote;
8313    my $level;
8314    my $nesting_blocks;
8315    my $no_internal_newlines;
8316    my $slevel;
8317    my $token;
8318    my $type;
8319    my $type_sequence;
8320
8321    # routine to pull the jth token from the line of tokens
8322    sub extract_token {
8323        my $j = shift;
8324        $token                 = $$rtokens[$j];
8325        $type                  = $$rtoken_type[$j];
8326        $block_type            = $$rblock_type[$j];
8327        $container_type        = $$rcontainer_type[$j];
8328        $container_environment = $$rcontainer_environment[$j];
8329        $type_sequence         = $$rtype_sequence[$j];
8330        $level                 = $$rlevels[$j];
8331        $slevel                = $$rslevels[$j];
8332        $nesting_blocks        = $$rnesting_blocks[$j];
8333        $ci_level              = $$rci_levels[$j];
8334    }
8335
8336    {
8337        my @saved_token;
8338
8339        sub save_current_token {
8340
8341            @saved_token = (
8342                $block_type,            $ci_level,
8343                $container_environment, $container_type,
8344                $in_continued_quote,    $level,
8345                $nesting_blocks,        $no_internal_newlines,
8346                $slevel,                $token,
8347                $type,                  $type_sequence,
8348            );
8349        }
8350
8351        sub restore_current_token {
8352            (
8353                $block_type,            $ci_level,
8354                $container_environment, $container_type,
8355                $in_continued_quote,    $level,
8356                $nesting_blocks,        $no_internal_newlines,
8357                $slevel,                $token,
8358                $type,                  $type_sequence,
8359            ) = @saved_token;
8360        }
8361    }
8362
8363    # Routine to place the current token into the output stream.
8364    # Called once per output token.
8365    sub store_token_to_go {
8366
8367        my $flag = $no_internal_newlines;
8368        if ( $_[0] ) { $flag = 1 }
8369
8370        $tokens_to_go[ ++$max_index_to_go ]            = $token;
8371        $types_to_go[$max_index_to_go]                 = $type;
8372        $nobreak_to_go[$max_index_to_go]               = $flag;
8373        $old_breakpoint_to_go[$max_index_to_go]        = 0;
8374        $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8375        $block_type_to_go[$max_index_to_go]            = $block_type;
8376        $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8377        $container_environment_to_go[$max_index_to_go] = $container_environment;
8378        $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8379        $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8380        $mate_index_to_go[$max_index_to_go]            = -1;
8381        $matching_token_to_go[$max_index_to_go]        = '';
8382        $bond_strength_to_go[$max_index_to_go]         = 0;
8383
8384        # Note: negative levels are currently retained as a diagnostic so that
8385        # the 'final indentation level' is correctly reported for bad scripts.
8386        # But this means that every use of $level as an index must be checked.
8387        # If this becomes too much of a problem, we might give up and just clip
8388        # them at zero.
8389        ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8390        $levels_to_go[$max_index_to_go] = $level;
8391        $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8392        $lengths_to_go[ $max_index_to_go + 1 ] =
8393          $lengths_to_go[$max_index_to_go] + length($token);
8394
8395        # Define the indentation that this token would have if it started
8396        # a new line.  We have to do this now because we need to know this
8397        # when considering one-line blocks.
8398        set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8399
8400        if ( $type ne 'b' ) {
8401            $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8402            $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8403            $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8404            $last_nonblank_index_to_go      = $max_index_to_go;
8405            $last_nonblank_type_to_go       = $type;
8406            $last_nonblank_token_to_go      = $token;
8407            if ( $type eq ',' ) {
8408                $comma_count_in_batch++;
8409            }
8410        }
8411
8412        FORMATTER_DEBUG_FLAG_STORE && do {
8413            my ( $a, $b, $c ) = caller();
8414            print
8415"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8416        };
8417    }
8418
8419    sub insert_new_token_to_go {
8420
8421        # insert a new token into the output stream.  use same level as
8422        # previous token; assumes a character at max_index_to_go.
8423        save_current_token();
8424        ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8425
8426        if ( $max_index_to_go == UNDEFINED_INDEX ) {
8427            warning("code bug: bad call to insert_new_token_to_go\n");
8428        }
8429        $level = $levels_to_go[$max_index_to_go];
8430
8431        # FIXME: it seems to be necessary to use the next, rather than
8432        # previous, value of this variable when creating a new blank (align.t)
8433        #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8434        $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8435        $ci_level              = $ci_levels_to_go[$max_index_to_go];
8436        $container_environment = $container_environment_to_go[$max_index_to_go];
8437        $in_continued_quote    = 0;
8438        $block_type            = "";
8439        $type_sequence         = "";
8440        store_token_to_go();
8441        restore_current_token();
8442        return;
8443    }
8444
8445    sub print_line_of_tokens {
8446
8447        my $line_of_tokens = shift;
8448
8449        # This routine is called once per input line to process all of
8450        # the tokens on that line.  This is the first stage of
8451        # beautification.
8452        #
8453        # Full-line comments and blank lines may be processed immediately.
8454        #
8455        # For normal lines of code, the tokens are stored one-by-one,
8456        # via calls to 'sub store_token_to_go', until a known line break
8457        # point is reached.  Then, the batch of collected tokens is
8458        # passed along to 'sub output_line_to_go' for further
8459        # processing.  This routine decides if there should be
8460        # whitespace between each pair of non-white tokens, so later
8461        # routines only need to decide on any additional line breaks.
8462        # Any whitespace is initally a single space character.  Later,
8463        # the vertical aligner may expand that to be multiple space
8464        # characters if necessary for alignment.
8465
8466        # extract input line number for error messages
8467        $input_line_number = $line_of_tokens->{_line_number};
8468
8469        $rtoken_type            = $line_of_tokens->{_rtoken_type};
8470        $rtokens                = $line_of_tokens->{_rtokens};
8471        $rlevels                = $line_of_tokens->{_rlevels};
8472        $rslevels               = $line_of_tokens->{_rslevels};
8473        $rblock_type            = $line_of_tokens->{_rblock_type};
8474        $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8475        $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8476        $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8477        $input_line             = $line_of_tokens->{_line_text};
8478        $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8479        $rci_levels             = $line_of_tokens->{_rci_levels};
8480        $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8481
8482        $in_continued_quote = $starting_in_quote =
8483          $line_of_tokens->{_starting_in_quote};
8484        $in_quote        = $line_of_tokens->{_ending_in_quote};
8485        $ending_in_quote = $in_quote;
8486        $python_indentation_level =
8487          $line_of_tokens->{_python_indentation_level};
8488
8489        my $j;
8490        my $j_next;
8491        my $jmax;
8492        my $next_nonblank_token;
8493        my $next_nonblank_token_type;
8494        my $rwhite_space_flag;
8495
8496        $jmax                    = @$rtokens - 1;
8497        $block_type              = "";
8498        $container_type          = "";
8499        $container_environment   = "";
8500        $type_sequence           = "";
8501        $no_internal_newlines    = 1 - $rOpts_add_newlines;
8502        $is_static_block_comment = 0;
8503
8504        # Handle a continued quote..
8505        if ($in_continued_quote) {
8506
8507            # A line which is entirely a quote or pattern must go out
8508            # verbatim.  Note: the \n is contained in $input_line.
8509            if ( $jmax <= 0 ) {
8510                if ( ( $input_line =~ "\t" ) ) {
8511                    note_embedded_tab();
8512                }
8513                write_unindented_line("$input_line");
8514                $last_line_had_side_comment = 0;
8515                return;
8516            }
8517
8518            # prior to version 20010406, perltidy had a bug which placed
8519            # continuation indentation before the last line of some multiline
8520            # quotes and patterns -- exactly the lines passing this way.
8521            # To help find affected lines in scripts run with these
8522            # versions, run with '-chk', and it will warn of any quotes or
8523            # patterns which might have been modified by these early
8524            # versions.
8525            if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8526                warning(
8527"-chk: please check this line for extra leading whitespace\n"
8528                );
8529            }
8530        }
8531
8532        # Write line verbatim if we are in a formatting skip section
8533        if ($in_format_skipping_section) {
8534            write_unindented_line("$input_line");
8535            $last_line_had_side_comment = 0;
8536
8537            # Note: extra space appended to comment simplifies pattern matching
8538            if (   $jmax == 0
8539                && $$rtoken_type[0] eq '#'
8540                && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8541            {
8542                $in_format_skipping_section = 0;
8543                write_logfile_entry("Exiting formatting skip section\n");
8544            }
8545            return;
8546        }
8547
8548        # See if we are entering a formatting skip section
8549        if (   $rOpts_format_skipping
8550            && $jmax == 0
8551            && $$rtoken_type[0] eq '#'
8552            && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8553        {
8554            flush();
8555            $in_format_skipping_section = 1;
8556            write_logfile_entry("Entering formatting skip section\n");
8557            write_unindented_line("$input_line");
8558            $last_line_had_side_comment = 0;
8559            return;
8560        }
8561
8562        # delete trailing blank tokens
8563        if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8564
8565        # Handle a blank line..
8566        if ( $jmax < 0 ) {
8567
8568            # If keep-old-blank-lines is zero, we delete all
8569            # old blank lines and let the blank line rules generate any
8570            # needed blanks.
8571            if ($rOpts_keep_old_blank_lines) {
8572                flush();
8573                $file_writer_object->write_blank_code_line(
8574                    $rOpts_keep_old_blank_lines == 2 );
8575                $last_line_leading_type = 'b';
8576            }
8577            $last_line_had_side_comment = 0;
8578            return;
8579        }
8580
8581        # see if this is a static block comment (starts with ## by default)
8582        my $is_static_block_comment_without_leading_space = 0;
8583        if (   $jmax == 0
8584            && $$rtoken_type[0] eq '#'
8585            && $rOpts->{'static-block-comments'}
8586            && $input_line =~ /$static_block_comment_pattern/o )
8587        {
8588            $is_static_block_comment = 1;
8589            $is_static_block_comment_without_leading_space =
8590              substr( $input_line, 0, 1 ) eq '#';
8591        }
8592
8593        # Check for comments which are line directives
8594        # Treat exactly as static block comments without leading space
8595        # reference: perlsyn, near end, section Plain Old Comments (Not!)
8596        # example: '# line 42 "new_filename.plx"'
8597        if (
8598               $jmax == 0
8599            && $$rtoken_type[0] eq '#'
8600            && $input_line =~ /^\#   \s*
8601                               line \s+ (\d+)   \s*
8602                               (?:\s("?)([^"]+)\2)? \s*
8603                               $/x
8604          )
8605        {
8606            $is_static_block_comment                       = 1;
8607            $is_static_block_comment_without_leading_space = 1;
8608        }
8609
8610        # create a hanging side comment if appropriate
8611        if (
8612               $jmax == 0
8613            && $$rtoken_type[0] eq '#'    # only token is a comment
8614            && $last_line_had_side_comment    # last line had side comment
8615            && $input_line =~ /^\s/           # there is some leading space
8616            && !$is_static_block_comment    # do not make static comment hanging
8617            && $rOpts->{'hanging-side-comments'}    # user is allowing this
8618          )
8619        {
8620
8621            # We will insert an empty qw string at the start of the token list
8622            # to force this comment to be a side comment. The vertical aligner
8623            # should then line it up with the previous side comment.
8624            unshift @$rtoken_type,            'q';
8625            unshift @$rtokens,                '';
8626            unshift @$rlevels,                $$rlevels[0];
8627            unshift @$rslevels,               $$rslevels[0];
8628            unshift @$rblock_type,            '';
8629            unshift @$rcontainer_type,        '';
8630            unshift @$rcontainer_environment, '';
8631            unshift @$rtype_sequence,         '';
8632            unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8633            unshift @$rci_levels,             $$rci_levels[0];
8634            unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8635            $jmax = 1;
8636        }
8637
8638        # remember if this line has a side comment
8639        $last_line_had_side_comment =
8640          ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8641
8642        # Handle a block (full-line) comment..
8643        if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8644
8645            if ( $rOpts->{'delete-block-comments'} ) { return }
8646
8647            if ( $rOpts->{'tee-block-comments'} ) {
8648                $file_writer_object->tee_on();
8649            }
8650
8651            destroy_one_line_block();
8652            output_line_to_go();
8653
8654            # output a blank line before block comments
8655            if (
8656                   $last_line_leading_type !~ /^[#b]$/
8657                && $rOpts->{'blanks-before-comments'}    # only if allowed
8658                && !
8659                $is_static_block_comment    # never before static block comments
8660              )
8661            {
8662                flush();                    # switching to new output stream
8663                $file_writer_object->write_blank_code_line();
8664                $last_line_leading_type = 'b';
8665            }
8666
8667            # TRIM COMMENTS -- This could be turned off as a option
8668            $$rtokens[0] =~ s/\s*$//;       # trim right end
8669
8670            if (
8671                $rOpts->{'indent-block-comments'}
8672                && (  !$rOpts->{'indent-spaced-block-comments'}
8673                    || $input_line =~ /^\s+/ )
8674                && !$is_static_block_comment_without_leading_space
8675              )
8676            {
8677                extract_token(0);
8678                store_token_to_go();
8679                output_line_to_go();
8680            }
8681            else {
8682                flush();    # switching to new output stream
8683                $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8684                $last_line_leading_type = '#';
8685            }
8686            if ( $rOpts->{'tee-block-comments'} ) {
8687                $file_writer_object->tee_off();
8688            }
8689            return;
8690        }
8691
8692        # compare input/output indentation except for continuation lines
8693        # (because they have an unknown amount of initial blank space)
8694        # and lines which are quotes (because they may have been outdented)
8695        # Note: this test is placed here because we know the continuation flag
8696        # at this point, which allows us to avoid non-meaningful checks.
8697        my $structural_indentation_level = $$rlevels[0];
8698        compare_indentation_levels( $python_indentation_level,
8699            $structural_indentation_level )
8700          unless ( $python_indentation_level < 0
8701            || ( $$rci_levels[0] > 0 )
8702            || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8703          );
8704
8705        #   Patch needed for MakeMaker.  Do not break a statement
8706        #   in which $VERSION may be calculated.  See MakeMaker.pm;
8707        #   this is based on the coding in it.
8708        #   The first line of a file that matches this will be eval'd:
8709        #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8710        #   Examples:
8711        #     *VERSION = \'1.01';
8712        #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8713        #   We will pass such a line straight through without breaking
8714        #   it unless -npvl is used
8715
8716        my $is_VERSION_statement = 0;
8717
8718        if (
8719              !$saw_VERSION_in_this_file
8720            && $input_line =~ /VERSION/    # quick check to reject most lines
8721            && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8722          )
8723        {
8724            $saw_VERSION_in_this_file = 1;
8725            $is_VERSION_statement     = 1;
8726            write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8727            $no_internal_newlines = 1;
8728        }
8729
8730        # take care of indentation-only
8731        # NOTE: In previous versions we sent all qw lines out immediately here.
8732        # No longer doing this: also write a line which is entirely a 'qw' list
8733        # to allow stacking of opening and closing tokens.  Note that interior
8734        # qw lines will still go out at the end of this routine.
8735        if ( $rOpts->{'indent-only'} ) {
8736            flush();
8737            trim($input_line);
8738
8739            extract_token(0);
8740            $token                 = $input_line;
8741            $type                  = 'q';
8742            $block_type            = "";
8743            $container_type        = "";
8744            $container_environment = "";
8745            $type_sequence         = "";
8746            store_token_to_go();
8747            output_line_to_go();
8748            return;
8749        }
8750
8751        push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8752        push( @$rtoken_type, 'b', 'b' );
8753        ($rwhite_space_flag) =
8754          set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8755
8756        # find input tabbing to allow checks for tabbing disagreement
8757        ## not used for now
8758        ##$input_line_tabbing = "";
8759        ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8760
8761        # if the buffer hasn't been flushed, add a leading space if
8762        # necessary to keep essential whitespace. This is really only
8763        # necessary if we are squeezing out all ws.
8764        if ( $max_index_to_go >= 0 ) {
8765
8766            $old_line_count_in_batch++;
8767
8768            if (
8769                is_essential_whitespace(
8770                    $last_last_nonblank_token,
8771                    $last_last_nonblank_type,
8772                    $tokens_to_go[$max_index_to_go],
8773                    $types_to_go[$max_index_to_go],
8774                    $$rtokens[0],
8775                    $$rtoken_type[0]
8776                )
8777              )
8778            {
8779                my $slevel = $$rslevels[0];
8780                insert_new_token_to_go( ' ', 'b', $slevel,
8781                    $no_internal_newlines );
8782            }
8783        }
8784
8785        # If we just saw the end of an elsif block, write nag message
8786        # if we do not see another elseif or an else.
8787        if ($looking_for_else) {
8788
8789            unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8790                write_logfile_entry("(No else block)\n");
8791            }
8792            $looking_for_else = 0;
8793        }
8794
8795        # This is a good place to kill incomplete one-line blocks
8796        if (   ( $semicolons_before_block_self_destruct == 0 )
8797            && ( $max_index_to_go >= 0 )
8798            && ( $types_to_go[$max_index_to_go] eq ';' )
8799            && ( $$rtokens[0] ne '}' ) )
8800        {
8801            destroy_one_line_block();
8802            output_line_to_go();
8803        }
8804
8805        # loop to process the tokens one-by-one
8806        $type  = 'b';
8807        $token = "";
8808
8809        foreach $j ( 0 .. $jmax ) {
8810
8811            # pull out the local values for this token
8812            extract_token($j);
8813
8814            if ( $type eq '#' ) {
8815
8816                # trim trailing whitespace
8817                # (there is no option at present to prevent this)
8818                $token =~ s/\s*$//;
8819
8820                if (
8821                    $rOpts->{'delete-side-comments'}
8822
8823                    # delete closing side comments if necessary
8824                    || (   $rOpts->{'delete-closing-side-comments'}
8825                        && $token =~ /$closing_side_comment_prefix_pattern/o
8826                        && $last_nonblank_block_type =~
8827                        /$closing_side_comment_list_pattern/o )
8828                  )
8829                {
8830                    if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8831                        unstore_token_to_go();
8832                    }
8833                    last;
8834                }
8835            }
8836
8837            # If we are continuing after seeing a right curly brace, flush
8838            # buffer unless we see what we are looking for, as in
8839            #   } else ...
8840            if ( $rbrace_follower && $type ne 'b' ) {
8841
8842                unless ( $rbrace_follower->{$token} ) {
8843                    output_line_to_go();
8844                }
8845                $rbrace_follower = undef;
8846            }
8847
8848            $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8849            $next_nonblank_token      = $$rtokens[$j_next];
8850            $next_nonblank_token_type = $$rtoken_type[$j_next];
8851
8852            #--------------------------------------------------------
8853            # Start of section to patch token text
8854            #--------------------------------------------------------
8855
8856            # Modify certain tokens here for whitespace
8857            # The following is not yet done, but could be:
8858            #   sub (x x x)
8859            if ( $type =~ /^[wit]$/ ) {
8860
8861                # Examples:
8862                # change '$  var'  to '$var' etc
8863                #        '-> new'  to '->new'
8864                if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8865                    $token =~ s/\s*//g;
8866                }
8867
8868                if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8869            }
8870
8871            # change 'LABEL   :'   to 'LABEL:'
8872            elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8873
8874            # patch to add space to something like "x10"
8875            # This avoids having to split this token in the pre-tokenizer
8876            elsif ( $type eq 'n' ) {
8877                if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8878            }
8879
8880            elsif ( $type eq 'Q' ) {
8881                note_embedded_tab() if ( $token =~ "\t" );
8882
8883                # make note of something like '$var = s/xxx/yyy/;'
8884                # in case it should have been '$var =~ s/xxx/yyy/;'
8885                if (
8886                       $token =~ /^(s|tr|y|m|\/)/
8887                    && $last_nonblank_token =~ /^(=|==|!=)$/
8888
8889                    # precededed by simple scalar
8890                    && $last_last_nonblank_type eq 'i'
8891                    && $last_last_nonblank_token =~ /^\$/
8892
8893                    # followed by some kind of termination
8894                    # (but give complaint if we can's see far enough ahead)
8895                    && $next_nonblank_token =~ /^[; \)\}]$/
8896
8897                    # scalar is not decleared
8898                    && !(
8899                           $types_to_go[0] eq 'k'
8900                        && $tokens_to_go[0] =~ /^(my|our|local)$/
8901                    )
8902                  )
8903                {
8904                    my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8905                    complain(
8906"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8907                    );
8908                }
8909            }
8910
8911           # trim blanks from right of qw quotes
8912           # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8913            elsif ( $type eq 'q' ) {
8914                $token =~ s/\s*$//;
8915                note_embedded_tab() if ( $token =~ "\t" );
8916            }
8917
8918            #--------------------------------------------------------
8919            # End of section to patch token text
8920            #--------------------------------------------------------
8921
8922            # insert any needed whitespace
8923            if (   ( $type ne 'b' )
8924                && ( $max_index_to_go >= 0 )
8925                && ( $types_to_go[$max_index_to_go] ne 'b' )
8926                && $rOpts_add_whitespace )
8927            {
8928                my $ws = $$rwhite_space_flag[$j];
8929
8930                if ( $ws == 1 ) {
8931                    insert_new_token_to_go( ' ', 'b', $slevel,
8932                        $no_internal_newlines );
8933                }
8934            }
8935
8936            # Do not allow breaks which would promote a side comment to a
8937            # block comment.  In order to allow a break before an opening
8938            # or closing BLOCK, followed by a side comment, those sections
8939            # of code will handle this flag separately.
8940            my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8941            my $is_opening_BLOCK =
8942              (      $type eq '{'
8943                  && $token eq '{'
8944                  && $block_type
8945                  && $block_type ne 't' );
8946            my $is_closing_BLOCK =
8947              (      $type eq '}'
8948                  && $token eq '}'
8949                  && $block_type
8950                  && $block_type ne 't' );
8951
8952            if (   $side_comment_follows
8953                && !$is_opening_BLOCK
8954                && !$is_closing_BLOCK )
8955            {
8956                $no_internal_newlines = 1;
8957            }
8958
8959            # We're only going to handle breaking for code BLOCKS at this
8960            # (top) level.  Other indentation breaks will be handled by
8961            # sub scan_list, which is better suited to dealing with them.
8962            if ($is_opening_BLOCK) {
8963
8964                # Tentatively output this token.  This is required before
8965                # calling starting_one_line_block.  We may have to unstore
8966                # it, though, if we have to break before it.
8967                store_token_to_go($side_comment_follows);
8968
8969                # Look ahead to see if we might form a one-line block
8970                my $too_long =
8971                  starting_one_line_block( $j, $jmax, $level, $slevel,
8972                    $ci_level, $rtokens, $rtoken_type, $rblock_type );
8973                clear_breakpoint_undo_stack();
8974
8975                # to simplify the logic below, set a flag to indicate if
8976                # this opening brace is far from the keyword which introduces it
8977                my $keyword_on_same_line = 1;
8978                if (   ( $max_index_to_go >= 0 )
8979                    && ( $last_nonblank_type eq ')' ) )
8980                {
8981                    if (   $block_type =~ /^(if|else|elsif)$/
8982                        && ( $tokens_to_go[0] eq '}' )
8983                        && $rOpts_cuddled_else )
8984                    {
8985                        $keyword_on_same_line = 1;
8986                    }
8987                    elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8988                    {
8989                        $keyword_on_same_line = 0;
8990                    }
8991                }
8992
8993                # decide if user requested break before '{'
8994                my $want_break =
8995
8996                  # use -bl flag if not a sub block of any type
8997                  $block_type !~ /^sub/
8998                  ? $rOpts->{'opening-brace-on-new-line'}
8999
9000                  # use -sbl flag for a named sub block
9001                  : $block_type !~ /^sub\W*$/
9002                  ? $rOpts->{'opening-sub-brace-on-new-line'}
9003
9004                  # use -asbl flag for an anonymous sub block
9005                  : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9006
9007                # Break before an opening '{' ...
9008                if (
9009
9010                    # if requested
9011                    $want_break
9012
9013                    # and we were unable to start looking for a block,
9014                    && $index_start_one_line_block == UNDEFINED_INDEX
9015
9016                    # or if it will not be on same line as its keyword, so that
9017                    # it will be outdented (eval.t, overload.t), and the user
9018                    # has not insisted on keeping it on the right
9019                    || (   !$keyword_on_same_line
9020                        && !$rOpts->{'opening-brace-always-on-right'} )
9021
9022                  )
9023                {
9024
9025                    # but only if allowed
9026                    unless ($no_internal_newlines) {
9027
9028                        # since we already stored this token, we must unstore it
9029                        unstore_token_to_go();
9030
9031                        # then output the line
9032                        output_line_to_go();
9033
9034                        # and now store this token at the start of a new line
9035                        store_token_to_go($side_comment_follows);
9036                    }
9037                }
9038
9039                # Now update for side comment
9040                if ($side_comment_follows) { $no_internal_newlines = 1 }
9041
9042                # now output this line
9043                unless ($no_internal_newlines) {
9044                    output_line_to_go();
9045                }
9046            }
9047
9048            elsif ($is_closing_BLOCK) {
9049
9050                # If there is a pending one-line block ..
9051                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9052
9053                    # we have to terminate it if..
9054                    if (
9055
9056                    # it is too long (final length may be different from
9057                    # initial estimate). note: must allow 1 space for this token
9058                        excess_line_length( $index_start_one_line_block,
9059                            $max_index_to_go ) >= 0
9060
9061                        # or if it has too many semicolons
9062                        || (   $semicolons_before_block_self_destruct == 0
9063                            && $last_nonblank_type ne ';' )
9064                      )
9065                    {
9066                        destroy_one_line_block();
9067                    }
9068                }
9069
9070                # put a break before this closing curly brace if appropriate
9071                unless ( $no_internal_newlines
9072                    || $index_start_one_line_block != UNDEFINED_INDEX )
9073                {
9074
9075                    # add missing semicolon if ...
9076                    # there are some tokens
9077                    if (
9078                        ( $max_index_to_go > 0 )
9079
9080                        # and we don't have one
9081                        && ( $last_nonblank_type ne ';' )
9082
9083                        # patch until some block type issues are fixed:
9084                        # Do not add semi-colon for block types '{',
9085                        # '}', and ';' because we cannot be sure yet
9086                        # that this is a block and not an anonomyous
9087                        # hash (blktype.t, blktype1.t)
9088                        && ( $block_type !~ /^[\{\};]$/ )
9089
9090                        # patch: and do not add semi-colons for recently
9091                        # added block types (see tmp/semicolon.t)
9092                        && ( $block_type !~
9093                            /^(switch|case|given|when|default)$/ )
9094
9095                        # it seems best not to add semicolons in these
9096                        # special block types: sort|map|grep
9097                        && ( !$is_sort_map_grep{$block_type} )
9098
9099                        # and we are allowed to do so.
9100                        && $rOpts->{'add-semicolons'}
9101                      )
9102                    {
9103
9104                        save_current_token();
9105                        $token  = ';';
9106                        $type   = ';';
9107                        $level  = $levels_to_go[$max_index_to_go];
9108                        $slevel = $nesting_depth_to_go[$max_index_to_go];
9109                        $nesting_blocks =
9110                          $nesting_blocks_to_go[$max_index_to_go];
9111                        $ci_level       = $ci_levels_to_go[$max_index_to_go];
9112                        $block_type     = "";
9113                        $container_type = "";
9114                        $container_environment = "";
9115                        $type_sequence         = "";
9116
9117                        # Note - we remove any blank AFTER extracting its
9118                        # parameters such as level, etc, above
9119                        if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9120                            unstore_token_to_go();
9121                        }
9122                        store_token_to_go();
9123
9124                        note_added_semicolon();
9125                        restore_current_token();
9126                    }
9127
9128                    # then write out everything before this closing curly brace
9129                    output_line_to_go();
9130
9131                }
9132
9133                # Now update for side comment
9134                if ($side_comment_follows) { $no_internal_newlines = 1 }
9135
9136                # store the closing curly brace
9137                store_token_to_go();
9138
9139                # ok, we just stored a closing curly brace.  Often, but
9140                # not always, we want to end the line immediately.
9141                # So now we have to check for special cases.
9142
9143                # if this '}' successfully ends a one-line block..
9144                my $is_one_line_block = 0;
9145                my $keep_going        = 0;
9146                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9147
9148                    # Remember the type of token just before the
9149                    # opening brace.  It would be more general to use
9150                    # a stack, but this will work for one-line blocks.
9151                    $is_one_line_block =
9152                      $types_to_go[$index_start_one_line_block];
9153
9154                    # we have to actually make it by removing tentative
9155                    # breaks that were set within it
9156                    undo_forced_breakpoint_stack(0);
9157                    set_nobreaks( $index_start_one_line_block,
9158                        $max_index_to_go - 1 );
9159
9160                    # then re-initialize for the next one-line block
9161                    destroy_one_line_block();
9162
9163                    # then decide if we want to break after the '}' ..
9164                    # We will keep going to allow certain brace followers as in:
9165                    #   do { $ifclosed = 1; last } unless $losing;
9166                    #
9167                    # But make a line break if the curly ends a
9168                    # significant block:
9169                    if (
9170                        $is_block_without_semicolon{$block_type}
9171
9172                        # if needless semicolon follows we handle it later
9173                        && $next_nonblank_token ne ';'
9174                      )
9175                    {
9176                        output_line_to_go() unless ($no_internal_newlines);
9177                    }
9178                }
9179
9180                # set string indicating what we need to look for brace follower
9181                # tokens
9182                if ( $block_type eq 'do' ) {
9183                    $rbrace_follower = \%is_do_follower;
9184                }
9185                elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9186                    $rbrace_follower = \%is_if_brace_follower;
9187                }
9188                elsif ( $block_type eq 'else' ) {
9189                    $rbrace_follower = \%is_else_brace_follower;
9190                }
9191
9192                # added eval for borris.t
9193                elsif ($is_sort_map_grep_eval{$block_type}
9194                    || $is_one_line_block eq 'G' )
9195                {
9196                    $rbrace_follower = undef;
9197                    $keep_going      = 1;
9198                }
9199
9200                # anonymous sub
9201                elsif ( $block_type =~ /^sub\W*$/ ) {
9202
9203                    if ($is_one_line_block) {
9204                        $rbrace_follower = \%is_anon_sub_1_brace_follower;
9205                    }
9206                    else {
9207                        $rbrace_follower = \%is_anon_sub_brace_follower;
9208                    }
9209                }
9210
9211                # None of the above: specify what can follow a closing
9212                # brace of a block which is not an
9213                # if/elsif/else/do/sort/map/grep/eval
9214                # Testfiles:
9215                # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9216                else {
9217                    $rbrace_follower = \%is_other_brace_follower;
9218                }
9219
9220                # See if an elsif block is followed by another elsif or else;
9221                # complain if not.
9222                if ( $block_type eq 'elsif' ) {
9223
9224                    if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9225                        $looking_for_else = 1;    # ok, check on next line
9226                    }
9227                    else {
9228
9229                        unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9230                            write_logfile_entry("No else block :(\n");
9231                        }
9232                    }
9233                }
9234
9235                # keep going after certain block types (map,sort,grep,eval)
9236                # added eval for borris.t
9237                if ($keep_going) {
9238
9239                    # keep going
9240                }
9241
9242                # if no more tokens, postpone decision until re-entring
9243                elsif ( ( $next_nonblank_token_type eq 'b' )
9244                    && $rOpts_add_newlines )
9245                {
9246                    unless ($rbrace_follower) {
9247                        output_line_to_go() unless ($no_internal_newlines);
9248                    }
9249                }
9250
9251                elsif ($rbrace_follower) {
9252
9253                    unless ( $rbrace_follower->{$next_nonblank_token} ) {
9254                        output_line_to_go() unless ($no_internal_newlines);
9255                    }
9256                    $rbrace_follower = undef;
9257                }
9258
9259                else {
9260                    output_line_to_go() unless ($no_internal_newlines);
9261                }
9262
9263            }    # end treatment of closing block token
9264
9265            # handle semicolon
9266            elsif ( $type eq ';' ) {
9267
9268                # kill one-line blocks with too many semicolons
9269                $semicolons_before_block_self_destruct--;
9270                if (
9271                    ( $semicolons_before_block_self_destruct < 0 )
9272                    || (   $semicolons_before_block_self_destruct == 0
9273                        && $next_nonblank_token_type !~ /^[b\}]$/ )
9274                  )
9275                {
9276                    destroy_one_line_block();
9277                }
9278
9279                # Remove unnecessary semicolons, but not after bare
9280                # blocks, where it could be unsafe if the brace is
9281                # mistokenized.
9282                if (
9283                    (
9284                        $last_nonblank_token eq '}'
9285                        && (
9286                            $is_block_without_semicolon{
9287                                $last_nonblank_block_type}
9288                            || $last_nonblank_block_type =~ /^sub\s+\w/
9289                            || $last_nonblank_block_type =~ /^\w+:$/ )
9290                    )
9291                    || $last_nonblank_type eq ';'
9292                  )
9293                {
9294
9295                    if (
9296                        $rOpts->{'delete-semicolons'}
9297
9298                        # don't delete ; before a # because it would promote it
9299                        # to a block comment
9300                        && ( $next_nonblank_token_type ne '#' )
9301                      )
9302                    {
9303                        note_deleted_semicolon();
9304                        output_line_to_go()
9305                          unless ( $no_internal_newlines
9306                            || $index_start_one_line_block != UNDEFINED_INDEX );
9307                        next;
9308                    }
9309                    else {
9310                        write_logfile_entry("Extra ';'\n");
9311                    }
9312                }
9313                store_token_to_go();
9314
9315                output_line_to_go()
9316                  unless ( $no_internal_newlines
9317                    || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9318                    || ( $next_nonblank_token eq '}' ) );
9319
9320            }
9321
9322            # handle here_doc target string
9323            elsif ( $type eq 'h' ) {
9324                $no_internal_newlines =
9325                  1;    # no newlines after seeing here-target
9326                destroy_one_line_block();
9327                store_token_to_go();
9328            }
9329
9330            # handle all other token types
9331            else {
9332
9333                # if this is a blank...
9334                if ( $type eq 'b' ) {
9335
9336                    # make it just one character
9337                    $token = ' ' if $rOpts_add_whitespace;
9338
9339                    # delete it if unwanted by whitespace rules
9340                    # or we are deleting all whitespace
9341                    my $ws = $$rwhite_space_flag[ $j + 1 ];
9342                    if ( ( defined($ws) && $ws == -1 )
9343                        || $rOpts_delete_old_whitespace )
9344                    {
9345
9346                        # unless it might make a syntax error
9347                        next
9348                          unless is_essential_whitespace(
9349                            $last_last_nonblank_token,
9350                            $last_last_nonblank_type,
9351                            $tokens_to_go[$max_index_to_go],
9352                            $types_to_go[$max_index_to_go],
9353                            $$rtokens[ $j + 1 ],
9354                            $$rtoken_type[ $j + 1 ]
9355                          );
9356                    }
9357                }
9358                store_token_to_go();
9359            }
9360
9361            # remember two previous nonblank OUTPUT tokens
9362            if ( $type ne '#' && $type ne 'b' ) {
9363                $last_last_nonblank_token = $last_nonblank_token;
9364                $last_last_nonblank_type  = $last_nonblank_type;
9365                $last_nonblank_token      = $token;
9366                $last_nonblank_type       = $type;
9367                $last_nonblank_block_type = $block_type;
9368            }
9369
9370            # unset the continued-quote flag since it only applies to the
9371            # first token, and we want to resume normal formatting if
9372            # there are additional tokens on the line
9373            $in_continued_quote = 0;
9374
9375        }    # end of loop over all tokens in this 'line_of_tokens'
9376
9377        # we have to flush ..
9378        if (
9379
9380            # if there is a side comment
9381            ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9382
9383            # if this line ends in a quote
9384            # NOTE: This is critically important for insuring that quoted lines
9385            # do not get processed by things like -sot and -sct
9386            || $in_quote
9387
9388            # if this is a VERSION statement
9389            || $is_VERSION_statement
9390
9391            # to keep a label on one line if that is how it is now
9392            || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9393
9394            # if we are instructed to keep all old line breaks
9395            || !$rOpts->{'delete-old-newlines'}
9396          )
9397        {
9398            destroy_one_line_block();
9399            output_line_to_go();
9400        }
9401
9402        # mark old line breakpoints in current output stream
9403        if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9404            $old_breakpoint_to_go[$max_index_to_go] = 1;
9405        }
9406    }    # end sub print_line_of_tokens
9407}    # end print_line_of_tokens
9408
9409# sub output_line_to_go sends one logical line of tokens on down the
9410# pipeline to the VerticalAligner package, breaking the line into continuation
9411# lines as necessary.  The line of tokens is ready to go in the "to_go"
9412# arrays.
9413sub output_line_to_go {
9414
9415    # debug stuff; this routine can be called from many points
9416    FORMATTER_DEBUG_FLAG_OUTPUT && do {
9417        my ( $a, $b, $c ) = caller;
9418        write_diagnostics(
9419"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9420        );
9421        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9422        write_diagnostics("$output_str\n");
9423    };
9424
9425    # just set a tentative breakpoint if we might be in a one-line block
9426    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9427        set_forced_breakpoint($max_index_to_go);
9428        return;
9429    }
9430
9431    my $cscw_block_comment;
9432    $cscw_block_comment = add_closing_side_comment()
9433      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9434
9435    match_opening_and_closing_tokens();
9436
9437    # tell the -lp option we are outputting a batch so it can close
9438    # any unfinished items in its stack
9439    finish_lp_batch();
9440
9441    # If this line ends in a code block brace, set breaks at any
9442    # previous closing code block braces to breakup a chain of code
9443    # blocks on one line.  This is very rare but can happen for
9444    # user-defined subs.  For example we might be looking at this:
9445    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9446    my $saw_good_break = 0;    # flag to force breaks even if short line
9447    if (
9448
9449        # looking for opening or closing block brace
9450        $block_type_to_go[$max_index_to_go]
9451
9452        # but not one of these which are never duplicated on a line:
9453        # until|while|for|if|elsif|else
9454        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9455      )
9456    {
9457        my $lev = $nesting_depth_to_go[$max_index_to_go];
9458
9459        # Walk backwards from the end and
9460        # set break at any closing block braces at the same level.
9461        # But quit if we are not in a chain of blocks.
9462        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9463            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9464            next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9465
9466            if ( $block_type_to_go[$i] ) {
9467                if ( $tokens_to_go[$i] eq '}' ) {
9468                    set_forced_breakpoint($i);
9469                    $saw_good_break = 1;
9470                }
9471            }
9472
9473            # quit if we see anything besides words, function, blanks
9474            # at this level
9475            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9476        }
9477    }
9478
9479    my $imin = 0;
9480    my $imax = $max_index_to_go;
9481
9482    # trim any blank tokens
9483    if ( $max_index_to_go >= 0 ) {
9484        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9485        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9486    }
9487
9488    # anything left to write?
9489    if ( $imin <= $imax ) {
9490
9491        # add a blank line before certain key types
9492        if ( $last_line_leading_type !~ /^[#b]/ ) {
9493            my $want_blank    = 0;
9494            my $leading_token = $tokens_to_go[$imin];
9495            my $leading_type  = $types_to_go[$imin];
9496
9497            # blank lines before subs except declarations and one-liners
9498            # MCONVERSION LOCATION - for sub tokenization change
9499            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9500                $want_blank = ( $rOpts->{'blanks-before-subs'} )
9501                  && (
9502                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9503                        $imax ) !~ /^[\;\}]$/
9504                  );
9505            }
9506
9507            # break before all package declarations
9508            # MCONVERSION LOCATION - for tokenizaton change
9509            elsif ($leading_token =~ /^(package\s)/
9510                && $leading_type eq 'i' )
9511            {
9512                $want_blank = ( $rOpts->{'blanks-before-subs'} );
9513            }
9514
9515            # break before certain key blocks except one-liners
9516            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9517                $want_blank = ( $rOpts->{'blanks-before-subs'} )
9518                  && (
9519                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9520                        $imax ) ne '}'
9521                  );
9522            }
9523
9524            # Break before certain block types if we haven't had a
9525            # break at this level for a while.  This is the
9526            # difficult decision..
9527            elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9528                && $leading_type eq 'k' )
9529            {
9530                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9531                if ( !defined($lc) ) { $lc = 0 }
9532
9533                $want_blank =
9534                     $rOpts->{'blanks-before-blocks'}
9535                  && $lc >= $rOpts->{'long-block-line-count'}
9536                  && $file_writer_object->get_consecutive_nonblank_lines() >=
9537                  $rOpts->{'long-block-line-count'}
9538                  && (
9539                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9540                        $imax ) ne '}'
9541                  );
9542            }
9543
9544            if ($want_blank) {
9545
9546                # future: send blank line down normal path to VerticalAligner
9547                Perl::Tidy::VerticalAligner::flush();
9548                $file_writer_object->write_blank_code_line();
9549            }
9550        }
9551
9552        # update blank line variables and count number of consecutive
9553        # non-blank, non-comment lines at this level
9554        $last_last_line_leading_level = $last_line_leading_level;
9555        $last_line_leading_level      = $levels_to_go[$imin];
9556        if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9557        $last_line_leading_type = $types_to_go[$imin];
9558        if (   $last_line_leading_level == $last_last_line_leading_level
9559            && $last_line_leading_type ne 'b'
9560            && $last_line_leading_type ne '#'
9561            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9562        {
9563            $nonblank_lines_at_depth[$last_line_leading_level]++;
9564        }
9565        else {
9566            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9567        }
9568
9569        FORMATTER_DEBUG_FLAG_FLUSH && do {
9570            my ( $package, $file, $line ) = caller;
9571            print
9572"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9573        };
9574
9575        # add a couple of extra terminal blank tokens
9576        pad_array_to_go();
9577
9578        # set all forced breakpoints for good list formatting
9579        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9580
9581        if (
9582            $max_index_to_go > 0
9583            && (
9584                   $is_long_line
9585                || $old_line_count_in_batch > 1
9586                || is_unbalanced_batch()
9587                || (
9588                    $comma_count_in_batch
9589                    && (   $rOpts_maximum_fields_per_table > 0
9590                        || $rOpts_comma_arrow_breakpoints == 0 )
9591                )
9592            )
9593          )
9594        {
9595            $saw_good_break ||= scan_list();
9596        }
9597
9598        # let $ri_first and $ri_last be references to lists of
9599        # first and last tokens of line fragments to output..
9600        my ( $ri_first, $ri_last );
9601
9602        # write a single line if..
9603        if (
9604
9605            # we aren't allowed to add any newlines
9606            !$rOpts_add_newlines
9607
9608            # or, we don't already have an interior breakpoint
9609            # and we didn't see a good breakpoint
9610            || (
9611                   !$forced_breakpoint_count
9612                && !$saw_good_break
9613
9614                # and this line is 'short'
9615                && !$is_long_line
9616            )
9617          )
9618        {
9619            @$ri_first = ($imin);
9620            @$ri_last  = ($imax);
9621        }
9622
9623        # otherwise use multiple lines
9624        else {
9625
9626            ( $ri_first, $ri_last, my $colon_count ) =
9627              set_continuation_breaks($saw_good_break);
9628
9629            break_all_chain_tokens( $ri_first, $ri_last );
9630
9631            break_equals( $ri_first, $ri_last );
9632
9633            # now we do a correction step to clean this up a bit
9634            # (The only time we would not do this is for debugging)
9635            if ( $rOpts->{'recombine'} ) {
9636                ( $ri_first, $ri_last ) =
9637                  recombine_breakpoints( $ri_first, $ri_last );
9638            }
9639
9640            insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9641        }
9642
9643        # do corrector step if -lp option is used
9644        my $do_not_pad = 0;
9645        if ($rOpts_line_up_parentheses) {
9646            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9647        }
9648        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9649    }
9650    prepare_for_new_input_lines();
9651
9652    # output any new -cscw block comment
9653    if ($cscw_block_comment) {
9654        flush();
9655        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9656    }
9657}
9658
9659sub note_added_semicolon {
9660    $last_added_semicolon_at = $input_line_number;
9661    if ( $added_semicolon_count == 0 ) {
9662        $first_added_semicolon_at = $last_added_semicolon_at;
9663    }
9664    $added_semicolon_count++;
9665    write_logfile_entry("Added ';' here\n");
9666}
9667
9668sub note_deleted_semicolon {
9669    $last_deleted_semicolon_at = $input_line_number;
9670    if ( $deleted_semicolon_count == 0 ) {
9671        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9672    }
9673    $deleted_semicolon_count++;
9674    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9675}
9676
9677sub note_embedded_tab {
9678    $embedded_tab_count++;
9679    $last_embedded_tab_at = $input_line_number;
9680    if ( !$first_embedded_tab_at ) {
9681        $first_embedded_tab_at = $last_embedded_tab_at;
9682    }
9683
9684    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9685        write_logfile_entry("Embedded tabs in quote or pattern\n");
9686    }
9687}
9688
9689sub starting_one_line_block {
9690
9691    # after seeing an opening curly brace, look for the closing brace
9692    # and see if the entire block will fit on a line.  This routine is
9693    # not always right because it uses the old whitespace, so a check
9694    # is made later (at the closing brace) to make sure we really
9695    # have a one-line block.  We have to do this preliminary check,
9696    # though, because otherwise we would always break at a semicolon
9697    # within a one-line block if the block contains multiple statements.
9698
9699    my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9700        $rblock_type )
9701      = @_;
9702
9703    # kill any current block - we can only go 1 deep
9704    destroy_one_line_block();
9705
9706    # return value:
9707    #  1=distance from start of block to opening brace exceeds line length
9708    #  0=otherwise
9709
9710    my $i_start = 0;
9711
9712    # shouldn't happen: there must have been a prior call to
9713    # store_token_to_go to put the opening brace in the output stream
9714    if ( $max_index_to_go < 0 ) {
9715        warning("program bug: store_token_to_go called incorrectly\n");
9716        report_definite_bug();
9717    }
9718    else {
9719
9720        # cannot use one-line blocks with cuddled else else/elsif lines
9721        if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9722            return 0;
9723        }
9724    }
9725
9726    my $block_type = $$rblock_type[$j];
9727
9728    # find the starting keyword for this block (such as 'if', 'else', ...)
9729
9730    if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9731        $i_start = $max_index_to_go;
9732    }
9733
9734    elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9735
9736        # For something like "if (xxx) {", the keyword "if" will be
9737        # just after the most recent break. This will be 0 unless
9738        # we have just killed a one-line block and are starting another.
9739        # (doif.t)
9740        $i_start = $index_max_forced_break + 1;
9741        if ( $types_to_go[$i_start] eq 'b' ) {
9742            $i_start++;
9743        }
9744
9745        unless ( $tokens_to_go[$i_start] eq $block_type ) {
9746            return 0;
9747        }
9748    }
9749
9750    # the previous nonblank token should start these block types
9751    elsif (
9752        ( $last_last_nonblank_token_to_go eq $block_type )
9753        || (   $block_type =~ /^sub/
9754            && $last_last_nonblank_token_to_go =~ /^sub/ )
9755      )
9756    {
9757        $i_start = $last_last_nonblank_index_to_go;
9758    }
9759
9760    # patch for SWITCH/CASE to retain one-line case/when blocks
9761    elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9762        $i_start = $index_max_forced_break + 1;
9763        if ( $types_to_go[$i_start] eq 'b' ) {
9764            $i_start++;
9765        }
9766        unless ( $tokens_to_go[$i_start] eq $block_type ) {
9767            return 0;
9768        }
9769    }
9770
9771    else {
9772        return 1;
9773    }
9774
9775    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9776
9777    my $i;
9778
9779    # see if length is too long to even start
9780    if ( $pos > $rOpts_maximum_line_length ) {
9781        return 1;
9782    }
9783
9784    for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9785
9786        # old whitespace could be arbitrarily large, so don't use it
9787        if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9788        else                              { $pos += length( $$rtokens[$i] ) }
9789
9790        # Return false result if we exceed the maximum line length,
9791        if ( $pos > $rOpts_maximum_line_length ) {
9792            return 0;
9793        }
9794
9795        # or encounter another opening brace before finding the closing brace.
9796        elsif ($$rtokens[$i] eq '{'
9797            && $$rtoken_type[$i] eq '{'
9798            && $$rblock_type[$i] )
9799        {
9800            return 0;
9801        }
9802
9803        # if we find our closing brace..
9804        elsif ($$rtokens[$i] eq '}'
9805            && $$rtoken_type[$i] eq '}'
9806            && $$rblock_type[$i] )
9807        {
9808
9809            # be sure any trailing comment also fits on the line
9810            my $i_nonblank =
9811              ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9812
9813            if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9814                $pos += length( $$rtokens[$i_nonblank] );
9815
9816                if ( $i_nonblank > $i + 1 ) {
9817                    $pos += length( $$rtokens[ $i + 1 ] );
9818                }
9819
9820                if ( $pos > $rOpts_maximum_line_length ) {
9821                    return 0;
9822                }
9823            }
9824
9825            # ok, it's a one-line block
9826            create_one_line_block( $i_start, 20 );
9827            return 0;
9828        }
9829
9830        # just keep going for other characters
9831        else {
9832        }
9833    }
9834
9835    # Allow certain types of new one-line blocks to form by joining
9836    # input lines.  These can be safely done, but for other block types,
9837    # we keep old one-line blocks but do not form new ones. It is not
9838    # always a good idea to make as many one-line blocks as possible,
9839    # so other types are not done.  The user can always use -mangle.
9840    if ( $is_sort_map_grep_eval{$block_type} ) {
9841        create_one_line_block( $i_start, 1 );
9842    }
9843
9844    return 0;
9845}
9846
9847sub unstore_token_to_go {
9848
9849    # remove most recent token from output stream
9850    if ( $max_index_to_go > 0 ) {
9851        $max_index_to_go--;
9852    }
9853    else {
9854        $max_index_to_go = UNDEFINED_INDEX;
9855    }
9856
9857}
9858
9859sub want_blank_line {
9860    flush();
9861    $file_writer_object->want_blank_line();
9862}
9863
9864sub write_unindented_line {
9865    flush();
9866    $file_writer_object->write_line( $_[0] );
9867}
9868
9869sub undo_ci {
9870
9871    # Undo continuation indentation in certain sequences
9872    # For example, we can undo continuation indation in sort/map/grep chains
9873    #    my $dat1 = pack( "n*",
9874    #        map { $_, $lookup->{$_} }
9875    #          sort { $a <=> $b }
9876    #          grep { $lookup->{$_} ne $default } keys %$lookup );
9877    # To align the map/sort/grep keywords like this:
9878    #    my $dat1 = pack( "n*",
9879    #        map { $_, $lookup->{$_} }
9880    #        sort { $a <=> $b }
9881    #        grep { $lookup->{$_} ne $default } keys %$lookup );
9882    my ( $ri_first, $ri_last ) = @_;
9883    my ( $line_1, $line_2, $lev_last );
9884    my $this_line_is_semicolon_terminated;
9885    my $max_line = @$ri_first - 1;
9886
9887    # looking at each line of this batch..
9888    # We are looking at leading tokens and looking for a sequence
9889    # all at the same level and higher level than enclosing lines.
9890    foreach my $line ( 0 .. $max_line ) {
9891
9892        my $ibeg = $$ri_first[$line];
9893        my $lev  = $levels_to_go[$ibeg];
9894        if ( $line > 0 ) {
9895
9896            # if we have started a chain..
9897            if ($line_1) {
9898
9899                # see if it continues..
9900                if ( $lev == $lev_last ) {
9901                    if (   $types_to_go[$ibeg] eq 'k'
9902                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9903                    {
9904
9905                        # chain continues...
9906                        # check for chain ending at end of a a statement
9907                        if ( $line == $max_line ) {
9908
9909                            # see of this line ends a statement
9910                            my $iend = $$ri_last[$line];
9911                            $this_line_is_semicolon_terminated =
9912                              $types_to_go[$iend] eq ';'
9913
9914                              # with possible side comment
9915                              || ( $types_to_go[$iend] eq '#'
9916                                && $iend - $ibeg >= 2
9917                                && $types_to_go[ $iend - 2 ] eq ';'
9918                                && $types_to_go[ $iend - 1 ] eq 'b' );
9919                        }
9920                        $line_2 = $line if ($this_line_is_semicolon_terminated);
9921                    }
9922                    else {
9923
9924                        # kill chain
9925                        $line_1 = undef;
9926                    }
9927                }
9928                elsif ( $lev < $lev_last ) {
9929
9930                    # chain ends with previous line
9931                    $line_2 = $line - 1;
9932                }
9933                elsif ( $lev > $lev_last ) {
9934
9935                    # kill chain
9936                    $line_1 = undef;
9937                }
9938
9939                # undo the continuation indentation if a chain ends
9940                if ( defined($line_2) && defined($line_1) ) {
9941                    my $continuation_line_count = $line_2 - $line_1 + 1;
9942                    @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9943                      (0) x ($continuation_line_count);
9944                    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9945                      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
9946                    $line_1 = undef;
9947                }
9948            }
9949
9950            # not in a chain yet..
9951            else {
9952
9953                # look for start of a new sort/map/grep chain
9954                if ( $lev > $lev_last ) {
9955                    if (   $types_to_go[$ibeg] eq 'k'
9956                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9957                    {
9958                        $line_1 = $line;
9959                    }
9960                }
9961            }
9962        }
9963        $lev_last = $lev;
9964    }
9965}
9966
9967sub undo_lp_ci {
9968
9969    # If there is a single, long parameter within parens, like this:
9970    #
9971    #  $self->command( "/msg "
9972    #        . $infoline->chan
9973    #        . " You said $1, but did you know that it's square was "
9974    #        . $1 * $1 . " ?" );
9975    #
9976    # we can remove the continuation indentation of the 2nd and higher lines
9977    # to achieve this effect, which is more pleasing:
9978    #
9979    #  $self->command("/msg "
9980    #                 . $infoline->chan
9981    #                 . " You said $1, but did you know that it's square was "
9982    #                 . $1 * $1 . " ?");
9983
9984    my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9985    my $max_line = @$ri_first - 1;
9986
9987    # must be multiple lines
9988    return unless $max_line > $line_open;
9989
9990    my $lev_start     = $levels_to_go[$i_start];
9991    my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9992
9993    # see if all additional lines in this container have continuation
9994    # indentation
9995    my $n;
9996    my $line_1 = 1 + $line_open;
9997    for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9998        my $ibeg = $$ri_first[$n];
9999        my $iend = $$ri_last[$n];
10000        if ( $ibeg eq $closing_index ) { $n--; last }
10001        return if ( $lev_start != $levels_to_go[$ibeg] );
10002        return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10003        last   if ( $closing_index <= $iend );
10004    }
10005
10006    # we can reduce the indentation of all continuation lines
10007    my $continuation_line_count = $n - $line_open;
10008    @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10009      (0) x ($continuation_line_count);
10010    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10011      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10012}
10013
10014sub set_logical_padding {
10015
10016    # Look at a batch of lines and see if extra padding can improve the
10017    # alignment when there are certain leading operators. Here is an
10018    # example, in which some extra space is introduced before
10019    # '( $year' to make it line up with the subsequent lines:
10020    #
10021    #       if (   ( $Year < 1601 )
10022    #           || ( $Year > 2899 )
10023    #           || ( $EndYear < 1601 )
10024    #           || ( $EndYear > 2899 ) )
10025    #       {
10026    #           &Error_OutOfRange;
10027    #       }
10028    #
10029    my ( $ri_first, $ri_last ) = @_;
10030    my $max_line = @$ri_first - 1;
10031
10032    my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
10033        $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10034
10035    # looking at each line of this batch..
10036    foreach $line ( 0 .. $max_line - 1 ) {
10037
10038        # see if the next line begins with a logical operator
10039        $ibeg      = $$ri_first[$line];
10040        $iend      = $$ri_last[$line];
10041        $ibeg_next = $$ri_first[ $line + 1 ];
10042        $tok_next  = $tokens_to_go[$ibeg_next];
10043        $type_next = $types_to_go[$ibeg_next];
10044
10045        $has_leading_op_next = ( $tok_next =~ /^\w/ )
10046          ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
10047          : $is_chain_operator{$type_next};    # and, or
10048
10049        next unless ($has_leading_op_next);
10050
10051        # next line must not be at lesser depth
10052        next
10053          if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
10054
10055        # identify the token in this line to be padded on the left
10056        $ipad = undef;
10057
10058        # handle lines at same depth...
10059        if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
10060
10061            # if this is not first line of the batch ...
10062            if ( $line > 0 ) {
10063
10064                # and we have leading operator..
10065                next if $has_leading_op;
10066
10067                # Introduce padding if..
10068                # 1. the previous line is at lesser depth, or
10069                # 2. the previous line ends in an assignment
10070                # 3. the previous line ends in a 'return'
10071                # 4. the previous line ends in a comma
10072                # Example 1: previous line at lesser depth
10073                #       if (   ( $Year < 1601 )      # <- we are here but
10074                #           || ( $Year > 2899 )      #  list has not yet
10075                #           || ( $EndYear < 1601 )   # collapsed vertically
10076                #           || ( $EndYear > 2899 ) )
10077                #       {
10078                #
10079                # Example 2: previous line ending in assignment:
10080                #    $leapyear =
10081                #        $year % 4   ? 0     # <- We are here
10082                #      : $year % 100 ? 1
10083                #      : $year % 400 ? 0
10084                #      : 1;
10085                #
10086                # Example 3: previous line ending in comma:
10087                #    push @expr,
10088                #        /test/   ? undef
10089                #      : eval($_) ? 1
10090                #      : eval($_) ? 1
10091                #      :            0;
10092
10093                # be sure levels agree (do not indent after an indented 'if')
10094                next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10095
10096                # allow padding on first line after a comma but only if:
10097                # (1) this is line 2 and
10098                # (2) there are at more than three lines and
10099                # (3) lines 3 and 4 have the same leading operator
10100                # These rules try to prevent padding within a long
10101                # comma-separated list.
10102                my $ok_comma;
10103                if (   $types_to_go[$iendm] eq ','
10104                    && $line == 1
10105                    && $max_line > 2 )
10106                {
10107                    my $ibeg_next_next = $$ri_first[ $line + 2 ];
10108                    my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
10109                    $ok_comma = $tok_next_next eq $tok_next;
10110                }
10111
10112                next
10113                  unless (
10114                       $is_assignment{ $types_to_go[$iendm] }
10115                    || $ok_comma
10116                    || ( $nesting_depth_to_go[$ibegm] <
10117                        $nesting_depth_to_go[$ibeg] )
10118                    || (   $types_to_go[$iendm] eq 'k'
10119                        && $tokens_to_go[$iendm] eq 'return' )
10120                  );
10121
10122                # we will add padding before the first token
10123                $ipad = $ibeg;
10124            }
10125
10126            # for first line of the batch..
10127            else {
10128
10129                # WARNING: Never indent if first line is starting in a
10130                # continued quote, which would change the quote.
10131                next if $starting_in_quote;
10132
10133                # if this is text after closing '}'
10134                # then look for an interior token to pad
10135                if ( $types_to_go[$ibeg] eq '}' ) {
10136
10137                }
10138
10139                # otherwise, we might pad if it looks really good
10140                else {
10141
10142                    # we might pad token $ibeg, so be sure that it
10143                    # is at the same depth as the next line.
10144                    next
10145                      if ( $nesting_depth_to_go[$ibeg] !=
10146                        $nesting_depth_to_go[$ibeg_next] );
10147
10148                    # We can pad on line 1 of a statement if at least 3
10149                    # lines will be aligned. Otherwise, it
10150                    # can look very confusing.
10151
10152                 # We have to be careful not to pad if there are too few
10153                 # lines.  The current rule is:
10154                 # (1) in general we require at least 3 consecutive lines
10155                 # with the same leading chain operator token,
10156                 # (2) but an exception is that we only require two lines
10157                 # with leading colons if there are no more lines.  For example,
10158                 # the first $i in the following snippet would get padding
10159                 # by the second rule:
10160                 #
10161                 #   $i == 1 ? ( "First", "Color" )
10162                 # : $i == 2 ? ( "Then",  "Rarity" )
10163                 # :           ( "Then",  "Name" );
10164
10165                    if ( $max_line > 1 ) {
10166                        my $leading_token = $tokens_to_go[$ibeg_next];
10167                        my $tokens_differ;
10168
10169                        # never indent line 1 of a '.' series because
10170                        # previous line is most likely at same level.
10171                        # TODO: we should also look at the leasing_spaces
10172                        # of the last output line and skip if it is same
10173                        # as this line.
10174                        next if ( $leading_token eq '.' );
10175
10176                        my $count = 1;
10177                        foreach my $l ( 2 .. 3 ) {
10178                            last if ( $line + $l > $max_line );
10179                            my $ibeg_next_next = $$ri_first[ $line + $l ];
10180                            if ( $tokens_to_go[$ibeg_next_next] ne
10181                                $leading_token )
10182                            {
10183                                $tokens_differ = 1;
10184                                last;
10185                            }
10186                            $count++;
10187                        }
10188                        next if ($tokens_differ);
10189                        next if ( $count < 3 && $leading_token ne ':' );
10190                        $ipad = $ibeg;
10191                    }
10192                    else {
10193                        next;
10194                    }
10195                }
10196            }
10197        }
10198
10199        # find interior token to pad if necessary
10200        if ( !defined($ipad) ) {
10201
10202            for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10203
10204                # find any unclosed container
10205                next
10206                  unless ( $type_sequence_to_go[$i]
10207                    && $mate_index_to_go[$i] > $iend );
10208
10209                # find next nonblank token to pad
10210                $ipad = $i + 1;
10211                if ( $types_to_go[$ipad] eq 'b' ) {
10212                    $ipad++;
10213                    last if ( $ipad > $iend );
10214                }
10215            }
10216            last unless $ipad;
10217        }
10218
10219        # next line must not be at greater depth
10220        my $iend_next = $$ri_last[ $line + 1 ];
10221        next
10222          if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10223            $nesting_depth_to_go[$ipad] );
10224
10225        # lines must be somewhat similar to be padded..
10226        my $inext_next = $ibeg_next + 1;
10227        if ( $types_to_go[$inext_next] eq 'b' ) {
10228            $inext_next++;
10229        }
10230        my $type      = $types_to_go[$ipad];
10231        my $type_next = $types_to_go[ $ipad + 1 ];
10232
10233        # see if there are multiple continuation lines
10234        my $logical_continuation_lines = 1;
10235        if ( $line + 2 <= $max_line ) {
10236            my $leading_token  = $tokens_to_go[$ibeg_next];
10237            my $ibeg_next_next = $$ri_first[ $line + 2 ];
10238            if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
10239                && $nesting_depth_to_go[$ibeg_next] eq
10240                $nesting_depth_to_go[$ibeg_next_next] )
10241            {
10242                $logical_continuation_lines++;
10243            }
10244        }
10245
10246        # see if leading types match
10247        my $types_match = $types_to_go[$inext_next] eq $type;
10248        my $matches_without_bang;
10249
10250        # if first line has leading ! then compare the following token
10251        if ( !$types_match && $type eq '!' ) {
10252            $types_match = $matches_without_bang =
10253              $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
10254        }
10255
10256        if (
10257
10258            # either we have multiple continuation lines to follow
10259            # and we are not padding the first token
10260            ( $logical_continuation_lines > 1 && $ipad > 0 )
10261
10262            # or..
10263            || (
10264
10265                # types must match
10266                $types_match
10267
10268                # and keywords must match if keyword
10269                && !(
10270                       $type eq 'k'
10271                    && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10272                )
10273            )
10274          )
10275        {
10276
10277            #----------------------begin special checks--------------
10278            #
10279            # SPECIAL CHECK 1:
10280            # A check is needed before we can make the pad.
10281            # If we are in a list with some long items, we want each
10282            # item to stand out.  So in the following example, the
10283            # first line begining with '$casefold->' would look good
10284            # padded to align with the next line, but then it
10285            # would be indented more than the last line, so we
10286            # won't do it.
10287            #
10288            #  ok(
10289            #      $casefold->{code}         eq '0041'
10290            #        && $casefold->{status}  eq 'C'
10291            #        && $casefold->{mapping} eq '0061',
10292            #      'casefold 0x41'
10293            #  );
10294            #
10295            # Note:
10296            # It would be faster, and almost as good, to use a comma
10297            # count, and not pad if comma_count > 1 and the previous
10298            # line did not end with a comma.
10299            #
10300            my $ok_to_pad = 1;
10301
10302            my $ibg   = $$ri_first[ $line + 1 ];
10303            my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10304
10305            # just use simplified formula for leading spaces to avoid
10306            # needless sub calls
10307            my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10308
10309            # look at each line beyond the next ..
10310            my $l = $line + 1;
10311            foreach $l ( $line + 2 .. $max_line ) {
10312                my $ibg = $$ri_first[$l];
10313
10314                # quit looking at the end of this container
10315                last
10316                  if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10317                  || ( $nesting_depth_to_go[$ibg] < $depth );
10318
10319                # cannot do the pad if a later line would be
10320                # outdented more
10321                if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10322                    $ok_to_pad = 0;
10323                    last;
10324                }
10325            }
10326
10327            # don't pad if we end in a broken list
10328            if ( $l == $max_line ) {
10329                my $i2 = $$ri_last[$l];
10330                if ( $types_to_go[$i2] eq '#' ) {
10331                    my $i1 = $$ri_first[$l];
10332                    next
10333                      if (
10334                        terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10335                            $i2 ) eq ','
10336                      );
10337                }
10338            }
10339
10340            # SPECIAL CHECK 2:
10341            # a minus may introduce a quoted variable, and we will
10342            # add the pad only if this line begins with a bare word,
10343            # such as for the word 'Button' here:
10344            #    [
10345            #         Button      => "Print letter \"~$_\"",
10346            #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10347            #        -accelerator => "Meta+$_"
10348            #    ];
10349            #
10350            #  On the other hand, if 'Button' is quoted, it looks best
10351            #  not to pad:
10352            #    [
10353            #        'Button'     => "Print letter \"~$_\"",
10354            #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10355            #        -accelerator => "Meta+$_"
10356            #    ];
10357            if ( $types_to_go[$ibeg_next] eq 'm' ) {
10358                $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10359            }
10360
10361            next unless $ok_to_pad;
10362
10363            #----------------------end special check---------------
10364
10365            my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10366            my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10367            $pad_spaces = $length_2 - $length_1;
10368
10369            # If the first line has a leading ! and the second does
10370            # not, then remove one space to try to align the next
10371            # leading characters, which are often the same.  For example:
10372            #  if (  !$ts
10373            #      || $ts == $self->Holder
10374            #      || $self->Holder->Type eq "Arena" )
10375            #
10376            # This usually helps readability, but if there are subsequent
10377            # ! operators things will still get messed up.  For example:
10378            #
10379            #  if (  !exists $Net::DNS::typesbyname{$qtype}
10380            #      && exists $Net::DNS::classesbyname{$qtype}
10381            #      && !exists $Net::DNS::classesbyname{$qclass}
10382            #      && exists $Net::DNS::typesbyname{$qclass} )
10383            # We can't fix that.
10384            if ($matches_without_bang) { $pad_spaces-- }
10385
10386            # make sure this won't change if -lp is used
10387            my $indentation_1 = $leading_spaces_to_go[$ibeg];
10388            if ( ref($indentation_1) ) {
10389                if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10390                    my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10391                    unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10392                        $pad_spaces = 0;
10393                    }
10394                }
10395            }
10396
10397            # we might be able to handle a pad of -1 by removing a blank
10398            # token
10399            if ( $pad_spaces < 0 ) {
10400
10401                if ( $pad_spaces == -1 ) {
10402                    if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10403                        $tokens_to_go[ $ipad - 1 ] = '';
10404                    }
10405                }
10406                $pad_spaces = 0;
10407            }
10408
10409            # now apply any padding for alignment
10410            if ( $ipad >= 0 && $pad_spaces ) {
10411
10412                my $length_t = total_line_length( $ibeg, $iend );
10413                if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10414                    $tokens_to_go[$ipad] =
10415                      ' ' x $pad_spaces . $tokens_to_go[$ipad];
10416                }
10417            }
10418        }
10419    }
10420    continue {
10421        $iendm          = $iend;
10422        $ibegm          = $ibeg;
10423        $has_leading_op = $has_leading_op_next;
10424    }    # end of loop over lines
10425    return;
10426}
10427
10428sub correct_lp_indentation {
10429
10430    # When the -lp option is used, we need to make a last pass through
10431    # each line to correct the indentation positions in case they differ
10432    # from the predictions.  This is necessary because perltidy uses a
10433    # predictor/corrector method for aligning with opening parens.  The
10434    # predictor is usually good, but sometimes stumbles.  The corrector
10435    # tries to patch things up once the actual opening paren locations
10436    # are known.
10437    my ( $ri_first, $ri_last ) = @_;
10438    my $do_not_pad = 0;
10439
10440    #  Note on flag '$do_not_pad':
10441    #  We want to avoid a situation like this, where the aligner inserts
10442    #  whitespace before the '=' to align it with a previous '=', because
10443    #  otherwise the parens might become mis-aligned in a situation like
10444    #  this, where the '=' has become aligned with the previous line,
10445    #  pushing the opening '(' forward beyond where we want it.
10446    #
10447    #  $mkFloor::currentRoom = '';
10448    #  $mkFloor::c_entry     = $c->Entry(
10449    #                                 -width        => '10',
10450    #                                 -relief       => 'sunken',
10451    #                                 ...
10452    #                                 );
10453    #
10454    #  We leave it to the aligner to decide how to do this.
10455
10456    # first remove continuation indentation if appropriate
10457    my $max_line = @$ri_first - 1;
10458
10459    # looking at each line of this batch..
10460    my ( $ibeg, $iend );
10461    my $line;
10462    foreach $line ( 0 .. $max_line ) {
10463        $ibeg = $$ri_first[$line];
10464        $iend = $$ri_last[$line];
10465
10466        # looking at each token in this output line..
10467        my $i;
10468        foreach $i ( $ibeg .. $iend ) {
10469
10470            # How many space characters to place before this token
10471            # for special alignment.  Actual padding is done in the
10472            # continue block.
10473
10474            # looking for next unvisited indentation item
10475            my $indentation = $leading_spaces_to_go[$i];
10476            if ( !$indentation->get_MARKED() ) {
10477                $indentation->set_MARKED(1);
10478
10479                # looking for indentation item for which we are aligning
10480                # with parens, braces, and brackets
10481                next unless ( $indentation->get_ALIGN_PAREN() );
10482
10483                # skip closed container on this line
10484                if ( $i > $ibeg ) {
10485                    my $im = $i - 1;
10486                    if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10487                    if (   $type_sequence_to_go[$im]
10488                        && $mate_index_to_go[$im] <= $iend )
10489                    {
10490                        next;
10491                    }
10492                }
10493
10494                if ( $line == 1 && $i == $ibeg ) {
10495                    $do_not_pad = 1;
10496                }
10497
10498                # Ok, let's see what the error is and try to fix it
10499                my $actual_pos;
10500                my $predicted_pos = $indentation->get_SPACES();
10501                if ( $i > $ibeg ) {
10502
10503                    # token is mid-line - use length to previous token
10504                    $actual_pos = total_line_length( $ibeg, $i - 1 );
10505
10506                    # for mid-line token, we must check to see if all
10507                    # additional lines have continuation indentation,
10508                    # and remove it if so.  Otherwise, we do not get
10509                    # good alignment.
10510                    my $closing_index = $indentation->get_CLOSED();
10511                    if ( $closing_index > $iend ) {
10512                        my $ibeg_next = $$ri_first[ $line + 1 ];
10513                        if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10514                            undo_lp_ci( $line, $i, $closing_index, $ri_first,
10515                                $ri_last );
10516                        }
10517                    }
10518                }
10519                elsif ( $line > 0 ) {
10520
10521                    # handle case where token starts a new line;
10522                    # use length of previous line
10523                    my $ibegm = $$ri_first[ $line - 1 ];
10524                    my $iendm = $$ri_last[ $line - 1 ];
10525                    $actual_pos = total_line_length( $ibegm, $iendm );
10526
10527                    # follow -pt style
10528                    ++$actual_pos
10529                      if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10530                }
10531                else {
10532
10533                    # token is first character of first line of batch
10534                    $actual_pos = $predicted_pos;
10535                }
10536
10537                my $move_right = $actual_pos - $predicted_pos;
10538
10539                # done if no error to correct (gnu2.t)
10540                if ( $move_right == 0 ) {
10541                    $indentation->set_RECOVERABLE_SPACES($move_right);
10542                    next;
10543                }
10544
10545                # if we have not seen closure for this indentation in
10546                # this batch, we can only pass on a request to the
10547                # vertical aligner
10548                my $closing_index = $indentation->get_CLOSED();
10549
10550                if ( $closing_index < 0 ) {
10551                    $indentation->set_RECOVERABLE_SPACES($move_right);
10552                    next;
10553                }
10554
10555                # If necessary, look ahead to see if there is really any
10556                # leading whitespace dependent on this whitespace, and
10557                # also find the longest line using this whitespace.
10558                # Since it is always safe to move left if there are no
10559                # dependents, we only need to do this if we may have
10560                # dependent nodes or need to move right.
10561
10562                my $right_margin = 0;
10563                my $have_child   = $indentation->get_HAVE_CHILD();
10564
10565                my %saw_indentation;
10566                my $line_count = 1;
10567                $saw_indentation{$indentation} = $indentation;
10568
10569                if ( $have_child || $move_right > 0 ) {
10570                    $have_child = 0;
10571                    my $max_length = 0;
10572                    if ( $i == $ibeg ) {
10573                        $max_length = total_line_length( $ibeg, $iend );
10574                    }
10575
10576                    # look ahead at the rest of the lines of this batch..
10577                    my $line_t;
10578                    foreach $line_t ( $line + 1 .. $max_line ) {
10579                        my $ibeg_t = $$ri_first[$line_t];
10580                        my $iend_t = $$ri_last[$line_t];
10581                        last if ( $closing_index <= $ibeg_t );
10582
10583                        # remember all different indentation objects
10584                        my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10585                        $saw_indentation{$indentation_t} = $indentation_t;
10586                        $line_count++;
10587
10588                        # remember longest line in the group
10589                        my $length_t = total_line_length( $ibeg_t, $iend_t );
10590                        if ( $length_t > $max_length ) {
10591                            $max_length = $length_t;
10592                        }
10593                    }
10594                    $right_margin = $rOpts_maximum_line_length - $max_length;
10595                    if ( $right_margin < 0 ) { $right_margin = 0 }
10596                }
10597
10598                my $first_line_comma_count =
10599                  grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10600                my $comma_count = $indentation->get_COMMA_COUNT();
10601                my $arrow_count = $indentation->get_ARROW_COUNT();
10602
10603                # This is a simple approximate test for vertical alignment:
10604                # if we broke just after an opening paren, brace, bracket,
10605                # and there are 2 or more commas in the first line,
10606                # and there are no '=>'s,
10607                # then we are probably vertically aligned.  We could set
10608                # an exact flag in sub scan_list, but this is good
10609                # enough.
10610                my $indentation_count = keys %saw_indentation;
10611                my $is_vertically_aligned =
10612                  (      $i == $ibeg
10613                      && $first_line_comma_count > 1
10614                      && $indentation_count == 1
10615                      && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10616
10617                # Make the move if possible ..
10618                if (
10619
10620                    # we can always move left
10621                    $move_right < 0
10622
10623                    # but we should only move right if we are sure it will
10624                    # not spoil vertical alignment
10625                    || ( $comma_count == 0 )
10626                    || ( $comma_count > 0 && !$is_vertically_aligned )
10627                  )
10628                {
10629                    my $move =
10630                      ( $move_right <= $right_margin )
10631                      ? $move_right
10632                      : $right_margin;
10633
10634                    foreach ( keys %saw_indentation ) {
10635                        $saw_indentation{$_}
10636                          ->permanently_decrease_AVAILABLE_SPACES( -$move );
10637                    }
10638                }
10639
10640                # Otherwise, record what we want and the vertical aligner
10641                # will try to recover it.
10642                else {
10643                    $indentation->set_RECOVERABLE_SPACES($move_right);
10644                }
10645            }
10646        }
10647    }
10648    return $do_not_pad;
10649}
10650
10651# flush is called to output any tokens in the pipeline, so that
10652# an alternate source of lines can be written in the correct order
10653
10654sub flush {
10655    destroy_one_line_block();
10656    output_line_to_go();
10657    Perl::Tidy::VerticalAligner::flush();
10658}
10659
10660sub reset_block_text_accumulator {
10661
10662    # save text after 'if' and 'elsif' to append after 'else'
10663    if ($accumulating_text_for_block) {
10664
10665        if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10666            push @{$rleading_block_if_elsif_text}, $leading_block_text;
10667        }
10668    }
10669    $accumulating_text_for_block        = "";
10670    $leading_block_text                 = "";
10671    $leading_block_text_level           = 0;
10672    $leading_block_text_length_exceeded = 0;
10673    $leading_block_text_line_number     = 0;
10674    $leading_block_text_line_length     = 0;
10675}
10676
10677sub set_block_text_accumulator {
10678    my $i = shift;
10679    $accumulating_text_for_block = $tokens_to_go[$i];
10680    if ( $accumulating_text_for_block !~ /^els/ ) {
10681        $rleading_block_if_elsif_text = [];
10682    }
10683    $leading_block_text       = "";
10684    $leading_block_text_level = $levels_to_go[$i];
10685    $leading_block_text_line_number =
10686      $vertical_aligner_object->get_output_line_number();
10687    $leading_block_text_length_exceeded = 0;
10688
10689    # this will contain the column number of the last character
10690    # of the closing side comment
10691    $leading_block_text_line_length =
10692      length($accumulating_text_for_block) +
10693      length( $rOpts->{'closing-side-comment-prefix'} ) +
10694      $leading_block_text_level * $rOpts_indent_columns + 3;
10695}
10696
10697sub accumulate_block_text {
10698    my $i = shift;
10699
10700    # accumulate leading text for -csc, ignoring any side comments
10701    if (   $accumulating_text_for_block
10702        && !$leading_block_text_length_exceeded
10703        && $types_to_go[$i] ne '#' )
10704    {
10705
10706        my $added_length = length( $tokens_to_go[$i] );
10707        $added_length += 1 if $i == 0;
10708        my $new_line_length = $leading_block_text_line_length + $added_length;
10709
10710        # we can add this text if we don't exceed some limits..
10711        if (
10712
10713            # we must not have already exceeded the text length limit
10714            length($leading_block_text) <
10715            $rOpts_closing_side_comment_maximum_text
10716
10717            # and either:
10718            # the new total line length must be below the line length limit
10719            # or the new length must be below the text length limit
10720            # (ie, we may allow one token to exceed the text length limit)
10721            && ( $new_line_length < $rOpts_maximum_line_length
10722                || length($leading_block_text) + $added_length <
10723                $rOpts_closing_side_comment_maximum_text )
10724
10725            # UNLESS: we are adding a closing paren before the brace we seek.
10726            # This is an attempt to avoid situations where the ... to be
10727            # added are longer than the omitted right paren, as in:
10728
10729            #   foreach my $item (@a_rather_long_variable_name_here) {
10730            #      &whatever;
10731            #   } ## end foreach my $item (@a_rather_long_variable_name_here...
10732
10733            || (
10734                $tokens_to_go[$i] eq ')'
10735                && (
10736                    (
10737                           $i + 1 <= $max_index_to_go
10738                        && $block_type_to_go[ $i + 1 ] eq
10739                        $accumulating_text_for_block
10740                    )
10741                    || (   $i + 2 <= $max_index_to_go
10742                        && $block_type_to_go[ $i + 2 ] eq
10743                        $accumulating_text_for_block )
10744                )
10745            )
10746          )
10747        {
10748
10749            # add an extra space at each newline
10750            if ( $i == 0 ) { $leading_block_text .= ' ' }
10751
10752            # add the token text
10753            $leading_block_text .= $tokens_to_go[$i];
10754            $leading_block_text_line_length = $new_line_length;
10755        }
10756
10757        # show that text was truncated if necessary
10758        elsif ( $types_to_go[$i] ne 'b' ) {
10759            $leading_block_text_length_exceeded = 1;
10760            $leading_block_text .= '...';
10761        }
10762    }
10763}
10764
10765{
10766    my %is_if_elsif_else_unless_while_until_for_foreach;
10767
10768    BEGIN {
10769
10770        # These block types may have text between the keyword and opening
10771        # curly.  Note: 'else' does not, but must be included to allow trailing
10772        # if/elsif text to be appended.
10773        # patch for SWITCH/CASE: added 'case' and 'when'
10774        @_ = qw(if elsif else unless while until for foreach case when);
10775        @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10776    }
10777
10778    sub accumulate_csc_text {
10779
10780        # called once per output buffer when -csc is used. Accumulates
10781        # the text placed after certain closing block braces.
10782        # Defines and returns the following for this buffer:
10783
10784        my $block_leading_text = "";    # the leading text of the last '}'
10785        my $rblock_leading_if_elsif_text;
10786        my $i_block_leading_text =
10787          -1;    # index of token owning block_leading_text
10788        my $block_line_count    = 100;    # how many lines the block spans
10789        my $terminal_type       = 'b';    # type of last nonblank token
10790        my $i_terminal          = 0;      # index of last nonblank token
10791        my $terminal_block_type = "";
10792
10793        for my $i ( 0 .. $max_index_to_go ) {
10794            my $type       = $types_to_go[$i];
10795            my $block_type = $block_type_to_go[$i];
10796            my $token      = $tokens_to_go[$i];
10797
10798            # remember last nonblank token type
10799            if ( $type ne '#' && $type ne 'b' ) {
10800                $terminal_type       = $type;
10801                $terminal_block_type = $block_type;
10802                $i_terminal          = $i;
10803            }
10804
10805            my $type_sequence = $type_sequence_to_go[$i];
10806            if ( $block_type && $type_sequence ) {
10807
10808                if ( $token eq '}' ) {
10809
10810                    # restore any leading text saved when we entered this block
10811                    if ( defined( $block_leading_text{$type_sequence} ) ) {
10812                        ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10813                          @{ $block_leading_text{$type_sequence} };
10814                        $i_block_leading_text = $i;
10815                        delete $block_leading_text{$type_sequence};
10816                        $rleading_block_if_elsif_text =
10817                          $rblock_leading_if_elsif_text;
10818                    }
10819
10820                    # if we run into a '}' then we probably started accumulating
10821                    # at something like a trailing 'if' clause..no harm done.
10822                    if (   $accumulating_text_for_block
10823                        && $levels_to_go[$i] <= $leading_block_text_level )
10824                    {
10825                        my $lev = $levels_to_go[$i];
10826                        reset_block_text_accumulator();
10827                    }
10828
10829                    if ( defined( $block_opening_line_number{$type_sequence} ) )
10830                    {
10831                        my $output_line_number =
10832                          $vertical_aligner_object->get_output_line_number();
10833                        $block_line_count =
10834                          $output_line_number -
10835                          $block_opening_line_number{$type_sequence} + 1;
10836                        delete $block_opening_line_number{$type_sequence};
10837                    }
10838                    else {
10839
10840                        # Error: block opening line undefined for this line..
10841                        # This shouldn't be possible, but it is not a
10842                        # significant problem.
10843                    }
10844                }
10845
10846                elsif ( $token eq '{' ) {
10847
10848                    my $line_number =
10849                      $vertical_aligner_object->get_output_line_number();
10850                    $block_opening_line_number{$type_sequence} = $line_number;
10851
10852                    if (   $accumulating_text_for_block
10853                        && $levels_to_go[$i] == $leading_block_text_level )
10854                    {
10855
10856                        if ( $accumulating_text_for_block eq $block_type ) {
10857
10858                            # save any leading text before we enter this block
10859                            $block_leading_text{$type_sequence} = [
10860                                $leading_block_text,
10861                                $rleading_block_if_elsif_text
10862                            ];
10863                            $block_opening_line_number{$type_sequence} =
10864                              $leading_block_text_line_number;
10865                            reset_block_text_accumulator();
10866                        }
10867                        else {
10868
10869                            # shouldn't happen, but not a serious error.
10870                            # We were accumulating -csc text for block type
10871                            # $accumulating_text_for_block and unexpectedly
10872                            # encountered a '{' for block type $block_type.
10873                        }
10874                    }
10875                }
10876            }
10877
10878            if (   $type eq 'k'
10879                && $csc_new_statement_ok
10880                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10881                && $token =~ /$closing_side_comment_list_pattern/o )
10882            {
10883                set_block_text_accumulator($i);
10884            }
10885            else {
10886
10887                # note: ignoring type 'q' because of tricks being played
10888                # with 'q' for hanging side comments
10889                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10890                    $csc_new_statement_ok =
10891                      ( $block_type || $type eq 'J' || $type eq ';' );
10892                }
10893                if (   $type eq ';'
10894                    && $accumulating_text_for_block
10895                    && $levels_to_go[$i] == $leading_block_text_level )
10896                {
10897                    reset_block_text_accumulator();
10898                }
10899                else {
10900                    accumulate_block_text($i);
10901                }
10902            }
10903        }
10904
10905        # Treat an 'else' block specially by adding preceding 'if' and
10906        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
10907        # especially for cuddled-else formatting.
10908        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10909            $block_leading_text =
10910              make_else_csc_text( $i_terminal, $terminal_block_type,
10911                $block_leading_text, $rblock_leading_if_elsif_text );
10912        }
10913
10914        return ( $terminal_type, $i_terminal, $i_block_leading_text,
10915            $block_leading_text, $block_line_count );
10916    }
10917}
10918
10919sub make_else_csc_text {
10920
10921    # create additional -csc text for an 'else' and optionally 'elsif',
10922    # depending on the value of switch
10923    # $rOpts_closing_side_comment_else_flag:
10924    #
10925    #  = 0 add 'if' text to trailing else
10926    #  = 1 same as 0 plus:
10927    #      add 'if' to 'elsif's if can fit in line length
10928    #      add last 'elsif' to trailing else if can fit in one line
10929    #  = 2 same as 1 but do not check if exceed line length
10930    #
10931    # $rif_elsif_text = a reference to a list of all previous closing
10932    # side comments created for this if block
10933    #
10934    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10935    my $csc_text = $block_leading_text;
10936
10937    if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10938    {
10939        return $csc_text;
10940    }
10941
10942    my $count = @{$rif_elsif_text};
10943    return $csc_text unless ($count);
10944
10945    my $if_text = '[ if' . $rif_elsif_text->[0];
10946
10947    # always show the leading 'if' text on 'else'
10948    if ( $block_type eq 'else' ) {
10949        $csc_text .= $if_text;
10950    }
10951
10952    # see if that's all
10953    if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10954        return $csc_text;
10955    }
10956
10957    my $last_elsif_text = "";
10958    if ( $count > 1 ) {
10959        $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10960        if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10961    }
10962
10963    # tentatively append one more item
10964    my $saved_text = $csc_text;
10965    if ( $block_type eq 'else' ) {
10966        $csc_text .= $last_elsif_text;
10967    }
10968    else {
10969        $csc_text .= ' ' . $if_text;
10970    }
10971
10972    # all done if no length checks requested
10973    if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10974        return $csc_text;
10975    }
10976
10977    # undo it if line length exceeded
10978    my $length =
10979      length($csc_text) +
10980      length($block_type) +
10981      length( $rOpts->{'closing-side-comment-prefix'} ) +
10982      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10983    if ( $length > $rOpts_maximum_line_length ) {
10984        $csc_text = $saved_text;
10985    }
10986    return $csc_text;
10987}
10988
10989{    # sub balance_csc_text
10990
10991    my %matching_char;
10992
10993    BEGIN {
10994        %matching_char = (
10995            '{' => '}',
10996            '(' => ')',
10997            '[' => ']',
10998            '}' => '{',
10999            ')' => '(',
11000            ']' => '[',
11001        );
11002    }
11003
11004    sub balance_csc_text {
11005
11006        # Append characters to balance a closing side comment so that editors
11007        # such as vim can correctly jump through code.
11008        # Simple Example:
11009        #  input  = ## end foreach my $foo ( sort { $b  ...
11010        #  output = ## end foreach my $foo ( sort { $b  ...})
11011
11012        # NOTE: This routine does not currently filter out structures within
11013        # quoted text because the bounce algorithims in text editors do not
11014        # necessarily do this either (a version of vim was checked and
11015        # did not do this).
11016
11017        # Some complex examples which will cause trouble for some editors:
11018        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
11019        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11020        #  if ( $1 eq '{' ) {
11021        # test file test1/braces.pl has many such examples.
11022
11023        my ($csc) = @_;
11024
11025        # loop to examine characters one-by-one, RIGHT to LEFT and
11026        # build a balancing ending, LEFT to RIGHT.
11027        for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11028
11029            my $char = substr( $csc, $pos, 1 );
11030
11031            # ignore everything except structural characters
11032            next unless ( $matching_char{$char} );
11033
11034            # pop most recently appended character
11035            my $top = chop($csc);
11036
11037            # push it back plus the mate to the newest character
11038            # unless they balance each other.
11039            $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11040        }
11041
11042        # return the balanced string
11043        return $csc;
11044    }
11045}
11046
11047sub add_closing_side_comment {
11048
11049    # add closing side comments after closing block braces if -csc used
11050    my $cscw_block_comment;
11051
11052    #---------------------------------------------------------------
11053    # Step 1: loop through all tokens of this line to accumulate
11054    # the text needed to create the closing side comments. Also see
11055    # how the line ends.
11056    #---------------------------------------------------------------
11057
11058    my ( $terminal_type, $i_terminal, $i_block_leading_text,
11059        $block_leading_text, $block_line_count )
11060      = accumulate_csc_text();
11061
11062    #---------------------------------------------------------------
11063    # Step 2: make the closing side comment if this ends a block
11064    #---------------------------------------------------------------
11065    my $have_side_comment = $i_terminal != $max_index_to_go;
11066
11067    # if this line might end in a block closure..
11068    if (
11069        $terminal_type eq '}'
11070
11071        # ..and either
11072        && (
11073
11074            # the block is long enough
11075            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11076
11077            # or there is an existing comment to check
11078            || (   $have_side_comment
11079                && $rOpts->{'closing-side-comment-warnings'} )
11080        )
11081
11082        # .. and if this is one of the types of interest
11083        && $block_type_to_go[$i_terminal] =~
11084        /$closing_side_comment_list_pattern/o
11085
11086        # .. but not an anonymous sub
11087        # These are not normally of interest, and their closing braces are
11088        # often followed by commas or semicolons anyway.  This also avoids
11089        # possible erratic output due to line numbering inconsistencies
11090        # in the cases where their closing braces terminate a line.
11091        && $block_type_to_go[$i_terminal] ne 'sub'
11092
11093        # ..and the corresponding opening brace must is not in this batch
11094        # (because we do not need to tag one-line blocks, although this
11095        # should also be caught with a positive -csci value)
11096        && $mate_index_to_go[$i_terminal] < 0
11097
11098        # ..and either
11099        && (
11100
11101            # this is the last token (line doesnt have a side comment)
11102            !$have_side_comment
11103
11104            # or the old side comment is a closing side comment
11105            || $tokens_to_go[$max_index_to_go] =~
11106            /$closing_side_comment_prefix_pattern/o
11107        )
11108      )
11109    {
11110
11111        # then make the closing side comment text
11112        my $token =
11113"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
11114
11115        # append any extra descriptive text collected above
11116        if ( $i_block_leading_text == $i_terminal ) {
11117            $token .= $block_leading_text;
11118        }
11119
11120        $token = balance_csc_text($token)
11121          if $rOpts->{'closing-side-comments-balanced'};
11122
11123        $token =~ s/\s*$//;    # trim any trailing whitespace
11124
11125        # handle case of existing closing side comment
11126        if ($have_side_comment) {
11127
11128            # warn if requested and tokens differ significantly
11129            if ( $rOpts->{'closing-side-comment-warnings'} ) {
11130                my $old_csc = $tokens_to_go[$max_index_to_go];
11131                my $new_csc = $token;
11132                $new_csc =~ s/\s+//g;            # trim all whitespace
11133                $old_csc =~ s/\s+//g;            # trim all whitespace
11134                $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11135                $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11136                $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
11137                my $new_trailing_dots = $1;
11138                $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
11139
11140                # Patch to handle multiple closing side comments at
11141                # else and elsif's.  These have become too complicated
11142                # to check, so if we see an indication of
11143                # '[ if' or '[ # elsif', then assume they were made
11144                # by perltidy.
11145                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
11146                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
11147                }
11148                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
11149                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
11150                }
11151
11152                # if old comment is contained in new comment,
11153                # only compare the common part.
11154                if ( length($new_csc) > length($old_csc) ) {
11155                    $new_csc = substr( $new_csc, 0, length($old_csc) );
11156                }
11157
11158                # if the new comment is shorter and has been limited,
11159                # only compare the common part.
11160                if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
11161                {
11162                    $old_csc = substr( $old_csc, 0, length($new_csc) );
11163                }
11164
11165                # any remaining difference?
11166                if ( $new_csc ne $old_csc ) {
11167
11168                    # just leave the old comment if we are below the threshold
11169                    # for creating side comments
11170                    if ( $block_line_count <
11171                        $rOpts->{'closing-side-comment-interval'} )
11172                    {
11173                        $token = undef;
11174                    }
11175
11176                    # otherwise we'll make a note of it
11177                    else {
11178
11179                        warning(
11180"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
11181                        );
11182
11183                     # save the old side comment in a new trailing block comment
11184                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
11185                        $year  += 1900;
11186                        $month += 1;
11187                        $cscw_block_comment =
11188"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
11189                    }
11190                }
11191                else {
11192
11193                    # No differences.. we can safely delete old comment if we
11194                    # are below the threshold
11195                    if ( $block_line_count <
11196                        $rOpts->{'closing-side-comment-interval'} )
11197                    {
11198                        $token = undef;
11199                        unstore_token_to_go()
11200                          if ( $types_to_go[$max_index_to_go] eq '#' );
11201                        unstore_token_to_go()
11202                          if ( $types_to_go[$max_index_to_go] eq 'b' );
11203                    }
11204                }
11205            }
11206
11207            # switch to the new csc (unless we deleted it!)
11208            $tokens_to_go[$max_index_to_go] = $token if $token;
11209        }
11210
11211        # handle case of NO existing closing side comment
11212        else {
11213
11214            # insert the new side comment into the output token stream
11215            my $type          = '#';
11216            my $block_type    = '';
11217            my $type_sequence = '';
11218            my $container_environment =
11219              $container_environment_to_go[$max_index_to_go];
11220            my $level                = $levels_to_go[$max_index_to_go];
11221            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
11222            my $no_internal_newlines = 0;
11223
11224            my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
11225            my $ci_level           = $ci_levels_to_go[$max_index_to_go];
11226            my $in_continued_quote = 0;
11227
11228            # first insert a blank token
11229            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
11230
11231            # then the side comment
11232            insert_new_token_to_go( $token, $type, $slevel,
11233                $no_internal_newlines );
11234        }
11235    }
11236    return $cscw_block_comment;
11237}
11238
11239sub previous_nonblank_token {
11240    my ($i)  = @_;
11241    my $name = "";
11242    my $im   = $i - 1;
11243    return "" if ( $im < 0 );
11244    if ( $types_to_go[$im] eq 'b' ) { $im--; }
11245    return "" if ( $im < 0 );
11246    $name = $tokens_to_go[$im];
11247
11248    # prepend any sub name to an isolated -> to avoid unwanted alignments
11249    # [test case is test8/penco.pl]
11250    if ( $name eq '->' ) {
11251        $im--;
11252        if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
11253            $name = $tokens_to_go[$im] . $name;
11254        }
11255    }
11256    return $name;
11257}
11258
11259sub send_lines_to_vertical_aligner {
11260
11261    my ( $ri_first, $ri_last, $do_not_pad ) = @_;
11262
11263    my $rindentation_list = [0];    # ref to indentations for each line
11264
11265    # define the array @matching_token_to_go for the output tokens
11266    # which will be non-blank for each special token (such as =>)
11267    # for which alignment is required.
11268    set_vertical_alignment_markers( $ri_first, $ri_last );
11269
11270    # flush if necessary to avoid unwanted alignment
11271    my $must_flush = 0;
11272    if ( @$ri_first > 1 ) {
11273
11274        # flush before a long if statement
11275        if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
11276            $must_flush = 1;
11277        }
11278    }
11279    if ($must_flush) {
11280        Perl::Tidy::VerticalAligner::flush();
11281    }
11282
11283    undo_ci( $ri_first, $ri_last );
11284
11285    set_logical_padding( $ri_first, $ri_last );
11286
11287    # loop to prepare each line for shipment
11288    my $n_last_line = @$ri_first - 1;
11289    my $in_comma_list;
11290    for my $n ( 0 .. $n_last_line ) {
11291        my $ibeg = $$ri_first[$n];
11292        my $iend = $$ri_last[$n];
11293
11294        my ( $rtokens, $rfields, $rpatterns ) =
11295          make_alignment_patterns( $ibeg, $iend );
11296
11297        my ( $indentation, $lev, $level_end, $terminal_type,
11298            $is_semicolon_terminated, $is_outdented_line )
11299          = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
11300            $ri_first, $ri_last, $rindentation_list );
11301
11302        # we will allow outdenting of long lines..
11303        my $outdent_long_lines = (
11304
11305            # which are long quotes, if allowed
11306            ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11307
11308            # which are long block comments, if allowed
11309              || (
11310                   $types_to_go[$ibeg] eq '#'
11311                && $rOpts->{'outdent-long-comments'}
11312
11313                # but not if this is a static block comment
11314                && !$is_static_block_comment
11315              )
11316        );
11317
11318        my $level_jump =
11319          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11320
11321        my $rvertical_tightness_flags =
11322          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11323            $ri_first, $ri_last );
11324
11325        # flush an outdented line to avoid any unwanted vertical alignment
11326        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11327
11328        my $is_terminal_ternary = 0;
11329        if (   $tokens_to_go[$ibeg] eq ':'
11330            || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11331        {
11332            if (   ( $terminal_type eq ';' && $level_end <= $lev )
11333                || ( $level_end < $lev ) )
11334            {
11335                $is_terminal_ternary = 1;
11336            }
11337        }
11338
11339        # send this new line down the pipe
11340        my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11341        Perl::Tidy::VerticalAligner::append_line(
11342            $lev,
11343            $level_end,
11344            $indentation,
11345            $rfields,
11346            $rtokens,
11347            $rpatterns,
11348            $forced_breakpoint_to_go[$iend] || $in_comma_list,
11349            $outdent_long_lines,
11350            $is_terminal_ternary,
11351            $is_semicolon_terminated,
11352            $do_not_pad,
11353            $rvertical_tightness_flags,
11354            $level_jump,
11355        );
11356        $in_comma_list =
11357          $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11358
11359        # flush an outdented line to avoid any unwanted vertical alignment
11360        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11361
11362        $do_not_pad = 0;
11363
11364    }    # end of loop to output each line
11365
11366    # remember indentation of lines containing opening containers for
11367    # later use by sub set_adjusted_indentation
11368    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11369}
11370
11371{        # begin make_alignment_patterns
11372
11373    my %block_type_map;
11374    my %keyword_map;
11375
11376    BEGIN {
11377
11378        # map related block names into a common name to
11379        # allow alignment
11380        %block_type_map = (
11381            'unless'  => 'if',
11382            'else'    => 'if',
11383            'elsif'   => 'if',
11384            'when'    => 'if',
11385            'default' => 'if',
11386            'case'    => 'if',
11387            'sort'    => 'map',
11388            'grep'    => 'map',
11389        );
11390
11391        # map certain keywords to the same 'if' class to align
11392        # long if/elsif sequences. [elsif.pl]
11393        %keyword_map = (
11394            'unless'  => 'if',
11395            'else'    => 'if',
11396            'elsif'   => 'if',
11397            'when'    => 'given',
11398            'default' => 'given',
11399            'case'    => 'switch',
11400
11401            # treat an 'undef' similar to numbers and quotes
11402            'undef' => 'Q',
11403        );
11404    }
11405
11406    sub make_alignment_patterns {
11407
11408        # Here we do some important preliminary work for the
11409        # vertical aligner.  We create three arrays for one
11410        # output line. These arrays contain strings that can
11411        # be tested by the vertical aligner to see if
11412        # consecutive lines can be aligned vertically.
11413        #
11414        # The three arrays are indexed on the vertical
11415        # alignment fields and are:
11416        # @tokens - a list of any vertical alignment tokens for this line.
11417        #   These are tokens, such as '=' '&&' '#' etc which
11418        #   we want to might align vertically.  These are
11419        #   decorated with various information such as
11420        #   nesting depth to prevent unwanted vertical
11421        #   alignment matches.
11422        # @fields - the actual text of the line between the vertical alignment
11423        #   tokens.
11424        # @patterns - a modified list of token types, one for each alignment
11425        #   field.  These should normally each match before alignment is
11426        #   allowed, even when the alignment tokens match.
11427        my ( $ibeg, $iend ) = @_;
11428        my @tokens   = ();
11429        my @fields   = ();
11430        my @patterns = ();
11431        my $i_start  = $ibeg;
11432        my $i;
11433
11434        my $depth                 = 0;
11435        my @container_name        = ("");
11436        my @multiple_comma_arrows = (undef);
11437
11438        my $j = 0;    # field index
11439
11440        $patterns[0] = "";
11441        for $i ( $ibeg .. $iend ) {
11442
11443            # Keep track of containers balanced on this line only.
11444            # These are used below to prevent unwanted cross-line alignments.
11445            # Unbalanced containers already avoid aligning across
11446            # container boundaries.
11447            if ( $tokens_to_go[$i] eq '(' ) {
11448
11449                # if container is balanced on this line...
11450                my $i_mate = $mate_index_to_go[$i];
11451                if ( $i_mate > $i && $i_mate <= $iend ) {
11452                    $depth++;
11453                    my $seqno = $type_sequence_to_go[$i];
11454                    my $count = comma_arrow_count($seqno);
11455                    $multiple_comma_arrows[$depth] = $count && $count > 1;
11456
11457                    # Append the previous token name to make the container name
11458                    # more unique.  This name will also be given to any commas
11459                    # within this container, and it helps avoid undesirable
11460                    # alignments of different types of containers.
11461                    my $name = previous_nonblank_token($i);
11462                    $name =~ s/^->//;
11463                    $container_name[$depth] = "+" . $name;
11464
11465                    # Make the container name even more unique if necessary.
11466                    # If we are not vertically aligning this opening paren,
11467                    # append a character count to avoid bad alignment because
11468                    # it usually looks bad to align commas within continers
11469                    # for which the opening parens do not align.  Here
11470                    # is an example very BAD alignment of commas (because
11471                    # the atan2 functions are not all aligned):
11472                    #    $XY =
11473                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11474                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11475                    #      $X * atan2( $X,            1 ) -
11476                    #      $Y * atan2( $Y,            1 );
11477                    #
11478                    # On the other hand, it is usually okay to align commas if
11479                    # opening parens align, such as:
11480                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
11481                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
11482                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
11483                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
11484                    #
11485                    # To distinguish between these situations, we will
11486                    # append the length of the line from the previous matching
11487                    # token, or beginning of line, to the function name.  This
11488                    # will allow the vertical aligner to reject undesirable
11489                    # matches.
11490
11491                    # if we are not aligning on this paren...
11492                    if ( $matching_token_to_go[$i] eq '' ) {
11493
11494                        # Sum length from previous alignment, or start of line.
11495                        # Note that we have to sum token lengths here because
11496                        # padding has been done and so array $lengths_to_go
11497                        # is now wrong.
11498                        my $len =
11499                          length(
11500                            join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11501                        $len += leading_spaces_to_go($i_start)
11502                          if ( $i_start == $ibeg );
11503
11504                        # tack length onto the container name to make unique
11505                        $container_name[$depth] .= "-" . $len;
11506                    }
11507                }
11508            }
11509            elsif ( $tokens_to_go[$i] eq ')' ) {
11510                $depth-- if $depth > 0;
11511            }
11512
11513            # if we find a new synchronization token, we are done with
11514            # a field
11515            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11516
11517                my $tok = my $raw_tok = $matching_token_to_go[$i];
11518
11519                # make separators in different nesting depths unique
11520                # by appending the nesting depth digit.
11521                if ( $raw_tok ne '#' ) {
11522                    $tok .= "$nesting_depth_to_go[$i]";
11523                }
11524
11525                # also decorate commas with any container name to avoid
11526                # unwanted cross-line alignments.
11527                if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11528                    if ( $container_name[$depth] ) {
11529                        $tok .= $container_name[$depth];
11530                    }
11531                }
11532
11533                # Patch to avoid aligning leading and trailing if, unless.
11534                # Mark trailing if, unless statements with container names.
11535                # This makes them different from leading if, unless which
11536                # are not so marked at present.  If we ever need to name
11537                # them too, we could use ci to distinguish them.
11538                # Example problem to avoid:
11539                #    return ( 2, "DBERROR" )
11540                #      if ( $retval == 2 );
11541                #    if   ( scalar @_ ) {
11542                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
11543                #    }
11544                if ( $raw_tok eq '(' ) {
11545                    my $ci = $ci_levels_to_go[$ibeg];
11546                    if (   $container_name[$depth] =~ /^\+(if|unless)/
11547                        && $ci )
11548                    {
11549                        $tok .= $container_name[$depth];
11550                    }
11551                }
11552
11553                # Decorate block braces with block types to avoid
11554                # unwanted alignments such as the following:
11555                # foreach ( @{$routput_array} ) { $fh->print($_) }
11556                # eval                          { $fh->close() };
11557                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11558                    my $block_type = $block_type_to_go[$i];
11559
11560                    # map certain related block types to allow
11561                    # else blocks to align
11562                    $block_type = $block_type_map{$block_type}
11563                      if ( defined( $block_type_map{$block_type} ) );
11564
11565                    # remove sub names to allow one-line sub braces to align
11566                    # regardless of name
11567                    if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
11568
11569                    # allow all control-type blocks to align
11570                    if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
11571
11572                    $tok .= $block_type;
11573                }
11574
11575                # concatenate the text of the consecutive tokens to form
11576                # the field
11577                push( @fields,
11578                    join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11579
11580                # store the alignment token for this field
11581                push( @tokens, $tok );
11582
11583                # get ready for the next batch
11584                $i_start = $i;
11585                $j++;
11586                $patterns[$j] = "";
11587            }
11588
11589            # continue accumulating tokens
11590            # handle non-keywords..
11591            if ( $types_to_go[$i] ne 'k' ) {
11592                my $type = $types_to_go[$i];
11593
11594                # Mark most things before arrows as a quote to
11595                # get them to line up. Testfile: mixed.pl.
11596                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
11597                    my $next_type = $types_to_go[ $i + 1 ];
11598                    my $i_next_nonblank =
11599                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11600
11601                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
11602                        $type = 'Q';
11603
11604                        # Patch to ignore leading minus before words,
11605                        # by changing pattern 'mQ' into just 'Q',
11606                        # so that we can align things like this:
11607                        #  Button   => "Print letter \"~$_\"",
11608                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
11609                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
11610                    }
11611                }
11612
11613                # patch to make numbers and quotes align
11614                if ( $type eq 'n' ) { $type = 'Q' }
11615
11616                # patch to ignore any ! in patterns
11617                if ( $type eq '!' ) { $type = '' }
11618
11619                $patterns[$j] .= $type;
11620            }
11621
11622            # for keywords we have to use the actual text
11623            else {
11624
11625                my $tok = $tokens_to_go[$i];
11626
11627                # but map certain keywords to a common string to allow
11628                # alignment.
11629                $tok = $keyword_map{$tok}
11630                  if ( defined( $keyword_map{$tok} ) );
11631                $patterns[$j] .= $tok;
11632            }
11633        }
11634
11635        # done with this line .. join text of tokens to make the last field
11636        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
11637        return ( \@tokens, \@fields, \@patterns );
11638    }
11639
11640}    # end make_alignment_patterns
11641
11642{    # begin unmatched_indexes
11643
11644    # closure to keep track of unbalanced containers.
11645    # arrays shared by the routines in this block:
11646    my @unmatched_opening_indexes_in_this_batch;
11647    my @unmatched_closing_indexes_in_this_batch;
11648    my %comma_arrow_count;
11649
11650    sub is_unbalanced_batch {
11651        @unmatched_opening_indexes_in_this_batch +
11652          @unmatched_closing_indexes_in_this_batch;
11653    }
11654
11655    sub comma_arrow_count {
11656        my $seqno = $_[0];
11657        return $comma_arrow_count{$seqno};
11658    }
11659
11660    sub match_opening_and_closing_tokens {
11661
11662        # Match up indexes of opening and closing braces, etc, in this batch.
11663        # This has to be done after all tokens are stored because unstoring
11664        # of tokens would otherwise cause trouble.
11665
11666        @unmatched_opening_indexes_in_this_batch = ();
11667        @unmatched_closing_indexes_in_this_batch = ();
11668        %comma_arrow_count                       = ();
11669
11670        my ( $i, $i_mate, $token );
11671        foreach $i ( 0 .. $max_index_to_go ) {
11672            if ( $type_sequence_to_go[$i] ) {
11673                $token = $tokens_to_go[$i];
11674                if ( $token =~ /^[\(\[\{\?]$/ ) {
11675                    push @unmatched_opening_indexes_in_this_batch, $i;
11676                }
11677                elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11678
11679                    $i_mate = pop @unmatched_opening_indexes_in_this_batch;
11680                    if ( defined($i_mate) && $i_mate >= 0 ) {
11681                        if ( $type_sequence_to_go[$i_mate] ==
11682                            $type_sequence_to_go[$i] )
11683                        {
11684                            $mate_index_to_go[$i]      = $i_mate;
11685                            $mate_index_to_go[$i_mate] = $i;
11686                        }
11687                        else {
11688                            push @unmatched_opening_indexes_in_this_batch,
11689                              $i_mate;
11690                            push @unmatched_closing_indexes_in_this_batch, $i;
11691                        }
11692                    }
11693                    else {
11694                        push @unmatched_closing_indexes_in_this_batch, $i;
11695                    }
11696                }
11697            }
11698            elsif ( $tokens_to_go[$i] eq '=>' ) {
11699                if (@unmatched_opening_indexes_in_this_batch) {
11700                    my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11701                    my $seqno = $type_sequence_to_go[$j];
11702                    $comma_arrow_count{$seqno}++;
11703                }
11704            }
11705        }
11706    }
11707
11708    sub save_opening_indentation {
11709
11710        # This should be called after each batch of tokens is output. It
11711        # saves indentations of lines of all unmatched opening tokens.
11712        # These will be used by sub get_opening_indentation.
11713
11714        my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11715
11716        # we no longer need indentations of any saved indentations which
11717        # are unmatched closing tokens in this batch, because we will
11718        # never encounter them again.  So we can delete them to keep
11719        # the hash size down.
11720        foreach (@unmatched_closing_indexes_in_this_batch) {
11721            my $seqno = $type_sequence_to_go[$_];
11722            delete $saved_opening_indentation{$seqno};
11723        }
11724
11725        # we need to save indentations of any unmatched opening tokens
11726        # in this batch because we may need them in a subsequent batch.
11727        foreach (@unmatched_opening_indexes_in_this_batch) {
11728            my $seqno = $type_sequence_to_go[$_];
11729            $saved_opening_indentation{$seqno} = [
11730                lookup_opening_indentation(
11731                    $_, $ri_first, $ri_last, $rindentation_list
11732                )
11733            ];
11734        }
11735    }
11736}    # end unmatched_indexes
11737
11738sub get_opening_indentation {
11739
11740    # get the indentation of the line which output the opening token
11741    # corresponding to a given closing token in the current output batch.
11742    #
11743    # given:
11744    # $i_closing - index in this line of a closing token ')' '}' or ']'
11745    #
11746    # $ri_first - reference to list of the first index $i for each output
11747    #               line in this batch
11748    # $ri_last - reference to list of the last index $i for each output line
11749    #              in this batch
11750    # $rindentation_list - reference to a list containing the indentation
11751    #            used for each line.
11752    #
11753    # return:
11754    #   -the indentation of the line which contained the opening token
11755    #    which matches the token at index $i_opening
11756    #   -and its offset (number of columns) from the start of the line
11757    #
11758    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11759
11760    # first, see if the opening token is in the current batch
11761    my $i_opening = $mate_index_to_go[$i_closing];
11762    my ( $indent, $offset, $is_leading, $exists );
11763    $exists = 1;
11764    if ( $i_opening >= 0 ) {
11765
11766        # it is..look up the indentation
11767        ( $indent, $offset, $is_leading ) =
11768          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11769            $rindentation_list );
11770    }
11771
11772    # if not, it should have been stored in the hash by a previous batch
11773    else {
11774        my $seqno = $type_sequence_to_go[$i_closing];
11775        if ($seqno) {
11776            if ( $saved_opening_indentation{$seqno} ) {
11777                ( $indent, $offset, $is_leading ) =
11778                  @{ $saved_opening_indentation{$seqno} };
11779            }
11780
11781            # some kind of serious error
11782            # (example is badfile.t)
11783            else {
11784                $indent     = 0;
11785                $offset     = 0;
11786                $is_leading = 0;
11787                $exists     = 0;
11788            }
11789        }
11790
11791        # if no sequence number it must be an unbalanced container
11792        else {
11793            $indent     = 0;
11794            $offset     = 0;
11795            $is_leading = 0;
11796            $exists     = 0;
11797        }
11798    }
11799    return ( $indent, $offset, $is_leading, $exists );
11800}
11801
11802sub lookup_opening_indentation {
11803
11804    # get the indentation of the line in the current output batch
11805    # which output a selected opening token
11806    #
11807    # given:
11808    #   $i_opening - index of an opening token in the current output batch
11809    #                whose line indentation we need
11810    #   $ri_first - reference to list of the first index $i for each output
11811    #               line in this batch
11812    #   $ri_last - reference to list of the last index $i for each output line
11813    #              in this batch
11814    #   $rindentation_list - reference to a list containing the indentation
11815    #            used for each line.  (NOTE: the first slot in
11816    #            this list is the last returned line number, and this is
11817    #            followed by the list of indentations).
11818    #
11819    # return
11820    #   -the indentation of the line which contained token $i_opening
11821    #   -and its offset (number of columns) from the start of the line
11822
11823    my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11824
11825    my $nline = $rindentation_list->[0];    # line number of previous lookup
11826
11827    # reset line location if necessary
11828    $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11829
11830    # find the correct line
11831    unless ( $i_opening > $ri_last->[-1] ) {
11832        while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11833    }
11834
11835    # error - token index is out of bounds - shouldn't happen
11836    else {
11837        warning(
11838"non-fatal program bug in lookup_opening_indentation - index out of range\n"
11839        );
11840        report_definite_bug();
11841        $nline = $#{$ri_last};
11842    }
11843
11844    $rindentation_list->[0] =
11845      $nline;    # save line number to start looking next call
11846    my $ibeg       = $ri_start->[$nline];
11847    my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
11848    my $is_leading = ( $ibeg == $i_opening );
11849    return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
11850}
11851
11852{
11853    my %is_if_elsif_else_unless_while_until_for_foreach;
11854
11855    BEGIN {
11856
11857        # These block types may have text between the keyword and opening
11858        # curly.  Note: 'else' does not, but must be included to allow trailing
11859        # if/elsif text to be appended.
11860        # patch for SWITCH/CASE: added 'case' and 'when'
11861        @_ = qw(if elsif else unless while until for foreach case when);
11862        @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11863    }
11864
11865    sub set_adjusted_indentation {
11866
11867        # This routine has the final say regarding the actual indentation of
11868        # a line.  It starts with the basic indentation which has been
11869        # defined for the leading token, and then takes into account any
11870        # options that the user has set regarding special indenting and
11871        # outdenting.
11872
11873        my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11874            $rindentation_list )
11875          = @_;
11876
11877        # we need to know the last token of this line
11878        my ( $terminal_type, $i_terminal ) =
11879          terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11880
11881        my $is_outdented_line = 0;
11882
11883        my $is_semicolon_terminated = $terminal_type eq ';'
11884          && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11885
11886        ##########################################################
11887        # Section 1: set a flag and a default indentation
11888        #
11889        # Most lines are indented according to the initial token.
11890        # But it is common to outdent to the level just after the
11891        # terminal token in certain cases...
11892        # adjust_indentation flag:
11893        #       0 - do not adjust
11894        #       1 - outdent
11895        #       2 - vertically align with opening token
11896        #       3 - indent
11897        ##########################################################
11898        my $adjust_indentation         = 0;
11899        my $default_adjust_indentation = $adjust_indentation;
11900
11901        my (
11902            $opening_indentation, $opening_offset,
11903            $is_leading,          $opening_exists
11904        );
11905
11906        # if we are at a closing token of some type..
11907        if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11908
11909            # get the indentation of the line containing the corresponding
11910            # opening token
11911            (
11912                $opening_indentation, $opening_offset,
11913                $is_leading,          $opening_exists
11914              )
11915              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11916                $rindentation_list );
11917
11918            # First set the default behavior:
11919            # default behavior is to outdent closing lines
11920            # of the form:   ");  };  ];  )->xxx;"
11921            if (
11922                $is_semicolon_terminated
11923
11924                # and 'cuddled parens' of the form:   ")->pack("
11925                || (
11926                       $terminal_type eq '('
11927                    && $types_to_go[$ibeg] eq ')'
11928                    && ( $nesting_depth_to_go[$iend] + 1 ==
11929                        $nesting_depth_to_go[$ibeg] )
11930                )
11931              )
11932            {
11933                $adjust_indentation = 1;
11934            }
11935
11936            # TESTING: outdent something like '),'
11937            if (
11938                $terminal_type eq ','
11939
11940                # allow just one character before the comma
11941                && $i_terminal == $ibeg + 1
11942
11943                # requre LIST environment; otherwise, we may outdent too much --
11944                # this can happen in calls without parentheses (overload.t);
11945                && $container_environment_to_go[$i_terminal] eq 'LIST'
11946              )
11947            {
11948                $adjust_indentation = 1;
11949            }
11950
11951            # undo continuation indentation of a terminal closing token if
11952            # it is the last token before a level decrease.  This will allow
11953            # a closing token to line up with its opening counterpart, and
11954            # avoids a indentation jump larger than 1 level.
11955            if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11956                && $i_terminal == $ibeg )
11957            {
11958                my $ci        = $ci_levels_to_go[$ibeg];
11959                my $lev       = $levels_to_go[$ibeg];
11960                my $next_type = $types_to_go[ $ibeg + 1 ];
11961                my $i_next_nonblank =
11962                  ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11963                if (   $i_next_nonblank <= $max_index_to_go
11964                    && $levels_to_go[$i_next_nonblank] < $lev )
11965                {
11966                    $adjust_indentation = 1;
11967                }
11968            }
11969
11970            # YVES patch 1 of 2:
11971            # Undo ci of line with leading closing eval brace,
11972            # but not beyond the indention of the line with
11973            # the opening brace.
11974            if (   $block_type_to_go[$ibeg] eq 'eval'
11975                && !$rOpts->{'line-up-parentheses'}
11976                && !$rOpts->{'indent-closing-brace'} )
11977            {
11978                (
11979                    $opening_indentation, $opening_offset,
11980                    $is_leading,          $opening_exists
11981                  )
11982                  = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11983                    $rindentation_list );
11984                my $indentation = $leading_spaces_to_go[$ibeg];
11985                if ( defined($opening_indentation)
11986                    && $indentation > $opening_indentation )
11987                {
11988                    $adjust_indentation = 1;
11989                }
11990            }
11991
11992            $default_adjust_indentation = $adjust_indentation;
11993
11994            # Now modify default behavior according to user request:
11995            # handle option to indent non-blocks of the form );  };  ];
11996            # But don't do special indentation to something like ')->pack('
11997            if ( !$block_type_to_go[$ibeg] ) {
11998                my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11999                if ( $cti == 1 ) {
12000                    if (   $i_terminal <= $ibeg + 1
12001                        || $is_semicolon_terminated )
12002                    {
12003                        $adjust_indentation = 2;
12004                    }
12005                    else {
12006                        $adjust_indentation = 0;
12007                    }
12008                }
12009                elsif ( $cti == 2 ) {
12010                    if ($is_semicolon_terminated) {
12011                        $adjust_indentation = 3;
12012                    }
12013                    else {
12014                        $adjust_indentation = 0;
12015                    }
12016                }
12017                elsif ( $cti == 3 ) {
12018                    $adjust_indentation = 3;
12019                }
12020            }
12021
12022            # handle option to indent blocks
12023            else {
12024                if (
12025                    $rOpts->{'indent-closing-brace'}
12026                    && (
12027                        $i_terminal == $ibeg    #  isolated terminal '}'
12028                        || $is_semicolon_terminated
12029                    )
12030                  )                             #  } xxxx ;
12031                {
12032                    $adjust_indentation = 3;
12033                }
12034            }
12035        }
12036
12037        # if at ');', '};', '>;', and '];' of a terminal qw quote
12038        elsif ($$rpatterns[0] =~ /^qb*;$/
12039            && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12040        {
12041            if ( $closing_token_indentation{$1} == 0 ) {
12042                $adjust_indentation = 1;
12043            }
12044            else {
12045                $adjust_indentation = 3;
12046            }
12047        }
12048
12049        # if line begins with a ':', align it with any
12050        # previous line leading with corresponding ?
12051        elsif ( $types_to_go[$ibeg] eq ':' ) {
12052            (
12053                $opening_indentation, $opening_offset,
12054                $is_leading,          $opening_exists
12055              )
12056              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12057                $rindentation_list );
12058            if ($is_leading) { $adjust_indentation = 2; }
12059        }
12060
12061        ##########################################################
12062        # Section 2: set indentation according to flag set above
12063        #
12064        # Select the indentation object to define leading
12065        # whitespace.  If we are outdenting something like '} } );'
12066        # then we want to use one level below the last token
12067        # ($i_terminal) in order to get it to fully outdent through
12068        # all levels.
12069        ##########################################################
12070        my $indentation;
12071        my $lev;
12072        my $level_end = $levels_to_go[$iend];
12073
12074        if ( $adjust_indentation == 0 ) {
12075            $indentation = $leading_spaces_to_go[$ibeg];
12076            $lev         = $levels_to_go[$ibeg];
12077        }
12078        elsif ( $adjust_indentation == 1 ) {
12079            $indentation = $reduced_spaces_to_go[$i_terminal];
12080            $lev         = $levels_to_go[$i_terminal];
12081        }
12082
12083        # handle indented closing token which aligns with opening token
12084        elsif ( $adjust_indentation == 2 ) {
12085
12086            # handle option to align closing token with opening token
12087            $lev = $levels_to_go[$ibeg];
12088
12089            # calculate spaces needed to align with opening token
12090            my $space_count =
12091              get_SPACES($opening_indentation) + $opening_offset;
12092
12093            # Indent less than the previous line.
12094            #
12095            # Problem: For -lp we don't exactly know what it was if there
12096            # were recoverable spaces sent to the aligner.  A good solution
12097            # would be to force a flush of the vertical alignment buffer, so
12098            # that we would know.  For now, this rule is used for -lp:
12099            #
12100            # When the last line did not start with a closing token we will
12101            # be optimistic that the aligner will recover everything wanted.
12102            #
12103            # This rule will prevent us from breaking a hierarchy of closing
12104            # tokens, and in a worst case will leave a closing paren too far
12105            # indented, but this is better than frequently leaving it not
12106            # indented enough.
12107            my $last_spaces = get_SPACES($last_indentation_written);
12108            if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
12109                $last_spaces +=
12110                  get_RECOVERABLE_SPACES($last_indentation_written);
12111            }
12112
12113            # reset the indentation to the new space count if it works
12114            # only options are all or none: nothing in-between looks good
12115            $lev = $levels_to_go[$ibeg];
12116            if ( $space_count < $last_spaces ) {
12117                if ($rOpts_line_up_parentheses) {
12118                    my $lev = $levels_to_go[$ibeg];
12119                    $indentation =
12120                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12121                }
12122                else {
12123                    $indentation = $space_count;
12124                }
12125            }
12126
12127            # revert to default if it doesnt work
12128            else {
12129                $space_count = leading_spaces_to_go($ibeg);
12130                if ( $default_adjust_indentation == 0 ) {
12131                    $indentation = $leading_spaces_to_go[$ibeg];
12132                }
12133                elsif ( $default_adjust_indentation == 1 ) {
12134                    $indentation = $reduced_spaces_to_go[$i_terminal];
12135                    $lev         = $levels_to_go[$i_terminal];
12136                }
12137            }
12138        }
12139
12140        # Full indentaion of closing tokens (-icb and -icp or -cti=2)
12141        else {
12142
12143            # handle -icb (indented closing code block braces)
12144            # Updated method for indented block braces: indent one full level if
12145            # there is no continuation indentation.  This will occur for major
12146            # structures such as sub, if, else, but not for things like map
12147            # blocks.
12148            #
12149            # Note: only code blocks without continuation indentation are
12150            # handled here (if, else, unless, ..). In the following snippet,
12151            # the terminal brace of the sort block will have continuation
12152            # indentation as shown so it will not be handled by the coding
12153            # here.  We would have to undo the continuation indentation to do
12154            # this, but it probably looks ok as is.  This is a possible future
12155            # update for semicolon terminated lines.
12156            #
12157            #     if ($sortby eq 'date' or $sortby eq 'size') {
12158            #         @files = sort {
12159            #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
12160            #                 or $a cmp $b
12161            #                 } @files;
12162            #         }
12163            #
12164            if (   $block_type_to_go[$ibeg]
12165                && $ci_levels_to_go[$i_terminal] == 0 )
12166            {
12167                my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12168                $indentation = $spaces + $rOpts_indent_columns;
12169
12170                # NOTE: for -lp we could create a new indentation object, but
12171                # there is probably no need to do it
12172            }
12173
12174            # handle -icp and any -icb block braces which fall through above
12175            # test such as the 'sort' block mentioned above.
12176            else {
12177
12178                # There are currently two ways to handle -icp...
12179                # One way is to use the indentation of the previous line:
12180                # $indentation = $last_indentation_written;
12181
12182                # The other way is to use the indentation that the previous line
12183                # would have had if it hadn't been adjusted:
12184                $indentation = $last_unadjusted_indentation;
12185
12186                # Current method: use the minimum of the two. This avoids
12187                # inconsistent indentation.
12188                if ( get_SPACES($last_indentation_written) <
12189                    get_SPACES($indentation) )
12190                {
12191                    $indentation = $last_indentation_written;
12192                }
12193            }
12194
12195            # use previous indentation but use own level
12196            # to cause list to be flushed properly
12197            $lev = $levels_to_go[$ibeg];
12198        }
12199
12200        # remember indentation except for multi-line quotes, which get
12201        # no indentation
12202        unless ( $ibeg == 0 && $starting_in_quote ) {
12203            $last_indentation_written    = $indentation;
12204            $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12205            $last_leading_token          = $tokens_to_go[$ibeg];
12206        }
12207
12208        # be sure lines with leading closing tokens are not outdented more
12209        # than the line which contained the corresponding opening token.
12210
12211        #############################################################
12212        # updated per bug report in alex_bug.pl: we must not
12213        # mess with the indentation of closing logical braces so
12214        # we must treat something like '} else {' as if it were
12215        # an isolated brace my $is_isolated_block_brace = (
12216        # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12217        #############################################################
12218        my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12219          && ( $iend == $ibeg
12220            || $is_if_elsif_else_unless_while_until_for_foreach{
12221                $block_type_to_go[$ibeg] } );
12222
12223        # only do this for a ':; which is aligned with its leading '?'
12224        my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12225        if (   defined($opening_indentation)
12226            && !$is_isolated_block_brace
12227            && !$is_unaligned_colon )
12228        {
12229            if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12230                $indentation = $opening_indentation;
12231            }
12232        }
12233
12234        # remember the indentation of each line of this batch
12235        push @{$rindentation_list}, $indentation;
12236
12237        # outdent lines with certain leading tokens...
12238        if (
12239
12240            # must be first word of this batch
12241            $ibeg == 0
12242
12243            # and ...
12244            && (
12245
12246                # certain leading keywords if requested
12247                (
12248                       $rOpts->{'outdent-keywords'}
12249                    && $types_to_go[$ibeg] eq 'k'
12250                    && $outdent_keyword{ $tokens_to_go[$ibeg] }
12251                )
12252
12253                # or labels if requested
12254                || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12255
12256                # or static block comments if requested
12257                || (   $types_to_go[$ibeg] eq '#'
12258                    && $rOpts->{'outdent-static-block-comments'}
12259                    && $is_static_block_comment )
12260            )
12261          )
12262
12263        {
12264            my $space_count = leading_spaces_to_go($ibeg);
12265            if ( $space_count > 0 ) {
12266                $space_count -= $rOpts_continuation_indentation;
12267                $is_outdented_line = 1;
12268                if ( $space_count < 0 ) { $space_count = 0 }
12269
12270                # do not promote a spaced static block comment to non-spaced;
12271                # this is not normally necessary but could be for some
12272                # unusual user inputs (such as -ci = -i)
12273                if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12274                    $space_count = 1;
12275                }
12276
12277                if ($rOpts_line_up_parentheses) {
12278                    $indentation =
12279                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12280                }
12281                else {
12282                    $indentation = $space_count;
12283                }
12284            }
12285        }
12286
12287        return ( $indentation, $lev, $level_end, $terminal_type,
12288            $is_semicolon_terminated, $is_outdented_line );
12289    }
12290}
12291
12292sub set_vertical_tightness_flags {
12293
12294    my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12295
12296    # Define vertical tightness controls for the nth line of a batch.
12297    # We create an array of parameters which tell the vertical aligner
12298    # if we should combine this line with the next line to achieve the
12299    # desired vertical tightness.  The array of parameters contains:
12300    #
12301    #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
12302    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12303    #             if closing: spaces of padding to use
12304    #   [2] sequence number of container
12305    #   [3] valid flag: do not append if this flag is false. Will be
12306    #       true if appropriate -vt flag is set.  Otherwise, Will be
12307    #       made true only for 2 line container in parens with -lp
12308    #
12309    # These flags are used by sub set_leading_whitespace in
12310    # the vertical aligner
12311
12312    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12313
12314    # For non-BLOCK tokens, we will need to examine the next line
12315    # too, so we won't consider the last line.
12316    if ( $n < $n_last_line ) {
12317
12318        # see if last token is an opening token...not a BLOCK...
12319        my $ibeg_next = $$ri_first[ $n + 1 ];
12320        my $token_end = $tokens_to_go[$iend];
12321        my $iend_next = $$ri_last[ $n + 1 ];
12322        if (
12323               $type_sequence_to_go[$iend]
12324            && !$block_type_to_go[$iend]
12325            && $is_opening_token{$token_end}
12326            && (
12327                $opening_vertical_tightness{$token_end} > 0
12328
12329                # allow 2-line method call to be closed up
12330                || (   $rOpts_line_up_parentheses
12331                    && $token_end eq '('
12332                    && $iend > $ibeg
12333                    && $types_to_go[ $iend - 1 ] ne 'b' )
12334            )
12335          )
12336        {
12337
12338            # avoid multiple jumps in nesting depth in one line if
12339            # requested
12340            my $ovt       = $opening_vertical_tightness{$token_end};
12341            my $iend_next = $$ri_last[ $n + 1 ];
12342            unless (
12343                $ovt < 2
12344                && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12345                    $nesting_depth_to_go[$ibeg_next] )
12346              )
12347            {
12348
12349                # If -vt flag has not been set, mark this as invalid
12350                # and aligner will validate it if it sees the closing paren
12351                # within 2 lines.
12352                my $valid_flag = $ovt;
12353                @{$rvertical_tightness_flags} =
12354                  ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12355            }
12356        }
12357
12358        # see if first token of next line is a closing token...
12359        # ..and be sure this line does not have a side comment
12360        my $token_next = $tokens_to_go[$ibeg_next];
12361        if (   $type_sequence_to_go[$ibeg_next]
12362            && !$block_type_to_go[$ibeg_next]
12363            && $is_closing_token{$token_next}
12364            && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
12365        {
12366            my $ovt = $opening_vertical_tightness{$token_next};
12367            my $cvt = $closing_vertical_tightness{$token_next};
12368            if (
12369
12370                # never append a trailing line like   )->pack(
12371                # because it will throw off later alignment
12372                (
12373                    $nesting_depth_to_go[$ibeg_next] ==
12374                    $nesting_depth_to_go[ $iend_next + 1 ] + 1
12375                )
12376                && (
12377                    $cvt == 2
12378                    || (
12379                        $container_environment_to_go[$ibeg_next] ne 'LIST'
12380                        && (
12381                            $cvt == 1
12382
12383                            # allow closing up 2-line method calls
12384                            || (   $rOpts_line_up_parentheses
12385                                && $token_next eq ')' )
12386                        )
12387                    )
12388                )
12389              )
12390            {
12391
12392                # decide which trailing closing tokens to append..
12393                my $ok = 0;
12394                if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12395                else {
12396                    my $str = join( '',
12397                        @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12398
12399                    # append closing token if followed by comment or ';'
12400                    if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12401                }
12402
12403                if ($ok) {
12404                    my $valid_flag = $cvt;
12405                    @{$rvertical_tightness_flags} = (
12406                        2,
12407                        $tightness{$token_next} == 2 ? 0 : 1,
12408                        $type_sequence_to_go[$ibeg_next], $valid_flag,
12409                    );
12410                }
12411            }
12412        }
12413
12414        # Opening Token Right
12415        # If requested, move an isolated trailing opening token to the end of
12416        # the previous line which ended in a comma.  We could do this
12417        # in sub recombine_breakpoints but that would cause problems
12418        # with -lp formatting.  The problem is that indentation will
12419        # quickly move far to the right in nested expressions.  By
12420        # doing it after indentation has been set, we avoid changes
12421        # to the indentation.  Actual movement of the token takes place
12422        # in sub write_leader_and_string.
12423        if (
12424            $opening_token_right{ $tokens_to_go[$ibeg_next] }
12425
12426            # previous line is not opening
12427            # (use -sot to combine with it)
12428            && !$is_opening_token{$token_end}
12429
12430            # previous line ended in one of these
12431            # (add other cases if necessary; '=>' and '.' are not necessary
12432            ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12433            && !$block_type_to_go[$ibeg_next]
12434
12435            # this is a line with just an opening token
12436            && (   $iend_next == $ibeg_next
12437                || $iend_next == $ibeg_next + 2
12438                && $types_to_go[$iend_next] eq '#' )
12439
12440            # looks bad if we align vertically with the wrong container
12441            && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12442          )
12443        {
12444            my $valid_flag = 1;
12445            my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12446            @{$rvertical_tightness_flags} =
12447              ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12448        }
12449
12450        # Stacking of opening and closing tokens
12451        my $stackable;
12452        my $token_beg_next = $tokens_to_go[$ibeg_next];
12453
12454        # patch to make something like 'qw(' behave like an opening paren
12455        # (aran.t)
12456        if ( $types_to_go[$ibeg_next] eq 'q' ) {
12457            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12458                $token_beg_next = $1;
12459            }
12460        }
12461
12462        if (   $is_closing_token{$token_end}
12463            && $is_closing_token{$token_beg_next} )
12464        {
12465            $stackable = $stack_closing_token{$token_beg_next}
12466              unless ( $block_type_to_go[$ibeg_next] )
12467              ;    # shouldn't happen; just checking
12468        }
12469        elsif ($is_opening_token{$token_end}
12470            && $is_opening_token{$token_beg_next} )
12471        {
12472            $stackable = $stack_opening_token{$token_beg_next}
12473              unless ( $block_type_to_go[$ibeg_next] )
12474              ;    # shouldn't happen; just checking
12475        }
12476
12477        if ($stackable) {
12478
12479            my $is_semicolon_terminated;
12480            if ( $n + 1 == $n_last_line ) {
12481                my ( $terminal_type, $i_terminal ) = terminal_type(
12482                    \@types_to_go, \@block_type_to_go,
12483                    $ibeg_next,    $iend_next
12484                );
12485                $is_semicolon_terminated = $terminal_type eq ';'
12486                  && $nesting_depth_to_go[$iend_next] <
12487                  $nesting_depth_to_go[$ibeg_next];
12488            }
12489
12490            # this must be a line with just an opening token
12491            # or end in a semicolon
12492            if (
12493                $is_semicolon_terminated
12494                || (   $iend_next == $ibeg_next
12495                    || $iend_next == $ibeg_next + 2
12496                    && $types_to_go[$iend_next] eq '#' )
12497              )
12498            {
12499                my $valid_flag = 1;
12500                my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12501                @{$rvertical_tightness_flags} =
12502                  ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12503                  );
12504            }
12505        }
12506    }
12507
12508    # Check for a last line with isolated opening BLOCK curly
12509    elsif ($rOpts_block_brace_vertical_tightness
12510        && $ibeg eq $iend
12511        && $types_to_go[$iend] eq '{'
12512        && $block_type_to_go[$iend] =~
12513        /$block_brace_vertical_tightness_pattern/o )
12514    {
12515        @{$rvertical_tightness_flags} =
12516          ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12517    }
12518
12519    # pack in the sequence numbers of the ends of this line
12520    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12521    $rvertical_tightness_flags->[5] = get_seqno($iend);
12522    return $rvertical_tightness_flags;
12523}
12524
12525sub get_seqno {
12526
12527    # get opening and closing sequence numbers of a token for the vertical
12528    # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
12529    # to be treated somewhat like opening and closing tokens for stacking
12530    # tokens by the vertical aligner.
12531    my ($ii) = @_;
12532    my $seqno = $type_sequence_to_go[$ii];
12533    if ( $types_to_go[$ii] eq 'q' ) {
12534        my $SEQ_QW = -1;
12535        if ( $ii > 0 ) {
12536            $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
12537        }
12538        else {
12539            if ( !$ending_in_quote ) {
12540                $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
12541            }
12542        }
12543    }
12544    return ($seqno);
12545}
12546
12547{
12548    my %is_vertical_alignment_type;
12549    my %is_vertical_alignment_keyword;
12550
12551    BEGIN {
12552
12553        @_ = qw#
12554          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
12555          { ? : => =~ && || // ~~ !~~
12556          #;
12557        @is_vertical_alignment_type{@_} = (1) x scalar(@_);
12558
12559        @_ = qw(if unless and or err eq ne for foreach while until);
12560        @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
12561    }
12562
12563    sub set_vertical_alignment_markers {
12564
12565        # This routine takes the first step toward vertical alignment of the
12566        # lines of output text.  It looks for certain tokens which can serve as
12567        # vertical alignment markers (such as an '=').
12568        #
12569        # Method: We look at each token $i in this output batch and set
12570        # $matching_token_to_go[$i] equal to those tokens at which we would
12571        # accept vertical alignment.
12572
12573        # nothing to do if we aren't allowed to change whitespace
12574        if ( !$rOpts_add_whitespace ) {
12575            for my $i ( 0 .. $max_index_to_go ) {
12576                $matching_token_to_go[$i] = '';
12577            }
12578            return;
12579        }
12580
12581        my ( $ri_first, $ri_last ) = @_;
12582
12583        # remember the index of last nonblank token before any sidecomment
12584        my $i_terminal = $max_index_to_go;
12585        if ( $types_to_go[$i_terminal] eq '#' ) {
12586            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
12587                if ( $i_terminal > 0 ) { --$i_terminal }
12588            }
12589        }
12590
12591        # look at each line of this batch..
12592        my $last_vertical_alignment_before_index;
12593        my $vert_last_nonblank_type;
12594        my $vert_last_nonblank_token;
12595        my $vert_last_nonblank_block_type;
12596        my $max_line = @$ri_first - 1;
12597        my ( $i, $type, $token, $block_type, $alignment_type );
12598        my ( $ibeg, $iend, $line );
12599
12600        foreach $line ( 0 .. $max_line ) {
12601            $ibeg                                 = $$ri_first[$line];
12602            $iend                                 = $$ri_last[$line];
12603            $last_vertical_alignment_before_index = -1;
12604            $vert_last_nonblank_type              = '';
12605            $vert_last_nonblank_token             = '';
12606            $vert_last_nonblank_block_type        = '';
12607
12608            # look at each token in this output line..
12609            foreach $i ( $ibeg .. $iend ) {
12610                $alignment_type = '';
12611                $type           = $types_to_go[$i];
12612                $block_type     = $block_type_to_go[$i];
12613                $token          = $tokens_to_go[$i];
12614
12615                # check for flag indicating that we should not align
12616                # this token
12617                if ( $matching_token_to_go[$i] ) {
12618                    $matching_token_to_go[$i] = '';
12619                    next;
12620                }
12621
12622                #--------------------------------------------------------
12623                # First see if we want to align BEFORE this token
12624                #--------------------------------------------------------
12625
12626                # The first possible token that we can align before
12627                # is index 2 because: 1) it doesn't normally make sense to
12628                # align before the first token and 2) the second
12629                # token must be a blank if we are to align before
12630                # the third
12631                if ( $i < $ibeg + 2 ) { }
12632
12633                # must follow a blank token
12634                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
12635
12636                # align a side comment --
12637                elsif ( $type eq '#' ) {
12638
12639                    unless (
12640
12641                        # it is a static side comment
12642                        (
12643                               $rOpts->{'static-side-comments'}
12644                            && $token =~ /$static_side_comment_pattern/o
12645                        )
12646
12647                        # or a closing side comment
12648                        || (   $vert_last_nonblank_block_type
12649                            && $token =~
12650                            /$closing_side_comment_prefix_pattern/o )
12651                      )
12652                    {
12653                        $alignment_type = $type;
12654                    }    ## Example of a static side comment
12655                }
12656
12657                # otherwise, do not align two in a row to create a
12658                # blank field
12659                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
12660
12661                # align before one of these keywords
12662                # (within a line, since $i>1)
12663                elsif ( $type eq 'k' ) {
12664
12665                    #  /^(if|unless|and|or|eq|ne)$/
12666                    if ( $is_vertical_alignment_keyword{$token} ) {
12667                        $alignment_type = $token;
12668                    }
12669                }
12670
12671                # align before one of these types..
12672                # Note: add '.' after new vertical aligner is operational
12673                elsif ( $is_vertical_alignment_type{$type} ) {
12674                    $alignment_type = $token;
12675
12676                    # Do not align a terminal token.  Although it might
12677                    # occasionally look ok to do this, it has been found to be
12678                    # a good general rule.  The main problems are:
12679                    # (1) that the terminal token (such as an = or :) might get
12680                    # moved far to the right where it is hard to see because
12681                    # nothing follows it, and
12682                    # (2) doing so may prevent other good alignments.
12683                    if ( $i == $iend || $i >= $i_terminal ) {
12684                        $alignment_type = "";
12685                    }
12686
12687                    # Do not align leading ': (' or '. ('.  This would prevent
12688                    # alignment in something like the following:
12689                    #   $extra_space .=
12690                    #       ( $input_line_number < 10 )  ? "  "
12691                    #     : ( $input_line_number < 100 ) ? " "
12692                    #     :                                "";
12693                    # or
12694                    #  $code =
12695                    #      ( $case_matters ? $accessor : " lc($accessor) " )
12696                    #    . ( $yesno        ? " eq "       : " ne " )
12697                    if (   $i == $ibeg + 2
12698                        && $types_to_go[$ibeg] =~ /^[\.\:]$/
12699                        && $types_to_go[ $i - 1 ] eq 'b' )
12700                    {
12701                        $alignment_type = "";
12702                    }
12703
12704                    # For a paren after keyword, only align something like this:
12705                    #    if    ( $a ) { &a }
12706                    #    elsif ( $b ) { &b }
12707                    if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
12708                        $alignment_type = ""
12709                          unless $vert_last_nonblank_token =~
12710                              /^(if|unless|elsif)$/;
12711                    }
12712
12713                    # be sure the alignment tokens are unique
12714                    # This didn't work well: reason not determined
12715                    # if ($token ne $type) {$alignment_type .= $type}
12716                }
12717
12718                # NOTE: This is deactivated because it causes the previous
12719                # if/elsif alignment to fail
12720                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12721                #{ $alignment_type = $type; }
12722
12723                if ($alignment_type) {
12724                    $last_vertical_alignment_before_index = $i;
12725                }
12726
12727                #--------------------------------------------------------
12728                # Next see if we want to align AFTER the previous nonblank
12729                #--------------------------------------------------------
12730
12731                # We want to line up ',' and interior ';' tokens, with the added
12732                # space AFTER these tokens.  (Note: interior ';' is included
12733                # because it may occur in short blocks).
12734                if (
12735
12736                    # we haven't already set it
12737                    !$alignment_type
12738
12739                    # and its not the first token of the line
12740                    && ( $i > $ibeg )
12741
12742                    # and it follows a blank
12743                    && $types_to_go[ $i - 1 ] eq 'b'
12744
12745                    # and previous token IS one of these:
12746                    && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12747
12748                    # and it's NOT one of these
12749                    && ( $type !~ /^[b\#\)\]\}]$/ )
12750
12751                    # then go ahead and align
12752                  )
12753
12754                {
12755                    $alignment_type = $vert_last_nonblank_type;
12756                }
12757
12758                #--------------------------------------------------------
12759                # then store the value
12760                #--------------------------------------------------------
12761                $matching_token_to_go[$i] = $alignment_type;
12762                if ( $type ne 'b' ) {
12763                    $vert_last_nonblank_type       = $type;
12764                    $vert_last_nonblank_token      = $token;
12765                    $vert_last_nonblank_block_type = $block_type;
12766                }
12767            }
12768        }
12769    }
12770}
12771
12772sub terminal_type {
12773
12774    #    returns type of last token on this line (terminal token), as follows:
12775    #    returns # for a full-line comment
12776    #    returns ' ' for a blank line
12777    #    otherwise returns final token type
12778
12779    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12780
12781    # check for full-line comment..
12782    if ( $$rtype[$ibeg] eq '#' ) {
12783        return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12784    }
12785    else {
12786
12787        # start at end and walk bakwards..
12788        for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12789
12790            # skip past any side comment and blanks
12791            next if ( $$rtype[$i] eq 'b' );
12792            next if ( $$rtype[$i] eq '#' );
12793
12794            # found it..make sure it is a BLOCK termination,
12795            # but hide a terminal } after sort/grep/map because it is not
12796            # necessarily the end of the line.  (terminal.t)
12797            my $terminal_type = $$rtype[$i];
12798            if (
12799                $terminal_type eq '}'
12800                && ( !$$rblock_type[$i]
12801                    || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12802              )
12803            {
12804                $terminal_type = 'b';
12805            }
12806            return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12807        }
12808
12809        # empty line
12810        return wantarray ? ( ' ', $ibeg ) : ' ';
12811    }
12812}
12813
12814{
12815    my %is_good_keyword_breakpoint;
12816    my %is_lt_gt_le_ge;
12817
12818    sub set_bond_strengths {
12819
12820        BEGIN {
12821
12822            @_ = qw(if unless while until for foreach);
12823            @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12824
12825            @_ = qw(lt gt le ge);
12826            @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12827
12828            ###############################################################
12829            # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12830            # essential NO_BREAKS's must be enforced in section 2, below.
12831            ###############################################################
12832
12833            # adding NEW_TOKENS: add a left and right bond strength by
12834            # mimmicking what is done for an existing token type.  You
12835            # can skip this step at first and take the default, then
12836            # tweak later to get desired results.
12837
12838            # The bond strengths should roughly follow precenence order where
12839            # possible.  If you make changes, please check the results very
12840            # carefully on a variety of scripts.
12841
12842            # no break around possible filehandle
12843            $left_bond_strength{'Z'}  = NO_BREAK;
12844            $right_bond_strength{'Z'} = NO_BREAK;
12845
12846            # never put a bare word on a new line:
12847            # example print (STDERR, "bla"); will fail with break after (
12848            $left_bond_strength{'w'} = NO_BREAK;
12849
12850        # blanks always have infinite strength to force breaks after real tokens
12851            $right_bond_strength{'b'} = NO_BREAK;
12852
12853            # try not to break on exponentation
12854            @_                       = qw" ** .. ... <=> ";
12855            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12856            @right_bond_strength{@_} = (STRONG) x scalar(@_);
12857
12858            # The comma-arrow has very low precedence but not a good break point
12859            $left_bond_strength{'=>'}  = NO_BREAK;
12860            $right_bond_strength{'=>'} = NOMINAL;
12861
12862            # ok to break after label
12863            $left_bond_strength{'J'}  = NO_BREAK;
12864            $right_bond_strength{'J'} = NOMINAL;
12865            $left_bond_strength{'j'}  = STRONG;
12866            $right_bond_strength{'j'} = STRONG;
12867            $left_bond_strength{'A'}  = STRONG;
12868            $right_bond_strength{'A'} = STRONG;
12869
12870            $left_bond_strength{'->'}  = STRONG;
12871            $right_bond_strength{'->'} = VERY_STRONG;
12872
12873            # breaking AFTER modulus operator is ok:
12874            @_ = qw" % ";
12875            @left_bond_strength{@_} = (STRONG) x scalar(@_);
12876            @right_bond_strength{@_} =
12877              ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12878
12879            # Break AFTER math operators * and /
12880            @_                       = qw" * / x  ";
12881            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12882            @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12883
12884            # Break AFTER weakest math operators + and -
12885            # Make them weaker than * but a bit stronger than '.'
12886            @_ = qw" + - ";
12887            @left_bond_strength{@_} = (STRONG) x scalar(@_);
12888            @right_bond_strength{@_} =
12889              ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12890
12891            # breaking BEFORE these is just ok:
12892            @_                       = qw" >> << ";
12893            @right_bond_strength{@_} = (STRONG) x scalar(@_);
12894            @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12895
12896            # breaking before the string concatenation operator seems best
12897            # because it can be hard to see at the end of a line
12898            $right_bond_strength{'.'} = STRONG;
12899            $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12900
12901            @_                       = qw"} ] ) ";
12902            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12903            @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12904
12905            # make these a little weaker than nominal so that they get
12906            # favored for end-of-line characters
12907            @_ = qw"!= == =~ !~ ~~ !~~";
12908            @left_bond_strength{@_} = (STRONG) x scalar(@_);
12909            @right_bond_strength{@_} =
12910              ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12911
12912            # break AFTER these
12913            @_ = qw" < >  | & >= <=";
12914            @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12915            @right_bond_strength{@_} =
12916              ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12917
12918            # breaking either before or after a quote is ok
12919            # but bias for breaking before a quote
12920            $left_bond_strength{'Q'}  = NOMINAL;
12921            $right_bond_strength{'Q'} = NOMINAL + 0.02;
12922            $left_bond_strength{'q'}  = NOMINAL;
12923            $right_bond_strength{'q'} = NOMINAL;
12924
12925            # starting a line with a keyword is usually ok
12926            $left_bond_strength{'k'} = NOMINAL;
12927
12928            # we usually want to bond a keyword strongly to what immediately
12929            # follows, rather than leaving it stranded at the end of a line
12930            $right_bond_strength{'k'} = STRONG;
12931
12932            $left_bond_strength{'G'}  = NOMINAL;
12933            $right_bond_strength{'G'} = STRONG;
12934
12935            # it is good to break AFTER various assignment operators
12936            @_ = qw(
12937              = **= += *= &= <<= &&=
12938              -= /= |= >>= ||= //=
12939              .= %= ^=
12940              x=
12941            );
12942            @left_bond_strength{@_} = (STRONG) x scalar(@_);
12943            @right_bond_strength{@_} =
12944              ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12945
12946            # break BEFORE '&&' and '||' and '//'
12947            # set strength of '||' to same as '=' so that chains like
12948            # $a = $b || $c || $d   will break before the first '||'
12949            $right_bond_strength{'||'} = NOMINAL;
12950            $left_bond_strength{'||'}  = $right_bond_strength{'='};
12951
12952            # same thing for '//'
12953            $right_bond_strength{'//'} = NOMINAL;
12954            $left_bond_strength{'//'}  = $right_bond_strength{'='};
12955
12956            # set strength of && a little higher than ||
12957            $right_bond_strength{'&&'} = NOMINAL;
12958            $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12959
12960            $left_bond_strength{';'}  = VERY_STRONG;
12961            $right_bond_strength{';'} = VERY_WEAK;
12962            $left_bond_strength{'f'}  = VERY_STRONG;
12963
12964            # make right strength of for ';' a little less than '='
12965            # to make for contents break after the ';' to avoid this:
12966            #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12967            #     $number_of_fields )
12968            # and make it weaker than ',' and 'and' too
12969            $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12970
12971            # The strengths of ?/: should be somewhere between
12972            # an '=' and a quote (NOMINAL),
12973            # make strength of ':' slightly less than '?' to help
12974            # break long chains of ? : after the colons
12975            $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12976            $right_bond_strength{':'} = NO_BREAK;
12977            $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12978            $right_bond_strength{'?'} = NO_BREAK;
12979
12980            $left_bond_strength{','}  = VERY_STRONG;
12981            $right_bond_strength{','} = VERY_WEAK;
12982
12983            # Set bond strengths of certain keywords
12984            # make 'or', 'err', 'and' slightly weaker than a ','
12985            $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12986            $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12987            $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12988            $left_bond_strength{'xor'}  = NOMINAL;
12989            $right_bond_strength{'and'} = NOMINAL;
12990            $right_bond_strength{'or'}  = NOMINAL;
12991            $right_bond_strength{'err'} = NOMINAL;
12992            $right_bond_strength{'xor'} = STRONG;
12993        }
12994
12995        # patch-its always ok to break at end of line
12996        $nobreak_to_go[$max_index_to_go] = 0;
12997
12998        # adding a small 'bias' to strengths is a simple way to make a line
12999        # break at the first of a sequence of identical terms.  For example,
13000        # to force long string of conditional operators to break with
13001        # each line ending in a ':', we can add a small number to the bond
13002        # strength of each ':'
13003        my $colon_bias = 0;
13004        my $amp_bias   = 0;
13005        my $bar_bias   = 0;
13006        my $and_bias   = 0;
13007        my $or_bias    = 0;
13008        my $dot_bias   = 0;
13009        my $f_bias     = 0;
13010        my $code_bias  = -.01;
13011        my $type       = 'b';
13012        my $token      = ' ';
13013        my $last_type;
13014        my $last_nonblank_type  = $type;
13015        my $last_nonblank_token = $token;
13016        my $delta_bias          = 0.0001;
13017        my $list_str            = $left_bond_strength{'?'};
13018
13019        my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
13020            $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
13021        );
13022
13023        # preliminary loop to compute bond strengths
13024        for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
13025            $last_type = $type;
13026            if ( $type ne 'b' ) {
13027                $last_nonblank_type  = $type;
13028                $last_nonblank_token = $token;
13029            }
13030            $type = $types_to_go[$i];
13031
13032            # strength on both sides of a blank is the same
13033            if ( $type eq 'b' && $last_type ne 'b' ) {
13034                $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
13035                next;
13036            }
13037
13038            $token               = $tokens_to_go[$i];
13039            $block_type          = $block_type_to_go[$i];
13040            $i_next              = $i + 1;
13041            $next_type           = $types_to_go[$i_next];
13042            $next_token          = $tokens_to_go[$i_next];
13043            $total_nesting_depth = $nesting_depth_to_go[$i_next];
13044            $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13045            $next_nonblank_type  = $types_to_go[$i_next_nonblank];
13046            $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13047
13048            # Some token chemistry...  The decision about where to break a
13049            # line depends upon a "bond strength" between tokens.  The LOWER
13050            # the bond strength, the MORE likely a break.  The strength
13051            # values are based on trial-and-error, and need to be tweaked
13052            # occasionally to get desired results.  Things to keep in mind
13053            # are:
13054            #   1. relative strengths are important.  small differences
13055            #      in strengths can make big formatting differences.
13056            #   2. each indentation level adds one unit of bond strength
13057            #   3. a value of NO_BREAK makes an unbreakable bond
13058            #   4. a value of VERY_WEAK is the strength of a ','
13059            #   5. values below NOMINAL are considered ok break points
13060            #   6. values above NOMINAL are considered poor break points
13061            # We are computing the strength of the bond between the current
13062            # token and the NEXT token.
13063            my $bond_str = VERY_STRONG;    # a default, high strength
13064
13065            #---------------------------------------------------------------
13066            # section 1:
13067            # use minimum of left and right bond strengths if defined;
13068            # digraphs and trigraphs like to break on their left
13069            #---------------------------------------------------------------
13070            my $bsr = $right_bond_strength{$type};
13071
13072            if ( !defined($bsr) ) {
13073
13074                if ( $is_digraph{$type} || $is_trigraph{$type} ) {
13075                    $bsr = STRONG;
13076                }
13077                else {
13078                    $bsr = VERY_STRONG;
13079                }
13080            }
13081
13082            # define right bond strengths of certain keywords
13083            if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
13084                $bsr = $right_bond_strength{$token};
13085            }
13086            elsif ( $token eq 'ne' or $token eq 'eq' ) {
13087                $bsr = NOMINAL;
13088            }
13089            my $bsl = $left_bond_strength{$next_nonblank_type};
13090
13091            # set terminal bond strength to the nominal value
13092            # this will cause good preceding breaks to be retained
13093            if ( $i_next_nonblank > $max_index_to_go ) {
13094                $bsl = NOMINAL;
13095            }
13096
13097            if ( !defined($bsl) ) {
13098
13099                if (   $is_digraph{$next_nonblank_type}
13100                    || $is_trigraph{$next_nonblank_type} )
13101                {
13102                    $bsl = WEAK;
13103                }
13104                else {
13105                    $bsl = VERY_STRONG;
13106                }
13107            }
13108
13109            # define right bond strengths of certain keywords
13110            if ( $next_nonblank_type eq 'k'
13111                && defined( $left_bond_strength{$next_nonblank_token} ) )
13112            {
13113                $bsl = $left_bond_strength{$next_nonblank_token};
13114            }
13115            elsif ($next_nonblank_token eq 'ne'
13116                or $next_nonblank_token eq 'eq' )
13117            {
13118                $bsl = NOMINAL;
13119            }
13120            elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
13121                $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
13122            }
13123
13124            # Note: it might seem that we would want to keep a NO_BREAK if
13125            # either token has this value.  This didn't work, because in an
13126            # arrow list, it prevents the comma from separating from the
13127            # following bare word (which is probably quoted by its arrow).
13128            # So necessary NO_BREAK's have to be handled as special cases
13129            # in the final section.
13130            $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
13131            my $bond_str_1 = $bond_str;
13132
13133            #---------------------------------------------------------------
13134            # section 2:
13135            # special cases
13136            #---------------------------------------------------------------
13137
13138            # allow long lines before final { in an if statement, as in:
13139            #    if (..........
13140            #      ..........)
13141            #    {
13142            #
13143            # Otherwise, the line before the { tends to be too short.
13144            if ( $type eq ')' ) {
13145                if ( $next_nonblank_type eq '{' ) {
13146                    $bond_str = VERY_WEAK + 0.03;
13147                }
13148            }
13149
13150            elsif ( $type eq '(' ) {
13151                if ( $next_nonblank_type eq '{' ) {
13152                    $bond_str = NOMINAL;
13153                }
13154            }
13155
13156            # break on something like '} (', but keep this stronger than a ','
13157            # example is in 'howe.pl'
13158            elsif ( $type eq 'R' or $type eq '}' ) {
13159                if ( $next_nonblank_type eq '(' ) {
13160                    $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
13161                }
13162            }
13163
13164            #-----------------------------------------------------------------
13165            # adjust bond strength bias
13166            #-----------------------------------------------------------------
13167
13168            # TESTING: add any bias set by sub scan_list at old comma
13169            # break points.
13170            elsif ( $type eq ',' ) {
13171                $bond_str += $bond_strength_to_go[$i];
13172            }
13173
13174            elsif ( $type eq 'f' ) {
13175                $bond_str += $f_bias;
13176                $f_bias   += $delta_bias;
13177            }
13178
13179          # in long ?: conditionals, bias toward just one set per line (colon.t)
13180            elsif ( $type eq ':' ) {
13181                if ( !$want_break_before{$type} ) {
13182                    $bond_str   += $colon_bias;
13183                    $colon_bias += $delta_bias;
13184                }
13185            }
13186
13187            if (   $next_nonblank_type eq ':'
13188                && $want_break_before{$next_nonblank_type} )
13189            {
13190                $bond_str   += $colon_bias;
13191                $colon_bias += $delta_bias;
13192            }
13193
13194            # if leading '.' is used, align all but 'short' quotes;
13195            # the idea is to not place something like "\n" on a single line.
13196            elsif ( $next_nonblank_type eq '.' ) {
13197                if ( $want_break_before{'.'} ) {
13198                    unless (
13199                        $last_nonblank_type eq '.'
13200                        && (
13201                            length($token) <=
13202                            $rOpts_short_concatenation_item_length )
13203                        && ( $token !~ /^[\)\]\}]$/ )
13204                      )
13205                    {
13206                        $dot_bias += $delta_bias;
13207                    }
13208                    $bond_str += $dot_bias;
13209                }
13210            }
13211            elsif ($next_nonblank_type eq '&&'
13212                && $want_break_before{$next_nonblank_type} )
13213            {
13214                $bond_str += $amp_bias;
13215                $amp_bias += $delta_bias;
13216            }
13217            elsif ($next_nonblank_type eq '||'
13218                && $want_break_before{$next_nonblank_type} )
13219            {
13220                $bond_str += $bar_bias;
13221                $bar_bias += $delta_bias;
13222            }
13223            elsif ( $next_nonblank_type eq 'k' ) {
13224
13225                if (   $next_nonblank_token eq 'and'
13226                    && $want_break_before{$next_nonblank_token} )
13227                {
13228                    $bond_str += $and_bias;
13229                    $and_bias += $delta_bias;
13230                }
13231                elsif ($next_nonblank_token =~ /^(or|err)$/
13232                    && $want_break_before{$next_nonblank_token} )
13233                {
13234                    $bond_str += $or_bias;
13235                    $or_bias  += $delta_bias;
13236                }
13237
13238                # FIXME: needs more testing
13239                elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13240                    $bond_str = $list_str if ( $bond_str > $list_str );
13241                }
13242                elsif ( $token eq 'err'
13243                    && !$want_break_before{$token} )
13244                {
13245                    $bond_str += $or_bias;
13246                    $or_bias  += $delta_bias;
13247                }
13248            }
13249
13250            if ( $type eq ':'
13251                && !$want_break_before{$type} )
13252            {
13253                $bond_str   += $colon_bias;
13254                $colon_bias += $delta_bias;
13255            }
13256            elsif ( $type eq '&&'
13257                && !$want_break_before{$type} )
13258            {
13259                $bond_str += $amp_bias;
13260                $amp_bias += $delta_bias;
13261            }
13262            elsif ( $type eq '||'
13263                && !$want_break_before{$type} )
13264            {
13265                $bond_str += $bar_bias;
13266                $bar_bias += $delta_bias;
13267            }
13268            elsif ( $type eq 'k' ) {
13269
13270                if ( $token eq 'and'
13271                    && !$want_break_before{$token} )
13272                {
13273                    $bond_str += $and_bias;
13274                    $and_bias += $delta_bias;
13275                }
13276                elsif ( $token eq 'or'
13277                    && !$want_break_before{$token} )
13278                {
13279                    $bond_str += $or_bias;
13280                    $or_bias  += $delta_bias;
13281                }
13282            }
13283
13284            # keep matrix and hash indices together
13285            # but make them a little below STRONG to allow breaking open
13286            # something like {'some-word'}{'some-very-long-word'} at the }{
13287            # (bracebrk.t)
13288            if (   ( $type eq ']' or $type eq 'R' )
13289                && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13290              )
13291            {
13292                $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13293            }
13294
13295            if ( $next_nonblank_token =~ /^->/ ) {
13296
13297                # increase strength to the point where a break in the following
13298                # will be after the opening paren rather than at the arrow:
13299                #    $a->$b($c);
13300                if ( $type eq 'i' ) {
13301                    $bond_str = 1.45 * STRONG;
13302                }
13303
13304                elsif ( $type =~ /^[\)\]\}R]$/ ) {
13305                    $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13306                }
13307
13308                # otherwise make strength before an '->' a little over a '+'
13309                else {
13310                    if ( $bond_str <= NOMINAL ) {
13311                        $bond_str = NOMINAL + 0.01;
13312                    }
13313                }
13314            }
13315
13316            if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13317                $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13318            }
13319
13320            # map1.t -- correct for a quirk in perl
13321            if (   $token eq '('
13322                && $next_nonblank_type eq 'i'
13323                && $last_nonblank_type eq 'k'
13324                && $is_sort_map_grep{$last_nonblank_token} )
13325
13326              #     /^(sort|map|grep)$/ )
13327            {
13328                $bond_str = NO_BREAK;
13329            }
13330
13331            # extrude.t: do not break before paren at:
13332            #    -l pid_filename(
13333            if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13334                $bond_str = NO_BREAK;
13335            }
13336
13337            # good to break after end of code blocks
13338            if ( $type eq '}' && $block_type ) {
13339
13340                $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13341                $code_bias += $delta_bias;
13342            }
13343
13344            if ( $type eq 'k' ) {
13345
13346                # allow certain control keywords to stand out
13347                if (   $next_nonblank_type eq 'k'
13348                    && $is_last_next_redo_return{$token} )
13349                {
13350                    $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13351                }
13352
13353# Don't break after keyword my.  This is a quick fix for a
13354# rare problem with perl. An example is this line from file
13355# Container.pm:
13356# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13357
13358                if ( $token eq 'my' ) {
13359                    $bond_str = NO_BREAK;
13360                }
13361
13362            }
13363
13364            # good to break before 'if', 'unless', etc
13365            if ( $is_if_brace_follower{$next_nonblank_token} ) {
13366                $bond_str = VERY_WEAK;
13367            }
13368
13369            if ( $next_nonblank_type eq 'k' ) {
13370
13371                # keywords like 'unless', 'if', etc, within statements
13372                # make good breaks
13373                if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13374                    $bond_str = VERY_WEAK / 1.05;
13375                }
13376            }
13377
13378            # try not to break before a comma-arrow
13379            elsif ( $next_nonblank_type eq '=>' ) {
13380                if ( $bond_str < STRONG ) { $bond_str = STRONG }
13381            }
13382
13383         #----------------------------------------------------------------------
13384         # only set NO_BREAK's from here on
13385         #----------------------------------------------------------------------
13386            if ( $type eq 'C' or $type eq 'U' ) {
13387
13388                # use strict requires that bare word and => not be separated
13389                if ( $next_nonblank_type eq '=>' ) {
13390                    $bond_str = NO_BREAK;
13391                }
13392
13393                # Never break between a bareword and a following paren because
13394                # perl may give an error.  For example, if a break is placed
13395                # between 'to_filehandle' and its '(' the following line will
13396                # give a syntax error [Carp.pm]: my( $no) =fileno(
13397                # to_filehandle( $in)) ;
13398                if ( $next_nonblank_token eq '(' ) {
13399                    $bond_str = NO_BREAK;
13400                }
13401            }
13402
13403           # use strict requires that bare word within braces not start new line
13404            elsif ( $type eq 'L' ) {
13405
13406                if ( $next_nonblank_type eq 'w' ) {
13407                    $bond_str = NO_BREAK;
13408                }
13409            }
13410
13411            # in older version of perl, use strict can cause problems with
13412            # breaks before bare words following opening parens.  For example,
13413            # this will fail under older versions if a break is made between
13414            # '(' and 'MAIL':
13415            #  use strict;
13416            #  open( MAIL, "a long filename or command");
13417            #  close MAIL;
13418            elsif ( $type eq '{' ) {
13419
13420                if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13421
13422                    # but it's fine to break if the word is followed by a '=>'
13423                    # or if it is obviously a sub call
13424                    my $i_next_next_nonblank = $i_next_nonblank + 1;
13425                    my $next_next_type = $types_to_go[$i_next_next_nonblank];
13426                    if (   $next_next_type eq 'b'
13427                        && $i_next_nonblank < $max_index_to_go )
13428                    {
13429                        $i_next_next_nonblank++;
13430                        $next_next_type = $types_to_go[$i_next_next_nonblank];
13431                    }
13432
13433                    ##if ( $next_next_type ne '=>' ) {
13434                    # these are ok: '->xxx', '=>', '('
13435
13436                    # We'll check for an old breakpoint and keep a leading
13437                    # bareword if it was that way in the input file.
13438                    # Presumably it was ok that way.  For example, the
13439                    # following would remain unchanged:
13440                    #
13441                    # @months = (
13442                    #   January,   February, March,    April,
13443                    #   May,       June,     July,     August,
13444                    #   September, October,  November, December,
13445                    # );
13446                    #
13447                    # This should be sufficient:
13448                    if ( !$old_breakpoint_to_go[$i]
13449                        && ( $next_next_type eq ',' || $next_next_type eq '}' )
13450                      )
13451                    {
13452                        $bond_str = NO_BREAK;
13453                    }
13454                }
13455            }
13456
13457            elsif ( $type eq 'w' ) {
13458
13459                if ( $next_nonblank_type eq 'R' ) {
13460                    $bond_str = NO_BREAK;
13461                }
13462
13463                # use strict requires that bare word and => not be separated
13464                if ( $next_nonblank_type eq '=>' ) {
13465                    $bond_str = NO_BREAK;
13466                }
13467            }
13468
13469            # in fact, use strict hates bare words on any new line.  For
13470            # example, a break before the underscore here provokes the
13471            # wrath of use strict:
13472            # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13473            elsif ( $type eq 'F' ) {
13474                $bond_str = NO_BREAK;
13475            }
13476
13477            # use strict does not allow separating type info from trailing { }
13478            # testfile is readmail.pl
13479            elsif ( $type eq 't' or $type eq 'i' ) {
13480
13481                if ( $next_nonblank_type eq 'L' ) {
13482                    $bond_str = NO_BREAK;
13483                }
13484            }
13485
13486            # Do not break between a possible filehandle and a ? or / and do
13487            # not introduce a break after it if there is no blank
13488            # (extrude.t)
13489            elsif ( $type eq 'Z' ) {
13490
13491                # dont break..
13492                if (
13493
13494                    # if there is no blank and we do not want one. Examples:
13495                    #    print $x++    # do not break after $x
13496                    #    print HTML"HELLO"   # break ok after HTML
13497                    (
13498                           $next_type ne 'b'
13499                        && defined( $want_left_space{$next_type} )
13500                        && $want_left_space{$next_type} == WS_NO
13501                    )
13502
13503                    # or we might be followed by the start of a quote
13504                    || $next_nonblank_type =~ /^[\/\?]$/
13505                  )
13506                {
13507                    $bond_str = NO_BREAK;
13508                }
13509            }
13510
13511            # Do not break before a possible file handle
13512            if ( $next_nonblank_type eq 'Z' ) {
13513                $bond_str = NO_BREAK;
13514            }
13515
13516            # As a defensive measure, do not break between a '(' and a
13517            # filehandle.  In some cases, this can cause an error.  For
13518            # example, the following program works:
13519            #    my $msg="hi!\n";
13520            #    print
13521            #    ( STDOUT
13522            #    $msg
13523            #    );
13524            #
13525            # But this program fails:
13526            #    my $msg="hi!\n";
13527            #    print
13528            #    (
13529            #    STDOUT
13530            #    $msg
13531            #    );
13532            #
13533            # This is normally only a problem with the 'extrude' option
13534            if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13535                $bond_str = NO_BREAK;
13536            }
13537
13538            # Breaking before a ++ can cause perl to guess wrong. For
13539            # example the following line will cause a syntax error
13540            # with -extrude if we break between '$i' and '++' [fixstyle2]
13541            #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13542            elsif ( $next_nonblank_type eq '++' ) {
13543                $bond_str = NO_BREAK;
13544            }
13545
13546            # Breaking before a ? before a quote can cause trouble if
13547            # they are not separated by a blank.
13548            # Example: a syntax error occurs if you break before the ? here
13549            #  my$logic=join$all?' && ':' || ',@regexps;
13550            # From: Professional_Perl_Programming_Code/multifind.pl
13551            elsif ( $next_nonblank_type eq '?' ) {
13552                $bond_str = NO_BREAK
13553                  if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13554            }
13555
13556            # Breaking before a . followed by a number
13557            # can cause trouble if there is no intervening space
13558            # Example: a syntax error occurs if you break before the .2 here
13559            #  $str .= pack($endian.2, ensurrogate($ord));
13560            # From: perl58/Unicode.pm
13561            elsif ( $next_nonblank_type eq '.' ) {
13562                $bond_str = NO_BREAK
13563                  if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
13564            }
13565
13566            # patch to put cuddled elses back together when on multiple
13567            # lines, as in: } \n else \n { \n
13568            if ($rOpts_cuddled_else) {
13569
13570                if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
13571                    || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
13572                {
13573                    $bond_str = NO_BREAK;
13574                }
13575            }
13576
13577            # keep '}' together with ';'
13578            if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
13579                $bond_str = NO_BREAK;
13580            }
13581
13582            # never break between sub name and opening paren
13583            if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
13584                $bond_str = NO_BREAK;
13585            }
13586
13587            #---------------------------------------------------------------
13588            # section 3:
13589            # now take nesting depth into account
13590            #---------------------------------------------------------------
13591            # final strength incorporates the bond strength and nesting depth
13592            my $strength;
13593
13594            if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
13595                if ( $total_nesting_depth > 0 ) {
13596                    $strength = $bond_str + $total_nesting_depth;
13597                }
13598                else {
13599                    $strength = $bond_str;
13600                }
13601            }
13602            else {
13603                $strength = NO_BREAK;
13604            }
13605
13606            # always break after side comment
13607            if ( $type eq '#' ) { $strength = 0 }
13608
13609            $bond_strength_to_go[$i] = $strength;
13610
13611            FORMATTER_DEBUG_FLAG_BOND && do {
13612                my $str = substr( $token, 0, 15 );
13613                $str .= ' ' x ( 16 - length($str) );
13614                print
13615"BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13616            };
13617        }
13618    }
13619
13620}
13621
13622sub pad_array_to_go {
13623
13624    # to simplify coding in scan_list and set_bond_strengths, it helps
13625    # to create some extra blank tokens at the end of the arrays
13626    $tokens_to_go[ $max_index_to_go + 1 ] = '';
13627    $tokens_to_go[ $max_index_to_go + 2 ] = '';
13628    $types_to_go[ $max_index_to_go + 1 ]  = 'b';
13629    $types_to_go[ $max_index_to_go + 2 ]  = 'b';
13630    $nesting_depth_to_go[ $max_index_to_go + 1 ] =
13631      $nesting_depth_to_go[$max_index_to_go];
13632
13633    #    /^[R\}\)\]]$/
13634    if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13635        if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13636
13637            # shouldn't happen:
13638            unless ( get_saw_brace_error() ) {
13639                warning(
13640"Program bug in scan_list: hit nesting error which should have been caught\n"
13641                );
13642                report_definite_bug();
13643            }
13644        }
13645        else {
13646            $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13647        }
13648    }
13649
13650    #       /^[L\{\(\[]$/
13651    elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13652        $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13653    }
13654}
13655
13656{    # begin scan_list
13657
13658    my (
13659        $block_type,                $current_depth,
13660        $depth,                     $i,
13661        $i_last_nonblank_token,     $last_colon_sequence_number,
13662        $last_nonblank_token,       $last_nonblank_type,
13663        $last_old_breakpoint_count, $minimum_depth,
13664        $next_nonblank_block_type,  $next_nonblank_token,
13665        $next_nonblank_type,        $old_breakpoint_count,
13666        $starting_breakpoint_count, $starting_depth,
13667        $token,                     $type,
13668        $type_sequence,
13669    );
13670
13671    my (
13672        @breakpoint_stack,              @breakpoint_undo_stack,
13673        @comma_index,                   @container_type,
13674        @identifier_count_stack,        @index_before_arrow,
13675        @interrupted_list,              @item_count_stack,
13676        @last_comma_index,              @last_dot_index,
13677        @last_nonblank_type,            @old_breakpoint_count_stack,
13678        @opening_structure_index_stack, @rfor_semicolon_list,
13679        @has_old_logical_breakpoints,   @rand_or_list,
13680        @i_equals,
13681    );
13682
13683    # routine to define essential variables when we go 'up' to
13684    # a new depth
13685    sub check_for_new_minimum_depth {
13686        my $depth = shift;
13687        if ( $depth < $minimum_depth ) {
13688
13689            $minimum_depth = $depth;
13690
13691            # these arrays need not retain values between calls
13692            $breakpoint_stack[$depth]              = $starting_breakpoint_count;
13693            $container_type[$depth]                = "";
13694            $identifier_count_stack[$depth]        = 0;
13695            $index_before_arrow[$depth]            = -1;
13696            $interrupted_list[$depth]              = 1;
13697            $item_count_stack[$depth]              = 0;
13698            $last_nonblank_type[$depth]            = "";
13699            $opening_structure_index_stack[$depth] = -1;
13700
13701            $breakpoint_undo_stack[$depth]       = undef;
13702            $comma_index[$depth]                 = undef;
13703            $last_comma_index[$depth]            = undef;
13704            $last_dot_index[$depth]              = undef;
13705            $old_breakpoint_count_stack[$depth]  = undef;
13706            $has_old_logical_breakpoints[$depth] = 0;
13707            $rand_or_list[$depth]                = [];
13708            $rfor_semicolon_list[$depth]         = [];
13709            $i_equals[$depth]                    = -1;
13710
13711            # these arrays must retain values between calls
13712            if ( !defined( $has_broken_sublist[$depth] ) ) {
13713                $dont_align[$depth]         = 0;
13714                $has_broken_sublist[$depth] = 0;
13715                $want_comma_break[$depth]   = 0;
13716            }
13717        }
13718    }
13719
13720    # routine to decide which commas to break at within a container;
13721    # returns:
13722    #   $bp_count = number of comma breakpoints set
13723    #   $do_not_break_apart = a flag indicating if container need not
13724    #     be broken open
13725    sub set_comma_breakpoints {
13726
13727        my $dd                 = shift;
13728        my $bp_count           = 0;
13729        my $do_not_break_apart = 0;
13730
13731        # anything to do?
13732        if ( $item_count_stack[$dd] ) {
13733
13734            # handle commas not in containers...
13735            if ( $dont_align[$dd] ) {
13736                do_uncontained_comma_breaks($dd);
13737            }
13738
13739            # handle commas within containers...
13740            else {
13741                my $fbc = $forced_breakpoint_count;
13742
13743                # always open comma lists not preceded by keywords,
13744                # barewords, identifiers (that is, anything that doesn't
13745                # look like a function call)
13746                my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13747
13748                set_comma_breakpoints_do(
13749                    $dd,
13750                    $opening_structure_index_stack[$dd],
13751                    $i,
13752                    $item_count_stack[$dd],
13753                    $identifier_count_stack[$dd],
13754                    $comma_index[$dd],
13755                    $next_nonblank_type,
13756                    $container_type[$dd],
13757                    $interrupted_list[$dd],
13758                    \$do_not_break_apart,
13759                    $must_break_open,
13760                );
13761                $bp_count = $forced_breakpoint_count - $fbc;
13762                $do_not_break_apart = 0 if $must_break_open;
13763            }
13764        }
13765        return ( $bp_count, $do_not_break_apart );
13766    }
13767
13768    sub do_uncontained_comma_breaks {
13769
13770        # Handle commas not in containers...
13771        # This is a catch-all routine for commas that we
13772        # don't know what to do with because the don't fall
13773        # within containers.  We will bias the bond strength
13774        # to break at commas which ended lines in the input
13775        # file.  This usually works better than just trying
13776        # to put as many items on a line as possible.  A
13777        # downside is that if the input file is garbage it
13778        # won't work very well. However, the user can always
13779        # prevent following the old breakpoints with the
13780        # -iob flag.
13781        my $dd   = shift;
13782        my $bias = -.01;
13783        foreach my $ii ( @{ $comma_index[$dd] } ) {
13784            if ( $old_breakpoint_to_go[$ii] ) {
13785                $bond_strength_to_go[$ii] = $bias;
13786
13787                # reduce bias magnitude to force breaks in order
13788                $bias *= 0.99;
13789            }
13790        }
13791
13792        # Also put a break before the first comma if
13793        # (1) there was a break there in the input, and
13794        # (2) that was exactly one previous break in the input
13795        #
13796        # For example, we will follow the user and break after
13797        # 'print' in this snippet:
13798        #    print
13799        #      "conformability (Not the same dimension)\n",
13800        #      "\t", $have, " is ", text_unit($hu), "\n",
13801        #      "\t", $want, " is ", text_unit($wu), "\n",
13802        #      ;
13803        my $i_first_comma = $comma_index[$dd]->[0];
13804        if ( $old_breakpoint_to_go[$i_first_comma] ) {
13805            my $level_comma = $levels_to_go[$i_first_comma];
13806            my $ibreak      = -1;
13807            my $obp_count   = 0;
13808            for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
13809                if ( $old_breakpoint_to_go[$ii] ) {
13810                    $obp_count++;
13811                    last if ( $obp_count > 1 );
13812                    $ibreak = $ii
13813                      if ( $levels_to_go[$ii] == $level_comma );
13814                }
13815            }
13816            if ( $ibreak >= 0 && $obp_count == 1 ) {
13817                set_forced_breakpoint($ibreak);
13818            }
13819        }
13820    }
13821
13822    my %is_logical_container;
13823
13824    BEGIN {
13825        @_ = qw# if elsif unless while and or err not && | || ? : ! #;
13826        @is_logical_container{@_} = (1) x scalar(@_);
13827    }
13828
13829    sub set_for_semicolon_breakpoints {
13830        my $dd = shift;
13831        foreach ( @{ $rfor_semicolon_list[$dd] } ) {
13832            set_forced_breakpoint($_);
13833        }
13834    }
13835
13836    sub set_logical_breakpoints {
13837        my $dd = shift;
13838        if (
13839               $item_count_stack[$dd] == 0
13840            && $is_logical_container{ $container_type[$dd] }
13841
13842            # TESTING:
13843            || $has_old_logical_breakpoints[$dd]
13844          )
13845        {
13846
13847            # Look for breaks in this order:
13848            # 0   1    2   3
13849            # or  and  ||  &&
13850            foreach my $i ( 0 .. 3 ) {
13851                if ( $rand_or_list[$dd][$i] ) {
13852                    foreach ( @{ $rand_or_list[$dd][$i] } ) {
13853                        set_forced_breakpoint($_);
13854                    }
13855
13856                    # break at any 'if' and 'unless' too
13857                    foreach ( @{ $rand_or_list[$dd][4] } ) {
13858                        set_forced_breakpoint($_);
13859                    }
13860                    $rand_or_list[$dd] = [];
13861                    last;
13862                }
13863            }
13864        }
13865    }
13866
13867    sub is_unbreakable_container {
13868
13869        # never break a container of one of these types
13870        # because bad things can happen (map1.t)
13871        my $dd = shift;
13872        $is_sort_map_grep{ $container_type[$dd] };
13873    }
13874
13875    sub scan_list {
13876
13877        # This routine is responsible for setting line breaks for all lists,
13878        # so that hierarchical structure can be displayed and so that list
13879        # items can be vertically aligned.  The output of this routine is
13880        # stored in the array @forced_breakpoint_to_go, which is used to set
13881        # final breakpoints.
13882
13883        $starting_depth = $nesting_depth_to_go[0];
13884
13885        $block_type                 = ' ';
13886        $current_depth              = $starting_depth;
13887        $i                          = -1;
13888        $last_colon_sequence_number = -1;
13889        $last_nonblank_token        = ';';
13890        $last_nonblank_type         = ';';
13891        $last_nonblank_block_type   = ' ';
13892        $last_old_breakpoint_count  = 0;
13893        $minimum_depth = $current_depth + 1;    # forces update in check below
13894        $old_breakpoint_count      = 0;
13895        $starting_breakpoint_count = $forced_breakpoint_count;
13896        $token                     = ';';
13897        $type                      = ';';
13898        $type_sequence             = '';
13899
13900        check_for_new_minimum_depth($current_depth);
13901
13902        my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13903        my $want_previous_breakpoint = -1;
13904
13905        my $saw_good_breakpoint;
13906        my $i_line_end   = -1;
13907        my $i_line_start = -1;
13908
13909        # loop over all tokens in this batch
13910        while ( ++$i <= $max_index_to_go ) {
13911            if ( $type ne 'b' ) {
13912                $i_last_nonblank_token    = $i - 1;
13913                $last_nonblank_type       = $type;
13914                $last_nonblank_token      = $token;
13915                $last_nonblank_block_type = $block_type;
13916            }
13917            $type          = $types_to_go[$i];
13918            $block_type    = $block_type_to_go[$i];
13919            $token         = $tokens_to_go[$i];
13920            $type_sequence = $type_sequence_to_go[$i];
13921            my $next_type       = $types_to_go[ $i + 1 ];
13922            my $next_token      = $tokens_to_go[ $i + 1 ];
13923            my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13924            $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13925            $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13926            $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13927
13928            # set break if flag was set
13929            if ( $want_previous_breakpoint >= 0 ) {
13930                set_forced_breakpoint($want_previous_breakpoint);
13931                $want_previous_breakpoint = -1;
13932            }
13933
13934            $last_old_breakpoint_count = $old_breakpoint_count;
13935            if ( $old_breakpoint_to_go[$i] ) {
13936                $i_line_end   = $i;
13937                $i_line_start = $i_next_nonblank;
13938
13939                $old_breakpoint_count++;
13940
13941                # Break before certain keywords if user broke there and
13942                # this is a 'safe' break point. The idea is to retain
13943                # any preferred breaks for sequential list operations,
13944                # like a schwartzian transform.
13945                if ($rOpts_break_at_old_keyword_breakpoints) {
13946                    if (
13947                           $next_nonblank_type eq 'k'
13948                        && $is_keyword_returning_list{$next_nonblank_token}
13949                        && (   $type =~ /^[=\)\]\}Riw]$/
13950                            || $type eq 'k'
13951                            && $is_keyword_returning_list{$token} )
13952                      )
13953                    {
13954
13955                        # we actually have to set this break next time through
13956                        # the loop because if we are at a closing token (such
13957                        # as '}') which forms a one-line block, this break might
13958                        # get undone.
13959                        $want_previous_breakpoint = $i;
13960                    }
13961                }
13962            }
13963            next if ( $type eq 'b' );
13964            $depth = $nesting_depth_to_go[ $i + 1 ];
13965
13966            # safety check - be sure we always break after a comment
13967            # Shouldn't happen .. an error here probably means that the
13968            # nobreak flag did not get turned off correctly during
13969            # formatting.
13970            if ( $type eq '#' ) {
13971                if ( $i != $max_index_to_go ) {
13972                    warning(
13973"Non-fatal program bug: backup logic needed to break after a comment\n"
13974                    );
13975                    report_definite_bug();
13976                    $nobreak_to_go[$i] = 0;
13977                    set_forced_breakpoint($i);
13978                }
13979            }
13980
13981            # Force breakpoints at certain tokens in long lines.
13982            # Note that such breakpoints will be undone later if these tokens
13983            # are fully contained within parens on a line.
13984            if (
13985
13986                # break before a keyword within a line
13987                $type eq 'k'
13988                && $i > 0
13989
13990                # if one of these keywords:
13991                && $token =~ /^(if|unless|while|until|for)$/
13992
13993                # but do not break at something like '1 while'
13994                && ( $last_nonblank_type ne 'n' || $i > 2 )
13995
13996                # and let keywords follow a closing 'do' brace
13997                && $last_nonblank_block_type ne 'do'
13998
13999                && (
14000                    $is_long_line
14001
14002                    # or container is broken (by side-comment, etc)
14003                    || (   $next_nonblank_token eq '('
14004                        && $mate_index_to_go[$i_next_nonblank] < $i )
14005                )
14006              )
14007            {
14008                set_forced_breakpoint( $i - 1 );
14009            }
14010
14011            # remember locations of '||'  and '&&' for possible breaks if we
14012            # decide this is a long logical expression.
14013            if ( $type eq '||' ) {
14014                push @{ $rand_or_list[$depth][2] }, $i;
14015                ++$has_old_logical_breakpoints[$depth]
14016                  if ( ( $i == $i_line_start || $i == $i_line_end )
14017                    && $rOpts_break_at_old_logical_breakpoints );
14018            }
14019            elsif ( $type eq '&&' ) {
14020                push @{ $rand_or_list[$depth][3] }, $i;
14021                ++$has_old_logical_breakpoints[$depth]
14022                  if ( ( $i == $i_line_start || $i == $i_line_end )
14023                    && $rOpts_break_at_old_logical_breakpoints );
14024            }
14025            elsif ( $type eq 'f' ) {
14026                push @{ $rfor_semicolon_list[$depth] }, $i;
14027            }
14028            elsif ( $type eq 'k' ) {
14029                if ( $token eq 'and' ) {
14030                    push @{ $rand_or_list[$depth][1] }, $i;
14031                    ++$has_old_logical_breakpoints[$depth]
14032                      if ( ( $i == $i_line_start || $i == $i_line_end )
14033                        && $rOpts_break_at_old_logical_breakpoints );
14034                }
14035
14036                # break immediately at 'or's which are probably not in a logical
14037                # block -- but we will break in logical breaks below so that
14038                # they do not add to the forced_breakpoint_count
14039                elsif ( $token eq 'or' ) {
14040                    push @{ $rand_or_list[$depth][0] }, $i;
14041                    ++$has_old_logical_breakpoints[$depth]
14042                      if ( ( $i == $i_line_start || $i == $i_line_end )
14043                        && $rOpts_break_at_old_logical_breakpoints );
14044                    if ( $is_logical_container{ $container_type[$depth] } ) {
14045                    }
14046                    else {
14047                        if ($is_long_line) { set_forced_breakpoint($i) }
14048                        elsif ( ( $i == $i_line_start || $i == $i_line_end )
14049                            && $rOpts_break_at_old_logical_breakpoints )
14050                        {
14051                            $saw_good_breakpoint = 1;
14052                        }
14053                    }
14054                }
14055                elsif ( $token eq 'if' || $token eq 'unless' ) {
14056                    push @{ $rand_or_list[$depth][4] }, $i;
14057                    if ( ( $i == $i_line_start || $i == $i_line_end )
14058                        && $rOpts_break_at_old_logical_breakpoints )
14059                    {
14060                        set_forced_breakpoint($i);
14061                    }
14062                }
14063            }
14064            elsif ( $is_assignment{$type} ) {
14065                $i_equals[$depth] = $i;
14066            }
14067
14068            if ($type_sequence) {
14069
14070                # handle any postponed closing breakpoints
14071                if ( $token =~ /^[\)\]\}\:]$/ ) {
14072                    if ( $type eq ':' ) {
14073                        $last_colon_sequence_number = $type_sequence;
14074
14075                        # TESTING: retain break at a ':' line break
14076                        if ( ( $i == $i_line_start || $i == $i_line_end )
14077                            && $rOpts_break_at_old_ternary_breakpoints )
14078                        {
14079
14080                            # TESTING:
14081                            set_forced_breakpoint($i);
14082
14083                            # break at previous '='
14084                            if ( $i_equals[$depth] > 0 ) {
14085                                set_forced_breakpoint( $i_equals[$depth] );
14086                                $i_equals[$depth] = -1;
14087                            }
14088                        }
14089                    }
14090                    if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
14091                        my $inc = ( $type eq ':' ) ? 0 : 1;
14092                        set_forced_breakpoint( $i - $inc );
14093                        delete $postponed_breakpoint{$type_sequence};
14094                    }
14095                }
14096
14097                # set breaks at ?/: if they will get separated (and are
14098                # not a ?/: chain), or if the '?' is at the end of the
14099                # line
14100                elsif ( $token eq '?' ) {
14101                    my $i_colon = $mate_index_to_go[$i];
14102                    if (
14103                        $i_colon <= 0  # the ':' is not in this batch
14104                        || $i == 0     # this '?' is the first token of the line
14105                        || $i ==
14106                        $max_index_to_go    # or this '?' is the last token
14107                      )
14108                    {
14109
14110                        # don't break at a '?' if preceded by ':' on
14111                        # this line of previous ?/: pair on this line.
14112                        # This is an attempt to preserve a chain of ?/:
14113                        # expressions (elsif2.t).  And don't break if
14114                        # this has a side comment.
14115                        set_forced_breakpoint($i)
14116                          unless (
14117                            $type_sequence == (
14118                                $last_colon_sequence_number +
14119                                  TYPE_SEQUENCE_INCREMENT
14120                            )
14121                            || $tokens_to_go[$max_index_to_go] eq '#'
14122                          );
14123                        set_closing_breakpoint($i);
14124                    }
14125                }
14126            }
14127
14128#print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
14129
14130            #------------------------------------------------------------
14131            # Handle Increasing Depth..
14132            #
14133            # prepare for a new list when depth increases
14134            # token $i is a '(','{', or '['
14135            #------------------------------------------------------------
14136            if ( $depth > $current_depth ) {
14137
14138                $breakpoint_stack[$depth]       = $forced_breakpoint_count;
14139                $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
14140                $has_broken_sublist[$depth]     = 0;
14141                $identifier_count_stack[$depth] = 0;
14142                $index_before_arrow[$depth]     = -1;
14143                $interrupted_list[$depth]       = 0;
14144                $item_count_stack[$depth]       = 0;
14145                $last_comma_index[$depth]       = undef;
14146                $last_dot_index[$depth]         = undef;
14147                $last_nonblank_type[$depth]     = $last_nonblank_type;
14148                $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
14149                $opening_structure_index_stack[$depth] = $i;
14150                $rand_or_list[$depth]                  = [];
14151                $rfor_semicolon_list[$depth]           = [];
14152                $i_equals[$depth]                      = -1;
14153                $want_comma_break[$depth]              = 0;
14154                $container_type[$depth] =
14155                  ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
14156                  ? $last_nonblank_token
14157                  : "";
14158                $has_old_logical_breakpoints[$depth] = 0;
14159
14160                # if line ends here then signal closing token to break
14161                if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
14162                {
14163                    set_closing_breakpoint($i);
14164                }
14165
14166                # Not all lists of values should be vertically aligned..
14167                $dont_align[$depth] =
14168
14169                  # code BLOCKS are handled at a higher level
14170                  ( $block_type ne "" )
14171
14172                  # certain paren lists
14173                  || ( $type eq '(' ) && (
14174
14175                    # it does not usually look good to align a list of
14176                    # identifiers in a parameter list, as in:
14177                    #    my($var1, $var2, ...)
14178                    # (This test should probably be refined, for now I'm just
14179                    # testing for any keyword)
14180                    ( $last_nonblank_type eq 'k' )
14181
14182                    # a trailing '(' usually indicates a non-list
14183                    || ( $next_nonblank_type eq '(' )
14184                  );
14185
14186                # patch to outdent opening brace of long if/for/..
14187                # statements (like this one).  See similar coding in
14188                # set_continuation breaks.  We have also catch it here for
14189                # short line fragments which otherwise will not go through
14190                # set_continuation_breaks.
14191                if (
14192                    $block_type
14193
14194                    # if we have the ')' but not its '(' in this batch..
14195                    && ( $last_nonblank_token eq ')' )
14196                    && $mate_index_to_go[$i_last_nonblank_token] < 0
14197
14198                    # and user wants brace to left
14199                    && !$rOpts->{'opening-brace-always-on-right'}
14200
14201                    && ( $type  eq '{' )    # should be true
14202                    && ( $token eq '{' )    # should be true
14203                  )
14204                {
14205                    set_forced_breakpoint( $i - 1 );
14206                }
14207            }
14208
14209            #------------------------------------------------------------
14210            # Handle Decreasing Depth..
14211            #
14212            # finish off any old list when depth decreases
14213            # token $i is a ')','}', or ']'
14214            #------------------------------------------------------------
14215            elsif ( $depth < $current_depth ) {
14216
14217                check_for_new_minimum_depth($depth);
14218
14219                # force all outer logical containers to break after we see on
14220                # old breakpoint
14221                $has_old_logical_breakpoints[$depth] ||=
14222                  $has_old_logical_breakpoints[$current_depth];
14223
14224                # Patch to break between ') {' if the paren list is broken.
14225                # There is similar logic in set_continuation_breaks for
14226                # non-broken lists.
14227                if (   $token eq ')'
14228                    && $next_nonblank_block_type
14229                    && $interrupted_list[$current_depth]
14230                    && $next_nonblank_type eq '{'
14231                    && !$rOpts->{'opening-brace-always-on-right'} )
14232                {
14233                    set_forced_breakpoint($i);
14234                }
14235
14236#print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14237
14238                # set breaks at commas if necessary
14239                my ( $bp_count, $do_not_break_apart ) =
14240                  set_comma_breakpoints($current_depth);
14241
14242                my $i_opening = $opening_structure_index_stack[$current_depth];
14243                my $saw_opening_structure = ( $i_opening >= 0 );
14244
14245                # this term is long if we had to break at interior commas..
14246                my $is_long_term = $bp_count > 0;
14247
14248                # ..or if the length between opening and closing parens exceeds
14249                # allowed line length
14250                if ( !$is_long_term && $saw_opening_structure ) {
14251                    my $i_opening_minus = find_token_starting_list($i_opening);
14252
14253                    # Note: we have to allow for one extra space after a
14254                    # closing token so that we do not strand a comma or
14255                    # semicolon, hence the '>=' here (oneline.t)
14256                    $is_long_term =
14257                      excess_line_length( $i_opening_minus, $i ) >= 0;
14258                }
14259
14260                # We've set breaks after all comma-arrows.  Now we have to
14261                # undo them if this can be a one-line block
14262                # (the only breakpoints set will be due to comma-arrows)
14263                if (
14264
14265                    # user doesn't require breaking after all comma-arrows
14266                    ( $rOpts_comma_arrow_breakpoints != 0 )
14267
14268                    # and if the opening structure is in this batch
14269                    && $saw_opening_structure
14270
14271                    # and either on the same old line
14272                    && (
14273                        $old_breakpoint_count_stack[$current_depth] ==
14274                        $last_old_breakpoint_count
14275
14276                        # or user wants to form long blocks with arrows
14277                        || $rOpts_comma_arrow_breakpoints == 2
14278                    )
14279
14280                  # and we made some breakpoints between the opening and closing
14281                    && ( $breakpoint_undo_stack[$current_depth] <
14282                        $forced_breakpoint_undo_count )
14283
14284                    # and this block is short enough to fit on one line
14285                    # Note: use < because need 1 more space for possible comma
14286                    && !$is_long_term
14287
14288                  )
14289                {
14290                    undo_forced_breakpoint_stack(
14291                        $breakpoint_undo_stack[$current_depth] );
14292                }
14293
14294                # now see if we have any comma breakpoints left
14295                my $has_comma_breakpoints =
14296                  ( $breakpoint_stack[$current_depth] !=
14297                      $forced_breakpoint_count );
14298
14299                # update broken-sublist flag of the outer container
14300                $has_broken_sublist[$depth] =
14301                     $has_broken_sublist[$depth]
14302                  || $has_broken_sublist[$current_depth]
14303                  || $is_long_term
14304                  || $has_comma_breakpoints;
14305
14306# Having come to the closing ')', '}', or ']', now we have to decide if we
14307# should 'open up' the structure by placing breaks at the opening and
14308# closing containers.  This is a tricky decision.  Here are some of the
14309# basic considerations:
14310#
14311# -If this is a BLOCK container, then any breakpoints will have already
14312# been set (and according to user preferences), so we need do nothing here.
14313#
14314# -If we have a comma-separated list for which we can align the list items,
14315# then we need to do so because otherwise the vertical aligner cannot
14316# currently do the alignment.
14317#
14318# -If this container does itself contain a container which has been broken
14319# open, then it should be broken open to properly show the structure.
14320#
14321# -If there is nothing to align, and no other reason to break apart,
14322# then do not do it.
14323#
14324# We will not break open the parens of a long but 'simple' logical expression.
14325# For example:
14326#
14327# This is an example of a simple logical expression and its formatting:
14328#
14329#     if ( $bigwasteofspace1 && $bigwasteofspace2
14330#         || $bigwasteofspace3 && $bigwasteofspace4 )
14331#
14332# Most people would prefer this than the 'spacey' version:
14333#
14334#     if (
14335#         $bigwasteofspace1 && $bigwasteofspace2
14336#         || $bigwasteofspace3 && $bigwasteofspace4
14337#     )
14338#
14339# To illustrate the rules for breaking logical expressions, consider:
14340#
14341#             FULLY DENSE:
14342#             if ( $opt_excl
14343#                 and ( exists $ids_excl_uc{$id_uc}
14344#                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
14345#
14346# This is on the verge of being difficult to read.  The current default is to
14347# open it up like this:
14348#
14349#             DEFAULT:
14350#             if (
14351#                 $opt_excl
14352#                 and ( exists $ids_excl_uc{$id_uc}
14353#                     or grep $id_uc =~ /$_/, @ids_excl_uc )
14354#               )
14355#
14356# This is a compromise which tries to avoid being too dense and to spacey.
14357# A more spaced version would be:
14358#
14359#             SPACEY:
14360#             if (
14361#                 $opt_excl
14362#                 and (
14363#                     exists $ids_excl_uc{$id_uc}
14364#                     or grep $id_uc =~ /$_/, @ids_excl_uc
14365#                 )
14366#               )
14367#
14368# Some people might prefer the spacey version -- an option could be added.  The
14369# innermost expression contains a long block '( exists $ids_...  ')'.
14370#
14371# Here is how the logic goes: We will force a break at the 'or' that the
14372# innermost expression contains, but we will not break apart its opening and
14373# closing containers because (1) it contains no multi-line sub-containers itself,
14374# and (2) there is no alignment to be gained by breaking it open like this
14375#
14376#             and (
14377#                 exists $ids_excl_uc{$id_uc}
14378#                 or grep $id_uc =~ /$_/, @ids_excl_uc
14379#             )
14380#
14381# (although this looks perfectly ok and might be good for long expressions).  The
14382# outer 'if' container, though, contains a broken sub-container, so it will be
14383# broken open to avoid too much density.  Also, since it contains no 'or's, there
14384# will be a forced break at its 'and'.
14385
14386                # set some flags telling something about this container..
14387                my $is_simple_logical_expression = 0;
14388                if (   $item_count_stack[$current_depth] == 0
14389                    && $saw_opening_structure
14390                    && $tokens_to_go[$i_opening] eq '('
14391                    && $is_logical_container{ $container_type[$current_depth] }
14392                  )
14393                {
14394
14395                    # This seems to be a simple logical expression with
14396                    # no existing breakpoints.  Set a flag to prevent
14397                    # opening it up.
14398                    if ( !$has_comma_breakpoints ) {
14399                        $is_simple_logical_expression = 1;
14400                    }
14401
14402                    # This seems to be a simple logical expression with
14403                    # breakpoints (broken sublists, for example).  Break
14404                    # at all 'or's and '||'s.
14405                    else {
14406                        set_logical_breakpoints($current_depth);
14407                    }
14408                }
14409
14410                if ( $is_long_term
14411                    && @{ $rfor_semicolon_list[$current_depth] } )
14412                {
14413                    set_for_semicolon_breakpoints($current_depth);
14414
14415                    # open up a long 'for' or 'foreach' container to allow
14416                    # leading term alignment unless -lp is used.
14417                    $has_comma_breakpoints = 1
14418                      unless $rOpts_line_up_parentheses;
14419                }
14420
14421                if (
14422
14423                    # breaks for code BLOCKS are handled at a higher level
14424                    !$block_type
14425
14426                    # we do not need to break at the top level of an 'if'
14427                    # type expression
14428                    && !$is_simple_logical_expression
14429
14430                    ## modification to keep ': (' containers vertically tight;
14431                    ## but probably better to let user set -vt=1 to avoid
14432                    ## inconsistency with other paren types
14433                    ## && ($container_type[$current_depth] ne ':')
14434
14435                    # otherwise, we require one of these reasons for breaking:
14436                    && (
14437
14438                        # - this term has forced line breaks
14439                        $has_comma_breakpoints
14440
14441                       # - the opening container is separated from this batch
14442                       #   for some reason (comment, blank line, code block)
14443                       # - this is a non-paren container spanning multiple lines
14444                        || !$saw_opening_structure
14445
14446                        # - this is a long block contained in another breakable
14447                        #   container
14448                        || (   $is_long_term
14449                            && $container_environment_to_go[$i_opening] ne
14450                            'BLOCK' )
14451                    )
14452                  )
14453                {
14454
14455                    # For -lp option, we must put a breakpoint before
14456                    # the token which has been identified as starting
14457                    # this indentation level.  This is necessary for
14458                    # proper alignment.
14459                    if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14460                    {
14461                        my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14462                        if (   $i_opening + 1 < $max_index_to_go
14463                            && $types_to_go[ $i_opening + 1 ] eq 'b' )
14464                        {
14465                            $item = $leading_spaces_to_go[ $i_opening + 2 ];
14466                        }
14467                        if ( defined($item) ) {
14468                            my $i_start_2 = $item->get_STARTING_INDEX();
14469                            if (
14470                                defined($i_start_2)
14471
14472                                # we are breaking after an opening brace, paren,
14473                                # so don't break before it too
14474                                && $i_start_2 ne $i_opening
14475                              )
14476                            {
14477
14478                                # Only break for breakpoints at the same
14479                                # indentation level as the opening paren
14480                                my $test1 = $nesting_depth_to_go[$i_opening];
14481                                my $test2 = $nesting_depth_to_go[$i_start_2];
14482                                if ( $test2 == $test1 ) {
14483                                    set_forced_breakpoint( $i_start_2 - 1 );
14484                                }
14485                            }
14486                        }
14487                    }
14488
14489                    # break after opening structure.
14490                    # note: break before closing structure will be automatic
14491                    if ( $minimum_depth <= $current_depth ) {
14492
14493                        set_forced_breakpoint($i_opening)
14494                          unless ( $do_not_break_apart
14495                            || is_unbreakable_container($current_depth) );
14496
14497                        # break at '.' of lower depth level before opening token
14498                        if ( $last_dot_index[$depth] ) {
14499                            set_forced_breakpoint( $last_dot_index[$depth] );
14500                        }
14501
14502                        # break before opening structure if preeced by another
14503                        # closing structure and a comma.  This is normally
14504                        # done by the previous closing brace, but not
14505                        # if it was a one-line block.
14506                        if ( $i_opening > 2 ) {
14507                            my $i_prev =
14508                              ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14509                              ? $i_opening - 2
14510                              : $i_opening - 1;
14511
14512                            if (   $types_to_go[$i_prev] eq ','
14513                                && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14514                            {
14515                                set_forced_breakpoint($i_prev);
14516                            }
14517
14518                            # also break before something like ':('  or '?('
14519                            # if appropriate.
14520                            elsif (
14521                                $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14522                            {
14523                                my $token_prev = $tokens_to_go[$i_prev];
14524                                if ( $want_break_before{$token_prev} ) {
14525                                    set_forced_breakpoint($i_prev);
14526                                }
14527                            }
14528                        }
14529                    }
14530
14531                    # break after comma following closing structure
14532                    if ( $next_type eq ',' ) {
14533                        set_forced_breakpoint( $i + 1 );
14534                    }
14535
14536                    # break before an '=' following closing structure
14537                    if (
14538                        $is_assignment{$next_nonblank_type}
14539                        && ( $breakpoint_stack[$current_depth] !=
14540                            $forced_breakpoint_count )
14541                      )
14542                    {
14543                        set_forced_breakpoint($i);
14544                    }
14545
14546                    # break at any comma before the opening structure Added
14547                    # for -lp, but seems to be good in general.  It isn't
14548                    # obvious how far back to look; the '5' below seems to
14549                    # work well and will catch the comma in something like
14550                    #  push @list, myfunc( $param, $param, ..
14551
14552                    my $icomma = $last_comma_index[$depth];
14553                    if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14554                        unless ( $forced_breakpoint_to_go[$icomma] ) {
14555                            set_forced_breakpoint($icomma);
14556                        }
14557                    }
14558                }    # end logic to open up a container
14559
14560                # Break open a logical container open if it was already open
14561                elsif ($is_simple_logical_expression
14562                    && $has_old_logical_breakpoints[$current_depth] )
14563                {
14564                    set_logical_breakpoints($current_depth);
14565                }
14566
14567                # Handle long container which does not get opened up
14568                elsif ($is_long_term) {
14569
14570                    # must set fake breakpoint to alert outer containers that
14571                    # they are complex
14572                    set_fake_breakpoint();
14573                }
14574            }
14575
14576            #------------------------------------------------------------
14577            # Handle this token
14578            #------------------------------------------------------------
14579
14580            $current_depth = $depth;
14581
14582            # handle comma-arrow
14583            if ( $type eq '=>' ) {
14584                next if ( $last_nonblank_type eq '=>' );
14585                next if $rOpts_break_at_old_comma_breakpoints;
14586                next if $rOpts_comma_arrow_breakpoints == 3;
14587                $want_comma_break[$depth]   = 1;
14588                $index_before_arrow[$depth] = $i_last_nonblank_token;
14589                next;
14590            }
14591
14592            elsif ( $type eq '.' ) {
14593                $last_dot_index[$depth] = $i;
14594            }
14595
14596            # Turn off alignment if we are sure that this is not a list
14597            # environment.  To be safe, we will do this if we see certain
14598            # non-list tokens, such as ';', and also the environment is
14599            # not a list.  Note that '=' could be in any of the = operators
14600            # (lextest.t). We can't just use the reported environment
14601            # because it can be incorrect in some cases.
14602            elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14603                && $container_environment_to_go[$i] ne 'LIST' )
14604            {
14605                $dont_align[$depth]         = 1;
14606                $want_comma_break[$depth]   = 0;
14607                $index_before_arrow[$depth] = -1;
14608            }
14609
14610            # now just handle any commas
14611            next unless ( $type eq ',' );
14612
14613            $last_dot_index[$depth]   = undef;
14614            $last_comma_index[$depth] = $i;
14615
14616            # break here if this comma follows a '=>'
14617            # but not if there is a side comment after the comma
14618            if ( $want_comma_break[$depth] ) {
14619
14620                if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14621                    $want_comma_break[$depth]   = 0;
14622                    $index_before_arrow[$depth] = -1;
14623                    next;
14624                }
14625
14626                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14627
14628                # break before the previous token if it looks safe
14629                # Example of something that we will not try to break before:
14630                #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14631                # Also we don't want to break at a binary operator (like +):
14632                # $c->createOval(
14633                #    $x + $R, $y +
14634                #    $R => $x - $R,
14635                #    $y - $R, -fill   => 'black',
14636                # );
14637                my $ibreak = $index_before_arrow[$depth] - 1;
14638                if (   $ibreak > 0
14639                    && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14640                {
14641                    if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14642                    if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
14643                    if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14644
14645                        # don't break pointer calls, such as the following:
14646                        #  File::Spec->curdir  => 1,
14647                        # (This is tokenized as adjacent 'w' tokens)
14648                        if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14649                            set_forced_breakpoint($ibreak);
14650                        }
14651                    }
14652                }
14653
14654                $want_comma_break[$depth]   = 0;
14655                $index_before_arrow[$depth] = -1;
14656
14657                # handle list which mixes '=>'s and ','s:
14658                # treat any list items so far as an interrupted list
14659                $interrupted_list[$depth] = 1;
14660                next;
14661            }
14662
14663            # break after all commas above starting depth
14664            if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14665                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14666                next;
14667            }
14668
14669            # add this comma to the list..
14670            my $item_count = $item_count_stack[$depth];
14671            if ( $item_count == 0 ) {
14672
14673                # but do not form a list with no opening structure
14674                # for example:
14675
14676                #            open INFILE_COPY, ">$input_file_copy"
14677                #              or die ("very long message");
14678
14679                if ( ( $opening_structure_index_stack[$depth] < 0 )
14680                    && $container_environment_to_go[$i] eq 'BLOCK' )
14681                {
14682                    $dont_align[$depth] = 1;
14683                }
14684            }
14685
14686            $comma_index[$depth][$item_count] = $i;
14687            ++$item_count_stack[$depth];
14688            if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14689                $identifier_count_stack[$depth]++;
14690            }
14691        }
14692
14693        #-------------------------------------------
14694        # end of loop over all tokens in this batch
14695        #-------------------------------------------
14696
14697        # set breaks for any unfinished lists ..
14698        for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14699
14700            $interrupted_list[$dd] = 1;
14701            $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14702            set_comma_breakpoints($dd);
14703            set_logical_breakpoints($dd)
14704              if ( $has_old_logical_breakpoints[$dd] );
14705            set_for_semicolon_breakpoints($dd);
14706
14707            # break open container...
14708            my $i_opening = $opening_structure_index_stack[$dd];
14709            set_forced_breakpoint($i_opening)
14710              unless (
14711                is_unbreakable_container($dd)
14712
14713                # Avoid a break which would place an isolated ' or "
14714                # on a line
14715                || (   $type eq 'Q'
14716                    && $i_opening >= $max_index_to_go - 2
14717                    && $token =~ /^['"]$/ )
14718              );
14719        }
14720
14721        # Return a flag indicating if the input file had some good breakpoints.
14722        # This flag will be used to force a break in a line shorter than the
14723        # allowed line length.
14724        if ( $has_old_logical_breakpoints[$current_depth] ) {
14725            $saw_good_breakpoint = 1;
14726        }
14727        return $saw_good_breakpoint;
14728    }
14729}    # end scan_list
14730
14731sub find_token_starting_list {
14732
14733    # When testing to see if a block will fit on one line, some
14734    # previous token(s) may also need to be on the line; particularly
14735    # if this is a sub call.  So we will look back at least one
14736    # token. NOTE: This isn't perfect, but not critical, because
14737    # if we mis-identify a block, it will be wrapped and therefore
14738    # fixed the next time it is formatted.
14739    my $i_opening_paren = shift;
14740    my $i_opening_minus = $i_opening_paren;
14741    my $im1             = $i_opening_paren - 1;
14742    my $im2             = $i_opening_paren - 2;
14743    my $im3             = $i_opening_paren - 3;
14744    my $typem1          = $types_to_go[$im1];
14745    my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
14746    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
14747        $i_opening_minus = $i_opening_paren;
14748    }
14749    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
14750        $i_opening_minus = $im1 if $im1 >= 0;
14751
14752        # walk back to improve length estimate
14753        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
14754            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
14755            $i_opening_minus = $j;
14756        }
14757        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
14758    }
14759    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
14760    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
14761        $i_opening_minus = $im2;
14762    }
14763    return $i_opening_minus;
14764}
14765
14766{    # begin set_comma_breakpoints_do
14767
14768    my %is_keyword_with_special_leading_term;
14769
14770    BEGIN {
14771
14772        # These keywords have prototypes which allow a special leading item
14773        # followed by a list
14774        @_ =
14775          qw(formline grep kill map printf sprintf push chmod join pack unshift);
14776        @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14777    }
14778
14779    sub set_comma_breakpoints_do {
14780
14781        # Given a list with some commas, set breakpoints at some of the
14782        # commas, if necessary, to make it easy to read.  This list is
14783        # an example:
14784        my (
14785            $depth,               $i_opening_paren,  $i_closing_paren,
14786            $item_count,          $identifier_count, $rcomma_index,
14787            $next_nonblank_type,  $list_type,        $interrupted,
14788            $rdo_not_break_apart, $must_break_open,
14789        ) = @_;
14790
14791        # nothing to do if no commas seen
14792        return if ( $item_count < 1 );
14793        my $i_first_comma     = $$rcomma_index[0];
14794        my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14795        my $i_last_comma      = $i_true_last_comma;
14796        if ( $i_last_comma >= $max_index_to_go ) {
14797            $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14798            return if ( $item_count < 1 );
14799        }
14800
14801        #---------------------------------------------------------------
14802        # find lengths of all items in the list to calculate page layout
14803        #---------------------------------------------------------------
14804        my $comma_count = $item_count;
14805        my @item_lengths;
14806        my @i_term_begin;
14807        my @i_term_end;
14808        my @i_term_comma;
14809        my $i_prev_plus;
14810        my @max_length = ( 0, 0 );
14811        my $first_term_length;
14812        my $i      = $i_opening_paren;
14813        my $is_odd = 1;
14814
14815        for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14816            $is_odd      = 1 - $is_odd;
14817            $i_prev_plus = $i + 1;
14818            $i           = $$rcomma_index[$j];
14819
14820            my $i_term_end =
14821              ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14822            my $i_term_begin =
14823              ( $types_to_go[$i_prev_plus] eq 'b' )
14824              ? $i_prev_plus + 1
14825              : $i_prev_plus;
14826            push @i_term_begin, $i_term_begin;
14827            push @i_term_end,   $i_term_end;
14828            push @i_term_comma, $i;
14829
14830            # note: currently adding 2 to all lengths (for comma and space)
14831            my $length =
14832              2 + token_sequence_length( $i_term_begin, $i_term_end );
14833            push @item_lengths, $length;
14834
14835            if ( $j == 0 ) {
14836                $first_term_length = $length;
14837            }
14838            else {
14839
14840                if ( $length > $max_length[$is_odd] ) {
14841                    $max_length[$is_odd] = $length;
14842                }
14843            }
14844        }
14845
14846        # now we have to make a distinction between the comma count and item
14847        # count, because the item count will be one greater than the comma
14848        # count if the last item is not terminated with a comma
14849        my $i_b =
14850          ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14851          ? $i_last_comma + 1
14852          : $i_last_comma;
14853        my $i_e =
14854          ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14855          ? $i_closing_paren - 2
14856          : $i_closing_paren - 1;
14857        my $i_effective_last_comma = $i_last_comma;
14858
14859        my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14860
14861        if ( $last_item_length > 0 ) {
14862
14863            # add 2 to length because other lengths include a comma and a blank
14864            $last_item_length += 2;
14865            push @item_lengths, $last_item_length;
14866            push @i_term_begin, $i_b + 1;
14867            push @i_term_end,   $i_e;
14868            push @i_term_comma, undef;
14869
14870            my $i_odd = $item_count % 2;
14871
14872            if ( $last_item_length > $max_length[$i_odd] ) {
14873                $max_length[$i_odd] = $last_item_length;
14874            }
14875
14876            $item_count++;
14877            $i_effective_last_comma = $i_e + 1;
14878
14879            if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14880                $identifier_count++;
14881            }
14882        }
14883
14884        #---------------------------------------------------------------
14885        # End of length calculations
14886        #---------------------------------------------------------------
14887
14888        #---------------------------------------------------------------
14889        # Compound List Rule 1:
14890        # Break at (almost) every comma for a list containing a broken
14891        # sublist.  This has higher priority than the Interrupted List
14892        # Rule.
14893        #---------------------------------------------------------------
14894        if ( $has_broken_sublist[$depth] ) {
14895
14896            # Break at every comma except for a comma between two
14897            # simple, small terms.  This prevents long vertical
14898            # columns of, say, just 0's.
14899            my $small_length = 10;    # 2 + actual maximum length wanted
14900
14901            # We'll insert a break in long runs of small terms to
14902            # allow alignment in uniform tables.
14903            my $skipped_count = 0;
14904            my $columns       = table_columns_available($i_first_comma);
14905            my $fields        = int( $columns / $small_length );
14906            if (   $rOpts_maximum_fields_per_table
14907                && $fields > $rOpts_maximum_fields_per_table )
14908            {
14909                $fields = $rOpts_maximum_fields_per_table;
14910            }
14911            my $max_skipped_count = $fields - 1;
14912
14913            my $is_simple_last_term = 0;
14914            my $is_simple_next_term = 0;
14915            foreach my $j ( 0 .. $item_count ) {
14916                $is_simple_last_term = $is_simple_next_term;
14917                $is_simple_next_term = 0;
14918                if (   $j < $item_count
14919                    && $i_term_end[$j] == $i_term_begin[$j]
14920                    && $item_lengths[$j] <= $small_length )
14921                {
14922                    $is_simple_next_term = 1;
14923                }
14924                next if $j == 0;
14925                if (   $is_simple_last_term
14926                    && $is_simple_next_term
14927                    && $skipped_count < $max_skipped_count )
14928                {
14929                    $skipped_count++;
14930                }
14931                else {
14932                    $skipped_count = 0;
14933                    my $i = $i_term_comma[ $j - 1 ];
14934                    last unless defined $i;
14935                    set_forced_breakpoint($i);
14936                }
14937            }
14938
14939            # always break at the last comma if this list is
14940            # interrupted; we wouldn't want to leave a terminal '{', for
14941            # example.
14942            if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14943            return;
14944        }
14945
14946#my ( $a, $b, $c ) = caller();
14947#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14948#i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14949#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14950
14951        #---------------------------------------------------------------
14952        # Interrupted List Rule:
14953        # A list is is forced to use old breakpoints if it was interrupted
14954        # by side comments or blank lines, or requested by user.
14955        #---------------------------------------------------------------
14956        if (   $rOpts_break_at_old_comma_breakpoints
14957            || $interrupted
14958            || $i_opening_paren < 0 )
14959        {
14960            copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14961            return;
14962        }
14963
14964        #---------------------------------------------------------------
14965        # Looks like a list of items.  We have to look at it and size it up.
14966        #---------------------------------------------------------------
14967
14968        my $opening_token = $tokens_to_go[$i_opening_paren];
14969        my $opening_environment =
14970          $container_environment_to_go[$i_opening_paren];
14971
14972        #-------------------------------------------------------------------
14973        # Return if this will fit on one line
14974        #-------------------------------------------------------------------
14975
14976        my $i_opening_minus = find_token_starting_list($i_opening_paren);
14977        return
14978          unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14979
14980        #-------------------------------------------------------------------
14981        # Now we know that this block spans multiple lines; we have to set
14982        # at least one breakpoint -- real or fake -- as a signal to break
14983        # open any outer containers.
14984        #-------------------------------------------------------------------
14985        set_fake_breakpoint();
14986
14987        # be sure we do not extend beyond the current list length
14988        if ( $i_effective_last_comma >= $max_index_to_go ) {
14989            $i_effective_last_comma = $max_index_to_go - 1;
14990        }
14991
14992        # Set a flag indicating if we need to break open to keep -lp
14993        # items aligned.  This is necessary if any of the list terms
14994        # exceeds the available space after the '('.
14995        my $need_lp_break_open = $must_break_open;
14996        if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14997            my $columns_if_unbroken = $rOpts_maximum_line_length -
14998              total_line_length( $i_opening_minus, $i_opening_paren );
14999            $need_lp_break_open =
15000                 ( $max_length[0] > $columns_if_unbroken )
15001              || ( $max_length[1] > $columns_if_unbroken )
15002              || ( $first_term_length > $columns_if_unbroken );
15003        }
15004
15005        # Specify if the list must have an even number of fields or not.
15006        # It is generally safest to assume an even number, because the
15007        # list items might be a hash list.  But if we can be sure that
15008        # it is not a hash, then we can allow an odd number for more
15009        # flexibility.
15010        my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
15011
15012        if (   $identifier_count >= $item_count - 1
15013            || $is_assignment{$next_nonblank_type}
15014            || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
15015          )
15016        {
15017            $odd_or_even = 1;
15018        }
15019
15020        # do we have a long first term which should be
15021        # left on a line by itself?
15022        my $use_separate_first_term = (
15023            $odd_or_even == 1       # only if we can use 1 field/line
15024              && $item_count > 3    # need several items
15025              && $first_term_length >
15026              2 * $max_length[0] - 2    # need long first term
15027              && $first_term_length >
15028              2 * $max_length[1] - 2    # need long first term
15029        );
15030
15031        # or do we know from the type of list that the first term should
15032        # be placed alone?
15033        if ( !$use_separate_first_term ) {
15034            if ( $is_keyword_with_special_leading_term{$list_type} ) {
15035                $use_separate_first_term = 1;
15036
15037                # should the container be broken open?
15038                if ( $item_count < 3 ) {
15039                    if ( $i_first_comma - $i_opening_paren < 4 ) {
15040                        $$rdo_not_break_apart = 1;
15041                    }
15042                }
15043                elsif ($first_term_length < 20
15044                    && $i_first_comma - $i_opening_paren < 4 )
15045                {
15046                    my $columns = table_columns_available($i_first_comma);
15047                    if ( $first_term_length < $columns ) {
15048                        $$rdo_not_break_apart = 1;
15049                    }
15050                }
15051            }
15052        }
15053
15054        # if so,
15055        if ($use_separate_first_term) {
15056
15057            # ..set a break and update starting values
15058            $use_separate_first_term = 1;
15059            set_forced_breakpoint($i_first_comma);
15060            $i_opening_paren = $i_first_comma;
15061            $i_first_comma   = $$rcomma_index[1];
15062            $item_count--;
15063            return if $comma_count == 1;
15064            shift @item_lengths;
15065            shift @i_term_begin;
15066            shift @i_term_end;
15067            shift @i_term_comma;
15068        }
15069
15070        # if not, update the metrics to include the first term
15071        else {
15072            if ( $first_term_length > $max_length[0] ) {
15073                $max_length[0] = $first_term_length;
15074            }
15075        }
15076
15077        # Field width parameters
15078        my $pair_width = ( $max_length[0] + $max_length[1] );
15079        my $max_width =
15080          ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
15081
15082        # Number of free columns across the page width for laying out tables
15083        my $columns = table_columns_available($i_first_comma);
15084
15085        # Estimated maximum number of fields which fit this space
15086        # This will be our first guess
15087        my $number_of_fields_max =
15088          maximum_number_of_fields( $columns, $odd_or_even, $max_width,
15089            $pair_width );
15090        my $number_of_fields = $number_of_fields_max;
15091
15092        # Find the best-looking number of fields
15093        # and make this our second guess if possible
15094        my ( $number_of_fields_best, $ri_ragged_break_list,
15095            $new_identifier_count )
15096          = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
15097            $max_width );
15098
15099        if (   $number_of_fields_best != 0
15100            && $number_of_fields_best < $number_of_fields_max )
15101        {
15102            $number_of_fields = $number_of_fields_best;
15103        }
15104
15105        # ----------------------------------------------------------------------
15106        # If we are crowded and the -lp option is being used, try to
15107        # undo some indentation
15108        # ----------------------------------------------------------------------
15109        if (
15110            $rOpts_line_up_parentheses
15111            && (
15112                $number_of_fields == 0
15113                || (   $number_of_fields == 1
15114                    && $number_of_fields != $number_of_fields_best )
15115            )
15116          )
15117        {
15118            my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
15119            if ( $available_spaces > 0 ) {
15120
15121                my $spaces_wanted = $max_width - $columns;    # for 1 field
15122
15123                if ( $number_of_fields_best == 0 ) {
15124                    $number_of_fields_best =
15125                      get_maximum_fields_wanted( \@item_lengths );
15126                }
15127
15128                if ( $number_of_fields_best != 1 ) {
15129                    my $spaces_wanted_2 =
15130                      1 + $pair_width - $columns;             # for 2 fields
15131                    if ( $available_spaces > $spaces_wanted_2 ) {
15132                        $spaces_wanted = $spaces_wanted_2;
15133                    }
15134                }
15135
15136                if ( $spaces_wanted > 0 ) {
15137                    my $deleted_spaces =
15138                      reduce_lp_indentation( $i_first_comma, $spaces_wanted );
15139
15140                    # redo the math
15141                    if ( $deleted_spaces > 0 ) {
15142                        $columns = table_columns_available($i_first_comma);
15143                        $number_of_fields_max =
15144                          maximum_number_of_fields( $columns, $odd_or_even,
15145                            $max_width, $pair_width );
15146                        $number_of_fields = $number_of_fields_max;
15147
15148                        if (   $number_of_fields_best == 1
15149                            && $number_of_fields >= 1 )
15150                        {
15151                            $number_of_fields = $number_of_fields_best;
15152                        }
15153                    }
15154                }
15155            }
15156        }
15157
15158        # try for one column if two won't work
15159        if ( $number_of_fields <= 0 ) {
15160            $number_of_fields = int( $columns / $max_width );
15161        }
15162
15163        # The user can place an upper bound on the number of fields,
15164        # which can be useful for doing maintenance on tables
15165        if (   $rOpts_maximum_fields_per_table
15166            && $number_of_fields > $rOpts_maximum_fields_per_table )
15167        {
15168            $number_of_fields = $rOpts_maximum_fields_per_table;
15169        }
15170
15171        # How many columns (characters) and lines would this container take
15172        # if no additional whitespace were added?
15173        my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15174            $i_effective_last_comma + 1 );
15175        if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
15176        my $packed_lines = 1 + int( $packed_columns / $columns );
15177
15178        # are we an item contained in an outer list?
15179        my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15180
15181        if ( $number_of_fields <= 0 ) {
15182
15183#         #---------------------------------------------------------------
15184#         # We're in trouble.  We can't find a single field width that works.
15185#         # There is no simple answer here; we may have a single long list
15186#         # item, or many.
15187#         #---------------------------------------------------------------
15188#
15189#         In many cases, it may be best to not force a break if there is just one
15190#         comma, because the standard continuation break logic will do a better
15191#         job without it.
15192#
15193#         In the common case that all but one of the terms can fit
15194#         on a single line, it may look better not to break open the
15195#         containing parens.  Consider, for example
15196#
15197#             $color =
15198#               join ( '/',
15199#                 sort { $color_value{$::a} <=> $color_value{$::b}; }
15200#                 keys %colors );
15201#
15202#         which will look like this with the container broken:
15203#
15204#             $color = join (
15205#                 '/',
15206#                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15207#             );
15208#
15209#         Here is an example of this rule for a long last term:
15210#
15211#             log_message( 0, 256, 128,
15212#                 "Number of routes in adj-RIB-in to be considered: $peercount" );
15213#
15214#         And here is an example with a long first term:
15215#
15216#         $s = sprintf(
15217# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15218#             $r, $pu, $ps, $cu, $cs, $tt
15219#           )
15220#           if $style eq 'all';
15221
15222            my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15223            my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15224            my $long_first_term =
15225              excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15226
15227            # break at every comma ...
15228            if (
15229
15230                # if requested by user or is best looking
15231                $number_of_fields_best == 1
15232
15233                # or if this is a sublist of a larger list
15234                || $in_hierarchical_list
15235
15236                # or if multiple commas and we dont have a long first or last
15237                # term
15238                || ( $comma_count > 1
15239                    && !( $long_last_term || $long_first_term ) )
15240              )
15241            {
15242                foreach ( 0 .. $comma_count - 1 ) {
15243                    set_forced_breakpoint( $$rcomma_index[$_] );
15244                }
15245            }
15246            elsif ($long_last_term) {
15247
15248                set_forced_breakpoint($i_last_comma);
15249                $$rdo_not_break_apart = 1 unless $must_break_open;
15250            }
15251            elsif ($long_first_term) {
15252
15253                set_forced_breakpoint($i_first_comma);
15254            }
15255            else {
15256
15257                # let breaks be defined by default bond strength logic
15258            }
15259            return;
15260        }
15261
15262        # --------------------------------------------------------
15263        # We have a tentative field count that seems to work.
15264        # How many lines will this require?
15265        # --------------------------------------------------------
15266        my $formatted_lines = $item_count / ($number_of_fields);
15267        if ( $formatted_lines != int $formatted_lines ) {
15268            $formatted_lines = 1 + int $formatted_lines;
15269        }
15270
15271        # So far we've been trying to fill out to the right margin.  But
15272        # compact tables are easier to read, so let's see if we can use fewer
15273        # fields without increasing the number of lines.
15274        $number_of_fields =
15275          compactify_table( $item_count, $number_of_fields, $formatted_lines,
15276            $odd_or_even );
15277
15278        # How many spaces across the page will we fill?
15279        my $columns_per_line =
15280          ( int $number_of_fields / 2 ) * $pair_width +
15281          ( $number_of_fields % 2 ) * $max_width;
15282
15283        my $formatted_columns;
15284
15285        if ( $number_of_fields > 1 ) {
15286            $formatted_columns =
15287              ( $pair_width * ( int( $item_count / 2 ) ) +
15288                  ( $item_count % 2 ) * $max_width );
15289        }
15290        else {
15291            $formatted_columns = $max_width * $item_count;
15292        }
15293        if ( $formatted_columns < $packed_columns ) {
15294            $formatted_columns = $packed_columns;
15295        }
15296
15297        my $unused_columns = $formatted_columns - $packed_columns;
15298
15299        # set some empirical parameters to help decide if we should try to
15300        # align; high sparsity does not look good, especially with few lines
15301        my $sparsity = ($unused_columns) / ($formatted_columns);
15302        my $max_allowed_sparsity =
15303            ( $item_count < 3 )    ? 0.1
15304          : ( $packed_lines == 1 ) ? 0.15
15305          : ( $packed_lines == 2 ) ? 0.4
15306          :                          0.7;
15307
15308        # Begin check for shortcut methods, which avoid treating a list
15309        # as a table for relatively small parenthesized lists.  These
15310        # are usually easier to read if not formatted as tables.
15311        if (
15312            $packed_lines <= 2    # probably can fit in 2 lines
15313            && $item_count < 9    # doesn't have too many items
15314            && $opening_environment eq 'BLOCK'    # not a sub-container
15315            && $opening_token       eq '('        # is paren list
15316          )
15317        {
15318
15319            # Shortcut method 1: for -lp and just one comma:
15320            # This is a no-brainer, just break at the comma.
15321            if (
15322                $rOpts_line_up_parentheses        # -lp
15323                && $item_count == 2               # two items, one comma
15324                && !$must_break_open
15325              )
15326            {
15327                my $i_break = $$rcomma_index[0];
15328                set_forced_breakpoint($i_break);
15329                $$rdo_not_break_apart = 1;
15330                set_non_alignment_flags( $comma_count, $rcomma_index );
15331                return;
15332
15333            }
15334
15335            # method 2 is for most small ragged lists which might look
15336            # best if not displayed as a table.
15337            if (
15338                ( $number_of_fields == 2 && $item_count == 3 )
15339                || (
15340                    $new_identifier_count > 0    # isn't all quotes
15341                    && $sparsity > 0.15
15342                )    # would be fairly spaced gaps if aligned
15343              )
15344            {
15345
15346                my $break_count = set_ragged_breakpoints( \@i_term_comma,
15347                    $ri_ragged_break_list );
15348                ++$break_count if ($use_separate_first_term);
15349
15350                # NOTE: we should really use the true break count here,
15351                # which can be greater if there are large terms and
15352                # little space, but usually this will work well enough.
15353                unless ($must_break_open) {
15354
15355                    if ( $break_count <= 1 ) {
15356                        $$rdo_not_break_apart = 1;
15357                    }
15358                    elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15359                    {
15360                        $$rdo_not_break_apart = 1;
15361                    }
15362                }
15363                set_non_alignment_flags( $comma_count, $rcomma_index );
15364                return;
15365            }
15366
15367        }    # end shortcut methods
15368
15369        # debug stuff
15370
15371        FORMATTER_DEBUG_FLAG_SPARSE && do {
15372            print
15373"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15374
15375        };
15376
15377        #---------------------------------------------------------------
15378        # Compound List Rule 2:
15379        # If this list is too long for one line, and it is an item of a
15380        # larger list, then we must format it, regardless of sparsity
15381        # (ian.t).  One reason that we have to do this is to trigger
15382        # Compound List Rule 1, above, which causes breaks at all commas of
15383        # all outer lists.  In this way, the structure will be properly
15384        # displayed.
15385        #---------------------------------------------------------------
15386
15387        # Decide if this list is too long for one line unless broken
15388        my $total_columns = table_columns_available($i_opening_paren);
15389        my $too_long      = $packed_columns > $total_columns;
15390
15391        # For a paren list, include the length of the token just before the
15392        # '(' because this is likely a sub call, and we would have to
15393        # include the sub name on the same line as the list.  This is still
15394        # imprecise, but not too bad.  (steve.t)
15395        if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15396
15397            $too_long = excess_line_length( $i_opening_minus,
15398                $i_effective_last_comma + 1 ) > 0;
15399        }
15400
15401        # FIXME: For an item after a '=>', try to include the length of the
15402        # thing before the '=>'.  This is crude and should be improved by
15403        # actually looking back token by token.
15404        if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15405            my $i_opening_minus = $i_opening_paren - 4;
15406            if ( $i_opening_minus >= 0 ) {
15407                $too_long = excess_line_length( $i_opening_minus,
15408                    $i_effective_last_comma + 1 ) > 0;
15409            }
15410        }
15411
15412        # Always break lists contained in '[' and '{' if too long for 1 line,
15413        # and always break lists which are too long and part of a more complex
15414        # structure.
15415        my $must_break_open_container = $must_break_open
15416          || ( $too_long
15417            && ( $in_hierarchical_list || $opening_token ne '(' ) );
15418
15419#print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15420
15421        #---------------------------------------------------------------
15422        # The main decision:
15423        # Now decide if we will align the data into aligned columns.  Do not
15424        # attempt to align columns if this is a tiny table or it would be
15425        # too spaced.  It seems that the more packed lines we have, the
15426        # sparser the list that can be allowed and still look ok.
15427        #---------------------------------------------------------------
15428
15429        if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15430            || ( $formatted_lines < 2 )
15431            || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15432          )
15433        {
15434
15435            #---------------------------------------------------------------
15436            # too sparse: would look ugly if aligned in a table;
15437            #---------------------------------------------------------------
15438
15439            # use old breakpoints if this is a 'big' list
15440            # FIXME: goal is to improve set_ragged_breakpoints so that
15441            # this is not necessary.
15442            if ( $packed_lines > 2 && $item_count > 10 ) {
15443                write_logfile_entry("List sparse: using old breakpoints\n");
15444                copy_old_breakpoints( $i_first_comma, $i_last_comma );
15445            }
15446
15447            # let the continuation logic handle it if 2 lines
15448            else {
15449
15450                my $break_count = set_ragged_breakpoints( \@i_term_comma,
15451                    $ri_ragged_break_list );
15452                ++$break_count if ($use_separate_first_term);
15453
15454                unless ($must_break_open_container) {
15455                    if ( $break_count <= 1 ) {
15456                        $$rdo_not_break_apart = 1;
15457                    }
15458                    elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15459                    {
15460                        $$rdo_not_break_apart = 1;
15461                    }
15462                }
15463                set_non_alignment_flags( $comma_count, $rcomma_index );
15464            }
15465            return;
15466        }
15467
15468        #---------------------------------------------------------------
15469        # go ahead and format as a table
15470        #---------------------------------------------------------------
15471        write_logfile_entry(
15472            "List: auto formatting with $number_of_fields fields/row\n");
15473
15474        my $j_first_break =
15475          $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15476
15477        for (
15478            my $j = $j_first_break ;
15479            $j < $comma_count ;
15480            $j += $number_of_fields
15481          )
15482        {
15483            my $i = $$rcomma_index[$j];
15484            set_forced_breakpoint($i);
15485        }
15486        return;
15487    }
15488}
15489
15490sub set_non_alignment_flags {
15491
15492    # set flag which indicates that these commas should not be
15493    # aligned
15494    my ( $comma_count, $rcomma_index ) = @_;
15495    foreach ( 0 .. $comma_count - 1 ) {
15496        $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
15497    }
15498}
15499
15500sub study_list_complexity {
15501
15502    # Look for complex tables which should be formatted with one term per line.
15503    # Returns the following:
15504    #
15505    #  \@i_ragged_break_list = list of good breakpoints to avoid lines
15506    #    which are hard to read
15507    #  $number_of_fields_best = suggested number of fields based on
15508    #    complexity; = 0 if any number may be used.
15509    #
15510    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
15511    my $item_count            = @{$ri_term_begin};
15512    my $complex_item_count    = 0;
15513    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
15514    my $i_max                 = @{$ritem_lengths} - 1;
15515    ##my @item_complexity;
15516
15517    my $i_last_last_break = -3;
15518    my $i_last_break      = -2;
15519    my @i_ragged_break_list;
15520
15521    my $definitely_complex = 30;
15522    my $definitely_simple  = 12;
15523    my $quote_count        = 0;
15524
15525    for my $i ( 0 .. $i_max ) {
15526        my $ib = $ri_term_begin->[$i];
15527        my $ie = $ri_term_end->[$i];
15528
15529        # define complexity: start with the actual term length
15530        my $weighted_length = ( $ritem_lengths->[$i] - 2 );
15531
15532        ##TBD: join types here and check for variations
15533        ##my $str=join "", @tokens_to_go[$ib..$ie];
15534
15535        my $is_quote = 0;
15536        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
15537            $is_quote = 1;
15538            $quote_count++;
15539        }
15540        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
15541            $quote_count++;
15542        }
15543
15544        if ( $ib eq $ie ) {
15545            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
15546                $complex_item_count++;
15547                $weighted_length *= 2;
15548            }
15549            else {
15550            }
15551        }
15552        else {
15553            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
15554                $complex_item_count++;
15555                $weighted_length *= 2;
15556            }
15557            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
15558                $weighted_length += 4;
15559            }
15560        }
15561
15562        # add weight for extra tokens.
15563        $weighted_length += 2 * ( $ie - $ib );
15564
15565##        my $BUB = join '', @tokens_to_go[$ib..$ie];
15566##        print "# COMPLEXITY:$weighted_length   $BUB\n";
15567
15568##push @item_complexity, $weighted_length;
15569
15570        # now mark a ragged break after this item it if it is 'long and
15571        # complex':
15572        if ( $weighted_length >= $definitely_complex ) {
15573
15574            # if we broke after the previous term
15575            # then break before it too
15576            if (   $i_last_break == $i - 1
15577                && $i > 1
15578                && $i_last_last_break != $i - 2 )
15579            {
15580
15581                ## FIXME: don't strand a small term
15582                pop @i_ragged_break_list;
15583                push @i_ragged_break_list, $i - 2;
15584                push @i_ragged_break_list, $i - 1;
15585            }
15586
15587            push @i_ragged_break_list, $i;
15588            $i_last_last_break = $i_last_break;
15589            $i_last_break      = $i;
15590        }
15591
15592        # don't break before a small last term -- it will
15593        # not look good on a line by itself.
15594        elsif ($i == $i_max
15595            && $i_last_break == $i - 1
15596            && $weighted_length <= $definitely_simple )
15597        {
15598            pop @i_ragged_break_list;
15599        }
15600    }
15601
15602    my $identifier_count = $i_max + 1 - $quote_count;
15603
15604    # Need more tuning here..
15605    if (   $max_width > 12
15606        && $complex_item_count > $item_count / 2
15607        && $number_of_fields_best != 2 )
15608    {
15609        $number_of_fields_best = 1;
15610    }
15611
15612    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
15613}
15614
15615sub get_maximum_fields_wanted {
15616
15617    # Not all tables look good with more than one field of items.
15618    # This routine looks at a table and decides if it should be
15619    # formatted with just one field or not.
15620    # This coding is still under development.
15621    my ($ritem_lengths) = @_;
15622
15623    my $number_of_fields_best = 0;
15624
15625    # For just a few items, we tentatively assume just 1 field.
15626    my $item_count = @{$ritem_lengths};
15627    if ( $item_count <= 5 ) {
15628        $number_of_fields_best = 1;
15629    }
15630
15631    # For larger tables, look at it both ways and see what looks best
15632    else {
15633
15634        my $is_odd            = 1;
15635        my @max_length        = ( 0, 0 );
15636        my @last_length_2     = ( undef, undef );
15637        my @first_length_2    = ( undef, undef );
15638        my $last_length       = undef;
15639        my $total_variation_1 = 0;
15640        my $total_variation_2 = 0;
15641        my @total_variation_2 = ( 0, 0 );
15642        for ( my $j = 0 ; $j < $item_count ; $j++ ) {
15643
15644            $is_odd = 1 - $is_odd;
15645            my $length = $ritem_lengths->[$j];
15646            if ( $length > $max_length[$is_odd] ) {
15647                $max_length[$is_odd] = $length;
15648            }
15649
15650            if ( defined($last_length) ) {
15651                my $dl = abs( $length - $last_length );
15652                $total_variation_1 += $dl;
15653            }
15654            $last_length = $length;
15655
15656            my $ll = $last_length_2[$is_odd];
15657            if ( defined($ll) ) {
15658                my $dl = abs( $length - $ll );
15659                $total_variation_2[$is_odd] += $dl;
15660            }
15661            else {
15662                $first_length_2[$is_odd] = $length;
15663            }
15664            $last_length_2[$is_odd] = $length;
15665        }
15666        $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
15667
15668        my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
15669        unless ( $total_variation_2 < $factor * $total_variation_1 ) {
15670            $number_of_fields_best = 1;
15671        }
15672    }
15673    return ($number_of_fields_best);
15674}
15675
15676sub table_columns_available {
15677    my $i_first_comma = shift;
15678    my $columns =
15679      $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
15680
15681    # Patch: the vertical formatter does not line up lines whose lengths
15682    # exactly equal the available line length because of allowances
15683    # that must be made for side comments.  Therefore, the number of
15684    # available columns is reduced by 1 character.
15685    $columns -= 1;
15686    return $columns;
15687}
15688
15689sub maximum_number_of_fields {
15690
15691    # how many fields will fit in the available space?
15692    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
15693    my $max_pairs        = int( $columns / $pair_width );
15694    my $number_of_fields = $max_pairs * 2;
15695    if (   $odd_or_even == 1
15696        && $max_pairs * $pair_width + $max_width <= $columns )
15697    {
15698        $number_of_fields++;
15699    }
15700    return $number_of_fields;
15701}
15702
15703sub compactify_table {
15704
15705    # given a table with a certain number of fields and a certain number
15706    # of lines, see if reducing the number of fields will make it look
15707    # better.
15708    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
15709    if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
15710        my $min_fields;
15711
15712        for (
15713            $min_fields = $number_of_fields ;
15714            $min_fields >= $odd_or_even
15715            && $min_fields * $formatted_lines >= $item_count ;
15716            $min_fields -= $odd_or_even
15717          )
15718        {
15719            $number_of_fields = $min_fields;
15720        }
15721    }
15722    return $number_of_fields;
15723}
15724
15725sub set_ragged_breakpoints {
15726
15727    # Set breakpoints in a list that cannot be formatted nicely as a
15728    # table.
15729    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
15730
15731    my $break_count = 0;
15732    foreach (@$ri_ragged_break_list) {
15733        my $j = $ri_term_comma->[$_];
15734        if ($j) {
15735            set_forced_breakpoint($j);
15736            $break_count++;
15737        }
15738    }
15739    return $break_count;
15740}
15741
15742sub copy_old_breakpoints {
15743    my ( $i_first_comma, $i_last_comma ) = @_;
15744    for my $i ( $i_first_comma .. $i_last_comma ) {
15745        if ( $old_breakpoint_to_go[$i] ) {
15746            set_forced_breakpoint($i);
15747        }
15748    }
15749}
15750
15751sub set_nobreaks {
15752    my ( $i, $j ) = @_;
15753    if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15754
15755        FORMATTER_DEBUG_FLAG_NOBREAK && do {
15756            my ( $a, $b, $c ) = caller();
15757            print(
15758"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
15759            );
15760        };
15761
15762        @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15763    }
15764
15765    # shouldn't happen; non-critical error
15766    else {
15767        FORMATTER_DEBUG_FLAG_NOBREAK && do {
15768            my ( $a, $b, $c ) = caller();
15769            print(
15770"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
15771            );
15772        };
15773    }
15774}
15775
15776sub set_fake_breakpoint {
15777
15778    # Just bump up the breakpoint count as a signal that there are breaks.
15779    # This is useful if we have breaks but may want to postpone deciding where
15780    # to make them.
15781    $forced_breakpoint_count++;
15782}
15783
15784sub set_forced_breakpoint {
15785    my $i = shift;
15786
15787    return unless defined $i && $i >= 0;
15788
15789    # when called with certain tokens, use bond strengths to decide
15790    # if we break before or after it
15791    my $token = $tokens_to_go[$i];
15792
15793    if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
15794        if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15795    }
15796
15797    # breaks are forced before 'if' and 'unless'
15798    elsif ( $is_if_unless{$token} ) { $i-- }
15799
15800    if ( $i >= 0 && $i <= $max_index_to_go ) {
15801        my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15802
15803        FORMATTER_DEBUG_FLAG_FORCE && do {
15804            my ( $a, $b, $c ) = caller();
15805            print
15806"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
15807        };
15808
15809        if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
15810            $forced_breakpoint_to_go[$i_nonblank] = 1;
15811
15812            if ( $i_nonblank > $index_max_forced_break ) {
15813                $index_max_forced_break = $i_nonblank;
15814            }
15815            $forced_breakpoint_count++;
15816            $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
15817              $i_nonblank;
15818
15819            # if we break at an opening container..break at the closing
15820            if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
15821                set_closing_breakpoint($i_nonblank);
15822            }
15823        }
15824    }
15825}
15826
15827sub clear_breakpoint_undo_stack {
15828    $forced_breakpoint_undo_count = 0;
15829}
15830
15831sub undo_forced_breakpoint_stack {
15832
15833    my $i_start = shift;
15834    if ( $i_start < 0 ) {
15835        $i_start = 0;
15836        my ( $a, $b, $c ) = caller();
15837        warning(
15838"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15839        );
15840    }
15841
15842    while ( $forced_breakpoint_undo_count > $i_start ) {
15843        my $i =
15844          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15845        if ( $i >= 0 && $i <= $max_index_to_go ) {
15846            $forced_breakpoint_to_go[$i] = 0;
15847            $forced_breakpoint_count--;
15848
15849            FORMATTER_DEBUG_FLAG_UNDOBP && do {
15850                my ( $a, $b, $c ) = caller();
15851                print(
15852"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
15853                );
15854            };
15855        }
15856
15857        # shouldn't happen, but not a critical error
15858        else {
15859            FORMATTER_DEBUG_FLAG_UNDOBP && do {
15860                my ( $a, $b, $c ) = caller();
15861                print(
15862"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
15863                );
15864            };
15865        }
15866    }
15867}
15868
15869{    # begin recombine_breakpoints
15870
15871    my %is_amp_amp;
15872    my %is_ternary;
15873    my %is_math_op;
15874
15875    BEGIN {
15876
15877        @_ = qw( && || );
15878        @is_amp_amp{@_} = (1) x scalar(@_);
15879
15880        @_ = qw( ? : );
15881        @is_ternary{@_} = (1) x scalar(@_);
15882
15883        @_ = qw( + - * / );
15884        @is_math_op{@_} = (1) x scalar(@_);
15885    }
15886
15887    sub recombine_breakpoints {
15888
15889        # sub set_continuation_breaks is very liberal in setting line breaks
15890        # for long lines, always setting breaks at good breakpoints, even
15891        # when that creates small lines.  Occasionally small line fragments
15892        # are produced which would look better if they were combined.
15893        # That's the task of this routine, recombine_breakpoints.
15894        #
15895        # $ri_beg = ref to array of BEGinning indexes of each line
15896        # $ri_end = ref to array of ENDing indexes of each line
15897        my ( $ri_beg, $ri_end ) = @_;
15898
15899        my $more_to_do = 1;
15900
15901        # We keep looping over all of the lines of this batch
15902        # until there are no more possible recombinations
15903        my $nmax_last = @$ri_end;
15904        while ($more_to_do) {
15905            my $n_best = 0;
15906            my $bs_best;
15907            my $n;
15908            my $nmax = @$ri_end - 1;
15909
15910            # safety check for infinite loop
15911            unless ( $nmax < $nmax_last ) {
15912
15913            # shouldn't happen because splice below decreases nmax on each pass:
15914            # but i get paranoid sometimes
15915                die "Program bug-infinite loop in recombine breakpoints\n";
15916            }
15917            $nmax_last  = $nmax;
15918            $more_to_do = 0;
15919            my $previous_outdentable_closing_paren;
15920            my $leading_amp_count = 0;
15921            my $this_line_is_semicolon_terminated;
15922
15923            # loop over all remaining lines in this batch
15924            for $n ( 1 .. $nmax ) {
15925
15926                #----------------------------------------------------------
15927                # If we join the current pair of lines,
15928                # line $n-1 will become the left part of the joined line
15929                # line $n will become the right part of the joined line
15930                #
15931                # Here are Indexes of the endpoint tokens of the two lines:
15932                #
15933                #  -----line $n-1--- | -----line $n-----
15934                #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15935                #                    ^
15936                #                    |
15937                # We want to decide if we should remove the line break
15938                # betwen the tokens at $iend_1 and $ibeg_2
15939                #
15940                # We will apply a number of ad-hoc tests to see if joining
15941                # here will look ok.  The code will just issue a 'next'
15942                # command if the join doesn't look good.  If we get through
15943                # the gauntlet of tests, the lines will be recombined.
15944                #----------------------------------------------------------
15945                #
15946                # beginning and ending tokens of the lines we are working on
15947                my $ibeg_1 = $$ri_beg[ $n - 1 ];
15948                my $iend_1 = $$ri_end[ $n - 1 ];
15949                my $iend_2 = $$ri_end[$n];
15950                my $ibeg_2 = $$ri_beg[$n];
15951
15952                my $ibeg_nmax = $$ri_beg[$nmax];
15953
15954                # some beginning indexes of other lines, which may not exist
15955                my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
15956                my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
15957                my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
15958
15959                my $bs_tweak = 0;
15960
15961                #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15962                #        $nesting_depth_to_go[$ibeg_1] );
15963
15964##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
15965
15966                # If line $n is the last line, we set some flags and
15967                # do any special checks for it
15968                if ( $n == $nmax ) {
15969
15970                    # a terminal '{' should stay where it is
15971                    next if $types_to_go[$ibeg_2] eq '{';
15972
15973                    # set flag if statement $n ends in ';'
15974                    $this_line_is_semicolon_terminated =
15975                      $types_to_go[$iend_2] eq ';'
15976
15977                      # with possible side comment
15978                      || ( $types_to_go[$iend_2] eq '#'
15979                        && $iend_2 - $ibeg_2 >= 2
15980                        && $types_to_go[ $iend_2 - 2 ] eq ';'
15981                        && $types_to_go[ $iend_2 - 1 ] eq 'b' );
15982                }
15983
15984                #----------------------------------------------------------
15985                # Section 1: examine token at $iend_1 (right end of first line
15986                # of pair)
15987                #----------------------------------------------------------
15988
15989                # an isolated '}' may join with a ';' terminated segment
15990                if ( $types_to_go[$iend_1] eq '}' ) {
15991
15992                    # Check for cases where combining a semicolon terminated
15993                    # statement with a previous isolated closing paren will
15994                    # allow the combined line to be outdented.  This is
15995                    # generally a good move.  For example, we can join up
15996                    # the last two lines here:
15997                    #  (
15998                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15999                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16000                    #    )
16001                    #    = stat($file);
16002                    #
16003                    # to get:
16004                    #  (
16005                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16006                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16007                    #  ) = stat($file);
16008                    #
16009                    # which makes the parens line up.
16010                    #
16011                    # Another example, from Joe Matarazzo, probably looks best
16012                    # with the 'or' clause appended to the trailing paren:
16013                    #  $self->some_method(
16014                    #      PARAM1 => 'foo',
16015                    #      PARAM2 => 'bar'
16016                    #  ) or die "Some_method didn't work";
16017                    #
16018                    $previous_outdentable_closing_paren =
16019                      $this_line_is_semicolon_terminated    # ends in ';'
16020                      && $ibeg_1 == $iend_1    # only one token on last line
16021                      && $tokens_to_go[$iend_1] eq
16022                      ')'                      # must be structural paren
16023
16024                      # only &&, ||, and : if no others seen
16025                      # (but note: our count made below could be wrong
16026                      # due to intervening comments)
16027                      && ( $leading_amp_count == 0
16028                        || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
16029
16030                      # but leading colons probably line up with with a
16031                      # previous colon or question (count could be wrong).
16032                      && $types_to_go[$ibeg_2] ne ':'
16033
16034                      # only one step in depth allowed.  this line must not
16035                      # begin with a ')' itself.
16036                      && ( $nesting_depth_to_go[$iend_1] ==
16037                        $nesting_depth_to_go[$iend_2] + 1 );
16038
16039                    # YVES patch 2 of 2:
16040                    # Allow cuddled eval chains, like this:
16041                    #   eval {
16042                    #       #STUFF;
16043                    #       1; # return true
16044                    #   } or do {
16045                    #       #handle error
16046                    #   };
16047                    # This patch works together with a patch in
16048                    # setting adjusted indentation (where the closing eval
16049                    # brace is outdented if possible).
16050                    # The problem is that an 'eval' block has continuation
16051                    # indentation and it looks better to undo it in some
16052                    # cases.  If we do not use this patch we would get:
16053                    #   eval {
16054                    #       #STUFF;
16055                    #       1; # return true
16056                    #       }
16057                    #       or do {
16058                    #       #handle error
16059                    #     };
16060                    # The alternative, for uncuddled style, is to create
16061                    # a patch in set_adjusted_indentation which undoes
16062                    # the indentation of a leading line like 'or do {'.
16063                    # This doesn't work well with -icb through
16064                    if (
16065                           $block_type_to_go[$iend_1] eq 'eval'
16066                        && !$rOpts->{'line-up-parentheses'}
16067                        && !$rOpts->{'indent-closing-brace'}
16068                        && $tokens_to_go[$iend_2] eq '{'
16069                        && (
16070                            ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
16071                            || (   $types_to_go[$ibeg_2] eq 'k'
16072                                && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16073                            || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16074                        )
16075                      )
16076                    {
16077                        $previous_outdentable_closing_paren ||= 1;
16078                    }
16079
16080                    next
16081                      unless (
16082                        $previous_outdentable_closing_paren
16083
16084                        # handle '.' and '?' specially below
16085                        || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
16086                      );
16087                }
16088
16089                # YVES
16090                # honor breaks at opening brace
16091                # Added to prevent recombining something like this:
16092                #  } || eval { package main;
16093                elsif ( $types_to_go[$iend_1] eq '{' ) {
16094                    next if $forced_breakpoint_to_go[$iend_1];
16095                }
16096
16097                # do not recombine lines with ending &&, ||,
16098                elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
16099                    next unless $want_break_before{ $types_to_go[$iend_1] };
16100                }
16101
16102                # keep a terminal colon
16103                elsif ( $types_to_go[$iend_1] eq ':' ) {
16104                    next unless $want_break_before{ $types_to_go[$iend_1] };
16105                }
16106
16107                # Identify and recombine a broken ?/: chain
16108                elsif ( $types_to_go[$iend_1] eq '?' ) {
16109
16110                    # Do not recombine different levels
16111                    next
16112                      if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16113
16114                    # do not recombine unless next line ends in :
16115                    next unless $types_to_go[$iend_2] eq ':';
16116                }
16117
16118                # for lines ending in a comma...
16119                elsif ( $types_to_go[$iend_1] eq ',' ) {
16120
16121                    # Do not recombine at comma which is following the
16122                    # input bias.
16123                    # TODO: might be best to make a special flag
16124                    next if ( $old_breakpoint_to_go[$iend_1] );
16125
16126                 # an isolated '},' may join with an identifier + ';'
16127                 # this is useful for the class of a 'bless' statement (bless.t)
16128                    if (   $types_to_go[$ibeg_1] eq '}'
16129                        && $types_to_go[$ibeg_2] eq 'i' )
16130                    {
16131                        next
16132                          unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16133                            && ( $iend_2 == ( $ibeg_2 + 1 ) )
16134                            && $this_line_is_semicolon_terminated );
16135
16136                        # override breakpoint
16137                        $forced_breakpoint_to_go[$iend_1] = 0;
16138                    }
16139
16140                    # but otherwise ..
16141                    else {
16142
16143                        # do not recombine after a comma unless this will leave
16144                        # just 1 more line
16145                        next unless ( $n + 1 >= $nmax );
16146
16147                    # do not recombine if there is a change in indentation depth
16148                        next
16149                          if (
16150                            $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16151
16152                        # do not recombine a "complex expression" after a
16153                        # comma.  "complex" means no parens.
16154                        my $saw_paren;
16155                        foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16156                            if ( $tokens_to_go[$ii] eq '(' ) {
16157                                $saw_paren = 1;
16158                                last;
16159                            }
16160                        }
16161                        next if $saw_paren;
16162                    }
16163                }
16164
16165                # opening paren..
16166                elsif ( $types_to_go[$iend_1] eq '(' ) {
16167
16168                    # No longer doing this
16169                }
16170
16171                elsif ( $types_to_go[$iend_1] eq ')' ) {
16172
16173                    # No longer doing this
16174                }
16175
16176                # keep a terminal for-semicolon
16177                elsif ( $types_to_go[$iend_1] eq 'f' ) {
16178                    next;
16179                }
16180
16181                # if '=' at end of line ...
16182                elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16183
16184                    my $is_short_quote =
16185                      (      $types_to_go[$ibeg_2] eq 'Q'
16186                          && $ibeg_2 == $iend_2
16187                          && length( $tokens_to_go[$ibeg_2] ) <
16188                          $rOpts_short_concatenation_item_length );
16189                    my $is_ternary =
16190                      ( $types_to_go[$ibeg_1] eq '?'
16191                          && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16192
16193                    # always join an isolated '=', a short quote, or if this
16194                    # will put ?/: at start of adjacent lines
16195                    if (   $ibeg_1 != $iend_1
16196                        && !$is_short_quote
16197                        && !$is_ternary )
16198                    {
16199                        next
16200                          unless (
16201                            (
16202
16203                                # unless we can reduce this to two lines
16204                                $nmax < $n + 2
16205
16206                             # or three lines, the last with a leading semicolon
16207                                || (   $nmax == $n + 2
16208                                    && $types_to_go[$ibeg_nmax] eq ';' )
16209
16210                                # or the next line ends with a here doc
16211                                || $types_to_go[$iend_2] eq 'h'
16212
16213                               # or the next line ends in an open paren or brace
16214                               # and the break hasn't been forced [dima.t]
16215                                || (  !$forced_breakpoint_to_go[$iend_1]
16216                                    && $types_to_go[$iend_2] eq '{' )
16217                            )
16218
16219                            # do not recombine if the two lines might align well
16220                            # this is a very approximate test for this
16221                            && (   $ibeg_3 >= 0
16222                                && $types_to_go[$ibeg_2] ne
16223                                $types_to_go[$ibeg_3] )
16224                          );
16225
16226                        # -lp users often prefer this:
16227                        #  my $title = function($env, $env, $sysarea,
16228                        #                       "bubba Borrower Entry");
16229                        #  so we will recombine if -lp is used we have ending
16230                        #  comma
16231                        if (  !$rOpts_line_up_parentheses
16232                            || $types_to_go[$iend_2] ne ',' )
16233                        {
16234
16235                           # otherwise, scan the rhs line up to last token for
16236                           # complexity.  Note that we are not counting the last
16237                           # token in case it is an opening paren.
16238                            my $tv    = 0;
16239                            my $depth = $nesting_depth_to_go[$ibeg_2];
16240                            for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16241                                if ( $nesting_depth_to_go[$i] != $depth ) {
16242                                    $tv++;
16243                                    last if ( $tv > 1 );
16244                                }
16245                                $depth = $nesting_depth_to_go[$i];
16246                            }
16247
16248                         # ok to recombine if no level changes before last token
16249                            if ( $tv > 0 ) {
16250
16251                                # otherwise, do not recombine if more than two
16252                                # level changes.
16253                                next if ( $tv > 1 );
16254
16255                              # check total complexity of the two adjacent lines
16256                              # that will occur if we do this join
16257                                my $istop =
16258                                  ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16259                                for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16260                                    if ( $nesting_depth_to_go[$i] != $depth ) {
16261                                        $tv++;
16262                                        last if ( $tv > 2 );
16263                                    }
16264                                    $depth = $nesting_depth_to_go[$i];
16265                                }
16266
16267                        # do not recombine if total is more than 2 level changes
16268                                next if ( $tv > 2 );
16269                            }
16270                        }
16271                    }
16272
16273                    unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16274                        $forced_breakpoint_to_go[$iend_1] = 0;
16275                    }
16276                }
16277
16278                # for keywords..
16279                elsif ( $types_to_go[$iend_1] eq 'k' ) {
16280
16281                    # make major control keywords stand out
16282                    # (recombine.t)
16283                    next
16284                      if (
16285
16286                        #/^(last|next|redo|return)$/
16287                        $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16288
16289                        # but only if followed by multiple lines
16290                        && $n < $nmax
16291                      );
16292
16293                    if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16294                        next
16295                          unless $want_break_before{ $tokens_to_go[$iend_1] };
16296                    }
16297                }
16298
16299                # handle trailing + - * /
16300                elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16301
16302                    # combine lines if next line has single number
16303                    # or a short term followed by same operator
16304                    my $i_next_nonblank = $ibeg_2;
16305                    my $i_next_next     = $i_next_nonblank + 1;
16306                    $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16307                    my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16308                      && (
16309                        $i_next_nonblank == $iend_2
16310                        || (   $i_next_next == $iend_2
16311                            && $is_math_op{ $types_to_go[$i_next_next] } )
16312                        || $types_to_go[$i_next_next] eq ';'
16313                      );
16314
16315                    # find token before last operator of previous line
16316                    my $iend_1_minus = $iend_1;
16317                    $iend_1_minus--
16318                      if ( $iend_1_minus > $ibeg_1 );
16319                    $iend_1_minus--
16320                      if ( $types_to_go[$iend_1_minus] eq 'b'
16321                        && $iend_1_minus > $ibeg_1 );
16322
16323                    my $short_term_follows =
16324                      (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16325                          && $types_to_go[$iend_1_minus] =~ /^[in]$/
16326                          && $iend_2 <= $ibeg_2 + 2
16327                          && length( $tokens_to_go[$ibeg_2] ) <
16328                          $rOpts_short_concatenation_item_length );
16329
16330                    next
16331                      unless ( $number_follows || $short_term_follows );
16332                }
16333
16334                #----------------------------------------------------------
16335                # Section 2: Now examine token at $ibeg_2 (left end of second
16336                # line of pair)
16337                #----------------------------------------------------------
16338
16339                # join lines identified above as capable of
16340                # causing an outdented line with leading closing paren
16341                if ($previous_outdentable_closing_paren) {
16342                    $forced_breakpoint_to_go[$iend_1] = 0;
16343                }
16344
16345                # do not recombine lines with leading :
16346                elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16347                    $leading_amp_count++;
16348                    next if $want_break_before{ $types_to_go[$ibeg_2] };
16349                }
16350
16351                # handle lines with leading &&, ||
16352                elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16353
16354                    $leading_amp_count++;
16355
16356                    # ok to recombine if it follows a ? or :
16357                    # and is followed by an open paren..
16358                    my $ok =
16359                      (      $is_ternary{ $types_to_go[$ibeg_1] }
16360                          && $tokens_to_go[$iend_2] eq '(' )
16361
16362                    # or is followed by a ? or : at same depth
16363                    #
16364                    # We are looking for something like this. We can
16365                    # recombine the && line with the line above to make the
16366                    # structure more clear:
16367                    #  return
16368                    #    exists $G->{Attr}->{V}
16369                    #    && exists $G->{Attr}->{V}->{$u}
16370                    #    ? %{ $G->{Attr}->{V}->{$u} }
16371                    #    : ();
16372                    #
16373                    # We should probably leave something like this alone:
16374                    #  return
16375                    #       exists $G->{Attr}->{E}
16376                    #    && exists $G->{Attr}->{E}->{$u}
16377                    #    && exists $G->{Attr}->{E}->{$u}->{$v}
16378                    #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16379                    #    : ();
16380                    # so that we either have all of the &&'s (or ||'s)
16381                    # on one line, as in the first example, or break at
16382                    # each one as in the second example.  However, it
16383                    # sometimes makes things worse to check for this because
16384                    # it prevents multiple recombinations.  So this is not done.
16385                      || ( $ibeg_3 >= 0
16386                        && $is_ternary{ $types_to_go[$ibeg_3] }
16387                        && $nesting_depth_to_go[$ibeg_3] ==
16388                        $nesting_depth_to_go[$ibeg_2] );
16389
16390                    next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16391                    $forced_breakpoint_to_go[$iend_1] = 0;
16392
16393                    # tweak the bond strength to give this joint priority
16394                    # over ? and :
16395                    $bs_tweak = 0.25;
16396                }
16397
16398                # Identify and recombine a broken ?/: chain
16399                elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16400
16401                    # Do not recombine different levels
16402                    my $lev = $levels_to_go[$ibeg_2];
16403                    next if ( $lev ne $levels_to_go[$ibeg_1] );
16404
16405                    # Do not recombine a '?' if either next line or
16406                    # previous line does not start with a ':'.  The reasons
16407                    # are that (1) no alignment of the ? will be possible
16408                    # and (2) the expression is somewhat complex, so the
16409                    # '?' is harder to see in the interior of the line.
16410                    my $follows_colon =
16411                      $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16412                    my $precedes_colon =
16413                      $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16414                    next unless ( $follows_colon || $precedes_colon );
16415
16416                    # we will always combining a ? line following a : line
16417                    if ( !$follows_colon ) {
16418
16419                        # ...otherwise recombine only if it looks like a chain.
16420                        # we will just look at a few nearby lines to see if
16421                        # this looks like a chain.
16422                        my $local_count = 0;
16423                        foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16424                            $local_count++
16425                              if $ii >= 0
16426                                  && $types_to_go[$ii] eq ':'
16427                                  && $levels_to_go[$ii] == $lev;
16428                        }
16429                        next unless ( $local_count > 1 );
16430                    }
16431                    $forced_breakpoint_to_go[$iend_1] = 0;
16432                }
16433
16434                # do not recombine lines with leading '.'
16435                elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16436                    my $i_next_nonblank = $ibeg_2 + 1;
16437                    if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16438                        $i_next_nonblank++;
16439                    }
16440
16441                    next
16442                      unless (
16443
16444                   # ... unless there is just one and we can reduce
16445                   # this to two lines if we do.  For example, this
16446                   #
16447                   #
16448                   #  $bodyA .=
16449                   #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16450                   #
16451                   #  looks better than this:
16452                   #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16453                   #    . '$args .= $pat;'
16454
16455                        (
16456                               $n == 2
16457                            && $n == $nmax
16458                            && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16459                        )
16460
16461                        #  ... or this would strand a short quote , like this
16462                        #                . "some long qoute"
16463                        #                . "\n";
16464                        || (   $types_to_go[$i_next_nonblank] eq 'Q'
16465                            && $i_next_nonblank >= $iend_2 - 1
16466                            && length( $tokens_to_go[$i_next_nonblank] ) <
16467                            $rOpts_short_concatenation_item_length )
16468                      );
16469                }
16470
16471                # handle leading keyword..
16472                elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16473
16474                    # handle leading "or"
16475                    if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16476                        next
16477                          unless (
16478                            $this_line_is_semicolon_terminated
16479                            && (
16480
16481                                # following 'if' or 'unless' or 'or'
16482                                $types_to_go[$ibeg_1] eq 'k'
16483                                && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16484
16485                                # important: only combine a very simple or
16486                                # statement because the step below may have
16487                                # combined a trailing 'and' with this or,
16488                                # and we do not want to then combine
16489                                # everything together
16490                                && ( $iend_2 - $ibeg_2 <= 7 )
16491                            )
16492                          );
16493                    }
16494
16495                    # handle leading 'and'
16496                    elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16497
16498                        # Decide if we will combine a single terminal 'and'
16499                        # after an 'if' or 'unless'.
16500
16501                        #     This looks best with the 'and' on the same
16502                        #     line as the 'if':
16503                        #
16504                        #         $a = 1
16505                        #           if $seconds and $nu < 2;
16506                        #
16507                        #     But this looks better as shown:
16508                        #
16509                        #         $a = 1
16510                        #           if !$this->{Parents}{$_}
16511                        #           or $this->{Parents}{$_} eq $_;
16512                        #
16513                        next
16514                          unless (
16515                            $this_line_is_semicolon_terminated
16516                            && (
16517
16518                                # following 'if' or 'unless' or 'or'
16519                                $types_to_go[$ibeg_1] eq 'k'
16520                                && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
16521                                    || $tokens_to_go[$ibeg_1] eq 'or' )
16522                            )
16523                          );
16524                    }
16525
16526                    # handle leading "if" and "unless"
16527                    elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16528
16529                      # FIXME: This is still experimental..may not be too useful
16530                        next
16531                          unless (
16532                            $this_line_is_semicolon_terminated
16533
16534                            #  previous line begins with 'and' or 'or'
16535                            && $types_to_go[$ibeg_1] eq 'k'
16536                            && $is_and_or{ $tokens_to_go[$ibeg_1] }
16537
16538                          );
16539                    }
16540
16541                    # handle all other leading keywords
16542                    else {
16543
16544                        # keywords look best at start of lines,
16545                        # but combine things like "1 while"
16546                        unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
16547                            next
16548                              if ( ( $types_to_go[$iend_1] ne 'k' )
16549                                && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16550                        }
16551                    }
16552                }
16553
16554                # similar treatment of && and || as above for 'and' and 'or':
16555                # NOTE: This block of code is currently bypassed because
16556                # of a previous block but is retained for possible future use.
16557                elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16558
16559                    # maybe looking at something like:
16560                    # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16561
16562                    next
16563                      unless (
16564                        $this_line_is_semicolon_terminated
16565
16566                        # previous line begins with an 'if' or 'unless' keyword
16567                        && $types_to_go[$ibeg_1] eq 'k'
16568                        && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16569
16570                      );
16571                }
16572
16573                # handle leading + - * /
16574                elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
16575                    my $i_next_nonblank = $ibeg_2 + 1;
16576                    if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16577                        $i_next_nonblank++;
16578                    }
16579
16580                    my $i_next_next = $i_next_nonblank + 1;
16581                    $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16582
16583                    my $is_number = (
16584                        $types_to_go[$i_next_nonblank] eq 'n'
16585                          && ( $i_next_nonblank >= $iend_2 - 1
16586                            || $types_to_go[$i_next_next] eq ';' )
16587                    );
16588
16589                    my $iend_1_nonblank =
16590                      $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
16591                    my $iend_2_nonblank =
16592                      $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
16593
16594                    my $is_short_term =
16595                      (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
16596                          && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
16597                          && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
16598                          && $iend_2_nonblank <= $ibeg_2 + 2
16599                          && length( $tokens_to_go[$iend_2_nonblank] ) <
16600                          $rOpts_short_concatenation_item_length );
16601
16602                    # Combine these lines if this line is a single
16603                    # number, or if it is a short term with same
16604                    # operator as the previous line.  For example, in
16605                    # the following code we will combine all of the
16606                    # short terms $A, $B, $C, $D, $E, $F, together
16607                    # instead of leaving them one per line:
16608                    #  my $time =
16609                    #    $A * $B * $C * $D * $E * $F *
16610                    #    ( 2. * $eps * $sigma * $area ) *
16611                    #    ( 1. / $tcold**3 - 1. / $thot**3 );
16612                    # This can be important in math-intensive code.
16613                    next
16614                      unless (
16615                           $is_number
16616                        || $is_short_term
16617
16618                        # or if we can reduce this to two lines if we do.
16619                        || (   $n == 2
16620                            && $n == $nmax
16621                            && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
16622                      );
16623                }
16624
16625                # handle line with leading = or similar
16626                elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
16627                    next unless $n == 1;
16628                    next
16629                      unless (
16630
16631                        # unless we can reduce this to two lines
16632                        $nmax == 2
16633
16634                        # or three lines, the last with a leading semicolon
16635                        || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16636
16637                        # or the next line ends with a here doc
16638                        || $types_to_go[$iend_2] eq 'h'
16639                      );
16640                }
16641
16642                #----------------------------------------------------------
16643                # Section 3:
16644                # Combine the lines if we arrive here and it is possible
16645                #----------------------------------------------------------
16646
16647                # honor hard breakpoints
16648                next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16649
16650                my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16651
16652                # combined line cannot be too long
16653                next
16654                  if excess_line_length( $ibeg_1, $iend_2 ) > 0;
16655
16656                # do not recombine if we would skip in indentation levels
16657                if ( $n < $nmax ) {
16658                    my $if_next = $$ri_beg[ $n + 1 ];
16659                    next
16660                      if (
16661                           $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16662                        && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16663
16664                        # but an isolated 'if (' is undesirable
16665                        && !(
16666                               $n == 1
16667                            && $iend_1 - $ibeg_1 <= 2
16668                            && $types_to_go[$ibeg_1]  eq 'k'
16669                            && $tokens_to_go[$ibeg_1] eq 'if'
16670                            && $tokens_to_go[$iend_1] ne '('
16671                        )
16672                      );
16673                }
16674
16675                # honor no-break's
16676                next if ( $bs == NO_BREAK );
16677
16678                # remember the pair with the greatest bond strength
16679                if ( !$n_best ) {
16680                    $n_best  = $n;
16681                    $bs_best = $bs;
16682                }
16683                else {
16684
16685                    if ( $bs > $bs_best ) {
16686                        $n_best  = $n;
16687                        $bs_best = $bs;
16688                    }
16689                }
16690            }
16691
16692            # recombine the pair with the greatest bond strength
16693            if ($n_best) {
16694                splice @$ri_beg, $n_best, 1;
16695                splice @$ri_end, $n_best - 1, 1;
16696
16697                # keep going if we are still making progress
16698                $more_to_do++;
16699            }
16700        }
16701        return ( $ri_beg, $ri_end );
16702    }
16703}    # end recombine_breakpoints
16704
16705sub break_all_chain_tokens {
16706
16707    # scan the current breakpoints looking for breaks at certain "chain
16708    # operators" (. : && || + etc) which often occur repeatedly in a long
16709    # statement.  If we see a break at any one, break at all similar tokens
16710    # within the same container.
16711    #
16712    my ( $ri_left, $ri_right ) = @_;
16713
16714    my %saw_chain_type;
16715    my %left_chain_type;
16716    my %right_chain_type;
16717    my %interior_chain_type;
16718    my $nmax = @$ri_right - 1;
16719
16720    # scan the left and right end tokens of all lines
16721    my $count = 0;
16722    for my $n ( 0 .. $nmax ) {
16723        my $il    = $$ri_left[$n];
16724        my $ir    = $$ri_right[$n];
16725        my $typel = $types_to_go[$il];
16726        my $typer = $types_to_go[$ir];
16727        $typel = '+' if ( $typel eq '-' );    # treat + and - the same
16728        $typer = '+' if ( $typer eq '-' );
16729        $typel = '*' if ( $typel eq '/' );    # treat * and / the same
16730        $typer = '*' if ( $typer eq '/' );
16731        my $tokenl = $tokens_to_go[$il];
16732        my $tokenr = $tokens_to_go[$ir];
16733
16734        if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16735            next if ( $typel eq '?' );
16736            push @{ $left_chain_type{$typel} }, $il;
16737            $saw_chain_type{$typel} = 1;
16738            $count++;
16739        }
16740        if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16741            next if ( $typer eq '?' );
16742            push @{ $right_chain_type{$typer} }, $ir;
16743            $saw_chain_type{$typer} = 1;
16744            $count++;
16745        }
16746    }
16747    return unless $count;
16748
16749    # now look for any interior tokens of the same types
16750    $count = 0;
16751    for my $n ( 0 .. $nmax ) {
16752        my $il = $$ri_left[$n];
16753        my $ir = $$ri_right[$n];
16754        for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
16755            my $type = $types_to_go[$i];
16756            $type = '+' if ( $type eq '-' );
16757            $type = '*' if ( $type eq '/' );
16758            if ( $saw_chain_type{$type} ) {
16759                push @{ $interior_chain_type{$type} }, $i;
16760                $count++;
16761            }
16762        }
16763    }
16764    return unless $count;
16765
16766    # now make a list of all new break points
16767    my @insert_list;
16768
16769    # loop over all chain types
16770    foreach my $type ( keys %saw_chain_type ) {
16771
16772        # quit if just ONE continuation line with leading .  For example--
16773        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16774        #  . $contents;
16775        last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16776
16777        # loop over all interior chain tokens
16778        foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16779
16780            # loop over all left end tokens of same type
16781            if ( $left_chain_type{$type} ) {
16782                next if $nobreak_to_go[ $itest - 1 ];
16783                foreach my $i ( @{ $left_chain_type{$type} } ) {
16784                    next unless in_same_container( $i, $itest );
16785                    push @insert_list, $itest - 1;
16786
16787                    # Break at matching ? if this : is at a different level.
16788                    # For example, the ? before $THRf_DEAD in the following
16789                    # should get a break if its : gets a break.
16790                    #
16791                    # my $flags =
16792                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16793                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
16794                    #   :              $THRf_R_JOINABLE;
16795                    if (   $type eq ':'
16796                        && $levels_to_go[$i] != $levels_to_go[$itest] )
16797                    {
16798                        my $i_question = $mate_index_to_go[$itest];
16799                        if ( $i_question > 0 ) {
16800                            push @insert_list, $i_question - 1;
16801                        }
16802                    }
16803                    last;
16804                }
16805            }
16806
16807            # loop over all right end tokens of same type
16808            if ( $right_chain_type{$type} ) {
16809                next if $nobreak_to_go[$itest];
16810                foreach my $i ( @{ $right_chain_type{$type} } ) {
16811                    next unless in_same_container( $i, $itest );
16812                    push @insert_list, $itest;
16813
16814                    # break at matching ? if this : is at a different level
16815                    if (   $type eq ':'
16816                        && $levels_to_go[$i] != $levels_to_go[$itest] )
16817                    {
16818                        my $i_question = $mate_index_to_go[$itest];
16819                        if ( $i_question >= 0 ) {
16820                            push @insert_list, $i_question;
16821                        }
16822                    }
16823                    last;
16824                }
16825            }
16826        }
16827    }
16828
16829    # insert any new break points
16830    if (@insert_list) {
16831        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16832    }
16833}
16834
16835sub break_equals {
16836
16837    # Look for assignment operators that could use a breakpoint.
16838    # For example, in the following snippet
16839    #
16840    #    $HOME = $ENV{HOME}
16841    #      || $ENV{LOGDIR}
16842    #      || $pw[7]
16843    #      || die "no home directory for user $<";
16844    #
16845    # we could break at the = to get this, which is a little nicer:
16846    #    $HOME =
16847    #         $ENV{HOME}
16848    #      || $ENV{LOGDIR}
16849    #      || $pw[7]
16850    #      || die "no home directory for user $<";
16851    #
16852    # The logic here follows the logic in set_logical_padding, which
16853    # will add the padding in the second line to improve alignment.
16854    #
16855    my ( $ri_left, $ri_right ) = @_;
16856    my $nmax = @$ri_right - 1;
16857    return unless ( $nmax >= 2 );
16858
16859    # scan the left ends of first two lines
16860    my $tokbeg = "";
16861    my $depth_beg;
16862    for my $n ( 1 .. 2 ) {
16863        my $il     = $$ri_left[$n];
16864        my $typel  = $types_to_go[$il];
16865        my $tokenl = $tokens_to_go[$il];
16866
16867        my $has_leading_op = ( $tokenl =~ /^\w/ )
16868          ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
16869          : $is_chain_operator{$typel};    # and, or
16870        return unless ($has_leading_op);
16871        if ( $n > 1 ) {
16872            return
16873              unless ( $tokenl eq $tokbeg
16874                && $nesting_depth_to_go[$il] eq $depth_beg );
16875        }
16876        $tokbeg    = $tokenl;
16877        $depth_beg = $nesting_depth_to_go[$il];
16878    }
16879
16880    # now look for any interior tokens of the same types
16881    my $il = $$ri_left[0];
16882    my $ir = $$ri_right[0];
16883
16884    # now make a list of all new break points
16885    my @insert_list;
16886    for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16887        my $type = $types_to_go[$i];
16888        if (   $is_assignment{$type}
16889            && $nesting_depth_to_go[$i] eq $depth_beg )
16890        {
16891            if ( $want_break_before{$type} ) {
16892                push @insert_list, $i - 1;
16893            }
16894            else {
16895                push @insert_list, $i;
16896            }
16897        }
16898    }
16899
16900    # Break after a 'return' followed by a chain of operators
16901    #  return ( $^O !~ /win32|dos/i )
16902    #    && ( $^O ne 'VMS' )
16903    #    && ( $^O ne 'OS2' )
16904    #    && ( $^O ne 'MacOS' );
16905    # To give:
16906    #  return
16907    #       ( $^O !~ /win32|dos/i )
16908    #    && ( $^O ne 'VMS' )
16909    #    && ( $^O ne 'OS2' )
16910    #    && ( $^O ne 'MacOS' );
16911    my $i = 0;
16912    if (   $types_to_go[$i] eq 'k'
16913        && $tokens_to_go[$i] eq 'return'
16914        && $ir > $il
16915        && $nesting_depth_to_go[$i] eq $depth_beg )
16916    {
16917        push @insert_list, $i;
16918    }
16919
16920    return unless (@insert_list);
16921
16922    # One final check...
16923    # scan second and thrid lines and be sure there are no assignments
16924    # we want to avoid breaking at an = to make something like this:
16925    #    unless ( $icon =
16926    #           $html_icons{"$type-$state"}
16927    #        or $icon = $html_icons{$type}
16928    #        or $icon = $html_icons{$state} )
16929    for my $n ( 1 .. 2 ) {
16930        my $il = $$ri_left[$n];
16931        my $ir = $$ri_right[$n];
16932        for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
16933            my $type = $types_to_go[$i];
16934            return
16935              if ( $is_assignment{$type}
16936                && $nesting_depth_to_go[$i] eq $depth_beg );
16937        }
16938    }
16939
16940    # ok, insert any new break point
16941    if (@insert_list) {
16942        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16943    }
16944}
16945
16946sub insert_final_breaks {
16947
16948    my ( $ri_left, $ri_right ) = @_;
16949
16950    my $nmax = @$ri_right - 1;
16951
16952    # scan the left and right end tokens of all lines
16953    my $count         = 0;
16954    my $i_first_colon = -1;
16955    for my $n ( 0 .. $nmax ) {
16956        my $il    = $$ri_left[$n];
16957        my $ir    = $$ri_right[$n];
16958        my $typel = $types_to_go[$il];
16959        my $typer = $types_to_go[$ir];
16960        return if ( $typel eq '?' );
16961        return if ( $typer eq '?' );
16962        if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16963        elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16964    }
16965
16966    # For long ternary chains,
16967    # if the first : we see has its # ? is in the interior
16968    # of a preceding line, then see if there are any good
16969    # breakpoints before the ?.
16970    if ( $i_first_colon > 0 ) {
16971        my $i_question = $mate_index_to_go[$i_first_colon];
16972        if ( $i_question > 0 ) {
16973            my @insert_list;
16974            for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16975                my $token = $tokens_to_go[$ii];
16976                my $type  = $types_to_go[$ii];
16977
16978                # For now, a good break is either a comma or a 'return'.
16979                if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16980                    && in_same_container( $ii, $i_question ) )
16981                {
16982                    push @insert_list, $ii;
16983                    last;
16984                }
16985            }
16986
16987            # insert any new break points
16988            if (@insert_list) {
16989                insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16990            }
16991        }
16992    }
16993}
16994
16995sub in_same_container {
16996
16997    # check to see if tokens at i1 and i2 are in the
16998    # same container, and not separated by a comma, ? or :
16999    my ( $i1, $i2 ) = @_;
17000    my $type  = $types_to_go[$i1];
17001    my $depth = $nesting_depth_to_go[$i1];
17002    return unless ( $nesting_depth_to_go[$i2] == $depth );
17003    if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
17004
17005    ###########################################################
17006    # This is potentially a very slow routine and not critical.
17007    # For safety just give up for large differences.
17008    # See test file 'infinite_loop.txt'
17009    # TODO: replace this loop with a data structure
17010    ###########################################################
17011    return if ( $i2 - $i1 > 200 );
17012
17013    for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
17014        next   if ( $nesting_depth_to_go[$i] > $depth );
17015        return if ( $nesting_depth_to_go[$i] < $depth );
17016
17017        my $tok = $tokens_to_go[$i];
17018        $tok = ',' if $tok eq '=>';    # treat => same as ,
17019
17020        # Example: we would not want to break at any of these .'s
17021        #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
17022        if ( $type ne ':' ) {
17023            return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
17024        }
17025        else {
17026            return if ( $tok =~ /^[\,]$/ );
17027        }
17028    }
17029    return 1;
17030}
17031
17032sub set_continuation_breaks {
17033
17034    # Define an array of indexes for inserting newline characters to
17035    # keep the line lengths below the maximum desired length.  There is
17036    # an implied break after the last token, so it need not be included.
17037
17038    # Method:
17039    # This routine is part of series of routines which adjust line
17040    # lengths.  It is only called if a statement is longer than the
17041    # maximum line length, or if a preliminary scanning located
17042    # desirable break points.   Sub scan_list has already looked at
17043    # these tokens and set breakpoints (in array
17044    # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17045    # after commas, after opening parens, and before closing parens).
17046    # This routine will honor these breakpoints and also add additional
17047    # breakpoints as necessary to keep the line length below the maximum
17048    # requested.  It bases its decision on where the 'bond strength' is
17049    # lowest.
17050
17051    # Output: returns references to the arrays:
17052    #  @i_first
17053    #  @i_last
17054    # which contain the indexes $i of the first and last tokens on each
17055    # line.
17056
17057    # In addition, the array:
17058    #   $forced_breakpoint_to_go[$i]
17059    # may be updated to be =1 for any index $i after which there must be
17060    # a break.  This signals later routines not to undo the breakpoint.
17061
17062    my $saw_good_break = shift;
17063    my @i_first        = ();      # the first index to output
17064    my @i_last         = ();      # the last index to output
17065    my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
17066    if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17067
17068    set_bond_strengths();
17069
17070    my $imin = 0;
17071    my $imax = $max_index_to_go;
17072    if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17073    if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17074    my $i_begin = $imin;          # index for starting next iteration
17075
17076    my $leading_spaces          = leading_spaces_to_go($imin);
17077    my $line_count              = 0;
17078    my $last_break_strength     = NO_BREAK;
17079    my $i_last_break            = -1;
17080    my $max_bias                = 0.001;
17081    my $tiny_bias               = 0.0001;
17082    my $leading_alignment_token = "";
17083    my $leading_alignment_type  = "";
17084
17085    # see if any ?/:'s are in order
17086    my $colons_in_order = 1;
17087    my $last_tok        = "";
17088    my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
17089    my $colon_count = @colon_list;
17090    foreach (@colon_list) {
17091        if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17092        $last_tok = $_;
17093    }
17094
17095    # This is a sufficient but not necessary condition for colon chain
17096    my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17097
17098    #-------------------------------------------------------
17099    # BEGINNING of main loop to set continuation breakpoints
17100    # Keep iterating until we reach the end
17101    #-------------------------------------------------------
17102    while ( $i_begin <= $imax ) {
17103        my $lowest_strength        = NO_BREAK;
17104        my $starting_sum           = $lengths_to_go[$i_begin];
17105        my $i_lowest               = -1;
17106        my $i_test                 = -1;
17107        my $lowest_next_token      = '';
17108        my $lowest_next_type       = 'b';
17109        my $i_lowest_next_nonblank = -1;
17110
17111        #-------------------------------------------------------
17112        # BEGINNING of inner loop to find the best next breakpoint
17113        #-------------------------------------------------------
17114        for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17115            my $type       = $types_to_go[$i_test];
17116            my $token      = $tokens_to_go[$i_test];
17117            my $next_type  = $types_to_go[ $i_test + 1 ];
17118            my $next_token = $tokens_to_go[ $i_test + 1 ];
17119            my $i_next_nonblank =
17120              ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
17121            my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17122            my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17123            my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17124            my $strength                 = $bond_strength_to_go[$i_test];
17125            my $must_break               = 0;
17126
17127            # FIXME: TESTING: Might want to be able to break after these
17128            # force an immediate break at certain operators
17129            # with lower level than the start of the line
17130            if (
17131                (
17132                    $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17133                    || (   $next_nonblank_type eq 'k'
17134                        && $next_nonblank_token =~ /^(and|or)$/ )
17135                )
17136                && ( $nesting_depth_to_go[$i_begin] >
17137                    $nesting_depth_to_go[$i_next_nonblank] )
17138              )
17139            {
17140                set_forced_breakpoint($i_next_nonblank);
17141            }
17142
17143            if (
17144
17145                # Try to put a break where requested by scan_list
17146                $forced_breakpoint_to_go[$i_test]
17147
17148                # break between ) { in a continued line so that the '{' can
17149                # be outdented
17150                # See similar logic in scan_list which catches instances
17151                # where a line is just something like ') {'
17152                || (   $line_count
17153                    && ( $token              eq ')' )
17154                    && ( $next_nonblank_type eq '{' )
17155                    && ($next_nonblank_block_type)
17156                    && !$rOpts->{'opening-brace-always-on-right'} )
17157
17158                # There is an implied forced break at a terminal opening brace
17159                || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17160              )
17161            {
17162
17163                # Forced breakpoints must sometimes be overridden, for example
17164                # because of a side comment causing a NO_BREAK.  It is easier
17165                # to catch this here than when they are set.
17166                if ( $strength < NO_BREAK ) {
17167                    $strength   = $lowest_strength - $tiny_bias;
17168                    $must_break = 1;
17169                }
17170            }
17171
17172            # quit if a break here would put a good terminal token on
17173            # the next line and we already have a possible break
17174            if (
17175                   !$must_break
17176                && ( $next_nonblank_type =~ /^[\;\,]$/ )
17177                && (
17178                    (
17179                        $leading_spaces +
17180                        $lengths_to_go[ $i_next_nonblank + 1 ] -
17181                        $starting_sum
17182                    ) > $rOpts_maximum_line_length
17183                )
17184              )
17185            {
17186                last if ( $i_lowest >= 0 );
17187            }
17188
17189            # Avoid a break which would strand a single punctuation
17190            # token.  For example, we do not want to strand a leading
17191            # '.' which is followed by a long quoted string.
17192            if (
17193                   !$must_break
17194                && ( $i_test == $i_begin )
17195                && ( $i_test < $imax )
17196                && ( $token eq $type )
17197                && (
17198                    (
17199                        $leading_spaces +
17200                        $lengths_to_go[ $i_test + 1 ] -
17201                        $starting_sum
17202                    ) <= $rOpts_maximum_line_length
17203                )
17204              )
17205            {
17206                $i_test++;
17207
17208                if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17209                    $i_test++;
17210                }
17211                redo;
17212            }
17213
17214            if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17215            {
17216
17217                # break at previous best break if it would have produced
17218                # a leading alignment of certain common tokens, and it
17219                # is different from the latest candidate break
17220                last
17221                  if ($leading_alignment_type);
17222
17223                # Force at least one breakpoint if old code had good
17224                # break It is only called if a breakpoint is required or
17225                # desired.  This will probably need some adjustments
17226                # over time.  A goal is to try to be sure that, if a new
17227                # side comment is introduced into formated text, then
17228                # the same breakpoints will occur.  scbreak.t
17229                last
17230                  if (
17231                    $i_test == $imax                # we are at the end
17232                    && !$forced_breakpoint_count    #
17233                    && $saw_good_break              # old line had good break
17234                    && $type =~ /^[#;\{]$/          # and this line ends in
17235                                                    # ';' or side comment
17236                    && $i_last_break < 0        # and we haven't made a break
17237                    && $i_lowest > 0            # and we saw a possible break
17238                    && $i_lowest < $imax - 1    # (but not just before this ;)
17239                    && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17240                  );
17241
17242                $lowest_strength        = $strength;
17243                $i_lowest               = $i_test;
17244                $lowest_next_token      = $next_nonblank_token;
17245                $lowest_next_type       = $next_nonblank_type;
17246                $i_lowest_next_nonblank = $i_next_nonblank;
17247                last if $must_break;
17248
17249                # set flags to remember if a break here will produce a
17250                # leading alignment of certain common tokens
17251                if (   $line_count > 0
17252                    && $i_test < $imax
17253                    && ( $lowest_strength - $last_break_strength <= $max_bias )
17254                  )
17255                {
17256                    my $i_last_end = $i_begin - 1;
17257                    if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17258                    my $tok_beg  = $tokens_to_go[$i_begin];
17259                    my $type_beg = $types_to_go[$i_begin];
17260                    if (
17261
17262                        # check for leading alignment of certain tokens
17263                        (
17264                               $tok_beg eq $next_nonblank_token
17265                            && $is_chain_operator{$tok_beg}
17266                            && (   $type_beg eq 'k'
17267                                || $type_beg eq $tok_beg )
17268                            && $nesting_depth_to_go[$i_begin] >=
17269                            $nesting_depth_to_go[$i_next_nonblank]
17270                        )
17271
17272                        || (   $tokens_to_go[$i_last_end] eq $token
17273                            && $is_chain_operator{$token}
17274                            && ( $type eq 'k' || $type eq $token )
17275                            && $nesting_depth_to_go[$i_last_end] >=
17276                            $nesting_depth_to_go[$i_test] )
17277                      )
17278                    {
17279                        $leading_alignment_token = $next_nonblank_token;
17280                        $leading_alignment_type  = $next_nonblank_type;
17281                    }
17282                }
17283            }
17284
17285            my $too_long =
17286              ( $i_test >= $imax )
17287              ? 1
17288              : (
17289                (
17290                    $leading_spaces +
17291                      $lengths_to_go[ $i_test + 2 ] -
17292                      $starting_sum
17293                ) > $rOpts_maximum_line_length
17294              );
17295
17296            FORMATTER_DEBUG_FLAG_BREAK
17297              && print
17298"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17299
17300            # allow one extra terminal token after exceeding line length
17301            # if it would strand this token.
17302            if (   $rOpts_fuzzy_line_length
17303                && $too_long
17304                && ( $i_lowest == $i_test )
17305                && ( length($token) > 1 )
17306                && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17307            {
17308                $too_long = 0;
17309            }
17310
17311            last
17312              if (
17313                ( $i_test == $imax )    # we're done if no more tokens,
17314                || (
17315                    ( $i_lowest >= 0 )    # or no more space and we have a break
17316                    && $too_long
17317                )
17318              );
17319        }
17320
17321        #-------------------------------------------------------
17322        # END of inner loop to find the best next breakpoint
17323        # Now decide exactly where to put the breakpoint
17324        #-------------------------------------------------------
17325
17326        # it's always ok to break at imax if no other break was found
17327        if ( $i_lowest < 0 ) { $i_lowest = $imax }
17328
17329        # semi-final index calculation
17330        my $i_next_nonblank = (
17331            ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17332            ? $i_lowest + 2
17333            : $i_lowest + 1
17334        );
17335        my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17336        my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17337
17338        #-------------------------------------------------------
17339        # ?/: rule 1 : if a break here will separate a '?' on this
17340        # line from its closing ':', then break at the '?' instead.
17341        #-------------------------------------------------------
17342        my $i;
17343        foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17344            next unless ( $tokens_to_go[$i] eq '?' );
17345
17346            # do not break if probable sequence of ?/: statements
17347            next if ($is_colon_chain);
17348
17349            # do not break if statement is broken by side comment
17350            next
17351              if (
17352                $tokens_to_go[$max_index_to_go] eq '#'
17353                && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17354                    $max_index_to_go ) !~ /^[\;\}]$/
17355              );
17356
17357            # no break needed if matching : is also on the line
17358            next
17359              if ( $mate_index_to_go[$i] >= 0
17360                && $mate_index_to_go[$i] <= $i_next_nonblank );
17361
17362            $i_lowest = $i;
17363            if ( $want_break_before{'?'} ) { $i_lowest-- }
17364            last;
17365        }
17366
17367        #-------------------------------------------------------
17368        # END of inner loop to find the best next breakpoint:
17369        # Break the line after the token with index i=$i_lowest
17370        #-------------------------------------------------------
17371
17372        # final index calculation
17373        $i_next_nonblank = (
17374            ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17375            ? $i_lowest + 2
17376            : $i_lowest + 1
17377        );
17378        $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17379        $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17380
17381        FORMATTER_DEBUG_FLAG_BREAK
17382          && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17383
17384        #-------------------------------------------------------
17385        # ?/: rule 2 : if we break at a '?', then break at its ':'
17386        #
17387        # Note: this rule is also in sub scan_list to handle a break
17388        # at the start and end of a line (in case breaks are dictated
17389        # by side comments).
17390        #-------------------------------------------------------
17391        if ( $next_nonblank_type eq '?' ) {
17392            set_closing_breakpoint($i_next_nonblank);
17393        }
17394        elsif ( $types_to_go[$i_lowest] eq '?' ) {
17395            set_closing_breakpoint($i_lowest);
17396        }
17397
17398        #-------------------------------------------------------
17399        # ?/: rule 3 : if we break at a ':' then we save
17400        # its location for further work below.  We may need to go
17401        # back and break at its '?'.
17402        #-------------------------------------------------------
17403        if ( $next_nonblank_type eq ':' ) {
17404            push @i_colon_breaks, $i_next_nonblank;
17405        }
17406        elsif ( $types_to_go[$i_lowest] eq ':' ) {
17407            push @i_colon_breaks, $i_lowest;
17408        }
17409
17410        # here we should set breaks for all '?'/':' pairs which are
17411        # separated by this line
17412
17413        $line_count++;
17414
17415        # save this line segment, after trimming blanks at the ends
17416        push( @i_first,
17417            ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17418        push( @i_last,
17419            ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17420
17421        # set a forced breakpoint at a container opening, if necessary, to
17422        # signal a break at a closing container.  Excepting '(' for now.
17423        if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17424            && !$forced_breakpoint_to_go[$i_lowest] )
17425        {
17426            set_closing_breakpoint($i_lowest);
17427        }
17428
17429        # get ready to go again
17430        $i_begin                 = $i_lowest + 1;
17431        $last_break_strength     = $lowest_strength;
17432        $i_last_break            = $i_lowest;
17433        $leading_alignment_token = "";
17434        $leading_alignment_type  = "";
17435        $lowest_next_token       = '';
17436        $lowest_next_type        = 'b';
17437
17438        if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17439            $i_begin++;
17440        }
17441
17442        # update indentation size
17443        if ( $i_begin <= $imax ) {
17444            $leading_spaces = leading_spaces_to_go($i_begin);
17445        }
17446    }
17447
17448    #-------------------------------------------------------
17449    # END of main loop to set continuation breakpoints
17450    # Now go back and make any necessary corrections
17451    #-------------------------------------------------------
17452
17453    #-------------------------------------------------------
17454    # ?/: rule 4 -- if we broke at a ':', then break at
17455    # corresponding '?' unless this is a chain of ?: expressions
17456    #-------------------------------------------------------
17457    if (@i_colon_breaks) {
17458
17459        # using a simple method for deciding if we are in a ?/: chain --
17460        # this is a chain if it has multiple ?/: pairs all in order;
17461        # otherwise not.
17462        # Note that if line starts in a ':' we count that above as a break
17463        my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17464
17465        unless ($is_chain) {
17466            my @insert_list = ();
17467            foreach (@i_colon_breaks) {
17468                my $i_question = $mate_index_to_go[$_];
17469                if ( $i_question >= 0 ) {
17470                    if ( $want_break_before{'?'} ) {
17471                        $i_question--;
17472                        if (   $i_question > 0
17473                            && $types_to_go[$i_question] eq 'b' )
17474                        {
17475                            $i_question--;
17476                        }
17477                    }
17478
17479                    if ( $i_question >= 0 ) {
17480                        push @insert_list, $i_question;
17481                    }
17482                }
17483                insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17484            }
17485        }
17486    }
17487    return ( \@i_first, \@i_last, $colon_count );
17488}
17489
17490sub insert_additional_breaks {
17491
17492    # this routine will add line breaks at requested locations after
17493    # sub set_continuation_breaks has made preliminary breaks.
17494
17495    my ( $ri_break_list, $ri_first, $ri_last ) = @_;
17496    my $i_f;
17497    my $i_l;
17498    my $line_number = 0;
17499    my $i_break_left;
17500    foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
17501
17502        $i_f = $$ri_first[$line_number];
17503        $i_l = $$ri_last[$line_number];
17504        while ( $i_break_left >= $i_l ) {
17505            $line_number++;
17506
17507            # shouldn't happen unless caller passes bad indexes
17508            if ( $line_number >= @$ri_last ) {
17509                warning(
17510"Non-fatal program bug: couldn't set break at $i_break_left\n"
17511                );
17512                report_definite_bug();
17513                return;
17514            }
17515            $i_f = $$ri_first[$line_number];
17516            $i_l = $$ri_last[$line_number];
17517        }
17518
17519        my $i_break_right = $i_break_left + 1;
17520        if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
17521
17522        if (   $i_break_left >= $i_f
17523            && $i_break_left < $i_l
17524            && $i_break_right > $i_f
17525            && $i_break_right <= $i_l )
17526        {
17527            splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
17528            splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
17529        }
17530    }
17531}
17532
17533sub set_closing_breakpoint {
17534
17535    # set a breakpoint at a matching closing token
17536    # at present, this is only used to break at a ':' which matches a '?'
17537    my $i_break = shift;
17538
17539    if ( $mate_index_to_go[$i_break] >= 0 ) {
17540
17541        # CAUTION: infinite recursion possible here:
17542        #   set_closing_breakpoint calls set_forced_breakpoint, and
17543        #   set_forced_breakpoint call set_closing_breakpoint
17544        #   ( test files attrib.t, BasicLyx.pm.html).
17545        # Don't reduce the '2' in the statement below
17546        if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17547
17548            # break before } ] and ), but sub set_forced_breakpoint will decide
17549            # to break before or after a ? and :
17550            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17551            set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
17552        }
17553    }
17554    else {
17555        my $type_sequence = $type_sequence_to_go[$i_break];
17556        if ($type_sequence) {
17557            my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
17558            $postponed_breakpoint{$type_sequence} = 1;
17559        }
17560    }
17561}
17562
17563# check to see if output line tabbing agrees with input line
17564# this can be very useful for debugging a script which has an extra
17565# or missing brace
17566sub compare_indentation_levels {
17567
17568    my ( $python_indentation_level, $structural_indentation_level ) = @_;
17569    if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
17570        $last_tabbing_disagreement = $input_line_number;
17571
17572        if ($in_tabbing_disagreement) {
17573        }
17574        else {
17575            $tabbing_disagreement_count++;
17576
17577            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17578                write_logfile_entry(
17579"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
17580                );
17581            }
17582            $in_tabbing_disagreement    = $input_line_number;
17583            $first_tabbing_disagreement = $in_tabbing_disagreement
17584              unless ($first_tabbing_disagreement);
17585        }
17586    }
17587    else {
17588
17589        if ($in_tabbing_disagreement) {
17590
17591            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17592                write_logfile_entry(
17593"End indentation disagreement from input line $in_tabbing_disagreement\n"
17594                );
17595
17596                if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17597                    write_logfile_entry(
17598                        "No further tabbing disagreements will be noted\n");
17599                }
17600            }
17601            $in_tabbing_disagreement = 0;
17602        }
17603    }
17604}
17605
17606#####################################################################
17607#
17608# the Perl::Tidy::IndentationItem class supplies items which contain
17609# how much whitespace should be used at the start of a line
17610#
17611#####################################################################
17612
17613package Perl::Tidy::IndentationItem;
17614
17615# Indexes for indentation items
17616use constant SPACES             => 0;     # total leading white spaces
17617use constant LEVEL              => 1;     # the indentation 'level'
17618use constant CI_LEVEL           => 2;     # the 'continuation level'
17619use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
17620                                          # for this level
17621use constant CLOSED             => 4;     # index where we saw closing '}'
17622use constant COMMA_COUNT        => 5;     # how many commas at this level?
17623use constant SEQUENCE_NUMBER    => 6;     # output batch number
17624use constant INDEX              => 7;     # index in output batch list
17625use constant HAVE_CHILD         => 8;     # any dependents?
17626use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
17627                                          # we would like to move to get
17628                                          # alignment (negative if left)
17629use constant ALIGN_PAREN        => 10;    # do we want to try to align
17630                                          # with an opening structure?
17631use constant MARKED             => 11;    # if visited by corrector logic
17632use constant STACK_DEPTH        => 12;    # indentation nesting depth
17633use constant STARTING_INDEX     => 13;    # first token index of this level
17634use constant ARROW_COUNT        => 14;    # how many =>'s
17635
17636sub new {
17637
17638    # Create an 'indentation_item' which describes one level of leading
17639    # whitespace when the '-lp' indentation is used.  We return
17640    # a reference to an anonymous array of associated variables.
17641    # See above constants for storage scheme.
17642    my (
17643        $class,               $spaces,           $level,
17644        $ci_level,            $available_spaces, $index,
17645        $gnu_sequence_number, $align_paren,      $stack_depth,
17646        $starting_index,
17647    ) = @_;
17648    my $closed            = -1;
17649    my $arrow_count       = 0;
17650    my $comma_count       = 0;
17651    my $have_child        = 0;
17652    my $want_right_spaces = 0;
17653    my $marked            = 0;
17654    bless [
17655        $spaces,              $level,          $ci_level,
17656        $available_spaces,    $closed,         $comma_count,
17657        $gnu_sequence_number, $index,          $have_child,
17658        $want_right_spaces,   $align_paren,    $marked,
17659        $stack_depth,         $starting_index, $arrow_count,
17660    ], $class;
17661}
17662
17663sub permanently_decrease_AVAILABLE_SPACES {
17664
17665    # make a permanent reduction in the available indentation spaces
17666    # at one indentation item.  NOTE: if there are child nodes, their
17667    # total SPACES must be reduced by the caller.
17668
17669    my ( $item, $spaces_needed ) = @_;
17670    my $available_spaces = $item->get_AVAILABLE_SPACES();
17671    my $deleted_spaces =
17672      ( $available_spaces > $spaces_needed )
17673      ? $spaces_needed
17674      : $available_spaces;
17675    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17676    $item->decrease_SPACES($deleted_spaces);
17677    $item->set_RECOVERABLE_SPACES(0);
17678
17679    return $deleted_spaces;
17680}
17681
17682sub tentatively_decrease_AVAILABLE_SPACES {
17683
17684    # We are asked to tentatively delete $spaces_needed of indentation
17685    # for a indentation item.  We may want to undo this later.  NOTE: if
17686    # there are child nodes, their total SPACES must be reduced by the
17687    # caller.
17688    my ( $item, $spaces_needed ) = @_;
17689    my $available_spaces = $item->get_AVAILABLE_SPACES();
17690    my $deleted_spaces =
17691      ( $available_spaces > $spaces_needed )
17692      ? $spaces_needed
17693      : $available_spaces;
17694    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17695    $item->decrease_SPACES($deleted_spaces);
17696    $item->increase_RECOVERABLE_SPACES($deleted_spaces);
17697    return $deleted_spaces;
17698}
17699
17700sub get_STACK_DEPTH {
17701    my $self = shift;
17702    return $self->[STACK_DEPTH];
17703}
17704
17705sub get_SPACES {
17706    my $self = shift;
17707    return $self->[SPACES];
17708}
17709
17710sub get_MARKED {
17711    my $self = shift;
17712    return $self->[MARKED];
17713}
17714
17715sub set_MARKED {
17716    my ( $self, $value ) = @_;
17717    if ( defined($value) ) {
17718        $self->[MARKED] = $value;
17719    }
17720    return $self->[MARKED];
17721}
17722
17723sub get_AVAILABLE_SPACES {
17724    my $self = shift;
17725    return $self->[AVAILABLE_SPACES];
17726}
17727
17728sub decrease_SPACES {
17729    my ( $self, $value ) = @_;
17730    if ( defined($value) ) {
17731        $self->[SPACES] -= $value;
17732    }
17733    return $self->[SPACES];
17734}
17735
17736sub decrease_AVAILABLE_SPACES {
17737    my ( $self, $value ) = @_;
17738    if ( defined($value) ) {
17739        $self->[AVAILABLE_SPACES] -= $value;
17740    }
17741    return $self->[AVAILABLE_SPACES];
17742}
17743
17744sub get_ALIGN_PAREN {
17745    my $self = shift;
17746    return $self->[ALIGN_PAREN];
17747}
17748
17749sub get_RECOVERABLE_SPACES {
17750    my $self = shift;
17751    return $self->[RECOVERABLE_SPACES];
17752}
17753
17754sub set_RECOVERABLE_SPACES {
17755    my ( $self, $value ) = @_;
17756    if ( defined($value) ) {
17757        $self->[RECOVERABLE_SPACES] = $value;
17758    }
17759    return $self->[RECOVERABLE_SPACES];
17760}
17761
17762sub increase_RECOVERABLE_SPACES {
17763    my ( $self, $value ) = @_;
17764    if ( defined($value) ) {
17765        $self->[RECOVERABLE_SPACES] += $value;
17766    }
17767    return $self->[RECOVERABLE_SPACES];
17768}
17769
17770sub get_CI_LEVEL {
17771    my $self = shift;
17772    return $self->[CI_LEVEL];
17773}
17774
17775sub get_LEVEL {
17776    my $self = shift;
17777    return $self->[LEVEL];
17778}
17779
17780sub get_SEQUENCE_NUMBER {
17781    my $self = shift;
17782    return $self->[SEQUENCE_NUMBER];
17783}
17784
17785sub get_INDEX {
17786    my $self = shift;
17787    return $self->[INDEX];
17788}
17789
17790sub get_STARTING_INDEX {
17791    my $self = shift;
17792    return $self->[STARTING_INDEX];
17793}
17794
17795sub set_HAVE_CHILD {
17796    my ( $self, $value ) = @_;
17797    if ( defined($value) ) {
17798        $self->[HAVE_CHILD] = $value;
17799    }
17800    return $self->[HAVE_CHILD];
17801}
17802
17803sub get_HAVE_CHILD {
17804    my $self = shift;
17805    return $self->[HAVE_CHILD];
17806}
17807
17808sub set_ARROW_COUNT {
17809    my ( $self, $value ) = @_;
17810    if ( defined($value) ) {
17811        $self->[ARROW_COUNT] = $value;
17812    }
17813    return $self->[ARROW_COUNT];
17814}
17815
17816sub get_ARROW_COUNT {
17817    my $self = shift;
17818    return $self->[ARROW_COUNT];
17819}
17820
17821sub set_COMMA_COUNT {
17822    my ( $self, $value ) = @_;
17823    if ( defined($value) ) {
17824        $self->[COMMA_COUNT] = $value;
17825    }
17826    return $self->[COMMA_COUNT];
17827}
17828
17829sub get_COMMA_COUNT {
17830    my $self = shift;
17831    return $self->[COMMA_COUNT];
17832}
17833
17834sub set_CLOSED {
17835    my ( $self, $value ) = @_;
17836    if ( defined($value) ) {
17837        $self->[CLOSED] = $value;
17838    }
17839    return $self->[CLOSED];
17840}
17841
17842sub get_CLOSED {
17843    my $self = shift;
17844    return $self->[CLOSED];
17845}
17846
17847#####################################################################
17848#
17849# the Perl::Tidy::VerticalAligner::Line class supplies an object to
17850# contain a single output line
17851#
17852#####################################################################
17853
17854package Perl::Tidy::VerticalAligner::Line;
17855
17856{
17857
17858    use strict;
17859    use Carp;
17860
17861    use constant JMAX                      => 0;
17862    use constant JMAX_ORIGINAL_LINE        => 1;
17863    use constant RTOKENS                   => 2;
17864    use constant RFIELDS                   => 3;
17865    use constant RPATTERNS                 => 4;
17866    use constant INDENTATION               => 5;
17867    use constant LEADING_SPACE_COUNT       => 6;
17868    use constant OUTDENT_LONG_LINES        => 7;
17869    use constant LIST_TYPE                 => 8;
17870    use constant IS_HANGING_SIDE_COMMENT   => 9;
17871    use constant RALIGNMENTS               => 10;
17872    use constant MAXIMUM_LINE_LENGTH       => 11;
17873    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
17874
17875    my %_index_map;
17876    $_index_map{jmax}                      = JMAX;
17877    $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
17878    $_index_map{rtokens}                   = RTOKENS;
17879    $_index_map{rfields}                   = RFIELDS;
17880    $_index_map{rpatterns}                 = RPATTERNS;
17881    $_index_map{indentation}               = INDENTATION;
17882    $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
17883    $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
17884    $_index_map{list_type}                 = LIST_TYPE;
17885    $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
17886    $_index_map{ralignments}               = RALIGNMENTS;
17887    $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
17888    $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
17889
17890    my @_default_data = ();
17891    $_default_data[JMAX]                      = undef;
17892    $_default_data[JMAX_ORIGINAL_LINE]        = undef;
17893    $_default_data[RTOKENS]                   = undef;
17894    $_default_data[RFIELDS]                   = undef;
17895    $_default_data[RPATTERNS]                 = undef;
17896    $_default_data[INDENTATION]               = undef;
17897    $_default_data[LEADING_SPACE_COUNT]       = undef;
17898    $_default_data[OUTDENT_LONG_LINES]        = undef;
17899    $_default_data[LIST_TYPE]                 = undef;
17900    $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
17901    $_default_data[RALIGNMENTS]               = [];
17902    $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
17903    $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
17904
17905    {
17906
17907        # methods to count object population
17908        my $_count = 0;
17909        sub get_count        { $_count; }
17910        sub _increment_count { ++$_count }
17911        sub _decrement_count { --$_count }
17912    }
17913
17914    # Constructor may be called as a class method
17915    sub new {
17916        my ( $caller, %arg ) = @_;
17917        my $caller_is_obj = ref($caller);
17918        my $class = $caller_is_obj || $caller;
17919        no strict "refs";
17920        my $self = bless [], $class;
17921
17922        $self->[RALIGNMENTS] = [];
17923
17924        my $index;
17925        foreach ( keys %_index_map ) {
17926            $index = $_index_map{$_};
17927            if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17928            elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
17929            else { $self->[$index] = $_default_data[$index] }
17930        }
17931
17932        $self->_increment_count();
17933        return $self;
17934    }
17935
17936    sub DESTROY {
17937        $_[0]->_decrement_count();
17938    }
17939
17940    sub get_jmax                      { $_[0]->[JMAX] }
17941    sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
17942    sub get_rtokens                   { $_[0]->[RTOKENS] }
17943    sub get_rfields                   { $_[0]->[RFIELDS] }
17944    sub get_rpatterns                 { $_[0]->[RPATTERNS] }
17945    sub get_indentation               { $_[0]->[INDENTATION] }
17946    sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
17947    sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
17948    sub get_list_type                 { $_[0]->[LIST_TYPE] }
17949    sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
17950    sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
17951
17952    sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
17953    sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
17954    sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
17955    sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
17956
17957    sub get_starting_column {
17958        $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
17959    }
17960
17961    sub increment_column {
17962        $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
17963    }
17964    sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
17965
17966    sub current_field_width {
17967        my $self = shift;
17968        my ($j) = @_;
17969        if ( $j == 0 ) {
17970            return $self->get_column($j);
17971        }
17972        else {
17973            return $self->get_column($j) - $self->get_column( $j - 1 );
17974        }
17975    }
17976
17977    sub field_width_growth {
17978        my $self = shift;
17979        my $j    = shift;
17980        return $self->get_column($j) - $self->get_starting_column($j);
17981    }
17982
17983    sub starting_field_width {
17984        my $self = shift;
17985        my $j    = shift;
17986        if ( $j == 0 ) {
17987            return $self->get_starting_column($j);
17988        }
17989        else {
17990            return $self->get_starting_column($j) -
17991              $self->get_starting_column( $j - 1 );
17992        }
17993    }
17994
17995    sub increase_field_width {
17996
17997        my $self = shift;
17998        my ( $j, $pad ) = @_;
17999        my $jmax = $self->get_jmax();
18000        for my $k ( $j .. $jmax ) {
18001            $self->increment_column( $k, $pad );
18002        }
18003    }
18004
18005    sub get_available_space_on_right {
18006        my $self = shift;
18007        my $jmax = $self->get_jmax();
18008        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
18009    }
18010
18011    sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
18012    sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
18013    sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
18014    sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
18015    sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
18016    sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
18017    sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
18018    sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
18019    sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
18020    sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
18021    sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
18022
18023}
18024
18025#####################################################################
18026#
18027# the Perl::Tidy::VerticalAligner::Alignment class holds information
18028# on a single column being aligned
18029#
18030#####################################################################
18031package Perl::Tidy::VerticalAligner::Alignment;
18032
18033{
18034
18035    use strict;
18036
18037    #use Carp;
18038
18039    # Symbolic array indexes
18040    use constant COLUMN          => 0;    # the current column number
18041    use constant STARTING_COLUMN => 1;    # column number when created
18042    use constant MATCHING_TOKEN  => 2;    # what token we are matching
18043    use constant STARTING_LINE   => 3;    # the line index of creation
18044    use constant ENDING_LINE     => 4;    # the most recent line to use it
18045    use constant SAVED_COLUMN    => 5;    # the most recent line to use it
18046    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
18047                                          # (just its index in an array)
18048
18049    # Correspondence between variables and array indexes
18050    my %_index_map;
18051    $_index_map{column}          = COLUMN;
18052    $_index_map{starting_column} = STARTING_COLUMN;
18053    $_index_map{matching_token}  = MATCHING_TOKEN;
18054    $_index_map{starting_line}   = STARTING_LINE;
18055    $_index_map{ending_line}     = ENDING_LINE;
18056    $_index_map{saved_column}    = SAVED_COLUMN;
18057    $_index_map{serial_number}   = SERIAL_NUMBER;
18058
18059    my @_default_data = ();
18060    $_default_data[COLUMN]          = undef;
18061    $_default_data[STARTING_COLUMN] = undef;
18062    $_default_data[MATCHING_TOKEN]  = undef;
18063    $_default_data[STARTING_LINE]   = undef;
18064    $_default_data[ENDING_LINE]     = undef;
18065    $_default_data[SAVED_COLUMN]    = undef;
18066    $_default_data[SERIAL_NUMBER]   = undef;
18067
18068    # class population count
18069    {
18070        my $_count = 0;
18071        sub get_count        { $_count; }
18072        sub _increment_count { ++$_count }
18073        sub _decrement_count { --$_count }
18074    }
18075
18076    # constructor
18077    sub new {
18078        my ( $caller, %arg ) = @_;
18079        my $caller_is_obj = ref($caller);
18080        my $class = $caller_is_obj || $caller;
18081        no strict "refs";
18082        my $self = bless [], $class;
18083
18084        foreach ( keys %_index_map ) {
18085            my $index = $_index_map{$_};
18086            if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
18087            elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
18088            else { $self->[$index] = $_default_data[$index] }
18089        }
18090        $self->_increment_count();
18091        return $self;
18092    }
18093
18094    sub DESTROY {
18095        $_[0]->_decrement_count();
18096    }
18097
18098    sub get_column          { return $_[0]->[COLUMN] }
18099    sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
18100    sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
18101    sub get_starting_line   { return $_[0]->[STARTING_LINE] }
18102    sub get_ending_line     { return $_[0]->[ENDING_LINE] }
18103    sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
18104
18105    sub set_column          { $_[0]->[COLUMN]          = $_[1] }
18106    sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
18107    sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
18108    sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
18109    sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
18110    sub increment_column { $_[0]->[COLUMN] += $_[1] }
18111
18112    sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
18113    sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
18114
18115}
18116
18117package Perl::Tidy::VerticalAligner;
18118
18119# The Perl::Tidy::VerticalAligner package collects output lines and
18120# attempts to line up certain common tokens, such as => and #, which are
18121# identified by the calling routine.
18122#
18123# There are two main routines: append_line and flush.  Append acts as a
18124# storage buffer, collecting lines into a group which can be vertically
18125# aligned.  When alignment is no longer possible or desirable, it dumps
18126# the group to flush.
18127#
18128#     append_line -----> flush
18129#
18130#     collects          writes
18131#     vertical          one
18132#     groups            group
18133
18134BEGIN {
18135
18136    # Caution: these debug flags produce a lot of output
18137    # They should all be 0 except when debugging small scripts
18138
18139    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
18140    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
18141    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
18142
18143    my $debug_warning = sub {
18144        print "VALIGN_DEBUGGING with key $_[0]\n";
18145    };
18146
18147    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
18148    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
18149
18150}
18151
18152use vars qw(
18153  $vertical_aligner_self
18154  $current_line
18155  $maximum_alignment_index
18156  $ralignment_list
18157  $maximum_jmax_seen
18158  $minimum_jmax_seen
18159  $previous_minimum_jmax_seen
18160  $previous_maximum_jmax_seen
18161  $maximum_line_index
18162  $group_level
18163  $group_type
18164  $group_maximum_gap
18165  $marginal_match
18166  $last_group_level_written
18167  $last_leading_space_count
18168  $extra_indent_ok
18169  $zero_count
18170  @group_lines
18171  $last_comment_column
18172  $last_side_comment_line_number
18173  $last_side_comment_length
18174  $last_side_comment_level
18175  $outdented_line_count
18176  $first_outdented_line_at
18177  $last_outdented_line_at
18178  $diagnostics_object
18179  $logger_object
18180  $file_writer_object
18181  @side_comment_history
18182  $comment_leading_space_count
18183  $is_matching_terminal_line
18184
18185  $cached_line_text
18186  $cached_line_type
18187  $cached_line_flag
18188  $cached_seqno
18189  $cached_line_valid
18190  $cached_line_leading_space_count
18191  $cached_seqno_string
18192
18193  $seqno_string
18194  $last_nonblank_seqno_string
18195
18196  $rOpts
18197
18198  $rOpts_maximum_line_length
18199  $rOpts_continuation_indentation
18200  $rOpts_indent_columns
18201  $rOpts_tabs
18202  $rOpts_entab_leading_whitespace
18203  $rOpts_valign
18204
18205  $rOpts_fixed_position_side_comment
18206  $rOpts_minimum_space_to_comment
18207
18208);
18209
18210sub initialize {
18211
18212    my $class;
18213
18214    ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
18215      = @_;
18216
18217    # variables describing the entire space group:
18218    $ralignment_list            = [];
18219    $group_level                = 0;
18220    $last_group_level_written   = -1;
18221    $extra_indent_ok            = 0;    # can we move all lines to the right?
18222    $last_side_comment_length   = 0;
18223    $maximum_jmax_seen          = 0;
18224    $minimum_jmax_seen          = 0;
18225    $previous_minimum_jmax_seen = 0;
18226    $previous_maximum_jmax_seen = 0;
18227
18228    # variables describing each line of the group
18229    @group_lines = ();                  # list of all lines in group
18230
18231    $outdented_line_count          = 0;
18232    $first_outdented_line_at       = 0;
18233    $last_outdented_line_at        = 0;
18234    $last_side_comment_line_number = 0;
18235    $last_side_comment_level       = -1;
18236    $is_matching_terminal_line     = 0;
18237
18238    # most recent 3 side comments; [ line number, column ]
18239    $side_comment_history[0] = [ -300, 0 ];
18240    $side_comment_history[1] = [ -200, 0 ];
18241    $side_comment_history[2] = [ -100, 0 ];
18242
18243    # write_leader_and_string cache:
18244    $cached_line_text                = "";
18245    $cached_line_type                = 0;
18246    $cached_line_flag                = 0;
18247    $cached_seqno                    = 0;
18248    $cached_line_valid               = 0;
18249    $cached_line_leading_space_count = 0;
18250    $cached_seqno_string             = "";
18251
18252    # string of sequence numbers joined together
18253    $seqno_string               = "";
18254    $last_nonblank_seqno_string = "";
18255
18256    # frequently used parameters
18257    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
18258    $rOpts_tabs                     = $rOpts->{'tabs'};
18259    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
18260    $rOpts_fixed_position_side_comment =
18261      $rOpts->{'fixed-position-side-comment'};
18262    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
18263    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
18264    $rOpts_valign                   = $rOpts->{'valign'};
18265
18266    forget_side_comment();
18267
18268    initialize_for_new_group();
18269
18270    $vertical_aligner_self = {};
18271    bless $vertical_aligner_self, $class;
18272    return $vertical_aligner_self;
18273}
18274
18275sub initialize_for_new_group {
18276    $maximum_line_index      = -1;      # lines in the current group
18277    $maximum_alignment_index = -1;      # alignments in current group
18278    $zero_count              = 0;       # count consecutive lines without tokens
18279    $current_line            = undef;   # line being matched for alignment
18280    $group_maximum_gap       = 0;       # largest gap introduced
18281    $group_type              = "";
18282    $marginal_match          = 0;
18283    $comment_leading_space_count = 0;
18284    $last_leading_space_count    = 0;
18285}
18286
18287# interface to Perl::Tidy::Diagnostics routines
18288sub write_diagnostics {
18289    if ($diagnostics_object) {
18290        $diagnostics_object->write_diagnostics(@_);
18291    }
18292}
18293
18294# interface to Perl::Tidy::Logger routines
18295sub warning {
18296    if ($logger_object) {
18297        $logger_object->warning(@_);
18298    }
18299}
18300
18301sub write_logfile_entry {
18302    if ($logger_object) {
18303        $logger_object->write_logfile_entry(@_);
18304    }
18305}
18306
18307sub report_definite_bug {
18308    if ($logger_object) {
18309        $logger_object->report_definite_bug();
18310    }
18311}
18312
18313sub get_SPACES {
18314
18315    # return the number of leading spaces associated with an indentation
18316    # variable $indentation is either a constant number of spaces or an
18317    # object with a get_SPACES method.
18318    my $indentation = shift;
18319    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
18320}
18321
18322sub get_RECOVERABLE_SPACES {
18323
18324    # return the number of spaces (+ means shift right, - means shift left)
18325    # that we would like to shift a group of lines with the same indentation
18326    # to get them to line up with their opening parens
18327    my $indentation = shift;
18328    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
18329}
18330
18331sub get_STACK_DEPTH {
18332
18333    my $indentation = shift;
18334    return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
18335}
18336
18337sub make_alignment {
18338    my ( $col, $token ) = @_;
18339
18340    # make one new alignment at column $col which aligns token $token
18341    ++$maximum_alignment_index;
18342    my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
18343        column          => $col,
18344        starting_column => $col,
18345        matching_token  => $token,
18346        starting_line   => $maximum_line_index,
18347        ending_line     => $maximum_line_index,
18348        serial_number   => $maximum_alignment_index,
18349    );
18350    $ralignment_list->[$maximum_alignment_index] = $alignment;
18351    return $alignment;
18352}
18353
18354sub dump_alignments {
18355    print
18356"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
18357    for my $i ( 0 .. $maximum_alignment_index ) {
18358        my $column          = $ralignment_list->[$i]->get_column();
18359        my $starting_column = $ralignment_list->[$i]->get_starting_column();
18360        my $matching_token  = $ralignment_list->[$i]->get_matching_token();
18361        my $starting_line   = $ralignment_list->[$i]->get_starting_line();
18362        my $ending_line     = $ralignment_list->[$i]->get_ending_line();
18363        print
18364"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
18365    }
18366}
18367
18368sub save_alignment_columns {
18369    for my $i ( 0 .. $maximum_alignment_index ) {
18370        $ralignment_list->[$i]->save_column();
18371    }
18372}
18373
18374sub restore_alignment_columns {
18375    for my $i ( 0 .. $maximum_alignment_index ) {
18376        $ralignment_list->[$i]->restore_column();
18377    }
18378}
18379
18380sub forget_side_comment {
18381    $last_comment_column = 0;
18382}
18383
18384sub append_line {
18385
18386    # sub append is called to place one line in the current vertical group.
18387    #
18388    # The input parameters are:
18389    #     $level = indentation level of this line
18390    #     $rfields = reference to array of fields
18391    #     $rpatterns = reference to array of patterns, one per field
18392    #     $rtokens   = reference to array of tokens starting fields 1,2,..
18393    #
18394    # Here is an example of what this package does.  In this example,
18395    # we are trying to line up both the '=>' and the '#'.
18396    #
18397    #         '18' => 'grave',    #   \`
18398    #         '19' => 'acute',    #   `'
18399    #         '20' => 'caron',    #   \v
18400    # <-tabs-><f1-><--field 2 ---><-f3->
18401    # |            |              |    |
18402    # |            |              |    |
18403    # col1        col2         col3 col4
18404    #
18405    # The calling routine has already broken the entire line into 3 fields as
18406    # indicated.  (So the work of identifying promising common tokens has
18407    # already been done).
18408    #
18409    # In this example, there will be 2 tokens being matched: '=>' and '#'.
18410    # They are the leading parts of fields 2 and 3, but we do need to know
18411    # what they are so that we can dump a group of lines when these tokens
18412    # change.
18413    #
18414    # The fields contain the actual characters of each field.  The patterns
18415    # are like the fields, but they contain mainly token types instead
18416    # of tokens, so they have fewer characters.  They are used to be
18417    # sure we are matching fields of similar type.
18418    #
18419    # In this example, there will be 4 column indexes being adjusted.  The
18420    # first one is always at zero.  The interior columns are at the start of
18421    # the matching tokens, and the last one tracks the maximum line length.
18422    #
18423    # Basically, each time a new line comes in, it joins the current vertical
18424    # group if possible.  Otherwise it causes the current group to be dumped
18425    # and a new group is started.
18426    #
18427    # For each new group member, the column locations are increased, as
18428    # necessary, to make room for the new fields.  When the group is finally
18429    # output, these column numbers are used to compute the amount of spaces of
18430    # padding needed for each field.
18431    #
18432    # Programming note: the fields are assumed not to have any tab characters.
18433    # Tabs have been previously removed except for tabs in quoted strings and
18434    # side comments.  Tabs in these fields can mess up the column counting.
18435    # The log file warns the user if there are any such tabs.
18436
18437    my (
18438        $level,               $level_end,
18439        $indentation,         $rfields,
18440        $rtokens,             $rpatterns,
18441        $is_forced_break,     $outdent_long_lines,
18442        $is_terminal_ternary, $is_terminal_statement,
18443        $do_not_pad,          $rvertical_tightness_flags,
18444        $level_jump,
18445    ) = @_;
18446
18447    # number of fields is $jmax
18448    # number of tokens between fields is $jmax-1
18449    my $jmax = $#{$rfields};
18450
18451    my $leading_space_count = get_SPACES($indentation);
18452
18453    # set outdented flag to be sure we either align within statements or
18454    # across statement boundaries, but not both.
18455    my $is_outdented = $last_leading_space_count > $leading_space_count;
18456    $last_leading_space_count = $leading_space_count;
18457
18458    # Patch: undo for hanging side comment
18459    my $is_hanging_side_comment =
18460      ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
18461    $is_outdented = 0 if $is_hanging_side_comment;
18462
18463    VALIGN_DEBUG_FLAG_APPEND0 && do {
18464        print
18465"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
18466    };
18467
18468    # Validate cached line if necessary: If we can produce a container
18469    # with just 2 lines total by combining an existing cached opening
18470    # token with the closing token to follow, then we will mark both
18471    # cached flags as valid.
18472    if ($rvertical_tightness_flags) {
18473        if (   $maximum_line_index <= 0
18474            && $cached_line_type
18475            && $cached_seqno
18476            && $rvertical_tightness_flags->[2]
18477            && $rvertical_tightness_flags->[2] == $cached_seqno )
18478        {
18479            $rvertical_tightness_flags->[3] ||= 1;
18480            $cached_line_valid ||= 1;
18481        }
18482    }
18483
18484    # do not join an opening block brace with an unbalanced line
18485    # unless requested with a flag value of 2
18486    if (   $cached_line_type == 3
18487        && $maximum_line_index < 0
18488        && $cached_line_flag < 2
18489        && $level_jump != 0 )
18490    {
18491        $cached_line_valid = 0;
18492    }
18493
18494    # patch until new aligner is finished
18495    if ($do_not_pad) { my_flush() }
18496
18497    # shouldn't happen:
18498    if ( $level < 0 ) { $level = 0 }
18499
18500    # do not align code across indentation level changes
18501    # or if vertical alignment is turned off for debugging
18502    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
18503
18504        # we are allowed to shift a group of lines to the right if its
18505        # level is greater than the previous and next group
18506        $extra_indent_ok =
18507          ( $level < $group_level && $last_group_level_written < $group_level );
18508
18509        my_flush();
18510
18511        # If we know that this line will get flushed out by itself because
18512        # of level changes, we can leave the extra_indent_ok flag set.
18513        # That way, if we get an external flush call, we will still be
18514        # able to do some -lp alignment if necessary.
18515        $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
18516
18517        $group_level = $level;
18518
18519        # wait until after the above flush to get the leading space
18520        # count because it may have been changed if the -icp flag is in
18521        # effect
18522        $leading_space_count = get_SPACES($indentation);
18523
18524    }
18525
18526    # --------------------------------------------------------------------
18527    # Patch to collect outdentable block COMMENTS
18528    # --------------------------------------------------------------------
18529    my $is_blank_line = "";
18530    my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
18531    if ( $group_type eq 'COMMENT' ) {
18532        if (
18533            (
18534                   $is_block_comment
18535                && $outdent_long_lines
18536                && $leading_space_count == $comment_leading_space_count
18537            )
18538            || $is_blank_line
18539          )
18540        {
18541            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18542            return;
18543        }
18544        else {
18545            my_flush();
18546        }
18547    }
18548
18549    # --------------------------------------------------------------------
18550    # add dummy fields for terminal ternary
18551    # --------------------------------------------------------------------
18552    my $j_terminal_match;
18553    if ( $is_terminal_ternary && $current_line ) {
18554        $j_terminal_match =
18555          fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
18556        $jmax = @{$rfields} - 1;
18557    }
18558
18559    # --------------------------------------------------------------------
18560    # add dummy fields for else statement
18561    # --------------------------------------------------------------------
18562    if (   $rfields->[0] =~ /^else\s*$/
18563        && $current_line
18564        && $level_jump == 0 )
18565    {
18566        $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
18567        $jmax = @{$rfields} - 1;
18568    }
18569
18570    # --------------------------------------------------------------------
18571    # Step 1. Handle simple line of code with no fields to match.
18572    # --------------------------------------------------------------------
18573    if ( $jmax <= 0 ) {
18574        $zero_count++;
18575
18576        if ( $maximum_line_index >= 0
18577            && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
18578        {
18579
18580            # flush the current group if it has some aligned columns..
18581            if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
18582
18583            # flush current group if we are just collecting side comments..
18584            elsif (
18585
18586                # ...and we haven't seen a comment lately
18587                ( $zero_count > 3 )
18588
18589                # ..or if this new line doesn't fit to the left of the comments
18590                || ( ( $leading_space_count + length( $$rfields[0] ) ) >
18591                    $group_lines[0]->get_column(0) )
18592              )
18593            {
18594                my_flush();
18595            }
18596        }
18597
18598        # patch to start new COMMENT group if this comment may be outdented
18599        if (   $is_block_comment
18600            && $outdent_long_lines
18601            && $maximum_line_index < 0 )
18602        {
18603            $group_type                           = 'COMMENT';
18604            $comment_leading_space_count          = $leading_space_count;
18605            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18606            return;
18607        }
18608
18609        # just write this line directly if no current group, no side comment,
18610        # and no space recovery is needed.
18611        if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
18612        {
18613            write_leader_and_string( $leading_space_count, $$rfields[0], 0,
18614                $outdent_long_lines, $rvertical_tightness_flags );
18615            return;
18616        }
18617    }
18618    else {
18619        $zero_count = 0;
18620    }
18621
18622    # programming check: (shouldn't happen)
18623    # an error here implies an incorrect call was made
18624    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
18625        warning(
18626"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
18627        );
18628        report_definite_bug();
18629    }
18630
18631    # --------------------------------------------------------------------
18632    # create an object to hold this line
18633    # --------------------------------------------------------------------
18634    my $new_line = new Perl::Tidy::VerticalAligner::Line(
18635        jmax                      => $jmax,
18636        jmax_original_line        => $jmax,
18637        rtokens                   => $rtokens,
18638        rfields                   => $rfields,
18639        rpatterns                 => $rpatterns,
18640        indentation               => $indentation,
18641        leading_space_count       => $leading_space_count,
18642        outdent_long_lines        => $outdent_long_lines,
18643        list_type                 => "",
18644        is_hanging_side_comment   => $is_hanging_side_comment,
18645        maximum_line_length       => $rOpts->{'maximum-line-length'},
18646        rvertical_tightness_flags => $rvertical_tightness_flags,
18647    );
18648
18649    # Initialize a global flag saying if the last line of the group should
18650    # match end of group and also terminate the group.  There should be no
18651    # returns between here and where the flag is handled at the bottom.
18652    my $col_matching_terminal = 0;
18653    if ( defined($j_terminal_match) ) {
18654
18655        # remember the column of the terminal ? or { to match with
18656        $col_matching_terminal = $current_line->get_column($j_terminal_match);
18657
18658        # set global flag for sub decide_if_aligned
18659        $is_matching_terminal_line = 1;
18660    }
18661
18662    # --------------------------------------------------------------------
18663    # It simplifies things to create a zero length side comment
18664    # if none exists.
18665    # --------------------------------------------------------------------
18666    make_side_comment( $new_line, $level_end );
18667
18668    # --------------------------------------------------------------------
18669    # Decide if this is a simple list of items.
18670    # There are 3 list types: none, comma, comma-arrow.
18671    # We use this below to be less restrictive in deciding what to align.
18672    # --------------------------------------------------------------------
18673    if ($is_forced_break) {
18674        decide_if_list($new_line);
18675    }
18676
18677    if ($current_line) {
18678
18679        # --------------------------------------------------------------------
18680        # Allow hanging side comment to join current group, if any
18681        # This will help keep side comments aligned, because otherwise we
18682        # will have to start a new group, making alignment less likely.
18683        # --------------------------------------------------------------------
18684        join_hanging_comment( $new_line, $current_line )
18685          if $is_hanging_side_comment;
18686
18687        # --------------------------------------------------------------------
18688        # If there is just one previous line, and it has more fields
18689        # than the new line, try to join fields together to get a match with
18690        # the new line.  At the present time, only a single leading '=' is
18691        # allowed to be compressed out.  This is useful in rare cases where
18692        # a table is forced to use old breakpoints because of side comments,
18693        # and the table starts out something like this:
18694        #   my %MonthChars = ('0', 'Jan',   # side comment
18695        #                     '1', 'Feb',
18696        #                     '2', 'Mar',
18697        # Eliminating the '=' field will allow the remaining fields to line up.
18698        # This situation does not occur if there are no side comments
18699        # because scan_list would put a break after the opening '('.
18700        # --------------------------------------------------------------------
18701        eliminate_old_fields( $new_line, $current_line );
18702
18703        # --------------------------------------------------------------------
18704        # If the new line has more fields than the current group,
18705        # see if we can match the first fields and combine the remaining
18706        # fields of the new line.
18707        # --------------------------------------------------------------------
18708        eliminate_new_fields( $new_line, $current_line );
18709
18710        # --------------------------------------------------------------------
18711        # Flush previous group unless all common tokens and patterns match..
18712        # --------------------------------------------------------------------
18713        check_match( $new_line, $current_line );
18714
18715        # --------------------------------------------------------------------
18716        # See if there is space for this line in the current group (if any)
18717        # --------------------------------------------------------------------
18718        if ($current_line) {
18719            check_fit( $new_line, $current_line );
18720        }
18721    }
18722
18723    # --------------------------------------------------------------------
18724    # Append this line to the current group (or start new group)
18725    # --------------------------------------------------------------------
18726    accept_line($new_line);
18727
18728    # Future update to allow this to vary:
18729    $current_line = $new_line if ( $maximum_line_index == 0 );
18730
18731    # output this group if it ends in a terminal else or ternary line
18732    if ( defined($j_terminal_match) ) {
18733
18734        # if there is only one line in the group (maybe due to failure to match
18735        # perfectly with previous lines), then align the ? or { of this
18736        # terminal line with the previous one unless that would make the line
18737        # too long
18738        if ( $maximum_line_index == 0 ) {
18739            my $col_now = $current_line->get_column($j_terminal_match);
18740            my $pad     = $col_matching_terminal - $col_now;
18741            my $padding_available =
18742              $current_line->get_available_space_on_right();
18743            if ( $pad > 0 && $pad <= $padding_available ) {
18744                $current_line->increase_field_width( $j_terminal_match, $pad );
18745            }
18746        }
18747        my_flush();
18748        $is_matching_terminal_line = 0;
18749    }
18750
18751    # --------------------------------------------------------------------
18752    # Step 8. Some old debugging stuff
18753    # --------------------------------------------------------------------
18754    VALIGN_DEBUG_FLAG_APPEND && do {
18755        print "APPEND fields:";
18756        dump_array(@$rfields);
18757        print "APPEND tokens:";
18758        dump_array(@$rtokens);
18759        print "APPEND patterns:";
18760        dump_array(@$rpatterns);
18761        dump_alignments();
18762    };
18763
18764    return;
18765}
18766
18767sub join_hanging_comment {
18768
18769    my $line = shift;
18770    my $jmax = $line->get_jmax();
18771    return 0 unless $jmax == 1;    # must be 2 fields
18772    my $rtokens = $line->get_rtokens();
18773    return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
18774    my $rfields = $line->get_rfields();
18775    return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
18776    my $old_line            = shift;
18777    my $maximum_field_index = $old_line->get_jmax();
18778    return 0
18779      unless $maximum_field_index > $jmax;    # the current line has more fields
18780    my $rpatterns = $line->get_rpatterns();
18781
18782    $line->set_is_hanging_side_comment(1);
18783    $jmax = $maximum_field_index;
18784    $line->set_jmax($jmax);
18785    $$rfields[$jmax]         = $$rfields[1];
18786    $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
18787    $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
18788    for ( my $j = 1 ; $j < $jmax ; $j++ ) {
18789        $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
18790        $$rtokens[ $j - 1 ]   = "";
18791        $$rpatterns[ $j - 1 ] = "";
18792    }
18793    return 1;
18794}
18795
18796sub eliminate_old_fields {
18797
18798    my $new_line = shift;
18799    my $jmax     = $new_line->get_jmax();
18800    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
18801    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
18802
18803    # there must be one previous line
18804    return unless ( $maximum_line_index == 0 );
18805
18806    my $old_line            = shift;
18807    my $maximum_field_index = $old_line->get_jmax();
18808
18809    ###############################################
18810    # this line must have fewer fields
18811    return unless $maximum_field_index > $jmax;
18812    ###############################################
18813
18814    # Identify specific cases where field elimination is allowed:
18815    # case=1: both lines have comma-separated lists, and the first
18816    #         line has an equals
18817    # case=2: both lines have leading equals
18818
18819    # case 1 is the default
18820    my $case = 1;
18821
18822    # See if case 2: both lines have leading '='
18823    # We'll require smiliar leading patterns in this case
18824    my $old_rtokens   = $old_line->get_rtokens();
18825    my $rtokens       = $new_line->get_rtokens();
18826    my $rpatterns     = $new_line->get_rpatterns();
18827    my $old_rpatterns = $old_line->get_rpatterns();
18828    if (   $rtokens->[0] =~ /^=\d*$/
18829        && $old_rtokens->[0]   eq $rtokens->[0]
18830        && $old_rpatterns->[0] eq $rpatterns->[0] )
18831    {
18832        $case = 2;
18833    }
18834
18835    # not too many fewer fields in new line for case 1
18836    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
18837
18838    # case 1 must have side comment
18839    my $old_rfields = $old_line->get_rfields();
18840    return
18841      if ( $case == 1
18842        && length( $$old_rfields[$maximum_field_index] ) == 0 );
18843
18844    my $rfields = $new_line->get_rfields();
18845
18846    my $hid_equals = 0;
18847
18848    my @new_alignments        = ();
18849    my @new_fields            = ();
18850    my @new_matching_patterns = ();
18851    my @new_matching_tokens   = ();
18852
18853    my $j = 0;
18854    my $k;
18855    my $current_field   = '';
18856    my $current_pattern = '';
18857
18858    # loop over all old tokens
18859    my $in_match = 0;
18860    for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
18861        $current_field   .= $$old_rfields[$k];
18862        $current_pattern .= $$old_rpatterns[$k];
18863        last if ( $j > $jmax - 1 );
18864
18865        if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
18866            $in_match                  = 1;
18867            $new_fields[$j]            = $current_field;
18868            $new_matching_patterns[$j] = $current_pattern;
18869            $current_field             = '';
18870            $current_pattern           = '';
18871            $new_matching_tokens[$j]   = $$old_rtokens[$k];
18872            $new_alignments[$j]        = $old_line->get_alignment($k);
18873            $j++;
18874        }
18875        else {
18876
18877            if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
18878                last if ( $case == 2 );    # avoid problems with stuff
18879                                           # like:   $a=$b=$c=$d;
18880                $hid_equals = 1;
18881            }
18882            last
18883              if ( $in_match && $case == 1 )
18884              ;    # disallow gaps in matching field types in case 1
18885        }
18886    }
18887
18888    # Modify the current state if we are successful.
18889    # We must exactly reach the ends of both lists for success.
18890    if (   ( $j == $jmax )
18891        && ( $current_field eq '' )
18892        && ( $case != 1 || $hid_equals ) )
18893    {
18894        $k = $maximum_field_index;
18895        $current_field   .= $$old_rfields[$k];
18896        $current_pattern .= $$old_rpatterns[$k];
18897        $new_fields[$j]            = $current_field;
18898        $new_matching_patterns[$j] = $current_pattern;
18899
18900        $new_alignments[$j] = $old_line->get_alignment($k);
18901        $maximum_field_index = $j;
18902
18903        $old_line->set_alignments(@new_alignments);
18904        $old_line->set_jmax($jmax);
18905        $old_line->set_rtokens( \@new_matching_tokens );
18906        $old_line->set_rfields( \@new_fields );
18907        $old_line->set_rpatterns( \@$rpatterns );
18908    }
18909}
18910
18911# create an empty side comment if none exists
18912sub make_side_comment {
18913    my $new_line  = shift;
18914    my $level_end = shift;
18915    my $jmax      = $new_line->get_jmax();
18916    my $rtokens   = $new_line->get_rtokens();
18917
18918    # if line does not have a side comment...
18919    if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
18920        my $rfields   = $new_line->get_rfields();
18921        my $rpatterns = $new_line->get_rpatterns();
18922        $$rtokens[$jmax]     = '#';
18923        $$rfields[ ++$jmax ] = '';
18924        $$rpatterns[$jmax]   = '#';
18925        $new_line->set_jmax($jmax);
18926        $new_line->set_jmax_original_line($jmax);
18927    }
18928
18929    # line has a side comment..
18930    else {
18931
18932        # don't remember old side comment location for very long
18933        my $line_number = $vertical_aligner_self->get_output_line_number();
18934        my $rfields     = $new_line->get_rfields();
18935        if (
18936            $line_number - $last_side_comment_line_number > 12
18937
18938            # and don't remember comment location across block level changes
18939            || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
18940          )
18941        {
18942            forget_side_comment();
18943        }
18944        $last_side_comment_line_number = $line_number;
18945        $last_side_comment_level       = $level_end;
18946    }
18947}
18948
18949sub decide_if_list {
18950
18951    my $line = shift;
18952
18953    # A list will be taken to be a line with a forced break in which all
18954    # of the field separators are commas or comma-arrows (except for the
18955    # trailing #)
18956
18957    # List separator tokens are things like ',3'   or '=>2',
18958    # where the trailing digit is the nesting depth.  Allow braces
18959    # to allow nested list items.
18960    my $rtokens    = $line->get_rtokens();
18961    my $test_token = $$rtokens[0];
18962    if ( $test_token =~ /^(\,|=>)/ ) {
18963        my $list_type = $test_token;
18964        my $jmax      = $line->get_jmax();
18965
18966        foreach ( 1 .. $jmax - 2 ) {
18967            if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
18968                $list_type = "";
18969                last;
18970            }
18971        }
18972        $line->set_list_type($list_type);
18973    }
18974}
18975
18976sub eliminate_new_fields {
18977
18978    return unless ( $maximum_line_index >= 0 );
18979    my ( $new_line, $old_line ) = @_;
18980    my $jmax = $new_line->get_jmax();
18981
18982    my $old_rtokens = $old_line->get_rtokens();
18983    my $rtokens     = $new_line->get_rtokens();
18984    my $is_assignment =
18985      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
18986
18987    # must be monotonic variation
18988    return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
18989
18990    # must be more fields in the new line
18991    my $maximum_field_index = $old_line->get_jmax();
18992    return unless ( $maximum_field_index < $jmax );
18993
18994    unless ($is_assignment) {
18995        return
18996          unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
18997          ;    # only if monotonic
18998
18999        # never combine fields of a comma list
19000        return
19001          unless ( $maximum_field_index > 1 )
19002          && ( $new_line->get_list_type() !~ /^,/ );
19003    }
19004
19005    my $rfields       = $new_line->get_rfields();
19006    my $rpatterns     = $new_line->get_rpatterns();
19007    my $old_rpatterns = $old_line->get_rpatterns();
19008
19009    # loop over all OLD tokens except comment and check match
19010    my $match = 1;
19011    my $k;
19012    for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
19013        if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
19014            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
19015        {
19016            $match = 0;
19017            last;
19018        }
19019    }
19020
19021    # first tokens agree, so combine extra new tokens
19022    if ($match) {
19023        for $k ( $maximum_field_index .. $jmax - 1 ) {
19024
19025            $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
19026            $$rfields[$k] = "";
19027            $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
19028            $$rpatterns[$k] = "";
19029        }
19030
19031        $$rtokens[ $maximum_field_index - 1 ] = '#';
19032        $$rfields[$maximum_field_index]       = $$rfields[$jmax];
19033        $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
19034        $jmax                                 = $maximum_field_index;
19035    }
19036    $new_line->set_jmax($jmax);
19037}
19038
19039sub fix_terminal_ternary {
19040
19041    # Add empty fields as necessary to align a ternary term
19042    # like this:
19043    #
19044    #  my $leapyear =
19045    #      $year % 4   ? 0
19046    #    : $year % 100 ? 1
19047    #    : $year % 400 ? 0
19048    #    :               1;
19049    #
19050    # returns 1 if the terminal item should be indented
19051
19052    my ( $rfields, $rtokens, $rpatterns ) = @_;
19053
19054    my $jmax        = @{$rfields} - 1;
19055    my $old_line    = $group_lines[$maximum_line_index];
19056    my $rfields_old = $old_line->get_rfields();
19057
19058    my $rpatterns_old       = $old_line->get_rpatterns();
19059    my $rtokens_old         = $old_line->get_rtokens();
19060    my $maximum_field_index = $old_line->get_jmax();
19061
19062    # look for the question mark after the :
19063    my ($jquestion);
19064    my $depth_question;
19065    my $pad = "";
19066    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
19067        my $tok = $rtokens_old->[$j];
19068        if ( $tok =~ /^\?(\d+)$/ ) {
19069            $depth_question = $1;
19070
19071            # depth must be correct
19072            next unless ( $depth_question eq $group_level );
19073
19074            $jquestion = $j;
19075            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
19076                $pad = " " x length($1);
19077            }
19078            else {
19079                return;    # shouldn't happen
19080            }
19081            last;
19082        }
19083    }
19084    return unless ( defined($jquestion) );    # shouldn't happen
19085
19086    # Now splice the tokens and patterns of the previous line
19087    # into the else line to insure a match.  Add empty fields
19088    # as necessary.
19089    my $jadd = $jquestion;
19090
19091    # Work on copies of the actual arrays in case we have
19092    # to return due to an error
19093    my @fields   = @{$rfields};
19094    my @patterns = @{$rpatterns};
19095    my @tokens   = @{$rtokens};
19096
19097    VALIGN_DEBUG_FLAG_TERNARY && do {
19098        local $" = '><';
19099        print "CURRENT FIELDS=<@{$rfields_old}>\n";
19100        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
19101        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
19102        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
19103        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
19104        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
19105    };
19106
19107    # handle cases of leading colon on this line
19108    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
19109
19110        my ( $colon, $therest ) = ( $1, $2 );
19111
19112        # Handle sub-case of first field with leading colon plus additional code
19113        # This is the usual situation as at the '1' below:
19114        #  ...
19115        #  : $year % 400 ? 0
19116        #  :               1;
19117        if ($therest) {
19118
19119            # Split the first field after the leading colon and insert padding.
19120            # Note that this padding will remain even if the terminal value goes
19121            # out on a separate line.  This does not seem to look to bad, so no
19122            # mechanism has been included to undo it.
19123            my $field1 = shift @fields;
19124            unshift @fields, ( $colon, $pad . $therest );
19125
19126            # change the leading pattern from : to ?
19127            return unless ( $patterns[0] =~ s/^\:/?/ );
19128
19129            # install leading tokens and patterns of existing line
19130            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
19131            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19132
19133            # insert appropriate number of empty fields
19134            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19135        }
19136
19137        # handle sub-case of first field just equal to leading colon.
19138        # This can happen for example in the example below where
19139        # the leading '(' would create a new alignment token
19140        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
19141        # :                        ( $mname = $name . '->' );
19142        else {
19143
19144            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
19145
19146            # prepend a leading ? onto the second pattern
19147            $patterns[1] = "?b" . $patterns[1];
19148
19149            # pad the second field
19150            $fields[1] = $pad . $fields[1];
19151
19152            # install leading tokens and patterns of existing line, replacing
19153            # leading token and inserting appropriate number of empty fields
19154            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
19155            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
19156            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19157        }
19158    }
19159
19160    # Handle case of no leading colon on this line.  This will
19161    # be the case when -wba=':' is used.  For example,
19162    #  $year % 400 ? 0 :
19163    #                1;
19164    else {
19165
19166        # install leading tokens and patterns of existing line
19167        $patterns[0] = '?' . 'b' . $patterns[0];
19168        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
19169        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19170
19171        # insert appropriate number of empty fields
19172        $jadd = $jquestion + 1;
19173        $fields[0] = $pad . $fields[0];
19174        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
19175    }
19176
19177    VALIGN_DEBUG_FLAG_TERNARY && do {
19178        local $" = '><';
19179        print "MODIFIED TOKENS=<@tokens>\n";
19180        print "MODIFIED PATTERNS=<@patterns>\n";
19181        print "MODIFIED FIELDS=<@fields>\n";
19182    };
19183
19184    # all ok .. update the arrays
19185    @{$rfields}   = @fields;
19186    @{$rtokens}   = @tokens;
19187    @{$rpatterns} = @patterns;
19188
19189    # force a flush after this line
19190    return $jquestion;
19191}
19192
19193sub fix_terminal_else {
19194
19195    # Add empty fields as necessary to align a balanced terminal
19196    # else block to a previous if/elsif/unless block,
19197    # like this:
19198    #
19199    #  if   ( 1 || $x ) { print "ok 13\n"; }
19200    #  else             { print "not ok 13\n"; }
19201    #
19202    # returns 1 if the else block should be indented
19203    #
19204    my ( $rfields, $rtokens, $rpatterns ) = @_;
19205    my $jmax = @{$rfields} - 1;
19206    return unless ( $jmax > 0 );
19207
19208    # check for balanced else block following if/elsif/unless
19209    my $rfields_old = $current_line->get_rfields();
19210
19211    # TBD: add handling for 'case'
19212    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
19213
19214    # look for the opening brace after the else, and extrace the depth
19215    my $tok_brace = $rtokens->[0];
19216    my $depth_brace;
19217    if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
19218
19219    # probably:  "else # side_comment"
19220    else { return }
19221
19222    my $rpatterns_old       = $current_line->get_rpatterns();
19223    my $rtokens_old         = $current_line->get_rtokens();
19224    my $maximum_field_index = $current_line->get_jmax();
19225
19226    # be sure the previous if/elsif is followed by an opening paren
19227    my $jparen    = 0;
19228    my $tok_paren = '(' . $depth_brace;
19229    my $tok_test  = $rtokens_old->[$jparen];
19230    return unless ( $tok_test eq $tok_paren );    # shouldn't happen
19231
19232    # Now find the opening block brace
19233    my ($jbrace);
19234    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
19235        my $tok = $rtokens_old->[$j];
19236        if ( $tok eq $tok_brace ) {
19237            $jbrace = $j;
19238            last;
19239        }
19240    }
19241    return unless ( defined($jbrace) );           # shouldn't happen
19242
19243    # Now splice the tokens and patterns of the previous line
19244    # into the else line to insure a match.  Add empty fields
19245    # as necessary.
19246    my $jadd = $jbrace - $jparen;
19247    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
19248    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
19249    splice( @{$rfields}, 1, 0, ('') x $jadd );
19250
19251    # force a flush after this line if it does not follow a case
19252    return $jbrace
19253      unless ( $rfields_old->[0] =~ /^case\s*$/ );
19254}
19255
19256{    # sub check_match
19257    my %is_good_alignment;
19258
19259    BEGIN {
19260
19261        # Vertically aligning on certain "good" tokens is usually okay
19262        # so we can be less restrictive in marginal cases.
19263        @_ = qw( { ? => = );
19264        push @_, (',');
19265        @is_good_alignment{@_} = (1) x scalar(@_);
19266    }
19267
19268    sub check_match {
19269
19270        # See if the current line matches the current vertical alignment group.
19271        # If not, flush the current group.
19272        my $new_line = shift;
19273        my $old_line = shift;
19274
19275        # uses global variables:
19276        #  $previous_minimum_jmax_seen
19277        #  $maximum_jmax_seen
19278        #  $maximum_line_index
19279        #  $marginal_match
19280        my $jmax                = $new_line->get_jmax();
19281        my $maximum_field_index = $old_line->get_jmax();
19282
19283        # flush if this line has too many fields
19284        if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
19285
19286        # flush if adding this line would make a non-monotonic field count
19287        if (
19288            ( $maximum_field_index > $jmax )    # this has too few fields
19289            && (
19290                ( $previous_minimum_jmax_seen <
19291                    $jmax )                     # and wouldn't be monotonic
19292                || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
19293            )
19294          )
19295        {
19296            goto NO_MATCH;
19297        }
19298
19299        # otherwise see if this line matches the current group
19300        my $jmax_original_line      = $new_line->get_jmax_original_line();
19301        my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19302        my $rtokens                 = $new_line->get_rtokens();
19303        my $rfields                 = $new_line->get_rfields();
19304        my $rpatterns               = $new_line->get_rpatterns();
19305        my $list_type               = $new_line->get_list_type();
19306
19307        my $group_list_type = $old_line->get_list_type();
19308        my $old_rpatterns   = $old_line->get_rpatterns();
19309        my $old_rtokens     = $old_line->get_rtokens();
19310
19311        my $jlimit = $jmax - 1;
19312        if ( $maximum_field_index > $jmax ) {
19313            $jlimit = $jmax_original_line;
19314            --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
19315        }
19316
19317        # handle comma-separated lists ..
19318        if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
19319            for my $j ( 0 .. $jlimit ) {
19320                my $old_tok = $$old_rtokens[$j];
19321                next unless $old_tok;
19322                my $new_tok = $$rtokens[$j];
19323                next unless $new_tok;
19324
19325                # lists always match ...
19326                # unless they would align any '=>'s with ','s
19327                goto NO_MATCH
19328                  if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
19329                    || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
19330            }
19331        }
19332
19333        # do detailed check for everything else except hanging side comments
19334        elsif ( !$is_hanging_side_comment ) {
19335
19336            my $leading_space_count = $new_line->get_leading_space_count();
19337
19338            my $max_pad = 0;
19339            my $min_pad = 0;
19340            my $saw_good_alignment;
19341
19342            for my $j ( 0 .. $jlimit ) {
19343
19344                my $old_tok = $$old_rtokens[$j];
19345                my $new_tok = $$rtokens[$j];
19346
19347                # Note on encoding used for alignment tokens:
19348                # -------------------------------------------
19349                # Tokens are "decorated" with information which can help
19350                # prevent unwanted alignments.  Consider for example the
19351                # following two lines:
19352                #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
19353                #   local ( $i, $f ) = &'bdiv( $xn, $xd );
19354                # There are three alignment tokens in each line, a comma,
19355                # an =, and a comma.  In the first line these three tokens
19356                # are encoded as:
19357                #    ,4+local-18     =3      ,4+split-7
19358                # and in the second line they are encoded as
19359                #    ,4+local-18     =3      ,4+&'bdiv-8
19360                # Tokens always at least have token name and nesting
19361                # depth.  So in this example the ='s are at depth 3 and
19362                # the ,'s are at depth 4.  This prevents aligning tokens
19363                # of different depths.  Commas contain additional
19364                # information, as follows:
19365                # ,  {depth} + {container name} - {spaces to opening paren}
19366                # This allows us to reject matching the rightmost commas
19367                # in the above two lines, since they are for different
19368                # function calls.  This encoding is done in
19369                # 'sub send_lines_to_vertical_aligner'.
19370
19371                # Pick off actual token.
19372                # Everything up to the first digit is the actual token.
19373                my $alignment_token = $new_tok;
19374                if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
19375
19376                # see if the decorated tokens match
19377                my $tokens_match = $new_tok eq $old_tok
19378
19379                  # Exception for matching terminal : of ternary statement..
19380                  # consider containers prefixed by ? and : a match
19381                  || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
19382
19383                # No match if the alignment tokens differ...
19384                if ( !$tokens_match ) {
19385
19386                    # ...Unless this is a side comment
19387                    if (
19388                        $j == $jlimit
19389
19390                        # and there is either at least one alignment token
19391                        # or this is a single item following a list.  This
19392                        # latter rule is required for 'December' to join
19393                        # the following list:
19394                        # my (@months) = (
19395                        #     '',       'January',   'February', 'March',
19396                        #     'April',  'May',       'June',     'July',
19397                        #     'August', 'September', 'October',  'November',
19398                        #     'December'
19399                        # );
19400                        # If it doesn't then the -lp formatting will fail.
19401                        && ( $j > 0 || $old_tok =~ /^,/ )
19402                      )
19403                    {
19404                        $marginal_match = 1
19405                          if ( $marginal_match == 0
19406                            && $maximum_line_index == 0 );
19407                        last;
19408                    }
19409
19410                    goto NO_MATCH;
19411                }
19412
19413                # Calculate amount of padding required to fit this in.
19414                # $pad is the number of spaces by which we must increase
19415                # the current field to squeeze in this field.
19416                my $pad =
19417                  length( $$rfields[$j] ) - $old_line->current_field_width($j);
19418                if ( $j == 0 ) { $pad += $leading_space_count; }
19419
19420                # remember max pads to limit marginal cases
19421                if ( $alignment_token ne '#' ) {
19422                    if ( $pad > $max_pad ) { $max_pad = $pad }
19423                    if ( $pad < $min_pad ) { $min_pad = $pad }
19424                }
19425                if ( $is_good_alignment{$alignment_token} ) {
19426                    $saw_good_alignment = 1;
19427                }
19428
19429                # If patterns don't match, we have to be careful...
19430                if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
19431
19432                    # flag this as a marginal match since patterns differ
19433                    $marginal_match = 1
19434                      if ( $marginal_match == 0 && $maximum_line_index == 0 );
19435
19436                    # We have to be very careful about aligning commas
19437                    # when the pattern's don't match, because it can be
19438                    # worse to create an alignment where none is needed
19439                    # than to omit one.  Here's an example where the ','s
19440                    # are not in named continers.  The first line below
19441                    # should not match the next two:
19442                    #   ( $a, $b ) = ( $b, $r );
19443                    #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
19444                    #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
19445                    if ( $alignment_token eq ',' ) {
19446
19447                       # do not align commas unless they are in named containers
19448                        goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
19449                    }
19450
19451                    # do not align parens unless patterns match;
19452                    # large ugly spaces can occur in math expressions.
19453                    elsif ( $alignment_token eq '(' ) {
19454
19455                        # But we can allow a match if the parens don't
19456                        # require any padding.
19457                        if ( $pad != 0 ) { goto NO_MATCH }
19458                    }
19459
19460                    # Handle an '=' alignment with different patterns to
19461                    # the left.
19462                    elsif ( $alignment_token eq '=' ) {
19463
19464                        # It is best to be a little restrictive when
19465                        # aligning '=' tokens.  Here is an example of
19466                        # two lines that we will not align:
19467                        #       my $variable=6;
19468                        #       $bb=4;
19469                        # The problem is that one is a 'my' declaration,
19470                        # and the other isn't, so they're not very similar.
19471                        # We will filter these out by comparing the first
19472                        # letter of the pattern.  This is crude, but works
19473                        # well enough.
19474                        if (
19475                            substr( $$old_rpatterns[$j], 0, 1 ) ne
19476                            substr( $$rpatterns[$j], 0, 1 ) )
19477                        {
19478                            goto NO_MATCH;
19479                        }
19480
19481                        # If we pass that test, we'll call it a marginal match.
19482                        # Here is an example of a marginal match:
19483                        #       $done{$$op} = 1;
19484                        #       $op         = compile_bblock($op);
19485                        # The left tokens are both identifiers, but
19486                        # one accesses a hash and the other doesn't.
19487                        # We'll let this be a tentative match and undo
19488                        # it later if we don't find more than 2 lines
19489                        # in the group.
19490                        elsif ( $maximum_line_index == 0 ) {
19491                            $marginal_match =
19492                              2;    # =2 prevents being undone below
19493                        }
19494                    }
19495                }
19496
19497                # Don't let line with fewer fields increase column widths
19498                # ( align3.t )
19499                if ( $maximum_field_index > $jmax ) {
19500
19501                    # Exception: suspend this rule to allow last lines to join
19502                    if ( $pad > 0 ) { goto NO_MATCH; }
19503                }
19504            } ## end for my $j ( 0 .. $jlimit)
19505
19506            # Turn off the "marginal match" flag in some cases...
19507            # A "marginal match" occurs when the alignment tokens agree
19508            # but there are differences in the other tokens (patterns).
19509            # If we leave the marginal match flag set, then the rule is that we
19510            # will align only if there are more than two lines in the group.
19511            # We will turn of the flag if we almost have a match
19512            # and either we have seen a good alignment token or we
19513            # just need a small pad (2 spaces) to fit.  These rules are
19514            # the result of experimentation.  Tokens which misaligned by just
19515            # one or two characters are annoying.  On the other hand,
19516            # large gaps to less important alignment tokens are also annoying.
19517            if (   $marginal_match == 1
19518                && $jmax == $maximum_field_index
19519                && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
19520              )
19521            {
19522                $marginal_match = 0;
19523            }
19524            ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
19525        }
19526
19527        # We have a match (even if marginal).
19528        # If the current line has fewer fields than the current group
19529        # but otherwise matches, copy the remaining group fields to
19530        # make it a perfect match.
19531        if ( $maximum_field_index > $jmax ) {
19532            my $comment = $$rfields[$jmax];
19533            for $jmax ( $jlimit .. $maximum_field_index ) {
19534                $$rtokens[$jmax]     = $$old_rtokens[$jmax];
19535                $$rfields[ ++$jmax ] = '';
19536                $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
19537            }
19538            $$rfields[$jmax] = $comment;
19539            $new_line->set_jmax($jmax);
19540        }
19541        return;
19542
19543      NO_MATCH:
19544        ##print "BUBBA: no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
19545        my_flush();
19546        return;
19547    }
19548}
19549
19550sub check_fit {
19551
19552    return unless ( $maximum_line_index >= 0 );
19553    my $new_line = shift;
19554    my $old_line = shift;
19555
19556    my $jmax                    = $new_line->get_jmax();
19557    my $leading_space_count     = $new_line->get_leading_space_count();
19558    my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19559    my $rtokens                 = $new_line->get_rtokens();
19560    my $rfields                 = $new_line->get_rfields();
19561    my $rpatterns               = $new_line->get_rpatterns();
19562
19563    my $group_list_type = $group_lines[0]->get_list_type();
19564
19565    my $padding_so_far    = 0;
19566    my $padding_available = $old_line->get_available_space_on_right();
19567
19568    # save current columns in case this doesn't work
19569    save_alignment_columns();
19570
19571    my ( $j, $pad, $eight );
19572    my $maximum_field_index = $old_line->get_jmax();
19573    for $j ( 0 .. $jmax ) {
19574
19575        $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
19576
19577        if ( $j == 0 ) {
19578            $pad += $leading_space_count;
19579        }
19580
19581        # remember largest gap of the group, excluding gap to side comment
19582        if (   $pad < 0
19583            && $group_maximum_gap < -$pad
19584            && $j > 0
19585            && $j < $jmax - 1 )
19586        {
19587            $group_maximum_gap = -$pad;
19588        }
19589
19590        next if $pad < 0;
19591
19592        ## This patch helps sometimes, but it doesn't check to see if
19593        ## the line is too long even without the side comment.  It needs
19594        ## to be reworked.
19595        ##don't let a long token with no trailing side comment push
19596        ##side comments out, or end a group.  (sidecmt1.t)
19597        ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
19598
19599        # This line will need space; lets see if we want to accept it..
19600        if (
19601
19602            # not if this won't fit
19603            ( $pad > $padding_available )
19604
19605            # previously, there were upper bounds placed on padding here
19606            # (maximum_whitespace_columns), but they were not really helpful
19607
19608          )
19609        {
19610
19611            # revert to starting state then flush; things didn't work out
19612            restore_alignment_columns();
19613            my_flush();
19614            last;
19615        }
19616
19617        # patch to avoid excessive gaps in previous lines,
19618        # due to a line of fewer fields.
19619        #   return join( ".",
19620        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
19621        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
19622        next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
19623
19624        # looks ok, squeeze this field in
19625        $old_line->increase_field_width( $j, $pad );
19626        $padding_available -= $pad;
19627
19628        # remember largest gap of the group, excluding gap to side comment
19629        if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
19630            $group_maximum_gap = $pad;
19631        }
19632    }
19633}
19634
19635sub accept_line {
19636
19637    # The current line either starts a new alignment group or is
19638    # accepted into the current alignment group.
19639    my $new_line = shift;
19640    $group_lines[ ++$maximum_line_index ] = $new_line;
19641
19642    # initialize field lengths if starting new group
19643    if ( $maximum_line_index == 0 ) {
19644
19645        my $jmax    = $new_line->get_jmax();
19646        my $rfields = $new_line->get_rfields();
19647        my $rtokens = $new_line->get_rtokens();
19648        my $j;
19649        my $col = $new_line->get_leading_space_count();
19650
19651        for $j ( 0 .. $jmax ) {
19652            $col += length( $$rfields[$j] );
19653
19654            # create initial alignments for the new group
19655            my $token = "";
19656            if ( $j < $jmax ) { $token = $$rtokens[$j] }
19657            my $alignment = make_alignment( $col, $token );
19658            $new_line->set_alignment( $j, $alignment );
19659        }
19660
19661        $maximum_jmax_seen = $jmax;
19662        $minimum_jmax_seen = $jmax;
19663    }
19664
19665    # use previous alignments otherwise
19666    else {
19667        my @new_alignments =
19668          $group_lines[ $maximum_line_index - 1 ]->get_alignments();
19669        $new_line->set_alignments(@new_alignments);
19670    }
19671
19672    # remember group jmax extremes for next call to append_line
19673    $previous_minimum_jmax_seen = $minimum_jmax_seen;
19674    $previous_maximum_jmax_seen = $maximum_jmax_seen;
19675}
19676
19677sub dump_array {
19678
19679    # debug routine to dump array contents
19680    local $" = ')(';
19681    print "(@_)\n";
19682}
19683
19684# flush() sends the current Perl::Tidy::VerticalAligner group down the
19685# pipeline to Perl::Tidy::FileWriter.
19686
19687# This is the external flush, which also empties the cache
19688sub flush {
19689
19690    if ( $maximum_line_index < 0 ) {
19691        if ($cached_line_type) {
19692            $seqno_string = $cached_seqno_string;
19693            entab_and_output( $cached_line_text,
19694                $cached_line_leading_space_count,
19695                $last_group_level_written );
19696            $cached_line_type    = 0;
19697            $cached_line_text    = "";
19698            $cached_seqno_string = "";
19699        }
19700    }
19701    else {
19702        my_flush();
19703    }
19704}
19705
19706# This is the internal flush, which leaves the cache intact
19707sub my_flush {
19708
19709    return if ( $maximum_line_index < 0 );
19710
19711    # handle a group of comment lines
19712    if ( $group_type eq 'COMMENT' ) {
19713
19714        VALIGN_DEBUG_FLAG_APPEND0 && do {
19715            my ( $a, $b, $c ) = caller();
19716            print
19717"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
19718
19719        };
19720        my $leading_space_count = $comment_leading_space_count;
19721        my $leading_string      = get_leading_string($leading_space_count);
19722
19723        # zero leading space count if any lines are too long
19724        my $max_excess = 0;
19725        for my $i ( 0 .. $maximum_line_index ) {
19726            my $str = $group_lines[$i];
19727            my $excess =
19728              length($str) + $leading_space_count - $rOpts_maximum_line_length;
19729            if ( $excess > $max_excess ) {
19730                $max_excess = $excess;
19731            }
19732        }
19733
19734        if ( $max_excess > 0 ) {
19735            $leading_space_count -= $max_excess;
19736            if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
19737            $last_outdented_line_at =
19738              $file_writer_object->get_output_line_number();
19739            unless ($outdented_line_count) {
19740                $first_outdented_line_at = $last_outdented_line_at;
19741            }
19742            $outdented_line_count += ( $maximum_line_index + 1 );
19743        }
19744
19745        # write the group of lines
19746        my $outdent_long_lines = 0;
19747        for my $i ( 0 .. $maximum_line_index ) {
19748            write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
19749                $outdent_long_lines, "" );
19750        }
19751    }
19752
19753    # handle a group of code lines
19754    else {
19755
19756        VALIGN_DEBUG_FLAG_APPEND0 && do {
19757            my $group_list_type = $group_lines[0]->get_list_type();
19758            my ( $a, $b, $c ) = caller();
19759            my $maximum_field_index = $group_lines[0]->get_jmax();
19760            print
19761"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
19762
19763        };
19764
19765        # some small groups are best left unaligned
19766        my $do_not_align = decide_if_aligned();
19767
19768        # optimize side comment location
19769        $do_not_align = adjust_side_comment($do_not_align);
19770
19771        # recover spaces for -lp option if possible
19772        my $extra_leading_spaces = get_extra_leading_spaces();
19773
19774        # all lines of this group have the same basic leading spacing
19775        my $group_leader_length = $group_lines[0]->get_leading_space_count();
19776
19777        # add extra leading spaces if helpful
19778        my $min_ci_gap = improve_continuation_indentation( $do_not_align,
19779            $group_leader_length );
19780
19781        # loop to output all lines
19782        for my $i ( 0 .. $maximum_line_index ) {
19783            my $line = $group_lines[$i];
19784            write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
19785                $group_leader_length, $extra_leading_spaces );
19786        }
19787    }
19788    initialize_for_new_group();
19789}
19790
19791sub decide_if_aligned {
19792
19793    # Do not try to align two lines which are not really similar
19794    return unless $maximum_line_index == 1;
19795    return if ($is_matching_terminal_line);
19796
19797    my $group_list_type = $group_lines[0]->get_list_type();
19798
19799    my $do_not_align = (
19800
19801        # always align lists
19802        !$group_list_type
19803
19804          && (
19805
19806            # don't align if it was just a marginal match
19807            $marginal_match
19808
19809            # don't align two lines with big gap
19810            || $group_maximum_gap > 12
19811
19812            # or lines with differing number of alignment tokens
19813            # TODO: this could be improved.  It occasionally rejects
19814            # good matches.
19815            || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
19816          )
19817    );
19818
19819    # But try to convert them into a simple comment group if the first line
19820    # a has side comment
19821    my $rfields             = $group_lines[0]->get_rfields();
19822    my $maximum_field_index = $group_lines[0]->get_jmax();
19823    if (   $do_not_align
19824        && ( $maximum_line_index > 0 )
19825        && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
19826    {
19827        combine_fields();
19828        $do_not_align = 0;
19829    }
19830    return $do_not_align;
19831}
19832
19833sub adjust_side_comment {
19834
19835    my $do_not_align = shift;
19836
19837    # let's see if we can move the side comment field out a little
19838    # to improve readability (the last field is always a side comment field)
19839    my $have_side_comment       = 0;
19840    my $first_side_comment_line = -1;
19841    my $maximum_field_index     = $group_lines[0]->get_jmax();
19842    for my $i ( 0 .. $maximum_line_index ) {
19843        my $line = $group_lines[$i];
19844
19845        if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
19846            $have_side_comment       = 1;
19847            $first_side_comment_line = $i;
19848            last;
19849        }
19850    }
19851
19852    my $kmax = $maximum_field_index + 1;
19853
19854    if ($have_side_comment) {
19855
19856        my $line = $group_lines[0];
19857
19858        # the maximum space without exceeding the line length:
19859        my $avail = $line->get_available_space_on_right();
19860
19861        # try to use the previous comment column
19862        my $side_comment_column = $line->get_column( $kmax - 2 );
19863        my $move                = $last_comment_column - $side_comment_column;
19864
19865##        my $sc_line0 = $side_comment_history[0]->[0];
19866##        my $sc_col0  = $side_comment_history[0]->[1];
19867##        my $sc_line1 = $side_comment_history[1]->[0];
19868##        my $sc_col1  = $side_comment_history[1]->[1];
19869##        my $sc_line2 = $side_comment_history[2]->[0];
19870##        my $sc_col2  = $side_comment_history[2]->[1];
19871##
19872##        # FUTURE UPDATES:
19873##        # Be sure to ignore 'do not align' and  '} # end comments'
19874##        # Find first $move > 0 and $move <= $avail as follows:
19875##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
19876##        # 2. try sc_col2 if (line-sc_line2) < 12
19877##        # 3. try min possible space, plus up to 8,
19878##        # 4. try min possible space
19879
19880        if ( $kmax > 0 && !$do_not_align ) {
19881
19882            # but if this doesn't work, give up and use the minimum space
19883            if ( $move > $avail ) {
19884                $move = $rOpts_minimum_space_to_comment - 1;
19885            }
19886
19887            # but we want some minimum space to the comment
19888            my $min_move = $rOpts_minimum_space_to_comment - 1;
19889            if (   $move >= 0
19890                && $last_side_comment_length > 0
19891                && ( $first_side_comment_line == 0 )
19892                && $group_level == $last_group_level_written )
19893            {
19894                $min_move = 0;
19895            }
19896
19897            if ( $move < $min_move ) {
19898                $move = $min_move;
19899            }
19900
19901            # prevously, an upper bound was placed on $move here,
19902            # (maximum_space_to_comment), but it was not helpful
19903
19904            # don't exceed the available space
19905            if ( $move > $avail ) { $move = $avail }
19906
19907            # we can only increase space, never decrease
19908            if ( $move > 0 ) {
19909                $line->increase_field_width( $maximum_field_index - 1, $move );
19910            }
19911
19912            # remember this column for the next group
19913            $last_comment_column = $line->get_column( $kmax - 2 );
19914        }
19915        else {
19916
19917            # try to at least line up the existing side comment location
19918            if ( $kmax > 0 && $move > 0 && $move < $avail ) {
19919                $line->increase_field_width( $maximum_field_index - 1, $move );
19920                $do_not_align = 0;
19921            }
19922
19923            # reset side comment column if we can't align
19924            else {
19925                forget_side_comment();
19926            }
19927        }
19928    }
19929    return $do_not_align;
19930}
19931
19932sub improve_continuation_indentation {
19933    my ( $do_not_align, $group_leader_length ) = @_;
19934
19935    # See if we can increase the continuation indentation
19936    # to move all continuation lines closer to the next field
19937    # (unless it is a comment).
19938    #
19939    # '$min_ci_gap'is the extra indentation that we may need to introduce.
19940    # We will only introduce this to fields which already have some ci.
19941    # Without this variable, we would occasionally get something like this
19942    # (Complex.pm):
19943    #
19944    # use overload '+' => \&plus,
19945    #   '-'            => \&minus,
19946    #   '*'            => \&multiply,
19947    #   ...
19948    #   'tan'          => \&tan,
19949    #   'atan2'        => \&atan2,
19950    #
19951    # Whereas with this variable, we can shift variables over to get this:
19952    #
19953    # use overload '+' => \&plus,
19954    #          '-'     => \&minus,
19955    #          '*'     => \&multiply,
19956    #          ...
19957    #          'tan'   => \&tan,
19958    #          'atan2' => \&atan2,
19959
19960    ## BUB: Deactivated####################
19961    # The trouble with this patch is that it may, for example,
19962    # move in some 'or's  or ':'s, and leave some out, so that the
19963    # left edge alignment suffers.
19964    return 0;
19965    ###########################################
19966
19967    my $maximum_field_index = $group_lines[0]->get_jmax();
19968
19969    my $min_ci_gap = $rOpts_maximum_line_length;
19970    if ( $maximum_field_index > 1 && !$do_not_align ) {
19971
19972        for my $i ( 0 .. $maximum_line_index ) {
19973            my $line                = $group_lines[$i];
19974            my $leading_space_count = $line->get_leading_space_count();
19975            my $rfields             = $line->get_rfields();
19976
19977            my $gap =
19978              $line->get_column(0) -
19979              $leading_space_count -
19980              length( $$rfields[0] );
19981
19982            if ( $leading_space_count > $group_leader_length ) {
19983                if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
19984            }
19985        }
19986
19987        if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
19988            $min_ci_gap = 0;
19989        }
19990    }
19991    else {
19992        $min_ci_gap = 0;
19993    }
19994    return $min_ci_gap;
19995}
19996
19997sub write_vertically_aligned_line {
19998
19999    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
20000        $extra_leading_spaces )
20001      = @_;
20002    my $rfields                   = $line->get_rfields();
20003    my $leading_space_count       = $line->get_leading_space_count();
20004    my $outdent_long_lines        = $line->get_outdent_long_lines();
20005    my $maximum_field_index       = $line->get_jmax();
20006    my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
20007
20008    # add any extra spaces
20009    if ( $leading_space_count > $group_leader_length ) {
20010        $leading_space_count += $min_ci_gap;
20011    }
20012
20013    my $str = $$rfields[0];
20014
20015    # loop to concatenate all fields of this line and needed padding
20016    my $total_pad_count = 0;
20017    my ( $j, $pad );
20018    for $j ( 1 .. $maximum_field_index ) {
20019
20020        # skip zero-length side comments
20021        last
20022          if ( ( $j == $maximum_field_index )
20023            && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
20024          );
20025
20026        # compute spaces of padding before this field
20027        my $col = $line->get_column( $j - 1 );
20028        $pad = $col - ( length($str) + $leading_space_count );
20029
20030        if ($do_not_align) {
20031            $pad =
20032              ( $j < $maximum_field_index )
20033              ? 0
20034              : $rOpts_minimum_space_to_comment - 1;
20035        }
20036
20037        # if the -fpsc flag is set, move the side comment to the selected
20038        # column if and only if it is possible, ignoring constraints on
20039        # line length and minimum space to comment
20040        if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
20041        {
20042            my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
20043            if ( $newpad >= 0 ) { $pad = $newpad; }
20044        }
20045
20046        # accumulate the padding
20047        if ( $pad > 0 ) { $total_pad_count += $pad; }
20048
20049        # add this field
20050        if ( !defined $$rfields[$j] ) {
20051            write_diagnostics("UNDEFined field at j=$j\n");
20052        }
20053
20054        # only add padding when we have a finite field;
20055        # this avoids extra terminal spaces if we have empty fields
20056        if ( length( $$rfields[$j] ) > 0 ) {
20057            $str .= ' ' x $total_pad_count;
20058            $total_pad_count = 0;
20059            $str .= $$rfields[$j];
20060        }
20061        else {
20062            $total_pad_count = 0;
20063        }
20064
20065        # update side comment history buffer
20066        if ( $j == $maximum_field_index ) {
20067            my $lineno = $file_writer_object->get_output_line_number();
20068            shift @side_comment_history;
20069            push @side_comment_history, [ $lineno, $col ];
20070        }
20071    }
20072
20073    my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
20074
20075    # ship this line off
20076    write_leader_and_string( $leading_space_count + $extra_leading_spaces,
20077        $str, $side_comment_length, $outdent_long_lines,
20078        $rvertical_tightness_flags );
20079}
20080
20081sub get_extra_leading_spaces {
20082
20083    #----------------------------------------------------------
20084    # Define any extra indentation space (for the -lp option).
20085    # Here is why:
20086    # If a list has side comments, sub scan_list must dump the
20087    # list before it sees everything.  When this happens, it sets
20088    # the indentation to the standard scheme, but notes how
20089    # many spaces it would have liked to use.  We may be able
20090    # to recover that space here in the event that that all of the
20091    # lines of a list are back together again.
20092    #----------------------------------------------------------
20093
20094    my $extra_leading_spaces = 0;
20095    if ($extra_indent_ok) {
20096        my $object = $group_lines[0]->get_indentation();
20097        if ( ref($object) ) {
20098            my $extra_indentation_spaces_wanted =
20099              get_RECOVERABLE_SPACES($object);
20100
20101            # all indentation objects must be the same
20102            my $i;
20103            for $i ( 1 .. $maximum_line_index ) {
20104                if ( $object != $group_lines[$i]->get_indentation() ) {
20105                    $extra_indentation_spaces_wanted = 0;
20106                    last;
20107                }
20108            }
20109
20110            if ($extra_indentation_spaces_wanted) {
20111
20112                # the maximum space without exceeding the line length:
20113                my $avail = $group_lines[0]->get_available_space_on_right();
20114                $extra_leading_spaces =
20115                  ( $avail > $extra_indentation_spaces_wanted )
20116                  ? $extra_indentation_spaces_wanted
20117                  : $avail;
20118
20119                # update the indentation object because with -icp the terminal
20120                # ');' will use the same adjustment.
20121                $object->permanently_decrease_AVAILABLE_SPACES(
20122                    -$extra_leading_spaces );
20123            }
20124        }
20125    }
20126    return $extra_leading_spaces;
20127}
20128
20129sub combine_fields {
20130
20131    # combine all fields except for the comment field  ( sidecmt.t )
20132    # Uses global variables:
20133    #  @group_lines
20134    #  $maximum_line_index
20135    my ( $j, $k );
20136    my $maximum_field_index = $group_lines[0]->get_jmax();
20137    for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
20138        my $line    = $group_lines[$j];
20139        my $rfields = $line->get_rfields();
20140        foreach ( 1 .. $maximum_field_index - 1 ) {
20141            $$rfields[0] .= $$rfields[$_];
20142        }
20143        $$rfields[1] = $$rfields[$maximum_field_index];
20144
20145        $line->set_jmax(1);
20146        $line->set_column( 0, 0 );
20147        $line->set_column( 1, 0 );
20148
20149    }
20150    $maximum_field_index = 1;
20151
20152    for $j ( 0 .. $maximum_line_index ) {
20153        my $line    = $group_lines[$j];
20154        my $rfields = $line->get_rfields();
20155        for $k ( 0 .. $maximum_field_index ) {
20156            my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
20157            if ( $k == 0 ) {
20158                $pad += $group_lines[$j]->get_leading_space_count();
20159            }
20160
20161            if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
20162
20163        }
20164    }
20165}
20166
20167sub get_output_line_number {
20168
20169    # the output line number reported to a caller is the number of items
20170    # written plus the number of items in the buffer
20171    my $self = shift;
20172    1 + $maximum_line_index + $file_writer_object->get_output_line_number();
20173}
20174
20175sub write_leader_and_string {
20176
20177    my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
20178        $rvertical_tightness_flags )
20179      = @_;
20180
20181    # handle outdenting of long lines:
20182    if ($outdent_long_lines) {
20183        my $excess =
20184          length($str) -
20185          $side_comment_length +
20186          $leading_space_count -
20187          $rOpts_maximum_line_length;
20188        if ( $excess > 0 ) {
20189            $leading_space_count = 0;
20190            $last_outdented_line_at =
20191              $file_writer_object->get_output_line_number();
20192
20193            unless ($outdented_line_count) {
20194                $first_outdented_line_at = $last_outdented_line_at;
20195            }
20196            $outdented_line_count++;
20197        }
20198    }
20199
20200    # Make preliminary leading whitespace.  It could get changed
20201    # later by entabbing, so we have to keep track of any changes
20202    # to the leading_space_count from here on.
20203    my $leading_string =
20204      $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
20205
20206    # Unpack any recombination data; it was packed by
20207    # sub send_lines_to_vertical_aligner. Contents:
20208    #
20209    #   [0] type: 1=opening  2=closing  3=opening block brace
20210    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
20211    #             if closing: spaces of padding to use
20212    #   [2] sequence number of container
20213    #   [3] valid flag: do not append if this flag is false
20214    #
20215    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20216        $seqno_end );
20217    if ($rvertical_tightness_flags) {
20218        (
20219            $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20220            $seqno_end
20221        ) = @{$rvertical_tightness_flags};
20222    }
20223
20224    $seqno_string = $seqno_end;
20225
20226    # handle any cached line ..
20227    # either append this line to it or write it out
20228    if ( length($cached_line_text) ) {
20229
20230        if ( !$cached_line_valid ) {
20231            entab_and_output( $cached_line_text,
20232                $cached_line_leading_space_count,
20233                $last_group_level_written );
20234        }
20235
20236        # handle cached line with opening container token
20237        elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
20238
20239            my $gap = $leading_space_count - length($cached_line_text);
20240
20241            # handle option of just one tight opening per line:
20242            if ( $cached_line_flag == 1 ) {
20243                if ( defined($open_or_close) && $open_or_close == 1 ) {
20244                    $gap = -1;
20245                }
20246            }
20247
20248            if ( $gap >= 0 ) {
20249                $leading_string      = $cached_line_text . ' ' x $gap;
20250                $leading_space_count = $cached_line_leading_space_count;
20251                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
20252            }
20253            else {
20254                entab_and_output( $cached_line_text,
20255                    $cached_line_leading_space_count,
20256                    $last_group_level_written );
20257            }
20258        }
20259
20260        # handle cached line to place before this closing container token
20261        else {
20262            my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
20263
20264            if ( length($test_line) <= $rOpts_maximum_line_length ) {
20265
20266                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20267
20268                # Patch to outdent closing tokens ending # in ');'
20269                # If we are joining a line like ');' to a previous stacked
20270                # set of closing tokens, then decide if we may outdent the
20271                # combined stack to the indentation of the ');'.  Since we
20272                # should not normally outdent any of the other tokens more than
20273                # the indentation of the lines that contained them, we will
20274                # only do this if all of the corresponding opening
20275                # tokens were on the same line.  This can happen with
20276                # -sot and -sct.  For example, it is ok here:
20277                #   __PACKAGE__->load_components( qw(
20278                #         PK::Auto
20279                #         Core
20280                #   ));
20281                #
20282                #   But, for example, we do not outdent in this example because
20283                #   that would put the closing sub brace out farther than the
20284                #   opening sub brace:
20285                #
20286                #   perltidy -sot -sct
20287                #   $c->Tk::bind(
20288                #       '<Control-f>' => sub {
20289                #           my ($c) = @_;
20290                #           my $e = $c->XEvent;
20291                #           itemsUnderArea $c;
20292                #       } );
20293                #
20294                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
20295
20296                    # The way to tell this is if the stacked sequence numbers
20297                    # of this output line are the reverse of the stacked
20298                    # sequence numbers of the previous non-blank line of
20299                    # sequence numbers.  So we can join if the previous
20300                    # nonblank string of tokens is the mirror image.  For
20301                    # example if stack )}] is 13:8:6 then we are looking for a
20302                    # leading stack like [{( which is 6:8:13 We only need to
20303                    # check the two ends, because the intermediate tokens must
20304                    # fall in order.  Note on speed: having to split on colons
20305                    # and eliminate multiple colons might appear to be slow,
20306                    # but it's not an issue because we almost never come
20307                    # through here.  In a typical file we don't.
20308                    $seqno_string               =~ s/^:+//;
20309                    $last_nonblank_seqno_string =~ s/^:+//;
20310                    $seqno_string               =~ s/:+/:/g;
20311                    $last_nonblank_seqno_string =~ s/:+/:/g;
20312
20313                    # how many spaces can we outdent?
20314                    my $diff =
20315                      $cached_line_leading_space_count - $leading_space_count;
20316                    if (   $diff > 0
20317                        && length($seqno_string)
20318                        && length($last_nonblank_seqno_string) ==
20319                        length($seqno_string) )
20320                    {
20321                        my @seqno_last =
20322                          ( split ':', $last_nonblank_seqno_string );
20323                        my @seqno_now = ( split ':', $seqno_string );
20324                        if (   $seqno_now[-1] == $seqno_last[0]
20325                            && $seqno_now[0] == $seqno_last[-1] )
20326                        {
20327
20328                            # OK to outdent ..
20329                            # for absolute safety, be sure we only remove
20330                            # whitespace
20331                            my $ws = substr( $test_line, 0, $diff );
20332                            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
20333
20334                                $test_line = substr( $test_line, $diff );
20335                                $cached_line_leading_space_count -= $diff;
20336                            }
20337
20338                            # shouldn't happen, but not critical:
20339                            ##else {
20340                            ## ERROR transferring indentation here
20341                            ##}
20342                        }
20343                    }
20344                }
20345
20346                $str                 = $test_line;
20347                $leading_string      = "";
20348                $leading_space_count = $cached_line_leading_space_count;
20349            }
20350            else {
20351                entab_and_output( $cached_line_text,
20352                    $cached_line_leading_space_count,
20353                    $last_group_level_written );
20354            }
20355        }
20356    }
20357    $cached_line_type = 0;
20358    $cached_line_text = "";
20359
20360    # make the line to be written
20361    my $line = $leading_string . $str;
20362
20363    # write or cache this line
20364    if ( !$open_or_close || $side_comment_length > 0 ) {
20365        entab_and_output( $line, $leading_space_count, $group_level );
20366    }
20367    else {
20368        $cached_line_text                = $line;
20369        $cached_line_type                = $open_or_close;
20370        $cached_line_flag                = $tightness_flag;
20371        $cached_seqno                    = $seqno;
20372        $cached_line_valid               = $valid;
20373        $cached_line_leading_space_count = $leading_space_count;
20374        $cached_seqno_string             = $seqno_string;
20375    }
20376
20377    $last_group_level_written = $group_level;
20378    $last_side_comment_length = $side_comment_length;
20379    $extra_indent_ok          = 0;
20380}
20381
20382sub entab_and_output {
20383    my ( $line, $leading_space_count, $level ) = @_;
20384
20385    # The line is currently correct if there is no tabbing (recommended!)
20386    # We may have to lop off some leading spaces and replace with tabs.
20387    if ( $leading_space_count > 0 ) {
20388
20389        # Nothing to do if no tabs
20390        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20391            || $rOpts_indent_columns <= 0 )
20392        {
20393
20394            # nothing to do
20395        }
20396
20397        # Handle entab option
20398        elsif ($rOpts_entab_leading_whitespace) {
20399            my $space_count =
20400              $leading_space_count % $rOpts_entab_leading_whitespace;
20401            my $tab_count =
20402              int( $leading_space_count / $rOpts_entab_leading_whitespace );
20403            my $leading_string = "\t" x $tab_count . ' ' x $space_count;
20404            if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20405                substr( $line, 0, $leading_space_count ) = $leading_string;
20406            }
20407            else {
20408
20409                # REMOVE AFTER TESTING
20410                # shouldn't happen - program error counting whitespace
20411                # we'll skip entabbing
20412                warning(
20413"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20414                );
20415            }
20416        }
20417
20418        # Handle option of one tab per level
20419        else {
20420            my $leading_string = ( "\t" x $level );
20421            my $space_count =
20422              $leading_space_count - $level * $rOpts_indent_columns;
20423
20424            # shouldn't happen:
20425            if ( $space_count < 0 ) {
20426                warning(
20427"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
20428                );
20429                $leading_string = ( ' ' x $leading_space_count );
20430            }
20431            else {
20432                $leading_string .= ( ' ' x $space_count );
20433            }
20434            if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20435                substr( $line, 0, $leading_space_count ) = $leading_string;
20436            }
20437            else {
20438
20439                # REMOVE AFTER TESTING
20440                # shouldn't happen - program error counting whitespace
20441                # we'll skip entabbing
20442                warning(
20443"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20444                );
20445            }
20446        }
20447    }
20448    $file_writer_object->write_code_line( $line . "\n" );
20449    if ($seqno_string) {
20450        $last_nonblank_seqno_string = $seqno_string;
20451    }
20452}
20453
20454{    # begin get_leading_string
20455
20456    my @leading_string_cache;
20457
20458    sub get_leading_string {
20459
20460        # define the leading whitespace string for this line..
20461        my $leading_whitespace_count = shift;
20462
20463        # Handle case of zero whitespace, which includes multi-line quotes
20464        # (which may have a finite level; this prevents tab problems)
20465        if ( $leading_whitespace_count <= 0 ) {
20466            return "";
20467        }
20468
20469        # look for previous result
20470        elsif ( $leading_string_cache[$leading_whitespace_count] ) {
20471            return $leading_string_cache[$leading_whitespace_count];
20472        }
20473
20474        # must compute a string for this number of spaces
20475        my $leading_string;
20476
20477        # Handle simple case of no tabs
20478        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20479            || $rOpts_indent_columns <= 0 )
20480        {
20481            $leading_string = ( ' ' x $leading_whitespace_count );
20482        }
20483
20484        # Handle entab option
20485        elsif ($rOpts_entab_leading_whitespace) {
20486            my $space_count =
20487              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
20488            my $tab_count = int(
20489                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
20490            $leading_string = "\t" x $tab_count . ' ' x $space_count;
20491        }
20492
20493        # Handle option of one tab per level
20494        else {
20495            $leading_string = ( "\t" x $group_level );
20496            my $space_count =
20497              $leading_whitespace_count - $group_level * $rOpts_indent_columns;
20498
20499            # shouldn't happen:
20500            if ( $space_count < 0 ) {
20501                warning(
20502"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
20503                );
20504                $leading_string = ( ' ' x $leading_whitespace_count );
20505            }
20506            else {
20507                $leading_string .= ( ' ' x $space_count );
20508            }
20509        }
20510        $leading_string_cache[$leading_whitespace_count] = $leading_string;
20511        return $leading_string;
20512    }
20513}    # end get_leading_string
20514
20515sub report_anything_unusual {
20516    my $self = shift;
20517    if ( $outdented_line_count > 0 ) {
20518        write_logfile_entry(
20519            "$outdented_line_count long lines were outdented:\n");
20520        write_logfile_entry(
20521            "  First at output line $first_outdented_line_at\n");
20522
20523        if ( $outdented_line_count > 1 ) {
20524            write_logfile_entry(
20525                "   Last at output line $last_outdented_line_at\n");
20526        }
20527        write_logfile_entry(
20528            "  use -noll to prevent outdenting, -l=n to increase line length\n"
20529        );
20530        write_logfile_entry("\n");
20531    }
20532}
20533
20534#####################################################################
20535#
20536# the Perl::Tidy::FileWriter class writes the output file
20537#
20538#####################################################################
20539
20540package Perl::Tidy::FileWriter;
20541
20542# Maximum number of little messages; probably need not be changed.
20543use constant MAX_NAG_MESSAGES => 6;
20544
20545sub write_logfile_entry {
20546    my $self          = shift;
20547    my $logger_object = $self->{_logger_object};
20548    if ($logger_object) {
20549        $logger_object->write_logfile_entry(@_);
20550    }
20551}
20552
20553sub new {
20554    my $class = shift;
20555    my ( $line_sink_object, $rOpts, $logger_object ) = @_;
20556
20557    bless {
20558        _line_sink_object           => $line_sink_object,
20559        _logger_object              => $logger_object,
20560        _rOpts                      => $rOpts,
20561        _output_line_number         => 1,
20562        _consecutive_blank_lines    => 0,
20563        _consecutive_nonblank_lines => 0,
20564        _first_line_length_error    => 0,
20565        _max_line_length_error      => 0,
20566        _last_line_length_error     => 0,
20567        _first_line_length_error_at => 0,
20568        _max_line_length_error_at   => 0,
20569        _last_line_length_error_at  => 0,
20570        _line_length_error_count    => 0,
20571        _max_output_line_length     => 0,
20572        _max_output_line_length_at  => 0,
20573    }, $class;
20574}
20575
20576sub tee_on {
20577    my $self = shift;
20578    $self->{_line_sink_object}->tee_on();
20579}
20580
20581sub tee_off {
20582    my $self = shift;
20583    $self->{_line_sink_object}->tee_off();
20584}
20585
20586sub get_output_line_number {
20587    my $self = shift;
20588    return $self->{_output_line_number};
20589}
20590
20591sub decrement_output_line_number {
20592    my $self = shift;
20593    $self->{_output_line_number}--;
20594}
20595
20596sub get_consecutive_nonblank_lines {
20597    my $self = shift;
20598    return $self->{_consecutive_nonblank_lines};
20599}
20600
20601sub reset_consecutive_blank_lines {
20602    my $self = shift;
20603    $self->{_consecutive_blank_lines} = 0;
20604}
20605
20606sub want_blank_line {
20607    my $self = shift;
20608    unless ( $self->{_consecutive_blank_lines} ) {
20609        $self->write_blank_code_line();
20610    }
20611}
20612
20613sub write_blank_code_line {
20614    my $self   = shift;
20615    my $forced = shift;
20616    my $rOpts  = $self->{_rOpts};
20617    return
20618      if (!$forced
20619        && $self->{_consecutive_blank_lines} >=
20620        $rOpts->{'maximum-consecutive-blank-lines'} );
20621    $self->{_consecutive_blank_lines}++;
20622    $self->{_consecutive_nonblank_lines} = 0;
20623    $self->write_line("\n");
20624}
20625
20626sub write_code_line {
20627    my $self = shift;
20628    my $a    = shift;
20629
20630    if ( $a =~ /^\s*$/ ) {
20631        my $rOpts = $self->{_rOpts};
20632        return
20633          if ( $self->{_consecutive_blank_lines} >=
20634            $rOpts->{'maximum-consecutive-blank-lines'} );
20635        $self->{_consecutive_blank_lines}++;
20636        $self->{_consecutive_nonblank_lines} = 0;
20637    }
20638    else {
20639        $self->{_consecutive_blank_lines} = 0;
20640        $self->{_consecutive_nonblank_lines}++;
20641    }
20642    $self->write_line($a);
20643}
20644
20645sub write_line {
20646    my $self = shift;
20647    my $a    = shift;
20648
20649    # TODO: go through and see if the test is necessary here
20650    if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
20651
20652    $self->{_line_sink_object}->write_line($a);
20653
20654    # This calculation of excess line length ignores any internal tabs
20655    my $rOpts  = $self->{_rOpts};
20656    my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
20657    if ( $a =~ /^\t+/g ) {
20658        $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
20659    }
20660
20661    # Note that we just incremented output line number to future value
20662    # so we must subtract 1 for current line number
20663    if ( length($a) > 1 + $self->{_max_output_line_length} ) {
20664        $self->{_max_output_line_length}    = length($a) - 1;
20665        $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
20666    }
20667
20668    if ( $exceed > 0 ) {
20669        my $output_line_number = $self->{_output_line_number};
20670        $self->{_last_line_length_error}    = $exceed;
20671        $self->{_last_line_length_error_at} = $output_line_number - 1;
20672        if ( $self->{_line_length_error_count} == 0 ) {
20673            $self->{_first_line_length_error}    = $exceed;
20674            $self->{_first_line_length_error_at} = $output_line_number - 1;
20675        }
20676
20677        if (
20678            $self->{_last_line_length_error} > $self->{_max_line_length_error} )
20679        {
20680            $self->{_max_line_length_error}    = $exceed;
20681            $self->{_max_line_length_error_at} = $output_line_number - 1;
20682        }
20683
20684        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
20685            $self->write_logfile_entry(
20686                "Line length exceeded by $exceed characters\n");
20687        }
20688        $self->{_line_length_error_count}++;
20689    }
20690
20691}
20692
20693sub report_line_length_errors {
20694    my $self                    = shift;
20695    my $rOpts                   = $self->{_rOpts};
20696    my $line_length_error_count = $self->{_line_length_error_count};
20697    if ( $line_length_error_count == 0 ) {
20698        $self->write_logfile_entry(
20699            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
20700        my $max_output_line_length    = $self->{_max_output_line_length};
20701        my $max_output_line_length_at = $self->{_max_output_line_length_at};
20702        $self->write_logfile_entry(
20703"  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
20704        );
20705
20706    }
20707    else {
20708
20709        my $word = ( $line_length_error_count > 1 ) ? "s" : "";
20710        $self->write_logfile_entry(
20711"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
20712        );
20713
20714        $word = ( $line_length_error_count > 1 ) ? "First" : "";
20715        my $first_line_length_error    = $self->{_first_line_length_error};
20716        my $first_line_length_error_at = $self->{_first_line_length_error_at};
20717        $self->write_logfile_entry(
20718" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
20719        );
20720
20721        if ( $line_length_error_count > 1 ) {
20722            my $max_line_length_error     = $self->{_max_line_length_error};
20723            my $max_line_length_error_at  = $self->{_max_line_length_error_at};
20724            my $last_line_length_error    = $self->{_last_line_length_error};
20725            my $last_line_length_error_at = $self->{_last_line_length_error_at};
20726            $self->write_logfile_entry(
20727" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
20728            );
20729            $self->write_logfile_entry(
20730" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
20731            );
20732        }
20733    }
20734}
20735
20736#####################################################################
20737#
20738# The Perl::Tidy::Debugger class shows line tokenization
20739#
20740#####################################################################
20741
20742package Perl::Tidy::Debugger;
20743
20744sub new {
20745
20746    my ( $class, $filename ) = @_;
20747
20748    bless {
20749        _debug_file        => $filename,
20750        _debug_file_opened => 0,
20751        _fh                => undef,
20752    }, $class;
20753}
20754
20755sub really_open_debug_file {
20756
20757    my $self       = shift;
20758    my $debug_file = $self->{_debug_file};
20759    my $fh;
20760    unless ( $fh = IO::File->new("> $debug_file") ) {
20761        warn("can't open $debug_file: $!\n");
20762    }
20763    $self->{_debug_file_opened} = 1;
20764    $self->{_fh}                = $fh;
20765    print $fh
20766      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
20767}
20768
20769sub close_debug_file {
20770
20771    my $self = shift;
20772    my $fh   = $self->{_fh};
20773    if ( $self->{_debug_file_opened} ) {
20774
20775        eval { $self->{_fh}->close() };
20776    }
20777}
20778
20779sub write_debug_entry {
20780
20781    # This is a debug dump routine which may be modified as necessary
20782    # to dump tokens on a line-by-line basis.  The output will be written
20783    # to the .DEBUG file when the -D flag is entered.
20784    my $self           = shift;
20785    my $line_of_tokens = shift;
20786
20787    my $input_line        = $line_of_tokens->{_line_text};
20788    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
20789    my $rtokens           = $line_of_tokens->{_rtokens};
20790    my $rlevels           = $line_of_tokens->{_rlevels};
20791    my $rslevels          = $line_of_tokens->{_rslevels};
20792    my $rblock_type       = $line_of_tokens->{_rblock_type};
20793    my $input_line_number = $line_of_tokens->{_line_number};
20794    my $line_type         = $line_of_tokens->{_line_type};
20795
20796    my ( $j, $num );
20797
20798    my $token_str              = "$input_line_number: ";
20799    my $reconstructed_original = "$input_line_number: ";
20800    my $block_str              = "$input_line_number: ";
20801
20802    #$token_str .= "$line_type: ";
20803    #$reconstructed_original .= "$line_type: ";
20804
20805    my $pattern   = "";
20806    my @next_char = ( '"', '"' );
20807    my $i_next    = 0;
20808    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
20809    my $fh = $self->{_fh};
20810
20811    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
20812
20813        # testing patterns
20814        if ( $$rtoken_type[$j] eq 'k' ) {
20815            $pattern .= $$rtokens[$j];
20816        }
20817        else {
20818            $pattern .= $$rtoken_type[$j];
20819        }
20820        $reconstructed_original .= $$rtokens[$j];
20821        $block_str .= "($$rblock_type[$j])";
20822        $num = length( $$rtokens[$j] );
20823        my $type_str = $$rtoken_type[$j];
20824
20825        # be sure there are no blank tokens (shouldn't happen)
20826        # This can only happen if a programming error has been made
20827        # because all valid tokens are non-blank
20828        if ( $type_str eq ' ' ) {
20829            print $fh "BLANK TOKEN on the next line\n";
20830            $type_str = $next_char[$i_next];
20831            $i_next   = 1 - $i_next;
20832        }
20833
20834        if ( length($type_str) == 1 ) {
20835            $type_str = $type_str x $num;
20836        }
20837        $token_str .= $type_str;
20838    }
20839
20840    # Write what you want here ...
20841    # print $fh "$input_line\n";
20842    # print $fh "$pattern\n";
20843    print $fh "$reconstructed_original\n";
20844    print $fh "$token_str\n";
20845
20846    #print $fh "$block_str\n";
20847}
20848
20849#####################################################################
20850#
20851# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
20852# method for returning the next line to be parsed, as well as a
20853# 'peek_ahead()' method
20854#
20855# The input parameter is an object with a 'get_line()' method
20856# which returns the next line to be parsed
20857#
20858#####################################################################
20859
20860package Perl::Tidy::LineBuffer;
20861
20862sub new {
20863
20864    my $class              = shift;
20865    my $line_source_object = shift;
20866
20867    return bless {
20868        _line_source_object => $line_source_object,
20869        _rlookahead_buffer  => [],
20870    }, $class;
20871}
20872
20873sub peek_ahead {
20874    my $self               = shift;
20875    my $buffer_index       = shift;
20876    my $line               = undef;
20877    my $line_source_object = $self->{_line_source_object};
20878    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20879    if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
20880        $line = $$rlookahead_buffer[$buffer_index];
20881    }
20882    else {
20883        $line = $line_source_object->get_line();
20884        push( @$rlookahead_buffer, $line );
20885    }
20886    return $line;
20887}
20888
20889sub get_line {
20890    my $self               = shift;
20891    my $line               = undef;
20892    my $line_source_object = $self->{_line_source_object};
20893    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20894
20895    if ( scalar(@$rlookahead_buffer) ) {
20896        $line = shift @$rlookahead_buffer;
20897    }
20898    else {
20899        $line = $line_source_object->get_line();
20900    }
20901    return $line;
20902}
20903
20904########################################################################
20905#
20906# the Perl::Tidy::Tokenizer package is essentially a filter which
20907# reads lines of perl source code from a source object and provides
20908# corresponding tokenized lines through its get_line() method.  Lines
20909# flow from the source_object to the caller like this:
20910#
20911# source_object --> LineBuffer_object --> Tokenizer -->  calling routine
20912#   get_line()         get_line()           get_line()     line_of_tokens
20913#
20914# The source object can be any object with a get_line() method which
20915# supplies one line (a character string) perl call.
20916# The LineBuffer object is created by the Tokenizer.
20917# The Tokenizer returns a reference to a data structure 'line_of_tokens'
20918# containing one tokenized line for each call to its get_line() method.
20919#
20920# WARNING: This is not a real class yet.  Only one tokenizer my be used.
20921#
20922########################################################################
20923
20924package Perl::Tidy::Tokenizer;
20925
20926BEGIN {
20927
20928    # Caution: these debug flags produce a lot of output
20929    # They should all be 0 except when debugging small scripts
20930
20931    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
20932    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
20933    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
20934    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
20935    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
20936
20937    my $debug_warning = sub {
20938        print "TOKENIZER_DEBUGGING with key $_[0]\n";
20939    };
20940
20941    TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
20942    TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
20943    TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
20944    TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
20945    TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
20946
20947}
20948
20949use Carp;
20950
20951# PACKAGE VARIABLES for for processing an entire FILE.
20952use vars qw{
20953  $tokenizer_self
20954
20955  $last_nonblank_token
20956  $last_nonblank_type
20957  $last_nonblank_block_type
20958  $statement_type
20959  $in_attribute_list
20960  $current_package
20961  $context
20962
20963  %is_constant
20964  %is_user_function
20965  %user_function_prototype
20966  %is_block_function
20967  %is_block_list_function
20968  %saw_function_definition
20969
20970  $brace_depth
20971  $paren_depth
20972  $square_bracket_depth
20973
20974  @current_depth
20975  @total_depth
20976  $total_depth
20977  @nesting_sequence_number
20978  @current_sequence_number
20979  @paren_type
20980  @paren_semicolon_count
20981  @paren_structural_type
20982  @brace_type
20983  @brace_structural_type
20984  @brace_statement_type
20985  @brace_context
20986  @brace_package
20987  @square_bracket_type
20988  @square_bracket_structural_type
20989  @depth_array
20990  @nested_ternary_flag
20991  @starting_line_of_current_depth
20992};
20993
20994# GLOBAL CONSTANTS for routines in this package
20995use vars qw{
20996  %is_indirect_object_taker
20997  %is_block_operator
20998  %expecting_operator_token
20999  %expecting_operator_types
21000  %expecting_term_types
21001  %expecting_term_token
21002  %is_digraph
21003  %is_file_test_operator
21004  %is_trigraph
21005  %is_valid_token_type
21006  %is_keyword
21007  %is_code_block_token
21008  %really_want_term
21009  @opening_brace_names
21010  @closing_brace_names
21011  %is_keyword_taking_list
21012  %is_q_qq_qw_qx_qr_s_y_tr_m
21013};
21014
21015# possible values of operator_expected()
21016use constant TERM     => -1;
21017use constant UNKNOWN  => 0;
21018use constant OPERATOR => 1;
21019
21020# possible values of context
21021use constant SCALAR_CONTEXT  => -1;
21022use constant UNKNOWN_CONTEXT => 0;
21023use constant LIST_CONTEXT    => 1;
21024
21025# Maximum number of little messages; probably need not be changed.
21026use constant MAX_NAG_MESSAGES => 6;
21027
21028{
21029
21030    # methods to count instances
21031    my $_count = 0;
21032    sub get_count        { $_count; }
21033    sub _increment_count { ++$_count }
21034    sub _decrement_count { --$_count }
21035}
21036
21037sub DESTROY {
21038    $_[0]->_decrement_count();
21039}
21040
21041sub new {
21042
21043    my $class = shift;
21044
21045    # Note: 'tabs' and 'indent_columns' are temporary and should be
21046    # removed asap
21047    my %defaults = (
21048        source_object        => undef,
21049        debugger_object      => undef,
21050        diagnostics_object   => undef,
21051        logger_object        => undef,
21052        starting_level       => undef,
21053        indent_columns       => 4,
21054        tabs                 => 0,
21055        entab_leading_space  => undef,
21056        look_for_hash_bang   => 0,
21057        trim_qw              => 1,
21058        look_for_autoloader  => 1,
21059        look_for_selfloader  => 1,
21060        starting_line_number => 1,
21061    );
21062    my %args = ( %defaults, @_ );
21063
21064    # we are given an object with a get_line() method to supply source lines
21065    my $source_object = $args{source_object};
21066
21067    # we create another object with a get_line() and peek_ahead() method
21068    my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
21069
21070    # Tokenizer state data is as follows:
21071    # _rhere_target_list    reference to list of here-doc targets
21072    # _here_doc_target      the target string for a here document
21073    # _here_quote_character the type of here-doc quoting (" ' ` or none)
21074    #                       to determine if interpolation is done
21075    # _quote_target         character we seek if chasing a quote
21076    # _line_start_quote     line where we started looking for a long quote
21077    # _in_here_doc          flag indicating if we are in a here-doc
21078    # _in_pod               flag set if we are in pod documentation
21079    # _in_error             flag set if we saw severe error (binary in script)
21080    # _in_data              flag set if we are in __DATA__ section
21081    # _in_end               flag set if we are in __END__ section
21082    # _in_format            flag set if we are in a format description
21083    # _in_attribute_list    flag telling if we are looking for attributes
21084    # _in_quote             flag telling if we are chasing a quote
21085    # _starting_level       indentation level of first line
21086    # _input_tabstr         string denoting one indentation level of input file
21087    # _know_input_tabstr    flag indicating if we know _input_tabstr
21088    # _line_buffer_object   object with get_line() method to supply source code
21089    # _diagnostics_object   place to write debugging information
21090    # _unexpected_error_count  error count used to limit output
21091    # _lower_case_labels_at  line numbers where lower case labels seen
21092    $tokenizer_self = {
21093        _rhere_target_list                  => [],
21094        _in_here_doc                        => 0,
21095        _here_doc_target                    => "",
21096        _here_quote_character               => "",
21097        _in_data                            => 0,
21098        _in_end                             => 0,
21099        _in_format                          => 0,
21100        _in_error                           => 0,
21101        _in_pod                             => 0,
21102        _in_attribute_list                  => 0,
21103        _in_quote                           => 0,
21104        _quote_target                       => "",
21105        _line_start_quote                   => -1,
21106        _starting_level                     => $args{starting_level},
21107        _know_starting_level                => defined( $args{starting_level} ),
21108        _tabs                               => $args{tabs},
21109        _entab_leading_space                => $args{entab_leading_space},
21110        _indent_columns                     => $args{indent_columns},
21111        _look_for_hash_bang                 => $args{look_for_hash_bang},
21112        _trim_qw                            => $args{trim_qw},
21113        _input_tabstr                       => "",
21114        _know_input_tabstr                  => -1,
21115        _last_line_number                   => $args{starting_line_number} - 1,
21116        _saw_perl_dash_P                    => 0,
21117        _saw_perl_dash_w                    => 0,
21118        _saw_use_strict                     => 0,
21119        _saw_v_string                       => 0,
21120        _look_for_autoloader                => $args{look_for_autoloader},
21121        _look_for_selfloader                => $args{look_for_selfloader},
21122        _saw_autoloader                     => 0,
21123        _saw_selfloader                     => 0,
21124        _saw_hash_bang                      => 0,
21125        _saw_end                            => 0,
21126        _saw_data                           => 0,
21127        _saw_negative_indentation           => 0,
21128        _started_tokenizing                 => 0,
21129        _line_buffer_object                 => $line_buffer_object,
21130        _debugger_object                    => $args{debugger_object},
21131        _diagnostics_object                 => $args{diagnostics_object},
21132        _logger_object                      => $args{logger_object},
21133        _unexpected_error_count             => 0,
21134        _started_looking_for_here_target_at => 0,
21135        _nearly_matched_here_target_at      => undef,
21136        _line_text                          => "",
21137        _rlower_case_labels_at              => undef,
21138    };
21139
21140    prepare_for_a_new_file();
21141    find_starting_indentation_level();
21142
21143    bless $tokenizer_self, $class;
21144
21145    # This is not a full class yet, so die if an attempt is made to
21146    # create more than one object.
21147
21148    if ( _increment_count() > 1 ) {
21149        confess
21150"Attempt to create more than 1 object in $class, which is not a true class yet\n";
21151    }
21152
21153    return $tokenizer_self;
21154
21155}
21156
21157# interface to Perl::Tidy::Logger routines
21158sub warning {
21159    my $logger_object = $tokenizer_self->{_logger_object};
21160    if ($logger_object) {
21161        $logger_object->warning(@_);
21162    }
21163}
21164
21165sub complain {
21166    my $logger_object = $tokenizer_self->{_logger_object};
21167    if ($logger_object) {
21168        $logger_object->complain(@_);
21169    }
21170}
21171
21172sub write_logfile_entry {
21173    my $logger_object = $tokenizer_self->{_logger_object};
21174    if ($logger_object) {
21175        $logger_object->write_logfile_entry(@_);
21176    }
21177}
21178
21179sub interrupt_logfile {
21180    my $logger_object = $tokenizer_self->{_logger_object};
21181    if ($logger_object) {
21182        $logger_object->interrupt_logfile();
21183    }
21184}
21185
21186sub resume_logfile {
21187    my $logger_object = $tokenizer_self->{_logger_object};
21188    if ($logger_object) {
21189        $logger_object->resume_logfile();
21190    }
21191}
21192
21193sub increment_brace_error {
21194    my $logger_object = $tokenizer_self->{_logger_object};
21195    if ($logger_object) {
21196        $logger_object->increment_brace_error();
21197    }
21198}
21199
21200sub report_definite_bug {
21201    my $logger_object = $tokenizer_self->{_logger_object};
21202    if ($logger_object) {
21203        $logger_object->report_definite_bug();
21204    }
21205}
21206
21207sub brace_warning {
21208    my $logger_object = $tokenizer_self->{_logger_object};
21209    if ($logger_object) {
21210        $logger_object->brace_warning(@_);
21211    }
21212}
21213
21214sub get_saw_brace_error {
21215    my $logger_object = $tokenizer_self->{_logger_object};
21216    if ($logger_object) {
21217        $logger_object->get_saw_brace_error();
21218    }
21219    else {
21220        0;
21221    }
21222}
21223
21224# interface to Perl::Tidy::Diagnostics routines
21225sub write_diagnostics {
21226    if ( $tokenizer_self->{_diagnostics_object} ) {
21227        $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
21228    }
21229}
21230
21231sub report_tokenization_errors {
21232
21233    my $self = shift;
21234
21235    my $level = get_indentation_level();
21236    if ( $level != $tokenizer_self->{_starting_level} ) {
21237        warning("final indentation level: $level\n");
21238    }
21239
21240    check_final_nesting_depths();
21241
21242    if ( $tokenizer_self->{_look_for_hash_bang}
21243        && !$tokenizer_self->{_saw_hash_bang} )
21244    {
21245        warning(
21246            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
21247    }
21248
21249    if ( $tokenizer_self->{_in_format} ) {
21250        warning("hit EOF while in format description\n");
21251    }
21252
21253    if ( $tokenizer_self->{_in_pod} ) {
21254
21255        # Just write log entry if this is after __END__ or __DATA__
21256        # because this happens to often, and it is not likely to be
21257        # a parsing error.
21258        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21259            write_logfile_entry(
21260"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21261            );
21262        }
21263
21264        else {
21265            complain(
21266"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21267            );
21268        }
21269
21270    }
21271
21272    if ( $tokenizer_self->{_in_here_doc} ) {
21273        my $here_doc_target = $tokenizer_self->{_here_doc_target};
21274        my $started_looking_for_here_target_at =
21275          $tokenizer_self->{_started_looking_for_here_target_at};
21276        if ($here_doc_target) {
21277            warning(
21278"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
21279            );
21280        }
21281        else {
21282            warning(
21283"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
21284            );
21285        }
21286        my $nearly_matched_here_target_at =
21287          $tokenizer_self->{_nearly_matched_here_target_at};
21288        if ($nearly_matched_here_target_at) {
21289            warning(
21290"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
21291            );
21292        }
21293    }
21294
21295    if ( $tokenizer_self->{_in_quote} ) {
21296        my $line_start_quote = $tokenizer_self->{_line_start_quote};
21297        my $quote_target     = $tokenizer_self->{_quote_target};
21298        my $what =
21299          ( $tokenizer_self->{_in_attribute_list} )
21300          ? "attribute list"
21301          : "quote/pattern";
21302        warning(
21303"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
21304        );
21305    }
21306
21307    unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
21308        if ( $] < 5.006 ) {
21309            write_logfile_entry("Suggest including '-w parameter'\n");
21310        }
21311        else {
21312            write_logfile_entry("Suggest including 'use warnings;'\n");
21313        }
21314    }
21315
21316    if ( $tokenizer_self->{_saw_perl_dash_P} ) {
21317        write_logfile_entry("Use of -P parameter for defines is discouraged\n");
21318    }
21319
21320    unless ( $tokenizer_self->{_saw_use_strict} ) {
21321        write_logfile_entry("Suggest including 'use strict;'\n");
21322    }
21323
21324    # it is suggested that lables have at least one upper case character
21325    # for legibility and to avoid code breakage as new keywords are introduced
21326    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
21327        my @lower_case_labels_at =
21328          @{ $tokenizer_self->{_rlower_case_labels_at} };
21329        write_logfile_entry(
21330            "Suggest using upper case characters in label(s)\n");
21331        local $" = ')(';
21332        write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
21333    }
21334}
21335
21336sub report_v_string {
21337
21338    # warn if this version can't handle v-strings
21339    my $tok = shift;
21340    unless ( $tokenizer_self->{_saw_v_string} ) {
21341        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
21342    }
21343    if ( $] < 5.006 ) {
21344        warning(
21345"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
21346        );
21347    }
21348}
21349
21350sub get_input_line_number {
21351    return $tokenizer_self->{_last_line_number};
21352}
21353
21354# returns the next tokenized line
21355sub get_line {
21356
21357    my $self = shift;
21358
21359    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
21360    # $square_bracket_depth, $paren_depth
21361
21362    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
21363    $tokenizer_self->{_line_text} = $input_line;
21364
21365    return undef unless ($input_line);
21366
21367    my $input_line_number = ++$tokenizer_self->{_last_line_number};
21368
21369    # Find and remove what characters terminate this line, including any
21370    # control r
21371    my $input_line_separator = "";
21372    if ( chomp($input_line) ) { $input_line_separator = $/ }
21373
21374    # TODO: what other characters should be included here?
21375    if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
21376        $input_line_separator = $2 . $input_line_separator;
21377    }
21378
21379    # for backwards compatability we keep the line text terminated with
21380    # a newline character
21381    $input_line .= "\n";
21382    $tokenizer_self->{_line_text} = $input_line;    # update
21383
21384    # create a data structure describing this line which will be
21385    # returned to the caller.
21386
21387    # _line_type codes are:
21388    #   SYSTEM         - system-specific code before hash-bang line
21389    #   CODE           - line of perl code (including comments)
21390    #   POD_START      - line starting pod, such as '=head'
21391    #   POD            - pod documentation text
21392    #   POD_END        - last line of pod section, '=cut'
21393    #   HERE           - text of here-document
21394    #   HERE_END       - last line of here-doc (target word)
21395    #   FORMAT         - format section
21396    #   FORMAT_END     - last line of format section, '.'
21397    #   DATA_START     - __DATA__ line
21398    #   DATA           - unidentified text following __DATA__
21399    #   END_START      - __END__ line
21400    #   END            - unidentified text following __END__
21401    #   ERROR          - we are in big trouble, probably not a perl script
21402
21403    # Other variables:
21404    #   _curly_brace_depth     - depth of curly braces at start of line
21405    #   _square_bracket_depth  - depth of square brackets at start of line
21406    #   _paren_depth           - depth of parens at start of line
21407    #   _starting_in_quote     - this line continues a multi-line quote
21408    #                            (so don't trim leading blanks!)
21409    #   _ending_in_quote       - this line ends in a multi-line quote
21410    #                            (so don't trim trailing blanks!)
21411    my $line_of_tokens = {
21412        _line_type                => 'EOF',
21413        _line_text                => $input_line,
21414        _line_number              => $input_line_number,
21415        _rtoken_type              => undef,
21416        _rtokens                  => undef,
21417        _rlevels                  => undef,
21418        _rslevels                 => undef,
21419        _rblock_type              => undef,
21420        _rcontainer_type          => undef,
21421        _rcontainer_environment   => undef,
21422        _rtype_sequence           => undef,
21423        _rnesting_tokens          => undef,
21424        _rci_levels               => undef,
21425        _rnesting_blocks          => undef,
21426        _python_indentation_level => -1,                   ## 0,
21427        _starting_in_quote    => 0,                    # to be set by subroutine
21428        _ending_in_quote      => 0,
21429        _curly_brace_depth    => $brace_depth,
21430        _square_bracket_depth => $square_bracket_depth,
21431        _paren_depth          => $paren_depth,
21432        _quote_character      => '',
21433    };
21434
21435    # must print line unchanged if we are in a here document
21436    if ( $tokenizer_self->{_in_here_doc} ) {
21437
21438        $line_of_tokens->{_line_type} = 'HERE';
21439        my $here_doc_target      = $tokenizer_self->{_here_doc_target};
21440        my $here_quote_character = $tokenizer_self->{_here_quote_character};
21441        my $candidate_target     = $input_line;
21442        chomp $candidate_target;
21443        if ( $candidate_target eq $here_doc_target ) {
21444            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21445            $line_of_tokens->{_line_type}                     = 'HERE_END';
21446            write_logfile_entry("Exiting HERE document $here_doc_target\n");
21447
21448            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21449            if (@$rhere_target_list) {    # there can be multiple here targets
21450                ( $here_doc_target, $here_quote_character ) =
21451                  @{ shift @$rhere_target_list };
21452                $tokenizer_self->{_here_doc_target} = $here_doc_target;
21453                $tokenizer_self->{_here_quote_character} =
21454                  $here_quote_character;
21455                write_logfile_entry(
21456                    "Entering HERE document $here_doc_target\n");
21457                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21458                $tokenizer_self->{_started_looking_for_here_target_at} =
21459                  $input_line_number;
21460            }
21461            else {
21462                $tokenizer_self->{_in_here_doc}          = 0;
21463                $tokenizer_self->{_here_doc_target}      = "";
21464                $tokenizer_self->{_here_quote_character} = "";
21465            }
21466        }
21467
21468        # check for error of extra whitespace
21469        # note for PERL6: leading whitespace is allowed
21470        else {
21471            $candidate_target =~ s/\s*$//;
21472            $candidate_target =~ s/^\s*//;
21473            if ( $candidate_target eq $here_doc_target ) {
21474                $tokenizer_self->{_nearly_matched_here_target_at} =
21475                  $input_line_number;
21476            }
21477        }
21478        return $line_of_tokens;
21479    }
21480
21481    # must print line unchanged if we are in a format section
21482    elsif ( $tokenizer_self->{_in_format} ) {
21483
21484        if ( $input_line =~ /^\.[\s#]*$/ ) {
21485            write_logfile_entry("Exiting format section\n");
21486            $tokenizer_self->{_in_format} = 0;
21487            $line_of_tokens->{_line_type} = 'FORMAT_END';
21488        }
21489        else {
21490            $line_of_tokens->{_line_type} = 'FORMAT';
21491        }
21492        return $line_of_tokens;
21493    }
21494
21495    # must print line unchanged if we are in pod documentation
21496    elsif ( $tokenizer_self->{_in_pod} ) {
21497
21498        $line_of_tokens->{_line_type} = 'POD';
21499        if ( $input_line =~ /^=cut/ ) {
21500            $line_of_tokens->{_line_type} = 'POD_END';
21501            write_logfile_entry("Exiting POD section\n");
21502            $tokenizer_self->{_in_pod} = 0;
21503        }
21504        if ( $input_line =~ /^\#\!.*perl\b/ ) {
21505            warning(
21506                "Hash-bang in pod can cause older versions of perl to fail! \n"
21507            );
21508        }
21509
21510        return $line_of_tokens;
21511    }
21512
21513    # must print line unchanged if we have seen a severe error (i.e., we
21514    # are seeing illegal tokens and connot continue.  Syntax errors do
21515    # not pass this route).  Calling routine can decide what to do, but
21516    # the default can be to just pass all lines as if they were after __END__
21517    elsif ( $tokenizer_self->{_in_error} ) {
21518        $line_of_tokens->{_line_type} = 'ERROR';
21519        return $line_of_tokens;
21520    }
21521
21522    # print line unchanged if we are __DATA__ section
21523    elsif ( $tokenizer_self->{_in_data} ) {
21524
21525        # ...but look for POD
21526        # Note that the _in_data and _in_end flags remain set
21527        # so that we return to that state after seeing the
21528        # end of a pod section
21529        if ( $input_line =~ /^=(?!cut)/ ) {
21530            $line_of_tokens->{_line_type} = 'POD_START';
21531            write_logfile_entry("Entering POD section\n");
21532            $tokenizer_self->{_in_pod} = 1;
21533            return $line_of_tokens;
21534        }
21535        else {
21536            $line_of_tokens->{_line_type} = 'DATA';
21537            return $line_of_tokens;
21538        }
21539    }
21540
21541    # print line unchanged if we are in __END__ section
21542    elsif ( $tokenizer_self->{_in_end} ) {
21543
21544        # ...but look for POD
21545        # Note that the _in_data and _in_end flags remain set
21546        # so that we return to that state after seeing the
21547        # end of a pod section
21548        if ( $input_line =~ /^=(?!cut)/ ) {
21549            $line_of_tokens->{_line_type} = 'POD_START';
21550            write_logfile_entry("Entering POD section\n");
21551            $tokenizer_self->{_in_pod} = 1;
21552            return $line_of_tokens;
21553        }
21554        else {
21555            $line_of_tokens->{_line_type} = 'END';
21556            return $line_of_tokens;
21557        }
21558    }
21559
21560    # check for a hash-bang line if we haven't seen one
21561    if ( !$tokenizer_self->{_saw_hash_bang} ) {
21562        if ( $input_line =~ /^\#\!.*perl\b/ ) {
21563            $tokenizer_self->{_saw_hash_bang} = $input_line_number;
21564
21565            # check for -w and -P flags
21566            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
21567                $tokenizer_self->{_saw_perl_dash_P} = 1;
21568            }
21569
21570            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
21571                $tokenizer_self->{_saw_perl_dash_w} = 1;
21572            }
21573
21574            if (   ( $input_line_number > 1 )
21575                && ( !$tokenizer_self->{_look_for_hash_bang} ) )
21576            {
21577
21578                # this is helpful for VMS systems; we may have accidentally
21579                # tokenized some DCL commands
21580                if ( $tokenizer_self->{_started_tokenizing} ) {
21581                    warning(
21582"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
21583                    );
21584                }
21585                else {
21586                    complain("Useless hash-bang after line 1\n");
21587                }
21588            }
21589
21590            # Report the leading hash-bang as a system line
21591            # This will prevent -dac from deleting it
21592            else {
21593                $line_of_tokens->{_line_type} = 'SYSTEM';
21594                return $line_of_tokens;
21595            }
21596        }
21597    }
21598
21599    # wait for a hash-bang before parsing if the user invoked us with -x
21600    if ( $tokenizer_self->{_look_for_hash_bang}
21601        && !$tokenizer_self->{_saw_hash_bang} )
21602    {
21603        $line_of_tokens->{_line_type} = 'SYSTEM';
21604        return $line_of_tokens;
21605    }
21606
21607    # a first line of the form ': #' will be marked as SYSTEM
21608    # since lines of this form may be used by tcsh
21609    if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
21610        $line_of_tokens->{_line_type} = 'SYSTEM';
21611        return $line_of_tokens;
21612    }
21613
21614    # now we know that it is ok to tokenize the line...
21615    # the line tokenizer will modify any of these private variables:
21616    #        _rhere_target_list
21617    #        _in_data
21618    #        _in_end
21619    #        _in_format
21620    #        _in_error
21621    #        _in_pod
21622    #        _in_quote
21623    my $ending_in_quote_last = $tokenizer_self->{_in_quote};
21624    tokenize_this_line($line_of_tokens);
21625
21626    # Now finish defining the return structure and return it
21627    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
21628
21629    # handle severe error (binary data in script)
21630    if ( $tokenizer_self->{_in_error} ) {
21631        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
21632        warning("Giving up after error\n");
21633        $line_of_tokens->{_line_type} = 'ERROR';
21634        reset_indentation_level(0);          # avoid error messages
21635        return $line_of_tokens;
21636    }
21637
21638    # handle start of pod documentation
21639    if ( $tokenizer_self->{_in_pod} ) {
21640
21641        # This gets tricky..above a __DATA__ or __END__ section, perl
21642        # accepts '=cut' as the start of pod section. But afterwards,
21643        # only pod utilities see it and they may ignore an =cut without
21644        # leading =head.  In any case, this isn't good.
21645        if ( $input_line =~ /^=cut\b/ ) {
21646            if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21647                complain("=cut while not in pod ignored\n");
21648                $tokenizer_self->{_in_pod}    = 0;
21649                $line_of_tokens->{_line_type} = 'POD_END';
21650            }
21651            else {
21652                $line_of_tokens->{_line_type} = 'POD_START';
21653                complain(
21654"=cut starts a pod section .. this can fool pod utilities.\n"
21655                );
21656                write_logfile_entry("Entering POD section\n");
21657            }
21658        }
21659
21660        else {
21661            $line_of_tokens->{_line_type} = 'POD_START';
21662            write_logfile_entry("Entering POD section\n");
21663        }
21664
21665        return $line_of_tokens;
21666    }
21667
21668    # update indentation levels for log messages
21669    if ( $input_line !~ /^\s*$/ ) {
21670        my $rlevels                      = $line_of_tokens->{_rlevels};
21671        my $structural_indentation_level = $$rlevels[0];
21672        my ( $python_indentation_level, $msg ) =
21673          find_indentation_level( $input_line, $structural_indentation_level );
21674        if ($msg) { write_logfile_entry("$msg") }
21675        if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
21676            $line_of_tokens->{_python_indentation_level} =
21677              $python_indentation_level;
21678        }
21679    }
21680
21681    # see if this line contains here doc targets
21682    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21683    if (@$rhere_target_list) {
21684
21685        my ( $here_doc_target, $here_quote_character ) =
21686          @{ shift @$rhere_target_list };
21687        $tokenizer_self->{_in_here_doc}          = 1;
21688        $tokenizer_self->{_here_doc_target}      = $here_doc_target;
21689        $tokenizer_self->{_here_quote_character} = $here_quote_character;
21690        write_logfile_entry("Entering HERE document $here_doc_target\n");
21691        $tokenizer_self->{_started_looking_for_here_target_at} =
21692          $input_line_number;
21693    }
21694
21695    # NOTE: __END__ and __DATA__ statements are written unformatted
21696    # because they can theoretically contain additional characters
21697    # which are not tokenized (and cannot be read with <DATA> either!).
21698    if ( $tokenizer_self->{_in_data} ) {
21699        $line_of_tokens->{_line_type} = 'DATA_START';
21700        write_logfile_entry("Starting __DATA__ section\n");
21701        $tokenizer_self->{_saw_data} = 1;
21702
21703        # keep parsing after __DATA__ if use SelfLoader was seen
21704        if ( $tokenizer_self->{_saw_selfloader} ) {
21705            $tokenizer_self->{_in_data} = 0;
21706            write_logfile_entry(
21707                "SelfLoader seen, continuing; -nlsl deactivates\n");
21708        }
21709
21710        return $line_of_tokens;
21711    }
21712
21713    elsif ( $tokenizer_self->{_in_end} ) {
21714        $line_of_tokens->{_line_type} = 'END_START';
21715        write_logfile_entry("Starting __END__ section\n");
21716        $tokenizer_self->{_saw_end} = 1;
21717
21718        # keep parsing after __END__ if use AutoLoader was seen
21719        if ( $tokenizer_self->{_saw_autoloader} ) {
21720            $tokenizer_self->{_in_end} = 0;
21721            write_logfile_entry(
21722                "AutoLoader seen, continuing; -nlal deactivates\n");
21723        }
21724        return $line_of_tokens;
21725    }
21726
21727    # now, finally, we know that this line is type 'CODE'
21728    $line_of_tokens->{_line_type} = 'CODE';
21729
21730    # remember if we have seen any real code
21731    if (  !$tokenizer_self->{_started_tokenizing}
21732        && $input_line !~ /^\s*$/
21733        && $input_line !~ /^\s*#/ )
21734    {
21735        $tokenizer_self->{_started_tokenizing} = 1;
21736    }
21737
21738    if ( $tokenizer_self->{_debugger_object} ) {
21739        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
21740    }
21741
21742    # Note: if keyword 'format' occurs in this line code, it is still CODE
21743    # (keyword 'format' need not start a line)
21744    if ( $tokenizer_self->{_in_format} ) {
21745        write_logfile_entry("Entering format section\n");
21746    }
21747
21748    if ( $tokenizer_self->{_in_quote}
21749        and ( $tokenizer_self->{_line_start_quote} < 0 ) )
21750    {
21751
21752        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
21753        if (
21754            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
21755        {
21756            $tokenizer_self->{_line_start_quote} = $input_line_number;
21757            write_logfile_entry(
21758                "Start multi-line quote or pattern ending in $quote_target\n");
21759        }
21760    }
21761    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
21762        and !$tokenizer_self->{_in_quote} )
21763    {
21764        $tokenizer_self->{_line_start_quote} = -1;
21765        write_logfile_entry("End of multi-line quote or pattern\n");
21766    }
21767
21768    # we are returning a line of CODE
21769    return $line_of_tokens;
21770}
21771
21772sub find_starting_indentation_level {
21773
21774    # USES GLOBAL VARIABLES: $tokenizer_self
21775    my $starting_level    = 0;
21776    my $know_input_tabstr = -1;    # flag for find_indentation_level
21777
21778    # use value if given as parameter
21779    if ( $tokenizer_self->{_know_starting_level} ) {
21780        $starting_level = $tokenizer_self->{_starting_level};
21781    }
21782
21783    # if we know there is a hash_bang line, the level must be zero
21784    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
21785        $tokenizer_self->{_know_starting_level} = 1;
21786    }
21787
21788    # otherwise figure it out from the input file
21789    else {
21790        my $line;
21791        my $i                            = 0;
21792        my $structural_indentation_level = -1; # flag for find_indentation_level
21793
21794        # keep looking at lines until we find a hash bang or piece of code
21795        my $msg = "";
21796        while ( $line =
21797            $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
21798        {
21799
21800            # if first line is #! then assume starting level is zero
21801            if ( $i == 1 && $line =~ /^\#\!/ ) {
21802                $starting_level = 0;
21803                last;
21804            }
21805            next if ( $line =~ /^\s*#/ );    # skip past comments
21806            next if ( $line =~ /^\s*$/ );    # skip past blank lines
21807            ( $starting_level, $msg ) =
21808              find_indentation_level( $line, $structural_indentation_level );
21809            if ($msg) { write_logfile_entry("$msg") }
21810            last;
21811        }
21812        $msg = "Line $i implies starting-indentation-level = $starting_level\n";
21813
21814        if ( $starting_level > 0 ) {
21815
21816            my $input_tabstr = $tokenizer_self->{_input_tabstr};
21817            if ( $input_tabstr eq "\t" ) {
21818                $msg .= "by guessing input tabbing uses 1 tab per level\n";
21819            }
21820            else {
21821                my $cols = length($input_tabstr);
21822                $msg .=
21823                  "by guessing input tabbing uses $cols blanks per level\n";
21824            }
21825        }
21826        write_logfile_entry("$msg");
21827    }
21828    $tokenizer_self->{_starting_level} = $starting_level;
21829    reset_indentation_level($starting_level);
21830}
21831
21832# Find indentation level given a input line.  At the same time, try to
21833# figure out the input tabbing scheme.
21834#
21835# There are two types of calls:
21836#
21837# Type 1: $structural_indentation_level < 0
21838#  In this case we have to guess $input_tabstr to figure out the level.
21839#
21840# Type 2: $structural_indentation_level >= 0
21841#  In this case the level of this line is known, and this routine can
21842#  update the tabbing string, if still unknown, to make the level correct.
21843
21844sub find_indentation_level {
21845    my ( $line, $structural_indentation_level ) = @_;
21846
21847    # USES GLOBAL VARIABLES: $tokenizer_self
21848    my $level = 0;
21849    my $msg   = "";
21850
21851    my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
21852    my $input_tabstr      = $tokenizer_self->{_input_tabstr};
21853
21854    # find leading whitespace
21855    my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
21856
21857    # make first guess at input tabbing scheme if necessary
21858    if ( $know_input_tabstr < 0 ) {
21859
21860        $know_input_tabstr = 0;
21861
21862        # When -et=n is used for the output formatting, we will assume that
21863        # tabs in the input formatting were also produced with -et=n.  This may
21864        # not be true, but it is the best guess because it will keep leading
21865        # whitespace unchanged on repeated formatting on small pieces of code
21866        # when -et=n is used.  Thanks to Sam Kington for this patch.
21867        if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
21868            $leading_whitespace =~ s{^ (\t*) }
21869           { " " x (length($1) * $tabsize) }xe;
21870            $input_tabstr = " " x $tokenizer_self->{_indent_columns};
21871        }
21872        elsif ( $tokenizer_self->{_tabs} ) {
21873            $input_tabstr = "\t";
21874            if ( length($leading_whitespace) > 0 ) {
21875                if ( $leading_whitespace !~ /\t/ ) {
21876
21877                    my $cols = $tokenizer_self->{_indent_columns};
21878
21879                    if ( length($leading_whitespace) < $cols ) {
21880                        $cols = length($leading_whitespace);
21881                    }
21882                    $input_tabstr = " " x $cols;
21883                }
21884            }
21885        }
21886        else {
21887            $input_tabstr = " " x $tokenizer_self->{_indent_columns};
21888
21889            if ( length($leading_whitespace) > 0 ) {
21890                if ( $leading_whitespace =~ /^\t/ ) {
21891                    $input_tabstr = "\t";
21892                }
21893            }
21894        }
21895        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21896        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21897    }
21898
21899    # determine the input tabbing scheme if possible
21900    if (   ( $know_input_tabstr == 0 )
21901        && ( length($leading_whitespace) > 0 )
21902        && ( $structural_indentation_level > 0 ) )
21903    {
21904        my $saved_input_tabstr = $input_tabstr;
21905
21906        # check for common case of one tab per indentation level
21907        if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21908            if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21909                $input_tabstr = "\t";
21910                $msg          = "Guessing old indentation was tab character\n";
21911            }
21912        }
21913
21914        else {
21915
21916            # detab any tabs based on 8 blanks per tab
21917            my $entabbed = "";
21918            if ( $leading_whitespace =~ s/^\t+/        /g ) {
21919                $entabbed = "entabbed";
21920            }
21921
21922            # now compute tabbing from number of spaces
21923            my $columns =
21924              length($leading_whitespace) / $structural_indentation_level;
21925            if ( $columns == int $columns ) {
21926                $msg =
21927                  "Guessing old indentation was $columns $entabbed spaces\n";
21928            }
21929            else {
21930                $columns = int $columns;
21931                $msg =
21932"old indentation is unclear, using $columns $entabbed spaces\n";
21933            }
21934            $input_tabstr = " " x $columns;
21935        }
21936        $know_input_tabstr                    = 1;
21937        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21938        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21939
21940        # see if mistakes were made
21941        if ( ( $tokenizer_self->{_starting_level} > 0 )
21942            && !$tokenizer_self->{_know_starting_level} )
21943        {
21944
21945            if ( $input_tabstr ne $saved_input_tabstr ) {
21946                complain(
21947"I made a bad starting level guess; rerun with a value for -sil \n"
21948                );
21949            }
21950        }
21951    }
21952
21953    # use current guess at input tabbing to get input indentation level
21954    #
21955    # Patch to handle a common case of entabbed leading whitespace
21956    # If the leading whitespace equals 4 spaces and we also have
21957    # tabs, detab the input whitespace assuming 8 spaces per tab.
21958    if ( length($input_tabstr) == 4 ) {
21959        $leading_whitespace =~ s/^\t+/        /g;
21960    }
21961
21962    if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
21963        my $pos = 0;
21964
21965        while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
21966        {
21967            $pos += $len_tab;
21968            $level++;
21969        }
21970    }
21971    return ( $level, $msg );
21972}
21973
21974# This is a currently unused debug routine
21975sub dump_functions {
21976
21977    my $fh = *STDOUT;
21978    my ( $pkg, $sub );
21979    foreach $pkg ( keys %is_user_function ) {
21980        print $fh "\nnon-constant subs in package $pkg\n";
21981
21982        foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
21983            my $msg = "";
21984            if ( $is_block_list_function{$pkg}{$sub} ) {
21985                $msg = 'block_list';
21986            }
21987
21988            if ( $is_block_function{$pkg}{$sub} ) {
21989                $msg = 'block';
21990            }
21991            print $fh "$sub $msg\n";
21992        }
21993    }
21994
21995    foreach $pkg ( keys %is_constant ) {
21996        print $fh "\nconstants and constant subs in package $pkg\n";
21997
21998        foreach $sub ( keys %{ $is_constant{$pkg} } ) {
21999            print $fh "$sub\n";
22000        }
22001    }
22002}
22003
22004sub ones_count {
22005
22006    # count number of 1's in a string of 1's and 0's
22007    # example: ones_count("010101010101") gives 6
22008    return ( my $cis = $_[0] ) =~ tr/1/0/;
22009}
22010
22011sub prepare_for_a_new_file {
22012
22013    # previous tokens needed to determine what to expect next
22014    $last_nonblank_token      = ';';    # the only possible starting state which
22015    $last_nonblank_type       = ';';    # will make a leading brace a code block
22016    $last_nonblank_block_type = '';
22017
22018    # scalars for remembering statement types across multiple lines
22019    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
22020    $in_attribute_list = 0;
22021
22022    # scalars for remembering where we are in the file
22023    $current_package = "main";
22024    $context         = UNKNOWN_CONTEXT;
22025
22026    # hashes used to remember function information
22027    %is_constant             = ();      # user-defined constants
22028    %is_user_function        = ();      # user-defined functions
22029    %user_function_prototype = ();      # their prototypes
22030    %is_block_function       = ();
22031    %is_block_list_function  = ();
22032    %saw_function_definition = ();
22033
22034    # variables used to track depths of various containers
22035    # and report nesting errors
22036    $paren_depth          = 0;
22037    $brace_depth          = 0;
22038    $square_bracket_depth = 0;
22039    @current_depth[ 0 .. $#closing_brace_names ] =
22040      (0) x scalar @closing_brace_names;
22041    $total_depth = 0;
22042    @total_depth = ();
22043    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
22044      ( 0 .. $#closing_brace_names );
22045    @current_sequence_number             = ();
22046    $paren_type[$paren_depth]            = '';
22047    $paren_semicolon_count[$paren_depth] = 0;
22048    $paren_structural_type[$brace_depth] = '';
22049    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
22050    $brace_structural_type[$brace_depth]                   = '';
22051    $brace_statement_type[$brace_depth]                    = "";
22052    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
22053    $brace_package[$paren_depth]                           = $current_package;
22054    $square_bracket_type[$square_bracket_depth]            = '';
22055    $square_bracket_structural_type[$square_bracket_depth] = '';
22056
22057    initialize_tokenizer_state();
22058}
22059
22060{                                       # begin tokenize_this_line
22061
22062    use constant BRACE          => 0;
22063    use constant SQUARE_BRACKET => 1;
22064    use constant PAREN          => 2;
22065    use constant QUESTION_COLON => 3;
22066
22067    # TV1: scalars for processing one LINE.
22068    # Re-initialized on each entry to sub tokenize_this_line.
22069    my (
22070        $block_type,        $container_type,    $expecting,
22071        $i,                 $i_tok,             $input_line,
22072        $input_line_number, $last_nonblank_i,   $max_token_index,
22073        $next_tok,          $next_type,         $peeked_ahead,
22074        $prototype,         $rhere_target_list, $rtoken_map,
22075        $rtoken_type,       $rtokens,           $tok,
22076        $type,              $type_sequence,     $indent_flag,
22077    );
22078
22079    # TV2: refs to ARRAYS for processing one LINE
22080    # Re-initialized on each call.
22081    my $routput_token_list     = [];    # stack of output token indexes
22082    my $routput_token_type     = [];    # token types
22083    my $routput_block_type     = [];    # types of code block
22084    my $routput_container_type = [];    # paren types, such as if, elsif, ..
22085    my $routput_type_sequence  = [];    # nesting sequential number
22086    my $routput_indent_flag    = [];    #
22087
22088    # TV3: SCALARS for quote variables.  These are initialized with a
22089    # subroutine call and continually updated as lines are processed.
22090    my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22091        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
22092
22093    # TV4: SCALARS for multi-line identifiers and
22094    # statements. These are initialized with a subroutine call
22095    # and continually updated as lines are processed.
22096    my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
22097
22098    # TV5: SCALARS for tracking indentation level.
22099    # Initialized once and continually updated as lines are
22100    # processed.
22101    my (
22102        $nesting_token_string,      $nesting_type_string,
22103        $nesting_block_string,      $nesting_block_flag,
22104        $nesting_list_string,       $nesting_list_flag,
22105        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22106        $in_statement_continuation, $level_in_tokenizer,
22107        $slevel_in_tokenizer,       $rslevel_stack,
22108    );
22109
22110    # TV6: SCALARS for remembering several previous
22111    # tokens. Initialized once and continually updated as
22112    # lines are processed.
22113    my (
22114        $last_nonblank_container_type,     $last_nonblank_type_sequence,
22115        $last_last_nonblank_token,         $last_last_nonblank_type,
22116        $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
22117        $last_last_nonblank_type_sequence, $last_nonblank_prototype,
22118    );
22119
22120    # ----------------------------------------------------------------
22121    # beginning of tokenizer variable access and manipulation routines
22122    # ----------------------------------------------------------------
22123
22124    sub initialize_tokenizer_state {
22125
22126        # TV1: initialized on each call
22127        # TV2: initialized on each call
22128        # TV3:
22129        $in_quote                = 0;
22130        $quote_type              = 'Q';
22131        $quote_character         = "";
22132        $quote_pos               = 0;
22133        $quote_depth             = 0;
22134        $quoted_string_1         = "";
22135        $quoted_string_2         = "";
22136        $allowed_quote_modifiers = "";
22137
22138        # TV4:
22139        $id_scan_state     = '';
22140        $identifier        = '';
22141        $want_paren        = "";
22142        $indented_if_level = 0;
22143
22144        # TV5:
22145        $nesting_token_string             = "";
22146        $nesting_type_string              = "";
22147        $nesting_block_string             = '1';    # initially in a block
22148        $nesting_block_flag               = 1;
22149        $nesting_list_string              = '0';    # initially not in a list
22150        $nesting_list_flag                = 0;      # initially not in a list
22151        $ci_string_in_tokenizer           = "";
22152        $continuation_string_in_tokenizer = "0";
22153        $in_statement_continuation        = 0;
22154        $level_in_tokenizer               = 0;
22155        $slevel_in_tokenizer              = 0;
22156        $rslevel_stack                    = [];
22157
22158        # TV6:
22159        $last_nonblank_container_type      = '';
22160        $last_nonblank_type_sequence       = '';
22161        $last_last_nonblank_token          = ';';
22162        $last_last_nonblank_type           = ';';
22163        $last_last_nonblank_block_type     = '';
22164        $last_last_nonblank_container_type = '';
22165        $last_last_nonblank_type_sequence  = '';
22166        $last_nonblank_prototype           = "";
22167    }
22168
22169    sub save_tokenizer_state {
22170
22171        my $rTV1 = [
22172            $block_type,        $container_type,    $expecting,
22173            $i,                 $i_tok,             $input_line,
22174            $input_line_number, $last_nonblank_i,   $max_token_index,
22175            $next_tok,          $next_type,         $peeked_ahead,
22176            $prototype,         $rhere_target_list, $rtoken_map,
22177            $rtoken_type,       $rtokens,           $tok,
22178            $type,              $type_sequence,     $indent_flag,
22179        ];
22180
22181        my $rTV2 = [
22182            $routput_token_list,    $routput_token_type,
22183            $routput_block_type,    $routput_container_type,
22184            $routput_type_sequence, $routput_indent_flag,
22185        ];
22186
22187        my $rTV3 = [
22188            $in_quote,        $quote_type,
22189            $quote_character, $quote_pos,
22190            $quote_depth,     $quoted_string_1,
22191            $quoted_string_2, $allowed_quote_modifiers,
22192        ];
22193
22194        my $rTV4 =
22195          [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
22196
22197        my $rTV5 = [
22198            $nesting_token_string,      $nesting_type_string,
22199            $nesting_block_string,      $nesting_block_flag,
22200            $nesting_list_string,       $nesting_list_flag,
22201            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22202            $in_statement_continuation, $level_in_tokenizer,
22203            $slevel_in_tokenizer,       $rslevel_stack,
22204        ];
22205
22206        my $rTV6 = [
22207            $last_nonblank_container_type,
22208            $last_nonblank_type_sequence,
22209            $last_last_nonblank_token,
22210            $last_last_nonblank_type,
22211            $last_last_nonblank_block_type,
22212            $last_last_nonblank_container_type,
22213            $last_last_nonblank_type_sequence,
22214            $last_nonblank_prototype,
22215        ];
22216        return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
22217    }
22218
22219    sub restore_tokenizer_state {
22220        my ($rstate) = @_;
22221        my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
22222        (
22223            $block_type,        $container_type,    $expecting,
22224            $i,                 $i_tok,             $input_line,
22225            $input_line_number, $last_nonblank_i,   $max_token_index,
22226            $next_tok,          $next_type,         $peeked_ahead,
22227            $prototype,         $rhere_target_list, $rtoken_map,
22228            $rtoken_type,       $rtokens,           $tok,
22229            $type,              $type_sequence,     $indent_flag,
22230        ) = @{$rTV1};
22231
22232        (
22233            $routput_token_list,    $routput_token_type,
22234            $routput_block_type,    $routput_container_type,
22235            $routput_type_sequence, $routput_type_sequence,
22236        ) = @{$rTV2};
22237
22238        (
22239            $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22240            $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
22241        ) = @{$rTV3};
22242
22243        ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
22244          @{$rTV4};
22245
22246        (
22247            $nesting_token_string,      $nesting_type_string,
22248            $nesting_block_string,      $nesting_block_flag,
22249            $nesting_list_string,       $nesting_list_flag,
22250            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22251            $in_statement_continuation, $level_in_tokenizer,
22252            $slevel_in_tokenizer,       $rslevel_stack,
22253        ) = @{$rTV5};
22254
22255        (
22256            $last_nonblank_container_type,
22257            $last_nonblank_type_sequence,
22258            $last_last_nonblank_token,
22259            $last_last_nonblank_type,
22260            $last_last_nonblank_block_type,
22261            $last_last_nonblank_container_type,
22262            $last_last_nonblank_type_sequence,
22263            $last_nonblank_prototype,
22264        ) = @{$rTV6};
22265    }
22266
22267    sub get_indentation_level {
22268
22269        # patch to avoid reporting error if indented if is not terminated
22270        if ($indented_if_level) { return $level_in_tokenizer - 1 }
22271        return $level_in_tokenizer;
22272    }
22273
22274    sub reset_indentation_level {
22275        $level_in_tokenizer  = $_[0];
22276        $slevel_in_tokenizer = $_[0];
22277        push @{$rslevel_stack}, $slevel_in_tokenizer;
22278    }
22279
22280    sub peeked_ahead {
22281        $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
22282    }
22283
22284    # ------------------------------------------------------------
22285    # end of tokenizer variable access and manipulation routines
22286    # ------------------------------------------------------------
22287
22288    # ------------------------------------------------------------
22289    # beginning of various scanner interface routines
22290    # ------------------------------------------------------------
22291    sub scan_replacement_text {
22292
22293        # check for here-docs in replacement text invoked by
22294        # a substitution operator with executable modifier 'e'.
22295        #
22296        # given:
22297        #  $replacement_text
22298        # return:
22299        #  $rht = reference to any here-doc targets
22300        my ($replacement_text) = @_;
22301
22302        # quick check
22303        return undef unless ( $replacement_text =~ /<</ );
22304
22305        write_logfile_entry("scanning replacement text for here-doc targets\n");
22306
22307        # save the logger object for error messages
22308        my $logger_object = $tokenizer_self->{_logger_object};
22309
22310        # localize all package variables
22311        local (
22312            $tokenizer_self,          $last_nonblank_token,
22313            $last_nonblank_type,      $last_nonblank_block_type,
22314            $statement_type,          $in_attribute_list,
22315            $current_package,         $context,
22316            %is_constant,             %is_user_function,
22317            %user_function_prototype, %is_block_function,
22318            %is_block_list_function,  %saw_function_definition,
22319            $brace_depth,             $paren_depth,
22320            $square_bracket_depth,    @current_depth,
22321            @total_depth,             $total_depth,
22322            @nesting_sequence_number, @current_sequence_number,
22323            @paren_type,              @paren_semicolon_count,
22324            @paren_structural_type,   @brace_type,
22325            @brace_structural_type,   @brace_statement_type,
22326            @brace_context,           @brace_package,
22327            @square_bracket_type,     @square_bracket_structural_type,
22328            @depth_array,             @starting_line_of_current_depth,
22329            @nested_ternary_flag,
22330        );
22331
22332        # save all lexical variables
22333        my $rstate = save_tokenizer_state();
22334        _decrement_count();    # avoid error check for multiple tokenizers
22335
22336        # make a new tokenizer
22337        my $rOpts = {};
22338        my $rpending_logfile_message;
22339        my $source_object =
22340          Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22341            $rpending_logfile_message );
22342        my $tokenizer = Perl::Tidy::Tokenizer->new(
22343            source_object        => $source_object,
22344            logger_object        => $logger_object,
22345            starting_line_number => $input_line_number,
22346        );
22347
22348        # scan the replacement text
22349        1 while ( $tokenizer->get_line() );
22350
22351        # remove any here doc targets
22352        my $rht = undef;
22353        if ( $tokenizer_self->{_in_here_doc} ) {
22354            $rht = [];
22355            push @{$rht},
22356              [
22357                $tokenizer_self->{_here_doc_target},
22358                $tokenizer_self->{_here_quote_character}
22359              ];
22360            if ( $tokenizer_self->{_rhere_target_list} ) {
22361                push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22362                $tokenizer_self->{_rhere_target_list} = undef;
22363            }
22364            $tokenizer_self->{_in_here_doc} = undef;
22365        }
22366
22367        # now its safe to report errors
22368        $tokenizer->report_tokenization_errors();
22369
22370        # restore all tokenizer lexical variables
22371        restore_tokenizer_state($rstate);
22372
22373        # return the here doc targets
22374        return $rht;
22375    }
22376
22377    sub scan_bare_identifier {
22378        ( $i, $tok, $type, $prototype ) =
22379          scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
22380            $rtoken_map, $max_token_index );
22381    }
22382
22383    sub scan_identifier {
22384        ( $i, $tok, $type, $id_scan_state, $identifier ) =
22385          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
22386            $max_token_index, $expecting );
22387    }
22388
22389    sub scan_id {
22390        ( $i, $tok, $type, $id_scan_state ) =
22391          scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
22392            $id_scan_state, $max_token_index );
22393    }
22394
22395    sub scan_number {
22396        my $number;
22397        ( $i, $type, $number ) =
22398          scan_number_do( $input_line, $i, $rtoken_map, $type,
22399            $max_token_index );
22400        return $number;
22401    }
22402
22403    # a sub to warn if token found where term expected
22404    sub error_if_expecting_TERM {
22405        if ( $expecting == TERM ) {
22406            if ( $really_want_term{$last_nonblank_type} ) {
22407                unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
22408                    $rtoken_type, $input_line );
22409                1;
22410            }
22411        }
22412    }
22413
22414    # a sub to warn if token found where operator expected
22415    sub error_if_expecting_OPERATOR {
22416        if ( $expecting == OPERATOR ) {
22417            my $thing = defined $_[0] ? $_[0] : $tok;
22418            unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
22419                $rtoken_map, $rtoken_type, $input_line );
22420            if ( $i_tok == 0 ) {
22421                interrupt_logfile();
22422                warning("Missing ';' above?\n");
22423                resume_logfile();
22424            }
22425            1;
22426        }
22427    }
22428
22429    # ------------------------------------------------------------
22430    # end scanner interfaces
22431    # ------------------------------------------------------------
22432
22433    my %is_for_foreach;
22434    @_ = qw(for foreach);
22435    @is_for_foreach{@_} = (1) x scalar(@_);
22436
22437    my %is_my_our;
22438    @_ = qw(my our);
22439    @is_my_our{@_} = (1) x scalar(@_);
22440
22441    # These keywords may introduce blocks after parenthesized expressions,
22442    # in the form:
22443    # keyword ( .... ) { BLOCK }
22444    # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
22445    my %is_blocktype_with_paren;
22446    @_ = qw(if elsif unless while until for foreach switch case given when);
22447    @is_blocktype_with_paren{@_} = (1) x scalar(@_);
22448
22449    # ------------------------------------------------------------
22450    # begin hash of code for handling most token types
22451    # ------------------------------------------------------------
22452    my $tokenization_code = {
22453
22454        # no special code for these types yet, but syntax checks
22455        # could be added
22456
22457##      '!'   => undef,
22458##      '!='  => undef,
22459##      '!~'  => undef,
22460##      '%='  => undef,
22461##      '&&=' => undef,
22462##      '&='  => undef,
22463##      '+='  => undef,
22464##      '-='  => undef,
22465##      '..'  => undef,
22466##      '..'  => undef,
22467##      '...' => undef,
22468##      '.='  => undef,
22469##      '<<=' => undef,
22470##      '<='  => undef,
22471##      '<=>' => undef,
22472##      '<>'  => undef,
22473##      '='   => undef,
22474##      '=='  => undef,
22475##      '=~'  => undef,
22476##      '>='  => undef,
22477##      '>>'  => undef,
22478##      '>>=' => undef,
22479##      '\\'  => undef,
22480##      '^='  => undef,
22481##      '|='  => undef,
22482##      '||=' => undef,
22483##      '//=' => undef,
22484##      '~'   => undef,
22485##      '~~'  => undef,
22486##      '!~~'  => undef,
22487
22488        '>' => sub {
22489            error_if_expecting_TERM()
22490              if ( $expecting == TERM );
22491        },
22492        '|' => sub {
22493            error_if_expecting_TERM()
22494              if ( $expecting == TERM );
22495        },
22496        '$' => sub {
22497
22498            # start looking for a scalar
22499            error_if_expecting_OPERATOR("Scalar")
22500              if ( $expecting == OPERATOR );
22501            scan_identifier();
22502
22503            if ( $identifier eq '$^W' ) {
22504                $tokenizer_self->{_saw_perl_dash_w} = 1;
22505            }
22506
22507            # Check for indentifier in indirect object slot
22508            # (vorboard.pl, sort.t).  Something like:
22509            #   /^(print|printf|sort|exec|system)$/
22510            if (
22511                $is_indirect_object_taker{$last_nonblank_token}
22512
22513                || ( ( $last_nonblank_token eq '(' )
22514                    && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
22515                || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
22516              )
22517            {
22518                $type = 'Z';
22519            }
22520        },
22521        '(' => sub {
22522
22523            ++$paren_depth;
22524            $paren_semicolon_count[$paren_depth] = 0;
22525            if ($want_paren) {
22526                $container_type = $want_paren;
22527                $want_paren     = "";
22528            }
22529            else {
22530                $container_type = $last_nonblank_token;
22531
22532                # We can check for a syntax error here of unexpected '(',
22533                # but this is going to get messy...
22534                if (
22535                    $expecting == OPERATOR
22536
22537                    # be sure this is not a method call of the form
22538                    # &method(...), $method->(..), &{method}(...),
22539                    # $ref[2](list) is ok & short for $ref[2]->(list)
22540                    # NOTE: at present, braces in something like &{ xxx }
22541                    # are not marked as a block, we might have a method call
22542                    && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
22543
22544                  )
22545                {
22546
22547                    # ref: camel 3 p 703.
22548                    if ( $last_last_nonblank_token eq 'do' ) {
22549                        complain(
22550"do SUBROUTINE is deprecated; consider & or -> notation\n"
22551                        );
22552                    }
22553                    else {
22554
22555                        # if this is an empty list, (), then it is not an
22556                        # error; for example, we might have a constant pi and
22557                        # invoke it with pi() or just pi;
22558                        my ( $next_nonblank_token, $i_next ) =
22559                          find_next_nonblank_token( $i, $rtokens,
22560                            $max_token_index );
22561                        if ( $next_nonblank_token ne ')' ) {
22562                            my $hint;
22563                            error_if_expecting_OPERATOR('(');
22564
22565                            if ( $last_nonblank_type eq 'C' ) {
22566                                $hint =
22567                                  "$last_nonblank_token has a void prototype\n";
22568                            }
22569                            elsif ( $last_nonblank_type eq 'i' ) {
22570                                if (   $i_tok > 0
22571                                    && $last_nonblank_token =~ /^\$/ )
22572                                {
22573                                    $hint =
22574"Do you mean '$last_nonblank_token->(' ?\n";
22575                                }
22576                            }
22577                            if ($hint) {
22578                                interrupt_logfile();
22579                                warning($hint);
22580                                resume_logfile();
22581                            }
22582                        } ## end if ( $next_nonblank_token...
22583                    } ## end else [ if ( $last_last_nonblank_token...
22584                } ## end if ( $expecting == OPERATOR...
22585            }
22586            $paren_type[$paren_depth] = $container_type;
22587            ( $type_sequence, $indent_flag ) =
22588              increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22589
22590            # propagate types down through nested parens
22591            # for example: the second paren in 'if ((' would be structural
22592            # since the first is.
22593
22594            if ( $last_nonblank_token eq '(' ) {
22595                $type = $last_nonblank_type;
22596            }
22597
22598            #     We exclude parens as structural after a ',' because it
22599            #     causes subtle problems with continuation indentation for
22600            #     something like this, where the first 'or' will not get
22601            #     indented.
22602            #
22603            #         assert(
22604            #             __LINE__,
22605            #             ( not defined $check )
22606            #               or ref $check
22607            #               or $check eq "new"
22608            #               or $check eq "old",
22609            #         );
22610            #
22611            #     Likewise, we exclude parens where a statement can start
22612            #     because of problems with continuation indentation, like
22613            #     these:
22614            #
22615            #         ($firstline =~ /^#\!.*perl/)
22616            #         and (print $File::Find::name, "\n")
22617            #           and (return 1);
22618            #
22619            #         (ref($usage_fref) =~ /CODE/)
22620            #         ? &$usage_fref
22621            #           : (&blast_usage, &blast_params, &blast_general_params);
22622
22623            else {
22624                $type = '{';
22625            }
22626
22627            if ( $last_nonblank_type eq ')' ) {
22628                warning(
22629                    "Syntax error? found token '$last_nonblank_type' then '('\n"
22630                );
22631            }
22632            $paren_structural_type[$paren_depth] = $type;
22633
22634        },
22635        ')' => sub {
22636            ( $type_sequence, $indent_flag ) =
22637              decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22638
22639            if ( $paren_structural_type[$paren_depth] eq '{' ) {
22640                $type = '}';
22641            }
22642
22643            $container_type = $paren_type[$paren_depth];
22644
22645            #    /^(for|foreach)$/
22646            if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
22647                my $num_sc = $paren_semicolon_count[$paren_depth];
22648                if ( $num_sc > 0 && $num_sc != 2 ) {
22649                    warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
22650                }
22651            }
22652
22653            if ( $paren_depth > 0 ) { $paren_depth-- }
22654        },
22655        ',' => sub {
22656            if ( $last_nonblank_type eq ',' ) {
22657                complain("Repeated ','s \n");
22658            }
22659
22660            # patch for operator_expected: note if we are in the list (use.t)
22661            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22662##                FIXME: need to move this elsewhere, perhaps check after a '('
22663##                elsif ($last_nonblank_token eq '(') {
22664##                    warning("Leading ','s illegal in some versions of perl\n");
22665##                }
22666        },
22667        ';' => sub {
22668            $context        = UNKNOWN_CONTEXT;
22669            $statement_type = '';
22670
22671            #    /^(for|foreach)$/
22672            if ( $is_for_foreach{ $paren_type[$paren_depth] } )
22673            {    # mark ; in for loop
22674
22675                # Be careful: we do not want a semicolon such as the
22676                # following to be included:
22677                #
22678                #    for (sort {strcoll($a,$b);} keys %investments) {
22679
22680                if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
22681                    && $square_bracket_depth ==
22682                    $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
22683                {
22684
22685                    $type = 'f';
22686                    $paren_semicolon_count[$paren_depth]++;
22687                }
22688            }
22689
22690        },
22691        '"' => sub {
22692            error_if_expecting_OPERATOR("String")
22693              if ( $expecting == OPERATOR );
22694            $in_quote                = 1;
22695            $type                    = 'Q';
22696            $allowed_quote_modifiers = "";
22697        },
22698        "'" => sub {
22699            error_if_expecting_OPERATOR("String")
22700              if ( $expecting == OPERATOR );
22701            $in_quote                = 1;
22702            $type                    = 'Q';
22703            $allowed_quote_modifiers = "";
22704        },
22705        '`' => sub {
22706            error_if_expecting_OPERATOR("String")
22707              if ( $expecting == OPERATOR );
22708            $in_quote                = 1;
22709            $type                    = 'Q';
22710            $allowed_quote_modifiers = "";
22711        },
22712        '/' => sub {
22713            my $is_pattern;
22714
22715            if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
22716                my $msg;
22717                ( $is_pattern, $msg ) =
22718                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
22719                    $max_token_index );
22720
22721                if ($msg) {
22722                    write_diagnostics("DIVIDE:$msg\n");
22723                    write_logfile_entry($msg);
22724                }
22725            }
22726            else { $is_pattern = ( $expecting == TERM ) }
22727
22728            if ($is_pattern) {
22729                $in_quote                = 1;
22730                $type                    = 'Q';
22731                $allowed_quote_modifiers = '[cgimosxp]';
22732            }
22733            else {    # not a pattern; check for a /= token
22734
22735                if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
22736                    $i++;
22737                    $tok  = '/=';
22738                    $type = $tok;
22739                }
22740
22741              #DEBUG - collecting info on what tokens follow a divide
22742              # for development of guessing algorithm
22743              #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
22744              #    #write_diagnostics( "DIVIDE? $input_line\n" );
22745              #}
22746            }
22747        },
22748        '{' => sub {
22749
22750            # if we just saw a ')', we will label this block with
22751            # its type.  We need to do this to allow sub
22752            # code_block_type to determine if this brace starts a
22753            # code block or anonymous hash.  (The type of a paren
22754            # pair is the preceding token, such as 'if', 'else',
22755            # etc).
22756            $container_type = "";
22757
22758            # ATTRS: for a '{' following an attribute list, reset
22759            # things to look like we just saw the sub name
22760            if ( $statement_type =~ /^sub/ ) {
22761                $last_nonblank_token = $statement_type;
22762                $last_nonblank_type  = 'i';
22763                $statement_type      = "";
22764            }
22765
22766            # patch for SWITCH/CASE: hide these keywords from an immediately
22767            # following opening brace
22768            elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
22769                && $statement_type eq $last_nonblank_token )
22770            {
22771                $last_nonblank_token = ";";
22772            }
22773
22774            elsif ( $last_nonblank_token eq ')' ) {
22775                $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
22776
22777                # defensive move in case of a nesting error (pbug.t)
22778                # in which this ')' had no previous '('
22779                # this nesting error will have been caught
22780                if ( !defined($last_nonblank_token) ) {
22781                    $last_nonblank_token = 'if';
22782                }
22783
22784                # check for syntax error here;
22785                unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
22786                    my $list = join( ' ', sort keys %is_blocktype_with_paren );
22787                    warning(
22788                        "syntax error at ') {', didn't see one of: $list\n");
22789                }
22790            }
22791
22792            # patch for paren-less for/foreach glitch, part 2.
22793            # see note below under 'qw'
22794            elsif ($last_nonblank_token eq 'qw'
22795                && $is_for_foreach{$want_paren} )
22796            {
22797                $last_nonblank_token = $want_paren;
22798                if ( $last_last_nonblank_token eq $want_paren ) {
22799                    warning(
22800"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
22801                    );
22802
22803                }
22804                $want_paren = "";
22805            }
22806
22807            # now identify which of the three possible types of
22808            # curly braces we have: hash index container, anonymous
22809            # hash reference, or code block.
22810
22811            # non-structural (hash index) curly brace pair
22812            # get marked 'L' and 'R'
22813            if ( is_non_structural_brace() ) {
22814                $type = 'L';
22815
22816                # patch for SWITCH/CASE:
22817                # allow paren-less identifier after 'when'
22818                # if the brace is preceded by a space
22819                if (   $statement_type eq 'when'
22820                    && $last_nonblank_type      eq 'i'
22821                    && $last_last_nonblank_type eq 'k'
22822                    && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
22823                {
22824                    $type       = '{';
22825                    $block_type = $statement_type;
22826                }
22827            }
22828
22829            # code and anonymous hash have the same type, '{', but are
22830            # distinguished by 'block_type',
22831            # which will be blank for an anonymous hash
22832            else {
22833
22834                $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
22835                    $max_token_index );
22836
22837                # patch to promote bareword type to function taking block
22838                if (   $block_type
22839                    && $last_nonblank_type eq 'w'
22840                    && $last_nonblank_i >= 0 )
22841                {
22842                    if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
22843                        $routput_token_type->[$last_nonblank_i] = 'G';
22844                    }
22845                }
22846
22847                # patch for SWITCH/CASE: if we find a stray opening block brace
22848                # where we might accept a 'case' or 'when' block, then take it
22849                if (   $statement_type eq 'case'
22850                    || $statement_type eq 'when' )
22851                {
22852                    if ( !$block_type || $block_type eq '}' ) {
22853                        $block_type = $statement_type;
22854                    }
22855                }
22856            }
22857            $brace_type[ ++$brace_depth ] = $block_type;
22858            $brace_package[$brace_depth] = $current_package;
22859            ( $type_sequence, $indent_flag ) =
22860              increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22861            $brace_structural_type[$brace_depth] = $type;
22862            $brace_context[$brace_depth]         = $context;
22863            $brace_statement_type[$brace_depth]  = $statement_type;
22864        },
22865        '}' => sub {
22866            $block_type = $brace_type[$brace_depth];
22867            if ($block_type) { $statement_type = '' }
22868            if ( defined( $brace_package[$brace_depth] ) ) {
22869                $current_package = $brace_package[$brace_depth];
22870            }
22871
22872            # can happen on brace error (caught elsewhere)
22873            else {
22874            }
22875            ( $type_sequence, $indent_flag ) =
22876              decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22877
22878            if ( $brace_structural_type[$brace_depth] eq 'L' ) {
22879                $type = 'R';
22880            }
22881
22882            # propagate type information for 'do' and 'eval' blocks.
22883            # This is necessary to enable us to know if an operator
22884            # or term is expected next
22885            if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
22886                $tok = $brace_type[$brace_depth];
22887            }
22888
22889            $context        = $brace_context[$brace_depth];
22890            $statement_type = $brace_statement_type[$brace_depth];
22891            if ( $brace_depth > 0 ) { $brace_depth--; }
22892        },
22893        '&' => sub {    # maybe sub call? start looking
22894
22895            # We have to check for sub call unless we are sure we
22896            # are expecting an operator.  This example from s2p
22897            # got mistaken as a q operator in an early version:
22898            #   print BODY &q(<<'EOT');
22899            if ( $expecting != OPERATOR ) {
22900                scan_identifier();
22901            }
22902            else {
22903            }
22904        },
22905        '<' => sub {    # angle operator or less than?
22906
22907            if ( $expecting != OPERATOR ) {
22908                ( $i, $type ) =
22909                  find_angle_operator_termination( $input_line, $i, $rtoken_map,
22910                    $expecting, $max_token_index );
22911
22912                if ( $type eq '<' && $expecting == TERM ) {
22913                    error_if_expecting_TERM();
22914                    interrupt_logfile();
22915                    warning("Unterminated <> operator?\n");
22916                    resume_logfile();
22917                }
22918            }
22919            else {
22920            }
22921        },
22922        '?' => sub {    # ?: conditional or starting pattern?
22923
22924            my $is_pattern;
22925
22926            if ( $expecting == UNKNOWN ) {
22927
22928                my $msg;
22929                ( $is_pattern, $msg ) =
22930                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
22931                    $max_token_index );
22932
22933                if ($msg) { write_logfile_entry($msg) }
22934            }
22935            else { $is_pattern = ( $expecting == TERM ) }
22936
22937            if ($is_pattern) {
22938                $in_quote                = 1;
22939                $type                    = 'Q';
22940                $allowed_quote_modifiers = '[cgimosxp]';
22941            }
22942            else {
22943                ( $type_sequence, $indent_flag ) =
22944                  increase_nesting_depth( QUESTION_COLON,
22945                    $$rtoken_map[$i_tok] );
22946            }
22947        },
22948        '*' => sub {    # typeglob, or multiply?
22949
22950            if ( $expecting == TERM ) {
22951                scan_identifier();
22952            }
22953            else {
22954
22955                if ( $$rtokens[ $i + 1 ] eq '=' ) {
22956                    $tok  = '*=';
22957                    $type = $tok;
22958                    $i++;
22959                }
22960                elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
22961                    $tok  = '**';
22962                    $type = $tok;
22963                    $i++;
22964                    if ( $$rtokens[ $i + 1 ] eq '=' ) {
22965                        $tok  = '**=';
22966                        $type = $tok;
22967                        $i++;
22968                    }
22969                }
22970            }
22971        },
22972        '.' => sub {    # what kind of . ?
22973
22974            if ( $expecting != OPERATOR ) {
22975                scan_number();
22976                if ( $type eq '.' ) {
22977                    error_if_expecting_TERM()
22978                      if ( $expecting == TERM );
22979                }
22980            }
22981            else {
22982            }
22983        },
22984        ':' => sub {
22985
22986            # if this is the first nonblank character, call it a label
22987            # since perl seems to just swallow it
22988            if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
22989                $type = 'J';
22990            }
22991
22992            # ATTRS: check for a ':' which introduces an attribute list
22993            # (this might eventually get its own token type)
22994            elsif ( $statement_type =~ /^sub/ ) {
22995                $type              = 'A';
22996                $in_attribute_list = 1;
22997            }
22998
22999            # check for scalar attribute, such as
23000            # my $foo : shared = 1;
23001            elsif ($is_my_our{$statement_type}
23002                && $current_depth[QUESTION_COLON] == 0 )
23003            {
23004                $type              = 'A';
23005                $in_attribute_list = 1;
23006            }
23007
23008            # otherwise, it should be part of a ?/: operator
23009            else {
23010                ( $type_sequence, $indent_flag ) =
23011                  decrease_nesting_depth( QUESTION_COLON,
23012                    $$rtoken_map[$i_tok] );
23013                if ( $last_nonblank_token eq '?' ) {
23014                    warning("Syntax error near ? :\n");
23015                }
23016            }
23017        },
23018        '+' => sub {    # what kind of plus?
23019
23020            if ( $expecting == TERM ) {
23021                my $number = scan_number();
23022
23023                # unary plus is safest assumption if not a number
23024                if ( !defined($number) ) { $type = 'p'; }
23025            }
23026            elsif ( $expecting == OPERATOR ) {
23027            }
23028            else {
23029                if ( $next_type eq 'w' ) { $type = 'p' }
23030            }
23031        },
23032        '@' => sub {
23033
23034            error_if_expecting_OPERATOR("Array")
23035              if ( $expecting == OPERATOR );
23036            scan_identifier();
23037        },
23038        '%' => sub {    # hash or modulo?
23039
23040            # first guess is hash if no following blank
23041            if ( $expecting == UNKNOWN ) {
23042                if ( $next_type ne 'b' ) { $expecting = TERM }
23043            }
23044            if ( $expecting == TERM ) {
23045                scan_identifier();
23046            }
23047        },
23048        '[' => sub {
23049            $square_bracket_type[ ++$square_bracket_depth ] =
23050              $last_nonblank_token;
23051            ( $type_sequence, $indent_flag ) =
23052              increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23053
23054            # It may seem odd, but structural square brackets have
23055            # type '{' and '}'.  This simplifies the indentation logic.
23056            if ( !is_non_structural_brace() ) {
23057                $type = '{';
23058            }
23059            $square_bracket_structural_type[$square_bracket_depth] = $type;
23060        },
23061        ']' => sub {
23062            ( $type_sequence, $indent_flag ) =
23063              decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23064
23065            if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
23066            {
23067                $type = '}';
23068            }
23069            if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
23070        },
23071        '-' => sub {    # what kind of minus?
23072
23073            if ( ( $expecting != OPERATOR )
23074                && $is_file_test_operator{$next_tok} )
23075            {
23076                my ( $next_nonblank_token, $i_next ) =
23077                  find_next_nonblank_token( $i + 1, $rtokens,
23078                    $max_token_index );
23079
23080                # check for a quoted word like "-w=>xx";
23081                # it is sufficient to just check for a following '='
23082                if ( $next_nonblank_token eq '=' ) {
23083                    $type = 'm';
23084                }
23085                else {
23086                    $i++;
23087                    $tok .= $next_tok;
23088                    $type = 'F';
23089                }
23090            }
23091            elsif ( $expecting == TERM ) {
23092                my $number = scan_number();
23093
23094                # maybe part of bareword token? unary is safest
23095                if ( !defined($number) ) { $type = 'm'; }
23096
23097            }
23098            elsif ( $expecting == OPERATOR ) {
23099            }
23100            else {
23101
23102                if ( $next_type eq 'w' ) {
23103                    $type = 'm';
23104                }
23105            }
23106        },
23107
23108        '^' => sub {
23109
23110            # check for special variables like ${^WARNING_BITS}
23111            if ( $expecting == TERM ) {
23112
23113                # FIXME: this should work but will not catch errors
23114                # because we also have to be sure that previous token is
23115                # a type character ($,@,%).
23116                if ( $last_nonblank_token eq '{'
23117                    && ( $next_tok =~ /^[A-Za-z_]/ ) )
23118                {
23119
23120                    if ( $next_tok eq 'W' ) {
23121                        $tokenizer_self->{_saw_perl_dash_w} = 1;
23122                    }
23123                    $tok  = $tok . $next_tok;
23124                    $i    = $i + 1;
23125                    $type = 'w';
23126                }
23127
23128                else {
23129                    unless ( error_if_expecting_TERM() ) {
23130
23131                        # Something like this is valid but strange:
23132                        # undef ^I;
23133                        complain("The '^' seems unusual here\n");
23134                    }
23135                }
23136            }
23137        },
23138
23139        '::' => sub {    # probably a sub call
23140            scan_bare_identifier();
23141        },
23142        '<<' => sub {    # maybe a here-doc?
23143            return
23144              unless ( $i < $max_token_index )
23145              ;          # here-doc not possible if end of line
23146
23147            if ( $expecting != OPERATOR ) {
23148                my ( $found_target, $here_doc_target, $here_quote_character,
23149                    $saw_error );
23150                (
23151                    $found_target, $here_doc_target, $here_quote_character, $i,
23152                    $saw_error
23153                  )
23154                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
23155                    $max_token_index );
23156
23157                if ($found_target) {
23158                    push @{$rhere_target_list},
23159                      [ $here_doc_target, $here_quote_character ];
23160                    $type = 'h';
23161                    if ( length($here_doc_target) > 80 ) {
23162                        my $truncated = substr( $here_doc_target, 0, 80 );
23163                        complain("Long here-target: '$truncated' ...\n");
23164                    }
23165                    elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
23166                        complain(
23167                            "Unconventional here-target: '$here_doc_target'\n"
23168                        );
23169                    }
23170                }
23171                elsif ( $expecting == TERM ) {
23172                    unless ($saw_error) {
23173
23174                        # shouldn't happen..
23175                        warning("Program bug; didn't find here doc target\n");
23176                        report_definite_bug();
23177                    }
23178                }
23179            }
23180            else {
23181            }
23182        },
23183        '->' => sub {
23184
23185            # if -> points to a bare word, we must scan for an identifier,
23186            # otherwise something like ->y would look like the y operator
23187            scan_identifier();
23188        },
23189
23190        # type = 'pp' for pre-increment, '++' for post-increment
23191        '++' => sub {
23192            if ( $expecting == TERM ) { $type = 'pp' }
23193            elsif ( $expecting == UNKNOWN ) {
23194                my ( $next_nonblank_token, $i_next ) =
23195                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
23196                if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23197            }
23198        },
23199
23200        '=>' => sub {
23201            if ( $last_nonblank_type eq $tok ) {
23202                complain("Repeated '=>'s \n");
23203            }
23204
23205            # patch for operator_expected: note if we are in the list (use.t)
23206            # TODO: make version numbers a new token type
23207            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23208        },
23209
23210        # type = 'mm' for pre-decrement, '--' for post-decrement
23211        '--' => sub {
23212
23213            if ( $expecting == TERM ) { $type = 'mm' }
23214            elsif ( $expecting == UNKNOWN ) {
23215                my ( $next_nonblank_token, $i_next ) =
23216                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
23217                if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23218            }
23219        },
23220
23221        '&&' => sub {
23222            error_if_expecting_TERM()
23223              if ( $expecting == TERM );
23224        },
23225
23226        '||' => sub {
23227            error_if_expecting_TERM()
23228              if ( $expecting == TERM );
23229        },
23230
23231        '//' => sub {
23232            error_if_expecting_TERM()
23233              if ( $expecting == TERM );
23234        },
23235    };
23236
23237    # ------------------------------------------------------------
23238    # end hash of code for handling individual token types
23239    # ------------------------------------------------------------
23240
23241    my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23242
23243    # These block types terminate statements and do not need a trailing
23244    # semicolon
23245    # patched for SWITCH/CASE/
23246    my %is_zero_continuation_block_type;
23247    @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23248      if elsif else unless while until for foreach switch case given when);
23249    @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
23250
23251    my %is_not_zero_continuation_block_type;
23252    @_ = qw(sort grep map do eval);
23253    @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
23254
23255    my %is_logical_container;
23256    @_ = qw(if elsif unless while and or err not && !  || for foreach);
23257    @is_logical_container{@_} = (1) x scalar(@_);
23258
23259    my %is_binary_type;
23260    @_ = qw(|| &&);
23261    @is_binary_type{@_} = (1) x scalar(@_);
23262
23263    my %is_binary_keyword;
23264    @_ = qw(and or err eq ne cmp);
23265    @is_binary_keyword{@_} = (1) x scalar(@_);
23266
23267    # 'L' is token for opening { at hash key
23268    my %is_opening_type;
23269    @_ = qw" L { ( [ ";
23270    @is_opening_type{@_} = (1) x scalar(@_);
23271
23272    # 'R' is token for closing } at hash key
23273    my %is_closing_type;
23274    @_ = qw" R } ) ] ";
23275    @is_closing_type{@_} = (1) x scalar(@_);
23276
23277    my %is_redo_last_next_goto;
23278    @_ = qw(redo last next goto);
23279    @is_redo_last_next_goto{@_} = (1) x scalar(@_);
23280
23281    my %is_use_require;
23282    @_ = qw(use require);
23283    @is_use_require{@_} = (1) x scalar(@_);
23284
23285    my %is_sub_package;
23286    @_ = qw(sub package);
23287    @is_sub_package{@_} = (1) x scalar(@_);
23288
23289    # This hash holds the hash key in $tokenizer_self for these keywords:
23290    my %is_format_END_DATA = (
23291        'format'   => '_in_format',
23292        '__END__'  => '_in_end',
23293        '__DATA__' => '_in_data',
23294    );
23295
23296    # ref: camel 3 p 147,
23297    # but perl may accept undocumented flags
23298    # perl 5.10 adds 'p' (preserve)
23299    my %quote_modifiers = (
23300        's'  => '[cegimosxp]',
23301        'y'  => '[cds]',
23302        'tr' => '[cds]',
23303        'm'  => '[cgimosxp]',
23304        'qr' => '[imosxp]',
23305        'q'  => "",
23306        'qq' => "",
23307        'qw' => "",
23308        'qx' => "",
23309    );
23310
23311    # table showing how many quoted things to look for after quote operator..
23312    # s, y, tr have 2 (pattern and replacement)
23313    # others have 1 (pattern only)
23314    my %quote_items = (
23315        's'  => 2,
23316        'y'  => 2,
23317        'tr' => 2,
23318        'm'  => 1,
23319        'qr' => 1,
23320        'q'  => 1,
23321        'qq' => 1,
23322        'qw' => 1,
23323        'qx' => 1,
23324    );
23325
23326    sub tokenize_this_line {
23327
23328  # This routine breaks a line of perl code into tokens which are of use in
23329  # indentation and reformatting.  One of my goals has been to define tokens
23330  # such that a newline may be inserted between any pair of tokens without
23331  # changing or invalidating the program. This version comes close to this,
23332  # although there are necessarily a few exceptions which must be caught by
23333  # the formatter.  Many of these involve the treatment of bare words.
23334  #
23335  # The tokens and their types are returned in arrays.  See previous
23336  # routine for their names.
23337  #
23338  # See also the array "valid_token_types" in the BEGIN section for an
23339  # up-to-date list.
23340  #
23341  # To simplify things, token types are either a single character, or they
23342  # are identical to the tokens themselves.
23343  #
23344  # As a debugging aid, the -D flag creates a file containing a side-by-side
23345  # comparison of the input string and its tokenization for each line of a file.
23346  # This is an invaluable debugging aid.
23347  #
23348  # In addition to tokens, and some associated quantities, the tokenizer
23349  # also returns flags indication any special line types.  These include
23350  # quotes, here_docs, formats.
23351  #
23352  # -----------------------------------------------------------------------
23353  #
23354  # How to add NEW_TOKENS:
23355  #
23356  # New token types will undoubtedly be needed in the future both to keep up
23357  # with changes in perl and to help adapt the tokenizer to other applications.
23358  #
23359  # Here are some notes on the minimal steps.  I wrote these notes while
23360  # adding the 'v' token type for v-strings, which are things like version
23361  # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
23362  # can use your editor to search for the string "NEW_TOKENS" to find the
23363  # appropriate sections to change):
23364  #
23365  # *. Try to talk somebody else into doing it!  If not, ..
23366  #
23367  # *. Make a backup of your current version in case things don't work out!
23368  #
23369  # *. Think of a new, unused character for the token type, and add to
23370  # the array @valid_token_types in the BEGIN section of this package.
23371  # For example, I used 'v' for v-strings.
23372  #
23373  # *. Implement coding to recognize the $type of the token in this routine.
23374  # This is the hardest part, and is best done by immitating or modifying
23375  # some of the existing coding.  For example, to recognize v-strings, I
23376  # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23377  # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23378  #
23379  # *. Update sub operator_expected.  This update is critically important but
23380  # the coding is trivial.  Look at the comments in that routine for help.
23381  # For v-strings, which should behave like numbers, I just added 'v' to the
23382  # regex used to handle numbers and strings (types 'n' and 'Q').
23383  #
23384  # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23385  # Perl::Tidy::Formatter for breaking lines around this token type.  You can
23386  # skip this step and take the default at first, then adjust later to get
23387  # desired results.  For adding type 'v', I looked at sub bond_strength and
23388  # saw that number type 'n' was using default strengths, so I didn't do
23389  # anything.  I may tune it up someday if I don't like the way line
23390  # breaks with v-strings look.
23391  #
23392  # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23393  # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
23394  # and saw that type 'n' used spaces on both sides, so I just added 'v'
23395  # to the array @spaces_both_sides.
23396  #
23397  # *. Update HtmlWriter package so that users can colorize the token as
23398  # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
23399  # that package.  For v-strings, I initially chose to use a default color
23400  # equal to the default for numbers, but it might be nice to change that
23401  # eventually.
23402  #
23403  # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23404  #
23405  # *. Run lots and lots of debug tests.  Start with special files designed
23406  # to test the new token type.  Run with the -D flag to create a .DEBUG
23407  # file which shows the tokenization.  When these work ok, test as many old
23408  # scripts as possible.  Start with all of the '.t' files in the 'test'
23409  # directory of the distribution file.  Compare .tdy output with previous
23410  # version and updated version to see the differences.  Then include as
23411  # many more files as possible. My own technique has been to collect a huge
23412  # number of perl scripts (thousands!) into one directory and run perltidy
23413  # *, then run diff between the output of the previous version and the
23414  # current version.
23415  #
23416  # *. For another example, search for the smartmatch operator '~~'
23417  # with your editor to see where updates were made for it.
23418  #
23419  # -----------------------------------------------------------------------
23420
23421        my $line_of_tokens = shift;
23422        my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23423
23424        # patch while coding change is underway
23425        # make callers private data to allow access
23426        # $tokenizer_self = $caller_tokenizer_self;
23427
23428        # extract line number for use in error messages
23429        $input_line_number = $line_of_tokens->{_line_number};
23430
23431        # reinitialize for multi-line quote
23432        $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23433
23434        # check for pod documentation
23435        if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23436
23437            # must not be in multi-line quote
23438            # and must not be in an eqn
23439            if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23440            {
23441                $tokenizer_self->{_in_pod} = 1;
23442                return;
23443            }
23444        }
23445
23446        $input_line = $untrimmed_input_line;
23447
23448        chomp $input_line;
23449
23450        # trim start of this line unless we are continuing a quoted line
23451        # do not trim end because we might end in a quote (test: deken4.pl)
23452        # Perl::Tidy::Formatter will delete needless trailing blanks
23453        unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
23454            $input_line =~ s/^\s*//;    # trim left end
23455        }
23456
23457        # update the copy of the line for use in error messages
23458        # This must be exactly what we give the pre_tokenizer
23459        $tokenizer_self->{_line_text} = $input_line;
23460
23461        # re-initialize for the main loop
23462        $routput_token_list     = [];    # stack of output token indexes
23463        $routput_token_type     = [];    # token types
23464        $routput_block_type     = [];    # types of code block
23465        $routput_container_type = [];    # paren types, such as if, elsif, ..
23466        $routput_type_sequence  = [];    # nesting sequential number
23467
23468        $rhere_target_list = [];
23469
23470        $tok             = $last_nonblank_token;
23471        $type            = $last_nonblank_type;
23472        $prototype       = $last_nonblank_prototype;
23473        $last_nonblank_i = -1;
23474        $block_type      = $last_nonblank_block_type;
23475        $container_type  = $last_nonblank_container_type;
23476        $type_sequence   = $last_nonblank_type_sequence;
23477        $indent_flag     = 0;
23478        $peeked_ahead    = 0;
23479
23480        # tokenization is done in two stages..
23481        # stage 1 is a very simple pre-tokenization
23482        my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
23483
23484        # a little optimization for a full-line comment
23485        if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
23486            $max_tokens_wanted = 1    # no use tokenizing a comment
23487        }
23488
23489        # start by breaking the line into pre-tokens
23490        ( $rtokens, $rtoken_map, $rtoken_type ) =
23491          pre_tokenize( $input_line, $max_tokens_wanted );
23492
23493        $max_token_index = scalar(@$rtokens) - 1;
23494        push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
23495        push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
23496        push( @$rtoken_type, 'b', 'b', 'b' );
23497
23498        # initialize for main loop
23499        for $i ( 0 .. $max_token_index + 3 ) {
23500            $routput_token_type->[$i]     = "";
23501            $routput_block_type->[$i]     = "";
23502            $routput_container_type->[$i] = "";
23503            $routput_type_sequence->[$i]  = "";
23504            $routput_indent_flag->[$i]    = 0;
23505        }
23506        $i     = -1;
23507        $i_tok = -1;
23508
23509        # ------------------------------------------------------------
23510        # begin main tokenization loop
23511        # ------------------------------------------------------------
23512
23513        # we are looking at each pre-token of one line and combining them
23514        # into tokens
23515        while ( ++$i <= $max_token_index ) {
23516
23517            if ($in_quote) {    # continue looking for end of a quote
23518                $type = $quote_type;
23519
23520                unless ( @{$routput_token_list} )
23521                {               # initialize if continuation line
23522                    push( @{$routput_token_list}, $i );
23523                    $routput_token_type->[$i] = $type;
23524
23525                }
23526                $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
23527
23528                # scan for the end of the quote or pattern
23529                (
23530                    $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23531                    $quoted_string_1, $quoted_string_2
23532                  )
23533                  = do_quote(
23534                    $i,               $in_quote,    $quote_character,
23535                    $quote_pos,       $quote_depth, $quoted_string_1,
23536                    $quoted_string_2, $rtokens,     $rtoken_map,
23537                    $max_token_index
23538                  );
23539
23540                # all done if we didn't find it
23541                last if ($in_quote);
23542
23543                # save pattern and replacement text for rescanning
23544                my $qs1 = $quoted_string_1;
23545                my $qs2 = $quoted_string_2;
23546
23547                # re-initialize for next search
23548                $quote_character = '';
23549                $quote_pos       = 0;
23550                $quote_type      = 'Q';
23551                $quoted_string_1 = "";
23552                $quoted_string_2 = "";
23553                last if ( ++$i > $max_token_index );
23554
23555                # look for any modifiers
23556                if ($allowed_quote_modifiers) {
23557
23558                    # check for exact quote modifiers
23559                    if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
23560                        my $str = $$rtokens[$i];
23561                        my $saw_modifier_e;
23562                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
23563                            my $pos = pos($str);
23564                            my $char = substr( $str, $pos - 1, 1 );
23565                            $saw_modifier_e ||= ( $char eq 'e' );
23566                        }
23567
23568                        # For an 'e' quote modifier we must scan the replacement
23569                        # text for here-doc targets.
23570                        if ($saw_modifier_e) {
23571
23572                            my $rht = scan_replacement_text($qs1);
23573
23574                            # Change type from 'Q' to 'h' for quotes with
23575                            # here-doc targets so that the formatter (see sub
23576                            # print_line_of_tokens) will not make any line
23577                            # breaks after this point.
23578                            if ($rht) {
23579                                push @{$rhere_target_list}, @{$rht};
23580                                $type = 'h';
23581                                if ( $i_tok < 0 ) {
23582                                    my $ilast = $routput_token_list->[-1];
23583                                    $routput_token_type->[$ilast] = $type;
23584                                }
23585                            }
23586                        }
23587
23588                        if ( defined( pos($str) ) ) {
23589
23590                            # matched
23591                            if ( pos($str) == length($str) ) {
23592                                last if ( ++$i > $max_token_index );
23593                            }
23594
23595                            # Looks like a joined quote modifier
23596                            # and keyword, maybe something like
23597                            # s/xxx/yyy/gefor @k=...
23598                            # Example is "galgen.pl".  Would have to split
23599                            # the word and insert a new token in the
23600                            # pre-token list.  This is so rare that I haven't
23601                            # done it.  Will just issue a warning citation.
23602
23603                            # This error might also be triggered if my quote
23604                            # modifier characters are incomplete
23605                            else {
23606                                warning(<<EOM);
23607
23608Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
23609Please put a space between quote modifiers and trailing keywords.
23610EOM
23611
23612                           # print "token $$rtokens[$i]\n";
23613                           # my $num = length($str) - pos($str);
23614                           # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
23615                           # print "continuing with new token $$rtokens[$i]\n";
23616
23617                                # skipping past this token does least damage
23618                                last if ( ++$i > $max_token_index );
23619                            }
23620                        }
23621                        else {
23622
23623                            # example file: rokicki4.pl
23624                            # This error might also be triggered if my quote
23625                            # modifier characters are incomplete
23626                            write_logfile_entry(
23627"Note: found word $str at quote modifier location\n"
23628                            );
23629                        }
23630                    }
23631
23632                    # re-initialize
23633                    $allowed_quote_modifiers = "";
23634                }
23635            }
23636
23637            unless ( $tok =~ /^\s*$/ ) {
23638
23639                # try to catch some common errors
23640                if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
23641
23642                    if ( $last_nonblank_token eq 'eq' ) {
23643                        complain("Should 'eq' be '==' here ?\n");
23644                    }
23645                    elsif ( $last_nonblank_token eq 'ne' ) {
23646                        complain("Should 'ne' be '!=' here ?\n");
23647                    }
23648                }
23649
23650                $last_last_nonblank_token      = $last_nonblank_token;
23651                $last_last_nonblank_type       = $last_nonblank_type;
23652                $last_last_nonblank_block_type = $last_nonblank_block_type;
23653                $last_last_nonblank_container_type =
23654                  $last_nonblank_container_type;
23655                $last_last_nonblank_type_sequence =
23656                  $last_nonblank_type_sequence;
23657                $last_nonblank_token          = $tok;
23658                $last_nonblank_type           = $type;
23659                $last_nonblank_prototype      = $prototype;
23660                $last_nonblank_block_type     = $block_type;
23661                $last_nonblank_container_type = $container_type;
23662                $last_nonblank_type_sequence  = $type_sequence;
23663                $last_nonblank_i              = $i_tok;
23664            }
23665
23666            # store previous token type
23667            if ( $i_tok >= 0 ) {
23668                $routput_token_type->[$i_tok]     = $type;
23669                $routput_block_type->[$i_tok]     = $block_type;
23670                $routput_container_type->[$i_tok] = $container_type;
23671                $routput_type_sequence->[$i_tok]  = $type_sequence;
23672                $routput_indent_flag->[$i_tok]    = $indent_flag;
23673            }
23674            my $pre_tok  = $$rtokens[$i];        # get the next pre-token
23675            my $pre_type = $$rtoken_type[$i];    # and type
23676            $tok  = $pre_tok;
23677            $type = $pre_type;                   # to be modified as necessary
23678            $block_type = "";    # blank for all tokens except code block braces
23679            $container_type = "";    # blank for all tokens except some parens
23680            $type_sequence  = "";    # blank for all tokens except ?/:
23681            $indent_flag    = 0;
23682            $prototype = "";    # blank for all tokens except user defined subs
23683            $i_tok     = $i;
23684
23685            # this pre-token will start an output token
23686            push( @{$routput_token_list}, $i_tok );
23687
23688            # continue gathering identifier if necessary
23689            # but do not start on blanks and comments
23690            if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
23691
23692                if ( $id_scan_state =~ /^(sub|package)/ ) {
23693                    scan_id();
23694                }
23695                else {
23696                    scan_identifier();
23697                }
23698
23699                last if ($id_scan_state);
23700                next if ( ( $i > 0 ) || $type );
23701
23702                # didn't find any token; start over
23703                $type = $pre_type;
23704                $tok  = $pre_tok;
23705            }
23706
23707            # handle whitespace tokens..
23708            next if ( $type eq 'b' );
23709            my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
23710            my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
23711
23712            # Build larger tokens where possible, since we are not in a quote.
23713            #
23714            # First try to assemble digraphs.  The following tokens are
23715            # excluded and handled specially:
23716            # '/=' is excluded because the / might start a pattern.
23717            # 'x=' is excluded since it might be $x=, with $ on previous line
23718            # '**' and *= might be typeglobs of punctuation variables
23719            # I have allowed tokens starting with <, such as <=,
23720            # because I don't think these could be valid angle operators.
23721            # test file: storrs4.pl
23722            my $test_tok   = $tok . $$rtokens[ $i + 1 ];
23723            my $combine_ok = $is_digraph{$test_tok};
23724
23725            # check for special cases which cannot be combined
23726            if ($combine_ok) {
23727
23728                # '//' must be defined_or operator if an operator is expected.
23729                # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
23730                # could be migrated here for clarity
23731                if ( $test_tok eq '//' ) {
23732                    my $next_type = $$rtokens[ $i + 1 ];
23733                    my $expecting =
23734                      operator_expected( $prev_type, $tok, $next_type );
23735                    $combine_ok = 0 unless ( $expecting == OPERATOR );
23736                }
23737            }
23738
23739            if (
23740                $combine_ok
23741                && ( $test_tok ne '/=' )    # might be pattern
23742                && ( $test_tok ne 'x=' )    # might be $x
23743                && ( $test_tok ne '**' )    # typeglob?
23744                && ( $test_tok ne '*=' )    # typeglob?
23745              )
23746            {
23747                $tok = $test_tok;
23748                $i++;
23749
23750                # Now try to assemble trigraphs.  Note that all possible
23751                # perl trigraphs can be constructed by appending a character
23752                # to a digraph.
23753                $test_tok = $tok . $$rtokens[ $i + 1 ];
23754
23755                if ( $is_trigraph{$test_tok} ) {
23756                    $tok = $test_tok;
23757                    $i++;
23758                }
23759            }
23760
23761            $type      = $tok;
23762            $next_tok  = $$rtokens[ $i + 1 ];
23763            $next_type = $$rtoken_type[ $i + 1 ];
23764
23765            TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
23766                local $" = ')(';
23767                my @debug_list = (
23768                    $last_nonblank_token,      $tok,
23769                    $next_tok,                 $brace_depth,
23770                    $brace_type[$brace_depth], $paren_depth,
23771                    $paren_type[$paren_depth]
23772                );
23773                print "TOKENIZE:(@debug_list)\n";
23774            };
23775
23776            # turn off attribute list on first non-blank, non-bareword
23777            if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
23778
23779            ###############################################################
23780            # We have the next token, $tok.
23781            # Now we have to examine this token and decide what it is
23782            # and define its $type
23783            #
23784            # section 1: bare words
23785            ###############################################################
23786
23787            if ( $pre_type eq 'w' ) {
23788                $expecting = operator_expected( $prev_type, $tok, $next_type );
23789                my ( $next_nonblank_token, $i_next ) =
23790                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
23791
23792                # ATTRS: handle sub and variable attributes
23793                if ($in_attribute_list) {
23794
23795                    # treat bare word followed by open paren like qw(
23796                    if ( $next_nonblank_token eq '(' ) {
23797                        $in_quote                = $quote_items{'q'};
23798                        $allowed_quote_modifiers = $quote_modifiers{'q'};
23799                        $type                    = 'q';
23800                        $quote_type              = 'q';
23801                        next;
23802                    }
23803
23804                    # handle bareword not followed by open paren
23805                    else {
23806                        $type = 'w';
23807                        next;
23808                    }
23809                }
23810
23811                # quote a word followed by => operator
23812                if ( $next_nonblank_token eq '=' ) {
23813
23814                    if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
23815                        if ( $is_constant{$current_package}{$tok} ) {
23816                            $type = 'C';
23817                        }
23818                        elsif ( $is_user_function{$current_package}{$tok} ) {
23819                            $type = 'U';
23820                            $prototype =
23821                              $user_function_prototype{$current_package}{$tok};
23822                        }
23823                        elsif ( $tok =~ /^v\d+$/ ) {
23824                            $type = 'v';
23825                            report_v_string($tok);
23826                        }
23827                        else { $type = 'w' }
23828
23829                        next;
23830                    }
23831                }
23832
23833     # quote a bare word within braces..like xxx->{s}; note that we
23834     # must be sure this is not a structural brace, to avoid
23835     # mistaking {s} in the following for a quoted bare word:
23836     #     for(@[){s}bla}BLA}
23837     # Also treat q in something like var{-q} as a bare word, not qoute operator
23838                ##if (   ( $last_nonblank_type eq 'L' )
23839                ##    && ( $next_nonblank_token eq '}' ) )
23840                if (
23841                    $next_nonblank_token eq '}'
23842                    && (
23843                        $last_nonblank_type eq 'L'
23844                        || (   $last_nonblank_type eq 'm'
23845                            && $last_last_nonblank_type eq 'L' )
23846                    )
23847                  )
23848                {
23849                    $type = 'w';
23850                    next;
23851                }
23852
23853                # a bare word immediately followed by :: is not a keyword;
23854                # use $tok_kw when testing for keywords to avoid a mistake
23855                my $tok_kw = $tok;
23856                if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
23857                {
23858                    $tok_kw .= '::';
23859                }
23860
23861                # handle operator x (now we know it isn't $x=)
23862                if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
23863                    if ( $tok eq 'x' ) {
23864
23865                        if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
23866                            $tok  = 'x=';
23867                            $type = $tok;
23868                            $i++;
23869                        }
23870                        else {
23871                            $type = 'x';
23872                        }
23873                    }
23874
23875                    # FIXME: Patch: mark something like x4 as an integer for now
23876                    # It gets fixed downstream.  This is easier than
23877                    # splitting the pretoken.
23878                    else {
23879                        $type = 'n';
23880                    }
23881                }
23882
23883                elsif ( ( $tok eq 'strict' )
23884                    and ( $last_nonblank_token eq 'use' ) )
23885                {
23886                    $tokenizer_self->{_saw_use_strict} = 1;
23887                    scan_bare_identifier();
23888                }
23889
23890                elsif ( ( $tok eq 'warnings' )
23891                    and ( $last_nonblank_token eq 'use' ) )
23892                {
23893                    $tokenizer_self->{_saw_perl_dash_w} = 1;
23894
23895                    # scan as identifier, so that we pick up something like:
23896                    # use warnings::register
23897                    scan_bare_identifier();
23898                }
23899
23900                elsif (
23901                       $tok eq 'AutoLoader'
23902                    && $tokenizer_self->{_look_for_autoloader}
23903                    && (
23904                        $last_nonblank_token eq 'use'
23905
23906                        # these regexes are from AutoSplit.pm, which we want
23907                        # to mimic
23908                        || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
23909                        || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
23910                    )
23911                  )
23912                {
23913                    write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
23914                    $tokenizer_self->{_saw_autoloader}      = 1;
23915                    $tokenizer_self->{_look_for_autoloader} = 0;
23916                    scan_bare_identifier();
23917                }
23918
23919                elsif (
23920                       $tok eq 'SelfLoader'
23921                    && $tokenizer_self->{_look_for_selfloader}
23922                    && (   $last_nonblank_token eq 'use'
23923                        || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
23924                        || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
23925                  )
23926                {
23927                    write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
23928                    $tokenizer_self->{_saw_selfloader}      = 1;
23929                    $tokenizer_self->{_look_for_selfloader} = 0;
23930                    scan_bare_identifier();
23931                }
23932
23933                elsif ( ( $tok eq 'constant' )
23934                    and ( $last_nonblank_token eq 'use' ) )
23935                {
23936                    scan_bare_identifier();
23937                    my ( $next_nonblank_token, $i_next ) =
23938                      find_next_nonblank_token( $i, $rtokens,
23939                        $max_token_index );
23940
23941                    if ($next_nonblank_token) {
23942
23943                        if ( $is_keyword{$next_nonblank_token} ) {
23944                            warning(
23945"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
23946                            );
23947                        }
23948
23949                        # FIXME: could check for error in which next token is
23950                        # not a word (number, punctuation, ..)
23951                        else {
23952                            $is_constant{$current_package}
23953                              {$next_nonblank_token} = 1;
23954                        }
23955                    }
23956                }
23957
23958                # various quote operators
23959                elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
23960                    if ( $expecting == OPERATOR ) {
23961
23962                        # patch for paren-less for/foreach glitch, part 1
23963                        # perl will accept this construct as valid:
23964                        #
23965                        #    foreach my $key qw\Uno Due Tres Quadro\ {
23966                        #        print "Set $key\n";
23967                        #    }
23968                        unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
23969                        {
23970                            error_if_expecting_OPERATOR();
23971                        }
23972                    }
23973                    $in_quote                = $quote_items{$tok};
23974                    $allowed_quote_modifiers = $quote_modifiers{$tok};
23975
23976                   # All quote types are 'Q' except possibly qw quotes.
23977                   # qw quotes are special in that they may generally be trimmed
23978                   # of leading and trailing whitespace.  So they are given a
23979                   # separate type, 'q', unless requested otherwise.
23980                    $type =
23981                      ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
23982                      ? 'q'
23983                      : 'Q';
23984                    $quote_type = $type;
23985                }
23986
23987                # check for a statement label
23988                elsif (
23989                       ( $next_nonblank_token eq ':' )
23990                    && ( $$rtokens[ $i_next + 1 ] ne ':' )
23991                    && ( $i_next <= $max_token_index )    # colon on same line
23992                    && label_ok()
23993                  )
23994                {
23995                    if ( $tok !~ /[A-Z]/ ) {
23996                        push @{ $tokenizer_self->{_rlower_case_labels_at} },
23997                          $input_line_number;
23998                    }
23999                    $type = 'J';
24000                    $tok .= ':';
24001                    $i = $i_next;
24002                    next;
24003                }
24004
24005                #      'sub' || 'package'
24006                elsif ( $is_sub_package{$tok_kw} ) {
24007                    error_if_expecting_OPERATOR()
24008                      if ( $expecting == OPERATOR );
24009                    scan_id();
24010                }
24011
24012                # Note on token types for format, __DATA__, __END__:
24013                # It simplifies things to give these type ';', so that when we
24014                # start rescanning we will be expecting a token of type TERM.
24015                # We will switch to type 'k' before outputting the tokens.
24016                elsif ( $is_format_END_DATA{$tok_kw} ) {
24017                    $type = ';';    # make tokenizer look for TERM next
24018                    $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
24019                    last;
24020                }
24021
24022                elsif ( $is_keyword{$tok_kw} ) {
24023                    $type = 'k';
24024
24025                    # Since for and foreach may not be followed immediately
24026                    # by an opening paren, we have to remember which keyword
24027                    # is associated with the next '('
24028                    if ( $is_for_foreach{$tok} ) {
24029                        if ( new_statement_ok() ) {
24030                            $want_paren = $tok;
24031                        }
24032                    }
24033
24034                    # recognize 'use' statements, which are special
24035                    elsif ( $is_use_require{$tok} ) {
24036                        $statement_type = $tok;
24037                        error_if_expecting_OPERATOR()
24038                          if ( $expecting == OPERATOR );
24039                    }
24040
24041                    # remember my and our to check for trailing ": shared"
24042                    elsif ( $is_my_our{$tok} ) {
24043                        $statement_type = $tok;
24044                    }
24045
24046                    # Check for misplaced 'elsif' and 'else', but allow isolated
24047                    # else or elsif blocks to be formatted.  This is indicated
24048                    # by a last noblank token of ';'
24049                    elsif ( $tok eq 'elsif' ) {
24050                        if (   $last_nonblank_token ne ';'
24051                            && $last_nonblank_block_type !~
24052                            /^(if|elsif|unless)$/ )
24053                        {
24054                            warning(
24055"expecting '$tok' to follow one of 'if|elsif|unless'\n"
24056                            );
24057                        }
24058                    }
24059                    elsif ( $tok eq 'else' ) {
24060
24061                        # patched for SWITCH/CASE
24062                        if (   $last_nonblank_token ne ';'
24063                            && $last_nonblank_block_type !~
24064                            /^(if|elsif|unless|case|when)$/ )
24065                        {
24066                            warning(
24067"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
24068                            );
24069                        }
24070                    }
24071                    elsif ( $tok eq 'continue' ) {
24072                        if (   $last_nonblank_token ne ';'
24073                            && $last_nonblank_block_type !~
24074                            /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
24075                        {
24076
24077                            # note: ';' '{' and '}' in list above
24078                            # because continues can follow bare blocks;
24079                            # ':' is labeled block
24080                            #
24081                            ############################################
24082                            # NOTE: This check has been deactivated because
24083                            # continue has an alternative usage for given/when
24084                            # blocks in perl 5.10
24085                            ## warning("'$tok' should follow a block\n");
24086                            ############################################
24087                        }
24088                    }
24089
24090                    # patch for SWITCH/CASE if 'case' and 'when are
24091                    # treated as keywords.
24092                    elsif ( $tok eq 'when' || $tok eq 'case' ) {
24093                        $statement_type = $tok;    # next '{' is block
24094                    }
24095
24096                    # indent trailing if/unless/while/until
24097                    # outdenting will be handled by later indentation loop
24098                    if (   $tok =~ /^(if|unless|while|until)$/
24099                        && $next_nonblank_token ne '(' )
24100                    {
24101                        $indent_flag = 1;
24102                    }
24103                }
24104
24105                # check for inline label following
24106                #         /^(redo|last|next|goto)$/
24107                elsif (( $last_nonblank_type eq 'k' )
24108                    && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
24109                {
24110                    $type = 'j';
24111                    next;
24112                }
24113
24114                # something else --
24115                else {
24116
24117                    scan_bare_identifier();
24118                    if ( $type eq 'w' ) {
24119
24120                        if ( $expecting == OPERATOR ) {
24121
24122                            # don't complain about possible indirect object
24123                            # notation.
24124                            # For example:
24125                            #   package main;
24126                            #   sub new($) { ... }
24127                            #   $b = new A::;  # calls A::new
24128                            #   $c = new A;    # same thing but suspicious
24129                            # This will call A::new but we have a 'new' in
24130                            # main:: which looks like a constant.
24131                            #
24132                            if ( $last_nonblank_type eq 'C' ) {
24133                                if ( $tok !~ /::$/ ) {
24134                                    complain(<<EOM);
24135Expecting operator after '$last_nonblank_token' but found bare word '$tok'
24136       Maybe indirectet object notation?
24137EOM
24138                                }
24139                            }
24140                            else {
24141                                error_if_expecting_OPERATOR("bareword");
24142                            }
24143                        }
24144
24145                        # mark bare words immediately followed by a paren as
24146                        # functions
24147                        $next_tok = $$rtokens[ $i + 1 ];
24148                        if ( $next_tok eq '(' ) {
24149                            $type = 'U';
24150                        }
24151
24152                        # underscore after file test operator is file handle
24153                        if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
24154                            $type = 'Z';
24155                        }
24156
24157                        # patch for SWITCH/CASE if 'case' and 'when are
24158                        # not treated as keywords:
24159                        if (
24160                            (
24161                                   $tok eq 'case'
24162                                && $brace_type[$brace_depth] eq 'switch'
24163                            )
24164                            || (   $tok eq 'when'
24165                                && $brace_type[$brace_depth] eq 'given' )
24166                          )
24167                        {
24168                            $statement_type = $tok;    # next '{' is block
24169                            $type = 'k';    # for keyword syntax coloring
24170                        }
24171
24172                        # patch for SWITCH/CASE if switch and given not keywords
24173                        # Switch is not a perl 5 keyword, but we will gamble
24174                        # and mark switch followed by paren as a keyword.  This
24175                        # is only necessary to get html syntax coloring nice,
24176                        # and does not commit this as being a switch/case.
24177                        if ( $next_nonblank_token eq '('
24178                            && ( $tok eq 'switch' || $tok eq 'given' ) )
24179                        {
24180                            $type = 'k';    # for keyword syntax coloring
24181                        }
24182                    }
24183                }
24184            }
24185
24186            ###############################################################
24187            # section 2: strings of digits
24188            ###############################################################
24189            elsif ( $pre_type eq 'd' ) {
24190                $expecting = operator_expected( $prev_type, $tok, $next_type );
24191                error_if_expecting_OPERATOR("Number")
24192                  if ( $expecting == OPERATOR );
24193                my $number = scan_number();
24194                if ( !defined($number) ) {
24195
24196                    # shouldn't happen - we should always get a number
24197                    warning("non-number beginning with digit--program bug\n");
24198                    report_definite_bug();
24199                }
24200            }
24201
24202            ###############################################################
24203            # section 3: all other tokens
24204            ###############################################################
24205
24206            else {
24207                last if ( $tok eq '#' );
24208                my $code = $tokenization_code->{$tok};
24209                if ($code) {
24210                    $expecting =
24211                      operator_expected( $prev_type, $tok, $next_type );
24212                    $code->();
24213                    redo if $in_quote;
24214                }
24215            }
24216        }
24217
24218        # -----------------------------
24219        # end of main tokenization loop
24220        # -----------------------------
24221
24222        if ( $i_tok >= 0 ) {
24223            $routput_token_type->[$i_tok]     = $type;
24224            $routput_block_type->[$i_tok]     = $block_type;
24225            $routput_container_type->[$i_tok] = $container_type;
24226            $routput_type_sequence->[$i_tok]  = $type_sequence;
24227            $routput_indent_flag->[$i_tok]    = $indent_flag;
24228        }
24229
24230        unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24231            $last_last_nonblank_token          = $last_nonblank_token;
24232            $last_last_nonblank_type           = $last_nonblank_type;
24233            $last_last_nonblank_block_type     = $last_nonblank_block_type;
24234            $last_last_nonblank_container_type = $last_nonblank_container_type;
24235            $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
24236            $last_nonblank_token               = $tok;
24237            $last_nonblank_type                = $type;
24238            $last_nonblank_block_type          = $block_type;
24239            $last_nonblank_container_type      = $container_type;
24240            $last_nonblank_type_sequence       = $type_sequence;
24241            $last_nonblank_prototype           = $prototype;
24242        }
24243
24244        # reset indentation level if necessary at a sub or package
24245        # in an attempt to recover from a nesting error
24246        if ( $level_in_tokenizer < 0 ) {
24247            if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24248                reset_indentation_level(0);
24249                brace_warning("resetting level to 0 at $1 $2\n");
24250            }
24251        }
24252
24253        # all done tokenizing this line ...
24254        # now prepare the final list of tokens and types
24255
24256        my @token_type     = ();   # stack of output token types
24257        my @block_type     = ();   # stack of output code block types
24258        my @container_type = ();   # stack of output code container types
24259        my @type_sequence  = ();   # stack of output type sequence numbers
24260        my @tokens         = ();   # output tokens
24261        my @levels         = ();   # structural brace levels of output tokens
24262        my @slevels        = ();   # secondary nesting levels of output tokens
24263        my @nesting_tokens = ();   # string of tokens leading to this depth
24264        my @nesting_types  = ();   # string of token types leading to this depth
24265        my @nesting_blocks = ();   # string of block types leading to this depth
24266        my @nesting_lists  = ();   # string of list types leading to this depth
24267        my @ci_string = ();  # string needed to compute continuation indentation
24268        my @container_environment = ();    # BLOCK or LIST
24269        my $container_environment = '';
24270        my $im                    = -1;    # previous $i value
24271        my $num;
24272        my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24273
24274# Computing Token Indentation
24275#
24276#     The final section of the tokenizer forms tokens and also computes
24277#     parameters needed to find indentation.  It is much easier to do it
24278#     in the tokenizer than elsewhere.  Here is a brief description of how
24279#     indentation is computed.  Perl::Tidy computes indentation as the sum
24280#     of 2 terms:
24281#
24282#     (1) structural indentation, such as if/else/elsif blocks
24283#     (2) continuation indentation, such as long parameter call lists.
24284#
24285#     These are occasionally called primary and secondary indentation.
24286#
24287#     Structural indentation is introduced by tokens of type '{', although
24288#     the actual tokens might be '{', '(', or '['.  Structural indentation
24289#     is of two types: BLOCK and non-BLOCK.  Default structural indentation
24290#     is 4 characters if the standard indentation scheme is used.
24291#
24292#     Continuation indentation is introduced whenever a line at BLOCK level
24293#     is broken before its termination.  Default continuation indentation
24294#     is 2 characters in the standard indentation scheme.
24295#
24296#     Both types of indentation may be nested arbitrarily deep and
24297#     interlaced.  The distinction between the two is somewhat arbitrary.
24298#
24299#     For each token, we will define two variables which would apply if
24300#     the current statement were broken just before that token, so that
24301#     that token started a new line:
24302#
24303#     $level = the structural indentation level,
24304#     $ci_level = the continuation indentation level
24305#
24306#     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24307#     assuming defaults.  However, in some special cases it is customary
24308#     to modify $ci_level from this strict value.
24309#
24310#     The total structural indentation is easy to compute by adding and
24311#     subtracting 1 from a saved value as types '{' and '}' are seen.  The
24312#     running value of this variable is $level_in_tokenizer.
24313#
24314#     The total continuation is much more difficult to compute, and requires
24315#     several variables.  These veriables are:
24316#
24317#     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24318#       each indentation level, if there are intervening open secondary
24319#       structures just prior to that level.
24320#     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24321#       if the last token at that level is "continued", meaning that it
24322#       is not the first token of an expression.
24323#     $nesting_block_string = a string of 1's and 0's indicating, for each
24324#       indentation level, if the level is of type BLOCK or not.
24325#     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24326#     $nesting_list_string = a string of 1's and 0's indicating, for each
24327#       indentation level, if it is is appropriate for list formatting.
24328#       If so, continuation indentation is used to indent long list items.
24329#     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24330#     @{$rslevel_stack} = a stack of total nesting depths at each
24331#       structural indentation level, where "total nesting depth" means
24332#       the nesting depth that would occur if every nesting token -- '{', '[',
24333#       and '(' -- , regardless of context, is used to compute a nesting
24334#       depth.
24335
24336        #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24337        #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24338
24339        my ( $ci_string_i, $level_i, $nesting_block_string_i,
24340            $nesting_list_string_i, $nesting_token_string_i,
24341            $nesting_type_string_i, );
24342
24343        foreach $i ( @{$routput_token_list} )
24344        {    # scan the list of pre-tokens indexes
24345
24346            # self-checking for valid token types
24347            my $type                    = $routput_token_type->[$i];
24348            my $forced_indentation_flag = $routput_indent_flag->[$i];
24349
24350            # See if we should undo the $forced_indentation_flag.
24351            # Forced indentation after 'if', 'unless', 'while' and 'until'
24352            # expressions without trailing parens is optional and doesn't
24353            # always look good.  It is usually okay for a trailing logical
24354            # expression, but if the expression is a function call, code block,
24355            # or some kind of list it puts in an unwanted extra indentation
24356            # level which is hard to remove.
24357            #
24358            # Example where extra indentation looks ok:
24359            # return 1
24360            #   if $det_a < 0 and $det_b > 0
24361            #       or $det_a > 0 and $det_b < 0;
24362            #
24363            # Example where extra indentation is not needed because
24364            # the eval brace also provides indentation:
24365            # print "not " if defined eval {
24366            #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24367            # };
24368            #
24369            # The following rule works fairly well:
24370            #   Undo the flag if the end of this line, or start of the next
24371            #   line, is an opening container token or a comma.
24372            # This almost always works, but if not after another pass it will
24373            # be stable.
24374            if ( $forced_indentation_flag && $type eq 'k' ) {
24375                my $ixlast  = -1;
24376                my $ilast   = $routput_token_list->[$ixlast];
24377                my $toklast = $routput_token_type->[$ilast];
24378                if ( $toklast eq '#' ) {
24379                    $ixlast--;
24380                    $ilast   = $routput_token_list->[$ixlast];
24381                    $toklast = $routput_token_type->[$ilast];
24382                }
24383                if ( $toklast eq 'b' ) {
24384                    $ixlast--;
24385                    $ilast   = $routput_token_list->[$ixlast];
24386                    $toklast = $routput_token_type->[$ilast];
24387                }
24388                if ( $toklast =~ /^[\{,]$/ ) {
24389                    $forced_indentation_flag = 0;
24390                }
24391                else {
24392                    ( $toklast, my $i_next ) =
24393                      find_next_nonblank_token( $max_token_index, $rtokens,
24394                        $max_token_index );
24395                    if ( $toklast =~ /^[\{,]$/ ) {
24396                        $forced_indentation_flag = 0;
24397                    }
24398                }
24399            }
24400
24401            # if we are already in an indented if, see if we should outdent
24402            if ($indented_if_level) {
24403
24404                # don't try to nest trailing if's - shouldn't happen
24405                if ( $type eq 'k' ) {
24406                    $forced_indentation_flag = 0;
24407                }
24408
24409                # check for the normal case - outdenting at next ';'
24410                elsif ( $type eq ';' ) {
24411                    if ( $level_in_tokenizer == $indented_if_level ) {
24412                        $forced_indentation_flag = -1;
24413                        $indented_if_level       = 0;
24414                    }
24415                }
24416
24417                # handle case of missing semicolon
24418                elsif ( $type eq '}' ) {
24419                    if ( $level_in_tokenizer == $indented_if_level ) {
24420                        $indented_if_level = 0;
24421
24422                        # TBD: This could be a subroutine call
24423                        $level_in_tokenizer--;
24424                        if ( @{$rslevel_stack} > 1 ) {
24425                            pop( @{$rslevel_stack} );
24426                        }
24427                        if ( length($nesting_block_string) > 1 )
24428                        {    # true for valid script
24429                            chop $nesting_block_string;
24430                            chop $nesting_list_string;
24431                        }
24432
24433                    }
24434                }
24435            }
24436
24437            my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
24438            $level_i = $level_in_tokenizer;
24439
24440            # This can happen by running perltidy on non-scripts
24441            # although it could also be bug introduced by programming change.
24442            # Perl silently accepts a 032 (^Z) and takes it as the end
24443            if ( !$is_valid_token_type{$type} ) {
24444                my $val = ord($type);
24445                warning(
24446                    "unexpected character decimal $val ($type) in script\n");
24447                $tokenizer_self->{_in_error} = 1;
24448            }
24449
24450            # ----------------------------------------------------------------
24451            # TOKEN TYPE PATCHES
24452            #  output __END__, __DATA__, and format as type 'k' instead of ';'
24453            # to make html colors correct, etc.
24454            my $fix_type = $type;
24455            if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
24456
24457            # output anonymous 'sub' as keyword
24458            if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
24459
24460            # -----------------------------------------------------------------
24461
24462            $nesting_token_string_i = $nesting_token_string;
24463            $nesting_type_string_i  = $nesting_type_string;
24464            $nesting_block_string_i = $nesting_block_string;
24465            $nesting_list_string_i  = $nesting_list_string;
24466
24467            # set primary indentation levels based on structural braces
24468            # Note: these are set so that the leading braces have a HIGHER
24469            # level than their CONTENTS, which is convenient for indentation
24470            # Also, define continuation indentation for each token.
24471            if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
24472            {
24473
24474                # use environment before updating
24475                $container_environment =
24476                    $nesting_block_flag ? 'BLOCK'
24477                  : $nesting_list_flag  ? 'LIST'
24478                  :                       "";
24479
24480                # if the difference between total nesting levels is not 1,
24481                # there are intervening non-structural nesting types between
24482                # this '{' and the previous unclosed '{'
24483                my $intervening_secondary_structure = 0;
24484                if ( @{$rslevel_stack} ) {
24485                    $intervening_secondary_structure =
24486                      $slevel_in_tokenizer - $rslevel_stack->[-1];
24487                }
24488
24489     # Continuation Indentation
24490     #
24491     # Having tried setting continuation indentation both in the formatter and
24492     # in the tokenizer, I can say that setting it in the tokenizer is much,
24493     # much easier.  The formatter already has too much to do, and can't
24494     # make decisions on line breaks without knowing what 'ci' will be at
24495     # arbitrary locations.
24496     #
24497     # But a problem with setting the continuation indentation (ci) here
24498     # in the tokenizer is that we do not know where line breaks will actually
24499     # be.  As a result, we don't know if we should propagate continuation
24500     # indentation to higher levels of structure.
24501     #
24502     # For nesting of only structural indentation, we never need to do this.
24503     # For example, in a long if statement, like this
24504     #
24505     #   if ( !$output_block_type[$i]
24506     #     && ($in_statement_continuation) )
24507     #   {           <--outdented
24508     #       do_something();
24509     #   }
24510     #
24511     # the second line has ci but we do normally give the lines within the BLOCK
24512     # any ci.  This would be true if we had blocks nested arbitrarily deeply.
24513     #
24514     # But consider something like this, where we have created a break after
24515     # an opening paren on line 1, and the paren is not (currently) a
24516     # structural indentation token:
24517     #
24518     # my $file = $menubar->Menubutton(
24519     #   qw/-text File -underline 0 -menuitems/ => [
24520     #       [
24521     #           Cascade    => '~View',
24522     #           -menuitems => [
24523     #           ...
24524     #
24525     # The second line has ci, so it would seem reasonable to propagate it
24526     # down, giving the third line 1 ci + 1 indentation.  This suggests the
24527     # following rule, which is currently used to propagating ci down: if there
24528     # are any non-structural opening parens (or brackets, or braces), before
24529     # an opening structural brace, then ci is propagated down, and otherwise
24530     # not.  The variable $intervening_secondary_structure contains this
24531     # information for the current token, and the string
24532     # "$ci_string_in_tokenizer" is a stack of previous values of this
24533     # variable.
24534
24535                # save the current states
24536                push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
24537                $level_in_tokenizer++;
24538
24539                if ($forced_indentation_flag) {
24540
24541                    # break BEFORE '?' when there is forced indentation
24542                    if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
24543                    if ( $type eq 'k' ) {
24544                        $indented_if_level = $level_in_tokenizer;
24545                    }
24546                }
24547
24548                if ( $routput_block_type->[$i] ) {
24549                    $nesting_block_flag = 1;
24550                    $nesting_block_string .= '1';
24551                }
24552                else {
24553                    $nesting_block_flag = 0;
24554                    $nesting_block_string .= '0';
24555                }
24556
24557                # we will use continuation indentation within containers
24558                # which are not blocks and not logical expressions
24559                my $bit = 0;
24560                if ( !$routput_block_type->[$i] ) {
24561
24562                    # propagate flag down at nested open parens
24563                    if ( $routput_container_type->[$i] eq '(' ) {
24564                        $bit = 1 if $nesting_list_flag;
24565                    }
24566
24567                  # use list continuation if not a logical grouping
24568                  # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
24569                    else {
24570                        $bit = 1
24571                          unless
24572                            $is_logical_container{ $routput_container_type->[$i]
24573                              };
24574                    }
24575                }
24576                $nesting_list_string .= $bit;
24577                $nesting_list_flag = $bit;
24578
24579                $ci_string_in_tokenizer .=
24580                  ( $intervening_secondary_structure != 0 ) ? '1' : '0';
24581                $ci_string_sum = ones_count($ci_string_in_tokenizer);
24582                $continuation_string_in_tokenizer .=
24583                  ( $in_statement_continuation > 0 ) ? '1' : '0';
24584
24585   #  Sometimes we want to give an opening brace continuation indentation,
24586   #  and sometimes not.  For code blocks, we don't do it, so that the leading
24587   #  '{' gets outdented, like this:
24588   #
24589   #   if ( !$output_block_type[$i]
24590   #     && ($in_statement_continuation) )
24591   #   {           <--outdented
24592   #
24593   #  For other types, we will give them continuation indentation.  For example,
24594   #  here is how a list looks with the opening paren indented:
24595   #
24596   #     @LoL =
24597   #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
24598   #         [ "homer", "marge", "bart" ], );
24599   #
24600   #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
24601
24602                my $total_ci = $ci_string_sum;
24603                if (
24604                    !$routput_block_type->[$i]    # patch: skip for BLOCK
24605                    && ($in_statement_continuation)
24606                    && !( $forced_indentation_flag && $type eq ':' )
24607                  )
24608                {
24609                    $total_ci += $in_statement_continuation
24610                      unless ( $ci_string_in_tokenizer =~ /1$/ );
24611                }
24612
24613                $ci_string_i               = $total_ci;
24614                $in_statement_continuation = 0;
24615            }
24616
24617            elsif ($type eq '}'
24618                || $type eq 'R'
24619                || $forced_indentation_flag < 0 )
24620            {
24621
24622                # only a nesting error in the script would prevent popping here
24623                if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
24624
24625                $level_i = --$level_in_tokenizer;
24626
24627                # restore previous level values
24628                if ( length($nesting_block_string) > 1 )
24629                {    # true for valid script
24630                    chop $nesting_block_string;
24631                    $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
24632                    chop $nesting_list_string;
24633                    $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
24634
24635                    chop $ci_string_in_tokenizer;
24636                    $ci_string_sum = ones_count($ci_string_in_tokenizer);
24637
24638                    $in_statement_continuation =
24639                      chop $continuation_string_in_tokenizer;
24640
24641                    # zero continuation flag at terminal BLOCK '}' which
24642                    # ends a statement.
24643                    if ( $routput_block_type->[$i] ) {
24644
24645                        # ...These include non-anonymous subs
24646                        # note: could be sub ::abc { or sub 'abc
24647                        if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
24648
24649                         # note: older versions of perl require the /gc modifier
24650                         # here or else the \G does not work.
24651                            if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
24652                            {
24653                                $in_statement_continuation = 0;
24654                            }
24655                        }
24656
24657# ...and include all block types except user subs with
24658# block prototypes and these: (sort|grep|map|do|eval)
24659# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
24660                        elsif (
24661                            $is_zero_continuation_block_type{
24662                                $routput_block_type->[$i] } )
24663                        {
24664                            $in_statement_continuation = 0;
24665                        }
24666
24667                        # ..but these are not terminal types:
24668                        #     /^(sort|grep|map|do|eval)$/ )
24669                        elsif (
24670                            $is_not_zero_continuation_block_type{
24671                                $routput_block_type->[$i] } )
24672                        {
24673                        }
24674
24675                        # ..and a block introduced by a label
24676                        # /^\w+\s*:$/gc ) {
24677                        elsif ( $routput_block_type->[$i] =~ /:$/ ) {
24678                            $in_statement_continuation = 0;
24679                        }
24680
24681                        # user function with block prototype
24682                        else {
24683                            $in_statement_continuation = 0;
24684                        }
24685                    }
24686
24687                    # If we are in a list, then
24688                    # we must set continuatoin indentation at the closing
24689                    # paren of something like this (paren after $check):
24690                    #     assert(
24691                    #         __LINE__,
24692                    #         ( not defined $check )
24693                    #           or ref $check
24694                    #           or $check eq "new"
24695                    #           or $check eq "old",
24696                    #     );
24697                    elsif ( $tok eq ')' ) {
24698                        $in_statement_continuation = 1
24699                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
24700                    }
24701
24702                    elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
24703                }
24704
24705                # use environment after updating
24706                $container_environment =
24707                    $nesting_block_flag ? 'BLOCK'
24708                  : $nesting_list_flag  ? 'LIST'
24709                  :                       "";
24710                $ci_string_i = $ci_string_sum + $in_statement_continuation;
24711                $nesting_block_string_i = $nesting_block_string;
24712                $nesting_list_string_i  = $nesting_list_string;
24713            }
24714
24715            # not a structural indentation type..
24716            else {
24717
24718                $container_environment =
24719                    $nesting_block_flag ? 'BLOCK'
24720                  : $nesting_list_flag  ? 'LIST'
24721                  :                       "";
24722
24723                # zero the continuation indentation at certain tokens so
24724                # that they will be at the same level as its container.  For
24725                # commas, this simplifies the -lp indentation logic, which
24726                # counts commas.  For ?: it makes them stand out.
24727                if ($nesting_list_flag) {
24728                    if ( $type =~ /^[,\?\:]$/ ) {
24729                        $in_statement_continuation = 0;
24730                    }
24731                }
24732
24733                # be sure binary operators get continuation indentation
24734                if (
24735                    $container_environment
24736                    && (   $type eq 'k' && $is_binary_keyword{$tok}
24737                        || $is_binary_type{$type} )
24738                  )
24739                {
24740                    $in_statement_continuation = 1;
24741                }
24742
24743                # continuation indentation is sum of any open ci from previous
24744                # levels plus the current level
24745                $ci_string_i = $ci_string_sum + $in_statement_continuation;
24746
24747                # update continuation flag ...
24748                # if this isn't a blank or comment..
24749                if ( $type ne 'b' && $type ne '#' ) {
24750
24751                    # and we are in a BLOCK
24752                    if ($nesting_block_flag) {
24753
24754                        # the next token after a ';' and label starts a new stmt
24755                        if ( $type eq ';' || $type eq 'J' ) {
24756                            $in_statement_continuation = 0;
24757                        }
24758
24759                        # otherwise, we are continuing the current statement
24760                        else {
24761                            $in_statement_continuation = 1;
24762                        }
24763                    }
24764
24765                    # if we are not in a BLOCK..
24766                    else {
24767
24768                        # do not use continuation indentation if not list
24769                        # environment (could be within if/elsif clause)
24770                        if ( !$nesting_list_flag ) {
24771                            $in_statement_continuation = 0;
24772                        }
24773
24774                       # otherwise, the next token after a ',' starts a new term
24775                        elsif ( $type eq ',' ) {
24776                            $in_statement_continuation = 0;
24777                        }
24778
24779                        # otherwise, we are continuing the current term
24780                        else {
24781                            $in_statement_continuation = 1;
24782                        }
24783                    }
24784                }
24785            }
24786
24787            if ( $level_in_tokenizer < 0 ) {
24788                unless ( $tokenizer_self->{_saw_negative_indentation} ) {
24789                    $tokenizer_self->{_saw_negative_indentation} = 1;
24790                    warning("Starting negative indentation\n");
24791                }
24792            }
24793
24794            # set secondary nesting levels based on all continment token types
24795            # Note: these are set so that the nesting depth is the depth
24796            # of the PREVIOUS TOKEN, which is convenient for setting
24797            # the stength of token bonds
24798            my $slevel_i = $slevel_in_tokenizer;
24799
24800            #    /^[L\{\(\[]$/
24801            if ( $is_opening_type{$type} ) {
24802                $slevel_in_tokenizer++;
24803                $nesting_token_string .= $tok;
24804                $nesting_type_string  .= $type;
24805            }
24806
24807            #       /^[R\}\)\]]$/
24808            elsif ( $is_closing_type{$type} ) {
24809                $slevel_in_tokenizer--;
24810                my $char = chop $nesting_token_string;
24811
24812                if ( $char ne $matching_start_token{$tok} ) {
24813                    $nesting_token_string .= $char . $tok;
24814                    $nesting_type_string  .= $type;
24815                }
24816                else {
24817                    chop $nesting_type_string;
24818                }
24819            }
24820
24821            push( @block_type,            $routput_block_type->[$i] );
24822            push( @ci_string,             $ci_string_i );
24823            push( @container_environment, $container_environment );
24824            push( @container_type,        $routput_container_type->[$i] );
24825            push( @levels,                $level_i );
24826            push( @nesting_tokens,        $nesting_token_string_i );
24827            push( @nesting_types,         $nesting_type_string_i );
24828            push( @slevels,               $slevel_i );
24829            push( @token_type,            $fix_type );
24830            push( @type_sequence,         $routput_type_sequence->[$i] );
24831            push( @nesting_blocks,        $nesting_block_string );
24832            push( @nesting_lists,         $nesting_list_string );
24833
24834            # now form the previous token
24835            if ( $im >= 0 ) {
24836                $num =
24837                  $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
24838
24839                if ( $num > 0 ) {
24840                    push( @tokens,
24841                        substr( $input_line, $$rtoken_map[$im], $num ) );
24842                }
24843            }
24844            $im = $i;
24845        }
24846
24847        $num = length($input_line) - $$rtoken_map[$im];    # make the last token
24848        if ( $num > 0 ) {
24849            push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
24850        }
24851
24852        $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
24853        $tokenizer_self->{_in_quote}          = $in_quote;
24854        $tokenizer_self->{_quote_target} =
24855          $in_quote ? matching_end_token($quote_character) : "";
24856        $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
24857
24858        $line_of_tokens->{_rtoken_type}            = \@token_type;
24859        $line_of_tokens->{_rtokens}                = \@tokens;
24860        $line_of_tokens->{_rblock_type}            = \@block_type;
24861        $line_of_tokens->{_rcontainer_type}        = \@container_type;
24862        $line_of_tokens->{_rcontainer_environment} = \@container_environment;
24863        $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
24864        $line_of_tokens->{_rlevels}                = \@levels;
24865        $line_of_tokens->{_rslevels}               = \@slevels;
24866        $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
24867        $line_of_tokens->{_rci_levels}             = \@ci_string;
24868        $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
24869
24870        return;
24871    }
24872}    # end tokenize_this_line
24873
24874#########i#############################################################
24875# Tokenizer routines which assist in identifying token types
24876#######################################################################
24877
24878sub operator_expected {
24879
24880    # Many perl symbols have two or more meanings.  For example, '<<'
24881    # can be a shift operator or a here-doc operator.  The
24882    # interpretation of these symbols depends on the current state of
24883    # the tokenizer, which may either be expecting a term or an
24884    # operator.  For this example, a << would be a shift if an operator
24885    # is expected, and a here-doc if a term is expected.  This routine
24886    # is called to make this decision for any current token.  It returns
24887    # one of three possible values:
24888    #
24889    #     OPERATOR - operator expected (or at least, not a term)
24890    #     UNKNOWN  - can't tell
24891    #     TERM     - a term is expected (or at least, not an operator)
24892    #
24893    # The decision is based on what has been seen so far.  This
24894    # information is stored in the "$last_nonblank_type" and
24895    # "$last_nonblank_token" variables.  For example, if the
24896    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
24897    # if $last_nonblank_type is 'n' (numeric), we are expecting an
24898    # OPERATOR.
24899    #
24900    # If a UNKNOWN is returned, the calling routine must guess. A major
24901    # goal of this tokenizer is to minimize the possiblity of returning
24902    # UNKNOWN, because a wrong guess can spoil the formatting of a
24903    # script.
24904    #
24905    # adding NEW_TOKENS: it is critically important that this routine be
24906    # updated to allow it to determine if an operator or term is to be
24907    # expected after the new token.  Doing this simply involves adding
24908    # the new token character to one of the regexes in this routine or
24909    # to one of the hash lists
24910    # that it uses, which are initialized in the BEGIN section.
24911    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
24912    # $statement_type
24913
24914    my ( $prev_type, $tok, $next_type ) = @_;
24915
24916    my $op_expected = UNKNOWN;
24917
24918#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
24919
24920# Note: function prototype is available for token type 'U' for future
24921# program development.  It contains the leading and trailing parens,
24922# and no blanks.  It might be used to eliminate token type 'C', for
24923# example (prototype = '()'). Thus:
24924# if ($last_nonblank_type eq 'U') {
24925#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
24926# }
24927
24928    # A possible filehandle (or object) requires some care...
24929    if ( $last_nonblank_type eq 'Z' ) {
24930
24931        # angle.t
24932        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
24933            $op_expected = UNKNOWN;
24934        }
24935
24936        # For possible file handle like "$a", Perl uses weird parsing rules.
24937        # For example:
24938        # print $a/2,"/hi";   - division
24939        # print $a / 2,"/hi"; - division
24940        # print $a/ 2,"/hi";  - division
24941        # print $a /2,"/hi";  - pattern (and error)!
24942        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
24943            $op_expected = TERM;
24944        }
24945
24946        # Note when an operation is being done where a
24947        # filehandle might be expected, since a change in whitespace
24948        # could change the interpretation of the statement.
24949        else {
24950            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
24951                complain("operator in print statement not recommended\n");
24952                $op_expected = OPERATOR;
24953            }
24954        }
24955    }
24956
24957    # handle something after 'do' and 'eval'
24958    elsif ( $is_block_operator{$last_nonblank_token} ) {
24959
24960        # something like $a = eval "expression";
24961        #                          ^
24962        if ( $last_nonblank_type eq 'k' ) {
24963            $op_expected = TERM;    # expression or list mode following keyword
24964        }
24965
24966        # something like $a = do { BLOCK } / 2;
24967        #                                  ^
24968        else {
24969            $op_expected = OPERATOR;    # block mode following }
24970        }
24971    }
24972
24973    # handle bare word..
24974    elsif ( $last_nonblank_type eq 'w' ) {
24975
24976        # unfortunately, we can't tell what type of token to expect next
24977        # after most bare words
24978        $op_expected = UNKNOWN;
24979    }
24980
24981    # operator, but not term possible after these types
24982    # Note: moved ')' from type to token because parens in list context
24983    # get marked as '{' '}' now.  This is a minor glitch in the following:
24984    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
24985    #
24986    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
24987        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
24988    {
24989        $op_expected = OPERATOR;
24990
24991        # in a 'use' statement, numbers and v-strings are not true
24992        # numbers, so to avoid incorrect error messages, we will
24993        # mark them as unknown for now (use.t)
24994        # TODO: it would be much nicer to create a new token V for VERSION
24995        # number in a use statement.  Then this could be a check on type V
24996        # and related patches which change $statement_type for '=>'
24997        # and ',' could be removed.  Further, it would clean things up to
24998        # scan the 'use' statement with a separate subroutine.
24999        if (   ( $statement_type eq 'use' )
25000            && ( $last_nonblank_type =~ /^[nv]$/ ) )
25001        {
25002            $op_expected = UNKNOWN;
25003        }
25004    }
25005
25006    # no operator after many keywords, such as "die", "warn", etc
25007    elsif ( $expecting_term_token{$last_nonblank_token} ) {
25008
25009        # patch for dor.t (defined or).
25010        # perl functions which may be unary operators
25011        # TODO: This list is incomplete, and these should be put
25012        # into a hash.
25013        if (   $tok eq '/'
25014            && $next_type          eq '/'
25015            && $last_nonblank_type eq 'k'
25016            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
25017        {
25018            $op_expected = OPERATOR;
25019        }
25020        else {
25021            $op_expected = TERM;
25022        }
25023    }
25024
25025    # no operator after things like + - **  (i.e., other operators)
25026    elsif ( $expecting_term_types{$last_nonblank_type} ) {
25027        $op_expected = TERM;
25028    }
25029
25030    # a few operators, like "time", have an empty prototype () and so
25031    # take no parameters but produce a value to operate on
25032    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
25033        $op_expected = OPERATOR;
25034    }
25035
25036    # post-increment and decrement produce values to be operated on
25037    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
25038        $op_expected = OPERATOR;
25039    }
25040
25041    # no value to operate on after sub block
25042    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
25043
25044    # a right brace here indicates the end of a simple block.
25045    # all non-structural right braces have type 'R'
25046    # all braces associated with block operator keywords have been given those
25047    # keywords as "last_nonblank_token" and caught above.
25048    # (This statement is order dependent, and must come after checking
25049    # $last_nonblank_token).
25050    elsif ( $last_nonblank_type eq '}' ) {
25051
25052        # patch for dor.t (defined or).
25053        if (   $tok eq '/'
25054            && $next_type eq '/'
25055            && $last_nonblank_token eq ']' )
25056        {
25057            $op_expected = OPERATOR;
25058        }
25059        else {
25060            $op_expected = TERM;
25061        }
25062    }
25063
25064    # something else..what did I forget?
25065    else {
25066
25067        # collecting diagnostics on unknown operator types..see what was missed
25068        $op_expected = UNKNOWN;
25069        write_diagnostics(
25070"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
25071        );
25072    }
25073
25074    TOKENIZER_DEBUG_FLAG_EXPECT && do {
25075        print
25076"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
25077    };
25078    return $op_expected;
25079}
25080
25081sub new_statement_ok {
25082
25083    # return true if the current token can start a new statement
25084    # USES GLOBAL VARIABLES: $last_nonblank_type
25085
25086    return label_ok()    # a label would be ok here
25087
25088      || $last_nonblank_type eq 'J';    # or we follow a label
25089
25090}
25091
25092sub label_ok {
25093
25094    # Decide if a bare word followed by a colon here is a label
25095    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25096    # $brace_depth, @brace_type
25097
25098    # if it follows an opening or closing code block curly brace..
25099    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
25100        && $last_nonblank_type eq $last_nonblank_token )
25101    {
25102
25103        # it is a label if and only if the curly encloses a code block
25104        return $brace_type[$brace_depth];
25105    }
25106
25107    # otherwise, it is a label if and only if it follows a ';'
25108    # (real or fake)
25109    else {
25110        return ( $last_nonblank_type eq ';' );
25111    }
25112}
25113
25114sub code_block_type {
25115
25116    # Decide if this is a block of code, and its type.
25117    # Must be called only when $type = $token = '{'
25118    # The problem is to distinguish between the start of a block of code
25119    # and the start of an anonymous hash reference
25120    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
25121    # to indicate the type of code block.  (For example, 'last_nonblank_token'
25122    # might be 'if' for an if block, 'else' for an else block, etc).
25123    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25124    # $last_nonblank_block_type, $brace_depth, @brace_type
25125
25126    # handle case of multiple '{'s
25127
25128# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
25129
25130    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25131    if (   $last_nonblank_token eq '{'
25132        && $last_nonblank_type eq $last_nonblank_token )
25133    {
25134
25135        # opening brace where a statement may appear is probably
25136        # a code block but might be and anonymous hash reference
25137        if ( $brace_type[$brace_depth] ) {
25138            return decide_if_code_block( $i, $rtokens, $rtoken_type,
25139                $max_token_index );
25140        }
25141
25142        # cannot start a code block within an anonymous hash
25143        else {
25144            return "";
25145        }
25146    }
25147
25148    elsif ( $last_nonblank_token eq ';' ) {
25149
25150        # an opening brace where a statement may appear is probably
25151        # a code block but might be and anonymous hash reference
25152        return decide_if_code_block( $i, $rtokens, $rtoken_type,
25153            $max_token_index );
25154    }
25155
25156    # handle case of '}{'
25157    elsif ($last_nonblank_token eq '}'
25158        && $last_nonblank_type eq $last_nonblank_token )
25159    {
25160
25161        # a } { situation ...
25162        # could be hash reference after code block..(blktype1.t)
25163        if ($last_nonblank_block_type) {
25164            return decide_if_code_block( $i, $rtokens, $rtoken_type,
25165                $max_token_index );
25166        }
25167
25168        # must be a block if it follows a closing hash reference
25169        else {
25170            return $last_nonblank_token;
25171        }
25172    }
25173
25174    # NOTE: braces after type characters start code blocks, but for
25175    # simplicity these are not identified as such.  See also
25176    # sub is_non_structural_brace.
25177    # elsif ( $last_nonblank_type eq 't' ) {
25178    #    return $last_nonblank_token;
25179    # }
25180
25181    # brace after label:
25182    elsif ( $last_nonblank_type eq 'J' ) {
25183        return $last_nonblank_token;
25184    }
25185
25186# otherwise, look at previous token.  This must be a code block if
25187# it follows any of these:
25188# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
25189    elsif ( $is_code_block_token{$last_nonblank_token} ) {
25190
25191        # Bug Patch: Note that the opening brace after the 'if' in the following
25192        # snippet is an anonymous hash ref and not a code block!
25193        #   print 'hi' if { x => 1, }->{x};
25194        # We can identify this situation because the last nonblank type
25195        # will be a keyword (instead of a closing peren)
25196        if (   $last_nonblank_token =~ /^(if|unless)$/
25197            && $last_nonblank_type eq 'k' )
25198        {
25199            return "";
25200        }
25201        else {
25202            return $last_nonblank_token;
25203        }
25204    }
25205
25206    # or a sub definition
25207    elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
25208        && $last_nonblank_token =~ /^sub\b/ )
25209    {
25210        return $last_nonblank_token;
25211    }
25212
25213    # user-defined subs with block parameters (like grep/map/eval)
25214    elsif ( $last_nonblank_type eq 'G' ) {
25215        return $last_nonblank_token;
25216    }
25217
25218    # check bareword
25219    elsif ( $last_nonblank_type eq 'w' ) {
25220        return decide_if_code_block( $i, $rtokens, $rtoken_type,
25221            $max_token_index );
25222    }
25223
25224    # anything else must be anonymous hash reference
25225    else {
25226        return "";
25227    }
25228}
25229
25230sub decide_if_code_block {
25231
25232    # USES GLOBAL VARIABLES: $last_nonblank_token
25233    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25234    my ( $next_nonblank_token, $i_next ) =
25235      find_next_nonblank_token( $i, $rtokens, $max_token_index );
25236
25237    # we are at a '{' where a statement may appear.
25238    # We must decide if this brace starts an anonymous hash or a code
25239    # block.
25240    # return "" if anonymous hash, and $last_nonblank_token otherwise
25241
25242    # initialize to be code BLOCK
25243    my $code_block_type = $last_nonblank_token;
25244
25245    # Check for the common case of an empty anonymous hash reference:
25246    # Maybe something like sub { { } }
25247    if ( $next_nonblank_token eq '}' ) {
25248        $code_block_type = "";
25249    }
25250
25251    else {
25252
25253        # To guess if this '{' is an anonymous hash reference, look ahead
25254        # and test as follows:
25255        #
25256        # it is a hash reference if next come:
25257        #   - a string or digit followed by a comma or =>
25258        #   - bareword followed by =>
25259        # otherwise it is a code block
25260        #
25261        # Examples of anonymous hash ref:
25262        # {'aa',};
25263        # {1,2}
25264        #
25265        # Examples of code blocks:
25266        # {1; print "hello\n", 1;}
25267        # {$a,1};
25268
25269        # We are only going to look ahead one more (nonblank/comment) line.
25270        # Strange formatting could cause a bad guess, but that's unlikely.
25271        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
25272        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
25273        my ( $rpre_tokens, $rpre_types ) =
25274          peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
25275                                                       # generous, and prevents
25276                                                       # wasting lots of
25277                                                       # time in mangled files
25278        if ( defined($rpre_types) && @$rpre_types ) {
25279            push @pre_types,  @$rpre_types;
25280            push @pre_tokens, @$rpre_tokens;
25281        }
25282
25283        # put a sentinal token to simplify stopping the search
25284        push @pre_types, '}';
25285
25286        my $jbeg = 0;
25287        $jbeg = 1 if $pre_types[0] eq 'b';
25288
25289        # first look for one of these
25290        #  - bareword
25291        #  - bareword with leading -
25292        #  - digit
25293        #  - quoted string
25294        my $j = $jbeg;
25295        if ( $pre_types[$j] =~ /^[\'\"]/ ) {
25296
25297            # find the closing quote; don't worry about escapes
25298            my $quote_mark = $pre_types[$j];
25299            for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
25300                if ( $pre_types[$k] eq $quote_mark ) {
25301                    $j = $k + 1;
25302                    my $next = $pre_types[$j];
25303                    last;
25304                }
25305            }
25306        }
25307        elsif ( $pre_types[$j] eq 'd' ) {
25308            $j++;
25309        }
25310        elsif ( $pre_types[$j] eq 'w' ) {
25311            unless ( $is_keyword{ $pre_tokens[$j] } ) {
25312                $j++;
25313            }
25314        }
25315        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
25316            $j++;
25317        }
25318        if ( $j > $jbeg ) {
25319
25320            $j++ if $pre_types[$j] eq 'b';
25321
25322            # it's a hash ref if a comma or => follow next
25323            if ( $pre_types[$j] eq ','
25324                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
25325            {
25326                $code_block_type = "";
25327            }
25328        }
25329    }
25330
25331    return $code_block_type;
25332}
25333
25334sub unexpected {
25335
25336    # report unexpected token type and show where it is
25337    # USES GLOBAL VARIABLES: $tokenizer_self
25338    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
25339        $rpretoken_type, $input_line )
25340      = @_;
25341
25342    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
25343        my $msg = "found $found where $expecting expected";
25344        my $pos = $$rpretoken_map[$i_tok];
25345        interrupt_logfile();
25346        my $input_line_number = $tokenizer_self->{_last_line_number};
25347        my ( $offset, $numbered_line, $underline ) =
25348          make_numbered_line( $input_line_number, $input_line, $pos );
25349        $underline = write_on_underline( $underline, $pos - $offset, '^' );
25350
25351        my $trailer = "";
25352        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
25353            my $pos_prev = $$rpretoken_map[$last_nonblank_i];
25354            my $num;
25355            if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
25356                $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
25357            }
25358            else {
25359                $num = $pos - $pos_prev;
25360            }
25361            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
25362
25363            $underline =
25364              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
25365            $trailer = " (previous token underlined)";
25366        }
25367        warning( $numbered_line . "\n" );
25368        warning( $underline . "\n" );
25369        warning( $msg . $trailer . "\n" );
25370        resume_logfile();
25371    }
25372}
25373
25374sub is_non_structural_brace {
25375
25376    # Decide if a brace or bracket is structural or non-structural
25377    # by looking at the previous token and type
25378    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
25379
25380    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
25381    # Tentatively deactivated because it caused the wrong operator expectation
25382    # for this code:
25383    #      $user = @vars[1] / 100;
25384    # Must update sub operator_expected before re-implementing.
25385    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
25386    #    return 0;
25387    # }
25388
25389    # NOTE: braces after type characters start code blocks, but for
25390    # simplicity these are not identified as such.  See also
25391    # sub code_block_type
25392    # if ($last_nonblank_type eq 't') {return 0}
25393
25394    # otherwise, it is non-structural if it is decorated
25395    # by type information.
25396    # For example, the '{' here is non-structural:   ${xxx}
25397    (
25398        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
25399
25400          # or if we follow a hash or array closing curly brace or bracket
25401          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
25402          # because the first '}' would have been given type 'R'
25403          || $last_nonblank_type =~ /^([R\]])$/
25404    );
25405}
25406
25407#########i#############################################################
25408# Tokenizer routines for tracking container nesting depths
25409#######################################################################
25410
25411# The following routines keep track of nesting depths of the nesting
25412# types, ( [ { and ?.  This is necessary for determining the indentation
25413# level, and also for debugging programs.  Not only do they keep track of
25414# nesting depths of the individual brace types, but they check that each
25415# of the other brace types is balanced within matching pairs.  For
25416# example, if the program sees this sequence:
25417#
25418#         {  ( ( ) }
25419#
25420# then it can determine that there is an extra left paren somewhere
25421# between the { and the }.  And so on with every other possible
25422# combination of outer and inner brace types.  For another
25423# example:
25424#
25425#         ( [ ..... ]  ] )
25426#
25427# which has an extra ] within the parens.
25428#
25429# The brace types have indexes 0 .. 3 which are indexes into
25430# the matrices.
25431#
25432# The pair ? : are treated as just another nesting type, with ? acting
25433# as the opening brace and : acting as the closing brace.
25434#
25435# The matrix
25436#
25437#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
25438#
25439# saves the nesting depth of brace type $b (where $b is either of the other
25440# nesting types) when brace type $a enters a new depth.  When this depth
25441# decreases, a check is made that the current depth of brace types $b is
25442# unchanged, or otherwise there must have been an error.  This can
25443# be very useful for localizing errors, particularly when perl runs to
25444# the end of a large file (such as this one) and announces that there
25445# is a problem somewhere.
25446#
25447# A numerical sequence number is maintained for every nesting type,
25448# so that each matching pair can be uniquely identified in a simple
25449# way.
25450
25451sub increase_nesting_depth {
25452    my ( $aa, $pos ) = @_;
25453
25454    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25455    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25456    my $bb;
25457    $current_depth[$aa]++;
25458    $total_depth++;
25459    $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
25460    my $input_line_number = $tokenizer_self->{_last_line_number};
25461    my $input_line        = $tokenizer_self->{_line_text};
25462
25463    # Sequence numbers increment by number of items.  This keeps
25464    # a unique set of numbers but still allows the relative location
25465    # of any type to be determined.
25466    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
25467    my $seqno = $nesting_sequence_number[$aa];
25468    $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
25469
25470    $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
25471      [ $input_line_number, $input_line, $pos ];
25472
25473    for $bb ( 0 .. $#closing_brace_names ) {
25474        next if ( $bb == $aa );
25475        $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
25476    }
25477
25478    # set a flag for indenting a nested ternary statement
25479    my $indent = 0;
25480    if ( $aa == QUESTION_COLON ) {
25481        $nested_ternary_flag[ $current_depth[$aa] ] = 0;
25482        if ( $current_depth[$aa] > 1 ) {
25483            if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
25484                my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
25485                if ( $pdepth == $total_depth - 1 ) {
25486                    $indent = 1;
25487                    $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
25488                }
25489            }
25490        }
25491    }
25492    return ( $seqno, $indent );
25493}
25494
25495sub decrease_nesting_depth {
25496
25497    my ( $aa, $pos ) = @_;
25498
25499    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25500    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25501    my $bb;
25502    my $seqno             = 0;
25503    my $input_line_number = $tokenizer_self->{_last_line_number};
25504    my $input_line        = $tokenizer_self->{_line_text};
25505
25506    my $outdent = 0;
25507    $total_depth--;
25508    if ( $current_depth[$aa] > 0 ) {
25509
25510        # set a flag for un-indenting after seeing a nested ternary statement
25511        $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
25512        if ( $aa == QUESTION_COLON ) {
25513            $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
25514        }
25515
25516        # check that any brace types $bb contained within are balanced
25517        for $bb ( 0 .. $#closing_brace_names ) {
25518            next if ( $bb == $aa );
25519
25520            unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
25521                $current_depth[$bb] )
25522            {
25523                my $diff =
25524                  $current_depth[$bb] -
25525                  $depth_array[$aa][$bb][ $current_depth[$aa] ];
25526
25527                # don't whine too many times
25528                my $saw_brace_error = get_saw_brace_error();
25529                if (
25530                    $saw_brace_error <= MAX_NAG_MESSAGES
25531
25532                    # if too many closing types have occured, we probably
25533                    # already caught this error
25534                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
25535                  )
25536                {
25537                    interrupt_logfile();
25538                    my $rsl =
25539                      $starting_line_of_current_depth[$aa]
25540                      [ $current_depth[$aa] ];
25541                    my $sl  = $$rsl[0];
25542                    my $rel = [ $input_line_number, $input_line, $pos ];
25543                    my $el  = $$rel[0];
25544                    my ($ess);
25545
25546                    if ( $diff == 1 || $diff == -1 ) {
25547                        $ess = '';
25548                    }
25549                    else {
25550                        $ess = 's';
25551                    }
25552                    my $bname =
25553                      ( $diff > 0 )
25554                      ? $opening_brace_names[$bb]
25555                      : $closing_brace_names[$bb];
25556                    write_error_indicator_pair( @$rsl, '^' );
25557                    my $msg = <<"EOM";
25558Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
25559EOM
25560
25561                    if ( $diff > 0 ) {
25562                        my $rml =
25563                          $starting_line_of_current_depth[$bb]
25564                          [ $current_depth[$bb] ];
25565                        my $ml = $$rml[0];
25566                        $msg .=
25567"    The most recent un-matched $bname is on line $ml\n";
25568                        write_error_indicator_pair( @$rml, '^' );
25569                    }
25570                    write_error_indicator_pair( @$rel, '^' );
25571                    warning($msg);
25572                    resume_logfile();
25573                }
25574                increment_brace_error();
25575            }
25576        }
25577        $current_depth[$aa]--;
25578    }
25579    else {
25580
25581        my $saw_brace_error = get_saw_brace_error();
25582        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
25583            my $msg = <<"EOM";
25584There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
25585EOM
25586            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
25587        }
25588        increment_brace_error();
25589    }
25590    return ( $seqno, $outdent );
25591}
25592
25593sub check_final_nesting_depths {
25594    my ($aa);
25595
25596    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
25597
25598    for $aa ( 0 .. $#closing_brace_names ) {
25599
25600        if ( $current_depth[$aa] ) {
25601            my $rsl =
25602              $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
25603            my $sl  = $$rsl[0];
25604            my $msg = <<"EOM";
25605Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
25606The most recent un-matched $opening_brace_names[$aa] is on line $sl
25607EOM
25608            indicate_error( $msg, @$rsl, '^' );
25609            increment_brace_error();
25610        }
25611    }
25612}
25613
25614#########i#############################################################
25615# Tokenizer routines for looking ahead in input stream
25616#######################################################################
25617
25618sub peek_ahead_for_n_nonblank_pre_tokens {
25619
25620    # returns next n pretokens if they exist
25621    # returns undef's if hits eof without seeing any pretokens
25622    # USES GLOBAL VARIABLES: $tokenizer_self
25623    my $max_pretokens = shift;
25624    my $line;
25625    my $i = 0;
25626    my ( $rpre_tokens, $rmap, $rpre_types );
25627
25628    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25629    {
25630        $line =~ s/^\s*//;    # trim leading blanks
25631        next if ( length($line) <= 0 );    # skip blank
25632        next if ( $line =~ /^#/ );         # skip comment
25633        ( $rpre_tokens, $rmap, $rpre_types ) =
25634          pre_tokenize( $line, $max_pretokens );
25635        last;
25636    }
25637    return ( $rpre_tokens, $rpre_types );
25638}
25639
25640# look ahead for next non-blank, non-comment line of code
25641sub peek_ahead_for_nonblank_token {
25642
25643    # USES GLOBAL VARIABLES: $tokenizer_self
25644    my ( $rtokens, $max_token_index ) = @_;
25645    my $line;
25646    my $i = 0;
25647
25648    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25649    {
25650        $line =~ s/^\s*//;    # trim leading blanks
25651        next if ( length($line) <= 0 );    # skip blank
25652        next if ( $line =~ /^#/ );         # skip comment
25653        my ( $rtok, $rmap, $rtype ) =
25654          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
25655        my $j = $max_token_index + 1;
25656        my $tok;
25657
25658        foreach $tok (@$rtok) {
25659            last if ( $tok =~ "\n" );
25660            $$rtokens[ ++$j ] = $tok;
25661        }
25662        last;
25663    }
25664    return $rtokens;
25665}
25666
25667#########i#############################################################
25668# Tokenizer guessing routines for ambiguous situations
25669#######################################################################
25670
25671sub guess_if_pattern_or_conditional {
25672
25673    # this routine is called when we have encountered a ? following an
25674    # unknown bareword, and we must decide if it starts a pattern or not
25675    # input parameters:
25676    #   $i - token index of the ? starting possible pattern
25677    # output parameters:
25678    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
25679    #   msg = a warning or diagnostic message
25680    # USES GLOBAL VARIABLES: $last_nonblank_token
25681    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25682    my $is_pattern = 0;
25683    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
25684
25685    if ( $i >= $max_token_index ) {
25686        $msg .= "conditional (no end to pattern found on the line)\n";
25687    }
25688    else {
25689        my $ibeg = $i;
25690        $i = $ibeg + 1;
25691        my $next_token = $$rtokens[$i];    # first token after ?
25692
25693        # look for a possible ending ? on this line..
25694        my $in_quote        = 1;
25695        my $quote_depth     = 0;
25696        my $quote_character = '';
25697        my $quote_pos       = 0;
25698        my $quoted_string;
25699        (
25700            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25701            $quoted_string
25702          )
25703          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25704            $quote_pos, $quote_depth, $max_token_index );
25705
25706        if ($in_quote) {
25707
25708            # we didn't find an ending ? on this line,
25709            # so we bias towards conditional
25710            $is_pattern = 0;
25711            $msg .= "conditional (no ending ? on this line)\n";
25712
25713            # we found an ending ?, so we bias towards a pattern
25714        }
25715        else {
25716
25717            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25718                $is_pattern = 1;
25719                $msg .= "pattern (found ending ? and pattern expected)\n";
25720            }
25721            else {
25722                $msg .= "pattern (uncertain, but found ending ?)\n";
25723            }
25724        }
25725    }
25726    return ( $is_pattern, $msg );
25727}
25728
25729sub guess_if_pattern_or_division {
25730
25731    # this routine is called when we have encountered a / following an
25732    # unknown bareword, and we must decide if it starts a pattern or is a
25733    # division
25734    # input parameters:
25735    #   $i - token index of the / starting possible pattern
25736    # output parameters:
25737    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
25738    #   msg = a warning or diagnostic message
25739    # USES GLOBAL VARIABLES: $last_nonblank_token
25740    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25741    my $is_pattern = 0;
25742    my $msg        = "guessing that / after $last_nonblank_token starts a ";
25743
25744    if ( $i >= $max_token_index ) {
25745        "division (no end to pattern found on the line)\n";
25746    }
25747    else {
25748        my $ibeg = $i;
25749        my $divide_expected =
25750          numerator_expected( $i, $rtokens, $max_token_index );
25751        $i = $ibeg + 1;
25752        my $next_token = $$rtokens[$i];    # first token after slash
25753
25754        # look for a possible ending / on this line..
25755        my $in_quote        = 1;
25756        my $quote_depth     = 0;
25757        my $quote_character = '';
25758        my $quote_pos       = 0;
25759        my $quoted_string;
25760        (
25761            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25762            $quoted_string
25763          )
25764          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25765            $quote_pos, $quote_depth, $max_token_index );
25766
25767        if ($in_quote) {
25768
25769            # we didn't find an ending / on this line,
25770            # so we bias towards division
25771            if ( $divide_expected >= 0 ) {
25772                $is_pattern = 0;
25773                $msg .= "division (no ending / on this line)\n";
25774            }
25775            else {
25776                $msg        = "multi-line pattern (division not possible)\n";
25777                $is_pattern = 1;
25778            }
25779
25780        }
25781
25782        # we found an ending /, so we bias towards a pattern
25783        else {
25784
25785            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25786
25787                if ( $divide_expected >= 0 ) {
25788
25789                    if ( $i - $ibeg > 60 ) {
25790                        $msg .= "division (matching / too distant)\n";
25791                        $is_pattern = 0;
25792                    }
25793                    else {
25794                        $msg .= "pattern (but division possible too)\n";
25795                        $is_pattern = 1;
25796                    }
25797                }
25798                else {
25799                    $is_pattern = 1;
25800                    $msg .= "pattern (division not possible)\n";
25801                }
25802            }
25803            else {
25804
25805                if ( $divide_expected >= 0 ) {
25806                    $is_pattern = 0;
25807                    $msg .= "division (pattern not possible)\n";
25808                }
25809                else {
25810                    $is_pattern = 1;
25811                    $msg .=
25812                      "pattern (uncertain, but division would not work here)\n";
25813                }
25814            }
25815        }
25816    }
25817    return ( $is_pattern, $msg );
25818}
25819
25820# try to resolve here-doc vs. shift by looking ahead for
25821# non-code or the end token (currently only looks for end token)
25822# returns 1 if it is probably a here doc, 0 if not
25823sub guess_if_here_doc {
25824
25825    # This is how many lines we will search for a target as part of the
25826    # guessing strategy.  It is a constant because there is probably
25827    # little reason to change it.
25828    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
25829    # %is_constant,
25830    use constant HERE_DOC_WINDOW => 40;
25831
25832    my $next_token        = shift;
25833    my $here_doc_expected = 0;
25834    my $line;
25835    my $k   = 0;
25836    my $msg = "checking <<";
25837
25838    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
25839    {
25840        chomp $line;
25841
25842        if ( $line =~ /^$next_token$/ ) {
25843            $msg .= " -- found target $next_token ahead $k lines\n";
25844            $here_doc_expected = 1;    # got it
25845            last;
25846        }
25847        last if ( $k >= HERE_DOC_WINDOW );
25848    }
25849
25850    unless ($here_doc_expected) {
25851
25852        if ( !defined($line) ) {
25853            $here_doc_expected = -1;    # hit eof without seeing target
25854            $msg .= " -- must be shift; target $next_token not in file\n";
25855
25856        }
25857        else {                          # still unsure..taking a wild guess
25858
25859            if ( !$is_constant{$current_package}{$next_token} ) {
25860                $here_doc_expected = 1;
25861                $msg .=
25862                  " -- guessing it's a here-doc ($next_token not a constant)\n";
25863            }
25864            else {
25865                $msg .=
25866                  " -- guessing it's a shift ($next_token is a constant)\n";
25867            }
25868        }
25869    }
25870    write_logfile_entry($msg);
25871    return $here_doc_expected;
25872}
25873
25874#########i#############################################################
25875# Tokenizer Routines for scanning identifiers and related items
25876#######################################################################
25877
25878sub scan_bare_identifier_do {
25879
25880    # this routine is called to scan a token starting with an alphanumeric
25881    # variable or package separator, :: or '.
25882    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
25883    # $last_nonblank_type,@paren_type, $paren_depth
25884
25885    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
25886        $max_token_index )
25887      = @_;
25888    my $i_begin = $i;
25889    my $package = undef;
25890
25891    my $i_beg = $i;
25892
25893    # we have to back up one pretoken at a :: since each : is one pretoken
25894    if ( $tok eq '::' ) { $i_beg-- }
25895    if ( $tok eq '->' ) { $i_beg-- }
25896    my $pos_beg = $$rtoken_map[$i_beg];
25897    pos($input_line) = $pos_beg;
25898
25899    #  Examples:
25900    #   A::B::C
25901    #   A::
25902    #   ::A
25903    #   A'B
25904    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
25905
25906        my $pos  = pos($input_line);
25907        my $numc = $pos - $pos_beg;
25908        $tok = substr( $input_line, $pos_beg, $numc );
25909
25910        # type 'w' includes anything without leading type info
25911        # ($,%,@,*) including something like abc::def::ghi
25912        $type = 'w';
25913
25914        my $sub_name = "";
25915        if ( defined($2) ) { $sub_name = $2; }
25916        if ( defined($1) ) {
25917            $package = $1;
25918
25919            # patch: don't allow isolated package name which just ends
25920            # in the old style package separator (single quote).  Example:
25921            #   use CGI':all';
25922            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
25923                $pos--;
25924            }
25925
25926            $package =~ s/\'/::/g;
25927            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25928            $package =~ s/::$//;
25929        }
25930        else {
25931            $package = $current_package;
25932
25933            if ( $is_keyword{$tok} ) {
25934                $type = 'k';
25935            }
25936        }
25937
25938        # if it is a bareword..
25939        if ( $type eq 'w' ) {
25940
25941            # check for v-string with leading 'v' type character
25942            # (This seems to have presidence over filehandle, type 'Y')
25943            if ( $tok =~ /^v\d[_\d]*$/ ) {
25944
25945                # we only have the first part - something like 'v101' -
25946                # look for more
25947                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
25948                    $pos  = pos($input_line);
25949                    $numc = $pos - $pos_beg;
25950                    $tok  = substr( $input_line, $pos_beg, $numc );
25951                }
25952                $type = 'v';
25953
25954                # warn if this version can't handle v-strings
25955                report_v_string($tok);
25956            }
25957
25958            elsif ( $is_constant{$package}{$sub_name} ) {
25959                $type = 'C';
25960            }
25961
25962            # bareword after sort has implied empty prototype; for example:
25963            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
25964            # This has priority over whatever the user has specified.
25965            elsif ($last_nonblank_token eq 'sort'
25966                && $last_nonblank_type eq 'k' )
25967            {
25968                $type = 'Z';
25969            }
25970
25971            # Note: strangely, perl does not seem to really let you create
25972            # functions which act like eval and do, in the sense that eval
25973            # and do may have operators following the final }, but any operators
25974            # that you create with prototype (&) apparently do not allow
25975            # trailing operators, only terms.  This seems strange.
25976            # If this ever changes, here is the update
25977            # to make perltidy behave accordingly:
25978
25979            # elsif ( $is_block_function{$package}{$tok} ) {
25980            #    $tok='eval'; # patch to do braces like eval  - doesn't work
25981            #    $type = 'k';
25982            #}
25983            # FIXME: This could become a separate type to allow for different
25984            # future behavior:
25985            elsif ( $is_block_function{$package}{$sub_name} ) {
25986                $type = 'G';
25987            }
25988
25989            elsif ( $is_block_list_function{$package}{$sub_name} ) {
25990                $type = 'G';
25991            }
25992            elsif ( $is_user_function{$package}{$sub_name} ) {
25993                $type      = 'U';
25994                $prototype = $user_function_prototype{$package}{$sub_name};
25995            }
25996
25997            # check for indirect object
25998            elsif (
25999
26000                # added 2001-03-27: must not be followed immediately by '('
26001                # see fhandle.t
26002                ( $input_line !~ m/\G\(/gc )
26003
26004                # and
26005                && (
26006
26007                    # preceded by keyword like 'print', 'printf' and friends
26008                    $is_indirect_object_taker{$last_nonblank_token}
26009
26010                    # or preceded by something like 'print(' or 'printf('
26011                    || (
26012                        ( $last_nonblank_token eq '(' )
26013                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
26014                        }
26015
26016                    )
26017                )
26018              )
26019            {
26020
26021                # may not be indirect object unless followed by a space
26022                if ( $input_line =~ m/\G\s+/gc ) {
26023                    $type = 'Y';
26024
26025                    # Abandon Hope ...
26026                    # Perl's indirect object notation is a very bad
26027                    # thing and can cause subtle bugs, especially for
26028                    # beginning programmers.  And I haven't even been
26029                    # able to figure out a sane warning scheme which
26030                    # doesn't get in the way of good scripts.
26031
26032                    # Complain if a filehandle has any lower case
26033                    # letters.  This is suggested good practice.
26034                    # Use 'sub_name' because something like
26035                    # main::MYHANDLE is ok for filehandle
26036                    if ( $sub_name =~ /[a-z]/ ) {
26037
26038                        # could be bug caused by older perltidy if
26039                        # followed by '('
26040                        if ( $input_line =~ m/\G\s*\(/gc ) {
26041                            complain(
26042"Caution: unknown word '$tok' in indirect object slot\n"
26043                            );
26044                        }
26045                    }
26046                }
26047
26048                # bareword not followed by a space -- may not be filehandle
26049                # (may be function call defined in a 'use' statement)
26050                else {
26051                    $type = 'Z';
26052                }
26053            }
26054        }
26055
26056        # Now we must convert back from character position
26057        # to pre_token index.
26058        # I don't think an error flag can occur here ..but who knows
26059        my $error;
26060        ( $i, $error ) =
26061          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26062        if ($error) {
26063            warning("scan_bare_identifier: Possibly invalid tokenization\n");
26064        }
26065    }
26066
26067    # no match but line not blank - could be syntax error
26068    # perl will take '::' alone without complaint
26069    else {
26070        $type = 'w';
26071
26072        # change this warning to log message if it becomes annoying
26073        warning("didn't find identifier after leading ::\n");
26074    }
26075    return ( $i, $tok, $type, $prototype );
26076}
26077
26078sub scan_id_do {
26079
26080# This is the new scanner and will eventually replace scan_identifier.
26081# Only type 'sub' and 'package' are implemented.
26082# Token types $ * % @ & -> are not yet implemented.
26083#
26084# Scan identifier following a type token.
26085# The type of call depends on $id_scan_state: $id_scan_state = ''
26086# for starting call, in which case $tok must be the token defining
26087# the type.
26088#
26089# If the type token is the last nonblank token on the line, a value
26090# of $id_scan_state = $tok is returned, indicating that further
26091# calls must be made to get the identifier.  If the type token is
26092# not the last nonblank token on the line, the identifier is
26093# scanned and handled and a value of '' is returned.
26094# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
26095# $statement_type, $tokenizer_self
26096
26097    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
26098        $max_token_index )
26099      = @_;
26100    my $type = '';
26101    my ( $i_beg, $pos_beg );
26102
26103    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26104    #my ($a,$b,$c) = caller;
26105    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
26106
26107    # on re-entry, start scanning at first token on the line
26108    if ($id_scan_state) {
26109        $i_beg = $i;
26110        $type  = '';
26111    }
26112
26113    # on initial entry, start scanning just after type token
26114    else {
26115        $i_beg         = $i + 1;
26116        $id_scan_state = $tok;
26117        $type          = 't';
26118    }
26119
26120    # find $i_beg = index of next nonblank token,
26121    # and handle empty lines
26122    my $blank_line          = 0;
26123    my $next_nonblank_token = $$rtokens[$i_beg];
26124    if ( $i_beg > $max_token_index ) {
26125        $blank_line = 1;
26126    }
26127    else {
26128
26129        # only a '#' immediately after a '$' is not a comment
26130        if ( $next_nonblank_token eq '#' ) {
26131            unless ( $tok eq '$' ) {
26132                $blank_line = 1;
26133            }
26134        }
26135
26136        if ( $next_nonblank_token =~ /^\s/ ) {
26137            ( $next_nonblank_token, $i_beg ) =
26138              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
26139                $max_token_index );
26140            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
26141                $blank_line = 1;
26142            }
26143        }
26144    }
26145
26146    # handle non-blank line; identifier, if any, must follow
26147    unless ($blank_line) {
26148
26149        if ( $id_scan_state eq 'sub' ) {
26150            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
26151                $input_line, $i,             $i_beg,
26152                $tok,        $type,          $rtokens,
26153                $rtoken_map, $id_scan_state, $max_token_index
26154            );
26155        }
26156
26157        elsif ( $id_scan_state eq 'package' ) {
26158            ( $i, $tok, $type ) =
26159              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
26160                $rtoken_map, $max_token_index );
26161            $id_scan_state = '';
26162        }
26163
26164        else {
26165            warning("invalid token in scan_id: $tok\n");
26166            $id_scan_state = '';
26167        }
26168    }
26169
26170    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
26171
26172        # shouldn't happen:
26173        warning(
26174"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
26175        );
26176        report_definite_bug();
26177    }
26178
26179    TOKENIZER_DEBUG_FLAG_NSCAN && do {
26180        print
26181          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26182    };
26183    return ( $i, $tok, $type, $id_scan_state );
26184}
26185
26186sub check_prototype {
26187    my ( $proto, $package, $subname ) = @_;
26188    return unless ( defined($package) && defined($subname) );
26189    if ( defined($proto) ) {
26190        $proto =~ s/^\s*\(\s*//;
26191        $proto =~ s/\s*\)$//;
26192        if ($proto) {
26193            $is_user_function{$package}{$subname}        = 1;
26194            $user_function_prototype{$package}{$subname} = "($proto)";
26195
26196            # prototypes containing '&' must be treated specially..
26197            if ( $proto =~ /\&/ ) {
26198
26199                # right curly braces of prototypes ending in
26200                # '&' may be followed by an operator
26201                if ( $proto =~ /\&$/ ) {
26202                    $is_block_function{$package}{$subname} = 1;
26203                }
26204
26205                # right curly braces of prototypes NOT ending in
26206                # '&' may NOT be followed by an operator
26207                elsif ( $proto !~ /\&$/ ) {
26208                    $is_block_list_function{$package}{$subname} = 1;
26209                }
26210            }
26211        }
26212        else {
26213            $is_constant{$package}{$subname} = 1;
26214        }
26215    }
26216    else {
26217        $is_user_function{$package}{$subname} = 1;
26218    }
26219}
26220
26221sub do_scan_package {
26222
26223    # do_scan_package parses a package name
26224    # it is called with $i_beg equal to the index of the first nonblank
26225    # token following a 'package' token.
26226    # USES GLOBAL VARIABLES: $current_package,
26227
26228    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
26229        $max_token_index )
26230      = @_;
26231    my $package = undef;
26232    my $pos_beg = $$rtoken_map[$i_beg];
26233    pos($input_line) = $pos_beg;
26234
26235    # handle non-blank line; package name, if any, must follow
26236    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
26237        $package = $1;
26238        $package = ( defined($1) && $1 ) ? $1 : 'main';
26239        $package =~ s/\'/::/g;
26240        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26241        $package =~ s/::$//;
26242        my $pos  = pos($input_line);
26243        my $numc = $pos - $pos_beg;
26244        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
26245        $type = 'i';
26246
26247        # Now we must convert back from character position
26248        # to pre_token index.
26249        # I don't think an error flag can occur here ..but ?
26250        my $error;
26251        ( $i, $error ) =
26252          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26253        if ($error) { warning("Possibly invalid package\n") }
26254        $current_package = $package;
26255
26256        # check for error
26257        my ( $next_nonblank_token, $i_next ) =
26258          find_next_nonblank_token( $i, $rtokens, $max_token_index );
26259        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
26260            warning(
26261                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
26262            );
26263        }
26264    }
26265
26266    # no match but line not blank --
26267    # could be a label with name package, like package:  , for example.
26268    else {
26269        $type = 'k';
26270    }
26271
26272    return ( $i, $tok, $type );
26273}
26274
26275sub scan_identifier_do {
26276
26277    # This routine assembles tokens into identifiers.  It maintains a
26278    # scan state, id_scan_state.  It updates id_scan_state based upon
26279    # current id_scan_state and token, and returns an updated
26280    # id_scan_state and the next index after the identifier.
26281    # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26282    # $last_nonblank_type
26283
26284    my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26285        $expecting )
26286      = @_;
26287    my $i_begin   = $i;
26288    my $type      = '';
26289    my $tok_begin = $$rtokens[$i_begin];
26290    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
26291    my $id_scan_state_begin = $id_scan_state;
26292    my $identifier_begin    = $identifier;
26293    my $tok                 = $tok_begin;
26294    my $message             = "";
26295
26296    # these flags will be used to help figure out the type:
26297    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26298    my $saw_type;
26299
26300    # allow old package separator (') except in 'use' statement
26301    my $allow_tick = ( $last_nonblank_token ne 'use' );
26302
26303    # get started by defining a type and a state if necessary
26304    unless ($id_scan_state) {
26305        $context = UNKNOWN_CONTEXT;
26306
26307        # fixup for digraph
26308        if ( $tok eq '>' ) {
26309            $tok       = '->';
26310            $tok_begin = $tok;
26311        }
26312        $identifier = $tok;
26313
26314        if ( $tok eq '$' || $tok eq '*' ) {
26315            $id_scan_state = '$';
26316            $context       = SCALAR_CONTEXT;
26317        }
26318        elsif ( $tok eq '%' || $tok eq '@' ) {
26319            $id_scan_state = '$';
26320            $context       = LIST_CONTEXT;
26321        }
26322        elsif ( $tok eq '&' ) {
26323            $id_scan_state = '&';
26324        }
26325        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26326            $saw_alpha     = 0;     # 'sub' is considered type info here
26327            $id_scan_state = '$';
26328            $identifier .= ' ';     # need a space to separate sub from sub name
26329        }
26330        elsif ( $tok eq '::' ) {
26331            $id_scan_state = 'A';
26332        }
26333        elsif ( $tok =~ /^[A-Za-z_]/ ) {
26334            $id_scan_state = ':';
26335        }
26336        elsif ( $tok eq '->' ) {
26337            $id_scan_state = '$';
26338        }
26339        else {
26340
26341            # shouldn't happen
26342            my ( $a, $b, $c ) = caller;
26343            warning("Program Bug: scan_identifier given bad token = $tok \n");
26344            warning("   called from sub $a  line: $c\n");
26345            report_definite_bug();
26346        }
26347        $saw_type = !$saw_alpha;
26348    }
26349    else {
26350        $i--;
26351        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26352    }
26353
26354    # now loop to gather the identifier
26355    my $i_save = $i;
26356
26357    while ( $i < $max_token_index ) {
26358        $i_save = $i unless ( $tok =~ /^\s*$/ );
26359        $tok = $$rtokens[ ++$i ];
26360
26361        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26362            $tok = '::';
26363            $i++;
26364        }
26365
26366        if ( $id_scan_state eq '$' ) {    # starting variable name
26367
26368            if ( $tok eq '$' ) {
26369
26370                $identifier .= $tok;
26371
26372                # we've got a punctuation variable if end of line (punct.t)
26373                if ( $i == $max_token_index ) {
26374                    $type          = 'i';
26375                    $id_scan_state = '';
26376                    last;
26377                }
26378            }
26379            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
26380                $saw_alpha     = 1;
26381                $id_scan_state = ':';           # now need ::
26382                $identifier .= $tok;
26383            }
26384            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26385                $saw_alpha     = 1;
26386                $id_scan_state = ':';                 # now need ::
26387                $identifier .= $tok;
26388
26389                # Perl will accept leading digits in identifiers,
26390                # although they may not always produce useful results.
26391                # Something like $main::0 is ok.  But this also works:
26392                #
26393                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
26394                #  howdy::123::bubba();
26395                #
26396            }
26397            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
26398                $saw_alpha     = 1;
26399                $id_scan_state = ':';                 # now need ::
26400                $identifier .= $tok;
26401            }
26402            elsif ( $tok eq '::' ) {
26403                $id_scan_state = 'A';
26404                $identifier .= $tok;
26405            }
26406            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
26407                $identifier .= $tok;    # keep same state, a $ could follow
26408            }
26409            elsif ( $tok eq '{' ) {
26410
26411                # check for something like ${#} or ${�}
26412                if (   $identifier eq '$'
26413                    && $i + 2 <= $max_token_index
26414                    && $$rtokens[ $i + 2 ] eq '}'
26415                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
26416                {
26417                    my $next2 = $$rtokens[ $i + 2 ];
26418                    my $next1 = $$rtokens[ $i + 1 ];
26419                    $identifier .= $tok . $next1 . $next2;
26420                    $i += 2;
26421                    $id_scan_state = '';
26422                    last;
26423                }
26424
26425                # skip something like ${xxx} or ->{
26426                $id_scan_state = '';
26427
26428                # if this is the first token of a line, any tokens for this
26429                # identifier have already been accumulated
26430                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
26431                $i = $i_save;
26432                last;
26433            }
26434
26435            # space ok after leading $ % * & @
26436            elsif ( $tok =~ /^\s*$/ ) {
26437
26438                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
26439
26440                    if ( length($identifier) > 1 ) {
26441                        $id_scan_state = '';
26442                        $i             = $i_save;
26443                        $type          = 'i';    # probably punctuation variable
26444                        last;
26445                    }
26446                    else {
26447
26448                        # spaces after $'s are common, and space after @
26449                        # is harmless, so only complain about space
26450                        # after other type characters. Space after $ and
26451                        # @ will be removed in formatting.  Report space
26452                        # after % and * because they might indicate a
26453                        # parsing error.  In other words '% ' might be a
26454                        # modulo operator.  Delete this warning if it
26455                        # gets annoying.
26456                        if ( $identifier !~ /^[\@\$]$/ ) {
26457                            $message =
26458                              "Space in identifier, following $identifier\n";
26459                        }
26460                    }
26461                }
26462
26463                # else:
26464                # space after '->' is ok
26465            }
26466            elsif ( $tok eq '^' ) {
26467
26468                # check for some special variables like $^W
26469                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26470                    $identifier .= $tok;
26471                    $id_scan_state = 'A';
26472
26473                    # Perl accepts '$^]' or '@^]', but
26474                    # there must not be a space before the ']'.
26475                    my $next1 = $$rtokens[ $i + 1 ];
26476                    if ( $next1 eq ']' ) {
26477                        $i++;
26478                        $identifier .= $next1;
26479                        $id_scan_state = "";
26480                        last;
26481                    }
26482                }
26483                else {
26484                    $id_scan_state = '';
26485                }
26486            }
26487            else {    # something else
26488
26489                # check for various punctuation variables
26490                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26491                    $identifier .= $tok;
26492                }
26493
26494                elsif ( $identifier eq '$#' ) {
26495
26496                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
26497
26498                    # perl seems to allow just these: $#: $#- $#+
26499                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
26500                        $type = 'i';
26501                        $identifier .= $tok;
26502                    }
26503                    else {
26504                        $i = $i_save;
26505                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
26506                    }
26507                }
26508                elsif ( $identifier eq '$$' ) {
26509
26510                    # perl does not allow references to punctuation
26511                    # variables without braces.  For example, this
26512                    # won't work:
26513                    #  $:=\4;
26514                    #  $a = $$:;
26515                    # You would have to use
26516                    #  $a = ${$:};
26517
26518                    $i = $i_save;
26519                    if   ( $tok eq '{' ) { $type = 't' }
26520                    else                 { $type = 'i' }
26521                }
26522                elsif ( $identifier eq '->' ) {
26523                    $i = $i_save;
26524                }
26525                else {
26526                    $i = $i_save;
26527                    if ( length($identifier) == 1 ) { $identifier = ''; }
26528                }
26529                $id_scan_state = '';
26530                last;
26531            }
26532        }
26533        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
26534
26535            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
26536                $id_scan_state = ':';          # now need ::
26537                $saw_alpha     = 1;
26538                $identifier .= $tok;
26539            }
26540            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26541                $id_scan_state = ':';                 # now need ::
26542                $saw_alpha     = 1;
26543                $identifier .= $tok;
26544            }
26545            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
26546                $id_scan_state = ':';       # now need ::
26547                $saw_alpha     = 1;
26548                $identifier .= $tok;
26549            }
26550            elsif ( $tok =~ /^\s*$/ ) {     # allow space
26551            }
26552            elsif ( $tok eq '::' ) {        # leading ::
26553                $id_scan_state = 'A';       # accept alpha next
26554                $identifier .= $tok;
26555            }
26556            elsif ( $tok eq '{' ) {
26557                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
26558                $i             = $i_save;
26559                $id_scan_state = '';
26560                last;
26561            }
26562            else {
26563
26564                # punctuation variable?
26565                # testfile: cunningham4.pl
26566                #
26567                # We have to be careful here.  If we are in an unknown state,
26568                # we will reject the punctuation variable.  In the following
26569                # example the '&' is a binary opeator but we are in an unknown
26570                # state because there is no sigil on 'Prima', so we don't
26571                # know what it is.  But it is a bad guess that
26572                # '&~' is a punction variable.
26573                # $self->{text}->{colorMap}->[
26574                #   Prima::PodView::COLOR_CODE_FOREGROUND
26575                #   & ~tb::COLOR_INDEX ] =
26576                #   $sec->{ColorCode}
26577                if ( $identifier eq '&' && $expecting ) {
26578                    $identifier .= $tok;
26579                }
26580                else {
26581                    $identifier = '';
26582                    $i          = $i_save;
26583                    $type       = '&';
26584                }
26585                $id_scan_state = '';
26586                last;
26587            }
26588        }
26589        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
26590
26591            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
26592                $identifier .= $tok;
26593                $id_scan_state = ':';        # now need ::
26594                $saw_alpha     = 1;
26595            }
26596            elsif ( $tok eq "'" && $allow_tick ) {
26597                $identifier .= $tok;
26598                $id_scan_state = ':';        # now need ::
26599                $saw_alpha     = 1;
26600            }
26601            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
26602                $identifier .= $tok;
26603                $id_scan_state = ':';        # now need ::
26604                $saw_alpha     = 1;
26605            }
26606            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26607                $id_scan_state = '(';
26608                $identifier .= $tok;
26609            }
26610            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26611                $id_scan_state = ')';
26612                $identifier .= $tok;
26613            }
26614            else {
26615                $id_scan_state = '';
26616                $i             = $i_save;
26617                last;
26618            }
26619        }
26620        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
26621
26622            if ( $tok eq '::' ) {            # got it
26623                $identifier .= $tok;
26624                $id_scan_state = 'A';        # now require alpha
26625            }
26626            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
26627                $identifier .= $tok;
26628                $id_scan_state = ':';           # now need ::
26629                $saw_alpha     = 1;
26630            }
26631            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
26632                $identifier .= $tok;
26633                $id_scan_state = ':';           # now need ::
26634                $saw_alpha     = 1;
26635            }
26636            elsif ( $tok eq "'" && $allow_tick ) {    # tick
26637
26638                if ( $is_keyword{$identifier} ) {
26639                    $id_scan_state = '';              # that's all
26640                    $i             = $i_save;
26641                }
26642                else {
26643                    $identifier .= $tok;
26644                }
26645            }
26646            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26647                $id_scan_state = '(';
26648                $identifier .= $tok;
26649            }
26650            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26651                $id_scan_state = ')';
26652                $identifier .= $tok;
26653            }
26654            else {
26655                $id_scan_state = '';        # that's all
26656                $i             = $i_save;
26657                last;
26658            }
26659        }
26660        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
26661
26662            if ( $tok eq '(' ) {             # got it
26663                $identifier .= $tok;
26664                $id_scan_state = ')';        # now find the end of it
26665            }
26666            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
26667                $identifier .= $tok;
26668            }
26669            else {
26670                $id_scan_state = '';         # that's all - no prototype
26671                $i             = $i_save;
26672                last;
26673            }
26674        }
26675        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
26676
26677            if ( $tok eq ')' ) {             # got it
26678                $identifier .= $tok;
26679                $id_scan_state = '';         # all done
26680                last;
26681            }
26682            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
26683                $identifier .= $tok;
26684            }
26685            else {    # probable error in script, but keep going
26686                warning("Unexpected '$tok' while seeking end of prototype\n");
26687                $identifier .= $tok;
26688            }
26689        }
26690        else {        # can get here due to error in initialization
26691            $id_scan_state = '';
26692            $i             = $i_save;
26693            last;
26694        }
26695    }
26696
26697    if ( $id_scan_state eq ')' ) {
26698        warning("Hit end of line while seeking ) to end prototype\n");
26699    }
26700
26701    # once we enter the actual identifier, it may not extend beyond
26702    # the end of the current line
26703    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
26704        $id_scan_state = '';
26705    }
26706    if ( $i < 0 ) { $i = 0 }
26707
26708    unless ($type) {
26709
26710        if ($saw_type) {
26711
26712            if ($saw_alpha) {
26713                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
26714                    $type = 'w';
26715                }
26716                else { $type = 'i' }
26717            }
26718            elsif ( $identifier eq '->' ) {
26719                $type = '->';
26720            }
26721            elsif (
26722                ( length($identifier) > 1 )
26723
26724                # In something like '@$=' we have an identifier '@$'
26725                # In something like '$${' we have type '$$' (and only
26726                # part of an identifier)
26727                && !( $identifier =~ /\$$/ && $tok eq '{' )
26728                && ( $identifier !~ /^(sub |package )$/ )
26729              )
26730            {
26731                $type = 'i';
26732            }
26733            else { $type = 't' }
26734        }
26735        elsif ($saw_alpha) {
26736
26737            # type 'w' includes anything without leading type info
26738            # ($,%,@,*) including something like abc::def::ghi
26739            $type = 'w';
26740        }
26741        else {
26742            $type = '';
26743        }    # this can happen on a restart
26744    }
26745
26746    if ($identifier) {
26747        $tok = $identifier;
26748        if ($message) { write_logfile_entry($message) }
26749    }
26750    else {
26751        $tok = $tok_begin;
26752        $i   = $i_begin;
26753    }
26754
26755    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
26756        my ( $a, $b, $c ) = caller;
26757        print
26758"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
26759        print
26760"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
26761    };
26762    return ( $i, $tok, $type, $id_scan_state, $identifier );
26763}
26764
26765{
26766
26767    # saved package and subnames in case prototype is on separate line
26768    my ( $package_saved, $subname_saved );
26769
26770    sub do_scan_sub {
26771
26772        # do_scan_sub parses a sub name and prototype
26773        # it is called with $i_beg equal to the index of the first nonblank
26774        # token following a 'sub' token.
26775
26776        # TODO: add future error checks to be sure we have a valid
26777        # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
26778        # a name is given if and only if a non-anonymous sub is
26779        # appropriate.
26780        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
26781        # $in_attribute_list, %saw_function_definition,
26782        # $statement_type
26783
26784        my (
26785            $input_line, $i,             $i_beg,
26786            $tok,        $type,          $rtokens,
26787            $rtoken_map, $id_scan_state, $max_token_index
26788        ) = @_;
26789        $id_scan_state = "";    # normally we get everything in one call
26790        my $subname = undef;
26791        my $package = undef;
26792        my $proto   = undef;
26793        my $attrs   = undef;
26794        my $match;
26795
26796        my $pos_beg = $$rtoken_map[$i_beg];
26797        pos($input_line) = $pos_beg;
26798
26799        # sub NAME PROTO ATTRS
26800        if (
26801            $input_line =~ m/\G\s*
26802        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
26803        (\w+)               # NAME    - required
26804        (\s*\([^){]*\))?    # PROTO   - something in parens
26805        (\s*:)?             # ATTRS   - leading : of attribute list
26806        /gcx
26807          )
26808        {
26809            $match   = 1;
26810            $subname = $2;
26811            $proto   = $3;
26812            $attrs   = $4;
26813
26814            $package = ( defined($1) && $1 ) ? $1 : $current_package;
26815            $package =~ s/\'/::/g;
26816            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26817            $package =~ s/::$//;
26818            my $pos  = pos($input_line);
26819            my $numc = $pos - $pos_beg;
26820            $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
26821            $type = 'i';
26822        }
26823
26824        # Look for prototype/attributes not preceded on this line by subname;
26825        # This might be an anonymous sub with attributes,
26826        # or a prototype on a separate line from its sub name
26827        elsif (
26828            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
26829            (\s*:)?                              # ATTRS leading ':'
26830            /gcx
26831            && ( $1 || $2 )
26832          )
26833        {
26834            $match = 1;
26835            $proto = $1;
26836            $attrs = $2;
26837
26838            # Handle prototype on separate line from subname
26839            if ($subname_saved) {
26840                $package = $package_saved;
26841                $subname = $subname_saved;
26842                $tok     = $last_nonblank_token;
26843            }
26844            $type = 'i';
26845        }
26846
26847        if ($match) {
26848
26849            # ATTRS: if there are attributes, back up and let the ':' be
26850            # found later by the scanner.
26851            my $pos = pos($input_line);
26852            if ($attrs) {
26853                $pos -= length($attrs);
26854            }
26855
26856            my $next_nonblank_token = $tok;
26857
26858            # catch case of line with leading ATTR ':' after anonymous sub
26859            if ( $pos == $pos_beg && $tok eq ':' ) {
26860                $type              = 'A';
26861                $in_attribute_list = 1;
26862            }
26863
26864            # We must convert back from character position
26865            # to pre_token index.
26866            else {
26867
26868                # I don't think an error flag can occur here ..but ?
26869                my $error;
26870                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
26871                    $max_token_index );
26872                if ($error) { warning("Possibly invalid sub\n") }
26873
26874                # check for multiple definitions of a sub
26875                ( $next_nonblank_token, my $i_next ) =
26876                  find_next_nonblank_token_on_this_line( $i, $rtokens,
26877                    $max_token_index );
26878            }
26879
26880            if ( $next_nonblank_token =~ /^(\s*|#)$/ )
26881            {    # skip blank or side comment
26882                my ( $rpre_tokens, $rpre_types ) =
26883                  peek_ahead_for_n_nonblank_pre_tokens(1);
26884                if ( defined($rpre_tokens) && @$rpre_tokens ) {
26885                    $next_nonblank_token = $rpre_tokens->[0];
26886                }
26887                else {
26888                    $next_nonblank_token = '}';
26889                }
26890            }
26891            $package_saved = "";
26892            $subname_saved = "";
26893            if ( $next_nonblank_token eq '{' ) {
26894                if ($subname) {
26895
26896                    # Check for multiple definitions of a sub, but
26897                    # it is ok to have multiple sub BEGIN, etc,
26898                    # so we do not complain if name is all caps
26899                    if (   $saw_function_definition{$package}{$subname}
26900                        && $subname !~ /^[A-Z]+$/ )
26901                    {
26902                        my $lno = $saw_function_definition{$package}{$subname};
26903                        warning(
26904"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
26905                        );
26906                    }
26907                    $saw_function_definition{$package}{$subname} =
26908                      $tokenizer_self->{_last_line_number};
26909                }
26910            }
26911            elsif ( $next_nonblank_token eq ';' ) {
26912            }
26913            elsif ( $next_nonblank_token eq '}' ) {
26914            }
26915
26916            # ATTRS - if an attribute list follows, remember the name
26917            # of the sub so the next opening brace can be labeled.
26918            # Setting 'statement_type' causes any ':'s to introduce
26919            # attributes.
26920            elsif ( $next_nonblank_token eq ':' ) {
26921                $statement_type = $tok;
26922            }
26923
26924            # see if PROTO follows on another line:
26925            elsif ( $next_nonblank_token eq '(' ) {
26926                if ( $attrs || $proto ) {
26927                    warning(
26928"unexpected '(' after definition or declaration of sub '$subname'\n"
26929                    );
26930                }
26931                else {
26932                    $id_scan_state  = 'sub';    # we must come back to get proto
26933                    $statement_type = $tok;
26934                    $package_saved  = $package;
26935                    $subname_saved  = $subname;
26936                }
26937            }
26938            elsif ($next_nonblank_token) {      # EOF technically ok
26939                warning(
26940"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
26941                );
26942            }
26943            check_prototype( $proto, $package, $subname );
26944        }
26945
26946        # no match but line not blank
26947        else {
26948        }
26949        return ( $i, $tok, $type, $id_scan_state );
26950    }
26951}
26952
26953#########i###############################################################
26954# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
26955#########################################################################
26956
26957sub find_next_nonblank_token {
26958    my ( $i, $rtokens, $max_token_index ) = @_;
26959
26960    if ( $i >= $max_token_index ) {
26961        if ( !peeked_ahead() ) {
26962            peeked_ahead(1);
26963            $rtokens =
26964              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
26965        }
26966    }
26967    my $next_nonblank_token = $$rtokens[ ++$i ];
26968
26969    if ( $next_nonblank_token =~ /^\s*$/ ) {
26970        $next_nonblank_token = $$rtokens[ ++$i ];
26971    }
26972    return ( $next_nonblank_token, $i );
26973}
26974
26975sub numerator_expected {
26976
26977    # this is a filter for a possible numerator, in support of guessing
26978    # for the / pattern delimiter token.
26979    # returns -
26980    #   1 - yes
26981    #   0 - can't tell
26982    #  -1 - no
26983    # Note: I am using the convention that variables ending in
26984    # _expected have these 3 possible values.
26985    my ( $i, $rtokens, $max_token_index ) = @_;
26986    my $next_token = $$rtokens[ $i + 1 ];
26987    if ( $next_token eq '=' ) { $i++; }    # handle /=
26988    my ( $next_nonblank_token, $i_next ) =
26989      find_next_nonblank_token( $i, $rtokens, $max_token_index );
26990
26991    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
26992        1;
26993    }
26994    else {
26995
26996        if ( $next_nonblank_token =~ /^\s*$/ ) {
26997            0;
26998        }
26999        else {
27000            -1;
27001        }
27002    }
27003}
27004
27005sub pattern_expected {
27006
27007    # This is the start of a filter for a possible pattern.
27008    # It looks at the token after a possbible pattern and tries to
27009    # determine if that token could end a pattern.
27010    # returns -
27011    #   1 - yes
27012    #   0 - can't tell
27013    #  -1 - no
27014    my ( $i, $rtokens, $max_token_index ) = @_;
27015    my $next_token = $$rtokens[ $i + 1 ];
27016    if ( $next_token =~ /^[cgimosxp]/ ) { $i++; }    # skip possible modifier
27017    my ( $next_nonblank_token, $i_next ) =
27018      find_next_nonblank_token( $i, $rtokens, $max_token_index );
27019
27020    # list of tokens which may follow a pattern
27021    # (can probably be expanded)
27022    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
27023    {
27024        1;
27025    }
27026    else {
27027
27028        if ( $next_nonblank_token =~ /^\s*$/ ) {
27029            0;
27030        }
27031        else {
27032            -1;
27033        }
27034    }
27035}
27036
27037sub find_next_nonblank_token_on_this_line {
27038    my ( $i, $rtokens, $max_token_index ) = @_;
27039    my $next_nonblank_token;
27040
27041    if ( $i < $max_token_index ) {
27042        $next_nonblank_token = $$rtokens[ ++$i ];
27043
27044        if ( $next_nonblank_token =~ /^\s*$/ ) {
27045
27046            if ( $i < $max_token_index ) {
27047                $next_nonblank_token = $$rtokens[ ++$i ];
27048            }
27049        }
27050    }
27051    else {
27052        $next_nonblank_token = "";
27053    }
27054    return ( $next_nonblank_token, $i );
27055}
27056
27057sub find_angle_operator_termination {
27058
27059    # We are looking at a '<' and want to know if it is an angle operator.
27060    # We are to return:
27061    #   $i = pretoken index of ending '>' if found, current $i otherwise
27062    #   $type = 'Q' if found, '>' otherwise
27063    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
27064    my $i    = $i_beg;
27065    my $type = '<';
27066    pos($input_line) = 1 + $$rtoken_map[$i];
27067
27068    my $filter;
27069
27070    # we just have to find the next '>' if a term is expected
27071    if ( $expecting == TERM ) { $filter = '[\>]' }
27072
27073    # we have to guess if we don't know what is expected
27074    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
27075
27076    # shouldn't happen - we shouldn't be here if operator is expected
27077    else { warning("Program Bug in find_angle_operator_termination\n") }
27078
27079    # To illustrate what we might be looking at, in case we are
27080    # guessing, here are some examples of valid angle operators
27081    # (or file globs):
27082    #  <tmp_imp/*>
27083    #  <FH>
27084    #  <$fh>
27085    #  <*.c *.h>
27086    #  <_>
27087    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
27088    #  <${PREFIX}*img*.$IMAGE_TYPE>
27089    #  <img*.$IMAGE_TYPE>
27090    #  <Timg*.$IMAGE_TYPE>
27091    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
27092    #
27093    # Here are some examples of lines which do not have angle operators:
27094    #  return undef unless $self->[2]++ < $#{$self->[1]};
27095    #  < 2  || @$t >
27096    #
27097    # the following line from dlister.pl caused trouble:
27098    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
27099    #
27100    # If the '<' starts an angle operator, it must end on this line and
27101    # it must not have certain characters like ';' and '=' in it.  I use
27102    # this to limit the testing.  This filter should be improved if
27103    # possible.
27104
27105    if ( $input_line =~ /($filter)/g ) {
27106
27107        if ( $1 eq '>' ) {
27108
27109            # We MAY have found an angle operator termination if we get
27110            # here, but we need to do more to be sure we haven't been
27111            # fooled.
27112            my $pos = pos($input_line);
27113
27114            my $pos_beg = $$rtoken_map[$i];
27115            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
27116
27117            # Reject if the closing '>' follows a '-' as in:
27118            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
27119            if ( $expecting eq UNKNOWN ) {
27120                my $check = substr( $input_line, $pos - 2, 1 );
27121                if ( $check eq '-' ) {
27122                    return ( $i, $type );
27123                }
27124            }
27125
27126            ######################################debug#####
27127            #write_diagnostics( "ANGLE? :$str\n");
27128            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
27129            ######################################debug#####
27130            $type = 'Q';
27131            my $error;
27132            ( $i, $error ) =
27133              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27134
27135            # It may be possible that a quote ends midway in a pretoken.
27136            # If this happens, it may be necessary to split the pretoken.
27137            if ($error) {
27138                warning(
27139                    "Possible tokinization error..please check this line\n");
27140                report_possible_bug();
27141            }
27142
27143            # Now let's see where we stand....
27144            # OK if math op not possible
27145            if ( $expecting == TERM ) {
27146            }
27147
27148            # OK if there are no more than 2 pre-tokens inside
27149            # (not possible to write 2 token math between < and >)
27150            # This catches most common cases
27151            elsif ( $i <= $i_beg + 3 ) {
27152                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
27153            }
27154
27155            # Not sure..
27156            else {
27157
27158                # Let's try a Brace Test: any braces inside must balance
27159                my $br = 0;
27160                while ( $str =~ /\{/g ) { $br++ }
27161                while ( $str =~ /\}/g ) { $br-- }
27162                my $sb = 0;
27163                while ( $str =~ /\[/g ) { $sb++ }
27164                while ( $str =~ /\]/g ) { $sb-- }
27165                my $pr = 0;
27166                while ( $str =~ /\(/g ) { $pr++ }
27167                while ( $str =~ /\)/g ) { $pr-- }
27168
27169                # if braces do not balance - not angle operator
27170                if ( $br || $sb || $pr ) {
27171                    $i    = $i_beg;
27172                    $type = '<';
27173                    write_diagnostics(
27174                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
27175                }
27176
27177                # we should keep doing more checks here...to be continued
27178                # Tentatively accepting this as a valid angle operator.
27179                # There are lots more things that can be checked.
27180                else {
27181                    write_diagnostics(
27182                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
27183                    write_logfile_entry("Guessing angle operator here: $str\n");
27184                }
27185            }
27186        }
27187
27188        # didn't find ending >
27189        else {
27190            if ( $expecting == TERM ) {
27191                warning("No ending > for angle operator\n");
27192            }
27193        }
27194    }
27195    return ( $i, $type );
27196}
27197
27198sub scan_number_do {
27199
27200    #  scan a number in any of the formats that Perl accepts
27201    #  Underbars (_) are allowed in decimal numbers.
27202    #  input parameters -
27203    #      $input_line  - the string to scan
27204    #      $i           - pre_token index to start scanning
27205    #    $rtoken_map    - reference to the pre_token map giving starting
27206    #                    character position in $input_line of token $i
27207    #  output parameters -
27208    #    $i            - last pre_token index of the number just scanned
27209    #    number        - the number (characters); or undef if not a number
27210
27211    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
27212    my $pos_beg = $$rtoken_map[$i];
27213    my $pos;
27214    my $i_begin = $i;
27215    my $number  = undef;
27216    my $type    = $input_type;
27217
27218    my $first_char = substr( $input_line, $pos_beg, 1 );
27219
27220    # Look for bad starting characters; Shouldn't happen..
27221    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
27222        warning("Program bug - scan_number given character $first_char\n");
27223        report_definite_bug();
27224        return ( $i, $type, $number );
27225    }
27226
27227    # handle v-string without leading 'v' character ('Two Dot' rule)
27228    # (vstring.t)
27229    # TODO: v-strings may contain underscores
27230    pos($input_line) = $pos_beg;
27231    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
27232        $pos = pos($input_line);
27233        my $numc = $pos - $pos_beg;
27234        $number = substr( $input_line, $pos_beg, $numc );
27235        $type = 'v';
27236        report_v_string($number);
27237    }
27238
27239    # handle octal, hex, binary
27240    if ( !defined($number) ) {
27241        pos($input_line) = $pos_beg;
27242        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
27243        {
27244            $pos = pos($input_line);
27245            my $numc = $pos - $pos_beg;
27246            $number = substr( $input_line, $pos_beg, $numc );
27247            $type = 'n';
27248        }
27249    }
27250
27251    # handle decimal
27252    if ( !defined($number) ) {
27253        pos($input_line) = $pos_beg;
27254
27255        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
27256            $pos = pos($input_line);
27257
27258            # watch out for things like 0..40 which would give 0. by this;
27259            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
27260                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
27261            {
27262                $pos--;
27263            }
27264            my $numc = $pos - $pos_beg;
27265            $number = substr( $input_line, $pos_beg, $numc );
27266            $type = 'n';
27267        }
27268    }
27269
27270    # filter out non-numbers like e + - . e2  .e3 +e6
27271    # the rule: at least one digit, and any 'e' must be preceded by a digit
27272    if (
27273        $number !~ /\d/    # no digits
27274        || (   $number =~ /^(.*)[eE]/
27275            && $1 !~ /\d/ )    # or no digits before the 'e'
27276      )
27277    {
27278        $number = undef;
27279        $type   = $input_type;
27280        return ( $i, $type, $number );
27281    }
27282
27283    # Found a number; now we must convert back from character position
27284    # to pre_token index. An error here implies user syntax error.
27285    # An example would be an invalid octal number like '009'.
27286    my $error;
27287    ( $i, $error ) =
27288      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27289    if ($error) { warning("Possibly invalid number\n") }
27290
27291    return ( $i, $type, $number );
27292}
27293
27294sub inverse_pretoken_map {
27295
27296    # Starting with the current pre_token index $i, scan forward until
27297    # finding the index of the next pre_token whose position is $pos.
27298    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
27299    my $error = 0;
27300
27301    while ( ++$i <= $max_token_index ) {
27302
27303        if ( $pos <= $$rtoken_map[$i] ) {
27304
27305            # Let the calling routine handle errors in which we do not
27306            # land on a pre-token boundary.  It can happen by running
27307            # perltidy on some non-perl scripts, for example.
27308            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
27309            $i--;
27310            last;
27311        }
27312    }
27313    return ( $i, $error );
27314}
27315
27316sub find_here_doc {
27317
27318    # find the target of a here document, if any
27319    # input parameters:
27320    #   $i - token index of the second < of <<
27321    #   ($i must be less than the last token index if this is called)
27322    # output parameters:
27323    #   $found_target = 0 didn't find target; =1 found target
27324    #   HERE_TARGET - the target string (may be empty string)
27325    #   $i - unchanged if not here doc,
27326    #    or index of the last token of the here target
27327    #   $saw_error - flag noting unbalanced quote on here target
27328    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27329    my $ibeg                 = $i;
27330    my $found_target         = 0;
27331    my $here_doc_target      = '';
27332    my $here_quote_character = '';
27333    my $saw_error            = 0;
27334    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
27335    $next_token = $$rtokens[ $i + 1 ];
27336
27337    # perl allows a backslash before the target string (heredoc.t)
27338    my $backslash = 0;
27339    if ( $next_token eq '\\' ) {
27340        $backslash  = 1;
27341        $next_token = $$rtokens[ $i + 2 ];
27342    }
27343
27344    ( $next_nonblank_token, $i_next_nonblank ) =
27345      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
27346
27347    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
27348
27349        my $in_quote    = 1;
27350        my $quote_depth = 0;
27351        my $quote_pos   = 0;
27352        my $quoted_string;
27353
27354        (
27355            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
27356            $quoted_string
27357          )
27358          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
27359            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
27360
27361        if ($in_quote) {    # didn't find end of quote, so no target found
27362            $i = $ibeg;
27363            if ( $expecting == TERM ) {
27364                warning(
27365"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
27366                );
27367                $saw_error = 1;
27368            }
27369        }
27370        else {              # found ending quote
27371            my $j;
27372            $found_target = 1;
27373
27374            my $tokj;
27375            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
27376                $tokj = $$rtokens[$j];
27377
27378                # we have to remove any backslash before the quote character
27379                # so that the here-doc-target exactly matches this string
27380                next
27381                  if ( $tokj eq "\\"
27382                    && $j < $i - 1
27383                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
27384                $here_doc_target .= $tokj;
27385            }
27386        }
27387    }
27388
27389    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
27390        $found_target = 1;
27391        write_logfile_entry(
27392            "found blank here-target after <<; suggest using \"\"\n");
27393        $i = $ibeg;
27394    }
27395    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
27396
27397        my $here_doc_expected;
27398        if ( $expecting == UNKNOWN ) {
27399            $here_doc_expected = guess_if_here_doc($next_token);
27400        }
27401        else {
27402            $here_doc_expected = 1;
27403        }
27404
27405        if ($here_doc_expected) {
27406            $found_target    = 1;
27407            $here_doc_target = $next_token;
27408            $i               = $ibeg + 1;
27409        }
27410
27411    }
27412    else {
27413
27414        if ( $expecting == TERM ) {
27415            $found_target = 1;
27416            write_logfile_entry("Note: bare here-doc operator <<\n");
27417        }
27418        else {
27419            $i = $ibeg;
27420        }
27421    }
27422
27423    # patch to neglect any prepended backslash
27424    if ( $found_target && $backslash ) { $i++ }
27425
27426    return ( $found_target, $here_doc_target, $here_quote_character, $i,
27427        $saw_error );
27428}
27429
27430sub do_quote {
27431
27432    # follow (or continue following) quoted string(s)
27433    # $in_quote return code:
27434    #   0 - ok, found end
27435    #   1 - still must find end of quote whose target is $quote_character
27436    #   2 - still looking for end of first of two quotes
27437    #
27438    # Returns updated strings:
27439    #  $quoted_string_1 = quoted string seen while in_quote=1
27440    #  $quoted_string_2 = quoted string seen while in_quote=2
27441    my (
27442        $i,               $in_quote,    $quote_character,
27443        $quote_pos,       $quote_depth, $quoted_string_1,
27444        $quoted_string_2, $rtokens,     $rtoken_map,
27445        $max_token_index
27446    ) = @_;
27447
27448    my $in_quote_starting = $in_quote;
27449
27450    my $quoted_string;
27451    if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
27452        my $ibeg = $i;
27453        (
27454            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27455            $quoted_string
27456          )
27457          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
27458            $quote_pos, $quote_depth, $max_token_index );
27459        $quoted_string_2 .= $quoted_string;
27460        if ( $in_quote == 1 ) {
27461            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
27462            $quote_character = '';
27463        }
27464        else {
27465            $quoted_string_2 .= "\n";
27466        }
27467    }
27468
27469    if ( $in_quote == 1 ) {    # one (more) quote to follow
27470        my $ibeg = $i;
27471        (
27472            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27473            $quoted_string
27474          )
27475          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27476            $quote_pos, $quote_depth, $max_token_index );
27477        $quoted_string_1 .= $quoted_string;
27478        if ( $in_quote == 1 ) {
27479            $quoted_string_1 .= "\n";
27480        }
27481    }
27482    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27483        $quoted_string_1, $quoted_string_2 );
27484}
27485
27486sub follow_quoted_string {
27487
27488    # scan for a specific token, skipping escaped characters
27489    # if the quote character is blank, use the first non-blank character
27490    # input parameters:
27491    #   $rtokens = reference to the array of tokens
27492    #   $i = the token index of the first character to search
27493    #   $in_quote = number of quoted strings being followed
27494    #   $beginning_tok = the starting quote character
27495    #   $quote_pos = index to check next for alphanumeric delimiter
27496    # output parameters:
27497    #   $i = the token index of the ending quote character
27498    #   $in_quote = decremented if found end, unchanged if not
27499    #   $beginning_tok = the starting quote character
27500    #   $quote_pos = index to check next for alphanumeric delimiter
27501    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
27502    #   $quoted_string = the text of the quote (without quotation tokens)
27503    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
27504        $max_token_index )
27505      = @_;
27506    my ( $tok, $end_tok );
27507    my $i             = $i_beg - 1;
27508    my $quoted_string = "";
27509
27510    TOKENIZER_DEBUG_FLAG_QUOTE && do {
27511        print
27512"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
27513    };
27514
27515    # get the corresponding end token
27516    if ( $beginning_tok !~ /^\s*$/ ) {
27517        $end_tok = matching_end_token($beginning_tok);
27518    }
27519
27520    # a blank token means we must find and use the first non-blank one
27521    else {
27522        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
27523
27524        while ( $i < $max_token_index ) {
27525            $tok = $$rtokens[ ++$i ];
27526
27527            if ( $tok !~ /^\s*$/ ) {
27528
27529                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
27530                    $i = $max_token_index;
27531                }
27532                else {
27533
27534                    if ( length($tok) > 1 ) {
27535                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
27536                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
27537                    }
27538                    else {
27539                        $beginning_tok = $tok;
27540                        $quote_pos     = 0;
27541                    }
27542                    $end_tok     = matching_end_token($beginning_tok);
27543                    $quote_depth = 1;
27544                    last;
27545                }
27546            }
27547            else {
27548                $allow_quote_comments = 1;
27549            }
27550        }
27551    }
27552
27553    # There are two different loops which search for the ending quote
27554    # character.  In the rare case of an alphanumeric quote delimiter, we
27555    # have to look through alphanumeric tokens character-by-character, since
27556    # the pre-tokenization process combines multiple alphanumeric
27557    # characters, whereas for a non-alphanumeric delimiter, only tokens of
27558    # length 1 can match.
27559
27560    ###################################################################
27561    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
27562    # "quote_pos" is the position the current word to begin searching
27563    ###################################################################
27564    if ( $beginning_tok =~ /\w/ ) {
27565
27566        # Note this because it is not recommended practice except
27567        # for obfuscated perl contests
27568        if ( $in_quote == 1 ) {
27569            write_logfile_entry(
27570                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
27571        }
27572
27573        while ( $i < $max_token_index ) {
27574
27575            if ( $quote_pos == 0 || ( $i < 0 ) ) {
27576                $tok = $$rtokens[ ++$i ];
27577
27578                if ( $tok eq '\\' ) {
27579
27580                    # retain backslash unless it hides the end token
27581                    $quoted_string .= $tok
27582                      unless $$rtokens[ $i + 1 ] eq $end_tok;
27583                    $quote_pos++;
27584                    last if ( $i >= $max_token_index );
27585                    $tok = $$rtokens[ ++$i ];
27586                }
27587            }
27588            my $old_pos = $quote_pos;
27589
27590            unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
27591            {
27592
27593            }
27594            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
27595
27596            if ( $quote_pos > 0 ) {
27597
27598                $quoted_string .=
27599                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
27600
27601                $quote_depth--;
27602
27603                if ( $quote_depth == 0 ) {
27604                    $in_quote--;
27605                    last;
27606                }
27607            }
27608            else {
27609                $quoted_string .= substr( $tok, $old_pos );
27610            }
27611        }
27612    }
27613
27614    ########################################################################
27615    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
27616    ########################################################################
27617    else {
27618
27619        while ( $i < $max_token_index ) {
27620            $tok = $$rtokens[ ++$i ];
27621
27622            if ( $tok eq $end_tok ) {
27623                $quote_depth--;
27624
27625                if ( $quote_depth == 0 ) {
27626                    $in_quote--;
27627                    last;
27628                }
27629            }
27630            elsif ( $tok eq $beginning_tok ) {
27631                $quote_depth++;
27632            }
27633            elsif ( $tok eq '\\' ) {
27634
27635                # retain backslash unless it hides the beginning or end token
27636                $tok = $$rtokens[ ++$i ];
27637                $quoted_string .= '\\'
27638                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
27639            }
27640            $quoted_string .= $tok;
27641        }
27642    }
27643    if ( $i > $max_token_index ) { $i = $max_token_index }
27644    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
27645        $quoted_string );
27646}
27647
27648sub indicate_error {
27649    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
27650    interrupt_logfile();
27651    warning($msg);
27652    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
27653    resume_logfile();
27654}
27655
27656sub write_error_indicator_pair {
27657    my ( $line_number, $input_line, $pos, $carrat ) = @_;
27658    my ( $offset, $numbered_line, $underline ) =
27659      make_numbered_line( $line_number, $input_line, $pos );
27660    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
27661    warning( $numbered_line . "\n" );
27662    $underline =~ s/\s*$//;
27663    warning( $underline . "\n" );
27664}
27665
27666sub make_numbered_line {
27667
27668    #  Given an input line, its line number, and a character position of
27669    #  interest, create a string not longer than 80 characters of the form
27670    #     $lineno: sub_string
27671    #  such that the sub_string of $str contains the position of interest
27672    #
27673    #  Here is an example of what we want, in this case we add trailing
27674    #  '...' because the line is long.
27675    #
27676    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27677    #
27678    #  Here is another example, this time in which we used leading '...'
27679    #  because of excessive length:
27680    #
27681    # 2: ... er of the World Wide Web Consortium's
27682    #
27683    #  input parameters are:
27684    #   $lineno = line number
27685    #   $str = the text of the line
27686    #   $pos = position of interest (the error) : 0 = first character
27687    #
27688    #   We return :
27689    #     - $offset = an offset which corrects the position in case we only
27690    #       display part of a line, such that $pos-$offset is the effective
27691    #       position from the start of the displayed line.
27692    #     - $numbered_line = the numbered line as above,
27693    #     - $underline = a blank 'underline' which is all spaces with the same
27694    #       number of characters as the numbered line.
27695
27696    my ( $lineno, $str, $pos ) = @_;
27697    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
27698    my $excess = length($str) - $offset - 68;
27699    my $numc   = ( $excess > 0 ) ? 68 : undef;
27700
27701    if ( defined($numc) ) {
27702        if ( $offset == 0 ) {
27703            $str = substr( $str, $offset, $numc - 4 ) . " ...";
27704        }
27705        else {
27706            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
27707        }
27708    }
27709    else {
27710
27711        if ( $offset == 0 ) {
27712        }
27713        else {
27714            $str = "... " . substr( $str, $offset + 4 );
27715        }
27716    }
27717
27718    my $numbered_line = sprintf( "%d: ", $lineno );
27719    $offset -= length($numbered_line);
27720    $numbered_line .= $str;
27721    my $underline = " " x length($numbered_line);
27722    return ( $offset, $numbered_line, $underline );
27723}
27724
27725sub write_on_underline {
27726
27727    # The "underline" is a string that shows where an error is; it starts
27728    # out as a string of blanks with the same length as the numbered line of
27729    # code above it, and we have to add marking to show where an error is.
27730    # In the example below, we want to write the string '--^' just below
27731    # the line of bad code:
27732    #
27733    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27734    #                 ---^
27735    # We are given the current underline string, plus a position and a
27736    # string to write on it.
27737    #
27738    # In the above example, there will be 2 calls to do this:
27739    # First call:  $pos=19, pos_chr=^
27740    # Second call: $pos=16, pos_chr=---
27741    #
27742    # This is a trivial thing to do with substr, but there is some
27743    # checking to do.
27744
27745    my ( $underline, $pos, $pos_chr ) = @_;
27746
27747    # check for error..shouldn't happen
27748    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
27749        return $underline;
27750    }
27751    my $excess = length($pos_chr) + $pos - length($underline);
27752    if ( $excess > 0 ) {
27753        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
27754    }
27755    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
27756    return ($underline);
27757}
27758
27759sub pre_tokenize {
27760
27761    # Break a string, $str, into a sequence of preliminary tokens.  We
27762    # are interested in these types of tokens:
27763    #   words       (type='w'),            example: 'max_tokens_wanted'
27764    #   digits      (type = 'd'),          example: '0755'
27765    #   whitespace  (type = 'b'),          example: '   '
27766    #   any other single character (i.e. punct; type = the character itself).
27767    # We cannot do better than this yet because we might be in a quoted
27768    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
27769    # tokens.
27770    my ( $str, $max_tokens_wanted ) = @_;
27771
27772    # we return references to these 3 arrays:
27773    my @tokens    = ();     # array of the tokens themselves
27774    my @token_map = (0);    # string position of start of each token
27775    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
27776
27777    do {
27778
27779        # whitespace
27780        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
27781
27782        # numbers
27783        # note that this must come before words!
27784        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
27785
27786        # words
27787        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
27788
27789        # single-character punctuation
27790        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
27791
27792        # that's all..
27793        else {
27794            return ( \@tokens, \@token_map, \@type );
27795        }
27796
27797        push @tokens,    $1;
27798        push @token_map, pos($str);
27799
27800    } while ( --$max_tokens_wanted != 0 );
27801
27802    return ( \@tokens, \@token_map, \@type );
27803}
27804
27805sub show_tokens {
27806
27807    # this is an old debug routine
27808    my ( $rtokens, $rtoken_map ) = @_;
27809    my $num = scalar(@$rtokens);
27810    my $i;
27811
27812    for ( $i = 0 ; $i < $num ; $i++ ) {
27813        my $len = length( $$rtokens[$i] );
27814        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
27815    }
27816}
27817
27818sub matching_end_token {
27819
27820    # find closing character for a pattern
27821    my $beginning_token = shift;
27822
27823    if ( $beginning_token eq '{' ) {
27824        '}';
27825    }
27826    elsif ( $beginning_token eq '[' ) {
27827        ']';
27828    }
27829    elsif ( $beginning_token eq '<' ) {
27830        '>';
27831    }
27832    elsif ( $beginning_token eq '(' ) {
27833        ')';
27834    }
27835    else {
27836        $beginning_token;
27837    }
27838}
27839
27840sub dump_token_types {
27841    my $class = shift;
27842    my $fh    = shift;
27843
27844    # This should be the latest list of token types in use
27845    # adding NEW_TOKENS: add a comment here
27846    print $fh <<'END_OF_LIST';
27847
27848Here is a list of the token types currently used for lines of type 'CODE'.
27849For the following tokens, the "type" of a token is just the token itself.
27850
27851.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27852( ) <= >= == =~ !~ != ++ -- /= x=
27853... **= <<= >>= &&= ||= //= <=>
27854, + - / * | % ! x ~ = \ ? : . < > ^ &
27855
27856The following additional token types are defined:
27857
27858 type    meaning
27859    b    blank (white space)
27860    {    indent: opening structural curly brace or square bracket or paren
27861         (code block, anonymous hash reference, or anonymous array reference)
27862    }    outdent: right structural curly brace or square bracket or paren
27863    [    left non-structural square bracket (enclosing an array index)
27864    ]    right non-structural square bracket
27865    (    left non-structural paren (all but a list right of an =)
27866    )    right non-structural parena
27867    L    left non-structural curly brace (enclosing a key)
27868    R    right non-structural curly brace
27869    ;    terminal semicolon
27870    f    indicates a semicolon in a "for" statement
27871    h    here_doc operator <<
27872    #    a comment
27873    Q    indicates a quote or pattern
27874    q    indicates a qw quote block
27875    k    a perl keyword
27876    C    user-defined constant or constant function (with void prototype = ())
27877    U    user-defined function taking parameters
27878    G    user-defined function taking block parameter (like grep/map/eval)
27879    M    (unused, but reserved for subroutine definition name)
27880    P    (unused, but -html uses it to label pod text)
27881    t    type indicater such as %,$,@,*,&,sub
27882    w    bare word (perhaps a subroutine call)
27883    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
27884    n    a number
27885    v    a v-string
27886    F    a file test operator (like -e)
27887    Y    File handle
27888    Z    identifier in indirect object slot: may be file handle, object
27889    J    LABEL:  code block label
27890    j    LABEL after next, last, redo, goto
27891    p    unary +
27892    m    unary -
27893    pp   pre-increment operator ++
27894    mm   pre-decrement operator --
27895    A    : used as attribute separator
27896
27897    Here are the '_line_type' codes used internally:
27898    SYSTEM         - system-specific code before hash-bang line
27899    CODE           - line of perl code (including comments)
27900    POD_START      - line starting pod, such as '=head'
27901    POD            - pod documentation text
27902    POD_END        - last line of pod section, '=cut'
27903    HERE           - text of here-document
27904    HERE_END       - last line of here-doc (target word)
27905    FORMAT         - format section
27906    FORMAT_END     - last line of format section, '.'
27907    DATA_START     - __DATA__ line
27908    DATA           - unidentified text following __DATA__
27909    END_START      - __END__ line
27910    END            - unidentified text following __END__
27911    ERROR          - we are in big trouble, probably not a perl script
27912END_OF_LIST
27913}
27914
27915BEGIN {
27916
27917    # These names are used in error messages
27918    @opening_brace_names = qw# '{' '[' '(' '?' #;
27919    @closing_brace_names = qw# '}' ']' ')' ':' #;
27920
27921    my @digraphs = qw(
27922      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27923      <= >= == =~ !~ != ++ -- /= x= ~~
27924    );
27925    @is_digraph{@digraphs} = (1) x scalar(@digraphs);
27926
27927    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
27928    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
27929
27930    # make a hash of all valid token types for self-checking the tokenizer
27931    # (adding NEW_TOKENS : select a new character and add to this list)
27932    my @valid_token_types = qw#
27933      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
27934      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
27935      #;
27936    push( @valid_token_types, @digraphs );
27937    push( @valid_token_types, @trigraphs );
27938    push( @valid_token_types, '#' );
27939    push( @valid_token_types, ',' );
27940    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
27941
27942    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
27943    my @file_test_operators =
27944      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
27945    @is_file_test_operator{@file_test_operators} =
27946      (1) x scalar(@file_test_operators);
27947
27948    # these functions have prototypes of the form (&), so when they are
27949    # followed by a block, that block MAY BE followed by an operator.
27950    @_ = qw( do eval );
27951    @is_block_operator{@_} = (1) x scalar(@_);
27952
27953    # these functions allow an identifier in the indirect object slot
27954    @_ = qw( print printf sort exec system say);
27955    @is_indirect_object_taker{@_} = (1) x scalar(@_);
27956
27957    # These tokens may precede a code block
27958    # patched for SWITCH/CASE
27959    @_ =
27960      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
27961      unless do while until eval for foreach map grep sort
27962      switch case given when);
27963    @is_code_block_token{@_} = (1) x scalar(@_);
27964
27965    # I'll build the list of keywords incrementally
27966    my @Keywords = ();
27967
27968    # keywords and tokens after which a value or pattern is expected,
27969    # but not an operator.  In other words, these should consume terms
27970    # to their right, or at least they are not expected to be followed
27971    # immediately by operators.
27972    my @value_requestor = qw(
27973      AUTOLOAD
27974      BEGIN
27975      CHECK
27976      DESTROY
27977      END
27978      EQ
27979      GE
27980      GT
27981      INIT
27982      LE
27983      LT
27984      NE
27985      UNITCHECK
27986      abs
27987      accept
27988      alarm
27989      and
27990      atan2
27991      bind
27992      binmode
27993      bless
27994      break
27995      caller
27996      chdir
27997      chmod
27998      chomp
27999      chop
28000      chown
28001      chr
28002      chroot
28003      close
28004      closedir
28005      cmp
28006      connect
28007      continue
28008      cos
28009      crypt
28010      dbmclose
28011      dbmopen
28012      defined
28013      delete
28014      die
28015      dump
28016      each
28017      else
28018      elsif
28019      eof
28020      eq
28021      exec
28022      exists
28023      exit
28024      exp
28025      fcntl
28026      fileno
28027      flock
28028      for
28029      foreach
28030      formline
28031      ge
28032      getc
28033      getgrgid
28034      getgrnam
28035      gethostbyaddr
28036      gethostbyname
28037      getnetbyaddr
28038      getnetbyname
28039      getpeername
28040      getpgrp
28041      getpriority
28042      getprotobyname
28043      getprotobynumber
28044      getpwnam
28045      getpwuid
28046      getservbyname
28047      getservbyport
28048      getsockname
28049      getsockopt
28050      glob
28051      gmtime
28052      goto
28053      grep
28054      gt
28055      hex
28056      if
28057      index
28058      int
28059      ioctl
28060      join
28061      keys
28062      kill
28063      last
28064      lc
28065      lcfirst
28066      le
28067      length
28068      link
28069      listen
28070      local
28071      localtime
28072      lock
28073      log
28074      lstat
28075      lt
28076      map
28077      mkdir
28078      msgctl
28079      msgget
28080      msgrcv
28081      msgsnd
28082      my
28083      ne
28084      next
28085      no
28086      not
28087      oct
28088      open
28089      opendir
28090      or
28091      ord
28092      our
28093      pack
28094      pipe
28095      pop
28096      pos
28097      print
28098      printf
28099      prototype
28100      push
28101      quotemeta
28102      rand
28103      read
28104      readdir
28105      readlink
28106      readline
28107      readpipe
28108      recv
28109      redo
28110      ref
28111      rename
28112      require
28113      reset
28114      return
28115      reverse
28116      rewinddir
28117      rindex
28118      rmdir
28119      scalar
28120      seek
28121      seekdir
28122      select
28123      semctl
28124      semget
28125      semop
28126      send
28127      sethostent
28128      setnetent
28129      setpgrp
28130      setpriority
28131      setprotoent
28132      setservent
28133      setsockopt
28134      shift
28135      shmctl
28136      shmget
28137      shmread
28138      shmwrite
28139      shutdown
28140      sin
28141      sleep
28142      socket
28143      socketpair
28144      sort
28145      splice
28146      split
28147      sprintf
28148      sqrt
28149      srand
28150      stat
28151      study
28152      substr
28153      symlink
28154      syscall
28155      sysopen
28156      sysread
28157      sysseek
28158      system
28159      syswrite
28160      tell
28161      telldir
28162      tie
28163      tied
28164      truncate
28165      uc
28166      ucfirst
28167      umask
28168      undef
28169      unless
28170      unlink
28171      unpack
28172      unshift
28173      untie
28174      until
28175      use
28176      utime
28177      values
28178      vec
28179      waitpid
28180      warn
28181      while
28182      write
28183      xor
28184
28185      switch
28186      case
28187      given
28188      when
28189      err
28190      say
28191    );
28192
28193    # patched above for SWITCH/CASE given/when err say
28194    # 'err' is a fairly safe addition.
28195    # TODO: 'default' still needed if appropriate
28196    # 'use feature' seen, but perltidy works ok without it.
28197    # Concerned that 'default' could break code.
28198    push( @Keywords, @value_requestor );
28199
28200    # These are treated the same but are not keywords:
28201    my @extra_vr = qw(
28202      constant
28203      vars
28204    );
28205    push( @value_requestor, @extra_vr );
28206
28207    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
28208
28209    # this list contains keywords which do not look for arguments,
28210    # so that they might be followed by an operator, or at least
28211    # not a term.
28212    my @operator_requestor = qw(
28213      endgrent
28214      endhostent
28215      endnetent
28216      endprotoent
28217      endpwent
28218      endservent
28219      fork
28220      getgrent
28221      gethostent
28222      getlogin
28223      getnetent
28224      getppid
28225      getprotoent
28226      getpwent
28227      getservent
28228      setgrent
28229      setpwent
28230      time
28231      times
28232      wait
28233      wantarray
28234    );
28235
28236    push( @Keywords, @operator_requestor );
28237
28238    # These are treated the same but are not considered keywords:
28239    my @extra_or = qw(
28240      STDERR
28241      STDIN
28242      STDOUT
28243    );
28244
28245    push( @operator_requestor, @extra_or );
28246
28247    @expecting_operator_token{@operator_requestor} =
28248      (1) x scalar(@operator_requestor);
28249
28250    # these token TYPES expect trailing operator but not a term
28251    # note: ++ and -- are post-increment and decrement, 'C' = constant
28252    my @operator_requestor_types = qw( ++ -- C <> q );
28253    @expecting_operator_types{@operator_requestor_types} =
28254      (1) x scalar(@operator_requestor_types);
28255
28256    # these token TYPES consume values (terms)
28257    # note: pp and mm are pre-increment and decrement
28258    # f=semicolon in for,  F=file test operator
28259    my @value_requestor_type = qw#
28260      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
28261      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
28262      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
28263      f F pp mm Y p m U J G j >> << ^ t
28264      #;
28265    push( @value_requestor_type, ',' )
28266      ;    # (perl doesn't like a ',' in a qw block)
28267    @expecting_term_types{@value_requestor_type} =
28268      (1) x scalar(@value_requestor_type);
28269
28270    # Note: the following valid token types are not assigned here to
28271    # hashes requesting to be followed by values or terms, but are
28272    # instead currently hard-coded into sub operator_expected:
28273    # ) -> :: Q R Z ] b h i k n v w } #
28274
28275    # For simple syntax checking, it is nice to have a list of operators which
28276    # will really be unhappy if not followed by a term.  This includes most
28277    # of the above...
28278    %really_want_term = %expecting_term_types;
28279
28280    # with these exceptions...
28281    delete $really_want_term{'U'}; # user sub, depends on prototype
28282    delete $really_want_term{'F'}; # file test works on $_ if no following term
28283    delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
28284                                   # let perl do it
28285
28286    @_ = qw(q qq qw qx qr s y tr m);
28287    @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
28288
28289    # These keywords are handled specially in the tokenizer code:
28290    my @special_keywords = qw(
28291      do
28292      eval
28293      format
28294      m
28295      package
28296      q
28297      qq
28298      qr
28299      qw
28300      qx
28301      s
28302      sub
28303      tr
28304      y
28305    );
28306    push( @Keywords, @special_keywords );
28307
28308    # Keywords after which list formatting may be used
28309    # WARNING: do not include |map|grep|eval or perl may die on
28310    # syntax errors (map1.t).
28311    my @keyword_taking_list = qw(
28312      and
28313      chmod
28314      chomp
28315      chop
28316      chown
28317      dbmopen
28318      die
28319      elsif
28320      exec
28321      fcntl
28322      for
28323      foreach
28324      formline
28325      getsockopt
28326      if
28327      index
28328      ioctl
28329      join
28330      kill
28331      local
28332      msgctl
28333      msgrcv
28334      msgsnd
28335      my
28336      open
28337      or
28338      our
28339      pack
28340      print
28341      printf
28342      push
28343      read
28344      readpipe
28345      recv
28346      return
28347      reverse
28348      rindex
28349      seek
28350      select
28351      semctl
28352      semget
28353      send
28354      setpriority
28355      setsockopt
28356      shmctl
28357      shmget
28358      shmread
28359      shmwrite
28360      socket
28361      socketpair
28362      sort
28363      splice
28364      split
28365      sprintf
28366      substr
28367      syscall
28368      sysopen
28369      sysread
28370      sysseek
28371      system
28372      syswrite
28373      tie
28374      unless
28375      unlink
28376      unpack
28377      unshift
28378      until
28379      vec
28380      warn
28381      while
28382    );
28383    @is_keyword_taking_list{@keyword_taking_list} =
28384      (1) x scalar(@keyword_taking_list);
28385
28386    # These are not used in any way yet
28387    #    my @unused_keywords = qw(
28388    #      CORE
28389    #     __FILE__
28390    #     __LINE__
28391    #     __PACKAGE__
28392    #     );
28393
28394    #  The list of keywords was extracted from function 'keyword' in
28395    #  perl file toke.c version 5.005.03, using this utility, plus a
28396    #  little editing: (file getkwd.pl):
28397    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
28398    #  Add 'get' prefix where necessary, then split into the above lists.
28399    #  This list should be updated as necessary.
28400    #  The list should not contain these special variables:
28401    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
28402    #  __DATA__ __END__
28403
28404    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
28405}
284061;
28407__END__
28408
28409#line 28750
28410