1#!/usr/bin/env perl
2############################################################
3#  Local Nuasis Revision $Id: perltidy,v 1.1 2003/06/05 01:53:21 hbo Exp $
4#
5#    perltidy - a perl script indenter and formatter
6#
7#    Copyright (c) 2000, 2001 by Steven L. Hancock
8#    Distributed under the GPL license agreement; see file COPYING
9#
10#    This program is free software; you can redistribute it and/or modify
11#    it under the terms of the GNU General Public License as published by
12#    the Free Software Foundation; either version 2 of the License, or
13#    (at your option) any later version.
14#
15#    This program is distributed in the hope that it will be useful,
16#    but WITHOUT ANY WARRANTY; without even the implied warranty of
17#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18#    GNU General Public License for more details.
19#
20#    You should have received a copy of the GNU General Public License
21#    along with this program; if not, write to the Free Software
22#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23#
24#    For brief instructions instructions, try 'perltidy -h'.
25#    For more complete documentation, try 'man perltidy'.
26#
27#    This script is an example of the default style.  It was formatted with:
28#
29#      perltidy perltidy
30#
31#    Code Contributions:
32#      Michael Cartmell supplied code for adaptation to VMS and helped with
33#        v-strings.
34#      Hugh S. Myers supplied sub streamhandle and the supporting code to
35#        create a PerlTidy module which can operate on strings, arrays, etc.
36#      Many others have supplied key ideas, suggestions, and bug reports;
37#        see the ChangeLog file.
38#
39############################################################
40
41package PerlTidy;
42use 5.004;    # need IO::File from 5.004 or later
43BEGIN { $^W = 1; }    # turn on warnings
44use strict;
45use Exporter;
46use Carp;
47
48use vars qw{
49  $VERSION
50  @ISA
51  @EXPORT
52  $missing_io_scalar
53  $missing_io_scalararray
54};
55
56@EXPORT = qw( &perltidy );
57
58eval "use diagnostics";
59{ eval "use IO::Scalar";      $missing_io_scalar      = $@; }
60{ eval "use IO::ScalarArray"; $missing_io_scalararray = $@; }
61use IO::File;
62
63BEGIN {
64    ($VERSION=q($Id: perltidy,v 1.1 2003/06/05 01:53:21 hbo Exp $)) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
65}
66
67# Preloaded methods go here.
68sub streamhandle {
69    my $ref = ref( my $filename = shift );
70    my $mode = shift;
71    my $New;
72    my $fh;
73
74    if ( $ref eq 'ARRAY' ) {
75        die $missing_io_scalararray if $missing_io_scalararray;
76        $New = sub { IO::ScalarArray->new(@_) };
77    }
78    elsif ( $ref eq 'SCALAR' ) {
79        die $missing_io_scalar if $missing_io_scalar;
80        $New = sub { IO::Scalar->new(@_) };
81    }
82    elsif ( $filename eq '-' ) {
83        $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
84    }
85    else {
86        $New = sub { IO::File->new(@_) };
87    }
88    $fh = $New->( $filename, $mode )
89      or warn "Couldn't open file:$filename in mode:$mode : $!\n";
90    return $fh, ( $ref or $filename );
91}
92
93=pod
94
95POD NOTE: Documentation is contained in separately supplied .pod files;
96pod is used here only for long comments.
97
98Here is a map of the flow of data from the input source to the output
99line sink:
100
101LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
102      input                         groups                 output
103      lines   tokens      lines       of     lines          lines
104                                     lines
105
106The names correspond to the package names responsible for the unit processes.
107
108The overall process is controlled by the "main" package.
109
110LineSource is the stream of input lines
111
112Tokenizer analyzes a line and breaks it into tokens, peeking ahead
113if necessary.  A token is any section of the input line which should be
114manipulated as a single entity during formatting.  For example, a single
115',' character is a token, and so is an entire side comment.  It handles
116the complexities of Perl syntax, such as distinguishing between '<<' as
117a shift operator and as a here-document, or distinguishing between '/'
118as a divide symbol and as a pattern delimiter.
119
120Formatter inserts and deletes whitespace between tokens, and breaks
121sequences of tokens at appropriate points as output lines.  It bases its
122decisions on the default rules as modified by any command-line options.
123
124VerticalAligner collects groups of lines together and tries to line up
125certain tokens, such as '=>', '#', and '=' by adding whitespace.
126
127FileWriter simply writes lines to the output stream.
128
129The Logger package, not shown, records significant events and warning
130messages.  It writes a .LOG file, which may be saved with a
131'-log' or a '-g' flag.
132
133Some comments in this file refer to separate test files, most of which
134are in the test directory which can be downloaded in addition to the
135basic perltidy distribution.
136
137=cut
138
139sub perltidy {
140
141    my %defaults = (
142        source      => undef,
143        destination => undef,
144
145        # .. more to be added
146    );
147
148    my %input_hash = @_;
149    if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
150        local $" = ')(';
151        confess
152          "unknown parameters in call to PerlTidy::perltidy: (@bad_keys)\n";
153    }
154
155    %input_hash = ( %defaults, %input_hash );
156    my $source_array      = $input_hash{'source'};
157    my $destination_array = $input_hash{'destination'};
158
159    # VMS file names are restricted to a 40.40 format, so
160    # we append _tdy instead of .tdy, etc.
161    my $dot;
162    my $dot_pattern;
163    if ( $^O eq 'VMS' ) {
164        $dot         = '_';
165        $dot_pattern = '_';
166    }
167    else {
168        $dot         = '.';
169        $dot_pattern = '\.';    # must escape for use in regex
170    }
171
172    # handle command line options
173    my ( $rOpts, $config_file, $rraw_options, $pending_complaint, $saw_extrude )
174      = process_command_line();
175    PerlTidy::Formatter::check_options($rOpts);
176    if ( $rOpts->{'html'} ) {
177        PerlTidy::HtmlWriter->check_options($rOpts);
178    }
179
180    # create a diagnostics object if requested
181    my $diagnostics_object = undef;
182    if ( $rOpts->{'DIAGNOSTICS'} ) {
183        $diagnostics_object = PerlTidy::Diagnostics->new();
184    }
185
186    # no filenames should be given if input is from an array
187    if ($source_array) {
188        if ( @ARGV > 0 ) {
189            die
190"You may not specify any filenames when a source array is given\n";
191        }
192
193        # we'll stuff the source array into ARGV
194        unshift ( @ARGV, $source_array );
195    }
196
197    # use stdin by default if no source array and no args
198    else {
199        unshift ( @ARGV, '-' ) unless @ARGV;
200    }
201
202    # loop to process all files in argument list
203    my $number_of_files = @ARGV;
204    my $input_file;
205    my $formatter = undef;
206    my $tokenizer = undef;
207
208    # Set a flag here for any system which does not have a shell to
209    # expand wildcard filenames like '*.pl'.  In theory it should also
210    # be ok to set the flag for any system, but I prefer not to do so
211    # out of robustness concerns.
212    my $use_glob = ( $^O =~ /^(MSWin32|msdos|dos|win32)$/ );
213
214    while ( $input_file = shift @ARGV ) {
215        my $fileroot;
216
217        #---------------------------------------------------------------
218        # determine the input file name
219        #---------------------------------------------------------------
220        if ( $input_file eq '-' ) {    # '-' indicates input from STDIN
221            $fileroot = "perltidy";    # root name to use for .ERR, .LOG, etc
222        }
223        elsif ($source_array) {
224            $fileroot = "perltidy";
225        }
226        else {
227            $fileroot = $input_file;
228            unless ( -e $input_file ) {
229
230                # file doesn't exist, maybe we have a wildcard
231                if ($use_glob) {
232
233                    # be sure files exist, because glob('p.q') always
234                    # returns 'p.q' even if 'p.q' doesn't exist.
235                    my @files = grep { -e $_ } glob($input_file);
236                    if (@files) {
237                        unshift @ARGV, @files;
238                        next;
239                    }
240                }
241
242                print "skipping file: $input_file: does not exist\n";
243                next;
244            }
245
246            unless ( -f $input_file ) {
247                print "skipping: $input_file: not a regular file\n";
248                next;
249            }
250
251            unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
252                print
253                  "skipping file: $input_file: Non-text (override with -f)\n";
254                next;
255            }
256        }
257
258        # Skip files with same extension as the output files
259        # because this can lead to a messy situation
260        # with files like script.tdy.tdy.tdy ... when you rerun
261        # perltidy over and over with wildcard input
262        my $output_extension = $rOpts->{'html'} ? "html" : "tdy";
263        if ( defined( $rOpts->{'output-file-extension'} ) ) {
264            $output_extension = $rOpts->{'output-file-extension'};
265        }
266
267        if (
268            !$source_array
269            && ( $input_file =~
270                /($dot_pattern)($output_extension|LOG|DEBUG|ERR|TEE|TMPI|TMPO)$/
271            )
272            || ( $input_file eq 'DIAGNOSTICS' )
273          )
274        {
275            print "skipping file: $input_file: wrong extension\n";
276            next;
277        }
278
279        # the 'source_object' supplies a method to read the input file
280        my $source_object = PerlTidy::LineSource->new( $input_file, $rOpts );
281        next unless ($source_object);
282
283        # register this file name with the Diagnostics package
284        $diagnostics_object->set_input_file($input_file) if $diagnostics_object;
285
286        #---------------------------------------------------------------
287        # determine the output file name
288        #---------------------------------------------------------------
289        my $output_file = undef;
290
291        if ( $rOpts->{'outfile'} ) {
292
293            if ( $number_of_files <= 1 ) {
294
295                if ( $rOpts->{'standard-output'} ) {
296                    die "You may not use -o and -st together\n";
297                }
298                elsif ($destination_array) {
299                    die
300"You may not specify a destination array and -o together\n";
301                }
302                $output_file = $rOpts->{outfile};
303
304                # make sure user gives a file name after -o
305                if ( $output_file =~ /^-/ ) {
306                    die "You must specify a valid filename after -o\n";
307                }
308            }
309            else {
310                die "You may not use -o with more than one input file\n";
311            }
312        }
313        elsif ( $rOpts->{'standard-output'} ) {
314            if ($destination_array) {
315                die
316                  "You may not specify a destination array and -st together\n";
317            }
318            $output_file = '-';
319
320            if ( $number_of_files <= 1 ) {
321            }
322            else {
323                die "You may not use -st with more than one input file\n";
324            }
325        }
326        elsif ($destination_array) {
327            $output_file = $destination_array;
328        }
329        elsif ($source_array) {    # source but no destination goes to stdout
330            $output_file = '-';
331        }
332        elsif ( $input_file eq '-' ) {
333            $output_file = $input_file;
334        }
335        else {
336            $output_file = $fileroot . $dot . $output_extension;
337        }
338
339        # the 'sink_object' knows how to write the output file
340        my $tee_file    = $fileroot . $dot . "TEE";
341        my $sink_object =
342          PerlTidy::LineSink->new( $output_file, $tee_file, $rOpts );
343
344        #---------------------------------------------------------------
345        # initialize the error logger
346        #---------------------------------------------------------------
347        my $warning_file = $fileroot . $dot . "ERR";
348        my $log_file     = $fileroot . $dot . "LOG";
349
350        my $logger_object =
351          PerlTidy::Logger->new( $rOpts, $log_file, $warning_file,
352            $saw_extrude );
353        write_logfile_header( $rOpts, $logger_object, $config_file,
354            $rraw_options );
355        if ($pending_complaint) {
356            $logger_object->complain($pending_complaint);
357        }
358
359        #---------------------------------------------------------------
360        # initialize the debug object, if any
361        #---------------------------------------------------------------
362        my $debugger_object = undef;
363        if ( $rOpts->{DEBUG} ) {
364            $debugger_object =
365              PerlTidy::Debugger->new( $fileroot . $dot . "DEBUG" );
366        }
367
368        # we have to delete any old formatter because, for safety,
369        # formatter will check to see that there is only one.
370        $formatter = undef;
371
372        #---------------------------------------------------------------
373        # create a formatter for this file : html writer or pretty printer
374        #---------------------------------------------------------------
375        if ( $rOpts->{'html'} ) {
376            $formatter = PerlTidy::HtmlWriter->new( $fileroot, $output_file );
377        }
378
379        else {
380            $formatter = PerlTidy::Formatter->new(
381                logger_object      => $logger_object,
382                diagnostics_object => $diagnostics_object,
383                sink_object        => $sink_object,
384            );
385        }
386
387        #---------------------------------------------------------------
388        # create the tokenizer for this file
389        #---------------------------------------------------------------
390        $tokenizer = undef;
391        $tokenizer = PerlTidy::Tokenizer->new(
392            source_object       => $source_object,
393            logger_object       => $logger_object,
394            debugger_object     => $debugger_object,
395            diagnostics_object  => $diagnostics_object,
396            starting_level      => $rOpts->{'starting-indentation-level'},
397            tabs                => $rOpts->{'tabs'},
398            indent_columns      => $rOpts->{'indent-columns'},
399            look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
400            look_for_autoloader => $rOpts->{'look-for-autoloader'},
401            look_for_selfloader => $rOpts->{'look-for-selfloader'},
402            trim_qw             => $rOpts->{'trim-qw'},
403        );
404
405        #---------------------------------------------------------------
406        # now we can do it
407        #---------------------------------------------------------------
408        process_this_file( $tokenizer, $formatter );
409
410        #---------------------------------------------------------------
411        # clean up and report errors
412        #---------------------------------------------------------------
413        $source_object->close_input_file();
414        $sink_object->close_output_file()    if $sink_object;
415        $debugger_object->close_debug_file() if $debugger_object;
416
417        my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
418        if ($output_file) {
419            chmod 0755, $output_file;
420            if ( $logger_object && $rOpts->{'check-syntax'} ) {
421                my $ifname = $source_object->get_input_file_copy_name();
422                my $ofname = $sink_object->get_output_file_copy();
423                $infile_syntax_ok =
424                  check_syntax( $ifname, $ofname, $logger_object, $rOpts );
425            }
426        }
427        $source_object->unlink_copy();
428        $sink_object->unlink_copy();
429        $logger_object->finish( $infile_syntax_ok, $formatter )
430          if $logger_object;
431
432    }    # end of loop to process all files
433
434}    # end of main program
435
436sub write_logfile_header {
437    my ( $rOpts, $logger_object, $config_file, $rraw_options ) = @_;
438    $logger_object->write_logfile_entry(
439"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
440    );
441    my $options_string = join ( ' ', @$rraw_options );
442
443    if ($config_file) {
444        $logger_object->write_logfile_entry(
445            "Found Configuration File >>> $config_file \n");
446    }
447    $logger_object->write_logfile_entry(
448        "Configuration and command line parameters for this run:\n");
449    $logger_object->write_logfile_entry("$options_string\n");
450
451    if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
452        $rOpts->{'logfile'} = 1;    # force logfile to be saved
453        $logger_object->write_logfile_entry(
454            "Final parameter set for this run\n");
455        $logger_object->write_logfile_entry(
456            "------------------------------------\n");
457
458        foreach my $i ( keys %{$rOpts} ) {
459            $logger_object->write_logfile_entry( '--' . "$i=$rOpts->{$i}\n" );
460        }
461        $logger_object->write_logfile_entry(
462            "------------------------------------\n");
463    }
464    $logger_object->write_logfile_entry(
465        "To find error messages search for 'WARNING' with your editor\n");
466}
467
468sub process_command_line {
469    use Getopt::Long;
470
471    ######################################################################
472    # Note: a few options are not documented in the man page and usage
473    # message. This is because these are experimental or debug options and
474    # may or may not be retained in future versions.
475    #
476    # Here are the undocumented flags as far as I know.  Any of them
477    # may disappear at any time.  They are mainly for fine-tuning
478    # and debugging.
479    #
480    # xsc --> maximum-space-to-comment    # for spacing side comments
481    # fll --> fuzzy-line-length           # trivial parameter
482    # iob --> ignore-old-line-breaks      # do not follow breaks in old script
483    # tdy --> tidy-output                 # This is an internal flag
484    # chk --> check-multiline-quotes      # check for old bug; to be deleted
485    # mci --> maximum-continuation-indentation  # need for -lp
486    # scl --> short-concatenation-item-length   # helps break at '.'
487    # bob --> break-after-opening-brace   # this is the default
488    # bsj --> big-space-jump              # used by vertical aligner
489    # recombine                           # for debugging line breaks
490    # I   --> DIAGNOSTICS                 # for debugging
491    #         maximum-whitespace-columns
492    ######################################################################
493
494    # here is a summary of the Getopt codes:
495    # <none> does not take an argument
496    # =s takes a mandatory string
497    # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
498    # =i takes a mandatory integer
499    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
500    # ! does not take an argument and may be negated
501    #  i.e., -foo and -nofoo are allowed
502    # a double dash signals the end of the options list
503    #
504    #---------------------------------------------------------------
505    # Define the option string passed to GetOptions.
506    #---------------------------------------------------------------
507
508    my @option_string = ();
509    my %expansion     = ();
510    my $rexpansion    = \%expansion;
511
512    #  These options are parsed directly by perltidy:
513    #    help h
514    #    version v
515    #  However, they are included in the option set so that they will
516    #  be seen in the options dump.
517
518    # These long option names have no abbreviations or are treated specially
519    @option_string = qw(
520      html!
521      maximum-whitespace-columns=i
522      noprofile
523      npro
524      recombine!
525    );
526
527    # routine to install and check options
528    my $add_option = sub {
529        my ( $long_name, $short_name, $flag ) = @_;
530        push @option_string, $long_name . $flag;
531        if ($short_name) {
532            if ( $expansion{$short_name} ) {
533                my $existing_name = $expansion{$short_name}[0];
534                die
535"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
536            }
537            $expansion{$short_name} = [$long_name];
538            if ( $flag eq '!' ) {
539                my $nshort_name = 'n' . $short_name;
540                my $nolong_name = 'no' . $long_name;
541                if ( $expansion{$nshort_name} ) {
542                    my $existing_name = $expansion{$nshort_name}[0];
543                    die
544"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
545                }
546                $expansion{$nshort_name} = [$nolong_name];
547            }
548        }
549    };
550
551    # Install long option names which have a simple abbreviation.
552    # Options with code '!' get standard negation ('no' for long names,
553    # 'n' for abbreviations)
554    $add_option->( 'DEBUG',                             'D',    '!' );
555    $add_option->( 'DIAGNOSTICS',                       'I',    '!' );
556    $add_option->( 'add-newlines',                      'anl',  '!' );
557    $add_option->( 'add-semicolons',                    'asc',  '!' );
558    $add_option->( 'add-whitespace',                    'aws',  '!' );
559    $add_option->( 'big-space-jump',                    'bsj',  '=i' );
560    $add_option->( 'blanks-before-blocks',              'bbb',  '!' );
561    $add_option->( 'blanks-before-comments',            'bbc',  '!' );
562    $add_option->( 'blanks-before-subs',                'bbs',  '!' );
563    $add_option->( 'block-brace-tightness',             'bbt',  '=i' );
564    $add_option->( 'brace-left-and-indent',             'bli',  '!' );
565    $add_option->( 'brace-tightness',                   'bt',   '=i' );
566    $add_option->( 'break-after-comma-arrows',          'baa',  '!' );
567    $add_option->( 'break-after-opening-brace',         'bob',  '!' );
568    $add_option->( 'check-multiline-quotes',            'chk',  '!' );
569    $add_option->( 'check-syntax',                      'syn',  '!' );
570    $add_option->( 'continuation-indentation',          'ci',   '=i' );
571    $add_option->( 'closing-side-comments',             'csc',  '!' );
572    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
573    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
574    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
575    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
576    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
577    $add_option->( 'cuddled-else',                      'ce',   '!' );
578    $add_option->( 'delete-block-comments',             'dbc',  '!' );
579    $add_option->( 'delete-closing-side-comments',      'dcsc', '!' );
580    $add_option->( 'delete-old-newlines',               'dnl',  '!' );
581    $add_option->( 'delete-old-whitespace',             'dws',  '!' );
582    $add_option->( 'delete-pod',                        'dp',   '!' );
583    $add_option->( 'delete-semicolons',                 'dsm',  '!' );
584    $add_option->( 'delete-side-comments',              'dsc',  '!' );
585    $add_option->( 'dump-defaults',                     'ddf',  '!' );
586    $add_option->( 'dump-long-names',                   'dln',  '!' );
587    $add_option->( 'dump-options',                      'dop',  '!' );
588    $add_option->( 'dump-short-names',                  'dsn',  '!' );
589    $add_option->( 'dump-token-types',                  'dtt',  '!' );
590    $add_option->( 'dump-want-left-space',              'dwls', '!' );
591    $add_option->( 'dump-want-right-space',             'dwrs', '!' );
592    $add_option->( 'force-read-binary',                 'f',    '!' );
593    $add_option->( 'fuzzy-line-length',                 'fll',  '!' );
594    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
595    $add_option->( 'help',                              'h',    '' );
596    $add_option->( 'ignore-old-line-breaks',            'iob',  '!' );
597    $add_option->( 'indent-block-comments',             'ibc',  '!' );
598    $add_option->( 'indent-closing-brace',              'icb',  '!' );
599    $add_option->( 'indent-closing-paren',              'icp',  '!' );
600    $add_option->( 'indent-columns',                    'i',    '=i' );
601    $add_option->( 'line-up-parentheses',               'lp',   '!' );
602    $add_option->( 'logfile',                           'log',  '!' );
603    $add_option->( 'logfile-gap',                       'g',    ':i' );
604    $add_option->( 'long-block-line-count',             'lbl',  '=i' );
605    $add_option->( 'look-for-autoloader',               'lal',  '!' );
606    $add_option->( 'look-for-hash-bang',                'x',    '!' );
607    $add_option->( 'look-for-selfloader',               'lsl',  '!' );
608    $add_option->( 'maximum-consecutive-blank-lines',   'mbl',  '=i' );
609    $add_option->( 'maximum-continuation-indentation',  'mci',  '=i' );
610    $add_option->( 'maximum-fields-per-table',          'mft',  '=i' );
611    $add_option->( 'maximum-line-length',               'l',    '=i' );
612    $add_option->( 'maximum-space-to-comment',          'xsc',  '=i' );
613    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
614    $add_option->( 'nowant-left-space',                 'nwls', '=s' );
615    $add_option->( 'nowant-right-space',                'nwrs', '=s' );
616    $add_option->( 'opening-brace-always-on-right',     'bar',  '' );
617    $add_option->( 'opening-brace-on-new-line',         'bl',   '!' );
618    $add_option->( 'opening-sub-brace-on-new-line',     'sbl',  '!' );
619    $add_option->( 'outdent-labels',                    'ola',  '!' );
620    $add_option->( 'outdent-keywords',                  'okw',  '!' );
621    $add_option->( 'outdent-keyword-list',              'okwl', '=s' );
622    $add_option->( 'outdent-long-quotes',               'olq',  '!' );
623    $add_option->( 'outdent-long-comments',             'olc',  '!' );
624    $add_option->( 'outfile',                           'o',    '=s' );
625    $add_option->( 'output-file-extension',             'oext', '=s' );
626    $add_option->( 'paren-tightness',                   'pt',   '=i' );
627    $add_option->( 'pass-version-line',                 'pvl',  '!' );
628    $add_option->( 'profile',                           'pro',  '=s' );
629    $add_option->( 'quiet',                             'q',    '!' );
630    $add_option->( 'short-concatenation-item-length',   'scl',  '=i' );
631    $add_option->( 'show-options',                      'opt',  '!' );
632    $add_option->( 'space-for-semicolon',               'sfs',  '!' );
633    $add_option->( 'space-terminal-semicolon',          'sts',  '!' );
634    $add_option->( 'static-side-comments',              'ssc',  '!' );
635    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
636    $add_option->( 'square-bracket-tightness',          'sbt',  '=i' );
637    $add_option->( 'standard-error-output',             'se',   '!' );
638    $add_option->( 'standard-output',                   'st',   '!' );
639    $add_option->( 'starting-indentation-level',        'sil',  '=i' );
640    $add_option->( 'static-block-comments',             'sbc',  '!' );
641    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
642    $add_option->( 'swallow-optional-blank-lines',      'sob',  '!' );
643    $add_option->( 'tabs',                              't',    '!' );
644    $add_option->( 'tee-block-comments',                'tbc',  '!' );
645    $add_option->( 'tee-pod',                           'tp',   '!' );
646    $add_option->( 'tee-side-comments',                 'tsc',  '!' );
647    $add_option->( 'tidy-output',                       'tdy',  '!' );
648    $add_option->( 'trim-qw',                           'tqw',  '!' );
649    $add_option->( 'version',                           'v',    '' );
650    $add_option->( 'want-break-after',                  'wba',  '=s' );
651    $add_option->( 'want-break-before',                 'wbb',  '=s' );
652    $add_option->( 'want-left-space',                   'wls',  '=s' );
653    $add_option->( 'want-right-space',                  'wrs',  '=s' );
654    $add_option->( 'warning-output',                    'w',    '!' );
655
656    # The PerlTidy::HtmlWriter will add its own options to the string
657    PerlTidy::HtmlWriter->make_getopt_long_names( \@option_string );
658
659    #---------------------------------------------------------------
660    # Assign default values to the above options here, except
661    # for 'outfile' and 'help'.
662    # These settings should approximate the perlstyle(1) suggestions.
663    #---------------------------------------------------------------
664    my @defaults = qw(
665      add-newlines
666      add-semicolons
667      add-whitespace
668      big-space-jump=24
669      blanks-before-blocks
670      blanks-before-comments
671      blanks-before-subs
672      block-brace-tightness=0
673      brace-tightness=1
674      break-after-opening-brace
675      check-syntax
676      closing-side-comment-interval=6
677      closing-side-comment-maximum-text=20
678      continuation-indentation=2
679      delete-old-newlines
680      delete-semicolons
681      fuzzy-line-length
682      hanging-side-comments
683      indent-block-comments
684      indent-columns=4
685      long-block-line-count=8
686      look-for-autoloader
687      look-for-selfloader
688      maximum-consecutive-blank-lines=1
689      maximum-continuation-indentation=40
690      maximum-fields-per-table=40
691      maximum-line-length=80
692      maximum-space-to-comment=32
693      maximum-whitespace-columns=32
694      minimum-space-to-comment=4
695      nobrace-left-and-indent
696      nobreak-after-comma-arrows
697      nocuddled-else
698      nodelete-old-whitespace
699      nohtml
700      noignore-old-line-breaks
701      noindent-closing-brace
702      noindent-closing-paren
703      nologfile
704      noquiet
705      noshow-options
706      nostatic-side-comments
707      noswallow-optional-blank-lines
708      notabs
709      nowarning-output
710      outdent-long-quotes
711      paren-tightness=1
712      pass-version-line
713      recombine
714      short-concatenation-item-length=8
715      space-for-semicolon
716      square-bracket-tightness=1
717      static-block-comments
718      tidy-output
719      trim-qw
720    );
721
722    #---------------------------------------------------------------
723    # set the defaults by passing the above list through GetOptions
724    #---------------------------------------------------------------
725    my %Opts = ();
726    {
727        local @ARGV;
728        my $i;
729
730        for $i (@defaults) { push @ARGV, "--" . $i }
731
732        if ( !GetOptions( \%Opts, @option_string ) ) {
733            die "Programming Bug: error in setting default options";
734        }
735    }
736
737    #---------------------------------------------------------------
738    # Define abbreviations which will be expanded into the above primitives.
739    # These may be defined recursively.
740    #---------------------------------------------------------------
741    %expansion = (
742        %expansion,
743        'freeze-newlines'    => [qw(noadd-newlines nodelete-old-newlines)],
744        'fnl'                => [qw(freeze-newlines)],
745        'freeze-whitespace'  => [qw(noadd-whitespace nodelete-old-whitespace)],
746        'fws'                => [qw(freeze-whitespace)],
747        'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
748        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
749        'nooutdent-long-lines' =>
750          [qw(nooutdent-long-quotes nooutdent-long-comments)],
751        'noll'                => [qw(nooutdent-long-lines)],
752        'io'                  => [qw(indent-only)],
753        'delete-all-comments' =>
754          [qw(delete-block-comments delete-side-comments delete-pod)],
755        'nodelete-all-comments' =>
756          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
757        'dac'              => [qw(delete-all-comments)],
758        'ndac'             => [qw(nodelete-all-comments)],
759        'gnu'              => [qw(gnu-style)],
760        'tee-all-comments' =>
761          [qw(tee-block-comments tee-side-comments tee-pod)],
762        'notee-all-comments' =>
763          [qw(notee-block-comments notee-side-comments notee-pod)],
764        'tac'   => [qw(tee-all-comments)],
765        'ntac'  => [qw(notee-all-comments)],
766        'nhtml' => [qw(nohtml)],
767
768        # 'mangle' originally deleted pod and comments, but to keep it
769        # reversible, it no longer does.  But if you really want to
770        # delete them, just use:
771        #   -mangle -dac
772
773        # An interesting use for 'mangle' is to do this:
774        #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
775        # which will form as many one-line blocks as possible
776
777        'mangle' => [
778            qw(
779              check-syntax
780              delete-old-newlines
781              delete-old-whitespace
782              delete-semicolons
783              indent-columns=0
784              maximum-consecutive-blank-lines=0
785              maximum-line-length=100000
786              noadd-newlines
787              noadd-semicolons
788              noadd-whitespace
789              noblanks-before-blocks
790              noblanks-before-subs
791              notabs
792              )
793        ],
794
795        # 'extrude' originally deleted pod and comments, but to keep it
796        # reversible, it no longer does.  But if you really want to
797        # delete them, just use
798        #   extrude -dac
799        #
800        # An interesting use for 'extrude' is to do this:
801        #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
802        # which will break up all one-line blocks.
803
804        'extrude' => [
805            qw(
806              check-syntax
807              ci=0
808              delete-old-newlines
809              delete-old-whitespace
810              delete-semicolons
811              indent-columns=0
812              maximum-consecutive-blank-lines=0
813              maximum-line-length=1
814              noadd-semicolons
815              noadd-whitespace
816              noblanks-before-blocks
817              noblanks-before-subs
818              nofuzzy-line-length
819              notabs
820              )
821        ],
822
823        # this style tries to follow the GNU Coding Standards (which do
824        # not really apply to perl but which are followed by some perl
825        # programmers).
826        'gnu-style' => [
827            qw(
828              lp bl noll pt=2 bt=2 sbt=2 icp
829              )
830        ],
831
832        # Additional styles can be added here
833    );
834
835    PerlTidy::HtmlWriter->make_abbreviated_names( \%expansion );
836
837    # Uncomment next line to dump all expansions for debugging:
838    # dump_short_names(\%expansion);
839
840    my $word;
841    my @raw_options        = ();
842    my $config_file        = "";
843    my $saw_ignore_profile = 0;
844    my $saw_extrude        = 0;
845    my $i;
846
847    #---------------------------------------------------------------
848    # Take a first look at the command-line parameters.  Do as many
849    # immediate dumps as possible, which can avoid confusion if the
850    # perltidyrc file has an error.
851    #---------------------------------------------------------------
852    foreach $i (@ARGV) {
853
854        if ( $i =~ /-(npro|noprofile)$/ ) {
855            $saw_ignore_profile = 1;
856        }
857        elsif ( $i =~ /-(pro|profile)=(.+)/ ) {
858            if ($config_file) {
859                print STDERR
860"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
861            }
862            $config_file = $2;
863        }
864        elsif ( $i =~ /-(pro|profile)=?$/ ) {
865            print STDERR
866              "usage: -pro=filename or --profile=filename, no spaces\n";
867            exit 1;
868        }
869        elsif ( $i =~ /-extrude$/ ) {
870            $saw_extrude = 1;
871        }
872        elsif ( $i =~ /-(help|h)$/ ) {
873            usage();
874            exit 1;
875        }
876        elsif ( $i =~ /-(version|v)$/ ) {
877            show_version();
878            exit 1;
879        }
880        elsif ( $i =~ /-(dump-defaults|ddf)$/ ) {
881            dump_defaults(@defaults);
882            exit 1;
883        }
884        elsif ( $i =~ /-(dump-long-names|dln)$/ ) {
885            dump_long_names(@option_string);
886            exit 1;
887        }
888        elsif ( $i =~ /-(dump-short-names|dsn)$/ ) {
889            dump_short_names( \%expansion );
890            exit 1;
891        }
892        elsif ( $i =~ /-(dump-token-types|dtt)$/ ) {
893            PerlTidy::Tokenizer->dump_token_types(*STDOUT);
894            exit 1;
895        }
896    }
897
898    #---------------------------------------------------------------
899    # read any .perltidyrc configuration file
900    #---------------------------------------------------------------
901    unless ($saw_ignore_profile) {
902
903        $config_file = find_config_file() unless $config_file;
904        my $rconfig_list;
905        ( $config_file, $rconfig_list ) =
906          read_config_file( $config_file, \%expansion );
907
908        # process any .perltidyrc parameters right now so we can localize errors
909        if (@$rconfig_list) {
910            local @ARGV = @$rconfig_list;
911
912            expand_command_abbreviations( \%expansion, \@raw_options,
913                $config_file );
914
915            if ( !GetOptions( \%Opts, @option_string ) ) {
916                die
917"Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
918            }
919
920            # Undo any options which cause premature exit.  They are not
921            # appropriate for a config file, and it could be hard to
922            # diagnose the cause of the premature exit.
923            foreach (
924                qw{
925                dump-defaults
926                dump-long-names
927                dump-options
928                dump-short-names
929                dump-token-types
930                dump-want-left-space
931                dump-want-right-space
932                help
933                stylesheet
934                version
935                }
936              )
937            {
938                if ( defined( $Opts{$_} ) ) {
939                    delete $Opts{$_};
940                    print STDERR "ignoring --$_ in config file: $config_file\n";
941                }
942            }
943        }
944    }
945
946    #---------------------------------------------------------------
947    # now process the command line parameters
948    #---------------------------------------------------------------
949    expand_command_abbreviations( \%expansion, \@raw_options, $config_file );
950
951    if ( !GetOptions( \%Opts, @option_string ) ) {
952        die "Error on command line; for help try 'perltidy -h'\n";
953    }
954
955    if ( $Opts{'dump-options'} ) {
956        dump_options( \%Opts );
957        exit 1;
958    }
959
960    #---------------------------------------------------------------
961    # Now we have to handle any interactions among the options..
962    #---------------------------------------------------------------
963
964    # In quiet mode, there is no log file and hence no way to report
965    # results of syntax check, so don't do it.
966    if ( $Opts{'quiet'} ) {
967        $Opts{'check-syntax'} = 0;
968    }
969
970    # either html output or tidy output, not both
971    if ( $Opts{'html'} ) {
972        $Opts{'tidy-output'} = 0;
973    }
974
975    # can't check syntax if no output
976    if ( !$Opts{'tidy-output'} ) {
977        $Opts{'check-syntax'} = 0;
978    }
979
980    # It's really a bad idea to check syntax as root unless you wrote
981    # the script yourself.
982    my $pending_complaint = "";
983
984    # everybody is root in windows 95/98, so we can't complain about it
985    unless ( $^O =~ /^(MSWin32|msdos|dos|win32)$/ ) {
986
987        if ( $< == 0 && $Opts{'check-syntax'} ) {
988            $Opts{'check-syntax'} = 0;
989            $pending_complaint =
990"Syntax check deactivated for safety; you shouldn't run this as root\n";
991        }
992    }
993
994    # see if user set a non-negative logfile-gap
995    if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) {
996
997        # a zero gap will be taken as a 1
998        if ( $Opts{'logfile-gap'} == 0 ) {
999            $Opts{'logfile-gap'} = 1;
1000        }
1001
1002        # setting a non-negative logfile gap causes logfile to be saved
1003        $Opts{'logfile'} = 1;
1004    }
1005
1006    # not setting logfile gap, or setting it negative, causes default of 50
1007    else {
1008        $Opts{'logfile-gap'} = 50;
1009    }
1010
1011    # set short-cut flag when only indentation is to be done.
1012    # Note that the user may or may not have already set the
1013    # indent-only flag.
1014    if ( !$Opts{'add-whitespace'}
1015        && !$Opts{'delete-old-whitespace'}
1016        && !$Opts{'add-newlines'}
1017        && !$Opts{'delete-old-newlines'} )
1018    {
1019        $Opts{'indent-only'} = 1;
1020    }
1021
1022    # -nbob implies -bli
1023    if ( !$Opts{'break-after-opening-brace'} ) {
1024        $Opts{'brace-left-and-indent'} = 1;
1025    }
1026
1027    # -bli flag implies -bl
1028    if ( $Opts{'brace-left-and-indent'} ) {
1029        $Opts{'opening-brace-on-new-line'} = 1;
1030    }
1031
1032    if ( $Opts{'opening-brace-always-on-right'}
1033        && $Opts{'opening-brace-on-new-line'} )
1034    {
1035        print STDERR <<EOM;
1036 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
1037  'opening-brace-on-new-line' (-bl).  Ignoring -bl.
1038EOM
1039        $Opts{'opening-brace-on-new-line'} = 0;
1040    }
1041
1042    # it simplifies things if -bl is 0 rather than undefined
1043    if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) {
1044        $Opts{'opening-brace-on-new-line'} = 0;
1045    }
1046
1047    # -sbl defaults to -bl if not defined
1048    if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) {
1049        $Opts{'opening-sub-brace-on-new-line'} =
1050          $Opts{'opening-brace-on-new-line'};
1051    }
1052
1053    # set shortcut flag if no blanks to be written
1054    unless ( $Opts{'maximum-consecutive-blank-lines'} ) {
1055        $Opts{'swallow-optional-blank-lines'} = 1;
1056    }
1057    return ( \%Opts, $config_file, \@raw_options, $pending_complaint,
1058        $saw_extrude );
1059
1060}    # end of process_command_line
1061
1062sub expand_command_abbreviations {
1063
1064    # go through @ARGV and expand any abbreviations
1065
1066    my ( $rexpansion, $rraw_options, $config_file ) = @_;
1067    my ($word);
1068
1069    # set a pass limit to prevent an infinite loop;
1070    # 10 should be plenty, but it may be increased to allow deeply
1071    # nested expansions.
1072    my $max_passes = 10;
1073    my @new_argv   = ();
1074
1075    # keep looping until all expansions have been converted into actual
1076    # dash parameters..
1077    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
1078        my @new_argv     = ();
1079        my $abbrev_count = 0;
1080
1081        # loop over each item in @ARGV..
1082        foreach $word (@ARGV) {
1083
1084            # if it is a dash flag (instead of a file name)..
1085            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
1086
1087                # save the raw input for debug output in case of circular refs
1088                if ( $pass_count == 0 ) {
1089                    push ( @$rraw_options, $word );
1090                }
1091
1092                my $abr   = $1;
1093                my $flags = $2;
1094
1095                # if we see this dash item in the expansion hash..
1096                if ( $rexpansion->{$abr} ) {
1097                    $abbrev_count++;
1098
1099                    # stuff all of the words that it expands to into the
1100                    # new arg list for the next pass
1101                    foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
1102                        push ( @new_argv, '--' . $abbrev . $flags );
1103                    }
1104                }
1105
1106                # not in expansion hash, must be actual long name
1107                else {
1108                    push ( @new_argv, $word );
1109                }
1110            }
1111
1112            # not a dash item, so just save it for the next pass
1113            else {
1114                push ( @new_argv, $word );
1115            }
1116        }    # end of this pass
1117
1118        # update parameter list @ARGV to the new one
1119        @ARGV = @new_argv;
1120        last unless ( $abbrev_count > 0 );
1121
1122        # make sure we are not in an infinite loop
1123        if ( $pass_count == $max_passes ) {
1124            print STDERR
1125"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
1126            print STDERR "Here are the raw options\n";
1127            local $" = ')(';
1128            print STDERR "(@$rraw_options)\n";
1129            my $num = @new_argv;
1130
1131            if ( $num < 50 ) {
1132                print STDERR "After $max_passes passes here is ARGV\n";
1133                print STDERR "(@new_argv)\n";
1134            }
1135            else {
1136                print STDERR "After $max_passes passes ARGV has $num entries\n";
1137            }
1138
1139            if ($config_file) {
1140                die <<"DIE";
1141Please check your configuration file $config_file for circular-references.
1142To deactivate it, use -npro.
1143DIE
1144            }
1145            else {
1146                die <<'DIE';
1147Program bug - circular-references in the %expansion hash, probably due to
1148a recent program change.
1149DIE
1150            }
1151        }    # end of check for circular references
1152    }    # end of loop over all passes
1153}
1154
1155# Debug routine -- this will dump the expansion hash
1156sub dump_short_names {
1157    my $rexpansion = shift;
1158    print STDOUT <<EOM;
1159List of short names.  This list shows how all abbreviations are
1160translated into other abbreviations and, eventually, into long names.
1161New abbreviations may be defined in a .perltidyrc file.
1162For a list of all long names, use perltidy --dump-long-names (-dln).
1163--------------------------------------------------------------------------
1164EOM
1165    foreach my $abbrev ( sort keys %$rexpansion ) {
1166        my @list = @{ $$rexpansion{$abbrev} };
1167        print STDOUT "$abbrev --> @list\n";
1168    }
1169}
1170
1171sub find_config_file {
1172
1173    my $config_file;
1174
1175    # look in current directory first
1176    if ( -e ".perltidyrc" ) {
1177        $config_file = ".perltidyrc";
1178    }
1179
1180    # then the home directory
1181    elsif ( defined( $ENV{HOME} ) && -e "$ENV{HOME}/.perltidyrc" ) {
1182        $config_file = "$ENV{HOME}/.perltidyrc";
1183    }
1184
1185    # then look for a system-wide definition
1186    elsif ( -e "/usr/local/etc/perltidyrc" ) {
1187        $config_file = "/usr/local/etc/perltidyrc";
1188    }
1189    elsif ( -e "/etc/perltidyrc" ) {
1190        $config_file = "/etc/perltidyrc";
1191    }
1192    return $config_file;
1193}
1194
1195sub read_config_file {
1196
1197    my @config_list = ();
1198
1199    my ( $config_file, $rexpansion ) = @_;
1200
1201    my $name = undef;
1202    my $line_no;
1203    if ( defined($config_file) && -e $config_file ) {
1204
1205        unless ( open CONFIG, "<$config_file" ) {
1206            warn "cannot open config file $config_file: $!\n";
1207            $config_file = "";
1208        }
1209        else {
1210
1211            while (<CONFIG>) {
1212                $line_no++;
1213                chomp;
1214                next if /^\s*#/;    # skip full-line comment
1215                $_ = strip_comment( $_, $config_file, $line_no );
1216                s/^\s*(.*?)\s*$/$1/;    # trim both ends
1217                next unless $_;
1218
1219                # look for something of the general form
1220                #    newname { body }
1221                # or just
1222                #    body
1223
1224                if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
1225                    my ( $newname, $body, $curly ) = ( $2, $3, $4 );
1226
1227                    # handle a new alias definition
1228                    if ($newname) {
1229                        if ($name) {
1230                            die
1231"No '}' seen after $name and before $newname in config file $config_file line $.\n";
1232                        }
1233                        $name = $newname;
1234
1235                        if ( ${$rexpansion}{$name} ) {
1236                            local $" = ')(';
1237                            my @names = sort keys %$rexpansion;
1238                            print
1239"Here is a list of all installed aliases\n(@names)\n";
1240                            die
1241"Attempting to redefine alias ($name) in config file $config_file line $.\n";
1242                        }
1243                        ${$rexpansion}{$name} = [];
1244                    }
1245
1246                    # now do the body
1247                    if ($body) {
1248
1249                        my $rbody_parts =
1250                          parse_body( $body, $config_file, $line_no );
1251
1252                        if ($name) {
1253
1254                            # remove leading dashes if this is an alias
1255                            foreach (@$rbody_parts) { s/^\-+//; }
1256                            push @{ ${$rexpansion}{$name} }, @$rbody_parts;
1257                        }
1258
1259                        else {
1260                            push ( @config_list, @$rbody_parts );
1261                        }
1262                    }
1263
1264                    if ($curly) {
1265                        unless ($name) {
1266                            die
1267"Unexpected '}' seen in config file $config_file line $.\n";
1268                        }
1269                        $name = undef;
1270                    }
1271                }
1272            }
1273            close CONFIG;
1274        }
1275    }
1276    else {
1277        $config_file = "";
1278    }
1279    return ( $config_file, \@config_list );
1280}
1281
1282sub strip_comment {
1283
1284    my ( $instr, $config_file, $line_no ) = @_;
1285
1286    # nothing to do if no comments
1287    if ( $instr !~ /#/ ) {
1288        return $instr;
1289    }
1290
1291    # use simple method of no quotes
1292    elsif ( $instr !~ /['"]/ ) {
1293        $instr =~ s/\s*\#.*$//;    # simple trim
1294        return $instr;
1295    }
1296
1297    # handle comments and quotes
1298    my $outstr     = "";
1299    my $quote_char = "";
1300    while (1) {
1301
1302        # looking for ending quote character
1303        if ($quote_char) {
1304            if ( $instr =~ /\G($quote_char)/gc ) {
1305                $quote_char = "";
1306                $outstr .= $1;
1307            }
1308            elsif ( $instr =~ /\G(.)/gc ) {
1309                $outstr .= $1;
1310            }
1311
1312            # error..we reached the end without seeing the ending quote char
1313            else {
1314                die <<EOM;
1315Error reading file $config_file at line number $line_no.
1316Did not see ending quote character <$quote_char> in this text:
1317$instr
1318Please fix this line or use -npro to avoid reading this file
1319EOM
1320                last;
1321            }
1322        }
1323
1324        # accumulating characters and looking for start of a quoted string
1325        else {
1326            if ( $instr =~ /\G([\"\'])/gc ) {
1327                $outstr .= $1;
1328                $quote_char = $1;
1329            }
1330            elsif ( $instr =~ /\G#/gc ) {
1331                last;
1332            }
1333            elsif ( $instr =~ /\G(.)/gc ) {
1334                $outstr .= $1;
1335            }
1336            else {
1337                last;
1338            }
1339        }
1340    }
1341    return $outstr;
1342}
1343
1344sub parse_body {
1345
1346=pod
1347
1348Parse a command string containing multiple string with possible
1349quotes, into individual commands.  It might look like this, for example:
1350
1351   -wba=" + - "  -some-thing -wbb='. && ||'
1352
1353There is no need, at present, to handle escaped quote characters.
1354(They are not perltidy tokens, so needn't be in strings).
1355
1356=cut
1357
1358    my ( $body, $config_file, $line_no ) = @_;
1359    my @body_parts = ();
1360    my $quote_char = "";
1361    my $part       = "";
1362    while (1) {
1363
1364        # looking for ending quote character
1365        if ($quote_char) {
1366            if ( $body =~ /\G($quote_char)/gc ) {
1367                $quote_char = "";
1368            }
1369            elsif ( $body =~ /\G(.)/gc ) {
1370                $part .= $1;
1371            }
1372
1373            # error..we reached the end without seeing the ending quote char
1374            else {
1375                if ($part) { push @body_parts, $part; }
1376                die <<EOM;
1377Error reading file $config_file at line number $line_no.
1378Did not see ending quote character <$quote_char> in this text:
1379$body
1380Please fix this line or use -npro to avoid reading this file
1381EOM
1382                last;
1383            }
1384        }
1385
1386        # accumulating characters and looking for start of a quoted string
1387        else {
1388            if ( $body =~ /\G([\"\'])/gc ) {
1389                $quote_char = $1;
1390            }
1391            elsif ( $body =~ /\G(\s+)/gc ) {
1392                push @body_parts, $part;
1393                $part = "";
1394            }
1395            elsif ( $body =~ /\G(.)/gc ) {
1396                $part .= $1;
1397            }
1398            else {
1399                if ($part) { push @body_parts, $part; }
1400                last;
1401            }
1402        }
1403    }
1404    return ( \@body_parts );    # sound's ghoulish!
1405}
1406
1407sub dump_long_names {
1408
1409    my @names = sort @_;
1410    print STDOUT <<EOM;
1411# Command line long names (passed to GetOptions)
1412#---------------------------------------------------------------
1413# here is a summary of the Getopt codes:
1414# <none> does not take an argument
1415# =s takes a mandatory string
1416# :s takes an optional string
1417# =i takes a mandatory integer
1418# :i takes an optional integer
1419# ! does not take an argument and may be negated
1420#  i.e., -foo and -nofoo are allowed
1421# a double dash signals the end of the options list
1422#
1423#---------------------------------------------------------------
1424EOM
1425
1426    foreach (@names) { print STDOUT "$_\n" }
1427}
1428
1429sub dump_defaults {
1430    my @defaults = sort @_;
1431    print STDOUT "Default command line options:\n";
1432    foreach (@_) { print STDOUT "$_\n" }
1433}
1434
1435sub dump_options {
1436    my ($rOpts) = @_;
1437    local $" = "\n";
1438    print STDOUT "Final parameter set for this run\n";
1439    foreach my $i ( sort keys %{$rOpts} ) {
1440        print STDOUT "$i=$rOpts->{$i}\n";
1441    }
1442}
1443
1444sub show_version {
1445    print <<"EOM";
1446This is perltidy, v$VERSION
1447
1448Copyright 2000-2001, Steven L. Hancock
1449
1450PerlTidy is free software and may be copied under the terms of the GNU
1451General Public License, which is included in the distribution files.
1452
1453Complete documentation for perltidy can be found using 'man perltidy'
1454or on the internet at http://perltidy.sourceforge.net.
1455EOM
1456}
1457
1458sub usage {
1459
1460    print STDOUT <<EOF;
1461This is perltidy version $VERSION, a perl script indenter.  Usage:
1462
1463    perltidy [ options ] file1 file2 file3 ...
1464            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
1465    perltidy [ options ] file1 -o outfile
1466    perltidy [ options ] file1 -st >outfile
1467    perltidy [ options ] <infile >outfile
1468
1469Options have short and long forms. Short forms are shown; see
1470man pages for long forms.  Note: '=s' indicates a required string,
1471and '=n' indicates a required integer.
1472
1473I/O control
1474 -h      show this help
1475 -o=file name of the output file (only if single input file)
1476 -q      deactivate error messages (for running under editor)
1477 -w      include non-critical warning messages in the .ERR error output
1478 -syn    run perl -c to check syntax (default under unix systems)
1479 -log    save .LOG file, which has useful diagnostics
1480 -f      force perltidy to read a binary file
1481 -g      like -log but writes more detailed .LOG file, for debugging scripts
1482 -opt    write the set of options actually used to a .LOG file
1483 -npro   ignore .perltidyrc configuration command file
1484 -pro=file   read configuration commands from file instead of .perltidyrc
1485 -st     send output to standard output, STDOUT
1486 -se     send error output to standard error output, STDERR
1487 -v      display version number to standard output and quit
1488
1489Basic Options:
1490 -i=n    use n columns per indentation level (default n=4)
1491 -t      tabs: use one tab character per indentation level, not recommeded
1492 -nt     no tabs: use n spaces per indentation level (default)
1493 -io     "indent only": just do indentation, no other formatting.
1494 -sil=n  set starting indentation level to n;  use if auto detection fails
1495
1496Whitespace Control
1497 -fws    freeze whitespace; this disables all whitespace changes
1498           and disables the following switches:
1499 -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
1500 -bbt    same as -bt but for code block braces; same as -bt if not given
1501 -pt=n   paren tightness (n=0, 1 or 2)
1502 -sbt=n  square bracket tightness (n=0, 1, or 2)
1503 -ci=n   sets continuation indentation=n,  default is n=2 spaces
1504 -lp     line up parentheses, brackets, and non-BLOCK braces
1505 -ibc    indent block comments; this is the default
1506 -sfs    add space before semicolon in for( ; ; )
1507 -msc=n  minimum spaces to side comment, default 4
1508 -aws    allow perltidy to add whitespace (default)
1509 -dws    delete all old non-essential whitespace
1510 -icb    indent closing brace of a code block
1511 -icp    indent closing paren, square-bracket, or brace of non code block
1512 -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
1513 -wrs=s  want space right of tokens in string;
1514 -sts    put space before terminal semicolon of a statement
1515
1516Line Break Control
1517 -fnl    freeze newlines; this disables all line break changes
1518            and disables the following switches:
1519 -anl    add newlines;  ok to introduce new line breaks
1520 -bbs    add blank line before subs and packages
1521 -bbc    add blank line before block comments
1522 -bbb    add blank line between major blocks
1523 -sob    swallow optional blank lines
1524 -ce     cuddled else; use this style: '} else {'
1525 -dnl    delete old newlines (default)
1526 -mbl=n  maximum consecutive blank lines (default=1)
1527 -l=n    maximum line length;  default n=80
1528 -bl     opening brace on new line
1529 -sbl    opening sub brace on new line.  value of -bl is used if not given.
1530 -bli    opening brace on new line and indented
1531 -bar    opening brace always on right, even for long clauses
1532 -wba=s  want break after tokens in string; i.e. wba=': .'
1533 -wbb=s  want break before tokens in string
1534
1535Delete selected text
1536 -dac    delete all comments AND pod
1537 -dbc    delete block comments
1538 -dsc    delete side comments
1539 -dp     delete pod
1540
1541Send selected text to a '.TEE' file
1542 -tac    tee all comments AND pod
1543 -tbc    tee block comments
1544 -tsc    tee side comments
1545 -tp     tee pod
1546
1547Combinations of other parameters
1548 -gnu     attempt to follow GNU Coding Standards as applied to perl
1549 -mangle  remove as many newlines as possible (but keep comments and pods)
1550 -extrude  insert as many newlines as possible
1551
1552Other controls
1553 -mft=n  maximum fields per table; default n=40
1554 -x      do not format lines before hash-bang line (i.e., for VMS)
1555 -asc    allows perltidy to add a ';' when missing (default)
1556 -dsm    allows perltidy to delete an unnecessary ';'  (default)
1557
1558 -olq    outdent long quoted strings (default)
1559 -olc    outdent a long block comment line
1560 -ola    outdent statement labels
1561 -okw    outdent control keywords (redo, next, last, goto, return)
1562 -okwl=s specify alternative keywords for -okw command
1563
1564 -csc    add or update closing side comments after closing BLOCK brace
1565 -dcsc   delete closing side comments created by a -csc command
1566 -cscp=s change closing side comment prefix to be other than '## end'
1567 -cscl=s change closing side comment to apply to selected list of blocks
1568 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
1569 -csct=n maximum number of columns of appended text, default n=20
1570 -cscw   causes warning if old side comment is overwritten with -csc
1571
1572 -sbc    use 'static block comments' identified by leading '##' (default)
1573 -sbcp=s change static block comment identifier to be other than '##'
1574
1575 -ssc    use 'static side comments' identified by leading '##' (default)
1576 -sscp=s change static side comment identifier to be other than '##'
1577
1578 -dop    dump options used in this run to standard output and quit
1579 -ddf    dump default options to standard output and quit
1580 -dsn    dump all option short names to standard output and quit
1581 -dln    dump option long names to standard output and quit
1582 -dtt    dump all token types to standard output and quit
1583
1584 -html   write an html file (see 'man perl2web' for many options)
1585         Note: when -html is used, no indentation or formatting are done.
1586         Hint: try perltidy -html -css=mystyle.css filename.pl
1587         and edit mystyle.css to change the appearance of filename.html.
1588         -nnn gives line numbers
1589         -pre only writes out <pre>..</pre> code section
1590
1591A prefix of "n" negates short form toggle switches, and a prefix of "no"
1592negates the long forms.  For example, -nt or --notabs mean to indent with
1593spaces rather than tabs.   Do not bundle switches together.
1594
1595If you are unable to see this entire text, try "perltidy -h | more"
1596For more detailed information, and additional options, try "man perltidy",
1597or go to the perltidy home page at http://perltidy.sourceforge.net
1598EOF
1599
1600}
1601
1602sub process_this_file {
1603
1604    my ( $truth, $beauty ) = @_;
1605
1606    # loop to process each line of this file
1607    while ( my $line_of_tokens = $truth->get_line() ) {
1608        $beauty->write_line($line_of_tokens);
1609    }
1610
1611    # finish up
1612    $beauty->finish_formatting();
1613    $truth->report_tokenization_errors();
1614}
1615
1616sub check_syntax {
1617
1618    # Use 'perl -c' to make sure that we did not create bad syntax
1619    # This is a very good independent check for programming errors
1620    #
1621    # Given names of the input and output files, ($ifname, $ofname),
1622    # we do the following:
1623    # - check syntax of the input file
1624    # - if bad, all done (could be an incomplete code snippet)
1625    # - if infile syntax ok, then check syntax of the output file;
1626    #   - if outfile syntax bad, issue warning; this implies a code bug!
1627    # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
1628
1629    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
1630    my $infile_syntax_ok = 0;
1631    my $line_of_dashes   = '-' x 42 . "\n";
1632
1633    # invoke perl with -x if requested
1634    my $dash_x = $rOpts->{'look-for-hash-bang'} ? "-x" : "";
1635
1636    # this shouldn't happen unless perltidy.TMPI couldn't be made
1637    if ( $ifname eq '-' ) {
1638        $logger_object->write_logfile_entry(
1639            "Cannot run perl -c on STDIN and STDOUT\n");
1640        return $infile_syntax_ok;
1641    }
1642
1643    $logger_object->write_logfile_entry(
1644        "checking input file syntax with perl -c...\n");
1645    $logger_object->write_logfile_entry($line_of_dashes);
1646
1647    # Not all operating systems/shells support redirection of the standard
1648    # error output.
1649    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
1650    my $flags = "-c -T $dash_x";
1651
1652    my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
1653    $logger_object->write_logfile_entry("$perl_output\n");
1654
1655    if ( $perl_output =~ /syntax\s*OK/ ) {
1656        $infile_syntax_ok = 1;
1657        $logger_object->write_logfile_entry($line_of_dashes);
1658        $logger_object->write_logfile_entry(
1659            "checking output file syntax with perl -c...\n");
1660        $logger_object->write_logfile_entry($line_of_dashes);
1661
1662        my $perl_output =
1663          do_syntax_check( $ofname, $flags, $error_redirection );
1664        $logger_object->write_logfile_entry("$perl_output\n");
1665
1666        unless ( $perl_output =~ /syntax\s*OK/ ) {
1667            $logger_object->write_logfile_entry($line_of_dashes);
1668            $logger_object->warning(
1669"The output file has a syntax error when tested with perl -c $dash_x $ofname !\n"
1670            );
1671            $logger_object->warning(
1672                "This implies an error in perltidy; the file $ofname is bad\n");
1673            $logger_object->report_definite_bug();
1674
1675            # the perl version number will be helpful for diagnosing the problem
1676            $logger_object->write_logfile_entry(
1677                `perl -v $dash_x $ofname $error_redirection` . "\n" );
1678        }
1679    }
1680    else {
1681
1682        # Only warn of perl -c syntax errors.  Other messages,
1683        # such as missing modules, are too common.  They can be
1684        # seen by running with perltidy -w
1685        $logger_object->complain("A syntax check using perl -c gives: \n");
1686        $logger_object->complain($line_of_dashes);
1687        $logger_object->complain("$perl_output\n");
1688        $logger_object->complain($line_of_dashes);
1689        $infile_syntax_ok = -1;
1690        $logger_object->write_logfile_entry($line_of_dashes);
1691        $logger_object->write_logfile_entry(
1692"The output file will not be checked because of input file problems\n"
1693        );
1694    }
1695    return $infile_syntax_ok;
1696}
1697
1698sub do_syntax_check {
1699    my ( $fname, $flags, $error_redirection ) = @_;
1700    return `perl $flags $fname $error_redirection`;
1701}
1702
1703#####################################################################
1704#
1705# the PerlTidy::LineSource class supplies an object with a 'get_line()' method
1706# which returns the next line to be parsed
1707#
1708#####################################################################
1709
1710package PerlTidy::LineSource;
1711
1712sub new {
1713
1714    my $class      = shift;
1715    my $input_file = shift;
1716    my $rOpts      = shift;
1717    my $fh;
1718    my $input_file_copy = undef;
1719    my $fh_copy;
1720
1721    unless ( ( $fh, $input_file ) = PerlTidy::streamhandle( $input_file, 'r' ) )
1722    {
1723        return undef;
1724    }
1725    else {
1726
1727        # in order to check output syntax when standard output is used, we have
1728        # to make a copy of the file
1729        if ( $input_file eq '-' && $rOpts->{'check-syntax'} ) {
1730            $input_file_copy = "perltidy.TMPI";
1731            $fh_copy         = IO::File->new(">$input_file_copy")
1732              or die (
1733                "Couldn't open $input_file_copy: $!\n
1734                           It is needed to check syntax; deactivate with -nsyn"
1735              );
1736        }
1737
1738        return bless {
1739            _fh              => $fh,
1740            _fh_copy         => $fh_copy,
1741            _filename        => $input_file,
1742            _input_file_copy => $input_file_copy,
1743        }, $class;
1744    }
1745}
1746
1747sub get_input_file_copy_name {
1748    my $self   = shift;
1749    my $ifname = $self->{_input_file_copy};
1750    unless ($ifname) {
1751        $ifname = $self->{_filename};
1752    }
1753    return $ifname;
1754}
1755
1756sub close_input_file {
1757    my $self = shift;
1758    $self->{_fh}->close();
1759    $self->{_fh_copy}->close() if $self->{_fh_copy};
1760}
1761
1762sub unlink_copy {
1763    my $self = shift;
1764    unlink $self->{_input_file_copy} if $self->{_input_file_copy};
1765    my $fname = $self->{_input_file_copy};
1766}
1767
1768sub get_line {
1769    my $self    = shift;
1770    my $line    = undef;
1771    my $fh      = $self->{_fh};
1772    my $fh_copy = $self->{_fh_copy};
1773    $line = $fh->getline();
1774    if ( $line && $fh_copy ) { $fh_copy->print($line); }
1775    return $line;
1776}
1777
1778#####################################################################
1779#
1780# the PerlTidy::LineSink class supplies a write_line method for
1781# actual file writing
1782#
1783#####################################################################
1784
1785package PerlTidy::LineSink;
1786
1787sub new {
1788
1789    my ( $class, $output_file, $tee_file, $rOpts ) = @_;
1790    my $fh               = undef;
1791    my $fh_copy          = undef;
1792    my $fh_tee           = undef;
1793    my $output_file_copy = "";
1794    my $output_file_open = 0;
1795
1796    if ( $rOpts->{'tidy-output'} ) {
1797        ( $fh, $output_file ) = PerlTidy::streamhandle( $output_file, 'w' );
1798        if ( $output_file eq '-' ) { $output_file_copy = "perltidy.TMPO"; }
1799        $output_file_open = 1;
1800    }
1801
1802    # in order to check output syntax when standard output is used, we have to
1803    # make a copy of the file
1804    if ($output_file_copy) {
1805        if ( $rOpts->{'check-syntax'} ) {
1806            $fh_copy = IO::File->new(">$output_file_copy")
1807              or die (
1808                "couldn't open $output_file_copy: $!\n
1809                   which is needed for to check syntax; deactivate with -nsyn"
1810              );
1811        }
1812        else {
1813            $output_file_copy = "";
1814        }
1815    }
1816
1817    bless {
1818        _fh               => $fh,
1819        _fh_copy          => $fh_copy,
1820        _fh_tee           => $fh_tee,
1821        _output_file      => $output_file,
1822        _output_file_open => $output_file_open,
1823        _output_file_copy => $output_file_copy,
1824        _tee_flag         => 0,
1825        _tee_file         => $tee_file,
1826        _tee_file_opened  => 0,
1827    }, $class;
1828}
1829
1830sub write_line {
1831
1832    my $self    = shift;
1833    my $fh      = $self->{_fh};
1834    my $fh_copy = $self->{_fh_copy};
1835
1836    my $output_file_open = $self->{_output_file_open};
1837
1838    $fh->print( $_[0] ) if ( $self->{_output_file_open} );
1839    print $fh_copy $_[0] if ( $self->{_output_file_copy} );
1840
1841    if ( $self->{_tee_flag} ) {
1842        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
1843        my $fh_tee = $self->{_fh_tee};
1844        print $fh_tee $_[0];
1845    }
1846}
1847
1848sub get_output_file_copy {
1849    my $self   = shift;
1850    my $ofname = $self->{_output_file_copy};
1851    unless ($ofname) {
1852        $ofname = $self->{_output_file};
1853    }
1854    return $ofname;
1855}
1856
1857sub tee_on {
1858    my $self = shift;
1859    $self->{_tee_flag} = 1;
1860}
1861
1862sub tee_off {
1863    my $self = shift;
1864    $self->{_tee_flag} = 0;
1865}
1866
1867sub really_open_tee_file {
1868    my $self     = shift;
1869    my $tee_file = $self->{_tee_file};
1870    my $fh_tee;
1871    $fh_tee = IO::File->new(">$tee_file")
1872      or die ("couldn't open TEE file $tee_file: $!\n");
1873    $self->{_tee_file_opened} = 1;
1874    $self->{_fh_tee}          = $fh_tee;
1875}
1876
1877sub close_output_file {
1878    my $self = shift;
1879    $self->{_fh}->close() if $self->{_output_file_open};
1880    close $self->{_fh_copy} if ( $self->{_output_file_copy} );
1881    $self->close_tee_file();
1882}
1883
1884sub close_tee_file {
1885    my $self = shift;
1886
1887    if ( $self->{_tee_file_opened} ) {
1888        close $self->{_fh_tee};
1889        $self->{_tee_file_opened} = 0;
1890    }
1891}
1892
1893sub unlink_copy {
1894    my $self = shift;
1895    unlink( $self->{_output_file_copy} ) if $self->{_output_file_copy};
1896}
1897
1898#####################################################################
1899#
1900# The PerlTidy::Diagnostics class writes the DIAGNOSTICS file, which is
1901# useful for program development.
1902#
1903# Only one such file is created regardless of the number of input
1904# files processed.  This allows the results of processing many files
1905# to be summarized in a single file.
1906#
1907#####################################################################
1908
1909package PerlTidy::Diagnostics;
1910
1911sub new {
1912
1913    my $class = shift;
1914    bless {
1915        _write_diagnostics_count => 0,
1916        _last_diagnostic_file    => "",
1917        _input_file              => "",
1918        _fh                      => undef,
1919    }, $class;
1920}
1921
1922sub set_input_file {
1923    my $self = shift;
1924    $self->{_input_file} = $_[0];
1925}
1926
1927# This is a diagnostic routine which is useful for program development.
1928# Output from debug messages go to a file named DIAGNOSTICS, where
1929# they are labeled by file and line.  This allows many files to be
1930# scanned at once for some particular condition of interest.
1931sub write_diagnostics {
1932    my $self = shift;
1933
1934    unless ( $self->{_write_diagnostics_count} ) {
1935        open DIAGNOSTICS, ">DIAGNOSTICS"
1936          or death("couldn't open DIAGNOSTICS: $!\n");
1937    }
1938
1939    my $last_diagnostic_file = $self->{_last_diagnostic_file};
1940    my $input_file           = $self->{_input_file};
1941    if ( $last_diagnostic_file ne $input_file ) {
1942        print DIAGNOSTICS "\nFILE:$input_file\n";
1943    }
1944    $self->{_last_diagnostic_file} = $input_file;
1945    my $input_line_number = PerlTidy::Tokenizer::get_input_line_number();
1946    print DIAGNOSTICS "$input_line_number:\t@_";
1947    $self->{_write_diagnostics_count}++;
1948}
1949
1950#####################################################################
1951#
1952# The PerlTidy::Logger class writes the .LOG and .ERR files
1953#
1954#####################################################################
1955
1956package PerlTidy::Logger;
1957
1958sub new {
1959    my $class = shift;
1960    my $fh;
1961    my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
1962    $fh = IO::File->new(">$log_file")
1963      or die ("couldn't open log file $log_file: $!\n");
1964
1965    # remove any old error output file
1966    if ( -e $warning_file ) { unlink($warning_file) }
1967
1968    bless {
1969        _log_file                      => $log_file,
1970        _fh                            => $fh,
1971        _fh_warnings                   => undef,
1972        _rOpts                         => $rOpts,
1973        _fh_warnings                   => undef,
1974        _last_input_line_written       => 0,
1975        _at_end_of_file                => 0,
1976        _use_prefix                    => 1,
1977        _block_log_output              => 0,
1978        _line_of_tokens                => undef,
1979        _output_line_number            => undef,
1980        _wrote_line_information_string => 0,
1981        _wrote_column_headings         => 0,
1982        _warning_file                  => $warning_file,
1983        _warning_count                 => 0,
1984        _complaint_count               => 0,
1985        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
1986        _saw_brace_error => 0,
1987        _saw_extrude     => $saw_extrude,
1988    }, $class;
1989}
1990
1991sub close_log_file {
1992    my $self = shift;
1993    close $self->{_fh};
1994    close $self->{_fh_warnings} if ( $self->{_warning_count} );
1995}
1996
1997sub get_warning_count {
1998    my $self = shift;
1999    return $self->{_warning_count};
2000}
2001
2002sub get_use_prefix {
2003    my $self = shift;
2004    return $self->{_use_prefix};
2005}
2006
2007sub block_log_output {
2008    my $self = shift;
2009    $self->{_block_log_output} = 1;
2010}
2011
2012sub unblock_log_output {
2013    my $self = shift;
2014    $self->{_block_log_output} = 0;
2015}
2016
2017sub interrupt_logfile {
2018    my $self = shift;
2019    $self->{_use_prefix} = 0;
2020    $self->warning("\n");
2021    $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
2022}
2023
2024sub resume_logfile {
2025    my $self = shift;
2026    $self->write_logfile_entry( '#' x 60 . "\n" );
2027    $self->{_use_prefix} = 1;
2028}
2029
2030sub we_are_at_the_last_line {
2031    my $self = shift;
2032    unless ( $self->{_wrote_line_information_string} ) {
2033        $self->write_logfile_entry("Last line\n\n");
2034    }
2035    $self->{_at_end_of_file} = 1;
2036}
2037
2038# record some stuff in case we go down in flames
2039sub black_box {
2040    my $self = shift;
2041    my ( $line_of_tokens, $output_line_number ) = @_;
2042    my $input_line        = $line_of_tokens->{_line_text};
2043    my $input_line_number = $line_of_tokens->{_line_number};
2044
2045    # save line information in case we have to write a logfile message
2046    $self->{_line_of_tokens}                = $line_of_tokens;
2047    $self->{_output_line_number}            = $output_line_number;
2048    $self->{_wrote_line_information_string} = 0;
2049
2050    my $last_input_line_written = $self->{_last_input_line_written};
2051    my $rOpts                   = $self->{_rOpts};
2052    if (
2053        (
2054            ( $input_line_number - $last_input_line_written ) >=
2055            $rOpts->{'logfile-gap'}
2056        )
2057        || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
2058      )
2059    {
2060        my $rlevels                      = $line_of_tokens->{_rlevels};
2061        my $structural_indentation_level = $$rlevels[0];
2062        $self->{_last_input_line_written} = $input_line_number;
2063        ( my $out_str = $input_line ) =~ s/^\s*//;
2064        chomp $out_str;
2065
2066        $out_str = ( '.' x $structural_indentation_level ) . $out_str;
2067
2068        if ( length($out_str) > 35 ) {
2069            $out_str = substr( $out_str, 0, 35 ) . " ....";
2070        }
2071        $self->logfile_output( "", "$out_str\n" );
2072    }
2073}
2074
2075sub write_logfile_entry {
2076    my $self = shift;
2077
2078    # add leading >>> to avoid confusing error mesages and code
2079    $self->logfile_output( ">>>", "@_" );
2080}
2081
2082sub write_column_headings {
2083    my $self = shift;
2084
2085    $self->{_wrote_column_headings} = 1;
2086    my $fh = $self->{_fh};
2087    print $fh <<EOM;
2088The nesting depths in the table below are at the start of the lines.
2089The indicated output line numbers are not always exact.
2090ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
2091
2092in:out indent c b  nesting   code + messages; (messages begin with >>>)
2093lines  levels i k            (code begins with one '.' per indent level)
2094------  ----- - - --------   -------------------------------------------
2095EOM
2096}
2097
2098sub make_line_information_string {
2099
2100    # make columns of information when a logfile message needs to go out
2101    my $self                     = shift;
2102    my $line_of_tokens           = $self->{_line_of_tokens};
2103    my $output_line_number       = $self->{_output_line_number};
2104    my $brace_depth              = $line_of_tokens->{_curly_brace_depth};
2105    my $paren_depth              = $line_of_tokens->{_paren_depth};
2106    my $square_bracket_depth     = $line_of_tokens->{_square_bracket_depth};
2107    my $input_line_number        = $line_of_tokens->{_line_number};
2108    my $python_indentation_level = $line_of_tokens->{_python_indentation_level};
2109    my $rlevels                  = $line_of_tokens->{_rlevels};
2110    my $rnesting_tokens          = $line_of_tokens->{_rnesting_tokens};
2111    my $rci_levels               = $line_of_tokens->{_rci_levels};
2112    my $rnesting_blocks          = $line_of_tokens->{_rnesting_blocks};
2113
2114    my $structural_indentation_level = $$rlevels[0];
2115    my $line_information_string      = "";
2116
2117    if ($input_line_number) {
2118        $self->write_column_headings() unless $self->{_wrote_column_headings};
2119
2120        # keep logfile columns aligned for scripts up to 999 lines;
2121        # for longer scripts it doesn't really matter
2122        my $extra_space = "";
2123        $extra_space .= ( $input_line_number < 10 ) ? "  "
2124          : ( $input_line_number < 100 ) ? " "
2125          : "";
2126        $extra_space .= ( $output_line_number < 10 ) ? "  "
2127          : ( $output_line_number < 100 ) ? " "
2128          : "";
2129
2130        # there are 2 possible nesting strings:
2131        # the original which looks like this:  (0 [1 {2
2132        # the new one, which looks like this:  {{[
2133        # the new one is easier to read, and shows the order, but
2134        # could be arbitrarily long, so we use it unless it is too long
2135        my $nesting_string =
2136          "($paren_depth [$square_bracket_depth {$brace_depth";
2137        my $nesting_string_new = $$rnesting_tokens[0];
2138
2139        my $ci_level = $$rci_levels[0];
2140        if ( $ci_level > 9 ) { $ci_level = '*' }
2141        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
2142
2143        if ( length($nesting_string_new) <= 8 ) {
2144            $nesting_string =
2145              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
2146        }
2147
2148        $line_information_string =
2149"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
2150    }
2151    return $line_information_string;
2152}
2153
2154sub logfile_output {
2155    my $self = shift;
2156    my ( $prompt, $msg ) = @_;
2157    return if ( $self->{_block_log_output} );
2158
2159    my $fh = $self->{_fh};
2160    if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
2161        print $fh "$msg";
2162    }
2163    else {
2164        my $line_information_string = $self->make_line_information_string();
2165        $self->{_wrote_line_information_string} = 1;
2166
2167        if ($line_information_string) {
2168            print $fh "$line_information_string   $prompt$msg";
2169        }
2170        else {
2171            print $fh "$msg";
2172        }
2173    }
2174}
2175
2176sub get_saw_brace_error {
2177    my $self = shift;
2178    return $self->{_saw_brace_error};
2179}
2180
2181sub increment_brace_error {
2182    my $self = shift;
2183    $self->{_saw_brace_error}++;
2184}
2185
2186sub brace_warning {
2187    my $self = shift;
2188    use constant BRACE_WARNING_LIMIT => 10;
2189    my $saw_brace_error = $self->{_saw_brace_error};
2190
2191    if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
2192        $self->warning(@_);
2193    }
2194    $saw_brace_error++;
2195    $self->{_saw_brace_error} = $saw_brace_error;
2196
2197    if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
2198        $self->warning("No further warnings of this type will be given\n");
2199    }
2200}
2201
2202sub complain {
2203
2204    # handle non-critical warning messages based on input flag
2205    my $self  = shift;
2206    my $rOpts = $self->{_rOpts};
2207
2208    # these appear in .ERR output only if -w flag is used
2209    if ( $rOpts->{'warning-output'} ) {
2210        $self->warning(@_);
2211    }
2212
2213    # otherwise, they go to the .LOG file
2214    else {
2215        $self->{_complaint_count}++;
2216        $self->write_logfile_entry(@_);
2217    }
2218}
2219
2220sub warning {
2221
2222    # report errors to .ERR file (or stdout)
2223    my $self = shift;
2224    use constant WARNING_LIMIT => 50;
2225
2226    my $rOpts = $self->{_rOpts};
2227    unless ( $rOpts->{'quiet'} ) {
2228
2229        my $warning_count = $self->{_warning_count};
2230        unless ($warning_count) {
2231            my $warning_file = $self->{_warning_file};
2232            my $fh_warnings;
2233            if ( $rOpts->{'standard-error-output'} ) {
2234                $fh_warnings = *STDERR;
2235            }
2236            else {
2237                $fh_warnings = IO::File->new(">$warning_file")
2238                  or death("couldn't open $warning_file: $!\n");
2239                print STDERR "Please see file $warning_file!\n";
2240            }
2241            $self->{_fh_warnings} = $fh_warnings;
2242        }
2243
2244        my $fh_warnings = $self->{_fh_warnings};
2245        if ( $warning_count < WARNING_LIMIT ) {
2246            if ( $self->get_use_prefix() > 0 ) {
2247                my $input_line_number =
2248                  PerlTidy::Tokenizer::get_input_line_number();
2249                print $fh_warnings "$input_line_number:\t@_";
2250                $self->write_logfile_entry("WARNING: @_");
2251            }
2252            else {
2253                print $fh_warnings @_;
2254                $self->write_logfile_entry(@_);
2255            }
2256        }
2257        $warning_count++;
2258        $self->{_warning_count} = $warning_count;
2259
2260        if ( $warning_count == WARNING_LIMIT ) {
2261            print $fh_warnings "No further warnings will be given";
2262        }
2263    }
2264}
2265
2266# programming bug codes:
2267#   -1 = no bug
2268#   0 = maybe, not sure.
2269#   1 = definitely
2270sub report_possible_bug {
2271    my $self         = shift;
2272    my $saw_code_bug = $self->{_saw_code_bug};
2273    $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
2274}
2275
2276sub report_definite_bug {
2277    my $self = shift;
2278    $self->{_saw_code_bug} = 1;
2279}
2280
2281sub ask_user_for_bug_report {
2282    my $self = shift;
2283
2284    my ( $infile_syntax_ok, $formatter ) = @_;
2285    my $saw_code_bug = $self->{_saw_code_bug};
2286    if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
2287        $self->warning(<<EOM);
2288
2289You may have encountered a code bug in perltidy.  If you think so, and
2290the problem is not listed in the BUGS file at
2291http://perltidy.sourceforge.net, please report it so that it can be
2292corrected.  Include the smallest possible script which has the problem,
2293along with the .LOG file. See the manual pages for contact information.
2294Thank you!
2295EOM
2296
2297    }
2298    elsif ( $saw_code_bug == 1 ) {
2299        if ( $self->{_saw_extrude} ) {
2300            $self->warning(<<EOM);
2301You may have encountered a bug in perltidy.  However, since you are
2302using the -extrude option, the problem may be with perl itself, which
2303has occasional parsing problems with this type of file.  If you believe
2304that the problem is with perltidy, and the problem is not listed in the
2305BUGS file at http://perltidy.sourceforge.net, please report it so that
2306it can be corrected.  Include the smallest possible script which has the
2307problem, along with the .LOG file. See the manual pages for contact
2308information.
2309Thank you!
2310EOM
2311        }
2312        else {
2313            $self->warning(<<EOM);
2314
2315Oops, you seem to have encountered a bug in perltidy.  Please check the
2316BUGS file at http://perltidy.sourceforge.net.  If the problem is not
2317listed there, please report it so that it can be corrected.  Include the
2318smallest possible script which produces this message, along with the
2319.LOG file if appropriate.  See the manual pages for contact information.
2320Your efforts are appreciated.
2321Thank you!
2322EOM
2323            my $added_semicolon_count = $formatter->get_added_semicolon_count();
2324            if ( $added_semicolon_count > 0 ) {
2325                $self->warning(<<EOM);
2326
2327The log file shows that perltidy added $added_semicolon_count semicolons.
2328Please rerun with -nasc to see if that is the cause of the syntax error.  Even
2329if that is the problem, please report it so that it can be fixed.
2330EOM
2331
2332            }
2333        }
2334    }
2335}
2336
2337sub finish {
2338
2339    # called after all formatting to summarize errors
2340    my $self = shift;
2341    my ( $infile_syntax_ok, $formatter ) = @_;
2342
2343    my $rOpts         = $self->{_rOpts};
2344    my $warning_count = $self->{_warning_count};
2345    my $saw_code_bug  = $self->{_saw_code_bug};
2346
2347    my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
2348      || $saw_code_bug == 1
2349      || $rOpts->{'logfile'};
2350    my $log_file = $self->{_log_file};
2351    if ($warning_count) {
2352        if ($save_logfile) {
2353            $self->block_log_output();    # avoid echoing this to the logfile
2354            $self->warning(
2355                "The logfile $log_file may contain useful information\n");
2356            $self->unblock_log_output();
2357        }
2358
2359        if ( $self->{_complaint_count} > 0 ) {
2360            $self->warning(
2361"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
2362            );
2363        }
2364
2365        if ( $self->{_saw_brace_error}
2366            && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
2367        {
2368            $self->warning("To save a full .LOG file rerun with -g\n");
2369        }
2370    }
2371    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
2372    $self->close_log_file();
2373
2374    # delete the log file unless it is needed or wanted
2375    unlink($log_file) unless ($save_logfile);
2376}
2377
2378#####################################################################
2379#
2380# The PerlTidy::HtmlWriter class writes a copy of the input stream in html
2381#
2382#####################################################################
2383
2384package PerlTidy::HtmlWriter;
2385
2386# class variables
2387use vars qw{
2388  %html_color
2389  %html_bold
2390  %html_italic
2391  %token_short_names
2392  %short_to_long_names
2393  $rOpts
2394  $css_filename
2395  $css_linkname
2396  $missing_html_entities
2397};
2398
2399# replace unsafe characters with HTML entity representation if HTML::Entities
2400# is available
2401{ eval "use HTML::Entities"; $missing_html_entities = $@; }
2402
2403sub new {
2404
2405    my ( $class, $input_file, $html_file ) = @_;
2406
2407    my $html_file_opened = 0;
2408    my $html_fh;
2409    unless ( $html_fh = IO::File->new("> $html_file") ) {
2410        warn("can't open $html_file: $!\n");
2411        return undef;
2412    }
2413    $html_file_opened = 1;
2414
2415    unless ( $rOpts->{'html-pre-only'} ) {
2416        $html_fh->print( <<"HTML_START");
2417<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2418<HTML>
2419<HEAD>
2420HTML_START
2421
2422        # use css linked to another file
2423        if ( $rOpts->{'html-linked-style-sheet'} ) {
2424            $html_fh->print(
2425                qq(<link rel=stylesheet href="$css_linkname" type="text/css">));
2426            $html_fh->print( <<"ENDCSS");
2427<TITLE>$input_file</TITLE>
2428</HEAD>
2429<BODY>
2430ENDCSS
2431        }
2432
2433        # use css embedded in this file
2434        elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
2435            $html_fh->print( <<'ENDCSS');
2436<STYLE TYPE="text/css">
2437<!--
2438ENDCSS
2439            write_style_sheet_data($html_fh);
2440            $html_fh->print( <<"ENDCSS");
2441-->
2442</STYLE>
2443<TITLE>$input_file</TITLE>
2444</HEAD>
2445<BODY>
2446ENDCSS
2447        }
2448
2449        # no css used
2450        else {
2451
2452            $html_fh->print( <<"HTML_START");
2453<TITLE>$input_file</TITLE>
2454</HEAD>
2455<BODY BGCOLOR=\"$rOpts->{'html-color-background'}\" TEXT=\"$rOpts->{'html-color-punctuation'}\">
2456HTML_START
2457        }
2458    }
2459
2460    $html_fh->print( <<"END_PRE");
2461<!-- filename: $input_file -->
2462<PRE>
2463END_PRE
2464
2465    bless {
2466        _html_file        => $html_file,
2467        _html_file_opened => $html_file_opened,
2468        _html_fh          => $html_fh,
2469    }, $class;
2470}
2471
2472BEGIN {
2473
2474    # This is the official list of tokens which may be identified by the
2475    # user.  Long names are used as getopt keys.  Short names are
2476    # convenient short abbreviations for specifying input.  Short names
2477    # somewhat resemble token type characters, but are often different
2478    # because they may only be alphanumeric, to allow command line
2479    # input.  Also, note that because of case insensitivity of html,
2480    # this table must be in a single case only (I've chosen to use all
2481    # lower case).
2482    # When adding NEW_TOKENS: update this hash table
2483    # short names => long names
2484    %short_to_long_names = (
2485        'n'  => 'numeric',
2486        'p'  => 'paren',
2487        'q'  => 'quote',
2488        's'  => 'structure',
2489        'c'  => 'comment',
2490        'v'  => 'v-string',
2491        'cm' => 'comma',
2492        'w'  => 'bareword',
2493        'co' => 'colon',
2494        'pu' => 'punctuation',
2495        'i'  => 'identifier',
2496        'j'  => 'label',
2497        'h'  => 'here-doc-target',
2498        'hh' => 'here-doc-text',
2499        'k'  => 'keyword',
2500        'sc' => 'semicolon',
2501        'm'  => 'subroutine',
2502        'pd' => 'pod-text',
2503    );
2504
2505    # Now we have to map actual token types into one of the above short
2506    # names; any token types not mapped will get 'punctuation'
2507    # properties.
2508
2509    # The values of this hash table correspond to the keys of the
2510    # previous hash table.
2511    # The keys of this hash table are token types and can be seen
2512    # by running with --dump-token-types (-dtt).
2513
2514    # When adding NEW_TOKENS: update this hash table
2515    # $type => $short_name
2516    %token_short_names = (
2517        '#'  => 'c',
2518        'n'  => 'n',
2519        'v'  => 'v',
2520        'k'  => 'k',
2521        'F'  => 'k',
2522        'Q'  => 'q',
2523        'q'  => 'q',
2524        'J'  => 'j',
2525        'j'  => 'j',
2526        'h'  => 'h',
2527        'H'  => 'hh',
2528        'w'  => 'w',
2529        ','  => 'cm',
2530        '=>' => 'cm',
2531        ';'  => 'sc',
2532        ':'  => 'co',
2533        'f'  => 'sc',
2534        '('  => 'p',
2535        ')'  => 'p',
2536        'M'  => 'm',
2537        'P'  => 'pd',
2538    );
2539
2540    # These token types will all be called identifiers for now
2541    # FIXME: need to separate user defined modules as separate type
2542    my @identifier = qw" i t U C Y Z G :: ";
2543    @token_short_names{@identifier} = ('i') x scalar(@identifier);
2544
2545    # These token types will be called 'structure'
2546    my @structure = qw" { } ";
2547    @token_short_names{@structure} = ('s') x scalar(@structure);
2548
2549    # OLD NOTES: save for reference
2550    # Any of these could be added later if it would be useful.
2551    # For now, they will by default become punctuation
2552    #    my @list = qw" L R [ ] ";
2553    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
2554    #
2555    #    my @list = qw"
2556    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
2557    #      ";
2558    #    @token_long_names{@list} = ('math') x scalar(@list);
2559    #
2560    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
2561    #    @token_long_names{@list} = ('bit') x scalar(@list);
2562    #
2563    #    my @list = qw" == != < > <= <=> ";
2564    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
2565    #
2566    #    my @list = qw" && || ! &&= ||= ";
2567    #    @token_long_names{@list} = ('logical') x scalar(@list);
2568    #
2569    #    my @list = qw" . .= =~ !~ x x= ";
2570    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
2571    #
2572    #    # Incomplete..
2573    #    my @list = qw" .. -> <> ... \ ? ";
2574    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
2575
2576}
2577
2578sub make_getopt_long_names {
2579    my $class = shift;
2580    my ($rgetopt_names) = @_;
2581    while ( my ( $short_name, $name ) = each %short_to_long_names ) {
2582        push @$rgetopt_names, "html-color-$name=s";
2583        push @$rgetopt_names, "html-italic-$name!";
2584        push @$rgetopt_names, "html-bold-$name!";
2585    }
2586    push @$rgetopt_names, "html-color-background=s";
2587    push @$rgetopt_names, "html-linked-style-sheet=s";
2588    push @$rgetopt_names, "nohtml-style-sheets";
2589    push @$rgetopt_names, "html-pre-only";
2590    push @$rgetopt_names, "html-line-numbers";
2591    push @$rgetopt_names, "stylesheet";
2592}
2593
2594sub make_abbreviated_names {
2595
2596    # We're appending things like this to the expansion list:
2597    #      'hcc'    => [qw(html-color-comment)],
2598    #      'hck'    => [qw(html-color-keyword)],
2599    #  etc
2600    my $class = shift;
2601    my ($rexpansion) = @_;
2602
2603    # abbreviations for color/bold/italic properties
2604    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
2605        ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
2606        ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
2607        ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
2608        ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
2609        ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
2610    }
2611
2612    # abbreviations for all other html options
2613    ${$rexpansion}{"hcbg"} = ["html-color-background"];
2614    ${$rexpansion}{"pre"}  = ["html-pre-only"];
2615    ${$rexpansion}{"nnn"}  = ["html-line-numbers"];
2616    ${$rexpansion}{"css"}  = ["html-linked-style-sheet"];
2617    ${$rexpansion}{"nss"}  = ["nohtml-style-sheets"];
2618    ${$rexpansion}{"ss"}   = ["stylesheet"];
2619}
2620
2621sub check_options {
2622
2623    # This will be called once after options have been parsed
2624    my $class = shift;
2625    $rOpts = shift;
2626
2627    # X11 color names for default settings that seemed to look ok
2628    # (these color names are only used for programming clarity; the hex
2629    # numbers are actually written)
2630    use constant ForestGreen   => "#228B22";
2631    use constant SaddleBrown   => "#8B4513";
2632    use constant IndianRed3    => "#CD5555";
2633    use constant DeepSkyBlue4  => "#00688B";
2634    use constant MediumOrchid3 => "#B452CD";
2635    use constant black         => "#000000";
2636    use constant white         => "#FFFFFF";
2637    use constant red           => "#FF0000";
2638
2639    # set default color, bold, italic properties
2640    # anything not listed here will be given the default (punctuation) color --
2641    # these types currently not listed and get default: ws pu s sc cm co p
2642    # When adding NEW_TOKENS: add an entry here if you don't want defaults
2643
2644    # set_default_properties( $short_name, default_color, bold?, italic? );
2645    set_default_properties( 'c',  ForestGreen,   0, 0 );
2646    set_default_properties( 'pd', ForestGreen,   0, 1 );
2647    set_default_properties( 'k',  SaddleBrown,   1, 0 );
2648    set_default_properties( 'q',  IndianRed3,    0, 0 );
2649    set_default_properties( 'hh', IndianRed3,    0, 1 );
2650    set_default_properties( 'h',  IndianRed3,    1, 0 );
2651    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
2652    set_default_properties( 'w',  black,         0, 0 );
2653    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
2654    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
2655    set_default_properties( 'j',  black,         1, 0 );
2656    set_default_properties( 'm',  red,           1, 0 );
2657
2658    set_default_color( 'html-color-background',  white );
2659    set_default_color( 'html-color-punctuation', black );
2660
2661    # setup property lookup tables for tokens based on their short names
2662    # every token type has a short name, and will use these tables
2663    # to do the html markup
2664    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
2665        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
2666        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
2667        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
2668    }
2669
2670    # write style sheet to STDOUT and die if requested
2671    if ( defined( $rOpts->{'stylesheet'} ) ) {
2672        write_style_sheet_file('-');
2673        exit;
2674    }
2675
2676    # make sure user gives a file name after -css
2677    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
2678        $css_linkname = $rOpts->{'html-linked-style-sheet'};
2679        if ( $css_linkname =~ /^-/ ) {
2680            die "You must specify a valid filename after -css\n";
2681        }
2682    }
2683
2684    # check for conflict
2685    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
2686        $rOpts->{'nohtml-style-sheets'} = 0;
2687        warning("You can't specify both -css and -nss; -nss ignored\n");
2688    }
2689
2690    # write a style sheet file if necessary
2691    if ($css_linkname) {
2692
2693        # if the selected filename exists, don't write, because user may
2694        # have done some work by hand to create it; use backup name instead
2695        # Also, this will avoid a potential disaster in which the user
2696        # forgets to specify the style sheet, like this:
2697        #    perltidy -html -css myfile1.pl myfile2.pl
2698        # This would cause myfile1.pl to parsed as the style sheet by GetOpts
2699
2700        my $css_filename = $css_linkname;
2701        if ( -e $css_filename ) {
2702        }
2703        else {
2704
2705            write_style_sheet_file($css_filename);
2706        }
2707    }
2708}
2709
2710sub write_style_sheet_file {
2711
2712    my $css_filename = shift;
2713    my $fh;
2714    unless ( $fh = IO::File->new("> $css_filename") ) {
2715        die "can't open $css_filename: $!\n";
2716    }
2717    write_style_sheet_data($fh);
2718    $fh->close;
2719}
2720
2721sub write_style_sheet_data {
2722
2723    # write the style sheet data to an open file handle
2724    my $fh = shift;
2725
2726    my $bg_color   = $rOpts->{'html-color-background'};
2727    my $text_color = $rOpts->{'html-color-punctuation'};
2728
2729    $fh->print(<<"EOM");
2730/* default style sheet generated by perltidy */
2731body {background: $bg_color; color: $text_color}
2732pre { color: $text_color;
2733      background: $bg_color;
2734      font-family: courier;
2735    }
2736
2737EOM
2738
2739    foreach my $short_name ( sort keys %short_to_long_names ) {
2740        my $long_name = $short_to_long_names{$short_name};
2741
2742        my $abbrev = '.' . $short_name;
2743        if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
2744        my $color = $html_color{$short_name};
2745        if ( !defined($color) ) { $color = $text_color }
2746        $fh->print("$abbrev \{ color: $color;");
2747
2748        if ( $html_bold{$short_name} ) {
2749            $fh->print(" font-weight:bold;");
2750        }
2751
2752        if ( $html_italic{$short_name} ) {
2753            $fh->print(" font-style:italic;");
2754        }
2755        $fh->print("} /* $long_name */\n");
2756    }
2757}
2758
2759sub set_default_color {
2760
2761    # make sure that options hash $rOpts->{$key} contains a valid color
2762    my ( $key, $color ) = @_;
2763    if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
2764    $rOpts->{$key} = check_RGB($color);
2765}
2766
2767sub check_RGB {
2768
2769    # if color is a 6 digit hex RGB value, prepend a #, otherwise
2770    # assume that it is a valid ascii color name
2771    my ($color) = @_;
2772    if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
2773    return $color;
2774}
2775
2776sub set_default_properties {
2777    my ( $short_name, $color, $bold, $italic ) = @_;
2778
2779    set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
2780    my $key;
2781    $key = "html-bold-$short_to_long_names{$short_name}";
2782    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
2783    $key = "html-italic-$short_to_long_names{$short_name}";
2784    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
2785}
2786
2787sub close_html_file {
2788    my $self = shift;
2789    return unless $self->{_html_file_opened};
2790    my $html_fh = $self->{_html_fh};
2791    $html_fh->print( <<"PRE_END");
2792</PRE>
2793PRE_END
2794    unless ( $rOpts->{'html-pre-only'} ) {
2795        $html_fh->print( <<"HTML_END");
2796</BODY>
2797</HTML>
2798HTML_END
2799    }
2800    $html_fh->close();
2801}
2802
2803sub markup_tokens {
2804    my $self = shift;
2805    my ( $rtokens, $rtoken_type ) = @_;
2806    my ( @colored_tokens, $j, $string, $type, $token );
2807
2808    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
2809        $type  = $$rtoken_type[$j];
2810        $token = $$rtokens[$j];
2811
2812        #-------------------------------------------------------
2813        # Patch : intercept a sub name here and split it
2814        # into keyword 'sub' and sub name
2815        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
2816            $token = $self->markup_html_element( $1, 'k' );
2817            push @colored_tokens, $token;
2818            $token = $2;
2819            $type  = 'M';
2820        }
2821
2822        # Patch : intercept a package name here and split it
2823        # into keyword 'package' and name
2824        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
2825            $token = $self->markup_html_element( $1, 'k' );
2826            push @colored_tokens, $token;
2827            $token = $2;
2828            $type  = 'i';
2829        }
2830
2831        #-------------------------------------------------------
2832
2833        $token = $self->markup_html_element( $token, $type );
2834        push @colored_tokens, $token;
2835    }
2836    return \@colored_tokens;
2837}
2838
2839sub markup_html_element {
2840    my $self = shift;
2841    my ( $token, $type ) = @_;
2842
2843    return $token if ( $type eq 'b' );    # skip a blank
2844    return $token if ( $token =~ /^\s*$/ );
2845
2846    if ($missing_html_entities) {
2847        $token =~ s/\&/&amp;/g;
2848        $token =~ s/\</&lt;/g;
2849        $token =~ s/\>/&gt;/g;
2850        $token =~ s/\"/&quot;/g;
2851    }
2852    else {
2853        encode_entities($token);
2854    }
2855
2856    # get the short abbreviation for this token type
2857    my $short_name = $token_short_names{$type};
2858    if ( !defined($short_name) ) {
2859        $short_name = "pu";    # punctuation is default
2860    }
2861
2862    # handle style sheets..
2863    if ( !$rOpts->{'nohtml-style-sheets'} ) {
2864        if ( $short_name ne 'pu' ) {
2865            $token = qq(<SPAN CLASS="$short_name">) . $token . "</SPAN>";
2866        }
2867    }
2868
2869    # handle no style sheets..
2870    else {
2871        my $color = $html_color{$short_name};
2872
2873        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
2874            $token = qq(<FONT COLOR="$color">) . $token . "</FONT>";
2875        }
2876        if ( $html_italic{$short_name} ) { $token = "<I>$token</I>" }
2877        if ( $html_bold{$short_name} )   { $token = "<B>$token</B>" }
2878    }
2879    return $token;
2880}
2881
2882sub finish_formatting {
2883
2884    # called after last line
2885    my $self = shift;
2886    $self->close_html_file();
2887    return;
2888}
2889
2890sub write_line {
2891
2892    my $self = shift;
2893    return unless $self->{_html_file_opened};
2894    my $html_fh = $self->{_html_fh};
2895    my ($line_of_tokens) = @_;
2896    my $line_type   = $line_of_tokens->{_line_type};
2897    my $input_line  = $line_of_tokens->{_line_text};
2898    my $line_number = $line_of_tokens->{_line_number};
2899    chomp $input_line;
2900
2901    # markup line of code..
2902    my $html_line;
2903    if ( $line_type eq 'CODE' ) {
2904        my $rtoken_type = $line_of_tokens->{_rtoken_type};
2905        my $rtokens     = $line_of_tokens->{_rtokens};
2906
2907        if ( $input_line =~ /(^\s*)/ ) {
2908            $html_line = $1;
2909        }
2910        else {
2911            $html_line = "";
2912        }
2913        my $rcolored_tokens = $self->markup_tokens( $rtokens, $rtoken_type );
2914        $html_line .= join '', @$rcolored_tokens;
2915    }
2916
2917    # markup line of non-code..
2918    else {
2919        my $line_character;
2920        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
2921        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
2922        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
2923        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
2924        elsif ( $line_type =~ /^POD/ ) { $line_character = 'P' }
2925        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
2926        elsif ( $line_type eq 'END_START' )  { $line_character = 'k' }
2927        elsif ( $line_type eq 'DATA_START' ) { $line_character = 'k' }
2928        else { $line_character = 'Q' }
2929        $html_line = $self->markup_html_element( $input_line, $line_character );
2930    }
2931
2932    # add the line number if requested
2933    if ( $rOpts->{'html-line-numbers'} ) {
2934        my $extra_space .= ( $line_number < 10 ) ? "   "
2935          : ( $line_number < 100 )  ? "  "
2936          : ( $line_number < 1000 ) ? " "
2937          : "";
2938        $html_line = $extra_space . $line_number . " " . $html_line;
2939    }
2940
2941    # write the line
2942    $html_fh->print("$html_line\n");
2943}
2944
2945#####################################################################
2946#
2947# The PerlTidy::Formatter package adds indentation, whitespace, and line breaks
2948# to the token stream
2949#
2950# WARNING: This is not a real class yet.  Only one Formatter my be used.
2951#
2952#####################################################################
2953
2954package PerlTidy::Formatter;
2955
2956BEGIN {
2957
2958    # Caution: these debug flags produce a lot of output
2959    # They should all be 0 except when debugging small scripts
2960    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
2961    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
2962    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
2963    use constant FORMATTER_DEBUG_FLAG_EQUALS  => 0;
2964    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
2965    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
2966    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
2967    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
2968    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
2969    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
2970    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
2971    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
2972    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
2973
2974    my $debug_warning = sub {
2975        print "FORMATTER_DEBUGGING with key $_[0]\n";
2976    };
2977
2978    FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
2979    FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
2980    FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
2981    FORMATTER_DEBUG_FLAG_EQUALS  && $debug_warning->('EQUALS');
2982    FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
2983    FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
2984    FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
2985    FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
2986    FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
2987    FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
2988    FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
2989    FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
2990    FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
2991}
2992
2993use Carp;
2994use vars qw{
2995
2996  @gnu_stack
2997  $max_gnu_stack_index
2998  $gnu_position_predictor
2999  $line_start_index_to_go
3000  $last_indentation_written
3001  $last_unadjusted_indentation
3002
3003  $saw_VERSION_in_this_file
3004
3005  @gnu_item_list
3006  $max_gnu_item_index
3007  $gnu_sequence_number
3008  $last_output_indentation
3009
3010  @block_type_to_go
3011  @type_sequence_to_go
3012  @container_environment_to_go
3013  @bond_strength_to_go
3014  @forced_breakpoint_to_go
3015  @lengths_to_go
3016  @levels_to_go
3017  @leading_spaces_to_go
3018  @reduced_spaces_to_go
3019  @matching_token_to_go
3020  @mate_index_to_go
3021  @nesting_blocks_to_go
3022  @ci_levels_to_go
3023  @nesting_depth_to_go
3024  @nobreak_to_go
3025  @old_breakpoint_to_go
3026  @tokens_to_go
3027  @types_to_go
3028
3029  %saved_opening_indentation
3030
3031  $max_index_to_go
3032  $old_line_count_in_batch
3033  $last_nonblank_index_to_go
3034  $last_nonblank_type_to_go
3035  $last_nonblank_token_to_go
3036  $last_last_nonblank_index_to_go
3037  $last_last_nonblank_type_to_go
3038  $last_last_nonblank_token_to_go
3039  @nonblank_lines_at_depth
3040
3041  $forced_breakpoint_count
3042  $forced_breakpoint_undo_count
3043  @forced_breakpoint_undo_stack
3044  %postponed_breakpoint
3045
3046  $tabbing
3047  $tabstr
3048  $embedded_tab_count
3049  $first_embedded_tab_at
3050  $last_embedded_tab_at
3051  $deleted_semicolon_count
3052  $first_deleted_semicolon_at
3053  $last_deleted_semicolon_at
3054  $added_semicolon_count
3055  $first_added_semicolon_at
3056  $last_added_semicolon_at
3057  $saw_negative_indentation
3058  $first_tabbing_disagreement
3059  $last_tabbing_disagreement
3060  $in_tabbing_disagreement
3061  $tabbing_disagreement_count
3062  $input_line_tabbing
3063
3064  $last_line_leading_type
3065  $last_line_leading_level
3066  $last_last_line_leading_level
3067
3068  %block_leading_text
3069  %block_opening_line_number
3070  $csc_new_statement_ok
3071  $accumulating_text_for_block
3072  $leading_block_text
3073  $leading_block_text_level
3074  $leading_block_text_length_exceeded
3075  $leading_block_text_line_number
3076  $closing_side_comment_prefix_pattern
3077  $closing_side_comment_list_pattern
3078
3079  $last_nonblank_token
3080  $last_nonblank_type
3081  $last_last_nonblank_token
3082  $last_last_nonblank_type
3083  $last_nonblank_block_type
3084  $last_output_level
3085  $do_follower_pattern
3086  $if_brace_follower_pattern
3087  %space_before_paren
3088  $brace_follower_pattern
3089  $looking_for_else
3090  $other_brace_follower_pattern
3091  $else_brace_follower_pattern
3092  $anon_sub_brace_follower_pattern
3093  $anon_sub_1_brace_follower_pattern
3094
3095  @has_broken_sublist
3096  @dont_align
3097  @want_comma_break
3098
3099  $index_start_one_line_block
3100  $semicolons_before_block_self_destruct
3101  $index_max_forced_break
3102  $input_line_number
3103  $diagnostics_object
3104  $vertical_aligner_object
3105  $logger_object
3106  $file_writer_object
3107  $formatter_self
3108  @ci_stack
3109  $last_line_had_side_comment
3110  %want_break_before
3111  %outdent_keyword
3112  $static_block_comment_pattern
3113  $static_side_comment_pattern
3114
3115  $rOpts_add_whitespace
3116  $rOpts_continuation_indentation
3117  $rOpts_cuddled_else
3118  $rOpts_delete_old_whitespace
3119  $rOpts_fuzzy_line_length
3120  $rOpts_indent_columns
3121  $rOpts_line_up_parentheses
3122  $rOpts_maximum_line_length
3123  $rOpts_brace_left_and_indent
3124
3125  %is_vertical_alignment_type
3126  %tightness
3127  %matching_token
3128  $rOpts
3129  %right_bond_strength
3130  %left_bond_strength
3131  %binary_ws_rules
3132  %want_left_space
3133  %want_right_space
3134  %is_digraph
3135  %is_trigraph
3136  $bli_pattern
3137};
3138
3139sub make_regex {
3140
3141    # Given a string, make the corresponding regex with qr.
3142    # Versions of perl before 5.005 do not have qr,
3143    # so we will just return the string, which will work
3144    # but not be optimized.
3145    BEGIN {
3146        if ( $] < 5.005 ) {
3147            sub qr { $_[0] }
3148        }
3149    }
3150    qr($_[0]);
3151}
3152
3153BEGIN {
3154
3155    # block types for which -bli is active
3156    $bli_pattern = '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
3157    my @digraphs = qw(
3158      .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
3159      <= >= == =~ !~ != ++ -- /= x=
3160    );
3161    @is_digraph{@digraphs} = (1) x scalar(@digraphs);
3162
3163    my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> );
3164    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
3165
3166}
3167
3168# whitespace codes
3169use constant WS_YES      => 1;
3170use constant WS_OPTIONAL => 0;
3171use constant WS_NO       => -1;
3172
3173# Token bond strengths.
3174use constant NO_BREAK    => 10000;
3175use constant VERY_STRONG => 100;
3176use constant STRONG      => 2.1;
3177use constant NOMINAL     => 1.1;
3178use constant WEAK        => 0.8;
3179use constant VERY_WEAK   => 0.55;
3180
3181# values for testing indexes in output array
3182use constant UNDEFINED_INDEX => -1;
3183
3184# Maximum number of little messages; probably need not be changed.
3185use constant MAX_NAG_MESSAGES => 6;
3186
3187# increment between sequence numbers for each type
3188# For example, ?: pairs might have numbers 7,11,15,...
3189use constant TYPE_SEQUENCE_INCREMENT => 4;
3190
3191{
3192
3193    # methods to count instances
3194    my $_count = 0;
3195    sub get_count        { $_count; }
3196    sub _increment_count { ++$_count }
3197    sub _decrement_count { --$_count }
3198}
3199
3200# interface to PerlTidy::Logger routines
3201sub warning {
3202    if ($logger_object) {
3203        $logger_object->warning(@_);
3204    }
3205}
3206
3207sub complain {
3208    if ($logger_object) {
3209        $logger_object->complain(@_);
3210    }
3211}
3212
3213sub write_logfile_entry {
3214    if ($logger_object) {
3215        $logger_object->write_logfile_entry(@_);
3216    }
3217}
3218
3219sub black_box {
3220    if ($logger_object) {
3221        $logger_object->black_box(@_);
3222    }
3223}
3224
3225sub report_definite_bug {
3226    if ($logger_object) {
3227        $logger_object->report_definite_bug();
3228    }
3229}
3230
3231sub get_saw_brace_error {
3232    if ($logger_object) {
3233        $logger_object->get_saw_brace_error();
3234    }
3235}
3236
3237sub we_are_at_the_last_line {
3238    if ($logger_object) {
3239        $logger_object->we_are_at_the_last_line();
3240    }
3241}
3242
3243# interface to PerlTidy::Diagnostics routine
3244sub write_diagnostics {
3245
3246    if ($diagnostics_object) {
3247        $diagnostics_object->write_diagnostics(@_);
3248    }
3249}
3250
3251sub get_added_semicolon_count {
3252    my $self = shift;
3253    return $added_semicolon_count;
3254}
3255
3256sub DESTROY {
3257    $_[0]->_decrement_count();
3258}
3259
3260sub new {
3261
3262    my $class = shift;
3263
3264    # we are given an object with a write_line() method to take lines
3265    my %defaults = (
3266        sink_object        => undef,
3267        diagnostics_object => undef,
3268        logger_object      => undef,
3269    );
3270    my %args = ( %defaults, @_ );
3271
3272    $logger_object      = $args{logger_object};
3273    $diagnostics_object = $args{diagnostics_object};
3274
3275    # FIXME: we create another object with a get_line() and peek_ahead() method
3276    my $sink_object = $args{sink_object};
3277    $file_writer_object =
3278      PerlTidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
3279
3280    # initialize the leading whitespace stack to negative levels
3281    # so that we can never run off the end of the stack
3282    $gnu_position_predictor = 0;    # where the current token is predicted to be
3283    $max_gnu_stack_index    = 0;
3284    $max_gnu_item_index     = -1;
3285    $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
3286    @gnu_item_list               = ();
3287    $gnu_sequence_number         = 0;    # this will count the output batches
3288    $last_output_indentation     = 0;
3289    $last_indentation_written    = 0;
3290    $last_unadjusted_indentation = 0;
3291
3292    $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
3293
3294    @block_type_to_go            = ();
3295    @type_sequence_to_go         = ();
3296    @container_environment_to_go = ();
3297    @bond_strength_to_go         = ();
3298    @forced_breakpoint_to_go     = ();
3299    @lengths_to_go               = ();    # line length to start of ith token
3300    @levels_to_go                = ();
3301    @matching_token_to_go        = ();
3302    @mate_index_to_go            = ();
3303    @nesting_blocks_to_go        = ();
3304    @ci_levels_to_go             = ();
3305    @nesting_depth_to_go         = (0);
3306    @nobreak_to_go               = ();
3307    @old_breakpoint_to_go        = ();
3308    @tokens_to_go                = ();
3309    @types_to_go                 = ();
3310    @leading_spaces_to_go        = ();
3311    @reduced_spaces_to_go        = ();
3312
3313    @dont_align         = ();
3314    @has_broken_sublist = ();
3315    @want_comma_break   = ();
3316
3317    @ci_stack                   = ("");
3318    $saw_negative_indentation   = 0;
3319    $first_tabbing_disagreement = 0;
3320    $last_tabbing_disagreement  = 0;
3321    $tabbing_disagreement_count = 0;
3322    $in_tabbing_disagreement    = 0;
3323    $input_line_tabbing         = undef;
3324
3325    $last_last_line_leading_level = 0;
3326    $last_line_leading_level      = 0;
3327    $last_line_leading_type       = '#';
3328
3329    $last_nonblank_token        = ';';
3330    $last_nonblank_type         = ';';
3331    $last_last_nonblank_token   = ';';
3332    $last_last_nonblank_type    = ';';
3333    $last_nonblank_block_type   = "";
3334    $last_output_level          = 0;
3335    $looking_for_else           = 0;
3336    $embedded_tab_count         = 0;
3337    $first_embedded_tab_at      = 0;
3338    $last_embedded_tab_at       = 0;
3339    $deleted_semicolon_count    = 0;
3340    $first_deleted_semicolon_at = 0;
3341    $last_deleted_semicolon_at  = 0;
3342    $added_semicolon_count      = 0;
3343    $first_added_semicolon_at   = 0;
3344    $last_added_semicolon_at    = 0;
3345    $last_line_had_side_comment = 0;
3346    %postponed_breakpoint       = ();
3347
3348    # variables for adding side comments
3349    %block_leading_text        = ();
3350    %block_opening_line_number = ();
3351    $csc_new_statement_ok      = 1;
3352
3353    %saved_opening_indentation = ();
3354
3355    reset_block_text_accumulator();
3356
3357    prepare_for_new_input_lines();
3358
3359    $vertical_aligner_object =
3360      PerlTidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
3361        $logger_object, $diagnostics_object );
3362
3363    if ( $rOpts->{'tabs'} ) {
3364        write_logfile_entry("Indentation will be with a tab character\n");
3365    }
3366    else {
3367        write_logfile_entry(
3368            "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
3369    }
3370
3371    # This is the start of a formatter referent.
3372    # I'll populate it someday when I figure out an easy, automated
3373    # way.
3374    $formatter_self = {};
3375
3376    bless $formatter_self, $class;
3377
3378    # Safety check..this is not a class yet
3379    if ( _increment_count() > 1 ) {
3380        confess
3381"Attempt to create more than 1 object in $class, which is not a true class yet\n";
3382    }
3383
3384    return $formatter_self;
3385
3386}
3387
3388sub prepare_for_new_input_lines {
3389
3390    $gnu_sequence_number++;    # increment output batch counter
3391    $line_start_index_to_go         = 0;
3392    $max_gnu_item_index             = UNDEFINED_INDEX;
3393    $index_max_forced_break         = UNDEFINED_INDEX;
3394    $max_index_to_go                = UNDEFINED_INDEX;
3395    $last_nonblank_index_to_go      = UNDEFINED_INDEX;
3396    $last_nonblank_type_to_go       = '';
3397    $last_nonblank_token_to_go      = '';
3398    $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
3399    $last_last_nonblank_type_to_go  = '';
3400    $last_last_nonblank_token_to_go = '';
3401    $forced_breakpoint_count        = 0;
3402    $forced_breakpoint_undo_count   = 0;
3403    $brace_follower_pattern         = undef;
3404    $lengths_to_go[0] = 0;
3405    $old_line_count_in_batch = 1;
3406
3407    destroy_one_line_block();
3408}
3409
3410sub write_line {
3411
3412    my $self = shift;
3413    my ($line_of_tokens) = @_;
3414
3415    my $line_type            = $line_of_tokens->{_line_type};
3416    my $input_line           = $line_of_tokens->{_line_text};
3417    my $want_blank_line_next = 0;
3418
3419    # handle line of code..
3420    if ( $line_type eq 'CODE' ) {
3421
3422        # let logger see all non-blank lines of code
3423        if ( $input_line !~ /^\s*$/ ) {
3424            my $output_line_number =
3425              $vertical_aligner_object->get_output_line_number();
3426            black_box( $line_of_tokens, $output_line_number );
3427        }
3428        print_line_of_tokens($line_of_tokens);
3429    }
3430
3431    # handle line of non-code..
3432    else {
3433
3434        # set special flags
3435        my $skip_line = 0;
3436        my $tee_line  = 0;
3437        if ( $line_type =~ /^POD/ ) {
3438
3439            # pod docs should have a preceding blank line
3440            if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
3441            if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
3442            if ( !$skip_line && $line_type eq 'POD_START' ) {
3443                want_blank_line();
3444            }
3445
3446            # patch to put a blank line after =cut
3447            # (required by podchecker)
3448            if ( $line_type eq 'POD_END' ) {
3449                $file_writer_object->reset_consecutive_blank_lines();
3450                $want_blank_line_next = 1;
3451            }
3452        }
3453
3454        # write unindented non-code line
3455        if ( !$skip_line ) {
3456            if ($tee_line) { $file_writer_object->tee_on() }
3457            write_unindented_line($input_line);
3458            if ($tee_line)             { $file_writer_object->tee_off() }
3459            if ($want_blank_line_next) { want_blank_line(); }
3460        }
3461    }
3462}
3463
3464sub create_one_line_block {
3465    $index_start_one_line_block            = $_[0];
3466    $semicolons_before_block_self_destruct = $_[1];
3467}
3468
3469sub destroy_one_line_block {
3470    $index_start_one_line_block            = UNDEFINED_INDEX;
3471    $semicolons_before_block_self_destruct = 0;
3472}
3473
3474sub leading_spaces_to_go {
3475
3476    # return the number of indentation spaces for a token in the output stream;
3477    # these were previously stored by 'set_leading_whitespace'.
3478
3479    return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
3480
3481}
3482
3483sub get_SPACES {
3484
3485    # return the number of leading spaces associated with an indentation
3486    # variable $indentation is either a constant number of spaces or an object
3487    # with a get_SPACES method.
3488    my $indentation = shift;
3489    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
3490}
3491
3492sub get_AVAILABLE_SPACES_to_go {
3493
3494    my $item = $leading_spaces_to_go[ $_[0] ];
3495
3496    # return the number of available leading spaces associated with an
3497    # indentation variable.  $indentation is either a constant number of
3498    # spaces or an object with a get_AVAILABLE_SPACES method.
3499    return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
3500}
3501
3502sub new_lp_indentation_item {
3503
3504    # this is an interface to the IndentationItem class
3505    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
3506
3507    # A negative level implies not to store the item in the item_list
3508    my $index = 0;
3509    if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
3510
3511    my $item = PerlTidy::IndentationItem->new(
3512        $spaces,      $level,
3513        $ci_level,    $available_spaces,
3514        $index,       $gnu_sequence_number,
3515        $align_paren, $max_gnu_stack_index,
3516        $line_start_index_to_go,
3517    );
3518
3519    if ( $level >= 0 ) {
3520        $gnu_item_list[$max_gnu_item_index] = $item;
3521    }
3522
3523    return $item;
3524}
3525
3526sub set_leading_whitespace {
3527
3528    # This routine defines leading whitespace
3529    # given: the level and continuation_level of a token,
3530    # define: space count of leading string which would apply if it
3531    # were the first token of a new line.
3532
3533    my ( $level, $ci_level, $in_continued_quote ) = @_;
3534
3535    # patch for -bli, which adds one continuation indentation for
3536    # opening braces
3537    if ( $rOpts_brace_left_and_indent
3538        && $max_index_to_go == 0
3539        && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
3540    {
3541        $ci_level++;
3542    }
3543
3544    # patch to avoid trouble when input file has negative indentation.
3545    # other logic should catch this error.
3546    if ( $level < 0 ) { $level = 0 }
3547
3548    #-------------------------------------------
3549    # handle the standard indentation scheme
3550    #-------------------------------------------
3551    unless ($rOpts_line_up_parentheses) {
3552        my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
3553          $rOpts_indent_columns;
3554        my $ci_spaces =
3555          ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
3556
3557        if ($in_continued_quote) {
3558            $space_count = 0;
3559            $ci_spaces   = 0;
3560        }
3561        $leading_spaces_to_go[$max_index_to_go] = $space_count;
3562        $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
3563        return;
3564    }
3565
3566    #-------------------------------------------------------------
3567    # handle case of -lp indentation..
3568    #-------------------------------------------------------------
3569
3570    # The continued_quote flag means that this is the first token of a
3571    # line, and it is the continuation of some kind of multi-line quote
3572    # or pattern.  It requires special treatment because it must have no
3573    # added leading whitespace. So we create a special indentation item
3574    # which is not in the stack.
3575    if ($in_continued_quote) {
3576        my $space_count     = 0;
3577        my $available_space = 0;
3578        $level = -1;    # flag to prevent storing in item_list
3579        $leading_spaces_to_go[$max_index_to_go]   =
3580          $reduced_spaces_to_go[$max_index_to_go] =
3581          new_lp_indentation_item( $space_count, $level, $ci_level,
3582            $available_space, 0 );
3583        return;
3584    }
3585
3586    # get the top state from the stack
3587    my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
3588    my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
3589    my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
3590
3591    my $type  = $types_to_go[$max_index_to_go];
3592    my $token = $tokens_to_go[$max_index_to_go];
3593
3594    # If we come to an opening paren far to the right of the '=', then
3595    # backup starting index to the '=' to avoid going beyond the right margin
3596    #if ( $token eq '(' ) {
3597    if ( $type =~ /^[\{\(\[]$/ ) {
3598        my $last_equals = $gnu_stack[$max_gnu_stack_index]->get_LAST_EQUALS();
3599
3600        # if we have an '=' after the previous assumed breakpoint
3601        if ( $last_equals > $line_start_index_to_go ) {
3602
3603            # and we are far to the right
3604            if ( $gnu_position_predictor > $rOpts_maximum_line_length / 2 ) {
3605
3606                # ok, make the switch -- note that we do not set a real
3607                # breakpoint here because we may not really need one; sub
3608                # scan_list will do that if necessary
3609                $line_start_index_to_go = $last_equals;
3610                if ( $types_to_go[ $line_start_index_to_go + 1 ] eq 'b' ) {
3611                    $line_start_index_to_go++;
3612                }
3613
3614                # and update the position predictor
3615                $gnu_position_predictor =
3616                  total_line_length( $line_start_index_to_go,
3617                    $max_index_to_go - 1 );
3618            }
3619        }
3620    }
3621
3622    # Check for decreasing depth ..
3623    # Note that one token may have both decreasing and then increasing
3624    # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
3625    # in this example we would first go back to (1,0) then up to (2,0)
3626    # in a single call.
3627    if ( $level < $current_level || $ci_level < $current_ci_level ) {
3628
3629        # loop to find the first entry at or completely below this level
3630        my ( $lev, $ci_lev );
3631        while (1) {
3632            if ($max_gnu_stack_index) {
3633
3634                # save index of token which closes this level
3635                $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
3636
3637                # Undo any extra indentation if we saw no commas
3638                my $available_spaces =
3639                  $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
3640                my $comma_count =
3641                  $gnu_stack[$max_gnu_stack_index]->get_COMMA_COUNT();
3642
3643                if ( $comma_count <= 0 && $available_spaces > 0 ) {
3644
3645                    my $i     = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
3646                    my $seqno =
3647                      $gnu_stack[$max_gnu_stack_index]->get_SEQUENCE_NUMBER();
3648
3649                    # Be sure this item was created in this batch.  This
3650                    # should be true because we delete any available
3651                    # space from open items at the end of each batch.
3652                    if ( $gnu_sequence_number != $seqno
3653                        || $i > $max_gnu_item_index )
3654                    {
3655                        warning(
3656"Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
3657                        );
3658                        report_definite_bug();
3659                    }
3660
3661                    else {
3662                        $gnu_item_list[$i]
3663                          ->permanently_decrease_AVAILABLE_SPACES(
3664                            $available_spaces);
3665
3666                        my $j;
3667                        for ( $j = $i + 1 ; $j <= $max_gnu_item_index ; $j++ ) {
3668                            $gnu_item_list[$j]
3669                              ->decrease_SPACES($available_spaces);
3670                        }
3671                    }
3672                }
3673
3674                # go down one level
3675                --$max_gnu_stack_index;
3676                $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
3677                $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
3678
3679                # stop when we reach a level at or below the current level
3680                if ( $lev <= $level && $ci_lev <= $ci_level ) {
3681                    $space_count =
3682                      $gnu_stack[$max_gnu_stack_index]->get_SPACES();
3683                    $current_level    = $lev;
3684                    $current_ci_level = $ci_lev;
3685                    last;
3686                }
3687            }
3688
3689            # reached bottom of stack .. should never happen because
3690            # only negative levels can get here, and $level was forced
3691            # to be positive above.
3692            else {
3693                warning(
3694"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
3695                );
3696                report_definite_bug();
3697                last;
3698            }
3699        }
3700    }
3701
3702    # handle increasing depth
3703    if ( $level > $current_level || $ci_level > $current_ci_level ) {
3704
3705        # Compute the standard incremental whitespace.  This will be
3706        # the minimum incremental whitespace that will be used.  This
3707        # choice results in a smooth transition between the gnu-style
3708        # and the standard style.
3709        my $standard_increment =
3710          ( $level - $current_level ) * $rOpts_indent_columns +
3711          ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
3712
3713        # Now we have to define how much extra incremental space
3714        # ("$available_space") we want.  This extra space will be
3715        # reduced as necessary when long lines are encountered or when
3716        # it becomes clear that we do not have a good list.
3717        my $available_space = 0;
3718        my $align_paren     = 0;
3719        my $excess          = 0;
3720
3721        # initialization on empty stack..
3722        if ( $max_gnu_stack_index == 0 ) {
3723            $space_count = $level * $rOpts_indent_columns;
3724        }
3725
3726        # if this is a BLOCK, add the standard increment
3727        elsif ($last_nonblank_block_type) {
3728            $space_count += $standard_increment;
3729        }
3730
3731        # if last nonblank token was not structural indentation,
3732        # just use standard increment
3733        elsif ( $last_nonblank_type ne '{' ) {
3734            $space_count += $standard_increment;
3735        }
3736
3737        # otherwise use the space to the first non-blank level change token
3738        else {
3739
3740            ( $space_count, $available_space, $excess ) =
3741              get_gnu_indentation( $standard_increment,
3742                $gnu_position_predictor );
3743            $align_paren = 1;
3744        }
3745
3746        # update state, but not on a blank token
3747        if ( $types_to_go[$max_index_to_go] ne 'b' ) {
3748
3749            $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
3750
3751            ++$max_gnu_stack_index;
3752            $gnu_stack[$max_gnu_stack_index] =
3753              new_lp_indentation_item( $space_count, $level, $ci_level,
3754                $available_space, $align_paren );
3755
3756            #TESTING-LP
3757            ##################################################################
3758            #$gnu_stack[$max_gnu_stack_index]->set_RECOVERABLE_SPACES($excess);
3759            ##################################################################
3760        }
3761    }
3762
3763    # Count commas and look for non-list characters.  Once we see a
3764    # non-list character, we give up and don't look for any more commas.
3765    if ( $type eq ',' ) {
3766
3767        my $comma_count = $gnu_stack[$max_gnu_stack_index]->get_COMMA_COUNT();
3768        if ( $comma_count >= 0 ) {
3769            $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT( ++$comma_count );
3770        }
3771    }
3772    elsif ( $type eq '=' ) {
3773        $gnu_stack[$max_gnu_stack_index]->set_LAST_EQUALS($max_index_to_go);
3774    }
3775
3776=pod
3777
3778    # Filter out non-lists.  Note that '=' in this filter could be in
3779    # any of the = operators.  Actually, we might be in a list if we see
3780    # ? and : following a comma or comma arrow,  but it probably won't
3781    # format well any way with the extra gnu spaces, because it will
3782    # probably be a long line that breaks after the ':', so it isn't
3783    # worth worrying about.  Example:
3784
3785      my $ftp = $pkg->SUPER::new(
3786          Timeout  => defined $arg{Timeout} ? $arg{Timeout} : 120,
3787          PeerAddr => $peer,
3788      );
3789
3790    NEEDS FURTHER EVALUATION
3791    # BUB: TESTING: commented out: needs more evaluation
3792    #elsif ( $type =~ /(^[\?\:\;\<\>\~]$)|[=]/ && $type !~ /^=>$/ ) {
3793    #    $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT(-1);
3794    #}
3795
3796=cut
3797
3798    # this token might start a new line
3799    # if this is a non-blank..
3800    if ( $type ne 'b' ) {
3801
3802        # '.' ':', '?' '||' '&&' added (mci.t)
3803        #if ( $last_nonblank_type =~ /^([\,\{])$/
3804
3805        # and if ..
3806        if (
3807
3808            # previous character was one of these:
3809            $last_nonblank_type =~ /^([\:\?\,])$/
3810
3811            # previous character was opening and this does not close it
3812            || ( $last_nonblank_type eq '{' && $type ne '}' )
3813
3814            # or this token is one of these:
3815            || $type =~ /^([\.]|\|\||\&\&)$/
3816
3817            # or this is a closing structure
3818            || ( $type eq '}' && $token eq $type )
3819
3820            # or previous token was a keyword
3821            || (
3822                $last_nonblank_type eq 'k'
3823
3824                # in this set:
3825                && (
3826                    $last_nonblank_token =~
3827                    /^(if|unless|and|or|last|next|redo|return)$/
3828
3829                    && $type ne '{'
3830                )
3831            )
3832
3833            # or this is after an assignment after a closing structure
3834            || (
3835                $last_nonblank_type =~ /=/
3836                && $last_nonblank_type !~ /(==|!=|>=|<=|=~|=>)/
3837                && (
3838                    $last_last_nonblank_type =~ /^[\}\)\]]$/
3839
3840                    # and it is significantly to the right
3841                    || $gnu_position_predictor > $rOpts_maximum_line_length / 2
3842                )
3843            )
3844          )
3845        {
3846            check_for_long_gnu_style_lines();
3847            $line_start_index_to_go = $max_index_to_go;
3848
3849            # back up 1 token if we want to break before that type
3850            # otherwise, we may strand tokens like '?' or ':' on a line
3851            if ( $line_start_index_to_go > 0 ) {
3852                if ( $last_nonblank_type eq 'k' ) {
3853                    if ( $last_nonblank_token =~ /^(and|or)$/ ) {
3854                        $line_start_index_to_go--;
3855                    }
3856                }
3857                elsif ( $want_break_before{$last_nonblank_type} ) {
3858                    $line_start_index_to_go--;
3859                }
3860            }
3861        }
3862    }
3863
3864    # remember the predicted position of this token on the output line
3865    if ( $max_index_to_go > $line_start_index_to_go ) {
3866        $gnu_position_predictor =
3867          total_line_length( $line_start_index_to_go, $max_index_to_go );
3868    }
3869    else {
3870        $gnu_position_predictor = $space_count +
3871          token_sequence_length( $max_index_to_go, $max_index_to_go );
3872    }
3873
3874    # store the indentation object for this token
3875    # this allows us to manipulate the leading whitespace
3876    # (in case we have to reduce indentation to fit a line) without
3877    # having to change any token values
3878    $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
3879    $reduced_spaces_to_go[$max_index_to_go] =
3880      ( $max_gnu_stack_index > 0 && $ci_level )
3881      ? $gnu_stack[ $max_gnu_stack_index - 1 ]
3882      : $gnu_stack[$max_gnu_stack_index];
3883    return;
3884}
3885
3886sub get_gnu_indentation {
3887
3888    # define the next indentation space count for the case that gnu-style
3889    # indentation will be used for this token.
3890
3891    # basically, we want to indent to just after the last token on the
3892    # previous line, but we have to put some limits on this
3893
3894    my ( $standard_increment, $gnu_indentation ) = @_;
3895
3896    # total indentation we already have:
3897    my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
3898
3899    # avoid excessive space if requested...
3900    my $excess = 0;
3901    if ( $rOpts->{'maximum-continuation-indentation'} > 0 ) {
3902        $excess =
3903          ( $gnu_indentation - $space_count ) -
3904          $rOpts->{'maximum-continuation-indentation'};
3905        if ( $excess > 0 ) {
3906
3907            #TESTING: - FUTURE LOGIC MAY SKIP THIS
3908            $gnu_indentation -= $excess;
3909        }
3910        else { $excess = 0 }
3911    }
3912
3913    # always add an increment of at least the standard amount
3914    my $min_gnu_indentation = $space_count + $standard_increment;
3915
3916    # how many spaces do we have extra?
3917    my $available_space = $gnu_indentation - $min_gnu_indentation;
3918
3919    #TESTING-LP : FUTURE UPDATE
3920    #$available_space -= $excess;
3921    $excess = 0;
3922
3923    #----------------------------------------------------------------
3924    # for TESTING only: this should make the indentation identical to
3925    # the standard scheme, even though a stack is used.  This is a good
3926    # way to test the proper functioning of the stack.
3927    my $TESTING = 0;    # 0 or 1
3928
3929    #----------------------------------------------------------------
3930
3931    # maintain at least the minimum incremental spacing
3932    if ( $TESTING || $available_space < 0 ) {
3933        $gnu_indentation = $min_gnu_indentation;
3934        $available_space = 0;
3935        $excess          = 0;
3936    }
3937
3938    return ( $gnu_indentation, $available_space, $excess );
3939}
3940
3941sub check_for_long_gnu_style_lines {
3942
3943    # look at the current estimated maximum line length, and
3944    # remove some whitespace if it exceeds the desired maximum
3945
3946    # this is only for the '-lp' style
3947    return unless ($rOpts_line_up_parentheses);
3948
3949    # nothing can be done if no stack items defined for this line
3950    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
3951
3952    # see if we have exceeded the maximum desired line length
3953    # keep 2 extra free because they are needed in some cases
3954    # (result of trial-and-error testing)
3955    my $spaces_needed =
3956      $gnu_position_predictor - $rOpts_maximum_line_length + 2;
3957
3958    return if ( $spaces_needed < 0 );
3959
3960    # We are over the limit, so try to remove a requested number of
3961    # spaces from leading whitespace.  We are only allowed to remove
3962    # from whitespace items created on this batch, since others have
3963    # already been used and cannot be undone.
3964    my @candidates = ();
3965    my $i;
3966
3967    # loop over all whitespace items created for the current batch
3968    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
3969        my $item = $gnu_item_list[$i];
3970
3971        # item must still be open to be a candidate (otherwise it
3972        # cannot influence the current token)
3973        next if ( $item->get_CLOSED() >= 0 );
3974
3975        my $available_spaces = $item->get_AVAILABLE_SPACES();
3976
3977        if ( $available_spaces > 0 ) {
3978            push ( @candidates, [ $i, $available_spaces ] );
3979        }
3980    }
3981
3982    return unless (@candidates);
3983
3984    # sort by available whitespace so that we can remove whitespace
3985    # from the maximum available first
3986    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
3987
3988    # keep removing whitespace until we are done or have no more
3989    my $candidate;
3990    foreach $candidate (@candidates) {
3991        my ( $i, $available_spaces ) = @{$candidate};
3992        my $deleted_spaces =
3993          ( $available_spaces > $spaces_needed )
3994          ? $spaces_needed
3995          : $available_spaces;
3996
3997        # remove the incremental space from this item
3998        $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
3999
4000        my $i_debug = $i;
4001
4002        # update the leading whitespace of this item and all items
4003        # that came after it
4004        for ( ; $i <= $max_gnu_item_index ; $i++ ) {
4005
4006            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
4007            if ( $old_spaces > $deleted_spaces ) {
4008                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
4009            }
4010
4011            # shouldn't happen except for code bug:
4012            else {
4013                my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
4014                my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
4015                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
4016                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
4017                warning(
4018"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
4019                );
4020                report_definite_bug();
4021            }
4022        }
4023        $gnu_position_predictor -= $deleted_spaces;
4024        $spaces_needed          -= $deleted_spaces;
4025        last unless ( $spaces_needed > 0 );
4026    }
4027}
4028
4029sub finish_lp_batch {
4030
4031    # This routine is called once after each each output stream batch is
4032    # finished to undo indentation for all incomplete -lp
4033    # indentation levels.  It is too risky to leave a level open,
4034    # because then we can't backtrack in case of a long line to follow.
4035    # This means that comments and blank lines will disrupt this
4036    # indentation style.  But the vertical aligner may be able to
4037    # get the space back if there are side comments.
4038
4039    # this is only for the 'lp' style
4040    return unless ($rOpts_line_up_parentheses);
4041
4042    # nothing can be done if no stack items defined for this line
4043    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
4044
4045    # loop over all whitespace items created for the current batch
4046    my $i;
4047    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
4048        my $item = $gnu_item_list[$i];
4049
4050        # only look for open items
4051        next if ( $item->get_CLOSED() >= 0 );
4052
4053        # forget index of any '='
4054        $item->set_LAST_EQUALS(-1);
4055
4056        # Tentatively remove all of the available space
4057        # (The vertical aligner will try to get it back later)
4058        my $available_spaces = $item->get_AVAILABLE_SPACES();
4059        if ( $available_spaces > 0 ) {
4060
4061            # delete incremental space for this item
4062            $gnu_item_list[$i]
4063              ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
4064
4065            # Reduce the total indentation space of any nodes that follow
4066            # Note that any such nodes must necessarily be dependents
4067            # of this node.
4068            foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
4069                $gnu_item_list[$j]->decrease_SPACES($available_spaces);
4070            }
4071        }
4072    }
4073    return;
4074}
4075
4076sub reduce_lp_indentation {
4077
4078    # reduce the leading whitespace at token $i if possible by $spaces_needed
4079    # (a large value of $spaces_needed will remove all excess space)
4080    # NOTE: to be called from scan_list only for a sequence of tokens
4081    # contained between opening and closing parens/braces/brackets
4082
4083    my ( $i, $spaces_needed ) = @_;
4084    my $deleted_spaces = 0;
4085
4086    # this is only for the -lp style
4087    if ($rOpts_line_up_parentheses) {
4088
4089        # it is only safe to undo indentation if there are no children.
4090        my $item = $leading_spaces_to_go[$i];
4091        if ( !$item->get_HAVE_CHILD() ) {
4092
4093            # we'll remove these spaces, but mark them as recoverable
4094            $deleted_spaces =
4095              $item->tentatively_decrease_AVAILABLE_SPACES($spaces_needed);
4096        }
4097    }
4098
4099    return $deleted_spaces;
4100}
4101
4102sub token_sequence_length {
4103
4104    # return length of tokens ($ifirst .. $ilast) including first & last
4105    # returns 0 if $ifirst > $ilast
4106    my $ifirst = shift;
4107    my $ilast  = shift;
4108    return 0 if ( $ilast < 0 || $ifirst > $ilast );
4109    return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
4110    return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
4111}
4112
4113sub total_line_length {
4114
4115    # return length of a line of tokens ($ifirst .. $ilast)
4116    my $ifirst = shift;
4117    my $ilast  = shift;
4118    if ( $ifirst < 0 ) { $ifirst = 0 }
4119
4120    return leading_spaces_to_go($ifirst) +
4121      token_sequence_length( $ifirst, $ilast );
4122}
4123
4124sub excess_line_length {
4125
4126    # return number of characters by which a line of tokens ($ifirst..$ilast)
4127    # exceeds the allowable line length.
4128    my $ifirst = shift;
4129    my $ilast  = shift;
4130    if ( $ifirst < 0 ) { $ifirst = 0 }
4131    return leading_spaces_to_go($ifirst) +
4132      token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
4133}
4134
4135sub finish_formatting {
4136
4137    # flush buffer and write any informative messages
4138    my $self = shift;
4139
4140    flush();
4141    $file_writer_object->decrement_output_line_number()
4142      ;    # fix up line number since it was incremented
4143    we_are_at_the_last_line();
4144    if ( $added_semicolon_count > 0 ) {
4145        my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
4146        my $what =
4147          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
4148        write_logfile_entry("$added_semicolon_count $what added:\n");
4149        write_logfile_entry(
4150            "  $first at input line $first_added_semicolon_at\n");
4151
4152        if ( $added_semicolon_count > 1 ) {
4153            write_logfile_entry(
4154                "   Last at input line $last_added_semicolon_at\n");
4155        }
4156        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
4157        write_logfile_entry("\n");
4158    }
4159
4160    if ( $deleted_semicolon_count > 0 ) {
4161        my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
4162        my $what =
4163          ( $deleted_semicolon_count > 1 )
4164          ? "semicolons were"
4165          : "semicolon was";
4166        write_logfile_entry(
4167            "$deleted_semicolon_count unnecessary $what deleted:\n");
4168        write_logfile_entry(
4169            "  $first at input line $first_deleted_semicolon_at\n");
4170
4171        if ( $deleted_semicolon_count > 1 ) {
4172            write_logfile_entry(
4173                "   Last at input line $last_deleted_semicolon_at\n");
4174        }
4175        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
4176        write_logfile_entry("\n");
4177    }
4178
4179    if ( $embedded_tab_count > 0 ) {
4180        my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
4181        my $what =
4182          ( $embedded_tab_count > 1 )
4183          ? "quotes or patterns"
4184          : "quote or pattern";
4185        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
4186        write_logfile_entry(
4187"This means the display of this script could vary with device or software\n"
4188        );
4189        write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
4190
4191        if ( $embedded_tab_count > 1 ) {
4192            write_logfile_entry(
4193                "   Last at input line $last_embedded_tab_at\n");
4194        }
4195        write_logfile_entry("\n");
4196    }
4197
4198    if ($first_tabbing_disagreement) {
4199        write_logfile_entry(
4200"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
4201        );
4202    }
4203
4204    if ($in_tabbing_disagreement) {
4205        write_logfile_entry(
4206"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
4207        );
4208    }
4209    else {
4210
4211        if ($last_tabbing_disagreement) {
4212
4213            write_logfile_entry(
4214"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
4215            );
4216        }
4217        else {
4218            write_logfile_entry("No indentation disagreement seen\n");
4219        }
4220    }
4221    write_logfile_entry("\n");
4222
4223    $vertical_aligner_object->report_anything_unusual();
4224
4225    $file_writer_object->report_line_length_errors();
4226}
4227
4228sub check_options {
4229
4230    # This routine is called to check the Opts hash after it is defined
4231
4232    ($rOpts) = @_;
4233    my ( $tabbing_string, $tab_msg );
4234    my @list;    # working storage
4235
4236    make_static_block_comment_pattern();
4237    make_static_side_comment_pattern();
4238    make_closing_side_comment_prefix();
4239    make_closing_side_comment_list_pattern();
4240
4241    # The -lp indentation logic requires that perltidy examine large
4242    # blocks of code between flushing.  When the user takes control
4243    # of line breaks, perltidy never sees large enough buffers to
4244    # use the -lp style.  There's no way around this.
4245    if ( $rOpts->{'line-up-parentheses'} ) {
4246
4247        if ( $rOpts->{'indent-only'}
4248            || !$rOpts->{'add-newlines'}
4249            || !$rOpts->{'delete-old-newlines'} )
4250        {
4251            print STDERR <<EOM;
4252Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
4253EOM
4254            $rOpts->{'line-up-parentheses'} = 0;
4255        }
4256    }
4257
4258    # At present, tabs are not compatable with the line-up-parentheses style
4259    # (it would be possible to entab the total leading whitespace
4260    # just prior to writing the line, if desired).
4261    if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
4262        print STDERR <<EOM;
4263Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t.
4264EOM
4265        $rOpts->{'tabs'} = 0;
4266    }
4267
4268    # Likewise, tabs are not compatable with outdenting..
4269    if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
4270        print STDERR <<EOM;
4271Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t.
4272EOM
4273        $rOpts->{'tabs'} = 0;
4274    }
4275
4276    if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
4277        print STDERR <<EOM;
4278Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t.
4279EOM
4280        $rOpts->{'tabs'} = 0;
4281    }
4282
4283    if ( $rOpts->{'tabs'} ) {
4284        $tabstr = "\t";
4285    }
4286    else {
4287        $tabstr = " " x $rOpts->{'indent-columns'};
4288    }
4289
4290    if ( !$rOpts->{'space-for-semicolon'} ) {
4291        $want_left_space{'f'} = -1;
4292    }
4293
4294    if ( $rOpts->{'space-terminal-semicolon'} ) {
4295        $want_left_space{';'} = 1;
4296    }
4297
4298    # implement outdenting preferences for keywords
4299    %outdent_keyword = ();
4300
4301    # load defaults
4302    @list = qw(next last redo goto return);
4303
4304    # override defaults if requested
4305    if ( $rOpts->{'outdent-keyword-list'} ) {
4306        $rOpts->{'outdent-keyword-list'} =~ s/^\s*//;
4307        $rOpts->{'outdent-keyword-list'} =~ s/\s*$//;
4308        @list = split /\s+/, $rOpts->{'outdent-keyword-list'};
4309    }
4310
4311    # FUTURE: if not a keyword, assume that it is an identifier
4312    foreach my $i (@list) {
4313        if ( $PerlTidy::Tokenizer::is_keyword{$i} ) {
4314            $outdent_keyword{$i} = 1;
4315        }
4316        else {
4317            print STDERR "ignoring '$i' in -okwl list; not a perl keyword";
4318        }
4319    }
4320
4321    # implement user whitespace preferences
4322    if ( $rOpts->{'want-left-space'} ) {
4323        @list = split /\s/, $rOpts->{'want-left-space'};
4324        @want_left_space{@list} = (1) x scalar(@list);
4325    }
4326
4327    if ( $rOpts->{'want-right-space'} ) {
4328        @list = split /\s/, $rOpts->{'want-right-space'};
4329        @want_right_space{@list} = (1) x scalar(@list);
4330    }
4331    if ( $rOpts->{'nowant-left-space'} ) {
4332        @list = split /\s/, $rOpts->{'nowant-left-space'};
4333        @want_left_space{@list} = (-1) x scalar(@list);
4334    }
4335
4336    if ( $rOpts->{'nowant-right-space'} ) {
4337        @list = split /\s/, $rOpts->{'nowant-right-space'};
4338        @want_right_space{@list} = (-1) x scalar(@list);
4339    }
4340    if ( $rOpts->{'dump-want-left-space'} ) {
4341        dump_want_left_space(*STDOUT);
4342        exit 1;
4343    }
4344
4345    if ( $rOpts->{'dump-want-right-space'} ) {
4346        dump_want_right_space(*STDOUT);
4347        exit 1;
4348    }
4349
4350    # implement user break preferences
4351    if ( $rOpts->{'want-break-after'} ) {
4352        @list = split /\s/, $rOpts->{'want-break-after'};
4353        foreach my $tok (@list) {
4354            if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
4355            my $lbs = $left_bond_strength{$tok};
4356            my $rbs = $right_bond_strength{$tok};
4357            if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
4358                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
4359                  ( $lbs, $rbs );
4360            }
4361        }
4362    }
4363
4364    if ( $rOpts->{'want-break-before'} ) {
4365        @list = split /\s/, $rOpts->{'want-break-before'};
4366        foreach my $tok (@list) {
4367            my $lbs = $left_bond_strength{$tok};
4368            my $rbs = $right_bond_strength{$tok};
4369            if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
4370                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
4371                  ( $lbs, $rbs );
4372            }
4373        }
4374    }
4375
4376    # make note if breaks are before certain key types
4377    %want_break_before = ();
4378    foreach my $tok ( '.', ',', ':', '?', '&&', '||' ) {
4379        $want_break_before{$tok} =
4380          $left_bond_strength{$tok} < $right_bond_strength{$tok};
4381    }
4382
4383    # Coordinate ?/: breaks, which must be similar
4384    if ( !$want_break_before{':'} ) {
4385        $want_break_before{'?'}   = $want_break_before{':'};
4386        $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
4387        $left_bond_strength{'?'}  = NO_BREAK;
4388    }
4389
4390    # Define here tokens which may follow the closing brace of a do statement
4391    # on the same line, as in:
4392    #   } while ( $something);
4393    $do_follower_pattern = make_regex('^(until|while|unless|if|;|,)$');
4394
4395    # These tokens may follow the closing brace of an if or elsif block.
4396    # In other words, for cuddled else we want code to look like:
4397    #   } elsif ( $something) {
4398    #   } else {
4399    if ( $rOpts->{'cuddled-else'} ) {
4400        $if_brace_follower_pattern = make_regex('^(else|elsif)$');
4401    }
4402    else { $if_brace_follower_pattern = undef; }
4403
4404    # nothing can follow the closing curly of an else { } block:
4405    $else_brace_follower_pattern = make_regex('^$');
4406
4407    # what can follow a multi-line anonymous sub definition closing curly:
4408    $anon_sub_brace_follower_pattern =
4409      make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\)|)$');
4410
4411    # what can follow a one-line anonynomous sub closing curly:
4412    # one-line anonumous subs also have ']' here...
4413    # see tk3.t and PP.pm
4414    $anon_sub_1_brace_follower_pattern =
4415      make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\]|\)|)$');
4416
4417    # What can follow a closing curly of a short block
4418    # which is not an if/elsif/else/do/sort/map/grep/eval/sub
4419    # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
4420    $other_brace_follower_pattern =
4421      make_regex('^(\,|\;|:|=>|or|and|\&\&|\|\}|\)|)$');
4422
4423    # TESTING -- deactivated 20010614 because -bl didn't
4424    # work well with it.
4425    #if ( $rOpts->{'opening-brace-on-new-line'} ) {
4426    #    $left_bond_strength{'{'}  = WEAK;
4427    #    $right_bond_strength{'{'} = VERY_STRONG;
4428    #}
4429    #else {
4430    $right_bond_strength{'{'} = WEAK;
4431    $left_bond_strength{'{'}  = VERY_STRONG;
4432
4433    #}
4434
4435    # make -l=0  equal to -l=infinite
4436    if ( !$rOpts->{'maximum-line-length'} ) {
4437        $rOpts->{'maximum-line-length'} = 1000000;
4438    }
4439
4440    # make -lbl=0  equal to -lbl=infinite
4441    if ( !$rOpts->{'long-block-line-count'} ) {
4442        $rOpts->{'long-block-line-count'} = 1000000;
4443    }
4444
4445    # hashes used to simplify setting whitespace
4446    %tightness = (
4447        '{' => $rOpts->{'brace-tightness'},
4448        '}' => $rOpts->{'brace-tightness'},
4449        '(' => $rOpts->{'paren-tightness'},
4450        ')' => $rOpts->{'paren-tightness'},
4451        '[' => $rOpts->{'square-bracket-tightness'},
4452        ']' => $rOpts->{'square-bracket-tightness'},
4453    );
4454    %matching_token = (
4455        '{' => '}',
4456        '(' => ')',
4457        '[' => ']',
4458        '?' => ':',
4459    );
4460
4461    # frequently used parameters
4462    $rOpts_add_whitespace           = $rOpts->{'add-whitespace'};
4463    $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
4464    $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
4465    $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
4466    $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
4467    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
4468    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
4469    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
4470    $rOpts_brace_left_and_indent    = $rOpts->{'brace-left-and-indent'};
4471
4472}
4473
4474sub make_static_block_comment_pattern {
4475
4476    # create the pattern used to identify static block comments
4477    $static_block_comment_pattern = '^(\s*)##';
4478
4479    # allow the user to change it
4480    if ( $rOpts->{'static-block-comment-prefix'} ) {
4481        my $prefix = $rOpts->{'static-block-comment-prefix'};
4482        $prefix =~ s/^\s*//;
4483        if ( $prefix !~ /^#/ ) {
4484            die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n";
4485
4486        }
4487        my $pattern = '^(\s*)' . $prefix;
4488        eval "'##'=~/$pattern/";
4489        if ($@) {
4490            die
4491"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
4492        }
4493        $static_block_comment_pattern = $pattern;
4494    }
4495}
4496
4497sub make_closing_side_comment_list_pattern {
4498
4499    # turn any input list into a regex for recognizing selected block types
4500    $closing_side_comment_list_pattern = '^\w+';
4501    if ( defined( $rOpts->{'closing-side-comment-list'} )
4502        && $rOpts->{'closing-side-comment-list'} )
4503    {
4504        $closing_side_comment_list_pattern =
4505          make_block_pattern( $rOpts->{'closing-side-comment-list'} );
4506    }
4507}
4508
4509sub make_block_pattern {
4510
4511=pod
4512
4513   given a string of block-type keywords, return a regex to match them
4514   The only tricky part is that labels are indicated with a single ':'
4515   and the 'sub' token text may have additional text after it (name of sub).
4516
4517   Example:
4518
4519    input string: "if else elsif unless while for foreach do : sub";
4520    pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4521
4522=cut
4523
4524    my ( $string, $abbrev ) = @_;
4525    $string =~ s/^\s*//;
4526    $string =~ s/\s$//;
4527    my @list = split /\s+/, $string;
4528    my $saw_sub = 0;
4529    my @words   = ();
4530    for my $i (@list) {
4531        if ( $i eq 'sub' ) { $saw_sub = 1 }
4532        elsif ( $i eq ':' ) {
4533            push @words, '\w+:';
4534        }
4535        elsif ( $i =~ /^\w/ ) {
4536            push @words, $i;
4537        }
4538        else {
4539            print STDERR "unrecognized block type $i after -cscl, ignoring\n";
4540        }
4541    }
4542    my $pattern = '(' . join ( '|', @words ) . ')$';
4543    if ($saw_sub) {
4544        $pattern = '(' . $pattern . '|sub)';
4545    }
4546    $pattern = '^' . $pattern;
4547
4548    return $pattern;
4549}
4550
4551sub make_static_side_comment_pattern {
4552
4553    # create the pattern used to identify static side comments
4554    $static_side_comment_pattern = '^##';
4555
4556    # allow the user to change it
4557    if ( $rOpts->{'static-side-comment-prefix'} ) {
4558        my $prefix = $rOpts->{'static-side-comment-prefix'};
4559        $prefix =~ s/^\s*//;
4560        my $pattern = '^' . $prefix;
4561        eval "'##'=~/$pattern/";
4562        if ($@) {
4563            die
4564"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
4565        }
4566        $static_side_comment_pattern = $pattern;
4567    }
4568}
4569
4570sub make_closing_side_comment_prefix {
4571
4572    # Be sure we have a valid closing side comment prefix
4573    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
4574    my $csc_prefix_pattern;
4575    if ( !defined($csc_prefix) ) {
4576        $csc_prefix         = '## end';
4577        $csc_prefix_pattern = '^##\s+end';
4578    }
4579    else {
4580        my $test_csc_prefix = $csc_prefix;
4581        if ( $test_csc_prefix !~ /^#/ ) {
4582            $test_csc_prefix = '#' . $test_csc_prefix;
4583        }
4584
4585        # make a regex to recognize the prefix
4586        my $test_csc_prefix_pattern = $test_csc_prefix;
4587
4588        # escape any special characters
4589        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
4590
4591        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
4592
4593        # allow exact number of intermediate spaces to vary
4594        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
4595
4596        # make sure we have a good pattern
4597        # if we fail this we probably have an error in escaping
4598        # characters.
4599        eval "'##'=~/$test_csc_prefix_pattern/";
4600        if ($@) {
4601
4602            # shouldn't happen..must have screwed up escaping, above
4603            report_definite_bug();
4604            print STDERR
4605"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
4606
4607            # just warn and keep going with defaults
4608            print STDERR "Please consider using a simpler -cscp prefix\n";
4609            print STDERR "Using default -cscp instead; please check output\n";
4610        }
4611        else {
4612            $csc_prefix         = $test_csc_prefix;
4613            $csc_prefix_pattern = $test_csc_prefix_pattern;
4614        }
4615    }
4616    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
4617    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
4618}
4619
4620sub dump_want_left_space {
4621    my $fh = shift;
4622    local $" = "\n";
4623    print $fh <<EOM;
4624These values are the main control of whitespace to the left of a token type;
4625They may be altered with the -wls parameter.
4626For a list of token types, use perltidy --dump-token-types (-dtt)
4627 1 means the token wants a space to its left
4628-1 means the token does not want a space to its left
4629---------------------------------------------------------------
4630EOM
4631    foreach my $i ( sort keys %want_left_space ) {
4632        print $fh "$i\t$want_left_space{$i}\n";
4633    }
4634}
4635
4636sub dump_want_right_space {
4637    my $fh = shift;
4638    local $" = "\n";
4639    print $fh <<EOM;
4640These values are the main control of whitespace to the right of a token type;
4641They may be altered with the -wrs parameter.
4642For a list of token types, use perltidy --dump-token-types (-dtt)
4643 1 means the token wants a space to its right
4644-1 means the token does not want a space to its right
4645---------------------------------------------------------------
4646EOM
4647    foreach my $i ( sort keys %want_right_space ) {
4648        print $fh "$i\t$want_right_space{$i}\n";
4649    }
4650}
4651
4652sub is_essential_whitespace {
4653
4654    # Essential whitespace means whitespace which cannot be safely deleted.
4655    # We are given three tokens and their types:
4656    # ($tokenl, $typel) is the token to the left of the space in question
4657    # ($tokenr, $typer) is the token to the right of the space in question
4658    # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
4659    #
4660    # This is a slow routine but is needed too often except when -mangle
4661    # is used.
4662    my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
4663
4664    # never combine two bare words or numbers
4665    my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
4666
4667      # do not combine a number with a concatination dot
4668      # example: pom.caputo:
4669      # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
4670      || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
4671      || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
4672
4673      # do not join a minus with a bare word, because you might form
4674      # a file test operator.  Example from Complex.pm:
4675      # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
4676      || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
4677
4678      # and something like this could become ambiguous without space
4679      # after the '-':
4680      #   use constant III=>1;
4681      #   $a = $b - III;
4682      # and even this:
4683      #   $a = - III;
4684      || ( ( $tokenl eq '-' )
4685        && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
4686
4687      # '= -' should not become =- or you will get a warning
4688      # about reversed -=
4689      # || ($tokenr eq '-')
4690
4691      # keep a space between a quote and a bareword to prevent the
4692      # bareword from becomming a quote modifier.
4693      || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
4694
4695      # perl is very fussy about spaces before <<
4696      || ( $tokenr =~ /^\<\</ )
4697
4698      # avoid combining tokens to create new meanings. Example:
4699      #     $a+ +$b must not become $a++$b
4700      || ( $is_digraph{ $tokenl . $tokenr } )
4701      || ( $is_trigraph{ $tokenl . $tokenr } )
4702
4703      # another example: do not combine these two &'s:
4704      #     allow_options & &OPT_EXECCGI
4705      || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
4706
4707      # don't combine $$ or $# with any alphanumeric
4708      # (testfile mangle.t with --mangle)
4709      || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
4710
4711      # retain any space after possible filehandle
4712      # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
4713      || ( $typel eq 'Z' || $typell eq 'Z' )
4714
4715      # keep any space between filehandle and paren:
4716      # file mangle.t with --mangle:
4717      || ( $typel eq 'Y' && $tokenr eq '(' )
4718
4719      # retain any space after here doc operator ( hereerr.t)
4720      || ( $typel eq 'h' )
4721
4722      # FIXME: this needs some further work; extrude.t has test cases
4723      # it is safest to retain any space after start of ? : operator
4724      # because of perl's quirky parser.
4725      # ie, this line will fail if you remove the space after the '?':
4726      #    $b=join $comma ? ',' : ':', @_;   # ok
4727      #    $b=join $comma ?',' : ':', @_;   # error!
4728      # but this is ok :)
4729      #    $b=join $comma?',' : ':', @_;   # not a problem!
4730      ## || ($typel eq '?')
4731
4732      # be careful with a space around ++ and --, to avoid ambiguity as to
4733      # which token it applies
4734      || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
4735      || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
4736
4737      # need space after foreach my; for example, this will fail in
4738      # older versions of Perl:
4739      # foreach my$ft(@filetypes)...
4740      || ( $tokenl eq 'my'
4741        && $tokenll =~ /^(for|foreach)$/
4742        && $tokenr  =~ /^\$/ )
4743
4744      # must have space between grep and left paren; "grep(" will fail
4745      || ( $tokenr eq '(' && $tokenl =~ /^(sort|grep|map)$/ )
4746
4747      # don't stick numbers next to left parens, as in:
4748      #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
4749      || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
4750
4751      # don't join something like: for bla::bla:: abc
4752      # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
4753      || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
4754      ;    # the value of this long logic sequence is the result we want
4755    return $result;
4756}
4757
4758sub set_white_space_flag {
4759
4760=pod
4761
4762     This routine examines each pair of nonblank tokens and
4763     sets values for array @white_space_flag.
4764
4765     $white_space_flag[$j] is a flag indicating whether a white space
4766     BEFORE token $j is needed, with the following values:
4767
4768             -1 do not want a space before token $j
4769              0 optional space or $j is a whitespace
4770              1 want a space before token $j
4771
4772
4773    The values for the first token will be defined based
4774    upon the contents of the "to_go" output array.
4775
4776    Note: retain debug print statements because they are usually
4777    required after adding new token types.
4778
4779=cut
4780
4781    BEGIN {
4782
4783        # initialize these global hashes, which control the use of
4784        # whitespace around tokens:
4785        #
4786        # %binary_ws_rules
4787        # %want_left_space
4788        # %want_right_space
4789        # %space_before_paren
4790        #
4791        # Many token types are identical to the tokens themselves.
4792        # See the tokenizer for a complete list. Here are some special types:
4793        #   k = perl keyword
4794        #   f = semicolon in for statement
4795        #   m = unary minus
4796        #   p = unary plus
4797        # Note that :: is excluded since it should be contained in an identifier
4798        # Note that '->' is excluded because it never gets space
4799        # parentheses and brackets are excluded since they are handled specially
4800        # curly braces are included but may be overridden by logic, such as
4801        # newline logic.
4802
4803        # NEW_TOKENS: create a whitespace rule here.  This can be as
4804        # simple as adding your new letter to @spaces_both_sides, for
4805        # example.
4806
4807        my @spaces_both_sides = qw"
4808          + - * / % ? = . : x < > | & ^ .. << >> ** && .. ||  => += -=
4809          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
4810          &&= ||= <=> k f w F n C Y U G v
4811          ";
4812
4813        my @spaces_left_side = qw"
4814          t ! ~ m p { \ h pp mm Z j
4815          ";
4816        push ( @spaces_left_side, '#' );    # avoids warning message
4817
4818        my @spaces_right_side = qw"
4819          ; } ) ] R J ++ -- **=
4820          ";
4821        push ( @spaces_right_side, ',' );    # avoids warning message
4822        my @space_before_paren = qw(
4823          my local and or eq ne if else elsif until unless while
4824          for foreach push return shift unshift pop join split die
4825        );
4826        @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
4827        @want_right_space{@spaces_both_sides} =
4828          (1) x scalar(@spaces_both_sides);
4829        @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
4830        @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
4831        @want_left_space{@spaces_right_side} =
4832          (-1) x scalar(@spaces_right_side);
4833        @want_right_space{@spaces_right_side} =
4834          (1) x scalar(@spaces_right_side);
4835        @space_before_paren{@space_before_paren} =
4836          (1) x scalar(@space_before_paren);
4837        $want_left_space{'L'}   = WS_NO;
4838        $want_left_space{'->'}  = WS_NO;
4839        $want_right_space{'->'} = WS_NO;
4840        $want_left_space{'**'}  = WS_NO;
4841        $want_right_space{'**'} = WS_NO;
4842
4843        # hash type information must stay tightly bound
4844        # as in :  ${xxxx}
4845        $binary_ws_rules{'i'}{'L'} = WS_NO;
4846        $binary_ws_rules{'i'}{'{'} = WS_YES;
4847        $binary_ws_rules{'k'}{'{'} = WS_YES;
4848        $binary_ws_rules{'U'}{'{'} = WS_YES;
4849        $binary_ws_rules{'i'}{'['} = WS_NO;
4850        $binary_ws_rules{'R'}{'L'} = WS_NO;
4851        $binary_ws_rules{'R'}{'{'} = WS_NO;
4852        $binary_ws_rules{'t'}{'L'} = WS_NO;
4853        $binary_ws_rules{'t'}{'{'} = WS_NO;
4854        $binary_ws_rules{'}'}{'L'} = WS_NO;
4855        $binary_ws_rules{'}'}{'{'} = WS_NO;
4856        $binary_ws_rules{'$'}{'L'} = WS_NO;
4857        $binary_ws_rules{'$'}{'{'} = WS_NO;
4858        $binary_ws_rules{'@'}{'L'} = WS_NO;
4859        $binary_ws_rules{'@'}{'{'} = WS_NO;
4860        $binary_ws_rules{'='}{'L'} = WS_YES;
4861
4862        # the following includes ') {'
4863        # as in :    if ( xxx ) { yyy }
4864        $binary_ws_rules{']'}{'L'} = WS_NO;
4865        $binary_ws_rules{']'}{'{'} = WS_NO;
4866        $binary_ws_rules{')'}{'{'} = WS_YES;
4867        $binary_ws_rules{')'}{'['} = WS_NO;
4868        $binary_ws_rules{']'}{'['} = WS_NO;
4869        $binary_ws_rules{']'}{'{'} = WS_NO;
4870        $binary_ws_rules{'}'}{'['} = WS_NO;
4871        $binary_ws_rules{'R'}{'['} = WS_NO;
4872
4873        $binary_ws_rules{']'}{'++'} = WS_NO;
4874        $binary_ws_rules{']'}{'--'} = WS_NO;
4875        $binary_ws_rules{')'}{'++'} = WS_NO;
4876        $binary_ws_rules{')'}{'--'} = WS_NO;
4877
4878        $binary_ws_rules{'R'}{'++'} = WS_NO;
4879        $binary_ws_rules{'R'}{'--'} = WS_NO;
4880
4881        $binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
4882        $binary_ws_rules{'w'}{':'} = WS_NO;
4883        $binary_ws_rules{'i'}{'Q'} = WS_YES;
4884        $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
4885
4886        # FIXME: we need to split 'i' into variables and functions
4887        # and have no space for functions but space for variables.  For now,
4888        # I have a special patch in the special rules below
4889        $binary_ws_rules{'i'}{'('} = WS_NO;
4890
4891        $binary_ws_rules{'w'}{'('} = WS_NO;
4892        $binary_ws_rules{'w'}{'{'} = WS_YES;
4893    }
4894    my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
4895    my ( $last_token, $last_type, $last_block_type, $token, $type,
4896        $block_type );
4897    my (@white_space_flag);
4898    my $j_tight_closing_paren = -1;
4899
4900    if ( $max_index_to_go >= 0 ) {
4901        $token      = $tokens_to_go[$max_index_to_go];
4902        $type       = $types_to_go[$max_index_to_go];
4903        $block_type = $block_type_to_go[$max_index_to_go];
4904    }
4905    else {
4906        $token      = ' ';
4907        $type       = 'b';
4908        $block_type = '';
4909    }
4910
4911    # loop over all tokens
4912    my ( $j, $ws );
4913
4914    for ( $j = 0 ; $j <= $jmax ; $j++ ) {
4915
4916        if ( $$rtoken_type[$j] eq 'b' ) {
4917            $white_space_flag[$j] = WS_OPTIONAL;
4918            next;
4919        }
4920
4921        # set a default value, to be changed as needed
4922        $ws              = undef;
4923        $last_token      = $token;
4924        $last_type       = $type;
4925        $last_block_type = $block_type;
4926        $token           = $$rtokens[$j];
4927        $type            = $$rtoken_type[$j];
4928        $block_type      = $$rblock_type[$j];
4929
4930        #---------------------------------------------------------------
4931        # section 1:
4932        # handle space on the inside of opening braces
4933        #---------------------------------------------------------------
4934        if ( ( $last_type =~ /^[L\{\(\[]$/ ) ) {
4935
4936            $j_tight_closing_paren = -1;
4937
4938            # let's keep empty matched braces together: () {} []
4939            # except for BLOCKS
4940            if ( $token eq $matching_token{$last_token} ) {
4941                if ($block_type) {
4942                    $ws = WS_YES;
4943                }
4944                else {
4945                    $ws = WS_NO;
4946                }
4947            }
4948            else {
4949
4950                # we're considering the right of an opening brace
4951                # tightness = 0 means always pad inside with space
4952                # tightness = 1 means pad inside if "complex"
4953                # tightness = 2 means never pad inside with space
4954
4955                my $tightness;
4956                if ( $last_type eq '{'
4957                    && $last_token eq '{'
4958                    && $last_block_type )
4959                {
4960                    $tightness = $rOpts->{'block-brace-tightness'};
4961                }
4962                else { $tightness = $tightness{$last_token} }
4963
4964                if ( $tightness <= 0 ) {
4965                    $ws = WS_YES;
4966                }
4967                elsif ( $tightness > 1 ) {
4968                    $ws = WS_NO;
4969                }
4970                else {
4971                    my $j_next =
4972                      ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
4973                    my $tok_next  = $$rtokens[$j_next];
4974                    my $type_next = $$rtoken_type[$j_next];
4975
4976                    # for tightness = 1, if there is just one token
4977                    # within the matching pair, we will keep it tight
4978                    if (
4979                        $tok_next eq $matching_token{$last_token}
4980
4981                        # but watch out for this: [ [ ]    (misc.t)
4982                        && $last_token ne $token
4983                      )
4984                    {
4985
4986                        # remember where to put the space for the closing paren
4987                        $j_tight_closing_paren = $j_next;
4988                        $ws                    = WS_NO;
4989                    }
4990                    else {
4991                        $ws = WS_YES;
4992                    }
4993                }
4994            }
4995        }    # done with opening braces and brackets
4996        my $ws_1 = $ws;    # for debugging
4997
4998        #---------------------------------------------------------------
4999        # section 2:
5000        # handle space on inside of closing brace pairs
5001        #---------------------------------------------------------------
5002        if ( $type =~ /[\}\)\]R]/ ) {
5003
5004            if ( $j == $j_tight_closing_paren ) {
5005
5006                $j_tight_closing_paren = -1;
5007                $ws                    = WS_NO;
5008            }
5009            else {
5010
5011                if ( !defined($ws) ) {
5012
5013                    my $tightness;
5014                    if ( $type eq '}' && $token eq '}' && $block_type ) {
5015                        $tightness = $rOpts->{'block-brace-tightness'};
5016                    }
5017                    else { $tightness = $tightness{$token} }
5018
5019                    $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
5020                }
5021            }
5022        }
5023
5024        my $ws_2 = $ws;    # for debugging
5025
5026        #---------------------------------------------------------------
5027        # section 3:
5028        # use the binary table
5029        #---------------------------------------------------------------
5030        if ( !defined($ws) ) {
5031            $ws = $binary_ws_rules{$last_type}{$type};
5032        }
5033        my $ws_3 = $ws;    # for debugging
5034
5035        #---------------------------------------------------------------
5036        # section 4:
5037        # some special cases
5038        #---------------------------------------------------------------
5039        if ( $token eq '(' ) {
5040
5041            # This will have to be tweaked as tokenization changes.
5042            # We want a space after certain block types:
5043            #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
5044            #
5045            # But not others:
5046            #     &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } );
5047            # At present, the & block is not marked as a code block, so
5048            # this works:
5049            if ( $last_type eq '}' ) {
5050                if ( $last_block_type =~ /^(sort|map|grep)$/ ) {
5051                    $ws = WS_YES;
5052                }
5053                else {
5054                    $ws = WS_NO;
5055                }
5056            }
5057
5058            # -----------------------------------------------------
5059            # added 'w' and 'i' checks for TESTING gnu-style update
5060            # something like:
5061            #   myfun(    &myfun(   ->myfun(
5062            # -----------------------------------------------------
5063            if ( ( $last_type =~ /^[wkU]$/ )
5064                || ( $last_type eq 'i' && $last_token =~ /^(\&|->)/ ) )
5065            {
5066
5067                # Do not introduce new space between keyword or function
5068                # and ( except in special cases) because this can
5069                # introduce errors in some cases ( prnterr1.t )
5070                unless ( $space_before_paren{$last_token} ) {
5071                    $ws = WS_NO;
5072                }
5073            }
5074
5075            # space between something like $i and ( in
5076            # for $i ( 0 .. 20 ) {
5077            # FIXME: eventually, type 'i' needs to be split into multiple
5078            # token types so this can be a hardwired rule.
5079            elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
5080                $ws = WS_YES;
5081            }
5082        }
5083
5084        # keep space between 'sub' and '{' for anonymous sub definition
5085        if ( $type eq '{' ) {
5086            if ( $last_token eq 'sub' ) {
5087                $ws = WS_YES;
5088            }
5089
5090            # this is needed to avoid no space in '){'
5091            if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
5092
5093            # avoid any space before the brace or bracket in something like
5094            #  @opts{'a','b',...}
5095            if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
5096                $ws = WS_NO;
5097            }
5098        }
5099
5100        elsif ( $type eq 'i' ) {
5101
5102            # never a space before ->
5103            if ( $token =~ /^\-\>/ ) {
5104                $ws = WS_NO;
5105            }
5106        }
5107
5108        # retain any space between '-' and bare word
5109        elsif ( $type eq 'w' || $type eq 'C' ) {
5110            $ws = WS_OPTIONAL if $last_type eq '-';
5111        }
5112
5113        # always space before side comment
5114        elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
5115
5116        # always preserver whatever space was used after a possible
5117        # filehandle or here doc operator
5118        if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
5119            $ws = WS_OPTIONAL;
5120        }
5121
5122        my $ws_4 = $ws;
5123
5124        #---------------------------------------------------------------
5125        # section 5:
5126        # default rules not covered above
5127        #---------------------------------------------------------------
5128        # if we fall through to here,
5129        # look at the pre-defined hash tables for the two tokens, and
5130        # if (they are equal) use the common value
5131        # if (either is zero or undef) use the other
5132        # if (either is -1) use it
5133        # That is,
5134        # left  vs right
5135        # 1    vs    1     --> 1
5136        # 0     vs    0     --> 0
5137        # -1    vs   -1    --> -1
5138        # 0     vs   -1    --> -1
5139        # 0     vs    1     --> 1
5140        # 1     vs    0     --> 1
5141        # -1    vs    0     --> -1
5142        # -1    vs    1     --> -1
5143        # 1     vs   -1    --> -1
5144        if ( !defined($ws) ) {
5145            my $wl = $want_left_space{$type};
5146            my $wr = $want_right_space{$last_type};
5147            if ( !defined($wl) ) { $wl = 0 }
5148            if ( !defined($wr) ) { $wr = 0 }
5149            $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
5150        }
5151
5152        if ( !defined($ws) ) {
5153            $ws = 0;
5154            write_diagnostics(
5155                "WS flag is undefined for tokens $last_token $token\n");
5156        }
5157
5158        if ( ( $ws == 0 )
5159            && $j > 0
5160            && $j < $jmax
5161            && ( $last_type !~ /^[Zh]$/ ) )
5162        {
5163
5164            # If this happens, we have a non-fatal but undesirable
5165            # hole in the above rules which should be patched.
5166            write_diagnostics(
5167                "WS flag is zero for tokens $last_token $token\n");
5168        }
5169        $white_space_flag[$j] = $ws;
5170
5171        FORMATTER_DEBUG_FLAG_WHITE && do {
5172            my $str = substr( $last_token, 0, 15 );
5173            $str .= ' ' x ( 16 - length($str) );
5174            if ( !defined($ws_1) ) { $ws_1 = "*" }
5175            if ( !defined($ws_2) ) { $ws_2 = "*" }
5176            if ( !defined($ws_3) ) { $ws_3 = "*" }
5177            if ( !defined($ws_4) ) { $ws_4 = "*" }
5178            print
5179"WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
5180        };
5181    }
5182    return \@white_space_flag;
5183}
5184
5185{    # begin closure print_line_of_tokens
5186
5187    my $rtoken_type;
5188    my $rtokens;
5189    my $rlevels;
5190    my $rslevels;
5191    my $rblock_type;
5192    my $rcontainer_type;
5193    my $rcontainer_environment;
5194    my $rtype_sequence;
5195    my $input_line;
5196    my $rnesting_tokens;
5197    my $rci_levels;
5198    my $rnesting_blocks;
5199
5200    my $in_quote;
5201    my $python_indentation_level;
5202
5203    # These local token variables are stored by store_token_to_go:
5204    my $block_type;
5205    my $ci_level;
5206    my $container_environment;
5207    my $container_type;
5208    my $in_continued_quote;
5209    my $level;
5210    my $nesting_blocks;
5211    my $no_internal_newlines;
5212    my $slevel;
5213    my $token;
5214    my $type;
5215    my $type_sequence;
5216
5217    # routine to pull the jth token from the line of tokens
5218    sub extract_token {
5219        my $j = shift;
5220        $token                 = $$rtokens[$j];
5221        $type                  = $$rtoken_type[$j];
5222        $block_type            = $$rblock_type[$j];
5223        $container_type        = $$rcontainer_type[$j];
5224        $container_environment = $$rcontainer_environment[$j];
5225        $type_sequence         = $$rtype_sequence[$j];
5226        $level                 = $$rlevels[$j];
5227        $slevel                = $$rslevels[$j];
5228        $nesting_blocks        = $$rnesting_blocks[$j];
5229        $ci_level              = $$rci_levels[$j];
5230    }
5231
5232    # routines to save and restore the current token
5233    {
5234
5235        # Saved values
5236        my $saved_block_type;
5237        my $saved_ci_level;
5238        my $saved_container_environment;
5239        my $saved_container_type;
5240        my $saved_in_continued_quote;
5241        my $saved_level;
5242        my $saved_nesting_blocks;
5243        my $saved_no_internal_newlines;
5244        my $saved_slevel;
5245        my $saved_token;
5246        my $saved_type;
5247        my $saved_type_sequence;
5248
5249        sub save_current_token {
5250            $saved_block_type            = $block_type;
5251            $saved_ci_level              = $ci_level;
5252            $saved_container_environment = $container_environment;
5253            $saved_container_type        = $container_type;
5254            $saved_in_continued_quote    = $in_continued_quote;
5255            $saved_level                 = $level;
5256            $saved_nesting_blocks        = $nesting_blocks;
5257            $saved_no_internal_newlines  = $no_internal_newlines;
5258            $saved_slevel                = $slevel;
5259            $saved_token                 = $token;
5260            $saved_type                  = $type;
5261            $saved_type_sequence         = $type_sequence;
5262        }
5263
5264        sub restore_current_token {
5265            $block_type            = $saved_block_type;
5266            $ci_level              = $saved_ci_level;
5267            $container_environment = $saved_container_environment;
5268            $container_type        = $saved_container_type;
5269            $in_continued_quote    = $saved_in_continued_quote;
5270            $level                 = $saved_level;
5271            $nesting_blocks        = $saved_nesting_blocks;
5272            $no_internal_newlines  = $saved_no_internal_newlines;
5273            $slevel                = $saved_slevel;
5274            $token                 = $saved_token;
5275            $type                  = $saved_type;
5276            $type_sequence         = $saved_type_sequence;
5277        }
5278    }
5279
5280    # Routine to place the current token into the output stream.
5281    # Called once per output token.
5282    sub store_token_to_go {
5283
5284        my $flag = $no_internal_newlines;
5285        if ( $_[0] ) { $flag = 1 }
5286
5287        $tokens_to_go[ ++$max_index_to_go ] = $token;
5288        $types_to_go[$max_index_to_go]                 = $type;
5289        $nobreak_to_go[$max_index_to_go]               = $flag;
5290        $old_breakpoint_to_go[$max_index_to_go]        = 0;
5291        $forced_breakpoint_to_go[$max_index_to_go]     = 0;
5292        $block_type_to_go[$max_index_to_go]            = $block_type;
5293        $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
5294        $container_environment_to_go[$max_index_to_go] = $container_environment;
5295        $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
5296        $ci_levels_to_go[$max_index_to_go]             = $ci_level;
5297        $mate_index_to_go[$max_index_to_go]            = -1;
5298
5299        if ( $type ne 'b' ) {
5300            $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
5301            $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
5302            $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
5303            $last_nonblank_index_to_go      = $max_index_to_go;
5304            $last_nonblank_type_to_go       = $type;
5305            $last_nonblank_token_to_go      = $token;
5306        }
5307
5308        $levels_to_go[$max_index_to_go] = $level;
5309        $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
5310        $lengths_to_go[ $max_index_to_go + 1 ] =
5311          $lengths_to_go[$max_index_to_go] + length($token);
5312
5313        # Define the indentation that this token would have if it started
5314        # a new line.  We have to do this now because we need to know this
5315        # when considering one-line blocks.
5316        set_leading_whitespace( $level, $ci_level, $in_continued_quote );
5317
5318        FORMATTER_DEBUG_FLAG_STORE && do {
5319            my ( $a, $b, $c ) = caller();
5320            print
5321"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
5322        };
5323    };
5324
5325    sub insert_new_token_to_go {
5326
5327        # insert a new token into the output stream.  use same level as
5328        # previous token; assumes a character at max_index_to_go.
5329        save_current_token();
5330        ( $token, $type, $slevel, $no_internal_newlines ) = @_;
5331
5332        if ( $max_index_to_go == UNDEFINED_INDEX ) {
5333            warning("code bug: bad call to insert_new_token_to_go\n");
5334        }
5335        $level = $levels_to_go[$max_index_to_go];
5336
5337        # FIXME: it seems to be necessary to use the next, rather than
5338        # previous, value of this variable when creating a new blank (align.t)
5339        #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
5340        $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
5341        $ci_level              = $ci_levels_to_go[$max_index_to_go];
5342        $container_environment = $container_environment_to_go[$max_index_to_go];
5343        $in_continued_quote    = 0;
5344        $block_type            = "";
5345        $type_sequence         = "";
5346        store_token_to_go();
5347        restore_current_token();
5348        return;
5349    }
5350
5351    sub print_line_of_tokens {
5352
5353        my $line_of_tokens = shift;
5354
5355        # this routine is called once per input line to process all of the
5356        # tokens on that line.  Each token is sent to sub store_token_to_go,
5357        # which stores them in an output buffer which is dumped whenever
5358        # appropriate.
5359
5360        # extract input line number for error messages
5361        $input_line_number = $line_of_tokens->{_line_number};
5362
5363        $rtoken_type            = $line_of_tokens->{_rtoken_type};
5364        $rtokens                = $line_of_tokens->{_rtokens};
5365        $rlevels                = $line_of_tokens->{_rlevels};
5366        $rslevels               = $line_of_tokens->{_rslevels};
5367        $rblock_type            = $line_of_tokens->{_rblock_type};
5368        $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
5369        $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
5370        $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
5371        $input_line             = $line_of_tokens->{_line_text};
5372        $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
5373        $rci_levels             = $line_of_tokens->{_rci_levels};
5374        $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
5375
5376        $in_continued_quote       = $line_of_tokens->{_starting_in_quote};
5377        $in_quote                 = $line_of_tokens->{_ending_in_quote};
5378        $python_indentation_level =
5379          $line_of_tokens->{_python_indentation_level};
5380
5381        my $j;
5382        my $j_next;
5383        my $jmax;
5384        my $next_nonblank_token;
5385        my $next_nonblank_token_type;
5386        my $rwhite_space_flag;
5387
5388        $jmax                  = @$rtokens - 1;
5389        $block_type            = "";
5390        $container_type        = "";
5391        $container_environment = "";
5392        $type_sequence         = "";
5393        $no_internal_newlines  = 1 - $rOpts->{'add-newlines'};
5394
5395        # Handle a continued quote..
5396        if ($in_continued_quote) {
5397
5398            # A line which is entirely a quote or pattern must go out
5399            # verbatim.  Note: the \n is contained in $input_line.
5400            if ( $jmax <= 0 ) {
5401                if ( ( $input_line =~ "\t" ) ) {
5402                    note_embedded_tab();
5403                }
5404                write_unindented_line("$input_line");
5405                $last_line_had_side_comment = 0;
5406                return;
5407            }
5408
5409            # prior to version 20010406, perltidy had a bug which placed
5410            # continuation indentation before the last line of some multiline
5411            # quotes and patterns -- exactly the lines passing this way.
5412            # To help find affected lines in scripts run with these
5413            # versions, run with '-chk', and it will warn of any quotes or
5414            # patterns which might have been modified by these early
5415            # versions.
5416            if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
5417                warning(
5418"-chk: please check this line for extra leading whitespace\n"
5419                );
5420            }
5421        }
5422
5423        # delete trailing blank tokens
5424        if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
5425
5426        # Handle a blank line..
5427        if ( $jmax < 0 ) {
5428
5429            # For the 'swallow-optional-blank-lines' option, we delete all
5430            # old blank lines and let the blank line rules generate any
5431            # needed blanks.
5432            if ( !$rOpts->{'swallow-optional-blank-lines'} ) {
5433                flush();
5434                $file_writer_object->write_blank_code_line();
5435                $last_line_leading_type = 'b';
5436            }
5437            $last_line_had_side_comment = 0;
5438            return;
5439        }
5440
5441        # see if this is a static block comment (starts with ##)
5442        my $is_static_block_comment                       = 0;
5443        my $is_static_block_comment_without_leading_space = 0;
5444        if ( $jmax == 0
5445            && $$rtoken_type[0] eq '#'
5446            && $rOpts->{'static-block-comments'}
5447            && $input_line =~ /$static_block_comment_pattern/o )
5448        {
5449            $is_static_block_comment                       = 1;
5450            $is_static_block_comment_without_leading_space =
5451              ( length($1) <= 0 );
5452        }
5453
5454        # create a hanging side comment if appropriate
5455        if (
5456            $jmax == 0
5457            && $$rtoken_type[0] eq '#'    # only token is a comment
5458            && $last_line_had_side_comment    # last line had side comment
5459            && $input_line =~ /^\s/           # there is some leading space
5460            && !$is_static_block_comment    # do not make static comment hanging
5461            && $rOpts->{'hanging-side-comments'}    # user is allowing this
5462          )
5463        {
5464
5465            # We will insert an empty qw string at the start of the token list
5466            # to force this comment to be a side comment. The vertical aligner
5467            # should then line it up with the previous side comment.
5468            unshift @$rtoken_type,            'q';
5469            unshift @$rtokens,                '';
5470            unshift @$rlevels,                $$rlevels[0];
5471            unshift @$rslevels,               $$rslevels[0];
5472            unshift @$rblock_type,            '';
5473            unshift @$rcontainer_type,        '';
5474            unshift @$rcontainer_environment, '';
5475            unshift @$rtype_sequence,         '';
5476            unshift @$rnesting_tokens,        $$rnesting_tokens[0];
5477            unshift @$rci_levels,             $$rci_levels[0];
5478            unshift @$rnesting_blocks,        $$rnesting_blocks[0];
5479            $jmax = 1;
5480        }
5481
5482        # remember if this line has a side comment
5483        $last_line_had_side_comment =
5484          ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
5485
5486        # Handle a block (full-line) comment..
5487        if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
5488
5489            if ( $rOpts->{'delete-block-comments'} ) { return }
5490
5491            if ( $rOpts->{'tee-block-comments'} ) {
5492                $file_writer_object->tee_on();
5493            }
5494            flush();
5495
5496            # output a blank line before block comments
5497            if (
5498                $last_line_leading_type !~ /^[#b]$/
5499                && $rOpts->{'blanks-before-comments'}    # only if allowed
5500                && !
5501                $is_static_block_comment    # never before static block comments
5502              )
5503            {
5504                $file_writer_object->write_blank_code_line();
5505                $last_line_leading_type = 'b';
5506            }
5507
5508            if ( $rOpts->{'indent-block-comments'}
5509                && !$is_static_block_comment_without_leading_space )
5510            {
5511
5512                extract_token(0);
5513                store_token_to_go();
5514                flush();
5515            }
5516            else {
5517                $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
5518                $last_line_leading_type = '#';
5519            }
5520            if ( $rOpts->{'tee-block-comments'} ) {
5521                $file_writer_object->tee_off();
5522            }
5523            return;
5524        }
5525
5526        # compare input/output indentation except for continuation lines
5527        # (because they have an unknown amount of initial blank space)
5528        # and lines which are quotes (because they may have been outdented)
5529        # Note: this test is placed here because we know the continuation flag
5530        # at this point, which allows us to avoid non-meaningful checks.
5531        my $structural_indentation_level = $$rlevels[0];
5532        compare_indentation_levels( $python_indentation_level,
5533            $structural_indentation_level )
5534          unless ( ( $$rci_levels[0] > 0 )
5535            || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
5536          );
5537
5538=pod
5539
5540     Patch needed for MakeMaker.  Do not break a statement
5541     in which $VERSION may be calculated.  See MakeMaker.pm;
5542     this is based on the coding in it.
5543     The first line of a file that matches this will be eval'd:
5544         /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5545     Examples:
5546       *VERSION = \'1.01';
5547       ( $VERSION ) = '$Revision: 1.1 $ ' =~ /\$Revision:\s+([^\s]+)/;
5548     We will pass such a line straight through (by changing it
5549     to a quoted string) unless -npvl is used
5550
5551     But note: this means that the formatter will not see every token,
5552     which complicates things.  For example, loops which look at
5553     block sequence numbers may see a closing sequence number but not
5554     the corresponding opening sequence number (sidecmt.t).  Example:
5555
5556    my $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf
5557    "%d."."%02d" x $#r, @r };
5558
5559    Here, the opening brace of 'do {' will not be seen, while the closing
5560    '}' will be seen as an individual token.
5561
5562=cut
5563
5564        my $is_VERSION_statement = 0;
5565
5566        if (
5567            !$saw_VERSION_in_this_file
5568            && $input_line =~ /VERSION/    # quick check to reject most lines
5569            && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5570          )
5571        {
5572            $saw_VERSION_in_this_file = 1;
5573            $is_VERSION_statement     = 1;
5574            write_logfile_entry("passing VERSION line; -npvl deactivates\n");
5575        }
5576
5577        # take care of indentation-only
5578        # also write a line which is entirely a 'qw' list
5579        if ( $is_VERSION_statement
5580            || $rOpts->{'indent-only'}
5581            || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
5582        {
5583            flush();
5584            $input_line =~ s/^\s*//;    # trim left end
5585
5586            unless ( $rOpts->{'indent-only'} ) {
5587                $input_line =~ s/\s*$//;    # trim right end
5588            }
5589
5590            extract_token(0);
5591            $token                 = $input_line;
5592            $type                  = 'q';
5593            $block_type            = "";
5594            $container_type        = "";
5595            $container_environment = "";
5596            $type_sequence         = "";
5597            store_token_to_go();
5598            output_line_to_go();
5599            return;
5600        }
5601
5602        push ( @$rtokens,     ' ', ' ' );  # making $j+2 valid simplifies coding
5603        push ( @$rtoken_type, 'b', 'b' );
5604        ($rwhite_space_flag) =
5605          set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
5606
5607        # find input tabbing to allow checks for tabbing disagreement
5608        $input_line_tabbing = "";
5609        if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
5610
5611        # if the buffer hasn't been flushed, add a leading space if
5612        # necessary to keep essential whitespace. This is really only
5613        # necessary if we are squeezing out all ws.
5614        if ( $max_index_to_go >= 0 ) {
5615
5616            $old_line_count_in_batch++;
5617
5618            if (
5619                is_essential_whitespace(
5620                    $last_last_nonblank_token,
5621                    $last_last_nonblank_type,
5622                    $tokens_to_go[$max_index_to_go],
5623                    $types_to_go[$max_index_to_go],
5624                    $$rtokens[0],
5625                    $$rtoken_type[0]
5626                )
5627              )
5628            {
5629                my $slevel = $$rslevels[0];
5630                insert_new_token_to_go( ' ', 'b', $slevel,
5631                    $no_internal_newlines );
5632            }
5633        }
5634
5635        # If we just saw the end of an elsif block, write nag message
5636        # if we do not see another elseif or an else.
5637        if ($looking_for_else) {
5638
5639            unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
5640                write_logfile_entry("(No else block)\n");
5641            }
5642            $looking_for_else = 0;
5643        }
5644
5645        # This is a good place to kill incomplete one-line blocks
5646        if ( ( $semicolons_before_block_self_destruct == 0 )
5647            && ( $max_index_to_go >= 0 )
5648            && ( $types_to_go[$max_index_to_go] eq ';' )
5649            && ( $$rtokens[0] ne '}' ) )
5650        {
5651            destroy_one_line_block();
5652            output_line_to_go();
5653        }
5654
5655        # loop to process the tokens one-by-one
5656        $type  = 'b';
5657        $token = "";
5658
5659        for ( $j = 0 ; $j <= $jmax ; $j++ ) {
5660
5661            # pull out the local values for this token
5662            extract_token($j);
5663
5664            if ( $type eq '#' ) {
5665
5666                # trim trailing whitespace
5667                # (there is no option at present to prevent this)
5668                $token =~ s/\s*$//;
5669
5670                if (
5671                    $rOpts->{'delete-side-comments'}
5672
5673                    # delete closing side comments if necessary
5674                    || ( $rOpts->{'delete-closing-side-comments'}
5675                        && $token =~ /$closing_side_comment_prefix_pattern/o
5676                        && $last_nonblank_block_type =~
5677                        /$closing_side_comment_list_pattern/o )
5678                  )
5679                {
5680                    if ( $types_to_go[$max_index_to_go] eq 'b' ) {
5681                        unstore_token_to_go();
5682                    }
5683                    last;
5684                }
5685            }
5686
5687            # If we are continuing after seeing a right curly brace, flush
5688            # buffer unless we see what we are looking for, as in
5689            #   } else ...
5690            if ( $brace_follower_pattern && $type ne 'b' ) {
5691
5692                unless ( $token =~ /$brace_follower_pattern/ ) {
5693                    output_line_to_go();
5694                }
5695                $brace_follower_pattern = undef;
5696            }
5697
5698            $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
5699            $next_nonblank_token      = $$rtokens[$j_next];
5700            $next_nonblank_token_type = $$rtoken_type[$j_next];
5701
5702            #--------------------------------------------------------
5703            # Start of patch section
5704            #--------------------------------------------------------
5705
5706            # Modify certain tokens here for whitespace
5707            # The following is not yet done, but could be:
5708            #   sub (x x x)
5709            # These become type 'i', space and all.
5710            if ( $type eq 'i' or $type eq 't' ) {
5711
5712                # change "$  var"  to "$var" etc
5713                if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
5714                    $token =~ s/\s*//g;
5715                }
5716
5717                if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
5718            }
5719
5720            # change 'LABEL   :'   to 'LABEL:'
5721            elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
5722
5723            # patch to add space to something like "x10"
5724            # This avoids having to split this token in the pre-tokenizer
5725            elsif ( $type eq 'n' ) {
5726                if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
5727            }
5728
5729            elsif ( $type eq 'Q' ) {
5730                note_embedded_tab() if ( $token =~ "\t" );
5731            }
5732
5733            # trim blanks from right of qw quotes
5734            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
5735            elsif ( $type eq 'q' ) {
5736                $token =~ s/\s*$//;
5737                note_embedded_tab() if ( $token =~ "\t" );
5738            }
5739
5740            #--------------------------------------------------------
5741            # End of patch section
5742            #--------------------------------------------------------
5743
5744            # insert any needed whitespace
5745            if ( ( $type ne 'b' )
5746                && ( $max_index_to_go >= 0 )
5747                && ( $types_to_go[$max_index_to_go] ne 'b' )
5748                && $rOpts_add_whitespace )
5749            {
5750                my $ws = $$rwhite_space_flag[$j];
5751
5752                if ( $ws == 1 ) {
5753                    insert_new_token_to_go( ' ', 'b', $slevel,
5754                        $no_internal_newlines );
5755                }
5756            }
5757
5758            # Do not allow breaks which would promote a side comment to a
5759            # block comment.  In order to allow a break before an opening
5760            # or closing BLOCK, followed by a side comment, those sections
5761            # of code will handle this flag separately.
5762            my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
5763            my $is_opening_BLOCK =
5764              ( $type eq '{'
5765                  && $token eq '{'
5766                  && $block_type
5767                  && $block_type ne 't' );
5768            my $is_closing_BLOCK =
5769              ( $type eq '}'
5770                  && $token eq '}'
5771                  && $block_type
5772                  && $block_type ne 't' );
5773
5774            if ( $side_comment_follows
5775                && !$is_opening_BLOCK
5776                && !$is_closing_BLOCK )
5777            {
5778                $no_internal_newlines = 1;
5779            }
5780
5781            # We're only going to handle breaking for code BLOCKS at this
5782            # (top) level.  Other indentation breaks will be handled by
5783            # sub scan_list, which is better suited to dealing with them.
5784            if ($is_opening_BLOCK) {
5785
5786                # Tentatively output this token.  This is required before
5787                # calling starting_one_line_block.  We may have to unstore
5788                # it, though, if we have to break before it.
5789                store_token_to_go($side_comment_follows);
5790
5791                # Look ahead to see if we might form a one-line block
5792                my $too_long =
5793                  starting_one_line_block( $j, $jmax, $level, $slevel,
5794                    $ci_level, $rtokens, $rtoken_type, $rblock_type );
5795                clear_breakpoint_undo_stack();
5796
5797                # to simplify the logic below, set a flag to indicate if
5798                # this opening brace is far from the keyword which introduces it
5799                my $keyword_on_same_line = 1;
5800                if ( ( $max_index_to_go >= 0 )
5801                    && ( $last_nonblank_type eq ')' ) )
5802                {
5803                    if ( $block_type =~ /^(if|else|elsif)$/
5804                        && ( $tokens_to_go[0] eq '}' )
5805                        && $rOpts_cuddled_else )
5806                    {
5807                        $keyword_on_same_line = 1;
5808                    }
5809                    elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
5810                    {
5811                        $keyword_on_same_line = 0;
5812                    }
5813                }
5814
5815                # decide if user requested break before '{'
5816                my $want_break =
5817
5818                  # use -bl flag if not a sub block of any type
5819                  $block_type !~ /^sub/
5820                  ? $rOpts->{'opening-brace-on-new-line'}
5821
5822                  # use -sbl flag unless this is an anonymous sub block
5823                  : $block_type !~ /^sub\W*$/
5824                  ? $rOpts->{'opening-sub-brace-on-new-line'}
5825
5826                  # do not break for anonymous subs
5827                  : 0;
5828
5829                # Break before an opening '{' ...
5830                if (
5831
5832                    # if requested
5833                    $want_break
5834
5835                    # and we were unable to start looking for a block,
5836                    # or the -nbob is in use
5837                    && ( $index_start_one_line_block == UNDEFINED_INDEX
5838                        || !$rOpts->{'break-after-opening-brace'} )
5839
5840                    # or if it will not be on same line as its keyword, so that
5841                    # it will be outdented (eval.t, overload.t), and the user
5842                    # has not insisted on keeping it on the right
5843                    || ( !$keyword_on_same_line
5844                        && !$rOpts->{'opening-brace-always-on-right'} )
5845
5846                  )
5847                {
5848
5849                    # but only if allowed
5850                    unless ($no_internal_newlines) {
5851
5852                        # since we already stored this token, we must unstore it
5853                        unstore_token_to_go();
5854
5855                        # then output the line
5856                        output_line_to_go();
5857
5858                        # and now store this token at the start of a new line
5859                        store_token_to_go($side_comment_follows);
5860                    }
5861                }
5862
5863                # Now update for side comment
5864                if ($side_comment_follows) { $no_internal_newlines = 1 }
5865
5866                # now output this line
5867                unless ( !$rOpts->{'break-after-opening-brace'}
5868                    && $block_type =~ /$bli_pattern/o
5869                    && $max_index_to_go == 0 )
5870                {
5871                    unless ($no_internal_newlines) {
5872                        output_line_to_go();
5873                    }
5874                }
5875            }
5876
5877            elsif ($is_closing_BLOCK) {
5878
5879                # If there is a pending one-line block ..
5880                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
5881
5882                    # we have to terminate it if..
5883                    if (
5884
5885                        # it is too long (final length may be different from
5886                        # initial estimate). note: must allow 1 space for this token
5887                        excess_line_length( $index_start_one_line_block,
5888                            $max_index_to_go ) >= 0
5889
5890                        # or if it has too many semicolons
5891                        || ( $semicolons_before_block_self_destruct == 0
5892                            && $last_nonblank_type ne ';' )
5893                      )
5894                    {
5895                        destroy_one_line_block();
5896                    }
5897                }
5898
5899                # put a break before this closing curly brace if appropriate
5900                unless ( $no_internal_newlines
5901                    || $index_start_one_line_block != UNDEFINED_INDEX )
5902                {
5903
5904                    # add missing semicolon if ...
5905                    # there are some tokens
5906                    if (
5907                        ( $max_index_to_go > 0 )
5908                        &&
5909
5910                        # and we don't have one
5911                        ( $last_nonblank_type ne ';' )
5912
5913                        # patch until some block type issues are fixed:
5914                        # Do not add semi-colon for block types '{', '}', and ';'
5915                        # because we cannot be sure yet that this
5916                        # is a block and not an anonomyous hash
5917                        # (blktype.t, blktype1.t)
5918                        && ( $block_type !~ /^[\{\};]$/ )
5919
5920                        # and we are allowed to do so.
5921                        && $rOpts->{'add-semicolons'}
5922                      )
5923                    {
5924
5925                        save_current_token();
5926                        $token  = ';';
5927                        $type   = ';';
5928                        $level  = $levels_to_go[$max_index_to_go];
5929                        $slevel = $nesting_depth_to_go[$max_index_to_go];
5930                        $nesting_blocks =
5931                          $nesting_blocks_to_go[$max_index_to_go];
5932                        $ci_level       = $ci_levels_to_go[$max_index_to_go];
5933                        $block_type     = "";
5934                        $container_type = "";
5935                        $container_environment = "";
5936                        $type_sequence         = "";
5937
5938                        # Note - we remove any blank AFTER extracting its
5939                        # parameters such as level, etc, above
5940                        if ( $types_to_go[$max_index_to_go] eq 'b' ) {
5941                            unstore_token_to_go();
5942                        }
5943                        store_token_to_go();
5944
5945                        note_added_semicolon();
5946                        restore_current_token();
5947                    }
5948
5949                    # then write out everything before this closing curly brace
5950                    output_line_to_go();
5951                }
5952
5953                # Now update for side comment
5954                if ($side_comment_follows) { $no_internal_newlines = 1 }
5955
5956                # store the closing curly brace
5957                store_token_to_go();
5958
5959                # ok, we just stored a closing curly brace.  Often, but
5960                # not always, we want to end the line immediately.
5961                # So now we have to check for special cases.
5962
5963                # if this '}' successfully ends a one-line block..
5964                my $is_one_line_block = 0;
5965                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
5966
5967                    $is_one_line_block = 1;
5968
5969                    # we have to actually make it by removing tentative
5970                    # breaks that were set within it
5971                    undo_forced_breakpoint_stack(0);
5972                    set_nobreaks( $index_start_one_line_block,
5973                        $max_index_to_go - 1 );
5974
5975                    # then re-initialize for the next one-line block
5976                    destroy_one_line_block();
5977
5978                    # then decide if we want to break after the '}' ..
5979                    # We will keep going to allow certain brace followers as in:
5980                    #   do { $ifclosed = 1; last } unless $losing;
5981                    #
5982                    # But make a line break if the curly ends a significant block:
5983                    if ( $block_type =~ /^(until|while|for|if|elsif|else)$/ ) {
5984                        output_line_to_go() unless ($no_internal_newlines);
5985                    }
5986                }
5987
5988                # set string indicating what we need to look for brace follower
5989                # tokens
5990                if ( $block_type eq 'do' ) {
5991                    $brace_follower_pattern = $do_follower_pattern;
5992                }
5993                elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
5994                    $brace_follower_pattern = $if_brace_follower_pattern;
5995                }
5996                elsif ( $block_type eq 'else' ) {
5997                    $brace_follower_pattern = $else_brace_follower_pattern;
5998                }
5999
6000                # added eval for borris.t
6001                elsif ( $block_type =~ /^(sort|map|grep|eval)$/ ) {
6002                    $brace_follower_pattern = "";
6003                }
6004
6005                # anonymous sub
6006                elsif ( $block_type =~ /^sub\W*$/ ) {
6007
6008                    if ($is_one_line_block) {
6009                        $brace_follower_pattern =
6010                          $anon_sub_1_brace_follower_pattern;
6011                    }
6012                    else {
6013                        $brace_follower_pattern =
6014                          $anon_sub_brace_follower_pattern;
6015                    }
6016                }
6017
6018                # None of the above:
6019                # include here everything you would allow to follow a short block
6020                # which is not an if/elsif/else/do/sort/map/grep/eval
6021                # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
6022                # break1.t
6023                else {
6024                    $brace_follower_pattern = $other_brace_follower_pattern;
6025                }
6026
6027                # See if an elsif block is followed by another elsif or else;
6028                # complain if not.
6029                if ( $block_type eq 'elsif' ) {
6030
6031                    if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
6032                        $looking_for_else = 1;    # ok, check on next line
6033                    }
6034                    else {
6035
6036                        unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
6037                            write_logfile_entry("No else block :(\n");
6038                        }
6039                    }
6040                }
6041
6042                # Note: continue blocks are always un-cuddled for now, but
6043                # this is the place to allow cuddled continues
6044
6045                # keep going after these block types: map,sort,grep
6046                # added eval for borris.t
6047                if ( $block_type =~ /^(sort|grep|map|eval)$/ ) {
6048
6049                    # keep going
6050                }
6051
6052                # if no more tokens, postpone decision until re-entring
6053                elsif ( ( $next_nonblank_token_type eq 'b' )
6054                    && $rOpts->{'add-newlines'} )
6055                {
6056                    unless ($brace_follower_pattern) {
6057                        output_line_to_go() unless ($no_internal_newlines);
6058                    }
6059                }
6060
6061                elsif ($brace_follower_pattern) {
6062
6063                    unless ( $next_nonblank_token =~ /$brace_follower_pattern/ )
6064                    {
6065                        output_line_to_go() unless ($no_internal_newlines);
6066                    }
6067                    $brace_follower_pattern = undef;
6068                }
6069
6070                else {
6071                    output_line_to_go() unless ($no_internal_newlines);
6072                }
6073
6074            }    # end treatment of closing block token
6075
6076            # handle semicolon
6077            elsif ( $type eq ';' ) {
6078
6079                # kill one-line blocks with too many semicolons
6080                $semicolons_before_block_self_destruct--;
6081                if (
6082                    ( $semicolons_before_block_self_destruct < 0 )
6083                    || ( $semicolons_before_block_self_destruct == 0
6084                        && $next_nonblank_token_type !~ /^[b\}]$/ )
6085                  )
6086                {
6087                    destroy_one_line_block();
6088                }
6089
6090                if (
6091                    ( $last_nonblank_token eq '}' )
6092                    && ( $last_nonblank_block_type =~
6093                        /^(if|else|elsif|unless|while|for|foreach)$/ )
6094                  )
6095                {
6096
6097                    if (
6098                        $rOpts->{'delete-semicolons'}
6099
6100                        # don't delete ; before a # because it would promote it
6101                        # to a block comment
6102                        && ( $next_nonblank_token_type ne '#' )
6103                      )
6104                    {
6105                        note_deleted_semicolon();
6106                        output_line_to_go()
6107                          unless ( $no_internal_newlines
6108                            || $index_start_one_line_block != UNDEFINED_INDEX );
6109                        next;
6110                    }
6111                    else {
6112                        write_logfile_entry("Extra ';'\n");
6113                    }
6114                }
6115                store_token_to_go();
6116
6117                output_line_to_go()
6118                  unless ( $no_internal_newlines
6119                    || ( $next_nonblank_token eq '}' ) );
6120
6121            }
6122
6123            # handle here_doc target string
6124            elsif ( $type eq 'h' ) {
6125                $no_internal_newlines =
6126                  1;    # no newlines after seeing here-target
6127                destroy_one_line_block();
6128                store_token_to_go();
6129            }
6130
6131            # handle all other token types
6132            else {
6133
6134                # if this is a blank...
6135                if ( $type eq 'b' ) {
6136
6137                    # make it just one character
6138                    $token = ' ' if $rOpts_add_whitespace;
6139
6140                    # delete it if unwanted by whitespace rules
6141                    # or we are deleting all whitespace
6142                    my $ws = $$rwhite_space_flag[ $j + 1 ];
6143                    if ( ( defined($ws) && $ws == -1 )
6144                        || $rOpts_delete_old_whitespace )
6145                    {
6146
6147                        # unless it might make a syntax error
6148                        next
6149                          unless is_essential_whitespace(
6150                            $last_last_nonblank_token,
6151                            $last_last_nonblank_type,
6152                            $tokens_to_go[$max_index_to_go],
6153                            $types_to_go[$max_index_to_go],
6154                            $$rtokens[ $j + 1 ],
6155                            $$rtoken_type[ $j + 1 ]
6156                          );
6157                    }
6158                }
6159                store_token_to_go();
6160            }
6161
6162            # remember two previous nonblank OUTPUT tokens
6163            if ( $type ne '#' && $type ne 'b' ) {
6164                $last_last_nonblank_token = $last_nonblank_token;
6165                $last_last_nonblank_type  = $last_nonblank_type;
6166                $last_nonblank_token      = $token;
6167                $last_nonblank_type       = $type;
6168                $last_nonblank_block_type = $block_type;
6169            }
6170
6171            # unset the continued-quote flag since it only applies to the
6172            # first token, and we want to resume normal formatting if
6173            # there are additional tokens on the line
6174            $in_continued_quote = 0;
6175
6176        }    # end of loop over all tokens in this 'line_of_tokens'
6177
6178        # we have to flush ..
6179        if (
6180
6181            # if there is a side comment
6182            ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
6183
6184            # if this line which ends in a quote
6185            || $in_quote
6186
6187            # to keep a label on one line if that is how it is now
6188            || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
6189
6190            # if we are instructed to keep all old line breaks
6191            || !$rOpts->{'delete-old-newlines'}
6192          )
6193        {
6194            destroy_one_line_block();
6195            output_line_to_go();
6196        }
6197
6198        # mark old line breakpoints in current output stream
6199        if ( $max_index_to_go >= 0 && !$rOpts->{'ignore-old-line-breaks'} ) {
6200            $old_breakpoint_to_go[$max_index_to_go] = 1;
6201        }
6202    }
6203}    # end closure print_line_of_tokens
6204
6205sub note_added_semicolon {
6206    $last_added_semicolon_at = $input_line_number;
6207    if ( $added_semicolon_count == 0 ) {
6208        $first_added_semicolon_at = $last_added_semicolon_at;
6209    }
6210    $added_semicolon_count++;
6211    write_logfile_entry("Added ';' here\n");
6212}
6213
6214sub note_deleted_semicolon {
6215    $last_deleted_semicolon_at = $input_line_number;
6216    if ( $deleted_semicolon_count == 0 ) {
6217        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
6218    }
6219    $deleted_semicolon_count++;
6220    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
6221}
6222
6223sub note_embedded_tab {
6224    $embedded_tab_count++;
6225    $last_embedded_tab_at = $input_line_number;
6226    if ( !$first_embedded_tab_at ) {
6227        $first_embedded_tab_at = $last_embedded_tab_at;
6228    }
6229
6230    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
6231        write_logfile_entry("Embedded tabs in quote or pattern\n");
6232    }
6233}
6234
6235sub starting_one_line_block {
6236
6237    # after seeing an opening curly brace, look for the closing brace
6238    # and see if the entire block will fit on a line.  This routine is
6239    # not always right because it uses the old whitespace, so a check
6240    # is made later (at the closing brace) to make sure we really
6241    # have a one-line block.  We have to do this preliminary check,
6242    # though, because otherwise we would always break at a semicolon
6243    # within a one-line block if the block contains multiple statements.
6244
6245    my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
6246        $rblock_type )
6247      = @_;
6248
6249    # kill any current block - we can only go 1 deep
6250    destroy_one_line_block();
6251
6252    # return value:
6253    #  1=distance from start of block to opening brace exceeds line length
6254    #  0=otherwise
6255
6256    my $i_start = 0;
6257
6258    # shouldn't happen: there must have been a prior call to
6259    # store_token_to_go to put the opening brace in the output stream
6260    if ( $max_index_to_go < 0 ) {
6261        warning("program bug: store_token_to_go called incorrectly\n");
6262        report_definite_bug();
6263    }
6264    else {
6265
6266        # cannot use one-line blocks with cuddled else else/elsif lines
6267        if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
6268            return 0;
6269        }
6270    }
6271
6272    my $block_type = $$rblock_type[$j];
6273
6274    # find the starting keyword for this block (such as 'if', 'else', ...)
6275
6276    if ( $block_type =~ /^[\{\}\;\:]$/ ) {
6277        $i_start = $max_index_to_go;
6278    }
6279
6280    elsif ( $last_last_nonblank_token_to_go eq ')' ) {
6281
6282        # For something like "if (xxx) {", the keyword "if" will be
6283        # just after the most recent break. This will be 0 unless
6284        # we have just killed a one-line block and are starting another.
6285        # (doif.t)
6286        $i_start = $index_max_forced_break + 1;
6287        if ( $types_to_go[$i_start] eq 'b' ) {
6288            $i_start++;
6289        }
6290
6291        unless ( $tokens_to_go[$i_start] eq $block_type ) {
6292            return 0;
6293        }
6294    }
6295
6296    # the previous nonblank token should start these block types
6297    elsif ( ( $last_last_nonblank_token_to_go eq $block_type )
6298        || ( $block_type =~ /^sub/
6299            && $last_last_nonblank_token_to_go =~ /^sub/ ) )
6300    {
6301        $i_start = $last_last_nonblank_index_to_go;
6302    }
6303
6304    else {
6305        return 1;
6306    }
6307
6308    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
6309
6310    my $i;
6311
6312    # see if length is too long to even start
6313    if ( $pos > $rOpts_maximum_line_length ) {
6314        return 1;
6315    }
6316
6317    for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
6318
6319        # old whitespace could be arbitrarily large, so don't use it
6320        if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
6321        else { $pos += length( $$rtokens[$i] ) }
6322
6323        # Return false result if we exceed the maximum line length,
6324        if ( $pos > $rOpts_maximum_line_length ) {
6325            return 0;
6326        }
6327
6328        # or encounter another opening brace before finding the closing brace.
6329        elsif ( $$rtokens[$i] eq '{'
6330            && $$rtoken_type[$i] eq '{'
6331            && $$rblock_type[$i] )
6332        {
6333            return 0;
6334        }
6335
6336        # if we find our closing brace..
6337        elsif ( $$rtokens[$i] eq '}'
6338            && $$rtoken_type[$i] eq '}'
6339            && $$rblock_type[$i] )
6340        {
6341
6342            # be sure any trailing comment also fits on the line
6343            my $i_nonblank =
6344              ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
6345
6346            if ( $$rtoken_type[$i_nonblank] eq '#' ) {
6347                $pos += length( $$rtokens[$i_nonblank] );
6348
6349                if ( $i_nonblank > $i + 1 ) {
6350                    $pos += length( $$rtokens[ $i + 1 ] );
6351                }
6352
6353                if ( $pos > $rOpts_maximum_line_length ) {
6354                    return 0;
6355                }
6356            }
6357
6358            # ok, it's a one-line block
6359            create_one_line_block( $i_start, 20 );
6360            return 0;
6361        }
6362
6363        # just keep going for other characters
6364        else {
6365        }
6366    }
6367
6368    # Allow certain types of new one-line blocks to form by joining
6369    # input lines.  These can be safely done, but for other block types,
6370    # we keep old one-line blocks but do not form new ones. It is not
6371    # always a good idea to make as many one-line blocks as possible,
6372    # so other types are not done.  The user can always use -mangle.
6373    if ( $block_type =~ /^(eval|map|grep|sort)/ ) {
6374        create_one_line_block( $i_start, 1 );
6375    }
6376
6377    return 0;
6378}
6379
6380sub unstore_token_to_go {
6381
6382    # remove most recent token from output stream
6383    if ( $max_index_to_go > 0 ) {
6384        $max_index_to_go--;
6385    }
6386    else {
6387        $max_index_to_go = UNDEFINED_INDEX;
6388    }
6389
6390}
6391
6392sub want_blank_line {
6393    flush();
6394    $file_writer_object->want_blank_line();
6395}
6396
6397sub write_unindented_line {
6398    flush();
6399    $file_writer_object->write_line( $_[0] );
6400}
6401
6402sub correct_lp_indentation {
6403
6404    # When the -lp option is used, we need to make a last pass through
6405    # each line to correct the indentation positions in case they differ
6406    # from the predictions.  This is necessary because perltidy uses a
6407    # predictor/corrector method for aligning with opening parens.  The
6408    # predictor is usually good, but sometimes stumbles.  The corrector
6409    # tries to patch things up once the actual opening paren locations
6410    # are known.
6411    my ( $ri_first, $ri_last ) = @_;
6412    my $do_not_pad = 0;
6413
6414=pod
6415
6416    Note on flag '$do_not_pad':
6417    We want to avoid a situation like this, where the aligner inserts
6418    whitespace before the '=' to align it with a previous '=', because
6419    otherwise the parens might become mis-aligned in a situation like
6420    this, where the '=' has become aligned with the previous line,
6421    pushing the opening '(' forward beyond where we want it.
6422
6423    $mkFloor::currentRoom = '';
6424    $mkFloor::c_entry     = $c->Entry(
6425                                   -width        => '10',
6426                                   -relief       => 'sunken',
6427                                   ...
6428                                   );
6429
6430    We leave it to the aligner to decide how to do this.
6431
6432=cut
6433
6434    # looking at each line of this batch..
6435    my $max_line = @$ri_first - 1;
6436    my ( $ibeg, $iend );
6437    for my $line ( 0 .. $max_line ) {
6438        $ibeg = $$ri_first[$line];
6439        $iend = $$ri_last[$line];
6440
6441        # looking at each token in this output line..
6442        my $i;
6443        foreach $i ( $ibeg .. $iend ) {
6444
6445            # looking for next unvisited indentation item
6446            my $indentation = $leading_spaces_to_go[$i];
6447            if ( !$indentation->get_MARKED() ) {
6448                $indentation->set_MARKED(1);
6449
6450                # looking for indentation item for which we are aligning
6451                # with parens, braces, and brackets
6452                next unless ( $indentation->get_ALIGN_PAREN() );
6453
6454                if ( $line == 1 && $i == $ibeg ) {
6455                    $do_not_pad = 1;
6456                }
6457
6458                # Ok, let's see what the error is and try to fix it
6459                my $actual_pos;
6460                my $predicted_pos = $indentation->get_SPACES();
6461                if ( $i > $ibeg ) {
6462
6463                    # token is mid-line - use length to previous token
6464                    $actual_pos = total_line_length( $ibeg, $i - 1 );
6465                }
6466                elsif ( $line > 0 ) {
6467
6468                    # handle case where token starts a new line;
6469                    # use length of previous line
6470                    my $ibegm = $$ri_first[ $line - 1 ];
6471                    my $iendm = $$ri_last[ $line - 1 ];
6472                    $actual_pos = total_line_length( $ibegm, $iendm );
6473
6474                    # follow -pt style
6475                    ++$actual_pos if $types_to_go[ $iendm + 1 ] eq 'b';
6476                }
6477                else {
6478
6479                    # token is first character of first line of batch
6480                    $actual_pos = $predicted_pos;
6481                }
6482
6483                my $move_right = $actual_pos - $predicted_pos;
6484
6485                # done if no error to correct (gnu2.t)
6486                # next unless ($move_right);
6487                if ( $move_right == 0 ) {
6488                    $indentation->set_RECOVERABLE_SPACES($move_right);
6489                    next;
6490                }
6491
6492                # if we have not seen closure for this indentation in
6493                # this batch, we can only pass on a request to the
6494                # vertical aligner
6495                my $closing_index = $indentation->get_CLOSED();
6496
6497                if ( $closing_index < 0 ) {
6498                    $indentation->set_RECOVERABLE_SPACES($move_right);
6499                    next;
6500                }
6501
6502                # If necessary, look ahead to see if there is really any
6503                # leading whitespace dependent on this whitespace, and
6504                # also find the longest line using this whitespace.
6505                # Since it is always safe to move left if there are no
6506                # dependents, we only need to do this if we may have
6507                # dependent nodes or need to move right.
6508
6509                my $right_margin = 0;
6510                my $have_child   = $indentation->get_HAVE_CHILD();
6511                if ( $have_child || $move_right > 0 ) {
6512                    $have_child = 0;
6513                    my $max_length = 0;
6514                    if ( $i == $ibeg ) {
6515                        $max_length = total_line_length( $ibeg, $iend );
6516                    }
6517
6518                    # look ahead at the rest of the lines of this batch..
6519                    my $line_t;
6520                    foreach $line_t ( $line + 1 .. $max_line ) {
6521                        my $ibeg_t = $$ri_first[$line_t];
6522                        my $iend_t = $$ri_last[$line_t];
6523                        last if ( $closing_index <= $ibeg_t );
6524
6525                        # If this is a dependent we will not move the text;
6526                        # it is very rare to get here and if we do it implies
6527                        # a short complicated statement that will probably
6528                        # look ok unchanged.
6529                        my $indentation_t = $leading_spaces_to_go[$ibeg_t];
6530                        if ( $indentation_t != $indentation ) {
6531                            $have_child = 1;
6532                            last;
6533                        }
6534                        my $length_t = total_line_length( $ibeg_t, $iend_t );
6535
6536                        if ( $length_t > $max_length ) {
6537                            $max_length = $length_t;
6538                        }
6539                    }
6540                    $right_margin = $rOpts_maximum_line_length - $max_length;
6541                    if ( $right_margin < 0 ) { $right_margin = 0 }
6542                }
6543
6544                # This is a simple approximate test for vertical alignment:
6545                # if we broke just after an opening paren, brace, bracket,
6546                # then we are probably vertically aligned.  We could
6547                # set an exact flag in sub scan_list, but this is good
6548                # enough.
6549                my $is_vertically_aligned = ( $i == $ibeg );
6550
6551                # Make the move if possible ..
6552                # Must not be any dependent indentation..
6553                if (
6554                    !$have_child
6555
6556                    # we can always move left, but we can only move right if
6557                    # we are sure it will not spoil vertical alignment,
6558                    # which is implied by ending commas
6559                    && ( $move_right < 0 || !$is_vertically_aligned )
6560                  )
6561                {
6562                    my $move =
6563                      ( $move_right <= $right_margin )
6564                      ? $move_right
6565                      : $right_margin;
6566                    $indentation->permanently_decrease_AVAILABLE_SPACES(
6567                        -$move );
6568                }
6569
6570                # Otherwise, record what we want and the vertical aligner
6571                # will try to recover it.
6572                else {
6573                    $indentation->set_RECOVERABLE_SPACES($move_right);
6574                }
6575            }
6576        }
6577    }
6578    return $do_not_pad;
6579}
6580
6581# flush is called to output any tokens in the pipeline, so that
6582# an alternate source of lines can be written in the correct order
6583
6584sub flush {
6585    destroy_one_line_block();
6586    output_line_to_go();
6587    PerlTidy::VerticalAligner::flush();
6588}
6589
6590# output_line_to_go sends one logical line of tokens on down the
6591# pipeline to the VerticalAligner package, breaking the line into continuation
6592# lines as necessary.  The line of tokens is ready to go in the "to_go"
6593# arrays.
6594
6595sub output_line_to_go {
6596
6597    # debug stuff; this routine can be called from many points
6598    FORMATTER_DEBUG_FLAG_OUTPUT && do {
6599        my ( $a, $b, $c ) = caller;
6600        write_diagnostics(
6601"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
6602        );
6603        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
6604        write_diagnostics("$output_str\n");
6605    };
6606
6607    # just set a tentative breakpoint if we might be in a one-line block
6608    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
6609        set_forced_breakpoint($max_index_to_go);
6610        return;
6611    }
6612
6613    my $cscw_block_comment;
6614    $cscw_block_comment = add_closing_side_comment()
6615      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
6616
6617    match_opening_and_closing_tokens();
6618
6619    # tell the -lp option we are outputting a batch so it can close
6620    # any unfinished items in its stack
6621    finish_lp_batch();
6622
6623    my $imin = 0;
6624    my $imax = $max_index_to_go;
6625
6626    # trim any blank tokens
6627    if ( $max_index_to_go >= 0 ) {
6628        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
6629        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
6630    }
6631
6632    # anything left to write?
6633    if ( $imin <= $imax ) {
6634
6635        # add a blank line before certain key types
6636        if ( $last_line_leading_type !~ /^[#b]/ ) {
6637            my $want_blank    = 0;
6638            my $leading_token = $tokens_to_go[$imin];
6639            my $leading_type  = $types_to_go[$imin];
6640
6641            # blank lines before subs except declarations and one-liners
6642            # MCONVERSION LOCATION - for sub tokenization change
6643            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
6644                $want_blank = ( $rOpts->{'blanks-before-subs'} )
6645                  && (
6646                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
6647                        $imax ) !~ /^[\;\}]$/
6648                  );
6649            }
6650
6651            # break before all package declarations
6652            # MCONVERSION LOCATION - for tokenizaton change
6653            elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
6654                $want_blank = ( $rOpts->{'blanks-before-subs'} );
6655            }
6656
6657            # break before certain key blocks except one-liners
6658            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
6659                $want_blank = ( $rOpts->{'blanks-before-subs'} )
6660                  && (
6661                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
6662                        $imax ) ne '}'
6663                  );
6664            }
6665
6666            # Break before certain block types if we haven't had a break at this
6667            # level for a while.  This is the difficult decision..
6668            elsif ( $leading_token =~ /^(unless|if|while|until|for|foreach)$/
6669                && $leading_type eq 'k'
6670                && $last_line_leading_level >= 0 )
6671            {
6672                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
6673                if ( !defined($lc) ) { $lc = 0 }
6674
6675                $want_blank =
6676                  $rOpts->{'blanks-before-blocks'}
6677                  && $lc >= $rOpts->{'long-block-line-count'}
6678                  && $file_writer_object->get_consecutive_nonblank_lines() >=
6679                  $rOpts->{'long-block-line-count'}
6680                  && (
6681                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
6682                        $imax ) ne '}'
6683                  );
6684            }
6685
6686            if ($want_blank) {
6687
6688                # future: send blank line down normal path to VerticalAligner
6689                PerlTidy::VerticalAligner::flush();
6690                $file_writer_object->write_blank_code_line();
6691            }
6692        }
6693
6694        # update blank line variables and count number of consecutive
6695        # non-blank, non-comment lines at this level
6696        $last_last_line_leading_level = $last_line_leading_level;
6697        $last_line_leading_level      = $levels_to_go[$imin];
6698        $last_line_leading_type       = $types_to_go[$imin];
6699        if ( $last_line_leading_level == $last_last_line_leading_level
6700            && $last_line_leading_level >= 0
6701            && $last_line_leading_type ne 'b'
6702            && $last_line_leading_type ne '#'
6703            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
6704        {
6705            $nonblank_lines_at_depth[$last_line_leading_level]++;
6706        }
6707        else {
6708            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
6709        }
6710
6711        FORMATTER_DEBUG_FLAG_FLUSH && do {
6712            my ( $package, $file, $line ) = caller;
6713            print
6714"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
6715        };
6716
6717        # add a couple of extra terminal blank tokens
6718        pad_array_to_go();
6719
6720        # set all forced breakpoints for good list formatting
6721        my $saw_good_break = 0;
6722        my $is_long_line   = excess_line_length( $imin, $max_index_to_go ) > 0;
6723        if (
6724            $max_index_to_go > 0
6725            && ( $is_long_line
6726                || $old_line_count_in_batch > 1
6727                || is_unbalanced_batch() )
6728          )
6729        {
6730            $saw_good_break = scan_list();
6731        }
6732
6733        # let $ri_first and $ri_last be references to lists of
6734        # first and last tokens of line fragments to output..
6735        my ( $ri_first, $ri_last );
6736
6737        # write a single line if..
6738        if (
6739
6740            # we aren't allowed to add any newlines
6741            !$rOpts->{'add-newlines'}
6742
6743            # or, we don't already have an interior breakpoint
6744            # and we didn't see a good breakpoint
6745            || (
6746                !$forced_breakpoint_count
6747                && !$saw_good_break
6748
6749                # and this line is 'short'
6750                && !$is_long_line
6751            )
6752          )
6753        {
6754            @$ri_first = ($imin);
6755            @$ri_last  = ($imax);
6756        }
6757
6758        # otherwise use multiple lines
6759        else {
6760
6761            ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
6762
6763            # now we do a correction step to clean this up a bit
6764            # (The only time we would not do this is for debugging)
6765            if ( $rOpts->{'recombine'} ) {
6766                ( $ri_first, $ri_last ) =
6767                  recombine_breakpoints( $ri_first, $ri_last );
6768            }
6769        }
6770
6771        # do corrector step if -lp option is used
6772        my $do_not_pad = 0;
6773        if ($rOpts_line_up_parentheses) {
6774            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
6775        }
6776        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
6777    }
6778    prepare_for_new_input_lines();
6779
6780    # output any new -cscw block comment
6781    if ($cscw_block_comment) {
6782        flush();
6783        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
6784    }
6785}
6786
6787sub reset_block_text_accumulator {
6788    $accumulating_text_for_block        = "";
6789    $leading_block_text                 = "";
6790    $leading_block_text_level           = 0;
6791    $leading_block_text_length_exceeded = 0;
6792    $leading_block_text_line_number     = 0;
6793}
6794
6795sub set_block_text_accumulator {
6796    my $i = shift;
6797    $accumulating_text_for_block    = $tokens_to_go[$i];
6798    $leading_block_text             = "";
6799    $leading_block_text_level       = $levels_to_go[$i];
6800    $leading_block_text_line_number =
6801      $vertical_aligner_object->get_output_line_number();
6802    $leading_block_text_length_exceeded = 0;
6803}
6804
6805sub accumulate_block_text {
6806    my $i = shift;
6807
6808    # accumulate leading text, but ignore any side comments
6809    if ( $accumulating_text_for_block
6810        && !$leading_block_text_length_exceeded
6811        && $types_to_go[$i] ne '#' )
6812    {
6813
6814        # do not add more characters than allowed
6815        if (
6816            length($leading_block_text) <
6817            $rOpts->{'closing-side-comment-maximum-text'} )
6818        {
6819
6820            # add an extra space at each newline
6821            if ( $i == 0 ) {
6822                $leading_block_text .= ' ';
6823            }
6824
6825            # add the token text
6826            $leading_block_text .= $tokens_to_go[$i];
6827        }
6828
6829        # show that text was truncated if necessary
6830        elsif ( $types_to_go[$i] ne 'b' ) {
6831            $leading_block_text_length_exceeded = 1;
6832            $leading_block_text .= '...';
6833        }
6834    }
6835}
6836
6837sub accumulate_csc_text {
6838
6839    # called once per output buffer when -csc is used. Accumulates
6840    # the text placed after certain closing block braces.
6841    # Defines and returns the following for this buffer:
6842
6843    my $block_leading_text   = "";    # the leading text of the last '}'
6844    my $i_block_leading_text = -1;    # index of token owning block_leading_text
6845    my $block_line_count     = 100;   # how many lines the block spans
6846    my $terminal_type        = 'b';   # type of last nonblank token
6847    my $i_terminal           = 0;     # index of last nonblank token
6848
6849    for my $i ( 0 .. $max_index_to_go ) {
6850        my $type       = $types_to_go[$i];
6851        my $block_type = $block_type_to_go[$i];
6852        my $token      = $tokens_to_go[$i];
6853
6854        # remember last nonblank token type
6855        if ( $type ne '#' && $type ne 'b' ) {
6856            $terminal_type = $type;
6857            $i_terminal    = $i;
6858        }
6859
6860        my $type_sequence = $type_sequence_to_go[$i];
6861        if ( $block_type && $type_sequence ) {
6862
6863            if ( $token eq '}' ) {
6864
6865                if ( defined( $block_leading_text{$type_sequence} ) ) {
6866                    $block_leading_text   = $block_leading_text{$type_sequence};
6867                    $i_block_leading_text = $i;
6868                    delete $block_leading_text{$type_sequence};
6869                }
6870
6871                # if we run into a '}' then we probably started accumulating
6872                # at something like a trailing 'if' clause..no harm done.
6873                if ( $accumulating_text_for_block
6874                    && $levels_to_go[$i] <= $leading_block_text_level )
6875                {
6876                    my $lev = $levels_to_go[$i];
6877                    reset_block_text_accumulator();
6878                }
6879
6880                if ( defined( $block_opening_line_number{$type_sequence} ) ) {
6881                    my $output_line_number =
6882                      $vertical_aligner_object->get_output_line_number();
6883                    $block_line_count = $output_line_number -
6884                      $block_opening_line_number{$type_sequence} + 1;
6885                    delete $block_opening_line_number{$type_sequence};
6886                }
6887                else {
6888
6889                    # This can happen in the unusual case where a $VERSION line
6890                    # has been quoted to keep MakeMaker happy.  It is not a
6891                    # significant problem.
6892                    # warning(
6893                    #    "Note: block opening line undefined for this line\n");
6894                }
6895            }
6896
6897            elsif ( $token eq '{' ) {
6898
6899                my $line_number =
6900                  $vertical_aligner_object->get_output_line_number();
6901                $block_opening_line_number{$type_sequence} = $line_number;
6902
6903                if ( $accumulating_text_for_block
6904                    && $levels_to_go[$i] == $leading_block_text_level )
6905                {
6906                    if ( $accumulating_text_for_block eq $block_type ) {
6907                        $block_leading_text{$type_sequence} =
6908                          $leading_block_text;
6909                        $block_opening_line_number{$type_sequence} =
6910                          $leading_block_text_line_number;
6911                        reset_block_text_accumulator();
6912                    }
6913                    else {
6914
6915                        # shouldn't happen, but not a serious error.
6916                        # We were accumulating -csc text for block type
6917                        # $accumulating_text_for_block and unexpectedly
6918                        # encountered a '{' for block type $block_type.
6919                    }
6920                }
6921
6922            }
6923        }
6924
6925        if ( $type eq 'k'
6926            && $csc_new_statement_ok
6927            && $token =~ /^(if|elsif|unless|while|until|for|foreach)$/
6928            && $token =~ /$closing_side_comment_list_pattern/o )
6929        {
6930            set_block_text_accumulator($i);
6931        }
6932        else {
6933
6934            # note: ignoring type 'q' because of tricks being played with 'q'
6935            # for hanging side comments and $VERSION
6936            if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
6937                $csc_new_statement_ok =
6938                  ( $block_type || $type eq 'J' || $type eq ';' );
6939            }
6940            if ( $type eq ';'
6941                && $accumulating_text_for_block
6942                && $levels_to_go[$i] == $leading_block_text_level )
6943            {
6944                reset_block_text_accumulator();
6945            }
6946            else {
6947                accumulate_block_text($i);
6948            }
6949        }
6950    }
6951    return ( $terminal_type, $i_terminal, $i_block_leading_text,
6952        $block_leading_text, $block_line_count );
6953}
6954
6955sub add_closing_side_comment {
6956
6957    # add closing side comments after closing block braces if -csc used
6958    my $cscw_block_comment;
6959
6960    #---------------------------------------------------------------
6961    # Step 1: loop through all tokens of this line to accumulate
6962    # the text needed to create the closing side comments. Also see
6963    # how the line ends.
6964    #---------------------------------------------------------------
6965
6966    my ( $terminal_type, $i_terminal, $i_block_leading_text,
6967        $block_leading_text, $block_line_count )
6968      = accumulate_csc_text();
6969
6970    #---------------------------------------------------------------
6971    # Step 2: make the closing side comment if this ends a block
6972    #---------------------------------------------------------------
6973
6974    # if this line might end in a block closure..
6975    if (
6976        $terminal_type eq '}'
6977
6978        # .. and the block is not too short
6979        && ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
6980
6981        # .. and if this is one of the types of interest
6982        && $block_type_to_go[$i_terminal] =~
6983        /$closing_side_comment_list_pattern/o
6984
6985        # ..and the corresponding opening brace must is not in this batch
6986        # (because we do not need to tag one-line blocks, although this
6987        # should also be caught with a positive -csci value)
6988        && $mate_index_to_go[$i_terminal] < 0
6989
6990        # ..and either
6991        && (
6992
6993            # this is the last token (line doesnt have a side comment)
6994            $i_terminal eq $max_index_to_go
6995
6996            # or the old side comment is a closing side comment
6997            || $tokens_to_go[$max_index_to_go] =~
6998            /$closing_side_comment_prefix_pattern/o
6999        )
7000      )
7001    {
7002
7003        # then make the closing side comment text
7004        my $token =
7005"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
7006
7007        # append any extra descriptive text collected above
7008        if ( $i_block_leading_text == $i_terminal ) {
7009            $token .= $block_leading_text;
7010        }
7011        $token =~ s/\s*$//;    # trim any trailing whitespace
7012
7013        # handle case of existing closing side comment
7014        if ( $i_terminal != $max_index_to_go ) {
7015
7016            # warn if requested and tokens differ significantly
7017            if ( $rOpts->{'closing-side-comment-warnings'} ) {
7018                my $old_csc = $tokens_to_go[$max_index_to_go];
7019                my $new_csc = $token;
7020                $new_csc =~ s/\.\.\.\s*$//;    # trim trailing '...'
7021                $old_csc =~ s/\.\.\.\s*$//;
7022                $new_csc =~ s/\s+//g;          # trim all whitespace
7023                $old_csc =~ s/\s+//g;
7024
7025                # no problem if old comment is contained in new comment
7026                if ( length($new_csc) > length($old_csc) ) {
7027                    $new_csc = substr( $new_csc, 0, length($old_csc) );
7028                }
7029
7030                # any remaining difference?
7031                if ( $new_csc ne $old_csc ) {
7032                    warning(
7033"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
7034                    );
7035
7036                    # save the old side comment in a new trailing block comment
7037                    my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
7038                    $year  += 1900;
7039                    $month += 1;
7040                    $cscw_block_comment =
7041"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
7042                }
7043            }
7044
7045            # switch to the new csc
7046            $tokens_to_go[$max_index_to_go] = $token;
7047        }
7048
7049        # handle case of NO existing closing side comment
7050        else {
7051
7052            # insert the new side comment into the output token stream
7053            my $type                  = '#';
7054            my $block_type            = '';
7055            my $type_sequence         = '';
7056            my $container_environment =
7057              $container_environment_to_go[$max_index_to_go];
7058            my $level                = $levels_to_go[$max_index_to_go];
7059            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
7060            my $no_internal_newlines = 0;
7061
7062            my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
7063            my $ci_level           = $ci_levels_to_go[$max_index_to_go];
7064            my $in_continued_quote = 0;
7065
7066            # first insert a blank token
7067            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
7068
7069            # then the side comment
7070            insert_new_token_to_go( $token, $type, $slevel,
7071                $no_internal_newlines );
7072        }
7073    }
7074    return $cscw_block_comment;
7075}
7076
7077sub send_lines_to_vertical_aligner {
7078
7079    my ( $ri_first, $ri_last, $do_not_pad ) = @_;
7080
7081    my $rindentation_list = [0];    # ref to indentations for each line
7082
7083    set_vertical_alignment_markers( $ri_first, $ri_last );
7084
7085    # flush if necessary to avoid unwanted alignment
7086    my $must_flush = 0;
7087    if ( @$ri_first > 1 ) {
7088
7089        # flush before a long if statement
7090        if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
7091            $must_flush = 1;
7092        }
7093    }
7094    if ($must_flush) {
7095        PerlTidy::VerticalAligner::flush();
7096    }
7097
7098##    TESTING - in preparation for future update to adjust ci_levels in
7099##    certain cases for better alignment.
7100##    my @levs = @levels_to_go[ @$ri_first[0..@$ri_first-1]];
7101##    my @cilevs = @ci_levels_to_go[ @$ri_first[0..@$ri_first-1]];
7102##    print "BUB: levs are:(@levs) ci=(@cilevs)\n";
7103
7104    # loop to prepare each line for shipment
7105    for my $n ( 0 .. @$ri_first - 1 ) {
7106        my $ibeg = $$ri_first[$n];
7107        my $iend = $$ri_last[$n];
7108
7109        my @patterns = ();
7110        my @tokens   = ();
7111        my @fields   = ();
7112        my $i_start  = $ibeg;
7113        my $i;
7114
7115        my $j = 0;    # field index
7116
7117        $patterns[0] = "";
7118        for $i ( $ibeg .. $iend ) {
7119
7120            # if we find a new synchronization token, we are done with
7121            # a field
7122            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
7123
7124                # make separators in different nesting depths unique
7125                # by appending the nesting depth digit.
7126                my $tok = $matching_token_to_go[$i];
7127                if ( $tok ne '#' ) {
7128                    $tok .= "$nesting_depth_to_go[$i]";
7129                }
7130
7131                # concatenate the text of the consecutive tokens to form
7132                # the field
7133                push ( @fields,
7134                    join ( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
7135
7136                # store the alignment token for this field
7137                push ( @tokens, $tok );
7138
7139                # get ready for the next batch
7140                $i_start = $i;
7141                $j++;
7142                $patterns[$j] = "";
7143            }
7144
7145            # continue accumulating tokens
7146            # handle non-keywords..
7147            if ( $types_to_go[$i] ne 'k' ) {
7148                my $type = $types_to_go[$i];
7149
7150                # Mark most things before arrows as a quote to
7151                # get them to line up. Testfile: mixed.pl.
7152                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
7153                    my $next_type       = $types_to_go[ $i + 1 ];
7154                    my $i_next_nonblank =
7155                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
7156
7157                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
7158                        $type = 'Q';
7159                    }
7160                }
7161
7162                # minor patch to make numbers and quotes align
7163                if ( $type eq 'n' ) { $type = 'Q' }
7164
7165                $patterns[$j] .= $type;
7166            }
7167
7168            # for keywords we have to use the actual text
7169            else {
7170
7171                # map certain keywords to the same 'if' class to align
7172                # long if/elsif sequences. my testfile: elsif.pl
7173                my $tok = $tokens_to_go[$i];
7174                if ( $n == 0 ) {
7175                    if ( $tok eq 'elsif' )  { $tok = 'if' }
7176                    if ( $tok eq 'else' )   { $tok = 'if' }
7177                    if ( $tok eq 'unless' ) { $tok = 'if' }
7178                }
7179                $patterns[$j] .= $tok;
7180            }
7181        }
7182
7183        # done with this line .. join text of tokens to make the last field
7184        push ( @fields, join ( '', @tokens_to_go[ $i_start .. $iend ] ) );
7185
7186        my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
7187            $is_outdented_line )
7188          = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
7189            $ri_first, $ri_last, $rindentation_list );
7190
7191        # we will allow outdenting of long lines..
7192        my $outdent_long_lines = (
7193
7194            # which are long quotes, if allowed
7195            ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
7196
7197            # which are long block comments, if allowed
7198            || (
7199                $types_to_go[$ibeg] eq '#'
7200                && $rOpts->{'outdent-long-comments'}
7201
7202                # but not if this is a static block comment
7203                && !(
7204                    $rOpts->{'static-block-comments'}
7205                    && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
7206                )
7207            )
7208        );
7209
7210        # flush an outdented line to avoid any unwanted vertical alignment
7211        PerlTidy::VerticalAligner::flush() if ($is_outdented_line);
7212
7213        # send this new line down the pipe
7214        PerlTidy::VerticalAligner::append_line(
7215            $lev,                            $level_end,
7216            $indentation,                    \@fields,
7217            \@tokens,                        \@patterns,
7218            $forced_breakpoint_to_go[$iend], $outdent_long_lines,
7219            $is_semicolon_terminated,        $do_not_pad
7220        );
7221
7222        # flush an outdented line to avoid any unwanted vertical alignment
7223        PerlTidy::VerticalAligner::flush() if ($is_outdented_line);
7224
7225        $do_not_pad = 0;
7226
7227    }    # end of loop to output each line
7228
7229    # remember indentation of lines containing opening containers for
7230    # later use by sub set_adjusted_indentation
7231    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
7232}
7233
7234{        # begin closure unmatched_indexes
7235
7236    # closure to keep track of unbalanced containers.
7237    # arrays shared by the routines in this block:
7238    my @unmatched_opening_indexes_in_this_batch;
7239    my @unmatched_closing_indexes_in_this_batch;
7240
7241    sub is_unbalanced_batch {
7242        @unmatched_opening_indexes_in_this_batch +
7243          @unmatched_closing_indexes_in_this_batch;
7244    }
7245
7246    sub match_opening_and_closing_tokens {
7247
7248        # Match up indexes of opening and closing braces, etc, in this batch.
7249        # This has to be done after all tokens are stored because unstoring
7250        # of tokens would otherwise cause trouble.
7251
7252        @unmatched_opening_indexes_in_this_batch = ();
7253        @unmatched_closing_indexes_in_this_batch = ();
7254
7255        my ( $i, $i_mate, $token );
7256        foreach $i ( 0 .. $max_index_to_go ) {
7257            if ( $type_sequence_to_go[$i] ) {
7258                $token = $tokens_to_go[$i];
7259                if ( $token =~ /^[\(\[\{\?]$/ ) {
7260                    push @unmatched_opening_indexes_in_this_batch, $i;
7261                }
7262                elsif ( $token =~ /^[\)\]\}\:]$/ ) {
7263
7264                    $i_mate = pop @unmatched_opening_indexes_in_this_batch;
7265                    if ( defined($i_mate) && $i_mate >= 0 ) {
7266                        if ( $type_sequence_to_go[$i_mate] ==
7267                            $type_sequence_to_go[$i] )
7268                        {
7269                            $mate_index_to_go[$i]      = $i_mate;
7270                            $mate_index_to_go[$i_mate] = $i;
7271                        }
7272                        else {
7273                            push @unmatched_opening_indexes_in_this_batch,
7274                              $i_mate;
7275                            push @unmatched_closing_indexes_in_this_batch, $i;
7276                        }
7277                    }
7278                    else {
7279                        push @unmatched_closing_indexes_in_this_batch, $i;
7280                    }
7281                }
7282            }
7283        }
7284    }
7285
7286    sub save_opening_indentation {
7287
7288        # This should be called after each batch of tokens is output. It
7289        # saves indentations of lines of all unmatched opening tokens.
7290        # These will be used by sub get_opening_indentation.
7291
7292        my ( $ri_first, $ri_last, $rindentation_list ) = @_;
7293
7294        # we no longer need indentations of any saved indentations which
7295        # are unmatched closing tokens in this batch, because we will
7296        # never encounter them again.  So we can delete them to keep
7297        # the hash size down.
7298        foreach (@unmatched_closing_indexes_in_this_batch) {
7299            my $seqno = $type_sequence_to_go[$_];
7300            delete $saved_opening_indentation{$seqno};
7301        }
7302
7303        # we need to save indentations of any unmatched opening tokens
7304        # in this batch because we may need them in a subsequent batch.
7305        foreach (@unmatched_opening_indexes_in_this_batch) {
7306            my $seqno = $type_sequence_to_go[$_];
7307            $saved_opening_indentation{$seqno} = [
7308                lookup_opening_indentation(
7309                    $_,       $ri_first,
7310                    $ri_last, $rindentation_list
7311                )
7312            ];
7313        }
7314    }
7315}    # end closure unmatched_indexes
7316
7317sub get_opening_indentation {
7318
7319    # get the indentation of the line which output the opening token
7320    # corresponding to a given closing token in the current output batch.
7321    #
7322    # given:
7323    # $i_closing - index in this line of a closing token ')' '}' or ']'
7324    #
7325    # $ri_first - reference to list of the first index $i for each output
7326    #               line in this batch
7327    # $ri_last - reference to list of the last index $i for each output line
7328    #              in this batch
7329    # $rindentation_list - reference to a list containing the indentation
7330    #            used for each line.
7331    #
7332    # return:
7333    #   -the indentation of the line which contained the opening token
7334    #    which matches the token at index $i_opening
7335    #   -and its offset (number of columns) from the start of the line
7336    #
7337    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
7338
7339    # first, see if the opening token is in the current batch
7340    my $i_opening = $mate_index_to_go[$i_closing];
7341    my ( $indent, $offset );
7342    if ( $i_opening >= 0 ) {
7343
7344        # it is..look up the indentation
7345        ( $indent, $offset ) =
7346          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
7347            $rindentation_list );
7348    }
7349
7350    # if not, it should have been stored in the hash by a previous batch
7351    else {
7352        my $seqno = $type_sequence_to_go[$i_closing];
7353        if ($seqno) {
7354            if ( $saved_opening_indentation{$seqno} ) {
7355                ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
7356            }
7357        }
7358    }
7359    return ( $indent, $offset );
7360}
7361
7362sub lookup_opening_indentation {
7363
7364    # get the indentation of the line in the current output batch
7365    # which output a selected opening token
7366    #
7367    # given:
7368    #   $i_opening - index of an opening token in the current output batch
7369    #                whose line indentation we need
7370    #   $ri_first - reference to list of the first index $i for each output
7371    #               line in this batch
7372    #   $ri_last - reference to list of the last index $i for each output line
7373    #              in this batch
7374    #   $rindentation_list - reference to a list containing the indentation
7375    #            used for each line.  (NOTE: the first slot in
7376    #            this list is the last returned line number, and this is
7377    #            followed by the list of indentations).
7378    #
7379    # return
7380    #   -the indentation of the line which contained token $i_opening
7381    #   -and its offset (number of columns) from the start of the line
7382
7383    my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
7384
7385    my $nline = $rindentation_list->[0];    # line number of previous lookup
7386
7387    # reset line location if necessary
7388    $nline = 0 if ( $i_opening < $ri_start->[$nline] );
7389
7390    # find the correct line
7391    my $nmax = $#{$ri_last};
7392    unless ( $i_opening > $ri_last->[$nmax] ) {
7393        while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
7394    }
7395
7396    # error - token index is out of bounds - shouldn't happen
7397    else {
7398        warning(
7399"non-fatal program bug in lookup_opening_indentation - index out of range\n"
7400        );
7401        report_definite_bug();
7402        $nline = $nmax;
7403    }
7404
7405    $rindentation_list->[0] =
7406      $nline;    # save line number to start looking next call
7407    my $ibeg = $ri_start->[$nline];
7408    my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
7409    return ( $rindentation_list->[ $nline + 1 ], $offset );
7410}
7411
7412sub set_adjusted_indentation {
7413
7414    # This routine has the final say regarding the actual indentation of
7415    # a line.  It starts with the basic indentation which has been
7416    # defined for the leading token, and then takes into account any
7417    # options that the user has set regarding special indenting and
7418    # outdenting.
7419
7420    my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
7421        $rindentation_list )
7422      = @_;
7423
7424    # we need to know the last token of this line
7425    my ( $terminal_type, $i_terminal ) =
7426      terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
7427
7428    my $is_outdented_line = 0;
7429
7430    my $is_semicolon_terminated =
7431      $terminal_type eq ';'
7432      && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
7433
7434    # Most lines are indented according to the initial token.
7435    # But it is common to outdent to the level just after the
7436    # terminal token in certain cases...
7437    # adjust_indentation flag:
7438    #       0 - do not adjust
7439    #       1 - outdent
7440    #      -1 - indent
7441    my $adjust_indentation = 0;
7442
7443    my ( $opening_indentation, $opening_offset );
7444
7445    # if we are at a closing token of some type..
7446    if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
7447
7448        # get the indentation of the line containing the corresponding
7449        # opening token
7450        ( $opening_indentation, $opening_offset ) =
7451          get_opening_indentation( $ibeg, $ri_first, $ri_last,
7452            $rindentation_list );
7453
7454        # First set the default behavior:
7455        # default behavior is to outdent closing lines
7456        # of the form:   ");  };  ];  )->xxx;"
7457        if (
7458            $is_semicolon_terminated
7459
7460            # and 'cuddled parens' of the form:   ")->pack("
7461            || (
7462                $terminal_type eq '('
7463                && $types_to_go[$ibeg] eq ')'
7464                && ( $nesting_depth_to_go[$iend] + 1 ==
7465                    $nesting_depth_to_go[$ibeg] )
7466            )
7467          )
7468        {
7469            $adjust_indentation = 1;
7470        }
7471
7472        # TESTING: outdent something like '),'
7473        if (
7474            $terminal_type eq ','
7475
7476            # allow just one character before the comma
7477            && $i_terminal == $ibeg + 1
7478
7479            # requre LIST environment; otherwise, we may outdent too much --
7480            # this can happen in calls without parentheses (overload.t);
7481            && $container_environment_to_go[$i_terminal] eq 'LIST'
7482          )
7483        {
7484            $adjust_indentation = 1;
7485        }
7486
7487        # undo continuation indentation of a terminal closing token if
7488        # it is the last token before a level decrease.  This will allow
7489        # a closing token to line up with its opening counterpart, and
7490        # avoids a indentation jump larger than 1 level.
7491        if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
7492            && $i_terminal == $ibeg )
7493        {
7494            my $ci              = $ci_levels_to_go[$ibeg];
7495            my $lev             = $levels_to_go[$ibeg];
7496            my $next_type       = $types_to_go[ $ibeg + 1 ];
7497            my $i_next_nonblank =
7498              ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
7499            if ( $i_next_nonblank <= $max_index_to_go
7500                && $levels_to_go[$i_next_nonblank] < $lev )
7501            {
7502                $adjust_indentation = 1;
7503            }
7504        }
7505
7506        # Now modify default behavior according to user request:
7507        # handle option to indent non-blocks of the form );  };  ];
7508        if ( !$block_type_to_go[$ibeg] ) {
7509            if ( $rOpts->{'indent-closing-paren'}
7510                && $is_semicolon_terminated
7511                && $i_terminal == $ibeg + 1 )
7512            {
7513                $adjust_indentation = -1;
7514            }
7515        }
7516
7517        # handle option to indent blocks
7518        else {
7519            if (
7520                $rOpts->{'indent-closing-brace'}
7521                && (
7522                    $i_terminal == $ibeg    #  isolated terminal '}'
7523                    || $is_semicolon_terminated
7524                )
7525              )    #  } xxxx ;
7526            {
7527                $adjust_indentation = -1;
7528            }
7529        }
7530    }
7531
7532    # if at ');', '};', '>;', and '];' of a terminal qw quote
7533    elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^[\)\}\]\>];$/ ) {
7534        if ( !$rOpts->{'indent-closing-paren'} ) {
7535            $adjust_indentation = 1;
7536        }
7537        else {
7538            $adjust_indentation = -1;
7539        }
7540    }
7541
7542    # Handle variation in indentation styles...
7543    # Select the indentation object to define leading
7544    # whitespace.  If we are outdenting something like '} } );'
7545    # then we want to use one level below the last token
7546    # ($i_terminal) in order to get it to fully outdent through
7547    # all levels.
7548    my $indentation;
7549    my $lev;
7550    my $level_end = $levels_to_go[$iend];
7551
7552    if ( $adjust_indentation == 0 ) {
7553        $indentation = $leading_spaces_to_go[$ibeg];
7554        $lev         = $levels_to_go[$ibeg];
7555    }
7556    elsif ( $adjust_indentation == 1 ) {
7557        $indentation = $reduced_spaces_to_go[$i_terminal];
7558        $lev         = $levels_to_go[$i_terminal];
7559    }
7560    else {
7561
7562        # There are two ways to handle -icb and -icp...
7563        # One way is to use the indentation of the previous line:
7564        # $indentation = $last_indentation_written;
7565
7566        # The other way is to use the indentation that the previous line
7567        # would have had if it hadn't been adjusted:
7568        $indentation = $last_unadjusted_indentation;
7569
7570        # Current method: use the minimum of the two. This avoids inconsistent
7571        # indentation.
7572        if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) )
7573        {
7574            $indentation = $last_indentation_written;
7575        }
7576
7577        # use previous indentation but use own level
7578        # to cause list to be flushed properly
7579        $lev = $levels_to_go[$ibeg];
7580    }
7581
7582    # remember indentation except for multi-line quotes, which get
7583    # no indentation
7584    unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) {
7585        $last_indentation_written    = $indentation;
7586        $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
7587    }
7588
7589    # be sure lines with leading closing tokens are not outdented more
7590    # than the line which contained the corresponding opening token.
7591    if ( defined($opening_indentation) ) {
7592        if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
7593            $indentation = $opening_indentation;
7594        }
7595    }
7596
7597    # remember the indentation of each line of this batch
7598    push @{$rindentation_list}, $indentation;
7599
7600    # outdent lines with certain leading tokens...
7601    if (
7602
7603        # must be first word of this batch
7604        $ibeg == 0
7605
7606        # and be certain leading keywords if requested
7607        && ( $rOpts->{'outdent-keywords'}
7608            && $types_to_go[$ibeg] eq 'k'
7609            && $outdent_keyword{ $tokens_to_go[$ibeg] } )
7610
7611        # or labels if requested
7612        || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
7613      )
7614    {
7615        my $space_count = leading_spaces_to_go($ibeg);
7616        if ( $space_count > 0 ) {
7617            $space_count -= $rOpts_continuation_indentation;
7618            $is_outdented_line = 1;
7619            if ( $space_count < 0 ) { $space_count = 0 }
7620
7621            if ($rOpts_line_up_parentheses) {
7622                $indentation =
7623                  new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
7624            }
7625            else {
7626                $indentation = $space_count;
7627            }
7628        }
7629    }
7630
7631    return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
7632        $is_outdented_line );
7633}
7634
7635BEGIN {
7636    @_ = qw#{ ? : => = += -= =~ *= /= && || #;
7637    @is_vertical_alignment_type{@_} = (1) x scalar(@_);
7638}
7639
7640sub set_vertical_alignment_markers {
7641
7642    # Look at the tokens in this output batch and define the array
7643    # 'matching_token_to_go' which marks tokens at which we would
7644    # accept vertical alignment.
7645
7646    # nothing to do if we aren't allowed to change whitespace
7647    if ( !$rOpts_add_whitespace ) {
7648        for my $i ( 0 .. $max_index_to_go ) {
7649            $matching_token_to_go[$i] = '';
7650        }
7651        return;
7652    }
7653
7654    my ( $ri_first, $ri_last ) = @_;
7655
7656    # look at each line of this batch..
7657    my $last_vertical_alignment_before_index;
7658    my $vert_last_nonblank_type;
7659    my $vert_last_nonblank_block_type;
7660    my $max_line = @$ri_first - 1;
7661    my ( $i, $type, $token, $block_type, $last_nonblank_token,
7662        $alignment_type );
7663    my ( $ibeg, $iend );
7664    for my $line ( 0 .. $max_line ) {
7665        $ibeg = $$ri_first[$line];
7666        $iend = $$ri_last[$line];
7667        $last_vertical_alignment_before_index = -1;
7668        $vert_last_nonblank_type              = '';
7669        $vert_last_nonblank_block_type        = '';
7670
7671        # look at each token in this output line..
7672        foreach $i ( $ibeg .. $iend ) {
7673            $alignment_type = '';
7674            $type           = $types_to_go[$i];
7675            $block_type     = $block_type_to_go[$i];
7676            $token          = $tokens_to_go[$i];
7677
7678            #--------------------------------------------------------
7679            # First see if we want to align BEFORE this token
7680            #--------------------------------------------------------
7681
7682            # The first possible token that we can alignment_type before
7683            # is index 2 because: 1) it doesn't normally make sense to
7684            # alignment_type before the first token and 2) the second
7685            # token must be a blank if we are to alignment_type before
7686            # the third
7687            if ( $i < $ibeg + 2 ) {
7688            }
7689
7690            # TESTING : this causes too many bad side effects
7691            #elsif ( $type =~ /^(\[|L)$/ ) {
7692            #    $alignment_type = $type;
7693            #}
7694
7695            # must follow a blank token
7696            elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
7697            }
7698
7699            # align a side comment --
7700            elsif ( $type eq '#' ) {
7701
7702                unless (
7703
7704                    # it is a static side comment
7705                    (
7706                        $rOpts->{'static-side-comments'}
7707                        && $token =~ /$static_side_comment_pattern/o
7708                    )
7709
7710                    # or a closing side comment
7711                    || ( $vert_last_nonblank_block_type
7712                        && $token =~ /$closing_side_comment_prefix_pattern/o )
7713                  )
7714                {
7715                    $alignment_type = $type;
7716                }    ## Example of a static side comment
7717            }
7718
7719            # otherwise, do not alignment_type two in a row to create a
7720            # blank field
7721            elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
7722            }
7723
7724            # alignment_type before one of these keywords
7725            # (within a line, since $i>1)
7726            elsif ( $type eq 'k' ) {
7727                if ( $token =~ /^(if|unless|and|or|eq|ne)$/ ) {
7728                    $alignment_type = $token;
7729                }
7730            }
7731
7732            # We have to be very careful about alignment before opening parens.
7733            # It is ok to line up sequences like this:
7734            #    if    ( $something eq "simple" )  { &handle_simple }
7735            #    elsif ( $something eq "hard" )    { &handle_hard }
7736            elsif ( $type eq '(' ) {
7737                if ( ( $i == $ibeg + 2 )
7738                    && $tokens_to_go[$ibeg] =~ /^(if|elsif)/ )
7739                {
7740                    $alignment_type = $type;
7741                }
7742            }
7743
7744            # alignment_type before one of these types..
7745            # Note: add '.' after new vertical aligner is operational
7746            elsif ( $is_vertical_alignment_type{$type} ) {
7747                $alignment_type = $token;
7748
7749                # be sure the alignment tokens are unique
7750                # This didn't work well: reason not determined
7751                # if ($token ne $type) {$alignment_type .= $type}
7752            }
7753
7754            # NOTE: This is deactivated until the new vertical aligner
7755            # is finished because it causes the previous if/elsif alignment
7756            # to fail
7757            #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
7758            #    $alignment_type = $type;
7759            #}
7760
7761            if ($alignment_type) {
7762                $last_vertical_alignment_before_index = $i;
7763            }
7764
7765            #--------------------------------------------------------
7766            # Next see if we want to align AFTER the previous nonblank
7767            #--------------------------------------------------------
7768
7769            # We want to line up ',' and interior ';' tokens, with the added
7770            # space AFTER these tokens.  (Note: interior ';' is included
7771            # because it may occur in short blocks).
7772            if (
7773
7774                # we haven't already set it
7775                !$alignment_type
7776
7777                # and its not the first token of the line
7778                && ( $i > $ibeg )
7779
7780                # and it follows a blank
7781                && $types_to_go[ $i - 1 ] eq 'b'
7782
7783                # and previous token IS one of these:
7784                && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
7785
7786                # and it's NOT one of these
7787                && ( $type !~ /^[b\#\)\]\}]$/ )
7788
7789                # then go ahead and align
7790              )
7791
7792            {
7793                $alignment_type = $vert_last_nonblank_type;
7794            }
7795
7796            #--------------------------------------------------------
7797            # then store the value
7798            #--------------------------------------------------------
7799            $matching_token_to_go[$i] = $alignment_type;
7800            if ( $type ne 'b' ) {
7801                $vert_last_nonblank_type       = $type;
7802                $vert_last_nonblank_block_type = $block_type;
7803            }
7804        }
7805    }
7806}
7807
7808sub terminal_type {
7809
7810    #    returns type of last token on this line (terminal token), as follows:
7811    #    returns # for a full-line comment
7812    #    returns ' ' for a blank line
7813    #    otherwise returns final token type
7814
7815    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
7816
7817    # check for full-line comment..
7818    if ( $$rtype[$ibeg] eq '#' ) {
7819        return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
7820    }
7821    else {
7822
7823        # start at end and walk bakwards..
7824        for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
7825
7826            # skip past any side comment and blanks
7827            next if ( $$rtype[$i] eq 'b' );
7828            next if ( $$rtype[$i] eq '#' );
7829
7830            # found it..make sure it is a BLOCK termination,
7831            # but hide a terminal } after sort/grep/map because it is not
7832            # necessarily the end of the line.  (terminal.t)
7833            my $terminal_type = $$rtype[$i];
7834            if (
7835                $terminal_type eq '}'
7836                && ( !$$rblock_type[$i]
7837                    || ( $$rblock_type[$i] =~ /^(sort|grep|map|do|eval)$/ ) )
7838              )
7839            {
7840                $terminal_type = 'b';
7841            }
7842            return wantarray ? ( $terminal_type, $i ) : $terminal_type;
7843        }
7844
7845        # empty line
7846        return wantarray ? ( ' ', $ibeg ) : ' ';
7847    }
7848}
7849
7850sub set_bond_strengths {
7851
7852    BEGIN {
7853
7854        ###############################################################
7855        # NOTE: NO_BREAK's set here are HINTS which may not be honored;
7856        # essential NO_BREAKS's must be enforced in section 2, below.
7857        ###############################################################
7858
7859        # adding NEW_TOKENS: add a left and right bond strength by
7860        # mimmicking what is done for an existing token type.  You
7861        # can skip this step at first and take the default, then
7862        # tweak later to get desired results.
7863
7864        # The bond strengths should roughly follow precenence order where
7865        # possible.  If you make changes, please check the results very
7866        # carefully on a variety of scripts.
7867
7868        # no break around possible filehandle
7869        $left_bond_strength{'Z'}  = NO_BREAK;
7870        $right_bond_strength{'Z'} = NO_BREAK;
7871
7872        # never put a bare word on a new line:
7873        # example print (STDERR, "bla"); will fail with break after (
7874        $left_bond_strength{'w'} = NO_BREAK;
7875
7876        # blanks always have infinite strength to force breaks after real tokens
7877        $right_bond_strength{'b'} = NO_BREAK;
7878
7879        # try not to break on exponentation
7880        @_ = qw" ** .. ... <=> ";
7881        @left_bond_strength{@_}  = (STRONG) x scalar(@_);
7882        @right_bond_strength{@_} = (STRONG) x scalar(@_);
7883
7884        # The comma-arrow has very low precedence but not a good break point
7885        $left_bond_strength{'=>'}  = NO_BREAK;
7886        $right_bond_strength{'=>'} = NOMINAL;
7887
7888        # ok to break after label
7889        $left_bond_strength{'J'}  = NO_BREAK;
7890        $right_bond_strength{'J'} = NOMINAL;
7891        $left_bond_strength{'j'}  = STRONG;
7892        $right_bond_strength{'j'} = STRONG;
7893
7894        $left_bond_strength{'->'}  = STRONG;
7895        $right_bond_strength{'->'} = VERY_STRONG;
7896
7897        # breaking AFTER these is just ok:
7898        @_ = qw" % + - * / x  ";
7899        @left_bond_strength{@_}  = (STRONG) x scalar(@_);
7900        @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
7901
7902        # breaking BEFORE these is just ok:
7903        @_ = qw" >> << ";
7904        @right_bond_strength{@_} = (STRONG) x scalar(@_);
7905        @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
7906
7907        # I prefer breaking before the string concatenation operator
7908        # because it can be hard to see at the end of a line
7909        # swap these to break after a '.'
7910        # this could be a future option
7911        $right_bond_strength{'.'} = STRONG;
7912        $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
7913
7914        @_ = qw"} ] ) ";
7915        @left_bond_strength{@_}  = (STRONG) x scalar(@_);
7916        @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
7917
7918        # make these a little weaker than nominal so that they get
7919        # favored for end-of-line characters
7920        @_ = qw"!= == =~ !~";
7921        @left_bond_strength{@_}  = (STRONG) x scalar(@_);
7922        @right_bond_strength{@_} = ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
7923
7924        # break AFTER these
7925        @_ = qw" < >  | & >= <=";
7926        @left_bond_strength{@_}  = (VERY_STRONG) x scalar(@_);
7927        @right_bond_strength{@_} = ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
7928
7929        # breaking either before or after a quote is ok
7930        # but bias for breaking before a quote
7931        $left_bond_strength{'Q'}  = NOMINAL;
7932        $right_bond_strength{'Q'} = NOMINAL + 0.02;
7933        $left_bond_strength{'q'}  = NOMINAL;
7934        $right_bond_strength{'q'} = NOMINAL;
7935
7936        # starting a line with a keyword is usually ok
7937        $left_bond_strength{'k'} = NOMINAL;
7938
7939        # we usually want to bond a keyword strongly to what immediately
7940        # follows, rather than leaving it stranded at the end of a line
7941        $right_bond_strength{'k'} = STRONG;
7942
7943        $left_bond_strength{'G'}  = NOMINAL;
7944        $right_bond_strength{'G'} = STRONG;
7945
7946        # it is very good to break AFTER various assignment operators
7947        @_ = qw(
7948          = **= += *= &= <<= &&=
7949          -= /= |= >>= ||=
7950          .= %= ^=
7951          x=
7952        );
7953        @left_bond_strength{@_}  = (STRONG) x scalar(@_);
7954        @right_bond_strength{@_} =
7955          ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
7956
7957        # break BEFORE '&&' and '||'
7958        # set strength of '||' to same as '=' so that chains like
7959        # $a = $b || $c || $d   will break before the first '||'
7960        $right_bond_strength{'||'} = NOMINAL;
7961        $left_bond_strength{'||'}  = $right_bond_strength{'='};
7962
7963        # set strength of && a little higher than ||
7964        $right_bond_strength{'&&'} = NOMINAL;
7965        $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
7966
7967        $left_bond_strength{';'}  = VERY_STRONG;
7968        $right_bond_strength{';'} = VERY_WEAK;
7969        $left_bond_strength{'f'}  = VERY_STRONG;
7970
7971        # make right strength of for ';' a little less than '='
7972        # to make for contents break after the ';' to avoid this:
7973        #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
7974        #     $number_of_fields )
7975        # and make it weaker than ',' too
7976        $right_bond_strength{'f'} = VERY_WEAK - 0.001;
7977
7978        # The strengths of ?/: should be somewhere between
7979        # an '=' and a quote (NOMINAL),
7980        # make strength of ':' slightly less than '?' to help
7981        # break long chains of ? : after the colons
7982        $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
7983        $right_bond_strength{':'} = NO_BREAK;
7984        $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
7985        $right_bond_strength{'?'} = NO_BREAK;
7986
7987        $left_bond_strength{','}  = VERY_STRONG;
7988        $right_bond_strength{','} = VERY_WEAK;
7989    }
7990
7991    # patch-its always ok to break at end of line
7992    $nobreak_to_go[$max_index_to_go] = 0;
7993
7994    # adding a small 'bias' to strengths is a simple way to make a line
7995    # break at the first of a sequence of identical terms.  For example,
7996    # to force long string of conditional operators to break with
7997    # each line ending in a ':', we can add a small number to the bond
7998    # strength of each ':'
7999    my $colon_bias = 0;
8000    my $amp_bias   = 0;
8001    my $bar_bias   = 0;
8002    my $and_bias   = 0;
8003    my $or_bias    = 0;
8004    my $dot_bias   = 0;
8005    my $f_bias     = 0;
8006    my $code_bias  = -.01;
8007    my $type       = 'b';
8008    my $token      = ' ';
8009    my $last_type;
8010    my $last_nonblank_type  = $type;
8011    my $last_nonblank_token = $token;
8012    my $delta_bias          = 0.0001;
8013
8014    my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
8015        $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, );
8016
8017    # preliminary loop to compute bond strengths
8018    for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
8019        $last_type = $type;
8020        if ( $type ne 'b' ) {
8021            $last_nonblank_type  = $type;
8022            $last_nonblank_token = $token;
8023        }
8024        $type = $types_to_go[$i];
8025
8026        # strength on both sides of a blank is the same
8027        if ( $type eq 'b' && $last_type ne 'b' ) {
8028            $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
8029            next;
8030        }
8031
8032        $token               = $tokens_to_go[$i];
8033        $block_type          = $block_type_to_go[$i];
8034        $i_next              = $i + 1;
8035        $next_type           = $types_to_go[$i_next];
8036        $next_token          = $tokens_to_go[$i_next];
8037        $total_nesting_depth = $nesting_depth_to_go[$i_next];
8038        $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
8039        $next_nonblank_type  = $types_to_go[$i_next_nonblank];
8040        $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
8041
8042        # Some token chemistry...  The decision about where to break a
8043        # line depends upon a "bond strength" between tokens.  The LOWER
8044        # the bond strength, the MORE likely a break.  The strength
8045        # values are based on trial-and-error, and need to be tweaked
8046        # occasionally to get desired results.  Things to keep in mind
8047        # are:
8048        #   1. relative strengths are important.  small differences
8049        #      in strengths can make big formatting differences.
8050        #   2. each indentation level adds one unit of bond strength
8051        #   3. a value of NO_BREAK makes an unbreakable bond
8052        #   4. a value of VERY_WEAK is the strength of a ','
8053        #   5. values below NOMINAL are considered ok break points
8054        #   6. values above NOMINAL are considered poor break points
8055        # We are computing the strength of the bond between the current
8056        # token and the NEXT token.
8057        my $bond_str = VERY_STRONG;    # a default, high strength
8058
8059        #---------------------------------------------------------------
8060        # section 1:
8061        # use minimum of left and right bond strengths if defined;
8062        # digraphs and trigraphs like to break on their left
8063        #---------------------------------------------------------------
8064        my $bsr = $right_bond_strength{$type};
8065
8066        if ( !defined($bsr) ) {
8067
8068            if ( $is_digraph{$type} || $is_trigraph{$type} ) {
8069                $bsr = STRONG;
8070            }
8071            else {
8072                $bsr = VERY_STRONG;
8073            }
8074        }
8075
8076        if ( $token eq 'and' or $token eq 'or' ) {
8077            $bsr = NOMINAL;
8078        }
8079        elsif ( $token eq 'ne' or $token eq 'eq' ) {
8080            $bsr = NOMINAL;
8081        }
8082        my $bsl = $left_bond_strength{$next_nonblank_type};
8083
8084        # set terminal bond strength to the nominal value
8085        # this will cause good preceding breaks to be retained
8086        if ( $i_next_nonblank > $max_index_to_go ) {
8087            $bsl = NOMINAL;
8088        }
8089
8090        if ( !defined($bsl) ) {
8091
8092            if ( $is_digraph{$next_nonblank_type}
8093                || $is_trigraph{$next_nonblank_type} )
8094            {
8095                $bsl = WEAK;
8096            }
8097            else {
8098                $bsl = VERY_STRONG;
8099            }
8100        }
8101
8102        # make or, and slightly weaker than a ','
8103        if ( $next_nonblank_token eq 'or' ) {
8104            $bsl = VERY_WEAK - 0.02;
8105        }
8106        if ( $next_nonblank_token eq 'and' ) {
8107            $bsl = VERY_WEAK - 0.01;
8108        }
8109        elsif ( $next_nonblank_token eq 'ne' or $next_nonblank_token eq 'eq' ) {
8110            $bsl = NOMINAL;
8111        }
8112        elsif ( $next_nonblank_token =~ /^(lt|gt|le|ge)$/ ) {
8113            $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
8114        }
8115
8116        # Note: it might seem that we would want to keep a NO_BREAK if
8117        # either token has this value.  This didn't work, because in an
8118        # arrow list, it prevents the comma from separating from the
8119        # following bare word (which is probably quoted by its arrow).
8120        # So necessary NO_BREAK's have to be handled as special cases
8121        # in the final section.
8122        $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
8123        my $bond_str_1 = $bond_str;
8124
8125        #---------------------------------------------------------------
8126        # section 2:
8127        # special cases
8128        #---------------------------------------------------------------
8129
8130        # allow long lines before final { in an if statement, as in:
8131        #    if (..........
8132        #      ..........)
8133        #    {
8134        #
8135        # Otherwise, the line before the { tends to be too short.
8136        if ( $type eq ')' ) {
8137            if ( $next_nonblank_type eq '{' ) {
8138                $bond_str = VERY_WEAK + 0.03;
8139            }
8140        }
8141
8142        elsif ( $type eq '(' ) {
8143            if ( $next_nonblank_type eq '{' ) {
8144                $bond_str = NOMINAL;
8145            }
8146        }
8147
8148        # break on something like '} (', but keep this stronger than a ','
8149        # example is in 'howe.pl'
8150        elsif ( $type eq 'R' or $type eq '}' ) {
8151            if ( $next_nonblank_type eq '(' ) {
8152                $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
8153            }
8154        }
8155
8156        #-----------------------------------------------------------------
8157        # adjust bond strength bias
8158        #-----------------------------------------------------------------
8159
8160        elsif ( $type eq 'f' ) {
8161            $bond_str += $f_bias;
8162            $f_bias   += $delta_bias;
8163        }
8164
8165        # in long ?: conditionals, bias toward just one set per line (colon.t)
8166        elsif ( $type eq ':' ) {
8167            if ( !$want_break_before{$type} ) {
8168                $bond_str   += $colon_bias;
8169                $colon_bias += $delta_bias;
8170            }
8171        }
8172
8173        if ( $next_nonblank_type eq ':'
8174            && $want_break_before{$next_nonblank_type} )
8175        {
8176            $bond_str   += $colon_bias;
8177            $colon_bias += $delta_bias;
8178        }
8179
8180        # if leading '.' is used, align all but 'short' quotes;
8181        # the idea is to not place something like "\n" on a single line.
8182        elsif ( $next_nonblank_type eq '.' ) {
8183            if ( $want_break_before{'.'} ) {
8184                unless (
8185                    $last_nonblank_type eq '.'
8186                    && (
8187                        length($token) <=
8188                        $rOpts->{'short-concatenation-item-length'} )
8189                    && ( $token !~ /^[\)\]\}]$/ )
8190                  )
8191                {
8192                    $dot_bias += $delta_bias;
8193                }
8194                $bond_str += $dot_bias;
8195            }
8196        }
8197        elsif ( $next_nonblank_type eq '&&' ) {
8198            $bond_str += $amp_bias;
8199            $amp_bias += $delta_bias;
8200        }
8201        elsif ( $next_nonblank_type eq '||' ) {
8202            $bond_str += $bar_bias;
8203            $bar_bias += $delta_bias;
8204        }
8205        elsif ( $next_nonblank_type eq 'k' ) {
8206
8207            if ( $next_nonblank_token eq 'and' ) {
8208                $bond_str += $and_bias;
8209                $and_bias += $delta_bias;
8210            }
8211            elsif ( $next_nonblank_token eq 'or' ) {
8212                $bond_str += $or_bias;
8213                $or_bias  += $delta_bias;
8214            }
8215        }
8216
8217        # keep matrix and hash indices together
8218        # but make them a little below STRONG to allow breaking open
8219        # something like {'some-word'}{'some-very-long-word'} at the }{
8220        # (bracebrk.t)
8221        if ( ( $type eq ']' or $type eq 'R' )
8222            && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) )
8223        {
8224            $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
8225        }
8226
8227        # increase strength to the point where a break in the following
8228        # will be after the opening paren rather than at the arrow:
8229        #    $a->$b($c);
8230        if ( ( $type eq 'i' )
8231            && ( $next_nonblank_type eq 'i' )
8232            && ( $next_nonblank_token =~ /^->/ ) )
8233        {
8234            $bond_str = 1.45 * STRONG;
8235        }
8236
8237        # map1.t -- a quirk in perl
8238        if ( $token eq '('
8239            && $next_nonblank_type eq 'i'
8240            && $last_nonblank_type eq 'k'
8241            && $last_nonblank_token =~ /^(sort|map|grep)$/ )
8242        {
8243            $bond_str = NO_BREAK;
8244        }
8245
8246        # extrude.t: do not break before paren at:
8247        #    -l pid_filename(
8248        if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
8249            $bond_str = NO_BREAK;
8250        }
8251
8252        # good to break after end of code blocks
8253        if ( $type eq '}' && $block_type ) {
8254
8255            $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
8256            $code_bias += $delta_bias;
8257        }
8258
8259        if ( $type eq 'k' ) {
8260
8261            # allow certain control keywords to stand out
8262            if ( ( $next_nonblank_type eq 'k' )
8263                && ( $token =~ /^(last|next|redo|return)$/ ) )
8264            {
8265                $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
8266            }
8267
8268            # Don't break after keyword my.  This is a quick fix for a
8269            # rare problem with perl. An example is this line from file
8270            # Container.pm:
8271            # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
8272
8273            if ( $token eq 'my' ) {
8274                $bond_str = NO_BREAK;
8275            }
8276
8277        }
8278
8279        # good to break before 'if', 'unless', etc
8280        if ( $if_brace_follower_pattern
8281            && ( $next_nonblank_token =~ /$if_brace_follower_pattern/ ) )
8282        {
8283            $bond_str = VERY_WEAK;
8284        }
8285
8286        if ( $next_nonblank_type eq 'k' ) {
8287
8288            # keywords like 'unless' 'if' make good breaks
8289            if ( $do_follower_pattern
8290                && $next_nonblank_token =~ /$do_follower_pattern/ )
8291            {
8292                $bond_str = VERY_WEAK / 1.05;
8293            }
8294
8295        }
8296
8297        # try not to break before a comma-arrow
8298        elsif ( $next_nonblank_type eq '=>' ) {
8299            if ( $bond_str < STRONG ) { $bond_str = STRONG }
8300        }
8301
8302        if ( $type eq 'C' or $type eq 'U' ) {
8303
8304            # use strict requires that bare word and => not be separated
8305            if ( $next_nonblank_type eq '=>' ) {
8306                $bond_str = NO_BREAK;
8307            }
8308
8309        }
8310
8311        # use strict requires that bare word within braces not start new line
8312        elsif ( $type eq 'L' ) {
8313
8314            if ( $next_nonblank_type eq 'w' ) {
8315                $bond_str = NO_BREAK;
8316            }
8317        }
8318
8319        elsif ( $type eq 'w' ) {
8320
8321            if ( $next_nonblank_type eq 'R' ) {
8322                $bond_str = NO_BREAK;
8323            }
8324
8325            # use strict requires that bare word and => not be separated
8326            if ( $next_nonblank_type eq '=>' ) {
8327                $bond_str = NO_BREAK;
8328            }
8329        }
8330
8331        # in fact, use strict hates bare words on any new line.  For example,
8332        # a break before the underscore here provokes the wrath of use strict:
8333        #    if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
8334        elsif ( $type eq 'F' ) {
8335            $bond_str = NO_BREAK;
8336        }
8337
8338        # use strict does not allow separating type info from trailing { }
8339        # testfile is readmail.pl
8340        elsif ( $type eq 't' or $type eq 'i' ) {
8341
8342            if ( $next_nonblank_type eq 'L' ) {
8343                $bond_str = NO_BREAK;
8344            }
8345        }
8346
8347        # Do not break between a possible filehandle and a ? or /
8348        # and do not introduce a break after it if there is no blank (extrude.t)
8349        elsif ( $type eq 'Z' ) {
8350
8351            # dont break..
8352            if (
8353
8354                # if there is no blank and we do not want one. Examples:
8355                #    print $x++    # do not break after $x
8356                #    print HTML"HELLO"   # break ok after HTML
8357                (
8358                    $next_type ne 'b'
8359                    && defined( $want_left_space{$next_type} )
8360                    && $want_left_space{$next_type} == WS_NO
8361                )
8362
8363                # or we might be followed by the start of a quote
8364                || $next_nonblank_type =~ /^[\/\?]$/
8365              )
8366            {
8367                $bond_str = NO_BREAK;
8368            }
8369        }
8370
8371        # Do not break before a possible file handle
8372        #if ( ( $type eq 'Z' ) || ( $next_nonblank_type eq 'Z' ) ) {
8373        if ( $next_nonblank_type eq 'Z' ) {
8374            $bond_str = NO_BREAK;
8375        }
8376
8377        # patch to put cuddled elses back together when on multiple
8378        # lines, as in: } \n else \n { \n
8379        if ($rOpts_cuddled_else) {
8380
8381            if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
8382                || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
8383            {
8384                $bond_str = NO_BREAK;
8385            }
8386        }
8387
8388        # keep '}' together with ';'
8389        if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
8390            $bond_str = NO_BREAK;
8391        }
8392
8393        # never break between sub name and opening paren
8394        if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
8395            $bond_str = NO_BREAK;
8396        }
8397
8398        #---------------------------------------------------------------
8399        # section 3:
8400        # now take nesting depth into account
8401        #---------------------------------------------------------------
8402        # final strength incorporates the bond strength and nesting depth
8403        my $strength;
8404
8405        if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
8406            if ( $total_nesting_depth > 0 ) {
8407                $strength = $bond_str + $total_nesting_depth;
8408            }
8409            else {
8410                $strength = $bond_str;
8411            }
8412        }
8413        else {
8414            $strength = NO_BREAK;
8415        }
8416
8417        # always break after side comment
8418        if ( $type eq '#' ) { $strength = 0 }
8419
8420        $bond_strength_to_go[$i] = $strength;
8421
8422        FORMATTER_DEBUG_FLAG_BOND && do {
8423            my $str = substr( $token, 0, 15 );
8424            $str .= ' ' x ( 16 - length($str) );
8425            print
8426"BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
8427        };
8428    }
8429}
8430
8431sub pad_array_to_go {
8432
8433    # to simplify coding in scan_list and set_bond_strengths, it helps
8434    # to create some extra blank tokens at the end of the arrays
8435    $tokens_to_go[ $max_index_to_go + 1 ]        = '';
8436    $tokens_to_go[ $max_index_to_go + 2 ]        = '';
8437    $types_to_go[ $max_index_to_go + 1 ]         = 'b';
8438    $types_to_go[ $max_index_to_go + 2 ]         = 'b';
8439    $nesting_depth_to_go[ $max_index_to_go + 1 ] =
8440      $nesting_depth_to_go[$max_index_to_go];
8441
8442    if ( $types_to_go[$max_index_to_go] =~ /^[R\}\)\]]$/ ) {
8443        if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
8444
8445            # shouldn't happen:
8446            unless ( get_saw_brace_error() ) {
8447                warning(
8448"Program bug in scan_list: hit nesting error which should have been caught\n"
8449                );
8450                report_definite_bug();
8451            }
8452        }
8453        else {
8454            $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
8455        }
8456    }
8457    elsif ( $types_to_go[$max_index_to_go] =~ /^[L\{\(\[]$/ ) {
8458        $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
8459    }
8460}
8461
8462{    # begin closure scan_list
8463
8464    my (
8465        $block_type,                 $current_depth,
8466        $depth,                      $good_old_breakpoint,
8467        $i,                          $i_last_nonblank_token,
8468        $last_colon_sequence_number, $last_nonblank_token,
8469        $last_nonblank_type,         $last_old_breakpoint_count,
8470        $minimum_depth,              $next_nonblank_block_type,
8471        $next_nonblank_token,        $next_nonblank_type,
8472        $old_breakpoint_count,       $starting_breakpoint_count,
8473        $starting_depth,             $token,
8474        $type,                       $type_sequence,
8475    );
8476
8477    my (
8478        @breakpoint_stack,           @breakpoint_undo_stack,
8479        @comma_index,                @container_type,
8480        @identifier_count_stack,     @index_before_arrow,
8481        @interrupted_list,           @item_count_stack,
8482        @last_comma_index,           @last_equals_index,
8483        @last_nonblank_type,         @longest_term,
8484        @max_length,                 @maximum_nesting_depth,
8485        @old_breakpoint_count_stack, @opening_structure_index_stack,
8486        @rand_or_list,
8487    );
8488
8489    # routine to define essential variables when we go 'up' to
8490    # a new depth
8491    sub check_for_new_minimum_depth {
8492        my $depth = shift;
8493        if ( $depth < $minimum_depth ) {
8494
8495            $minimum_depth = $depth;
8496
8497            # these arrays need not retain values between calls
8498            $breakpoint_stack[$depth]              = $starting_breakpoint_count;
8499            $container_type[$depth]                = "";
8500            $identifier_count_stack[$depth]        = 0;
8501            $index_before_arrow[$depth]            = -1;
8502            $interrupted_list[$depth]              = 1;
8503            $item_count_stack[$depth]              = 0;
8504            $last_nonblank_type[$depth]            = "";
8505            $longest_term[$depth]                  = 0;
8506            $maximum_nesting_depth[$depth]         = $depth;
8507            $opening_structure_index_stack[$depth] = -1;
8508
8509            # these arrays must retain values between calls
8510            if ( !defined( $has_broken_sublist[$depth] ) ) {
8511                $dont_align[$depth]         = 0;
8512                $has_broken_sublist[$depth] = 0;
8513                $want_comma_break[$depth]   = 0;
8514            }
8515        }
8516    }
8517
8518    # routine to decide which commas to break at within a container;
8519    # returns:
8520    #   $bp_count = number of comma breakpoints set
8521    #   $do_not_break_apart = a flag indicating if container need not
8522    #     be broken open
8523    sub set_comma_breakpoints {
8524
8525        my $dd                 = shift;
8526        my $bp_count           = 0;
8527        my $do_not_break_apart = 0;
8528        if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
8529
8530            my $fbc = $forced_breakpoint_count;
8531            set_comma_breakpoints_do(
8532                $dd,
8533                $opening_structure_index_stack[$dd],
8534                $i,
8535                $item_count_stack[$dd],
8536                $identifier_count_stack[$dd],
8537                $comma_index[$dd],
8538                $max_length[$dd],
8539                $next_nonblank_type,
8540                $container_type[$dd],
8541                $interrupted_list[$dd],
8542                $maximum_nesting_depth[$dd],
8543                \$do_not_break_apart,
8544            );
8545            $bp_count = $forced_breakpoint_count - $fbc;
8546
8547            # always open comma lists not preceded by keywords,
8548            # barewords, identifiers (that is, anything that doesn't
8549            # look like a function call)
8550            if ($do_not_break_apart) {
8551                $do_not_break_apart = 0
8552                  if ( $last_nonblank_type[$dd] !~ /[kwiU]/ );
8553            }
8554        }
8555        return ( $bp_count, $do_not_break_apart );
8556    };
8557
8558    my %is_logical_container;
8559
8560    BEGIN {
8561        @_ = qw# if elsif unless while and or not && | || ? : ! #;
8562        @is_logical_container{@_} = (1) x scalar(@_);
8563    }
8564
8565    sub set_logical_breakpoints {
8566        my $dd        = shift;
8567        my $i_opening = $opening_structure_index_stack[$dd];
8568        if ( defined( $rand_or_list[$dd] )
8569            && $item_count_stack[$dd] == 0
8570            && $is_logical_container{ $container_type[$dd] } )
8571        {
8572            while ( my $j = pop ( @{ $rand_or_list[$dd] } ) ) {
8573                set_forced_breakpoint($j);
8574            }
8575        }
8576    }
8577
8578    sub update_longest_term {
8579        my $ii      = shift;
8580        my $depth   = shift;
8581        my $i_comma = $last_comma_index[$depth];
8582        if ( defined($i_comma) ) {
8583            my $length = token_sequence_length( $i_comma + 1, $ii );
8584            if ( $length > $longest_term[$depth] ) {
8585                $longest_term[$depth] = $length;
8586            }
8587        }
8588        else {
8589            my $i_opening = $opening_structure_index_stack[$depth];
8590            if ( $i_opening >= 0 ) {
8591                my $length = token_sequence_length( $i_opening + 1, $ii );
8592                $longest_term[$depth] = $length;
8593            }
8594        }
8595    }
8596
8597    sub is_unbreakable_container {
8598
8599        # never break one of these types (map1.t)
8600        my $dd = shift;
8601        $container_type[$dd] =~ /^(sort|map|grep)$/;
8602    }
8603
8604    sub scan_list {
8605
8606        # This routine is responsible for setting line breaks for all lists,
8607        # so that hierarchical structure can be displayed and so that list
8608        # items can be vertically aligned.  The output of this routine is
8609        # stored in the array @forced_breakpoint_to_go, which is used to set
8610        # final breakpoints.
8611
8612        $starting_depth = $nesting_depth_to_go[0];
8613
8614        $block_type                 = ' ';
8615        $current_depth              = $starting_depth;
8616        $good_old_breakpoint        = 0;
8617        $i                          = -1;
8618        $last_colon_sequence_number = -1;
8619        $last_nonblank_token        = ';';
8620        $last_nonblank_type         = ';';
8621        $last_old_breakpoint_count  = 0;
8622        $minimum_depth = $current_depth + 1;    # forces update in check below
8623        $old_breakpoint_count      = 0;
8624        $starting_breakpoint_count = $forced_breakpoint_count;
8625        $token                     = ';';
8626        $type                      = ';';
8627        $type_sequence             = '';
8628
8629        @breakpoint_stack              = ();
8630        @breakpoint_undo_stack         = ();
8631        @comma_index                   = ();
8632        @container_type                = ();
8633        @identifier_count_stack        = ();
8634        @index_before_arrow            = ();
8635        @interrupted_list              = ();
8636        @item_count_stack              = ();
8637        @last_comma_index              = ();
8638        @last_equals_index             = ();
8639        @last_nonblank_type            = ();
8640        @longest_term                  = ();
8641        @max_length                    = ();
8642        @maximum_nesting_depth         = ();
8643        @old_breakpoint_count_stack    = ();
8644        @opening_structure_index_stack = ();
8645        @rand_or_list                  = ();
8646
8647        check_for_new_minimum_depth($current_depth);
8648
8649        my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
8650        my $want_previous_breakpoint = 0;
8651
8652        # loop over all tokens in this batch
8653        while ( ++$i <= $max_index_to_go ) {
8654            if ( $type ne 'b' ) {
8655                $i_last_nonblank_token = $i - 1;
8656                $last_nonblank_type    = $type;
8657                $last_nonblank_token   = $token;
8658            }
8659            $type          = $types_to_go[$i];
8660            $block_type    = $block_type_to_go[$i];
8661            $token         = $tokens_to_go[$i];
8662            $type_sequence = $type_sequence_to_go[$i];
8663            my $next_type       = $types_to_go[ $i + 1 ];
8664            my $next_token      = $tokens_to_go[ $i + 1 ];
8665            my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
8666            $next_nonblank_type       = $types_to_go[$i_next_nonblank];
8667            $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
8668            $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
8669
8670            # set break if flag was set
8671            if ($want_previous_breakpoint) {
8672                set_forced_breakpoint( $i - 1 );
8673                $want_previous_breakpoint = 0;
8674            }
8675
8676            $last_old_breakpoint_count = $old_breakpoint_count;
8677            if ( $old_breakpoint_to_go[$i] ) {
8678                $old_breakpoint_count++;
8679
8680                # if old line broke before certain key types, we take that
8681                # as a cue that the user wants to break there
8682                if (
8683                    (
8684                        $next_nonblank_type =~ /^(\&\&|\|\|)$/
8685                        && $want_break_before{$next_nonblank_type}
8686                    )
8687                    || ( $next_nonblank_type eq 'k'
8688                        && $next_nonblank_token =~ /^(if|unless|and|or)$/ )
8689                  )
8690                {
8691                    $good_old_breakpoint++;
8692                }
8693
8694                # Break before certain keywords if user broke there and
8695                # this is a 'safe' break point. The idea is to retain
8696                # any preferred breaks for sequential list operations,
8697                # like a schwartzian transform.
8698                if (
8699                    $next_nonblank_type eq 'k'
8700                    && $next_nonblank_token =~ /^(sort|grep|map|eval)$/
8701                    && ( $type =~ /^[=\)\]\}Riw]$/
8702                        || $type eq 'k' && $token =~ /^(sort|grep|map|eval)$/ )
8703                  )
8704                {
8705
8706                    # we actually have to set this break next time through
8707                    # the loop because if we are at a closing token (such
8708                    # as '}') which forms a one-line block, this break might
8709                    # get undone.
8710                    $want_previous_breakpoint = 1;
8711                }
8712            }
8713            next if ( $type eq 'b' );
8714            $depth = $nesting_depth_to_go[ $i + 1 ];
8715
8716            # safety check - be sure we always break after a comment
8717            # Shouldn't happen .. an error here probably means that the
8718            # nobreak flag did not get turned off correctly during
8719            # formatting.
8720            if ( $type eq '#' ) {
8721                if ( $i != $max_index_to_go ) {
8722                    warning(
8723"Non-fatal program bug: backup logic needed to break after a comment\n"
8724                    );
8725                    report_definite_bug();
8726                    $nobreak_to_go[$i] = 0;
8727                    set_forced_breakpoint($i);
8728                }
8729            }
8730
8731            # Force breakpoints at certain tokens in long lines.
8732            # Note that such breakpoints will be undone later if these tokens
8733            # are fully contained within parens on a line.
8734            if ( $is_long_line
8735                && ( ( $type eq 'k' && $token =~ /^(if|unless)$/ ) ) )
8736            {
8737                set_forced_breakpoint( $i - 1 ) unless $i == 0;
8738            }
8739
8740            # remember locations of '||'  and '&&' for possible breaks if we decide
8741            # this is a long logical expression.
8742            if ( $type eq '||' ) { push @{ $rand_or_list[$depth] }, $i }
8743            if ( $type eq '&&' ) { push @{ $rand_or_list[$depth] }, $i }
8744            if ( $type eq 'k' && $token eq 'and' ) {
8745                push @{ $rand_or_list[$depth] }, $i;
8746            }
8747
8748            # break immediately at 'or's which are probably not in a logical
8749            # block -- but we will break in logical breaks below so that
8750            # they do not add to the forced_breakpoint_count
8751            if ( $type eq 'k' && $token eq 'or' ) {
8752                if ( $is_logical_container{ $container_type[$depth] } ) {
8753                    push @{ $rand_or_list[$depth] }, $i;
8754                }
8755                else {
8756                    if ($is_long_line) { set_forced_breakpoint($i) }
8757                }
8758            }
8759
8760            if ($type_sequence) {
8761
8762                # handle any postponed closing breakpoints
8763                if ( $token =~ /^[\)\]\}\:]$/ ) {
8764                    if ( $token eq ':' ) {
8765                        $last_colon_sequence_number = $type_sequence;
8766                    }
8767                    if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
8768                        my $inc = ( $token eq ':' ) ? 0 : 1;
8769                        set_forced_breakpoint( $i - $inc );
8770                        delete $postponed_breakpoint{$type_sequence};
8771                    }
8772                }
8773
8774                # set breaks at ?/: if they will get separated (and are not a ?/:
8775                # chain), or if the '?' is at the end of the line
8776                elsif ( $token eq '?' ) {
8777                    my $i_colon = $mate_index_to_go[$i];
8778                    if (
8779                        $i_colon <= 0  # the ':' is not in this batch
8780                        || $i == 0     # this '?' is the first token of the line
8781                        || $i ==
8782                        $max_index_to_go    # or this '?' is the last token
8783                      )
8784                    {
8785
8786                        # don't break at a '?' if preceded by ':' on this
8787                        # line of previous ?/: pair on this line.  This is
8788                        # an attempt to preserve a chain of ?/: expressions
8789                        # (elsif2.t).  And don't break if this has a side comment.
8790                        set_forced_breakpoint($i)
8791                          unless (
8792                            $type_sequence == (
8793                            $last_colon_sequence_number +
8794                            TYPE_SEQUENCE_INCREMENT
8795                            )
8796                            || $tokens_to_go[$max_index_to_go] eq '#'
8797                          );
8798                        set_closing_breakpoint($i);
8799                    }
8800                }
8801            }
8802
8803            #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
8804
8805            #------------------------------------------------------------
8806            # Handle Increasing Depth..
8807            #
8808            # prepare for a new list when depth increases
8809            # token $i is a '(','{', or '['
8810            #------------------------------------------------------------
8811            if ( $depth > $current_depth ) {
8812
8813                $breakpoint_stack[$depth]       = $forced_breakpoint_count;
8814                $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
8815                $has_broken_sublist[$depth]     = 0;
8816                $identifier_count_stack[$depth] = 0;
8817                $index_before_arrow[$depth]     = -1;
8818                $interrupted_list[$depth]       = 0;
8819                $item_count_stack[$depth]       = 0;
8820                $last_comma_index[$depth]       = undef;
8821                $last_equals_index[$depth]      = undef;
8822                $last_nonblank_type[$depth]     = $last_nonblank_type;
8823                $longest_term[$depth]           = 0;
8824                $maximum_nesting_depth[$depth]  = $depth;
8825                $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
8826                $opening_structure_index_stack[$depth] = $i;
8827                $rand_or_list[$depth]                  = [];
8828                $want_comma_break[$depth]              = 0;
8829
8830                # we want to remember keywords my, local, our
8831                $container_type[$depth] =
8832                  ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
8833                  ? $last_nonblank_token
8834                  : "";
8835
8836                # if line ends here then signal closing token to break
8837                if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
8838                {
8839                    set_closing_breakpoint($i);
8840                }
8841
8842                # Not all lists of values should be vertically aligned..
8843                $dont_align[$depth] =
8844
8845                  # code BLOCKS are handled at a higher level
8846                  ( $block_type ne "" )
8847
8848                  # certain paren lists
8849                  || ( $type eq '(' ) && (
8850
8851                    # it does not usually look good to align a list of
8852                    # identifiers in a parameter list, as in:
8853                    #    my($var1, $var2, ...)
8854                    # (This test should probably be refined, for now I'm just
8855                    # testing for any keyword)
8856                    ( $last_nonblank_type eq 'k' )
8857
8858                    # a trailing '(' usually indicates a non-list
8859                    || ( $next_nonblank_type eq '(' )
8860                  );
8861
8862                # patch to outdent opening brace of long if/for/..
8863                # statements (like this one).  See similar coding in
8864                # set_continuation breaks.  We have also catch it here for
8865                # short line fragments which otherwise will not go through
8866                # set_continuation_breaks.
8867                if (
8868                    $block_type
8869
8870                    # if we have the ')' but not its '(' in this batch..
8871                    && ( $last_nonblank_token eq ')' )
8872                    && $mate_index_to_go[$i_last_nonblank_token] < 0
8873
8874                    # and user wants brace to left
8875                    && !$rOpts->{'opening-brace-always-on-right'}
8876
8877                    && ( $type  eq '{' )    # should be true
8878                    && ( $token eq '{' )    # should be true
8879                  )
8880                {
8881                    set_forced_breakpoint( $i - 1 );
8882                }
8883            }
8884
8885            #------------------------------------------------------------
8886            # Handle Decreasing Depth..
8887            #
8888            # finish off any old list when depth decreases
8889            # token $i is a ')','}', or ']'
8890            #------------------------------------------------------------
8891            elsif ( $depth < $current_depth ) {
8892
8893                check_for_new_minimum_depth($depth);
8894
8895                # remember how deep we have been
8896                if ( $maximum_nesting_depth[$depth] <
8897                    $maximum_nesting_depth[$current_depth] )
8898                {
8899                    $maximum_nesting_depth[$depth] =
8900                      $maximum_nesting_depth[$current_depth];
8901                }
8902
8903                # get final term length if necessary
8904                update_longest_term( $i - 1, $current_depth );
8905
8906                # Patch to break between ') {' if the paren list is broken.
8907                # There is similar logic in set_continuation_breaks for
8908                # non-broken lists.
8909                if ( $token eq ')'
8910                    && $next_nonblank_block_type
8911                    && $interrupted_list[$current_depth]
8912                    && $next_nonblank_type eq '{'
8913                    && !$rOpts->{'opening-brace-always-on-right'} )
8914                {
8915                    set_forced_breakpoint($i);
8916                }
8917
8918                #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
8919
8920                # set breaks at commas if necessary
8921                my ( $bp_count, $do_not_break_apart ) =
8922                  set_comma_breakpoints($current_depth);
8923
8924                my $i_opening = $opening_structure_index_stack[$current_depth];
8925                my $saw_opening_structure = ( $i_opening >= 0 );
8926
8927                # this term is long if we had to break at interior commas..
8928                my $is_long_term = $bp_count > 0;
8929
8930                # ..or if the length between opening and closing parens exceeds
8931                # allowed line length
8932                if ( !$is_long_term && $saw_opening_structure ) {
8933                    my $i_opening_minus = find_token_starting_list($i_opening);
8934
8935                    # Note: we have to allow for one extra space after a
8936                    # closing token so that we do not strand a comma or
8937                    # semicolon, hence the '>=' here (oneline.t)
8938                    $is_long_term =
8939                      excess_line_length( $i_opening_minus, $i ) >= 0;
8940                }
8941
8942                # We've set breaks after all comma-arrows.  Now we have to
8943                # undo them if this can be a one-line block
8944                # (the only breakpoints set will be due to comma-arrows)
8945                if (
8946                    !$rOpts->{'break-after-comma-arrows'}
8947
8948                    # if the opening structure is in this batch
8949                    && $saw_opening_structure
8950
8951                    # and on the same old line
8952                    && ( $old_breakpoint_count_stack[$current_depth] ==
8953                        $last_old_breakpoint_count )
8954
8955                    # and we made some breakpoints between the opening and closing
8956                    && ( $breakpoint_undo_stack[$current_depth] <
8957                        $forced_breakpoint_undo_count )
8958
8959                    # and this block is short enough to fit on one line
8960                    # Note: use < because need 1 more space for possible comma
8961                    && !$is_long_term
8962
8963                  )
8964                {
8965                    undo_forced_breakpoint_stack(
8966                        $breakpoint_undo_stack[$current_depth] );
8967                }
8968
8969                # now see if we have any comma breakpoints left
8970                my $has_comma_breakpoints =
8971                  ( $breakpoint_stack[$current_depth] !=
8972                      $forced_breakpoint_count );
8973
8974                # update broken-sublist flag of the outer container
8975                $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
8976                  || $has_broken_sublist[$current_depth]
8977                  || $is_long_term
8978                  || $has_comma_breakpoints;
8979
8980=pod
8981
8982Having come to the closing ')', '}', or ']', now we have to decide if we
8983should 'open up' the structure by placing breaks at the opening and
8984closing containers.  This is a tricky decision.  Here are some of the
8985basic considerations:
8986
8987-If this is a BLOCK container, then any breakpoints will have already
8988been set (and according to user preferences), so we need do nothing here.
8989
8990-If we have a comma-separated list for which we can align the list items,
8991then we need to do so because otherwise the vertical aligner cannot
8992currently do the alignment.
8993
8994-If this container does itself contain a container which has been broken
8995open, then it should be broken open to properly show the structure.
8996
8997-If there is nothing to align, and no other reason to break apart,
8998then do not do it.
8999
9000We will not break open the parens of a long but 'simple' logical expression.
9001For example:
9002
9003This is an example of a simple logical expression and its formatting:
9004
9005    if ( $bigwasteofspace1 && $bigwasteofspace2
9006        || $bigwasteofspace3 && $bigwasteofspace4 )
9007
9008Most people would prefer this than the 'spacey' version:
9009
9010    if (
9011        $bigwasteofspace1 && $bigwasteofspace2
9012        || $bigwasteofspace3 && $bigwasteofspace4
9013    )
9014
9015To illustrate the rules for breaking logical expressions, consider:
9016
9017            FULLY DENSE:
9018            if ( $opt_excl
9019                and ( exists $ids_excl_uc{$id_uc}
9020                    or grep $id_uc =~ /$_/, @ids_excl_uc ))
9021
9022This is on the verge of being difficult to read.  The current default is to
9023open it up like this:
9024
9025            DEFAULT:
9026            if (
9027                $opt_excl
9028                and ( exists $ids_excl_uc{$id_uc}
9029                    or grep $id_uc =~ /$_/, @ids_excl_uc )
9030              )
9031
9032This is a compromise which tries to avoid being too dense and to spacey.
9033A more spaced version would be:
9034
9035            SPACEY:
9036            if (
9037                $opt_excl
9038                and (
9039                    exists $ids_excl_uc{$id_uc}
9040                    or grep $id_uc =~ /$_/, @ids_excl_uc
9041                )
9042              )
9043
9044Some people might prefer the spacey version -- an option could be added.  The
9045innermost expression contains a long block '( exists $ids_...  ')'.
9046
9047Here is how the logic goes: We will force a break at the 'or' that the
9048innermost expression contains, but we will not break apart its opening an
9049closing containers because (1) it contains no multi-line sub-containers itself,
9050and (2) there is no alignment to be gained by breaking it open like this
9051
9052            and (
9053                exists $ids_excl_uc{$id_uc}
9054                or grep $id_uc =~ /$_/, @ids_excl_uc
9055            )
9056
9057(although this looks perfectly ok and might be good for long expressions).  The
9058outer 'if' container, though, contains a broken sub-container, so it will be
9059broken open to avoid too much density.  Also, since it contains no 'or's, there
9060will be a forced break at its 'and'.
9061
9062=cut
9063
9064                # set some flags telling something about this container..
9065                my $is_simple_logical_expression = 0;
9066                if ( $item_count_stack[$current_depth] == 0
9067                    && $saw_opening_structure
9068                    && $tokens_to_go[$i_opening] eq '('
9069                    && $is_logical_container{ $container_type[$current_depth] }
9070                  )
9071                {
9072
9073                    # This seems to be a simple logical expression with no existing
9074                    # breakpoints.  Set a flag to prevent opening it up.
9075                    if ( !$has_comma_breakpoints ) {
9076                        $is_simple_logical_expression = 1;
9077                    }
9078
9079                    # This seems to be a simple logical expression with breakpoints
9080                    # (broken sublists, for example).  Break at all 'or's and '||'s.
9081                    else {
9082                        set_logical_breakpoints($current_depth);
9083                    }
9084                }
9085
9086                if (
9087
9088                    # breaks for code BLOCKS are handled at a higher level
9089                    !$block_type
9090
9091                    # we do not need to break at the top level of an 'if'
9092                    # type expression
9093                    && !$is_simple_logical_expression
9094
9095                    # otherwise, we require one of these reasons for breaking:
9096                    && (
9097
9098                        # - this term has forced line breaks
9099                        $has_comma_breakpoints
9100
9101                        # - the opening container is separated from this batch
9102                        #   for some reason (comment, blank line, code block)
9103                        # - this is a non-paren container spanning multiple lines
9104                        || !$saw_opening_structure
9105
9106                        # - this is a long block contained in another breakable
9107                        #   container
9108                        || ( $is_long_term
9109                            && $container_environment_to_go[$i_opening] ne
9110                            'BLOCK' )
9111                    )
9112                  )
9113                {
9114
9115                    # For -lp option, we should put a breakpoint before the token
9116                    # which has been identified as starting this indentation level.
9117                    # This is necessary for proper alignment.
9118                    if ( $rOpts_line_up_parentheses && $saw_opening_structure )
9119                    {
9120                        my $item = $leading_spaces_to_go[ $i_opening + 1 ];
9121                        if ( defined($item) ) {
9122                            my $i_start_2 = $item->get_STARTING_INDEX();
9123                            if (
9124                                defined($i_start_2)
9125
9126                                # we are breaking after an opening brace, paren,
9127                                # so don't break before it too
9128                                && $i_start_2 ne $i_opening
9129                              )
9130                            {
9131
9132                                # Only break for breakpoints at the same indentation
9133                                # level as the opening paren
9134                                my $test1 = $leading_spaces_to_go[$i_opening];
9135                                my $test2 = $leading_spaces_to_go[$i_start_2];
9136                                if ( $test2 == $test1 ) {
9137                                    set_forced_breakpoint( $i_start_2 - 1 );
9138                                }
9139                            }
9140                        }
9141                    }
9142
9143                    # break after opening structure.
9144                    # note: break before closing structure will be automatic
9145                    if ( $minimum_depth <= $current_depth ) {
9146
9147                        set_forced_breakpoint($i_opening)
9148                          unless ( $do_not_break_apart
9149                            || is_unbreakable_container($current_depth) );
9150
9151                        # break before opening structure if preeced by another
9152                        # closing structure and a comma.  This is normally
9153                        # done by the previous closing brace, but not
9154                        # if it was a one-line block.
9155                        if ( $i_opening > 2 ) {
9156                            my $i_prev =
9157                              ( $types_to_go[ $i_opening - 1 ] eq 'b' )
9158                              ? $i_opening - 2
9159                              : $i_opening - 1;
9160
9161                            if ( $types_to_go[$i_prev] eq ','
9162                                && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
9163                            {
9164                                set_forced_breakpoint($i_prev);
9165                            }
9166
9167                            # also break before something like ':('  or '?('
9168                            # if appropriate.
9169                            elsif (
9170                                $types_to_go[$i_prev] =~ /^([\:\?]|&&|\|\|)$/ )
9171                            {
9172                                my $token_prev = $tokens_to_go[$i_prev];
9173                                if ( $want_break_before{$token_prev} ) {
9174                                    set_forced_breakpoint($i_prev);
9175                                }
9176                            }
9177                        }
9178                    }
9179
9180                    # break after comma following closing structure
9181                    if ( $next_type eq ',' ) {
9182                        set_forced_breakpoint( $i + 1 );
9183                    }
9184
9185                    # break before an '=' following closing structure
9186                    if (
9187                        $next_nonblank_type eq '='
9188                        && ( $breakpoint_stack[$current_depth] !=
9189                            $forced_breakpoint_count )
9190                      )
9191                    {
9192                        set_forced_breakpoint($i);
9193                    }
9194
9195                    # break at any comma before the opening structure Added
9196                    # for -lp, but seems to be good in general.  It isn't
9197                    # obvious how far back to look; the '5' below seems to
9198                    # work well and will catch the comma in something like
9199                    #  push @list, myfunc( $param, $param, ..
9200
9201                    my $icomma = $last_comma_index[$depth];
9202                    if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
9203                        unless ( $forced_breakpoint_to_go[$icomma] ) {
9204                            set_forced_breakpoint($icomma);
9205                        }
9206                    }
9207
9208                    # With the -lp option we will break at an '=' preceding
9209                    # an open paren by a significant number of spaces.
9210                    # This can help save space across the page.  The
9211                    # constants in this are somewhat arbitrary.  But note
9212                    # the patch above, which disables this for a list
9213                    # which is more than 1 level deep.
9214
9215=pod
9216
9217         Here is an example of what this is trying to accomplish:
9218
9219            my $result_string =
9220              $candidates->result_as_string(
9221                                             'Type'     => 'All',
9222                                             'Category' => 'Failed'
9223            );
9224
9225         Without the break at the '=', the result would be:
9226
9227            my $result_string = $candidates->result_as_string(
9228                                                    'Type'     => 'All',
9229                                                    'Category' => 'Failed'
9230            );
9231
9232        which is not as nice.
9233
9234=cut
9235
9236                    if ($rOpts_line_up_parentheses) {
9237
9238                        if ( $token eq ')' ) {
9239                            my $iequals = $last_equals_index[$depth];
9240
9241                            # We do not need to break at any '=' if there is
9242                            # room for the call parameters.
9243                            my $recoverable = 0;
9244                            my $icomma      = $last_comma_index[$current_depth];
9245                            if ( defined($icomma) ) { # should always be defined
9246
9247                                # method 1: for comma lists this
9248                                # information is available in the
9249                                # indentation.
9250                                $recoverable =
9251                                  $leading_spaces_to_go[$icomma]
9252                                  ->get_RECOVERABLE_SPACES() > 0
9253
9254                                  # method 2: for comma-arrow lists
9255                                  # we need to use an alternate method
9256                                  || $longest_term[$current_depth] >
9257                                  table_columns_available($icomma);
9258
9259                            }
9260
9261                            FORMATTER_DEBUG_FLAG_EQUALS && do {
9262                                if ( defined($iequals) ) {
9263                                    print
9264"EQUALS: considering = break iequals=$iequals rec=$recoverable\n";
9265                                }
9266                                else {
9267                                    print
9268                                      "EQUALS: at = break but '=' undefined\n";
9269                                }
9270                            };
9271
9272                            if (
9273                                defined($iequals)
9274
9275                                && ( $recoverable > 0 )
9276
9277                                # more than 10 spaces from the '=' to the '('
9278                                && (
9279                                    token_sequence_length( $iequals,
9280                                        $i_opening ) > 10
9281                                )
9282
9283                                # and the '=' is more than 8 spaces
9284                                # from line start
9285                                && ( token_sequence_length( 0, $iequals ) > 8 )
9286                              )
9287                            {
9288                                unless ( $forced_breakpoint_to_go[$iequals] ) {
9289                                    set_forced_breakpoint($iequals);
9290                                }
9291                            }
9292                        }
9293
9294                        # Do not allow break on '=' of a structure
9295                        # containing this one because the space cannot
9296                        # be recovered at the present time (gnu1.t).  We
9297                        # might be left with a big gap.
9298                        if ( $depth >= 1 ) {
9299                            $last_equals_index[ $depth - 1 ] = undef;
9300                            FORMATTER_DEBUG_FLAG_EQUALS && do {
9301                                print
9302"EQUALS: undefining iequals after breaking a container at depth $depth\n";
9303                            };
9304                        }
9305                    }    # end -lp logic
9306                }    # end logic to open up a container
9307
9308                # Handle long container which does not get opened up
9309                elsif ($is_long_term) {
9310
9311                    # must set fake breakpoint to alert outer containers that
9312                    # they are complex
9313                    set_fake_breakpoint();
9314
9315                    # avoid possible -lp problems (see note above)
9316                    if ( $depth >= 1 ) {
9317                        $last_equals_index[ $depth - 1 ] = undef;
9318                        FORMATTER_DEBUG_FLAG_EQUALS && do {
9319                            print
9320"EQUALS: undefining iequals after long block for depth $depth\n";
9321                        };
9322                    }
9323                }
9324            }
9325
9326            #------------------------------------------------------------
9327            # Handle this token
9328            #------------------------------------------------------------
9329
9330            $current_depth = $depth;
9331
9332            # handle comma-arrow
9333            if ( $type eq '=>' ) {
9334                next if ( $last_nonblank_type eq '=>' );
9335                $want_comma_break[$depth]   = 1;
9336                $index_before_arrow[$depth] = $i_last_nonblank_token;
9337                next;
9338            }
9339
9340            # remember location of any of the assignment operators
9341            if ( $type =~ /=/ && $type !~ /(==|!=|>=|<=|=~|=>)/ ) {
9342                $last_equals_index[$depth] = $i;
9343                FORMATTER_DEBUG_FLAG_EQUALS && do {
9344                    print "EQUALS: found equals at i=$i for depth=$depth\n";
9345                };
9346            }
9347
9348            # Turn off alignment if we are sure that this is not a list
9349            # environment.  To be safe, we will do this if we see certain
9350            # non-list tokens, such as ';', and also the environment is
9351            # not a list.  Note that '=' could be in any of the = operators
9352            # (lextest.t). We can't just use the reported environment
9353            # because it can be incorrect in some cases.
9354
9355            if ( $type =~ /(^[\;\<\>\~]$)|[=]/
9356                && $container_environment_to_go[$i] ne 'LIST' )
9357            {
9358                $dont_align[$depth]         = 1;
9359                $want_comma_break[$depth]   = 0;
9360                $index_before_arrow[$depth] = -1;
9361            }
9362
9363            # now just handle any commas
9364            next unless ( $type eq ',' );
9365
9366            # keep track of longest item between commas
9367            update_longest_term( $i, $depth );
9368
9369            $last_comma_index[$depth] = $i;
9370
9371            # break here if this comma follows a '=>'
9372            # but not if there is a side comment after the comma
9373            if ( $want_comma_break[$depth] ) {
9374                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
9375
9376                # break before the previous token if it looks safe
9377                # Example of something that we will not try to break before:
9378                #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
9379                my $ibreak = $index_before_arrow[$depth] - 1;
9380                if ( $ibreak > 0
9381                    && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
9382                {
9383                    if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
9384                    if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
9385                        set_forced_breakpoint($ibreak);
9386                    }
9387                }
9388
9389                $want_comma_break[$depth]   = 0;
9390                $index_before_arrow[$depth] = -1;
9391
9392                # items after '=>' may be long, so breaking at a preceding
9393                # '=' might leave a gap when -lp is used.
9394                if ( $depth >= 1 ) {
9395                    $last_equals_index[ $depth - 1 ] = undef;
9396                    FORMATTER_DEBUG_FLAG_EQUALS && do {
9397                        print
9398"EQUALS: comma-arrow: undef equals at depth above $depth\n";
9399                    };
9400                }
9401
9402                # handle list which mixes '=>'s and ','s:
9403                # treat any list items so far as an interrupted list
9404                $interrupted_list[$depth] = 1;
9405                next;
9406            }
9407
9408            # skip past these commas if we are not supposed to format them
9409            next if ( $dont_align[$depth] );
9410
9411            # break after all commas above starting depth
9412            ## FIXME: re-check this
9413            if ( $depth < $starting_depth ) {
9414                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
9415                next;
9416            }
9417
9418            # add this comma to the list..
9419            my $item_count = $item_count_stack[$depth];
9420            if ( $item_count == 0 ) {
9421
9422                # but do not form a list with no opening structure
9423                # for example:
9424
9425                #            open INFILE_COPY, ">$input_file_copy"
9426                #              or die ("very long message");
9427
9428                if ( ( $opening_structure_index_stack[$depth] < 0 )
9429                    && $container_environment_to_go[$i] eq 'BLOCK' )
9430                {
9431                    $dont_align[$depth] = 1;
9432                    next;
9433                }
9434
9435                $max_length[$depth][0] = 0;
9436                $max_length[$depth][1] = 0;
9437            }
9438
9439            # save max length of list items to calculate page layout
9440            my $i_prev =
9441              ( $item_count > 0 )
9442              ? $comma_index[$depth][ $item_count - 1 ]
9443              : $opening_structure_index_stack[$depth];
9444
9445            if ( !defined($i_prev) ) { $i_prev = -1 }
9446            my $length = token_sequence_length( $i_prev + 1, $i );
9447
9448            if ( $length > $max_length[$depth][ $item_count % 2 ] ) {
9449                $max_length[$depth][ $item_count % 2 ] = $length;
9450            }
9451
9452            $comma_index[$depth][$item_count] = $i;
9453            ++$item_count_stack[$depth];
9454            if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
9455                $identifier_count_stack[$depth]++;
9456            }
9457        }
9458
9459        #-------------------------------------------
9460        # end of loop over all tokens in this batch
9461        #-------------------------------------------
9462
9463        # set breaks for any unfinished lists ..
9464        for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
9465
9466            $interrupted_list[$dd] = 1;
9467            $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
9468            set_comma_breakpoints($dd);
9469            set_logical_breakpoints($dd) if $good_old_breakpoint > 0;
9470
9471            # break open container...
9472            my $i_opening = $opening_structure_index_stack[$dd];
9473            set_forced_breakpoint($i_opening)
9474              unless (
9475                is_unbreakable_container($dd)
9476
9477                # Avoid a break which would place an isolated ' or "
9478                # on a line
9479                || ( $type eq 'Q'
9480                    && $i_opening >= $max_index_to_go - 2
9481                    && $token =~ /^['"]$/ )
9482              );
9483        }
9484
9485        # Return a flag indicating if the input file had some breakpoints,
9486        # and they were all good.  If not all of the breakpoints were good,
9487        # we take that as a bad omen and do not set the flag.  This flag
9488        # will be used to force a break in a line shorter than the allowed
9489        # line length.
9490        my $saw_good_breakpoint =
9491          ( $old_breakpoint_count > 0
9492              && $old_breakpoint_count == $good_old_breakpoint );
9493
9494        return $saw_good_breakpoint;
9495    }
9496}    # end closure scan_list
9497
9498sub find_token_starting_list {
9499
9500    # When testing to see if a block will fit on one line, some
9501    # previous token(s) may also need to be on the line; particularly
9502    # if this is a sub call.  So we will look back at least one
9503    # token. NOTE: This isn't perfect, but not critical, because
9504    # if we mis-identify a block, it will be wrapped and therefore
9505    # fixed the next time it is formatted.
9506    my $i_opening_paren = shift;
9507    my $i_opening_minus = $i_opening_paren;
9508    my $im1             = $i_opening_paren - 1;
9509    my $im2             = $i_opening_paren - 2;
9510    my $im3             = $i_opening_paren - 3;
9511    my $typem1          = $types_to_go[$im1];
9512    my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
9513    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
9514        $i_opening_minus = $i_opening_paren;
9515    }
9516    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
9517        $i_opening_minus = $im1 if $im1 >= 0;
9518
9519        # walk back to improve length estimate
9520        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
9521            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
9522            $i_opening_minus = $j;
9523        }
9524        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
9525    }
9526    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
9527    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
9528        $i_opening_minus = $im2;
9529    }
9530    return $i_opening_minus;
9531}
9532
9533sub set_comma_breakpoints_do {
9534
9535    # Given a list with some commas, set breakpoints at some
9536    # of the commas to allow nice alignment if possible.  This
9537    # list is an example:
9538    my (
9539        $depth,       $i_opening_paren,       $i_closing_paren,
9540        $item_count,  $identifier_count,      $rcomma_index,
9541        $rmax_length, $next_nonblank_type,    $list_type,
9542        $interrupted, $maximum_nesting_depth, $rdo_not_break_apart,
9543      )
9544      = @_;
9545
9546    # nothing to do if no commas seen
9547    return if ( $item_count < 1 );
9548
9549    #---------------------------------------------------------------
9550    # Compound List Rule 1:
9551    # Break at every comma for a list containing a broken sublist.
9552    # This has higher priority than the Interrupted List Rule.
9553    #---------------------------------------------------------------
9554    if ( $has_broken_sublist[$depth] ) {
9555        for ( my $j = 0 ; $j < $item_count ; $j++ ) {
9556            my $i = $$rcomma_index[$j];
9557            set_forced_breakpoint($i);
9558        }
9559        return;
9560    }
9561
9562    my $i_first_comma = $$rcomma_index[0];
9563    my $i_last_comma  = $$rcomma_index[ $item_count - 1 ];
9564
9565    #my ( $a, $b, $c ) = caller();
9566    #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
9567    #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
9568    #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
9569
9570    #---------------------------------------------------------------
9571    # Interrupted List Rule:
9572    # A list is is forced to use old breakpoints if it was interrupted
9573    # by side comments or blank lines.
9574    #---------------------------------------------------------------
9575    if ( $interrupted || $i_opening_paren < 0 ) {
9576        write_logfile_entry("list broken: using old breakpoints\n")
9577          unless ( $item_count < 6 );
9578        copy_old_breakpoints( $i_first_comma, $i_last_comma );
9579        return;
9580    }
9581
9582    my $opening_token       = $tokens_to_go[$i_opening_paren];
9583    my $opening_environment = $container_environment_to_go[$i_opening_paren];
9584
9585    #---------------------------------------------------------------
9586    # Looks like a list of items.  We have to look at it and size it up.
9587    #---------------------------------------------------------------
9588
9589    return if ( $i_first_comma < 1 );
9590    if ( $i_last_comma >= $max_index_to_go ) {
9591        $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
9592        return if ( $item_count <= 2 );    # not much of a list
9593    }
9594
9595    #-------------------------------------------------------------------
9596    # Return if this will fit on one line
9597    #-------------------------------------------------------------------
9598
9599    my $i_opening_minus = find_token_starting_list($i_opening_paren);
9600    return unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
9601
9602    #-------------------------------------------------------------------
9603    # Now we know that this block spans multiple lines; we have to set
9604    # at least one breakpoint -- real or fake -- as a signal to break
9605    # open any outer containers.
9606    #-------------------------------------------------------------------
9607    set_fake_breakpoint();
9608
9609    # now we have to make a distinction between the comma count and item count,
9610    # because the item count will be one greater than the comma count if
9611    # the last item is not terminated with a comma
9612    my $comma_count = $item_count;
9613    my $i_b         =
9614      ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
9615      ? $i_last_comma + 1
9616      : $i_last_comma;
9617    my $i_e =
9618      ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
9619      ? $i_closing_paren - 2
9620      : $i_closing_paren - 1;
9621    my $i_effective_last_comma = $i_last_comma;
9622
9623    my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
9624    if ( $last_item_length > 0 ) {
9625
9626        # add 2 to length because other lengths include a comma and a blank
9627        $last_item_length += 2;
9628        my $i_odd = $item_count % 2;
9629        if ( $last_item_length > $$rmax_length[$i_odd] ) {
9630            $$rmax_length[$i_odd] = $last_item_length;
9631        }
9632        $item_count++;
9633        $i_effective_last_comma = $i_e + 1;
9634    }
9635
9636    # be sure we do not extend beyond the current list length
9637    if ( $i_effective_last_comma >= $max_index_to_go ) {
9638        $i_effective_last_comma = $max_index_to_go - 1;
9639    }
9640
9641    # Field width parameters
9642    my $pair_width = ( $$rmax_length[0] + $$rmax_length[1] );
9643    my $max_width  =
9644      ( $$rmax_length[0] > $$rmax_length[1] )
9645      ? $$rmax_length[0]
9646      : $$rmax_length[1];
9647
9648    # Number of free columns across the page width for laying out tables
9649    my $columns = table_columns_available($i_first_comma);
9650
9651    # Specify if the list must have an even number of fields or not.  It
9652    # is generally safest in perl to assume an even number, because the
9653    # list items might be a hash list.  But if we can be sure that it is
9654    # not a hash, then we can allow an odd number for more flexibility.
9655    my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
9656    if ( $list_type =~ /^(my|local|our)$/ || $next_nonblank_type eq '=' ) {
9657        $odd_or_even = 1;
9658    }
9659    if ( $identifier_count == $comma_count ) { $odd_or_even = 1 }   # seems safe
9660
9661    # Number of fields which fit this space
9662    my $number_of_fields =
9663      maximum_number_of_fields( $columns, $odd_or_even, $max_width,
9664        $pair_width );
9665
9666    # ----------------------------------------------------------------------
9667    # If we are crowded and the -lp option is being used, try to
9668    # undo some indentation
9669    # ----------------------------------------------------------------------
9670
9671    if ( $number_of_fields < 2 ) {
9672        my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
9673        if ( $available_spaces > 0 ) {
9674
9675            # asking for an extra space for safety
9676            my $spaces_wanted = 1 + $pair_width - $columns;    # for 2 fields
9677            if ( $spaces_wanted > $available_spaces
9678                || $rOpts->{'maximum-fields-per-table'} == 1 )
9679            {
9680                $spaces_wanted = 1 + $max_width - $columns;    # for 1 field
9681            }
9682
9683            # ask for space if needed
9684            if ( $spaces_wanted > 0 ) {
9685                reduce_lp_indentation( $i_first_comma, $spaces_wanted );
9686
9687                # redo the math
9688                $columns          = table_columns_available($i_first_comma);
9689                $number_of_fields =
9690                  maximum_number_of_fields( $columns, $odd_or_even, $max_width,
9691                    $pair_width );
9692            }
9693        }
9694    }
9695
9696    # try for one column if two won't work
9697    if ( $number_of_fields <= 0 ) {
9698        $number_of_fields = int( $columns / $max_width );
9699    }
9700
9701    #print "LISTX: fields=$number_of_fields columns=$columns max_width==$max_width  w0=${$rmax_length}[0]  w1=${$rmax_length}[1] width=$pair_width\n";
9702
9703    # The user can place an upper bound on the number of fields,
9704    # which can be useful for doing maintenance on tables
9705    if ( $number_of_fields > $rOpts->{'maximum-fields-per-table'} ) {
9706        $number_of_fields = int $rOpts->{'maximum-fields-per-table'};
9707    }
9708
9709    # How many columns (characters) and lines would this container take
9710    # if no additional whitespace were added?
9711    my $packed_columns =
9712      token_sequence_length( $i_opening_paren + 1,
9713        $i_effective_last_comma + 1 );
9714    if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
9715    my $packed_lines = 1 + int( $packed_columns / $columns );
9716
9717    # are we an item contained in an outer list?
9718    my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
9719
9720    if ( $number_of_fields <= 0 ) {
9721
9722=pod
9723
9724        #---------------------------------------------------------------
9725        # We're in trouble.  We can't find a single field width that works.
9726        # There is no simple answer here; we may have a single long list
9727        # item, or many.
9728        #---------------------------------------------------------------
9729
9730        In many cases, it may be best to not force a break if there is just one
9731        comma, because the standard continuation break logic will do a better
9732        job without it.
9733
9734        In the common case that all but one of the terms can fit
9735        on a single line, it may look better not to break open the
9736        containing parens.  Consider, for example
9737
9738            $color =
9739              join ( '/',
9740                sort { $color_value{$::a} <=> $color_value{$::b}; }
9741                keys %colors );
9742
9743        which will look like this with the container broken:
9744
9745            $color = join (
9746                '/',
9747                sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
9748            );
9749
9750        Here is an example of this rule for a long last term:
9751
9752            log_message( 0, 256, 128,
9753                "Number of routes in adj-RIB-in to be considered: $peercount" );
9754
9755        And here is an example with a long first term:
9756
9757        $s = sprintf(
9758"%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
9759            $r, $pu, $ps, $cu, $cs, $tt
9760          )
9761          if $style eq 'all';
9762
9763=cut
9764
9765        my $i_last_comma    = $$rcomma_index[ $comma_count - 1 ];
9766        my $long_last_term  = excess_line_length( 0, $i_last_comma ) <= 0;
9767        my $long_first_term =
9768          excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
9769
9770        # break at every comma ...
9771        if (
9772
9773            # if user requested
9774            $rOpts->{'maximum-fields-per-table'} == 1
9775
9776            # or if this is a sublist of a larger list
9777            || $in_hierarchical_list
9778
9779            # or if multiple commas and we dont have a long first or last term
9780            || ( $comma_count > 1 && !( $long_last_term || $long_first_term ) )
9781          )
9782        {
9783            for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
9784                my $i = $$rcomma_index[$j];
9785                set_forced_breakpoint($i);
9786            }
9787        }
9788        elsif ($long_last_term) {
9789
9790            set_forced_breakpoint($i_last_comma);
9791            $$rdo_not_break_apart = 1;
9792        }
9793        elsif ($long_first_term) {
9794
9795            set_forced_breakpoint($i_first_comma);
9796            $$rdo_not_break_apart = 1;
9797        }
9798        else {
9799
9800            # let breaks be defined by default bond strength logic
9801        }
9802        return;
9803    }
9804
9805    # --------------------------------------------------------
9806    # We have a tentative field count that seems to work.
9807    # How many lines will this require?
9808    # --------------------------------------------------------
9809    my $formatted_lines = $item_count / ($number_of_fields);
9810    if ( $formatted_lines != int $formatted_lines ) {
9811        $formatted_lines = 1 + int $formatted_lines;
9812    }
9813
9814    # So far we've been trying to fill out to the right margin.  But
9815    # compact tables are easier to read, so let's see if we can use fewer
9816    # fields without increasing the number of lines.
9817    $number_of_fields =
9818      compactify_table( $item_count, $number_of_fields, $formatted_lines,
9819        $odd_or_even );
9820
9821    # How many spaces across the page will we fill?
9822    my $columns_per_line =
9823      ( int $number_of_fields / 2 ) * $pair_width + ( $number_of_fields % 2 ) *
9824      $max_width;
9825
9826    my $formatted_columns;
9827
9828    if ( $number_of_fields > 1 ) {
9829        $formatted_columns =
9830          ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
9831              $max_width );
9832    }
9833    else {
9834        $formatted_columns = $max_width * $item_count;
9835    }
9836
9837    my $unused_columns = $formatted_columns - $packed_columns;
9838
9839    # set some empirical parameters to help decide if we should try to
9840    # align; high sparsity does not look good, especially with few lines
9841    my $sparsity             = ($unused_columns) / ($formatted_columns);
9842    my $max_allowed_sparsity =
9843      ( $item_count < 3 ) ? 0.1
9844      : ( $packed_lines == 1 ) ? 0.15
9845      : ( $packed_lines == 2 ) ? 0.4
9846      : 0.7;
9847
9848    # Shortcut method 1: for 2 lines, just one comma:
9849    if (
9850        $packed_lines <= 2               # probably can fit in 2 lines
9851        && $item_count == 2              # two items, one comma
9852        && $rOpts_line_up_parentheses    # -lp
9853        && $opening_environment eq 'BLOCK'    # not a sub-container
9854        && $opening_token       eq '('        # is paren list
9855      )
9856    {
9857        my $i_break = $$rcomma_index[0];
9858        set_forced_breakpoint($i_break);
9859        $$rdo_not_break_apart = 1;
9860        return;
9861    }
9862
9863    # Shortcut method 2: for relatively simple 2 liner function calls
9864    # which usually look better without aligning commas and opening
9865    # up the container
9866    if (
9867        $packed_lines <= 2    # probably can fit in 2 lines
9868        && ( $identifier_count > 0.5 * $item_count )    # isn't all quotes
9869        && $sparsity > 0.15    # would be fairly spaced gaps if aligned
9870        && $item_count < 9     # doesn't have too many items
9871        && $opening_environment eq 'BLOCK'    # not a sub-container
9872        && $opening_token       eq '('        # is paren list
9873        && $maximum_nesting_depth <= $depth   # has no sublist
9874      )
9875    {
9876
9877        # let breaks be defined by bond strength logic
9878        $$rdo_not_break_apart = 1;
9879        return;
9880    }
9881
9882    # debug stuff
9883    FORMATTER_DEBUG_FLAG_SPARSE && do {
9884
9885        print
9886"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
9887
9888    };
9889
9890    #---------------------------------------------------------------
9891    # Compound List Rule 2:
9892    # If this list is too long for one line, and it is an item of a
9893    # larger list, then we must format it, regardless of sparsity
9894    # (ian.t).  One reason that we have to do this is to trigger
9895    # Compound List Rule 1, above, which causes breaks at all commas of
9896    # all outer lists.  In this way, the structure will be properly
9897    # displayed.
9898    #---------------------------------------------------------------
9899
9900    # Decide if this list is too long for one line unless broken
9901    my $total_columns = table_columns_available($i_opening_paren);
9902    my $too_long      = $packed_columns > $total_columns;
9903
9904    # For a paren list, include the length of the token just before the
9905    # '(' because this is likely a sub call, and we would have to
9906    # include the sub name on the same line as the list.  This is still
9907    # imprecise, but not too bad.  (steve.t)
9908    if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
9909
9910        $too_long =
9911          excess_line_length( $i_opening_minus, $i_effective_last_comma + 1 ) >
9912          0;
9913    }
9914
9915    # FIXME: For an item after a '=>', try to include the length of the thing
9916    # before the '=>'.  This is crude and should be improved by actually
9917    # looking back token by token.
9918    if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
9919        my $i_opening_minus = $i_opening_paren - 4;
9920        if ( $i_opening_minus >= 0 ) {
9921            $too_long =
9922              excess_line_length( $i_opening_minus,
9923                $i_effective_last_comma + 1 ) > 0;
9924        }
9925    }
9926
9927    # EXPERIMENTAL: for -lp, assume that we can get back any needed spaces
9928    # by outdenting  ... NOT
9929    #    if ($too_long && $opening_token eq '(') {
9930    #        my $available_spaces = get_AVAILABLE_SPACES_to_go($i_opening_paren);
9931    #        if ( $available_spaces > 0 ) {
9932    #            $too_long = ( $packed_columns > ( $columns + $available_spaces ) );
9933    #        }
9934    #    }
9935
9936    # Always break lists contained in '[' and '{' if too long for 1 line,
9937    # and always break lists which are too long and part of a more complex
9938    # structure.
9939
9940    my $must_format =
9941      ( $too_long && ( $in_hierarchical_list || $opening_token ne '(' ) );
9942
9943    #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_format too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
9944
9945    #---------------------------------------------------------------
9946    # The main decision:
9947    # Now decide if we will align the data into aligned columns.  Do not
9948    # attempt to align columns if this is a tiny table or it would be
9949    # too spaced.  It seems that the more packed lines we have, the
9950    # sparser the list that can be allowed and still look ok.
9951    #---------------------------------------------------------------
9952
9953    if (
9954        !$must_format
9955        && ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
9956            || ( $formatted_lines < 2 )
9957            || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
9958        )
9959      )
9960    {
9961
9962        #---------------------------------------------------------------
9963        # too sparse: would look ugly if aligned in a table;
9964        #---------------------------------------------------------------
9965
9966        # use old breakpoints if this is more than 2 lines
9967        # (otherwise, the list breakup will be at the mercy of the
9968        # standard continuation line break algorithm).
9969        if ( $packed_lines > 2 ) {
9970            write_logfile_entry("List sparse: using old breakpoints\n");
9971            copy_old_breakpoints( $i_first_comma, $i_last_comma );
9972        }
9973
9974        # let the continuation logic handle it if 2 lines
9975        else {
9976            $$rdo_not_break_apart = 1;
9977        }
9978        return;
9979    }
9980
9981    #---------------------------------------------------------------
9982    # looks ok, so go ahead and format the table
9983    #---------------------------------------------------------------
9984    write_logfile_entry(
9985        "List: auto formatting with $number_of_fields fields/row\n");
9986    my $j;
9987
9988    for ( $j = $number_of_fields - 1 ;
9989        $j < $comma_count ; $j += $number_of_fields )
9990    {
9991        my $i = $$rcomma_index[$j];
9992        set_forced_breakpoint($i);
9993    }
9994
9995    # Save list diagnostics during development
9996    FORMATTER_DEBUG_FLAG_LIST && do {
9997        my $pkl = sprintf( "%.1f", $packed_lines );
9998        my $fml = sprintf( "%.1f", $formatted_lines );
9999        write_diagnostics(<<"EOM");
10000List:items=$item_count commas=$comma_count ids=$identifier_count cols=$columns fmt_lines=$fml pkd_lines=$pkl brks=$forced_breakpoint_count
10001  fmt_cols=$formatted_columns pk_cols=$packed_columns unusd=$unused_columns
10002EOM
10003    };
10004
10005    return;
10006}
10007
10008sub table_columns_available {
10009    my $i_first_comma = shift;
10010    my $columns       =
10011      $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
10012
10013    # Patch: the vertical formatter does not line up lines whose lengths
10014    # exactly equal the available line length because of allowances
10015    # that must be made for side comments.  Therefore, the number of
10016    # available columns is reduced by 1 character.
10017    $columns -= 1;
10018    return $columns;
10019}
10020
10021sub maximum_number_of_fields {
10022
10023    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
10024    my $max_pairs        = int( $columns / $pair_width );
10025    my $number_of_fields = $max_pairs * 2;
10026    if ( $odd_or_even == 1
10027        && $max_pairs * $pair_width + $max_width <= $columns )
10028    {
10029        $number_of_fields++;
10030    }
10031    return $number_of_fields;
10032}
10033
10034sub compactify_table {
10035
10036    # given a table with a certain number of fields and a certain number
10037    # of lines, see if reducing the number of fields will make it look
10038    # better.
10039    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
10040    if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
10041        my $min_fields;
10042
10043        for ( $min_fields = $number_of_fields ;
10044            $min_fields >= $odd_or_even
10045            && $min_fields * $formatted_lines >= $item_count ;
10046            $min_fields -= $odd_or_even )
10047        {
10048            $number_of_fields = $min_fields;
10049        }
10050    }
10051    return $number_of_fields;
10052}
10053
10054sub copy_old_breakpoints {
10055    my ( $i_first_comma, $i_last_comma ) = @_;
10056    for my $i ( $i_first_comma .. $i_last_comma ) {
10057        if ( $old_breakpoint_to_go[$i] ) {
10058            set_forced_breakpoint($i);
10059        }
10060    }
10061}
10062
10063sub set_nobreaks {
10064    my ( $i, $j ) = @_;
10065    if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
10066
10067        FORMATTER_DEBUG_FLAG_NOBREAK && do {
10068            my ( $a, $b, $c ) = caller();
10069            print(
10070"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
10071            );
10072        };
10073
10074        my $k;
10075        for ( $k = $i ; $k <= $j ; $k++ ) {
10076            $nobreak_to_go[$k] = 1;
10077        }
10078    }
10079
10080    # shouldn't happen; non-critical error
10081    else {
10082        FORMATTER_DEBUG_FLAG_NOBREAK && do {
10083            my ( $a, $b, $c ) = caller();
10084            print(
10085"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
10086            );
10087        };
10088    }
10089}
10090
10091sub set_fake_breakpoint {
10092
10093    # Just bump up the breakpoint count as a signal that there are breaks.
10094    # This is useful if we have breaks but may want to postpone deciding where
10095    # to make them.
10096    $forced_breakpoint_count++;
10097}
10098
10099sub set_forced_breakpoint {
10100    my $i = shift;
10101
10102    # when called with certain tokens, use bond strengths to decide
10103    # if we break before or after it
10104    my $token = $tokens_to_go[$i];
10105    if ( $token =~ /^([\,\:\?]|&&|\|\|)$/ ) {
10106        if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
10107    }
10108
10109    # breaks are forced before 'or' and 'and' for now:
10110    if ( $token eq 'and' || $token eq 'or' ) { $i-- }
10111
10112    if ( $i >= 0 && $i <= $max_index_to_go ) {
10113        my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
10114
10115        FORMATTER_DEBUG_FLAG_FORCE && do {
10116            my ( $a, $b, $c ) = caller();
10117            print
10118"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
10119        };
10120
10121        # Note: I thought it would be best not to set these if the nobreak
10122        # flag is set (since it has priority), but things really looked
10123        # better without doing this check.  The reason is that the really
10124        # complex lines which would trigger this should really be split
10125        # up.  ( break.t )
10126        # if ( $i_nonblank >= 0  && !$nobreak_to_go[$i_nonblank]) {
10127        # So just do this:
10128        if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
10129            $forced_breakpoint_to_go[$i_nonblank] = 1;
10130
10131            if ( $i_nonblank > $index_max_forced_break ) {
10132                $index_max_forced_break = $i_nonblank;
10133            }
10134            $forced_breakpoint_count++;
10135            $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
10136              $i_nonblank;
10137
10138            # if we break at an opening container..break at the closing
10139            if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
10140                set_closing_breakpoint($i_nonblank);
10141            }
10142        }
10143    }
10144}
10145
10146sub clear_breakpoint_undo_stack {
10147    $forced_breakpoint_undo_count = 0;
10148}
10149
10150sub undo_forced_breakpoint_stack {
10151
10152    my $i_start = shift;
10153    if ( $i_start < 0 ) {
10154        $i_start = 0;
10155        my ( $a, $b, $c ) = caller();
10156        warning(
10157"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
10158        );
10159    }
10160
10161    while ( $forced_breakpoint_undo_count > $i_start ) {
10162        my $i =
10163          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
10164        if ( $i >= 0 && $i <= $max_index_to_go ) {
10165            $forced_breakpoint_to_go[$i] = 0;
10166            $forced_breakpoint_count--;
10167
10168            FORMATTER_DEBUG_FLAG_UNDOBP && do {
10169                my ( $a, $b, $c ) = caller();
10170                print(
10171"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
10172                );
10173            };
10174        }
10175
10176        # shouldn't happen, but not a critical error
10177        else {
10178            FORMATTER_DEBUG_FLAG_UNDOBP && do {
10179                my ( $a, $b, $c ) = caller();
10180                print(
10181"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
10182                );
10183            };
10184        }
10185    }
10186}
10187
10188sub recombine_breakpoints {
10189
10190    # sub set_continuation_breaks is very liberal in setting line breaks
10191    # for long lines, always setting breaks at good breakpoints, even
10192    # when that creates small lines.  Occasionally small line fragments
10193    # are produced which would look better if they were combined.
10194    # That's the task of this routine, recombine_breakpoints.
10195    my ( $ri_first, $ri_last ) = @_;
10196    my $more_to_do = 1;
10197
10198    # Keep looping until there are no more possible recombinations
10199    my $nmax_last = @$ri_last;
10200    while ($more_to_do) {
10201        my $n_best = 0;
10202        my $bs_best;
10203        my $n;
10204        my $nmax = @$ri_last - 1;
10205
10206        # safety check..
10207        unless ( $nmax < $nmax_last ) {
10208
10209            # shouldn't happen because splice below decreases nmax on each pass:
10210            # but i get paranoid sometimes
10211            die "Program bug-infinite loop in recombine breakpoints\n";
10212        }
10213        $nmax_last  = $nmax;
10214        $more_to_do = 0;
10215
10216        # loop over all remaining lines...
10217        for $n ( 1 .. $nmax ) {
10218
10219            #----------------------------------------------------------
10220            # Indexes of the endpoints of the two lines are:
10221            #
10222            #  ---left---- | ---right---
10223            #  $if   $imid | $imidr   $il
10224            #
10225            # We want to decide if we should join tokens $imid to $imidr
10226            #----------------------------------------------------------
10227            my $if    = $$ri_first[ $n - 1 ];
10228            my $il    = $$ri_last[$n];
10229            my $imid  = $$ri_last[ $n - 1 ];
10230            my $imidr = $$ri_first[$n];
10231
10232            #print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
10233
10234            #----------------------------------------------------------
10235            # Start of special recombination rules
10236            # These are ad-hoc rules which have been found to work ok.
10237            # Skip to next pair to avoid re-combination.
10238            #----------------------------------------------------------
10239
10240            # a terminal '{' should stay where it is
10241            next if ( $n == $nmax && $types_to_go[$imidr] eq '{' );
10242
10243            #----------------------------------------------------------
10244            # examine token at $imid  (right end of first line of pair)
10245            #----------------------------------------------------------
10246
10247            # an isolated '}' may join with a ';' terminated segment
10248            if ( $types_to_go[$imid] eq '}' ) {
10249                next unless (
10250
10251                    # join } and ;
10252                    ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) )
10253
10254                    # handle '.' below
10255                    || ( $types_to_go[$imidr] eq '.' )
10256                );
10257            }
10258
10259            # for lines ending in a comma...
10260            elsif ( $types_to_go[$imid] eq ',' ) {
10261
10262                # an isolated '},' may join with an identifier + ';'
10263                # this is useful for the class of a 'bless' statement (bless.t)
10264                if ( $types_to_go[$if] eq '}' && $types_to_go[$imidr] eq 'i' ) {
10265                    next
10266                      unless ( ( $if == ( $imid - 1 ) )
10267                        && ( $il == ( $imidr + 1 ) )
10268                        && ( $types_to_go[$il] eq ';' ) );
10269
10270                    # override breakpoint
10271                    $forced_breakpoint_to_go[$imid] = 0;
10272                }
10273
10274                # but otherwise, do not recombine unless this will leave
10275                # just 1 more line
10276                else {
10277                    next unless ( $n + 1 >= $nmax );
10278                }
10279            }
10280
10281            # opening paren..
10282            elsif ( $types_to_go[$imid] eq '(' ) {
10283
10284                # No longer doing this
10285            }
10286
10287            elsif ( $types_to_go[$imid] eq ')' ) {
10288
10289                # No longer doing this
10290            }
10291
10292            # keep a terminal colon
10293            elsif ( $types_to_go[$imid] eq ':' ) {
10294                next;
10295            }
10296
10297            # keep a terminal for-semicolon
10298            elsif ( $types_to_go[$imid] eq 'f' ) {
10299                next;
10300            }
10301
10302            # if '=' at end of line ...
10303            elsif ( $types_to_go[$imid] eq '=' ) {
10304                my $is_math = (
10305                    ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
10306
10307                    # note no '$' in pattern because -> can start long identifier
10308                    && !grep { $_ =~ /^(->|=>|[\,])/ }
10309                      @types_to_go[ $imidr .. $il ]
10310                );
10311
10312                # retain the break after the '=' unless ...
10313                next unless (
10314
10315                    # '=' is followed by a number and looks like math
10316                    ( $types_to_go[$imidr] eq 'n' && $is_math )
10317
10318                    # or followed by a scalar and looks like math
10319                    || ( ( $types_to_go[$imidr] eq 'i' )
10320                        && ( $tokens_to_go[$imidr] =~ /^\$/ )
10321                        && $is_math )
10322
10323                    # or followed by a single "short" token ('12' is arbitrary)
10324                    || ( $il == $imidr
10325                        && token_sequence_length( $imidr, $imidr ) < 12 )
10326
10327                );
10328            }
10329
10330            # for keywords..
10331            elsif ( $types_to_go[$imid] eq 'k' ) {
10332
10333                # make major control keywords stand out
10334                # (recombine.t)
10335                next if ( $tokens_to_go[$imid] =~ /^(last|next|redo|return)$/ );
10336            }
10337
10338            #----------------------------------------------------------
10339            # examine token at $imidr (left end of second line of pair)
10340            #----------------------------------------------------------
10341
10342            # do not recombine lines with leading &&, ||, or :
10343            if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) {
10344                next;
10345            }
10346
10347            # Identify and recombine a broken ?/: chain
10348            elsif ( $types_to_go[$imidr] eq '?' ) {
10349
10350                # indexes of line first tokens --
10351                #  mm  - line before previous line
10352                #  f   - previous line
10353                #     <-- this line
10354                #  ff  - next line
10355                #  fff - line after next
10356                my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
10357                my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
10358                my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
10359                my $seqno = $type_sequence_to_go[$imidr];
10360
10361                my $f_ok =
10362                  ( $tokens_to_go[$if] eq ':'
10363                      && $type_sequence_to_go[$if] ==
10364                      $seqno - TYPE_SEQUENCE_INCREMENT );
10365                my $mm_ok =
10366                  ( $imm >= 0
10367                      && $tokens_to_go[$imm] eq ':'
10368                      && $type_sequence_to_go[$imm] ==
10369                      $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
10370
10371                my $ff_ok =
10372                  ( $iff > 0
10373                      && $tokens_to_go[$iff] eq ':'
10374                      && $type_sequence_to_go[$iff] == $seqno );
10375                my $fff_ok =
10376                  ( $ifff > 0
10377                      && $tokens_to_go[$ifff] eq ':'
10378                      && $type_sequence_to_go[$ifff] ==
10379                      $seqno + TYPE_SEQUENCE_INCREMENT );
10380
10381                # we require that this '?' be part of a correct sequence
10382                # of 3 in a row or else no recombination is done.
10383                next unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
10384                $forced_breakpoint_to_go[$imid] = 0;
10385            }
10386
10387            # do not recombine lines with leading '.'
10388            elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
10389                my $i_next_nonblank = $imidr + 1;
10390                if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
10391                    $i_next_nonblank++;
10392                }
10393
10394=pod
10395      ... unless there is just one and we can reduce this to
10396      two lines if we do.  For example, this :
10397
10398                $bodyA .=
10399                  '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
10400
10401      looks better than this:
10402                $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
10403                   . '$args .= $pat;'
10404
10405
10406      ... or this would strand a short token on the last line, like this
10407                . "some long qoute"
10408                . "\n";
10409
10410=cut
10411
10412                next unless (
10413
10414                    (
10415                        $n == 2
10416                        && $n == $nmax
10417                        && $types_to_go[$if] ne $types_to_go[$imidr]
10418                    )
10419
10420                    || (
10421                        (
10422                            (
10423                                $n == $nmax
10424                                && token_sequence_length( $i_next_nonblank,
10425                                    $il ) <
10426                                $rOpts->{'short-concatenation-item-length'} + 1
10427                            ) || $types_to_go[$i_next_nonblank] eq 'Q'
10428                        )
10429                        && $i_next_nonblank <= $il
10430                        && length( $tokens_to_go[$i_next_nonblank] ) <
10431                        $rOpts->{'short-concatenation-item-length'}
10432                    )
10433                );
10434            }
10435
10436            # handle leading keyword..
10437            elsif ( $types_to_go[$imidr] eq 'k' ) {
10438
10439                # handle leading "and" and "or"
10440                if ( $tokens_to_go[$imidr] =~ /^(and|or)$/ ) {
10441
10442                    # Decide if we will combine a single terminal 'and' and
10443                    # 'or' after an 'if' or 'unless'.  We should consider the
10444                    # possible vertical alignment, and visual clutter.
10445
10446=pod
10447
10448    This looks best with the 'and' on the same line as the 'if':
10449
10450        $a = 1
10451          if $seconds and $nu < 2;
10452
10453    But this looks better as shown:
10454
10455        $a = 1
10456          if !$this->{Parents}{$_}
10457          or $this->{Parents}{$_} eq $_;
10458
10459    Eventually, it would be nice to look for similarities (such as 'this' or
10460    'Parents'), but for now I'm using a simple rule that says that the
10461    resulting line length must not be more than half the maximum line length
10462    (making it 80/2 = 40 characters by default).
10463
10464=cut
10465
10466                    next unless (
10467                        $n == $nmax    # if this is the last line
10468                        && $types_to_go[$il] eq ';'  # ending in ';'
10469                        && $types_to_go[$if] eq 'k'  # after an 'if' or 'unless'
10470                        && $tokens_to_go[$if] =~ /^(if|unless)$/
10471
10472                        # and if this doesn't make a long last line
10473                        && total_line_length( $if, $il ) <=
10474                        $rOpts_maximum_line_length / 2
10475                    );
10476
10477                    # override breakpoint
10478                    $forced_breakpoint_to_go[$imid] = 0;
10479                }
10480
10481                # handle leading "if" and "unless"
10482                elsif ( $tokens_to_go[$imidr] =~ /^(if|unless)$/ ) {
10483
10484=pod
10485
10486FIXME: This is experimental..may not be too useful
10487
10488=cut
10489
10490                    next unless (
10491                        $n == $nmax    # if this is the last line
10492                        && $types_to_go[$il] eq ';'  # ending in ';'
10493                        && $types_to_go[$if] eq 'k'  # after an 'if' or 'unless'
10494                        && $tokens_to_go[$if] =~ /^(and|or)$/
10495
10496                        # and if this doesn't make a long last line
10497                        && total_line_length( $if, $il ) <=
10498                        $rOpts_maximum_line_length / 2
10499                    );
10500
10501                    # override breakpoint
10502                    $forced_breakpoint_to_go[$imid] = 0;
10503                }
10504
10505                # handle all other leading keywords
10506                else {
10507
10508                    # keywords look best at start of lines,
10509                    # but combine things like "1 while"
10510                    next
10511                      if ( ( $types_to_go[$imid] ne 'k' )
10512                        && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) );
10513                }
10514            }
10515
10516            # similar treatment of && and || as above for 'and' and 'or':
10517            elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
10518
10519                # maybe looking at something like:
10520                #   unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
10521
10522                next unless (
10523                    $n == $nmax    # if this is the last line
10524                    && $types_to_go[$il] eq ';'    # ending in ';'
10525                    && $types_to_go[$if] eq 'k'    # after an 'if' or 'unless'
10526                    && $tokens_to_go[$if] =~ /^(if|unless)$/
10527
10528                    # and if this doesn't make a long last line
10529                    && total_line_length( $if, $il ) <=
10530                    $rOpts_maximum_line_length / 2
10531                );
10532
10533                # override breakpoint
10534                $forced_breakpoint_to_go[$imid] = 0;
10535            }
10536
10537            # honor hard breakpoints
10538            next if ( $forced_breakpoint_to_go[$imid] > 0 );
10539
10540            #----------------------------------------------------------
10541            # end of special recombination rules
10542            #----------------------------------------------------------
10543
10544            my $bs = $bond_strength_to_go[$imid];
10545
10546            # combined line cannot be too long
10547            next
10548              if excess_line_length( $if, $il ) > 0;
10549
10550            # do not recombine if we would skip in indentation levels
10551            if ( $n < $nmax ) {
10552                my $if_next = $$ri_first[ $n + 1 ];
10553                next
10554                  if (
10555                    $levels_to_go[$if] < $levels_to_go[$imidr]
10556                    && $levels_to_go[$imidr] < $levels_to_go[$if_next]
10557
10558                    # but an isolated 'if (' is undesirable
10559                    && !(
10560                        $n == 1
10561                        && $imid - $if <= 2
10562                        && $types_to_go[$if]  eq 'k'
10563                        && $tokens_to_go[$if] eq 'if'
10564                        && $tokens_to_go[$imid] ne '('
10565                    )
10566
10567                    #
10568                  );
10569            }
10570
10571            # honor no-break's
10572            next if ( $bs == NO_BREAK );
10573
10574            # remember the pair with the greatest bond strength
10575            if ( !$n_best ) {
10576                $n_best  = $n;
10577                $bs_best = $bs;
10578            }
10579            else {
10580
10581                if ( $bs > $bs_best ) {
10582                    $n_best  = $n;
10583                    $bs_best = $bs;
10584                }
10585
10586                # we have 2 or more candidates, so need another pass
10587                $more_to_do++;
10588            }
10589        }
10590
10591        # recombine the pair with the greatest bond strength
10592        if ($n_best) {
10593            splice @$ri_first, $n_best, 1;
10594            splice @$ri_last, $n_best - 1, 1;
10595        }
10596    }
10597    return ( $ri_first, $ri_last );
10598}
10599
10600sub set_continuation_breaks {
10601
10602    # Define an array of indexes for inserting newline characters to
10603    # keep the line lengths below the maximum desired length.  There is
10604    # an implied break after the last token, so it need not be included.
10605    # We'll break at points where the bond strength is lowest.
10606    my $saw_good_break = shift;
10607    my @i_first        = ();      # the first index to output
10608    my @i_last         = ();      # the last index to output
10609    my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
10610    if ( $tokens_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
10611
10612    set_bond_strengths();
10613
10614    my $imin = 0;
10615    my $imax = $max_index_to_go;
10616    if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10617    if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10618    my $i_begin = $imin;
10619
10620    my $leading_spaces          = leading_spaces_to_go($imin);
10621    my $line_count              = 0;
10622    my $last_break_strength     = NO_BREAK;
10623    my $i_last_break            = -1;
10624    my $max_bias                = 0.001;
10625    my $leading_alignment_token = "";
10626    my $leading_alignment_type  = "";
10627
10628    # see if any ?/:'s are in order
10629    my $colons_in_order = 1;
10630    my $last_tok        = "";
10631    my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
10632    foreach my $tok (@colon_list) {
10633        if ( $tok eq $last_tok ) { $colons_in_order = 0; last }
10634        $last_tok = $tok;
10635    }
10636
10637    # This is a sufficient but not necessary condition for colon chain
10638    my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
10639
10640    while ( $i_begin <= $imax ) {
10641        my $lowest_strength        = NO_BREAK;
10642        my $starting_sum           = $lengths_to_go[$i_begin];
10643        my $i_lowest               = -1;
10644        my $i_test                 = -1;
10645        my $lowest_next_token      = '';
10646        my $lowest_next_type       = 'b';
10647        my $i_lowest_next_nonblank = -1;
10648
10649        # loop to find next break point
10650        for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
10651            my $type            = $types_to_go[$i_test];
10652            my $token           = $tokens_to_go[$i_test];
10653            my $next_type       = $types_to_go[ $i_test + 1 ];
10654            my $next_token      = $tokens_to_go[ $i_test + 1 ];
10655            my $i_next_nonblank =
10656              ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
10657            my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
10658            my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
10659            my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
10660            my $strength                 = $bond_strength_to_go[$i_test];
10661            my $must_break               = 0;
10662
10663            if (
10664
10665                # Try to put a break where requested by scan_list
10666                $forced_breakpoint_to_go[$i_test]
10667
10668                # break between ) { in a continued line so that the '{' can
10669                # be outdented
10670                # See similar logic in scan_list which catches instances
10671                # where a line is just something like ') {'
10672                || ( $line_count
10673                    && ( $token              eq ')' )
10674                    && ( $next_nonblank_type eq '{' )
10675                    && ($next_nonblank_block_type)
10676                    && !$rOpts->{'opening-brace-always-on-right'} )
10677
10678                # There is an implied forced break at a terminal opening brace
10679                || ( ( $type eq '{' ) && ( $i_test == $imax ) )
10680
10681              )
10682            {
10683
10684                # Forced breakpoints must sometimes be overridden because of a
10685                # side comment causing a NO_BREAK.  It is easier to catch this
10686                # here than when they are set.
10687                if ( $strength < NO_BREAK ) {
10688                    $strength   = $lowest_strength / 2;
10689                    $must_break = 1;
10690                }
10691            }
10692
10693            # quit if a break here would put a good terminal token on
10694            # the next line and we already have a possible break
10695            if (
10696                !$must_break
10697                && ( $next_nonblank_type =~ /^[\;\,]$/ )
10698                && (
10699                    (
10700                        $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
10701                        - $starting_sum
10702                    ) > $rOpts_maximum_line_length
10703                )
10704              )
10705            {
10706                last if ( $i_lowest >= 0 );
10707            }
10708
10709            # Avoid a break which would strand a single punctuation
10710            # token.  For example, we do not want to strand a leading
10711            # '.' which is followed by a long quoted string.
10712            if (
10713                !$must_break
10714                && ( $i_test == $i_begin )
10715                && ( $i_test < $imax )
10716                && ( $token eq $type )
10717                && (
10718                    (
10719                        $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
10720                        $starting_sum
10721                    ) <= $rOpts_maximum_line_length
10722                )
10723              )
10724            {
10725                $i_test++;
10726
10727                if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
10728                    $i_test++;
10729                }
10730                redo;
10731            }
10732
10733            if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
10734            {
10735
10736                # break at previous best break if it would have produced
10737                # a leading alignment of certain common tokens, and it
10738                # is different from the latest candidate break
10739                last
10740                  if (
10741                    $leading_alignment_type
10742                    && ( $next_nonblank_type ne $leading_alignment_type
10743                        || $next_nonblank_token ne $leading_alignment_token )
10744                  );
10745
10746                # Force at least one breakpoint if old code had good break
10747                # It is only called if a breakpoint is required or desired.
10748                #print "at tok=$token next=$next_nonblank_token str=$strength i=$i_test i-last=$i_last_break i_low= $i_lowest str= $lowest_strength\n";
10749                last if (
10750                    $i_test == $imax                # we are at the end
10751                    && !$forced_breakpoint_count    #
10752                    && $saw_good_break              # old line had good break
10753                    && $type eq ';'                 # and this line ends in a ;
10754                    && $i_last_break < 0        # and we haven't made a break
10755                    && $i_lowest > 0            # and we saw a possible break
10756                    && $i_lowest < $imax - 1    # (but not just before this ;)
10757                    && $strength - $lowest_strength < 0.5 * WEAK # and it's good
10758                );
10759
10760                $lowest_strength        = $strength;
10761                $i_lowest               = $i_test;
10762                $lowest_next_token      = $next_nonblank_token;
10763                $lowest_next_type       = $next_nonblank_type;
10764                $i_lowest_next_nonblank = $i_next_nonblank;
10765                last if $must_break;
10766
10767                # set flags to remember if a break here will produce a
10768                # leading alignment of certain common tokens
10769                if (
10770                    $line_count > 0
10771                    && $i_test < $imax
10772                    && ( $lowest_strength - $last_break_strength <= $max_bias )
10773                    && ( $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/
10774                        && $types_to_go[$i_begin] eq $next_nonblank_type )
10775                    || ( $tokens_to_go[$i_begin] =~ /^(and|or)$/
10776                        && $tokens_to_go[$i_begin] eq $next_nonblank_token )
10777                  )
10778                {
10779                    $leading_alignment_token = $next_nonblank_token;
10780                    $leading_alignment_type  = $next_nonblank_type;
10781                }
10782            }
10783
10784            my $too_long = ( $i_test >= $imax )
10785              ? 1
10786              : (
10787                (
10788                    $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
10789                      $starting_sum
10790                ) > $rOpts_maximum_line_length
10791              );
10792
10793            FORMATTER_DEBUG_FLAG_BREAK
10794              && print
10795"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
10796
10797            # allow one extra terminal token after exceeding line length
10798            # if it would strand this token.
10799            if ( $rOpts_fuzzy_line_length
10800                && $too_long
10801                && ( $i_lowest == $i_test )
10802                && ( length($token) > 1 )
10803                && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
10804            {
10805                $too_long = 0;
10806            }
10807
10808            last if (
10809                ( $i_test == $imax )    # we're done if no more tokens,
10810                || (
10811                    ( $i_lowest >= 0 )    # or no more space and we have a break
10812                    && $too_long
10813                )
10814            );
10815        }
10816
10817        # it's always ok to break at imax if no other break was found
10818        if ( $i_lowest < 0 ) { $i_lowest = $imax }
10819
10820        # semi-final index calculation
10821        my $i_next_nonblank =
10822          ( ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
10823            ? $i_lowest + 2
10824            : $i_lowest + 1 );
10825        my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
10826        my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
10827
10828        #-------------------------------------------------------
10829        # ?/: rule 1 : if a break here will separate a '?' on this
10830        # line from its closing ':', then break at the '?' instead.
10831        #-------------------------------------------------------
10832        foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
10833            next unless ( $tokens_to_go[$i] eq '?' );
10834
10835            # do not break if probable sequence of ?/: statements
10836            next if ($is_colon_chain);
10837
10838            # do not break if statement is broken by side comment
10839            next
10840              if (
10841                $tokens_to_go[$max_index_to_go] eq '#'
10842                && terminal_type( \@types_to_go, \@block_type_to_go, 0,
10843                    $max_index_to_go ) !~ /^[\;\}]$/
10844              );
10845
10846            # no break needed if matching : is also on the line
10847            next
10848              if ( $mate_index_to_go[$i] >= 0
10849                && $mate_index_to_go[$i] <= $i_next_nonblank );
10850
10851            $i_lowest = $i;
10852            if ( $want_break_before{'?'} ) { $i_lowest-- }
10853            last;
10854        }
10855
10856        # final index calculation
10857        $i_next_nonblank =
10858          ( ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
10859            ? $i_lowest + 2
10860            : $i_lowest + 1 );
10861        $next_nonblank_type  = $types_to_go[$i_next_nonblank];
10862        $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
10863
10864        FORMATTER_DEBUG_FLAG_BREAK
10865          && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
10866
10867        #-------------------------------------------------------
10868        # ?/: rule 2 : if we break at a '?', then break at its ':'
10869        #
10870        # Note: this rule is also in sub scan_list to handle a break
10871        # at the start and end of a line (in case breaks are dictated
10872        # by side comments).
10873        #-------------------------------------------------------
10874        if ( $next_nonblank_type eq '?' ) {
10875            set_closing_breakpoint($i_next_nonblank);
10876        }
10877        elsif ( $types_to_go[$i_lowest] eq '?' ) {
10878            set_closing_breakpoint($i_lowest);
10879        }
10880
10881        #-------------------------------------------------------
10882        # ?/: rule 3 : if we break at a ':' then we save
10883        # its location for further work below.  We may need to go
10884        # back and break at its '?'.
10885        #-------------------------------------------------------
10886        if ( $next_nonblank_type eq ':' ) {
10887            push @i_colon_breaks, $i_next_nonblank;
10888        }
10889        elsif ( $types_to_go[$i_lowest] eq ':' ) {
10890            push @i_colon_breaks, $i_lowest;
10891        }
10892
10893        # here we should set breaks for all '?'/':' pairs which are
10894        # separated by this line
10895
10896        $line_count++;
10897
10898        # save this line segment, after trimming blanks at the ends
10899        push ( @i_first,
10900            ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
10901        push ( @i_last,
10902            ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
10903
10904        # set a forced breakpoint at a container opening, if necessary, to
10905        # signal a break at a closing container.  Excepting '(' for now.
10906        if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
10907            && !$forced_breakpoint_to_go[$i_lowest] )
10908        {
10909            set_closing_breakpoint($i_lowest);
10910        }
10911
10912        # get ready to go again
10913        $i_begin                 = $i_lowest + 1;
10914        $last_break_strength     = $lowest_strength;
10915        $i_last_break            = $i_lowest;
10916        $leading_alignment_token = "";
10917        $leading_alignment_type  = "";
10918        $lowest_next_token       = '';
10919        $lowest_next_type        = 'b';
10920
10921        if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
10922            $i_begin++;
10923        }
10924
10925        # update indentation size
10926        if ( $i_begin <= $imax ) {
10927            $leading_spaces = leading_spaces_to_go($i_begin);
10928        }
10929    }
10930
10931    #-------------------------------------------------------
10932    # ?/: rule 4 -- if we broke at a ':', then break at
10933    # corresponding '?' unless this is a chain of ?: expressions
10934    #-------------------------------------------------------
10935    if (@i_colon_breaks) {
10936
10937        # using a simple method for deciding if we are in a ?/: chain --
10938        # this is a chain if it has multiple ?/: pairs all in order;
10939        # otherwise not.
10940        # Note that if line starts in a ':' we count that above as a break
10941        my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
10942
10943        unless ($is_chain) {
10944            my @insert_list = ();
10945            foreach my $i (@i_colon_breaks) {
10946                my $i_question = $mate_index_to_go[$i];
10947                if ( $i_question >= 0 ) {
10948                    if ( $want_break_before{'?'} ) { $i_question-- }
10949                    if ( $i_question >= 0 ) {
10950                        push @insert_list, $i_question;
10951                    }
10952                }
10953                insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
10954            }
10955        }
10956    }
10957    return \@i_first, \@i_last;
10958}
10959
10960sub insert_additional_breaks {
10961
10962    # this routine will add line breaks at requested locations after
10963    # sub set_continuation_breaks has made preliminary breaks.
10964
10965    my ( $ri_break_list, $ri_first, $ri_last ) = @_;
10966    my $i_f;
10967    my $i_l;
10968    my $line_number = 0;
10969    foreach my $i_break_left ( sort @$ri_break_list ) {
10970
10971        $i_f = $$ri_first[$line_number];
10972        $i_l = $$ri_last[$line_number];
10973        while ( $i_break_left >= $i_l ) {
10974            $line_number++;
10975
10976            # shouldn't happen unless caller passes bad indexes
10977            if ( $line_number >= @$ri_last ) {
10978                warning(
10979"Non-fatal program bug: couldn't set break at $i_break_left\n"
10980                );
10981                report_definite_bug();
10982                return;
10983            }
10984            $i_f = $$ri_first[$line_number];
10985            $i_l = $$ri_last[$line_number];
10986        }
10987
10988        my $i_break_right = $i_break_left + 1;
10989        if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
10990
10991        if ( $i_break_left >= $i_f
10992            && $i_break_left < $i_l
10993            && $i_break_right > $i_f
10994            && $i_break_right <= $i_l )
10995        {
10996            splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
10997            splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
10998        }
10999    }
11000}
11001
11002sub set_closing_breakpoint {
11003
11004    # set a breakpoint at a matching closing token
11005    # at present, this is only used to break at a ':' which matches a '?'
11006    my $i_break = shift;
11007
11008    if ( $mate_index_to_go[$i_break] >= 0 ) {
11009
11010        # watch out for break between something like '()'
11011        # which can occur under certain error conditions.
11012        # -- infinte recursion will occur (attrib.t)
11013        if ( $mate_index_to_go[$i_break] > $i_break + 1 ) {
11014
11015            # break before } ] and ), but sub set_forced_breakpoint will decide
11016            # to break before or after a ? and :
11017            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
11018            set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
11019        }
11020    }
11021    else {
11022        my $type_sequence = $type_sequence_to_go[$i_break];
11023        if ($type_sequence) {
11024            my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
11025            $postponed_breakpoint{$type_sequence} = 1;
11026        }
11027    }
11028}
11029
11030# check to see if output line tabbing agrees with input line
11031# this can be very useful for debugging a script which has an extra
11032# or missing brace
11033sub compare_indentation_levels {
11034
11035    my ( $python_indentation_level, $structural_indentation_level ) = @_;
11036    if ( ( $python_indentation_level ne $structural_indentation_level )
11037        && ( PerlTidy::Tokenizer::know_input_tabstr() ) )
11038    {
11039        $last_tabbing_disagreement = $input_line_number;
11040
11041        if ($in_tabbing_disagreement) {
11042        }
11043        else {
11044            $tabbing_disagreement_count++;
11045
11046            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
11047                write_logfile_entry(
11048"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
11049                );
11050            }
11051            $in_tabbing_disagreement    = $input_line_number;
11052            $first_tabbing_disagreement = $in_tabbing_disagreement
11053              unless ($first_tabbing_disagreement);
11054        }
11055    }
11056    else {
11057
11058        if ($in_tabbing_disagreement) {
11059
11060            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
11061                write_logfile_entry(
11062"End indentation disagreement from input line $in_tabbing_disagreement\n"
11063                );
11064
11065                if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
11066                    write_logfile_entry(
11067                        "No further tabbing disagreements will be noted\n");
11068                }
11069            }
11070            $in_tabbing_disagreement = 0;
11071        }
11072    }
11073    $input_line_tabbing = undef;    # deactivate test for this input line
11074}
11075
11076#####################################################################
11077#
11078# the PerlTidy::IndentationItem class supplies items which contain
11079# how much whitespace should be used at the start of a line
11080#
11081#####################################################################
11082
11083package PerlTidy::IndentationItem;
11084
11085# Indexes for indentation items
11086use constant SPACES             => 0;     # total leading white spaces
11087use constant LEVEL              => 1;     # the indentation 'level'
11088use constant CI_LEVEL           => 2;     # the 'continuation level'
11089use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
11090                                          # for this level
11091use constant CLOSED             => 4;     # index where we saw closing '}'
11092use constant COMMA_COUNT        => 5;     # how many commas at this level?
11093use constant SEQUENCE_NUMBER    => 6;     # output batch number
11094use constant INDEX              => 7;     # index in output batch list
11095use constant HAVE_CHILD         => 8;     # any dependents?
11096use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
11097                                          # we would like to move to get
11098                                          # alignment (negative if left)
11099use constant ALIGN_PAREN        => 10;    # do we want to try to align
11100                                          # with an opening structure?
11101use constant MARKED             => 11;    # if visited by corrector logic
11102use constant STACK_DEPTH        => 12;    # indentation nesting depth
11103use constant STARTING_INDEX     => 13;    # first token index of this level
11104use constant LAST_EQUALS        => 14;    # index of last '=' in this batch
11105
11106sub new {
11107
11108    # Create an 'indentation_item' which describes one level of leading
11109    # whitespace when the '-lp' indentation is used.  We return
11110    # a reference to an anonymous array of associated variables.
11111    # See constants _xxx for storage scheme.
11112    my (
11113        $class,               $spaces,           $level,
11114        $ci_level,            $available_spaces, $index,
11115        $gnu_sequence_number, $align_paren,      $stack_depth,
11116        $starting_index,
11117      )
11118      = @_;
11119    my $closed            = -1;
11120    my $last_equals       = -1;
11121    my $comma_count       = 0;
11122    my $have_child        = 0;
11123    my $want_right_spaces = 0;
11124    my $marked            = 0;
11125    bless [
11126        $spaces,              $level,          $ci_level,
11127        $available_spaces,    $closed,         $comma_count,
11128        $gnu_sequence_number, $index,          $have_child,
11129        $want_right_spaces,   $align_paren,    $marked,
11130        $stack_depth,         $starting_index, $last_equals,
11131    ], $class;
11132}
11133
11134sub permanently_decrease_AVAILABLE_SPACES {
11135
11136    # make a permanent reduction in the available indentation spaces
11137    # at one indentation item.  NOTE: if there are child nodes, their
11138    # total SPACES must be reduced by the caller.
11139
11140    my ( $item, $spaces_needed ) = @_;
11141    my $available_spaces = $item->get_AVAILABLE_SPACES();
11142    my $deleted_spaces   =
11143      ( $available_spaces > $spaces_needed )
11144      ? $spaces_needed
11145      : $available_spaces;
11146    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
11147    $item->decrease_SPACES($deleted_spaces);
11148    $item->set_RECOVERABLE_SPACES(0);
11149
11150    return $deleted_spaces;
11151}
11152
11153sub tentatively_decrease_AVAILABLE_SPACES {
11154
11155    # We are asked to tentatively delete $spaces_needed of indentation
11156    # for a indentation item.  We may want to undo this later.  NOTE: if
11157    # there are child nodes, their total SPACES must be reduced by the
11158    # caller.
11159    my ( $item, $spaces_needed ) = @_;
11160    my $available_spaces = $item->get_AVAILABLE_SPACES();
11161    my $deleted_spaces   =
11162      ( $available_spaces > $spaces_needed )
11163      ? $spaces_needed
11164      : $available_spaces;
11165    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
11166    $item->decrease_SPACES($deleted_spaces);
11167    $item->increase_RECOVERABLE_SPACES($deleted_spaces);
11168    return $deleted_spaces;
11169}
11170
11171sub get_STACK_DEPTH {
11172    my $self = shift;
11173    return $self->[STACK_DEPTH];
11174}
11175
11176sub get_SPACES {
11177    my $self = shift;
11178    return $self->[SPACES];
11179}
11180
11181sub get_MARKED {
11182    my $self = shift;
11183    return $self->[MARKED];
11184}
11185
11186sub set_MARKED {
11187    my ( $self, $value ) = @_;
11188    if ( defined($value) ) {
11189        $self->[MARKED] = $value;
11190    }
11191    return $self->[MARKED];
11192}
11193
11194sub get_AVAILABLE_SPACES {
11195    my $self = shift;
11196    return $self->[AVAILABLE_SPACES];
11197}
11198
11199sub decrease_SPACES {
11200    my ( $self, $value ) = @_;
11201    if ( defined($value) ) {
11202        $self->[SPACES] -= $value;
11203    }
11204    return $self->[SPACES];
11205}
11206
11207sub decrease_AVAILABLE_SPACES {
11208    my ( $self, $value ) = @_;
11209    if ( defined($value) ) {
11210        $self->[AVAILABLE_SPACES] -= $value;
11211    }
11212    return $self->[AVAILABLE_SPACES];
11213}
11214
11215sub get_ALIGN_PAREN {
11216    my $self = shift;
11217    return $self->[ALIGN_PAREN];
11218}
11219
11220sub get_RECOVERABLE_SPACES {
11221    my $self = shift;
11222    return $self->[RECOVERABLE_SPACES];
11223}
11224
11225sub set_RECOVERABLE_SPACES {
11226    my ( $self, $value ) = @_;
11227    if ( defined($value) ) {
11228        $self->[RECOVERABLE_SPACES] = $value;
11229    }
11230    return $self->[RECOVERABLE_SPACES];
11231}
11232
11233sub increase_RECOVERABLE_SPACES {
11234    my ( $self, $value ) = @_;
11235    if ( defined($value) ) {
11236        $self->[RECOVERABLE_SPACES] += $value;
11237    }
11238    return $self->[RECOVERABLE_SPACES];
11239}
11240
11241sub get_CI_LEVEL {
11242    my $self = shift;
11243    return $self->[CI_LEVEL];
11244}
11245
11246sub get_LEVEL {
11247    my $self = shift;
11248    return $self->[LEVEL];
11249}
11250
11251sub get_SEQUENCE_NUMBER {
11252    my $self = shift;
11253    return $self->[SEQUENCE_NUMBER];
11254}
11255
11256sub get_INDEX {
11257    my $self = shift;
11258    return $self->[INDEX];
11259}
11260
11261sub get_STARTING_INDEX {
11262    my $self = shift;
11263    return $self->[STARTING_INDEX];
11264}
11265
11266sub set_HAVE_CHILD {
11267    my ( $self, $value ) = @_;
11268    if ( defined($value) ) {
11269        $self->[HAVE_CHILD] = $value;
11270    }
11271    return $self->[HAVE_CHILD];
11272}
11273
11274sub get_HAVE_CHILD {
11275    my $self = shift;
11276    return $self->[HAVE_CHILD];
11277}
11278
11279sub set_LAST_EQUALS {
11280    my ( $self, $value ) = @_;
11281    if ( defined($value) ) {
11282        $self->[LAST_EQUALS] = $value;
11283    }
11284    return $self->[LAST_EQUALS];
11285}
11286
11287sub get_LAST_EQUALS {
11288    my $self = shift;
11289    return $self->[LAST_EQUALS];
11290}
11291
11292sub set_COMMA_COUNT {
11293    my ( $self, $value ) = @_;
11294    if ( defined($value) ) {
11295        $self->[COMMA_COUNT] = $value;
11296    }
11297    return $self->[COMMA_COUNT];
11298}
11299
11300sub get_COMMA_COUNT {
11301    my $self = shift;
11302    return $self->[COMMA_COUNT];
11303}
11304
11305sub set_CLOSED {
11306    my ( $self, $value ) = @_;
11307    if ( defined($value) ) {
11308        $self->[CLOSED] = $value;
11309    }
11310    return $self->[CLOSED];
11311}
11312
11313sub get_CLOSED {
11314    my $self = shift;
11315    return $self->[CLOSED];
11316}
11317
11318#####################################################################
11319#
11320# the PerlTidy::VerticalAligner::Line class supplies an object to
11321# contain a single output line
11322#
11323#####################################################################
11324
11325package PerlTidy::VerticalAligner::Line;
11326
11327{
11328
11329    use strict;
11330    use Carp;
11331
11332    use constant JMAX                    => 0;
11333    use constant JMAX_ORIGINAL_LINE      => 1;
11334    use constant RTOKENS                 => 2;
11335    use constant RFIELDS                 => 3;
11336    use constant RPATTERNS               => 4;
11337    use constant INDENTATION             => 5;
11338    use constant LEADING_SPACE_COUNT     => 6;
11339    use constant OUTDENT_LONG_LINES      => 7;
11340    use constant LIST_TYPE               => 8;
11341    use constant IS_HANGING_SIDE_COMMENT => 9;
11342    use constant RALIGNMENTS             => 10;
11343    use constant MAXIMUM_LINE_LENGTH     => 11;
11344
11345    my %_index_map;
11346    $_index_map{jmax}                    = JMAX;
11347    $_index_map{jmax_original_line}      = JMAX_ORIGINAL_LINE;
11348    $_index_map{rtokens}                 = RTOKENS;
11349    $_index_map{rfields}                 = RFIELDS;
11350    $_index_map{rpatterns}               = RPATTERNS;
11351    $_index_map{indentation}             = INDENTATION;
11352    $_index_map{leading_space_count}     = LEADING_SPACE_COUNT;
11353    $_index_map{outdent_long_lines}      = OUTDENT_LONG_LINES;
11354    $_index_map{list_type}               = LIST_TYPE;
11355    $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
11356    $_index_map{ralignments}             = RALIGNMENTS;
11357    $_index_map{maximum_line_length}     = MAXIMUM_LINE_LENGTH;
11358
11359    my @_default_data = ();
11360    $_default_data[JMAX]                    = undef;
11361    $_default_data[JMAX_ORIGINAL_LINE]      = undef;
11362    $_default_data[RTOKENS]                 = undef;
11363    $_default_data[RFIELDS]                 = undef;
11364    $_default_data[RPATTERNS]               = undef;
11365    $_default_data[INDENTATION]             = undef;
11366    $_default_data[LEADING_SPACE_COUNT]     = undef;
11367    $_default_data[OUTDENT_LONG_LINES]      = undef;
11368    $_default_data[LIST_TYPE]               = undef;
11369    $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
11370    $_default_data[RALIGNMENTS]             = [];
11371    $_default_data[MAXIMUM_LINE_LENGTH]     = undef;
11372
11373    {
11374
11375        # methods to count object population
11376        my $_count = 0;
11377        sub get_count        { $_count; }
11378        sub _increment_count { ++$_count }
11379        sub _decrement_count { --$_count }
11380    }
11381
11382    # Constructor may be called as a class method
11383    sub new {
11384        my ( $caller, %arg ) = @_;
11385        my $caller_is_obj = ref($caller);
11386        my $class = $caller_is_obj || $caller;
11387        no strict "refs";
11388        my $self = bless [], $class;
11389
11390        $self->[RALIGNMENTS] = [];
11391
11392        foreach my $member ( keys %_index_map ) {
11393            my $index = $_index_map{$member};
11394            if ( exists $arg{$member} ) { $self->[$index] = $arg{$member} }
11395            elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
11396            else { $self->[$index] = $_default_data[$index] }
11397        }
11398
11399        $self->_increment_count();
11400        return $self;
11401    }
11402
11403    sub DESTROY {
11404        $_[0]->_decrement_count();
11405    }
11406
11407    sub get_jmax                    { $_[0]->[JMAX] }
11408    sub get_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE] }
11409    sub get_rtokens                 { $_[0]->[RTOKENS] }
11410    sub get_rfields                 { $_[0]->[RFIELDS] }
11411    sub get_rpatterns               { $_[0]->[RPATTERNS] }
11412    sub get_indentation             { $_[0]->[INDENTATION] }
11413    sub get_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT] }
11414    sub get_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES] }
11415    sub get_list_type               { $_[0]->[LIST_TYPE] }
11416    sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
11417
11418    sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
11419    sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
11420    sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
11421    sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
11422
11423    sub get_starting_column {
11424        $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
11425    }
11426
11427    sub increment_column {
11428        $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
11429    }
11430    sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
11431
11432    sub current_field_width {
11433        my $self = shift;
11434        my ($j) = @_;
11435        if ( $j == 0 ) {
11436            return $self->get_column($j);
11437        }
11438        else {
11439            return $self->get_column($j) - $self->get_column( $j - 1 );
11440        }
11441    }
11442
11443    sub field_width_growth {
11444        my $self = shift;
11445        my $j    = shift;
11446        return $self->get_column($j) - $self->get_starting_column($j);
11447    }
11448
11449    sub starting_field_width {
11450        my $self = shift;
11451        my $j    = shift;
11452        if ( $j == 0 ) {
11453            return $self->get_starting_column($j);
11454        }
11455        else {
11456            return $self->get_starting_column($j) -
11457              $self->get_starting_column( $j - 1 );
11458        }
11459    }
11460
11461    sub increase_field_width {
11462
11463        my $self = shift;
11464        my ( $j, $pad ) = @_;
11465        my $jmax = $self->get_jmax();
11466        for my $k ( $j .. $jmax ) {
11467            $self->increment_column( $k, $pad );
11468        }
11469    }
11470
11471    sub get_available_space_on_right {
11472        my $self = shift;
11473        my $jmax = $self->get_jmax();
11474        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
11475    }
11476
11477    sub set_jmax                { $_[0]->[JMAX]                = $_[1] }
11478    sub set_jmax_original_line  { $_[0]->[JMAX_ORIGINAL_LINE]  = $_[1] }
11479    sub set_rtokens             { $_[0]->[RTOKENS]             = $_[1] }
11480    sub set_rfields             { $_[0]->[RFIELDS]             = $_[1] }
11481    sub set_rpatterns           { $_[0]->[RPATTERNS]           = $_[1] }
11482    sub set_indentation         { $_[0]->[INDENTATION]         = $_[1] }
11483    sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
11484    sub set_outdent_long_lines  { $_[0]->[OUTDENT_LONG_LINES]  = $_[1] }
11485    sub set_list_type           { $_[0]->[LIST_TYPE]           = $_[1] }
11486    sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
11487    sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
11488
11489}
11490
11491#####################################################################
11492#
11493# the PerlTidy::VerticalAligner::Alignment class holds information
11494# on a single column being aligned
11495#
11496#####################################################################
11497package PerlTidy::VerticalAligner::Alignment;
11498
11499{
11500
11501    use strict;
11502
11503    #use Carp;
11504
11505    # Symbolic array indexes
11506    use constant COLUMN          => 0;    # the current column number
11507    use constant STARTING_COLUMN => 1;    # column number when created
11508    use constant MATCHING_TOKEN  => 2;    # what token we are matching
11509    use constant STARTING_LINE   => 3;    # the line index of creation
11510    use constant ENDING_LINE     => 4;    # the most recent line to use it
11511    use constant SAVED_COLUMN    => 5;    # the most recent line to use it
11512    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
11513                                          # (just its index in an array)
11514
11515    # Correspondence between variables and array indexes
11516    my %_index_map;
11517    $_index_map{column}          = COLUMN;
11518    $_index_map{starting_column} = STARTING_COLUMN;
11519    $_index_map{matching_token}  = MATCHING_TOKEN;
11520    $_index_map{starting_line}   = STARTING_LINE;
11521    $_index_map{ending_line}     = ENDING_LINE;
11522    $_index_map{saved_column}    = SAVED_COLUMN;
11523    $_index_map{serial_number}   = SERIAL_NUMBER;
11524
11525    my @_default_data = ();
11526    $_default_data[COLUMN]          = undef;
11527    $_default_data[STARTING_COLUMN] = undef;
11528    $_default_data[MATCHING_TOKEN]  = undef;
11529    $_default_data[STARTING_LINE]   = undef;
11530    $_default_data[ENDING_LINE]     = undef;
11531    $_default_data[SAVED_COLUMN]    = undef;
11532    $_default_data[SERIAL_NUMBER]   = undef;
11533
11534    # class population count
11535    {
11536        my $_count = 0;
11537        sub get_count        { $_count; }
11538        sub _increment_count { ++$_count }
11539        sub _decrement_count { --$_count }
11540    }
11541
11542    # constructor
11543    sub new {
11544        my ( $caller, %arg ) = @_;
11545        my $caller_is_obj = ref($caller);
11546        my $class = $caller_is_obj || $caller;
11547        no strict "refs";
11548        my $self = bless [], $class;
11549
11550        foreach my $member ( keys %_index_map ) {
11551            my $index = $_index_map{$member};
11552            if ( exists $arg{$member} ) { $self->[$index] = $arg{$member} }
11553            elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
11554            else { $self->[$index] = $_default_data[$index] }
11555        }
11556        $self->_increment_count();
11557        return $self;
11558    }
11559
11560    sub DESTROY {
11561        $_[0]->_decrement_count();
11562    }
11563
11564    sub get_column          { return $_[0]->[COLUMN] }
11565    sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
11566    sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
11567    sub get_starting_line   { return $_[0]->[STARTING_LINE] }
11568    sub get_ending_line     { return $_[0]->[ENDING_LINE] }
11569    sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
11570
11571    sub set_column          { $_[0]->[COLUMN]          = $_[1] }
11572    sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
11573    sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
11574    sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
11575    sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
11576    sub increment_column { $_[0]->[COLUMN] += $_[1] }
11577
11578    sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
11579    sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
11580
11581}
11582
11583package PerlTidy::VerticalAligner;
11584
11585=pod
11586
11587The PerlTidy::VerticalAligner package collects output lines and attempts to line
11588up certain common tokens, such as => and #, which are identified
11589by the calling routine.
11590
11591There are two main routines: append_line and flush.  Append acts as a storage
11592buffer, collecting lines into a group which can be vertically aligned.
11593When alignment is no longer possible or desirable, it dumps the group
11594to flush.
11595
11596    append_line -----> flush
11597
11598    collects          writes
11599    vertical          one
11600    groups            group
11601
11602=cut
11603
11604BEGIN {
11605
11606    # Caution: these debug flags produce a lot of output
11607    # They should all be 0 except when debugging small scripts
11608
11609    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
11610    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
11611
11612    my $debug_warning = sub {
11613        print "VALIGN_DEBUGGING with key $_[0]\n";
11614    };
11615
11616    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
11617    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
11618
11619}
11620
11621use vars qw(
11622  $vertical_aligner_self
11623  $current_line
11624  $maximum_alignment_index
11625  $ralignment_list
11626  $maximum_jmax_seen
11627  $minimum_jmax_seen
11628  $previous_minimum_jmax_seen
11629  $previous_maximum_jmax_seen
11630  $maximum_line_index
11631  $group_level
11632  $last_group_level_written
11633  $extra_indent_ok
11634  $zero_count
11635  @group_lines
11636  $last_comment_column
11637  $last_side_comment_line_number
11638  $last_side_comment_length
11639  $last_side_comment_level
11640  $outdented_line_count
11641  $first_outdented_line_at
11642  $last_outdented_line_at
11643  $diagnostics_object
11644  $logger_object
11645  $file_writer_object
11646  $rOpts
11647
11648  $rOpts_maximum_line_length
11649  $rOpts_continuation_indentation
11650  $rOpts_indent_columns
11651  $rOpts_tabs
11652
11653  $rOpts_maximum_whitespace_columns
11654  $rOpts_big_space_jump
11655  $rOpts_minimum_space_to_comment
11656  $rOpts_maximum_space_to_comment
11657
11658);
11659
11660sub initialize {
11661
11662    my $class;
11663
11664    ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
11665      = @_;
11666
11667    # variables describing the entire space group:
11668
11669    $ralignment_list            = [];
11670    $group_level                = 0;
11671    $last_group_level_written   = -1;
11672    $extra_indent_ok            = 0;    # can we move all lines to the right?
11673    $last_side_comment_length   = 0;
11674    $maximum_jmax_seen          = 0;
11675    $minimum_jmax_seen          = 0;
11676    $previous_minimum_jmax_seen = 0;
11677    $previous_maximum_jmax_seen = 0;
11678
11679    # variables describing each line of the group
11680    @group_lines = ();                  # list of all lines in group
11681
11682    $outdented_line_count          = 0;
11683    $first_outdented_line_at       = 0;
11684    $last_outdented_line_at        = 0;
11685    $last_side_comment_line_number = 0;
11686    $last_side_comment_level       = -1;
11687
11688    # frequently used parameters
11689    $rOpts_indent_columns             = $rOpts->{'indent-columns'};
11690    $rOpts_tabs                       = $rOpts->{'tabs'};
11691    $rOpts_maximum_whitespace_columns = $rOpts->{'maximum-whitespace-columns'};
11692    $rOpts_big_space_jump             = $rOpts->{'big-space-jump'};
11693    $rOpts_minimum_space_to_comment   = $rOpts->{'minimum-space-to-comment'};
11694    $rOpts_maximum_space_to_comment   = $rOpts->{'maximum-space-to-comment'};
11695
11696    forget_side_comment();
11697
11698    initialize_for_new_group();
11699
11700    $vertical_aligner_self = {};
11701    bless $vertical_aligner_self, $class;
11702    return $vertical_aligner_self;
11703}
11704
11705sub initialize_for_new_group {
11706    $maximum_line_index      = -1;      # lines in the current group
11707    $maximum_alignment_index = -1;      # alignments in current group
11708    $zero_count              = 0;       # count consecutive lines without tokens
11709    $current_line            = undef;   # line being matched for alignment
11710}
11711
11712# interface to PerlTidy::Diagnostics routines
11713sub write_diagnostics {
11714    if ($diagnostics_object) {
11715        $diagnostics_object->write_diagnostics(@_);
11716    }
11717}
11718
11719# interface to PerlTidy::Logger routines
11720sub warning {
11721    if ($logger_object) {
11722        $logger_object->warning(@_);
11723    }
11724}
11725
11726sub write_logfile_entry {
11727    if ($logger_object) {
11728        $logger_object->write_logfile_entry(@_);
11729    }
11730}
11731
11732sub report_definite_bug {
11733    if ($logger_object) {
11734        $logger_object->report_definite_bug();
11735    }
11736}
11737
11738sub get_SPACES {
11739
11740    # return the number of leading spaces associated with an indentation
11741    # variable $indentation is either a constant number of spaces or an
11742    # object with a get_SPACES method.
11743    my $indentation = shift;
11744    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
11745}
11746
11747sub get_RECOVERABLE_SPACES {
11748
11749    # return the number of spaces (+ means shift right, - means shift left)
11750    # that we would like to shift a group of lines with the same indentation
11751    # to get them to line up with their opening parens
11752    my $indentation = shift;
11753    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
11754}
11755
11756sub get_STACK_DEPTH {
11757
11758    my $indentation = shift;
11759    return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
11760}
11761
11762sub make_alignment {
11763    my ( $col, $token ) = @_;
11764
11765    # make one new alignment at column $col which aligns token $token
11766    ++$maximum_alignment_index;
11767    my $alignment = new PerlTidy::VerticalAligner::Alignment(
11768        column          => $col,
11769        starting_column => $col,
11770        matching_token  => $token,
11771        starting_line   => $maximum_line_index,
11772        ending_line     => $maximum_line_index,
11773        serial_number   => $maximum_alignment_index,
11774    );
11775    $ralignment_list->[$maximum_alignment_index] = $alignment;
11776    return $alignment;
11777}
11778
11779sub dump_alignments {
11780    print
11781"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
11782    for my $i ( 0 .. $maximum_alignment_index ) {
11783        my $column          = $ralignment_list->[$i]->get_column();
11784        my $starting_column = $ralignment_list->[$i]->get_starting_column();
11785        my $matching_token  = $ralignment_list->[$i]->get_matching_token();
11786        my $starting_line   = $ralignment_list->[$i]->get_starting_line();
11787        my $ending_line     = $ralignment_list->[$i]->get_ending_line();
11788        print
11789"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
11790    }
11791}
11792
11793sub save_alignment_columns {
11794    for my $i ( 0 .. $maximum_alignment_index ) {
11795        $ralignment_list->[$i]->save_column();
11796    }
11797}
11798
11799sub restore_alignment_columns {
11800    for my $i ( 0 .. $maximum_alignment_index ) {
11801        $ralignment_list->[$i]->restore_column();
11802    }
11803}
11804
11805sub forget_side_comment {
11806    $last_comment_column = 0;
11807}
11808
11809sub append_line {
11810
11811=pod
11812
11813sub append is called to place one line in the current vertical group.
11814
11815The input parameters are:
11816    $level = indentation level of this line
11817    $rfields = reference to array of fields
11818    $rpatterns = reference to array of patterns, one per field
11819    $rtokens   = reference to array of tokens starting fields 1,2,..
11820
11821Here is an example of what this package does.  In this example,
11822we are trying to line up both the '=>' and the '#'.
11823
11824        '18' => 'grave',    #   \`
11825        '19' => 'acute',    #   `'
11826        '20' => 'caron',    #   \v
11827<-tabs-><f1-><--field 2 ---><-f3->
11828|            |              |    |
11829|            |              |    |
11830col1        col2         col3 col4
11831
11832The calling routine has already broken the entire line into 3 fields as
11833indicated.  (So the work of identifying promising common tokens has
11834already been done).
11835
11836In this example, there will be 2 tokens being matched: '=>' and '#'.
11837They are the leading parts of fields 2 and 3, but we do need to know
11838what they are so that we can dump a group of lines when these tokens
11839change.
11840
11841The fields contain the actual characters of each field.  The patterns
11842are like the fields, but they contain mainly token types instead
11843of tokens, so they have fewer characters.  They are used to be
11844sure we are matching fields of similar type.
11845
11846In this example, there will be 4 column indexes being adjusted.  The
11847first one is always at zero.  The interior columns are at the start of
11848the matching tokens, and the last one tracks the maximum line length.
11849
11850Basically, each time a new line comes in, it joins the current vertical
11851group if possible.  Otherwise it causes the current group to be dumped
11852and a new group is started.
11853
11854For each new group member, the column locations are increased, as
11855necessary, to make room for the new fields.  When the group is finally
11856output, these column numbers are used to compute the amount of spaces of
11857padding needed for each field.
11858
11859Programming note: the fields are assumed not to have any tab characters.
11860Tabs have been previously removed except for tabs in quoted strings and
11861side comments.  Tabs in these fields can mess up the column counting.
11862The log file warns the user if there are any such tabs.
11863
11864=cut
11865
11866    my (
11867        $level,           $level_end,          $indentation,
11868        $rfields,         $rtokens,            $rpatterns,
11869        $is_forced_break, $outdent_long_lines, $is_terminal_statement,
11870        $do_not_pad
11871      )
11872      = @_;
11873
11874    my $leading_space_count = get_SPACES($indentation);
11875
11876    # number of fields is $jmax
11877    # number of tokens between fields is $jmax-1
11878    my $jmax = $#{$rfields};
11879    $previous_minimum_jmax_seen = $minimum_jmax_seen;
11880    $previous_maximum_jmax_seen = $maximum_jmax_seen;
11881
11882    VALIGN_DEBUG_FLAG_APPEND0 && do {
11883        print
11884"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
11885    };
11886
11887    # patch until new aligner is finished
11888    if ($do_not_pad) { flush() }
11889
11890    # shouldn't happen:
11891    if ( $level < 0 ) { $level = 0 }
11892
11893    # do not align code across indentation level changes
11894    if ( $level != $group_level ) {
11895
11896        # we are allowed to shift a group of lines to the right if its
11897        # level is greater than the previous and next group
11898        $extra_indent_ok =
11899          ( $level < $group_level && $last_group_level_written < $group_level );
11900
11901        flush();
11902
11903        # If we know that this line will get flushed out by itself because
11904        # of level changes, we can leave the extra_indent_ok flag set.
11905        # That way, if we get an external flush call, we will still be
11906        # able to do some -lp alignment if necessary.
11907        $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
11908
11909        $group_level = $level;
11910
11911        # wait until after the above flush to get the leading space
11912        # count because it may have been changed if the -icp flag is in
11913        # effect
11914        $leading_space_count = get_SPACES($indentation);
11915
11916    }
11917
11918    # --------------------------------------------------------------------
11919    # Step 1. Handle simple line of code with no fields to match.
11920    # --------------------------------------------------------------------
11921    if ( $jmax <= 0 ) {
11922        $zero_count++;
11923
11924        if ( $maximum_line_index >= 0 ) {
11925
11926            # flush the current group if it has some aligned columns..
11927            if ( $group_lines[0]->get_jmax() > 1 ) { flush() }
11928
11929            # flush current group if we are just collecting side comments..
11930            elsif (
11931
11932                # ...and we haven't seen a comment lately
11933                ( $zero_count > 3 )
11934
11935                # ..or if this new line doesn't fit to the left of the comments
11936                || ( ( $leading_space_count + length( $$rfields[0] ) ) >
11937                    $group_lines[0]->get_column(0) )
11938              )
11939            {
11940                flush();
11941            }
11942        }
11943
11944        # just write this line directly if no current group, no side comment,
11945        # and no space recovery is needed.
11946        if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
11947        {
11948            write_leader_and_string( $leading_space_count, $$rfields[0], 0,
11949                $outdent_long_lines );
11950            return;
11951        }
11952    }
11953    else {
11954        $zero_count = 0;
11955    }
11956
11957    # programming check: (shouldn't happen)
11958    # an error here implies an incorrect call was made
11959    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
11960        warning(
11961"Program bug in PerlTidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
11962        );
11963        report_definite_bug();
11964    }
11965
11966    # --------------------------------------------------------------------
11967    # create an object to hold this line
11968    # --------------------------------------------------------------------
11969    my $is_hanging_side_comment = 0;
11970    my $new_line                = new PerlTidy::VerticalAligner::Line(
11971        jmax                    => $jmax,
11972        jmax_original_line      => $jmax,
11973        rtokens                 => $rtokens,
11974        rfields                 => $rfields,
11975        rpatterns               => $rpatterns,
11976        indentation             => $indentation,
11977        leading_space_count     => $leading_space_count,
11978        outdent_long_lines      => $outdent_long_lines,
11979        list_type               => "",
11980        is_hanging_side_comment => $is_hanging_side_comment,
11981        maximum_line_length     => $rOpts->{'maximum-line-length'},
11982    );
11983
11984    # --------------------------------------------------------------------
11985    # It simplifies things to create a zero length side comment
11986    # if none exists.
11987    # --------------------------------------------------------------------
11988    make_side_comment( $new_line, $level_end );
11989
11990    # --------------------------------------------------------------------
11991    # Decide if this is a simple list of items.
11992    # There are 3 list types: none, comma, comma-arrow.
11993    # We use this below to be less restrictive in deciding what to align.
11994    # --------------------------------------------------------------------
11995    if ($is_forced_break) {
11996        decide_if_list($new_line);
11997    }
11998
11999    if ($current_line) {
12000
12001        # --------------------------------------------------------------------
12002        # Allow hanging side comment to join current group, if any
12003        # This will help keep side comments aligned, because otherwise we
12004        # will have to start a new group, making alignment less likely.
12005        # --------------------------------------------------------------------
12006        $is_hanging_side_comment =
12007          hanging_comment_check( $new_line, $current_line );
12008
12009        # --------------------------------------------------------------------
12010        # If there is just one previous line, and it has more fields
12011        # than the new line, try to join fields together to get a match with
12012        # the new line.  At the present time, only a single leading '=' is
12013        # allowed to be compressed out.  This is useful in rare cases where
12014        # a table is forced to use old breakpoints because of side comments,
12015        # and the table starts out something like this:
12016        #   my %MonthChars = ('0', 'Jan',   # side comment
12017        #                     '1', 'Feb',
12018        #                     '2', 'Mar',
12019        # Eliminating the '=' field will allow the remaining fields to line up.
12020        # This situation does not occur if there are no side comments
12021        # because scan_list would put a break after the opening '('.
12022        # --------------------------------------------------------------------
12023        eliminate_old_fields( $new_line, $current_line );
12024
12025        # --------------------------------------------------------------------
12026        # If the new line has more fields than the current group,
12027        # see if we can match the first fields and combine the remaining
12028        # fields of the new line.
12029        # --------------------------------------------------------------------
12030        eliminate_new_fields( $new_line, $current_line );
12031
12032        # --------------------------------------------------------------------
12033        # Flush previous group unless all common tokens and patterns match..
12034        # --------------------------------------------------------------------
12035        check_match( $new_line, $current_line );
12036
12037        # --------------------------------------------------------------------
12038        # See if there is space for this line in the current group (if any)
12039        # --------------------------------------------------------------------
12040        if ($current_line) {
12041            check_fit( $new_line, $current_line );
12042        }
12043    }
12044
12045    # --------------------------------------------------------------------
12046    # Append this line to the current group (or start new group)
12047    # --------------------------------------------------------------------
12048    accept_line($new_line);
12049
12050    # Future update to allow this to vary:
12051    $current_line = $new_line if ( $maximum_line_index == 0 );
12052
12053    # --------------------------------------------------------------------
12054    # Step 8. Some old debugging stuff
12055    # --------------------------------------------------------------------
12056    VALIGN_DEBUG_FLAG_APPEND && do {
12057        print "APPEND fields:";
12058        dump_array(@$rfields);
12059        print "APPEND tokens:";
12060        dump_array(@$rtokens);
12061        print "APPEND patterns:";
12062        dump_array(@$rpatterns);
12063        dump_alignments();
12064    };
12065}
12066
12067sub hanging_comment_check {
12068
12069    my $line = shift;
12070    my $jmax = $line->get_jmax();
12071    return 0 unless $jmax == 1;    # must be 2 fields
12072    my $rtokens = $line->get_rtokens();
12073    return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
12074    my $rfields = $line->get_rfields();
12075    return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
12076    my $old_line            = shift;
12077    my $maximum_field_index = $old_line->get_jmax();
12078    return 0
12079      unless $maximum_field_index > $jmax;    # the current line has more fields
12080    my $rpatterns = $line->get_rpatterns();
12081
12082    $line->set_is_hanging_side_comment(1);
12083    $jmax = $maximum_field_index;
12084    $line->set_jmax($jmax);
12085    $$rfields[$jmax] = $$rfields[1];
12086    $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
12087    $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
12088    for ( my $j = 1 ; $j < $jmax ; $j++ ) {
12089        $$rfields[$j] = " ";    # NOTE: caused glitch unless 1 blank, why?
12090        $$rtokens[ $j - 1 ]   = "";
12091        $$rpatterns[ $j - 1 ] = "";
12092    }
12093    return 1;
12094}
12095
12096sub eliminate_old_fields {
12097
12098    my $new_line = shift;
12099    my $jmax     = $new_line->get_jmax();
12100    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
12101    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
12102
12103    # there must be one previous line
12104    return unless ( $maximum_line_index == 0 );
12105
12106    my $old_line            = shift;
12107    my $maximum_field_index = $old_line->get_jmax();
12108
12109    # this line must have fewer fields
12110    return unless $maximum_field_index > $jmax;
12111
12112    # be reasonable, not too few
12113    return unless ( $maximum_field_index - 2 <= $jmax );
12114
12115    # must have side comment
12116    my $old_rfields = $old_line->get_rfields();
12117    return unless ( length( $$old_rfields[$maximum_field_index] ) > 0 );
12118
12119    my $rtokens   = $new_line->get_rtokens();
12120    my $rfields   = $new_line->get_rfields();
12121    my $rpatterns = $new_line->get_rpatterns();
12122
12123    my $old_rtokens   = $old_line->get_rtokens();
12124    my $old_rpatterns = $old_line->get_rpatterns();
12125
12126    my $hid_equals = 0;
12127
12128    my @new_alignments        = ();
12129    my @new_fields            = ();
12130    my @new_matching_patterns = ();
12131    my @new_matching_tokens   = ();
12132
12133    my $j = 0;
12134    my $k;
12135    my $current_field   = '';
12136    my $current_pattern = '';
12137
12138    # loop over all old tokens
12139    my $in_match = 0;
12140    for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
12141        $current_field .= $$old_rfields[$k];
12142        $current_pattern .= $$old_rpatterns[$k];
12143        last if ( $j > $jmax - 1 );
12144
12145        if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
12146            $in_match = 1;
12147            $new_fields[$j]            = $current_field;
12148            $new_matching_patterns[$j] = $current_pattern;
12149            $current_field   = '';
12150            $current_pattern = '';
12151            $new_matching_tokens[$j] = $$old_rtokens[$k];
12152            $new_alignments[$j]      = $old_line->get_alignment($k);
12153            $j++;
12154        }
12155        else {
12156
12157            if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
12158                $hid_equals = 1;
12159            }
12160            last if $in_match;    # disallow gaps in matching field types
12161        }
12162    }
12163
12164    # Modify the current state if we are successful.
12165    # We must exactly reach the ends of both lists for success.
12166    if ( ( $j == $jmax ) && ( $current_field eq '' ) && $hid_equals ) {
12167        $k = $maximum_field_index;
12168        $current_field .= $$old_rfields[$k];
12169        $current_pattern .= $$old_rpatterns[$k];
12170        $new_fields[$j]            = $current_field;
12171        $new_matching_patterns[$j] = $current_pattern;
12172
12173        $new_alignments[$j] = $old_line->get_alignment($k);
12174        $maximum_field_index = $j;
12175
12176        $old_line->set_alignments(@new_alignments);
12177        $old_line->set_jmax($jmax);
12178        $old_line->set_rtokens( \@new_matching_tokens );
12179        $old_line->set_rfields( \@new_fields );
12180        $old_line->set_rpatterns( \@$rpatterns );
12181    }
12182}
12183
12184# create an empty side comment if none exists
12185sub make_side_comment {
12186    my $new_line  = shift;
12187    my $level_end = shift;
12188    my $jmax      = $new_line->get_jmax();
12189    my $rtokens   = $new_line->get_rtokens();
12190
12191    # if line does not have a side comment...
12192    if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
12193        my $rfields   = $new_line->get_rfields();
12194        my $rpatterns = $new_line->get_rpatterns();
12195        $$rtokens[$jmax] = '#';
12196        $$rfields[ ++$jmax ] = '';
12197        $$rpatterns[$jmax] = '#';
12198        $new_line->set_jmax($jmax);
12199        $new_line->set_jmax_original_line($jmax);
12200    }
12201
12202    # line has a side comment..
12203    else {
12204
12205        # don't remember old side comment location for very long
12206        # and don't remember across level changes
12207        my $line_number = $vertical_aligner_self->get_output_line_number();
12208        if ( $level_end < $last_side_comment_level
12209            || $line_number - $last_side_comment_line_number > 12 )
12210        {
12211            forget_side_comment();
12212        }
12213        $last_side_comment_line_number = $line_number;
12214        $last_side_comment_level       = $level_end;
12215    }
12216}
12217
12218sub decide_if_list {
12219
12220    my $line = shift;
12221
12222    # A list will be taken to be a line with a forced break in which all
12223    # of the field separators are commas or comma-arrows (except for the
12224    # trailing #)
12225
12226    # List separator tokens are things like ',3'   or '=>2',
12227    # where the trailing digit is the nesting depth.  Allow braces
12228    # to allow nested list items.
12229    my $rtokens    = $line->get_rtokens();
12230    my $test_token = $$rtokens[0];
12231    if ( $test_token =~ /^(\,|=>)/ ) {
12232        my $list_type = $test_token;
12233        my $jmax      = $line->get_jmax();
12234
12235        foreach my $i ( 1 .. $jmax - 2 ) {
12236
12237            if ( $$rtokens[$i] !~ /^(\,|=>|\{)/ ) {
12238                $list_type = "";
12239                last;
12240            }
12241        }
12242        $line->set_list_type($list_type);
12243    }
12244}
12245
12246sub eliminate_new_fields {
12247
12248    return unless ( $maximum_line_index >= 0 );
12249    my $new_line = shift;
12250    my $jmax     = $new_line->get_jmax();
12251
12252    # must be monotonic variation
12253    return unless ( $previous_maximum_jmax_seen <= $jmax );
12254
12255    # must be more fields in the new line
12256    my $old_line            = shift;
12257    my $maximum_field_index = $old_line->get_jmax();
12258    return unless ( $maximum_field_index < $jmax );
12259
12260    return
12261      unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
12262      ;    # only if monotonic
12263
12264    # never combine fields of a comma list
12265    return
12266      unless ( $maximum_field_index > 1 )
12267      && ( $new_line->get_list_type() !~ /^,/ );
12268
12269    my $rtokens       = $new_line->get_rtokens();
12270    my $rfields       = $new_line->get_rfields();
12271    my $rpatterns     = $new_line->get_rpatterns();
12272    my $old_rpatterns = $old_line->get_rpatterns();
12273    my $old_rtokens   = $old_line->get_rtokens();
12274
12275    # loop over all old tokens except comment
12276    my $match = 1;
12277    my $k;
12278    for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
12279        if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
12280            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
12281        {
12282            $match = 0;
12283            last;
12284        }
12285    }
12286
12287    # first tokens agree, so combine new tokens
12288    if ($match) {
12289        for $k ( $maximum_field_index .. $jmax - 1 ) {
12290
12291            $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
12292            $$rfields[$k] = "";
12293            $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
12294            $$rpatterns[$k] = "";
12295        }
12296
12297        $$rtokens[ $maximum_field_index - 1 ] = '#';
12298        $$rfields[$maximum_field_index]   = $$rfields[$jmax];
12299        $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
12300        $jmax = $maximum_field_index;
12301    }
12302    $new_line->set_jmax($jmax);
12303}
12304
12305sub check_match {
12306
12307    my $new_line = shift;
12308    my $old_line = shift;
12309
12310    my $jmax                = $new_line->get_jmax();
12311    my $maximum_field_index = $old_line->get_jmax();
12312
12313    # flush if this line has too many fields
12314    if ( $jmax > $maximum_field_index ) { flush(); return }
12315
12316    # flush if adding this line would make a non-monotonic field count
12317    if (
12318        ( $maximum_field_index > $jmax )    # this has too few fields
12319        && (
12320            ( $previous_minimum_jmax_seen < $jmax )  # and wouldn't be monotonic
12321            || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
12322        )
12323      )
12324    {
12325        flush();
12326        return;
12327    }
12328
12329    # otherwise append this line if everything matches
12330    my $jmax_original_line      = $new_line->get_jmax_original_line();
12331    my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
12332    my $rtokens                 = $new_line->get_rtokens();
12333    my $rfields                 = $new_line->get_rfields();
12334    my $rpatterns               = $new_line->get_rpatterns();
12335    my $list_type               = $new_line->get_list_type();
12336
12337    my $group_list_type = $old_line->get_list_type();
12338    my $old_rpatterns   = $old_line->get_rpatterns();
12339    my $old_rtokens     = $old_line->get_rtokens();
12340
12341    my $jlimit = $jmax - 1;
12342    if ( $maximum_field_index > $jmax ) {
12343        $jlimit = $jmax_original_line;
12344        --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
12345    }
12346
12347    my $everything_matches = 1;
12348
12349    # common list types always match
12350    unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
12351        || $is_hanging_side_comment )
12352    {
12353
12354        my $leading_space_count = $new_line->get_leading_space_count();
12355        for my $j ( 0 .. $jlimit ) {
12356            my $match = 1;
12357            if (
12358                ( $j < $jlimit )
12359                && ( ( $$old_rtokens[$j] ne $$rtokens[$j] )
12360                    || ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) )
12361              )
12362            {
12363                $match = 0;
12364            }
12365
12366            # Don't let line with fewer fields increase column widths
12367            # ( align3.t )
12368            if ( $maximum_field_index > $jmax ) {
12369                my $pad =
12370                  length( $$rfields[$j] ) - $old_line->current_field_width($j);
12371
12372                if ( $j == 0 ) {
12373                    $pad += $leading_space_count;
12374                }
12375                if ( $pad > 0 ) { $match = 0 }
12376            }
12377
12378            unless ($match) {
12379                $everything_matches = 0;
12380                last;
12381            }
12382        }
12383    }
12384
12385    if ( $maximum_field_index > $jmax ) {
12386
12387        if ($everything_matches) {
12388
12389            my $comment = $$rfields[$jmax];
12390            for $jmax ( $jlimit .. $maximum_field_index ) {
12391                $$rtokens[$jmax] = $$old_rtokens[$jmax];
12392                $$rfields[ ++$jmax ] = '';
12393                $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
12394            }
12395            $$rfields[$jmax] = $comment;
12396            $new_line->set_jmax($jmax);
12397        }
12398    }
12399
12400    flush() unless ($everything_matches);
12401}
12402
12403sub check_fit {
12404
12405    return unless ( $maximum_line_index >= 0 );
12406    my $new_line = shift;
12407    my $old_line = shift;
12408
12409    my $jmax                    = $new_line->get_jmax();
12410    my $leading_space_count     = $new_line->get_leading_space_count();
12411    my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
12412    my $rtokens                 = $new_line->get_rtokens();
12413    my $rfields                 = $new_line->get_rfields();
12414    my $rpatterns               = $new_line->get_rpatterns();
12415
12416    my $group_list_type = $group_lines[0]->get_list_type();
12417
12418    my $padding_so_far    = 0;
12419    my $padding_available = $old_line->get_available_space_on_right();
12420
12421    # save current columns in case this doesn't work
12422    save_alignment_columns();
12423
12424    my ( $j, $pad, $eight, $big_jump );
12425    for $j ( 0 .. $jmax ) {
12426
12427        $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
12428
12429        if ( $j == 0 ) {
12430            $pad += $leading_space_count;
12431        }
12432
12433        # sudden increase or decrease in whitespace space looks bad
12434        # (unless this is a list or side comment).  The number was
12435        # initially '8', but that seems too small and it is now a parameter
12436        # ( eight.t )
12437        # my $eight    = $group_list_type ? 12 : 8;
12438        $big_jump = 0;
12439        $eight    =
12440          $group_list_type
12441          ? 1.5 * $rOpts_big_space_jump
12442          : $rOpts_big_space_jump;
12443
12444        if ( !$is_hanging_side_comment && $j < $jmax - 1 ) {
12445
12446            # a sudden increase of over '8' spaces is too much
12447            if ( $pad > $eight ) { $big_jump = 1 }
12448
12449            # a sudden decrease of over '8' spaces to a new low is too much
12450            elsif ( $pad < 0 ) {
12451                my $test_pad =
12452                  length( $$rfields[$j] ) - $old_line->starting_field_width($j);
12453
12454                if ( $j == 0 ) {
12455                    $test_pad += $leading_space_count;
12456                }
12457                if ( $test_pad < -$eight ) { $big_jump = 1 }
12458            }
12459        }
12460
12461        next if !$big_jump && $pad < 0;
12462
12463        # This line will need space; lets see if we want to accept it..
12464        if (
12465
12466            # not if padding increases too much
12467            $big_jump
12468
12469            # not if this won't fit
12470            || ( $pad > $padding_available )
12471
12472            # or, with the exception of space to side comments, ..
12473            || (
12474                $j < $jmax - 1
12475
12476                # causes too many consecutive columns of whitespace
12477                && (
12478                    (
12479                        $pad + $old_line->field_width_growth($j) >
12480                        $rOpts_maximum_whitespace_columns
12481                    )
12482
12483                )
12484            )
12485          )
12486        {
12487
12488            # revert to starting state then flush; things didn't work out
12489            restore_alignment_columns();
12490            flush();
12491            last;
12492        }
12493
12494        # looks ok, squeeze this field in
12495        $old_line->increase_field_width( $j, $pad );
12496        $padding_available -= $pad;
12497    }
12498}
12499
12500sub accept_line {
12501
12502    my $new_line = shift;
12503    $group_lines[ ++$maximum_line_index ] = $new_line;
12504
12505    # initialize field lengths if starting new group
12506    if ( $maximum_line_index == 0 ) {
12507
12508        my $jmax    = $new_line->get_jmax();
12509        my $rfields = $new_line->get_rfields();
12510        my $rtokens = $new_line->get_rtokens();
12511        my $j;
12512        my $col = $new_line->get_leading_space_count();
12513
12514        for $j ( 0 .. $jmax ) {
12515            $col += length( $$rfields[$j] );
12516
12517            # create initial alignments for the new group
12518            my $token = "";
12519            if ( $j < $jmax ) { $token = $$rtokens[$j] }
12520            my $alignment = make_alignment( $col, $token );
12521            $new_line->set_alignment( $j, $alignment );
12522        }
12523
12524        $maximum_jmax_seen = $jmax;
12525        $minimum_jmax_seen = $jmax;
12526    }
12527
12528    # use previous alignments otherwise
12529    else {
12530        my @new_alignments =
12531          $group_lines[ $maximum_line_index - 1 ]->get_alignments();
12532        $new_line->set_alignments(@new_alignments);
12533    }
12534}
12535
12536sub dump_array {
12537
12538    # debug routine to dump array contents
12539    local $" = ')(';
12540    print "(@_)\n";
12541}
12542
12543=pod
12544
12545flush() sends the current PerlTidy::VerticalAligner group down the pipeline to PerlTidy::FileWriter.
12546
12547=cut
12548
12549sub flush {
12550
12551    return unless ( $maximum_line_index >= 0 );
12552
12553    VALIGN_DEBUG_FLAG_APPEND0 && do {
12554        my $group_list_type = $group_lines[0]->get_list_type();
12555        my ( $a, $b, $c ) = caller();
12556        my $maximum_field_index = $group_lines[0]->get_jmax();
12557        print
12558"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
12559
12560    };
12561
12562    # some small groups are best left unaligned
12563    my $do_not_align = decide_if_aligned();
12564
12565    # optimize side comment location
12566    $do_not_align = adjust_side_comment($do_not_align);
12567
12568    # recover spaces for -lp option if possible
12569    my $extra_leading_spaces = get_extra_leading_spaces();
12570
12571    # all lines of this group have the same basic leading spacing
12572    my $group_leader_length = $group_lines[0]->get_leading_space_count();
12573
12574    # add extra leading spaces if helpful
12575    my $min_ci_gap =
12576      improve_continuation_indentation( $do_not_align, $group_leader_length );
12577
12578    # loop to output all lines
12579    for my $i ( 0 .. $maximum_line_index ) {
12580        my $line = $group_lines[$i];
12581        write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
12582            $group_leader_length, $extra_leading_spaces );
12583    }
12584
12585    initialize_for_new_group();
12586}
12587
12588sub decide_if_aligned {
12589
12590    # Do not try to align two lines which are not really similar
12591    my $group_list_type = $group_lines[0]->get_list_type();
12592    my $do_not_align    =
12593      ( $maximum_line_index < 2
12594          && !$group_list_type
12595          && ( $previous_maximum_jmax_seen != $previous_minimum_jmax_seen ) );
12596
12597    # But try to convert them into a simple comment group if the first line
12598    # a has side comment
12599    my $rfields             = $group_lines[0]->get_rfields();
12600    my $maximum_field_index = $group_lines[0]->get_jmax();
12601    if ( $do_not_align
12602        && ( $maximum_line_index > 0 )
12603        && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
12604    {
12605        combine_fields();
12606        $do_not_align = 0;
12607    }
12608    return $do_not_align;
12609}
12610
12611sub adjust_side_comment {
12612
12613    my $do_not_align = shift;
12614
12615    # let's see if we can move the side comment field out a little
12616    # to improve readability (the last field is always a side comment field)
12617    my $have_side_comment       = 0;
12618    my $first_side_comment_line = -1;
12619    my $maximum_field_index     = $group_lines[0]->get_jmax();
12620    for my $i ( 0 .. $maximum_line_index ) {
12621        my $line = $group_lines[$i];
12622
12623        if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
12624            $have_side_comment       = 1;
12625            $first_side_comment_line = $i;
12626            last;
12627        }
12628    }
12629
12630    my $kmax = $maximum_field_index + 1;
12631
12632    if ($have_side_comment) {
12633
12634        my $line = $group_lines[0];
12635
12636        # the maximum space without exceeding the line length:
12637        my $avail = $line->get_available_space_on_right();
12638
12639        # try to use the previous comment column
12640        my $move = $last_comment_column - $line->get_column( $kmax - 2 );
12641
12642        if ( $kmax > 0 && !$do_not_align ) {
12643
12644            # but if this doesn't work, give up and use the minimum space
12645            if ( $move > $avail ) {
12646                $move = $rOpts_minimum_space_to_comment - 1;
12647            }
12648
12649            # but we want some minimum space to the comment
12650            my $min_move = $rOpts_minimum_space_to_comment - 1;
12651            if ( $move >= 0
12652                && $last_side_comment_length > 0
12653                && ( $first_side_comment_line == 0 )
12654                && $group_level == $last_group_level_written )
12655            {
12656                $min_move = 0;
12657            }
12658
12659            if ( $move < $min_move ) {
12660                $move = $min_move;
12661            }
12662
12663            # if this causes too much space, give up and use the minimum space
12664            if ( $move > $rOpts_maximum_space_to_comment - 1 ) {
12665                $move = $rOpts_minimum_space_to_comment - 1;
12666            }
12667
12668            # don't exceed the available space
12669            if ( $move > $avail ) { $move = $avail }
12670
12671            # we can only increase space, never decrease
12672            if ( $move > 0 ) {
12673                $line->increase_field_width( $maximum_field_index - 1, $move );
12674            }
12675
12676            # remember this column for the next group
12677            $last_comment_column = $line->get_column( $kmax - 2 );
12678        }
12679
12680        else {
12681
12682            # try to at least line up the existing side comment location
12683            if ( $kmax > 0 && $move > 0 && $move < $avail ) {
12684                $line->increase_field_width( $maximum_field_index - 1, $move );
12685                $do_not_align = 0;
12686            }
12687
12688            # reset side comment column if we can't align
12689            else {
12690                forget_side_comment();
12691            }
12692        }
12693    }
12694    return $do_not_align;
12695}
12696
12697sub improve_continuation_indentation {
12698    my ( $do_not_align, $group_leader_length ) = @_;
12699
12700=pod
12701
12702See if we can increase the continuation indentation
12703to move all continuation lines closer to the next field
12704(unless it is a comment).
12705
12706'$min_ci_gap'is the extra indentation that we may need to introduce.
12707We will only introduce this to fields which already have some ci.
12708Without this variable, we would occasionally get something like this
12709(Complex.pm):
12710
12711use overload '+' => \&plus,
12712  '-'            => \&minus,
12713  '*'            => \&multiply,
12714  ...
12715  'tan'          => \&tan,
12716  'atan2'        => \&atan2,
12717
12718Whereas with this variable, we can shift variables over to get this:
12719
12720use overload '+' => \&plus,
12721         '-'     => \&minus,
12722         '*'     => \&multiply,
12723         ...
12724         'tan'   => \&tan,
12725         'atan2' => \&atan2,
12726
12727=cut
12728
12729    my $maximum_field_index = $group_lines[0]->get_jmax();
12730
12731    my $min_ci_gap = $rOpts->{'maximum-line-length'};
12732    if ( $maximum_field_index > 1 && !$do_not_align ) {
12733
12734        for my $i ( 0 .. $maximum_line_index ) {
12735            my $line                = $group_lines[$i];
12736            my $leading_space_count = $line->get_leading_space_count();
12737            my $rfields             = $line->get_rfields();
12738
12739            my $gap = $line->get_column(0) - $leading_space_count -
12740              length( $$rfields[0] );
12741
12742            if ( $leading_space_count > $group_leader_length ) {
12743                if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
12744            }
12745        }
12746
12747        if ( $min_ci_gap >= $rOpts->{'maximum-line-length'} ) {
12748            $min_ci_gap = 0;
12749        }
12750    }
12751    else {
12752        $min_ci_gap = 0;
12753    }
12754    return $min_ci_gap;
12755}
12756
12757sub write_vertically_aligned_line {
12758
12759    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
12760        $extra_leading_spaces )
12761      = @_;
12762    my $rfields             = $line->get_rfields();
12763    my $leading_space_count = $line->get_leading_space_count();
12764    my $outdent_long_lines  = $line->get_outdent_long_lines();
12765    my $maximum_field_index = $line->get_jmax();
12766
12767    # add any extra spaces
12768    if ( $leading_space_count > $group_leader_length ) {
12769        $leading_space_count += $min_ci_gap;
12770    }
12771
12772    my $str = $$rfields[0];
12773
12774    # loop to concatenate all fields of this line and needed padding
12775    my $total_pad_count = 0;
12776    my ( $j, $pad );
12777    for $j ( 1 .. $maximum_field_index ) {
12778
12779        # skip zero-length side comments
12780        last
12781          if ( ( $j == $maximum_field_index )
12782            && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
12783          );
12784
12785        # compute spaces of padding before this field
12786        $pad =
12787          $line->get_column( $j - 1 ) - ( length($str) + $leading_space_count );
12788
12789        if ($do_not_align) {
12790            $pad =
12791              ( $j < $maximum_field_index )
12792              ? 0
12793              : $rOpts_minimum_space_to_comment - 1;
12794        }
12795
12796        # accumulate the padding
12797        if ( $pad > 0 ) { $total_pad_count += $pad; }
12798
12799        # add this field
12800        if ( !defined $$rfields[$j] ) {
12801            write_diagnostics("UNDEFined field at j=$j\n");
12802        }
12803
12804        # only add padding when we have a finite field;
12805        # this avoids extra terminal spaces if we have empty fields
12806        if ( length( $$rfields[$j] ) > 0 ) {
12807            $str .= ' ' x $total_pad_count;
12808            $total_pad_count = 0;
12809            $str .= $$rfields[$j];
12810        }
12811    }
12812
12813    my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
12814
12815    # ship this line off
12816    write_leader_and_string( $leading_space_count + $extra_leading_spaces, $str,
12817        $side_comment_length, $outdent_long_lines );
12818}
12819
12820sub get_extra_leading_spaces {
12821
12822    #----------------------------------------------------------
12823    # Define any extra indentation space (for the -lp option).
12824    # Here is why:
12825    # If a list has side comments, sub scan_list must dump the
12826    # list before it sees everything.  When this happens, it sets
12827    # the indentation to the standard scheme, but notes how
12828    # many spaces it would have liked to use.  We may be able
12829    # to recover that space here in the event that that all of the
12830    # lines of a list are back together again.
12831    #----------------------------------------------------------
12832
12833    my $extra_leading_spaces = 0;
12834    if ($extra_indent_ok) {
12835        my $object = $group_lines[0]->get_indentation();
12836        if ( ref($object) ) {
12837            my $extra_indentation_spaces_wanted =
12838              get_RECOVERABLE_SPACES($object);
12839
12840            # all indentation objects must be the same
12841            my $i;
12842            for $i ( 1 .. $maximum_line_index ) {
12843                if ( $object != $group_lines[$i]->get_indentation() ) {
12844                    $extra_indentation_spaces_wanted = 0;
12845                    last;
12846                }
12847            }
12848
12849            if ($extra_indentation_spaces_wanted) {
12850
12851                # the maximum space without exceeding the line length:
12852                my $avail = $group_lines[0]->get_available_space_on_right();
12853                $extra_leading_spaces =
12854                  ( $avail > $extra_indentation_spaces_wanted )
12855                  ? $extra_indentation_spaces_wanted
12856                  : $avail;
12857
12858                # update the indentation object because with -icp the terminal
12859                # ');' will use the same adjustment.
12860                $object->permanently_decrease_AVAILABLE_SPACES(
12861                    -$extra_leading_spaces );
12862            }
12863        }
12864    }
12865    return $extra_leading_spaces;
12866}
12867
12868sub combine_fields {
12869
12870    # combine all fields except for the comment field  ( sidecmt.t )
12871    my ( $j, $k );
12872    my $maximum_field_index = $group_lines[0]->get_jmax();
12873    for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
12874        my $line    = $group_lines[$j];
12875        my $rfields = $line->get_rfields();
12876        for ( $k = 1 ; $k < $maximum_field_index ; $k++ ) {
12877            $$rfields[0] .= $$rfields[$k];
12878        }
12879        $$rfields[1] = $$rfields[$maximum_field_index];
12880
12881        $line->set_jmax(1);
12882        $line->set_column( 0, 0 );
12883        $line->set_column( 1, 0 );
12884
12885    }
12886    $maximum_field_index = 1;
12887
12888    for $j ( 0 .. $maximum_line_index ) {
12889        my $line    = $group_lines[$j];
12890        my $rfields = $line->get_rfields();
12891        for $k ( 0 .. $maximum_field_index ) {
12892            my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
12893            if ( $k == 0 ) {
12894                $pad += $group_lines[$j]->get_leading_space_count();
12895            }
12896
12897            if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
12898
12899        }
12900    }
12901}
12902
12903sub get_output_line_number {
12904
12905    # the output line number reported to a caller is the number of items
12906    # written plus the number of items in the buffer
12907    my $self = shift;
12908    1 + $maximum_line_index + $file_writer_object->get_output_line_number();
12909}
12910
12911sub write_leader_and_string {
12912
12913    my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines )
12914      = @_;
12915
12916    my $leading_string = get_leading_string($leading_space_count);
12917
12918    # reduce continuation indentation if it will make the line fit
12919    # in the available page width.  Do not include side comment length
12920    # when considering the excess.
12921    my $excess =
12922      length($str) - $side_comment_length + $leading_space_count -
12923      $rOpts->{'maximum-line-length'};
12924
12925    # handle long lines:
12926    if ( $excess > 0 ) {
12927
12928        # TODO: consider re-implementing this logic
12929        # Reduce continuation indentation if that solves the problem
12930        #if ( length($spaces) >= $excess ) {
12931        #    $spaces = substr( $spaces, 0, length($spaces) - $excess );
12932
12933        #}
12934
12935        # Otherwise, outdent if permitted
12936        #else {
12937
12938        if ($outdent_long_lines) {
12939            $leading_string         = "";
12940            $last_outdented_line_at =
12941              $file_writer_object->get_output_line_number();
12942
12943            unless ($outdented_line_count) {
12944                $first_outdented_line_at = $last_outdented_line_at;
12945            }
12946            $outdented_line_count++;
12947        }
12948
12949        #}
12950    }
12951
12952    $file_writer_object->write_code_line( $leading_string . $str . "\n" );
12953    $last_group_level_written = $group_level;
12954    $last_side_comment_length = $side_comment_length;
12955    $extra_indent_ok          = 0;
12956}
12957
12958{    # begin closure get_leading_string
12959
12960    my @leading_string_cache;
12961
12962    sub get_leading_string {
12963
12964        # define the leading whitespace string for this line..
12965        my $leading_whitespace_count = shift;
12966
12967        # Handle case of zero whitespace, which includes multi-line quotes
12968        # (which may have a finite level; this prevents tab problems)
12969        if ( $leading_whitespace_count <= 0 ) {
12970            return "";
12971        }
12972
12973        # look for previous result
12974        elsif ( $leading_string_cache[$leading_whitespace_count] ) {
12975            return $leading_string_cache[$leading_whitespace_count];
12976        }
12977
12978        # must compute a string for this number of spaces
12979        my $leading_string;
12980
12981        # Handle simple case of no tabs
12982        if ( !$rOpts_tabs || $rOpts_indent_columns <= 0 ) {
12983            $leading_string = ( ' ' x $leading_whitespace_count );
12984        }
12985
12986        # Handle tabs
12987        else {
12988            $leading_string = ( "\t" x $group_level );
12989            my $space_count =
12990              $leading_whitespace_count - $group_level * $rOpts_indent_columns;
12991
12992            # shouldn't happen:
12993            if ( $space_count < 0 ) {
12994                warning(
12995"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
12996                );
12997                $leading_string = ( ' ' x $leading_whitespace_count );
12998            }
12999            else {
13000                $leading_string .= ( ' ' x $space_count );
13001            }
13002        }
13003        $leading_string_cache[$leading_whitespace_count] = $leading_string;
13004        return $leading_string;
13005    }
13006}    # end closure get_leading_string
13007
13008sub report_anything_unusual {
13009    my $self = shift;
13010    if ( $outdented_line_count > 0 ) {
13011        write_logfile_entry(
13012            "$outdented_line_count long lines were outdented:\n");
13013        write_logfile_entry(
13014            "  First at output line $first_outdented_line_at\n");
13015
13016        if ( $outdented_line_count > 1 ) {
13017            write_logfile_entry(
13018                "   Last at output line $last_outdented_line_at\n");
13019        }
13020        write_logfile_entry(
13021            "  use -noll to prevent outdenting, -l=n to increase line length\n"
13022        );
13023        write_logfile_entry("\n");
13024    }
13025}
13026
13027#####################################################################
13028#
13029# the PerlTidy::FileWriter class writes the output file
13030#
13031#####################################################################
13032
13033package PerlTidy::FileWriter;
13034
13035# Maximum number of little messages; probably need not be changed.
13036use constant MAX_NAG_MESSAGES => 6;
13037
13038sub write_logfile_entry {
13039    my $self          = shift;
13040    my $logger_object = $self->{_logger_object};
13041    if ($logger_object) {
13042        $logger_object->write_logfile_entry(@_);
13043    }
13044}
13045
13046sub new {
13047    my $class = shift;
13048    my ( $line_sink_object, $rOpts, $logger_object ) = @_;
13049
13050    bless {
13051        _line_sink_object           => $line_sink_object,
13052        _logger_object              => $logger_object,
13053        _rOpts                      => $rOpts,
13054        _output_line_number         => 1,
13055        _consecutive_blank_lines    => 0,
13056        _consecutive_nonblank_lines => 0,
13057        _first_line_length_error    => 0,
13058        _max_line_length_error      => 0,
13059        _last_line_length_error     => 0,
13060        _first_line_length_error_at => 0,
13061        _max_line_length_error_at   => 0,
13062        _last_line_length_error_at  => 0,
13063        _line_length_error_count    => 0,
13064        _max_output_line_length     => 0,
13065        _max_output_line_length_at  => 0,
13066    }, $class;
13067}
13068
13069sub tee_on {
13070    my $self = shift;
13071    $self->{_line_sink_object}->tee_on();
13072}
13073
13074sub tee_off {
13075    my $self = shift;
13076    $self->{_line_sink_object}->tee_off();
13077}
13078
13079sub get_output_line_number {
13080    my $self = shift;
13081    return $self->{_output_line_number};
13082}
13083
13084sub decrement_output_line_number {
13085    my $self = shift;
13086    $self->{_output_line_number}--;
13087}
13088
13089sub get_consecutive_nonblank_lines {
13090    my $self = shift;
13091    return $self->{_consecutive_nonblank_lines};
13092}
13093
13094sub reset_consecutive_blank_lines {
13095    my $self = shift;
13096    $self->{_consecutive_blank_lines} = 0;
13097}
13098
13099sub want_blank_line {
13100    my $self = shift;
13101    unless ( $self->{_consecutive_blank_lines} ) {
13102        $self->write_blank_code_line();
13103    }
13104}
13105
13106sub write_blank_code_line {
13107    my $self  = shift;
13108    my $rOpts = $self->{_rOpts};
13109    return
13110      if ( $self->{_consecutive_blank_lines} >=
13111        $rOpts->{'maximum-consecutive-blank-lines'} );
13112    $self->{_consecutive_blank_lines}++;
13113    $self->{_consecutive_nonblank_lines} = 0;
13114    $self->write_line("\n");
13115}
13116
13117sub write_code_line {
13118    my $self = shift;
13119    my $a    = shift;
13120
13121    if ( $a =~ /^\s*$/ ) {
13122        my $rOpts = $self->{_rOpts};
13123        return
13124          if ( $self->{_consecutive_blank_lines} >=
13125            $rOpts->{'maximum-consecutive-blank-lines'} );
13126        $self->{_consecutive_blank_lines}++;
13127        $self->{_consecutive_nonblank_lines} = 0;
13128    }
13129    else {
13130        $self->{_consecutive_blank_lines} = 0;
13131        $self->{_consecutive_nonblank_lines}++;
13132    }
13133    $self->write_line($a);
13134}
13135
13136sub write_line {
13137    my $self = shift;
13138    my $a    = shift;
13139    $self->{_line_sink_object}->write_line($a);
13140    if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
13141
13142    # This calculation of excess line length ignores any internal tabs
13143    my $rOpts  = $self->{_rOpts};
13144    my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
13145    if ( $a =~ /^\t+/g ) {
13146        $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
13147    }
13148
13149    # Note that we just incremented output line number to future value
13150    # so we must subtract 1 for current line number
13151    if ( length($a) > 1 + $self->{_max_output_line_length} ) {
13152        $self->{_max_output_line_length}    = length($a) - 1;
13153        $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
13154    }
13155
13156    if ( $exceed > 0 ) {
13157        my $output_line_number = $self->{_output_line_number};
13158        $self->{_last_line_length_error}    = $exceed;
13159        $self->{_last_line_length_error_at} = $output_line_number - 1;
13160        if ( $self->{_line_length_error_count} == 0 ) {
13161            $self->{_first_line_length_error}    = $exceed;
13162            $self->{_first_line_length_error_at} = $output_line_number - 1;
13163        }
13164
13165        if (
13166            $self->{_last_line_length_error} > $self->{_max_line_length_error} )
13167        {
13168            $self->{_max_line_length_error}    = $exceed;
13169            $self->{_max_line_length_error_at} = $output_line_number - 1;
13170        }
13171
13172        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
13173            $self->write_logfile_entry(
13174                "Line length exceeded by $exceed characters\n");
13175        }
13176        $self->{_line_length_error_count}++;
13177    }
13178
13179}
13180
13181sub report_line_length_errors {
13182    my $self                    = shift;
13183    my $rOpts                   = $self->{_rOpts};
13184    my $line_length_error_count = $self->{_line_length_error_count};
13185    if ( $line_length_error_count == 0 ) {
13186        $self->write_logfile_entry(
13187            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
13188        my $max_output_line_length    = $self->{_max_output_line_length};
13189        my $max_output_line_length_at = $self->{_max_output_line_length_at};
13190        $self->write_logfile_entry(
13191"  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
13192        );
13193
13194    }
13195    else {
13196
13197        my $word = ( $line_length_error_count > 1 ) ? "s" : "";
13198        $self->write_logfile_entry(
13199"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
13200        );
13201
13202        $word = ( $line_length_error_count > 1 ) ? "First" : "";
13203        my $first_line_length_error    = $self->{_first_line_length_error};
13204        my $first_line_length_error_at = $self->{_first_line_length_error_at};
13205        $self->write_logfile_entry(
13206" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
13207        );
13208
13209        if ( $line_length_error_count > 1 ) {
13210            my $max_line_length_error     = $self->{_max_line_length_error};
13211            my $max_line_length_error_at  = $self->{_max_line_length_error_at};
13212            my $last_line_length_error    = $self->{_last_line_length_error};
13213            my $last_line_length_error_at = $self->{_last_line_length_error_at};
13214            $self->write_logfile_entry(
13215" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
13216            );
13217            $self->write_logfile_entry(
13218" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
13219            );
13220        }
13221    }
13222}
13223
13224#####################################################################
13225#
13226# The PerlTidy::Debugger class shows line tokenization
13227#
13228#####################################################################
13229
13230package PerlTidy::Debugger;
13231
13232sub new {
13233
13234    my ( $class, $filename ) = @_;
13235
13236    bless {
13237        _debug_file        => $filename,
13238        _debug_file_opened => 0,
13239        _fh                => undef,
13240    }, $class;
13241}
13242
13243sub really_open_debug_file {
13244
13245    my $self       = shift;
13246    my $debug_file = $self->{_debug_file};
13247    my $fh;
13248    unless ( $fh = IO::File->new("> $debug_file") ) {
13249        warn("can't open $debug_file: $!\n");
13250    }
13251    $self->{_debug_file_opened} = 1;
13252    $self->{_fh}                = $fh;
13253    print $fh
13254      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
13255}
13256
13257sub close_debug_file {
13258
13259    my $self = shift;
13260    my $fh   = $self->{_fh};
13261    if ( $self->{_debug_file_opened} ) {
13262
13263        close $self->{_fh};
13264    }
13265}
13266
13267sub write_debug_entry {
13268
13269    # This is a debug dump routine which may be modified as necessary
13270    # to dump tokens on a line-by-line basis.  The output will be written
13271    # to the .DEBUG file when the -D flag is entered.
13272    my $self           = shift;
13273    my $line_of_tokens = shift;
13274
13275    my $input_line        = $line_of_tokens->{_line_text};
13276    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
13277    my $rtokens           = $line_of_tokens->{_rtokens};
13278    my $rlevels           = $line_of_tokens->{_rlevels};
13279    my $rslevels          = $line_of_tokens->{_rslevels};
13280    my $rblock_type       = $line_of_tokens->{_rblock_type};
13281    my $input_line_number = $line_of_tokens->{_line_number};
13282    my $line_type         = $line_of_tokens->{_line_type};
13283
13284    my ( $j, $num );
13285
13286    my $token_str              = "$input_line_number: ";
13287    my $reconstructed_original = "$input_line_number: ";
13288    my $block_str              = "$input_line_number: ";
13289
13290    #$token_str .= "$line_type: ";
13291    #$reconstructed_original .= "$line_type: ";
13292
13293    my $pattern   = "";
13294    my @next_char = ( '"', '"' );
13295    my $i_next    = 0;
13296    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
13297    my $fh = $self->{_fh};
13298
13299    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
13300
13301        # testing patterns
13302        if ( $$rtoken_type[$j] eq 'k' ) {
13303            $pattern .= $$rtokens[$j];
13304        }
13305        else {
13306            $pattern .= $$rtoken_type[$j];
13307        }
13308        $reconstructed_original .= $$rtokens[$j];
13309        $block_str .= "($$rblock_type[$j])";
13310        $num = length( $$rtokens[$j] );
13311        my $type_str = $$rtoken_type[$j];
13312
13313        # be sure there are no blank tokens (shouldn't happen)
13314        # This can only happen if a programming error has been made
13315        # because all valid tokens are non-blank
13316        if ( $type_str eq ' ' ) {
13317            print $fh "BLANK TOKEN on the next line\n";
13318            $type_str = $next_char[$i_next];
13319            $i_next   = 1 - $i_next;
13320        }
13321
13322        if ( length($type_str) == 1 ) {
13323            $type_str = $type_str x $num;
13324        }
13325        $token_str .= $type_str;
13326    }
13327
13328    # Write what you want here ...
13329    # print $fh "$input_line\n";
13330    # print $fh "$pattern\n";
13331    print $fh "$reconstructed_original\n";
13332    print $fh "$token_str\n";
13333
13334    #print $fh "$block_str\n";
13335}
13336
13337#####################################################################
13338#
13339# The PerlTidy::LineBuffer class supplies a 'get_line()'
13340# method for returning the next line to be parsed, as well as a
13341# 'peek_ahead()' method
13342#
13343# The input parameter is an object with a 'get_line()' method
13344# which returns the next line to be parsed
13345#
13346#####################################################################
13347
13348package PerlTidy::LineBuffer;
13349
13350sub new {
13351
13352    my $class              = shift;
13353    my $line_source_object = shift;
13354
13355    return bless {
13356        _line_source_object => $line_source_object,
13357        _rlookahead_buffer  => [],
13358    }, $class;
13359}
13360
13361sub peek_ahead {
13362    my $self               = shift;
13363    my $buffer_index       = shift;
13364    my $line               = undef;
13365    my $input_file_copy    = $self->{_input_file_copy};
13366    my $line_source_object = $self->{_line_source_object};
13367    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
13368    if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
13369        $line = $$rlookahead_buffer[$buffer_index];
13370    }
13371    else {
13372        $line = $line_source_object->get_line();
13373        push ( @$rlookahead_buffer, $line );
13374    }
13375    return $line;
13376}
13377
13378sub get_line {
13379    my $self               = shift;
13380    my $line               = undef;
13381    my $line_source_object = $self->{_line_source_object};
13382    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
13383
13384    if ( scalar(@$rlookahead_buffer) ) {
13385        $line = shift @$rlookahead_buffer;
13386    }
13387    else {
13388        $line = $line_source_object->get_line();
13389    }
13390    return $line;
13391}
13392
13393########################################################################
13394#
13395# the PerlTidy::Tokenizer package is essentially a filter which
13396# reads lines of perl source code from a source object and provides
13397# corresponding tokenized lines through its get_line() method.  Lines
13398# flow from the source_object to the caller like this:
13399#
13400# source_object --> LineBuffer_object --> Tokenizer -->  calling routine
13401#   get_line()         get_line()           get_line()     line_of_tokens
13402#
13403# The source object can be any object with a get_line() method which
13404# supplies one line (a character string) perl call.
13405# The LineBuffer object is created by the Tokenizer.
13406# The Tokenizer returns a reference to a data structure 'line_of_tokens'
13407# containing one tokenized line for each call to its get_line() method.
13408#
13409# WARNING: This is not a real class yet.  Only one tokenizer my be used.
13410#
13411########################################################################
13412
13413package PerlTidy::Tokenizer;
13414
13415BEGIN {
13416
13417    # Caution: these debug flags produce a lot of output
13418    # They should all be 0 except when debugging small scripts
13419
13420    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
13421    use constant TOKENIZER_DEBUG_FLAG_GUESS    => 0;
13422    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
13423    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
13424    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
13425    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
13426
13427    my $debug_warning = sub {
13428        print "TOKENIZER_DEBUGGING with key $_[0]\n";
13429    };
13430
13431    TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
13432    TOKENIZER_DEBUG_FLAG_GUESS    && $debug_warning->('GUESS');
13433    TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
13434    TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
13435    TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
13436    TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
13437
13438}
13439
13440sub make_regex {
13441
13442    # Given a string, make the corresponding regex with qr.
13443    # Versions of perl before 5.005 do not have qr,
13444    # so we will just return the string, which will work
13445    # but not be optimized.
13446    BEGIN {
13447        if ( $] < 5.005 ) {
13448            sub qr { $_[0] }
13449        }
13450    }
13451    qr($_[0]);
13452}
13453
13454use Carp;
13455use vars qw{
13456  $tokenizer_self
13457  $level_in_tokenizer
13458  $slevel_in_tokenizer
13459  $nesting_token_string
13460  $nesting_type_string
13461  $nesting_block_string
13462  $nesting_list_string
13463  $saw_negative_indentation
13464  $id_scan_state
13465  $last_nonblank_token
13466  $last_nonblank_type
13467  $last_nonblank_block_type
13468  $last_nonblank_container_type
13469  $last_nonblank_type_sequence
13470  $last_last_nonblank_token
13471  $last_last_nonblank_type
13472  $last_last_nonblank_block_type
13473  $last_last_nonblank_container_type
13474  $last_last_nonblank_type_sequence
13475  $last_nonblank_prototype
13476  $statement_type
13477  $identifier
13478  $in_quote
13479  $quote_type
13480  $quote_character
13481  $quote_pos
13482  $quote_depth
13483  $allowed_quote_modifiers
13484  $paren_depth
13485  @paren_type
13486  @paren_semicolon_count
13487  @paren_structural_type
13488  $brace_depth
13489  @brace_type
13490  @brace_structural_type
13491  @brace_context
13492  @brace_package
13493  $square_bracket_depth
13494  @square_bracket_type
13495  @square_bracket_structural_type
13496  @depth_array
13497  @starting_line_of_current_depth
13498  @current_depth
13499  @current_sequence_number
13500  @nesting_sequence_number
13501  @lower_case_labels_at
13502  $saw_v_string
13503  %is_constant
13504  %is_user_function
13505  %user_function_prototype
13506  %saw_function_definition
13507  $max_token_index
13508  $peeked_ahead
13509  $current_package
13510  $unexpected_error_count
13511  $input_line
13512  $input_line_number
13513  $rpretokens
13514  $rpretoken_map
13515  $rpretoken_type
13516  $want_paren
13517  $context
13518  @slevel_stack
13519  $ci_string_in_tokenizer
13520  $continuation_string_in_tokenizer
13521  $in_statement_continuation
13522  $started_looking_for_here_target_at
13523  $nearly_matched_here_target_at
13524
13525  $indirect_object_taker
13526  $block_list_operator
13527  $block_operator
13528  %expecting_operator_token
13529  %expecting_operator_types
13530  %expecting_term_types
13531  %expecting_term_token
13532  %is_block_function
13533  %is_block_list_function
13534  %is_digraph
13535  %is_file_test_operator
13536  %is_trigraph
13537  %is_valid_token_type
13538  %is_keyword
13539  %really_want_term
13540  @opening_brace_names
13541  @closing_brace_names
13542  %is_keyword_taking_list
13543};
13544
13545# possible values of operator_expected()
13546use constant TERM     => -1;
13547use constant UNKNOWN  => 0;
13548use constant OPERATOR => 1;
13549
13550# possible values of context
13551use constant SCALAR_CONTEXT  => -1;
13552use constant UNKNOWN_CONTEXT => 0;
13553use constant LIST_CONTEXT    => 1;
13554
13555# Maximum number of little messages; probably need not be changed.
13556use constant MAX_NAG_MESSAGES => 6;
13557
13558{
13559
13560    # methods to count instances
13561    my $_count = 0;
13562    sub get_count        { $_count; }
13563    sub _increment_count { ++$_count }
13564    sub _decrement_count { --$_count }
13565}
13566
13567sub DESTROY {
13568    $_[0]->_decrement_count();
13569}
13570
13571sub new {
13572
13573    my $class = shift;
13574
13575    # Note: 'tabs' and 'indent_columns' are temporary and should be
13576    # removed asap
13577    my %defaults = (
13578        source_object       => undef,
13579        debugger_object     => undef,
13580        diagnostics_object  => undef,
13581        logger_object       => undef,
13582        starting_level      => undef,
13583        indent_columns      => 4,
13584        tabs                => 0,
13585        look_for_hash_bang  => 0,
13586        trim_qw             => 1,
13587        look_for_autoloader => 1,
13588        look_for_selfloader => 1,
13589    );
13590    my %args = ( %defaults, @_ );
13591
13592    # we are given an object with a get_line() method to supply source lines
13593    my $source_object = $args{source_object};
13594
13595    # we create another object with a get_line() and peek_ahead() method
13596    my $line_buffer_object = PerlTidy::LineBuffer->new($source_object);
13597
13598    # Tokenizer state data is as follows:
13599    # _rhere_target_list    reference to list of here-doc targets
13600    # _here_doc_target      the target string for a here document
13601    # _here_quote_character the type of here-doc quoting (" ' ` or none)
13602    #                       to determine if interpolation is done
13603    # _quote_target         character we seek if chasing a quote
13604    # _line_start_quote     line where we started looking for a long quote
13605    # _in_here_doc          flag indicating if we are in a here-doc
13606    # _in_pod               flag set if we are in pod documentation
13607    # _in_error             flag set if we saw severe error (binary in script)
13608    # _in_data              flag set if we are in __DATA__ section
13609    # _in_end               flag set if we are in __END__ section
13610    # _in_format            flag set if we are in a format description
13611    # _in_quote             flag telling if we are chasing a quote
13612    # _starting_level       indentation level of first line
13613    # _input_tabstr         string denoting one indentation level of input file
13614    # _know_input_tabstr    flag indicating if we know _input_tabstr
13615    # _line_buffer_object   object with get_line() method to supply source code
13616    # _diagnostics_object   place to write debugging information
13617    $tokenizer_self = {
13618        _rhere_target_list    => undef,
13619        _in_here_doc          => 0,
13620        _here_doc_target      => "",
13621        _here_quote_character => "",
13622        _in_data              => 0,
13623        _in_end               => 0,
13624        _in_format            => 0,
13625        _in_error             => 0,
13626        _in_pod               => 0,
13627        _in_quote             => 0,
13628        _quote_target         => "",
13629        _line_start_quote     => -1,
13630        _starting_level       => $args{starting_level},
13631        _know_starting_level  => defined( $args{starting_level} ),
13632        _tabs                 => $args{tabs},
13633        _indent_columns       => $args{indent_columns},
13634        _look_for_hash_bang   => $args{look_for_hash_bang},
13635        _trim_qw              => $args{trim_qw},
13636        _input_tabstr         => "",
13637        _know_input_tabstr    => -1,
13638        _last_line_number     => 0,
13639        _saw_perl_dash_P      => 0,
13640        _saw_perl_dash_w      => 0,
13641        _saw_use_strict       => 0,
13642        _look_for_autoloader  => $args{look_for_autoloader},
13643        _look_for_selfloader  => $args{look_for_selfloader},
13644        _saw_autoloader       => 0,
13645        _saw_selfloader       => 0,
13646        _saw_hash_bang        => 0,
13647        _saw_end              => 0,
13648        _saw_data             => 0,
13649        _saw_lc_filehandle    => 0,
13650        _started_tokenizing   => 0,
13651        _line_buffer_object   => $line_buffer_object,
13652        _debugger_object      => $args{debugger_object},
13653        _diagnostics_object   => $args{diagnostics_object},
13654        _logger_object        => $args{logger_object},
13655    };
13656
13657    prepare_for_a_new_file();
13658    find_starting_indentation_level();
13659
13660    bless $tokenizer_self, $class;
13661
13662    # This is not a full class yet, so die if an attempt is made to
13663    # create more than one object.
13664
13665    if ( _increment_count() > 1 ) {
13666        confess
13667"Attempt to create more than 1 object in $class, which is not a true class yet\n";
13668    }
13669
13670    return $tokenizer_self;
13671
13672}
13673
13674# interface to PerlTidy::Logger routines
13675sub warning {
13676    my $logger_object = $tokenizer_self->{_logger_object};
13677    if ($logger_object) {
13678        $logger_object->warning(@_);
13679    }
13680}
13681
13682sub complain {
13683    my $logger_object = $tokenizer_self->{_logger_object};
13684    if ($logger_object) {
13685        $logger_object->complain(@_);
13686    }
13687}
13688
13689sub write_logfile_entry {
13690    my $logger_object = $tokenizer_self->{_logger_object};
13691    if ($logger_object) {
13692        $logger_object->write_logfile_entry(@_);
13693    }
13694}
13695
13696sub interrupt_logfile {
13697    my $logger_object = $tokenizer_self->{_logger_object};
13698    if ($logger_object) {
13699        $logger_object->interrupt_logfile();
13700    }
13701}
13702
13703sub resume_logfile {
13704    my $logger_object = $tokenizer_self->{_logger_object};
13705    if ($logger_object) {
13706        $logger_object->resume_logfile();
13707    }
13708}
13709
13710sub increment_brace_error {
13711    my $logger_object = $tokenizer_self->{_logger_object};
13712    if ($logger_object) {
13713        $logger_object->increment_brace_error();
13714    }
13715}
13716
13717sub report_definite_bug {
13718    my $logger_object = $tokenizer_self->{_logger_object};
13719    if ($logger_object) {
13720        $logger_object->report_definite_bug();
13721    }
13722}
13723
13724sub brace_warning {
13725    my $logger_object = $tokenizer_self->{_logger_object};
13726    if ($logger_object) {
13727        $logger_object->brace_warning(@_);
13728    }
13729}
13730
13731sub get_saw_brace_error {
13732    my $logger_object = $tokenizer_self->{_logger_object};
13733    if ($logger_object) {
13734        $logger_object->get_saw_brace_error();
13735    }
13736    else {
13737        0;
13738    }
13739}
13740
13741# interface to PerlTidy::Diagnostics routines
13742sub write_diagnostics {
13743    if ( $tokenizer_self->{_diagnostics_object} ) {
13744        $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
13745    }
13746}
13747
13748sub report_tokenization_errors {
13749
13750    my $self = shift;
13751
13752    my $level = get_indentation_level();
13753    if ( $level != $tokenizer_self->{_starting_level} ) {
13754        warning("final indentation level: $level\n");
13755    }
13756
13757    check_final_nesting_depths();
13758
13759    if ( $tokenizer_self->{_look_for_hash_bang}
13760        && !$tokenizer_self->{_saw_hash_bang} )
13761    {
13762        warning(
13763            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
13764    }
13765
13766    if ( $tokenizer_self->{_in_format} ) {
13767        warning("hit EOF while in format description\n");
13768    }
13769
13770    # this check may be removed after a year or so
13771    if ( $tokenizer_self->{_saw_lc_filehandle} ) {
13772
13773        warning( <<'EOM' );
13774------------------------------------------------------------------
13775PLEASE NOTE: If you get this message, it is because perltidy noticed
13776possible ambiguous syntax at one or more places in your script, as
13777noted above.  The problem is with statements accepting indirect objects,
13778such as print and printf statements of the form
13779
13780    print bareword ( $etc
13781
13782Perltidy needs your help in deciding if 'bareword' is a filehandle or a
13783function call.  The problem is the space between 'bareword' and '('.  If
13784'bareword' is a function call, you should remove the trailing space.  If
13785'bareword' is a filehandle, you should avoid the opening paren or else
13786globally capitalize 'bareword' to be BAREWORD.  So the above line
13787would be:
13788
13789    print bareword( $etc    # function
13790or
13791    print bareword @list    # filehandle
13792or
13793    print BAREWORD ( $etc   # filehandle
13794
13795If you want to keep the line as it is, and are sure it is correct,
13796you can use -w=0 to prevent this message.
13797------------------------------------------------------------------
13798EOM
13799
13800        #It is very possible that this is syntax is unambiguous to perl, which
13801        #may know what 'bareword' is by parsing any 'use' and 'require' modules
13802        #that you have referenced, but perltidy does not currently parse other
13803        #modules, so it has not seen any function prototypes that they might
13804        #contain.  To prevent this message, use -w=0.
13805
13806    }
13807
13808    if ( $tokenizer_self->{_in_pod} ) {
13809
13810        # Just write log entry if this is after __END__ or __DATA__
13811        # because this happens to often, and it is not likely to be
13812        # a parsing error.
13813        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
13814            write_logfile_entry(
13815"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
13816            );
13817        }
13818
13819        else {
13820            complain(
13821"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
13822            );
13823        }
13824
13825    }
13826
13827    if ( $tokenizer_self->{_in_here_doc} ) {
13828        my $here_doc_target = $tokenizer_self->{_here_doc_target};
13829        if ($here_doc_target) {
13830            warning(
13831"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
13832            );
13833        }
13834        else {
13835            warning(
13836"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
13837            );
13838        }
13839        if ($nearly_matched_here_target_at) {
13840            warning(
13841"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
13842            );
13843        }
13844    }
13845
13846    if ( $tokenizer_self->{_in_quote} ) {
13847        my $line_start_quote = $tokenizer_self->{_line_start_quote};
13848        my $quote_target     = $tokenizer_self->{_quote_target};
13849        warning(
13850"hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n"
13851        );
13852    }
13853
13854    unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
13855        if ( $] < 5.006 ) {
13856            write_logfile_entry("Suggest including '-w parameter'\n");
13857        }
13858        else {
13859            write_logfile_entry("Suggest including 'use warnings;'\n");
13860        }
13861    }
13862
13863    if ( $tokenizer_self->{_saw_perl_dash_P} ) {
13864        write_logfile_entry("Use of -P parameter for defines is discouraged\n");
13865    }
13866
13867    unless ( $tokenizer_self->{_saw_use_strict} ) {
13868        write_logfile_entry("Suggest including 'use strict;'\n");
13869    }
13870
13871    # it is suggested that lables have at least one upper case character
13872    # for legibility and to avoid code breakage as new keywords are introduced
13873    if (@lower_case_labels_at) {
13874        my $num = @lower_case_labels_at;
13875        write_logfile_entry(
13876            "Suggest using upper case characters in label(s)\n");
13877        local $" = ')(';
13878        write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
13879    }
13880}
13881
13882sub report_v_string {
13883
13884    # warn if this version can't handle v-strings
13885    my $tok = shift;
13886    $saw_v_string = $input_line_number;
13887    if ( $] < 5.006 ) {
13888        warning(
13889"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
13890        );
13891    }
13892}
13893
13894sub know_input_tabstr {
13895    return ( $tokenizer_self->{_know_input_tabstr} == 1 );
13896}
13897
13898sub get_input_line_number {
13899    return $tokenizer_self->{_last_line_number};
13900}
13901
13902# returns the next tokenized line
13903sub get_line {
13904
13905    my $self = shift;
13906
13907    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
13908
13909    return undef unless ($input_line);
13910
13911    $tokenizer_self->{_last_line_number}++;
13912
13913    # remove any control m; otherwise here-target's may not match;
13914    # trimming trailing white space would work too, but that would
13915    # change the original line
13916    $input_line =~ s/(\r|\035)*$//gi;
13917
13918    my $input_line_number = $tokenizer_self->{_last_line_number};
13919
13920    # create a data structure describing this line which will be
13921    # returned to the caller.
13922
13923    # _line_type codes are:
13924    #   SYSTEM         - system-specific code before hash-bang line
13925    #   CODE           - line of perl code (including comments)
13926    #   POD_START      - line starting pod, such as '=head'
13927    #   POD            - pod documentation text
13928    #   POD_END        - last line of pod section, '=cut'
13929    #   HERE           - text of here-document
13930    #   HERE_END       - last line of here-doc (target word)
13931    #   FORMAT         - format section
13932    #   FORMAT_END     - last line of format section, '.'
13933    #   DATA_START     - __DATA__ line
13934    #   DATA           - unidentified text following __DATA__
13935    #   END_START      - __END__ line
13936    #   END            - unidentified text following __END__
13937    #   ERROR          - we are in big trouble, probably not a perl script
13938
13939    # Other variables:
13940    #   _curly_brace_depth     - depth of curly braces at start of line
13941    #   _square_bracket_depth  - depth of square brackets at start of line
13942    #   _paren_depth           - depth of parens at start of line
13943    #   _starting_in_quote     - this line continues a multi-line quote
13944    #                            (so don't trim leading blanks!)
13945    #   _ending_in_quote       - this line ends in a multi-line quote
13946    #                            (so don't trim trailing blanks!)
13947
13948    my $line_of_tokens = {
13949        _line_type                => 'EOF',
13950        _line_text                => $input_line,
13951        _line_number              => $input_line_number,
13952        _rtoken_type              => undef,
13953        _rtokens                  => undef,
13954        _rlevels                  => undef,
13955        _rslevels                 => undef,
13956        _rblock_type              => undef,
13957        _rcontainer_type          => undef,
13958        _rcontainer_environment   => undef,
13959        _rtype_sequence           => undef,
13960        _rnesting_tokens          => undef,
13961        _rci_levels               => undef,
13962        _rnesting_blocks          => undef,
13963        _python_indentation_level => 0,
13964        _starting_in_quote        =>
13965          ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
13966        _ending_in_quote      => 0,
13967        _curly_brace_depth    => $brace_depth,
13968        _square_bracket_depth => $square_bracket_depth,
13969        _paren_depth          => $paren_depth,
13970        _quote_character      => '',
13971    };
13972
13973    # must print line unchanged if we are in a here document
13974    if ( $tokenizer_self->{_in_here_doc} ) {
13975
13976        $line_of_tokens->{_line_type} = 'HERE';
13977        my $here_doc_target      = $tokenizer_self->{_here_doc_target};
13978        my $here_quote_character = $tokenizer_self->{_here_quote_character};
13979        my $candidate_target     = $input_line;
13980        chomp $candidate_target;
13981        if ( $candidate_target eq $here_doc_target ) {
13982            $nearly_matched_here_target_at = undef;
13983            $line_of_tokens->{_line_type} = 'HERE_END';
13984            write_logfile_entry("Exiting HERE document $here_doc_target\n");
13985
13986            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
13987            if (@$rhere_target_list) {    # there can be multiple here targets
13988                ( $here_doc_target, $here_quote_character ) =
13989                  @{ shift @$rhere_target_list };
13990                $tokenizer_self->{_here_doc_target}      = $here_doc_target;
13991                $tokenizer_self->{_here_quote_character} =
13992                  $here_quote_character;
13993                write_logfile_entry(
13994                    "Entering HERE document $here_doc_target\n");
13995                $nearly_matched_here_target_at      = undef;
13996                $started_looking_for_here_target_at = $input_line_number;
13997            }
13998            else {
13999                $tokenizer_self->{_in_here_doc}          = 0;
14000                $tokenizer_self->{_here_doc_target}      = "";
14001                $tokenizer_self->{_here_quote_character} = "";
14002            }
14003        }
14004
14005        # check for error of extra whitespace
14006        else {
14007            $candidate_target =~ s/\s*$//;
14008            $candidate_target =~ s/^\s*//;
14009            if ( $candidate_target eq $here_doc_target ) {
14010                $nearly_matched_here_target_at = $input_line_number;
14011            }
14012        }
14013        return $line_of_tokens;
14014    }
14015
14016    # must print line unchanged if we are in a format section
14017    elsif ( $tokenizer_self->{_in_format} ) {
14018
14019        if ( $input_line =~ /^\.[\s#]*$/ ) {
14020            write_logfile_entry("Exiting format section\n");
14021            $tokenizer_self->{_in_format} = 0;
14022            $line_of_tokens->{_line_type} = 'FORMAT_END';
14023        }
14024        else {
14025            $line_of_tokens->{_line_type} = 'FORMAT';
14026        }
14027        return $line_of_tokens;
14028    }
14029
14030    # must print line unchanged if we are in pod documentation
14031    elsif ( $tokenizer_self->{_in_pod} ) {
14032
14033        $line_of_tokens->{_line_type} = 'POD';
14034        if ( $input_line =~ /^=cut/ ) {
14035            $line_of_tokens->{_line_type} = 'POD_END';
14036            write_logfile_entry("Exiting POD section\n");
14037            $tokenizer_self->{_in_pod} = 0;
14038        }
14039        if ( $input_line =~ /^\#\!.*perl\b/ ) {
14040            warning("Hash-bang in pod can cause perl to fail! \n");
14041        }
14042
14043        return $line_of_tokens;
14044    }
14045
14046    # must print line unchanged if we have seen a severe error (i.e., we
14047    # are seeing illegal tokens and connot continue.  Syntax errors do
14048    # not pass this route).  Calling routine can decide what to do, but
14049    # the default can be to just pass all lines as if they were after __END__
14050    elsif ( $tokenizer_self->{_in_error} ) {
14051        $line_of_tokens->{_line_type} = 'ERROR';
14052        return $line_of_tokens;
14053    }
14054
14055    # print line unchanged if we are __DATA__ section
14056    elsif ( $tokenizer_self->{_in_data} ) {
14057
14058        # ...but look for POD
14059        # Note that the _in_data and _in_end flags remain set
14060        # so that we return to that state after seeing the
14061        # end of a pod section
14062        if ( $input_line =~ /^=(?!cut)/ ) {
14063            $line_of_tokens->{_line_type} = 'POD_START';
14064            write_logfile_entry("Entering POD section\n");
14065            $tokenizer_self->{_in_pod} = 1;
14066            return $line_of_tokens;
14067        }
14068        else {
14069            $line_of_tokens->{_line_type} = 'DATA';
14070            return $line_of_tokens;
14071        }
14072    }
14073
14074    # print line unchanged if we are in __END__ section
14075    elsif ( $tokenizer_self->{_in_end} ) {
14076
14077        # ...but look for POD
14078        # Note that the _in_data and _in_end flags remain set
14079        # so that we return to that state after seeing the
14080        # end of a pod section
14081        if ( $input_line =~ /^=(?!cut)/ ) {
14082            $line_of_tokens->{_line_type} = 'POD_START';
14083            write_logfile_entry("Entering POD section\n");
14084            $tokenizer_self->{_in_pod} = 1;
14085            return $line_of_tokens;
14086        }
14087        else {
14088            $line_of_tokens->{_line_type} = 'END';
14089            return $line_of_tokens;
14090        }
14091    }
14092
14093    # check for a hash-bang line if we haven't seen one
14094    if ( !$tokenizer_self->{_saw_hash_bang} ) {
14095        if ( $input_line =~ /^\#\!.*perl\b/ ) {
14096            $tokenizer_self->{_saw_hash_bang} = $input_line_number;
14097
14098            # check for -w and -P flags
14099            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
14100                $tokenizer_self->{_saw_perl_dash_P} = 1;
14101            }
14102
14103            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
14104                $tokenizer_self->{_saw_perl_dash_w} = 1;
14105            }
14106
14107            if ( ( $input_line_number > 1 )
14108                && ( !$tokenizer_self->{_look_for_hash_bang} ) )
14109            {
14110
14111                # this is helpful for VMS systems; we may have accidentally
14112                # tokenized some DCL commands
14113                if ( $tokenizer_self->{_started_tokenizing} ) {
14114                    warning(
14115"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
14116                    );
14117                }
14118                else {
14119                    complain("Useless hash-bang after line 1\n");
14120                }
14121            }
14122
14123            # Report the leading hash-bang as a system line
14124            # This will prevent -dac from deleting it
14125            else {
14126                $line_of_tokens->{_line_type} = 'SYSTEM';
14127                return $line_of_tokens;
14128            }
14129        }
14130    }
14131
14132    # wait for a hash-bang before parsing if the user invoked us with -x
14133    if ( $tokenizer_self->{_look_for_hash_bang}
14134        && !$tokenizer_self->{_saw_hash_bang} )
14135    {
14136        $line_of_tokens->{_line_type} = 'SYSTEM';
14137        return $line_of_tokens;
14138    }
14139
14140    # now we know that it is ok to tokenize the line...
14141    # the line tokenizer will modify any of these private variables:
14142    #        _rhere_target_list
14143    #        _in_data
14144    #        _in_end
14145    #        _in_format
14146    #        _in_error
14147    #        _in_pod
14148    #        _in_quote
14149    my $ending_in_quote_last = $tokenizer_self->{_in_quote};
14150    tokenize_this_line($line_of_tokens);
14151
14152    # Now finish defining the return structure and return it
14153    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
14154
14155    # handle severe error (binary data in script)
14156    if ( $tokenizer_self->{_in_error} ) {
14157        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
14158        warning("Giving up after error\n");
14159        $line_of_tokens->{_line_type} = 'ERROR';
14160        reset_indentation_level(0);          # avoid error messages
14161        return $line_of_tokens;
14162    }
14163
14164    # handle start of pod documentation
14165    if ( $tokenizer_self->{_in_pod} ) {
14166
14167        # This gets tricky..above a __DATA__ or __END__ section, perl
14168        # accepts '=cut' as the start of pod section. But afterwards,
14169        # only pod utilities see it and they may ignore an =cut without
14170        # leading =head.  In any case, this isn't good.
14171        if ( $input_line =~ /^=cut\b/ ) {
14172            if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
14173                complain("=cut while not in pod ignored\n");
14174                $tokenizer_self->{_in_pod}    = 0;
14175                $line_of_tokens->{_line_type} = 'POD_STOP';
14176            }
14177            else {
14178                $line_of_tokens->{_line_type} = 'POD_END';
14179                complain(
14180"=cut starts a pod section .. this can fool pod utilities.\n"
14181                );
14182                write_logfile_entry("Entering POD section\n");
14183            }
14184        }
14185
14186        else {
14187            $line_of_tokens->{_line_type} = 'POD_START';
14188            write_logfile_entry("Entering POD section\n");
14189        }
14190
14191        return $line_of_tokens;
14192    }
14193
14194    # update indentation levels for log messages
14195    if ( $input_line !~ /^\s*$/ ) {
14196        my $rlevels                      = $line_of_tokens->{_rlevels};
14197        my $structural_indentation_level = $$rlevels[0];
14198        my ( $python_indentation_level, $msg ) =
14199          find_indentation_level( $input_line, $structural_indentation_level );
14200        if ($msg) { write_logfile_entry("$msg") }
14201        $line_of_tokens->{_python_indentation_level} =
14202          $python_indentation_level;
14203    }
14204
14205    # see if this line contains here doc targets
14206    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
14207    if (@$rhere_target_list) {
14208
14209        #my $here_doc_target = shift @$rhere_target_list;
14210        my ( $here_doc_target, $here_quote_character ) =
14211          @{ shift @$rhere_target_list };
14212        $tokenizer_self->{_in_here_doc}          = 1;
14213        $tokenizer_self->{_here_doc_target}      = $here_doc_target;
14214        $tokenizer_self->{_here_quote_character} = $here_quote_character;
14215        write_logfile_entry("Entering HERE document $here_doc_target\n");
14216        $started_looking_for_here_target_at = $input_line_number;
14217    }
14218
14219    # NOTE: __END__ and __DATA__ statements are written unformatted
14220    # because they can theoretically contain additional characters
14221    # which are not tokenized (and cannot be read with <DATA> either!).
14222    if ( $tokenizer_self->{_in_data} ) {
14223        $line_of_tokens->{_line_type} = 'DATA_START';
14224        write_logfile_entry("Starting __DATA__ section\n");
14225        $tokenizer_self->{_saw_data} = 1;
14226
14227        # keep parsing after __DATA__ if use SelfLoader was seen
14228        if ( $tokenizer_self->{_saw_selfloader} ) {
14229            $tokenizer_self->{_in_data} = 0;
14230            write_logfile_entry(
14231                "SelfLoader seen, continuing; -nlsl deactivates\n");
14232        }
14233
14234        return $line_of_tokens;
14235    }
14236
14237    elsif ( $tokenizer_self->{_in_end} ) {
14238        $line_of_tokens->{_line_type} = 'END_START';
14239        write_logfile_entry("Starting __END__ section\n");
14240        $tokenizer_self->{_saw_end} = 1;
14241
14242        # keep parsing after __END__ if use AutoLoader was seen
14243        if ( $tokenizer_self->{_saw_autoloader} ) {
14244            $tokenizer_self->{_in_end} = 0;
14245            write_logfile_entry(
14246                "AutoLoader seen, continuing; -nlal deactivates\n");
14247        }
14248        return $line_of_tokens;
14249    }
14250
14251    # now, finally, we know that this line is type 'CODE'
14252    $line_of_tokens->{_line_type} = 'CODE';
14253
14254    # remember if we have seen any real code
14255    if ( !$tokenizer_self->{_started_tokenizing}
14256        && $input_line !~ /^\s*$/
14257        && $input_line !~ /^\s*#/ )
14258    {
14259        $tokenizer_self->{_started_tokenizing} = 1;
14260    }
14261
14262    if ( $tokenizer_self->{_debugger_object} ) {
14263        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
14264    }
14265
14266    # Note: if keyword 'format' occurs in this line code, it is still CODE
14267    # (keyword 'format' need not start a line)
14268    if ( $tokenizer_self->{_in_format} ) {
14269        write_logfile_entry("Entering format section\n");
14270    }
14271
14272    if ( $tokenizer_self->{_in_quote}
14273        and ( $tokenizer_self->{_line_start_quote} < 0 ) )
14274    {
14275
14276        if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
14277            $tokenizer_self->{_line_start_quote} = $input_line_number;
14278            $tokenizer_self->{_quote_target}     = $quote_target;
14279            write_logfile_entry(
14280                "Start multi-line quote or pattern ending in $quote_target\n");
14281        }
14282    }
14283    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
14284        and !$tokenizer_self->{_in_quote} )
14285    {
14286        $tokenizer_self->{_line_start_quote} = -1;
14287        write_logfile_entry("End of multi-line quote or pattern\n");
14288    }
14289
14290    # we are returning a line of CODE
14291    return $line_of_tokens;
14292}
14293
14294sub find_starting_indentation_level {
14295
14296    my $starting_level    = 0;
14297    my $know_input_tabstr = -1;    # flag for find_indentation_level
14298
14299    # use value if given as parameter
14300    if ( $tokenizer_self->{_know_starting_level} ) {
14301        $starting_level = $tokenizer_self->{_starting_level};
14302    }
14303
14304    # if we know there is a hash_bang line, the level must be zero
14305    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
14306        $tokenizer_self->{_know_starting_level} = 1;
14307    }
14308
14309    # otherwise figure it out from the input file
14310    else {
14311        my $line;
14312        my $i = 0;
14313        my $structural_indentation_level = -1; # flag for find_indentation_level
14314
14315        my $msg = "";
14316        while ( $line =
14317            $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
14318        {
14319
14320            # if first line is #! then assume starting level is zero
14321            if ( $i == 1 && $line =~ /^\#\!/ ) {
14322                $starting_level = 0;
14323                last;
14324            }
14325            next if ( $line =~ /^\s*#/ );      # must not be comment
14326            next if ( $line =~ /^\s*$/ );      # must not be blank
14327            ( $starting_level, $msg ) =
14328              find_indentation_level( $line, $structural_indentation_level );
14329            if ($msg) { write_logfile_entry("$msg") }
14330            last;
14331        }
14332        $msg = "Line $i implies starting-indentation-level = $starting_level\n";
14333
14334        if ( $starting_level > 0 ) {
14335
14336            my $input_tabstr = $tokenizer_self->{_input_tabstr};
14337            if ( $input_tabstr eq "\t" ) {
14338                $msg .= "by guessing input tabbing uses 1 tab per level\n";
14339            }
14340            else {
14341                my $cols = length($input_tabstr);
14342                $msg .=
14343                  "by guessing input tabbing uses $cols blanks per level\n";
14344            }
14345        }
14346        write_logfile_entry("$msg");
14347    }
14348    $tokenizer_self->{_starting_level} = $starting_level;
14349    reset_indentation_level($starting_level);
14350}
14351
14352=pod
14353
14354Find indentation level given a input line.  At the same time, try to
14355figure out the input tabbing scheme.
14356
14357There are two types of calls:
14358
14359Type 1: $structural_indentation_level < 0
14360 In this case we have to guess $input_tabstr to figure out the level.
14361
14362Type 2: $structural_indentation_level >= 0
14363 In this case the level of this line is known, and this routine can
14364 update the tabbing string, if still unknown, to make the level correct.
14365
14366=cut
14367
14368sub find_indentation_level {
14369    my ( $line, $structural_indentation_level ) = @_;
14370    my $level = 0;
14371    my $msg   = "";
14372
14373    my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
14374    my $input_tabstr      = $tokenizer_self->{_input_tabstr};
14375
14376    # find leading whitespace
14377    my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
14378
14379    # make first guess at input tabbing scheme if necessary
14380    if ( $know_input_tabstr < 0 ) {
14381
14382        $know_input_tabstr = 0;
14383
14384        if ( $tokenizer_self->{_tabs} ) {
14385            $input_tabstr = "\t";
14386            if ( length($leading_whitespace) > 0 ) {
14387                if ( $leading_whitespace !~ /\t/ ) {
14388
14389                    my $cols = $tokenizer_self->{_indent_columns};
14390
14391                    if ( length($leading_whitespace) < $cols ) {
14392                        $cols = length($leading_whitespace);
14393                    }
14394                    $input_tabstr = " " x $cols;
14395                }
14396            }
14397        }
14398        else {
14399            $input_tabstr = " " x $tokenizer_self->{_indent_columns};
14400
14401            if ( length($leading_whitespace) > 0 ) {
14402                if ( $leading_whitespace =~ /^\t/ ) {
14403                    $input_tabstr = "\t";
14404                }
14405            }
14406        }
14407        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
14408        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
14409    }
14410
14411    # determine the input tabbing scheme if possible
14412    if ( ( $know_input_tabstr == 0 )
14413        && ( length($leading_whitespace) > 0 )
14414        && ( $structural_indentation_level > 0 ) )
14415    {
14416        my $saved_input_tabstr = $input_tabstr;
14417
14418        # check for common case of one tab per indentation level
14419        if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
14420            if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
14421                $input_tabstr = "\t";
14422                $msg          = "Guessing old indentation was tab character\n";
14423            }
14424        }
14425
14426        else {
14427
14428            # detab any tabs based on 8 blanks per tab
14429            my $entabbed = "";
14430            if ( $leading_whitespace =~ s/^\t+/        /g ) {
14431                $entabbed = "entabbed";
14432            }
14433
14434            # now compute tabbing from number of spaces
14435            my $columns =
14436              length($leading_whitespace) / $structural_indentation_level;
14437            if ( $columns == int $columns ) {
14438                $msg =
14439                  "Guessing old indentation was $columns $entabbed spaces\n";
14440            }
14441            else {
14442                $columns = int $columns;
14443                $msg     =
14444"old indentation is unclear, using $columns $entabbed spaces\n";
14445            }
14446            $input_tabstr = " " x $columns;
14447        }
14448        $know_input_tabstr = 1;
14449        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
14450        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
14451
14452        # see if mistakes were made
14453        if ( ( $tokenizer_self->{_starting_level} > 0 )
14454            && !$tokenizer_self->{_know_starting_level} )
14455        {
14456
14457            if ( $input_tabstr ne $saved_input_tabstr ) {
14458                complain(
14459"I made a bad starting level guess; rerun with a value for -sil \n"
14460                );
14461            }
14462        }
14463    }
14464
14465    # use current guess at input tabbing to get input indentation level
14466    #
14467    # Patch to handle a common case of entabbed leading whitespace
14468    # If the leading whitespace equals 4 spaces and we also have
14469    # tabs, detab the input whitespace assuming 8 spaces per tab.
14470    if ( length($input_tabstr) == 4 ) {
14471        $leading_whitespace =~ s/^\t+/        /g;
14472    }
14473
14474    if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
14475        my $pos = 0;
14476
14477        while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
14478        {
14479            $pos += $len_tab;
14480            $level++;
14481        }
14482    }
14483    return ( $level, $msg );
14484}
14485
14486sub dump_token_types {
14487    my $class = shift;
14488    my $fh    = shift;
14489
14490    # This should be the latest list of token types in use
14491    # adding NEW_TOKENS: add a comment here
14492    print $fh <<'END_OF_LIST';
14493
14494Here is a list of the token types currently used.
14495For the following tokens, the "type" of a token is just the token itself.
14496
14497.. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
14498( ) <= >= == =~ !~ != ++ -- /= x=
14499... **= <<= >>= &&= ||= <=>
14500, + - / * | % ! x ~ = \ ? : . < > ^ &
14501
14502The following additional token types are defined:
14503
14504 type    meaning
14505    b    blank (white space)
14506    {    indent: opening structural curly brace or square bracket or paren
14507         (code block, anonymous hash reference, or anonymous array reference)
14508    }    outdent: right structural curly brace or square bracket or paren
14509    [    left non-structural square bracket (enclosing an array index)
14510    ]    right non-structural square bracket
14511    (    left non-structural paren (all but a list right of an =)
14512    )    right non-structural parena
14513    L    left non-structural curly brace (enclosing a key)
14514    R    right non-structural curly brace
14515    ;    terminal semicolon
14516    f    indicates a semicolon in a "for" statement
14517    h    here_doc operator <<
14518    #    a comment
14519    Q    indicates a quote or pattern
14520    q    indicates a qw quote block
14521    k    a perl keyword
14522    C    user-defined constant or constant function (with void prototype = ())
14523    U    user-defined function taking parameters
14524    G    user-defined function taking block parameter (like grep/map/eval)
14525    M    (unused, but reserved for subroutine definition name)
14526    P    (unused, but -html uses it to label pod text)
14527    t    type indicater such as %,$,@,*,&,sub
14528    w    bare word (perhaps a subroutine call)
14529    i    identifier of some type (with leading %, $, @, *, &, sub )
14530    n    a number
14531    v    a v-string
14532    F    a file test operator (like -e)
14533    Y    File handle
14534    Z    identifier in indirect object slot: may be file handle, object
14535    J    LABEL:  code block label
14536    j    LABEL after next, last, redo, goto
14537    p    unary +
14538    m    unary -
14539    pp   pre-increment operator ++
14540    mm   pre-decrement operator --
14541END_OF_LIST
14542}
14543
14544# This is a currently unused debug routine
14545sub dump_functions {
14546
14547    my $fh = *STDOUT;
14548    my ( $pkg, $sub );
14549    foreach $pkg ( keys %is_user_function ) {
14550        print $fh "\nnon-constant subs in package $pkg\n";
14551
14552        foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
14553            my $msg = "";
14554            if ( $is_block_list_function{$pkg}{$sub} ) {
14555                $msg = 'block_list';
14556            }
14557
14558            if ( $is_block_function{$pkg}{$sub} ) {
14559                $msg = 'block';
14560            }
14561            print $fh "$sub $msg\n";
14562        }
14563    }
14564
14565    foreach $pkg ( keys %is_constant ) {
14566        print $fh "\nconstants and constant subs in package $pkg\n";
14567
14568        foreach $sub ( keys %{ $is_constant{$pkg} } ) {
14569            print $fh "$sub\n";
14570        }
14571    }
14572}
14573
14574sub prepare_for_a_new_file {
14575    $saw_negative_indentation = 0;
14576    $id_scan_state            = '';
14577    $statement_type           = '';     # currently either '' or 'use'
14578    $last_nonblank_token      = ';';    # the only possible starting state which
14579    $last_nonblank_type       = ';';    # will make a leading brace a code block
14580    $last_nonblank_block_type = '';
14581    $last_nonblank_container_type      = '';
14582    $last_nonblank_type_sequence       = '';
14583    $last_last_nonblank_token          = ';';
14584    $last_last_nonblank_type           = ';';
14585    $last_last_nonblank_block_type     = '';
14586    $last_last_nonblank_container_type = '';
14587    $last_last_nonblank_type_sequence  = '';
14588    $last_nonblank_prototype           = "";
14589    $identifier                        = '';
14590    $in_quote   = 0;     # flag telling if we are chasing a quote, and what kind
14591    $quote_type = 'Q';
14592    $quote_character = "";    # character we seek if chasing a quote
14593    $quote_pos   = 0;  # next character index to check for case of alphanum char
14594    $quote_depth = 0;
14595    $allowed_quote_modifiers = "";
14596    $paren_depth             = 0;
14597    $brace_depth             = 0;
14598    $square_bracket_depth    = 0;
14599    $current_package         = "main";
14600    @current_depth[ 0 .. $#closing_brace_names ] =
14601      (0) x scalar @closing_brace_names;
14602    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
14603      ( 0 .. $#closing_brace_names );
14604    @current_sequence_number = ();
14605
14606    $paren_type[$paren_depth]            = '';
14607    $paren_semicolon_count[$paren_depth] = 0;
14608    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
14609    $brace_structural_type[$brace_depth]                   = '';
14610    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
14611    $paren_structural_type[$brace_depth]                   = '';
14612    $square_bracket_type[$square_bracket_depth]            = '';
14613    $square_bracket_structural_type[$square_bracket_depth] = '';
14614    $brace_package[$paren_depth]                           = $current_package;
14615    %is_constant                      = ();             # user-defined constants
14616    %is_user_function                 = ();             # user-defined functions
14617    %user_function_prototype          = ();             # their prototypes
14618    %is_block_function                = ();
14619    %is_block_list_function           = ();
14620    %saw_function_definition          = ();
14621    $unexpected_error_count           = 0;
14622    $want_paren                       = "";
14623    $context                          = UNKNOWN_CONTEXT;
14624    @slevel_stack                     = ();
14625    $ci_string_in_tokenizer           = "";
14626    $continuation_string_in_tokenizer = "0";
14627    $in_statement_continuation        = 0;
14628    @lower_case_labels_at             = ();
14629    $saw_v_string         = 0;      # for warning of v-strings on older perl
14630    $nesting_token_string = "";
14631    $nesting_type_string  = "";
14632    $nesting_block_string = '1';    # initially in a block
14633    $nesting_list_string  = '0';    # initially not in a list
14634    $nearly_matched_here_target_at = undef;
14635}
14636
14637sub get_quote_target {
14638    return matching_end_token($quote_character);
14639}
14640
14641sub get_indentation_level {
14642    return $level_in_tokenizer;
14643}
14644
14645sub reset_indentation_level {
14646    $level_in_tokenizer  = $_[0];
14647    $slevel_in_tokenizer = $_[0];
14648    push @slevel_stack, $slevel_in_tokenizer;
14649}
14650
14651{    # begin closure tokenize_this_line
14652
14653    use constant BRACE          => 0;
14654    use constant SQUARE_BRACKET => 1;
14655    use constant PAREN          => 2;
14656    use constant QUESTION_COLON => 3;
14657
14658    my (
14659        $block_type, $container_type,  $correct_expecting,
14660        $expecting,  $here_doc_target, $here_quote_character,
14661        $i,          $i_tok,           $last_nonblank_i,
14662        $next_tok,   $next_type,       $prototype,
14663        $rtoken_map, $rtoken_type,     $rtokens,
14664        $tok,        $type,            $type_sequence,
14665    );
14666
14667    my @here_target_list = ();    # list of here-doc target strings
14668
14669    # ------------------------------------------------------------
14670    # beginning of various scanner interfaces to simplify coding
14671    # ------------------------------------------------------------
14672    sub scan_bare_identifier {
14673        ( $i, $tok, $type, $prototype ) =
14674          scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
14675            $rtoken_map );
14676    }
14677
14678    sub scan_identifier {
14679        ( $i, $tok, $type, $id_scan_state, $identifier ) =
14680          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
14681    }
14682
14683    sub scan_id {
14684        ( $i, $tok, $type, $id_scan_state ) =
14685          scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
14686            $id_scan_state );
14687    }
14688
14689    my $number;
14690
14691    sub scan_number {
14692        ( $i, $type, $number ) =
14693          scan_number_do( $input_line, $i, $rtoken_map, $type );
14694    }
14695
14696    # a sub to warn if token found where term expected
14697    sub error_if_expecting_TERM {
14698        if ( $expecting == TERM ) {
14699            if ( $really_want_term{$last_nonblank_type} ) {
14700                unexpected( $tok, "term", $i_tok, $last_nonblank_i );
14701                1;
14702            }
14703        }
14704    }
14705
14706    # a sub to warn if token found where operator expected
14707    sub error_if_expecting_OPERATOR {
14708        if ( $expecting == OPERATOR ) {
14709            my $thing = defined $_[0] ? $_[0] : $tok;
14710            unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
14711            if ( $i_tok == 0 ) {
14712                interrupt_logfile();
14713                warning("Missing ';' above?\n");
14714                resume_logfile();
14715            }
14716            1;
14717        }
14718    }
14719
14720    # ------------------------------------------------------------
14721    # end scanner interfaces
14722    # ------------------------------------------------------------
14723
14724    # ------------------------------------------------------------
14725    # begin hash of code for handling most token types
14726    # ------------------------------------------------------------
14727    my $tokenization_code = {
14728        '>' => sub {
14729            error_if_expecting_TERM()
14730              if ( $expecting == TERM );
14731        },
14732        '|' => sub {
14733            error_if_expecting_TERM()
14734              if ( $expecting == TERM );
14735        },
14736        '$' => sub {
14737
14738            # start looking for a scalar
14739            error_if_expecting_OPERATOR("Scalar")
14740              if ( $expecting == OPERATOR );
14741            scan_identifier();
14742
14743            if ( $identifier eq '$^W' ) {
14744                $tokenizer_self->{_saw_perl_dash_w} = 1;
14745            }
14746
14747            # Check for indentifier in indirect object slot
14748            # (vorboard.pl, sort.t)
14749            # /^(print|printf|sort|exec|system)$/ )
14750            if (
14751                ( $last_nonblank_token =~ /$indirect_object_taker/ )
14752
14753                || ( ( $last_nonblank_token eq '(' )
14754                    && ( $paren_type[$paren_depth] =~ /$indirect_object_taker/ )
14755                ) || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
14756              )
14757            {
14758                $type = 'Z';
14759            }
14760        },
14761        '(' => sub {
14762
14763            ++$paren_depth;
14764            $paren_semicolon_count[$paren_depth] = 0;
14765            if ($want_paren) {
14766                $container_type = $want_paren;
14767                $want_paren     = "";
14768            }
14769            else {
14770                $container_type = $last_nonblank_token;
14771            }
14772            $paren_type[$paren_depth] = $container_type;
14773            $type_sequence = increase_nesting_depth( PAREN, $i_tok );
14774
14775            #-------------------------------------------------------------
14776            # FUTURE UPDATE: give almost all parens full indentation.
14777            # parens are trouble because the user is free to add almost
14778            # as many as desired, even if they are not necessary.   And
14779            # they can be misleading, because perl flattens lists.
14780            #$type = '{';
14781            #-------------------------------------------------------------
14782
14783            #print "at '(' i=$i last_type=$last_nonblank_type last_tok=$last_nonblank_token\n";
14784
14785            if ( $context == LIST_CONTEXT && $last_nonblank_token eq '=' ) {
14786                $type = '{';
14787            }
14788
14789            # propagate types down through nested parens
14790            # for example: the second paren in 'if ((' would be structural
14791            # since the first is.
14792
14793            elsif ( $last_nonblank_token eq '(' ) {
14794                $type = $last_nonblank_type;
14795            }
14796
14797            # make most lists structural, including
14798            # sub call parameter lists and &&, ||, !
14799
14800=pod
14801                NOTE: type ',' is not here because it causes subtle
14802                problems with continuation indentation for something
14803                like this, where the first 'or' will not get indented.
14804
14805                    assert(
14806                        __LINE__,
14807                        ( not defined $check )
14808                          or ref $check
14809                          or $check eq "new"
14810                          or $check eq "old",
14811                    );
14812=cut
14813
14814            # TESTING: added '.' and '=' and F
14815            elsif ( $last_nonblank_type =~ /^([xwiZUF!=\?:\.]|\|\||\&\&)$/ ) {
14816                $type = '{';
14817            }
14818
14819            ############################################
14820            # TESTING:
14821            ##elsif ( $last_nonblank_type !~ /^[,]$/ ) {
14822            ##    $type = '{';
14823            ##}
14824            ############################################
14825
14826            # paren after keyword..
14827            # map1.t
14828            elsif ( $last_nonblank_type eq 'k' ) { $type = '{' }
14829
14830            if ( $last_nonblank_type eq ')' ) {
14831                warning(
14832                    "Syntax error? found token '$last_nonblank_type' then '('\n"
14833                );
14834            }
14835            $paren_structural_type[$paren_depth] = $type;
14836
14837        },
14838        ')' => sub {
14839            $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
14840
14841            if ( $paren_structural_type[$paren_depth] eq '{' ) {
14842                $type = '}';
14843            }
14844
14845            $container_type = $paren_type[$paren_depth];
14846            if ( $paren_type[$paren_depth] =~ /^(for|foreach)$/ ) {
14847                my $num_sc = $paren_semicolon_count[$paren_depth];
14848                if ( $num_sc > 0 && $num_sc != 2 ) {
14849                    warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
14850                }
14851            }
14852
14853            if ( $paren_depth > 0 ) { $paren_depth-- }
14854        },
14855        ',' => sub {
14856            if ( $last_nonblank_type eq ',' ) {
14857                complain("Repeated ','s \n");
14858            }
14859##                FIXME: need to move this elsewhere, perhaps check after a '('
14860##                elsif ($last_nonblank_token eq '(') {
14861##                    warning("Leading ','s illegal in some versions of perl\n");
14862##                }
14863        },
14864        ';' => sub {
14865            $context        = UNKNOWN_CONTEXT;
14866            $statement_type = '';
14867
14868            if ( $paren_type[$paren_depth] =~ /^(for|foreach)$/ )
14869            {    # mark ; in for loop
14870
14871                # Be careful: we do not want a semicolon such as the
14872                # following to be included:
14873                #
14874                #    for (sort {strcoll($a,$b);} keys %investments) {
14875
14876                if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
14877                    && $square_bracket_depth ==
14878                    $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
14879                {
14880
14881                    $type = 'f';
14882                    $paren_semicolon_count[$paren_depth]++;
14883                }
14884            }
14885
14886        },
14887        '"' => sub {
14888            error_if_expecting_OPERATOR("String")
14889              if ( $expecting == OPERATOR );
14890            $in_quote                = 1;
14891            $type                    = 'Q';
14892            $allowed_quote_modifiers = "";
14893        },
14894        "'" => sub {
14895            error_if_expecting_OPERATOR("String")
14896              if ( $expecting == OPERATOR );
14897            $in_quote                = 1;
14898            $type                    = 'Q';
14899            $allowed_quote_modifiers = "";
14900        },
14901        '`' => sub {
14902            error_if_expecting_OPERATOR("String")
14903              if ( $expecting == OPERATOR );
14904            $in_quote                = 1;
14905            $type                    = 'Q';
14906            $allowed_quote_modifiers = "";
14907        },
14908        '/' => sub {
14909            my $is_pattern;
14910
14911            if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
14912                my $msg;
14913                ( $is_pattern, $msg ) =
14914                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
14915
14916                if ($msg) {
14917                    write_diagnostics("DIVIDE:$msg\n");
14918                    write_logfile_entry($msg);
14919                }
14920            }
14921            else { $is_pattern = ( $expecting == TERM ) }
14922
14923            if ($is_pattern) {
14924                $in_quote                = 1;
14925                $type                    = 'Q';
14926                $allowed_quote_modifiers = '[cgimosx]';
14927            }
14928            else {    # not a pattern; check for a /= token
14929
14930                if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
14931                    $i++;
14932                    $tok  = '/=';
14933                    $type = $tok;
14934                }
14935
14936                #DEBUG - collecting info on what tokens follow a divide
14937                # for development of guessing algorithm
14938                #if ( numerator_expected( $i, $rtokens ) < 0 ) {
14939                #    #write_diagnostics( "DIVIDE? $input_line\n" );
14940                #}
14941            }
14942        },
14943        '{' => sub {
14944
14945            # if we just saw a ')', we will label this block with
14946            # its type.  We need to do this to allow sub
14947            # code_block_type to determine if this brace starts a
14948            # code block or anonymous hash.  (The type of a paren
14949            # pair is the preceding token, such as 'if', 'else',
14950            # etc).
14951            $container_type = "";
14952            if ( $last_nonblank_token eq ')' ) {
14953                $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
14954
14955                # defensive move in case of a nesting error (pbug.t)
14956                # in which this ')' had no previous '('
14957                # this nesting error will have been caught
14958                if ( !defined($last_nonblank_token) ) {
14959                    $last_nonblank_token = 'if';
14960                }
14961
14962                # check for syntax error here;
14963                # expecting: (if|elsif|while|until|for|foreach)
14964                # Delete this if it is too redundant
14965                #unless ( $is_keyword{$last_nonblank_token} ) {
14966                unless ( $last_nonblank_token =~
14967                    /^(if|elsif|unless|while|until|for|foreach)$/ )
14968                {
14969                    warning(
14970"syntax error at ') {', didn't see (if|elsif|unless|while|until|for|foreach)\n"
14971                    );
14972                }
14973            }
14974
14975            # now identify which of the three possible types of
14976            # curly braces we have: hash index container, anonymous
14977            # hash reference, or code block.
14978
14979            # non-structural (hash index) curly brace pair
14980            # get marked 'L' and 'R'
14981            if ( is_non_structural_brace() ) {
14982                $type = 'L';
14983            }
14984
14985            # code and anonymous hash have the same type, '{', but are
14986            # distinguished by 'block_type',
14987            # which will be blank for an anonymous hash
14988            else {
14989                $block_type = code_block_type();
14990            }
14991            $brace_type[ ++$brace_depth ] = $block_type;
14992            $brace_package[$brace_depth] = $current_package;
14993            $type_sequence = increase_nesting_depth( BRACE, $i_tok );
14994            $brace_structural_type[$brace_depth] = $type;
14995            $brace_context[$brace_depth]         = $context;
14996        },
14997        '}' => sub {
14998            $block_type = $brace_type[$brace_depth];
14999            if ($block_type) { $statement_type = '' }
15000
15001            if ( defined( $brace_package[$brace_depth] ) ) {
15002                $current_package = $brace_package[$brace_depth];
15003            }
15004
15005            # can happen on brace error (caught elsewhere)
15006            else {
15007            }
15008            $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
15009
15010            if ( $brace_structural_type[$brace_depth] eq 'L' ) {
15011                $type = 'R';
15012            }
15013
15014            # propagate type information for 'do' and 'eval' blocks.
15015            # This is necessary to enable us to know if an operator
15016            # or term is expected next
15017            if ( $brace_type[$brace_depth] =~ /$block_operator/ ) {
15018                $tok = $brace_type[$brace_depth];
15019            }
15020
15021            $context = $brace_context[$brace_depth];
15022            if ( $brace_depth > 0 ) { $brace_depth--; }
15023        },
15024        '&' => sub {    # maybe sub call? start looking
15025
15026            # We have to check for sub call unless we are sure we
15027            # are expecting an operator.  This example from s2p
15028            # got mistaken as a q operator in an early version:
15029            #   print BODY &q(<<'EOT');
15030            if ( $expecting != OPERATOR ) {
15031                scan_identifier();
15032            }
15033            else {
15034            }
15035        },
15036        '<' => sub {    # angle operator or less than?
15037
15038            if ( $expecting != OPERATOR ) {
15039                ( $i, $type ) =
15040                  find_angle_operator_termination( $input_line, $i, $rtoken_map,
15041                    $expecting );
15042
15043            }
15044            else {
15045            }
15046        },
15047        '?' => sub {    # ?: conditional or starting pattern?
15048
15049            my $is_pattern;
15050
15051            if ( $expecting == UNKNOWN ) {
15052
15053                my $msg;
15054                ( $is_pattern, $msg ) =
15055                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
15056
15057                if ($msg) { write_logfile_entry($msg) }
15058            }
15059            else { $is_pattern = ( $expecting == TERM ) }
15060
15061            if ($is_pattern) {
15062                $in_quote                = 1;
15063                $type                    = 'Q';
15064                $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
15065            }
15066            else {
15067
15068                $type_sequence =
15069                  increase_nesting_depth( QUESTION_COLON, $i_tok );
15070            }
15071        },
15072        '*' => sub {    # typeglob, or multiply?
15073
15074            if ( $expecting == TERM ) {
15075                scan_identifier();
15076            }
15077            else {
15078
15079                if ( $$rtokens[ $i + 1 ] eq '=' ) {
15080                    $tok  = '*=';
15081                    $type = $tok;
15082                    $i++;
15083                }
15084                elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
15085                    $tok  = '**';
15086                    $type = $tok;
15087                    $i++;
15088                    if ( $$rtokens[ $i + 1 ] eq '=' ) {
15089                        $tok  = '**=';
15090                        $type = $tok;
15091                        $i++;
15092                    }
15093                }
15094            }
15095        },
15096        '.' => sub {    # what kind of . ?
15097
15098            if ( $expecting != OPERATOR ) {
15099                scan_number();
15100                if ( $type eq '.' ) {
15101                    error_if_expecting_TERM()
15102                      if ( $expecting == TERM );
15103                }
15104            }
15105            else {
15106            }
15107        },
15108        ':' => sub {
15109
15110            $type_sequence = decrease_nesting_depth( QUESTION_COLON, $i_tok );
15111            if ( $last_nonblank_token eq '?' ) {
15112                warning("Syntax error near ? :\n");
15113            }
15114        },
15115        '+' => sub {    # what kind of plus?
15116
15117            if ( $expecting == TERM ) {
15118                scan_number();
15119
15120                # unary plus is safest assumption if not a number
15121                if ( !defined($number) ) { $type = 'p'; }
15122            }
15123            elsif ( $expecting == OPERATOR ) {
15124            }
15125            else {
15126                if ( $next_type eq 'w' ) { $type = 'p' }
15127            }
15128        },
15129        '@' => sub {
15130
15131            error_if_expecting_OPERATOR("Array")
15132              if ( $expecting == OPERATOR );
15133            scan_identifier();
15134        },
15135        '%' => sub {    # hash or modulo?
15136
15137            # first guess is hash if no following blank
15138            if ( $expecting == UNKNOWN ) {
15139                if ( $next_type ne 'b' ) { $expecting = TERM }
15140            }
15141            if ( $expecting == TERM ) {
15142                scan_identifier();
15143            }
15144        },
15145        '[' => sub {
15146            $square_bracket_type[ ++$square_bracket_depth ] =
15147              $last_nonblank_token;
15148            $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
15149
15150            # It may seem odd, but structural square brackets have
15151            # type '{' and '}'.  This simplifies the indentation logic.
15152            if ( !is_non_structural_brace() ) {
15153                $type = '{';
15154            }
15155            $square_bracket_structural_type[$square_bracket_depth] = $type;
15156        },
15157        ']' => sub {
15158            $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
15159
15160            if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
15161            {
15162                $type = '}';
15163            }
15164            if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
15165        },
15166        '-' => sub {    # what kind of minus?
15167
15168            if ( ( $expecting != OPERATOR )
15169                && $is_file_test_operator{$next_tok} )
15170            {
15171                $i++;
15172                $tok .= $next_tok;
15173                $type = 'F';
15174            }
15175            elsif ( $expecting == TERM ) {
15176                scan_number();
15177
15178                # maybe part of bareword token? unary is safest
15179                if ( !defined($number) ) { $type = 'm'; }
15180
15181            }
15182            elsif ( $expecting == OPERATOR ) {
15183            }
15184            else {
15185
15186                if ( $next_type eq 'w' ) {
15187                    $type = 'm';
15188                }
15189            }
15190        },
15191
15192        '^' => sub {
15193
15194            # check for special variables like ${^WARNING_BITS}
15195            if ( $expecting == TERM ) {
15196
15197                # FIXME: this should work but will not catch errors
15198                # because we also have to be sure that previous token is
15199                # a type character ($,@,%).
15200                if ( $last_nonblank_token eq '{'
15201                    && ( $next_tok =~ /^[A-Za-z_]/ ) )
15202                {
15203
15204                    if ( $next_tok eq 'W' ) {
15205                        $tokenizer_self->{_saw_perl_dash_w} = 1;
15206                    }
15207                    $tok  = $tok . $next_tok;
15208                    $i    = $i + 1;
15209                    $type = 'w';
15210                }
15211
15212                else {
15213                    unless ( error_if_expecting_TERM() ) {
15214
15215                        # Something like this is valid but strange:
15216                        # undef ^I;
15217                        complain("The '^' seems unusual here\n");
15218                    }
15219                }
15220            }
15221        },
15222
15223        '::' => sub {    # probably a sub call
15224            scan_bare_identifier();
15225        },
15226        '<<' => sub {    # maybe a here-doc?
15227            return
15228              unless ( $i < $max_token_index )
15229              ;          # here-doc not possible if end of line
15230
15231            if ( $expecting != OPERATOR ) {
15232                my ($found_target);
15233                ( $found_target, $here_doc_target, $here_quote_character, $i ) =
15234                  find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
15235
15236                if ($found_target) {
15237                    push @here_target_list,
15238                      [ $here_doc_target, $here_quote_character ];
15239                    $type = 'h';
15240                    if ( length($here_doc_target) > 80 ) {
15241                        my $truncated = substr( $here_doc_target, 0, 80 );
15242                        complain("Long here-target: '$truncated' ...\n");
15243                    }
15244                    elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
15245                        complain(
15246                            "Unconventional here-target: '$here_doc_target'\n"
15247                        );
15248                    }
15249                }
15250                elsif ( $expecting == TERM ) {
15251
15252                    # shouldn't happen..
15253                    warning("Program bug; didn't find here doc target\n");
15254                    report_definite_bug();
15255                }
15256            }
15257            else {
15258            }
15259        },
15260        '->' => sub {
15261
15262            # if -> points to a bare word, we must scan for an identifier,
15263            # otherwise something like ->y would look like the y operator
15264            scan_identifier();
15265        },
15266
15267        # type = 'pp' for pre-increment, '++' for post-increment
15268        '++' => sub {
15269            if ( $expecting == TERM ) { $type = 'pp' }
15270        },
15271
15272        '=>' => sub {
15273            if ( $last_nonblank_type eq $tok ) {
15274                complain("Repeated '=>'s \n");
15275            }
15276        },
15277
15278        # type = 'mm' for pre-decrement, '--' for post-decrement
15279        '--' => sub {
15280
15281            if ( $expecting == TERM ) { $type = 'mm' }
15282        },
15283
15284        '&&' => sub {
15285            error_if_expecting_TERM()
15286              if ( $expecting == TERM );
15287        },
15288
15289        '||' => sub {
15290            error_if_expecting_TERM()
15291              if ( $expecting == TERM );
15292        },
15293    };
15294
15295    # ------------------------------------------------------------
15296    # end hash of code for handling individual token types
15297    # ------------------------------------------------------------
15298
15299    sub tokenize_this_line {
15300
15301=pod
15302
15303This routine breaks a line of perl code into tokens which are of use in
15304indentation and reformatting.  One of my goals has been to define tokens
15305such that a newline may be inserted between any pair of tokens without
15306changing or invalidating the program. This version comes close to this,
15307although there are necessarily a few exceptions which must be caught by
15308the formatter.  Many of these involve the treatment of bare words.
15309
15310The tokens and their types are returned in arrays.  See previous
15311routine for their names.
15312
15313See also the array "valid_token_types" in the BEGIN section for an
15314up-to-date list.
15315
15316To simplify things, token types are either a single character, or they
15317are identical to the tokens themselves.
15318
15319As a debugging aid, the -D flag creates a file containing a side-by-side
15320comparison of the input string and its tokenization for each line of a file.
15321This is an invaluable debugging aid.
15322
15323In addition to tokens, and some associated quantities, the tokenizer
15324also returns flags indication any special line types.  These include
15325quotes, here_docs, formats.
15326
15327-----------------------------------------------------------------------
15328
15329How to add NEW_TOKENS:
15330
15331New token types will undoubtedly be needed in the future both to keep up
15332with changes in perl and to help adapt the tokenizer to other applications.
15333
15334Here are some notes on the minimal steps.  I wrote these notes while
15335adding the 'v' token type for v-strings, which are things like version
15336numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
15337can use your editor to search for the string "NEW_TOKENS" to find the
15338appropriate sections to change):
15339
15340*. Try to talk somebody else into doing it!  If not, ..
15341
15342*. Make a backup of your current version in case things don't work out!
15343
15344*. Think of a new, unused character for the token type, and add to
15345the array @valid_token_types in the BEGIN section of this package.
15346For example, I used 'v' for v-strings.
15347
15348*. Implement coding to recognize the $type of the token in this routine.
15349This is the hardest part, and is best done by immitating or modifying
15350some of the existing coding.  For example, to recognize v-strings, I
15351patched 'sub scan_bare_identifier' to recognize v-strings beginning with
15352'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
15353
15354*. Update sub operator_expected.  This update is critically important but
15355the coding is trivial.  Look at the comments in that routine for help.
15356For v-strings, which should behave like numbers, I just added 'v' to the
15357regex used to handle numbers and strings (types 'n' and 'Q').
15358
15359*. Implement a 'bond strength' rule in sub set_bond_strengths in
15360PerlTidy::Formatter for breaking lines around this token type.  You can
15361skip this step and take the default at first, then adjust later to get
15362desired results.  For adding type 'v', I looked at sub bond_strength and
15363saw that number type 'n' was using default strengths, so I didn't do
15364anything.  I may tune it up someday if I don't like the way line
15365breaks with v-strings look.
15366
15367*. Implement a 'whitespace' rule in sub set_white_space_flag in
15368PerlTidy::Formatter.  For adding type 'v', I looked at this routine
15369and saw that type 'n' used spaces on both sides, so I just added 'v'
15370to the array @spaces_both_sides.
15371
15372*. Update HtmlWriter package so that users can colorize the token as
15373desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
15374that package.  For v-strings, I initially chose to use a default color
15375equal to the default for numbers, but it might be nice to change that
15376eventually.
15377
15378*. Update comments in PerlTidy::Tokenizer::dump_token_types.
15379
15380*. Run lots and lots of debug tests.  Start with special files designed
15381to test the new token type.  Run with the -D flag to create a .DEBUG
15382file which shows the tokenization.  When these work ok, test as many old
15383scripts as possible.  Start with all of the '.t' files in the 'test'
15384directory of the distribution file.  Compare .tdy output with previous
15385version and updated version to see the differences.  Then include as
15386many more files as possible. My own technique has been to collect a huge
15387number of perl scripts (thousands!) into one directory and run perltidy
15388*, then run diff between the output of the previous version and the
15389current version.
15390
15391-----------------------------------------------------------------------
15392
15393=cut
15394
15395        my $line_of_tokens = shift;
15396        my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
15397
15398        # patch while coding change is underway
15399        # make callers private data to allow access
15400        # $tokenizer_self = $caller_tokenizer_self;
15401
15402        # extract line number for use in error messages
15403        $input_line_number = $line_of_tokens->{_line_number};
15404
15405        # check for pod documentation
15406        if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
15407
15408            # must not be in multi-line quote
15409            # and must not be in an eqn
15410            if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
15411            {
15412                $tokenizer_self->{_in_pod} = 1;
15413                return;
15414            }
15415        }
15416
15417        $input_line = $untrimmed_input_line;
15418
15419        chomp $input_line;
15420
15421        # trim start of this line unless we are continuing a quoted line
15422        # do not trim end because we might end in a quote (test: deken4.pl)
15423        # PerlTidy::Formatter will delete needless trailing blanks
15424        unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
15425            $input_line =~ s/^\s*//;    # trim left end
15426        }
15427
15428        # initialize for the main loop
15429        my @output_token_list     = ();    # stack of output token indexes
15430        my @output_token_type     = ();    # token types
15431        my @output_block_type     = ();    # types of code block
15432        my @output_container_type = ();    # paren types, such as if, elsif, ..
15433        my @output_type_sequence  = ();    # nesting sequential number
15434
15435        $tok             = $last_nonblank_token;
15436        $type            = $last_nonblank_type;
15437        $prototype       = $last_nonblank_prototype;
15438        $last_nonblank_i = -1;
15439        $block_type      = $last_nonblank_block_type;
15440        $container_type  = $last_nonblank_container_type;
15441        $type_sequence   = $last_nonblank_type_sequence;
15442        @here_target_list = ();            # list of here-doc target strings
15443
15444        $peeked_ahead = 0;
15445
15446        # tokenization is done in two stages..
15447        # stage 1 is a very simple pre-tokenization
15448        my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
15449
15450        # a little optimization for a full-line comment
15451        if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
15452            $max_tokens_wanted = 1    # no use tokenizing a comment
15453        }
15454
15455        # start by breaking the line into pre-tokens
15456        ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
15457          pre_tokenize( $input_line, $max_tokens_wanted );
15458
15459        $max_token_index = scalar(@$rpretokens) - 1;
15460        push ( @$rpretokens, ' ', ' ', ' ' )
15461          ;    # extra whitespace simplifies logic
15462        push ( @$rpretoken_map,  0,   0,   0 );     # shouldn't be referenced
15463        push ( @$rpretoken_type, 'b', 'b', 'b' );
15464
15465        # temporary copies while coding change is underway
15466        ( $rtokens, $rtoken_map, $rtoken_type ) =
15467          ( $rpretokens, $rpretoken_map, $rpretoken_type );
15468
15469        # initialize for main loop
15470        for $i ( 0 .. $max_token_index + 3 ) {
15471            $output_token_type[$i]     = "";
15472            $output_block_type[$i]     = "";
15473            $output_container_type[$i] = "";
15474            $output_type_sequence[$i]  = "";
15475        }
15476        $i     = -1;
15477        $i_tok = -1;
15478
15479        # ------------------------------------------------------------
15480        # begin main tokenization loop
15481        # ------------------------------------------------------------
15482
15483        # we are looking at each pre-token of one line and combining them
15484        # into tokens
15485        while ( ++$i <= $max_token_index ) {
15486
15487            if ($in_quote) {    # continue looking for end of a quote
15488                $type = $quote_type;
15489
15490                unless (@output_token_list) {  # initialize if continuation line
15491                    push ( @output_token_list, $i );
15492                    $output_token_type[$i] = $type;
15493
15494                }
15495                $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
15496
15497                # scan for the end of the quote or pattern
15498                ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
15499                  do_quote( $i, $in_quote, $quote_character, $quote_pos,
15500                    $quote_depth, $rtokens, $rtoken_map );
15501
15502                # all done if we didn't find it
15503                last if ($in_quote);
15504
15505                # re-initialize for next search
15506                $quote_character = '';
15507                $quote_pos       = 0;
15508                $quote_type      = 'Q';
15509                last if ( ++$i > $max_token_index );
15510
15511                # look for any modifiers
15512                if ($allowed_quote_modifiers) {
15513
15514                    # check for exact quote modifiers
15515                    if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
15516                        my $str = $$rtokens[$i];
15517                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
15518
15519                        if ( defined( pos($str) ) ) {
15520
15521                            # matched
15522                            if ( pos($str) == length($str) ) {
15523                                last if ( ++$i > $max_token_index );
15524                            }
15525
15526                            # Looks like a joined quote modifier
15527                            # and keyword, maybe something like
15528                            # s/xxx/yyy/gefor @k=...
15529                            # Example is "galgen.pl".  Would have to split
15530                            # the word and insert a new token in the
15531                            # pre-token list.  This is so rare that I haven't
15532                            # done it.  Will just issue a warning citation.
15533
15534                            # This error might also be triggered if my quote
15535                            # modifier characters are incomplete
15536                            else {
15537                                warning(<<EOM);
15538
15539Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
15540Please put a space between quote modifiers and trailing keywords.
15541EOM
15542
15543                                # print "token $$rtokens[$i]\n";
15544                                # my $num = length($str) - pos($str);
15545                                # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
15546                                # print "continuing with new token $$rtokens[$i]\n";
15547
15548                                # skipping past this token does least damage
15549                                last if ( ++$i > $max_token_index );
15550                            }
15551                        }
15552                        else {
15553
15554                            # example file: rokicki4.pl
15555                            # This error might also be triggered if my quote
15556                            # modifier characters are incomplete
15557                            write_logfile_entry(
15558"Note: found word $str at quote modifier location\n"
15559                            );
15560                        }
15561                    }
15562
15563                    # re-initialize
15564                    $allowed_quote_modifiers = "";
15565                }
15566            }
15567
15568            unless ( $tok =~ /^\s*$/ ) {
15569
15570                # try to catch some common errors
15571                if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
15572
15573                    if ( $last_nonblank_token eq 'eq' ) {
15574                        complain("Should 'eq' be '==' here ?\n");
15575                    }
15576                    elsif ( $last_nonblank_token eq 'ne' ) {
15577                        complain("Should 'ne' be '!=' here ?\n");
15578                    }
15579                }
15580                $last_last_nonblank_token          = $last_nonblank_token;
15581                $last_last_nonblank_type           = $last_nonblank_type;
15582                $last_last_nonblank_block_type     = $last_nonblank_block_type;
15583                $last_last_nonblank_container_type =
15584                  $last_nonblank_container_type;
15585                $last_last_nonblank_type_sequence =
15586                  $last_nonblank_type_sequence;
15587                $last_nonblank_token          = $tok;
15588                $last_nonblank_type           = $type;
15589                $last_nonblank_prototype      = $prototype;
15590                $last_nonblank_block_type     = $block_type;
15591                $last_nonblank_container_type = $container_type;
15592                $last_nonblank_type_sequence  = $type_sequence;
15593                $last_nonblank_i              = $i_tok;
15594            }
15595
15596            # store previous token type
15597            if ( $i_tok >= 0 ) {
15598                $output_token_type[$i_tok]     = $type;
15599                $output_block_type[$i_tok]     = $block_type;
15600                $output_container_type[$i_tok] = $container_type;
15601                $output_type_sequence[$i_tok]  = $type_sequence;
15602            }
15603            my $pre_tok  = $$rtokens[$i];        # get the next pre-token
15604            my $pre_type = $$rtoken_type[$i];    # and type
15605            $tok  = $pre_tok;
15606            $type = $pre_type;                   # to be modified as necessary
15607            $block_type = "";    # blank for all tokens except code block braces
15608            $container_type = "";    # blank for all tokens except some parens
15609            $type_sequence  = "";    # blank for all tokens except ?/:
15610            $prototype = "";    # blank for all tokens except user defined subs
15611            $i_tok     = $i;
15612
15613            # this pre-token will start an output token
15614            push ( @output_token_list, $i_tok );
15615
15616            # continue gathering identifier if necessary
15617            # but do not start on blanks and comments
15618            if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
15619
15620                if ( $id_scan_state =~ /(sub|package)/ ) {
15621                    scan_id();
15622                }
15623                else {
15624                    scan_identifier();
15625                }
15626
15627                last if ($id_scan_state);
15628                next if ( ( $i > 0 ) || $type );
15629
15630                # didn't find any token; start over
15631                $type = $pre_type;
15632                $tok  = $pre_tok;
15633            }
15634
15635            # handle whitespace tokens..
15636            next if ( $type eq 'b' );
15637            my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
15638            my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
15639
15640            # Build larger tokens where possible, since we are not in a quote.
15641            #
15642            # First try to assemble digraphs.  The following tokens are
15643            # excluded and handled specially:
15644            # '/=' is excluded because the / might start a pattern.
15645            # 'x=' is excluded since it might be $x=, with $ on previous line
15646            # '**' and *= might be typeglobs of punctuation variables
15647            # I have allowed tokens starting with <, such as <=,
15648            # because I don't think these could be valid angle operators.
15649            # test file: storrs4.pl
15650            my $test_tok = $tok . $$rtokens[ $i + 1 ];
15651
15652            if (
15653                $is_digraph{$test_tok}
15654                && ( $test_tok ne '/=' )    # might be pattern
15655                && ( $test_tok ne 'x=' )    # might be $x
15656                && ( $test_tok ne '**' )    # typeglob?
15657                && ( $test_tok ne '*=' )    # typeglob?
15658              )
15659            {
15660                $tok = $test_tok;
15661                $i++;
15662
15663                # Now try to assemble trigraphs.  Note that all possible
15664                # perl trigraphs can be constructed by appending a character
15665                # to a digraph.
15666                $test_tok = $tok . $$rtokens[ $i + 1 ];
15667
15668                if ( $is_trigraph{$test_tok} ) {
15669                    $tok = $test_tok;
15670                    $i++;
15671                }
15672            }
15673            $type      = $tok;
15674            $next_tok  = $$rtokens[ $i + 1 ];
15675            $next_type = $$rtoken_type[ $i + 1 ];
15676            $expecting = operator_expected( $prev_type, $tok, $next_type );
15677
15678            # This debug mode forces perltidy to rely exclusively on its
15679            # guessing algorithms.  This is a simple way to test them.
15680            TOKENIZER_DEBUG_FLAG_GUESS && do {
15681                $correct_expecting = $expecting;
15682                $expecting         = UNKNOWN;
15683            };
15684
15685            TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
15686                local $" = ')(';
15687                my @debug_list = (
15688                    $last_nonblank_token,      $tok,
15689                    $next_tok,                 $brace_depth,
15690                    $brace_type[$brace_depth], $paren_depth,
15691                    $paren_type[$paren_depth]
15692                );
15693                print "TOKENIZE:(@debug_list)\n";
15694            };
15695
15696            ###############################################################
15697            # We have the next token, $tok.
15698            # Now we have to examine this token and decide what it is
15699            # and define its $type
15700            #
15701            # section 1: bare words
15702            ###############################################################
15703
15704            if ( $pre_type eq 'w' ) {
15705                my ( $next_nonblank_token, $i_next ) =
15706                  find_next_nonblank_token( $i, $rtokens );
15707
15708                # quote a word followed by => operator
15709                if ( $next_nonblank_token eq '=' ) {
15710
15711                    if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
15712                        if ( $is_constant{$current_package}{$tok} ) {
15713                            $type = 'C';
15714                        }
15715                        elsif ( $is_user_function{$current_package}{$tok} ) {
15716                            $type      = 'U';
15717                            $prototype =
15718                              $user_function_prototype{$current_package}{$tok};
15719                        }
15720                        elsif ( $tok =~ /^v\d+$/ ) {
15721                            $type = 'v';
15722                            unless ($saw_v_string) { report_v_string($tok) }
15723                        }
15724                        else { $type = 'w' }
15725
15726                        next;
15727                    }
15728                }
15729
15730                # quote a bare word within braces..like xxx->{s}; note that we
15731                # must be sure this is not a structural brace, to avoid
15732                # mistaking {s} in the following for a quoted bare word:
15733                #     for(@[){s}bla}BLA}
15734                if ( ( $last_nonblank_type eq 'L' )
15735                    && ( $next_nonblank_token eq '}' ) )
15736                {
15737                    $type = 'w';
15738                    next;
15739                }
15740
15741                # handle operator x (now we know it isn't $x=)
15742                if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
15743                    if ( $tok eq 'x' ) {
15744
15745                        if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
15746                            $tok  = 'x=';
15747                            $type = $tok;
15748                            $i++;
15749                        }
15750                        else {
15751                            $type = 'x';
15752                        }
15753                    }
15754
15755                    # FIXME: Patch: mark something like x4 as an integer for now
15756                    # It gets fixed downstream.  This is easier than
15757                    # splitting the pretoken.
15758                    else {
15759                        $type = 'n';
15760                    }
15761                }
15762
15763                elsif ( ( $tok eq 'strict' )
15764                    and ( $last_nonblank_token eq 'use' ) )
15765                {
15766                    $tokenizer_self->{_saw_use_strict} = 1;
15767                    scan_bare_identifier();
15768                }
15769
15770                elsif ( ( $tok eq 'warnings' )
15771                    and ( $last_nonblank_token eq 'use' ) )
15772                {
15773                    $tokenizer_self->{_saw_perl_dash_w} = 1;
15774
15775                    # scan as identifier, so that we pick up something like:
15776                    # use warnings::register
15777                    scan_bare_identifier();
15778                }
15779
15780                elsif (
15781                    $tok eq 'AutoLoader'
15782                    && $tokenizer_self->{_look_for_autoloader}
15783                    && (
15784                        $last_nonblank_token eq 'use'
15785
15786                        # these regexes are from AutoSplit.pm, which we want
15787                        # to mimic
15788                        || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
15789                        || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
15790                    )
15791                  )
15792                {
15793                    write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
15794                    $tokenizer_self->{_saw_autoloader}      = 1;
15795                    $tokenizer_self->{_look_for_autoloader} = 0;
15796                    scan_bare_identifier();
15797                }
15798
15799                elsif (
15800                    $tok eq 'SelfLoader'
15801                    && $tokenizer_self->{_look_for_selfloader}
15802                    && ( $last_nonblank_token eq 'use'
15803                        || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
15804                        || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
15805                  )
15806                {
15807                    write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
15808                    $tokenizer_self->{_saw_selfloader}      = 1;
15809                    $tokenizer_self->{_look_for_selfloader} = 0;
15810                    scan_bare_identifier();
15811                }
15812
15813                elsif ( ( $tok eq 'constant' )
15814                    and ( $last_nonblank_token eq 'use' ) )
15815                {
15816                    scan_bare_identifier();
15817                    my ( $next_nonblank_token, $i_next ) =
15818                      find_next_nonblank_token( $i, $rtokens );
15819
15820                    if ($next_nonblank_token) {
15821
15822                        if ( $is_keyword{$next_nonblank_token} ) {
15823                            warning(
15824"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
15825                            );
15826                        }
15827
15828                        # FIXME: could check for error in which next token is not
15829                        # a word (number, punctuation, ..)
15830                        else {
15831                            $is_constant{$current_package}
15832                              {$next_nonblank_token} = 1;
15833                        }
15834                    }
15835                }
15836
15837                elsif ( $tok eq 's' ) {
15838                    error_if_expecting_OPERATOR()
15839                      if ( $expecting == OPERATOR );
15840                    $in_quote = 2;    # starting first of two patterns/quotes
15841
15842                    # NOTE: camel 3 says egimosx, but 'c' is accepeted by perl
15843                    $allowed_quote_modifiers = '[cegimosx]';
15844                    $type                    = 'Q';
15845                }
15846
15847                elsif ( $tok =~ /^((y)|(tr))$/ ) {    # built-in functions
15848                    error_if_expecting_OPERATOR()
15849                      if ( $expecting == OPERATOR );
15850                    $in_quote = 2;    # starting first of two patterns/quotes
15851                    $allowed_quote_modifiers = '[cds]';
15852                    $type                    = 'Q';
15853                }
15854
15855                elsif ( $tok eq 'm' ) {    # match operator
15856                    error_if_expecting_OPERATOR()
15857                      if ( $expecting == OPERATOR );
15858                    $in_quote                = 1;
15859                    $allowed_quote_modifiers = '[cgimosx]';
15860                    $type                    = 'Q';
15861                }
15862
15863                elsif ( $tok =~ /^(q|qq|qw|qx)$/ ) {   # various quote operators
15864                    error_if_expecting_OPERATOR()
15865                      if ( $expecting == OPERATOR );
15866                    $in_quote                = 1;
15867                    $allowed_quote_modifiers = "";
15868
15869                    # All quote types are 'Q' except possibly qw quotes.
15870                    # qw quotes are special in that they may generally be trimmed
15871                    # of leading and trailing whitespace.  So they are given a
15872                    # separate type, 'q', unless requested otherwise.
15873                    $type =
15874                      ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
15875                      ? 'q'
15876                      : 'Q';
15877                    $quote_type = $type;
15878                }
15879
15880                elsif ( $tok =~ /^(qr)$/ ) {
15881                    error_if_expecting_OPERATOR()
15882                      if ( $expecting == OPERATOR );
15883                    $in_quote                = 1;
15884                    $allowed_quote_modifiers = '[imosx]';    # camel 3 p 147
15885                    $type                    = 'Q';
15886                    $quote_type              = $type;
15887                }
15888
15889                # check for a statement label
15890                elsif ( ( $next_nonblank_token eq ':' )
15891                    && ( $$rtokens[ $i_next + 1 ] ne ':' )
15892                    && label_ok() )
15893                {
15894                    if ( $tok !~ /A-Z/ ) {
15895                        push @lower_case_labels_at, $input_line_number;
15896                    }
15897                    $type = 'J';
15898                    $tok .= ':';
15899                    $i = $i_next;
15900                    next;
15901                }
15902
15903                elsif ( $tok =~ /^(sub|package)$/ ) {
15904                    error_if_expecting_OPERATOR()
15905                      if ( $expecting == OPERATOR );
15906                    scan_id();
15907                }
15908
15909                # Note on token types for format, __DATA__, __END__:
15910                # It simplifies things to give these type ';', so that when we
15911                # start rescanning we will be expecting a token of type TERM.
15912                # We will switch to type 'k' before outputting the tokens.
15913                elsif ( $tok eq 'format' ) {
15914                    $type = ';';    # make tokenizer look for TERM next
15915                    $tokenizer_self->{_in_format} = 1;
15916                    last;
15917                }
15918
15919                elsif ( $tok eq '__DATA__' ) {
15920                    $tokenizer_self->{_in_data} = $tok;
15921                    $type = ';';    # make tokenizer look for TERM next
15922                    last;
15923                }
15924
15925                elsif ( $tok eq '__END__' ) {
15926                    $tokenizer_self->{_in_end} = $tok;
15927                    $type = ';';    # make tokenizer look for TERM next
15928                    last;
15929                }
15930
15931                elsif ( $is_keyword{$tok} ) {
15932                    $type = 'k';
15933
15934                    # Since for and foreach may not be followed immediately
15935                    # by an opening paren, we have to remember which keyword
15936                    # is associated with the next '('
15937                    if ( $tok =~ /^(for|foreach)$/ ) {
15938                        if ( new_statement_ok() ) {
15939                            $want_paren = $tok;
15940                        }
15941                    }
15942
15943                    # recognize 'use' statements, which are special
15944                    elsif ( $tok =~ /^(use|require)$/ ) {
15945                        $statement_type = $tok;
15946                        error_if_expecting_OPERATOR()
15947                          if ( $expecting == OPERATOR );
15948                    }
15949                }
15950
15951                # check for inline label
15952                elsif ( ( $last_nonblank_type eq 'k' )
15953                    && ( $last_nonblank_token =~ /^(redo|last|next|goto)$/ ) )
15954                {
15955                    $type = 'j';
15956                    next;
15957                }
15958
15959                # something else --
15960                else {
15961
15962                    scan_bare_identifier();
15963                    if ( $type eq 'w' ) {
15964                        error_if_expecting_OPERATOR("bareword")
15965                          if ( $expecting == OPERATOR );
15966
15967                        # mark bare words immediately followed by a paren as
15968                        # functions
15969                        $next_tok = $$rtokens[ $i + 1 ];
15970                        if ( $next_tok eq '(' ) {
15971                            $type = 'U';
15972                        }
15973
15974                        # mark bare words following a file test operator as
15975                        # something that will expect an operator next.
15976                        # patch 072901: unless followed immediately by a paren,
15977                        # in which case it must be a function call (pid.t)
15978                        if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
15979                            $type = 'C';
15980                        }
15981                    }
15982                }
15983            }
15984
15985            ###############################################################
15986            # section 2: strings of digits
15987            ###############################################################
15988            elsif ( $pre_type eq 'd' ) {
15989                error_if_expecting_OPERATOR("Number")
15990                  if ( $expecting == OPERATOR );
15991                scan_number();
15992                if ( !defined($number) ) {
15993
15994                    # shouldn't happen - we should always get a number
15995                    warning("non-number beginning with digit--program bug\n");
15996                    report_definite_bug();
15997                }
15998            }
15999
16000            ###############################################################
16001            # section 3: all other tokens
16002            ###############################################################
16003
16004            else {
16005                last if ( $tok eq '#' );
16006                my $code = $tokenization_code->{$tok};
16007                $code->() if $code;
16008                redo if $in_quote;
16009            }
16010        }
16011
16012        # -----------------------------
16013        # end of main tokenization loop
16014        # -----------------------------
16015
16016        if ( $i_tok >= 0 ) {
16017            $output_token_type[$i_tok]     = $type;
16018            $output_block_type[$i_tok]     = $block_type;
16019            $output_container_type[$i_tok] = $container_type;
16020            $output_type_sequence[$i_tok]  = $type_sequence;
16021        }
16022
16023        unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
16024            $last_last_nonblank_token          = $last_nonblank_token;
16025            $last_last_nonblank_type           = $last_nonblank_type;
16026            $last_last_nonblank_block_type     = $last_nonblank_block_type;
16027            $last_last_nonblank_container_type = $last_nonblank_container_type;
16028            $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
16029            $last_nonblank_token               = $tok;
16030            $last_nonblank_type                = $type;
16031            $last_nonblank_block_type          = $block_type;
16032            $last_nonblank_container_type      = $container_type;
16033            $last_nonblank_type_sequence       = $type_sequence;
16034            $last_nonblank_prototype           = $prototype;
16035        }
16036
16037        # reset indentation level if necessary at a sub or package
16038        # in an attempt to recover from a nesting error
16039        if ( $level_in_tokenizer < 0 ) {
16040            if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
16041                reset_indentation_level(0);
16042                brace_warning("resetting level to 0 at $1 $2\n");
16043            }
16044        }
16045
16046        # all done tokenizing this line ...
16047        # now prepare the final list of tokens and types
16048
16049        my @token_type     = ();   # stack of output token types
16050        my @block_type     = ();   # stack of output code block types
16051        my @container_type = ();   # stack of output code container types
16052        my @type_sequence  = ();   # stack of output type sequence numbers
16053        my @tokens         = ();   # output tokens
16054        my @levels         = ();   # structural brace levels of output tokens
16055        my @slevels        = ();   # secondary nesting levels of output tokens
16056        my @nesting_tokens = ();   # string of tokens leading to this depth
16057        my @nesting_types  = ();   # string of token types leading to this depth
16058        my @nesting_blocks = ();   # string of block types leading to this depth
16059        my @nesting_lists  = ();   # string of list types leading to this depth
16060        my @ci_string = ();  # string needed to compute continuation indentation
16061        my @container_environment = ();    # BLOCK or LIST
16062        my $container_environment = '';
16063        my $im                    = -1;    # previous $i value
16064        my $num;
16065        my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
16066        my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
16067
16068=head1 Computing Token Indentation
16069
16070    The final section of the tokenizer forms tokens and also computes
16071    parameters needed to find indentation.  It is much easier to do it
16072    in the tokenizer than elsewhere.  Here is a brief description of how
16073    indentation is computed.  PerlTidy computes indentation as the sum
16074    of 2 terms:
16075
16076    (1) structural indentation, such as if/else/elsif blocks
16077    (2) continuation indentation, such as long parameter call lists.
16078
16079    These are occasionally called primary and secondary indentation.
16080
16081    Structural indentation is introduced by tokens of type '{', although
16082    the actual tokens might be '{', '(', or '['.  Structural indentation
16083    is of two types: BLOCK and non-BLOCK.  Default structural indentation
16084    is 4 characters if the standard indentation scheme is used.
16085
16086    Continuation indentation is introduced whenever a line at BLOCK level
16087    is broken before its termination.  Default continuation indentation
16088    is 2 characters in the standard indentation scheme.
16089
16090    Both types of indentation may be nested arbitrarily deep and
16091    interlaced.  The distinction between the two is somewhat arbitrary.
16092
16093    For each token, we will define two variables which would apply if
16094    the current statement were broken just before that token, so that
16095    that token started a new line:
16096
16097    $level = the structural indentation level,
16098    $ci_level = the continuation indentation level
16099
16100    The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
16101    assuming defaults.  However, in some special cases it is customary
16102    to modify $ci_level from this strict value.
16103
16104    The total structural indentation is easy to compute by adding and
16105    subtracting 1 from a saved value as types '{' and '}' are seen.  The
16106    running value of this variable is $level_in_tokenizer.
16107
16108    The total continuation is much more difficult to compute, and requires
16109    several variables.  These veriables are:
16110
16111    $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
16112      each indentation level, if there are intervening open secondary
16113      structures just prior to that level.
16114    $continuation_string_in_tokenizer = a string of 1's and 0's indicating
16115      if the last token at that level is "continued", meaning that it
16116      is not the first token of an expression.
16117    $nesting_block_string = a string of 1's and 0's indicating, for each
16118      indentation level, if the level is of type BLOCK or not.
16119    $nesting_list_string = a string of 1's and 0's indicating, for each
16120      indentation level, if it is is appropriate for list formatting.
16121      If so, continuation indentation is used to indent long list items.
16122    @slevel_stack = a stack of total nesting depths at each
16123      structural indentation level, where "total nesting depth" means
16124      the nesting depth that would occur if every nesting token -- '{', '[',
16125      and '(' -- , regardless of context, is used to compute a nesting
16126      depth.
16127
16128=cut
16129
16130        my $level_i;
16131        my $nesting_token_string_i;
16132        my $ci_string_i;
16133        my $nesting_type_string_i;
16134        my $nesting_block_string_i;
16135        my $nesting_list_string_i;
16136
16137        foreach $i (@output_token_list) {  # scan the list of pre-tokens indexes
16138
16139            # self-checking for valid token types
16140            my $type = $output_token_type[$i];
16141            my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
16142            $level_i = $level_in_tokenizer;
16143
16144            # This can happen by running perltidy on non-scripts
16145            # although it could also be bug introduced by programming change.
16146            # Perl silently accepts a 032 (^Z) and takes it as the end
16147            if ( !$is_valid_token_type{$type} ) {
16148                my $val = ord($type);
16149                warning(
16150                    "unexpected character decimal $val ($type) in script\n");
16151                $tokenizer_self->{_in_error} = 1;
16152            }
16153
16154            # ------------------------------------------------------------------------
16155            # TOKEN TYPE PATCHES
16156            #  output __END__, __DATA__, and format as type 'k' instead of ';'
16157            # to make html colors correct, etc.
16158            my $fix_type = $type;
16159            if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
16160
16161            # output anonymous 'sub' as keyword
16162            if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
16163
16164            # ------------------------------------------------------------------------
16165
16166            $nesting_token_string_i = $nesting_token_string;
16167            $nesting_type_string_i  = $nesting_type_string;
16168            $nesting_block_string_i = $nesting_block_string;
16169            $nesting_list_string_i  = $nesting_list_string;
16170
16171            # set primary indentation levels based on structural braces
16172            # Note: these are set so that the leading braces have a HIGHER
16173            # level than their CONTENTS, which is convenient for indentation
16174            # Also, define continuation indentation for each token.
16175            if ( $type eq '{' || $type eq 'L' ) {
16176
16177                # use environment before updating
16178                $container_environment =
16179                  $nesting_block_string =~ /1$/ ? 'BLOCK'
16180                  : $nesting_list_string =~ /1$/ ? 'LIST'
16181                  : "";
16182
16183                # if the difference between total nesting levels is not 1,
16184                # there are intervening non-structural nesting types between
16185                # this '{' and the previous unclosed '{'
16186                my $intervening_secondary_structure = 0;
16187                if (@slevel_stack) {
16188                    $intervening_secondary_structure =
16189                      $slevel_in_tokenizer - $slevel_stack[$#slevel_stack];
16190                }
16191
16192=head1 Continuation Indentation
16193
16194Having tried setting continuation indentation both in the formatter and
16195in the tokenizer, I can say that setting it in the tokenizer is much,
16196much easier.  The formatter already has too much to do, and can't
16197make decisions on line breaks without knowing what 'ci' will be at
16198arbitrary locations.
16199
16200But a problem with setting the continuation indentation (ci) here
16201in the tokenizer is that we do not know where line breaks will actually
16202be.  As a result, we don't know if we should propagate continuation
16203indentation to higher levels of structure.
16204
16205For nesting of only structural indentation, we never need to do this.
16206For example, in a long if statement, like this
16207
16208  if ( !$output_block_type[$i]
16209    && ($in_statement_continuation) )
16210  {           <--outdented
16211      do_something();
16212  }
16213
16214the second line has ci but we do normally give the lines within the BLOCK
16215any ci.  This would be true if we had blocks nested arbitrarily deeply.
16216
16217But consider something like this, where we have created a break after
16218an opening paren on line 1, and the paren is not (currently) a
16219structural indentation token:
16220
16221my $file = $menubar->Menubutton(
16222  qw/-text File -underline 0 -menuitems/ => [
16223      [
16224          Cascade    => '~View',
16225          -menuitems => [
16226          ...
16227
16228The second line has ci, so it would seem reasonable to propagate it
16229down, giving the third line 1 ci + 1 indentation.  This suggests the
16230following rule, which is currently used to propagating ci down: if there
16231are any non-structural opening parens (or brackets, or braces), before
16232an opening structural brace, then ci is propagated down, and otherwise
16233not.  The variable $intervening_secondary_structure contains this
16234information for the current token, and the string
16235"$ci_string_in_tokenizer" is a stack of previous values of this
16236variable.
16237
16238If no breaks are made just after a secondary structure, this method
16239will give ci where it really isn't required.  For example,
16240
16241    my $str = join ( " ", map {
16242       /\n/ ? do { my $n = $_; $n =~ tr/\n/ /; $n }
16243       : $_;
16244      } @_ ) . "\015\012";
16245
16246Here, there is no break after the first '(', so the second line gets
16247ci + one indent, but it would look ok without the ci.  However, the
16248extra ci does no harm.
16249
16250This logic works well, but it is still incomplete.  A current problem is
16251that the ci logic does not propagate down hierarchically through
16252consecutive non-structural bracing.  More work needs to be done to
16253improve the formatting in this case.  The next step in development along
16254these lines will be to define parens following a comma, in LIST context,
16255to be structural.  Here is an example of two levels of non-structural
16256indentation, but only single continuation-indentation
16257
16258   $deps = control_fields(
16259     ( "Pre-Depends", "Depends",  "Recommends", "Suggests",
16260     "Conflicts",     "Provides" )
16261   );
16262
16263=cut
16264
16265                # save the current states
16266                push ( @slevel_stack, 1 + $slevel_in_tokenizer );
16267                $level_in_tokenizer++;
16268
16269                $nesting_block_string .= $output_block_type[$i] ? '1' : '0';
16270
16271                # we will use continuation indentation within containers
16272                # which are not blocks and not logical expressions
16273                my $bit = 0;
16274                if ( !$output_block_type[$i] ) {
16275
16276                    # propagate flag down at nested open parens
16277                    if ( $output_container_type[$i] eq '(' ) {
16278                        $bit = 1 if ( $nesting_list_string =~ /1$/ );
16279                    }
16280
16281                    # use list continuation if not a logical grouping
16282                    else {
16283                        $bit = 1
16284                          if ( $output_container_type[$i] !~
16285/^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
16286                          );
16287                    }
16288                }
16289                $nesting_list_string .= $bit;
16290
16291                $ci_string_in_tokenizer .=
16292                  ( $intervening_secondary_structure != 0 ) ? '1' : '0';
16293                $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
16294                $continuation_string_in_tokenizer .=
16295                  ( $in_statement_continuation > 0 ) ? '1' : '0';
16296
16297=pod
16298
16299 Sometimes we want to give an opening brace continuation indentation,
16300 and sometimes not.  For code blocks, we don't do it, so that the leading
16301 '{' gets outdented, like this:
16302
16303  if ( !$output_block_type[$i]
16304    && ($in_statement_continuation) )
16305  {           <--outdented
16306
16307 For other types, we will give them continuation indentation.  For example,
16308 here is how a list looks with the opening paren indented:
16309
16310    @LoL =
16311      ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
16312        [ "homer", "marge", "bart" ], );
16313
16314 This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
16315
16316=cut
16317
16318                my $total_ci = $ci_string_sum;
16319                if (
16320                    !$output_block_type[$i]    # patch: skip for BLOCK
16321                    && ($in_statement_continuation)
16322                  )
16323                {
16324                    $total_ci += $in_statement_continuation
16325                      unless ( $ci_string_in_tokenizer =~ /1$/ );
16326                }
16327
16328                $ci_string_i               = $total_ci;
16329                $in_statement_continuation = 0;
16330            }
16331
16332            elsif ( $type eq '}' || $type eq 'R' ) {
16333
16334                # only a nesting error in the script would prevent popping here
16335                if ( @slevel_stack > 1 ) { pop (@slevel_stack); }
16336
16337                $level_i = --$level_in_tokenizer;
16338
16339                # restore previous level values
16340                if ( length($nesting_block_string) > 1 )
16341                {    # true for valid script
16342                    chop $nesting_block_string;
16343                    chop $nesting_list_string;
16344
16345                    chop $ci_string_in_tokenizer;
16346                    $ci_string_sum =
16347                      ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
16348
16349                    $in_statement_continuation =
16350                      chop $continuation_string_in_tokenizer;
16351
16352                    # zero continuation flag at terminal BLOCK '}' which
16353                    # ends a statement.
16354                    if ( $output_block_type[$i] ) {
16355
16356                        # ...These include non-anonymous subs
16357                        # note: could be sub ::abc { or sub 'abc
16358                        if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
16359
16360                            # note: older versions of perl require the /gc modifier
16361                            # here or else the \G does not work.
16362                            if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
16363                                $in_statement_continuation = 0;
16364                            }
16365                        }
16366
16367                        # ...and include all block types except user subs with
16368                        # block prototypes and these: (sort|grep|map|do|eval)
16369                        elsif ( $output_block_type[$i] =~
16370/^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
16371                          )
16372                        {
16373                            $in_statement_continuation = 0;
16374                        }
16375
16376                        # ..and a block introduced by a label
16377                        elsif ( $output_block_type[$i] =~ /^\w+\s*:$/gc ) {
16378                            $in_statement_continuation = 0;
16379                        }
16380
16381                        # ..but these are not terminal types
16382                        elsif ( $output_block_type[$i] =~
16383                            /^(sort|grep|map|do|eval)$/ )
16384                        {
16385                        }
16386
16387                        # ..nor user function with block prototype
16388                        else {
16389                        }
16390                    }
16391
16392=pod
16393                If we are in a list, then
16394                we must set continuatoin indentation at the closing
16395                paren of something like this (paren after $check):
16396                    assert(
16397                        __LINE__,
16398                        ( not defined $check )
16399                          or ref $check
16400                          or $check eq "new"
16401                          or $check eq "old",
16402                    );
16403=cut
16404
16405                    #OLD: elsif ( $nesting_list_string =~ /1$/ ) {
16406                    #    $in_statement_continuation=1;
16407                    #}
16408
16409                }
16410
16411                # use environment after updating
16412                $container_environment =
16413                  $nesting_block_string =~ /1$/ ? 'BLOCK'
16414                  : $nesting_list_string =~ /1$/ ? 'LIST'
16415                  : "";
16416                $ci_string_i = $ci_string_sum + $in_statement_continuation;
16417                $nesting_block_string_i = $nesting_block_string;
16418                $nesting_list_string_i  = $nesting_list_string;
16419            }
16420
16421            # not a structural indentation type..
16422            else {
16423
16424                $container_environment =
16425                  $nesting_block_string =~ /1$/ ? 'BLOCK'
16426                  : $nesting_list_string =~ /1$/ ? 'LIST'
16427                  : "";
16428
16429                # zero the continuation indentation at certain tokens so
16430                # that they will be at the same level as its container.  For
16431                # commas, this simplifies the -lp indentation logic, which
16432                # counts commas.  For ?: it makes them stand out.
16433                if ( $nesting_list_string =~ /1$/ ) {
16434                    if ( $type =~ /^[,\?\:]$/ ) {
16435                        $in_statement_continuation = 0;
16436                    }
16437                }
16438
16439                # continuation indentation is sum of any open ci from previous
16440                # levels plus the current level
16441                $ci_string_i = $ci_string_sum + $in_statement_continuation;
16442
16443                # update continuation flag ...
16444                # if this isn't a blank or comment..
16445                if ( $type !~ /^[b#]$/ ) {
16446
16447                    # and we are in a BLOCK
16448                    if ( $nesting_block_string =~ /1$/ ) {
16449
16450                        # the next token after a ';' and label starts a new stmt
16451                        if ( $type =~ /^[;J]$/ ) {
16452                            $in_statement_continuation = 0;
16453                        }
16454
16455                        # otherwise, we are continuing the current statement
16456                        else {
16457                            $in_statement_continuation = 1;
16458                        }
16459                    }
16460
16461                    # if we are not in a BLOCK..
16462                    else {
16463
16464                        # do not use continuation indentation if not list
16465                        # environment (could be within if/elsif clause)
16466                        if ( $nesting_list_string =~ /0$/ ) {
16467                            $in_statement_continuation = 0;
16468                        }
16469
16470                        # otherwise, the next token after a ',' starts a new term
16471                        elsif ( $type =~ /^[,]$/ ) {
16472                            $in_statement_continuation = 0;
16473                        }
16474
16475                        # otherwise, we are continuing the current term
16476                        else {
16477                            $in_statement_continuation = 1;
16478                        }
16479                    }
16480                }
16481            }
16482
16483            if ( $level_in_tokenizer < 0 ) {
16484                unless ($saw_negative_indentation) {
16485                    $saw_negative_indentation = 1;
16486                    warning("Starting negative indentation\n");
16487                }
16488            }
16489
16490            # set secondary nesting levels based on all continment token types
16491            # Note: these are set so that the nesting depth is the depth
16492            # of the PREVIOUS TOKEN, which is convenient for setting
16493            # the stength of token bonds
16494            my $slevel_i = $slevel_in_tokenizer;
16495            if ( $type =~ /^[L\{\(\[]$/ ) {
16496                $slevel_in_tokenizer++;
16497                $nesting_token_string .= $tok;
16498                $nesting_type_string .= $type;
16499            }
16500            elsif ( $type =~ /^[R\}\)\]]$/ ) {
16501                $slevel_in_tokenizer--;
16502                my $char = chop $nesting_token_string;
16503
16504                if ( $char ne $matching_start_token{$tok} ) {
16505                    $nesting_token_string .= $char . $tok;
16506                    $nesting_type_string .= $type;
16507                }
16508                else {
16509                    chop $nesting_type_string;
16510                }
16511            }
16512
16513            push ( @block_type,            $output_block_type[$i] );
16514            push ( @ci_string,             $ci_string_i );
16515            push ( @container_environment, $container_environment );
16516            push ( @container_type,        $output_container_type[$i] );
16517            push ( @levels,                $level_i );
16518            push ( @nesting_tokens,        $nesting_token_string_i );
16519            push ( @nesting_types,         $nesting_type_string_i );
16520            push ( @slevels,               $slevel_i );
16521            push ( @token_type,            $fix_type );
16522            push ( @type_sequence,         $output_type_sequence[$i] );
16523            push ( @nesting_blocks,        $nesting_block_string );
16524            push ( @nesting_lists,         $nesting_list_string );
16525
16526            # now form the previous token
16527            if ( $im >= 0 ) {
16528                $num =
16529                  $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
16530
16531                if ( $num > 0 ) {
16532                    push ( @tokens,
16533                        substr( $input_line, $$rtoken_map[$im], $num ) );
16534                }
16535            }
16536            $im = $i;
16537        }
16538
16539        $num = length($input_line) - $$rtoken_map[$im];    # make the last token
16540        if ( $num > 0 ) {
16541            push ( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
16542        }
16543
16544        $tokenizer_self->{_in_quote}          = $in_quote;
16545        $tokenizer_self->{_rhere_target_list} = \@here_target_list;
16546
16547        $line_of_tokens->{_rtoken_type}            = \@token_type;
16548        $line_of_tokens->{_rtokens}                = \@tokens;
16549        $line_of_tokens->{_rblock_type}            = \@block_type;
16550        $line_of_tokens->{_rcontainer_type}        = \@container_type;
16551        $line_of_tokens->{_rcontainer_environment} = \@container_environment;
16552        $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
16553        $line_of_tokens->{_rlevels}                = \@levels;
16554        $line_of_tokens->{_rslevels}               = \@slevels;
16555        $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
16556        $line_of_tokens->{_rci_levels}             = \@ci_string;
16557        $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
16558
16559        return;
16560    }
16561}    # end closure tokenize_this_line
16562
16563sub new_statement_ok {
16564
16565    # return true if the current token can start a new statement
16566
16567    return label_ok()    # a label would be ok here
16568
16569      || $last_nonblank_type eq 'J';    # or we follow a label
16570
16571}
16572
16573sub label_ok {
16574
16575    # Decide if a bare word followed by a colon here is a label
16576
16577    # if it follows an opening or closing code block curly brace..
16578    if ( $last_nonblank_token =~ /^[\{\}]$/
16579        && $last_nonblank_type eq $last_nonblank_token )
16580    {
16581
16582        # it is a label if and only if the curly encloses a code block
16583        return $brace_type[$brace_depth];
16584    }
16585
16586    # otherwise, it is a label if and only if it follows a ';'
16587    else {
16588        return ( $last_nonblank_token eq ';' );
16589    }
16590}
16591
16592sub code_block_type {
16593
16594    # Decide if this is a block of code, and its type.
16595    # Must be called only when $type = $token = '{'
16596    # The problem is to distinguish between the start of a block of code
16597    # and the start of an anonymous hash reference
16598    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
16599    # to indicate the type of code block.  (For example, 'last_nonblank_token'
16600    # might be 'if' for an if block, 'else' for an else block, etc).
16601
16602    # handle case of multiple '{'s
16603
16604    # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
16605
16606    if ( $last_nonblank_token eq '{'
16607        && $last_nonblank_type eq $last_nonblank_token )
16608    {
16609
16610        # opening brace where a statement may appear is probably
16611        # a code block but might be and anonymous hash reference
16612        if ( $brace_type[$brace_depth] ) {
16613            return decide_if_code_block();
16614        }
16615
16616        # cannot start a code block within an anonymous hash
16617        else {
16618            return "";
16619        }
16620    }
16621
16622    elsif ( $last_nonblank_token eq ';' ) {
16623
16624        # an opening brace where a statement may appear is probably
16625        # a code block but might be and anonymous hash reference
16626        return decide_if_code_block();
16627    }
16628
16629    # handle case of '}{'
16630    elsif ( $last_nonblank_token eq '}'
16631        && $last_nonblank_type eq $last_nonblank_token )
16632    {
16633
16634        # a } { situation ...
16635        # could be hash reference after code block..(blktype1.t)
16636        if ($last_nonblank_block_type) {
16637            return decide_if_code_block();
16638        }
16639
16640        # must be a block if it follows a closing hash reference
16641        else {
16642            return $last_nonblank_token;
16643        }
16644    }
16645
16646    # NOTE: braces after type characters start code blocks, but for
16647    # simplicity these are not identified as such.  See also
16648    # sub is_non_structural_brace.
16649    # elsif ( $last_nonblank_type eq 't' ) {
16650    #    return $last_nonblank_token;
16651    # }
16652
16653    # brace after label:
16654    elsif ( $last_nonblank_type eq 'J' ) {
16655        return $last_nonblank_token;
16656    }
16657
16658    # otherwise, look at previous token.  This must be a code block if
16659    # it follows any of these:
16660    elsif ( $last_nonblank_token =~
16661/^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
16662      )
16663    {
16664        return $last_nonblank_token;
16665    }
16666
16667    # or a sub definition
16668    elsif ( $last_nonblank_type =~ /^[ti]$/
16669        && $last_nonblank_token =~ /^sub\b/ )
16670    {
16671        return $last_nonblank_token;
16672    }
16673
16674    # user-defined subs with block parameters (like grep/map/eval)
16675    elsif ( $last_nonblank_type eq 'G' ) {
16676        return $last_nonblank_token;
16677    }
16678
16679    # anything else must be anonymous hash reference
16680    else {
16681        return "";
16682    }
16683}
16684
16685sub decide_if_code_block {
16686
16687    # we are at a '{' where a statement may appear.
16688    # We must decide if this brace starts an anonymous hash or a code
16689    # block.
16690
16691    # return "" if anonymous hash, and $last_nonblank_token otherwise
16692
16693    # FIXME: coding incomplete
16694    return $last_nonblank_token;
16695}
16696
16697sub unexpected {
16698
16699    # report unexpected token type and show where it is
16700    my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
16701    $unexpected_error_count++;
16702    if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
16703        my $msg = "found $found where $expecting expected";
16704        my $pos = $$rpretoken_map[$i_tok];
16705        interrupt_logfile();
16706        my ( $offset, $numbered_line, $underline ) =
16707          make_numbered_line( $input_line_number, $input_line, $pos );
16708        $underline = write_on_underline( $underline, $pos - $offset, '^' );
16709
16710        my $trailer = "";
16711        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
16712            my $pos_prev = $$rpretoken_map[$last_nonblank_i];
16713            my $num;
16714            if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
16715                $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
16716            }
16717            else {
16718                $num = $pos - $pos_prev;
16719            }
16720            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
16721
16722            $underline =
16723              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
16724            $trailer = " (previous token underlined)";
16725        }
16726        warning( $numbered_line . "\n" );
16727        warning( $underline . "\n" );
16728        warning( $msg . $trailer . "\n" );
16729        resume_logfile();
16730    }
16731}
16732
16733sub indicate_error {
16734    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
16735    interrupt_logfile();
16736    warning($msg);
16737    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
16738    resume_logfile();
16739}
16740
16741sub write_error_indicator_pair {
16742    my ( $line_number, $input_line, $pos, $carrat ) = @_;
16743    my ( $offset, $numbered_line, $underline ) =
16744      make_numbered_line( $line_number, $input_line, $pos );
16745    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
16746    warning( $numbered_line . "\n" );
16747    $underline =~ s/\s*$//;
16748    warning( $underline . "\n" );
16749}
16750
16751sub make_numbered_line {
16752
16753=pod
16754
16755 Given an input line, its line number, and a character position of interest,
16756 create a string not longer than 80 characters of the form
16757    $lineno: sub_string
16758 such that the sub_string of $str contains the position of interest
16759
16760 Here is an example of what we want, in this case we add trailing '...'
16761 because the line is long.
16762
167632: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
16764
16765 Here is another example, this time in which we used leading '...'
16766 because of excessive length:
16767
167682: ... er of the World Wide Web Consortium's
16769
16770 input parameters are:
16771  $lineno = line number
16772  $str = the text of the line
16773  $pos = position of interest (the error) : 0 = first character
16774
16775  We return :
16776    - $offset = an offset which corrects the position in case we only
16777      display part of a line, such that $pos-$offset is the effective
16778      position from the start of the displayed line.
16779    - $numbered_line = the numbered line as above,
16780    - $underline = a blank 'underline' which is all spaces with the same
16781      number of characters as the numbered line.
16782
16783=cut
16784
16785    my ( $lineno, $str, $pos ) = @_;
16786    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
16787    my $excess = length($str) - $offset - 68;
16788    my $numc   = ( $excess > 0 ) ? 68 : undef;
16789
16790    if ( defined($numc) ) {
16791        if ( $offset == 0 ) {
16792            $str = substr( $str, $offset, $numc - 4 ) . " ...";
16793        }
16794        else {
16795            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
16796        }
16797    }
16798    else {
16799
16800        if ( $offset == 0 ) {
16801        }
16802        else {
16803            $str = "... " . substr( $str, $offset + 4 );
16804        }
16805    }
16806
16807    my $numbered_line = sprintf( "%d: ", $lineno );
16808    $offset -= length($numbered_line);
16809    $numbered_line .= $str;
16810    my $underline = " " x length($numbered_line);
16811    return ( $offset, $numbered_line, $underline );
16812}
16813
16814sub write_on_underline {
16815
16816=pod
16817
16818The "underline" is a string that shows where an error is; it starts
16819out as a string of blanks with the same length as the numbered line of
16820code above it, and we have to add marking to show where an error is.
16821In the example below, we want to write the string '--^' just below
16822the line of bad code:
16823
168242: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
16825                ---^
16826We are given the current underline string, plus a position and a
16827string to write on it.
16828
16829In the above example, there will be 2 calls to do this:
16830First call:  $pos=19, pos_chr=^
16831Second call: $pos=16, pos_chr=---
16832
16833This is a trivial thing to do with substr, but there is some
16834checking to do.
16835
16836=cut
16837
16838    my ( $underline, $pos, $pos_chr ) = @_;
16839
16840    # check for error..shouldn't happen
16841    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
16842        return $underline;
16843    }
16844    my $excess = length($pos_chr) + $pos - length($underline);
16845    if ( $excess > 0 ) {
16846        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
16847    }
16848    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
16849    return ($underline);
16850}
16851
16852sub is_non_structural_brace {
16853
16854    # Decide if a brace or bracket is structural or non-structural
16855    # by looking at the previous token and type
16856
16857    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
16858    # Tentatively deactivated because it caused the wrong operator expectation
16859    # for this code:
16860    #      $user = @vars[1] / 100;
16861    # Must update sub operator_expected before re-implementing.
16862    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
16863    #    return 0;
16864    # }
16865
16866    # NOTE: braces after type characters start code blocks, but for
16867    # simplicity these are not identified as such.  See also
16868    # sub code_block_type
16869    # if ($last_nonblank_type eq 't') {return 0}
16870
16871    # otherwise, it is non-structural if it is decorated
16872    # by type information.
16873    # For example, the '{' here is non-structural:   ${xxx}
16874    (
16875      $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
16876
16877      # or if we follow a hash or array closing curly brace or bracket
16878      # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
16879      # because the first '}' would have been given type 'R'
16880      || $last_nonblank_type =~ /^([R\]])$/
16881    );
16882}
16883
16884sub operator_expected {
16885
16886=pod
16887
16888Many perl symbols have two or more meanings.  For example, '<<'
16889can be a shift operator or a here-doc operator.  The
16890interpretation of these symbols depends on the current state of
16891the tokenizer, which may either be expecting a term or an
16892operator.  For this example, a << would be a shift if an operator
16893is expected, and a here-doc if a term is expected.  This routine
16894is called to make this decision for any current token.  It returns
16895one of three possible values:
16896
16897    OPERATOR - operator expected (or at least, not a term)
16898    UNKNOWN  - can't tell
16899    TERM     - a term is expected (or at least, not an operator)
16900
16901The decision is based on what has been seen so far.  This information
16902is stored in the "$last_nonblank_type" and "$last_nonblank_token" variables.
16903For example, if the $last_nonblank_type is '=~', then we are expecting
16904a TERM, whereas if $last_nonblank_type is 'n' (numeric), we are
16905expecting an OPERATOR.
16906
16907If a UNKNOWN is returned, the calling routine must guess. A major goal
16908of this tokenizer is to minimize the possiblity of returning
16909UNKNOWN, because a wrong guess can spoil the formatting of a script.
16910
16911adding NEW_TOKENS: it is critically important that this routine be updated
16912to allow it to determine if an operator or term is to be expected
16913after the new token.  Doing this simply involves adding the new token
16914character to one of the regexes in this routine or to one of the hash lists
16915that it uses, which are initialized in the BEGIN section.
16916
16917=cut
16918
16919    my ( $prev_type, $tok, $next_type ) = @_;
16920    my $op_expected = UNKNOWN;
16921
16922    # Note: function prototype is available for token type 'U' for future
16923    # program development.  It contains the leading and trailing parens,
16924    # and no blanks.  It might be used to eliminate token type 'C', for
16925    # example (prototype = '()'). Thus:
16926    # if ($last_nonblank_type eq 'U') {
16927    #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
16928    # }
16929
16930    # A possible filehandle (or object) requires some care...
16931    if ( $last_nonblank_type eq 'Z' ) {
16932
16933        # angle.t
16934        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
16935            $op_expected = UNKNOWN;
16936        }
16937
16938        # For possible file handle like "$a", Perl uses weird parsing rules.
16939        # For example:
16940        # print $a/2,"/hi";   - division
16941        # print $a / 2,"/hi"; - division
16942        # print $a/ 2,"/hi";  - division
16943        # print $a /2,"/hi";  - pattern (and error)!
16944        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
16945            $op_expected = TERM;
16946        }
16947
16948        # Note when an operation is being done where a
16949        # filehandle might be expected, since a change in whitespace
16950        # could change the interpretation of the statement.
16951        else {
16952            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
16953                complain("operator in print statement not recommended\n");
16954                $op_expected = OPERATOR;
16955            }
16956        }
16957    }
16958
16959    # handle something after 'do' and 'eval'
16960    elsif ( $last_nonblank_token =~ /$block_operator/ ) {
16961
16962        # something like $a = eval "expression";
16963        #                          ^
16964        if ( $last_nonblank_type eq 'k' ) {
16965            $op_expected = TERM;    # expression or list mode following keyword
16966        }
16967
16968        # something like $a = do { BLOCK } / 2;
16969        #                                  ^
16970        else {
16971            $op_expected = OPERATOR;    # block mode following }
16972        }
16973    }
16974
16975    # handle bare word..
16976    elsif ( $last_nonblank_type eq 'w' ) {
16977
16978        # unfortunately, we can't tell what type of token to expect next
16979        # after most bare words
16980        $op_expected = UNKNOWN;
16981    }
16982
16983    # operator, but not term possible after these types
16984    # Note: moved ')' from type to token because parens in list context
16985    # get marked as '{' '}' now.  This is a minor glitch in the following:
16986    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
16987    #
16988    elsif ( ( $last_nonblank_type =~ /^[\]RnviQh]$/ )
16989        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
16990    {
16991        $op_expected = OPERATOR;
16992
16993        # in a 'use' statement, numbers and v-strings are not really
16994        # numbers, so to avoid incorrect error messages, we will
16995        # mark them as unknown for now (use.t)
16996        if ( ( $statement_type eq 'use' )
16997            && ( $last_nonblank_type =~ /^[nv]$/ ) )
16998        {
16999            $op_expected = UNKNOWN;
17000        }
17001    }
17002
17003    # no operator after many keywords, such as "die", "warn", etc
17004    elsif ( $expecting_term_token{$last_nonblank_token} ) {
17005        $op_expected = TERM;
17006    }
17007
17008    # no operator after things like + - **  (i.e., other operators)
17009    elsif ( $expecting_term_types{$last_nonblank_type} ) {
17010        $op_expected = TERM;
17011    }
17012
17013    # a few operators, like "time", have an empty prototype () and so
17014    # take no parameters but produce a value to operate on
17015    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
17016        $op_expected = OPERATOR;
17017    }
17018
17019    # post-increment and decrement produce values to be operated on
17020    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
17021        $op_expected = OPERATOR;
17022    }
17023
17024    # no value to operate on after sub block
17025    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
17026
17027    # a right brace here indicates the end of a simple block.
17028    # all non-structural right braces have type 'R'
17029    # all braces associated with block operator keywords have been given those
17030    # keywords as "last_nonblank_token" and caught above.
17031    # (This statement is order dependent, and must come after checking
17032    # $last_nonblank_token).
17033    elsif ( $last_nonblank_type eq '}' ) {
17034        $op_expected = TERM;
17035    }
17036
17037    # something else..what did I forget?
17038    else {
17039
17040        # collecting diagnostics on unknown operator types..see what was missed
17041        $op_expected = UNKNOWN;
17042        write_diagnostics(
17043"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
17044        );
17045    }
17046
17047    TOKENIZER_DEBUG_FLAG_EXPECT && do {
17048        print
17049"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
17050    };
17051    return $op_expected;
17052}
17053
17054=pod
17055
17056The following routines keep track of nesting depths of the nesting
17057types, ( [ { and ?.  This is necessary for determining the indentation
17058level, and also for debugging programs.  Not only do they keep track of
17059nesting depths of the individual brace types, but they check that each
17060of the other brace types is balanced within matching pairs.  For
17061example, if the program sees this sequence:
17062
17063        {  ( ( ) }
17064
17065then it can determine that there is an extra left paren somewhere
17066between the { and the }.  And so on with every other possible
17067combination of outer and inner brace types.  For another
17068example:
17069
17070        ( [ ..... ]  ] )
17071
17072which has an extra ] within the parens.
17073
17074The brace types have indexes 0 .. 3 which are indexes into
17075the matrices.
17076
17077The pair ? : are treated as just another nesting type, with ? acting
17078as the opening brace and : acting as the closing brace.
17079
17080The matrix
17081
17082        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
17083
17084saves the nesting depth of brace type $b (where $b is either of the other
17085nesting types) when brace type $a enters a new depth.  When this depth
17086decreases, a check is made that the current depth of brace types $b is
17087unchanged, or otherwise there must have been an error.  This can
17088be very useful for localizing errors, particularly when perl runs to
17089the end of a large file (such as this one) and announces that there
17090is a problem somewhere.
17091
17092A numerical sequence number is maintained for every nesting type,
17093so that each matching pair can be uniquely identified in a simple
17094way.
17095
17096=cut
17097
17098sub increase_nesting_depth {
17099    my ( $a, $i_tok ) = @_;
17100    my $b;
17101    $current_depth[$a]++;
17102
17103    # Sequence numbers increment by number of items.  This keeps
17104    # a unique set of numbers but still allows the relative location
17105    # of any type to be determined.
17106    $nesting_sequence_number[$a] += scalar(@closing_brace_names);
17107    my $seqno = $nesting_sequence_number[$a];
17108    $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
17109
17110    my $pos = $$rpretoken_map[$i_tok];
17111    $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
17112      [ $input_line_number, $input_line, $pos ];
17113
17114    for $b ( 0 .. $#closing_brace_names ) {
17115        next if ( $b == $a );
17116        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
17117    }
17118    return $seqno;
17119}
17120
17121sub decrease_nesting_depth {
17122
17123    my ( $a, $i_tok ) = @_;
17124    my $pos = $$rpretoken_map[$i_tok];
17125    my $b;
17126    my $seqno = 0;
17127
17128    if ( $current_depth[$a] > 0 ) {
17129
17130        $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
17131
17132        # check that any brace types $b contained within are balanced
17133        for $b ( 0 .. $#closing_brace_names ) {
17134            next if ( $b == $a );
17135
17136            unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
17137                $current_depth[$b] )
17138            {
17139                my $diff = $current_depth[$b] -
17140                  $depth_array[$a][$b][ $current_depth[$a] ];
17141
17142                # don't whine too many times
17143                my $saw_brace_error = get_saw_brace_error();
17144                if (
17145                    $saw_brace_error <= MAX_NAG_MESSAGES
17146
17147                    # if too many closing types have occured, we probably
17148                    # already caught this error
17149                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
17150                  )
17151                {
17152                    interrupt_logfile();
17153                    my $rsl =
17154                      $starting_line_of_current_depth[$a][ $current_depth[$a] ];
17155                    my $sl  = $$rsl[0];
17156                    my $rel = [ $input_line_number, $input_line, $pos ];
17157                    my $el  = $$rel[0];
17158                    my ($ess);
17159
17160                    if ( $diff == 1 || $diff == -1 ) {
17161                        $ess = '';
17162                    }
17163                    else {
17164                        $ess = 's';
17165                    }
17166                    my $bname =
17167                      ( $diff > 0 )
17168                      ? $opening_brace_names[$b]
17169                      : $closing_brace_names[$b];
17170                    write_error_indicator_pair( @$rsl, '^' );
17171                    my $msg = <<"EOM";
17172Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
17173EOM
17174
17175                    if ( $diff > 0 ) {
17176                        my $rml =
17177                          $starting_line_of_current_depth[$b]
17178                          [ $current_depth[$b] ];
17179                        my $ml = $$rml[0];
17180                        $msg .=
17181"    The most recent un-matched $bname is on line $ml\n";
17182                        write_error_indicator_pair( @$rml, '^' );
17183                    }
17184                    write_error_indicator_pair( @$rel, '^' );
17185                    warning($msg);
17186                    resume_logfile();
17187                }
17188                increment_brace_error();
17189            }
17190        }
17191        $current_depth[$a]--;
17192    }
17193    else {
17194
17195        my $saw_brace_error = get_saw_brace_error();
17196        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
17197            my $msg = <<"EOM";
17198There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
17199EOM
17200            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
17201        }
17202        increment_brace_error();
17203    }
17204    return $seqno;
17205}
17206
17207sub check_final_nesting_depths {
17208    my ($a);
17209
17210    for $a ( 0 .. $#closing_brace_names ) {
17211
17212        if ( $current_depth[$a] ) {
17213            my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
17214            my $sl  = $$rsl[0];
17215            my $msg = <<"EOM";
17216Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
17217The most recent un-matched $opening_brace_names[$a] is on line $sl
17218EOM
17219            indicate_error( $msg, @$rsl, '^' );
17220            increment_brace_error();
17221        }
17222    }
17223}
17224
17225sub numerator_expected {
17226
17227    # this is a filter for a possible numerator, in support of guessing
17228    # for the / pattern delimiter token.
17229    # returns -
17230    #   1 - yes
17231    #   0 - can't tell
17232    #  -1 - no
17233    # Note: I am using the convention that variables ending in
17234    # _expected have these 3 possible values.
17235    my ( $i, $rtokens ) = @_;
17236    my $next_token = $$rtokens[ $i + 1 ];
17237    if ( $next_token eq '=' ) { $i++; }    # handle /=
17238    my ( $next_nonblank_token, $i_next ) =
17239      find_next_nonblank_token( $i, $rtokens );
17240
17241    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
17242        1;
17243    }
17244    else {
17245
17246        if ( $next_nonblank_token =~ /^\s*$/ ) {
17247            0;
17248        }
17249        else {
17250            -1;
17251        }
17252    }
17253}
17254
17255sub pattern_expected {
17256
17257    # This is the start of a filter for a possible pattern.
17258    # It looks at the token after a possbible pattern and tries to
17259    # determine if that token could end a pattern.
17260    # returns -
17261    #   1 - yes
17262    #   0 - can't tell
17263    #  -1 - no
17264    my ( $i, $rtokens ) = @_;
17265    my $next_token = $$rtokens[ $i + 1 ];
17266    if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
17267    my ( $next_nonblank_token, $i_next ) =
17268      find_next_nonblank_token( $i, $rtokens );
17269
17270    # list of tokens which may follow a pattern
17271    # (can probably be expanded)
17272    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
17273    {
17274        1;
17275    }
17276    else {
17277
17278        if ( $next_nonblank_token =~ /^\s*$/ ) {
17279            0;
17280        }
17281        else {
17282            -1;
17283        }
17284    }
17285}
17286
17287sub find_next_nonblank_token_on_this_line {
17288    my ( $i, $rtokens ) = @_;
17289    my $next_nonblank_token;
17290
17291    if ( $i < $max_token_index ) {
17292        $next_nonblank_token = $$rtokens[ ++$i ];
17293
17294        if ( $next_nonblank_token =~ /^\s*$/ ) {
17295
17296            if ( $i < $max_token_index ) {
17297                $next_nonblank_token = $$rtokens[ ++$i ];
17298            }
17299        }
17300    }
17301    else {
17302        $next_nonblank_token = "";
17303    }
17304    return ( $next_nonblank_token, $i );
17305}
17306
17307sub find_next_nonblank_token {
17308    my ( $i, $rtokens ) = @_;
17309
17310    if ( $i >= $max_token_index ) {
17311
17312        if ( !$peeked_ahead ) {
17313            $peeked_ahead = 1;
17314            $rtokens      = peek_ahead_for_nonblank_token($rtokens);
17315        }
17316    }
17317    my $next_nonblank_token = $$rtokens[ ++$i ];
17318
17319    if ( $next_nonblank_token =~ /^\s*$/ ) {
17320        $next_nonblank_token = $$rtokens[ ++$i ];
17321    }
17322    return ( $next_nonblank_token, $i );
17323}
17324
17325sub peek_ahead_for_n_nonblank_pre_tokens {
17326
17327    # returns next n pretokens if they exist
17328    # returns undef's if hits eof without seeing any pretokens
17329    my $max_pretokens = shift;
17330    my $line;
17331    my $i = 0;
17332    my ( $rpre_tokens, $rmap, $rpre_types );
17333
17334    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
17335    {
17336        $line =~ s/^\s*//;    # trim leading blanks
17337        next if ( length($line) <= 0 );    # skip blank
17338        next if ( $line =~ /^#/ );         # skip comment
17339        ( $rpre_tokens, $rmap, $rpre_types ) =
17340          pre_tokenize( $line, $max_pretokens );
17341        last;
17342    }
17343    return ( $rpre_tokens, $rpre_types );
17344}
17345
17346# look ahead for next non-blank, non-comment line of code
17347sub peek_ahead_for_nonblank_token {
17348    my $rtokens = shift;
17349    my $line;
17350    my $i = 0;
17351
17352    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
17353    {
17354        $line =~ s/^\s*//;    # trim leading blanks
17355        next if ( length($line) <= 0 );    # skip blank
17356        next if ( $line =~ /^#/ );         # skip comment
17357        my ( $rtok, $rmap, $rtype ) =
17358          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
17359        my $j = $max_token_index + 1;
17360        my $tok;
17361
17362        #write_logfile_entry("peeking at line:$line")
17363        #  if $rOpts->{'DEBUG'};
17364        foreach $tok (@$rtok) {
17365            last if ( $tok =~ "\n" );
17366            $$rtokens[ ++$j ] = $tok;
17367        }
17368        last;
17369    }
17370    return $rtokens;
17371}
17372
17373sub pre_tokenize {
17374
17375    # Break a string, $str, into a sequence of preliminary tokens.  We
17376    # are only interested in these types of tokens: identifier strings,
17377    # digits, spaces, and other characters.  We cannot do better than
17378    # this yet because we might be in a quoted string or pattern.
17379    # Caller sets $max_tokens_wanted to 0 to get all tokens.
17380    my ( $str, $max_tokens_wanted ) = @_;
17381    my @tokens    = ();
17382    my @token_map = ();
17383    my @type      = ();
17384    my $i         = 0;
17385    $token_map[0] = 0;
17386
17387    while (1) {
17388
17389        # whitespace
17390        if ( $str =~ /\G(\s+)/gc ) { $type[$i] = 'b'; }
17391
17392        # numbers
17393        # note that this must come before identifiers
17394        elsif ( $str =~ /\G(\d+)/gc ) { $type[$i] = 'd'; }
17395
17396        # identifiers
17397        elsif ( $str =~ /\G(\w+)/gc ) { $type[$i] = 'w'; }
17398
17399        # punctuation
17400        elsif ( $str =~ /\G(\W)/gc ) { $type[$i] = $1; }
17401
17402        else { last; }
17403
17404        $tokens[$i] = $1;
17405        $token_map[ ++$i ] = pos($str);
17406        last if ( $i == $max_tokens_wanted );
17407    }
17408    return ( \@tokens, \@token_map, \@type );
17409}
17410
17411sub show_tokens {
17412
17413    # this is an old debug routine
17414    my ( $rtokens, $rtoken_map ) = @_;
17415    my $num = scalar(@$rtokens);
17416    my $i;
17417
17418    for ( $i = 0 ; $i < $num ; $i++ ) {
17419        my $len = length( $$rtokens[$i] );
17420        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
17421    }
17422}
17423
17424sub find_angle_operator_termination {
17425
17426    # We are looking at a '<' and want to know if it is an angle operator.
17427    # We are to return:
17428    #   $i = pretoken index of ending '>' if found, current $i otherwise
17429    #   $type = 'Q' if found, '>' otherwise
17430    my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
17431    my $i    = $i_beg;
17432    my $type = '<';
17433    pos($input_line) = 1 + $$rtoken_map[$i];
17434
17435    my $filter;
17436
17437    # we just have to find the next '>' if a term is expected
17438    if ( $expecting == TERM ) { $filter = '[\>]' }
17439
17440    # we have to guess if we don't know what is expected
17441    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
17442
17443    # shouldn't happen - we shouldn't be here if operator is expected
17444    else { warning("Program Bug in find_angle_operator_termination\n") }
17445
17446    # To illustrate what we might be looking at, in case we are
17447    # guessing, here are some examples of valid angle operators
17448    # (or file globs):
17449    #  <tmp_imp/*>
17450    #  <FH>
17451    #  <$fh>
17452    #  <*.c *.h>
17453    #  <_>
17454    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
17455    #  <${PREFIX}*img*.$IMAGE_TYPE>
17456    #  <img*.$IMAGE_TYPE>
17457    #  <Timg*.$IMAGE_TYPE>
17458    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
17459    #
17460    # Here are some examples of lines which do not have angle operators:
17461    #  return undef unless $self->[2]++ < $#{$self->[1]};
17462    #  < 2  || @$t >
17463    #
17464    # the following line from dlister.pl caused trouble:
17465    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
17466    #
17467    # If the '<' starts an angle operator, it must end on this line and
17468    # it must not have certain characters like ';' and '=' in it.  I use
17469    # this to limit the testing.  This filter should be improved if
17470    # possible.
17471
17472    if ( $input_line =~ /($filter)/g ) {
17473
17474        if ( $1 eq '>' ) {
17475
17476            # We MAY have found an angle operator termination if we get
17477            # here, but we need to do more to be sure we haven't been
17478            # fooled.
17479            my $pos = pos($input_line);
17480
17481            ######################################debug#####
17482            my $pos_beg = $$rtoken_map[$i];
17483            my $str     = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
17484
17485            #write_diagnostics( "ANGLE? :$str\n");
17486            #print "ANGLE: found $1 at pos=$pos\n";
17487            ######################################debug#####
17488            $type = 'Q';
17489            my $error;
17490            ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
17491
17492            # It may be possible that a quote ends midway in a pretoken.
17493            # If this happens, it may be necessary to split the pretoken.
17494            if ($error) {
17495                warning(
17496                    "Possible tokinization error..please check this line\n");
17497                report_possible_bug();
17498            }
17499
17500            # Now let's see where we stand....
17501            # OK if math op not possible
17502            if ( $expecting == TERM ) {
17503            }
17504
17505            # OK if there are no more than 2 pre-tokens inside
17506            # (not possible to write 2 token math between < and >)
17507            # This catches most common cases
17508            elsif ( $i <= $i_beg + 3 ) {
17509                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
17510            }
17511
17512            # Not sure..
17513            else {
17514
17515                # Let's try a Brace Test: any braces inside must balance
17516                my $br = 0;
17517                while ( $str =~ /\{/g ) { $br++ }
17518                while ( $str =~ /\}/g ) { $br-- }
17519                my $sb = 0;
17520                while ( $str =~ /\[/g ) { $sb++ }
17521                while ( $str =~ /\]/g ) { $sb-- }
17522                my $pr = 0;
17523                while ( $str =~ /\(/g ) { $pr++ }
17524                while ( $str =~ /\)/g ) { $pr-- }
17525
17526                # if braces do not balance - not angle operator
17527                if ( $br || $sb || $pr ) {
17528                    $i    = $i_beg;
17529                    $type = '<';
17530                    write_diagnostics(
17531                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
17532                }
17533
17534                # we should keep doing more checks here...to be continued
17535                # Tentatively accepting this as a valid angle operator.
17536                # There are lots more things that can be checked.
17537                else {
17538                    write_diagnostics(
17539                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
17540                    write_logfile_entry("Guessing angle operator here: $str\n");
17541                }
17542            }
17543        }
17544
17545        # didn't find ending >
17546        else {
17547            if ( $expecting == TERM ) {
17548                warning("No ending > for angle operator\n");
17549            }
17550        }
17551    }
17552    return ( $i, $type );
17553}
17554
17555sub inverse_pretoken_map {
17556
17557    # Starting with the current pre_token index $i, scan forward until
17558    # finding the index of the next pre_token whose position is $pos.
17559    my ( $i, $pos, $rtoken_map ) = @_;
17560    my $error = 0;
17561
17562    while ( ++$i <= $max_token_index ) {
17563
17564        if ( $pos <= $$rtoken_map[$i] ) {
17565
17566            # Let the calling routine handle errors in which we do not
17567            # land on a pre-token boundary.  It can happen by running
17568            # perltidy on some non-perl scripts, for example.
17569            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
17570            $i--;
17571            last;
17572        }
17573    }
17574    return ( $i, $error );
17575}
17576
17577sub guess_if_pattern_or_conditional {
17578
17579    # this routine is called when we have encountered a ? following an
17580    # unknown bareword, and we must decide if it starts a pattern or not
17581    # input parameters:
17582    #   $i - token index of the ? starting possible pattern
17583    # output parameters:
17584    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
17585    #   msg = a warning or diagnostic message
17586    my ( $i, $rtokens, $rtoken_map ) = @_;
17587    my $is_pattern = 0;
17588    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
17589
17590    if ( $i >= $max_token_index ) {
17591        $msg .= "conditional (no end to pattern found on the line)\n";
17592    }
17593    else {
17594        my $ibeg = $i;
17595        $i = $ibeg + 1;
17596        my $next_token = $$rtokens[$i];    # first token after ?
17597
17598        # look for a possible ending ? on this line..
17599        my $in_quote        = 1;
17600        my $quote_depth     = 0;
17601        my $quote_character = '';
17602        my $quote_pos       = 0;
17603        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
17604          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
17605            $quote_pos, $quote_depth );
17606
17607        if ($in_quote) {
17608
17609            # we didn't find an ending ? on this line,
17610            # so we bias towards conditional
17611            $is_pattern = 0;
17612            $msg .= "conditional (no ending ? on this line)\n";
17613
17614            # we found an ending ?, so we bias towards a pattern
17615        }
17616        else {
17617
17618            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
17619                $is_pattern = 1;
17620                $msg .= "pattern (found ending ? and pattern expected)\n";
17621            }
17622            else {
17623                $msg .= "pattern (uncertain, but found ending ?)\n";
17624            }
17625        }
17626    }
17627    return ( $is_pattern, $msg );
17628}
17629
17630sub guess_if_pattern_or_division {
17631
17632    # this routine is called when we have encountered a / following an
17633    # unknown bareword, and we must decide if it starts a pattern or is a
17634    # division
17635    # input parameters:
17636    #   $i - token index of the / starting possible pattern
17637    # output parameters:
17638    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
17639    #   msg = a warning or diagnostic message
17640    my ( $i, $rtokens, $rtoken_map ) = @_;
17641    my $is_pattern = 0;
17642    my $msg        = "guessing that / after $last_nonblank_token starts a ";
17643
17644    if ( $i >= $max_token_index ) {
17645        "division (no end to pattern found on the line)\n";
17646    }
17647    else {
17648        my $ibeg = $i;
17649        my $divide_expected = numerator_expected( $i, $rtokens );
17650        $i = $ibeg + 1;
17651        my $next_token = $$rtokens[$i];    # first token after slash
17652
17653        # look for a possible ending / on this line..
17654        my $in_quote        = 1;
17655        my $quote_depth     = 0;
17656        my $quote_character = '';
17657        my $quote_pos       = 0;
17658        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
17659          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
17660            $quote_pos, $quote_depth );
17661
17662        if ($in_quote) {
17663
17664            # we didn't find an ending / on this line,
17665            # so we bias towards division
17666            if ( $divide_expected >= 0 ) {
17667                $is_pattern = 0;
17668                $msg .= "division (no ending / on this line)\n";
17669            }
17670            else {
17671                $msg        = "multi-line pattern (division not possible)\n";
17672                $is_pattern = 1;
17673            }
17674
17675        }
17676
17677        # we found an ending /, so we bias towards a pattern
17678        else {
17679
17680            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
17681
17682                if ( $divide_expected >= 0 ) {
17683
17684                    if ( $i - $ibeg > 60 ) {
17685                        $msg .= "division (matching / too distant)\n";
17686                        $is_pattern = 0;
17687                    }
17688                    else {
17689                        $msg .= "pattern (but division possible too)\n";
17690                        $is_pattern = 1;
17691                    }
17692                }
17693                else {
17694                    $is_pattern = 1;
17695                    $msg .= "pattern (division not possible)\n";
17696                }
17697            }
17698            else {
17699
17700                if ( $divide_expected >= 0 ) {
17701                    $is_pattern = 0;
17702                    $msg .= "division (pattern not possible)\n";
17703                }
17704                else {
17705                    $is_pattern = 1;
17706                    $msg .=
17707                      "pattern (uncertain, but division would not work here)\n";
17708                }
17709            }
17710        }
17711    }
17712    return ( $is_pattern, $msg );
17713}
17714
17715sub find_here_doc {
17716
17717    # find the target of a here document, if any
17718    # input parameters:
17719    #   $i - token index of the second < of <<
17720    #   ($i must be less than the last token index if this is called)
17721    # output parameters:
17722    #   $found_target = 0 didn't find target; =1 found target
17723    #   HERE_TARGET - the target string (may be empty string)
17724    #   $i - unchanged if not here doc,
17725    #    or index of the last token of the here target
17726    my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
17727    my $ibeg                 = $i;
17728    my $found_target         = 0;
17729    my $here_doc_target      = '';
17730    my $here_quote_character = '';
17731    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
17732    $next_token = $$rtokens[ $i + 1 ];
17733
17734    # perl allows a backslash before the target string (heredoc.t)
17735    my $backslash = 0;
17736    if ( $next_token eq '\\' ) {
17737        $backslash  = 1;
17738        $next_token = $$rtokens[ $i + 2 ];
17739    }
17740
17741    ( $next_nonblank_token, $i_next_nonblank ) =
17742      find_next_nonblank_token_on_this_line( $i, $rtokens );
17743
17744    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
17745
17746        my $in_quote    = 1;
17747        my $quote_depth = 0;
17748        my $quote_pos   = 0;
17749
17750        ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
17751          follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
17752            $here_quote_character, $quote_pos, $quote_depth );
17753
17754        if ($in_quote) {    # didn't find end of quote, so no target found
17755            $i = $ibeg;
17756        }
17757        else {              # found ending quote
17758            my $j;
17759            $found_target = 1;
17760
17761            my $tokj;
17762            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
17763                $tokj = $$rtokens[$j];
17764
17765                # we have to remove any backslash before the quote character
17766                # so that the here-doc-target exactly matches this string
17767                next
17768                  if ( $tokj eq "\\"
17769                    && $j < $i - 1
17770                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
17771                $here_doc_target .= $tokj;
17772            }
17773        }
17774    }
17775
17776    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
17777        $found_target = 1;
17778        write_logfile_entry(
17779            "found blank here-target after <<; suggest using \"\"\n");
17780        $i = $ibeg;
17781    }
17782    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
17783
17784        my $here_doc_expected;
17785        if ( $expecting == UNKNOWN ) {
17786            $here_doc_expected = guess_if_here_doc($next_token);
17787        }
17788        else {
17789            $here_doc_expected = 1;
17790        }
17791
17792        if ($here_doc_expected) {
17793            $found_target    = 1;
17794            $here_doc_target = $next_token;
17795            $i               = $ibeg + 1;
17796        }
17797
17798    }
17799    else {
17800
17801        if ( $expecting == TERM ) {
17802            $found_target = 1;
17803            write_logfile_entry("Note: bare here-doc operator <<\n");
17804        }
17805        else {
17806            $i = $ibeg;
17807        }
17808    }
17809
17810    # patch to neglect any prepended backslash
17811    if ( $found_target && $backslash ) { $i++ }
17812
17813    return ( $found_target, $here_doc_target, $here_quote_character, $i );
17814}
17815
17816# try to resolve here-doc vs. shift by looking ahead for
17817# non-code or the end token (currently only looks for end token)
17818# returns 1 if it is probably a here doc, 0 if not
17819sub guess_if_here_doc {
17820
17821    # This is how many lines we will search for a target as part of the
17822    # guessing strategy.  It is a constant because there is probably
17823    # little reason to change it.
17824    use constant HERE_DOC_WINDOW => 40;
17825
17826    my $next_token        = shift;
17827    my $here_doc_expected = 0;
17828    my $line;
17829    my $k   = 0;
17830    my $msg = "checking <<";
17831
17832    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
17833    {
17834        chomp $line;
17835
17836        if ( $line =~ /^$next_token$/ ) {
17837            $msg .= " -- found target $next_token ahead $k lines\n";
17838            $here_doc_expected = 1;    # got it
17839            last;
17840        }
17841        last if ( $k >= HERE_DOC_WINDOW );
17842    }
17843
17844    unless ($here_doc_expected) {
17845
17846        if ( !defined($line) ) {
17847            $here_doc_expected = -1;    # hit eof without seeing target
17848            $msg .= " -- must be shift; target $next_token not in file\n";
17849
17850        }
17851        else {                          # still unsure..taking a wild guess
17852
17853            if ( !$is_constant{$current_package}{$next_token} ) {
17854                $here_doc_expected = 1;
17855                $msg .=
17856                  " -- guessing it's a here-doc ($next_token not a constant)\n";
17857            }
17858            else {
17859                $msg .= " -- guessing it's a shift\n";
17860                $msg .=
17861                  " -- guessing it's a shift ($next_token is a constant)\n";
17862            }
17863        }
17864    }
17865    write_logfile_entry($msg);
17866    return $here_doc_expected;
17867}
17868
17869sub do_quote {
17870
17871    # follow (or continue following) quoted string or pattern
17872    # $in_quote return code:
17873    #   0 - ok, found end
17874    #   1 - still must find end of quote whose target is $quote_character
17875    #   2 - still looking for end of first of two quotes
17876    my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
17877        $rtoken_map )
17878      = @_;
17879
17880    if ( $in_quote == 2 ) {    # two quotes/patterns to follow
17881        my $ibeg = $i;
17882        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
17883          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
17884            $quote_pos, $quote_depth );
17885
17886        if ( $in_quote == 1 ) {
17887            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
17888            $quote_character = '';
17889        }
17890    }
17891
17892    if ( $in_quote == 1 ) {    # one (more) quote to follow
17893        my $ibeg = $i;
17894        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
17895          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
17896            $quote_pos, $quote_depth );
17897    }
17898    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
17899}
17900
17901sub scan_number_do {
17902
17903=pod
17904
17905  scan a number in any of the formats that Perl accepts
17906  Underbars (_) are allowed in decimal numbers.
17907  input parameters -
17908      $input_line  - the string to scan
17909      $i           - pre_token index to start scanning
17910    $rtoken_map    - reference to the pre_token map giving starting
17911                    character position in $input_line of token $i
17912  output parameters -
17913    $i            - last pre_token index of the number just scanned
17914    number        - the number (characters); or undef if not a number
17915
17916=cut
17917
17918    my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
17919    my $pos_beg = $$rtoken_map[$i];
17920    my $pos;
17921    my $i_begin = $i;
17922    my $number  = undef;
17923    my $type    = $input_type;
17924
17925    my $first_char = substr( $input_line, $pos_beg, 1 );
17926
17927    # Look for bad starting characters; Shouldn't happen..
17928    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
17929        warning("Program bug - scan_number given character $first_char\n");
17930        report_definite_bug();
17931        return ( $i, $type, $number );
17932    }
17933
17934    # handle v-string without leading 'v' character ('Two Dot' rule)
17935    # (vstring.t)
17936    pos($input_line) = $pos_beg;
17937    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
17938        $pos = pos($input_line);
17939        my $numc = $pos - $pos_beg;
17940        $number = substr( $input_line, $pos_beg, $numc );
17941        $type = 'v';
17942        unless ($saw_v_string) { report_v_string($number) }
17943    }
17944
17945    # handle octal, hex, binary
17946    if ( !defined($number) ) {
17947        pos($input_line) = $pos_beg;
17948        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
17949        {
17950            $pos = pos($input_line);
17951            my $numc = $pos - $pos_beg;
17952            $number = substr( $input_line, $pos_beg, $numc );
17953            $type = 'n';
17954        }
17955    }
17956
17957    # handle decimal
17958    if ( !defined($number) ) {
17959        pos($input_line) = $pos_beg;
17960
17961        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
17962            $pos = pos($input_line);
17963
17964            # watch out for things like 0..40 which would give 0. by this;
17965            if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
17966                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
17967            {
17968                $pos--;
17969            }
17970            my $numc = $pos - $pos_beg;
17971            $number = substr( $input_line, $pos_beg, $numc );
17972            $type = 'n';
17973        }
17974    }
17975
17976    # filter out non-numbers like e + - . e2  .e3 +e6
17977    # the rule: at least one digit, and any 'e' must be preceded by a digit
17978    if ( $number !~ /\d+[eE]?/ ) {
17979        $number = undef;
17980        $type   = $input_type;
17981        return ( $i, $type, $number );
17982    }
17983
17984    # Found a number; now we must convert back from character position
17985    # to pre_token index. An error here implies user syntax error.
17986    # An example would be an invalid octal number like '009'.
17987    my $error;
17988    ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
17989    if ($error) { warning("Possibly invalid number\n") }
17990
17991    return ( $i, $type, $number );
17992}
17993
17994sub scan_bare_identifier_do {
17995
17996    # this routine is called to scan a token starting with an alphanumeric
17997    # variable or package separator, :: or '.
17998
17999    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
18000    my $i_begin = $i;
18001    my $package = undef;
18002
18003    my $i_beg = $i;
18004
18005    # we have to back up one pretoken at a :: since each : is one pretoken
18006    if ( $tok eq '::' ) { $i_beg-- }
18007    if ( $tok eq '->' ) { $i_beg-- }
18008    my $pos_beg = $$rtoken_map[$i_beg];
18009    pos($input_line) = $pos_beg;
18010
18011    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:->)?(\w+)/gc ) {
18012
18013        my $pos  = pos($input_line);
18014        my $numc = $pos - $pos_beg;
18015        $tok = substr( $input_line, $pos_beg, $numc );
18016
18017        # type 'w' includes anything without leading type info
18018        # ($,%,@,*) including something like abc::def::ghi
18019        $type = 'w';
18020
18021        if ( defined($1) ) {
18022            $package = $1;
18023            $package =~ s/\'/::/g;
18024            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
18025            $package =~ s/::$//;
18026        }
18027        else {
18028            $package = $current_package;
18029
18030            if ( $is_keyword{$tok} ) {
18031                $type = 'k';
18032            }
18033        }
18034        my $sub_name = $2;
18035
18036        # if it is a bareword..
18037        if ( $type eq 'w' ) {
18038
18039            # check for v-string with leading 'v' type character
18040            # (This seems to have presidence over filehandle, type 'Y')
18041            if ( $tok =~ /^v\d+$/ ) {
18042
18043                # we only have the first part - something like 'v101' -
18044                # look for more
18045                if ( $input_line =~ m/\G(\.\d+)+/gc ) {
18046                    $pos  = pos($input_line);
18047                    $numc = $pos - $pos_beg;
18048                    $tok  = substr( $input_line, $pos_beg, $numc );
18049                }
18050                $type = 'v';
18051
18052                # warn if this version can't handle v-strings
18053                unless ($saw_v_string) { report_v_string($tok) }
18054            }
18055
18056            elsif ( $is_constant{$package}{$sub_name} ) {
18057                $type = 'C';
18058            }
18059
18060            # bareword after sort has implied empty prototype; for example:
18061            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
18062            # This has priority over whatever the user has specified.
18063            elsif ( $last_nonblank_token eq 'sort'
18064                && $last_nonblank_type eq 'k' )
18065            {
18066                $type = 'Z';
18067            }
18068
18069            # Note: strangely, perl does not seem to really let you create
18070            # functions which act like eval and do, in the sense that eval
18071            # and do may have operators following the final }, but any operators
18072            # that you create with prototype (&) apparently do not allow
18073            # trailing operators, only terms.  This seems strange.
18074            # If this ever changes, here is the update
18075            # to make perltidy behave accordingly:
18076
18077            # elsif ( $is_block_function{$package}{$tok} ) {
18078            #    $tok='eval'; # patch to do braces like eval  - doesn't work
18079            #    $type = 'k';
18080            #}
18081            # FIXME: This should become a separate type to allow for different
18082            # future behavior:
18083            elsif ( $is_block_function{$package}{$sub_name} ) {
18084                $type = 'G';
18085            }
18086
18087            elsif ( $is_block_list_function{$package}{$sub_name} ) {
18088                $type = 'G';
18089            }
18090            elsif ( $is_user_function{$package}{$sub_name} ) {
18091                $type      = 'U';
18092                $prototype = $user_function_prototype{$package}{$sub_name};
18093            }
18094
18095            # check for indirect object
18096            elsif (
18097
18098                # added 2001-03-27: must not be followed immediately by '('
18099                # see fhandle.t
18100                ( $input_line !~ m/\G\(/gc )
18101
18102                # and
18103                && (
18104
18105                    # preceded by keyword like 'print', 'printf' and friends
18106                    ( $last_nonblank_token =~ /$indirect_object_taker/ )
18107
18108                    # or preceded by something like 'print(' or 'printf('
18109                    || (
18110                        ( $last_nonblank_token eq '(' )
18111                        && ( $paren_type[$paren_depth] =~
18112                            /$indirect_object_taker/ )
18113                    )
18114                )
18115              )
18116            {
18117
18118                # may not be indirect object unless followed by a space
18119                if ( $input_line =~ m/\G\s+/gc ) {
18120                    $type = 'Y';
18121
18122                    # Abandon Hope ...
18123                    # Perl's indirect object notation is a very bad
18124                    # thing and can cause subtle bugs, especially for
18125                    # beginning programmers.  And I haven't even been
18126                    # able to figure out a sane warning scheme which
18127                    # doesn't get in the way of good scripts.
18128
18129                    # Complain if a filehandle has any lower case
18130                    # letters.  This is suggested good practice, but the
18131                    # main reason for this warning is that prior to
18132                    # release 20010328, perltidy incorrectly parsed a
18133                    # function call after a print/printf, with the
18134                    # result that a space got added before the opening
18135                    # paren, thereby converting the function name to a
18136                    # filehandle according to perl's weird rules.  This
18137                    # will not usually generate a syntax error, so this
18138                    # is a potentially serious bug.  By warning
18139                    # of filehandles with any lower case letters,
18140                    # followed by opening parens, we will help the user
18141                    # find almost all of these older errors.
18142                    # use 'sub_name' because something like
18143                    # main::MYHANDLE is ok for filehandle
18144                    if ( $sub_name =~ /[a-z]/ ) {
18145
18146                        # could be bug caused by older perltidy if
18147                        # followed by '('
18148                        if ( $input_line =~ m/\G\s*\(/gc ) {
18149                            complain(
18150"Caution: unknown word '$tok' in indirect object slot\n"
18151                            );
18152                        }
18153                    }
18154                }
18155
18156                # bareword not followed by a space -- may not be filehandle
18157                # (may be function call defined in a 'use' statement)
18158                else {
18159                    $type = 'Z';
18160                }
18161            }
18162        }
18163
18164        # Now we must convert back from character position
18165        # to pre_token index.
18166        # I don't think an error flag can occur here ..but who knows
18167        my $error;
18168        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
18169        if ($error) {
18170            warning("scan_bare_identifier: Possibly invalid tokenization\n");
18171        }
18172    }
18173
18174    # no match but line not blank - could be syntax error
18175    # perl will take '::' alone without complaint
18176    else {
18177        $type = 'w';
18178
18179        # change this warning to log message if it becomes annoying
18180        warning("didn't find identifier after leading ::\n");
18181    }
18182    return ( $i, $tok, $type, $prototype );
18183}
18184
18185sub scan_id_do {
18186
18187=pod
18188
18189This is the new scanner and will eventually replace scan_identifier.
18190Only type 'sub' and 'package' are implemented.
18191Token types $ * % @ & -> are not yet implemented.
18192
18193Scan identifier following a type token.
18194The type of call depends on $id_scan_state: $id_scan_state = ''
18195for starting call, in which case $tok must be the token defining
18196the type.
18197
18198If the type token is the last nonblank token on the line, a value
18199of $id_scan_state = $tok is returned, indicating that further
18200calls must be made to get the identifier.  If the type token is
18201not the last nonblank token on the line, the identifier is
18202scanned and handled and a value of '' is returned.
18203
18204=cut
18205
18206    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
18207    my $type = '';
18208    my ( $i_beg, $pos_beg );
18209
18210    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
18211    #my ($a,$b,$c) = caller;
18212    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
18213
18214    # on re-entry, start scanning at first token on the line
18215    if ($id_scan_state) {
18216        $i_beg = $i;
18217        $type  = '';
18218    }
18219
18220    # on initial entry, start scanning just after type token
18221    else {
18222        $i_beg         = $i + 1;
18223        $id_scan_state = $tok;
18224        $type          = 't';
18225    }
18226
18227    # find $i_beg = index of next nonblank token,
18228    # and handle empty lines
18229    my $blank_line          = 0;
18230    my $next_nonblank_token = $$rtokens[$i_beg];
18231    if ( $i_beg > $max_token_index ) {
18232        $blank_line = 1;
18233    }
18234    else {
18235
18236        # only a '#' immediately after a '$' is not a comment
18237        if ( $next_nonblank_token eq '#' ) {
18238            unless ( $tok eq '$' ) {
18239                $blank_line = 1;
18240            }
18241        }
18242
18243        if ( $next_nonblank_token =~ /^\s/ ) {
18244            ( $next_nonblank_token, $i_beg ) =
18245              find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
18246            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
18247                $blank_line = 1;
18248            }
18249        }
18250    }
18251
18252    # handle non-blank line; identifier, if any, must follow
18253    unless ($blank_line) {
18254
18255        if ( $id_scan_state eq 'sub' ) {
18256            ( $i, $tok, $type ) =
18257              do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
18258                $rtoken_map );
18259        }
18260
18261        elsif ( $id_scan_state eq 'package' ) {
18262            ( $i, $tok, $type ) =
18263              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
18264                $rtoken_map );
18265        }
18266
18267        else {
18268            warning("invalid token in scan_id: $tok\n");
18269        }
18270        $id_scan_state = '';
18271    }
18272
18273    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
18274
18275        # shouldn't happen:
18276        warning(
18277"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
18278        );
18279        report_definite_bug();
18280    }
18281
18282    TOKENIZER_DEBUG_FLAG_NSCAN && do {
18283        print
18284          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
18285    };
18286    return ( $i, $tok, $type, $id_scan_state );
18287}
18288
18289sub do_scan_sub {
18290
18291    # do_scan_sub parses a sub name and prototype
18292    # it is called with $i_beg equal to the index of the first nonblank
18293    # token following a 'sub' token.
18294
18295    # TODO: add future error checks to be sure we have a valid
18296    # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
18297    # a name is given if and only if a non-anonymous sub is
18298    # appropriate.
18299
18300    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
18301    my $subname = undef;
18302    my $package = undef;
18303    my $proto   = undef;
18304    my $attrs   = undef;
18305
18306    my $pos_beg = $$rtoken_map[$i_beg];
18307    pos($input_line) = $pos_beg;
18308
18309    # sub NAME PROTO ATTRS BLOCK
18310    #if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*)(\w+)(\s*\([^){]*\))?/gc ) {
18311    if (
18312        $input_line =~ m/\G\s*
18313        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
18314        (\w+)               # NAME    - required
18315        (\s*\([^){]*\))?    # PROTO   - something in parens
18316        (\s*:(\s*(\w+))+)?  # ATTRS   - leading : followed by one or more words
18317        /gcx
18318      )
18319    {
18320        $subname = $2;
18321        $proto   = $3;
18322        $attrs   = $4;
18323
18324        if ($attrs) {
18325
18326            # unused for now
18327
18328        }
18329        $package = ( defined($1) && $1 ) ? $1 : $current_package;
18330        $package =~ s/\'/::/g;
18331        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
18332        $package =~ s/::$//;
18333        my $pos  = pos($input_line);
18334        my $numc = $pos - $pos_beg;
18335        $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
18336        $type = 'i';
18337
18338        # We must convert back from character position
18339        # to pre_token index.
18340        # I don't think an error flag can occur here ..but ?
18341        my $error;
18342        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
18343        if ($error) { warning("Possibly invalid sub\n") }
18344
18345        # TESTING: check for multiple definitions of a sub
18346        my ( $next_nonblank_token, $i_next ) =
18347          find_next_nonblank_token_on_this_line( $i, $rtokens );
18348
18349        if ( $next_nonblank_token =~ /^(\s*|#)$/ )
18350        {    # skip blank or side comment
18351            my ( $rpre_tokens, $rpre_types ) =
18352              peek_ahead_for_n_nonblank_pre_tokens(1);
18353            if ( defined($rpre_tokens) && @$rpre_tokens ) {
18354                $next_nonblank_token = $rpre_tokens->[0];
18355            }
18356            else {
18357                $next_nonblank_token = '}';
18358            }
18359        }
18360
18361        if ( $next_nonblank_token eq '{' ) {
18362            if ( $saw_function_definition{$package}{$subname} ) {
18363                my $lno = $saw_function_definition{$package}{$subname};
18364                warning(
18365"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
18366                );
18367            }
18368            $saw_function_definition{$package}{$subname} = $input_line_number;
18369        }
18370        elsif ( $next_nonblank_token eq ';' ) {
18371        }
18372        elsif ( $next_nonblank_token eq '}' ) {
18373        }
18374        elsif ($next_nonblank_token) {    # EOF technically ok
18375            warning(
18376"expecting ';' or '{' after definition or declaration of sub $subname but saw ($next_nonblank_token)\n"
18377            );
18378
18379        }
18380
18381        if ( defined($proto) ) {
18382            $proto =~ s/^\s*\(\s*//;
18383            $proto =~ s/\s*\)$//;
18384            if ($proto) {
18385                $is_user_function{$package}{$subname}        = 1;
18386                $user_function_prototype{$package}{$subname} = "($proto)";
18387
18388                # prototypes containing '&' must be treated specially..
18389                if ( $proto =~ /\&/ ) {
18390
18391                    # right curly braces of prototypes ending in
18392                    # '&' may be followed by an operator
18393                    if ( $proto =~ /\&$/ ) {
18394                        $is_block_function{$package}{$subname} = 1;
18395                    }
18396
18397                    # right curly braces of prototypes NOT ending in
18398                    # '&' may NOT be followed by an operator
18399                    elsif ( $proto !~ /\&$/ ) {
18400                        $is_block_list_function{$package}{$subname} = 1;
18401                    }
18402                }
18403            }
18404            else {
18405                $is_constant{$package}{$subname} = 1;
18406            }
18407        }
18408        else {
18409            $is_user_function{$package}{$subname} = 1;
18410        }
18411
18412    }
18413
18414    # look for prototype following an anonymous sub so they don't get
18415    # stranded.  ( sub.t )
18416    #elsif ( $input_line =~ m/\G\s*\([^){]*\)/gc )
18417    # sub PROTO ATTRS BLOCK
18418    elsif (
18419        $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
18420      (\s*:(\s*(\w+))+)?    # ATTRS
18421      /gcx
18422        && ( $1 || $2 )
18423      )
18424    {
18425
18426        # remove this after testing
18427        if ($2) { write_diagnostics("Found anonymous sub ATTRS $2 \n"); }
18428        my $pos = pos($input_line);
18429        my $error;
18430        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
18431        if ($error) { warning("Possibly invalid sub\n") }
18432    }
18433
18434    # no match but line not blank
18435    else {
18436    }
18437    return ( $i, $tok, $type );
18438}
18439
18440sub do_scan_package {
18441
18442    # do_scan_package parses a package name
18443    # it is called with $i_beg equal to the index of the first nonblank
18444    # token following a 'package' token.
18445
18446    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
18447    my $package = undef;
18448    my $pos_beg = $$rtoken_map[$i_beg];
18449    pos($input_line) = $pos_beg;
18450
18451    # handle non-blank line; package name, if any, must follow
18452    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
18453        $package = $1;
18454        $package = ( defined($1) && $1 ) ? $1 : 'main';
18455        $package =~ s/\'/::/g;
18456        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
18457        $package =~ s/::$//;
18458        my $pos  = pos($input_line);
18459        my $numc = $pos - $pos_beg;
18460        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
18461        $type = 'i';
18462
18463        # Now we must convert back from character position
18464        # to pre_token index.
18465        # I don't think an error flag can occur here ..but ?
18466        my $error;
18467        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
18468        if ($error) { warning("Possibly invalid package\n") }
18469        $current_package = $package;
18470
18471        # check for error
18472        my ( $next_nonblank_token, $i_next ) =
18473          find_next_nonblank_token( $i, $rtokens );
18474        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
18475            warning(
18476                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
18477            );
18478        }
18479    }
18480
18481    # no match but line not blank --
18482    # could be a label with name package, like package:  , for example.
18483    else {
18484        $type = 'k';
18485    }
18486
18487    return ( $i, $tok, $type );
18488}
18489
18490sub scan_identifier_do {
18491
18492=pod
18493
18494NOTE: This develomental scanner WILL BE REPLACED by the newer version
18495"scan_id".  The reason is that scan_id will be regex based, which makes
18496maintainence much easier, and probably improves the speed.
18497
18498This routine assembles tokens into identifiers.
18499It maintains a scan state, id_scan_state.  It updates
18500id_scan_state based upon current id_scan_state and token, and returns an
18501updated id_scan_state and the next index after the identifier.
18502
18503=cut
18504
18505    my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
18506    my $i_begin   = $i;
18507    my $type      = '';
18508    my $tok_begin = $$rtokens[$i_begin];
18509    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
18510    my $id_scan_state_begin = $id_scan_state;
18511    my $identifier_begin    = $identifier;
18512    my $tok                 = $tok_begin;
18513    my $message             = "";
18514
18515    # these flags will be used to help figure out the type:
18516    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
18517    my $saw_type;
18518
18519    # allow old package separator (') except in 'use' statement
18520    my $allow_tick = ( $last_nonblank_token ne 'use' );
18521
18522    # get started by defining a type and a state if necessary
18523    unless ($id_scan_state) {
18524        $context = UNKNOWN_CONTEXT;
18525
18526        # fixup for digraph
18527        if ( $tok eq '>' ) {
18528            $tok       = '->';
18529            $tok_begin = $tok;
18530        }
18531        $identifier = $tok;
18532
18533        if ( $tok eq '$' || $tok eq '*' ) {
18534            $id_scan_state = '$';
18535            $context       = SCALAR_CONTEXT;
18536        }
18537        elsif ( $tok eq '%' || $tok eq '@' ) {
18538            $id_scan_state = '$';
18539            $context       = LIST_CONTEXT;
18540        }
18541        elsif ( $tok eq '&' ) {
18542            $id_scan_state = '&';
18543        }
18544        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
18545            $saw_alpha     = 0;     # 'sub' is considered type info here
18546            $id_scan_state = '$';
18547            $identifier .= ' ';     # need a space to separate sub from sub name
18548        }
18549        elsif ( $tok eq '::' ) {
18550            $id_scan_state = 'A';
18551        }
18552        elsif ( $tok =~ /^[A-Za-z_]/ ) {
18553            $id_scan_state = ':';
18554        }
18555        elsif ( $tok eq '->' ) {
18556            $id_scan_state = '$';
18557        }
18558        else {
18559
18560            # shouldn't happen
18561            my ( $a, $b, $c ) = caller;
18562            warning("Program Bug: scan_identifier given bad token = $tok \n");
18563            warning("   called from sub $a  line: $c\n");
18564            report_definite_bug();
18565        }
18566        $saw_type = !$saw_alpha;
18567    }
18568    else {
18569        $i--;
18570        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
18571    }
18572
18573    # now loop to gather the identifier
18574    my $i_save = $i;
18575
18576    while ( $i < $max_token_index ) {
18577        $i_save = $i unless ( $tok =~ /^\s*$/ );
18578        $tok = $$rtokens[ ++$i ];
18579
18580        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
18581            $tok = '::';
18582            $i++;
18583        }
18584
18585        if ( $id_scan_state eq '$' ) {    # starting variable name
18586
18587            if ( $tok eq '$' ) {
18588
18589                $identifier .= $tok;
18590
18591                # we've got a punctuation variable if end of line (punct.t)
18592                if ( $i == $max_token_index ) {
18593                    $type          = 'i';
18594                    $id_scan_state = '';
18595                    last;
18596                }
18597            }
18598            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
18599                $saw_alpha     = 1;
18600                $id_scan_state = ':';           # now need ::
18601                $identifier .= $tok;
18602            }
18603            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
18604                $saw_alpha     = 1;
18605                $id_scan_state = ':';                 # now need ::
18606                $identifier .= $tok;
18607
18608                # Perl will accept leading digits in identifiers,
18609                # although they may not always produce useful results.
18610                # Something like $main::0 is ok.  But this also works:
18611                #
18612                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
18613                #  howdy::123::bubba();
18614                #
18615            }
18616            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
18617                $saw_alpha     = 1;
18618                $id_scan_state = ':';                 # now need ::
18619                $identifier .= $tok;
18620            }
18621            elsif ( $tok eq '::' ) {
18622                $id_scan_state = 'A';
18623                $identifier .= $tok;
18624            }
18625            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
18626                $identifier .= $tok;    # keep same state, a $ could follow
18627            }
18628            elsif ( $tok eq '{' ) {     # skip something like ${xxx} or ->{
18629                $id_scan_state = '';
18630
18631                # if this is the first token of a line, any tokens for this
18632                # identifier have already been accumulated
18633                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
18634                $i = $i_save;
18635                last;
18636            }
18637
18638            # space ok after leading $ % * & @
18639            elsif ( $tok =~ /^\s*$/ ) {
18640
18641                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
18642
18643                    if ( length($identifier) > 1 ) {
18644                        $id_scan_state = '';
18645                        $i             = $i_save;
18646                        $type          = 'i';    # probably punctuation variable
18647                        last;
18648                    }
18649                    else {
18650
18651                        # spaces after $'s are common, and space after @
18652                        # is harmless, so only complain about space
18653                        # after other type characters. Space after $ and
18654                        # @ will be removed in formatting.  Report space
18655                        # after % and * because they might indicate a
18656                        # parsing error.  In other words '% ' might be a
18657                        # modulo operator.  Delete this warning if it
18658                        # gets annoying.
18659                        if ( $identifier !~ /^[\@\$]$/ ) {
18660                            $message =
18661                              "Space in identifier, following $identifier\n";
18662                        }
18663                    }
18664                }
18665
18666                # else:
18667                # space after '->' is ok
18668            }
18669            elsif ( $tok eq '^' ) {
18670
18671                # check for some special variables like $^W
18672                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
18673                    $identifier .= $tok;
18674                    $id_scan_state = 'A';
18675                }
18676                else {
18677                    $id_scan_state = '';
18678                }
18679            }
18680            else {    # something else
18681
18682                # check for various punctuation variables
18683                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
18684                    $identifier .= $tok;
18685                }
18686
18687                elsif ( $identifier eq '$#' ) {
18688
18689                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
18690
18691                    # perl seems to allow just these: $#: $#- $#+
18692                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
18693                        $type = 'i';
18694                        $identifier .= $tok;
18695                    }
18696                    else {
18697                        $i = $i_save;
18698                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
18699                    }
18700                }
18701                elsif ( $identifier eq '$$' ) {
18702
18703                    # perl does not allow references to punctuation
18704                    # variables without braces.  For example, this
18705                    # won't work:
18706                    #  $:=\4;
18707                    #  $a = $$:;
18708                    # You would have to use
18709                    #  $a = ${$:};
18710
18711                    $i = $i_save;
18712                    if ( $tok eq '{' ) { $type = 't' }
18713                    else { $type = 'i' }
18714                }
18715                elsif ( $identifier eq '->' ) {
18716                    $i = $i_save;
18717                }
18718                else {
18719                    $i = $i_save;
18720                    if ( length($identifier) == 1 ) { $identifier = ''; }
18721                }
18722                $id_scan_state = '';
18723                last;
18724            }
18725        }
18726        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
18727
18728            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
18729                $id_scan_state = ':';          # now need ::
18730                $saw_alpha     = 1;
18731                $identifier .= $tok;
18732            }
18733            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
18734                $id_scan_state = ':';                 # now need ::
18735                $saw_alpha     = 1;
18736                $identifier .= $tok;
18737            }
18738            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
18739                $id_scan_state = ':';       # now need ::
18740                $saw_alpha     = 1;
18741                $identifier .= $tok;
18742            }
18743            elsif ( $tok =~ /^\s*$/ ) {     # allow space
18744            }
18745            elsif ( $tok eq '::' ) {        # leading ::
18746                $id_scan_state = 'A';       # accept alpha next
18747                $identifier .= $tok;
18748            }
18749            elsif ( $tok eq '{' ) {
18750                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
18751                $i             = $i_save;
18752                $id_scan_state = '';
18753                last;
18754            }
18755            else {
18756
18757                # punctuation variable?
18758                # testfile: cunningham4.pl
18759                if ( $identifier eq '&' ) {
18760                    $identifier .= $tok;
18761                }
18762                else {
18763                    $identifier = '';
18764                    $i          = $i_save;
18765                    $type       = '&';
18766                }
18767                $id_scan_state = '';
18768                last;
18769            }
18770        }
18771        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
18772
18773            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
18774                $identifier .= $tok;
18775                $id_scan_state = ':';        # now need ::
18776                $saw_alpha     = 1;
18777            }
18778            elsif ( $tok eq "'" && $allow_tick ) {
18779                $identifier .= $tok;
18780                $id_scan_state = ':';        # now need ::
18781                $saw_alpha     = 1;
18782            }
18783            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
18784                $identifier .= $tok;
18785                $id_scan_state = ':';        # now need ::
18786                $saw_alpha     = 1;
18787            }
18788            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
18789                $id_scan_state = '(';
18790                $identifier .= $tok;
18791            }
18792            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
18793                $id_scan_state = ')';
18794                $identifier .= $tok;
18795            }
18796            else {
18797                $id_scan_state = '';
18798                $i             = $i_save;
18799                last;
18800            }
18801        }
18802        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
18803
18804            if ( $tok eq '::' ) {            # got it
18805                $identifier .= $tok;
18806                $id_scan_state = 'A';        # now require alpha
18807            }
18808            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
18809                $identifier .= $tok;
18810                $id_scan_state = ':';           # now need ::
18811                $saw_alpha     = 1;
18812            }
18813            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
18814                $identifier .= $tok;
18815                $id_scan_state = ':';           # now need ::
18816                $saw_alpha     = 1;
18817            }
18818            elsif ( $tok eq "'" && $allow_tick ) {    # tick
18819
18820                if ( $is_keyword{$identifier} ) {
18821                    $id_scan_state = '';              # that's all
18822                    $i             = $i_save;
18823                }
18824                else {
18825                    $identifier .= $tok;
18826                }
18827            }
18828            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
18829                $id_scan_state = '(';
18830                $identifier .= $tok;
18831            }
18832            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
18833                $id_scan_state = ')';
18834                $identifier .= $tok;
18835            }
18836            else {
18837                $id_scan_state = '';        # that's all
18838                $i             = $i_save;
18839                last;
18840            }
18841        }
18842        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
18843
18844            if ( $tok eq '(' ) {             # got it
18845                $identifier .= $tok;
18846                $id_scan_state = ')';        # now find the end of it
18847            }
18848            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
18849                $identifier .= $tok;
18850            }
18851            else {
18852                $id_scan_state = '';         # that's all - no prototype
18853                $i             = $i_save;
18854                last;
18855            }
18856        }
18857        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
18858
18859            if ( $tok eq ')' ) {             # got it
18860                $identifier .= $tok;
18861                $id_scan_state = '';         # all done
18862                last;
18863            }
18864            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
18865                $identifier .= $tok;
18866            }
18867            else {    # probable error in script, but keep going
18868                warning("Unexpected '$tok' while seeking end of prototype\n");
18869                $identifier .= $tok;
18870            }
18871        }
18872        else {    # can get here due to error in initialization
18873            $id_scan_state = '';
18874            $i             = $i_save;
18875            last;
18876        }
18877    }
18878
18879    if ( $id_scan_state eq ')' ) {
18880        warning("Hit end of line while seeking ) to end prototype\n");
18881    }
18882
18883    # once we enter the actual identifier, it may not extend beyond
18884    # the end of the current line
18885    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
18886        $id_scan_state = '';
18887    }
18888    if ( $i < 0 ) { $i = 0 }
18889
18890    unless ($type) {
18891
18892        if ($saw_type) {
18893
18894            if ($saw_alpha) {
18895                $type = 'i';
18896            }
18897            elsif ( $identifier eq '->' ) {
18898                $type = '->';
18899            }
18900            elsif ( ( length($identifier) > 1 )
18901                && ( $identifier !~ /\$$/ )
18902                && ( $identifier !~ /^(sub |package )$/ ) )
18903            {
18904                $type = 'i';
18905            }
18906            else { $type = 't' }
18907        }
18908        elsif ($saw_alpha) {
18909
18910            # type 'w' includes anything without leading type info
18911            # ($,%,@,*) including something like abc::def::ghi
18912            $type = 'w';
18913        }
18914        else {
18915            $type = '';
18916        }    # this can happen on a restart
18917    }
18918
18919    if ($identifier) {
18920        $tok = $identifier;
18921        if ($message) { write_logfile_entry($message) }
18922    }
18923    else {
18924        $tok = $tok_begin;
18925        $i   = $i_begin;
18926    }
18927
18928    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
18929        my ( $a, $b, $c ) = caller;
18930        print
18931"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
18932        print
18933"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
18934    };
18935    return ( $i, $tok, $type, $id_scan_state, $identifier );
18936}
18937
18938sub follow_quoted_string {
18939
18940    # scan for a specific token, skipping escaped characters
18941    # if the quote character is blank, use the first non-blank character
18942    # input parameters:
18943    #   $rtokens = reference to the array of tokens
18944    #   $i = the token index of the first character to search
18945    #   $in_quote = number of quoted strings being followed
18946    #   $beginning_tok = the starting quote character
18947    #   $quote_pos = index to check next for alphanumeric delimiter
18948    # output parameters:
18949    #   $i = the token index of the ending quote character
18950    #   $in_quote = decremented if found end, unchanged if not
18951    #   $beginning_tok = the starting quote character
18952    #   $quote_pos = index to check next for alphanumeric delimiter
18953    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
18954    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
18955      = @_;
18956    my ( $tok, $end_tok );
18957    my $i = $i_beg - 1;
18958
18959    TOKENIZER_DEBUG_FLAG_QUOTE && do {
18960        print
18961"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
18962    };
18963
18964    # get the corresponding end token
18965    if ( $beginning_tok !~ /^\s*$/ ) {
18966        $end_tok = matching_end_token($beginning_tok);
18967    }
18968
18969    # a blank token means we must find and use the first non-blank one
18970    else {
18971        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
18972
18973        while ( $i < $max_token_index ) {
18974            $tok = $$rtokens[ ++$i ];
18975
18976            if ( $tok !~ /^\s*$/ ) {
18977
18978                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
18979                    $i = $max_token_index;
18980                }
18981                else {
18982
18983                    if ( length($tok) > 1 ) {
18984                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
18985                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
18986                    }
18987                    else {
18988                        $beginning_tok = $tok;
18989                        $quote_pos     = 0;
18990                    }
18991                    $end_tok     = matching_end_token($beginning_tok);
18992                    $quote_depth = 1;
18993                    last;
18994                }
18995            }
18996            else {
18997                $allow_quote_comments = 1;
18998            }
18999        }
19000    }
19001
19002    # There are two different loops which search for the ending quote
19003    # character.  In the rare case of an alphanumeric quote delimiter, we
19004    # have to look through alphanumeric tokens character-by-character, since
19005    # the pre-tokenization process combines multiple alphanumeric
19006    # characters, whereas for a non-alphanumeric delimiter, only tokens of
19007    # length 1 can match.
19008
19009    # loop for case of alphanumeric quote delimiter..
19010    # "quote_pos" is the position the current word to begin searching
19011    if ( $beginning_tok =~ /\w/ ) {
19012
19013        # Note this because it is not recommended practice except
19014        # for obfuscated perl contests
19015        if ( $in_quote == 1 ) {
19016            write_logfile_entry(
19017                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
19018        }
19019
19020        while ( $i < $max_token_index ) {
19021
19022            if ( $quote_pos == 0 || ( $i < 0 ) ) {
19023                $tok = $$rtokens[ ++$i ];
19024
19025                if ( $tok eq '\\' ) {
19026
19027                    $quote_pos++;
19028                    last if ( $i >= $max_token_index );
19029                    $tok = $$rtokens[ ++$i ];
19030
19031                }
19032            }
19033            my $old_pos = $quote_pos;
19034
19035            unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
19036            {
19037
19038            }
19039            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
19040
19041            if ( $quote_pos > 0 ) {
19042
19043                $quote_depth--;
19044
19045                if ( $quote_depth == 0 ) {
19046                    $in_quote--;
19047                    last;
19048                }
19049            }
19050        }
19051
19052        # loop for case of a non-alphanumeric quote delimiter..
19053    }
19054    else {
19055
19056        while ( $i < $max_token_index ) {
19057            $tok = $$rtokens[ ++$i ];
19058
19059            if ( $tok eq $end_tok ) {
19060                $quote_depth--;
19061
19062                if ( $quote_depth == 0 ) {
19063                    $in_quote--;
19064                    last;
19065                }
19066            }
19067            elsif ( $tok eq $beginning_tok ) {
19068                $quote_depth++;
19069            }
19070            elsif ( $tok eq '\\' ) {
19071                $i++;
19072            }
19073        }
19074    }
19075    if ( $i > $max_token_index ) { $i = $max_token_index }
19076    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
19077}
19078
19079sub matching_end_token {
19080
19081    # find closing character for a pattern
19082    my $beginning_token = shift;
19083
19084    if ( $beginning_token eq '{' ) {
19085        '}';
19086    }
19087    elsif ( $beginning_token eq '[' ) {
19088        ']';
19089    }
19090    elsif ( $beginning_token eq '<' ) {
19091        '>';
19092    }
19093    elsif ( $beginning_token eq '(' ) {
19094        ')';
19095    }
19096    else {
19097        $beginning_token;
19098    }
19099}
19100
19101BEGIN {
19102
19103    # These names are used in error messages
19104    @opening_brace_names = qw# '{' '[' '(' '?' #;
19105    @closing_brace_names = qw# '}' ']' ')' ':' #;
19106
19107    my @digraphs = qw(
19108      .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
19109      <= >= == =~ !~ != ++ -- /= x=
19110    );
19111    @is_digraph{@digraphs} = (1) x scalar(@digraphs);
19112
19113    my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> );
19114    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
19115
19116    # make a hash of all valid token types for self-checking the tokenizer
19117    # (adding NEW_TOKENS : select a new character and add to this list)
19118    my @valid_token_types = qw#
19119      b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
19120      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
19121      #;
19122    push ( @valid_token_types, @digraphs );
19123    push ( @valid_token_types, @trigraphs );
19124    push ( @valid_token_types, '#' );
19125    push ( @valid_token_types, ',' );
19126    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
19127
19128    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
19129    my @file_test_operators =
19130      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
19131    @is_file_test_operator{@file_test_operators} =
19132      (1) x scalar(@file_test_operators);
19133
19134    # these functions have prototypes of the form (&@), so when they are
19135    # followed by a block, that block MAY NOT be followed by an
19136    # operator.
19137    $block_list_operator = make_regex('^(sort|grep|map)$');
19138
19139    # these functions have prototypes of the form (&), so when they are
19140    # followed by a block, that block MAY BE followed by an operator.
19141    $block_operator = make_regex('^(do|eval)$');
19142
19143    # these functions allow an identifier in the indirect object slot
19144    $indirect_object_taker = make_regex('^(print|printf|sort|exec|system)$');
19145
19146    # I'll build the list of keywords incrementally
19147    my @Keywords = ();
19148
19149    # keywords and tokens after which a value or pattern is expected,
19150    # but not an operator.  In other words, these should consume terms
19151    # to their right, or at least they are not expected to be followed
19152    # immediately by operators.
19153    # --added srand 20-mar-01
19154    my @value_requestor = qw(
19155      AUTOLOAD
19156      BEGIN
19157      CHECK
19158      DESTROY
19159      END
19160      EQ
19161      GE
19162      GT
19163      INIT
19164      LE
19165      LT
19166      NE
19167      abs
19168      accept
19169      alarm
19170      and
19171      atan2
19172      bind
19173      binmode
19174      bless
19175      caller
19176      chdir
19177      chmod
19178      chomp
19179      chop
19180      chown
19181      chr
19182      chroot
19183      close
19184      closedir
19185      cmp
19186      connect
19187      continue
19188      cos
19189      crypt
19190      dbmclose
19191      dbmopen
19192      defined
19193      delete
19194      die
19195      dump
19196      each
19197      else
19198      elsif
19199      eof
19200      eq
19201      exec
19202      exists
19203      exit
19204      exp
19205      fcntl
19206      fileno
19207      flock
19208      for
19209      foreach
19210      formline
19211      ge
19212      getc
19213      getgrgid
19214      getgrnam
19215      gethostbyaddr
19216      gethostbyname
19217      getnetbyaddr
19218      getnetbyname
19219      getpeername
19220      getpgrp
19221      getpriority
19222      getprotobyname
19223      getprotobynumber
19224      getpwnam
19225      getpwuid
19226      getservbyname
19227      getservbyport
19228      getsockname
19229      getsockopt
19230      glob
19231      gmtime
19232      goto
19233      grep
19234      gt
19235      hex
19236      if
19237      index
19238      int
19239      ioctl
19240      join
19241      keys
19242      kill
19243      last
19244      lc
19245      lcfirst
19246      le
19247      length
19248      link
19249      listen
19250      local
19251      localtime
19252      lock
19253      log
19254      lstat
19255      lt
19256      map
19257      mkdir
19258      msgctl
19259      msgget
19260      msgrcv
19261      msgsnd
19262      my
19263      ne
19264      next
19265      no
19266      not
19267      oct
19268      open
19269      opendir
19270      or
19271      ord
19272      our
19273      pack
19274      pipe
19275      pop
19276      pos
19277      print
19278      printf
19279      prototype
19280      push
19281      quotemeta
19282      rand
19283      read
19284      readdir
19285      readlink
19286      readline
19287      readpipe
19288      recv
19289      redo
19290      ref
19291      rename
19292      require
19293      reset
19294      return
19295      reverse
19296      rewinddir
19297      rindex
19298      rmdir
19299      scalar
19300      seek
19301      seekdir
19302      select
19303      semctl
19304      semget
19305      semop
19306      send
19307      sethostent
19308      setnetent
19309      setpgrp
19310      setpriority
19311      setprotoent
19312      setservent
19313      setsockopt
19314      shift
19315      shmctl
19316      shmget
19317      shmread
19318      shmwrite
19319      shutdown
19320      sin
19321      sleep
19322      socket
19323      socketpair
19324      sort
19325      splice
19326      split
19327      sprintf
19328      sqrt
19329      srand
19330      stat
19331      study
19332      substr
19333      symlink
19334      syscall
19335      sysopen
19336      sysread
19337      sysseek
19338      system
19339      syswrite
19340      tell
19341      telldir
19342      tie
19343      tied
19344      truncate
19345      uc
19346      ucfirst
19347      umask
19348      undef
19349      unless
19350      unlink
19351      unpack
19352      unshift
19353      untie
19354      until
19355      use
19356      utime
19357      values
19358      vec
19359      waitpid
19360      warn
19361      while
19362      write
19363      xor
19364    );
19365
19366    push ( @Keywords, @value_requestor );
19367
19368    # These are treated the same but are not keywords:
19369    my @extra_vr = qw(
19370      constant
19371      switch
19372      vars
19373    );
19374    push ( @value_requestor, @extra_vr );
19375
19376    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
19377
19378    # this list contains keywords which do not look for arguments,
19379    # so that they might be followed by an operator, or at least
19380    # not a term.
19381    my @operator_requestor = qw(
19382      endgrent
19383      endhostent
19384      endnetent
19385      endprotoent
19386      endpwent
19387      endservent
19388      fork
19389      getgrent
19390      gethostent
19391      getlogin
19392      getnetent
19393      getppid
19394      getprotoent
19395      getpwent
19396      getservent
19397      setgrent
19398      setpwent
19399      time
19400      times
19401      wait
19402      wantarray
19403    );
19404
19405    push ( @Keywords, @operator_requestor );
19406
19407    # These are treated the same but are not considered keywords:
19408    my @extra_or = qw(
19409      STDERR
19410      STDIN
19411      STDOUT
19412    );
19413
19414    push ( @operator_requestor, @extra_or );
19415
19416    @expecting_operator_token{@operator_requestor} =
19417      (1) x scalar(@operator_requestor);
19418
19419    # these token TYPES expect trailing operator but not a term
19420    # note: ++ and -- are post-increment and decrement, 'C' = constant
19421    my @operator_requestor_types = qw( ++ -- C );
19422    @expecting_operator_types{@operator_requestor_types} =
19423      (1) x scalar(@operator_requestor_types);
19424
19425    # these token TYPES consume values (terms)
19426    # note: pp and mm are pre-increment and decrement
19427    # f=semicolon in for,  F=file test operator
19428    my @value_requestor_type = qw#
19429      L { ( [ ~ !~ =~ ; . .. ...  : && ! || = + - x
19430      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||=
19431      <= >= == != => \ > < % * / ? & | ** <=>
19432      f F pp mm Y p m U J G
19433      #;
19434    push ( @value_requestor_type, ',' )
19435      ;    # (perl doesn't like a ',' in a qw block)
19436    @expecting_term_types{@value_requestor_type} =
19437      (1) x scalar(@value_requestor_type);
19438
19439    # For simple syntax checking, it is nice to have a list of operators which
19440    # will really be unhappy if not followed by a term.  This includes most
19441    # of the above...
19442    %really_want_term = %expecting_term_types;
19443
19444    # with these exceptions...
19445    delete $really_want_term{'U'}; # user sub, depends on prototype
19446    delete $really_want_term{'F'}; # file test works on $_ if no following term
19447    delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
19448                                   # let perl do it
19449
19450    # These keywords are handled specially in the tokenizer code:
19451    my @special_keywords = qw(
19452      do
19453      eval
19454      format
19455      m
19456      package
19457      q
19458      qq
19459      qr
19460      qw
19461      qx
19462      s
19463      sub
19464      tr
19465      y
19466    );
19467    push ( @Keywords, @special_keywords );
19468
19469    # Keywords after which list formatting may be used
19470    # WARNING: do not include |map|grep|eval or perl may die on
19471    # syntax errors (map1.t).
19472    my @keyword_taking_list = qw(
19473      and
19474      chmod
19475      chomp
19476      chop
19477      chown
19478      dbmopen
19479      die
19480      elsif
19481      exec
19482      fcntl
19483      for
19484      foreach
19485      formline
19486      getsockopt
19487      if
19488      index
19489      ioctl
19490      join
19491      kill
19492      local
19493      msgctl
19494      msgrcv
19495      msgsnd
19496      my
19497      open
19498      or
19499      our
19500      pack
19501      print
19502      printf
19503      push
19504      read
19505      readpipe
19506      recv
19507      return
19508      reverse
19509      rindex
19510      seek
19511      select
19512      semctl
19513      semget
19514      send
19515      setpriority
19516      setsockopt
19517      shmctl
19518      shmget
19519      shmread
19520      shmwrite
19521      socket
19522      socketpair
19523      sort
19524      splice
19525      split
19526      sprintf
19527      substr
19528      syscall
19529      sysopen
19530      sysread
19531      sysseek
19532      system
19533      syswrite
19534      tie
19535      unless
19536      unlink
19537      unpack
19538      unshift
19539      until
19540      vec
19541      warn
19542      while
19543    );
19544    @is_keyword_taking_list{@keyword_taking_list} =
19545      (1) x scalar(@keyword_taking_list);
19546
19547    # These are not used in any way yet
19548    #    my @unused_keywords = qw(
19549    #      CORE
19550    #     __FILE__
19551    #     __LINE__
19552    #     __PACKAGE__
19553    #     );
19554
19555=pod
19556
19557     The list of keywords was extracted from function 'keyword' in perl file
19558     toke.c version 5.005.03, using this utility, plus a little editing:
19559     (file getkwd.pl):
19560     while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
19561     Add 'get' prefix where necessary, then split into the above lists.
19562
19563     This list should be updated as necessary.
19564     The list should not contain these special variables:
19565     ARGV DATA ENV SIG STDERR STDIN STDOUT
19566     __DATA__ __END__
19567
19568=cut
19569
19570    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
19571}
195721;
19573
19574package main;
19575PerlTidy::perltidy();
19576