1###############################################################################
2#
3# LaTeX::TOM::Parser
4#
5# The parsing class
6#
7###############################################################################
8
9package LaTeX::TOM::Parser;
10
11use strict;
12use base qw(
13    LaTeX::TOM::Node
14    LaTeX::TOM::Tree
15);
16use constant true  => 1;
17use constant false => 0;
18
19use Carp qw(carp croak);
20use File::Basename qw(fileparse);
21
22our $VERSION = '0.07';
23
24my %error_handlers = (
25    0 => sub { warn "parse error: $_[0].\n" },
26    1 => sub { die  "parse error: $_[0].\n" },
27    2 => sub {},
28);
29
30# Constructor
31#
32sub new {
33    my $class = shift;
34
35    no strict 'refs';
36
37    my $self = bless {
38        config => {
39            BRACELESS          => \%{"${class}::BRACELESS"},
40            INNERCMDS          => \%{"${class}::INNERCMDS"},
41            MATHENVS           => \%{"${class}::MATHENVS"},
42            MATHBRACKETS       => \%{"${class}::MATHBRACKETS"},
43            PARSE_ERRORS_FATAL =>  ${"${class}::PARSE_ERRORS_FATAL"},
44            TEXTENVS           => \%{"${class}::TEXTENVS"},
45        },
46    };
47
48    $self->_init(@_);
49
50    return $self;
51}
52
53# Set/reset "globals"
54#
55sub _init {
56    my $parser = shift;
57    my ($parse_errors_fatal, $readinputs, $applymappings) = @_;
58
59    my $retrieve_opt_default = sub
60    {
61        my ($opt, $default) = @_;
62        return $opt if defined $opt;
63        return $default;
64    };
65
66    # set user options
67    #
68    $parser->{readinputs}         = $retrieve_opt_default->($readinputs, 0);
69    $parser->{applymappings}      = $retrieve_opt_default->($applymappings, 0);
70    $parser->{PARSE_ERRORS_FATAL} = $retrieve_opt_default->($parse_errors_fatal, $parser->{config}{PARSE_ERRORS_FATAL});
71
72    # init internal stuff
73    #
74    $parser->{MATHBRACKETS} = $parser->{config}{MATHBRACKETS};
75
76    # this will hold a running list/hash of commands that have been remapped
77    $parser->{MAPPEDCMDS} = {};
78
79    # this will hold a running list/hash of commands that have been used. We dont
80    # bother apply mappings except to commands that have been used.
81    $parser->{USED_COMMANDS} = {};
82
83    # no file yet
84    $parser->{file} = undef;
85}
86
87# Parse a LaTeX file, return a tree. You probably want this method.
88#
89sub parseFile {
90    my $parser = shift;
91    my $filename = shift;
92
93    # init variables
94    #
95    $parser->{file} = $filename;        # file name member data
96    my $tree = {};                      # init output tree
97
98    # read in text from file or bomb out
99    #
100    my $text = _readFile($filename, true);
101
102    # do the parse
103    #
104    $tree = $parser->parse($text);
105
106    return $tree;
107}
108
109# main parsing entrypoint
110#
111sub parse {
112    my $parser = shift;
113    my ($text) = @_;
114
115    # first half of parsing (goes up to finding commands, reading inputs)
116    #
117    my ($tree, $bracehash) = $parser->_parseA($text);
118    _debug(
119        'done with _parseA',
120         sub { $tree->_warn() },
121    );
122
123    # handle mappings
124    #
125    $parser->_applyMappings($tree) if $parser->{applymappings};
126    _debug(
127        'done with _applyMappings',
128         sub { $tree->_warn() },
129    );
130
131    # second half of parsing (environments)
132    #
133    $parser->_parseB($tree);
134    _debug(
135        'done with _parseB',
136         sub { $tree->_warn() },
137    );
138
139    # once all the above is done we can propegate math/plaintext modes down
140    #
141    $parser->_propegateModes($tree, 0, 0);   # math = 0, plaintext = 0
142    _debug(
143        'done with _propegateModes',
144         sub { $tree->_warn() },
145    );
146
147    # handle kooky \[ \] math mode
148    #
149    if (not exists $parser->{MAPPEDCMDS}->{'\\['}) {
150        # math mode (\[ \], \( \))
151        $parser->_stage5($tree, {'\\[' => '\\]', '\\(' => '\\)'}, 1);
152        $parser->_propegateModes($tree, 0, 0);     # have to do this again of course
153        $parser->{MATHBRACKETS}->{'\\['} = '\\]';  # put back in brackets list for
154        $parser->{MATHBRACKETS}->{'\\('} = '\\)';  # printing purposes.
155    }
156    _debug(
157        undef,
158        sub { $tree->_warn() },
159    );
160
161    $tree->listify;     # add linked-list stuff
162
163    return $tree;
164}
165
166# Parsing with no mappings and no externally accessible parser object.
167#
168sub _basicparse {
169    my $parser = shift; # @_ would break code
170    my $text   = shift;
171
172    my $parse_errors_fatal = (defined $_[0] ? $_[0] : $parser->{config}{PARSE_ERRORS_FATAL});
173    my $readinputs = (defined $_[1] ? $_[1] : 1);
174
175    $parser = LaTeX::TOM::Parser->new($parse_errors_fatal, $readinputs);
176    my ($tree, $bracehash) = $parser->_parseA($text);
177
178    $parser->_parseB($tree);
179
180    $tree->listify; # add linked-list stuff
181
182    return ($tree, $bracehash);
183}
184
185# start the tree. separate out comment and text nodes.
186#
187sub _stage1 {
188    my $parser = shift;
189    my $text = shift;
190
191    my @nodes = _getTextAndCommentNodes($text, 0, length($text));
192
193    return LaTeX::TOM::Tree->new([@nodes]);
194}
195
196# this stage parses the braces ({}) and adds the corresponding structure to
197# the tree.
198#
199sub _stage2 {
200    my $parser = shift;
201
202    my $tree = shift;
203    my $bracehash = shift || undef;
204    my $startidx = shift || 0;      # last two params for starting at some specific
205    my $startpos = shift || 0;      # node and offset.
206
207    my %blankhash;
208
209    if (not defined $bracehash) {
210        $bracehash = {%blankhash};
211    }
212
213    my $leftidx = -1;
214    my $leftpos = -1;
215    my $leftcount = 0;
216
217    # loop through the nodes
218    for (my $i = $startidx; $i < @{$tree->{nodes}}; $i++) {
219        my $node = $tree->{nodes}[$i];
220        my $spos = $node->{start};	# get text start position
221
222        # set position placeholder within the text block
223        my $pos = ($i == $startidx) ? $startpos : 0;
224
225        if ($node->{type} eq 'TEXT') {
226
227         _debug("parseStage2: looking at text node: [$node->{content}]", undef);
228
229         my ($nextpos, $brace) = _findbrace($node->{content}, $pos);
230         while ($nextpos != -1) {
231
232            $pos = $nextpos + 1; # update position pointer
233
234            # handle left brace
235            if ($brace eq '{') {
236                _debug("found '{' at position $nextpos, leftcount is $leftcount", undef);
237                if ($leftcount == 0) {
238                    $leftpos = $nextpos;
239                    $leftidx = $i
240                }
241                $leftcount++;
242            }
243
244            # handle right brance
245            elsif ($brace eq '}') {
246
247                _debug("found '}' at position $nextpos, leftcount is $leftcount", undef);
248                my $rightpos = $nextpos;
249                $leftcount--;
250
251                # found the corresponding right brace to our starting left brace
252                if ($leftcount == 0) {
253
254                    # see if we have to split the text node into 3 parts
255                    #
256                    if ($leftidx == $i) {
257
258                        my ($leftside, $textnode3) = $node->split($rightpos, $rightpos);
259                        my ($textnode1, $textnode2) = $leftside->split($leftpos, $leftpos);
260
261                        # make the new GROUP node
262                        my $groupnode = LaTeX::TOM::Node->new(
263                            {type => 'GROUP',
264                             start => $textnode2->{start} - 1,
265                             end => $textnode2->{end} + 1,
266                             children => LaTeX::TOM::Tree->new([$textnode2]),
267                            });
268
269                        # splice the new subtree into the old location
270                        splice @{$tree->{nodes}}, $i, 1, $textnode1, $groupnode, $textnode3;
271
272                        # add to the brace-pair lookup table
273                        $bracehash->{$groupnode->{start}} = $groupnode->{end};
274                        $bracehash->{$groupnode->{end}} = $groupnode->{start};
275
276                        # recur into new child node
277                        $parser->_stage2($groupnode->{children}, $bracehash);
278
279                        $i++; # skip to textnode3 for further processing
280                    }
281
282                    # split across nodes
283                    #
284                    else {
285
286                        my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos);
287                        my ($textnode3, $textnode4) = $node->split($rightpos, $rightpos);
288
289                        # remove nodes in between the node we found '{' in and the node
290                        # we found '}' in
291                        #
292                        my @removed = splice @{$tree->{nodes}}, $leftidx+1, $i-$leftidx-1;
293
294                        # create a group node that contains the text after the left brace,
295                        # then all the nodes up until the next text node, then the text
296                        # before the right brace.
297                        #
298                        my $groupnode = LaTeX::TOM::Node->new(
299                            {type => 'GROUP',
300                             start => $textnode2->{start} - 1,
301                             end => $textnode3->{end} + 1,
302                             children => LaTeX::TOM::Tree->new(
303                                [$textnode2,
304                                 @removed,
305                                 $textnode3]),
306                            });
307
308                        # replace the two original text nodes with the leftover left and
309                        # right portions, as well as the group node with everything in
310                        # the middle.
311                        #
312                        splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $groupnode, $textnode4;
313
314                        # add to the brace-pair lookup table
315                        $bracehash->{$groupnode->{start}} = $groupnode->{end};
316                        $bracehash->{$groupnode->{end}} = $groupnode->{start};
317
318                        # recur into new child nodes
319                        $parser->_stage2($groupnode->{children}, $bracehash);
320
321                        # step back to textnode4 on this level for further processing
322                        $i -= scalar @removed;
323                    }
324
325                    $leftpos = -1; # reset left data
326                    $leftidx = -1;
327                    last;
328                } # $leftcount == 0
329
330                # check for '}'-based error
331                #
332                if ($leftcount < 0) {
333                    $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("'}' before '{' at " . ($spos + $rightpos));
334                    $leftcount = 0; # reset and continue
335                }
336            } # right brace
337
338            ($nextpos, $brace) = _findbrace($node->{content}, $pos);
339
340         } # while (braces left)
341
342         } # if TEXT
343
344    } # loop over all nodes
345
346    # check for extra '{' parse error
347    #
348    if ($leftcount > 0) {
349        my $spos = $tree->{nodes}[$leftidx]->{start}; # get text start position
350        $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '{' at " . ($spos + $leftpos));
351
352        # try to continue on, after the offending brace
353        $parser->_stage2($tree, $bracehash, $leftidx, $leftpos + 1);
354    }
355
356    return $bracehash;
357}
358
359# this stage finds LaTeX commands and accordingly turns GROUP nodes into
360# command nodes, labeled with the command
361#
362sub _stage3 {
363    my $parser = shift;
364
365    my $tree = shift;
366    my $parent = shift;
367
368    for (my $i = 0; $i< @{$tree->{nodes}}; $i++) {
369
370        my $node = $tree->{nodes}[$i];
371
372        # check text node for command tag
373        if ($node->{type} eq 'TEXT') {
374            my $text = $node->{content};
375
376            # inner command (such as {\command text text}). our regexp checks to see
377            # if this text chunk begins with \command, since that would be the case
378            # due to the previous parsing stages. if found, the parent node is
379            # promoted to a command.
380            #
381            if ($text =~ /^\s*\\(\w+\*?)/ && defined $parent && $parser->{config}{INNERCMDS}->{$1}) {
382                my $command = $1;
383
384                # if the parent is already a command node, we have to make a new
385                # nested command node
386                #
387                if ($parent->{type} eq 'COMMAND') {
388
389                    # make a new command node
390                    my $newnode = LaTeX::TOM::Node->new(
391                        {type => 'COMMAND',
392                         command => $command,
393                         start => $parent->{start},
394                         end => $parent->{end},
395                         position => 'inner',
396                         children => $parent->{children} });
397
398                    # point parent to it
399                    $parent->{children} = LaTeX::TOM::Tree->new([$newnode]);
400
401                    # start over at this level (get additional inner commands)
402                    $parent = $newnode;
403                    $i = -1;
404
405                    $parser->{USED_COMMANDS}->{$newnode->{command}} = 1;
406                }
407
408                # parent is a naked group, we can make it into a command node
409                #
410                elsif ($parent->{type} eq 'GROUP') {
411                    $parent->{type} = 'COMMAND';
412                    $parent->{command} = $command;
413                    $parent->{position} = 'inner';
414
415                    # start over at this level
416                    $i = -1;
417
418                    $parser->{USED_COMMANDS}->{$parent->{command}} = 1;
419                }
420
421                $node->{content} =~ s/^\s*\\(?:\w+\*?)//o;
422            }
423
424            # outer command (such as \command{parameters}). our regexp checks to
425            # see if this text chunk ends in \command, since that would be the case
426            # due to the previous parsing stages.
427            #
428            if ($text =~ /(?:^|[^\\])(\\\w+\*?(\s*\[.*?\])?)\s*$/os &&
429                    defined $tree->{nodes}[$i+1] &&
430                    $tree->{nodes}[$i+1]->{type} eq 'GROUP') {
431
432                my $tag = $1;
433
434                _debug("found text node [$text] with command tag [$tag]", undef);
435
436                # remove the text
437                $node->{content} =~ s/\\\w+\*?\s*(?:\[.*?\])?\s*$//os;
438
439                # parse it for command and ops
440                $tag =~ /^\\(\w+\*?)\s*(?:\[(.*?)\])?$/os;
441
442                my $command = $1;
443                my $opts = $2;
444
445                # make the next node a command node with the above data
446                my $next = $tree->{nodes}[$i+1];
447
448                $next->{type} = 'COMMAND';
449                $next->{command} = $command;
450                $next->{opts} = $opts;
451                $next->{position} = 'outer';
452
453                $parser->{USED_COMMANDS}->{$next->{command}} = 1;
454            }
455
456            # recognize braceless commands
457            #
458            if ($text =~ /(\\(\w+\*?)[ \t]+(\S+))/gso || $text =~ /(\\(\w+)(\d+))/gso) {
459                my $all = $1;
460                my $command = $2;
461                my $param = $3;
462
463                if ($parser->{config}{BRACELESS}->{$command}) {
464                 # warn "found braceless command $command with param $param";
465
466                    # get location to split from node text
467                    my $a = index $node->{content}, $all, 0;
468                    my $b = $a + length($all) - 1;
469
470                    # make all the new nodes
471
472                    # new left and right text nodes
473                    my ($leftnode, $rightnode) = $node->split($a, $b);
474
475                    # param contents node
476                    my $pstart = index $node->{content}, $param, $a;
477                    my $newchild = LaTeX::TOM::Node->new(
478                        {type => 'TEXT',
479                         start => $node->{start} + $pstart,
480                         end => $node->{start} + $pstart + length($param) - 1,
481                         content => $param });
482
483                    # new command node
484                    my $commandnode = LaTeX::TOM::Node->new(
485                        {type => 'COMMAND',
486                         braces => 0,
487                         command => $command,
488                         start => $node->{start} + $a,
489                         end => $node->{start} + $b,
490                         children => LaTeX::TOM::Tree->new([$newchild]),
491                        });
492
493                    $parser->{USED_COMMANDS}->{$commandnode->{command}} = 1;
494
495                    # splice these all into the original array
496                    splice @{$tree->{nodes}}, $i, 1, $leftnode, $commandnode, $rightnode;
497
498                    # make the rightnode the node we're currently analyzing
499                    $node = $rightnode;
500
501                    # make sure outer loop will continue parsing *after* rightnode
502                    $i += 2;
503                }
504            }
505        }
506
507        # recur
508        if ($node->{type} eq 'GROUP' ||
509            $node->{type} eq 'COMMAND') {
510
511            $parser->_stage3($node->{children}, $node);
512        }
513    }
514}
515
516# this stage finds \begin{x} \end{x} environments and shoves their contents
517#	down into a new child node, with a parent node of ENVIRONMENT type.
518#
519# this has the effect of making the tree deeper, since much of the structure
520#	is in environment tags and will now be picked up.
521#
522# for ENVIRONMENTs, "start" means the ending } on the \begin tag,
523# "end" means the starting \ on the \end tag,
524# "ostart" is the starting \ on the "begin" tag,
525# "oend" is the ending } on the "end" tag, and
526# and "class" is the "x" from above.
527#
528sub _stage4 {
529    my $parser = shift;
530    my $tree = shift;
531
532    my $bcount = 0; # \begin "stack count"
533    my $class = ""; # environment class
534    my $bidx = 0;   # \begin array index.
535
536    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
537        my $node = $tree->{nodes}->[$i];
538
539        # see if this is a "\begin" command node
540        if ($node->{type} eq 'COMMAND' && $node->{command} eq 'begin') {
541
542            _debug("parseStage4: found a begin COMMAND node, $node->{children}->{nodes}[0]->{content} @ $node->{start}", undef);
543
544            # start a new "stack"
545            if ($bcount == 0) {
546                $bidx = $i;
547                $bcount++;
548                $class = $node->{children}->{nodes}->[0]->{content};
549                _debug("parseStage4: opening environment tag found, class = $class", undef);
550            }
551
552            # add to the "stack"
553            elsif ($node->{children}->{nodes}->[0]->{content} eq $class) {
554                $bcount++;
555                _debug("parseStage4: incrementing tag count for $class", undef);
556            }
557        }
558
559        # handle "\end" command nodes
560        elsif ($node->{type} eq 'COMMAND' &&
561               $node->{command} eq 'end' &&
562               $node->{children}->{nodes}->[0]->{content} eq $class) {
563
564            $bcount--;
565            _debug("parseStage4: decrementing tag count for $class", undef);
566
567            # we found our closing "\end" tag. replace everything with the proper
568            # ENVIRONMENT tag and subtree.
569            #
570            if ($bcount == 0) {
571
572                _debug("parseStage4: closing environment $class", undef);
573
574                # first we must take everything between the "\begin" and "\end"
575                # nodes and put them in a new array, removing them from the old one
576                my @newarray = splice @{$tree->{nodes}}, $bidx+1, $i - ($bidx + 1);
577
578                # make the ENVIRONMENT node
579                my $start = $tree->{nodes}[$bidx]->{end};
580                my $end = $node->{start};
581                my $envnode = LaTeX::TOM::Node->new(
582                    {type => 'ENVIRONMENT',
583                     class => $class,
584                     start => $start, # "inner" start and end
585                     end => $end,
586                     ostart => $start - length('begin') - length($class) - 2,
587                     oend => $end + length('end') + length($class) + 2,
588                     children => LaTeX::TOM::Tree->new([@newarray]),
589                    });
590
591                if ($parser->{config}{MATHENVS}->{$envnode->{class}}) {
592                    $envnode->{math} = 1;
593                }
594
595                # replace the \begin and \end COMMAND nodes with the single
596                # environment node
597                splice @{$tree->{nodes}}, $bidx, 2, $envnode;
598
599                $class = ""; # reset class.
600
601                # i is going to change by however many nodes we removed
602                $i -= scalar @newarray;
603
604                # recur into the children
605                $parser->_stage4($envnode->{children});
606            }
607        }
608
609        # recur in general
610        elsif ($node->{children}) {
611            $parser->_stage4($node->{children});
612        }
613    }
614
615    # parse error if we're missing an "\end" tag.
616    if ($bcount > 0) {
617        $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->(
618            "missing \\end{$class} for \\begin{$class} at position $tree->{nodes}[$bidx]->{end}"
619        );
620    }
621}
622
623# This is the "math" stage: here we grab simple-delimeter math modes from
624# the text they are embedded in, and turn those into new groupings, with the
625# "math" flag set.
626#
627# having this top level to go over all the bracket types prevents some pretty
628# bad combinatorial explosion
629#
630sub _stage5 {
631    my $parser = shift;
632
633    my $tree = shift;
634    my $caremath = shift || 0;
635
636    my $brackets = $parser->{MATHBRACKETS};
637
638    # loop through all the different math mode bracket types
639    foreach my $left (sort {length($b) <=> length($a)} keys %$brackets) {
640        my $right = $brackets->{$left};
641
642        $parser->_stage5_r($tree, $left, $right, $caremath);
643    }
644}
645
646# recursive meat of above
647#
648sub _stage5_r {
649    my $parser = shift;
650
651    my $tree = shift;
652    my $left = shift;
653    my $right = shift;
654    my $caremath = shift || 0; # do we care if we're already in math mode?
655                               # this matters for \( \), \[ \]
656
657    my $leftpos = -1; # no text pos for found left brace yet.
658    my $leftidx = -1; # no array index for found left brace yet.
659
660        # loop through the nodes
661        for (my $i = 0; $i < scalar @{$tree->{nodes}}; $i++) {
662            my $node = $tree->{nodes}[$i];
663            my $pos = 0; # position placeholder within the text block
664            my $spos = $node->{start}; # get text start position
665
666            if ($node->{type} eq 'TEXT' &&
667               (!$caremath || (!$node->{math} && $caremath))) {
668
669                # search for left brace if we haven't started a pair yet
670                if ($leftidx == -1) {
671                    $leftpos = _findsymbol($node->{content}, $left, $pos);
672
673                    if ($leftpos != -1) {
674                        _debug("found (left) $left in [$node->{content}]", undef);
675                        $leftidx = $i;
676                        $pos = $leftpos + 1; # next pos to search from
677                    }
678                }
679
680                # search for a right brace
681                if ($leftpos != -1) {
682                    my $rightpos = _findsymbol($node->{content}, $right, $pos);
683
684                    # found
685                    if ($rightpos != -1) {
686
687                        # we have to split the text node into 3 parts
688                        if ($leftidx == $i) {
689                            _debug("splitwithin: found (right) $right in [$node->{content}]", undef);
690
691                            my ($leftnode, $textnode3) = $node->split($rightpos, $rightpos + length($right) - 1);
692                            my ($textnode1, $textnode2) = $leftnode->split($leftpos, $leftpos + length($left) - 1);
693
694                            my $startpos = $spos; # get text start position
695
696                            # make the math ENVIRONMENT node
697                            my $mathnode = LaTeX::TOM::Node->new(
698                                {type => 'ENVIRONMENT',
699                                class => $left,	# use left delim as class
700                                math => 1,
701                                start => $startpos + $leftpos,
702                                ostart => $startpos + $leftpos - length($left) + 1,
703                                end => $startpos + $rightpos,
704                                oend => $startpos + $rightpos + length($right) - 1,
705                                children => LaTeX::TOM::Tree->new([$textnode2]),
706                                });
707
708                            splice @{$tree->{nodes}}, $i, 1, $textnode1, $mathnode, $textnode3;
709
710                            $i++; # skip ahead two nodes, so we'll be parsing textnode3
711                        }
712
713                        # split across nodes
714                        else {
715
716                            _debug("splitacross: found (right) $right in [$node->{content}]", undef);
717
718                            # create new set of 4 smaller text nodes from the original two
719                            # that contain the left and right delimeters
720                            #
721                            my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos + length($left) - 1);
722                            my ($textnode3, $textnode4) = $tree->{nodes}[$i]->split($rightpos, $rightpos + length($right) - 1);
723
724                            # nodes to remove "from the middle" (between the left and right
725                            # text nodes which contain the delimeters)
726                            #
727                            my @remnodes = splice @{$tree->{nodes}}, $leftidx+1, $i - $leftidx - 1;
728
729                            # create a math node that contains the text after the left brace,
730                            # then all the nodes up until the next text node, then the text
731                            # before the right brace.
732                            #
733                            my $mathnode = LaTeX::TOM::Node->new(
734                                {type => 'ENVIRONMENT',
735                                class => $left,
736                                math => 1,
737                                start => $textnode2->{start} - 1,
738                                end => $textnode3->{end} + 1,
739                                ostart => $textnode2->{start} - 1 - length($left) + 1,
740                                oend => $textnode3->{end} + 1 + length($right) - 1,
741                                children => LaTeX::TOM::Tree->new(
742                                [$textnode2,
743                                 @remnodes,
744                                 $textnode3]),
745                                });
746
747                            # replace (TEXT_A, ... , TEXT_B) with the mathnode created above
748                            splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $mathnode, $textnode4;
749
750                            # do all nodes again but the very leftmost
751                            #
752                            $i = $leftidx;
753                        }
754
755                        $leftpos = -1; # reset left data
756                        $leftidx = -1;
757                    } # right brace
758                } # left brace
759                else {
760
761                    my $rightpos = _findsymbol($node->{content}, $right, $pos);
762
763                    if ($rightpos != -1) {
764                        my $startpos = $node->{start}; # get text start position
765                        $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$right' at " . ($startpos + $rightpos));
766                    }
767                }
768            } # if TEXT
769
770            # recur, but not into verbatim environments!
771            #
772            elsif ($node->{children} &&
773                         !(
774                             ($node->{type} eq 'COMMAND' && $node->{command} =~ /^verb/) ||
775                             ($node->{type} eq 'ENVIRONMENT' && $node->{class} =~ /^verbatim/))) {
776
777                if ($LaTeX::TOM::DEBUG) {
778                    my $message  = "Recurring into $node->{type} node ";
779                       $message .= $node->{command} if ($node->{type} eq 'COMMAND');
780                       $message .= $node->{class}   if ($node->{type} eq 'ENVIRONMENT');
781                    _debug($message, undef);
782                }
783
784                $parser->_stage5_r($node->{children}, $left, $right, $caremath);
785            }
786
787        } # loop over text blocks
788
789        if ($leftpos != -1) {
790            my $startpos = $tree->{nodes}[$leftidx]->{start};   # get text start position
791            $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$left' at " . ($startpos + $leftpos));
792        }
793}
794
795# This stage propegates the math mode flag and plaintext flags downward.
796#
797# After this is done, we can make the claim that only text nodes marked with
798# the plaintext flag should be printed.	math nodes will have the "math" flag,
799# and also plantext = 0.
800#
801sub _propegateModes {
802    my $parser = shift;
803
804    my $tree = shift;
805    my $math = shift;       # most likely want to call this with 0
806    my $plaintext = shift;  # ditto this-- default to nothing visible.
807
808    foreach my $node (@{$tree->{nodes}}) {
809
810        # handle text nodes on this level. set flags.
811        #
812        if ($node->{type} eq 'TEXT') {
813            $node->{math} = $math;
814            $node->{plaintext} = $plaintext;
815        }
816
817        # propegate flags downward, possibly modified
818        #
819        elsif (defined $node->{children}) {
820
821            my $mathflag = $math;    # math propegates down by default
822            my $plaintextflag = 0;   # plaintext flag does NOT propegate by default
823
824            # handle math or plain text forcing envs
825            #
826            if ($node->{type} eq 'ENVIRONMENT' || $node->{type} eq 'COMMAND') {
827                if (defined $node->{class} && (
828                    $parser->{config}{MATHENVS}->{$node->{class}} ||
829                    $parser->{config}{MATHENVS}->{"$node->{class}*"})
830                   )
831                {
832                    $mathflag = 1;
833                    $plaintextflag = 0;
834                }
835                elsif (($node->{type} eq 'COMMAND' &&
836                                ($parser->{config}{TEXTENVS}->{$node->{command}} ||
837                                 $parser->{config}{TEXTENVS}->{"$node->{command}*"})) ||
838                             ($node->{type} eq 'ENVIRONMENT' &&
839                                ($parser->{config}{TEXTENVS}->{$node->{class}} ||
840                                 $parser->{config}{TEXTENVS}{"$node->{command}*"}))
841                            ) {
842
843                    $mathflag = 0;
844                    $plaintextflag = 1;
845                }
846            }
847
848            # groupings change nothing
849            #
850            elsif ($node->{type} eq 'GROUP') {
851                $mathflag = $math;
852                $plaintextflag = $plaintext;
853            }
854
855            # recur
856            $parser->_propegateModes($node->{children}, $mathflag, $plaintextflag);
857        }
858    }
859}
860
861# apply a mapping to text nodes in a tree
862#
863# for newcommands and defs: mapping is a hash:
864#
865# {name, nparams, template, type}
866#
867# name is a string
868# nparams is an integer
869# template is a tree fragement containing text nodes with #x flags, where
870# parameters will be replaced.
871# type is "command"
872#
873# for newenvironments:
874#
875# {name, nparams, btemplate, etemplate, type}
876#
877# same as above, except type is "environment" and there are two templates,
878# btemplate and etemplate.
879#
880sub _applyMapping {
881    my $parser = shift;
882
883    my $tree = shift;
884    my $mapping = shift;
885    my $i = shift || 0;  # index to start with, in tree.
886
887    my $applications = 0; # keep track of # of applications
888
889    for (; $i < @{$tree->{nodes}}; $i++) {
890
891        my $node = $tree->{nodes}[$i];
892
893        # begin environment nodes
894        #
895        if ($node->{type}                            eq 'COMMAND'
896         && $node->{command}                         eq 'begin'
897         && $node->{children}->{nodes}[0]->{content} eq $mapping->{name}
898        ) {
899            # grab the nparams next group nodes as parameters
900            #
901            my @params = ();
902
903            my $remain = $mapping->{nparams};
904            my $j = 1;
905            while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {
906
907                my $node = $tree->{nodes}[$i + $j];
908
909                # grab group node
910                if ($node->{type} eq 'GROUP') {
911                    push @params, $node->{children};
912                    $remain--;
913                }
914
915                $j++;
916            }
917
918            # if we didn't get enough group nodes, bomb out
919            next if $remain;
920
921            # otherwise make new subtree
922            my $applied = _applyParamsToTemplate($mapping->{btemplate}, @params);
923
924            # splice in the result
925            splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}};
926
927            # skip past all the new stuff
928            $i += scalar @{$applied->{nodes}} - 1;
929        }
930
931        # end environment nodes
932        #
933        elsif ($node->{type}                            eq 'COMMAND'
934            && $node->{command}                         eq 'end'
935            && $node->{children}->{nodes}[0]->{content} eq $mapping->{name}
936        ) {
937            # make new subtree (no params)
938            my $applied = $mapping->{etemplate}->copy();
939
940            # splice in the result
941            splice @{$tree->{nodes}}, $i, 1, @{$applied->{nodes}};
942
943            # skip past all the new stuff
944            $i += scalar @{$applied->{nodes}} - 1;
945
946            $applications++; # only count end environment nodes
947        }
948
949        # newcommand nodes
950        #
951        elsif ($node->{type}       eq 'COMMAND'
952            && $node->{command}    eq $mapping->{name}
953            && $mapping->{nparams}
954        ) {
955            my @params = ();
956
957            # children of COMMAND node will be first parameter
958            push @params, $node->{children};
959
960            # find next nparams GROUP nodes and push their children onto @params
961            my $remain = $mapping->{nparams} - 1;
962            my $j = 1;
963            while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {
964
965                my $node = $tree->{nodes}[$i + $j];
966
967                # grab group node
968                if ($node->{type} eq 'GROUP') {
969                    push @params, $node->{children};
970                    $remain--;
971                }
972
973                $j++;
974            }
975
976            # if we didn't get enough group nodes, bomb out
977            next if ($remain > 0);
978
979            # apply the params to the template
980            my $applied = _applyParamsToTemplate($mapping->{template}, @params);
981
982            # splice in the result
983            splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}};
984
985            # skip past all the new stuff
986            $i += scalar @{$applied->{nodes}} - 1;
987
988            $applications++;
989        }
990
991        # find 0-param mappings
992        elsif ($node->{type} eq 'TEXT' && !$mapping->{nparams}) {
993
994             my $text = $node->{content};
995             my $command = $mapping->{name};
996
997             # find occurrences of the mapping command
998             #
999             my $wordend = ($command =~ /\w$/ ? 1 : 0);
1000             while (($wordend && $text =~ /\\\Q$command\E(\W|$)/g) ||
1001                            (!$wordend && $text =~ /\\\Q$command\E/g)) {
1002
1003                 _debug("found occurrence of mapping $command", undef);
1004
1005                 my $idx = index $node->{content}, '\\' . $command, 0;
1006
1007                 # split the text node at that command
1008                 my ($leftnode, $rightnode) = $node->split($idx, $idx + length($command));
1009
1010                 # copy the mapping template
1011                 my $applied = $mapping->{template}->copy();
1012
1013                 # splice the new nodes in
1014                 splice @{$tree->{nodes}}, $i, 1, $leftnode, @{$applied->{nodes}}, $rightnode;
1015
1016                 # adjust i so we end up on rightnode when we're done
1017                 $i += scalar @{$applied->{nodes}} + 1;
1018
1019                 # get the next node
1020                 $node = $tree->{$node}[$i];
1021
1022                 # count application
1023                 $applications++;
1024             }
1025        }
1026
1027        # recur
1028        elsif ($node->{children}) {
1029
1030            $applications += $parser->_applyMapping($node->{children}, $mapping);
1031        }
1032    }
1033
1034    return $applications;
1035}
1036
1037# find and apply all mappings in the tree, progressively and recursively.
1038# a mapping applies to the entire tree and subtree consisting of nodes AFTER
1039# itself in the level array.
1040#
1041sub _applyMappings {
1042    my $parser = shift;
1043
1044    my $tree = shift;
1045
1046    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
1047
1048        my $prev = $tree->{nodes}[$i-1];
1049        my $node = $tree->{nodes}[$i];
1050
1051        # find newcommands
1052        if ($node->{type} eq 'COMMAND' &&
1053                $node->{command} =~ /^(re)?newcommand$/) {
1054
1055            my $mapping = _makeMapping($tree, $i);
1056            next if (!$mapping->{name}); # skip fragged commands
1057
1058            if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
1059                _debug("applying (nc) mapping $mapping->{name}", undef);
1060            } else {
1061                _debug("NOT applying (nc) mapping $mapping->{name}", undef);
1062                next;
1063            }
1064
1065            # add to mappings list
1066            #
1067            $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;
1068
1069            _debug("found a mapping with name $mapping->{name}, $mapping->{nparams} params", undef);
1070
1071            # remove the mapping declaration
1072            #
1073            splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;
1074
1075            # apply the mapping
1076            my $count = $parser->_applyMapping($tree, $mapping, $i);
1077
1078            if ($count > 0) {
1079                _debug("printing altered subtree", sub { $tree->_warn() });
1080            }
1081
1082            $i--; # since we removed the cmd node, check this index again
1083        }
1084
1085        # handle "\newenvironment" mappings
1086        elsif ($node->{type} eq 'COMMAND' &&
1087                 $node->{command} =~ /^(re)?newenvironment$/) {
1088
1089            # make a mapping hash
1090            #
1091            my $mapping = $parser->_makeEnvMapping($tree, $i);
1092            next if (!$mapping->{name}); # skip fragged commands.
1093
1094            _debug("applying (ne) mapping $mapping->{name}", undef);
1095
1096            # remove the mapping declaration
1097            #
1098            splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;
1099
1100            # apply the mapping
1101            #
1102            my $count = $parser->_applyMapping($tree, $mapping, $i);
1103        }
1104
1105        # handle "\def" stype commands.
1106        elsif ($node->{type} eq 'COMMAND' &&
1107                 defined $prev &&
1108                 $prev->{type} eq 'TEXT' &&
1109                 $prev->{content} =~ /\\def\s*$/o) {
1110
1111             _debug("found def style mapping $node->{command}", undef);
1112
1113             # remove the \def
1114             $prev->{content} =~ s/\\def\s*$//o;
1115
1116             # make the mapping
1117             my $mapping = {name => $node->{command},
1118                nparams => 0,
1119                template => $node->{children}->copy(),
1120                type => 'command'};
1121
1122             next if (!$mapping->{name}); # skip fragged commands
1123
1124             if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
1125                 _debug("applying (def) mapping $mapping->{name}", undef);
1126             } else {
1127                 _debug("NOT applying (def) mapping $mapping->{name}", undef);
1128                 next;
1129             }
1130
1131             # add to mappings list
1132             #
1133             $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;
1134
1135             _debug("template is", sub { $mapping->{template}->_warn() });
1136
1137             # remove the command node
1138             splice @{$tree->{nodes}}, $i, 1;
1139
1140             # apply the mapping
1141             my $count = $parser->_applyMapping($tree, $mapping, $i);
1142
1143             $i--; # check this index again
1144        }
1145
1146        # recur
1147        elsif ($node->{children}) {
1148
1149            $parser->_applyMappings($node->{children});
1150        }
1151    }
1152}
1153
1154# read files from \input commands and place into the tree, parsed
1155#
1156# also include bibliographies
1157#
1158sub _addInputs {
1159    my $parser = shift;
1160
1161    my $tree = shift;
1162
1163    for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
1164
1165        my $node = $tree->{nodes}[$i];
1166
1167        if ($node->{type}    eq 'COMMAND'
1168         && $node->{command} eq 'input'
1169        ) {
1170            my $file = $node->{children}->{nodes}[0]->{content};
1171            next if $file =~ /pstex/; # ignore pstex images
1172
1173            _debug("reading input file $file", undef);
1174
1175            my $contents;
1176            my $filename = fileparse($file);
1177            my $has_extension = qr/\.\S+$/;
1178
1179            # read in contents of file
1180            if (-e $file && $filename =~ $has_extension) {
1181                $contents = _readFile($file);
1182            }
1183            elsif ($filename !~ $has_extension) {
1184                $file = "$file.tex";
1185                $contents = _readFile($file) if -e $file;
1186            }
1187
1188            # dump Psfig/TeX files, they aren't useful to us and have
1189            # nonconforming syntax. Use declaration line as our heuristic.
1190            #
1191            if (defined $contents
1192                     && $contents =~ m!^ \% \s*? Psfig/TeX \s* $!mx
1193            ) {
1194                undef $contents;
1195                carp "ignoring Psfig input `$file'";
1196            }
1197
1198            # actually do the parse of the sub-content
1199            #
1200            if (defined $contents) {
1201                # parse into a tree
1202                my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});
1203
1204                # replace \input command node with subtree
1205                splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}};
1206
1207                # step back
1208                $i--;
1209            }
1210        }
1211        elsif ($node->{type}    eq 'COMMAND'
1212            && $node->{command} eq 'bibliography'
1213        ) {
1214             # try to find a .bbl file
1215             #
1216             foreach my $file (<*.bbl>) {
1217
1218                 my $contents = _readFile($file);
1219
1220                 if (defined $contents) {
1221
1222                     my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});
1223                     splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}};
1224                     $i--;
1225                 }
1226             }
1227        }
1228
1229        # recur
1230        if ($node->{children}) {
1231            $parser->_addInputs($node->{children});
1232        }
1233    }
1234}
1235
1236# do pre-mapping parsing
1237#
1238sub _parseA {
1239    my $parser = shift;
1240    my $text = shift;
1241
1242    my $tree = $parser->_stage1($text);
1243    my $bracehash = $parser->_stage2($tree);
1244
1245    $parser->_stage3($tree);
1246
1247    $parser->_addInputs($tree) if $parser->{readinputs};
1248
1249    return ($tree, $bracehash);
1250}
1251
1252# do post-mapping parsing (make environments)
1253#
1254sub _parseB {
1255    my $parser = shift;
1256    my $tree = shift;
1257
1258    $parser->_stage4($tree);
1259
1260    _debug("done with parseStage4", undef);
1261
1262    $parser->_stage5($tree, 0);
1263
1264    _debug("done with parseStage5", undef);
1265}
1266
1267###############################################################################
1268#
1269# Parser "Static" Subroutines
1270#
1271###############################################################################
1272
1273# find next unescaped char in some text
1274#
1275sub _uindex {
1276    my $text = shift;
1277    my $char = shift;
1278    my $pos = shift;
1279
1280    my $realbrace = 0;
1281    my $idx = -1;
1282
1283    # get next opening brace
1284    do {
1285        $realbrace = 1;
1286        $idx = index $text, $char, $pos;
1287
1288        if ($idx != -1) {
1289            $pos = $idx + 1;
1290            my $prevchar = substr $text, $idx - 1, 1;
1291            if ($prevchar eq '\\') {
1292                $realbrace = 0;
1293                $idx = -1;
1294            }
1295        }
1296    } while (!$realbrace);
1297
1298    return $idx;
1299}
1300
1301# support function: find the next occurrence of some symbol which is
1302# not escaped.
1303#
1304sub _findsymbol {
1305    my $text = shift;
1306    my $symbol = shift;
1307    my $pos = shift;
1308
1309    my $realhit = 0;
1310    my $index = -1;
1311
1312    # get next occurrence of the symbol
1313    do {
1314        $realhit = 1;
1315        $index = index $text, $symbol, $pos;
1316
1317        if ($index != -1) {
1318            $pos = $index + 1;
1319
1320            # make sure this occurrence isn't escaped. this is imperfect.
1321            #
1322            my $prevchar = ($index - 1 >= 0) ?
1323                                             (substr $text, $index - 1, 1) : '';
1324            my $pprevchar = ($index - 2 >= 0) ?
1325                                             (substr $text, $index - 2, 1) : '';
1326            if ($prevchar eq '\\' && $pprevchar ne '\\') {
1327                $realhit = 0;
1328                $index = -1;
1329            }
1330        }
1331    } while (!$realhit);
1332
1333    return $index;
1334}
1335
1336# support function: find the earliest next brace in some (flat) text
1337#
1338sub _findbrace {
1339    my $text = shift;
1340    my $pos = shift;
1341
1342    my $realbrace = 0;
1343    my $index_o = -1;
1344    my $index_c = -1;
1345
1346    my $pos_o = $pos;
1347    my $pos_c = $pos;
1348
1349    # get next opening brace
1350    do {
1351        $realbrace = 1;
1352        $index_o = index $text, '{', $pos_o;
1353
1354        if ($index_o != -1) {
1355            $pos_o = $index_o + 1;
1356
1357            # make sure this brace isn't escaped. this is imperfect.
1358            #
1359            my $prevchar = ($index_o - 1 >= 0) ?
1360                (substr $text, $index_o - 1, 1) : '';
1361            my $pprevchar = ($index_o - 2 >= 0) ?
1362                (substr $text, $index_o - 2, 1) : '';
1363
1364            if ($prevchar eq '\\' && $pprevchar ne '\\') {
1365                $realbrace = 0;
1366                $index_o = -1;
1367            }
1368        }
1369    } while (!$realbrace);
1370
1371    # get next closing brace
1372    do {
1373        $realbrace = 1;
1374        $index_c = index $text, '}', $pos_c;
1375
1376        if (($index_c - 1) >= 0 && substr($text, $index_c - 1, 1) eq ' ') {
1377            $pos_c = $index_c + 1;
1378            $index_c = -1;
1379        }
1380
1381        if ($index_c != -1) {
1382            $pos_c = $index_c + 1;
1383
1384            # make sure this brace isn't escaped. this is imperfect.
1385            #
1386            my $prevchar = ($index_c - 1 >= 0) ?
1387                (substr $text, $index_c - 1, 1) : '';
1388            my $pprevchar = ($index_c - 2 >= 0) ?
1389                (substr $text, $index_c - 2, 1) : '';
1390
1391            if ($prevchar eq '\\' && $pprevchar ne '\\') {
1392                $realbrace = 0;
1393                $index_c = -1;
1394            }
1395        }
1396    } while (!$realbrace);
1397
1398    # handle all find cases
1399    return (-1, '') if ($index_o == -1 && $index_c == -1);
1400    return ($index_o, '{') if ($index_c == -1 ||
1401        ($index_o != -1 && $index_o < $index_c));
1402
1403    return ($index_c, '}') if ($index_o == -1 || $index_c < $index_o);
1404}
1405
1406
1407# skip "blank nodes" in a tree, starting at some position. will finish
1408# at the first non-blank node. (ie, not a comment or whitespace TEXT node.
1409#
1410sub _skipBlankNodes {
1411    my $tree = shift;
1412    my $i = shift;
1413
1414    while ($tree->{nodes}[$i]->{type} eq 'COMMENT' ||
1415        ($tree->{nodes}[$i]->{type} eq 'TEXT' &&
1416        $tree->{nodes}[$i]->{content} =~ /^\s*$/s)) {
1417
1418        $i++;
1419    }
1420
1421    return $i;
1422}
1423
1424# is the passed-in node a valid parameter node? for this to be true, it must
1425# either be a GROUP or a position = inner command.
1426#
1427sub _validParamNode {
1428    my $node = shift;
1429
1430    return 1 if ($node->{type} eq 'GROUP' ||
1431        ($node->{type} eq 'COMMAND' && $node->{position} eq 'inner'));
1432
1433    return 0;
1434}
1435
1436# duplicate a valid param node.	This means for a group, copy the child tree.
1437# for a command, make a new tree with just the command node and its child tree.
1438#
1439sub _duplicateParam {
1440    my $parser = shift;
1441    my $node = shift;
1442
1443    if ($node->{type} eq 'GROUP') {
1444        return $node->{children}->copy();
1445    }
1446    elsif ($node->{type} eq 'COMMAND') {
1447
1448        my $subtree = $node->{children}->copy(); # copy child subtree
1449        my $nodecopy = $node->copy(); # make a new node with old data
1450        $nodecopy->{children} = $subtree; # set the child pointer to new subtree
1451
1452        # return a new tree with the new node (subtree) as its only element
1453        return LaTeX::TOM::Tree->new([$nodecopy]);
1454    }
1455
1456    return undef;
1457}
1458
1459# make a mapping from a newenvironment fragment
1460#
1461# newenvironments have the following syntax:
1462#
1463# \newenvironment{name}[nparams]?{beginTeX}{endTeX}
1464#
1465sub _makeEnvMapping {
1466    my $parser = shift;
1467    my $tree   = shift;
1468    my $i      = shift;
1469
1470    return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' ||
1471        ($tree->{nodes}[$i]->{command} ne 'newenvironment' &&
1472        $tree->{nodes}[$i]->{command} ne 'renewenvironment'));
1473
1474    # figure out command (first child, text node)
1475    my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content};
1476    if ($command =~ /^\s*\\(\S+)\s*$/) {
1477        $command = $1;
1478    }
1479
1480    my $next = $i+1;
1481
1482    # figure out number of params
1483    my $nparams = 0;
1484    if ($tree->{nodes}[$next]->{type} eq 'TEXT') {
1485        my $text = $tree->{nodes}[$next]->{content};
1486
1487        if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) {
1488            $nparams = $1;
1489        }
1490
1491        $next++;
1492    }
1493
1494    # default templates-- just repeat the declarations
1495    #
1496    my ($btemplate) = $parser->_basicparse("\\begin{$command}", 2, 0);
1497    my ($etemplate) = $parser->_basicparse("\\end{$command}", 2, 0);
1498
1499    my $endpos = $next;
1500
1501    # get two group subtrees... one for the begin and one for the end
1502    # templates. we only ignore whitespace TEXT nodes and comments
1503    #
1504    $next = _skipBlankNodes($tree, $next);
1505    if (_validParamNode($tree->{nodes}[$next])) {
1506        $btemplate = $parser->_duplicateParam($tree->{nodes}[$next]);
1507        $next++;
1508
1509        $next = _skipBlankNodes($tree, $next);
1510
1511        if (_validParamNode($tree->{nodes}[$next])) {
1512            $etemplate = $parser->_duplicateParam($tree->{nodes}[$next]);
1513            $endpos = $next;
1514        }
1515    }
1516
1517    # build and return the mapping hash
1518    #
1519    return {name => $command,
1520        nparams => $nparams,
1521        btemplate => $btemplate,    # begin template
1522        etemplate => $etemplate,    # end template
1523        skip => $endpos - $i,
1524        type => 'environment'};
1525}
1526
1527# make a mapping from a newcommand fragment
1528# takes tree pointer and index of command node
1529#
1530# newcommands have the following syntax:
1531#
1532# \newcommand{\name}[nparams]?{anyTeX}
1533#
1534sub _makeMapping {
1535    my $tree = shift;
1536    my $i = shift;
1537
1538    return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' ||
1539        ($tree->{nodes}[$i]->{command} ne 'newcommand' &&
1540        $tree->{nodes}[$i]->{command} ne 'renewcommand'));
1541
1542    # figure out command (first child, text node)
1543    my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content};
1544    if ($command =~ /^\s*\\(\S+)\s*$/) {
1545        $command = $1;
1546    }
1547
1548    my $next = $i+1;
1549
1550    # figure out number of params
1551    my $nparams = 0;
1552    if ($tree->{nodes}[$next]->{type} eq 'TEXT') {
1553        my $text = $tree->{nodes}[$next]->{content};
1554
1555        if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) {
1556            $nparams = $1;
1557        }
1558
1559        $next++;
1560    }
1561
1562    # grab subtree template (array ref)
1563    #
1564    my $template;
1565    if ($tree->{nodes}[$next]->{type} eq 'GROUP') {
1566        $template = $tree->{nodes}[$next]->{children}->copy();
1567    } else {
1568        return undef;
1569    }
1570
1571    # build and return the mapping hash
1572    #
1573    return {name => $command,
1574        nparams => $nparams,
1575        template => $template,
1576        skip => $next - $i,
1577        type => 'command'};
1578}
1579
1580# this sub is the main entry point for the sub that actually takes a set of
1581# parameter trees and inserts them into a template tree. the return result,
1582# newly allocated, should be plopped back into the original tree where the
1583# parameters (along with the initial command invocation)
1584#
1585sub _applyParamsToTemplate {
1586    my $template = shift;
1587    my @params = @_;
1588
1589    # have to copy the template to a freshly allocated tree
1590    #
1591    my $applied = $template->copy();
1592
1593    # now recursively apply the params.
1594    #
1595    _applyParamsToTemplate_r($applied, @params);
1596
1597    return $applied;
1598}
1599
1600# recursive helper for above
1601#
1602sub _applyParamsToTemplate_r {
1603    my $template = shift;
1604    my @params = @_;
1605
1606    for (my $i = 0; $i < @{$template->{nodes}}; $i++) {
1607
1608        my $node = $template->{nodes}[$i];
1609
1610        if ($node->{type} eq 'TEXT') {
1611
1612            my $text = $node->{content};
1613
1614            # find occurrences of the parameter flags
1615            #
1616            if ($text =~ /(#([0-9]+))/) {
1617
1618                my $all = $1;
1619                my $num = $2;
1620
1621                # get the index of the flag we just found
1622                #
1623                my $idx = index $text, $all, 0;
1624
1625                # split the node on the location of the flag
1626                #
1627                my ($leftnode, $rightnode) = $node->split($idx, $idx + length($all) - 1);
1628
1629                # make a copy of the param we want
1630                #
1631                my $param = $params[$num - 1]->copy();
1632
1633                # splice the new text nodes, along with the parameter subtree, into
1634                # the old location
1635                #
1636                splice @{$template->{nodes}}, $i, 1, $leftnode, @{$param->{nodes}}, $rightnode;
1637
1638                # skip forward to where $rightnode is in $template on next iteration
1639                #
1640                $i += scalar @{$param->{nodes}};
1641            }
1642        }
1643
1644        # recur
1645        elsif (defined $node->{children}) {
1646
1647            _applyParamsToTemplate_r($node->{children}, @params);
1648        }
1649    }
1650}
1651
1652
1653# This sub takes a chunk of the document text between two points and makes
1654# it into a list of TEXT nodes and COMMENT nodes, as we would expect from
1655# '%' prefixed LaTeX comment lines
1656#
1657sub _getTextAndCommentNodes {
1658    my ($text, $begins, $ends) = @_;
1659
1660    my $node_text = substr $text, $begins, $ends - $begins;
1661
1662    _debug("getTextAndCommentNodes: looking at [$node_text]", undef);
1663
1664    my $make_node = sub {
1665        my ($mode_type, $begins, $start_pos, $output) = @_;
1666
1667        return LaTeX::TOM::Node->new({
1668            type    => uc $mode_type,
1669            start   => $begins + $start_pos,
1670            end     => $begins + $start_pos + length($output) - 1,
1671            content => $output,
1672        });
1673    };
1674
1675    my @lines = split (/(
1676       (?:\s*     # whitespace
1677         (?<!\\)  # unescaped
1678         \%[^\n]* # comment
1679       \n)+       # newline
1680    )/mx, $node_text);
1681
1682    my @nodes;
1683
1684    my $start_pos = 0;
1685    my $output;
1686    my $mode_type;
1687    my $first = true;
1688
1689    foreach my $line (@lines) {
1690
1691         my $line_type = (
1692                 $line =~ /^\s*\%/
1693         && $node_text !~ /
1694                           \\begin\{verbatim\}
1695                             .* \Q$line\E .*
1696                           \\end\{verbatim\}
1697                          /sx
1698        ) ? 'comment' : 'text';
1699
1700        # if type stays the same, add to output and do nothing
1701        if ($first || $line_type eq $mode_type) {
1702
1703            $output .= $line;
1704
1705            # handle turning off initialization stuff
1706            $first &&= false;
1707            $mode_type ||= $line_type;
1708        }
1709
1710        # if type changes, make new node from current chunk, change mode type
1711        # and start a new chunk
1712        else {
1713            push @nodes, $make_node->($mode_type, $begins, $start_pos, $output);
1714
1715            $start_pos += length($output); # update start position
1716            $output = $line;
1717
1718            $mode_type = $line_type;
1719        }
1720    }
1721
1722    push @nodes, $make_node->($mode_type, $begins, $start_pos, $output) if defined $output;
1723
1724    return @nodes;
1725}
1726
1727# Read in the contents of a text file on disk. Return in string scalar.
1728#
1729sub _readFile {
1730    my ($file, $raise_error) = @_;
1731
1732    $raise_error ||= false;
1733
1734    my $opened = open(my $fh, '<', $file);
1735
1736    unless ($opened) {
1737        croak "Cannot open $file: $!" if $raise_error;
1738        return undef;
1739    }
1740
1741    my $contents = do { local $/; <$fh> };
1742    close($fh);
1743
1744    return $contents;
1745}
1746
1747sub _debug {
1748    my ($message, $code) = @_;
1749
1750    my $DEBUG = $LaTeX::TOM::DEBUG;
1751
1752    return unless $DEBUG >= 1 && $DEBUG <= 2;
1753
1754    my ($filename, $line) = (caller)[1,2];
1755    my $caller = join ':', (fileparse($filename))[0], $line;
1756
1757    warn "$caller: $message\n" if $DEBUG >= 1 && defined $message;
1758    $code->()                  if $DEBUG == 2 && defined $code;
1759}
1760
17611;
1762