1############################################################################### 2# 3# LaTeX::TOM::Parser 4# 5# The parsing class 6# 7############################################################################### 8 9package LaTeX::TOM::Parser; 10 11use strict; 12use base qw( 13 LaTeX::TOM::Node 14 LaTeX::TOM::Tree 15); 16use constant true => 1; 17use constant false => 0; 18 19use Carp qw(carp croak); 20use File::Basename qw(fileparse); 21 22our $VERSION = '0.07'; 23 24my %error_handlers = ( 25 0 => sub { warn "parse error: $_[0].\n" }, 26 1 => sub { die "parse error: $_[0].\n" }, 27 2 => sub {}, 28); 29 30# Constructor 31# 32sub new { 33 my $class = shift; 34 35 no strict 'refs'; 36 37 my $self = bless { 38 config => { 39 BRACELESS => \%{"${class}::BRACELESS"}, 40 INNERCMDS => \%{"${class}::INNERCMDS"}, 41 MATHENVS => \%{"${class}::MATHENVS"}, 42 MATHBRACKETS => \%{"${class}::MATHBRACKETS"}, 43 PARSE_ERRORS_FATAL => ${"${class}::PARSE_ERRORS_FATAL"}, 44 TEXTENVS => \%{"${class}::TEXTENVS"}, 45 }, 46 }; 47 48 $self->_init(@_); 49 50 return $self; 51} 52 53# Set/reset "globals" 54# 55sub _init { 56 my $parser = shift; 57 my ($parse_errors_fatal, $readinputs, $applymappings) = @_; 58 59 my $retrieve_opt_default = sub 60 { 61 my ($opt, $default) = @_; 62 return $opt if defined $opt; 63 return $default; 64 }; 65 66 # set user options 67 # 68 $parser->{readinputs} = $retrieve_opt_default->($readinputs, 0); 69 $parser->{applymappings} = $retrieve_opt_default->($applymappings, 0); 70 $parser->{PARSE_ERRORS_FATAL} = $retrieve_opt_default->($parse_errors_fatal, $parser->{config}{PARSE_ERRORS_FATAL}); 71 72 # init internal stuff 73 # 74 $parser->{MATHBRACKETS} = $parser->{config}{MATHBRACKETS}; 75 76 # this will hold a running list/hash of commands that have been remapped 77 $parser->{MAPPEDCMDS} = {}; 78 79 # this will hold a running list/hash of commands that have been used. We dont 80 # bother apply mappings except to commands that have been used. 81 $parser->{USED_COMMANDS} = {}; 82 83 # no file yet 84 $parser->{file} = undef; 85} 86 87# Parse a LaTeX file, return a tree. You probably want this method. 88# 89sub parseFile { 90 my $parser = shift; 91 my $filename = shift; 92 93 # init variables 94 # 95 $parser->{file} = $filename; # file name member data 96 my $tree = {}; # init output tree 97 98 # read in text from file or bomb out 99 # 100 my $text = _readFile($filename, true); 101 102 # do the parse 103 # 104 $tree = $parser->parse($text); 105 106 return $tree; 107} 108 109# main parsing entrypoint 110# 111sub parse { 112 my $parser = shift; 113 my ($text) = @_; 114 115 # first half of parsing (goes up to finding commands, reading inputs) 116 # 117 my ($tree, $bracehash) = $parser->_parseA($text); 118 _debug( 119 'done with _parseA', 120 sub { $tree->_warn() }, 121 ); 122 123 # handle mappings 124 # 125 $parser->_applyMappings($tree) if $parser->{applymappings}; 126 _debug( 127 'done with _applyMappings', 128 sub { $tree->_warn() }, 129 ); 130 131 # second half of parsing (environments) 132 # 133 $parser->_parseB($tree); 134 _debug( 135 'done with _parseB', 136 sub { $tree->_warn() }, 137 ); 138 139 # once all the above is done we can propegate math/plaintext modes down 140 # 141 $parser->_propegateModes($tree, 0, 0); # math = 0, plaintext = 0 142 _debug( 143 'done with _propegateModes', 144 sub { $tree->_warn() }, 145 ); 146 147 # handle kooky \[ \] math mode 148 # 149 if (not exists $parser->{MAPPEDCMDS}->{'\\['}) { 150 # math mode (\[ \], \( \)) 151 $parser->_stage5($tree, {'\\[' => '\\]', '\\(' => '\\)'}, 1); 152 $parser->_propegateModes($tree, 0, 0); # have to do this again of course 153 $parser->{MATHBRACKETS}->{'\\['} = '\\]'; # put back in brackets list for 154 $parser->{MATHBRACKETS}->{'\\('} = '\\)'; # printing purposes. 155 } 156 _debug( 157 undef, 158 sub { $tree->_warn() }, 159 ); 160 161 $tree->listify; # add linked-list stuff 162 163 return $tree; 164} 165 166# Parsing with no mappings and no externally accessible parser object. 167# 168sub _basicparse { 169 my $parser = shift; # @_ would break code 170 my $text = shift; 171 172 my $parse_errors_fatal = (defined $_[0] ? $_[0] : $parser->{config}{PARSE_ERRORS_FATAL}); 173 my $readinputs = (defined $_[1] ? $_[1] : 1); 174 175 $parser = LaTeX::TOM::Parser->new($parse_errors_fatal, $readinputs); 176 my ($tree, $bracehash) = $parser->_parseA($text); 177 178 $parser->_parseB($tree); 179 180 $tree->listify; # add linked-list stuff 181 182 return ($tree, $bracehash); 183} 184 185# start the tree. separate out comment and text nodes. 186# 187sub _stage1 { 188 my $parser = shift; 189 my $text = shift; 190 191 my @nodes = _getTextAndCommentNodes($text, 0, length($text)); 192 193 return LaTeX::TOM::Tree->new([@nodes]); 194} 195 196# this stage parses the braces ({}) and adds the corresponding structure to 197# the tree. 198# 199sub _stage2 { 200 my $parser = shift; 201 202 my $tree = shift; 203 my $bracehash = shift || undef; 204 my $startidx = shift || 0; # last two params for starting at some specific 205 my $startpos = shift || 0; # node and offset. 206 207 my %blankhash; 208 209 if (not defined $bracehash) { 210 $bracehash = {%blankhash}; 211 } 212 213 my $leftidx = -1; 214 my $leftpos = -1; 215 my $leftcount = 0; 216 217 # loop through the nodes 218 for (my $i = $startidx; $i < @{$tree->{nodes}}; $i++) { 219 my $node = $tree->{nodes}[$i]; 220 my $spos = $node->{start}; # get text start position 221 222 # set position placeholder within the text block 223 my $pos = ($i == $startidx) ? $startpos : 0; 224 225 if ($node->{type} eq 'TEXT') { 226 227 _debug("parseStage2: looking at text node: [$node->{content}]", undef); 228 229 my ($nextpos, $brace) = _findbrace($node->{content}, $pos); 230 while ($nextpos != -1) { 231 232 $pos = $nextpos + 1; # update position pointer 233 234 # handle left brace 235 if ($brace eq '{') { 236 _debug("found '{' at position $nextpos, leftcount is $leftcount", undef); 237 if ($leftcount == 0) { 238 $leftpos = $nextpos; 239 $leftidx = $i 240 } 241 $leftcount++; 242 } 243 244 # handle right brance 245 elsif ($brace eq '}') { 246 247 _debug("found '}' at position $nextpos, leftcount is $leftcount", undef); 248 my $rightpos = $nextpos; 249 $leftcount--; 250 251 # found the corresponding right brace to our starting left brace 252 if ($leftcount == 0) { 253 254 # see if we have to split the text node into 3 parts 255 # 256 if ($leftidx == $i) { 257 258 my ($leftside, $textnode3) = $node->split($rightpos, $rightpos); 259 my ($textnode1, $textnode2) = $leftside->split($leftpos, $leftpos); 260 261 # make the new GROUP node 262 my $groupnode = LaTeX::TOM::Node->new( 263 {type => 'GROUP', 264 start => $textnode2->{start} - 1, 265 end => $textnode2->{end} + 1, 266 children => LaTeX::TOM::Tree->new([$textnode2]), 267 }); 268 269 # splice the new subtree into the old location 270 splice @{$tree->{nodes}}, $i, 1, $textnode1, $groupnode, $textnode3; 271 272 # add to the brace-pair lookup table 273 $bracehash->{$groupnode->{start}} = $groupnode->{end}; 274 $bracehash->{$groupnode->{end}} = $groupnode->{start}; 275 276 # recur into new child node 277 $parser->_stage2($groupnode->{children}, $bracehash); 278 279 $i++; # skip to textnode3 for further processing 280 } 281 282 # split across nodes 283 # 284 else { 285 286 my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos); 287 my ($textnode3, $textnode4) = $node->split($rightpos, $rightpos); 288 289 # remove nodes in between the node we found '{' in and the node 290 # we found '}' in 291 # 292 my @removed = splice @{$tree->{nodes}}, $leftidx+1, $i-$leftidx-1; 293 294 # create a group node that contains the text after the left brace, 295 # then all the nodes up until the next text node, then the text 296 # before the right brace. 297 # 298 my $groupnode = LaTeX::TOM::Node->new( 299 {type => 'GROUP', 300 start => $textnode2->{start} - 1, 301 end => $textnode3->{end} + 1, 302 children => LaTeX::TOM::Tree->new( 303 [$textnode2, 304 @removed, 305 $textnode3]), 306 }); 307 308 # replace the two original text nodes with the leftover left and 309 # right portions, as well as the group node with everything in 310 # the middle. 311 # 312 splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $groupnode, $textnode4; 313 314 # add to the brace-pair lookup table 315 $bracehash->{$groupnode->{start}} = $groupnode->{end}; 316 $bracehash->{$groupnode->{end}} = $groupnode->{start}; 317 318 # recur into new child nodes 319 $parser->_stage2($groupnode->{children}, $bracehash); 320 321 # step back to textnode4 on this level for further processing 322 $i -= scalar @removed; 323 } 324 325 $leftpos = -1; # reset left data 326 $leftidx = -1; 327 last; 328 } # $leftcount == 0 329 330 # check for '}'-based error 331 # 332 if ($leftcount < 0) { 333 $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("'}' before '{' at " . ($spos + $rightpos)); 334 $leftcount = 0; # reset and continue 335 } 336 } # right brace 337 338 ($nextpos, $brace) = _findbrace($node->{content}, $pos); 339 340 } # while (braces left) 341 342 } # if TEXT 343 344 } # loop over all nodes 345 346 # check for extra '{' parse error 347 # 348 if ($leftcount > 0) { 349 my $spos = $tree->{nodes}[$leftidx]->{start}; # get text start position 350 $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '{' at " . ($spos + $leftpos)); 351 352 # try to continue on, after the offending brace 353 $parser->_stage2($tree, $bracehash, $leftidx, $leftpos + 1); 354 } 355 356 return $bracehash; 357} 358 359# this stage finds LaTeX commands and accordingly turns GROUP nodes into 360# command nodes, labeled with the command 361# 362sub _stage3 { 363 my $parser = shift; 364 365 my $tree = shift; 366 my $parent = shift; 367 368 for (my $i = 0; $i< @{$tree->{nodes}}; $i++) { 369 370 my $node = $tree->{nodes}[$i]; 371 372 # check text node for command tag 373 if ($node->{type} eq 'TEXT') { 374 my $text = $node->{content}; 375 376 # inner command (such as {\command text text}). our regexp checks to see 377 # if this text chunk begins with \command, since that would be the case 378 # due to the previous parsing stages. if found, the parent node is 379 # promoted to a command. 380 # 381 if ($text =~ /^\s*\\(\w+\*?)/ && defined $parent && $parser->{config}{INNERCMDS}->{$1}) { 382 my $command = $1; 383 384 # if the parent is already a command node, we have to make a new 385 # nested command node 386 # 387 if ($parent->{type} eq 'COMMAND') { 388 389 # make a new command node 390 my $newnode = LaTeX::TOM::Node->new( 391 {type => 'COMMAND', 392 command => $command, 393 start => $parent->{start}, 394 end => $parent->{end}, 395 position => 'inner', 396 children => $parent->{children} }); 397 398 # point parent to it 399 $parent->{children} = LaTeX::TOM::Tree->new([$newnode]); 400 401 # start over at this level (get additional inner commands) 402 $parent = $newnode; 403 $i = -1; 404 405 $parser->{USED_COMMANDS}->{$newnode->{command}} = 1; 406 } 407 408 # parent is a naked group, we can make it into a command node 409 # 410 elsif ($parent->{type} eq 'GROUP') { 411 $parent->{type} = 'COMMAND'; 412 $parent->{command} = $command; 413 $parent->{position} = 'inner'; 414 415 # start over at this level 416 $i = -1; 417 418 $parser->{USED_COMMANDS}->{$parent->{command}} = 1; 419 } 420 421 $node->{content} =~ s/^\s*\\(?:\w+\*?)//o; 422 } 423 424 # outer command (such as \command{parameters}). our regexp checks to 425 # see if this text chunk ends in \command, since that would be the case 426 # due to the previous parsing stages. 427 # 428 if ($text =~ /(?:^|[^\\])(\\\w+\*?(\s*\[.*?\])?)\s*$/os && 429 defined $tree->{nodes}[$i+1] && 430 $tree->{nodes}[$i+1]->{type} eq 'GROUP') { 431 432 my $tag = $1; 433 434 _debug("found text node [$text] with command tag [$tag]", undef); 435 436 # remove the text 437 $node->{content} =~ s/\\\w+\*?\s*(?:\[.*?\])?\s*$//os; 438 439 # parse it for command and ops 440 $tag =~ /^\\(\w+\*?)\s*(?:\[(.*?)\])?$/os; 441 442 my $command = $1; 443 my $opts = $2; 444 445 # make the next node a command node with the above data 446 my $next = $tree->{nodes}[$i+1]; 447 448 $next->{type} = 'COMMAND'; 449 $next->{command} = $command; 450 $next->{opts} = $opts; 451 $next->{position} = 'outer'; 452 453 $parser->{USED_COMMANDS}->{$next->{command}} = 1; 454 } 455 456 # recognize braceless commands 457 # 458 if ($text =~ /(\\(\w+\*?)[ \t]+(\S+))/gso || $text =~ /(\\(\w+)(\d+))/gso) { 459 my $all = $1; 460 my $command = $2; 461 my $param = $3; 462 463 if ($parser->{config}{BRACELESS}->{$command}) { 464 # warn "found braceless command $command with param $param"; 465 466 # get location to split from node text 467 my $a = index $node->{content}, $all, 0; 468 my $b = $a + length($all) - 1; 469 470 # make all the new nodes 471 472 # new left and right text nodes 473 my ($leftnode, $rightnode) = $node->split($a, $b); 474 475 # param contents node 476 my $pstart = index $node->{content}, $param, $a; 477 my $newchild = LaTeX::TOM::Node->new( 478 {type => 'TEXT', 479 start => $node->{start} + $pstart, 480 end => $node->{start} + $pstart + length($param) - 1, 481 content => $param }); 482 483 # new command node 484 my $commandnode = LaTeX::TOM::Node->new( 485 {type => 'COMMAND', 486 braces => 0, 487 command => $command, 488 start => $node->{start} + $a, 489 end => $node->{start} + $b, 490 children => LaTeX::TOM::Tree->new([$newchild]), 491 }); 492 493 $parser->{USED_COMMANDS}->{$commandnode->{command}} = 1; 494 495 # splice these all into the original array 496 splice @{$tree->{nodes}}, $i, 1, $leftnode, $commandnode, $rightnode; 497 498 # make the rightnode the node we're currently analyzing 499 $node = $rightnode; 500 501 # make sure outer loop will continue parsing *after* rightnode 502 $i += 2; 503 } 504 } 505 } 506 507 # recur 508 if ($node->{type} eq 'GROUP' || 509 $node->{type} eq 'COMMAND') { 510 511 $parser->_stage3($node->{children}, $node); 512 } 513 } 514} 515 516# this stage finds \begin{x} \end{x} environments and shoves their contents 517# down into a new child node, with a parent node of ENVIRONMENT type. 518# 519# this has the effect of making the tree deeper, since much of the structure 520# is in environment tags and will now be picked up. 521# 522# for ENVIRONMENTs, "start" means the ending } on the \begin tag, 523# "end" means the starting \ on the \end tag, 524# "ostart" is the starting \ on the "begin" tag, 525# "oend" is the ending } on the "end" tag, and 526# and "class" is the "x" from above. 527# 528sub _stage4 { 529 my $parser = shift; 530 my $tree = shift; 531 532 my $bcount = 0; # \begin "stack count" 533 my $class = ""; # environment class 534 my $bidx = 0; # \begin array index. 535 536 for (my $i = 0; $i < @{$tree->{nodes}}; $i++) { 537 my $node = $tree->{nodes}->[$i]; 538 539 # see if this is a "\begin" command node 540 if ($node->{type} eq 'COMMAND' && $node->{command} eq 'begin') { 541 542 _debug("parseStage4: found a begin COMMAND node, $node->{children}->{nodes}[0]->{content} @ $node->{start}", undef); 543 544 # start a new "stack" 545 if ($bcount == 0) { 546 $bidx = $i; 547 $bcount++; 548 $class = $node->{children}->{nodes}->[0]->{content}; 549 _debug("parseStage4: opening environment tag found, class = $class", undef); 550 } 551 552 # add to the "stack" 553 elsif ($node->{children}->{nodes}->[0]->{content} eq $class) { 554 $bcount++; 555 _debug("parseStage4: incrementing tag count for $class", undef); 556 } 557 } 558 559 # handle "\end" command nodes 560 elsif ($node->{type} eq 'COMMAND' && 561 $node->{command} eq 'end' && 562 $node->{children}->{nodes}->[0]->{content} eq $class) { 563 564 $bcount--; 565 _debug("parseStage4: decrementing tag count for $class", undef); 566 567 # we found our closing "\end" tag. replace everything with the proper 568 # ENVIRONMENT tag and subtree. 569 # 570 if ($bcount == 0) { 571 572 _debug("parseStage4: closing environment $class", undef); 573 574 # first we must take everything between the "\begin" and "\end" 575 # nodes and put them in a new array, removing them from the old one 576 my @newarray = splice @{$tree->{nodes}}, $bidx+1, $i - ($bidx + 1); 577 578 # make the ENVIRONMENT node 579 my $start = $tree->{nodes}[$bidx]->{end}; 580 my $end = $node->{start}; 581 my $envnode = LaTeX::TOM::Node->new( 582 {type => 'ENVIRONMENT', 583 class => $class, 584 start => $start, # "inner" start and end 585 end => $end, 586 ostart => $start - length('begin') - length($class) - 2, 587 oend => $end + length('end') + length($class) + 2, 588 children => LaTeX::TOM::Tree->new([@newarray]), 589 }); 590 591 if ($parser->{config}{MATHENVS}->{$envnode->{class}}) { 592 $envnode->{math} = 1; 593 } 594 595 # replace the \begin and \end COMMAND nodes with the single 596 # environment node 597 splice @{$tree->{nodes}}, $bidx, 2, $envnode; 598 599 $class = ""; # reset class. 600 601 # i is going to change by however many nodes we removed 602 $i -= scalar @newarray; 603 604 # recur into the children 605 $parser->_stage4($envnode->{children}); 606 } 607 } 608 609 # recur in general 610 elsif ($node->{children}) { 611 $parser->_stage4($node->{children}); 612 } 613 } 614 615 # parse error if we're missing an "\end" tag. 616 if ($bcount > 0) { 617 $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->( 618 "missing \\end{$class} for \\begin{$class} at position $tree->{nodes}[$bidx]->{end}" 619 ); 620 } 621} 622 623# This is the "math" stage: here we grab simple-delimeter math modes from 624# the text they are embedded in, and turn those into new groupings, with the 625# "math" flag set. 626# 627# having this top level to go over all the bracket types prevents some pretty 628# bad combinatorial explosion 629# 630sub _stage5 { 631 my $parser = shift; 632 633 my $tree = shift; 634 my $caremath = shift || 0; 635 636 my $brackets = $parser->{MATHBRACKETS}; 637 638 # loop through all the different math mode bracket types 639 foreach my $left (sort {length($b) <=> length($a)} keys %$brackets) { 640 my $right = $brackets->{$left}; 641 642 $parser->_stage5_r($tree, $left, $right, $caremath); 643 } 644} 645 646# recursive meat of above 647# 648sub _stage5_r { 649 my $parser = shift; 650 651 my $tree = shift; 652 my $left = shift; 653 my $right = shift; 654 my $caremath = shift || 0; # do we care if we're already in math mode? 655 # this matters for \( \), \[ \] 656 657 my $leftpos = -1; # no text pos for found left brace yet. 658 my $leftidx = -1; # no array index for found left brace yet. 659 660 # loop through the nodes 661 for (my $i = 0; $i < scalar @{$tree->{nodes}}; $i++) { 662 my $node = $tree->{nodes}[$i]; 663 my $pos = 0; # position placeholder within the text block 664 my $spos = $node->{start}; # get text start position 665 666 if ($node->{type} eq 'TEXT' && 667 (!$caremath || (!$node->{math} && $caremath))) { 668 669 # search for left brace if we haven't started a pair yet 670 if ($leftidx == -1) { 671 $leftpos = _findsymbol($node->{content}, $left, $pos); 672 673 if ($leftpos != -1) { 674 _debug("found (left) $left in [$node->{content}]", undef); 675 $leftidx = $i; 676 $pos = $leftpos + 1; # next pos to search from 677 } 678 } 679 680 # search for a right brace 681 if ($leftpos != -1) { 682 my $rightpos = _findsymbol($node->{content}, $right, $pos); 683 684 # found 685 if ($rightpos != -1) { 686 687 # we have to split the text node into 3 parts 688 if ($leftidx == $i) { 689 _debug("splitwithin: found (right) $right in [$node->{content}]", undef); 690 691 my ($leftnode, $textnode3) = $node->split($rightpos, $rightpos + length($right) - 1); 692 my ($textnode1, $textnode2) = $leftnode->split($leftpos, $leftpos + length($left) - 1); 693 694 my $startpos = $spos; # get text start position 695 696 # make the math ENVIRONMENT node 697 my $mathnode = LaTeX::TOM::Node->new( 698 {type => 'ENVIRONMENT', 699 class => $left, # use left delim as class 700 math => 1, 701 start => $startpos + $leftpos, 702 ostart => $startpos + $leftpos - length($left) + 1, 703 end => $startpos + $rightpos, 704 oend => $startpos + $rightpos + length($right) - 1, 705 children => LaTeX::TOM::Tree->new([$textnode2]), 706 }); 707 708 splice @{$tree->{nodes}}, $i, 1, $textnode1, $mathnode, $textnode3; 709 710 $i++; # skip ahead two nodes, so we'll be parsing textnode3 711 } 712 713 # split across nodes 714 else { 715 716 _debug("splitacross: found (right) $right in [$node->{content}]", undef); 717 718 # create new set of 4 smaller text nodes from the original two 719 # that contain the left and right delimeters 720 # 721 my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos + length($left) - 1); 722 my ($textnode3, $textnode4) = $tree->{nodes}[$i]->split($rightpos, $rightpos + length($right) - 1); 723 724 # nodes to remove "from the middle" (between the left and right 725 # text nodes which contain the delimeters) 726 # 727 my @remnodes = splice @{$tree->{nodes}}, $leftidx+1, $i - $leftidx - 1; 728 729 # create a math node that contains the text after the left brace, 730 # then all the nodes up until the next text node, then the text 731 # before the right brace. 732 # 733 my $mathnode = LaTeX::TOM::Node->new( 734 {type => 'ENVIRONMENT', 735 class => $left, 736 math => 1, 737 start => $textnode2->{start} - 1, 738 end => $textnode3->{end} + 1, 739 ostart => $textnode2->{start} - 1 - length($left) + 1, 740 oend => $textnode3->{end} + 1 + length($right) - 1, 741 children => LaTeX::TOM::Tree->new( 742 [$textnode2, 743 @remnodes, 744 $textnode3]), 745 }); 746 747 # replace (TEXT_A, ... , TEXT_B) with the mathnode created above 748 splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $mathnode, $textnode4; 749 750 # do all nodes again but the very leftmost 751 # 752 $i = $leftidx; 753 } 754 755 $leftpos = -1; # reset left data 756 $leftidx = -1; 757 } # right brace 758 } # left brace 759 else { 760 761 my $rightpos = _findsymbol($node->{content}, $right, $pos); 762 763 if ($rightpos != -1) { 764 my $startpos = $node->{start}; # get text start position 765 $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$right' at " . ($startpos + $rightpos)); 766 } 767 } 768 } # if TEXT 769 770 # recur, but not into verbatim environments! 771 # 772 elsif ($node->{children} && 773 !( 774 ($node->{type} eq 'COMMAND' && $node->{command} =~ /^verb/) || 775 ($node->{type} eq 'ENVIRONMENT' && $node->{class} =~ /^verbatim/))) { 776 777 if ($LaTeX::TOM::DEBUG) { 778 my $message = "Recurring into $node->{type} node "; 779 $message .= $node->{command} if ($node->{type} eq 'COMMAND'); 780 $message .= $node->{class} if ($node->{type} eq 'ENVIRONMENT'); 781 _debug($message, undef); 782 } 783 784 $parser->_stage5_r($node->{children}, $left, $right, $caremath); 785 } 786 787 } # loop over text blocks 788 789 if ($leftpos != -1) { 790 my $startpos = $tree->{nodes}[$leftidx]->{start}; # get text start position 791 $error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$left' at " . ($startpos + $leftpos)); 792 } 793} 794 795# This stage propegates the math mode flag and plaintext flags downward. 796# 797# After this is done, we can make the claim that only text nodes marked with 798# the plaintext flag should be printed. math nodes will have the "math" flag, 799# and also plantext = 0. 800# 801sub _propegateModes { 802 my $parser = shift; 803 804 my $tree = shift; 805 my $math = shift; # most likely want to call this with 0 806 my $plaintext = shift; # ditto this-- default to nothing visible. 807 808 foreach my $node (@{$tree->{nodes}}) { 809 810 # handle text nodes on this level. set flags. 811 # 812 if ($node->{type} eq 'TEXT') { 813 $node->{math} = $math; 814 $node->{plaintext} = $plaintext; 815 } 816 817 # propegate flags downward, possibly modified 818 # 819 elsif (defined $node->{children}) { 820 821 my $mathflag = $math; # math propegates down by default 822 my $plaintextflag = 0; # plaintext flag does NOT propegate by default 823 824 # handle math or plain text forcing envs 825 # 826 if ($node->{type} eq 'ENVIRONMENT' || $node->{type} eq 'COMMAND') { 827 if (defined $node->{class} && ( 828 $parser->{config}{MATHENVS}->{$node->{class}} || 829 $parser->{config}{MATHENVS}->{"$node->{class}*"}) 830 ) 831 { 832 $mathflag = 1; 833 $plaintextflag = 0; 834 } 835 elsif (($node->{type} eq 'COMMAND' && 836 ($parser->{config}{TEXTENVS}->{$node->{command}} || 837 $parser->{config}{TEXTENVS}->{"$node->{command}*"})) || 838 ($node->{type} eq 'ENVIRONMENT' && 839 ($parser->{config}{TEXTENVS}->{$node->{class}} || 840 $parser->{config}{TEXTENVS}{"$node->{command}*"})) 841 ) { 842 843 $mathflag = 0; 844 $plaintextflag = 1; 845 } 846 } 847 848 # groupings change nothing 849 # 850 elsif ($node->{type} eq 'GROUP') { 851 $mathflag = $math; 852 $plaintextflag = $plaintext; 853 } 854 855 # recur 856 $parser->_propegateModes($node->{children}, $mathflag, $plaintextflag); 857 } 858 } 859} 860 861# apply a mapping to text nodes in a tree 862# 863# for newcommands and defs: mapping is a hash: 864# 865# {name, nparams, template, type} 866# 867# name is a string 868# nparams is an integer 869# template is a tree fragement containing text nodes with #x flags, where 870# parameters will be replaced. 871# type is "command" 872# 873# for newenvironments: 874# 875# {name, nparams, btemplate, etemplate, type} 876# 877# same as above, except type is "environment" and there are two templates, 878# btemplate and etemplate. 879# 880sub _applyMapping { 881 my $parser = shift; 882 883 my $tree = shift; 884 my $mapping = shift; 885 my $i = shift || 0; # index to start with, in tree. 886 887 my $applications = 0; # keep track of # of applications 888 889 for (; $i < @{$tree->{nodes}}; $i++) { 890 891 my $node = $tree->{nodes}[$i]; 892 893 # begin environment nodes 894 # 895 if ($node->{type} eq 'COMMAND' 896 && $node->{command} eq 'begin' 897 && $node->{children}->{nodes}[0]->{content} eq $mapping->{name} 898 ) { 899 # grab the nparams next group nodes as parameters 900 # 901 my @params = (); 902 903 my $remain = $mapping->{nparams}; 904 my $j = 1; 905 while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) { 906 907 my $node = $tree->{nodes}[$i + $j]; 908 909 # grab group node 910 if ($node->{type} eq 'GROUP') { 911 push @params, $node->{children}; 912 $remain--; 913 } 914 915 $j++; 916 } 917 918 # if we didn't get enough group nodes, bomb out 919 next if $remain; 920 921 # otherwise make new subtree 922 my $applied = _applyParamsToTemplate($mapping->{btemplate}, @params); 923 924 # splice in the result 925 splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}}; 926 927 # skip past all the new stuff 928 $i += scalar @{$applied->{nodes}} - 1; 929 } 930 931 # end environment nodes 932 # 933 elsif ($node->{type} eq 'COMMAND' 934 && $node->{command} eq 'end' 935 && $node->{children}->{nodes}[0]->{content} eq $mapping->{name} 936 ) { 937 # make new subtree (no params) 938 my $applied = $mapping->{etemplate}->copy(); 939 940 # splice in the result 941 splice @{$tree->{nodes}}, $i, 1, @{$applied->{nodes}}; 942 943 # skip past all the new stuff 944 $i += scalar @{$applied->{nodes}} - 1; 945 946 $applications++; # only count end environment nodes 947 } 948 949 # newcommand nodes 950 # 951 elsif ($node->{type} eq 'COMMAND' 952 && $node->{command} eq $mapping->{name} 953 && $mapping->{nparams} 954 ) { 955 my @params = (); 956 957 # children of COMMAND node will be first parameter 958 push @params, $node->{children}; 959 960 # find next nparams GROUP nodes and push their children onto @params 961 my $remain = $mapping->{nparams} - 1; 962 my $j = 1; 963 while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) { 964 965 my $node = $tree->{nodes}[$i + $j]; 966 967 # grab group node 968 if ($node->{type} eq 'GROUP') { 969 push @params, $node->{children}; 970 $remain--; 971 } 972 973 $j++; 974 } 975 976 # if we didn't get enough group nodes, bomb out 977 next if ($remain > 0); 978 979 # apply the params to the template 980 my $applied = _applyParamsToTemplate($mapping->{template}, @params); 981 982 # splice in the result 983 splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}}; 984 985 # skip past all the new stuff 986 $i += scalar @{$applied->{nodes}} - 1; 987 988 $applications++; 989 } 990 991 # find 0-param mappings 992 elsif ($node->{type} eq 'TEXT' && !$mapping->{nparams}) { 993 994 my $text = $node->{content}; 995 my $command = $mapping->{name}; 996 997 # find occurrences of the mapping command 998 # 999 my $wordend = ($command =~ /\w$/ ? 1 : 0); 1000 while (($wordend && $text =~ /\\\Q$command\E(\W|$)/g) || 1001 (!$wordend && $text =~ /\\\Q$command\E/g)) { 1002 1003 _debug("found occurrence of mapping $command", undef); 1004 1005 my $idx = index $node->{content}, '\\' . $command, 0; 1006 1007 # split the text node at that command 1008 my ($leftnode, $rightnode) = $node->split($idx, $idx + length($command)); 1009 1010 # copy the mapping template 1011 my $applied = $mapping->{template}->copy(); 1012 1013 # splice the new nodes in 1014 splice @{$tree->{nodes}}, $i, 1, $leftnode, @{$applied->{nodes}}, $rightnode; 1015 1016 # adjust i so we end up on rightnode when we're done 1017 $i += scalar @{$applied->{nodes}} + 1; 1018 1019 # get the next node 1020 $node = $tree->{$node}[$i]; 1021 1022 # count application 1023 $applications++; 1024 } 1025 } 1026 1027 # recur 1028 elsif ($node->{children}) { 1029 1030 $applications += $parser->_applyMapping($node->{children}, $mapping); 1031 } 1032 } 1033 1034 return $applications; 1035} 1036 1037# find and apply all mappings in the tree, progressively and recursively. 1038# a mapping applies to the entire tree and subtree consisting of nodes AFTER 1039# itself in the level array. 1040# 1041sub _applyMappings { 1042 my $parser = shift; 1043 1044 my $tree = shift; 1045 1046 for (my $i = 0; $i < @{$tree->{nodes}}; $i++) { 1047 1048 my $prev = $tree->{nodes}[$i-1]; 1049 my $node = $tree->{nodes}[$i]; 1050 1051 # find newcommands 1052 if ($node->{type} eq 'COMMAND' && 1053 $node->{command} =~ /^(re)?newcommand$/) { 1054 1055 my $mapping = _makeMapping($tree, $i); 1056 next if (!$mapping->{name}); # skip fragged commands 1057 1058 if ($parser->{USED_COMMANDS}->{$mapping->{name}}) { 1059 _debug("applying (nc) mapping $mapping->{name}", undef); 1060 } else { 1061 _debug("NOT applying (nc) mapping $mapping->{name}", undef); 1062 next; 1063 } 1064 1065 # add to mappings list 1066 # 1067 $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1; 1068 1069 _debug("found a mapping with name $mapping->{name}, $mapping->{nparams} params", undef); 1070 1071 # remove the mapping declaration 1072 # 1073 splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1; 1074 1075 # apply the mapping 1076 my $count = $parser->_applyMapping($tree, $mapping, $i); 1077 1078 if ($count > 0) { 1079 _debug("printing altered subtree", sub { $tree->_warn() }); 1080 } 1081 1082 $i--; # since we removed the cmd node, check this index again 1083 } 1084 1085 # handle "\newenvironment" mappings 1086 elsif ($node->{type} eq 'COMMAND' && 1087 $node->{command} =~ /^(re)?newenvironment$/) { 1088 1089 # make a mapping hash 1090 # 1091 my $mapping = $parser->_makeEnvMapping($tree, $i); 1092 next if (!$mapping->{name}); # skip fragged commands. 1093 1094 _debug("applying (ne) mapping $mapping->{name}", undef); 1095 1096 # remove the mapping declaration 1097 # 1098 splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1; 1099 1100 # apply the mapping 1101 # 1102 my $count = $parser->_applyMapping($tree, $mapping, $i); 1103 } 1104 1105 # handle "\def" stype commands. 1106 elsif ($node->{type} eq 'COMMAND' && 1107 defined $prev && 1108 $prev->{type} eq 'TEXT' && 1109 $prev->{content} =~ /\\def\s*$/o) { 1110 1111 _debug("found def style mapping $node->{command}", undef); 1112 1113 # remove the \def 1114 $prev->{content} =~ s/\\def\s*$//o; 1115 1116 # make the mapping 1117 my $mapping = {name => $node->{command}, 1118 nparams => 0, 1119 template => $node->{children}->copy(), 1120 type => 'command'}; 1121 1122 next if (!$mapping->{name}); # skip fragged commands 1123 1124 if ($parser->{USED_COMMANDS}->{$mapping->{name}}) { 1125 _debug("applying (def) mapping $mapping->{name}", undef); 1126 } else { 1127 _debug("NOT applying (def) mapping $mapping->{name}", undef); 1128 next; 1129 } 1130 1131 # add to mappings list 1132 # 1133 $parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1; 1134 1135 _debug("template is", sub { $mapping->{template}->_warn() }); 1136 1137 # remove the command node 1138 splice @{$tree->{nodes}}, $i, 1; 1139 1140 # apply the mapping 1141 my $count = $parser->_applyMapping($tree, $mapping, $i); 1142 1143 $i--; # check this index again 1144 } 1145 1146 # recur 1147 elsif ($node->{children}) { 1148 1149 $parser->_applyMappings($node->{children}); 1150 } 1151 } 1152} 1153 1154# read files from \input commands and place into the tree, parsed 1155# 1156# also include bibliographies 1157# 1158sub _addInputs { 1159 my $parser = shift; 1160 1161 my $tree = shift; 1162 1163 for (my $i = 0; $i < @{$tree->{nodes}}; $i++) { 1164 1165 my $node = $tree->{nodes}[$i]; 1166 1167 if ($node->{type} eq 'COMMAND' 1168 && $node->{command} eq 'input' 1169 ) { 1170 my $file = $node->{children}->{nodes}[0]->{content}; 1171 next if $file =~ /pstex/; # ignore pstex images 1172 1173 _debug("reading input file $file", undef); 1174 1175 my $contents; 1176 my $filename = fileparse($file); 1177 my $has_extension = qr/\.\S+$/; 1178 1179 # read in contents of file 1180 if (-e $file && $filename =~ $has_extension) { 1181 $contents = _readFile($file); 1182 } 1183 elsif ($filename !~ $has_extension) { 1184 $file = "$file.tex"; 1185 $contents = _readFile($file) if -e $file; 1186 } 1187 1188 # dump Psfig/TeX files, they aren't useful to us and have 1189 # nonconforming syntax. Use declaration line as our heuristic. 1190 # 1191 if (defined $contents 1192 && $contents =~ m!^ \% \s*? Psfig/TeX \s* $!mx 1193 ) { 1194 undef $contents; 1195 carp "ignoring Psfig input `$file'"; 1196 } 1197 1198 # actually do the parse of the sub-content 1199 # 1200 if (defined $contents) { 1201 # parse into a tree 1202 my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL}); 1203 1204 # replace \input command node with subtree 1205 splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}}; 1206 1207 # step back 1208 $i--; 1209 } 1210 } 1211 elsif ($node->{type} eq 'COMMAND' 1212 && $node->{command} eq 'bibliography' 1213 ) { 1214 # try to find a .bbl file 1215 # 1216 foreach my $file (<*.bbl>) { 1217 1218 my $contents = _readFile($file); 1219 1220 if (defined $contents) { 1221 1222 my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL}); 1223 splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}}; 1224 $i--; 1225 } 1226 } 1227 } 1228 1229 # recur 1230 if ($node->{children}) { 1231 $parser->_addInputs($node->{children}); 1232 } 1233 } 1234} 1235 1236# do pre-mapping parsing 1237# 1238sub _parseA { 1239 my $parser = shift; 1240 my $text = shift; 1241 1242 my $tree = $parser->_stage1($text); 1243 my $bracehash = $parser->_stage2($tree); 1244 1245 $parser->_stage3($tree); 1246 1247 $parser->_addInputs($tree) if $parser->{readinputs}; 1248 1249 return ($tree, $bracehash); 1250} 1251 1252# do post-mapping parsing (make environments) 1253# 1254sub _parseB { 1255 my $parser = shift; 1256 my $tree = shift; 1257 1258 $parser->_stage4($tree); 1259 1260 _debug("done with parseStage4", undef); 1261 1262 $parser->_stage5($tree, 0); 1263 1264 _debug("done with parseStage5", undef); 1265} 1266 1267############################################################################### 1268# 1269# Parser "Static" Subroutines 1270# 1271############################################################################### 1272 1273# find next unescaped char in some text 1274# 1275sub _uindex { 1276 my $text = shift; 1277 my $char = shift; 1278 my $pos = shift; 1279 1280 my $realbrace = 0; 1281 my $idx = -1; 1282 1283 # get next opening brace 1284 do { 1285 $realbrace = 1; 1286 $idx = index $text, $char, $pos; 1287 1288 if ($idx != -1) { 1289 $pos = $idx + 1; 1290 my $prevchar = substr $text, $idx - 1, 1; 1291 if ($prevchar eq '\\') { 1292 $realbrace = 0; 1293 $idx = -1; 1294 } 1295 } 1296 } while (!$realbrace); 1297 1298 return $idx; 1299} 1300 1301# support function: find the next occurrence of some symbol which is 1302# not escaped. 1303# 1304sub _findsymbol { 1305 my $text = shift; 1306 my $symbol = shift; 1307 my $pos = shift; 1308 1309 my $realhit = 0; 1310 my $index = -1; 1311 1312 # get next occurrence of the symbol 1313 do { 1314 $realhit = 1; 1315 $index = index $text, $symbol, $pos; 1316 1317 if ($index != -1) { 1318 $pos = $index + 1; 1319 1320 # make sure this occurrence isn't escaped. this is imperfect. 1321 # 1322 my $prevchar = ($index - 1 >= 0) ? 1323 (substr $text, $index - 1, 1) : ''; 1324 my $pprevchar = ($index - 2 >= 0) ? 1325 (substr $text, $index - 2, 1) : ''; 1326 if ($prevchar eq '\\' && $pprevchar ne '\\') { 1327 $realhit = 0; 1328 $index = -1; 1329 } 1330 } 1331 } while (!$realhit); 1332 1333 return $index; 1334} 1335 1336# support function: find the earliest next brace in some (flat) text 1337# 1338sub _findbrace { 1339 my $text = shift; 1340 my $pos = shift; 1341 1342 my $realbrace = 0; 1343 my $index_o = -1; 1344 my $index_c = -1; 1345 1346 my $pos_o = $pos; 1347 my $pos_c = $pos; 1348 1349 # get next opening brace 1350 do { 1351 $realbrace = 1; 1352 $index_o = index $text, '{', $pos_o; 1353 1354 if ($index_o != -1) { 1355 $pos_o = $index_o + 1; 1356 1357 # make sure this brace isn't escaped. this is imperfect. 1358 # 1359 my $prevchar = ($index_o - 1 >= 0) ? 1360 (substr $text, $index_o - 1, 1) : ''; 1361 my $pprevchar = ($index_o - 2 >= 0) ? 1362 (substr $text, $index_o - 2, 1) : ''; 1363 1364 if ($prevchar eq '\\' && $pprevchar ne '\\') { 1365 $realbrace = 0; 1366 $index_o = -1; 1367 } 1368 } 1369 } while (!$realbrace); 1370 1371 # get next closing brace 1372 do { 1373 $realbrace = 1; 1374 $index_c = index $text, '}', $pos_c; 1375 1376 if (($index_c - 1) >= 0 && substr($text, $index_c - 1, 1) eq ' ') { 1377 $pos_c = $index_c + 1; 1378 $index_c = -1; 1379 } 1380 1381 if ($index_c != -1) { 1382 $pos_c = $index_c + 1; 1383 1384 # make sure this brace isn't escaped. this is imperfect. 1385 # 1386 my $prevchar = ($index_c - 1 >= 0) ? 1387 (substr $text, $index_c - 1, 1) : ''; 1388 my $pprevchar = ($index_c - 2 >= 0) ? 1389 (substr $text, $index_c - 2, 1) : ''; 1390 1391 if ($prevchar eq '\\' && $pprevchar ne '\\') { 1392 $realbrace = 0; 1393 $index_c = -1; 1394 } 1395 } 1396 } while (!$realbrace); 1397 1398 # handle all find cases 1399 return (-1, '') if ($index_o == -1 && $index_c == -1); 1400 return ($index_o, '{') if ($index_c == -1 || 1401 ($index_o != -1 && $index_o < $index_c)); 1402 1403 return ($index_c, '}') if ($index_o == -1 || $index_c < $index_o); 1404} 1405 1406 1407# skip "blank nodes" in a tree, starting at some position. will finish 1408# at the first non-blank node. (ie, not a comment or whitespace TEXT node. 1409# 1410sub _skipBlankNodes { 1411 my $tree = shift; 1412 my $i = shift; 1413 1414 while ($tree->{nodes}[$i]->{type} eq 'COMMENT' || 1415 ($tree->{nodes}[$i]->{type} eq 'TEXT' && 1416 $tree->{nodes}[$i]->{content} =~ /^\s*$/s)) { 1417 1418 $i++; 1419 } 1420 1421 return $i; 1422} 1423 1424# is the passed-in node a valid parameter node? for this to be true, it must 1425# either be a GROUP or a position = inner command. 1426# 1427sub _validParamNode { 1428 my $node = shift; 1429 1430 return 1 if ($node->{type} eq 'GROUP' || 1431 ($node->{type} eq 'COMMAND' && $node->{position} eq 'inner')); 1432 1433 return 0; 1434} 1435 1436# duplicate a valid param node. This means for a group, copy the child tree. 1437# for a command, make a new tree with just the command node and its child tree. 1438# 1439sub _duplicateParam { 1440 my $parser = shift; 1441 my $node = shift; 1442 1443 if ($node->{type} eq 'GROUP') { 1444 return $node->{children}->copy(); 1445 } 1446 elsif ($node->{type} eq 'COMMAND') { 1447 1448 my $subtree = $node->{children}->copy(); # copy child subtree 1449 my $nodecopy = $node->copy(); # make a new node with old data 1450 $nodecopy->{children} = $subtree; # set the child pointer to new subtree 1451 1452 # return a new tree with the new node (subtree) as its only element 1453 return LaTeX::TOM::Tree->new([$nodecopy]); 1454 } 1455 1456 return undef; 1457} 1458 1459# make a mapping from a newenvironment fragment 1460# 1461# newenvironments have the following syntax: 1462# 1463# \newenvironment{name}[nparams]?{beginTeX}{endTeX} 1464# 1465sub _makeEnvMapping { 1466 my $parser = shift; 1467 my $tree = shift; 1468 my $i = shift; 1469 1470 return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' || 1471 ($tree->{nodes}[$i]->{command} ne 'newenvironment' && 1472 $tree->{nodes}[$i]->{command} ne 'renewenvironment')); 1473 1474 # figure out command (first child, text node) 1475 my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content}; 1476 if ($command =~ /^\s*\\(\S+)\s*$/) { 1477 $command = $1; 1478 } 1479 1480 my $next = $i+1; 1481 1482 # figure out number of params 1483 my $nparams = 0; 1484 if ($tree->{nodes}[$next]->{type} eq 'TEXT') { 1485 my $text = $tree->{nodes}[$next]->{content}; 1486 1487 if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) { 1488 $nparams = $1; 1489 } 1490 1491 $next++; 1492 } 1493 1494 # default templates-- just repeat the declarations 1495 # 1496 my ($btemplate) = $parser->_basicparse("\\begin{$command}", 2, 0); 1497 my ($etemplate) = $parser->_basicparse("\\end{$command}", 2, 0); 1498 1499 my $endpos = $next; 1500 1501 # get two group subtrees... one for the begin and one for the end 1502 # templates. we only ignore whitespace TEXT nodes and comments 1503 # 1504 $next = _skipBlankNodes($tree, $next); 1505 if (_validParamNode($tree->{nodes}[$next])) { 1506 $btemplate = $parser->_duplicateParam($tree->{nodes}[$next]); 1507 $next++; 1508 1509 $next = _skipBlankNodes($tree, $next); 1510 1511 if (_validParamNode($tree->{nodes}[$next])) { 1512 $etemplate = $parser->_duplicateParam($tree->{nodes}[$next]); 1513 $endpos = $next; 1514 } 1515 } 1516 1517 # build and return the mapping hash 1518 # 1519 return {name => $command, 1520 nparams => $nparams, 1521 btemplate => $btemplate, # begin template 1522 etemplate => $etemplate, # end template 1523 skip => $endpos - $i, 1524 type => 'environment'}; 1525} 1526 1527# make a mapping from a newcommand fragment 1528# takes tree pointer and index of command node 1529# 1530# newcommands have the following syntax: 1531# 1532# \newcommand{\name}[nparams]?{anyTeX} 1533# 1534sub _makeMapping { 1535 my $tree = shift; 1536 my $i = shift; 1537 1538 return undef if ($tree->{nodes}[$i]->{type} ne 'COMMAND' || 1539 ($tree->{nodes}[$i]->{command} ne 'newcommand' && 1540 $tree->{nodes}[$i]->{command} ne 'renewcommand')); 1541 1542 # figure out command (first child, text node) 1543 my $command = $tree->{nodes}[$i]->{children}->{nodes}[0]->{content}; 1544 if ($command =~ /^\s*\\(\S+)\s*$/) { 1545 $command = $1; 1546 } 1547 1548 my $next = $i+1; 1549 1550 # figure out number of params 1551 my $nparams = 0; 1552 if ($tree->{nodes}[$next]->{type} eq 'TEXT') { 1553 my $text = $tree->{nodes}[$next]->{content}; 1554 1555 if ($text =~ /^\s*\[\s*([0-9])+\s*\]\s*$/) { 1556 $nparams = $1; 1557 } 1558 1559 $next++; 1560 } 1561 1562 # grab subtree template (array ref) 1563 # 1564 my $template; 1565 if ($tree->{nodes}[$next]->{type} eq 'GROUP') { 1566 $template = $tree->{nodes}[$next]->{children}->copy(); 1567 } else { 1568 return undef; 1569 } 1570 1571 # build and return the mapping hash 1572 # 1573 return {name => $command, 1574 nparams => $nparams, 1575 template => $template, 1576 skip => $next - $i, 1577 type => 'command'}; 1578} 1579 1580# this sub is the main entry point for the sub that actually takes a set of 1581# parameter trees and inserts them into a template tree. the return result, 1582# newly allocated, should be plopped back into the original tree where the 1583# parameters (along with the initial command invocation) 1584# 1585sub _applyParamsToTemplate { 1586 my $template = shift; 1587 my @params = @_; 1588 1589 # have to copy the template to a freshly allocated tree 1590 # 1591 my $applied = $template->copy(); 1592 1593 # now recursively apply the params. 1594 # 1595 _applyParamsToTemplate_r($applied, @params); 1596 1597 return $applied; 1598} 1599 1600# recursive helper for above 1601# 1602sub _applyParamsToTemplate_r { 1603 my $template = shift; 1604 my @params = @_; 1605 1606 for (my $i = 0; $i < @{$template->{nodes}}; $i++) { 1607 1608 my $node = $template->{nodes}[$i]; 1609 1610 if ($node->{type} eq 'TEXT') { 1611 1612 my $text = $node->{content}; 1613 1614 # find occurrences of the parameter flags 1615 # 1616 if ($text =~ /(#([0-9]+))/) { 1617 1618 my $all = $1; 1619 my $num = $2; 1620 1621 # get the index of the flag we just found 1622 # 1623 my $idx = index $text, $all, 0; 1624 1625 # split the node on the location of the flag 1626 # 1627 my ($leftnode, $rightnode) = $node->split($idx, $idx + length($all) - 1); 1628 1629 # make a copy of the param we want 1630 # 1631 my $param = $params[$num - 1]->copy(); 1632 1633 # splice the new text nodes, along with the parameter subtree, into 1634 # the old location 1635 # 1636 splice @{$template->{nodes}}, $i, 1, $leftnode, @{$param->{nodes}}, $rightnode; 1637 1638 # skip forward to where $rightnode is in $template on next iteration 1639 # 1640 $i += scalar @{$param->{nodes}}; 1641 } 1642 } 1643 1644 # recur 1645 elsif (defined $node->{children}) { 1646 1647 _applyParamsToTemplate_r($node->{children}, @params); 1648 } 1649 } 1650} 1651 1652 1653# This sub takes a chunk of the document text between two points and makes 1654# it into a list of TEXT nodes and COMMENT nodes, as we would expect from 1655# '%' prefixed LaTeX comment lines 1656# 1657sub _getTextAndCommentNodes { 1658 my ($text, $begins, $ends) = @_; 1659 1660 my $node_text = substr $text, $begins, $ends - $begins; 1661 1662 _debug("getTextAndCommentNodes: looking at [$node_text]", undef); 1663 1664 my $make_node = sub { 1665 my ($mode_type, $begins, $start_pos, $output) = @_; 1666 1667 return LaTeX::TOM::Node->new({ 1668 type => uc $mode_type, 1669 start => $begins + $start_pos, 1670 end => $begins + $start_pos + length($output) - 1, 1671 content => $output, 1672 }); 1673 }; 1674 1675 my @lines = split (/( 1676 (?:\s* # whitespace 1677 (?<!\\) # unescaped 1678 \%[^\n]* # comment 1679 \n)+ # newline 1680 )/mx, $node_text); 1681 1682 my @nodes; 1683 1684 my $start_pos = 0; 1685 my $output; 1686 my $mode_type; 1687 my $first = true; 1688 1689 foreach my $line (@lines) { 1690 1691 my $line_type = ( 1692 $line =~ /^\s*\%/ 1693 && $node_text !~ / 1694 \\begin\{verbatim\} 1695 .* \Q$line\E .* 1696 \\end\{verbatim\} 1697 /sx 1698 ) ? 'comment' : 'text'; 1699 1700 # if type stays the same, add to output and do nothing 1701 if ($first || $line_type eq $mode_type) { 1702 1703 $output .= $line; 1704 1705 # handle turning off initialization stuff 1706 $first &&= false; 1707 $mode_type ||= $line_type; 1708 } 1709 1710 # if type changes, make new node from current chunk, change mode type 1711 # and start a new chunk 1712 else { 1713 push @nodes, $make_node->($mode_type, $begins, $start_pos, $output); 1714 1715 $start_pos += length($output); # update start position 1716 $output = $line; 1717 1718 $mode_type = $line_type; 1719 } 1720 } 1721 1722 push @nodes, $make_node->($mode_type, $begins, $start_pos, $output) if defined $output; 1723 1724 return @nodes; 1725} 1726 1727# Read in the contents of a text file on disk. Return in string scalar. 1728# 1729sub _readFile { 1730 my ($file, $raise_error) = @_; 1731 1732 $raise_error ||= false; 1733 1734 my $opened = open(my $fh, '<', $file); 1735 1736 unless ($opened) { 1737 croak "Cannot open $file: $!" if $raise_error; 1738 return undef; 1739 } 1740 1741 my $contents = do { local $/; <$fh> }; 1742 close($fh); 1743 1744 return $contents; 1745} 1746 1747sub _debug { 1748 my ($message, $code) = @_; 1749 1750 my $DEBUG = $LaTeX::TOM::DEBUG; 1751 1752 return unless $DEBUG >= 1 && $DEBUG <= 2; 1753 1754 my ($filename, $line) = (caller)[1,2]; 1755 my $caller = join ':', (fileparse($filename))[0], $line; 1756 1757 warn "$caller: $message\n" if $DEBUG >= 1 && defined $message; 1758 $code->() if $DEBUG == 2 && defined $code; 1759} 1760 17611; 1762