1############################################################################### 2# 3# Package: NaturalDocs::Parser::Native 4# 5############################################################################### 6# 7# A package that converts comments from Natural Docs' native format into <NaturalDocs::Parser::ParsedTopic> objects. 8# Unlike most second-level packages, these are packages and not object classes. 9# 10############################################################################### 11 12# This file is part of Natural Docs, which is Copyright � 2003-2010 Greg Valure 13# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL) 14# Refer to License.txt for the complete details 15 16 17use strict; 18use integer; 19 20package NaturalDocs::Parser::Native; 21 22 23############################################################################### 24# Group: Variables 25 26 27# Return values of TagType(). Not documented here. 28use constant POSSIBLE_OPENING_TAG => 1; 29use constant POSSIBLE_CLOSING_TAG => 2; 30use constant NOT_A_TAG => 3; 31 32 33# 34# var: package 35# 36# A <SymbolString> representing the package normal topics will be a part of at the current point in the file. This is a package variable 37# because it needs to be reserved between function calls. 38# 39my $package; 40 41# 42# hash: functionListIgnoredHeadings 43# 44# An existence hash of all the headings that prevent the parser from creating function list symbols. Whenever one of 45# these headings are used in a function list topic, symbols are not created from definition lists until the next heading. The keys 46# are in all lowercase. 47# 48my %functionListIgnoredHeadings = ( 'parameters' => 1, 49 'parameter' => 1, 50 'params' => 1, 51 'param' => 1, 52 'arguments' => 1, 53 'argument' => 1, 54 'args' => 1, 55 'arg' => 1 ); 56 57 58############################################################################### 59# Group: Interface Functions 60 61 62# 63# Function: Start 64# 65# This will be called whenever a file is about to be parsed. It allows the package to reset its internal state. 66# 67sub Start 68 { 69 my ($self) = @_; 70 $package = undef; 71 }; 72 73 74# 75# Function: IsMine 76# 77# Examines the comment and returns whether it is *definitely* Natural Docs content, i.e. it is owned by this package. Note 78# that a comment can fail this function and still be interpreted as a Natural Docs content, for example a JavaDoc-styled comment 79# that doesn't have header lines but no JavaDoc tags either. 80# 81# Parameters: 82# 83# commentLines - An arrayref of the comment lines. Must have been run through <NaturalDocs::Parser->CleanComment()>. 84# isJavaDoc - Whether the comment was JavaDoc-styled. 85# 86# Returns: 87# 88# Whether the comment is *definitely* Natural Docs content. 89# 90sub IsMine #(string[] commentLines, bool isJavaDoc) 91 { 92 my ($self, $commentLines, $isJavaDoc) = @_; 93 94 # Skip to the first line with content. 95 my $line = 0; 96 97 while ($line < scalar @$commentLines && !length $commentLines->[$line]) 98 { $line++; }; 99 100 return $self->ParseHeaderLine($commentLines->[$line]); 101 }; 102 103 104 105# 106# Function: ParseComment 107# 108# This will be called whenever a comment capable of containing Natural Docs content is found. 109# 110# Parameters: 111# 112# commentLines - An arrayref of the comment lines. Must have been run through <NaturalDocs::Parser->CleanComment()>. 113# *The original memory will be changed.* 114# isJavaDoc - Whether the comment is JavaDoc styled. 115# lineNumber - The line number of the first of the comment lines. 116# parsedTopics - A reference to the array where any new <NaturalDocs::Parser::ParsedTopics> should be placed. 117# 118# Returns: 119# 120# The number of parsed topics added to the array, or zero if none. 121# 122sub ParseComment #(commentLines, isJavaDoc, lineNumber, parsedTopics) 123 { 124 my ($self, $commentLines, $isJavaDoc, $lineNumber, $parsedTopics) = @_; 125 126 my $topicCount = 0; 127 my $prevLineBlank = 1; 128 my $inCodeSection = 0; 129 130 my ($type, $scope, $isPlural, $title, $symbol); 131 #my $package; # package variable. 132 my ($newKeyword, $newTitle); 133 134 my $index = 0; 135 136 my $bodyStart = 0; 137 my $bodyEnd = 0; # Not inclusive. 138 139 while ($index < scalar @$commentLines) 140 { 141 # Everything but leading whitespace was removed beforehand. 142 143 # If we're in a code section... 144 if ($inCodeSection) 145 { 146 if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i) 147 { $inCodeSection = undef; }; 148 149 $prevLineBlank = 0; 150 $bodyEnd++; 151 } 152 153 # If the line is empty... 154 elsif (!length($commentLines->[$index])) 155 { 156 $prevLineBlank = 1; 157 158 if ($topicCount) 159 { $bodyEnd++; }; 160 } 161 162 # If the line has a recognized header and the previous line is blank... 163 elsif ($prevLineBlank && (($newKeyword, $newTitle) = $self->ParseHeaderLine($commentLines->[$index])) ) 164 { 165 # Process the previous one, if any. 166 167 if ($topicCount) 168 { 169 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) 170 { $package = undef; }; 171 172 my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural); 173 my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural); 174 push @$parsedTopics, $newTopic; 175 176 $package = $newTopic->Package(); 177 }; 178 179 $title = $newTitle; 180 181 my $typeInfo; 182 ($type, $typeInfo, $isPlural) = NaturalDocs::Topics->KeywordInfo($newKeyword); 183 $scope = $typeInfo->Scope(); 184 185 $bodyStart = $index + 1; 186 $bodyEnd = $index + 1; 187 188 $topicCount++; 189 190 $prevLineBlank = 0; 191 } 192 193 # If we're on a non-empty, non-header line of a JavaDoc-styled comment and we haven't started a topic yet... 194 elsif ($isJavaDoc && !$topicCount) 195 { 196 $type = undef; 197 $scope = ::SCOPE_NORMAL(); # The scope repair and topic merging processes will handle if this is a class topic. 198 $isPlural = undef; 199 $title = undef; 200 $symbol = undef; 201 202 $bodyStart = $index; 203 $bodyEnd = $index + 1; 204 205 $topicCount++; 206 207 $prevLineBlank = undef; 208 } 209 210 # If we're on a normal content line within a topic 211 elsif ($topicCount) 212 { 213 $prevLineBlank = 0; 214 $bodyEnd++; 215 216 if ($commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i) 217 { $inCodeSection = 1; }; 218 }; 219 220 221 $index++; 222 }; 223 224 225 # Last one, if any. This is the only one that gets the prototypes. 226 if ($topicCount) 227 { 228 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) 229 { $package = undef; }; 230 231 my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural); 232 my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural); 233 push @$parsedTopics, $newTopic; 234 $topicCount++; 235 236 $package = $newTopic->Package(); 237 }; 238 239 return $topicCount; 240 }; 241 242 243# 244# Function: ParseHeaderLine 245# 246# If the passed line is a topic header, returns the array ( keyword, title ). Otherwise returns an empty array. 247# 248sub ParseHeaderLine #(line) 249 { 250 my ($self, $line) = @_; 251 252 if ($line =~ /^ *([a-z0-9 ]*[a-z0-9]): +(.*)$/i) 253 { 254 my ($keyword, $title) = ($1, $2); 255 256 # We need to do it this way because if you do "if (ND:T->KeywordInfo($keyword)" and the last element of the array it 257 # returns is false, the statement is false. That is really retarded, but there it is. 258 my ($type, undef, undef) = NaturalDocs::Topics->KeywordInfo($keyword); 259 260 if ($type) 261 { return ($keyword, $title); } 262 else 263 { return ( ); }; 264 } 265 else 266 { return ( ); }; 267 }; 268 269 270 271############################################################################### 272# Group: Support Functions 273 274 275# 276# Function: MakeParsedTopic 277# 278# Creates a <NaturalDocs::Parser::ParsedTopic> object for the passed parameters. Scope is gotten from 279# the package variable <package> instead of from the parameters. The summary is generated from the body. 280# 281# Parameters: 282# 283# type - The <TopicType>. May be undef for headerless topics. 284# title - The title of the topic. May be undef for headerless topics. 285# package - The package <SymbolString> the topic appears in. 286# body - The topic's body in <NDMarkup>. 287# lineNumber - The topic's line number. 288# isList - Whether the topic is a list. 289# 290# Returns: 291# 292# The <NaturalDocs::Parser::ParsedTopic> object. 293# 294sub MakeParsedTopic #(type, title, package, body, lineNumber, isList) 295 { 296 my ($self, $type, $title, $package, $body, $lineNumber, $isList) = @_; 297 298 my $summary; 299 300 if (defined $body) 301 { $summary = NaturalDocs::Parser->GetSummaryFromBody($body); }; 302 303 return NaturalDocs::Parser::ParsedTopic->New($type, $title, $package, undef, undef, $summary, 304 $body, $lineNumber, $isList); 305 }; 306 307 308# 309# Function: FormatBody 310# 311# Converts the section body to <NDMarkup>. 312# 313# Parameters: 314# 315# commentLines - The arrayref of comment lines. 316# startingIndex - The starting index of the body to format. 317# endingIndex - The ending index of the body to format, *not* inclusive. 318# type - The type of the section. May be undef for headerless comments. 319# isList - Whether it's a list topic. 320# 321# Returns: 322# 323# The body formatted in <NDMarkup>. 324# 325sub FormatBody #(commentLines, startingIndex, endingIndex, type, isList) 326 { 327 my ($self, $commentLines, $startingIndex, $endingIndex, $type, $isList) = @_; 328 329 use constant TAG_NONE => 1; 330 use constant TAG_PARAGRAPH => 2; 331 use constant TAG_BULLETLIST => 3; 332 use constant TAG_DESCRIPTIONLIST => 4; 333 use constant TAG_HEADING => 5; 334 use constant TAG_PREFIXCODE => 6; 335 use constant TAG_TAGCODE => 7; 336 337 my %tagEnders = ( TAG_NONE() => '', 338 TAG_PARAGRAPH() => '</p>', 339 TAG_BULLETLIST() => '</li></ul>', 340 TAG_DESCRIPTIONLIST() => '</dd></dl>', 341 TAG_HEADING() => '</h>', 342 TAG_PREFIXCODE() => '</code>', 343 TAG_TAGCODE() => '</code>' ); 344 345 my $topLevelTag = TAG_NONE; 346 347 my $output; 348 my $textBlock; 349 my $prevLineBlank = 1; 350 351 my $codeBlock; 352 my $removedCodeSpaces; 353 354 my $ignoreListSymbols; 355 356 my $index = $startingIndex; 357 358 while ($index < $endingIndex) 359 { 360 # If we're in a tagged code section... 361 if ($topLevelTag == TAG_TAGCODE) 362 { 363 if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i) 364 { 365 $codeBlock =~ s/\n+$//; 366 $output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>'; 367 $codeBlock = undef; 368 $topLevelTag = TAG_NONE; 369 $prevLineBlank = undef; 370 } 371 else 372 { 373 $self->AddToCodeBlock($commentLines->[$index], \$codeBlock, \$removedCodeSpaces); 374 }; 375 } 376 377 # If the line starts with a code designator... 378 elsif ($commentLines->[$index] =~ /^ *[>:|](.*)$/) 379 { 380 my $code = $1; 381 382 if ($topLevelTag == TAG_PREFIXCODE) 383 { 384 $self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces); 385 } 386 else # $topLevelTag != TAG_PREFIXCODE 387 { 388 if (defined $textBlock) 389 { 390 $output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag}; 391 $textBlock = undef; 392 }; 393 394 $topLevelTag = TAG_PREFIXCODE; 395 $output .= '<code type="anonymous">'; 396 $self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces); 397 }; 398 } 399 400 # If we're not in either code style... 401 else 402 { 403 # Strip any leading whitespace. 404 $commentLines->[$index] =~ s/^ +//; 405 406 # If we were in a prefixed code section... 407 if ($topLevelTag == TAG_PREFIXCODE) 408 { 409 $codeBlock =~ s/\n+$//; 410 $output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>'; 411 $codeBlock = undef; 412 $topLevelTag = TAG_NONE; 413 $prevLineBlank = undef; 414 }; 415 416 417 # If the line is blank... 418 if (!length($commentLines->[$index])) 419 { 420 # End a paragraph. Everything else ignores it for now. 421 if ($topLevelTag == TAG_PARAGRAPH) 422 { 423 $output .= $self->RichFormatTextBlock($textBlock) . '</p>'; 424 $textBlock = undef; 425 $topLevelTag = TAG_NONE; 426 }; 427 428 $prevLineBlank = 1; 429 } 430 431 # If the line starts with a bullet... 432 elsif ($commentLines->[$index] =~ /^[-\*o+] +([^ ].*)$/ && 433 substr($1, 0, 2) ne '- ') # Make sure "o - Something" is a definition, not a bullet. 434 { 435 my $bulletedText = $1; 436 437 if (defined $textBlock) 438 { $output .= $self->RichFormatTextBlock($textBlock); }; 439 440 if ($topLevelTag == TAG_BULLETLIST) 441 { 442 $output .= '</li><li>'; 443 } 444 else #($topLevelTag != TAG_BULLETLIST) 445 { 446 $output .= $tagEnders{$topLevelTag} . '<ul><li>'; 447 $topLevelTag = TAG_BULLETLIST; 448 }; 449 450 $textBlock = $bulletedText; 451 452 $prevLineBlank = undef; 453 } 454 455 # If the line looks like a description list entry... 456 elsif ($commentLines->[$index] =~ /^(.+?) +- +([^ ].*)$/ && $topLevelTag != TAG_PARAGRAPH) 457 { 458 my $entry = $1; 459 my $description = $2; 460 461 if (defined $textBlock) 462 { $output .= $self->RichFormatTextBlock($textBlock); }; 463 464 if ($topLevelTag == TAG_DESCRIPTIONLIST) 465 { 466 $output .= '</dd>'; 467 } 468 else #($topLevelTag != TAG_DESCRIPTIONLIST) 469 { 470 $output .= $tagEnders{$topLevelTag} . '<dl>'; 471 $topLevelTag = TAG_DESCRIPTIONLIST; 472 }; 473 474 if (($isList && !$ignoreListSymbols) || $type eq ::TOPIC_ENUMERATION()) 475 { 476 $output .= '<ds>' . NaturalDocs::NDMarkup->ConvertAmpChars($entry) . '</ds><dd>'; 477 } 478 else 479 { 480 $output .= '<de>' . NaturalDocs::NDMarkup->ConvertAmpChars($entry) . '</de><dd>'; 481 }; 482 483 $textBlock = $description; 484 485 $prevLineBlank = undef; 486 } 487 488 # If the line could be a header... 489 elsif ($prevLineBlank && $commentLines->[$index] =~ /^(.*)([^ ]):$/) 490 { 491 my $headerText = $1 . $2; 492 493 if (defined $textBlock) 494 { 495 $output .= $self->RichFormatTextBlock($textBlock); 496 $textBlock = undef; 497 } 498 499 $output .= $tagEnders{$topLevelTag}; 500 $topLevelTag = TAG_NONE; 501 502 $output .= '<h>' . $self->RichFormatTextBlock($headerText) . '</h>'; 503 504 if ($type eq ::TOPIC_FUNCTION() && $isList) 505 { 506 $ignoreListSymbols = exists $functionListIgnoredHeadings{lc($headerText)}; 507 }; 508 509 $prevLineBlank = undef; 510 } 511 512 # If the line looks like a code tag... 513 elsif ($commentLines->[$index] =~ /^\( *(?:(?:start|begin)? +)?(table|code|example|diagram) *\)$/i) 514 { 515 my $codeType = lc($1); 516 517 if (defined $textBlock) 518 { 519 $output .= $self->RichFormatTextBlock($textBlock); 520 $textBlock = undef; 521 }; 522 523 if ($codeType eq 'example') 524 { $codeType = 'anonymous'; } 525 elsif ($codeType eq 'table' || $codeType eq 'diagram') 526 { $codeType = 'text'; } 527 # else leave it 'code' 528 529 $output .= $tagEnders{$topLevelTag} . '<code type="' . $codeType . '">'; 530 $topLevelTag = TAG_TAGCODE; 531 } 532 533 # If the line looks like an inline image... 534 elsif ($commentLines->[$index] =~ /^(\( *see +)([^\)]+?)( *\))$/i) 535 { 536 if (defined $textBlock) 537 { 538 $output .= $self->RichFormatTextBlock($textBlock); 539 $textBlock = undef; 540 }; 541 542 $output .= $tagEnders{$topLevelTag}; 543 $topLevelTag = TAG_NONE; 544 545 $output .= '<img mode="inline" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" ' 546 . 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '">'; 547 548 $prevLineBlank = undef; 549 } 550 551 # If the line isn't any of those, we consider it normal text. 552 else 553 { 554 # A blank line followed by normal text ends lists. We don't handle this when we detect if the line's blank because 555 # we don't want blank lines between list items to break the list. 556 if ($prevLineBlank && ($topLevelTag == TAG_BULLETLIST || $topLevelTag == TAG_DESCRIPTIONLIST)) 557 { 558 $output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag} . '<p>'; 559 560 $topLevelTag = TAG_PARAGRAPH; 561 $textBlock = undef; 562 } 563 564 elsif ($topLevelTag == TAG_NONE) 565 { 566 $output .= '<p>'; 567 $topLevelTag = TAG_PARAGRAPH; 568 # textBlock will already be undef. 569 }; 570 571 if (defined $textBlock) 572 { $textBlock .= ' '; }; 573 574 $textBlock .= $commentLines->[$index]; 575 576 $prevLineBlank = undef; 577 }; 578 }; 579 580 $index++; 581 }; 582 583 # Clean up anything left dangling. 584 if (defined $textBlock) 585 { 586 $output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag}; 587 } 588 elsif (defined $codeBlock) 589 { 590 $codeBlock =~ s/\n+$//; 591 $output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>'; 592 }; 593 594 return $output; 595 }; 596 597 598# 599# Function: AddToCodeBlock 600# 601# Adds a line of text to a code block, handling all the indentation processing required. 602# 603# Parameters: 604# 605# line - The line of text to add. 606# codeBlockRef - A reference to the code block to add it to. 607# removedSpacesRef - A reference to a variable to hold the number of spaces removed. It needs to be stored between calls. 608# It will reset itself automatically when the code block codeBlockRef points to is undef. 609# 610sub AddToCodeBlock #(line, codeBlockRef, removedSpacesRef) 611 { 612 my ($self, $line, $codeBlockRef, $removedSpacesRef) = @_; 613 614 $line =~ /^( *)(.*)$/; 615 my ($spaces, $code) = ($1, $2); 616 617 if (!defined $$codeBlockRef) 618 { 619 if (length($code)) 620 { 621 $$codeBlockRef = $code . "\n"; 622 $$removedSpacesRef = length($spaces); 623 }; 624 # else ignore leading line breaks. 625 } 626 627 elsif (length $code) 628 { 629 # Make sure we have the minimum amount of spaces to the left possible. 630 if (length($spaces) != $$removedSpacesRef) 631 { 632 my $spaceDifference = abs( length($spaces) - $$removedSpacesRef ); 633 my $spacesToAdd = ' ' x $spaceDifference; 634 635 if (length($spaces) > $$removedSpacesRef) 636 { 637 $$codeBlockRef .= $spacesToAdd; 638 } 639 else 640 { 641 $$codeBlockRef =~ s/^(.)/$spacesToAdd . $1/gme; 642 $$removedSpacesRef = length($spaces); 643 }; 644 }; 645 646 $$codeBlockRef .= $code . "\n"; 647 } 648 649 else # (!length $code) 650 { 651 $$codeBlockRef .= "\n"; 652 }; 653 }; 654 655 656# 657# Function: RichFormatTextBlock 658# 659# Applies rich <NDMarkup> formatting to a chunk of text. This includes both amp chars, formatting tags, and link tags. 660# 661# Parameters: 662# 663# text - The block of text to format. 664# 665# Returns: 666# 667# The formatted text block. 668# 669sub RichFormatTextBlock #(text) 670 { 671 my ($self, $text) = @_; 672 my $output; 673 674 675 # First find bare urls, e-mail addresses, and images. We have to do this before the split because they may contain underscores 676 # or asterisks. We have to mark the tags with \x1E and \x1F so they don't get confused with angle brackets from the comment. 677 # We can't convert the amp chars beforehand because we need lookbehinds in the regexps below and they need to be 678 # constant length. Sucks, huh? 679 680 $text =~ s{ 681 # The previous character can't be an alphanumeric or an opening angle bracket. 682 (?<! [a-z0-9<] ) 683 684 # Optional mailto:. Ignored in output. 685 (?:mailto\:)? 686 687 # Begin capture 688 ( 689 690 # The user portion. Alphanumeric and - _. Dots can appear between, but not at the edges or more than 691 # one in a row. 692 (?: [a-z0-9\-_]+ \. )* [a-z0-9\-_]+ 693 694 @ 695 696 # The domain. Alphanumeric and -. Dots same as above, however, there must be at least two sections 697 # and the last one must be two to four alphanumeric characters (.com, .uk, .info, .203 for IP addresses) 698 (?: [a-z0-9\-]+ \. )+ [a-z]{2,4} 699 700 # End capture. 701 ) 702 703 # The next character can't be an alphanumeric, which should prevent .abcde from matching the two to 704 # four character requirement, or a closing angle bracket. 705 (?! [a-z0-9>] ) 706 707 } 708 709 {"\x1E" . 'email target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" ' 710 . 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe; 711 712 $text =~ s{ 713 # The previous character can't be an alphanumeric or an opening angle bracket. 714 (?<! [a-z0-9<] ) 715 716 # Begin capture. 717 ( 718 719 # URL must start with one of the acceptable protocols. 720 (?:http|https|ftp|news|file)\: 721 722 # The acceptable URL characters as far as I know. 723 [a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*\.\,]* 724 725 # The URL characters minus period and comma. If it ends on them, they're probably intended as 726 # punctuation. 727 [a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*] 728 729 # End capture. 730 ) 731 732 # The next character must not be an acceptable character or a closing angle bracket. It must also not be a 733 # dot and then an acceptable character. These will prevent the URL from ending early just to get a match. 734 (?! \.?[a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*\>] ) 735 736 } 737 738 {"\x1E" . 'url target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" ' 739 . 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe; 740 741 742 # Find image links. Inline images should already be pulled out by now. 743 744 $text =~ s{(\( *see +)([^\)\<\>]+?)( *\))} 745 {"\x1E" . 'img mode="link" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" ' 746 . 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '"' . "\x1F"}gie; 747 748 749 750 # Split the text from the potential tags. 751 752 my @tempTextBlocks = split(/([\*_<>\x1E\x1F])/, $text); 753 754 # Since the symbols are considered dividers, empty strings could appear between two in a row or at the beginning/end of the 755 # array. This could seriously screw up TagType(), so we need to get rid of them. 756 my @textBlocks; 757 758 while (scalar @tempTextBlocks) 759 { 760 my $tempTextBlock = shift @tempTextBlocks; 761 762 if (length $tempTextBlock) 763 { push @textBlocks, $tempTextBlock; }; 764 }; 765 766 767 my $bold; 768 my $underline; 769 my $underlineHasWhitespace; 770 771 my $index = 0; 772 773 while ($index < scalar @textBlocks) 774 { 775 if ($textBlocks[$index] eq "\x1E") 776 { 777 $output .= '<'; 778 $index++; 779 780 while ($textBlocks[$index] ne "\x1F") 781 { 782 $output .= $textBlocks[$index]; 783 $index++; 784 }; 785 786 $output .= '>'; 787 } 788 789 elsif ($textBlocks[$index] eq '<' && $self->TagType(\@textBlocks, $index) == POSSIBLE_OPENING_TAG) 790 { 791 my $endingIndex = $self->ClosingTag(\@textBlocks, $index, undef); 792 793 if ($endingIndex != -1) 794 { 795 my $linkText; 796 $index++; 797 798 while ($index < $endingIndex) 799 { 800 $linkText .= $textBlocks[$index]; 801 $index++; 802 }; 803 # Index will be incremented again at the end of the loop. 804 805 $linkText = NaturalDocs::NDMarkup->ConvertAmpChars($linkText); 806 807 if ($linkText =~ /^(?:mailto\:)?((?:[a-z0-9\-_]+\.)*[a-z0-9\-_]+@(?:[a-z0-9\-]+\.)+[a-z]{2,4})$/i) 808 { $output .= '<email target="' . $1 . '" name="' . $1 . '">'; } 809 elsif ($linkText =~ /^(.+?) at (?:mailto\:)?((?:[a-z0-9\-_]+\.)*[a-z0-9\-_]+@(?:[a-z0-9\-]+\.)+[a-z]{2,4})$/i) 810 { $output .= '<email target="' . $2 . '" name="' . $1 . '">'; } 811 elsif ($linkText =~ /^(?:http|https|ftp|news|file)\:/i) 812 { $output .= '<url target="' . $linkText . '" name="' . $linkText . '">'; } 813 elsif ($linkText =~ /^(.+?) at ((?:http|https|ftp|news|file)\:.+)/i) 814 { $output .= '<url target="' . $2 . '" name="' . $1 . '">'; } 815 else 816 { $output .= '<link target="' . $linkText . '" name="' . $linkText . '" original="<' . $linkText . '>">'; }; 817 } 818 819 else # it's not a link. 820 { 821 $output .= '<'; 822 }; 823 } 824 825 elsif ($textBlocks[$index] eq '*') 826 { 827 my $tagType = $self->TagType(\@textBlocks, $index); 828 829 if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, undef) != -1) 830 { 831 # ClosingTag() makes sure tags aren't opened multiple times in a row. 832 $bold = 1; 833 $output .= '<b>'; 834 } 835 elsif ($bold && $tagType == POSSIBLE_CLOSING_TAG) 836 { 837 $bold = undef; 838 $output .= '</b>'; 839 } 840 else 841 { 842 $output .= '*'; 843 }; 844 } 845 846 elsif ($textBlocks[$index] eq '_') 847 { 848 my $tagType = $self->TagType(\@textBlocks, $index); 849 850 if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, \$underlineHasWhitespace) != -1) 851 { 852 # ClosingTag() makes sure tags aren't opened multiple times in a row. 853 $underline = 1; 854 #underlineHasWhitespace is set by ClosingTag(). 855 $output .= '<u>'; 856 } 857 elsif ($underline && $tagType == POSSIBLE_CLOSING_TAG) 858 { 859 $underline = undef; 860 #underlineHasWhitespace will be reset by the next opening underline. 861 $output .= '</u>'; 862 } 863 elsif ($underline && !$underlineHasWhitespace) 864 { 865 # If there's no whitespace between underline tags, all underscores are replaced by spaces so 866 # _some_underlined_text_ becomes <u>some underlined text</u>. The standard _some underlined text_ 867 # will work too. 868 $output .= ' '; 869 } 870 else 871 { 872 $output .= '_'; 873 }; 874 } 875 876 else # plain text or a > that isn't part of a link 877 { 878 $output .= NaturalDocs::NDMarkup->ConvertAmpChars($textBlocks[$index]); 879 }; 880 881 $index++; 882 }; 883 884 return $output; 885 }; 886 887 888# 889# Function: TagType 890# 891# Returns whether the tag is a possible opening or closing tag, or neither. "Possible" because it doesn't check if an opening tag is 892# closed or a closing tag is opened, just whether the surrounding characters allow it to be a candidate for a tag. For example, in 893# "A _B" the underscore is a possible opening underline tag, but in "A_B" it is not. Support function for <RichFormatTextBlock()>. 894# 895# Parameters: 896# 897# textBlocks - A reference to an array of text blocks. 898# index - The index of the tag. 899# 900# Returns: 901# 902# POSSIBLE_OPENING_TAG, POSSIBLE_CLOSING_TAG, or NOT_A_TAG. 903# 904sub TagType #(textBlocks, index) 905 { 906 my ($self, $textBlocks, $index) = @_; 907 908 909 # Possible opening tags 910 911 if ( ( $textBlocks->[$index] =~ /^[\*_<]$/ ) && 912 913 # Before it must be whitespace, the beginning of the text, or ({["'-/*_. 914 ( $index == 0 || $textBlocks->[$index-1] =~ /[\ \t\n\(\{\[\"\'\-\/\*\_]$/ ) && 915 916 # Notes for 2.0: Include Spanish upside down ! and ? as well as opening quotes (66) and apostrophes (6). Look into 917 # Unicode character classes as well. 918 919 # After it must be non-whitespace. 920 ( $index + 1 < scalar @$textBlocks && $textBlocks->[$index+1] !~ /^[\ \t\n]/) && 921 922 # Make sure we don't accept <<, <=, <-, or *= as opening tags. 923 ( $textBlocks->[$index] ne '<' || $textBlocks->[$index+1] !~ /^[<=-]/ ) && 924 ( $textBlocks->[$index] ne '*' || $textBlocks->[$index+1] !~ /^[\=\*]/ ) && 925 926 # Make sure we don't accept * or _ before it unless it's <. 927 ( $textBlocks->[$index] eq '<' || $index == 0 || $textBlocks->[$index-1] !~ /[\*\_]$/) ) 928 { 929 return POSSIBLE_OPENING_TAG; 930 } 931 932 933 # Possible closing tags 934 935 elsif ( ( $textBlocks->[$index] =~ /^[\*_>]$/) && 936 937 # After it must be whitespace, the end of the text, or )}].,!?"';:-/*_. 938 ( $index + 1 == scalar @$textBlocks || $textBlocks->[$index+1] =~ /^[ \t\n\)\]\}\.\,\!\?\"\'\;\:\-\/\*\_]/ || 939 # Links also get plurals, like <link>s, <linx>es, <link>'s, and <links>'. 940 ( $textBlocks->[$index] eq '>' && $textBlocks->[$index+1] =~ /^(?:es|s|\')/ ) ) && 941 942 # Notes for 2.0: Include closing quotes (99) and apostrophes (9). Look into Unicode character classes as well. 943 944 # Before it must be non-whitespace. 945 ( $index != 0 && $textBlocks->[$index-1] !~ /[ \t\n]$/ ) && 946 947 # Make sure we don't accept >>, ->, or => as closing tags. >= is already taken care of. 948 ( $textBlocks->[$index] ne '>' || $textBlocks->[$index-1] !~ /[>=-]$/ ) && 949 950 # Make sure we don't accept * or _ after it unless it's >. 951 ( $textBlocks->[$index] eq '>' || $textBlocks->[$index+1] !~ /[\*\_]$/) ) 952 { 953 return POSSIBLE_CLOSING_TAG; 954 } 955 956 else 957 { 958 return NOT_A_TAG; 959 }; 960 961 }; 962 963 964# 965# Function: ClosingTag 966# 967# Returns whether a tag is closed or not, where it's closed if it is, and optionally whether there is any whitespace between the 968# tags. Support function for <RichFormatTextBlock()>. 969# 970# The results of this function are in full context, meaning that if it says a tag is closed, it can be interpreted as that tag in the 971# final output. It takes into account any spoiling factors, like there being two opening tags in a row. 972# 973# Parameters: 974# 975# textBlocks - A reference to an array of text blocks. 976# index - The index of the opening tag. 977# hasWhitespaceRef - A reference to the variable that will hold whether there is whitespace between the tags or not. If 978# undef, the function will not check. If the tag is not closed, the variable will not be changed. 979# 980# Returns: 981# 982# If the tag is closed, it returns the index of the closing tag and puts whether there was whitespace between the tags in 983# hasWhitespaceRef if it was specified. If the tag is not closed, it returns -1 and doesn't touch the variable pointed to by 984# hasWhitespaceRef. 985# 986sub ClosingTag #(textBlocks, index, hasWhitespace) 987 { 988 my ($self, $textBlocks, $index, $hasWhitespaceRef) = @_; 989 990 my $hasWhitespace; 991 my $closingTag; 992 993 if ($textBlocks->[$index] eq '*' || $textBlocks->[$index] eq '_') 994 { $closingTag = $textBlocks->[$index]; } 995 elsif ($textBlocks->[$index] eq '<') 996 { $closingTag = '>'; } 997 else 998 { return -1; }; 999 1000 my $beginningIndex = $index; 1001 $index++; 1002 1003 while ($index < scalar @$textBlocks) 1004 { 1005 if ($textBlocks->[$index] eq '<' && $self->TagType($textBlocks, $index) == POSSIBLE_OPENING_TAG) 1006 { 1007 # If we hit a < and we're checking whether a link is closed, it's not. The first < becomes literal and the second one 1008 # becomes the new link opening. 1009 if ($closingTag eq '>') 1010 { 1011 return -1; 1012 } 1013 1014 # If we're not searching for the end of a link, we have to skip the link because formatting tags cannot appear within 1015 # them. That's of course provided it's closed. 1016 else 1017 { 1018 my $linkHasWhitespace; 1019 1020 my $endIndex = $self->ClosingTag($textBlocks, $index, 1021 ($hasWhitespaceRef && !$hasWhitespace ? \$linkHasWhitespace : undef) ); 1022 1023 if ($endIndex != -1) 1024 { 1025 if ($linkHasWhitespace) 1026 { $hasWhitespace = 1; }; 1027 1028 # index will be incremented again at the end of the loop, which will bring us past the link's >. 1029 $index = $endIndex; 1030 }; 1031 }; 1032 } 1033 1034 elsif ($textBlocks->[$index] eq $closingTag) 1035 { 1036 my $tagType = $self->TagType($textBlocks, $index); 1037 1038 if ($tagType == POSSIBLE_CLOSING_TAG) 1039 { 1040 # There needs to be something between the tags for them to count. 1041 if ($index == $beginningIndex + 1) 1042 { return -1; } 1043 else 1044 { 1045 # Success! 1046 1047 if ($hasWhitespaceRef) 1048 { $$hasWhitespaceRef = $hasWhitespace; }; 1049 1050 return $index; 1051 }; 1052 } 1053 1054 # If there are two opening tags of the same type, the first becomes literal and the next becomes part of a tag. 1055 elsif ($tagType == POSSIBLE_OPENING_TAG) 1056 { return -1; } 1057 } 1058 1059 elsif ($hasWhitespaceRef && !$hasWhitespace) 1060 { 1061 if ($textBlocks->[$index] =~ /[ \t\n]/) 1062 { $hasWhitespace = 1; }; 1063 }; 1064 1065 $index++; 1066 }; 1067 1068 # Hit the end of the text blocks if we're here. 1069 return -1; 1070 }; 1071 1072 10731; 1074