1############################################################################### 2# 3# Package: NaturalDocs::Parser 4# 5############################################################################### 6# 7# A package that coordinates source file parsing between the <NaturalDocs::Languages::Base>-derived objects and its own 8# sub-packages such as <NaturalDocs::Parser::Native>. Also handles sending symbols to <NaturalDocs::SymbolTable> and 9# other generic topic processing. 10# 11# Usage and Dependencies: 12# 13# - Prior to use, <NaturalDocs::Settings>, <NaturalDocs::Languages>, <NaturalDocs::Project>, <NaturalDocs::SymbolTable>, 14# and <NaturalDocs::ClassHierarchy> must be initialized. <NaturalDocs::SymbolTable> and <NaturalDocs::ClassHierarchy> 15# do not have to be fully resolved. 16# 17# - Aside from that, the package is ready to use right away. It does not have its own initialization function. 18# 19############################################################################### 20 21# This file is part of Natural Docs, which is Copyright � 2003-2010 Greg Valure 22# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL) 23# Refer to License.txt for the complete details 24 25use NaturalDocs::Parser::ParsedTopic; 26use NaturalDocs::Parser::Native; 27use NaturalDocs::Parser::JavaDoc; 28 29use strict; 30use integer; 31 32package NaturalDocs::Parser; 33 34 35 36############################################################################### 37# Group: Variables 38 39 40# 41# var: sourceFile 42# 43# The source <FileName> currently being parsed. 44# 45my $sourceFile; 46 47# 48# var: language 49# 50# The language object for the file, derived from <NaturalDocs::Languages::Base>. 51# 52my $language; 53 54# 55# Array: parsedFile 56# 57# An array of <NaturalDocs::Parser::ParsedTopic> objects. 58# 59my @parsedFile; 60 61 62# 63# bool: parsingForInformation 64# Whether <ParseForInformation()> was called. If false, then <ParseForBuild()> was called. 65# 66my $parsingForInformation; 67 68 69 70############################################################################### 71# Group: Functions 72 73# 74# Function: ParseForInformation 75# 76# Parses the input file for information. Will update the information about the file in <NaturalDocs::SymbolTable> and 77# <NaturalDocs::Project>. 78# 79# Parameters: 80# 81# file - The <FileName> to parse. 82# 83sub ParseForInformation #(file) 84 { 85 my ($self, $file) = @_; 86 $sourceFile = $file; 87 88 $parsingForInformation = 1; 89 90 # Watch this parse so we detect any changes. 91 NaturalDocs::SymbolTable->WatchFileForChanges($sourceFile); 92 NaturalDocs::ClassHierarchy->WatchFileForChanges($sourceFile); 93 NaturalDocs::SourceDB->WatchFileForChanges($sourceFile); 94 95 my $defaultMenuTitle = $self->Parse(); 96 97 foreach my $topic (@parsedFile) 98 { 99 # Add a symbol for the topic. 100 101 my $type = $topic->Type(); 102 if ($type eq ::TOPIC_ENUMERATION()) 103 { $type = ::TOPIC_TYPE(); }; 104 105 NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $type, 106 $topic->Prototype(), $topic->Summary()); 107 108 109 # You can't put the function call directly in a while with a regex. It has to sit in a variable to work. 110 my $body = $topic->Body(); 111 112 113 # If it's a list or enum topic, add a symbol for each description list entry. 114 115 if ($topic->IsList() || $topic->Type() eq ::TOPIC_ENUMERATION()) 116 { 117 # We'll hijack the enum constants to apply to non-enum behavior too. 118 my $behavior; 119 120 if ($topic->Type() eq ::TOPIC_ENUMERATION()) 121 { 122 $type = ::TOPIC_CONSTANT(); 123 $behavior = $language->EnumValues(); 124 } 125 elsif (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() == ::SCOPE_ALWAYS_GLOBAL()) 126 { 127 $behavior = ::ENUM_GLOBAL(); 128 } 129 else 130 { 131 $behavior = ::ENUM_UNDER_PARENT(); 132 }; 133 134 while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g) 135 { 136 my ($listTextSymbol, $listSummary) = ($1, $2); 137 138 $listTextSymbol = NaturalDocs::NDMarkup->RestoreAmpChars($listTextSymbol); 139 my $listSymbol = NaturalDocs::SymbolString->FromText($listTextSymbol); 140 141 if ($behavior == ::ENUM_UNDER_PARENT()) 142 { $listSymbol = NaturalDocs::SymbolString->Join($topic->Package(), $listSymbol); } 143 elsif ($behavior == ::ENUM_UNDER_TYPE()) 144 { $listSymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $listSymbol); }; 145 146 NaturalDocs::SymbolTable->AddSymbol($listSymbol, $sourceFile, $type, undef, 147 $self->GetSummaryFromDescriptionList($listSummary)); 148 }; 149 }; 150 151 152 # Add references in the topic. 153 154 while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g) 155 { 156 my $linkText = NaturalDocs::NDMarkup->RestoreAmpChars($1); 157 my $linkSymbol = NaturalDocs::SymbolString->FromText($linkText); 158 159 NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $linkSymbol, 160 $topic->Package(), $topic->Using(), $sourceFile); 161 }; 162 163 164 # Add images in the topic. 165 166 while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g) 167 { 168 my $target = NaturalDocs::NDMarkup->RestoreAmpChars($1); 169 NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $target); 170 }; 171 }; 172 173 # Handle any changes to the file. 174 NaturalDocs::ClassHierarchy->AnalyzeChanges(); 175 NaturalDocs::SymbolTable->AnalyzeChanges(); 176 NaturalDocs::SourceDB->AnalyzeWatchedFileChanges(); 177 178 # Update project on the file's characteristics. 179 my $hasContent = (scalar @parsedFile > 0); 180 181 NaturalDocs::Project->SetHasContent($sourceFile, $hasContent); 182 if ($hasContent) 183 { NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle); }; 184 185 # We don't need to keep this around. 186 @parsedFile = ( ); 187 }; 188 189 190# 191# Function: ParseForBuild 192# 193# Parses the input file for building, returning it as a <NaturalDocs::Parser::ParsedTopic> arrayref. 194# 195# Note that all new and changed files should be parsed for symbols via <ParseForInformation()> before calling this function on 196# *any* file. The reason is that <NaturalDocs::SymbolTable> needs to know about all the symbol definitions and references to 197# resolve them properly. 198# 199# Parameters: 200# 201# file - The <FileName> to parse for building. 202# 203# Returns: 204# 205# An arrayref of the source file as <NaturalDocs::Parser::ParsedTopic> objects. 206# 207sub ParseForBuild #(file) 208 { 209 my ($self, $file) = @_; 210 $sourceFile = $file; 211 212 $parsingForInformation = undef; 213 214 $self->Parse(); 215 216 return \@parsedFile; 217 }; 218 219 220 221 222############################################################################### 223# Group: Interface Functions 224 225 226# 227# Function: OnComment 228# 229# The function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a comment 230# suitable for documentation. 231# 232# Parameters: 233# 234# commentLines - An arrayref of the comment's lines. The language's comment symbols should be converted to spaces, 235# and there should be no line break characters at the end of each line. *The original memory will be 236# changed.* 237# lineNumber - The line number of the first of the comment lines. 238# isJavaDoc - Whether the comment is in JavaDoc format. 239# 240# Returns: 241# 242# The number of topics created by this comment, or zero if none. 243# 244sub OnComment #(string[] commentLines, int lineNumber, bool isJavaDoc) 245 { 246 my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_; 247 248 $self->CleanComment($commentLines); 249 250 # We check if it's definitely Natural Docs content first. This overrides all else, since it's possible that a comment could start 251 # with a topic line yet have something that looks like a JavaDoc tag. Natural Docs wins in this case. 252 if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc)) 253 { return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); } 254 255 elsif (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc)) 256 { return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); } 257 258 # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content. 259 elsif ($isJavaDoc) 260 { return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); } 261 }; 262 263 264# 265# Function: OnClass 266# 267# A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a class declaration. 268# 269# Parameters: 270# 271# class - The <SymbolString> of the class encountered. 272# 273sub OnClass #(class) 274 { 275 my ($self, $class) = @_; 276 277 if ($parsingForInformation) 278 { NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class); }; 279 }; 280 281 282# 283# Function: OnClassParent 284# 285# A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a declaration of 286# inheritance. 287# 288# Parameters: 289# 290# class - The <SymbolString> of the class we're in. 291# parent - The <SymbolString> of the class it inherits. 292# scope - The package <SymbolString> that the reference appeared in. 293# using - An arrayref of package <SymbolStrings> that the reference has access to via "using" statements. 294# resolvingFlags - Any <Resolving Flags> to be used when resolving the reference. <RESOLVE_NOPLURAL> is added 295# automatically since that would never apply to source code. 296# 297sub OnClassParent #(class, parent, scope, using, resolvingFlags) 298 { 299 my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_; 300 301 if ($parsingForInformation) 302 { 303 NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using, 304 $resolvingFlags | ::RESOLVE_NOPLURAL()); 305 }; 306 }; 307 308 309 310############################################################################### 311# Group: Support Functions 312 313 314# Function: Parse 315# 316# Opens the source file and parses process. Most of the actual parsing is done in <NaturalDocs::Languages::Base->ParseFile()> 317# and <OnComment()>, though. 318# 319# *Do not call externally.* Rather, call <ParseForInformation()> or <ParseForBuild()>. 320# 321# Returns: 322# 323# The default menu title of the file. Will be the <FileName> if nothing better is found. 324# 325sub Parse 326 { 327 my ($self) = @_; 328 329 NaturalDocs::Error->OnStartParsing($sourceFile); 330 331 $language = NaturalDocs::Languages->LanguageOf($sourceFile); 332 NaturalDocs::Parser::Native->Start(); 333 @parsedFile = ( ); 334 335 my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile); 336 337 338 $self->AddToClassHierarchy(); 339 340 $self->BreakLists(); 341 342 if (defined $autoTopics) 343 { 344 if (defined $scopeRecord) 345 { $self->RepairPackages($autoTopics, $scopeRecord); }; 346 347 $self->MergeAutoTopics($language, $autoTopics); 348 }; 349 350 $self->RemoveRemainingHeaderlessTopics(); 351 352 353 # We don't need to do this if there aren't any auto-topics because the only package changes would be implied by the comments. 354 if (defined $autoTopics) 355 { $self->AddPackageDelineators(); }; 356 357 if (!NaturalDocs::Settings->NoAutoGroup()) 358 { $self->MakeAutoGroups($autoTopics); }; 359 360 361 # Set the menu title. 362 363 my $defaultMenuTitle = $sourceFile; 364 365 if (scalar @parsedFile) 366 { 367 my $addFileTitle; 368 369 if (NaturalDocs::Settings->OnlyFileTitles()) 370 { 371 # We still want to use the title from the topics if the first one is a file. 372 if ($parsedFile[0]->Type() eq ::TOPIC_FILE()) 373 { $addFileTitle = 0; } 374 else 375 { $addFileTitle = 1; }; 376 } 377 elsif (scalar @parsedFile == 1 || NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst()) 378 { $addFileTitle = 0; } 379 else 380 { $addFileTitle = 1; }; 381 382 if (!$addFileTitle) 383 { 384 $defaultMenuTitle = $parsedFile[0]->Title(); 385 } 386 else 387 { 388 # If the title ended up being the file name, add a leading section for it. 389 390 unshift @parsedFile, 391 NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), (NaturalDocs::File->SplitPath($sourceFile))[2], 392 undef, undef, undef, undef, undef, 1, undef); 393 }; 394 }; 395 396 NaturalDocs::Error->OnEndParsing($sourceFile); 397 398 return $defaultMenuTitle; 399 }; 400 401 402# 403# Function: CleanComment 404# 405# Removes any extraneous formatting and whitespace from the comment. Eliminates comment boxes, horizontal lines, trailing 406# whitespace from lines, and expands all tab characters. It keeps leading whitespace, though, since it may be needed for 407# example code, and blank lines, since the original line numbers are needed. 408# 409# Parameters: 410# 411# commentLines - An arrayref of the comment lines to clean. *The original memory will be changed.* Lines should have the 412# language's comment symbols replaced by spaces and not have a trailing line break. 413# 414sub CleanComment #(commentLines) 415 { 416 my ($self, $commentLines) = @_; 417 418 use constant DONT_KNOW => 0; 419 use constant IS_UNIFORM => 1; 420 use constant IS_UNIFORM_IF_AT_END => 2; 421 use constant IS_NOT_UNIFORM => 3; 422 423 my $leftSide = DONT_KNOW; 424 my $rightSide = DONT_KNOW; 425 my $leftSideChar; 426 my $rightSideChar; 427 428 my $index = 0; 429 my $tabLength = NaturalDocs::Settings->TabLength(); 430 431 while ($index < scalar @$commentLines) 432 { 433 # Strip trailing whitespace from the original. 434 435 $commentLines->[$index] =~ s/[ \t]+$//; 436 437 438 # Expand tabs in the original. This method is almost six times faster than Text::Tabs' method. 439 440 my $tabIndex = index($commentLines->[$index], "\t"); 441 442 while ($tabIndex != -1) 443 { 444 substr( $commentLines->[$index], $tabIndex, 1, ' ' x ($tabLength - ($tabIndex % $tabLength)) ); 445 $tabIndex = index($commentLines->[$index], "\t", $tabIndex); 446 }; 447 448 449 # Make a working copy and strip leading whitespace as well. This has to be done after tabs are expanded because 450 # stripping indentation could change how far tabs are expanded. 451 452 my $line = $commentLines->[$index]; 453 $line =~ s/^ +//; 454 455 # If the line is blank... 456 if (!length $line) 457 { 458 # If we have a potential vertical line, this only acceptable if it's at the end of the comment. 459 if ($leftSide == IS_UNIFORM) 460 { $leftSide = IS_UNIFORM_IF_AT_END; }; 461 if ($rightSide == IS_UNIFORM) 462 { $rightSide = IS_UNIFORM_IF_AT_END; }; 463 } 464 465 # If there's at least four symbols in a row, it's a horizontal line. The second regex supports differing edge characters. It 466 # doesn't matter if any of this matches the left and right side symbols. The length < 256 is a sanity check, because that 467 # regexp has caused the perl regexp engine to choke on an insane line someone sent me from an automatically generated 468 # file. It had over 10k characters on the first line, and most of them were 0x00. 469 elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/ || 470 (length $line < 256 && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/) ) 471 { 472 # Ignore it. This has no effect on the vertical line detection. We want to keep it in the output though in case it was 473 # in a code section. 474 } 475 476 # If the line is not blank or a horizontal line... 477 else 478 { 479 # More content means any previous blank lines are no longer tolerated in vertical line detection. They are only 480 # acceptable at the end of the comment. 481 482 if ($leftSide == IS_UNIFORM_IF_AT_END) 483 { $leftSide = IS_NOT_UNIFORM; }; 484 if ($rightSide == IS_UNIFORM_IF_AT_END) 485 { $rightSide = IS_NOT_UNIFORM; }; 486 487 488 # Detect vertical lines. Lines are only lines if they are followed by whitespace or a connected horizontal line. 489 # Otherwise we may accidentally detect lines from short comments that just happen to have every first or last 490 # character the same. 491 492 if ($leftSide != IS_NOT_UNIFORM) 493 { 494 if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/) 495 { 496 if ($leftSide == DONT_KNOW) 497 { 498 $leftSide = IS_UNIFORM; 499 $leftSideChar = $1; 500 } 501 else # ($leftSide == IS_UNIFORM) Other choices already ruled out. 502 { 503 if ($leftSideChar ne $1) 504 { $leftSide = IS_NOT_UNIFORM; }; 505 }; 506 } 507 # We'll tolerate the lack of symbols on the left on the first line, because it may be a 508 # /* Function: Whatever 509 # * Description. 510 # */ 511 # comment which would have the leading /* blanked out. 512 elsif ($index != 0) 513 { 514 $leftSide = IS_NOT_UNIFORM; 515 }; 516 }; 517 518 if ($rightSide != IS_NOT_UNIFORM) 519 { 520 if ($line =~ / ([^a-zA-Z0-9])\1*$/) 521 { 522 if ($rightSide == DONT_KNOW) 523 { 524 $rightSide = IS_UNIFORM; 525 $rightSideChar = $1; 526 } 527 else # ($rightSide == IS_UNIFORM) Other choices already ruled out. 528 { 529 if ($rightSideChar ne $1) 530 { $rightSide = IS_NOT_UNIFORM; }; 531 }; 532 } 533 else 534 { 535 $rightSide = IS_NOT_UNIFORM; 536 }; 537 }; 538 539 # We'll remove vertical lines later if they're uniform throughout the entire comment. 540 }; 541 542 $index++; 543 }; 544 545 546 if ($leftSide == IS_UNIFORM_IF_AT_END) 547 { $leftSide = IS_UNIFORM; }; 548 if ($rightSide == IS_UNIFORM_IF_AT_END) 549 { $rightSide = IS_UNIFORM; }; 550 551 552 $index = 0; 553 my $inCodeSection = 0; 554 555 while ($index < scalar @$commentLines) 556 { 557 # Clear horizontal lines only if we're not in a code section. 558 if ($commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/ || 559 ( length $commentLines->[$index] < 256 && 560 $commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/ ) ) 561 { 562 if (!$inCodeSection) 563 { $commentLines->[$index] = ''; } 564 } 565 566 else 567 { 568 # Clear vertical lines. 569 570 if ($leftSide == IS_UNIFORM) 571 { 572 # This works because every line should either start this way, be blank, or be the first line that doesn't start with a 573 # symbol. 574 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*//; 575 }; 576 577 if ($rightSide == IS_UNIFORM) 578 { 579 $commentLines->[$index] =~ s/ *([^a-zA-Z0-9 ])\1*$//; 580 }; 581 582 583 # Clear horizontal lines again if there were vertical lines. This catches lines that were separated from the verticals by 584 # whitespace. 585 586 if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection) 587 { 588 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//; 589 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//; 590 }; 591 592 593 # Check for the start and end of code sections. Note that this doesn't affect vertical line removal. 594 595 if (!$inCodeSection && 596 $commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i ) 597 { 598 $inCodeSection = 1; 599 } 600 elsif ($inCodeSection && 601 $commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i) 602 { 603 $inCodeSection = 0; 604 } 605 } 606 607 608 $index++; 609 }; 610 611 }; 612 613 614 615############################################################################### 616# Group: Processing Functions 617 618 619# 620# Function: RepairPackages 621# 622# Recalculates the packages for all comment topics using the auto-topics and the scope record. Call this *before* calling 623# <MergeAutoTopics()>. 624# 625# Parameters: 626# 627# autoTopics - A reference to the list of automatically generated <NaturalDocs::Parser::ParsedTopics>. 628# scopeRecord - A reference to an array of <NaturalDocs::Languages::Advanced::ScopeChanges>. 629# 630sub RepairPackages #(autoTopics, scopeRecord) 631 { 632 my ($self, $autoTopics, $scopeRecord) = @_; 633 634 my $topicIndex = 0; 635 my $autoTopicIndex = 0; 636 my $scopeIndex = 0; 637 638 my $topic = $parsedFile[0]; 639 my $autoTopic = $autoTopics->[0]; 640 my $scopeChange = $scopeRecord->[0]; 641 642 my $currentPackage; 643 my $inFakePackage; 644 645 while (defined $topic) 646 { 647 # First update the scope via the record if its defined and has the lowest line number. 648 if (defined $scopeChange && 649 $scopeChange->LineNumber() <= $topic->LineNumber() && 650 (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber()) ) 651 { 652 $currentPackage = $scopeChange->Scope(); 653 $scopeIndex++; 654 $scopeChange = $scopeRecord->[$scopeIndex]; # Will be undef when past end. 655 $inFakePackage = undef; 656 } 657 658 # Next try to end a fake scope with an auto topic if its defined and has the lowest line number. 659 elsif (defined $autoTopic && 660 $autoTopic->LineNumber() <= $topic->LineNumber()) 661 { 662 if ($inFakePackage) 663 { 664 $currentPackage = $autoTopic->Package(); 665 $inFakePackage = undef; 666 }; 667 668 $autoTopicIndex++; 669 $autoTopic = $autoTopics->[$autoTopicIndex]; # Will be undef when past end. 670 } 671 672 673 # Finally try to handle the topic, since it has the lowest line number. Check for Type() because headerless topics won't have 674 # one. 675 else 676 { 677 my $scope; 678 if ($topic->Type()) 679 { $scope = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope(); } 680 else 681 { $scope = ::SCOPE_NORMAL(); }; 682 683 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) 684 { 685 # They should already have the correct class and scope. 686 $currentPackage = $topic->Package(); 687 $inFakePackage = 1; 688 } 689 else 690 { 691 # Fix the package of everything else. 692 693 # Note that the first function or variable topic to appear in a fake package will assume that package even if it turns out 694 # to be incorrect in the actual code, since the topic will come before the auto-topic. This will be corrected in 695 # MergeAutoTopics(). 696 697 $topic->SetPackage($currentPackage); 698 }; 699 700 $topicIndex++; 701 $topic = $parsedFile[$topicIndex]; # Will be undef when past end. 702 }; 703 }; 704 705 }; 706 707 708# 709# Function: MergeAutoTopics 710# 711# Merges the automatically generated topics into the file. If an auto-topic matches an existing topic, it will have it's prototype 712# and package transferred. If it doesn't, the auto-topic will be inserted into the list unless 713# <NaturalDocs::Settings->DocumentedOnly()> is set. If an existing topic doesn't have a title, it's assumed to be a headerless 714# comment and will be merged with the next auto-topic or discarded. 715# 716# Parameters: 717# 718# language - The <NaturalDocs::Languages::Base>-derived class for the file. 719# autoTopics - A reference to the list of automatically generated topics. 720# 721sub MergeAutoTopics #(language, autoTopics) 722 { 723 my ($self, $language, $autoTopics) = @_; 724 725 my $topicIndex = 0; 726 my $autoTopicIndex = 0; 727 728 # Keys are topic types, values are existence hashrefs of titles. 729 my %topicsInLists; 730 731 while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics) 732 { 733 my $topic = $parsedFile[$topicIndex]; 734 my $autoTopic = $autoTopics->[$autoTopicIndex]; 735 736 my $cleanTitle = $topic->Title(); 737 $cleanTitle =~ s/[\t ]*\([^\(]*$//; 738 739 # Add the auto-topic if it's higher in the file than the current topic. 740 if ($autoTopic->LineNumber() < $topic->LineNumber()) 741 { 742 if (exists $topicsInLists{$autoTopic->Type()} && 743 exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}) 744 { 745 # Remove it from the list so a second one with the same name will be added. 746 delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}; 747 } 748 elsif (!NaturalDocs::Settings->DocumentedOnly()) 749 { 750 splice(@parsedFile, $topicIndex, 0, $autoTopic); 751 $topicIndex++; 752 }; 753 754 $autoTopicIndex++; 755 } 756 757 # Remove a headerless topic if there's another topic between it and the next auto-topic. 758 elsif (!$topic->Title() && $topicIndex + 1 < scalar @parsedFile && 759 $parsedFile[$topicIndex+1]->LineNumber() < $autoTopic->LineNumber()) 760 { 761 splice(@parsedFile, $topicIndex, 1); 762 } 763 764 # Transfer information if we have a match or a headerless topic. 765 elsif ( !$topic->Title() || 766 $topic->Symbol() eq $autoTopic->Symbol() || 767 ( $topic->Type() == $autoTopic->Type() && 768 ( index($autoTopic->Title(), $cleanTitle) != -1 || index($cleanTitle, $autoTopic->Title()) != -1 ) ) ) 769 { 770 $topic->SetType($autoTopic->Type()); 771 $topic->SetPrototype($autoTopic->Prototype()); 772 $topic->SetUsing($autoTopic->Using()); 773 774 if (!$topic->Title()) 775 { $topic->SetTitle($autoTopic->Title()); }; 776 777 if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START()) 778 { $topic->SetPackage($autoTopic->Package()); } 779 elsif ($autoTopic->Package() ne $topic->Package()) 780 { 781 my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package()); 782 my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($topic->Package()); 783 784 while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1]) 785 { 786 pop @autoPackageIdentifiers; 787 pop @packageIdentifiers; 788 }; 789 790 if (scalar @autoPackageIdentifiers) 791 { $topic->SetPackage( NaturalDocs::SymbolString->Join(@autoPackageIdentifiers) ); }; 792 }; 793 794 $topicIndex++; 795 $autoTopicIndex++; 796 } 797 798 # Extract topics in lists. 799 elsif ($topic->IsList()) 800 { 801 if (!exists $topicsInLists{$topic->Type()}) 802 { $topicsInLists{$topic->Type()} = { }; }; 803 804 my $body = $topic->Body(); 805 806 while ($body =~ /<ds>([^<]+)<\/ds>/g) 807 { $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1; }; 808 809 $topicIndex++; 810 } 811 812 # Otherwise there's no match. Skip the topic. The auto-topic will be added later. 813 else 814 { 815 $topicIndex++; 816 } 817 }; 818 819 # Add any auto-topics remaining. 820 if (!NaturalDocs::Settings->DocumentedOnly()) 821 { 822 while ($autoTopicIndex < scalar @$autoTopics) 823 { 824 my $autoTopic = $autoTopics->[$autoTopicIndex]; 825 826 if (exists $topicsInLists{$autoTopic->Type()} && 827 exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}) 828 { 829 # Remove it from the list so a second one with the same name will be added. 830 delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}; 831 } 832 else 833 { 834 push(@parsedFile, $autoTopic); 835 }; 836 837 $autoTopicIndex++; 838 }; 839 }; 840 }; 841 842 843# 844# Function: RemoveRemainingHeaderlessTopics 845# 846# After <MergeAutoTopics()> is done, this function removes any remaining headerless topics from the file. If they don't merge 847# into anything, they're not valid topics. 848# 849sub RemoveRemainingHeaderlessTopics 850 { 851 my ($self) = @_; 852 853 my $index = 0; 854 while ($index < scalar @parsedFile) 855 { 856 if ($parsedFile[$index]->Title()) 857 { $index++; } 858 else 859 { splice(@parsedFile, $index, 1); }; 860 }; 861 }; 862 863 864# 865# Function: MakeAutoGroups 866# 867# Creates group topics for files that do not have them. 868# 869sub MakeAutoGroups 870 { 871 my ($self) = @_; 872 873 # No groups only one topic. 874 if (scalar @parsedFile < 2) 875 { return; }; 876 877 my $index = 0; 878 my $startStretch = 0; 879 880 # Skip the first entry if its the page title. 881 if (NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst()) 882 { 883 $index = 1; 884 $startStretch = 1; 885 }; 886 887 # Make auto-groups for each stretch between scope-altering topics. 888 while ($index < scalar @parsedFile) 889 { 890 my $scope = NaturalDocs::Topics->TypeInfo($parsedFile[$index]->Type())->Scope(); 891 892 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) 893 { 894 if ($index > $startStretch) 895 { $index += $self->MakeAutoGroupsFor($startStretch, $index); }; 896 897 $startStretch = $index + 1; 898 }; 899 900 $index++; 901 }; 902 903 if ($index > $startStretch) 904 { $self->MakeAutoGroupsFor($startStretch, $index); }; 905 }; 906 907 908# 909# Function: MakeAutoGroupsFor 910# 911# Creates group topics for sections of files that do not have them. A support function for <MakeAutoGroups()>. 912# 913# Parameters: 914# 915# startIndex - The index to start at. 916# endIndex - The index to end at. Not inclusive. 917# 918# Returns: 919# 920# The number of group topics added. 921# 922sub MakeAutoGroupsFor #(startIndex, endIndex) 923 { 924 my ($self, $startIndex, $endIndex) = @_; 925 926 # No groups if any are defined already. 927 for (my $i = $startIndex; $i < $endIndex; $i++) 928 { 929 if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP()) 930 { return 0; }; 931 }; 932 933 934 use constant COUNT => 0; 935 use constant TYPE => 1; 936 use constant SECOND_TYPE => 2; 937 use constant SIZE => 3; 938 939 # This is an array of ( count, type, secondType ) triples. Count and Type will always be filled in; count is the number of 940 # consecutive topics of type. On the second pass, if small groups are combined secondType will be filled in. There will not be 941 # more than two types per group. 942 my @groups; 943 my $groupIndex = 0; 944 945 946 # First pass: Determine all the groups. 947 948 my $i = $startIndex; 949 my $currentType; 950 951 while ($i < $endIndex) 952 { 953 if (!defined $currentType || ($parsedFile[$i]->Type() ne $currentType && $parsedFile[$i]->Type() ne ::TOPIC_GENERIC()) ) 954 { 955 if (defined $currentType) 956 { $groupIndex += SIZE; }; 957 958 $currentType = $parsedFile[$i]->Type(); 959 960 $groups[$groupIndex + COUNT] = 1; 961 $groups[$groupIndex + TYPE] = $currentType; 962 } 963 else 964 { $groups[$groupIndex + COUNT]++; }; 965 966 $i++; 967 }; 968 969 970 # Second pass: Combine groups based on "noise". Noise means types go from A to B to A at least once, and there are at least 971 # two groups in a row with three or less, and at least one of those groups is two or less. So 3, 3, 3 doesn't count as noise, but 972 # 3, 2, 3 does. 973 974 $groupIndex = 0; 975 976 # While there are at least three groups left... 977 while ($groupIndex < scalar @groups - (2 * SIZE)) 978 { 979 # If the group two places in front of this one has the same type... 980 if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE]) 981 { 982 # It means we went from A to B to A, which partially qualifies as noise. 983 984 my $firstType = $groups[$groupIndex + TYPE]; 985 my $secondType = $groups[$groupIndex + SIZE + TYPE]; 986 987 if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType) || 988 NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType)) 989 { 990 my $hasNoise; 991 992 my $hasThrees; 993 my $hasTwosOrOnes; 994 995 my $endIndex = $groupIndex; 996 997 while ($endIndex < scalar @groups && 998 ($groups[$endIndex + TYPE] eq $firstType || $groups[$endIndex + TYPE] eq $secondType)) 999 { 1000 if ($groups[$endIndex + COUNT] > 3) 1001 { 1002 # They must be consecutive to count. 1003 $hasThrees = 0; 1004 $hasTwosOrOnes = 0; 1005 } 1006 elsif ($groups[$endIndex + COUNT] == 3) 1007 { 1008 $hasThrees = 1; 1009 1010 if ($hasTwosOrOnes) 1011 { $hasNoise = 1; }; 1012 } 1013 else # < 3 1014 { 1015 if ($hasThrees || $hasTwosOrOnes) 1016 { $hasNoise = 1; }; 1017 1018 $hasTwosOrOnes = 1; 1019 }; 1020 1021 $endIndex += SIZE; 1022 }; 1023 1024 if (!$hasNoise) 1025 { 1026 $groupIndex = $endIndex - SIZE; 1027 } 1028 else # hasNoise 1029 { 1030 $groups[$groupIndex + SECOND_TYPE] = $secondType; 1031 1032 for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $endIndex; $noiseIndex += SIZE) 1033 { 1034 $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT]; 1035 }; 1036 1037 splice(@groups, $groupIndex + SIZE, $endIndex - $groupIndex - SIZE); 1038 1039 $groupIndex += SIZE; 1040 }; 1041 } 1042 1043 else # They can't group together 1044 { 1045 $groupIndex += SIZE; 1046 }; 1047 } 1048 1049 else 1050 { $groupIndex += SIZE; }; 1051 }; 1052 1053 1054 # Finally, create group topics for the parsed file. 1055 1056 $groupIndex = 0; 1057 $i = $startIndex; 1058 1059 while ($groupIndex < scalar @groups) 1060 { 1061 if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC()) 1062 { 1063 my $topic = $parsedFile[$i]; 1064 my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1); 1065 1066 if (defined $groups[$groupIndex + SECOND_TYPE]) 1067 { $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1); }; 1068 1069 splice(@parsedFile, $i, 0, NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(), 1070 $title, 1071 $topic->Package(), $topic->Using(), 1072 undef, undef, undef, 1073 $topic->LineNumber()) ); 1074 $i++; 1075 }; 1076 1077 $i += $groups[$groupIndex + COUNT]; 1078 $groupIndex += SIZE; 1079 }; 1080 1081 return (scalar @groups / SIZE); 1082 }; 1083 1084 1085# 1086# Function: AddToClassHierarchy 1087# 1088# Adds any class topics to the class hierarchy, since they may not have been called with <OnClass()> if they didn't match up to 1089# an auto-topic. 1090# 1091sub AddToClassHierarchy 1092 { 1093 my ($self) = @_; 1094 1095 foreach my $topic (@parsedFile) 1096 { 1097 if ($topic->Type() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->ClassHierarchy()) 1098 { 1099 if ($topic->IsList()) 1100 { 1101 my $body = $topic->Body(); 1102 1103 while ($body =~ /<ds>([^<]+)<\/ds>/g) 1104 { 1105 $self->OnClass( NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($1) ) ); 1106 }; 1107 } 1108 else 1109 { 1110 $self->OnClass($topic->Package()); 1111 }; 1112 }; 1113 }; 1114 }; 1115 1116 1117# 1118# Function: AddPackageDelineators 1119# 1120# Adds section and class topics to make sure the package is correctly represented in the documentation. Should be called last in 1121# this process. 1122# 1123sub AddPackageDelineators 1124 { 1125 my ($self) = @_; 1126 1127 my $index = 0; 1128 my $currentPackage; 1129 1130 # Values are the arrayref [ title, type ]; 1131 my %usedPackages; 1132 1133 while ($index < scalar @parsedFile) 1134 { 1135 my $topic = $parsedFile[$index]; 1136 1137 if ($topic->Package() ne $currentPackage) 1138 { 1139 $currentPackage = $topic->Package(); 1140 my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope(); 1141 1142 if ($scopeType == ::SCOPE_START()) 1143 { 1144 $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ]; 1145 } 1146 elsif ($scopeType == ::SCOPE_END()) 1147 { 1148 my $newTopic; 1149 1150 if (!defined $currentPackage) 1151 { 1152 $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global', 1153 undef, undef, 1154 undef, undef, undef, 1155 $topic->LineNumber(), undef); 1156 } 1157 else 1158 { 1159 my ($title, $body, $summary, $type); 1160 my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage); 1161 1162 if (exists $usedPackages{$currentPackage}) 1163 { 1164 $title = $usedPackages{$currentPackage}->[0]; 1165 $type = $usedPackages{$currentPackage}->[1]; 1166 $body = '<p>(continued)</p>'; 1167 $summary = '(continued)'; 1168 } 1169 else 1170 { 1171 $title = join($language->PackageSeparator(), @packageIdentifiers); 1172 $type = ::TOPIC_CLASS(); 1173 1174 # Body and summary stay undef. 1175 1176 $usedPackages{$currentPackage} = $title; 1177 }; 1178 1179 my @titleIdentifiers = NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) ); 1180 for (my $i = 0; $i < scalar @titleIdentifiers; $i++) 1181 { pop @packageIdentifiers; }; 1182 1183 $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title, 1184 NaturalDocs::SymbolString->Join(@packageIdentifiers), undef, 1185 undef, $summary, $body, 1186 $topic->LineNumber(), undef); 1187 } 1188 1189 splice(@parsedFile, $index, 0, $newTopic); 1190 $index++; 1191 } 1192 }; 1193 1194 $index++; 1195 }; 1196 }; 1197 1198 1199# 1200# Function: BreakLists 1201# 1202# Breaks list topics into individual topics. 1203# 1204sub BreakLists 1205 { 1206 my $self = shift; 1207 1208 my $index = 0; 1209 1210 while ($index < scalar @parsedFile) 1211 { 1212 my $topic = $parsedFile[$index]; 1213 1214 if ($topic->IsList() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->BreakLists()) 1215 { 1216 my $body = $topic->Body(); 1217 1218 my @newTopics; 1219 my $newBody; 1220 1221 my $bodyIndex = 0; 1222 1223 for (;;) 1224 { 1225 my $startList = index($body, '<dl>', $bodyIndex); 1226 1227 if ($startList == -1) 1228 { last; }; 1229 1230 $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex); 1231 1232 my $endList = index($body, '</dl>', $startList); 1233 my $listBody = substr($body, $startList, $endList - $startList); 1234 1235 while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g) 1236 { 1237 my ($symbol, $description) = ($1, $2); 1238 1239 push @newTopics, NaturalDocs::Parser::ParsedTopic->New( $topic->Type(), $symbol, $topic->Package(), 1240 $topic->Using(), undef, 1241 $self->GetSummaryFromDescriptionList($description), 1242 '<p>' . $description . '</p>', $topic->LineNumber(), 1243 undef ); 1244 }; 1245 1246 $bodyIndex = $endList + 5; 1247 }; 1248 1249 $newBody .= substr($body, $bodyIndex); 1250 1251 # Remove trailing headings. 1252 $newBody =~ s/(?:<h>[^<]+<\/h>)+$//; 1253 1254 # Remove empty headings. 1255 $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g; 1256 1257 if ($newBody) 1258 { 1259 unshift @newTopics, NaturalDocs::Parser::ParsedTopic->New( ::TOPIC_GROUP(), $topic->Title(), $topic->Package(), 1260 $topic->Using(), undef, 1261 $self->GetSummaryFromBody($newBody), $newBody, 1262 $topic->LineNumber(), undef ); 1263 }; 1264 1265 splice(@parsedFile, $index, 1, @newTopics); 1266 1267 $index += scalar @newTopics; 1268 } 1269 1270 else # not a list 1271 { $index++; }; 1272 }; 1273 }; 1274 1275 1276# 1277# Function: GetSummaryFromBody 1278# 1279# Returns the summary text from the topic body. 1280# 1281# Parameters: 1282# 1283# body - The complete topic body, in <NDMarkup>. 1284# 1285# Returns: 1286# 1287# The topic summary, or undef if none. 1288# 1289sub GetSummaryFromBody #(body) 1290 { 1291 my ($self, $body) = @_; 1292 1293 my $summary; 1294 1295 # Extract the first sentence from the leading paragraph, if any. We'll tolerate a single header beforehand, but nothing else. 1296 1297 if ($body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|"|>))/x) 1298 { 1299 $summary = $1; 1300 1301 if ($2 ne '</p>') 1302 { $summary .= $2; }; 1303 }; 1304 1305 return $summary; 1306 }; 1307 1308 1309# 1310# Function: GetSummaryFromDescriptionList 1311# 1312# Returns the summary text from a description list entry. 1313# 1314# Parameters: 1315# 1316# description - The description in <NDMarkup>. Should be the content between the <dd></dd> tags only. 1317# 1318# Returns: 1319# 1320# The description summary, or undef if none. 1321# 1322sub GetSummaryFromDescriptionList #(description) 1323 { 1324 my ($self, $description) = @_; 1325 1326 my $summary; 1327 1328 if ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|"|>))/) 1329 { $summary = $1 . $2; }; 1330 1331 return $summary; 1332 }; 1333 1334 13351; 1336