1###############################################################################
2#
3#   Package: NaturalDocs::Parser
4#
5###############################################################################
6#
7#   A package that coordinates source file parsing between the <NaturalDocs::Languages::Base>-derived objects and its own
8#   sub-packages such as <NaturalDocs::Parser::Native>.  Also handles sending symbols to <NaturalDocs::SymbolTable> and
9#   other generic topic processing.
10#
11#   Usage and Dependencies:
12#
13#       - Prior to use, <NaturalDocs::Settings>, <NaturalDocs::Languages>, <NaturalDocs::Project>, <NaturalDocs::SymbolTable>,
14#         and <NaturalDocs::ClassHierarchy> must be initialized.  <NaturalDocs::SymbolTable> and <NaturalDocs::ClassHierarchy>
15#         do not have to be fully resolved.
16#
17#       - Aside from that, the package is ready to use right away.  It does not have its own initialization function.
18#
19###############################################################################
20
# This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
22# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
23# Refer to License.txt for the complete details
24
25use NaturalDocs::Parser::ParsedTopic;
26use NaturalDocs::Parser::Native;
27use NaturalDocs::Parser::JavaDoc;
28
29use strict;
30use integer;
31
32package NaturalDocs::Parser;
33
34
35
36###############################################################################
37# Group: Variables
38
39
#
#   var: sourceFile
#
#   The source <FileName> currently being parsed.  It is set by <ParseForInformation()> and <ParseForBuild()> before they
#   call <Parse()>.
#
my $sourceFile;

#
#   var: language
#
#   The language object for the file, derived from <NaturalDocs::Languages::Base>.  It is looked up in <Parse()> with
#   <NaturalDocs::Languages->LanguageOf()>.
#
my $language;

#
#   Array: parsedFile
#
#   An array of <NaturalDocs::Parser::ParsedTopic> objects.  It is emptied at the start of every <Parse()> and again at the
#   end of <ParseForInformation()>.  <ParseForBuild()> returns a reference to this array rather than a copy.
#
my @parsedFile;


#
#   bool: parsingForInformation
#
#   Whether <ParseForInformation()> was called.  If false, then <ParseForBuild()> was called.  <OnClass()> and
#   <OnClassParent()> only record anything in <NaturalDocs::ClassHierarchy> when this is set.
#
my $parsingForInformation;
67
68
69
70###############################################################################
71# Group: Functions
72
#
#   Function: ParseForInformation
#
#   Parses a source file to collect information about it rather than to build output.  Every topic's symbol is added to
#   <NaturalDocs::SymbolTable>, along with a symbol for each entry of list and enumeration topics and a reference for each
#   link in the topic bodies.  Image references go to <NaturalDocs::ImageReferenceTable>.  Afterwards the watched changes are
#   analyzed and <NaturalDocs::Project> is told whether the file has content and what its default menu title is.
#
#   The parsed topics are discarded before returning.
#
#   Parameters:
#
#       file - The <FileName> to parse.
#
sub ParseForInformation #(file)
    {
    my ($self, $file) = @_;

    $sourceFile = $file;
    $parsingForInformation = 1;

    # Put every change tracker into watch mode before parsing so the effects of this parse can be analyzed afterwards.
    foreach my $tracker ('NaturalDocs::SymbolTable', 'NaturalDocs::ClassHierarchy', 'NaturalDocs::SourceDB')
        {  $tracker->WatchFileForChanges($sourceFile);  };

    my $defaultMenuTitle = $self->Parse();

    foreach my $topic (@parsedFile)
        {
        my $topicType = $topic->Type();
        my $isEnumeration = ($topicType eq ::TOPIC_ENUMERATION());


        # Register the topic itself.  Enumerations go into the symbol table as types.

        my $symbolType = ($isEnumeration ? ::TOPIC_TYPE() : $topicType);

        NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $symbolType,
                                                                   $topic->Prototype(), $topic->Summary());


        # The body has to sit in a variable for the global regex loops below to work.  Matching against the function call
        # directly would not.
        my $body = $topic->Body();


        # Register every description list entry of list and enumeration topics.

        if ($topic->IsList() || $isEnumeration)
            {
            # The enum value constants double as the placement rule for entries of ordinary list topics.
            my $entryType = $symbolType;
            my $placement;

            if ($isEnumeration)
                {
                $entryType = ::TOPIC_CONSTANT();
                $placement = $language->EnumValues();
                }
            elsif (NaturalDocs::Topics->TypeInfo($topicType)->Scope() == ::SCOPE_ALWAYS_GLOBAL())
                {  $placement = ::ENUM_GLOBAL();  }
            else
                {  $placement = ::ENUM_UNDER_PARENT();  };

            while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
                {
                my ($entryText, $entryDescription) = ($1, $2);

                $entryText = NaturalDocs::NDMarkup->RestoreAmpChars($entryText);
                my $entrySymbol = NaturalDocs::SymbolString->FromText($entryText);

                # ENUM_GLOBAL entries keep their bare symbol.
                if ($placement == ::ENUM_UNDER_PARENT())
                    {  $entrySymbol = NaturalDocs::SymbolString->Join($topic->Package(), $entrySymbol);  }
                elsif ($placement == ::ENUM_UNDER_TYPE())
                    {  $entrySymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $entrySymbol);  };

                NaturalDocs::SymbolTable->AddSymbol($entrySymbol, $sourceFile, $entryType, undef,
                                                                           $self->GetSummaryFromDescriptionList($entryDescription));
                };
            };


        # Register every link in the body as a text reference.

        while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g)
            {
            my $targetText = NaturalDocs::NDMarkup->RestoreAmpChars($1);
            my $targetSymbol = NaturalDocs::SymbolString->FromText($targetText);

            NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $targetSymbol,
                                                                           $topic->Package(), $topic->Using(), $sourceFile);
            };


        # Register every image in the body.

        while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g)
            {
            my $imageTarget = NaturalDocs::NDMarkup->RestoreAmpChars($1);
            NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $imageTarget);
            };
        };


    # Let the trackers work out what changed in the file.
    NaturalDocs::ClassHierarchy->AnalyzeChanges();
    NaturalDocs::SymbolTable->AnalyzeChanges();
    NaturalDocs::SourceDB->AnalyzeWatchedFileChanges();


    # Tell the project what the file turned out to be.
    my $hasContent = (scalar @parsedFile > 0);

    NaturalDocs::Project->SetHasContent($sourceFile, $hasContent);
    NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle)  if ($hasContent);


    # The topics were only needed to gather the information above.
    @parsedFile = ( );
    };
188
189
#
#   Function: ParseForBuild
#
#   Parses a source file so its output can be built and hands back the resulting topics.
#
#   All new and changed files must have gone through <ParseForInformation()> before this function is called on *any* file,
#   because <NaturalDocs::SymbolTable> needs every symbol definition and reference in order to resolve them properly.
#
#   Parameters:
#
#       file - The <FileName> to parse for building.
#
#   Returns:
#
#       An arrayref of the source file as <NaturalDocs::Parser::ParsedTopic> objects.  The reference points at the package's
#       own <parsedFile> array, not a copy, so its content is replaced by the next parse.
#
sub ParseForBuild #(file)
    {
    my ($self, $file) = @_;

    ($sourceFile, $parsingForInformation) = ($file, undef);

    $self->Parse();

    return \@parsedFile;
    };
218
219
220
221
222###############################################################################
223# Group: Interface Functions
224
225
#
#   Function: OnComment
#
#   The function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a comment
#   suitable for documentation.  The comment is cleaned with <CleanComment()> and then handed to whichever comment
#   parser claims it.
#
#   Parameters:
#
#       commentLines - An arrayref of the comment's lines.  The language's comment symbols should be converted to spaces,
#                               and there should be no line break characters at the end of each line.  *The original memory will be
#                               changed.*
#       lineNumber - The line number of the first of the comment lines.
#       isJavaDoc - Whether the comment is in JavaDoc format.
#
#   Returns:
#
#       The number of topics created by this comment as reported by the comment parser, or zero if no parser accepted the
#       comment.
#
sub OnComment #(string[] commentLines, int lineNumber, bool isJavaDoc)
    {
    my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_;

    $self->CleanComment($commentLines);

    # We check if it's definitely Natural Docs content first.  This overrides all else, since it's possible that a comment could start
    # with a topic line yet have something that looks like a JavaDoc tag.  Natural Docs wins in this case.
    if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc))
        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }

    elsif (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc))
        {  return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }

    # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content.
    elsif ($isJavaDoc)
        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  };

    # Nobody claimed the comment.  Return the documented zero explicitly; falling off the end of the sub would return
    # whatever false value the last condition happened to evaluate to, which may be undef.
    return 0;
    };
262
263
#
#   Function: OnClass
#
#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a class declaration.
#   The class is recorded in <NaturalDocs::ClassHierarchy>, but only while parsing for information.
#
#   Parameters:
#
#       class - The <SymbolString> of the class encountered.
#
sub OnClass #(class)
    {
    my ($self, $class) = @_;

    NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class)  if ($parsingForInformation);
    };
280
281
#
#   Function: OnClassParent
#
#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a declaration of
#   inheritance.  The parent reference is recorded in <NaturalDocs::ClassHierarchy>, but only while parsing for information.
#
#   Parameters:
#
#       class - The <SymbolString> of the class we're in.
#       parent - The <SymbolString> of the class it inherits.
#       scope - The package <SymbolString> that the reference appeared in.
#       using - An arrayref of package <SymbolStrings> that the reference has access to via "using" statements.
#       resolvingFlags - Any <Resolving Flags> to be used when resolving the reference.  <RESOLVE_NOPLURAL> is added
#                              automatically since that would never apply to source code.
#
sub OnClassParent #(class, parent, scope, using, resolvingFlags)
    {
    my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_;

    NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using,
                                                                               $resolvingFlags | ::RESOLVE_NOPLURAL())
        if ($parsingForInformation);
    };
307
308
309
310###############################################################################
311# Group: Support Functions
312
313
#
#   Function: Parse
#
#   Parses <sourceFile> and leaves the resulting topics in <parsedFile>.  Most of the actual parsing is done in
#   <NaturalDocs::Languages::Base->ParseFile()> and <OnComment()>; this function runs the processing steps on the topics
#   afterwards and works out the file's menu title.
#
#   *Do not call externally.*  Rather, call <ParseForInformation()> or <ParseForBuild()>.
#
#   Returns:
#
#       The default menu title of the file.  Will be the <FileName> if nothing better is found.
#
sub Parse
    {
    my ($self) = @_;

    NaturalDocs::Error->OnStartParsing($sourceFile);

    $language = NaturalDocs::Languages->LanguageOf($sourceFile);
    NaturalDocs::Parser::Native->Start();
    @parsedFile = ( );

    my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile);
    my $hasAutoTopics = defined $autoTopics;


    # Process the topics.

    $self->AddToClassHierarchy();
    $self->BreakLists();

    if ($hasAutoTopics)
        {
        # The packages have to be repaired before the auto-topics are merged in.
        $self->RepairPackages($autoTopics, $scopeRecord)  if (defined $scopeRecord);

        $self->MergeAutoTopics($language, $autoTopics);
        };

    $self->RemoveRemainingHeaderlessTopics();

    # Without auto-topics the only package changes are the ones implied by the comments, so delineators aren't needed.
    $self->AddPackageDelineators()  if ($hasAutoTopics);

    $self->MakeAutoGroups($autoTopics)  unless (NaturalDocs::Settings->NoAutoGroup());


    # Determine the menu title.  It stays the file name unless the first topic supplies it.

    my $defaultMenuTitle = $sourceFile;

    if (scalar @parsedFile)
        {
        my $firstTopicIsTitle;

        if (NaturalDocs::Settings->OnlyFileTitles())
            {
            # We still want to use the title from the topics if the first one is a file.
            $firstTopicIsTitle = ($parsedFile[0]->Type() eq ::TOPIC_FILE());
            }
        else
            {
            $firstTopicIsTitle = ( scalar @parsedFile == 1 ||
                                         NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst() );
            };

        if ($firstTopicIsTitle)
            {
            $defaultMenuTitle = $parsedFile[0]->Title();
            }
        else
            {
            # The title ended up being the file name, so add a leading file topic for it.
            unshift @parsedFile,
                       NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), (NaturalDocs::File->SplitPath($sourceFile))[2],
                                                                                  undef, undef, undef, undef, undef, 1, undef);
            };
        };

    NaturalDocs::Error->OnEndParsing($sourceFile);

    return $defaultMenuTitle;
    };
400
401
#
#   Function: CleanComment
#
#   Removes any extraneous formatting and whitespace from the comment.  Eliminates comment boxes, horizontal lines, trailing
#   whitespace from lines, and expands all tab characters.  Leading whitespace is kept since example code may need it, and
#   blank lines are kept since the original line numbers are needed.
#
#   The work is done in two passes.  The first strips trailing whitespace, expands tabs, and determines whether each side of
#   the comment has a uniform vertical line.  The second removes the vertical lines that were found and blanks out horizontal
#   lines, except for horizontal lines inside code sections.
#
#   Parameters:
#
#       commentLines  - An arrayref of the comment lines to clean.  *The original memory will be changed.*  Lines should have the
#                                language's comment symbols replaced by spaces and not have a trailing line break.
#
sub CleanComment #(commentLines)
    {
    my ($self, $commentLines) = @_;

    # The states of the vertical line detection for one side of the comment.
    use constant {
        DONT_KNOW => 0,
        IS_UNIFORM => 1,
        IS_UNIFORM_IF_AT_END => 2,
        IS_NOT_UNIFORM => 3,
        };

    my ($leftSide, $rightSide) = (DONT_KNOW, DONT_KNOW);
    my ($leftSideChar, $rightSideChar);

    my $tabLength = NaturalDocs::Settings->TabLength();


    # First pass: normalize the whitespace and detect the vertical lines.

    for (my $lineIndex = 0; $lineIndex < scalar @$commentLines; $lineIndex++)
        {
        # Trailing whitespace is removed from the original line.
        $commentLines->[$lineIndex] =~ s/[ \t]+$//;

        # So are tabs, which are expanded by hand.  The original author measured this as much faster than Text::Tabs.
        my $tabAt = index($commentLines->[$lineIndex], "\t");

        while ($tabAt != -1)
            {
            my $padding = $tabLength - ($tabAt % $tabLength);
            substr($commentLines->[$lineIndex], $tabAt, 1, ' ' x $padding);

            $tabAt = index($commentLines->[$lineIndex], "\t", $tabAt);
            };

        # The detection works on a copy without the indentation.  The copy can only be made now because stripping the
        # indentation earlier could change how far the tabs expand.
        (my $line = $commentLines->[$lineIndex]) =~ s/^ +//;

        if (!length $line)
            {
            # A blank line only keeps a potential vertical line alive if nothing but blank lines follow.
            $leftSide = IS_UNIFORM_IF_AT_END  if ($leftSide == IS_UNIFORM);
            $rightSide = IS_UNIFORM_IF_AT_END  if ($rightSide == IS_UNIFORM);
            }

        # Four or more symbols in a row make a horizontal line.  The second regex supports differing edge characters.
        # Whether they match the side symbols doesn't matter.  The length check protects the regex engine, which choked on
        # an automatically generated line of over 10k characters.
        elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/ ||
                (length $line < 256 && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/) )
            {
            # Horizontal lines have no effect on the vertical line detection.  They are left in place for now since they
            # may turn out to be inside a code section.
            }

        else
            {
            # There's more content, so the earlier blank lines weren't at the end of the comment after all.
            $leftSide = IS_NOT_UNIFORM  if ($leftSide == IS_UNIFORM_IF_AT_END);
            $rightSide = IS_NOT_UNIFORM  if ($rightSide == IS_UNIFORM_IF_AT_END);

            # A vertical line only counts if it is followed by whitespace or is the entire line.  Otherwise short comments
            # whose lines happen to start or end with the same character would be mistaken for boxes.

            if ($leftSide != IS_NOT_UNIFORM)
                {
                if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/)
                    {
                    if ($leftSide == DONT_KNOW)
                        {  ($leftSide, $leftSideChar) = (IS_UNIFORM, $1);  }
                    elsif ($leftSideChar ne $1)
                        {  $leftSide = IS_NOT_UNIFORM;  };
                    }

                # Missing symbols are tolerated on the first line only, since a comment like
                # /* Function: Whatever
                #  * Description.
                #  */
                # arrives with its leading /* blanked out.
                elsif ($lineIndex != 0)
                    {  $leftSide = IS_NOT_UNIFORM;  };
                };

            if ($rightSide != IS_NOT_UNIFORM)
                {
                if ($line =~ / ([^a-zA-Z0-9])\1*$/)
                    {
                    if ($rightSide == DONT_KNOW)
                        {  ($rightSide, $rightSideChar) = (IS_UNIFORM, $1);  }
                    elsif ($rightSideChar ne $1)
                        {  $rightSide = IS_NOT_UNIFORM;  };
                    }
                else
                    {  $rightSide = IS_NOT_UNIFORM;  };
                };
            };
        };


    # Blank lines that were only followed by more blank lines don't break a vertical line.
    $leftSide = IS_UNIFORM  if ($leftSide == IS_UNIFORM_IF_AT_END);
    $rightSide = IS_UNIFORM  if ($rightSide == IS_UNIFORM_IF_AT_END);


    # Second pass: remove the lines.

    my $inCodeSection = 0;

    for (my $lineIndex = 0; $lineIndex < scalar @$commentLines; $lineIndex++)
        {
        # Horizontal lines are cleared unless they're in a code section.  Nothing else is done to them either way.
        if ($commentLines->[$lineIndex] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/ ||
            ( length $commentLines->[$lineIndex] < 256 &&
              $commentLines->[$lineIndex] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/ ) )
            {
            $commentLines->[$lineIndex] = ''  if (!$inCodeSection);
            next;
            };


        # Vertical lines are cleared even in code sections.  The left side substitution is safe because every line
        # either starts with the symbol, is blank, or is the first line without a symbol.

        $commentLines->[$lineIndex] =~ s/^ *([^a-zA-Z0-9 ])\1*//  if ($leftSide == IS_UNIFORM);
        $commentLines->[$lineIndex] =~ s/ *([^a-zA-Z0-9 ])\1*$//  if ($rightSide == IS_UNIFORM);


        # With the vertical lines gone, horizontal lines that were separated from them by whitespace show up.

        if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection)
            {
            $commentLines->[$lineIndex] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//;
            $commentLines->[$lineIndex] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//;
            };


        # Track the start and end of code sections.  This doesn't affect vertical line removal.

        if ($inCodeSection)
            {
            $inCodeSection = 0
                if ($commentLines->[$lineIndex] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i);
            }
        elsif ($commentLines->[$lineIndex] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
            {  $inCodeSection = 1;  };
        };

    };
612
613
614
615###############################################################################
616# Group: Processing Functions
617
618
#
#   Function: RepairPackages
#
#   Recalculates the packages for all comment topics using the auto-topics and the scope record.  Call this *before* calling
#   <MergeAutoTopics()>.
#
#   The topics, auto-topics, and scope changes are walked in parallel by line number.  Whichever comes first is handled
#   first, with scope changes winning ties over auto-topics and both winning ties over topics.
#
#   Parameters:
#
#       autoTopics - A reference to the list of automatically generated <NaturalDocs::Parser::ParsedTopics>.
#       scopeRecord - A reference to an array of <NaturalDocs::Languages::Advanced::ScopeChanges>.
#
sub RepairPackages #(autoTopics, scopeRecord)
    {
    my ($self, $autoTopics, $scopeRecord) = @_;

    # Cursors into the three lists.  The current entry of each becomes undef once its cursor runs past the end.
    my ($topicAt, $autoTopicAt, $scopeChangeAt) = (0, 0, 0);

    my $topic = $parsedFile[$topicAt];
    my $autoTopic = $autoTopics->[$autoTopicAt];
    my $scopeChange = $scopeRecord->[$scopeChangeAt];

    my ($currentPackage, $inFakePackage);

    while (defined $topic)
        {
        my $topicLine = $topic->LineNumber();

        my $scopeChangeIsNext = ( defined $scopeChange &&
                                            $scopeChange->LineNumber() <= $topicLine &&
                                            (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber()) );

        # A scope change from the record replaces the package and ends any fake package.
        if ($scopeChangeIsNext)
            {
            $currentPackage = $scopeChange->Scope();
            $inFakePackage = undef;

            $scopeChange = $scopeRecord->[++$scopeChangeAt];
            }

        # An auto-topic ends a fake package, in which case its own package takes over.
        elsif (defined $autoTopic && $autoTopic->LineNumber() <= $topicLine)
            {
            if ($inFakePackage)
                {
                $currentPackage = $autoTopic->Package();
                $inFakePackage = undef;
                };

            $autoTopic = $autoTopics->[++$autoTopicAt];
            }

        # Otherwise the topic has the lowest line number and gets handled itself.
        else
            {
            # Headerless topics don't have a type, so they get treated as normally scoped.
            my $scope = ( $topic->Type() ? NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() : ::SCOPE_NORMAL() );

            if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
                {
                # Scope-changing topics should already have the correct package, and they start a fake one.
                $currentPackage = $topic->Package();
                $inFakePackage = 1;
                }
            else
                {
                # Everything else takes on the current package.  The first function or variable topic in a fake package
                # assumes that package even if the actual code disagrees, since the topic comes before the auto-topic.
                # MergeAutoTopics() corrects that.
                $topic->SetPackage($currentPackage);
                };

            $topic = $parsedFile[++$topicAt];
            };
        };

    };
706
707
#
#   Function: MergeAutoTopics
#
#   Merges the automatically generated topics into the file.  If an auto-topic matches an existing topic, it will have its
#   type, prototype, using list, and package transferred to that topic.  If it doesn't, the auto-topic will be inserted into
#   the list unless <NaturalDocs::Settings->DocumentedOnly()> is set.  If an existing topic doesn't have a title, it's assumed
#   to be a headerless comment and will be merged with the next auto-topic or discarded.
#
#   An existing topic matches an auto-topic if their symbols are equal, or if they have the same topic type and one title
#   contains the other.  Auto-topics that are already documented as an entry of a list topic are not added a second time.
#
#   Parameters:
#
#       language - The <NaturalDocs::Languages::Base>-derived class for the file.  It is not used by the current
#                       implementation.
#       autoTopics - A reference to the list of automatically generated topics.
#
sub MergeAutoTopics #(language, autoTopics)
    {
    my ($self, $language, $autoTopics) = @_;

    my $topicIndex = 0;
    my $autoTopicIndex = 0;

    # Keys are topic types, values are existence hashrefs of titles.
    my %topicsInLists;

    while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics)
        {
        my $topic = $parsedFile[$topicIndex];
        my $autoTopic = $autoTopics->[$autoTopicIndex];

        # The title without a trailing parenthetical section, such as a parameter list.
        my $cleanTitle = $topic->Title();
        $cleanTitle =~ s/[\t ]*\([^\(]*$//;

        # Add the auto-topic if it's higher in the file than the current topic.
        if ($autoTopic->LineNumber() < $topic->LineNumber())
            {
            if (exists $topicsInLists{$autoTopic->Type()} &&
                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
                {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
                }
            elsif (!NaturalDocs::Settings->DocumentedOnly())
                {
                splice(@parsedFile, $topicIndex, 0, $autoTopic);
                $topicIndex++;
                };

            $autoTopicIndex++;
            }

        # Remove a headerless topic if there's another topic between it and the next auto-topic.
        elsif (!$topic->Title() && $topicIndex + 1 < scalar @parsedFile &&
                $parsedFile[$topicIndex+1]->LineNumber() < $autoTopic->LineNumber())
            {
            splice(@parsedFile, $topicIndex, 1);
            }

        # Transfer information if we have a match or a headerless topic.  Topic types are compared as strings here, the
        # same way the rest of this package compares them and uses them as hash keys.  A numeric comparison would treat
        # every pair of non-numeric types as equal and reduce this test to the title check alone.
        elsif ( !$topic->Title() ||
                  $topic->Symbol() eq $autoTopic->Symbol() ||
                  ( $topic->Type() eq $autoTopic->Type() &&
                    ( index($autoTopic->Title(), $cleanTitle) != -1 || index($cleanTitle, $autoTopic->Title()) != -1 ) ) )
            {
            $topic->SetType($autoTopic->Type());
            $topic->SetPrototype($autoTopic->Prototype());
            $topic->SetUsing($autoTopic->Using());

            if (!$topic->Title())
                {  $topic->SetTitle($autoTopic->Title());  };

            if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START())
                {  $topic->SetPackage($autoTopic->Package());  }
            elsif ($autoTopic->Package() ne $topic->Package())
                {
                # For scope-starting topics, drop the trailing identifiers the two packages have in common and use
                # whatever remains of the auto-topic's package, if anything.
                my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package());
                my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($topic->Package());

                while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1])
                    {
                    pop @autoPackageIdentifiers;
                    pop @packageIdentifiers;
                    };

                if (scalar @autoPackageIdentifiers)
                    {  $topic->SetPackage( NaturalDocs::SymbolString->Join(@autoPackageIdentifiers) );  };
                };

            $topicIndex++;
            $autoTopicIndex++;
            }

        # Extract topics in lists.
        elsif ($topic->IsList())
            {
            if (!exists $topicsInLists{$topic->Type()})
                {  $topicsInLists{$topic->Type()} = { };  };

            # The body has to sit in a variable for the global regex loop to work.
            my $body = $topic->Body();

            while ($body =~ /<ds>([^<]+)<\/ds>/g)
                {  $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1;  };

            $topicIndex++;
            }

        # Otherwise there's no match.  Skip the topic.  The auto-topic will be added later.
        else
            {
            $topicIndex++;
            }
        };

    # Add any auto-topics remaining.
    if (!NaturalDocs::Settings->DocumentedOnly())
        {
        while ($autoTopicIndex < scalar @$autoTopics)
            {
            my $autoTopic = $autoTopics->[$autoTopicIndex];

            if (exists $topicsInLists{$autoTopic->Type()} &&
                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
                {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
                }
            else
                {
                push(@parsedFile, $autoTopic);
                };

            $autoTopicIndex++;
            };
        };
    };
841
842
#
#   Function: RemoveRemainingHeaderlessTopics
#
#   Removes every topic that still has no title from <parsedFile>.  This is meant to run after <MergeAutoTopics()>.  A
#   headerless topic that didn't merge into anything isn't a valid topic.
#
sub RemoveRemainingHeaderlessTopics
    {
    my ($self) = @_;

    # The result is assigned back into the same array, so references to it stay valid and the order is preserved.
    @parsedFile = grep { $_->Title() } @parsedFile;
    };
862
863
#
#   Function: MakeAutoGroups
#
#   Adds group topics to <parsedFile> for the parts of the file that don't define any.  The file is divided into stretches at every
#   topic whose type starts or ends a scope, and each stretch is handed to <MakeAutoGroupsFor()> on its own.
#
sub MakeAutoGroups
    {
    my ($self) = @_;

    # A file with fewer than two topics never gets groups.
    if (scalar @parsedFile < 2)
        {  return;  };

    # If the first topic serves as the page title, it stays out of the first stretch.
    my $stretchStart = 0;

    if (NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst())
        {  $stretchStart = 1;  };

    my $position;

    for ($position = $stretchStart; $position < scalar @parsedFile; $position++)
        {
        my $scope = NaturalDocs::Topics->TypeInfo( $parsedFile[$position]->Type() )->Scope();

        # Only scope-altering topics act as boundaries between stretches.
        if ($scope != ::SCOPE_START() && $scope != ::SCOPE_END())
            {  next;  };

        # Group everything gathered since the previous boundary.  The position is advanced by the value MakeAutoGroupsFor()
        # returns to account for the group topics it inserted in front of the boundary topic.
        if ($position > $stretchStart)
            {  $position += $self->MakeAutoGroupsFor($stretchStart, $position);  };

        # The next stretch begins right after the boundary topic.
        $stretchStart = $position + 1;
        };

    # Whatever follows the last boundary forms the final stretch.
    if ($position > $stretchStart)
        {  $self->MakeAutoGroupsFor($stretchStart, $position);  };
    };
906
907
#
#   Function: MakeAutoGroupsFor
#
#   Creates group topics for sections of files that do not have them.  A support function for <MakeAutoGroups()>.
#
#   The work is done in three passes: the topics in the range are divided into runs of the same type, runs that merely alternate
#   between two compatible types in small numbers ("noise") are merged, and finally a group topic is inserted into <parsedFile> in
#   front of each remaining run.
#
#   Parameters:
#
#       startIndex - The index into <parsedFile> to start at.
#       endIndex - The index into <parsedFile> to end at.  Not inclusive.
#
#   Returns:
#
#       The number of group topics added.  Zero if the range already contains a group topic.
#
sub MakeAutoGroupsFor #(startIndex, endIndex)
    {
    my ($self, $startIndex, $endIndex) = @_;

    # No groups if any are defined already.
    for (my $i = $startIndex; $i < $endIndex; $i++)
        {
        if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP())
            {  return 0;  };
        };


    # Offsets of the members within each record of @groups, and the size of one record.
    use constant COUNT => 0;
    use constant TYPE => 1;
    use constant SECOND_TYPE => 2;
    use constant SIZE => 3;

    # This is a flat array of ( count, type, secondType ) triples.  Count and Type will always be filled in; count is the number of
    # consecutive topics of type.  On the second pass, if small groups are combined secondType will be filled in.  There will not be
    # more than two types per group.  Since secondType is only assigned on demand, the last record may occupy just two slots.
    my @groups;
    my $groupIndex = 0;


    # First pass: Determine all the groups.

    my $i = $startIndex;
    my $currentType;

    while ($i < $endIndex)
        {
        # Generic topics never start a new group unless they come first; otherwise they're counted as part of the current one.
        if (!defined $currentType || ($parsedFile[$i]->Type() ne $currentType && $parsedFile[$i]->Type() ne ::TOPIC_GENERIC()) )
            {
            if (defined $currentType)
                {  $groupIndex += SIZE;  };

            $currentType = $parsedFile[$i]->Type();

            $groups[$groupIndex + COUNT] = 1;
            $groups[$groupIndex + TYPE] = $currentType;
            }
        else
            {  $groups[$groupIndex + COUNT]++;  };

        $i++;
        };


    # Second pass: Combine groups based on "noise".  Noise means types go from A to B to A at least once, and there are at least
    # two groups in a row with three or less, and at least one of those groups is two or less.  So 3, 3, 3 doesn't count as noise, but
    # 3, 2, 3 does.

    $groupIndex = 0;

    # While there are at least three groups left...
    while ($groupIndex < scalar @groups - (2 * SIZE))
        {
        # If the group two places in front of this one has the same type...
        if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE])
            {
            # It means we went from A to B to A, which partially qualifies as noise.

            my $firstType = $groups[$groupIndex + TYPE];
            my $secondType = $groups[$groupIndex + SIZE + TYPE];

            if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType) ||
                NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType))
                {
                my $hasNoise;

                my $hasThrees;
                my $hasTwosOrOnes;

                # Find where the run of groups alternating between the two types ends, checking for noise along the way.
                my $runEnd = $groupIndex;

                while ($runEnd < scalar @groups &&
                         ($groups[$runEnd + TYPE] eq $firstType || $groups[$runEnd + TYPE] eq $secondType))
                    {
                    if ($groups[$runEnd + COUNT] > 3)
                        {
                        # They must be consecutive to count.
                        $hasThrees = 0;
                        $hasTwosOrOnes = 0;
                        }
                    elsif ($groups[$runEnd + COUNT] == 3)
                        {
                        $hasThrees = 1;

                        if ($hasTwosOrOnes)
                            {  $hasNoise = 1;  };
                        }
                    else # < 3
                        {
                        if ($hasThrees || $hasTwosOrOnes)
                            {  $hasNoise = 1;  };

                        $hasTwosOrOnes = 1;
                        };

                    $runEnd += SIZE;
                    };

                if (!$hasNoise)
                    {
                    # Resume at the last group of the run, since it may be the start of a different A-B-A pattern.
                    $groupIndex = $runEnd - SIZE;
                    }
                else # hasNoise
                    {
                    # Fold the entire run into its first group.
                    $groups[$groupIndex + SECOND_TYPE] = $secondType;

                    for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $runEnd; $noiseIndex += SIZE)
                        {
                        $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT];
                        };

                    splice(@groups, $groupIndex + SIZE, $runEnd - $groupIndex - SIZE);

                    $groupIndex += SIZE;
                    };
                }

            else # They can't group together
                {
                $groupIndex += SIZE;
                };
            }

        else
            {  $groupIndex += SIZE;  };
        };


    # Finally, create group topics for the parsed file.

    $groupIndex = 0;
    $i = $startIndex;

    my $groupTopicsAdded = 0;

    while ($groupIndex < scalar @groups)
        {
        # A leading run of nothing but generic topics doesn't get a group topic.
        if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC())
            {
            my $topic = $parsedFile[$i];
            my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1);

            if (defined $groups[$groupIndex + SECOND_TYPE])
                {  $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1);  };

            splice(@parsedFile, $i, 0, NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(),
                                                                                                            $title,
                                                                                                            $topic->Package(), $topic->Using(),
                                                                                                            undef, undef, undef,
                                                                                                            $topic->LineNumber()) );
            # Step over the group topic that was just inserted.
            $i++;
            $groupTopicsAdded++;
            };

        $i += $groups[$groupIndex + COUNT];
        $groupIndex += SIZE;
        };

    # The count is tracked explicitly instead of being derived from the size of @groups.  That array's last record often has no
    # secondType slot, so an integer division by SIZE comes up one short, and a leading generic run occupies a record without
    # having a group topic inserted for it.  Either would make the caller's index drift.
    return $groupTopicsAdded;
    };
1083
1084
#
#   Function: AddToClassHierarchy
#
#   Passes every topic in <parsedFile> whose type takes part in the class hierarchy to <OnClass()>.  This is necessary because
#   <OnClass()> may not have been called for them if they didn't match up to an auto-topic.
#
sub AddToClassHierarchy
    {
    my ($self) = @_;

    foreach my $topic (@parsedFile)
        {
        my $type = $topic->Type();

        # Skip anything without a type or whose type isn't part of the class hierarchy.
        if (!$type || !NaturalDocs::Topics->TypeInfo($type)->ClassHierarchy())
            {  next;  };

        # A regular topic is reported under its own package.
        if (!$topic->IsList())
            {
            $self->OnClass($topic->Package());
            next;
            };

        # In a list topic, every <ds> entry in the body is reported as a class of its own.
        my @entries = ($topic->Body() =~ /<ds>([^<]+)<\/ds>/g);

        foreach my $entry (@entries)
            {
            $self->OnClass( NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($entry) ) );
            };
        };
    };
1115
1116
#
#   Function: AddPackageDelineators
#
#   Adds section and class topics to make sure the package is correctly represented in the documentation.  Should be called last in
#   this process.
#
#   Whenever the package changes at a topic whose type ends a scope, a new topic is inserted in front of it: a "Global" section if
#   there is no package, a "(continued)" topic if the package already appeared earlier in the file, or a new class topic named
#   after the package otherwise.
#
sub AddPackageDelineators
    {
    my ($self) = @_;

    my $index = 0;
    my $currentPackage;

    # Values are the arrayref [ title, type ];
    my %usedPackages;

    while ($index < scalar @parsedFile)
        {
        my $topic = $parsedFile[$index];

        if ($topic->Package() ne $currentPackage)
            {
            $currentPackage = $topic->Package();
            my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();

            if ($scopeType == ::SCOPE_START())
                {
                # The topic introduces the package itself, so just remember it in case the package comes up again.
                $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ];
                }
            elsif ($scopeType == ::SCOPE_END())
                {
                my $newTopic;

                if (!defined $currentPackage)
                    {
                    $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global',
                                                                                                   undef, undef,
                                                                                                   undef, undef, undef,
                                                                                                   $topic->LineNumber(), undef);
                    }
                else
                    {
                    my ($title, $body, $summary, $type);
                    my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage);

                    if (exists $usedPackages{$currentPackage})
                        {
                        $title = $usedPackages{$currentPackage}->[0];
                        $type = $usedPackages{$currentPackage}->[1];
                        $body = '<p>(continued)</p>';
                        $summary = '(continued)';
                        }
                    else
                        {
                        $title = join($language->PackageSeparator(), @packageIdentifiers);
                        $type = ::TOPIC_CLASS();

                        # Body and summary stay undef.

                        # This has to be stored in the same [ title, type ] form as above.  The branch for already used packages
                        # dereferences the value as an array, which would fail on a plain string.
                        $usedPackages{$currentPackage} = [ $title, $type ];
                        };

                    # The new topic's package is the current package minus the identifiers that make up its title.
                    my @titleIdentifiers = NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) );
                    for (my $i = 0; $i < scalar @titleIdentifiers; $i++)
                        {  pop @packageIdentifiers;  };

                    $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title,
                                                                                                   NaturalDocs::SymbolString->Join(@packageIdentifiers), undef,
                                                                                                   undef, $summary, $body,
                                                                                                   $topic->LineNumber(), undef);
                    }

                # Insert the delineator in front of the topic and step over it.
                splice(@parsedFile, $index, 0, $newTopic);
                $index++;
                }
            };

        $index++;
        };
    };
1197
1198
#
#   Function: BreakLists
#
#   Breaks list topics into individual topics.
#
#   This only applies to list topics whose type has BreakLists set.  Every <ds>/<dd> pair found in the body's <dl> blocks becomes a
#   topic of its own with the same type, package, using and line number as the list topic.  If any body content is left over outside
#   of the <dl> blocks, it is kept as a group topic carrying the list's title, placed in front of the new topics.
#
sub BreakLists
    {
    my $self = shift;

    my $index = 0;

    while ($index < scalar @parsedFile)
        {
        my $topic = $parsedFile[$index];

        if ($topic->IsList() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->BreakLists())
            {
            my $body = $topic->Body();

            my @newTopics;
            my $newBody;

            my $bodyIndex = 0;

            for (;;)
                {
                my $startList = index($body, '<dl>', $bodyIndex);

                if ($startList == -1)
                    {  last;  };

                # Everything between the previous list and this one stays in the body.
                $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex);

                my $endList = index($body, '</dl>', $startList);

                # If the closing tag is missing, treat the rest of the body as the list.  Continuing with -1 would move
                # $bodyIndex back to the beginning of the body and could find the same <dl> over and over again.
                my $unterminated = ($endList == -1);

                if ($unterminated)
                    {  $endList = length($body);  };

                my $listBody = substr($body, $startList, $endList - $startList);

                while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
                    {
                    my ($symbol, $description) = ($1, $2);

                    # The entry is still encoded as markup at this point.  Decode it before using it as a title, the same way the
                    # other code in this file does before comparing <ds> entries to titles or turning them into symbols.
                    $symbol = NaturalDocs::NDMarkup->RestoreAmpChars($symbol);

                    push @newTopics, NaturalDocs::Parser::ParsedTopic->New( $topic->Type(), $symbol, $topic->Package(),
                                                                                                            $topic->Using(), undef,
                                                                                                            $self->GetSummaryFromDescriptionList($description),
                                                                                                            '<p>' . $description .  '</p>', $topic->LineNumber(),
                                                                                                            undef );
                    };

                if ($unterminated)
                    {
                    $bodyIndex = length($body);
                    last;
                    };

                # Continue after the closing tag, which is five characters long.
                $bodyIndex = $endList + 5;
                };

            $newBody .= substr($body, $bodyIndex);

            # Remove trailing headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+$//;

            # Remove empty headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g;

            if ($newBody)
                {
                unshift @newTopics, NaturalDocs::Parser::ParsedTopic->New( ::TOPIC_GROUP(), $topic->Title(), $topic->Package(),
                                                                                                          $topic->Using(), undef,
                                                                                                          $self->GetSummaryFromBody($newBody), $newBody,
                                                                                                          $topic->LineNumber(), undef );
                };

            # Replace the list topic with its pieces and continue after them.
            splice(@parsedFile, $index, 1, @newTopics);

            $index += scalar @newTopics;
            }

        else # not a list
            {  $index++;  };
        };
    };
1274
1275
#
#   Function: GetSummaryFromBody
#
#   Returns the summary text from the topic body, which is the first sentence of its leading paragraph.
#
#   Parameters:
#
#       body - The complete topic body, in <NDMarkup>.
#
#   Returns:
#
#       The topic summary, or undef if the body doesn't start with a paragraph.
#
sub GetSummaryFromBody #(body)
    {
    my ($self, $body) = @_;

    # The paragraph must come first in the body.  A single heading in front of it is tolerated, but nothing else.  The sentence
    # ends either with the paragraph or at sentence punctuation that is followed by a space, closing character, or entity.
    my ($sentence, $terminator) =
        ($body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/x);

    my $summary;

    # The terminator is only defined if the pattern matched.
    if (defined $terminator)
        {
        # The closing paragraph tag isn't part of the summary, but the punctuation that ended the sentence is.
        $summary = ($terminator eq '</p>' ? $sentence : $sentence . $terminator);
        };

    return $summary;
    };
1307
1308
#
#   Function: GetSummaryFromDescriptionList
#
#   Returns the summary text from a description list entry, which is its first sentence.
#
#   Parameters:
#
#       description - The description in <NDMarkup>.  Should be the content between the <dd></dd> tags only.
#
#   Returns:
#
#       The description summary, or undef if none.
#
sub GetSummaryFromDescriptionList #(description)
    {
    my ($self, $description) = @_;

    # Captures the text up to the end of the first sentence, and then whatever ended it: either the end of the description or
    # sentence punctuation that is followed by a space, closing character, or entity.
    my @pieces = ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/);

    my $summary;

    # The list is only empty if the pattern didn't match.
    if (scalar @pieces)
        {  $summary = join('', @pieces);  };

    return $summary;
    };
1333
1334
13351;
1336