1 { Search engine for wiki pages
2 
3   Copyright (C) 2012  Mattias Gaertner  mattias@freepascal.org
4 
5   This source is free software; you can redistribute it and/or modify it under
6   the terms of the GNU General Public License as published by the Free
7   Software Foundation; either version 2 of the License, or (at your option)
8   any later version.
9 
10   This code is distributed in the hope that it will be useful, but WITHOUT ANY
11   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
13   details.
14 
15   A copy of the GNU General Public License is available on the World Wide Web
16   at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
17   to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
18   Boston, MA 02110-1335, USA.
19 
20 }
21 unit WikiHelpManager;
22 
23 {$mode objfpc}{$H+}
24 
25 { $DEFINE VerboseWikiHelp}
26 
27 interface
28 
29 uses
30   Classes, SysUtils, math,
31   // LazUtils
32   LazFileUtils, LazLogger, LazDbgLog, LazUTF8, LazStringUtils, laz2_DOM, AvgLvlTree,
33   // CodeTools
34   BasicCodeTools, KeywordFuncLists,
35   //
36   MTProcs,
37   // Wiki
38   Wiki2HTMLConvert, Wiki2XHTMLConvert, WikiFormat, WikiParser;
39 
40 type
41   TWikiHelp = class;
42 
43   TWHFitsCategory = (
44     whfcNone,
45     whfcLink,
46     whfcText,
47     whfcHeader,
48     whfcPageTitle
49     );
50   TWHFitsCategories = set of TWHFitsCategory;
51 
52   TWHFitsStringFlag = (
53     whfsPart,
54     whfsWholeWord
55     );
56   TWHFitsStringFlags = set of TWHFitsStringFlag;
57 
58   TWHPhrasePageFit = record
59     Category: TWHFitsCategory;
60     Quality: TWHFitsStringFlag;
61   end;
62   PWHPhrasePageFit = ^TWHPhrasePageFit;
63 
64   TWHScore = single;
65 
66   { TWHScoring }
67 
68   TWHScoring = class(TPersistent)
69   public
70     Phrases: array[TWHFitsCategory,TWHFitsStringFlag] of TWHScore;
Equalsnull71     function Equals(Obj: TObject): boolean; override;
72     procedure Assign(Source: TPersistent); override;
73   end;
74 
75   { TWikiHelpQuery }
76 
77   TWikiHelpQuery = class(TPersistent)
78   public
79     Phrases: TStrings;
80     LoPhrases: TStrings; // Phrases lowercase
81     Languages: string; // comma separated list, '-' means not the original, 'de' = german, '*' = all
82     Scoring: TWHScoring;
83     FreeScoring: boolean;
84     constructor Create(const SearchText: string; const aLang: string = '';
85       aScoring: TWHScoring = nil; aFreeScoring: boolean = false);
86     constructor Clone(Query: TWikiHelpQuery);
87     destructor Destroy; override;
Equalsnull88     function Equals(Obj: TObject): boolean; override;
89     procedure Assign(Source: TPersistent); override;
90   end;
91 
92   TWHTextNodeType = (
93     whnTxt,
94     whnHeader,
95     whnLink
96     );
97 
98   { TWHTextNode }
99 
100   TWHTextNode = class
101   private
102     FChildNodes: TFPList; // list of TW2HelpTextNode
103     FIndexInParent: integer;
104     FParent: TWHTextNode;
GetChildNodesnull105     function GetChildNodes(Index: integer): TWHTextNode;
106     procedure RemoveChild(Child: TWHTextNode);
107   public
108     Typ: TWHTextNodeType;
109     Txt: string;
110     constructor Create(aTyp: TWHTextNodeType; aParent: TWHTextNode);
111     destructor Destroy; override;
112     procedure Clear;
113     procedure Add(Node: TWHTextNode);
Countnull114     function Count: integer;
115     property ChildNodes[Index: integer]: TWHTextNode read GetChildNodes; default;
116     property IndexInParent: integer read FIndexInParent;
117     property Parent: TWHTextNode read FParent;
FirstChildnull118     function FirstChild: TWHTextNode;
LastChildnull119     function LastChild: TWHTextNode;
NextSiblingnull120     function NextSibling: TWHTextNode;
PreviousSiblingnull121     function PreviousSibling: TWHTextNode;
Nextnull122     function Next: TWHTextNode; // first child, then next sibling, then next sibling of parent, ...
NextSkipChildrennull123     function NextSkipChildren: TWHTextNode; // first next sibling, then next sibling of parent, ...
Previousnull124     function Previous: TWHTextNode; // the reverse of Next
LastLeafnull125     function LastLeaf: TWHTextNode; // get last child of last child of ...
Levelnull126     function Level: integer; // root node has 0
127 
CalcMemSizenull128     function CalcMemSize: SizeInt;
129   end;
130 
131   { TW2HelpPage }
132 
133   TW2HelpPage = class(TW2HTMLPage)
134   public
135     WHRoot: TWHTextNode;
136     CurWHNode: TWHTextNode;
137     Score: single;
138     destructor Destroy; override;
GetScorenull139     function GetScore(Query: TWikiHelpQuery): TWHScore;
140     procedure GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
GetNodeHighestScorenull141     function GetNodeHighestScore(Query: TWikiHelpQuery): TWHTextNode;
142   end;
143 
144   { TWiki2HelpConverter }
145 
146   TWiki2HelpConverter = class(TWiki2HTMLConverter)
147   private
148     FCurQuery: TWikiHelpQuery;
149     FHelp: TWikiHelp;
150   protected
151     PagesPerThread: integer;
152     AvailableImages: TFilenameToStringTree; // existing files in the ImagesDirectory
153     procedure SavePage({%H-}Page: TW2XHTMLPage); override;
FindImagenull154     function FindImage(const ImgFilename: string): string; override;
155     procedure ExtractPageText(Page: TW2HelpPage);
156     procedure ExtractTextToken(Token: TWPToken);
157     procedure ParallelExtractPageText(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
158     procedure ParallelLoadPage(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
159     procedure ParallelComputeScores(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
160   public
161     constructor Create; override;
162     procedure Clear; override;
163     destructor Destroy; override;
164     procedure LoadPages;
165     procedure ConvertInit; override;
166     procedure ExtractAllTexts;
167     procedure Search(Query: TWikiHelpQuery; var FoundPages: TFPList);
168     procedure SavePageAsHTMLToStream(Page: TW2HelpPage; aStream: TStream);
PageToFilenamenull169     function PageToFilename(Page: string; IsInternalLink, {%H-}Full: boolean
170       ): string; override;
PageToFilenamenull171     function PageToFilename(Page: TW2XHTMLPage; {%H-}Full: boolean): string; override;
172     property Help: TWikiHelp read FHelp;
173   end;
174 
175   { TWikiHelpThread }
176 
177   TWikiHelpThread = class(TThread)
178   protected
179     fLogMsg: string;
180     fCompleted: boolean;
181     procedure Execute; override;
182     procedure MainThreadLog;
183     procedure Log({%H-}Msg: string);
184     procedure ConverterLog({%H-}Msg: string);
185     procedure Scanned; // called in thread at end
186   public
187     Help: TWikiHelp;
188   end;
189 
190   TWikiHelpProgressStep = (
191     whpsNone,
192     whpsWikiScanDir,
193     whpsWikiLoadPages,
194     whpsWikiExtractPageTexts,
195     whpsWikiLoadComplete,
196     whpsWikiSearch,
197     whpsWikiSearchComplete
198     );
199 
200   { TWikiHelp }
201 
202   TWikiHelp = class(TComponent)
203   private
204     FAborting: boolean;
205     FConverter: TWiki2HelpConverter;
206     FMaxResults: integer;
207     FOnScanned: TNotifyEvent;
208     FOnSearched: TNotifyEvent;
209     FQuery: TWikiHelpQuery;
210     FDefaultScoring: TWHScoring;
211     FResultsCSS: string;
212     FResultsCSSURL: string;
213     FResultsHTML: string;
214     FXMLDirectory: string;
215     FCritSec: TRTLCriticalSection;
216     FScanThread: TWikiHelpThread;
217     fProgressStep: TWikiHelpProgressStep;
218     fProgressCount: integer;
219     fProgressMax: integer;
220     fWikiLoadTimeMSec: integer;
221     fWikiSearchTimeMSec: integer;
GetImagesDirectorynull222     function GetImagesDirectory: string;
223     procedure SetImagesDirectory(AValue: string);
224     procedure SetMaxResults(AValue: integer);
225     procedure SetQuery(AValue: TWikiHelpQuery);
226     procedure SetXMLDirectory(AValue: string);
227     procedure EnterCritSect;
228     procedure LeaveCritSect;
229     procedure Scanned;
230     procedure DoSearch;
FoundNodeToHTMLSnippetnull231     function FoundNodeToHTMLSnippet(aPage: TW2HelpPage; aNode: TWHTextNode;
232       aQuery: TWikiHelpQuery): string;
233   public
234     constructor Create(AOwner: TComponent); override;
235     destructor Destroy; override;
GetProgressCaptionnull236     function GetProgressCaption: string;
Busynull237     function Busy: boolean;
238 
239     property ResultsCSS: string read FResultsCSS write FResultsCSS;
240     property ResultsCSSURL: string read FResultsCSSURL write FResultsCSSURL;
241 
242     // load wiki files
243     procedure StartLoading; // returns immediately
LoadingContentnull244     function LoadingContent: boolean;
245     procedure AbortLoading(Wait: boolean);
246     property Aborting: boolean read FAborting;
LoadCompletenull247     function LoadComplete: boolean;
248 
249     // languages
CollectAllLanguagesnull250     function CollectAllLanguages(AsCaption: boolean): TStrings;
251 
252     // search
253     procedure Search(const Term: string; const Languages: string = '';
254                      Scoring: TWHScoring = nil; FreeScoring: boolean = false);
255     procedure Search(aQuery: TWikiHelpQuery);
256     property Query: TWikiHelpQuery read FQuery;
257     property DefaultScoring: TWHScoring read FDefaultScoring;
258     property MaxResults: integer read FMaxResults write SetMaxResults;
259     property ResultsHTML: string read FResultsHTML;
260 
261     // get page
262     procedure SavePageToStream(DocumentName: string; aStream: TStream);
263   public
264     property XMLDirectory: string read FXMLDirectory write SetXMLDirectory; // directory where the wiki xml files are
265     property ImagesDirectory: string read GetImagesDirectory write SetImagesDirectory; // directory where the wiki image files are
266     property Converter: TWiki2HelpConverter read FConverter;
267     property OnScanned: TNotifyEvent read FOnScanned write FOnScanned;
268     property OnSearched: TNotifyEvent read FOnSearched write FOnSearched;
269   end;
270 
271 var
272   WikiHelp: TWikiHelp = nil;
273 
SearchTextToPhrasesnull274 function SearchTextToPhrases(Txt: string): TStringList;
CompareW2HPageForScorenull275 function CompareW2HPageForScore(Page1, Page2: Pointer): integer;
TextToHTMLSnippednull276 function TextToHTMLSnipped(Txt: string; LoCaseStringsToHighlight: TStrings;
277   MaxUTF8Length: integer): string;
278 
dbgsnull279 function dbgs(t: TWHTextNodeType): string; overload;
280 
281 procedure Test_TextToHTMLSnipped;
282 
283 implementation
284 
TextToHTMLSnippednull285 function TextToHTMLSnipped(Txt: string; LoCaseStringsToHighlight: TStrings;
286   MaxUTF8Length: integer): string;
287 var
288   i: Integer;
289   LoTxt: String;
290   Bold: PByte; // for each UTF-8 character: the number of matching phrases
291   Phrase: String;
292   PhraseStartChar: Char;
293   LoTxtP: PChar;
294   CurLoTxtP: PChar;
295   CurPhraseP: PChar;
296   BoldP: PByte;
297   l: Integer;
298   BestPhraseCount: Integer;
299   CurPhraseCount: Integer;
300   BestPos: Integer;
301   IsBold: Boolean;
302   StartChomped: Boolean;
303   EndChomped: Boolean;
304 begin
305   if MaxUTF8Length<=0 then exit('');
306   Result:=UTF8Trim(Txt);
307   {$IFDEF VerboseTextToHTMLSnipped}
308   debugln(['TextToHTMLSnipped trimmed Result="',Result,'"']);
309   debugln(['TextToHTMLSnipped LoCaseStringsToHighlight="',Trim(LoCaseStringsToHighlight.Text),'"']);
310   {$ENDIF}
311   // convert white space to single space
312   i:=1;
313   while i<=length(Result) do begin
314     if Result[i] in [#0..#31] then
315       Result[i]:=' ';
316     if (Result[i]=' ') and ((i=1) or (Result[i-1]=' ')) then
317       Delete(Result,i,1)
318     else
319       inc(i);
320   end;
321   if Result='' then exit;
322   LoTxt:=UTF8LowerCase(Result);
323   {$IFDEF VerboseTextToHTMLSnipped}
324   debugln(['TextToHTMLSnipped locase Result="',LoTxt,'"']);
325   {$ENDIF}
326   GetMem(Bold,Max(length(LoTxt),length(Result))+7);
327   try
328     // mark phrases
329     FillByte(Bold^,length(LoTxt)+7,0);
330     for i:=0 to Min(LoCaseStringsToHighlight.Count-1,255) do begin
331       Phrase:=LoCaseStringsToHighlight[i];
332       if Phrase='' then continue;
333       BoldP:=Bold;
334       PhraseStartChar:=Phrase[1];
335       LoTxtP:=PChar(LoTxt);
336       while LoTxtP^<>#0 do begin
337         //debugln(['TextToHTMLSnipped PhraseStartChar=',PhraseStartChar,' ',dbgstr(LoTxtP^)]);
338         if LoTxtP^=PhraseStartChar then begin
339           CurLoTxtP:=LoTxtP+1;
340           CurPhraseP:=PChar(Phrase)+1;
341           while (CurLoTxtP^=CurPhraseP^) and (CurLoTxtP^<>#0) do begin
342             inc(CurLoTxtP);
343             inc(CurPhraseP);
344           end;
345           if CurPhraseP^=#0 then begin
346             // phrase found => mark phrase in Bold array
347             //debugln(['TextToHTMLSnipped phrase "',Phrase,'" found at ',LoTxtP-PChar(LoTxt)]);
348             CurPhraseP:=PChar(Phrase);
349             while (CurPhraseP^<>#0) do begin
350               l:=UTF8CodepointSize(CurPhraseP);
351               inc(LoTxtP,l);
352               inc(CurPhraseP,l);
353               BoldP^+=1;
354               inc(BoldP);
355             end;
356             continue;
357           end;
358         end;
359         inc(LoTxtP,UTF8CodepointSize(LoTxtP));
360         inc(BoldP);
361       end;
362     end;
363 
364     {$IFDEF VerboseTextToHTMLSnipped}
365     dbgout(' Bold: ');
366     LoTxtP:=PChar(LoTxt);
367     BoldP:=Bold;
368     while LoTxtP^<>#0 do begin
369       dbgout([' ',dbgstr(LoTxtP^),':',BoldP^]);
370       inc(LoTxtP,UTF8CodepointSize(LoTxtP));
371       inc(BoldP);
372     end;
373     debugln;
374     debugln('Result="',Result,'"');
375     dbgout ('Bold  = ');
376     l:=UTF8Length(Result);
377     for i:=0 to l-1 do
378       dbgout(dbgs(Bold[i]));
379     debugln;
380     {$ENDIF}
381 
382     l:=UTF8Length(Result);
383     StartChomped:=false;
384     EndChomped:=false;
385     if (l>MaxUTF8Length) then begin
386       // text too long
387       // => find substring with most phrases
388       CurPhraseCount:=0;
389       for i:=0 to MaxUTF8Length-1 do
390         inc(CurPhraseCount,Bold[i]);
391       BestPhraseCount:=CurPhraseCount;
392       BestPos:=0;
393       for i:=0 to l-MaxUTF8Length-1 do begin
394         CurPhraseCount+=Bold[i+MaxUTF8Length]-Bold[i];
395         if CurPhraseCount>=BestPhraseCount then begin
396           BestPhraseCount:=CurPhraseCount;
397           BestPos:=i+1;
398         end;
399       end;
400       if BestPos>0 then begin
401         // BestPos is the latest substring containing the maximum
402         // => move BestPos to center the maximum
403         // => balance left and right of not marked characters
404         i:=BestPos;
405         while (i>0) and (Bold[i-1]=0)  and (Bold[i+MaxUTF8Length-1]=0) do
406           dec(i);
407         if i<BestPos then inc(i);
408         BestPos:=(i+BestPos) div 2;
409       end;
410 
411       // cut down Result and Bold
412       Result:=UTF8Copy(Result,BestPos+1,MaxUTF8Length);
413       if BestPos>0 then begin
414         StartChomped:=true;
415         System.Move(Bold[BestPos],Bold[0],MaxUTF8Length);
416       end;
417       if BestPos+MaxUTF8Length<l then
418         EndChomped:=true;
419     end;
420 
421     {$IFDEF VerboseTextToHTMLSnipped}
422     debugln(['TextToHTMLSnipped chomped Result="',Result,'"']);
423     {$ENDIF}
424 
425     // add bold tags
426     i:=1;
427     BoldP:=Bold;
428     IsBold:=false;
429     while i<=length(Result) do begin
430       if (BoldP^>0) and (not IsBold) then begin
431         // insert bold start tag
432         Insert('<b>',Result,i);
433         inc(i,length('<b>'));
434         IsBold:=true;
435       end else if (BoldP^=0) and IsBold then begin
436         // insert bold end tag
437         Insert('</b>',Result,i);
438         inc(i,length('</b>'));
439         IsBold:=false;
440       end;
441       if Result[i]='<' then begin
442         // replace <
443         ReplaceSubstring(Result,i,1,'&lt;');
444         inc(i,length('&lt;'));
445       end else if Result[i]='>' then begin
446         // replace >
447         ReplaceSubstring(Result,i,1,'&gt;');
448         inc(i,length('&gt;'));
449       end else
450         inc(i,UTF8CodepointSize(@Result[i]));
451       inc(BoldP);
452     end;
453     if IsBold then
454       Result+='</b>';
455     // prepend and append '...'
456     Result:=UTF8Trim(Result);
457     if StartChomped then
458       Result:='...'+Result;
459     if EndChomped then
460       Result+='...';
461 
462     {$IFDEF VerboseTextToHTMLSnipped}
463     debugln(['TextToHTMLSnipped END Result="',Result,'"']);
464     {$ENDIF}
465   finally
466     FreeMem(Bold);
467   end;
468 end;
469 
dbgsnull470 function dbgs(t: TWHTextNodeType): string;
471 begin
472   Result:='';
473   writestr(Result,t);
474 end;
475 
476 procedure Test_TextToHTMLSnipped;
477 
478   procedure t(Txt, LoCaseHighlights: string; MaxUTF8Length: integer; Expected: string);
479   var
480     LoCaseStringsToHighlight: TStringList;
481     s: String;
482   begin
483     LoCaseStringsToHighlight:=TStringList.Create;
484     LoCaseStringsToHighlight.Delimiter:=',';
485     LoCaseStringsToHighlight.StrictDelimiter:=true;
486     LoCaseStringsToHighlight.DelimitedText:=LoCaseHighlights;
487     s:=TextToHTMLSnipped(Txt,LoCaseStringsToHighlight,MaxUTF8Length);
488     if Expected<>s then begin
489       debugln(['Test_TextToHTMLSnipped Txt="'+Txt+'"']);
490       debugln(['Test_TextToHTMLSnipped LoCaseHighlights="'+LoCaseHighlights+'"']);
491       debugln(['Test_TextToHTMLSnipped MaxUTF8Length='+dbgs(MaxUTF8Length)]);
492       debugln(['Test_TextToHTMLSnipped Expected="'+Expected+'"']);
493       debugln(['Test_TextToHTMLSnipped Result  ="'+s+'"']);
494       raise Exception.Create('Test_TextToHTMLSnipped: Txt="'+Txt+'" LoCaseHighlights="'+LoCaseHighlights+'" Max='+dbgs(MaxUTF8Length)+' Expected="'+Expected+'" Result="'+s+'"');
495     end;
496     LoCaseStringsToHighlight.Free;
497   end;
498 
499 begin
500   t('','',0,'');
501   t('bla','bla',100,'<b>bla</b>');
502   t('bla foo bar','bla,bar',100,'<b>bla</b> foo <b>bar</b>');
503   t('bla foo bar','bla foo,bla,foo',100,'<b>bla foo</b> bar');
504   t('bla foo bar','foo',100,'bla <b>foo</b> bar');
505   t('bla foo bar','foo',7,'...a <b>foo</b> b...');
506   t('bl< foo >ar','foo',7,'...&lt; <b>foo</b> &gt;...');
507 end;
508 
SearchTextToPhrasesnull509 function SearchTextToPhrases(Txt: string): TStringList;
510 var
511   p: PChar;
512   StartPos: PChar;
513   Phrase: String;
514 begin
515   Result:=TStringList.Create;
516   Txt:=UTF8Trim(Txt);
517   if Txt='' then exit;
518   Result.Add(Txt);
519   p:=PChar(Txt);
520   Phrase:='';
521   while p^<>#0 do begin
522     if p^='"' then begin
523       // quote
524       inc(p);
525       StartPos:=p;
526       while not (p^ in [#0,'"']) do inc(p);
527       Phrase:=Phrase+SubString(StartPos,p-StartPos);
528       if p^<>#0 then inc(p);
529     end else if p^ in [' ',#9,#10,#13] then begin
530       // space => end phrase
531       inc(p);
532       if Phrase<>'' then begin
533         if Result.IndexOf(Phrase)<0 then
534           Result.Add(Phrase);
535         Phrase:='';
536       end;
537     end else begin
538       // word
539       StartPos:=p;
540       while not (p^ in [#0,'"',' ',#9,#10,#13]) do inc(p);
541       Phrase:=Phrase+SubString(StartPos,p-StartPos);
542     end;
543   end;
544   if (Phrase<>'') and (Result.IndexOf(Phrase)<0) then
545     Result.Add(Phrase);
546 end;
547 
CompareW2HPageForScorenull548 function CompareW2HPageForScore(Page1, Page2: Pointer): integer;
549 var
550   p1: TW2HelpPage absolute Page1;
551   p2: TW2HelpPage absolute Page2;
552 begin
553   if p1.Score>p2.Score then
554     exit(-1)
555   else if p1.Score<p2.Score then
556     exit(1)
557   else
558     exit(0);
559 end;
560 
561 { TWHScoring }
562 
TWHScoring.Equalsnull563 function TWHScoring.Equals(Obj: TObject): boolean;
564 var
565   c: TWHFitsCategory;
566   f: TWHFitsStringFlag;
567   Src: TWHScoring;
568 begin
569   if Self=Obj then exit(true);
570   Result:=false;
571   if Obj=nil then exit;
572   if Obj is TWHScoring then begin
573     Src:=TWHScoring(Obj);
574     for c:=Low(TWHFitsCategory) to high(TWHFitsCategory) do
575       for f:=Low(TWHFitsStringFlag) to high(TWHFitsStringFlag) do
576         if Phrases[c,f]<>Src.Phrases[c,f] then exit;
577   end;
578   Result:=true;
579 end;
580 
581 procedure TWHScoring.Assign(Source: TPersistent);
582 var
583   Src: TWHScoring;
584 begin
585   if Source is TWHScoring then begin
586     Src:=TWHScoring(Source);
587     Move(Src.Phrases,Phrases,SizeOf(Phrases));
588   end else
589     inherited Assign(Source);
590 end;
591 
592 { TWikiHelpQuery }
593 
594 constructor TWikiHelpQuery.Create(const SearchText: string;
595   const aLang: string; aScoring: TWHScoring; aFreeScoring: boolean);
596 var
597   i: Integer;
598 begin
599   Phrases:=SearchTextToPhrases(SearchText);
600   LoPhrases:=TStringList.Create;
601   for i:=0 to Phrases.Count-1 do
602     LoPhrases.Add(UTF8LowerCase(Phrases[i]));
603   Languages:=aLang;
604   Scoring:=aScoring;
605   FreeScoring:=aFreeScoring;
606 end;
607 
608 constructor TWikiHelpQuery.Clone(Query: TWikiHelpQuery);
609 begin
610   Phrases:=TStringList.Create;
611   LoPhrases:=TStringList.Create;
612   Scoring:=TWHScoring.Create;
613   FreeScoring:=true;
614   Assign(Query);
615 end;
616 
617 destructor TWikiHelpQuery.Destroy;
618 begin
619   FreeAndNil(Phrases);
620   FreeAndNil(LoPhrases);
621   if FreeScoring then
622     FreeAndNil(Scoring)
623   else
624     Scoring:=nil;
625   inherited Destroy;
626 end;
627 
TWikiHelpQuery.Equalsnull628 function TWikiHelpQuery.Equals(Obj: TObject): boolean;
629 var
630   Src: TWikiHelpQuery;
631 begin
632   if Obj=Self then exit(true);
633   Result:=false;
634   if Obj=nil then exit;
635   if not (Obj is TWikiHelpQuery) then exit;
636   Src:=TWikiHelpQuery(Obj);
637   if not Phrases.Equals(Src.Phrases) then exit;
638   // LoPhrases is computed from Phrases
639   if Languages<>Src.Languages then exit;
640   if not Scoring.Equals(Src.Scoring) then exit;
641   Result:=true;
642 end;
643 
644 procedure TWikiHelpQuery.Assign(Source: TPersistent);
645 var
646   Src: TWikiHelpQuery;
647 begin
648   if Source is TWikiHelpQuery then begin
649     Src:=TWikiHelpQuery(Source);
650     Phrases.Assign(Src.Phrases);
651     LoPhrases.Assign(Src.LoPhrases);
652     Languages:=Src.Languages;
653     Scoring.Assign(Src.Scoring);
654   end else
655     inherited Assign(Source);
656 end;
657 
658 { TW2HelpPage }
659 
660 destructor TW2HelpPage.Destroy;
661 begin
662   FreeAndNil(WHRoot);
663   inherited Destroy;
664 end;
665 
GetScorenull666 function TW2HelpPage.GetScore(Query: TWikiHelpQuery): TWHScore;
667 var
668   PhrasesFit: PWHPhrasePageFit;
669   Size: Integer;
670   i: Integer;
671   Fit: PWHPhrasePageFit;
672 begin
673   Result:=0;
674   if (Query=nil) or (Query.LoPhrases.Count=0) then exit;
675   if not WikiPageHasLanguage(WikiDocumentName,Query.Languages) then begin
676     //debugln(['TW2HelpPage.GetScore lang does not fit ',WikiDocumentName,' "',GetWikiPageLanguage(WikiDocumentName),'" ',Query.Languages]);
677     exit;
678   end;
679 
680   Size:=Query.LoPhrases.Count*SizeOf(TWHPhrasePageFit);
681   GetMem(PhrasesFit,Size);
682   try
683     FillByte(PhrasesFit^,Size,0);
684     GetFit(Query,PhrasesFit);
685     for i:=0 to Query.LoPhrases.Count-1 do begin
686       Fit:=@PhrasesFit[i];
687       Result+=Query.Scoring.Phrases[Fit^.Category,Fit^.Quality];
688     end;
689   finally
690     FreeMem(PhrasesFit);
691   end;
692 end;
693 
694 procedure TW2HelpPage.GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
695 
696   procedure CheckTxt(s: string; Category: TWHFitsCategory);
697   var
698     i: Integer;
699     Phrase: String;
700     FitsWholeWord: boolean;
701     FitsCount: SizeInt;
702     Quality: TWHFitsStringFlag;
703   begin
704     s:=UTF8LowerCase(s);
705     for i:=0 to Query.LoPhrases.Count-1 do begin
706       if Fit[i].Category>Category then continue;
707       if (Fit[i].Category=Category) and (Fit[i].Quality>=whfsWholeWord) then
708         continue;
709       Phrase:=Query.LoPhrases[i];
710       HasTxtWord(PChar(Phrase),PChar(s),FitsWholeWord,FitsCount);
711       if FitsCount<=0 then continue;
712       if FitsWholeWord then
713         Quality:=whfsWholeWord
714       else
715         Quality:=whfsPart;
716       Fit[i].Category:=Category;
717       Fit[i].Quality:=Quality;
718     end;
719   end;
720 
721   procedure Traverse(Node: TWHTextNode);
722   var
723     i: Integer;
724     Category: TWHFitsCategory;
725   begin
726     if Node=nil then exit;
727     case Node.Typ of
728     whnTxt: Category:=whfcText;
729     whnHeader: Category:=whfcHeader;
730     whnLink: Category:=whfcLink;
731     else exit;
732     end;
733     CheckTxt(Node.Txt,Category);
734     for i:=0 to Node.Count-1 do
735       Traverse(Node[i]);
736   end;
737 
738 begin
739   CheckTxt(WikiPage.Title,whfcPageTitle);
740   Traverse(WHRoot);
741 end;
742 
GetNodeHighestScorenull743 function TW2HelpPage.GetNodeHighestScore(Query: TWikiHelpQuery): TWHTextNode;
744 
GetNodeScorenull745   function GetNodeScore(Node: TWHTextNode): TWHScore;
746   var
747     s: String;
748     i: Integer;
749     Phrase: String;
750     FitsWholeWord: boolean;
751     FitsCount: SizeInt;
752     Quality: TWHFitsStringFlag;
753     Category: TWHFitsCategory;
754   begin
755     Result:=0;
756     case Node.Typ of
757     whnTxt: Category:=whfcText;
758     whnHeader: Category:=whfcHeader;
759     whnLink: Category:=whfcLink;
760     else exit;
761     end;
762     s:=UTF8LowerCase(Node.Txt);
763     for i:=0 to Query.LoPhrases.Count-1 do begin
764       Phrase:=Query.LoPhrases[i];
765       HasTxtWord(PChar(Phrase),PChar(s),FitsWholeWord,FitsCount);
766       if FitsCount<=0 then continue;
767       if FitsWholeWord then
768         Quality:=whfsWholeWord
769       else
770         Quality:=whfsPart;
771       Result+=Query.Scoring.Phrases[Category,Quality];
772     end;
773   end;
774 
775   procedure Traverse(Node: TWHTextNode;
776     var BestNode: TWHTextNode; var BestScore: TWHScore);
777   var
778     i: Integer;
779     NodeScore: TWHScore;
780   begin
781     if Node=nil then exit;
782     NodeScore:=GetNodeScore(Node);
783     if NodeScore>BestScore then begin
784       BestNode:=Node;
785       BestScore:=NodeScore;
786     end;
787     for i:=0 to Node.Count-1 do
788       Traverse(Node[i],BestNode,BestScore);
789   end;
790 
791 var
792   NodeScore: TWHScore;
793 begin
794   Result:=nil;
795   NodeScore:=0;
796   Traverse(WHRoot,Result,NodeScore);
797 end;
798 
799 { TWHTextNode }
800 
TWHTextNode.GetChildNodesnull801 function TWHTextNode.GetChildNodes(Index: integer): TWHTextNode;
802 begin
803   Result:=TWHTextNode(FChildNodes[Index]);
804 end;
805 
806 procedure TWHTextNode.RemoveChild(Child: TWHTextNode);
807 var
808   i: Integer;
809 begin
810   FChildNodes.Delete(Child.IndexInParent);
811   for i:=Child.IndexInParent to FChildNodes.Count-1 do
812     ChildNodes[i].fIndexInParent:=i;
813 end;
814 
815 constructor TWHTextNode.Create(aTyp: TWHTextNodeType; aParent: TWHTextNode);
816 begin
817   Typ:=aTyp;
818   if aParent<>nil then
819     aParent.Add(Self)
820   else
821     fIndexInParent:=-1;
822 end;
823 
824 destructor TWHTextNode.Destroy;
825 begin
826   Clear;
827   if Parent<>nil then
828     Parent.RemoveChild(Self);
829   FreeAndNil(FChildNodes);
830   inherited Destroy;
831 end;
832 
833 procedure TWHTextNode.Clear;
834 var
835   i: Integer;
836   Child: TWHTextNode;
837 begin
838   Txt:='';
839   if FChildNodes<>nil then begin
840     for i:=FChildNodes.Count-1 downto 0 do begin
841       Child:=TWHTextNode(FChildNodes[i]);
842       Child.fParent:=nil;
843       Child.Free;
844     end;
845     FChildNodes.Clear;
846   end;
847 end;
848 
849 procedure TWHTextNode.Add(Node: TWHTextNode);
850 begin
851   if Node.Parent=Self then exit;
852   if Node.Parent<>nil then
853     Node.Parent.RemoveChild(Node);
854   if FChildNodes=nil then
855     FChildNodes:=TFPList.Create;
856   Node.fIndexInParent:=Count;
857   FChildNodes.Add(Node);
858   Node.fParent:=Self;
859 end;
860 
Countnull861 function TWHTextNode.Count: integer;
862 begin
863   if FChildNodes<>nil then
864     Result:=FChildNodes.Count
865   else
866     Result:=0;
867 end;
868 
FirstChildnull869 function TWHTextNode.FirstChild: TWHTextNode;
870 begin
871   if Count>0 then
872     Result:=ChildNodes[0]
873   else
874     Result:=nil;
875 end;
876 
LastChildnull877 function TWHTextNode.LastChild: TWHTextNode;
878 var
879   c: Integer;
880 begin
881   c:=Count;
882   if c>0 then
883     Result:=ChildNodes[c-1]
884   else
885     Result:=nil;
886 end;
887 
NextSiblingnull888 function TWHTextNode.NextSibling: TWHTextNode;
889 begin
890   if (Parent=nil) or (IndexInParent+2>=Parent.Count) then exit(nil);
891   Result:=Parent[IndexInParent+1];
892 end;
893 
TWHTextNode.PreviousSiblingnull894 function TWHTextNode.PreviousSibling: TWHTextNode;
895 begin
896   if (Parent=nil) or (IndexInParent=0) then exit(nil);
897   Result:=Parent[IndexInParent-1];
898 end;
899 
Nextnull900 function TWHTextNode.Next: TWHTextNode;
901 begin
902   Result:=FirstChild;
903   if Result=nil then
904     Result:=NextSkipChildren;
905 end;
906 
NextSkipChildrennull907 function TWHTextNode.NextSkipChildren: TWHTextNode;
908 var
909   Node: TWHTextNode;
910 begin
911   Result:=Self;
912   repeat
913     Node:=Result.NextSibling;
914     if Node<>nil then exit(Node);
915     Result:=Result.Parent;
916   until Result=nil;
917   Result:=nil;
918 end;
919 
TWHTextNode.Previousnull920 function TWHTextNode.Previous: TWHTextNode;
921 var
922   Node: TWHTextNode;
923 begin
924   Result:=PreviousSibling;
925   if Result=nil then
926     exit(Parent);
927   Node:=Result.LastLeaf;
928   if Node<>nil then
929     Result:=Node;
930 end;
931 
LastLeafnull932 function TWHTextNode.LastLeaf: TWHTextNode;
933 var
934   Node: TWHTextNode;
935 begin
936   Result:=LastChild;
937   if Result=nil then exit;
938   repeat
939     Node:=Result.LastChild;
940     if Node=nil then exit;
941     Result:=Node;
942   until false;
943 end;
944 
TWHTextNode.Levelnull945 function TWHTextNode.Level: integer;
946 var
947   Node: TWHTextNode;
948 begin
949   Result:=0;
950   Node:=Parent;
951   while Node<>nil do begin
952     inc(Result);
953     Node:=Node.Parent;
954   end;
955 end;
956 
CalcMemSizenull957 function TWHTextNode.CalcMemSize: SizeInt;
958 var
959   i: Integer;
960 begin
961   Result:=InstanceSize+SizeInt(MemSizeString(Txt));
962   if FChildNodes<>nil then begin
963     inc(Result,FChildNodes.InstanceSize+FChildNodes.Count*SizeOf(Pointer));
964     for i:=0 to Count-1 do
965       inc(Result,ChildNodes[i].CalcMemSize);
966   end;
967 end;
968 
969 { TWiki2HelpConverter }
970 
971 procedure TWiki2HelpConverter.SavePage(Page: TW2XHTMLPage);
972 begin
973   // do not save
974 end;
975 
TWiki2HelpConverter.FindImagenull976 function TWiki2HelpConverter.FindImage(const ImgFilename: string): string;
977 begin
978   //Log('AvailableImages='+dbgs(AvailableImages.Tree.Count)+' Img="'+ImgFilename+'"');
979   if AvailableImages.Contains(ImgFilename) then
980     Result:=ImgFilename
981   else
982     Result:='';
983 end;
984 
985 procedure TWiki2HelpConverter.ExtractTextToken(Token: TWPToken);
986 var
987   Page: TW2HelpPage;
988   W: TWikiPage;
989   Txt: String;
990   CurNode: TWHTextNode;
991   StartP, EndP: PChar;
992   NodeType: TWHTextNodeType;
993   TextToken: TWPTextToken;
994   LinkToken: TWPLinkToken;
995   Caption: String;
996 begin
997   Page:=TW2HelpPage(Token.UserData);
998   W:=Page.WikiPage;
999   CurNode:=Page.CurWHNode;
1000   if CurNode=nil then CurNode:=Page.WHRoot;
1001   case Token.Token of
1002   wptText:
1003     if Token is TWPTextToken then begin
1004       TextToken:=TWPTextToken(Token);
1005       StartP:=PChar(W.Src)+TextToken.StartPos-1;
1006       EndP:=PChar(W.Src)+TextToken.EndPos-1;
1007       while (StartP<EndP) and (StartP^ in [#1..#31,' ']) do inc(StartP);
1008       if StartP<EndP then begin
1009         // not only space
1010         Txt:=copy(W.Src,TextToken.StartPos,TextToken.EndPos-TextToken.StartPos);
1011         CurNode.Txt:=CurNode.Txt+Txt;
1012         exit;
1013       end;
1014     end;
1015 
1016   wptSection,wptHeader:
1017     if Token.Range=wprOpen then begin
1018       if Token.Token=wptHeader then
1019         NodeType:=whnHeader
1020       else
1021         NodeType:=whnTxt;
1022       Page.CurWHNode:=TWHTextNode.Create(NodeType,CurNode);
1023       exit;
1024     end else if Token.Range=wprClose then begin
1025       Page.CurWHNode:=CurNode.Parent;
1026       exit;
1027     end;
1028 
1029   wptInternLink, wptExternLink:
1030     if Token is TWPLinkToken then begin
1031       LinkToken:=TWPLinkToken(Token);
1032       Caption:=copy(W.Src,LinkToken.CaptionStartPos,
1033                     LinkToken.CaptionEndPos-LinkToken.CaptionStartPos);
1034       if Caption<>'' then begin
1035         CurNode:=TWHTextNode.Create(whnLink,CurNode);
1036         CurNode.Txt:=Caption;
1037         // do not exit, append a space to the current node
1038       end;
1039     end;
1040   end;
1041   // add a space to separate words
1042   if (CurNode.Txt='') or (not (CurNode.Txt[length(CurNode.Txt)] in [#1..#31,' ']))
1043   then
1044     CurNode.Txt:=CurNode.Txt+' ';
1045 end;
1046 
1047 procedure TWiki2HelpConverter.ParallelExtractPageText(Index: PtrInt;
1048   Data: Pointer; Item: TMultiThreadProcItem);
1049 var
1050   StartIndex, EndIndex: Integer;
1051   i: Integer;
1052 begin
1053   StartIndex:=Index*PagesPerThread;
1054   EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1055   if Help.Aborting then exit;
1056   for i:=StartIndex to EndIndex do
1057     ExtractPageText(TW2HelpPage(Pages[i]));
1058   Help.EnterCritSect;
1059   try
1060     inc(Help.fProgressCount,PagesPerThread);
1061   finally
1062     Help.LeaveCritSect;
1063   end;
1064 end;
1065 
1066 procedure TWiki2HelpConverter.ParallelLoadPage(Index: PtrInt; Data: Pointer;
1067   Item: TMultiThreadProcItem);
1068 var
1069   Page: TW2HelpPage;
1070   StartIndex, EndIndex: Integer;
1071   i: Integer;
1072 begin
1073   StartIndex:=Index*PagesPerThread;
1074   EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1075   for i:=StartIndex to EndIndex do begin
1076     if Help.Aborting then exit;
1077     Page:=TW2HelpPage(Pages[i]);
1078     try
1079       Page.ParseWikiDoc(false);
1080     except
1081       on E: Exception do begin
1082         Log('ERROR: '+Page.WikiFilename+': '+E.Message);
1083       end;
1084     end;
1085   end;
1086   Help.EnterCritSect;
1087   try
1088     inc(Help.fProgressCount,PagesPerThread);
1089   finally
1090     Help.LeaveCritSect;
1091   end;
1092 end;
1093 
1094 procedure TWiki2HelpConverter.ParallelComputeScores(Index: PtrInt;
1095   Data: Pointer; Item: TMultiThreadProcItem);
1096 var
1097   StartIndex, EndIndex: Integer;
1098   i: Integer;
1099   Page: TW2HelpPage;
1100 begin
1101   StartIndex:=Index*PagesPerThread;
1102   EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1103   if Help.Aborting then exit;
1104   for i:=StartIndex to EndIndex do begin
1105     Page:=TW2HelpPage(Pages[i]);
1106     Page.Score:=Page.GetScore(FCurQuery);
1107   end;
1108   Help.EnterCritSect;
1109   try
1110     inc(Help.fProgressCount,PagesPerThread);
1111   finally
1112     Help.LeaveCritSect;
1113   end;
1114 end;
1115 
1116 procedure TWiki2HelpConverter.ExtractPageText(Page: TW2HelpPage);
1117 begin
1118   FreeAndNil(Page.WHRoot);
1119   Page.WHRoot:=TWHTextNode.Create(whnTxt,nil);
1120   try
1121     Page.CurWHNode:=Page.WHRoot;
1122     if Page.WikiPage<>nil then
1123       Page.WikiPage.Parse(@ExtractTextToken,Page);
1124   finally
1125     Page.CurWHNode:=nil;
1126   end;
1127 end;
1128 
1129 procedure TWiki2HelpConverter.ConvertInit;
1130 var
1131   FileInfo: TSearchRec;
1132 begin
1133   inherited ConvertInit;
1134 
1135   //Log('ImagesDir='+ImagesDir);
1136   AvailableImages.Clear;
1137   if FindFirstUTF8(ImagesDir+AllFilesMask,faAnyFile,FileInfo)=0 then begin
1138     repeat
1139       if (FileInfo.Name='') or (FileInfo.Name='.') or (FileInfo.Name='..') then
1140         continue;
1141       AvailableImages[FileInfo.Name]:='1';
1142     until FindNextUTF8(FileInfo)<>0;
1143   end;
1144   FindCloseUTF8(FileInfo);
1145   Log('Found '+IntToStr(AvailableImages.Tree.Count)+' wiki images in "'+ImagesDir+'"');
1146 end;
1147 
1148 procedure TWiki2HelpConverter.ExtractAllTexts;
1149 begin
1150   Help.EnterCritSect;
1151   try
1152     Help.fProgressStep:=whpsWikiExtractPageTexts;
1153     Help.fProgressCount:=0;
1154     Help.fProgressMax:=Count;
1155   finally
1156     Help.LeaveCritSect;
1157   end;
1158   ProcThreadPool.DoParallel(@ParallelExtractPageText,0,(Count-1) div PagesPerThread);
1159 end;
1160 
1161 procedure TWiki2HelpConverter.Search(Query: TWikiHelpQuery;
1162   var FoundPages: TFPList);
1163 var
1164   i: Integer;
1165   Page: TW2HelpPage;
1166 begin
1167   Help.EnterCritSect;
1168   try
1169     Help.fProgressStep:=whpsWikiSearch;
1170     Help.fProgressCount:=0;
1171     Help.fProgressMax:=Count;
1172   finally
1173     Help.LeaveCritSect;
1174   end;
1175   FCurQuery:=Query;
1176   if FoundPages=nil then
1177     FoundPages:=TFPList.Create;
1178   ProcThreadPool.DoParallel(@ParallelComputeScores,0,(Count-1) div PagesPerThread);
1179   for i:=0 to Count-1 do begin
1180     Page:=TW2HelpPage(Pages[i]);
1181     if Page.Score<=0 then continue;
1182     FoundPages.Add(Page);
1183   end;
1184   FoundPages.Sort(@CompareW2HPageForScore);
1185 end;
1186 
1187 procedure TWiki2HelpConverter.SavePageAsHTMLToStream(Page: TW2HelpPage;
1188   aStream: TStream);
1189 begin
1190   ConvertPage(Page);
1191   SavePageToStream(Page,aStream);
1192   Page.ClearConversion;
1193 end;
1194 
TWiki2HelpConverter.PageToFilenamenull1195 function TWiki2HelpConverter.PageToFilename(Page: string; IsInternalLink,
1196   Full: boolean): string;
1197 begin
1198   Result:=WikiPageToFilename(Page,IsInternalLink,false);
1199 end;
1200 
TWiki2HelpConverter.PageToFilenamenull1201 function TWiki2HelpConverter.PageToFilename(Page: TW2XHTMLPage; Full: boolean
1202   ): string;
1203 begin
1204   Result:=Page.WikiDocumentName;
1205 end;
1206 
1207 procedure TWiki2HelpConverter.LoadPages;
1208 begin
1209   Help.EnterCritSect;
1210   try
1211     Help.fProgressStep:=whpsWikiLoadPages;
1212     Help.fProgressCount:=0;
1213     Help.fProgressMax:=Count;
1214   finally
1215     Help.LeaveCritSect;
1216   end;
1217   ProcThreadPool.DoParallel(@ParallelLoadPage,0,(Count-1) div PagesPerThread);
1218 end;
1219 
1220 constructor TWiki2HelpConverter.Create;
1221 begin
1222   inherited Create;
1223   AvailableImages:=TFilenameToStringTree.Create(true);
1224   fPageClass:=TW2HelpPage;
1225   PagesPerThread:=100;
1226 end;
1227 
1228 procedure TWiki2HelpConverter.Clear;
1229 begin
1230   inherited Clear;
1231   AvailableImages.Clear;
1232 end;
1233 
1234 destructor TWiki2HelpConverter.Destroy;
1235 begin
1236   inherited Destroy;
1237   FreeAndNil(AvailableImages);
1238 end;
1239 
1240 { TWikiHelpThread }
1241 
1242 procedure TWikiHelpThread.Execute;
1243 var
1244   FileInfo: TSearchRec;
1245   Files: TStringList;
1246   i: Integer;
1247   Filename: String;
1248   StartTime: TDateTime;
1249   EndTime: TDateTime;
1250 begin
1251   {$IF FPC_FULLVERSION<30000}
1252   CurrentThread:=Self;
1253   {$ENDIF}
1254   try
1255     Files:=nil;
1256     try
1257       StartTime:=Now;
1258       Log('TWikiHelpThread.Execute START XMLDirectory="'+Help.XMLDirectory+'"');
1259 
1260       Files:=TStringList.Create;
1261       try
1262         Help.Converter.OnLog:=@ConverterLog;
1263         // get all wiki xml files
1264         if FindFirstUTF8(Help.XMLDirectory+AllFilesMask,faAnyFile,FileInfo)=0 then begin
1265           repeat
1266             if CompareFileExt(FileInfo.Name,'.xml',false)<>0 then continue;
1267             Files.Add(FileInfo.Name);
1268           until FindNextUTF8(FileInfo)<>0;
1269         end;
1270         FindCloseUTF8(FileInfo);
1271 
1272         // add file names to converter
1273         for i:=0 to Files.Count-1 do begin
1274           Filename:=Help.XMLDirectory+Files[i];
1275           Help.Converter.AddWikiPage(Filename,false);
1276         end;
1277         if Help.Aborting then exit;
1278 
1279         // load xml files
1280         Help.Converter.LoadPages;
1281         if Help.Aborting then exit;
1282 
1283         // extract texts
1284         Help.Converter.ConvertInit;
1285         if Help.Aborting then exit;
1286         Help.Converter.ExtractAllTexts;
1287         if Help.Aborting then exit;
1288 
1289         fCompleted:=true;
1290         EndTime:=Now;
1291         Help.fWikiLoadTimeMSec:=round(Abs(EndTime-StartTime)*86400000);
1292         Log('TWikiHelpThread.Execute SCAN complete XMLDirectory="'+Help.XMLDirectory+'" '+dbgs(Help.fWikiLoadTimeMSec)+'msec');
1293       finally
1294         Files.Free;
1295         Help.Converter.OnLog:=nil;
1296       end;
1297     except
1298       on E: Exception do begin
1299         Log('TWikiHelpThread.Execute error: '+E.Message);
1300       end;
1301     end;
1302   finally
1303     Scanned;
1304     {$IF FPC_FULLVERSION<30000}
1305     CurrentThread:=nil;
1306     {$ENDIF}
1307   end;
1308 end;
1309 
1310 procedure TWikiHelpThread.MainThreadLog;
1311 // called in main thread
1312 begin
1313   DebugLn(fLogMsg);
1314 end;
1315 
1316 procedure TWikiHelpThread.Log(Msg: string);
1317 begin
1318   fLogMsg:=Msg;
1319   CurrentThread.Synchronize(@MainThreadLog);
1320 end;
1321 
1322 procedure TWikiHelpThread.ConverterLog(Msg: string);
1323 begin
1324   {$IFDEF VerboseWikiHelp}
1325   Log(Msg);
1326   {$ENDIF}
1327 end;
1328 
1329 procedure TWikiHelpThread.Scanned;
1330 // called in this thread
1331 begin
1332   Help.EnterCritSect;
1333   try
1334     Help.FScanThread:=nil;
1335     if fCompleted then
1336       Help.fProgressStep:=whpsWikiLoadComplete
1337     else
1338       Help.fProgressStep:=whpsNone;
1339   finally
1340     Help.LeaveCritSect;
1341   end;
1342   Synchronize(@Help.Scanned);
1343 end;
1344 
1345 { TWikiHelp }
1346 
1347 procedure TWikiHelp.SetImagesDirectory(AValue: string);
1348 var
1349   NewDir: String;
1350 begin
1351   NewDir:=TrimAndExpandDirectory(TrimFilename(AValue));
1352   if Converter.ImagesDir=NewDir then Exit;
1353   AbortLoading(true);
1354   Converter.ImagesDir:=NewDir;
1355 end;
1356 
1357 procedure TWikiHelp.SetMaxResults(AValue: integer);
1358 begin
1359   if FMaxResults=AValue then Exit;
1360   FMaxResults:=AValue;
1361 end;
1362 
1363 procedure TWikiHelp.SetQuery(AValue: TWikiHelpQuery);
1364 begin
1365   if FQuery=AValue then Exit;
1366   FQuery:=AValue;
1367 end;
1368 
GetImagesDirectorynull1369 function TWikiHelp.GetImagesDirectory: string;
1370 begin
1371   Result:=Converter.ImagesDir;
1372 end;
1373 
1374 procedure TWikiHelp.SetXMLDirectory(AValue: string);
1375 var
1376   NewDir: String;
1377 begin
1378   NewDir:=TrimAndExpandDirectory(TrimFilename(AValue));
1379   if FXMLDirectory=NewDir then Exit;
1380   AbortLoading(true);
1381   FXMLDirectory:=NewDir;
1382 end;
1383 
1384 procedure TWikiHelp.EnterCritSect;
1385 begin
1386   EnterCriticalsection(FCritSec);
1387 end;
1388 
1389 procedure TWikiHelp.LeaveCritSect;
1390 begin
1391   LeaveCriticalsection(FCritSec);
1392 end;
1393 
1394 procedure TWikiHelp.Scanned;
1395 begin
1396   if Assigned(OnScanned) then
1397     OnScanned(Self);
1398   DoSearch;
1399 end;
1400 
1401 procedure TWikiHelp.DoSearch;
1402 var
1403   StartTime: TDateTime;
1404   EndTime: TDateTime;
1405   FoundPages: TFPList;
1406   i: Integer;
1407   Page: TW2HelpPage;
1408   Node: TWHTextNode;
1409   s: String;
1410   HTML: String;
1411 begin
1412   FResultsHTML:='';
1413   if (Query=nil) or (Query.Phrases.Count=0) then begin
1414     EnterCritSect;
1415     try
1416       fProgressStep:=whpsWikiLoadComplete;
1417     finally
1418       LeaveCritSect;
1419     end;
1420   end else begin
1421     StartTime:=Now;
1422     //debugln(['TWikiHelp.DoSearch START Search=',Trim(Query.Phrases.Text),' Lang="',Query.Languages,'"']);
1423     FoundPages:=nil;
1424     Converter.Search(Query,FoundPages);
1425     HTML:='<html>'+LineEnding
1426          +'<head>'+LineEnding
1427          +' <meta content="text/html; charset=utf-8" http-equiv="Content-Type">'+LineEnding;
1428     if ResultsCSSURL<>'' then
1429     HTML+=' <link href="'+ResultsCSSURL+'" type="text/css" rel="stylesheet">'+LineEnding;
1430     HTML+='</head>'+LineEnding
1431          +'<body>'+LineEnding;
1432     for i:=0 to Min(FoundPages.Count-1,MaxResults) do begin
1433       Page:=TW2HelpPage(FoundPages[i]);
1434       //debugln(['TWikiHelp.DoSearch ',Page.WikiDocumentName,' ',Page.WikiLanguage,' ',WikiPageHasLanguage(Page.WikiDocumentName,Query.Languages)]);
1435       Node:=Page.GetNodeHighestScore(Query);
1436       s:='<div class="wikiSearchResultItem">'+FoundNodeToHTMLSnippet(Page,Node,Query)+'</div>'+LineEnding;
1437       //debugln(['TWikiHelp.TestSearch Score=',Page.Score,' HTML="',s,'"']);
1438       HTML+=s;
1439     end;
1440     HTML+='</body>'+LineEnding
1441          +'</html>'+LineEnding;
1442     FResultsHTML:=HTML;
1443     FoundPages.Free;
1444     EndTime:=Now;
1445     fWikiSearchTimeMSec:=round(Abs(EndTime-StartTime)*86400000);
1446     EnterCritSect;
1447     try
1448       fProgressStep:=whpsWikiSearchComplete;
1449     finally
1450       LeaveCritSect;
1451     end;
1452     //debugln(['TWikiHelp.DoSearch END Search="',Trim(Query.Phrases.Text),'" ',dbgs(fWikiSearchTimeMSec)+'msec']);
1453   end;
1454   if Assigned(OnSearched) then
1455     OnSearched(Self);
1456 end;
1457 
FoundNodeToHTMLSnippetnull1458 function TWikiHelp.FoundNodeToHTMLSnippet(aPage: TW2HelpPage;
1459   aNode: TWHTextNode; aQuery: TWikiHelpQuery): string;
1460 var
1461   HeaderNode: TWHTextNode;
1462 begin
1463   // link to the page
1464   Result:='<a href="'+StrToXMLValue(aPage.WikiDocumentName)+'"'
1465     +' class="wikiLinkPage"'
1466     +' alt="'+StrToXMLValue(aPage.WikiPage.Title)+'"'
1467     +'>'
1468     +TextToHTMLSnipped(aPage.WikiPage.Title,aQuery.LoPhrases,200)+'</a><br>'+LineEnding;
1469   if aNode=nil then begin
1470     // get the first node with some text
1471     aNode:=aPage.WHRoot;
1472     while (aNode<>nil) and (UTF8Trim(aNode.Txt)='') do
1473       aNode:=aNode.Next;
1474   end;
1475   if aNode<>nil then begin
1476     //debugln(['TWikiHelp.FoundNodeToHTMLSnippet ',dbgs(aNode.Typ),' Txt="'+dbgstr(ANode.Txt)+'"']);
1477     HeaderNode:=aNode;
1478     while (HeaderNode<>nil) and (HeaderNode.Typ<>whnHeader) do
1479       HeaderNode:=HeaderNode.Previous;
1480     if aNode=HeaderNode then begin
1481       // get the first node after the header with some text
1482       repeat
1483         aNode:=aNode.Next;
1484       until (aNode=nil) or (UTF8Trim(aNode.Txt)<>'');
1485     end;
1486     if HeaderNode<>nil then begin
1487       // add a direct link to the sub topic
1488       Result+='Topic <a href="'+StrToXMLValue(aPage.WikiDocumentName+'#'+WikiHeaderToLink(HeaderNode.Txt))+'"'
1489         +' class="wikiLinkTopic"'
1490         +' alt="'+StrToXMLValue(HeaderNode.Txt)+'"'
1491         +'>'
1492         +TextToHTMLSnipped(HeaderNode.Txt,aQuery.LoPhrases,200)+'</a>: ';
1493     end;
1494     if aNode<>nil then begin
1495       // add text
1496       Result+=TextToHTMLSnipped(aNode.Txt,aQuery.LoPhrases,200);
1497     end;
1498   end;
1499 end;
1500 
1501 constructor TWikiHelp.Create(AOwner: TComponent);
1502 begin
1503   InitCriticalSection(FCritSec);
1504   inherited Create(AOwner);
1505   FConverter:=TWiki2HelpConverter.Create;
1506   FConverter.CodeTags:=WikiCreateCommonCodeTagList(true);
1507   FConverter.FHelp:=Self;
1508   FDefaultScoring:=TWHScoring.Create;
1509   with FDefaultScoring do begin
1510     Phrases[whfcPageTitle,whfsWholeWord]:=128;
1511     Phrases[whfcPageTitle,whfsPart]:=64;
1512     Phrases[whfcHeader,whfsWholeWord]:=32;
1513     Phrases[whfcHeader,whfsPart]:=16;
1514     Phrases[whfcText,whfsWholeWord]:=8;
1515     Phrases[whfcText,whfsPart]:=4;
1516     Phrases[whfcLink,whfsWholeWord]:=2;
1517     Phrases[whfcLink,whfsPart]:=1;
1518   end;
1519   FMaxResults:=10;
1520   fProgressStep:=whpsNone;
1521 end;
1522 
1523 destructor TWikiHelp.Destroy;
1524 begin
1525   AbortLoading(true);
1526   FConverter.CodeTags.Free;
1527   FreeAndNil(FConverter);
1528   FreeAndNil(FDefaultScoring);
1529   FreeAndNil(FQuery);
1530   inherited Destroy;
1531   DoneCriticalsection(FCritSec);
1532 end;
1533 
1534 procedure TWikiHelp.StartLoading;
1535 begin
1536   if not DirPathExists(XMLDirectory) then
1537     raise Exception.Create('TWikiHelp.StartScan XMLDirectory not found: '+XMLDirectory);
1538   if not DirPathExists(ImagesDirectory) then
1539     raise Exception.Create('TWikiHelp.StartScan ImagesDirectory not found: '+ImagesDirectory);
1540   EnterCritSect;
1541   try
1542     if fProgressStep>whpsNone then exit;
1543     fProgressStep:=whpsWikiScanDir;
1544     fWikiLoadTimeMSec:=0;
1545     fProgressCount:=0;
1546     fProgressMax:=0;
1547     FScanThread:=TWikiHelpThread.Create(true);
1548     FScanThread.FreeOnTerminate:=true;
1549     FScanThread.Help:=Self;
1550     {$IF FPC_FULLVERSION<=20403}
1551     FScanThread.Resume;
1552     {$ELSE}
1553     FScanThread.Start;
1554     {$ENDIF}
1555   finally
1556     LeaveCritSect;
1557   end;
1558 end;
1559 
LoadingContentnull1560 function TWikiHelp.LoadingContent: boolean;
1561 begin
1562   Result:=(fProgressStep>whpsNone) and (fProgressStep<whpsWikiLoadComplete);
1563 end;
1564 
1565 procedure TWikiHelp.AbortLoading(Wait: boolean);
1566 begin
1567   EnterCritSect;
1568   try
1569     if not LoadingContent then exit;
1570     FAborting:=true;
1571   finally
1572     LeaveCritSect;
1573   end;
1574   if not Wait then exit;
1575   while LoadingContent do
1576     Sleep(10);
1577   EnterCritSect;
1578   try
1579     FAborting:=false;
1580   finally
1581     LeaveCritSect;
1582   end;
1583 end;
1584 
LoadCompletenull1585 function TWikiHelp.LoadComplete: boolean;
1586 begin
1587   Result:=(fProgressStep>=whpsWikiLoadComplete);
1588 end;
1589 
CollectAllLanguagesnull1590 function TWikiHelp.CollectAllLanguages(AsCaption: boolean): TStrings;
1591 
1592   procedure Add(Code: string);
1593   begin
1594     if AsCaption then
1595       Code:=WikiLangCodeToCaption(Code);
1596     CollectAllLanguages.Add(Code);
1597   end;
1598 
1599 var
1600   Codes: String;
1601   p: SizeInt;
1602   Code: String;
1603 begin
1604   Result:=TStringList.Create;
1605   Add('');
1606   if LoadComplete then begin
1607     Codes:=Converter.CollectAllLangCodes(';')+';';
1608     repeat
1609       p:=Pos(';',Codes);
1610       if p<1 then p:=length(Codes)+1;
1611       Code:=LeftStr(Codes,p-1);
1612       Delete(Codes,1,p);
1613       if Code<>'' then
1614         Add(Code);
1615     until Codes='';
1616   end;
1617 end;
1618 
GetProgressCaptionnull1619 function TWikiHelp.GetProgressCaption: string;
1620 begin
1621   EnterCritSect;
1622   try
1623     case fProgressStep of
1624     whpsNone: Result:='Wiki not yet loaded.';
1625     whpsWikiScanDir: Result:='Scanning Wiki directory ...';
1626     whpsWikiLoadPages: Result:='Loaded '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1627     whpsWikiExtractPageTexts: Result:='Read '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1628     whpsWikiLoadComplete: Result:='Loaded '+IntToStr(Converter.Count)+' Wiki pages in '+IntToStr(fWikiLoadTimeMSec)+'msec.';
1629     whpsWikiSearch: Result:='Searched '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1630     whpsWikiSearchComplete: Result:='Searched '+IntToStr(Converter.Count)+' Wiki pages in '+IntToStr(fWikiSearchTimeMSec)+'msec.';
1631     else Result:='unknown step: '+IntToStr(ord(fProgressStep));
1632     end;
1633   finally
1634     LeaveCritSect;
1635   end;
1636 end;
1637 
Busynull1638 function TWikiHelp.Busy: boolean;
1639 begin
1640   Result:=not (fProgressStep in [whpsWikiLoadComplete,whpsWikiSearchComplete]);
1641 end;
1642 
1643 procedure TWikiHelp.Search(const Term: string; const Languages: string;
1644   Scoring: TWHScoring; FreeScoring: boolean);
1645 var
1646   aQuery: TWikiHelpQuery;
1647 begin
1648   if Scoring=nil then Scoring:=DefaultScoring;
1649   aQuery:=TWikiHelpQuery.Create(Term,Languages,
1650     Scoring,FreeScoring and (Scoring<>DefaultScoring));
1651   try
1652     Search(aQuery);
1653   finally
1654     aQuery.Free;
1655   end;
1656 end;
1657 
1658 procedure TWikiHelp.Search(aQuery: TWikiHelpQuery);
1659 begin
1660   EnterCritSect;
1661   try
1662     if aQuery=nil then exit;
1663     if FQuery=nil then
1664       // first query
1665       FQuery:=TWikiHelpQuery.Clone(aQuery)
1666     else if FQuery.Equals(aQuery) then begin
1667       // same query
1668       //debugln(['TWikiHelp.Search same query ',FQuery=aQuery,' ',FQuery.Scoring.Equals(aQuery.Scoring),' fquery.scoring=',FQuery.Scoring.Phrases[whfcPageTitle,whfsWholeWord],' aquery.scoring=',aQuery.Scoring.Phrases[whfcPageTitle,whfsWholeWord]]);
1669       exit;
1670     end else
1671       FQuery.Assign(aQuery);
1672     if LoadingContent then exit;
1673   finally
1674     LeaveCritSect;
1675   end;
1676   //debugln(['TWikiHelp.Search searching']);
1677   DoSearch;
1678 end;
1679 
1680 procedure TWikiHelp.SavePageToStream(DocumentName: string; aStream: TStream);
1681 var
1682   Page: TW2HelpPage;
1683 begin
1684   Page:=TW2HelpPage(Converter.GetPageWithDocumentName(DocumentName));
1685   if Page=nil then
1686     raise Exception.Create('document "'+DocumentName+'" not found in wiki');
1687   Converter.SavePageAsHTMLToStream(Page,aStream);
1688 end;
1689 
1690 end.
1691 
1692