1 { Search engine for wiki pages
2
3 Copyright (C) 2012 Mattias Gaertner mattias@freepascal.org
4
5 This source is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2 of the License, or (at your option)
8 any later version.
9
10 This code is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 details.
14
15 A copy of the GNU General Public License is available on the World Wide Web
16 at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
17 to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
18 Boston, MA 02110-1335, USA.
19
20 }
21 unit WikiHelpManager;
22
23 {$mode objfpc}{$H+}
24
25 { $DEFINE VerboseWikiHelp}
26
27 interface
28
29 uses
30 Classes, SysUtils, math,
31 // LazUtils
32 LazFileUtils, LazLogger, LazDbgLog, LazUTF8, LazStringUtils, laz2_DOM, AvgLvlTree,
33 // CodeTools
34 BasicCodeTools, KeywordFuncLists,
35 //
36 MTProcs,
37 // Wiki
38 Wiki2HTMLConvert, Wiki2XHTMLConvert, WikiFormat, WikiParser;
39
40 type
41 TWikiHelp = class;
42
43 TWHFitsCategory = (
44 whfcNone,
45 whfcLink,
46 whfcText,
47 whfcHeader,
48 whfcPageTitle
49 );
50 TWHFitsCategories = set of TWHFitsCategory;
51
52 TWHFitsStringFlag = (
53 whfsPart,
54 whfsWholeWord
55 );
56 TWHFitsStringFlags = set of TWHFitsStringFlag;
57
58 TWHPhrasePageFit = record
59 Category: TWHFitsCategory;
60 Quality: TWHFitsStringFlag;
61 end;
62 PWHPhrasePageFit = ^TWHPhrasePageFit;
63
64 TWHScore = single;
65
66 { TWHScoring }
67
68 TWHScoring = class(TPersistent)
69 public
70 Phrases: array[TWHFitsCategory,TWHFitsStringFlag] of TWHScore;
Equalsnull71 function Equals(Obj: TObject): boolean; override;
72 procedure Assign(Source: TPersistent); override;
73 end;
74
75 { TWikiHelpQuery }
76
77 TWikiHelpQuery = class(TPersistent)
78 public
79 Phrases: TStrings;
80 LoPhrases: TStrings; // Phrases lowercase
81 Languages: string; // comma separated list, '-' means not the original, 'de' = german, '*' = all
82 Scoring: TWHScoring;
83 FreeScoring: boolean;
84 constructor Create(const SearchText: string; const aLang: string = '';
85 aScoring: TWHScoring = nil; aFreeScoring: boolean = false);
86 constructor Clone(Query: TWikiHelpQuery);
87 destructor Destroy; override;
Equalsnull88 function Equals(Obj: TObject): boolean; override;
89 procedure Assign(Source: TPersistent); override;
90 end;
91
92 TWHTextNodeType = (
93 whnTxt,
94 whnHeader,
95 whnLink
96 );
97
98 { TWHTextNode }
99
100 TWHTextNode = class
101 private
102 FChildNodes: TFPList; // list of TW2HelpTextNode
103 FIndexInParent: integer;
104 FParent: TWHTextNode;
GetChildNodesnull105 function GetChildNodes(Index: integer): TWHTextNode;
106 procedure RemoveChild(Child: TWHTextNode);
107 public
108 Typ: TWHTextNodeType;
109 Txt: string;
110 constructor Create(aTyp: TWHTextNodeType; aParent: TWHTextNode);
111 destructor Destroy; override;
112 procedure Clear;
113 procedure Add(Node: TWHTextNode);
Countnull114 function Count: integer;
115 property ChildNodes[Index: integer]: TWHTextNode read GetChildNodes; default;
116 property IndexInParent: integer read FIndexInParent;
117 property Parent: TWHTextNode read FParent;
FirstChildnull118 function FirstChild: TWHTextNode;
LastChildnull119 function LastChild: TWHTextNode;
NextSiblingnull120 function NextSibling: TWHTextNode;
PreviousSiblingnull121 function PreviousSibling: TWHTextNode;
Nextnull122 function Next: TWHTextNode; // first child, then next sibling, then next sibling of parent, ...
NextSkipChildrennull123 function NextSkipChildren: TWHTextNode; // first next sibling, then next sibling of parent, ...
Previousnull124 function Previous: TWHTextNode; // the reverse of Next
LastLeafnull125 function LastLeaf: TWHTextNode; // get last child of last child of ...
Levelnull126 function Level: integer; // root node has 0
127
CalcMemSizenull128 function CalcMemSize: SizeInt;
129 end;
130
131 { TW2HelpPage }
132
133 TW2HelpPage = class(TW2HTMLPage)
134 public
135 WHRoot: TWHTextNode;
136 CurWHNode: TWHTextNode;
137 Score: single;
138 destructor Destroy; override;
GetScorenull139 function GetScore(Query: TWikiHelpQuery): TWHScore;
140 procedure GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
GetNodeHighestScorenull141 function GetNodeHighestScore(Query: TWikiHelpQuery): TWHTextNode;
142 end;
143
144 { TWiki2HelpConverter }
145
146 TWiki2HelpConverter = class(TWiki2HTMLConverter)
147 private
148 FCurQuery: TWikiHelpQuery;
149 FHelp: TWikiHelp;
150 protected
151 PagesPerThread: integer;
152 AvailableImages: TFilenameToStringTree; // existing files in the ImagesDirectory
153 procedure SavePage({%H-}Page: TW2XHTMLPage); override;
FindImagenull154 function FindImage(const ImgFilename: string): string; override;
155 procedure ExtractPageText(Page: TW2HelpPage);
156 procedure ExtractTextToken(Token: TWPToken);
157 procedure ParallelExtractPageText(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
158 procedure ParallelLoadPage(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
159 procedure ParallelComputeScores(Index: PtrInt; {%H-}Data: Pointer; {%H-}Item: TMultiThreadProcItem);
160 public
161 constructor Create; override;
162 procedure Clear; override;
163 destructor Destroy; override;
164 procedure LoadPages;
165 procedure ConvertInit; override;
166 procedure ExtractAllTexts;
167 procedure Search(Query: TWikiHelpQuery; var FoundPages: TFPList);
168 procedure SavePageAsHTMLToStream(Page: TW2HelpPage; aStream: TStream);
PageToFilenamenull169 function PageToFilename(Page: string; IsInternalLink, {%H-}Full: boolean
170 ): string; override;
PageToFilenamenull171 function PageToFilename(Page: TW2XHTMLPage; {%H-}Full: boolean): string; override;
172 property Help: TWikiHelp read FHelp;
173 end;
174
175 { TWikiHelpThread }
176
177 TWikiHelpThread = class(TThread)
178 protected
179 fLogMsg: string;
180 fCompleted: boolean;
181 procedure Execute; override;
182 procedure MainThreadLog;
183 procedure Log({%H-}Msg: string);
184 procedure ConverterLog({%H-}Msg: string);
185 procedure Scanned; // called in thread at end
186 public
187 Help: TWikiHelp;
188 end;
189
190 TWikiHelpProgressStep = (
191 whpsNone,
192 whpsWikiScanDir,
193 whpsWikiLoadPages,
194 whpsWikiExtractPageTexts,
195 whpsWikiLoadComplete,
196 whpsWikiSearch,
197 whpsWikiSearchComplete
198 );
199
200 { TWikiHelp }
201
202 TWikiHelp = class(TComponent)
203 private
204 FAborting: boolean;
205 FConverter: TWiki2HelpConverter;
206 FMaxResults: integer;
207 FOnScanned: TNotifyEvent;
208 FOnSearched: TNotifyEvent;
209 FQuery: TWikiHelpQuery;
210 FDefaultScoring: TWHScoring;
211 FResultsCSS: string;
212 FResultsCSSURL: string;
213 FResultsHTML: string;
214 FXMLDirectory: string;
215 FCritSec: TRTLCriticalSection;
216 FScanThread: TWikiHelpThread;
217 fProgressStep: TWikiHelpProgressStep;
218 fProgressCount: integer;
219 fProgressMax: integer;
220 fWikiLoadTimeMSec: integer;
221 fWikiSearchTimeMSec: integer;
GetImagesDirectorynull222 function GetImagesDirectory: string;
223 procedure SetImagesDirectory(AValue: string);
224 procedure SetMaxResults(AValue: integer);
225 procedure SetQuery(AValue: TWikiHelpQuery);
226 procedure SetXMLDirectory(AValue: string);
227 procedure EnterCritSect;
228 procedure LeaveCritSect;
229 procedure Scanned;
230 procedure DoSearch;
FoundNodeToHTMLSnippetnull231 function FoundNodeToHTMLSnippet(aPage: TW2HelpPage; aNode: TWHTextNode;
232 aQuery: TWikiHelpQuery): string;
233 public
234 constructor Create(AOwner: TComponent); override;
235 destructor Destroy; override;
GetProgressCaptionnull236 function GetProgressCaption: string;
Busynull237 function Busy: boolean;
238
239 property ResultsCSS: string read FResultsCSS write FResultsCSS;
240 property ResultsCSSURL: string read FResultsCSSURL write FResultsCSSURL;
241
242 // load wiki files
243 procedure StartLoading; // returns immediately
LoadingContentnull244 function LoadingContent: boolean;
245 procedure AbortLoading(Wait: boolean);
246 property Aborting: boolean read FAborting;
LoadCompletenull247 function LoadComplete: boolean;
248
249 // languages
CollectAllLanguagesnull250 function CollectAllLanguages(AsCaption: boolean): TStrings;
251
252 // search
253 procedure Search(const Term: string; const Languages: string = '';
254 Scoring: TWHScoring = nil; FreeScoring: boolean = false);
255 procedure Search(aQuery: TWikiHelpQuery);
256 property Query: TWikiHelpQuery read FQuery;
257 property DefaultScoring: TWHScoring read FDefaultScoring;
258 property MaxResults: integer read FMaxResults write SetMaxResults;
259 property ResultsHTML: string read FResultsHTML;
260
261 // get page
262 procedure SavePageToStream(DocumentName: string; aStream: TStream);
263 public
264 property XMLDirectory: string read FXMLDirectory write SetXMLDirectory; // directory where the wiki xml files are
265 property ImagesDirectory: string read GetImagesDirectory write SetImagesDirectory; // directory where the wiki image files are
266 property Converter: TWiki2HelpConverter read FConverter;
267 property OnScanned: TNotifyEvent read FOnScanned write FOnScanned;
268 property OnSearched: TNotifyEvent read FOnSearched write FOnSearched;
269 end;
270
271 var
272 WikiHelp: TWikiHelp = nil;
273
SearchTextToPhrasesnull274 function SearchTextToPhrases(Txt: string): TStringList;
CompareW2HPageForScorenull275 function CompareW2HPageForScore(Page1, Page2: Pointer): integer;
TextToHTMLSnippednull276 function TextToHTMLSnipped(Txt: string; LoCaseStringsToHighlight: TStrings;
277 MaxUTF8Length: integer): string;
278
dbgsnull279 function dbgs(t: TWHTextNodeType): string; overload;
280
281 procedure Test_TextToHTMLSnipped;
282
283 implementation
284
TextToHTMLSnippednull285 function TextToHTMLSnipped(Txt: string; LoCaseStringsToHighlight: TStrings;
286 MaxUTF8Length: integer): string;
287 var
288 i: Integer;
289 LoTxt: String;
290 Bold: PByte; // for each UTF-8 character: the number of matching phrases
291 Phrase: String;
292 PhraseStartChar: Char;
293 LoTxtP: PChar;
294 CurLoTxtP: PChar;
295 CurPhraseP: PChar;
296 BoldP: PByte;
297 l: Integer;
298 BestPhraseCount: Integer;
299 CurPhraseCount: Integer;
300 BestPos: Integer;
301 IsBold: Boolean;
302 StartChomped: Boolean;
303 EndChomped: Boolean;
304 begin
305 if MaxUTF8Length<=0 then exit('');
306 Result:=UTF8Trim(Txt);
307 {$IFDEF VerboseTextToHTMLSnipped}
308 debugln(['TextToHTMLSnipped trimmed Result="',Result,'"']);
309 debugln(['TextToHTMLSnipped LoCaseStringsToHighlight="',Trim(LoCaseStringsToHighlight.Text),'"']);
310 {$ENDIF}
311 // convert white space to single space
312 i:=1;
313 while i<=length(Result) do begin
314 if Result[i] in [#0..#31] then
315 Result[i]:=' ';
316 if (Result[i]=' ') and ((i=1) or (Result[i-1]=' ')) then
317 Delete(Result,i,1)
318 else
319 inc(i);
320 end;
321 if Result='' then exit;
322 LoTxt:=UTF8LowerCase(Result);
323 {$IFDEF VerboseTextToHTMLSnipped}
324 debugln(['TextToHTMLSnipped locase Result="',LoTxt,'"']);
325 {$ENDIF}
326 GetMem(Bold,Max(length(LoTxt),length(Result))+7);
327 try
328 // mark phrases
329 FillByte(Bold^,length(LoTxt)+7,0);
330 for i:=0 to Min(LoCaseStringsToHighlight.Count-1,255) do begin
331 Phrase:=LoCaseStringsToHighlight[i];
332 if Phrase='' then continue;
333 BoldP:=Bold;
334 PhraseStartChar:=Phrase[1];
335 LoTxtP:=PChar(LoTxt);
336 while LoTxtP^<>#0 do begin
337 //debugln(['TextToHTMLSnipped PhraseStartChar=',PhraseStartChar,' ',dbgstr(LoTxtP^)]);
338 if LoTxtP^=PhraseStartChar then begin
339 CurLoTxtP:=LoTxtP+1;
340 CurPhraseP:=PChar(Phrase)+1;
341 while (CurLoTxtP^=CurPhraseP^) and (CurLoTxtP^<>#0) do begin
342 inc(CurLoTxtP);
343 inc(CurPhraseP);
344 end;
345 if CurPhraseP^=#0 then begin
346 // phrase found => mark phrase in Bold array
347 //debugln(['TextToHTMLSnipped phrase "',Phrase,'" found at ',LoTxtP-PChar(LoTxt)]);
348 CurPhraseP:=PChar(Phrase);
349 while (CurPhraseP^<>#0) do begin
350 l:=UTF8CodepointSize(CurPhraseP);
351 inc(LoTxtP,l);
352 inc(CurPhraseP,l);
353 BoldP^+=1;
354 inc(BoldP);
355 end;
356 continue;
357 end;
358 end;
359 inc(LoTxtP,UTF8CodepointSize(LoTxtP));
360 inc(BoldP);
361 end;
362 end;
363
364 {$IFDEF VerboseTextToHTMLSnipped}
365 dbgout(' Bold: ');
366 LoTxtP:=PChar(LoTxt);
367 BoldP:=Bold;
368 while LoTxtP^<>#0 do begin
369 dbgout([' ',dbgstr(LoTxtP^),':',BoldP^]);
370 inc(LoTxtP,UTF8CodepointSize(LoTxtP));
371 inc(BoldP);
372 end;
373 debugln;
374 debugln('Result="',Result,'"');
375 dbgout ('Bold = ');
376 l:=UTF8Length(Result);
377 for i:=0 to l-1 do
378 dbgout(dbgs(Bold[i]));
379 debugln;
380 {$ENDIF}
381
382 l:=UTF8Length(Result);
383 StartChomped:=false;
384 EndChomped:=false;
385 if (l>MaxUTF8Length) then begin
386 // text too long
387 // => find substring with most phrases
388 CurPhraseCount:=0;
389 for i:=0 to MaxUTF8Length-1 do
390 inc(CurPhraseCount,Bold[i]);
391 BestPhraseCount:=CurPhraseCount;
392 BestPos:=0;
393 for i:=0 to l-MaxUTF8Length-1 do begin
394 CurPhraseCount+=Bold[i+MaxUTF8Length]-Bold[i];
395 if CurPhraseCount>=BestPhraseCount then begin
396 BestPhraseCount:=CurPhraseCount;
397 BestPos:=i+1;
398 end;
399 end;
400 if BestPos>0 then begin
401 // BestPos is the latest substring containing the maximum
402 // => move BestPos to center the maximum
403 // => balance left and right of not marked characters
404 i:=BestPos;
405 while (i>0) and (Bold[i-1]=0) and (Bold[i+MaxUTF8Length-1]=0) do
406 dec(i);
407 if i<BestPos then inc(i);
408 BestPos:=(i+BestPos) div 2;
409 end;
410
411 // cut down Result and Bold
412 Result:=UTF8Copy(Result,BestPos+1,MaxUTF8Length);
413 if BestPos>0 then begin
414 StartChomped:=true;
415 System.Move(Bold[BestPos],Bold[0],MaxUTF8Length);
416 end;
417 if BestPos+MaxUTF8Length<l then
418 EndChomped:=true;
419 end;
420
421 {$IFDEF VerboseTextToHTMLSnipped}
422 debugln(['TextToHTMLSnipped chomped Result="',Result,'"']);
423 {$ENDIF}
424
425 // add bold tags
426 i:=1;
427 BoldP:=Bold;
428 IsBold:=false;
429 while i<=length(Result) do begin
430 if (BoldP^>0) and (not IsBold) then begin
431 // insert bold start tag
432 Insert('<b>',Result,i);
433 inc(i,length('<b>'));
434 IsBold:=true;
435 end else if (BoldP^=0) and IsBold then begin
436 // insert bold end tag
437 Insert('</b>',Result,i);
438 inc(i,length('</b>'));
439 IsBold:=false;
440 end;
441 if Result[i]='<' then begin
442 // replace <
443 ReplaceSubstring(Result,i,1,'<');
444 inc(i,length('<'));
445 end else if Result[i]='>' then begin
446 // replace >
447 ReplaceSubstring(Result,i,1,'>');
448 inc(i,length('>'));
449 end else
450 inc(i,UTF8CodepointSize(@Result[i]));
451 inc(BoldP);
452 end;
453 if IsBold then
454 Result+='</b>';
455 // prepend and append '...'
456 Result:=UTF8Trim(Result);
457 if StartChomped then
458 Result:='...'+Result;
459 if EndChomped then
460 Result+='...';
461
462 {$IFDEF VerboseTextToHTMLSnipped}
463 debugln(['TextToHTMLSnipped END Result="',Result,'"']);
464 {$ENDIF}
465 finally
466 FreeMem(Bold);
467 end;
468 end;
469
dbgsnull470 function dbgs(t: TWHTextNodeType): string;
471 begin
472 Result:='';
473 writestr(Result,t);
474 end;
475
476 procedure Test_TextToHTMLSnipped;
477
478 procedure t(Txt, LoCaseHighlights: string; MaxUTF8Length: integer; Expected: string);
479 var
480 LoCaseStringsToHighlight: TStringList;
481 s: String;
482 begin
483 LoCaseStringsToHighlight:=TStringList.Create;
484 LoCaseStringsToHighlight.Delimiter:=',';
485 LoCaseStringsToHighlight.StrictDelimiter:=true;
486 LoCaseStringsToHighlight.DelimitedText:=LoCaseHighlights;
487 s:=TextToHTMLSnipped(Txt,LoCaseStringsToHighlight,MaxUTF8Length);
488 if Expected<>s then begin
489 debugln(['Test_TextToHTMLSnipped Txt="'+Txt+'"']);
490 debugln(['Test_TextToHTMLSnipped LoCaseHighlights="'+LoCaseHighlights+'"']);
491 debugln(['Test_TextToHTMLSnipped MaxUTF8Length='+dbgs(MaxUTF8Length)]);
492 debugln(['Test_TextToHTMLSnipped Expected="'+Expected+'"']);
493 debugln(['Test_TextToHTMLSnipped Result ="'+s+'"']);
494 raise Exception.Create('Test_TextToHTMLSnipped: Txt="'+Txt+'" LoCaseHighlights="'+LoCaseHighlights+'" Max='+dbgs(MaxUTF8Length)+' Expected="'+Expected+'" Result="'+s+'"');
495 end;
496 LoCaseStringsToHighlight.Free;
497 end;
498
499 begin
500 t('','',0,'');
501 t('bla','bla',100,'<b>bla</b>');
502 t('bla foo bar','bla,bar',100,'<b>bla</b> foo <b>bar</b>');
503 t('bla foo bar','bla foo,bla,foo',100,'<b>bla foo</b> bar');
504 t('bla foo bar','foo',100,'bla <b>foo</b> bar');
505 t('bla foo bar','foo',7,'...a <b>foo</b> b...');
506 t('bl< foo >ar','foo',7,'...< <b>foo</b> >...');
507 end;
508
SearchTextToPhrasesnull509 function SearchTextToPhrases(Txt: string): TStringList;
510 var
511 p: PChar;
512 StartPos: PChar;
513 Phrase: String;
514 begin
515 Result:=TStringList.Create;
516 Txt:=UTF8Trim(Txt);
517 if Txt='' then exit;
518 Result.Add(Txt);
519 p:=PChar(Txt);
520 Phrase:='';
521 while p^<>#0 do begin
522 if p^='"' then begin
523 // quote
524 inc(p);
525 StartPos:=p;
526 while not (p^ in [#0,'"']) do inc(p);
527 Phrase:=Phrase+SubString(StartPos,p-StartPos);
528 if p^<>#0 then inc(p);
529 end else if p^ in [' ',#9,#10,#13] then begin
530 // space => end phrase
531 inc(p);
532 if Phrase<>'' then begin
533 if Result.IndexOf(Phrase)<0 then
534 Result.Add(Phrase);
535 Phrase:='';
536 end;
537 end else begin
538 // word
539 StartPos:=p;
540 while not (p^ in [#0,'"',' ',#9,#10,#13]) do inc(p);
541 Phrase:=Phrase+SubString(StartPos,p-StartPos);
542 end;
543 end;
544 if (Phrase<>'') and (Result.IndexOf(Phrase)<0) then
545 Result.Add(Phrase);
546 end;
547
CompareW2HPageForScorenull548 function CompareW2HPageForScore(Page1, Page2: Pointer): integer;
549 var
550 p1: TW2HelpPage absolute Page1;
551 p2: TW2HelpPage absolute Page2;
552 begin
553 if p1.Score>p2.Score then
554 exit(-1)
555 else if p1.Score<p2.Score then
556 exit(1)
557 else
558 exit(0);
559 end;
560
561 { TWHScoring }
562
TWHScoring.Equalsnull563 function TWHScoring.Equals(Obj: TObject): boolean;
564 var
565 c: TWHFitsCategory;
566 f: TWHFitsStringFlag;
567 Src: TWHScoring;
568 begin
569 if Self=Obj then exit(true);
570 Result:=false;
571 if Obj=nil then exit;
572 if Obj is TWHScoring then begin
573 Src:=TWHScoring(Obj);
574 for c:=Low(TWHFitsCategory) to high(TWHFitsCategory) do
575 for f:=Low(TWHFitsStringFlag) to high(TWHFitsStringFlag) do
576 if Phrases[c,f]<>Src.Phrases[c,f] then exit;
577 end;
578 Result:=true;
579 end;
580
581 procedure TWHScoring.Assign(Source: TPersistent);
582 var
583 Src: TWHScoring;
584 begin
585 if Source is TWHScoring then begin
586 Src:=TWHScoring(Source);
587 Move(Src.Phrases,Phrases,SizeOf(Phrases));
588 end else
589 inherited Assign(Source);
590 end;
591
592 { TWikiHelpQuery }
593
594 constructor TWikiHelpQuery.Create(const SearchText: string;
595 const aLang: string; aScoring: TWHScoring; aFreeScoring: boolean);
596 var
597 i: Integer;
598 begin
599 Phrases:=SearchTextToPhrases(SearchText);
600 LoPhrases:=TStringList.Create;
601 for i:=0 to Phrases.Count-1 do
602 LoPhrases.Add(UTF8LowerCase(Phrases[i]));
603 Languages:=aLang;
604 Scoring:=aScoring;
605 FreeScoring:=aFreeScoring;
606 end;
607
608 constructor TWikiHelpQuery.Clone(Query: TWikiHelpQuery);
609 begin
610 Phrases:=TStringList.Create;
611 LoPhrases:=TStringList.Create;
612 Scoring:=TWHScoring.Create;
613 FreeScoring:=true;
614 Assign(Query);
615 end;
616
617 destructor TWikiHelpQuery.Destroy;
618 begin
619 FreeAndNil(Phrases);
620 FreeAndNil(LoPhrases);
621 if FreeScoring then
622 FreeAndNil(Scoring)
623 else
624 Scoring:=nil;
625 inherited Destroy;
626 end;
627
TWikiHelpQuery.Equalsnull628 function TWikiHelpQuery.Equals(Obj: TObject): boolean;
629 var
630 Src: TWikiHelpQuery;
631 begin
632 if Obj=Self then exit(true);
633 Result:=false;
634 if Obj=nil then exit;
635 if not (Obj is TWikiHelpQuery) then exit;
636 Src:=TWikiHelpQuery(Obj);
637 if not Phrases.Equals(Src.Phrases) then exit;
638 // LoPhrases is computed from Phrases
639 if Languages<>Src.Languages then exit;
640 if not Scoring.Equals(Src.Scoring) then exit;
641 Result:=true;
642 end;
643
644 procedure TWikiHelpQuery.Assign(Source: TPersistent);
645 var
646 Src: TWikiHelpQuery;
647 begin
648 if Source is TWikiHelpQuery then begin
649 Src:=TWikiHelpQuery(Source);
650 Phrases.Assign(Src.Phrases);
651 LoPhrases.Assign(Src.LoPhrases);
652 Languages:=Src.Languages;
653 Scoring.Assign(Src.Scoring);
654 end else
655 inherited Assign(Source);
656 end;
657
658 { TW2HelpPage }
659
660 destructor TW2HelpPage.Destroy;
661 begin
662 FreeAndNil(WHRoot);
663 inherited Destroy;
664 end;
665
GetScorenull666 function TW2HelpPage.GetScore(Query: TWikiHelpQuery): TWHScore;
667 var
668 PhrasesFit: PWHPhrasePageFit;
669 Size: Integer;
670 i: Integer;
671 Fit: PWHPhrasePageFit;
672 begin
673 Result:=0;
674 if (Query=nil) or (Query.LoPhrases.Count=0) then exit;
675 if not WikiPageHasLanguage(WikiDocumentName,Query.Languages) then begin
676 //debugln(['TW2HelpPage.GetScore lang does not fit ',WikiDocumentName,' "',GetWikiPageLanguage(WikiDocumentName),'" ',Query.Languages]);
677 exit;
678 end;
679
680 Size:=Query.LoPhrases.Count*SizeOf(TWHPhrasePageFit);
681 GetMem(PhrasesFit,Size);
682 try
683 FillByte(PhrasesFit^,Size,0);
684 GetFit(Query,PhrasesFit);
685 for i:=0 to Query.LoPhrases.Count-1 do begin
686 Fit:=@PhrasesFit[i];
687 Result+=Query.Scoring.Phrases[Fit^.Category,Fit^.Quality];
688 end;
689 finally
690 FreeMem(PhrasesFit);
691 end;
692 end;
693
694 procedure TW2HelpPage.GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
695
696 procedure CheckTxt(s: string; Category: TWHFitsCategory);
697 var
698 i: Integer;
699 Phrase: String;
700 FitsWholeWord: boolean;
701 FitsCount: SizeInt;
702 Quality: TWHFitsStringFlag;
703 begin
704 s:=UTF8LowerCase(s);
705 for i:=0 to Query.LoPhrases.Count-1 do begin
706 if Fit[i].Category>Category then continue;
707 if (Fit[i].Category=Category) and (Fit[i].Quality>=whfsWholeWord) then
708 continue;
709 Phrase:=Query.LoPhrases[i];
710 HasTxtWord(PChar(Phrase),PChar(s),FitsWholeWord,FitsCount);
711 if FitsCount<=0 then continue;
712 if FitsWholeWord then
713 Quality:=whfsWholeWord
714 else
715 Quality:=whfsPart;
716 Fit[i].Category:=Category;
717 Fit[i].Quality:=Quality;
718 end;
719 end;
720
721 procedure Traverse(Node: TWHTextNode);
722 var
723 i: Integer;
724 Category: TWHFitsCategory;
725 begin
726 if Node=nil then exit;
727 case Node.Typ of
728 whnTxt: Category:=whfcText;
729 whnHeader: Category:=whfcHeader;
730 whnLink: Category:=whfcLink;
731 else exit;
732 end;
733 CheckTxt(Node.Txt,Category);
734 for i:=0 to Node.Count-1 do
735 Traverse(Node[i]);
736 end;
737
738 begin
739 CheckTxt(WikiPage.Title,whfcPageTitle);
740 Traverse(WHRoot);
741 end;
742
GetNodeHighestScorenull743 function TW2HelpPage.GetNodeHighestScore(Query: TWikiHelpQuery): TWHTextNode;
744
GetNodeScorenull745 function GetNodeScore(Node: TWHTextNode): TWHScore;
746 var
747 s: String;
748 i: Integer;
749 Phrase: String;
750 FitsWholeWord: boolean;
751 FitsCount: SizeInt;
752 Quality: TWHFitsStringFlag;
753 Category: TWHFitsCategory;
754 begin
755 Result:=0;
756 case Node.Typ of
757 whnTxt: Category:=whfcText;
758 whnHeader: Category:=whfcHeader;
759 whnLink: Category:=whfcLink;
760 else exit;
761 end;
762 s:=UTF8LowerCase(Node.Txt);
763 for i:=0 to Query.LoPhrases.Count-1 do begin
764 Phrase:=Query.LoPhrases[i];
765 HasTxtWord(PChar(Phrase),PChar(s),FitsWholeWord,FitsCount);
766 if FitsCount<=0 then continue;
767 if FitsWholeWord then
768 Quality:=whfsWholeWord
769 else
770 Quality:=whfsPart;
771 Result+=Query.Scoring.Phrases[Category,Quality];
772 end;
773 end;
774
775 procedure Traverse(Node: TWHTextNode;
776 var BestNode: TWHTextNode; var BestScore: TWHScore);
777 var
778 i: Integer;
779 NodeScore: TWHScore;
780 begin
781 if Node=nil then exit;
782 NodeScore:=GetNodeScore(Node);
783 if NodeScore>BestScore then begin
784 BestNode:=Node;
785 BestScore:=NodeScore;
786 end;
787 for i:=0 to Node.Count-1 do
788 Traverse(Node[i],BestNode,BestScore);
789 end;
790
791 var
792 NodeScore: TWHScore;
793 begin
794 Result:=nil;
795 NodeScore:=0;
796 Traverse(WHRoot,Result,NodeScore);
797 end;
798
799 { TWHTextNode }
800
TWHTextNode.GetChildNodesnull801 function TWHTextNode.GetChildNodes(Index: integer): TWHTextNode;
802 begin
803 Result:=TWHTextNode(FChildNodes[Index]);
804 end;
805
806 procedure TWHTextNode.RemoveChild(Child: TWHTextNode);
807 var
808 i: Integer;
809 begin
810 FChildNodes.Delete(Child.IndexInParent);
811 for i:=Child.IndexInParent to FChildNodes.Count-1 do
812 ChildNodes[i].fIndexInParent:=i;
813 end;
814
815 constructor TWHTextNode.Create(aTyp: TWHTextNodeType; aParent: TWHTextNode);
816 begin
817 Typ:=aTyp;
818 if aParent<>nil then
819 aParent.Add(Self)
820 else
821 fIndexInParent:=-1;
822 end;
823
824 destructor TWHTextNode.Destroy;
825 begin
826 Clear;
827 if Parent<>nil then
828 Parent.RemoveChild(Self);
829 FreeAndNil(FChildNodes);
830 inherited Destroy;
831 end;
832
833 procedure TWHTextNode.Clear;
834 var
835 i: Integer;
836 Child: TWHTextNode;
837 begin
838 Txt:='';
839 if FChildNodes<>nil then begin
840 for i:=FChildNodes.Count-1 downto 0 do begin
841 Child:=TWHTextNode(FChildNodes[i]);
842 Child.fParent:=nil;
843 Child.Free;
844 end;
845 FChildNodes.Clear;
846 end;
847 end;
848
849 procedure TWHTextNode.Add(Node: TWHTextNode);
850 begin
851 if Node.Parent=Self then exit;
852 if Node.Parent<>nil then
853 Node.Parent.RemoveChild(Node);
854 if FChildNodes=nil then
855 FChildNodes:=TFPList.Create;
856 Node.fIndexInParent:=Count;
857 FChildNodes.Add(Node);
858 Node.fParent:=Self;
859 end;
860
Countnull861 function TWHTextNode.Count: integer;
862 begin
863 if FChildNodes<>nil then
864 Result:=FChildNodes.Count
865 else
866 Result:=0;
867 end;
868
FirstChildnull869 function TWHTextNode.FirstChild: TWHTextNode;
870 begin
871 if Count>0 then
872 Result:=ChildNodes[0]
873 else
874 Result:=nil;
875 end;
876
LastChildnull877 function TWHTextNode.LastChild: TWHTextNode;
878 var
879 c: Integer;
880 begin
881 c:=Count;
882 if c>0 then
883 Result:=ChildNodes[c-1]
884 else
885 Result:=nil;
886 end;
887
NextSiblingnull888 function TWHTextNode.NextSibling: TWHTextNode;
889 begin
890 if (Parent=nil) or (IndexInParent+2>=Parent.Count) then exit(nil);
891 Result:=Parent[IndexInParent+1];
892 end;
893
TWHTextNode.PreviousSiblingnull894 function TWHTextNode.PreviousSibling: TWHTextNode;
895 begin
896 if (Parent=nil) or (IndexInParent=0) then exit(nil);
897 Result:=Parent[IndexInParent-1];
898 end;
899
Nextnull900 function TWHTextNode.Next: TWHTextNode;
901 begin
902 Result:=FirstChild;
903 if Result=nil then
904 Result:=NextSkipChildren;
905 end;
906
NextSkipChildrennull907 function TWHTextNode.NextSkipChildren: TWHTextNode;
908 var
909 Node: TWHTextNode;
910 begin
911 Result:=Self;
912 repeat
913 Node:=Result.NextSibling;
914 if Node<>nil then exit(Node);
915 Result:=Result.Parent;
916 until Result=nil;
917 Result:=nil;
918 end;
919
TWHTextNode.Previousnull920 function TWHTextNode.Previous: TWHTextNode;
921 var
922 Node: TWHTextNode;
923 begin
924 Result:=PreviousSibling;
925 if Result=nil then
926 exit(Parent);
927 Node:=Result.LastLeaf;
928 if Node<>nil then
929 Result:=Node;
930 end;
931
LastLeafnull932 function TWHTextNode.LastLeaf: TWHTextNode;
933 var
934 Node: TWHTextNode;
935 begin
936 Result:=LastChild;
937 if Result=nil then exit;
938 repeat
939 Node:=Result.LastChild;
940 if Node=nil then exit;
941 Result:=Node;
942 until false;
943 end;
944
TWHTextNode.Levelnull945 function TWHTextNode.Level: integer;
946 var
947 Node: TWHTextNode;
948 begin
949 Result:=0;
950 Node:=Parent;
951 while Node<>nil do begin
952 inc(Result);
953 Node:=Node.Parent;
954 end;
955 end;
956
CalcMemSizenull957 function TWHTextNode.CalcMemSize: SizeInt;
958 var
959 i: Integer;
960 begin
961 Result:=InstanceSize+SizeInt(MemSizeString(Txt));
962 if FChildNodes<>nil then begin
963 inc(Result,FChildNodes.InstanceSize+FChildNodes.Count*SizeOf(Pointer));
964 for i:=0 to Count-1 do
965 inc(Result,ChildNodes[i].CalcMemSize);
966 end;
967 end;
968
969 { TWiki2HelpConverter }
970
971 procedure TWiki2HelpConverter.SavePage(Page: TW2XHTMLPage);
972 begin
973 // do not save
974 end;
975
TWiki2HelpConverter.FindImagenull976 function TWiki2HelpConverter.FindImage(const ImgFilename: string): string;
977 begin
978 //Log('AvailableImages='+dbgs(AvailableImages.Tree.Count)+' Img="'+ImgFilename+'"');
979 if AvailableImages.Contains(ImgFilename) then
980 Result:=ImgFilename
981 else
982 Result:='';
983 end;
984
985 procedure TWiki2HelpConverter.ExtractTextToken(Token: TWPToken);
986 var
987 Page: TW2HelpPage;
988 W: TWikiPage;
989 Txt: String;
990 CurNode: TWHTextNode;
991 StartP, EndP: PChar;
992 NodeType: TWHTextNodeType;
993 TextToken: TWPTextToken;
994 LinkToken: TWPLinkToken;
995 Caption: String;
996 begin
997 Page:=TW2HelpPage(Token.UserData);
998 W:=Page.WikiPage;
999 CurNode:=Page.CurWHNode;
1000 if CurNode=nil then CurNode:=Page.WHRoot;
1001 case Token.Token of
1002 wptText:
1003 if Token is TWPTextToken then begin
1004 TextToken:=TWPTextToken(Token);
1005 StartP:=PChar(W.Src)+TextToken.StartPos-1;
1006 EndP:=PChar(W.Src)+TextToken.EndPos-1;
1007 while (StartP<EndP) and (StartP^ in [#1..#31,' ']) do inc(StartP);
1008 if StartP<EndP then begin
1009 // not only space
1010 Txt:=copy(W.Src,TextToken.StartPos,TextToken.EndPos-TextToken.StartPos);
1011 CurNode.Txt:=CurNode.Txt+Txt;
1012 exit;
1013 end;
1014 end;
1015
1016 wptSection,wptHeader:
1017 if Token.Range=wprOpen then begin
1018 if Token.Token=wptHeader then
1019 NodeType:=whnHeader
1020 else
1021 NodeType:=whnTxt;
1022 Page.CurWHNode:=TWHTextNode.Create(NodeType,CurNode);
1023 exit;
1024 end else if Token.Range=wprClose then begin
1025 Page.CurWHNode:=CurNode.Parent;
1026 exit;
1027 end;
1028
1029 wptInternLink, wptExternLink:
1030 if Token is TWPLinkToken then begin
1031 LinkToken:=TWPLinkToken(Token);
1032 Caption:=copy(W.Src,LinkToken.CaptionStartPos,
1033 LinkToken.CaptionEndPos-LinkToken.CaptionStartPos);
1034 if Caption<>'' then begin
1035 CurNode:=TWHTextNode.Create(whnLink,CurNode);
1036 CurNode.Txt:=Caption;
1037 // do not exit, append a space to the current node
1038 end;
1039 end;
1040 end;
1041 // add a space to separate words
1042 if (CurNode.Txt='') or (not (CurNode.Txt[length(CurNode.Txt)] in [#1..#31,' ']))
1043 then
1044 CurNode.Txt:=CurNode.Txt+' ';
1045 end;
1046
1047 procedure TWiki2HelpConverter.ParallelExtractPageText(Index: PtrInt;
1048 Data: Pointer; Item: TMultiThreadProcItem);
1049 var
1050 StartIndex, EndIndex: Integer;
1051 i: Integer;
1052 begin
1053 StartIndex:=Index*PagesPerThread;
1054 EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1055 if Help.Aborting then exit;
1056 for i:=StartIndex to EndIndex do
1057 ExtractPageText(TW2HelpPage(Pages[i]));
1058 Help.EnterCritSect;
1059 try
1060 inc(Help.fProgressCount,PagesPerThread);
1061 finally
1062 Help.LeaveCritSect;
1063 end;
1064 end;
1065
1066 procedure TWiki2HelpConverter.ParallelLoadPage(Index: PtrInt; Data: Pointer;
1067 Item: TMultiThreadProcItem);
1068 var
1069 Page: TW2HelpPage;
1070 StartIndex, EndIndex: Integer;
1071 i: Integer;
1072 begin
1073 StartIndex:=Index*PagesPerThread;
1074 EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1075 for i:=StartIndex to EndIndex do begin
1076 if Help.Aborting then exit;
1077 Page:=TW2HelpPage(Pages[i]);
1078 try
1079 Page.ParseWikiDoc(false);
1080 except
1081 on E: Exception do begin
1082 Log('ERROR: '+Page.WikiFilename+': '+E.Message);
1083 end;
1084 end;
1085 end;
1086 Help.EnterCritSect;
1087 try
1088 inc(Help.fProgressCount,PagesPerThread);
1089 finally
1090 Help.LeaveCritSect;
1091 end;
1092 end;
1093
1094 procedure TWiki2HelpConverter.ParallelComputeScores(Index: PtrInt;
1095 Data: Pointer; Item: TMultiThreadProcItem);
1096 var
1097 StartIndex, EndIndex: Integer;
1098 i: Integer;
1099 Page: TW2HelpPage;
1100 begin
1101 StartIndex:=Index*PagesPerThread;
1102 EndIndex:=Min(StartIndex+PagesPerThread-1,Count-1);
1103 if Help.Aborting then exit;
1104 for i:=StartIndex to EndIndex do begin
1105 Page:=TW2HelpPage(Pages[i]);
1106 Page.Score:=Page.GetScore(FCurQuery);
1107 end;
1108 Help.EnterCritSect;
1109 try
1110 inc(Help.fProgressCount,PagesPerThread);
1111 finally
1112 Help.LeaveCritSect;
1113 end;
1114 end;
1115
1116 procedure TWiki2HelpConverter.ExtractPageText(Page: TW2HelpPage);
1117 begin
1118 FreeAndNil(Page.WHRoot);
1119 Page.WHRoot:=TWHTextNode.Create(whnTxt,nil);
1120 try
1121 Page.CurWHNode:=Page.WHRoot;
1122 if Page.WikiPage<>nil then
1123 Page.WikiPage.Parse(@ExtractTextToken,Page);
1124 finally
1125 Page.CurWHNode:=nil;
1126 end;
1127 end;
1128
1129 procedure TWiki2HelpConverter.ConvertInit;
1130 var
1131 FileInfo: TSearchRec;
1132 begin
1133 inherited ConvertInit;
1134
1135 //Log('ImagesDir='+ImagesDir);
1136 AvailableImages.Clear;
1137 if FindFirstUTF8(ImagesDir+AllFilesMask,faAnyFile,FileInfo)=0 then begin
1138 repeat
1139 if (FileInfo.Name='') or (FileInfo.Name='.') or (FileInfo.Name='..') then
1140 continue;
1141 AvailableImages[FileInfo.Name]:='1';
1142 until FindNextUTF8(FileInfo)<>0;
1143 end;
1144 FindCloseUTF8(FileInfo);
1145 Log('Found '+IntToStr(AvailableImages.Tree.Count)+' wiki images in "'+ImagesDir+'"');
1146 end;
1147
1148 procedure TWiki2HelpConverter.ExtractAllTexts;
1149 begin
1150 Help.EnterCritSect;
1151 try
1152 Help.fProgressStep:=whpsWikiExtractPageTexts;
1153 Help.fProgressCount:=0;
1154 Help.fProgressMax:=Count;
1155 finally
1156 Help.LeaveCritSect;
1157 end;
1158 ProcThreadPool.DoParallel(@ParallelExtractPageText,0,(Count-1) div PagesPerThread);
1159 end;
1160
1161 procedure TWiki2HelpConverter.Search(Query: TWikiHelpQuery;
1162 var FoundPages: TFPList);
1163 var
1164 i: Integer;
1165 Page: TW2HelpPage;
1166 begin
1167 Help.EnterCritSect;
1168 try
1169 Help.fProgressStep:=whpsWikiSearch;
1170 Help.fProgressCount:=0;
1171 Help.fProgressMax:=Count;
1172 finally
1173 Help.LeaveCritSect;
1174 end;
1175 FCurQuery:=Query;
1176 if FoundPages=nil then
1177 FoundPages:=TFPList.Create;
1178 ProcThreadPool.DoParallel(@ParallelComputeScores,0,(Count-1) div PagesPerThread);
1179 for i:=0 to Count-1 do begin
1180 Page:=TW2HelpPage(Pages[i]);
1181 if Page.Score<=0 then continue;
1182 FoundPages.Add(Page);
1183 end;
1184 FoundPages.Sort(@CompareW2HPageForScore);
1185 end;
1186
1187 procedure TWiki2HelpConverter.SavePageAsHTMLToStream(Page: TW2HelpPage;
1188 aStream: TStream);
1189 begin
1190 ConvertPage(Page);
1191 SavePageToStream(Page,aStream);
1192 Page.ClearConversion;
1193 end;
1194
TWiki2HelpConverter.PageToFilenamenull1195 function TWiki2HelpConverter.PageToFilename(Page: string; IsInternalLink,
1196 Full: boolean): string;
1197 begin
1198 Result:=WikiPageToFilename(Page,IsInternalLink,false);
1199 end;
1200
TWiki2HelpConverter.PageToFilenamenull1201 function TWiki2HelpConverter.PageToFilename(Page: TW2XHTMLPage; Full: boolean
1202 ): string;
1203 begin
1204 Result:=Page.WikiDocumentName;
1205 end;
1206
1207 procedure TWiki2HelpConverter.LoadPages;
1208 begin
1209 Help.EnterCritSect;
1210 try
1211 Help.fProgressStep:=whpsWikiLoadPages;
1212 Help.fProgressCount:=0;
1213 Help.fProgressMax:=Count;
1214 finally
1215 Help.LeaveCritSect;
1216 end;
1217 ProcThreadPool.DoParallel(@ParallelLoadPage,0,(Count-1) div PagesPerThread);
1218 end;
1219
1220 constructor TWiki2HelpConverter.Create;
1221 begin
1222 inherited Create;
1223 AvailableImages:=TFilenameToStringTree.Create(true);
1224 fPageClass:=TW2HelpPage;
1225 PagesPerThread:=100;
1226 end;
1227
1228 procedure TWiki2HelpConverter.Clear;
1229 begin
1230 inherited Clear;
1231 AvailableImages.Clear;
1232 end;
1233
1234 destructor TWiki2HelpConverter.Destroy;
1235 begin
1236 inherited Destroy;
1237 FreeAndNil(AvailableImages);
1238 end;
1239
1240 { TWikiHelpThread }
1241
1242 procedure TWikiHelpThread.Execute;
1243 var
1244 FileInfo: TSearchRec;
1245 Files: TStringList;
1246 i: Integer;
1247 Filename: String;
1248 StartTime: TDateTime;
1249 EndTime: TDateTime;
1250 begin
1251 {$IF FPC_FULLVERSION<30000}
1252 CurrentThread:=Self;
1253 {$ENDIF}
1254 try
1255 Files:=nil;
1256 try
1257 StartTime:=Now;
1258 Log('TWikiHelpThread.Execute START XMLDirectory="'+Help.XMLDirectory+'"');
1259
1260 Files:=TStringList.Create;
1261 try
1262 Help.Converter.OnLog:=@ConverterLog;
1263 // get all wiki xml files
1264 if FindFirstUTF8(Help.XMLDirectory+AllFilesMask,faAnyFile,FileInfo)=0 then begin
1265 repeat
1266 if CompareFileExt(FileInfo.Name,'.xml',false)<>0 then continue;
1267 Files.Add(FileInfo.Name);
1268 until FindNextUTF8(FileInfo)<>0;
1269 end;
1270 FindCloseUTF8(FileInfo);
1271
1272 // add file names to converter
1273 for i:=0 to Files.Count-1 do begin
1274 Filename:=Help.XMLDirectory+Files[i];
1275 Help.Converter.AddWikiPage(Filename,false);
1276 end;
1277 if Help.Aborting then exit;
1278
1279 // load xml files
1280 Help.Converter.LoadPages;
1281 if Help.Aborting then exit;
1282
1283 // extract texts
1284 Help.Converter.ConvertInit;
1285 if Help.Aborting then exit;
1286 Help.Converter.ExtractAllTexts;
1287 if Help.Aborting then exit;
1288
1289 fCompleted:=true;
1290 EndTime:=Now;
1291 Help.fWikiLoadTimeMSec:=round(Abs(EndTime-StartTime)*86400000);
1292 Log('TWikiHelpThread.Execute SCAN complete XMLDirectory="'+Help.XMLDirectory+'" '+dbgs(Help.fWikiLoadTimeMSec)+'msec');
1293 finally
1294 Files.Free;
1295 Help.Converter.OnLog:=nil;
1296 end;
1297 except
1298 on E: Exception do begin
1299 Log('TWikiHelpThread.Execute error: '+E.Message);
1300 end;
1301 end;
1302 finally
1303 Scanned;
1304 {$IF FPC_FULLVERSION<30000}
1305 CurrentThread:=nil;
1306 {$ENDIF}
1307 end;
1308 end;
1309
1310 procedure TWikiHelpThread.MainThreadLog;
1311 // called in main thread
1312 begin
1313 DebugLn(fLogMsg);
1314 end;
1315
1316 procedure TWikiHelpThread.Log(Msg: string);
1317 begin
1318 fLogMsg:=Msg;
1319 CurrentThread.Synchronize(@MainThreadLog);
1320 end;
1321
1322 procedure TWikiHelpThread.ConverterLog(Msg: string);
1323 begin
1324 {$IFDEF VerboseWikiHelp}
1325 Log(Msg);
1326 {$ENDIF}
1327 end;
1328
1329 procedure TWikiHelpThread.Scanned;
1330 // called in this thread
1331 begin
1332 Help.EnterCritSect;
1333 try
1334 Help.FScanThread:=nil;
1335 if fCompleted then
1336 Help.fProgressStep:=whpsWikiLoadComplete
1337 else
1338 Help.fProgressStep:=whpsNone;
1339 finally
1340 Help.LeaveCritSect;
1341 end;
1342 Synchronize(@Help.Scanned);
1343 end;
1344
1345 { TWikiHelp }
1346
1347 procedure TWikiHelp.SetImagesDirectory(AValue: string);
1348 var
1349 NewDir: String;
1350 begin
1351 NewDir:=TrimAndExpandDirectory(TrimFilename(AValue));
1352 if Converter.ImagesDir=NewDir then Exit;
1353 AbortLoading(true);
1354 Converter.ImagesDir:=NewDir;
1355 end;
1356
1357 procedure TWikiHelp.SetMaxResults(AValue: integer);
1358 begin
1359 if FMaxResults=AValue then Exit;
1360 FMaxResults:=AValue;
1361 end;
1362
1363 procedure TWikiHelp.SetQuery(AValue: TWikiHelpQuery);
1364 begin
1365 if FQuery=AValue then Exit;
1366 FQuery:=AValue;
1367 end;
1368
GetImagesDirectorynull1369 function TWikiHelp.GetImagesDirectory: string;
1370 begin
1371 Result:=Converter.ImagesDir;
1372 end;
1373
1374 procedure TWikiHelp.SetXMLDirectory(AValue: string);
1375 var
1376 NewDir: String;
1377 begin
1378 NewDir:=TrimAndExpandDirectory(TrimFilename(AValue));
1379 if FXMLDirectory=NewDir then Exit;
1380 AbortLoading(true);
1381 FXMLDirectory:=NewDir;
1382 end;
1383
1384 procedure TWikiHelp.EnterCritSect;
1385 begin
1386 EnterCriticalsection(FCritSec);
1387 end;
1388
1389 procedure TWikiHelp.LeaveCritSect;
1390 begin
1391 LeaveCriticalsection(FCritSec);
1392 end;
1393
1394 procedure TWikiHelp.Scanned;
1395 begin
1396 if Assigned(OnScanned) then
1397 OnScanned(Self);
1398 DoSearch;
1399 end;
1400
1401 procedure TWikiHelp.DoSearch;
1402 var
1403 StartTime: TDateTime;
1404 EndTime: TDateTime;
1405 FoundPages: TFPList;
1406 i: Integer;
1407 Page: TW2HelpPage;
1408 Node: TWHTextNode;
1409 s: String;
1410 HTML: String;
1411 begin
1412 FResultsHTML:='';
1413 if (Query=nil) or (Query.Phrases.Count=0) then begin
1414 EnterCritSect;
1415 try
1416 fProgressStep:=whpsWikiLoadComplete;
1417 finally
1418 LeaveCritSect;
1419 end;
1420 end else begin
1421 StartTime:=Now;
1422 //debugln(['TWikiHelp.DoSearch START Search=',Trim(Query.Phrases.Text),' Lang="',Query.Languages,'"']);
1423 FoundPages:=nil;
1424 Converter.Search(Query,FoundPages);
1425 HTML:='<html>'+LineEnding
1426 +'<head>'+LineEnding
1427 +' <meta content="text/html; charset=utf-8" http-equiv="Content-Type">'+LineEnding;
1428 if ResultsCSSURL<>'' then
1429 HTML+=' <link href="'+ResultsCSSURL+'" type="text/css" rel="stylesheet">'+LineEnding;
1430 HTML+='</head>'+LineEnding
1431 +'<body>'+LineEnding;
1432 for i:=0 to Min(FoundPages.Count-1,MaxResults) do begin
1433 Page:=TW2HelpPage(FoundPages[i]);
1434 //debugln(['TWikiHelp.DoSearch ',Page.WikiDocumentName,' ',Page.WikiLanguage,' ',WikiPageHasLanguage(Page.WikiDocumentName,Query.Languages)]);
1435 Node:=Page.GetNodeHighestScore(Query);
1436 s:='<div class="wikiSearchResultItem">'+FoundNodeToHTMLSnippet(Page,Node,Query)+'</div>'+LineEnding;
1437 //debugln(['TWikiHelp.TestSearch Score=',Page.Score,' HTML="',s,'"']);
1438 HTML+=s;
1439 end;
1440 HTML+='</body>'+LineEnding
1441 +'</html>'+LineEnding;
1442 FResultsHTML:=HTML;
1443 FoundPages.Free;
1444 EndTime:=Now;
1445 fWikiSearchTimeMSec:=round(Abs(EndTime-StartTime)*86400000);
1446 EnterCritSect;
1447 try
1448 fProgressStep:=whpsWikiSearchComplete;
1449 finally
1450 LeaveCritSect;
1451 end;
1452 //debugln(['TWikiHelp.DoSearch END Search="',Trim(Query.Phrases.Text),'" ',dbgs(fWikiSearchTimeMSec)+'msec']);
1453 end;
1454 if Assigned(OnSearched) then
1455 OnSearched(Self);
1456 end;
1457
FoundNodeToHTMLSnippetnull1458 function TWikiHelp.FoundNodeToHTMLSnippet(aPage: TW2HelpPage;
1459 aNode: TWHTextNode; aQuery: TWikiHelpQuery): string;
1460 var
1461 HeaderNode: TWHTextNode;
1462 begin
1463 // link to the page
1464 Result:='<a href="'+StrToXMLValue(aPage.WikiDocumentName)+'"'
1465 +' class="wikiLinkPage"'
1466 +' alt="'+StrToXMLValue(aPage.WikiPage.Title)+'"'
1467 +'>'
1468 +TextToHTMLSnipped(aPage.WikiPage.Title,aQuery.LoPhrases,200)+'</a><br>'+LineEnding;
1469 if aNode=nil then begin
1470 // get the first node with some text
1471 aNode:=aPage.WHRoot;
1472 while (aNode<>nil) and (UTF8Trim(aNode.Txt)='') do
1473 aNode:=aNode.Next;
1474 end;
1475 if aNode<>nil then begin
1476 //debugln(['TWikiHelp.FoundNodeToHTMLSnippet ',dbgs(aNode.Typ),' Txt="'+dbgstr(ANode.Txt)+'"']);
1477 HeaderNode:=aNode;
1478 while (HeaderNode<>nil) and (HeaderNode.Typ<>whnHeader) do
1479 HeaderNode:=HeaderNode.Previous;
1480 if aNode=HeaderNode then begin
1481 // get the first node after the header with some text
1482 repeat
1483 aNode:=aNode.Next;
1484 until (aNode=nil) or (UTF8Trim(aNode.Txt)<>'');
1485 end;
1486 if HeaderNode<>nil then begin
1487 // add a direct link to the sub topic
1488 Result+='Topic <a href="'+StrToXMLValue(aPage.WikiDocumentName+'#'+WikiHeaderToLink(HeaderNode.Txt))+'"'
1489 +' class="wikiLinkTopic"'
1490 +' alt="'+StrToXMLValue(HeaderNode.Txt)+'"'
1491 +'>'
1492 +TextToHTMLSnipped(HeaderNode.Txt,aQuery.LoPhrases,200)+'</a>: ';
1493 end;
1494 if aNode<>nil then begin
1495 // add text
1496 Result+=TextToHTMLSnipped(aNode.Txt,aQuery.LoPhrases,200);
1497 end;
1498 end;
1499 end;
1500
1501 constructor TWikiHelp.Create(AOwner: TComponent);
1502 begin
1503 InitCriticalSection(FCritSec);
1504 inherited Create(AOwner);
1505 FConverter:=TWiki2HelpConverter.Create;
1506 FConverter.CodeTags:=WikiCreateCommonCodeTagList(true);
1507 FConverter.FHelp:=Self;
1508 FDefaultScoring:=TWHScoring.Create;
1509 with FDefaultScoring do begin
1510 Phrases[whfcPageTitle,whfsWholeWord]:=128;
1511 Phrases[whfcPageTitle,whfsPart]:=64;
1512 Phrases[whfcHeader,whfsWholeWord]:=32;
1513 Phrases[whfcHeader,whfsPart]:=16;
1514 Phrases[whfcText,whfsWholeWord]:=8;
1515 Phrases[whfcText,whfsPart]:=4;
1516 Phrases[whfcLink,whfsWholeWord]:=2;
1517 Phrases[whfcLink,whfsPart]:=1;
1518 end;
1519 FMaxResults:=10;
1520 fProgressStep:=whpsNone;
1521 end;
1522
1523 destructor TWikiHelp.Destroy;
1524 begin
1525 AbortLoading(true);
1526 FConverter.CodeTags.Free;
1527 FreeAndNil(FConverter);
1528 FreeAndNil(FDefaultScoring);
1529 FreeAndNil(FQuery);
1530 inherited Destroy;
1531 DoneCriticalsection(FCritSec);
1532 end;
1533
1534 procedure TWikiHelp.StartLoading;
1535 begin
1536 if not DirPathExists(XMLDirectory) then
1537 raise Exception.Create('TWikiHelp.StartScan XMLDirectory not found: '+XMLDirectory);
1538 if not DirPathExists(ImagesDirectory) then
1539 raise Exception.Create('TWikiHelp.StartScan ImagesDirectory not found: '+ImagesDirectory);
1540 EnterCritSect;
1541 try
1542 if fProgressStep>whpsNone then exit;
1543 fProgressStep:=whpsWikiScanDir;
1544 fWikiLoadTimeMSec:=0;
1545 fProgressCount:=0;
1546 fProgressMax:=0;
1547 FScanThread:=TWikiHelpThread.Create(true);
1548 FScanThread.FreeOnTerminate:=true;
1549 FScanThread.Help:=Self;
1550 {$IF FPC_FULLVERSION<=20403}
1551 FScanThread.Resume;
1552 {$ELSE}
1553 FScanThread.Start;
1554 {$ENDIF}
1555 finally
1556 LeaveCritSect;
1557 end;
1558 end;
1559
LoadingContentnull1560 function TWikiHelp.LoadingContent: boolean;
1561 begin
1562 Result:=(fProgressStep>whpsNone) and (fProgressStep<whpsWikiLoadComplete);
1563 end;
1564
1565 procedure TWikiHelp.AbortLoading(Wait: boolean);
1566 begin
1567 EnterCritSect;
1568 try
1569 if not LoadingContent then exit;
1570 FAborting:=true;
1571 finally
1572 LeaveCritSect;
1573 end;
1574 if not Wait then exit;
1575 while LoadingContent do
1576 Sleep(10);
1577 EnterCritSect;
1578 try
1579 FAborting:=false;
1580 finally
1581 LeaveCritSect;
1582 end;
1583 end;
1584
LoadCompletenull1585 function TWikiHelp.LoadComplete: boolean;
1586 begin
1587 Result:=(fProgressStep>=whpsWikiLoadComplete);
1588 end;
1589
CollectAllLanguagesnull1590 function TWikiHelp.CollectAllLanguages(AsCaption: boolean): TStrings;
1591
1592 procedure Add(Code: string);
1593 begin
1594 if AsCaption then
1595 Code:=WikiLangCodeToCaption(Code);
1596 CollectAllLanguages.Add(Code);
1597 end;
1598
1599 var
1600 Codes: String;
1601 p: SizeInt;
1602 Code: String;
1603 begin
1604 Result:=TStringList.Create;
1605 Add('');
1606 if LoadComplete then begin
1607 Codes:=Converter.CollectAllLangCodes(';')+';';
1608 repeat
1609 p:=Pos(';',Codes);
1610 if p<1 then p:=length(Codes)+1;
1611 Code:=LeftStr(Codes,p-1);
1612 Delete(Codes,1,p);
1613 if Code<>'' then
1614 Add(Code);
1615 until Codes='';
1616 end;
1617 end;
1618
GetProgressCaptionnull1619 function TWikiHelp.GetProgressCaption: string;
1620 begin
1621 EnterCritSect;
1622 try
1623 case fProgressStep of
1624 whpsNone: Result:='Wiki not yet loaded.';
1625 whpsWikiScanDir: Result:='Scanning Wiki directory ...';
1626 whpsWikiLoadPages: Result:='Loaded '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1627 whpsWikiExtractPageTexts: Result:='Read '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1628 whpsWikiLoadComplete: Result:='Loaded '+IntToStr(Converter.Count)+' Wiki pages in '+IntToStr(fWikiLoadTimeMSec)+'msec.';
1629 whpsWikiSearch: Result:='Searched '+IntToStr(fProgressCount)+' of '+IntToStr(fProgressMax)+' Wiki pages.';
1630 whpsWikiSearchComplete: Result:='Searched '+IntToStr(Converter.Count)+' Wiki pages in '+IntToStr(fWikiSearchTimeMSec)+'msec.';
1631 else Result:='unknown step: '+IntToStr(ord(fProgressStep));
1632 end;
1633 finally
1634 LeaveCritSect;
1635 end;
1636 end;
1637
Busynull1638 function TWikiHelp.Busy: boolean;
1639 begin
1640 Result:=not (fProgressStep in [whpsWikiLoadComplete,whpsWikiSearchComplete]);
1641 end;
1642
1643 procedure TWikiHelp.Search(const Term: string; const Languages: string;
1644 Scoring: TWHScoring; FreeScoring: boolean);
1645 var
1646 aQuery: TWikiHelpQuery;
1647 begin
1648 if Scoring=nil then Scoring:=DefaultScoring;
1649 aQuery:=TWikiHelpQuery.Create(Term,Languages,
1650 Scoring,FreeScoring and (Scoring<>DefaultScoring));
1651 try
1652 Search(aQuery);
1653 finally
1654 aQuery.Free;
1655 end;
1656 end;
1657
1658 procedure TWikiHelp.Search(aQuery: TWikiHelpQuery);
1659 begin
1660 EnterCritSect;
1661 try
1662 if aQuery=nil then exit;
1663 if FQuery=nil then
1664 // first query
1665 FQuery:=TWikiHelpQuery.Clone(aQuery)
1666 else if FQuery.Equals(aQuery) then begin
1667 // same query
1668 //debugln(['TWikiHelp.Search same query ',FQuery=aQuery,' ',FQuery.Scoring.Equals(aQuery.Scoring),' fquery.scoring=',FQuery.Scoring.Phrases[whfcPageTitle,whfsWholeWord],' aquery.scoring=',aQuery.Scoring.Phrases[whfcPageTitle,whfsWholeWord]]);
1669 exit;
1670 end else
1671 FQuery.Assign(aQuery);
1672 if LoadingContent then exit;
1673 finally
1674 LeaveCritSect;
1675 end;
1676 //debugln(['TWikiHelp.Search searching']);
1677 DoSearch;
1678 end;
1679
1680 procedure TWikiHelp.SavePageToStream(DocumentName: string; aStream: TStream);
1681 var
1682 Page: TW2HelpPage;
1683 begin
1684 Page:=TW2HelpPage(Converter.GetPageWithDocumentName(DocumentName));
1685 if Page=nil then
1686 raise Exception.Create('document "'+DocumentName+'" not found in wiki');
1687 Converter.SavePageAsHTMLToStream(Page,aStream);
1688 end;
1689
1690 end.
1691
1692