1 { Converter for wiki pages to a chm file
2 
3   Copyright (C) 2012  Mattias Gaertner  mattias@freepascal.org
4 
5   This source is free software; you can redistribute it and/or modify it under
6   the terms of the GNU General Public License as published by the Free
7   Software Foundation; either version 2 of the License, or (at your option)
8   any later version.
9 
10   This code is distributed in the hope that it will be useful, but WITHOUT ANY
11   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
13   details.
14 
15   A copy of the GNU General Public License is available on the World Wide Web
16   at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
17   to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
18   Boston, MA 02110-1335, USA.
19 
20 
21 ToDo:
22   Full text search
23   images
24 
25 }
26 unit Wiki2CHMConvert;
27 
28 {$mode objfpc}{$H+}
29 
30 { $DEFINE VerboseCHMIndex}
31 { $DEFINE EnableWikiCHMWriter}
32 
33 interface
34 
35 uses
36   Classes, SysUtils,
37   {$IFDEF EnableWikiCHMWriter}
38   wikichmwriter, wikichmfilewriter, wikichmsitemap,
39   {$ELSE}
40   chmwriter, chmfilewriter, chmsitemap,
41   {$ENDIF}
42   // LazUtils
43   LazLoggerBase, LazUTF8, LazFileUtils, AvgLvlTree,
44   // LazWiki
45   Wiki2HTMLConvert, Wiki2XHTMLConvert;
46 
47 const
48   CHMImagesDir = '/images/';
49 type
50 
51   { TW2CHMPage
52     for future extensions and descendants }
53 
54   TW2CHMPage = class(TW2HTMLPage)
55   public
56   end;
57 
58   { TWiki2CHMConverter }
59 
60   TWiki2CHMConverter = class(TWiki2HTMLConverter)
61   private
62     FCHMFile: string;
63     FTOCRootName: String;
64     FIndexFileName, FTOCFileName: String;
65     FTocSitemap, FIndexSitemap: TCHMSiteMap;
66     FTocStream, FIndexStream: TMemoryStream;
67     procedure SetCHMFile(AValue: string);
68     procedure SetIndexFileName(AValue: string);
69     procedure SetTOCFilename(AValue: string);
70   protected
71     Writer: TChmWriter;
72     FilesCompressed: integer;
73     DocumentNameToPage: TStringToPointerTree; // Page.WikiDocumentName+'.html' to Page
74     procedure AddIndexItem(AText, AUrl: String); override;
75     procedure AddTocItem(ALevel: Integer; AText, AUrl: String); override;
76     procedure ConvertInit; override;
OnWriterGetFileDatanull77     function OnWriterGetFileData(const DataName: String; out PathInChm: String;
78       out FileName: String; var Stream: TStream): Boolean;
79     procedure OnWriterLastFileAdded(Sender: TObject);
GetImageLinknull80     function GetImageLink(ImgFilename: string): string; override;
GetInternalImageLinknull81     function GetInternalImageLink(ImgFilename: String): String; override;
GetPageLinknull82     function GetPageLink(Page: TW2XHTMLPage): string; override;
83     procedure SaveAllPages; override;
84   public
85     constructor Create; override;
86     destructor Destroy; override;
87     procedure Clear; override;
GetRelativeCSSFileNamenull88     function GetRelativeCSSFileName: String; override;
89     property CHMFile: string read FCHMFile write SetCHMFile;
90     property IndexFileName: string read FIndexFileName write SetIndexFileName;
91     property TOCFileName: String read FTOCFilename write SetTOCFilename;
92     property TOCRootName: String read FTOCRootName write FTOCRootName;
93   end;
94 
95 implementation
96 
97 { TWiki2HTMLConverter }
98 
OnWriterGetFileDatanull99 function TWiki2CHMConverter.OnWriterGetFileData(const DataName: String; out
100   PathInChm: String; out FileName: String; var Stream: TStream): Boolean;
101 var
102   Page: TW2CHMPage;
103   ImgFilename: String;
104 begin
105   Stream.Size:=0;
106   inc(FilesCompressed);
107   debugln(['chm processing ',FilesCompressed,' of ',Writer.FilesToCompress.Count,' "',DataName,'" ...']);
108   Result := False; // Return true to abort compressing files
109   Filename:=ExtractFileName(DataName);
110   PathInChm:='/'+ExtractFilePath(DataName);
111   // cleanup string
112   PathInChm:=StringReplace(PathInChm, '\','/',[rfReplaceAll]);
113   PathInChm:=StringReplace(PathInChm, '//','/',[rfReplaceAll]);
114   Page:=TW2CHMPage(DocumentNameToPage[DataName]);
115   if Page<>nil then begin
116     // a page
117     SavePageToStream(Page,Stream);
118   end else if copy(DataName,1,length(CHMImagesDir))=CHMImagesDir then begin
119     // an image
120     ImgFilename:=ImagesDir+copy(DataName,length(CHMImagesDir)+1,length(DataName));
121     //debugln(['TWiki2CHMConverter.OnWriterGetFileData img="',DataName,'" File="',ImgFilename,'" FileSize=',FileSize(ImgFilename)]);
122     TMemoryStream(Stream).LoadFromFile(ImgFilename);
123   end else if DataName=GetRelativeCSSFilename then begin
124     // the css file
125     TMemoryStream(Stream).LoadFromFile(CSSFilename);
126   end else
127    raise Exception.Create('TWiki2CHMConverter.OnWriterGetFileData failed DataName="'+dbgstr(DataName)+'"');
128 end;
129 
130 procedure TWiki2CHMConverter.AddIndexItem(AText, AUrl: String);
131 var
132   {$IF FPC_FULLVERSION>=30200}
133   x: integer;
134   {$ENDIF}
135   AItem: TCHMSiteMapItem;
136   i: Integer;
137   txt, url, itemtxt, itemurl, itemlocal: String;
138 begin
139   // Avoid empty index data
140   if (AText = '') or (AUrl = '') then
141     exit;
142 
143   AText := EscapeToHTML(AText);
144 
145   // Avoid duplicate index items.
146   txt := UTF8Trim(UTF8Lowercase(AText));
147   url := UTF8Trim(UTF8Lowercase(AUrl));
148   for i:=0 to FIndexSiteMap.Items.Count-1 do begin
149     AItem := FIndexSiteMap.Items.Item[i];
150     itemtxt := UTF8Lowercase(AItem.Text);
151     {$IF FPC_FULLVERSION>=30200}
152     URL:='';
153     for x:=0 to AItem.SubItemcount-1 do
154     begin
155       URL:=AItem.SubItem[x].URL;
156       if URL<>'' then
157         break;
158       URL:=AItem.SubItem[x].Local;
159       if URL<>'' then
160         break;
161     end;
162     {$ELSE}
163     URL:=AItem.URL;
164     {$ENDIF}
165     itemurl := UTF8Lowercase(URL);
166     itemlocal := UTF8Lowercase(AItem.Local);
167     if (txt = itemtxt) and ((url = itemurl) or (url = itemlocal)) then
168       exit;
169   end;
170 
171   AItem := FIndexSiteMap.Items.NewItem;
172   AItem.Text := UTF8Trim(AText);
173   {$IF FPC_FULLVERSION>=30200}
174   AItem.AddURL(AUrl);
175   {$ELSE}
176   AItem.Local := Trim(AUrl);
177   AItem.Keyword := UTF8Trim(AText);
178   {$ENDIF}
179 end;
180 
181 procedure TWiki2CHMConverter.AddTocItem(ALevel: Integer; AText, AUrl: String);
182 // Is called whenever a new node is added to the xml TOC
183 
NewItemAtLevelnull184   function NewItemAtLevel(ALevel: Integer): TChmSiteMapItem;
185   var
186     item: TChmSiteMapItem;
187     items: TChmSiteMapItems;
188     level: Integer;
189   begin
190     level := 0;
191     items := FTOCSiteMap.Items;
192     while level < ALevel do begin
193       if items.Count = 0 then
194         item := items.NewItem
195       else
196         item := items.Item[items.Count-1];
197       items := item.Children;
198       inc(level);
199     end;
200     Result := items.NewItem;
201   end;
202 
203 var
204   item: TCHMSitemapItem;
205 begin
206   item := NewItemAtLevel(ALevel);
207   {$IF FPC_FULLVERSION>=30200}
208   item.AddURL(AUrl);
209   {$ELSE}
210   item.Local := AUrl;
211   {$ENDIF}
212   item.Text := EscapeToHTML(AText);
213   item.ImageNumber := 0;
214 end;
215 
CompareIndexnull216 function CompareIndex(Item1, Item2: Pointer): Integer;
217 var
218   indexItem1, indexItem2: TChmSiteMapItem;
219 begin
220   indexItem1 := TChmSiteMapItem(Item1);
221   indexItem2 := TChmSiteMapItem(Item2);
222   Result := UTF8CompareStr(UTF8Lowercase(indexItem1.Text), UTF8Lowercase(indexItem2.Text));
223 end;
224 
225 procedure TWiki2CHMConverter.OnWriterLastFileAdded(Sender: TObject);
226 var
227   CurWriter: TChmWriter;
228 begin
229   // Assign the TOC and index files
230   CurWriter := TChmWriter(Sender);
231   if CurWriter=nil then exit;
232 
233   // write Index (see TChmProject.LastFileAdded)
234   if (IndexFileName <> '') then begin
235     FIndexStream := TMemoryStream.Create;
236     FIndexSiteMap.Items.Sort(@CompareIndex);
237     FIndexSiteMap.SaveToStream(FIndexStream);
238     CurWriter.AppendIndex(FIndexStream);
239     CurWriter.AppendBinaryIndexFromSitemap(FIndexSitemap,false);
240   end;
241 
242   // write TOC (see TChmProject.LastFileAdded)
243   if (TOCFileName <> '') then begin
244     FTOCStream := TMemoryStream.Create;
245     FTocSitemap.SaveToStream(FTOCStream);
246     CurWriter.AppendTOC(FTocStream);
247     CurWriter.AppendBinaryTOCFromSiteMap(FTOCSitemap);
248   end;
249 end;
250 
GetImageLinknull251 function TWiki2CHMConverter.GetImageLink(ImgFilename: string): string;
252 begin
253   Result:=CHMImagesDir+ExtractFileName(ImgFilename);
254 end;
255 
TWiki2CHMConverter.GetInternalImageLinknull256 function TWiki2CHMConverter.GetInternalImageLink(ImgFilename: String): String;
257 begin
258   Result := CHMImagesDir + 'internal/' + ExtractFilename(ImgFilename);
259 end;
260 
GetPageLinknull261 function TWiki2CHMConverter.GetPageLink(Page: TW2XHTMLPage): string;
262 begin
263 //  Result:=Page.WikiDocumentName+'.html';
264   Result :=Page.Filename;
265 end;
266 
GetRelativeCSSFilenamenull267 function TWiki2CHMConverter.GetRelativeCSSFilename: String;
268 begin
269   Result := ExtractFileName(CSSFilename);
270 end;
271 
272 procedure TWiki2CHMConverter.SetCHMFile(AValue: string);
273 var
274   NewValue: String;
275 begin
276   NewValue:=TrimFilename(AValue);
277   if FCHMFile=NewValue then Exit;
278   FCHMFile:=NewValue;
279 end;
280 
281 procedure TWiki2CHMConverter.SetIndexFileName(AValue: string);
282 begin
283   if FIndexFileName=AValue then Exit;
284   FIndexFileName:=AValue;
285 end;
286 
287 procedure TWiki2CHMConverter.SetTOCFilename(AValue: String);
288 begin
289   if FTOCFileName = AValue then Exit;
290   FTOCFilename := AValue;
291 end;
292 
293 procedure TWiki2CHMConverter.ConvertInit;
294 var
295   i: Integer;
296   Page: TW2CHMPage;
297 begin
298   inherited ConvertInit;
299 
300   if CHMFile='' then
301     raise Exception.Create('chm file not set');
302 
303   FIndexSitemap := TChmSitemap.Create(stIndex);
304   FTOCSitemap := TChmSitemap.Create(stTOC);
305   AddTocItem(0, FTOCRootName, '');
306 
307   for i:=0 to Count-1 do begin
308     Page:=TW2CHMPage(Pages[i]);
309     DocumentNameToPage[GetPageLink(Page)]:=Page;
310   end;
311 end;
312 
313 procedure TWiki2CHMConverter.SaveAllPages;
314 var
315   ms: TMemoryStream;
316   Filename: String;
317   i: Integer;
318   F2PItem: PStringToPointerTreeItem;
319 begin
320   Filename:=CHMFile;
321   debugln(['initializing chm ...']);
322   Writer:=nil;
323   ms:=TMemoryStream.Create;
324   try
325     Writer:=TChmWriter.Create(ms,false);
326     // see TChmProject.WriteChm();
327 
328     // our callback to get data
329     Writer.OnGetFileData :=@OnWriterGetFileData;
330     Writer.OnLastFile    :=@OnWriterLastFileAdded;
331     FilesCompressed:=0;
332 
333     // give it the html files
334     for i:=0 to Count-1 do
335       Writer.FilesToCompress.Add(GetPageLink(TW2CHMPage(Pages[i])));
336 
337     // give it the css file
338     Writer.FilesToCompress.Add(GetRelativeCSSFilename);
339 
340     // give it the image files
341     for F2PItem in UsedImages do
342       Writer.FilesToCompress.Add(F2PItem^.Name);
343 
344     // now some settings in the chm
345     Writer.DefaultPage := GetPageLink(TW2CHMPage(Pages[0]));
346     Writer.Title := Title;
347     //Writer.DefaultFont := DefaultFont;
348     Writer.FullTextSearch := true;
349     Writer.HasBinaryIndex := true;
350     Writer.IndexName := IndexFileName;
351     Writer.HasBinaryTOC := true;
352     Writer.TOCName := FTOCFileName;
353     //Writer.ReadmeMessage := ReadmeMessage;
354     {for i:=0 to Count-1 do
355       begin
356         ContextNode:=TChmContextNode(files.objects[i]);
357         if assigned(ContextNode) and (ContextNode.contextnumber<>0) then
358           Writer.AddContext(ContextNode.ContextNumber,files[i]);
359       end;}
360     //if FWindows.Count>0 then
361     //  Writer.Windows:=FWIndows;
362 
363     if assigned(FTocSiteMap) then
364       Writer.TocSitemap := FTocSiteMap;
365 
366     // and write!
367     debugln(['creating chm ...']);
368     Writer.Execute;
369 
370     debugln(['writing file "',Filename,'" ...']);
371     ms.SaveToFile(Filename);
372   finally
373     FreeAndNil(Writer);
374     ms.Free;
375   end;
376 end;
377 
378 constructor TWiki2CHMConverter.Create;
379 begin
380   inherited Create;
381   FPageClass:=TW2CHMPage;
382   FOutputDir:='chm';
383   FCHMFile:='wiki.chm';
384   FIndexFileName:='_index.hhk';
385   FTOCFileName:='_table_of_contents.hhc';
386   FTOCRootName:='Pages available';
387   DocumentNameToPage:=TStringToPointerTree.Create(true);
388 end;
389 
390 destructor TWiki2CHMConverter.Destroy;
391 begin
392   FreeAndNil(FTocSiteMap);
393   FreeAndNil(FTocStream);
394   FreeAndNil(FIndexSitemap);
395   FreeAndNil(FIndexStream);
396   FreeAndNil(DocumentNameToPage);
397   inherited Destroy;
398 end;
399 
400 procedure TWiki2CHMConverter.Clear;
401 begin
402   if DocumentNameToPage <> nil then
403     DocumentNameToPage.Clear;
404   inherited Clear;
405 end;
406 
407 end.
408 
409