1 #include "docbookxslt.h"
2 #include "docbookxslt_p.h"
3 
4 #ifdef Q_OS_WIN
5 // one of the xslt/xml headers pulls in windows.h and breaks <limits>
6 #define NOMINMAX
7 #include <QHash>
8 #endif
9 
10 #include "../config-kdoctools.h"
11 #include "loggingcategory.h"
12 
13 #include <libxml/catalog.h>
14 #include <libxml/parser.h>
15 #include <libxml/parserInternals.h>
16 #include <libxml/xmlIO.h>
17 #include <libxslt/transform.h>
18 #include <libxslt/xsltInternals.h>
19 #include <libxslt/xsltconfig.h>
20 #include <libxslt/xsltutils.h>
21 
22 #include <QByteArray>
23 #include <QDir>
24 #include <QFile>
25 #include <QStandardPaths>
26 #include <QString>
27 #include <QTextCodec>
28 #include <QUrl>
29 #include <QVector>
30 
31 #if !defined(SIMPLE_XSLT)
32 extern HelpProtocol *slave;
33 #define INFO(x)                                                                                                                                                \
34     if (slave)                                                                                                                                                 \
35         slave->infoMessage(x);
36 #else
37 #define INFO(x)
38 #endif
39 
writeToQString(void * context,const char * buffer,int len)40 int writeToQString(void *context, const char *buffer, int len)
41 {
42     QString *t = (QString *)context;
43     *t += QString::fromUtf8(buffer, len);
44     return len;
45 }
46 
47 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
48 
49 #define MAX_PATHS 64
50 xmlExternalEntityLoader defaultEntityLoader = NULL;
51 static xmlChar *paths[MAX_PATHS + 1];
52 static int nbpaths = 0;
53 static QHash<QString, QString> replaceURLList;
54 
55 /*
56  * Entity loading control and customization.
57  * taken from xsltproc.c
58  */
xsltprocExternalEntityLoader(const char * _URL,const char * ID,xmlParserCtxtPtr ctxt)59 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID, xmlParserCtxtPtr ctxt)
60 {
61     xmlParserInputPtr ret;
62     warningSAXFunc warning = NULL;
63 
64     // use local available dtd versions instead of fetching it every time from the internet
65     QString url = QLatin1String(_URL);
66     QHash<QString, QString>::const_iterator i;
67     for (i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) {
68         if (url.startsWith(i.key())) {
69             url.replace(i.key(), i.value());
70             qCDebug(KDocToolsLog) << "converted" << _URL << "to" << url;
71         }
72     }
73     char URL[1024];
74     strcpy(URL, url.toLatin1().constData());
75 
76     const char *lastsegment = URL;
77     const char *iter = URL;
78 
79     if (nbpaths > 0) {
80         while (*iter != 0) {
81             if (*iter == '/') {
82                 lastsegment = iter + 1;
83             }
84             iter++;
85         }
86     }
87 
88     if ((ctxt != NULL) && (ctxt->sax != NULL)) {
89         warning = ctxt->sax->warning;
90         ctxt->sax->warning = NULL;
91     }
92 
93     if (defaultEntityLoader != NULL) {
94         ret = defaultEntityLoader(URL, ID, ctxt);
95         if (ret != NULL) {
96             if (warning != NULL) {
97                 ctxt->sax->warning = warning;
98             }
99             qCDebug(KDocToolsLog) << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\"";
100             return (ret);
101         }
102     }
103     for (int i = 0; i < nbpaths; i++) {
104         xmlChar *newURL;
105 
106         newURL = xmlStrdup((const xmlChar *)paths[i]);
107         newURL = xmlStrcat(newURL, (const xmlChar *)"/");
108         newURL = xmlStrcat(newURL, (const xmlChar *)lastsegment);
109         if (newURL != NULL) {
110             ret = defaultEntityLoader((const char *)newURL, ID, ctxt);
111             if (ret != NULL) {
112                 if (warning != NULL) {
113                     ctxt->sax->warning = warning;
114                 }
115                 qCDebug(KDocToolsLog) << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\"";
116                 xmlFree(newURL);
117                 return (ret);
118             }
119             xmlFree(newURL);
120         }
121     }
122     if (warning != NULL) {
123         ctxt->sax->warning = warning;
124         if (URL != NULL) {
125             warning(ctxt, "failed to load external entity \"%s\"\n", URL);
126         } else if (ID != NULL) {
127             warning(ctxt, "failed to load external entity \"%s\"\n", ID);
128         }
129     }
130     return (NULL);
131 }
132 #endif
133 
transform(const QString & pat,const QString & tss,const QVector<const char * > & params)134 QString KDocTools::transform(const QString &pat, const QString &tss, const QVector<const char *> &params)
135 {
136     QString parsed;
137 
138     INFO(i18n("Parsing stylesheet"));
139 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
140     // prepare use of local available dtd versions instead of fetching every time from the internet
141     // this approach is url based
142     if (!defaultEntityLoader) {
143         defaultEntityLoader = xmlGetExternalEntityLoader();
144         xmlSetExternalEntityLoader(xsltprocExternalEntityLoader);
145 
146         replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.5")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD);
147     }
148 #endif
149 
150     xsltStylesheetPtr style_sheet = xsltParseStylesheetFile((const xmlChar *)QFile::encodeName(tss).constData());
151 
152     if (!style_sheet) {
153         return parsed;
154     }
155     if (style_sheet->indent == 1) {
156         xmlIndentTreeOutput = 1;
157     } else {
158         xmlIndentTreeOutput = 0;
159     }
160 
161     INFO(i18n("Parsing document"));
162 
163     xmlParserCtxtPtr pctxt;
164 
165     pctxt = xmlNewParserCtxt();
166     if (pctxt == nullptr) {
167         return parsed;
168     }
169 
170     xmlDocPtr doc = xmlCtxtReadFile(pctxt, QFile::encodeName(pat).constData(), nullptr, XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_NONET);
171     /* Clean the context pointer, now useless */
172     const bool context_valid = (pctxt->valid == 0);
173     xmlFreeParserCtxt(pctxt);
174 
175     /* Check both the returned doc (for parsing errors) and the context
176        (for validation errors) */
177     if (doc == nullptr) {
178         return parsed;
179     } else {
180         if (context_valid) {
181             xmlFreeDoc(doc);
182             return parsed;
183         }
184     }
185 
186     INFO(i18n("Applying stylesheet"));
187     QVector<const char *> p = params;
188     p.append(nullptr);
189     xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0]));
190     xmlFreeDoc(doc);
191     if (res != nullptr) {
192         xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, nullptr, &parsed, nullptr);
193         outp->written = 0;
194         INFO(i18n("Writing document"));
195         xsltSaveResultTo(outp, res, style_sheet);
196         xmlOutputBufferClose(outp);
197         xmlFreeDoc(res);
198     }
199     xsltFreeStylesheet(style_sheet);
200 
201     if (parsed.isEmpty()) {
202         parsed = QLatin1Char(' '); // avoid error message
203     }
204     return parsed;
205 }
206 
207 /*
208 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID,
209                        xmlParserCtxtPtr ctxt) {
210     xmlParserInputPtr ret = NULL;
211 
212     // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory);
213 
214     if (URL == NULL) {
215         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
216             ctxt->sax->warning(ctxt,
217                     "failed to load external entity \"%s\"\n", ID);
218         return(NULL);
219     }
220     if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN"))
221         URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
222     if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN"))
223     URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
224 
225     QString file;
226     if (QFile::exists( QDir::currentPath() + "/" + URL ) )
227         file = QDir::currentPath() + "/" + URL;
228     else
229         file = locate("dtd", URL);
230 
231     ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData());
232     if (ret == NULL) {
233         if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
234             ctxt->sax->warning(ctxt,
235 
236                 "failed to load external entity \"%s\"\n", URL);
237     }
238     return(ret);
239 }
240 */
241 
/*
 * Extracts the content of one <FILENAME ...> ... </FILENAME> section.
 *
 * parsed  the complete transformation output
 * index   position in parsed at which the opening "<FILENAME " tag starts
 *
 * Returns the text between the end of the opening tag and its matching
 * closing tag. If that text itself contains nested sections, everything from
 * the first nested opening tag up to and including the last closing tag is
 * removed. An empty string is returned for malformed input (index out of
 * range, opening tag never closed with '>', or no matching "</FILENAME>").
 */
QString splitOut(const QString &parsed, int index)
{
    const QString openTag = QStringLiteral("<FILENAME ");
    const QString closeTag = QStringLiteral("</FILENAME>");

    if (index < 0 || index >= parsed.length()) {
        return QString();
    }

    // The payload starts right after the '>' that ends the opening tag.
    const int tagEnd = parsed.indexOf(QLatin1Char('>'), index);
    if (tagEnd == -1) {
        return QString();
    }
    const int start_index = tagEnd + 1;

    // Walk over opening/closing tags, tracking the nesting depth, until the
    // closing tag that matches the opening tag at the original index is found.
    int inside = 0;
    QString filedata;
    while (true) {
        const int endindex = parsed.indexOf(closeTag, index);
        if (endindex == -1) {
            // unbalanced input: there is no closing tag left to match
            return QString();
        }
        // 0 means "no further opening tag"
        const int startindex = parsed.indexOf(openTag, index) + 1;

        if (startindex > 0) {
            if (startindex < endindex) {
                // another section opens before the next one closes
                index = startindex + 8;
                inside++;
            } else {
                index = endindex + 8;
                inside--;
            }
        } else {
            inside--;
            index = endindex + 1;
        }

        if (inside == 0) {
            filedata = parsed.mid(start_index, endindex - start_index);
            break;
        }
    }

    // Drop nested sections from the extracted text.
    // NOTE(review): a nested section starting at offset 0 is left in place
    // (the test is "> 0", as before) - confirm whether that is intended.
    const int nestedStart = filedata.indexOf(openTag);
    if (nestedStart > 0) {
        const int lastClose = filedata.lastIndexOf(closeTag);
        if (lastClose != -1) {
            filedata = filedata.left(nestedStart) + filedata.mid(lastClose + closeTag.length());
        }
    }

    return filedata;
}
293 
fromUnicode(const QString & data)294 QByteArray fromUnicode(const QString &data)
295 {
296 #ifdef Q_OS_WIN
297     return data.toUtf8();
298 #else
299     QTextCodec *locale = QTextCodec::codecForLocale();
300     QByteArray result;
301     char buffer[30000];
302     uint buffer_len = 0;
303     uint len = 0;
304     int offset = 0;
305     const int part_len = 5000;
306 
307     QString part;
308 
309     while (offset < data.length()) {
310         part = data.mid(offset, part_len);
311         QByteArray test = locale->fromUnicode(part);
312         if (locale->toUnicode(test) == part) {
313             result += test;
314             offset += part_len;
315             continue;
316         }
317         len = part.length();
318         buffer_len = 0;
319         for (uint i = 0; i < len; i++) {
320             QByteArray test = locale->fromUnicode(part.mid(i, 1));
321             if (locale->toUnicode(test) == part.mid(i, 1)) {
322                 if (buffer_len + test.length() + 1 > sizeof(buffer)) {
323                     break;
324                 }
325                 strcpy(buffer + buffer_len, test.data());
326                 buffer_len += test.length();
327             } else {
328                 QString res = QStringLiteral("&#%1;").arg(part.at(i).unicode());
329                 test = locale->fromUnicode(res);
330                 if (buffer_len + test.length() + 1 > sizeof(buffer)) {
331                     break;
332                 }
333                 strcpy(buffer + buffer_len, test.data());
334                 buffer_len += test.length();
335             }
336         }
337         result += QByteArray(buffer, buffer_len + 1);
338         offset += part_len;
339     }
340     return result;
341 #endif
342 }
343 
replaceCharsetHeader(QString & output)344 void replaceCharsetHeader(QString &output)
345 {
346     QString name;
347 #ifdef Q_OS_WIN
348     name = "utf-8";
349     // may be required for all xml output
350     if (output.contains("<table-of-contents>"))
351         output.replace(QLatin1String("<?xml version=\"1.0\"?>"), QLatin1String("<?xml version=\"1.0\" encoding=\"%1\"?>").arg(name));
352 #else
353     name = QLatin1String(QTextCodec::codecForLocale()->name());
354     name.replace(QLatin1String("ISO "), QLatin1String("iso-"));
355     output.replace(QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"),
356                    QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\">").arg(name));
357 #endif
358 }
359 
extractFileToBuffer(const QString & content,const QString & filename)360 QByteArray KDocTools::extractFileToBuffer(const QString &content, const QString &filename)
361 {
362     const int index = content.indexOf(QLatin1String("<FILENAME filename=\"%1\"").arg(filename));
363     if (index == -1) {
364         if (filename == QLatin1String("index.html")) {
365             return fromUnicode(content);
366         } else {
367             return QByteArray(); // null value, not just empty
368         }
369     }
370     QString data_file = splitOut(content, index);
371     replaceCharsetHeader(data_file);
372     return fromUnicode(data_file);
373 }
374 
375 class DtdStandardDirs
376 {
377 public:
378     QString srcdir;
379 };
380 
Q_GLOBAL_STATIC(DtdStandardDirs,s_dtdDirs)381 Q_GLOBAL_STATIC(DtdStandardDirs, s_dtdDirs)
382 
383 void KDocTools::setupStandardDirs(const QString &srcdir)
384 {
385     QByteArray catalogs;
386 
387     if (srcdir.isEmpty()) {
388         catalogs += getKDocToolsCatalogs().join(" ").toLocal8Bit();
389     } else {
390         catalogs += QUrl::fromLocalFile(srcdir + QStringLiteral("/customization/catalog.xml")).toEncoded();
391         s_dtdDirs()->srcdir = srcdir;
392     }
393     // qCDebug(KDocToolsLog) << "XML_CATALOG_FILES: " << catalogs;
394     qputenv("XML_CATALOG_FILES", catalogs);
395     xmlInitializeCatalog();
396 #if defined(_MSC_VER)
397     /* Workaround: apparently setting XML_CATALOG_FILES set here
398        has no effect on the libxml2 functions.
399        This code path could be used in all cases instead of setting the
400        variable, but this requires more investigation on the reason of
401        the issue. */
402     xmlLoadCatalogs(catalogs.constData());
403 #endif
404 }
405 
/*
 * Single-result variant of locateFilesInDtdResource(): returns its first
 * match, or a null QString when nothing was found.
 */
QString KDocTools::locateFileInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
{
    const QStringList matches = locateFilesInDtdResource(file, option);
    if (matches.isEmpty()) {
        return QString();
    }
    return matches.first();
}
411 
locateFilesInDtdResource(const QString & file,const QStandardPaths::LocateOptions option)412 QStringList locateFilesInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
413 {
414     QFileInfo info(file);
415     if (info.exists() && info.isAbsolute()) {
416         return QStringList() << file;
417     }
418 
419     const QString srcdir = s_dtdDirs()->srcdir;
420     if (!srcdir.isEmpty()) {
421         const QString test = srcdir + QLatin1Char('/') + file;
422         if (QFile::exists(test)) {
423             return QStringList() << test;
424         }
425         qCDebug(KDocToolsLog) << "Could not locate file" << file << "in" << srcdir;
426         return QStringList();
427     }
428     // Using locateAll() is necessary to be able to find all catalogs when
429     // running in environments where every repository is installed in its own
430     // prefix.
431     // This is the case on build.kde.org where kdelibs4support installs catalogs
432     // in a different prefix than kdoctools.
433     const QString fileName = QStringLiteral("kf5/kdoctools/") + file;
434     QStringList result = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, fileName, option);
435 
436     // fallback to stuff installed with KDocTools
437     const QFileInfo fileInInstallDataDir(QStringLiteral(KDOCTOOLS_INSTALL_DATADIR_KF5) + QStringLiteral("/kdoctools/") + file);
438     if (fileInInstallDataDir.exists()) {
439         if ((option == QStandardPaths::LocateFile) && fileInInstallDataDir.isFile()) {
440             result.append(fileInInstallDataDir.absoluteFilePath());
441         }
442         if ((option == QStandardPaths::LocateDirectory) && fileInInstallDataDir.isDir()) {
443             result.append(fileInInstallDataDir.absoluteFilePath());
444         }
445     }
446 
447     if (result.isEmpty()) {
448         qCDebug(KDocToolsLog) << "Could not locate file" << fileName << "in" << QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation);
449     }
450     return result;
451 }
452 
getKDocToolsCatalogs()453 QStringList getKDocToolsCatalogs()
454 {
455     // Find all catalogs as catalog*.xml, and add them to the list, starting
456     // from catalog.xml (the main one).
457     const QStringList dirNames = locateFilesInDtdResource(QStringLiteral("customization"), QStandardPaths::LocateDirectory);
458     if (dirNames.isEmpty()) {
459         return QStringList();
460     }
461     QStringList catalogFiles;
462     for (const QString &customizationDirName : dirNames) {
463         QDir customizationDir = QDir(customizationDirName);
464         const QStringList catalogFileFilters(QStringLiteral("catalog*.xml"));
465         const QFileInfoList catalogInfoFiles = customizationDir.entryInfoList(catalogFileFilters, QDir::Files, QDir::Name);
466         for (const QFileInfo &fileInfo : catalogInfoFiles) {
467             const QString fullFileName = QUrl::fromLocalFile(fileInfo.absoluteFilePath()).toEncoded();
468             if (fileInfo.fileName() == QStringLiteral("catalog.xml")) {
469                 catalogFiles.prepend(fullFileName);
470             } else {
471                 catalogFiles.append(fullFileName);
472             }
473         }
474     }
475 
476     QStringList catalogs;
477     for (const QString &aCatalog : std::as_const(catalogFiles)) {
478         catalogs << aCatalog;
479     }
480     // qCDebug(KDocToolsLog) << "Found catalogs: " << catalogs;
481     return catalogs;
482 }
483 
documentationDirs()484 QStringList KDocTools::documentationDirs()
485 {
486     /* List of paths containing documentation */
487     return QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("doc/HTML"), QStandardPaths::LocateDirectory);
488 }
489