1 #include "docbookxslt.h"
2 #include "docbookxslt_p.h"
3
4 #ifdef Q_OS_WIN
5 // one of the xslt/xml headers pulls in windows.h and breaks <limits>
6 #define NOMINMAX
7 #include <QHash>
8 #endif
9
10 #include "../config-kdoctools.h"
11 #include "loggingcategory.h"
12
13 #include <libxml/catalog.h>
14 #include <libxml/parser.h>
15 #include <libxml/parserInternals.h>
16 #include <libxml/xmlIO.h>
17 #include <libxslt/transform.h>
18 #include <libxslt/xsltInternals.h>
19 #include <libxslt/xsltconfig.h>
20 #include <libxslt/xsltutils.h>
21
22 #include <QByteArray>
23 #include <QDir>
24 #include <QFile>
25 #include <QStandardPaths>
26 #include <QString>
27 #include <QTextCodec>
28 #include <QUrl>
29 #include <QVector>
30
31 #if !defined(SIMPLE_XSLT)
32 extern HelpProtocol *slave;
33 #define INFO(x) \
34 if (slave) \
35 slave->infoMessage(x);
36 #else
37 #define INFO(x)
38 #endif
39
writeToQString(void * context,const char * buffer,int len)40 int writeToQString(void *context, const char *buffer, int len)
41 {
42 QString *t = (QString *)context;
43 *t += QString::fromUtf8(buffer, len);
44 return len;
45 }
46
47 #if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
48
49 #define MAX_PATHS 64
50 xmlExternalEntityLoader defaultEntityLoader = NULL;
51 static xmlChar *paths[MAX_PATHS + 1];
52 static int nbpaths = 0;
53 static QHash<QString, QString> replaceURLList;
54
55 /*
56 * Entity loading control and customization.
57 * taken from xsltproc.c
58 */
xsltprocExternalEntityLoader(const char * _URL,const char * ID,xmlParserCtxtPtr ctxt)59 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID, xmlParserCtxtPtr ctxt)
60 {
61 xmlParserInputPtr ret;
62 warningSAXFunc warning = NULL;
63
64 // use local available dtd versions instead of fetching it every time from the internet
65 QString url = QLatin1String(_URL);
66 QHash<QString, QString>::const_iterator i;
67 for (i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) {
68 if (url.startsWith(i.key())) {
69 url.replace(i.key(), i.value());
70 qCDebug(KDocToolsLog) << "converted" << _URL << "to" << url;
71 }
72 }
73 char URL[1024];
74 strcpy(URL, url.toLatin1().constData());
75
76 const char *lastsegment = URL;
77 const char *iter = URL;
78
79 if (nbpaths > 0) {
80 while (*iter != 0) {
81 if (*iter == '/') {
82 lastsegment = iter + 1;
83 }
84 iter++;
85 }
86 }
87
88 if ((ctxt != NULL) && (ctxt->sax != NULL)) {
89 warning = ctxt->sax->warning;
90 ctxt->sax->warning = NULL;
91 }
92
93 if (defaultEntityLoader != NULL) {
94 ret = defaultEntityLoader(URL, ID, ctxt);
95 if (ret != NULL) {
96 if (warning != NULL) {
97 ctxt->sax->warning = warning;
98 }
99 qCDebug(KDocToolsLog) << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\"";
100 return (ret);
101 }
102 }
103 for (int i = 0; i < nbpaths; i++) {
104 xmlChar *newURL;
105
106 newURL = xmlStrdup((const xmlChar *)paths[i]);
107 newURL = xmlStrcat(newURL, (const xmlChar *)"/");
108 newURL = xmlStrcat(newURL, (const xmlChar *)lastsegment);
109 if (newURL != NULL) {
110 ret = defaultEntityLoader((const char *)newURL, ID, ctxt);
111 if (ret != NULL) {
112 if (warning != NULL) {
113 ctxt->sax->warning = warning;
114 }
115 qCDebug(KDocToolsLog) << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\"";
116 xmlFree(newURL);
117 return (ret);
118 }
119 xmlFree(newURL);
120 }
121 }
122 if (warning != NULL) {
123 ctxt->sax->warning = warning;
124 if (URL != NULL) {
125 warning(ctxt, "failed to load external entity \"%s\"\n", URL);
126 } else if (ID != NULL) {
127 warning(ctxt, "failed to load external entity \"%s\"\n", ID);
128 }
129 }
130 return (NULL);
131 }
132 #endif
133
/**
 * Applies the XSLT stylesheet @p tss to the XML document @p pat.
 *
 * @param pat    path of the XML document to transform
 * @param tss    path of the XSLT stylesheet
 * @param params stylesheet parameters; a terminating nullptr is appended to a
 *               copy before it is handed to xsltApplyStylesheet()
 * @return the serialized result. An empty string is returned when the
 *         stylesheet or the document cannot be parsed, when no parser context
 *         can be created, or when the parser context is flagged as not valid.
 *         A single space is returned when the steps after parsing produce no
 *         output.
 */
QString KDocTools::transform(const QString &pat, const QString &tss, const QVector<const char *> &params)
{
    QString parsed;

    INFO(i18n("Parsing stylesheet"));
#if defined(SIMPLE_XSLT) && defined(Q_OS_WIN)
    // prepare use of local available dtd versions instead of fetching every time from the internet
    // this approach is url based
    if (!defaultEntityLoader) {
        defaultEntityLoader = xmlGetExternalEntityLoader();
        xmlSetExternalEntityLoader(xsltprocExternalEntityLoader);

        replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.5")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD);
    }
#endif

    xsltStylesheetPtr style_sheet = xsltParseStylesheetFile(reinterpret_cast<const xmlChar *>(QFile::encodeName(tss).constData()));
    if (!style_sheet) {
        return parsed;
    }
    xmlIndentTreeOutput = (style_sheet->indent == 1) ? 1 : 0;

    INFO(i18n("Parsing document"));

    xmlParserCtxtPtr pctxt = xmlNewParserCtxt();
    if (pctxt == nullptr) {
        // the stylesheet is owned by this function on every exit path
        xsltFreeStylesheet(style_sheet);
        return parsed;
    }

    xmlDocPtr doc = xmlCtxtReadFile(pctxt, QFile::encodeName(pat).constData(), nullptr, XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_NONET);
    /* Remember the validity flag, then clean the context pointer, now useless */
    const bool context_invalid = (pctxt->valid == 0);
    xmlFreeParserCtxt(pctxt);

    /* Check both the returned doc (for parsing errors) and the context
       (for validation errors) */
    if (doc == nullptr || context_invalid) {
        if (doc != nullptr) {
            xmlFreeDoc(doc);
        }
        xsltFreeStylesheet(style_sheet);
        return parsed;
    }

    INFO(i18n("Applying stylesheet"));
    QVector<const char *> p = params;
    p.append(nullptr);
    xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0]));
    xmlFreeDoc(doc);
    if (res != nullptr) {
        xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, nullptr, &parsed, nullptr);
        if (outp != nullptr) {
            outp->written = 0;
            INFO(i18n("Writing document"));
            xsltSaveResultTo(outp, res, style_sheet);
            xmlOutputBufferClose(outp);
        } else {
            qCWarning(KDocToolsLog) << "Could not create the output buffer for the transformation result";
        }
        xmlFreeDoc(res);
    }
    xsltFreeStylesheet(style_sheet);

    if (parsed.isEmpty()) {
        parsed = QLatin1Char(' '); // avoid error message
    }
    return parsed;
}
206
207 /*
208 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID,
209 xmlParserCtxtPtr ctxt) {
210 xmlParserInputPtr ret = NULL;
211
212 // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory);
213
214 if (URL == NULL) {
215 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
216 ctxt->sax->warning(ctxt,
217 "failed to load external entity \"%s\"\n", ID);
218 return(NULL);
219 }
220 if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN"))
221 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
222 if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN"))
223 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd";
224
225 QString file;
226 if (QFile::exists( QDir::currentPath() + "/" + URL ) )
227 file = QDir::currentPath() + "/" + URL;
228 else
229 file = locate("dtd", URL);
230
231 ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData());
232 if (ret == NULL) {
233 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
234 ctxt->sax->warning(ctxt,
235
236 "failed to load external entity \"%s\"\n", URL);
237 }
238 return(ret);
239 }
240 */
241
/**
 * Extracts the body of one <FILENAME ...> element from @p parsed.
 *
 * @param parsed the complete transformation output
 * @param index  position of the element's opening "<FILENAME " tag
 * @return the text between the end of that opening tag and its matching
 *         "</FILENAME>", with the span from the first nested "<FILENAME " to
 *         the last nested "</FILENAME>" removed. Malformed input does not
 *         crash or hang: an empty string is returned when no '>' follows
 *         @p index, and a missing closing tag makes the element extend to the
 *         end of @p parsed.
 */
QString splitOut(const QString &parsed, int index)
{
    const QString openingTag = QStringLiteral("<FILENAME ");
    const QString closingTag = QStringLiteral("</FILENAME>");

    // The content starts right behind the '>' that ends the opening tag.
    if (index < 0) {
        return QString();
    }
    const int tagEnd = parsed.indexOf(QLatin1Char('>'), index);
    if (tagEnd == -1) {
        return QString();
    }
    const int start_index = tagEnd + 1;

    // Nesting depth bookkeeping: every opening tag seen before the next
    // closing tag increments, every closing tag decrements; the element is
    // complete when the counter is back at zero.
    int inside = 0;

    QString filedata;

    while (true) {
        const int endindex = parsed.indexOf(closingTag, index);
        if (endindex == -1) {
            // Unterminated element: without this check the search position
            // would be reset and the loop could spin forever.
            filedata = parsed.mid(start_index);
            break;
        }
        const int startindex = parsed.indexOf(openingTag, index) + 1;

        if (startindex > 0) {
            if (startindex < endindex) {
                index = startindex + 8;
                inside++;
            } else {
                index = endindex + 8;
                inside--;
            }
        } else {
            inside--;
            index = endindex + 1;
        }

        if (inside == 0) {
            filedata = parsed.mid(start_index, endindex - start_index);
            break;
        }
    }

    // Drop nested files: everything from the first nested opening tag up to
    // and including the last nested closing tag.
    const int nestedStart = filedata.indexOf(openingTag);
    if (nestedStart > 0) {
        const int nestedClose = filedata.lastIndexOf(closingTag);
        if (nestedClose == -1) {
            // nested element without an end: it runs to the end of the data
            filedata.truncate(nestedStart);
        } else {
            filedata = filedata.left(nestedStart) + filedata.mid(nestedClose + closingTag.length());
        }
    }

    return filedata;
}
293
fromUnicode(const QString & data)294 QByteArray fromUnicode(const QString &data)
295 {
296 #ifdef Q_OS_WIN
297 return data.toUtf8();
298 #else
299 QTextCodec *locale = QTextCodec::codecForLocale();
300 QByteArray result;
301 char buffer[30000];
302 uint buffer_len = 0;
303 uint len = 0;
304 int offset = 0;
305 const int part_len = 5000;
306
307 QString part;
308
309 while (offset < data.length()) {
310 part = data.mid(offset, part_len);
311 QByteArray test = locale->fromUnicode(part);
312 if (locale->toUnicode(test) == part) {
313 result += test;
314 offset += part_len;
315 continue;
316 }
317 len = part.length();
318 buffer_len = 0;
319 for (uint i = 0; i < len; i++) {
320 QByteArray test = locale->fromUnicode(part.mid(i, 1));
321 if (locale->toUnicode(test) == part.mid(i, 1)) {
322 if (buffer_len + test.length() + 1 > sizeof(buffer)) {
323 break;
324 }
325 strcpy(buffer + buffer_len, test.data());
326 buffer_len += test.length();
327 } else {
328 QString res = QStringLiteral("&#%1;").arg(part.at(i).unicode());
329 test = locale->fromUnicode(res);
330 if (buffer_len + test.length() + 1 > sizeof(buffer)) {
331 break;
332 }
333 strcpy(buffer + buffer_len, test.data());
334 buffer_len += test.length();
335 }
336 }
337 result += QByteArray(buffer, buffer_len + 1);
338 offset += part_len;
339 }
340 return result;
341 #endif
342 }
343
replaceCharsetHeader(QString & output)344 void replaceCharsetHeader(QString &output)
345 {
346 QString name;
347 #ifdef Q_OS_WIN
348 name = "utf-8";
349 // may be required for all xml output
350 if (output.contains("<table-of-contents>"))
351 output.replace(QLatin1String("<?xml version=\"1.0\"?>"), QLatin1String("<?xml version=\"1.0\" encoding=\"%1\"?>").arg(name));
352 #else
353 name = QLatin1String(QTextCodec::codecForLocale()->name());
354 name.replace(QLatin1String("ISO "), QLatin1String("iso-"));
355 output.replace(QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"),
356 QLatin1String("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\">").arg(name));
357 #endif
358 }
359
extractFileToBuffer(const QString & content,const QString & filename)360 QByteArray KDocTools::extractFileToBuffer(const QString &content, const QString &filename)
361 {
362 const int index = content.indexOf(QLatin1String("<FILENAME filename=\"%1\"").arg(filename));
363 if (index == -1) {
364 if (filename == QLatin1String("index.html")) {
365 return fromUnicode(content);
366 } else {
367 return QByteArray(); // null value, not just empty
368 }
369 }
370 QString data_file = splitOut(content, index);
371 replaceCharsetHeader(data_file);
372 return fromUnicode(data_file);
373 }
374
375 class DtdStandardDirs
376 {
377 public:
378 QString srcdir;
379 };
380
Q_GLOBAL_STATIC(DtdStandardDirs,s_dtdDirs)381 Q_GLOBAL_STATIC(DtdStandardDirs, s_dtdDirs)
382
383 void KDocTools::setupStandardDirs(const QString &srcdir)
384 {
385 QByteArray catalogs;
386
387 if (srcdir.isEmpty()) {
388 catalogs += getKDocToolsCatalogs().join(" ").toLocal8Bit();
389 } else {
390 catalogs += QUrl::fromLocalFile(srcdir + QStringLiteral("/customization/catalog.xml")).toEncoded();
391 s_dtdDirs()->srcdir = srcdir;
392 }
393 // qCDebug(KDocToolsLog) << "XML_CATALOG_FILES: " << catalogs;
394 qputenv("XML_CATALOG_FILES", catalogs);
395 xmlInitializeCatalog();
396 #if defined(_MSC_VER)
397 /* Workaround: apparently setting XML_CATALOG_FILES set here
398 has no effect on the libxml2 functions.
399 This code path could be used in all cases instead of setting the
400 variable, but this requires more investigation on the reason of
401 the issue. */
402 xmlLoadCatalogs(catalogs.constData());
403 #endif
404 }
405
/**
 * Convenience wrapper around locateFilesInDtdResource(): returns the first
 * match for @p file, or a null QString when nothing was found.
 */
QString KDocTools::locateFileInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
{
    const QStringList matches = locateFilesInDtdResource(file, option);
    if (matches.isEmpty()) {
        return QString();
    }
    return matches.first();
}
411
locateFilesInDtdResource(const QString & file,const QStandardPaths::LocateOptions option)412 QStringList locateFilesInDtdResource(const QString &file, const QStandardPaths::LocateOptions option)
413 {
414 QFileInfo info(file);
415 if (info.exists() && info.isAbsolute()) {
416 return QStringList() << file;
417 }
418
419 const QString srcdir = s_dtdDirs()->srcdir;
420 if (!srcdir.isEmpty()) {
421 const QString test = srcdir + QLatin1Char('/') + file;
422 if (QFile::exists(test)) {
423 return QStringList() << test;
424 }
425 qCDebug(KDocToolsLog) << "Could not locate file" << file << "in" << srcdir;
426 return QStringList();
427 }
428 // Using locateAll() is necessary to be able to find all catalogs when
429 // running in environments where every repository is installed in its own
430 // prefix.
431 // This is the case on build.kde.org where kdelibs4support installs catalogs
432 // in a different prefix than kdoctools.
433 const QString fileName = QStringLiteral("kf5/kdoctools/") + file;
434 QStringList result = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, fileName, option);
435
436 // fallback to stuff installed with KDocTools
437 const QFileInfo fileInInstallDataDir(QStringLiteral(KDOCTOOLS_INSTALL_DATADIR_KF5) + QStringLiteral("/kdoctools/") + file);
438 if (fileInInstallDataDir.exists()) {
439 if ((option == QStandardPaths::LocateFile) && fileInInstallDataDir.isFile()) {
440 result.append(fileInInstallDataDir.absoluteFilePath());
441 }
442 if ((option == QStandardPaths::LocateDirectory) && fileInInstallDataDir.isDir()) {
443 result.append(fileInInstallDataDir.absoluteFilePath());
444 }
445 }
446
447 if (result.isEmpty()) {
448 qCDebug(KDocToolsLog) << "Could not locate file" << fileName << "in" << QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation);
449 }
450 return result;
451 }
452
getKDocToolsCatalogs()453 QStringList getKDocToolsCatalogs()
454 {
455 // Find all catalogs as catalog*.xml, and add them to the list, starting
456 // from catalog.xml (the main one).
457 const QStringList dirNames = locateFilesInDtdResource(QStringLiteral("customization"), QStandardPaths::LocateDirectory);
458 if (dirNames.isEmpty()) {
459 return QStringList();
460 }
461 QStringList catalogFiles;
462 for (const QString &customizationDirName : dirNames) {
463 QDir customizationDir = QDir(customizationDirName);
464 const QStringList catalogFileFilters(QStringLiteral("catalog*.xml"));
465 const QFileInfoList catalogInfoFiles = customizationDir.entryInfoList(catalogFileFilters, QDir::Files, QDir::Name);
466 for (const QFileInfo &fileInfo : catalogInfoFiles) {
467 const QString fullFileName = QUrl::fromLocalFile(fileInfo.absoluteFilePath()).toEncoded();
468 if (fileInfo.fileName() == QStringLiteral("catalog.xml")) {
469 catalogFiles.prepend(fullFileName);
470 } else {
471 catalogFiles.append(fullFileName);
472 }
473 }
474 }
475
476 QStringList catalogs;
477 for (const QString &aCatalog : std::as_const(catalogFiles)) {
478 catalogs << aCatalog;
479 }
480 // qCDebug(KDocToolsLog) << "Found catalogs: " << catalogs;
481 return catalogs;
482 }
483
documentationDirs()484 QStringList KDocTools::documentationDirs()
485 {
486 /* List of paths containing documentation */
487 return QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("doc/HTML"), QStandardPaths::LocateDirectory);
488 }
489