1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the Qt Assistant of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39
40 #include "qhelpsearchindexwriter_default_p.h"
41 #include "qhelp_global.h"
42 #include "qhelpenginecore.h"
43 #include "qhelpdbreader_p.h"
44
45 #include <QtCore/QDataStream>
46 #include <QtCore/QDateTime>
47 #include <QtCore/QDir>
48 #include <QtCore/QTextCodec>
49 #include <QtCore/QTextStream>
50 #include <QtCore/QSet>
51 #include <QtCore/QUrl>
52 #include <QtCore/QVariant>
53 #include <QtSql/QSqlDatabase>
54 #include <QtSql/QSqlDriver>
55 #include <QtSql/QSqlError>
56 #include <QtSql/QSqlQuery>
57
58 #include <QTextDocument>
59
60 QT_BEGIN_NAMESPACE
61
62 namespace fulltextsearch {
63 namespace qt {
64
// File name of the SQLite database, located inside the index directory,
// that holds the full text search tables.
const char FTS_DB_NAME[] = "fts";
66
Writer(const QString & path)67 Writer::Writer(const QString &path)
68 : m_dbDir(path)
69 {
70 clearLegacyIndex();
71 QDir().mkpath(m_dbDir);
72 m_uniqueId = QHelpGlobal::uniquifyConnectionName(QLatin1String("QHelpWriter"), this);
73 m_db = new QSqlDatabase();
74 *m_db = QSqlDatabase::addDatabase(QLatin1String("QSQLITE"), m_uniqueId);
75 const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME);
76 m_db->setDatabaseName(dbPath);
77 if (!m_db->open()) {
78 const QString &error = QHelpSearchIndexWriter::tr("Cannot open database \"%1\" using connection \"%2\": %3")
79 .arg(dbPath, m_uniqueId, m_db->lastError().text());
80 qWarning("%s", qUtf8Printable(error));
81 delete m_db;
82 m_db = nullptr;
83 QSqlDatabase::removeDatabase(m_uniqueId);
84 m_uniqueId = QString();
85 } else {
86 startTransaction();
87 }
88 }
89
tryInit(bool reindex)90 bool Writer::tryInit(bool reindex)
91 {
92 if (!m_db)
93 return true;
94
95 QSqlQuery query(*m_db);
96 // HACK: we try to perform any modifying command just to check if
97 // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver)
98 if (!query.exec(QLatin1String("CREATE TABLE foo ();"))) {
99 if (query.lastError().nativeErrorCode() == QLatin1String("5")) // db is locked
100 return false;
101 }
102 // HACK: clear what we have created
103 query.exec(QLatin1String("DROP TABLE foo;"));
104
105 init(reindex);
106 return true;
107 }
108
hasDB()109 bool Writer::hasDB()
110 {
111 if (!m_db)
112 return false;
113
114 QSqlQuery query(*m_db);
115
116 query.prepare(QLatin1String("SELECT id FROM info LIMIT 1"));
117 query.exec();
118
119 return query.next();
120 }
121
clearLegacyIndex()122 void Writer::clearLegacyIndex()
123 {
124 // Clear old legacy clucene index.
125 // More important in case of Creator, since
126 // the index folder is common for all Creator versions
127 QDir dir(m_dbDir);
128 if (!dir.exists())
129 return;
130
131 const QStringList &list = dir.entryList(QDir::Files | QDir::Hidden);
132 if (!list.contains(QLatin1String(FTS_DB_NAME))) {
133 for (const QString &item : list)
134 dir.remove(item);
135 }
136 }
137
init(bool reindex)138 void Writer::init(bool reindex)
139 {
140 if (!m_db)
141 return;
142
143 QSqlQuery query(*m_db);
144
145 if (reindex && hasDB()) {
146 m_needOptimize = true;
147
148 query.exec(QLatin1String("DROP TABLE titles;"));
149 query.exec(QLatin1String("DROP TABLE contents;"));
150 query.exec(QLatin1String("DROP TABLE info;"));
151 }
152
153 query.exec(QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"));
154
155 query.exec(QLatin1String("CREATE VIRTUAL TABLE titles USING fts5("
156 "namespace UNINDEXED, attributes UNINDEXED, "
157 "url UNINDEXED, title, "
158 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
159 query.exec(QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN "
160 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
161 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
162 "END;"));
163 query.exec(QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN "
164 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
165 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
166 "END;"));
167 query.exec(QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN "
168 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
169 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
170 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
171 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
172 "END;"));
173
174 query.exec(QLatin1String("CREATE VIRTUAL TABLE contents USING fts5("
175 "namespace UNINDEXED, attributes UNINDEXED, "
176 "url UNINDEXED, title, data, "
177 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
178 query.exec(QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN "
179 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
180 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
181 "END;"));
182 query.exec(QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN "
183 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
184 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
185 "END;"));
186 query.exec(QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN "
187 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
188 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
189 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
190 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
191 "END;"));
192 }
193
~Writer()194 Writer::~Writer()
195 {
196 if (m_db) {
197 m_db->close();
198 delete m_db;
199 }
200
201 if (!m_uniqueId.isEmpty())
202 QSqlDatabase::removeDatabase(m_uniqueId);
203 }
204
flush()205 void Writer::flush()
206 {
207 if (!m_db)
208 return;
209
210 QSqlQuery query(*m_db);
211
212 query.prepare(QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"));
213 query.addBindValue(m_namespaces);
214 query.addBindValue(m_attributes);
215 query.addBindValue(m_urls);
216 query.addBindValue(m_titles);
217 query.addBindValue(m_contents);
218 query.execBatch();
219
220 m_namespaces = QVariantList();
221 m_attributes = QVariantList();
222 m_urls = QVariantList();
223 m_titles = QVariantList();
224 m_contents = QVariantList();
225 }
226
removeNamespace(const QString & namespaceName)227 void Writer::removeNamespace(const QString &namespaceName)
228 {
229 if (!m_db)
230 return;
231
232 if (!hasNamespace(namespaceName))
233 return; // no data to delete
234
235 m_needOptimize = true;
236
237 QSqlQuery query(*m_db);
238
239 query.prepare(QLatin1String("DELETE FROM info WHERE namespace = ?"));
240 query.addBindValue(namespaceName);
241 query.exec();
242 }
243
hasNamespace(const QString & namespaceName)244 bool Writer::hasNamespace(const QString &namespaceName)
245 {
246 if (!m_db)
247 return false;
248
249 QSqlQuery query(*m_db);
250
251 query.prepare(QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1"));
252 query.addBindValue(namespaceName);
253 query.exec();
254
255 return query.next();
256 }
257
insertDoc(const QString & namespaceName,const QString & attributes,const QString & url,const QString & title,const QString & contents)258 void Writer::insertDoc(const QString &namespaceName,
259 const QString &attributes,
260 const QString &url,
261 const QString &title,
262 const QString &contents)
263 {
264 m_namespaces.append(namespaceName);
265 m_attributes.append(attributes);
266 m_urls.append(url);
267 m_titles.append(title);
268 m_contents.append(contents);
269 }
270
startTransaction()271 void Writer::startTransaction()
272 {
273 if (!m_db)
274 return;
275
276 m_needOptimize = false;
277 if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions))
278 m_db->transaction();
279 }
280
endTransaction()281 void Writer::endTransaction()
282 {
283 if (!m_db)
284 return;
285
286 QSqlQuery query(*m_db);
287
288 if (m_needOptimize) {
289 query.exec(QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')"));
290 query.exec(QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')"));
291 }
292
293 if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions))
294 m_db->commit();
295
296 if (m_needOptimize)
297 query.exec(QLatin1String("VACUUM"));
298 }
299
QHelpSearchIndexWriter()300 QHelpSearchIndexWriter::QHelpSearchIndexWriter()
301 : QThread()
302 , m_cancel(false)
303 {
304 }
305
~QHelpSearchIndexWriter()306 QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
307 {
308 m_mutex.lock();
309 this->m_cancel = true;
310 m_mutex.unlock();
311
312 wait();
313 }
314
cancelIndexing()315 void QHelpSearchIndexWriter::cancelIndexing()
316 {
317 QMutexLocker lock(&m_mutex);
318 m_cancel = true;
319 }
320
updateIndex(const QString & collectionFile,const QString & indexFilesFolder,bool reindex)321 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
322 const QString &indexFilesFolder,
323 bool reindex)
324 {
325 wait();
326 QMutexLocker lock(&m_mutex);
327
328 m_cancel = false;
329 m_reindex = reindex;
330 m_collectionFile = collectionFile;
331 m_indexFilesFolder = indexFilesFolder;
332
333 lock.unlock();
334
335 start(QThread::LowestPriority);
336 }
337
// Key of the help engine's custom value under which the serialized map of
// indexed namespaces is stored.
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
339
readIndexMap(const QHelpEngineCore & engine)340 static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine)
341 {
342 QMap<QString, QDateTime> indexMap;
343 QDataStream dataStream(engine.customValue(
344 QLatin1String(IndexedNamespacesKey)).toByteArray());
345 dataStream >> indexMap;
346 return indexMap;
347 }
348
writeIndexMap(QHelpEngineCore * engine,const QMap<QString,QDateTime> & indexMap)349 static bool writeIndexMap(QHelpEngineCore *engine,
350 const QMap<QString, QDateTime> &indexMap)
351 {
352 QByteArray data;
353
354 QDataStream dataStream(&data, QIODevice::ReadWrite);
355 dataStream << indexMap;
356
357 return engine->setCustomValue(
358 QLatin1String(IndexedNamespacesKey), data);
359 }
360
clearIndexMap(QHelpEngineCore * engine)361 static bool clearIndexMap(QHelpEngineCore *engine)
362 {
363 return engine->removeCustomValue(QLatin1String(IndexedNamespacesKey));
364 }
365
run()366 void QHelpSearchIndexWriter::run()
367 {
368 QMutexLocker lock(&m_mutex);
369
370 if (m_cancel)
371 return;
372
373 const bool reindex(m_reindex);
374 const QString collectionFile(m_collectionFile);
375 const QString indexPath(m_indexFilesFolder);
376
377 lock.unlock();
378
379 QHelpEngineCore engine(collectionFile, nullptr);
380 if (!engine.setupData())
381 return;
382
383 if (reindex)
384 clearIndexMap(&engine);
385
386 emit indexingStarted();
387
388 Writer writer(indexPath);
389
390 while (!writer.tryInit(reindex))
391 sleep(1);
392
393 const QStringList ®isteredDocs = engine.registeredDocumentations();
394 QMap<QString, QDateTime> indexMap = readIndexMap(engine);
395
396 if (!reindex) {
397 for (const QString &namespaceName : registeredDocs) {
398 if (indexMap.contains(namespaceName)) {
399 const QString path = engine.documentationFileName(namespaceName);
400 if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
401 // Remove some outdated indexed stuff
402 indexMap.remove(namespaceName);
403 writer.removeNamespace(namespaceName);
404 } else if (!writer.hasNamespace(namespaceName)) {
405 // No data in fts db for namespace.
406 // The namespace could have been removed from fts db
407 // or the whole fts db have been removed
408 // without removing it from indexMap.
409 indexMap.remove(namespaceName);
410 }
411 } else {
412 // Needed in case namespaceName was removed from indexMap
413 // without removing it from fts db.
414 // May happen when e.g. qch file was removed manually
415 // without removing fts db.
416 writer.removeNamespace(namespaceName);
417 }
418 // TODO: we may also detect if there are any other data
419 // and remove it
420 }
421 } else {
422 indexMap.clear();
423 }
424
425 for (const QString &namespaceName : indexMap.keys()) {
426 if (!registeredDocs.contains(namespaceName)) {
427 indexMap.remove(namespaceName);
428 writer.removeNamespace(namespaceName);
429 }
430 }
431
432 for (const QString &namespaceName : registeredDocs) {
433 lock.relock();
434 if (m_cancel) {
435 // store what we have done so far
436 writeIndexMap(&engine, indexMap);
437 writer.endTransaction();
438 emit indexingFinished();
439 return;
440 }
441 lock.unlock();
442
443 // if indexed, continue
444 if (indexMap.contains(namespaceName))
445 continue;
446
447 const QString fileName = engine.documentationFileName(namespaceName);
448 QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName(
449 fileName, this), nullptr);
450 if (!reader.init())
451 continue;
452
453 const QString virtualFolder = reader.virtualFolder();
454
455 const QList<QStringList> &attributeSets =
456 engine.filterAttributeSets(namespaceName);
457
458 for (const QStringList &attributes : attributeSets) {
459 const QString &attributesString = attributes.join(QLatin1Char('|'));
460
461 const QMap<QString, QByteArray> htmlFiles
462 = reader.filesData(attributes, QLatin1String("html"));
463 const QMap<QString, QByteArray> htmFiles
464 = reader.filesData(attributes, QLatin1String("htm"));
465 const QMap<QString, QByteArray> txtFiles
466 = reader.filesData(attributes, QLatin1String("txt"));
467
468 QMultiMap<QString, QByteArray> files = htmlFiles;
469 files.unite(htmFiles);
470 files.unite(txtFiles);
471
472 for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) {
473 lock.relock();
474 if (m_cancel) {
475 // store what we have done so far
476 writeIndexMap(&engine, indexMap);
477 writer.endTransaction();
478 emit indexingFinished();
479 return;
480 }
481 lock.unlock();
482
483 const QString &file = it.key();
484 const QByteArray &data = it.value();
485
486 if (data.isEmpty())
487 continue;
488
489 QUrl url;
490 url.setScheme(QLatin1String("qthelp"));
491 url.setAuthority(namespaceName);
492 url.setPath(QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file);
493
494 if (url.hasFragment())
495 url.setFragment(QString());
496
497 const QString &fullFileName = url.toString();
498 if (!fullFileName.endsWith(QLatin1String(".html"))
499 && !fullFileName.endsWith(QLatin1String(".htm"))
500 && !fullFileName.endsWith(QLatin1String(".txt"))) {
501 continue;
502 }
503
504 QTextStream s(data);
505 const QString &en = QHelpGlobal::codecFromData(data);
506 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
507
508 const QString &text = s.readAll();
509 if (text.isEmpty())
510 continue;
511
512 QString title;
513 QString contents;
514 if (fullFileName.endsWith(QLatin1String(".txt"))) {
515 title = fullFileName.mid(fullFileName.lastIndexOf(QLatin1Char('/')) + 1);
516 contents = text.toHtmlEscaped();
517 } else {
518 QTextDocument doc;
519 doc.setHtml(text);
520
521 title = doc.metaInformation(QTextDocument::DocumentTitle).toHtmlEscaped();
522 contents = doc.toPlainText().toHtmlEscaped();
523 }
524
525 writer.insertDoc(namespaceName, attributesString, fullFileName, title, contents);
526 }
527 }
528 writer.flush();
529 const QString &path = engine.documentationFileName(namespaceName);
530 indexMap.insert(namespaceName, QFileInfo(path).lastModified());
531 }
532
533 writeIndexMap(&engine, indexMap);
534
535 writer.endTransaction();
536 emit indexingFinished();
537 }
538
} // namespace qt
540 } // namespace fulltextsearch
541
542 QT_END_NAMESPACE
543