1 /****************************************************************************
2 **
3 ** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: Qt Software Information (qt-info@nokia.com)
5 **
6 ** This file is part of the Qt Assistant of the Qt Toolkit.
7 **
8 ** Commercial Usage
9 ** Licensees holding valid Qt Commercial licenses may use this file in
10 ** accordance with the Qt Commercial License Agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and Nokia.
13 **
14 **
15 ** GNU General Public License Usage
16 ** Alternatively, this file may be used under the terms of the GNU
17 ** General Public License versions 2.0 or 3.0 as published by the Free
18 ** Software Foundation and appearing in the file LICENSE.GPL included in
19 ** the packaging of this file. Please review the following information
20 ** to ensure GNU General Public Licensing requirements will be met:
21 ** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
22 ** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
23 ** exception, Nokia gives you certain additional rights. These rights
24 ** are described in the Nokia Qt GPL Exception version 1.3, included in
25 ** the file GPL_EXCEPTION.txt in this package.
26 **
27 ** Qt for Windows(R) Licensees
28 ** As a special exception, Nokia, as the sole copyright holder for Qt
29 ** Designer, grants users of the Qt/Eclipse Integration plug-in the
30 ** right for the Qt/Eclipse Integration to link to functionality
31 ** provided by Qt Designer and its related libraries.
32 **
33 ** If you are unsure which license is appropriate for your use, please
34 ** contact the sales department at qt-sales@nokia.com.
35 **
36 ****************************************************************************/
37
#include "HelpIndex.h"

#include <QApplication>
#include <QByteArray>
#include <QDir>
#include <QFile>
#include <QStringList>
#include <QTextCodec>
#include <QTextDocument>
#include <QTextStream>
#include <QTimer>
#include <QUrl>
#include <QtAlgorithms>

#include <algorithm>
#include <cctype>
#include <utility>
54
55 QT_BEGIN_NAMESPACE
56
57 struct Term
58 {
59 Term() = default;
TermTerm60 Term(QString t, int f, QVector<Document> l)
61 : term(std::move(t))
62 , frequency(f)
63 , documents(std::move(l))
64 {
65 }
66 QString term;
67 int frequency = -1;
68 QVector<Document> documents;
operator <Term69 bool operator<(const Term & i2) const { return frequency < i2.frequency; }
70 };
71
operator >>(QDataStream & s,Document & l)72 QDataStream & operator>>(QDataStream & s, Document & l)
73 {
74 s >> l.docNumber;
75 s >> l.frequency;
76 return s;
77 }
78
operator <<(QDataStream & s,const Document & l)79 QDataStream & operator<<(QDataStream & s, const Document & l)
80 {
81 s << (qint16)l.docNumber;
82 s << (qint16)l.frequency;
83 return s;
84 }
85
HelpIndex(QString dp,const QString &)86 HelpIndex::HelpIndex(QString dp, const QString & /* hp */)
87 : QObject(nullptr)
88 , docPath(std::move(dp))
89 {
90 alreadyHaveDocList = false;
91
92 connect(qApp, SIGNAL(lastWindowClosed()), this, SLOT(setLastWinClosed()));
93
94 m_pTimer = new QTimer(this);
95 m_pTimer->setSingleShot(true);
96 m_pTimer->setInterval(0);
97 connect(m_pTimer, SIGNAL(timeout()), this, SLOT(filterNext()));
98 }
99
HelpIndex(QStringList dl,const QString &)100 HelpIndex::HelpIndex(QStringList dl, const QString & /* hp */)
101 : QObject(nullptr)
102 , docList{ std::move(dl) }
103 {
104 alreadyHaveDocList = true;
105
106 connect(qApp, SIGNAL(lastWindowClosed()), this, SLOT(setLastWinClosed()));
107 }
108
setLastWinClosed()109 void HelpIndex::setLastWinClosed()
110 {
111 lastWindowClosed = true;
112 }
113
setDictionaryFile(const QString & f)114 void HelpIndex::setDictionaryFile(const QString & f)
115 {
116 dictFile = f;
117 }
118
setDocListFile(const QString & f)119 void HelpIndex::setDocListFile(const QString & f)
120 {
121 docListFile = f;
122 }
123
setDocList(const QStringList & lst)124 void HelpIndex::setDocList(const QStringList & lst)
125 {
126 docList = lst;
127 }
128
makeIndex()129 void HelpIndex::makeIndex()
130 {
131 if(!alreadyHaveDocList)
132 setupDocumentList();
133
134 lastWindowClosed = false;
135 emit indexingStart(docList.count());
136 dict.clear();
137 m_iCurItem = 0;
138 m_pTimer->start(); //singleshot
139 }
140
filterNext()141 void HelpIndex::filterNext()
142 {
143 if(m_iCurItem < docList.count() && !lastWindowClosed)
144 {
145 QUrl url(docList.at(m_iCurItem));
146 parseDocument(url.toLocalFile(), m_iCurItem);
147 emit indexingProgress(m_iCurItem);
148 m_iCurItem++;
149 m_pTimer->start(); //singleshot
150 }
151 else
152 {
153 emit indexingEnd();
154 }
155 }
156
setupDocumentList()157 void HelpIndex::setupDocumentList()
158 {
159 docList.clear();
160 titleList.clear();
161 QDir d(docPath);
162 QStringList filters{QLatin1String("*.html")};
163 QStringList lst = d.entryList(filters);
164 for(auto&& item : lst)
165 {
166 QString filename = QLatin1String("file:///") + docPath + QLatin1String("/") + item;
167 docList.append(filename);
168 titleList.append(getDocumentTitle(filename));
169 }
170 }
171
insertInDict(const QString & str,int docNum)172 void HelpIndex::insertInDict(const QString & str, int docNum)
173 {
174 if(str == QLatin1String("amp") || str == QLatin1String("nbsp"))
175 return;
176 Entry * e = nullptr;
177 if(dict.count())
178 e = dict[str];
179
180 if(e)
181 {
182 if(e->documents.last().docNumber != docNum)
183 e->documents.append(Document(docNum, 1));
184 else
185 e->documents.last().frequency++;
186 }
187 else
188 {
189 dict.insert(str, new Entry(docNum));
190 }
191 }
192
getCharsetForDocument(QFile * file)193 QString HelpIndex::getCharsetForDocument(QFile * file)
194 {
195 QTextStream s(file);
196 QString contents = s.readAll();
197
198 QString encoding;
199 int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive);
200 if(start > 0)
201 {
202 int end = contents.indexOf(QLatin1String(">"), start);
203 QString meta = contents.mid(start + 5, end - start);
204 meta = meta.toLower();
205 QRegExp r(QLatin1String("charset=([^\"\\s]+)"));
206 if(r.indexIn(meta) != -1)
207 encoding = r.cap(1);
208 }
209
210 file->seek(0);
211 if(encoding.isEmpty())
212 return QLatin1String("utf-8");
213 return encoding;
214 }
215
parseDocument(const QString & filename,int docNum)216 void HelpIndex::parseDocument(const QString & filename, int docNum)
217 {
218 QFile file(filename);
219 if(!file.open(QFile::ReadOnly))
220 {
221 qWarning("Can't open file %s", qPrintable(filename));
222 return;
223 }
224
225 QTextStream s(&file);
226 QString en = getCharsetForDocument(&file);
227 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
228
229 QString text = s.readAll();
230 if(text.isNull())
231 return;
232
233 bool valid = true;
234 const QChar * buf = text.unicode();
235 QChar str[64];
236 QChar c = buf[0];
237 int j = 0;
238 int i = 0;
239 while(j < text.length())
240 {
241 if(c == QLatin1Char('<') || c == QLatin1Char('&'))
242 {
243 valid = false;
244 if(i > 1)
245 insertInDict(QString(str, i), docNum);
246 i = 0;
247 c = buf[++j];
248 continue;
249 }
250 if((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid)
251 {
252 valid = true;
253 c = buf[++j];
254 continue;
255 }
256 if(!valid)
257 {
258 c = buf[++j];
259 continue;
260 }
261 if((c.isLetterOrNumber() || c == QLatin1Char('_')) && i < 63)
262 {
263 str[i] = c.toLower();
264 ++i;
265 }
266 else
267 {
268 if(i > 1)
269 insertInDict(QString(str, i), docNum);
270 i = 0;
271 }
272 c = buf[++j];
273 }
274 if(i > 1)
275 insertInDict(QString(str, i), docNum);
276 file.close();
277 }
278
writeDict()279 void HelpIndex::writeDict()
280 {
281 QFile f(dictFile);
282 qDebug("Write dict to %s", f.fileName().toUtf8().data());
283 if(!f.open(QFile::WriteOnly))
284 return;
285 QDataStream s(&f);
286 for(auto it = dict.cbegin(); it != dict.cend(); ++it)
287 {
288 s << it.key();
289 s << it.value()->documents.count();
290 s << it.value()->documents;
291 }
292 f.close();
293 writeDocumentList();
294 }
295
writeDocumentList()296 void HelpIndex::writeDocumentList()
297 {
298 QFile f(docListFile);
299 if(!f.open(QFile::WriteOnly))
300 return;
301 QDataStream s(&f);
302 s << docList;
303
304 QFile f1(docListFile + ".titles");
305 if(!f1.open(QFile::WriteOnly))
306 return;
307 QDataStream s1(&f1);
308 s1 << titleList;
309 }
310
readDict()311 void HelpIndex::readDict()
312 {
313 QFile f(dictFile);
314 if(!f.open(QFile::ReadOnly))
315 return;
316
317 dict.clear();
318 QDataStream s(&f);
319 QString key;
320 int numOfDocs;
321 QVector<Document> docs;
322 while(!s.atEnd())
323 {
324 s >> key;
325 s >> numOfDocs;
326 docs.resize(numOfDocs);
327 s >> docs;
328 dict.insert(key, new Entry(docs));
329 }
330 f.close();
331 readDocumentList();
332 }
333
readDocumentList()334 void HelpIndex::readDocumentList()
335 {
336 QFile f(docListFile);
337 if(!f.open(QFile::ReadOnly))
338 return;
339 QDataStream s(&f);
340 s >> docList;
341 QFile f1(docListFile + ".titles");
342 if(!f1.open(QFile::ReadOnly))
343 return;
344 QDataStream s1(&f1);
345 s1 >> titleList;
346 }
347
// Runs a search and returns the URLs of the matching documents.
//
// terms    - words that must all occur in a document; a word containing '*'
//            is expanded through getWildcardTerms()/setupDummyTerm().
// termSeq  - phrases; when non-empty, each candidate document is additionally
//            checked with searchForPattern().
// seqWords - the individual words of those phrases (passed through to
//            searchForPattern()).
//
// Returns an empty list as soon as a non-wildcard term is not in the
// dictionary. Results are ordered by Document's operator< after the
// frequencies of all terms have been summed per document.
QStringList HelpIndex::query(const QStringList & terms, const QStringList & termSeq, const QStringList & seqWords)
{
    QList<Term> termList;
    for(const auto & term : terms)
    {
        if(term.contains(QLatin1Char('*')))
        {
            QVector<Document> wcts = setupDummyTerm(getWildcardTerms(term));
            termList.append(Term(QLatin1String("dummy"), wcts.count(), wcts));
            continue;
        }

        // value() instead of operator[]: operator[] would insert a null entry
        // for every unknown term, which writeDict() would later dereference.
        const Entry * e = dict.value(term);
        if(!e)
            return QStringList();
        termList.append(Term(term, e->documents.count(), e->documents));
    }
    if(termList.isEmpty())
        return QStringList();
    std::sort(termList.begin(), termList.end());

    // Start from the term with the fewest documents and intersect with the
    // others, accumulating the frequencies of the surviving documents.
    QVector<Document> minDocs = termList.takeFirst().documents;
    for(const auto & term : termList)
    {
        const QVector<Document> & docs = term.documents;
        for(auto minDoc_it = minDocs.begin(); minDoc_it != minDocs.end();)
        {
            bool found = false;
            for(const auto & doc : docs)
            {
                if(minDoc_it->docNumber == doc.docNumber)
                {
                    minDoc_it->frequency += doc.frequency;
                    found = true;
                    break;
                }
            }
            if(found)
                ++minDoc_it;
            else
                minDoc_it = minDocs.erase(minDoc_it);
        }
    }

    std::sort(minDocs.begin(), minDocs.end());

    QStringList results;
    for(const auto & minDoc : minDocs)
    {
        // The dictionary and the document list are loaded from separate files;
        // ignore document numbers the current list does not cover.
        const int docIndex = minDoc.docNumber;
        if(docIndex < 0 || docIndex >= docList.count())
            continue;

        const QString fileName = docList.at(docIndex);
        if(termSeq.isEmpty() || searchForPattern(termSeq, seqWords, fileName))
            results << fileName;
    }
    return results;
}
412
// Returns the title of the HTML document at URL `fullFileName`.
//
// The title is the text between <title> and </title> (case-insensitive),
// converted to plain text when it looks like rich text. tr("Untitled") is
// used when the document has no such element or it is empty. Results are
// cached per local file name. When the file cannot be opened the local file
// name itself is returned (and not cached).
QString HelpIndex::getDocumentTitle(const QString & fullFileName)
{
    QUrl url(fullFileName);
    QString fileName = url.toLocalFile();

    if(documentTitleCache.contains(fileName))
        return documentTitleCache.value(fileName);

    QFile file(fileName);
    if(!file.open(QFile::ReadOnly))
    {
        qWarning("Can't open file %s", qPrintable(fileName));
        return fileName;
    }
    QTextStream s(&file);
    QString text = s.readAll();

    QString title = tr("Untitled");

    // indexOf() returns -1 when there is no <title>; only compute the content
    // range when the opening tag really exists.
    const int tagPos = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive);
    if(tagPos != -1)
    {
        const int start = tagPos + 7; // length of "<title>"
        const int end = text.indexOf(QLatin1String("</title>"), start, Qt::CaseInsensitive);
        if(end > start)
        {
            title = text.mid(start, end - start);
            if(Qt::mightBeRichText(title))
            {
                QTextDocument doc;
                doc.setHtml(title);
                title = doc.toPlainText();
            }
        }
    }

    documentTitleCache.insert(fileName, title);
    return title;
}
447
getWildcardTerms(const QString & term)448 QStringList HelpIndex::getWildcardTerms(const QString & term)
449 {
450 QStringList lst;
451 QStringList terms = split(term);
452
453 for(auto it = dict.begin(); it != dict.end(); ++it)
454 {
455 int index = 0;
456 bool found = false;
457 QString text(it.key());
458 for(auto iter = terms.cbegin(); iter != terms.cend(); ++iter)
459 {
460 if(*iter == QLatin1String("*"))
461 {
462 found = true;
463 continue;
464 }
465 if(iter == terms.cbegin() && (*iter)[0] != text[0])
466 {
467 found = false;
468 break;
469 }
470 index = text.indexOf(*iter, index);
471 if(*iter == terms.last() && index != text.length() - 1)
472 {
473 index = text.lastIndexOf(*iter);
474 if(index != text.length() - iter->length())
475 {
476 found = false;
477 break;
478 }
479 }
480 if(index != -1)
481 {
482 found = true;
483 index += iter->length();
484 continue;
485 }
486 else
487 {
488 found = false;
489 break;
490 }
491 }
492 if(found)
493 lst << text;
494 }
495
496 return lst;
497 }
498
split(const QString & str)499 QStringList HelpIndex::split(const QString & str)
500 {
501 QStringList lst;
502 int j = 0;
503 int i = str.indexOf(QLatin1Char('*'), j);
504
505 if(str.startsWith(QLatin1String("*")))
506 lst << QLatin1String("*");
507
508 while(i != -1)
509 {
510 if(i > j && i <= (int)str.length())
511 {
512 lst << str.mid(j, i - j);
513 lst << QLatin1String("*");
514 }
515 j = i + 1;
516 i = str.indexOf(QLatin1Char('*'), j);
517 }
518
519 int l = str.length() - 1;
520 if(str.mid(j, l - j + 1).length() > 0)
521 lst << str.mid(j, l - j + 1);
522
523 return lst;
524 }
525
setupDummyTerm(const QStringList & terms)526 QVector<Document> HelpIndex::setupDummyTerm(const QStringList & terms)
527 {
528 QList<Term> termList;
529 for(const auto & term : terms)
530 {
531 if(dict[term])
532 {
533 auto e = dict[term];
534 termList.append(Term(term, e->documents.count(), e->documents));
535 }
536 }
537 if(!termList.count())
538 return QVector<Document>();
539 std::sort(termList.begin(), termList.end());
540
541 auto maxList = termList.takeLast().documents;
542 for(const auto & term : termList)
543 for(const auto & doc : term.documents)
544 if(maxList.indexOf(doc) == -1)
545 maxList.append(doc);
546
547 return maxList;
548 }
549
buildMiniDict(const QString & str)550 void HelpIndex::buildMiniDict(const QString & str)
551 {
552 if(miniDict[str])
553 miniDict[str]->positions.append(wordNum);
554 ++wordNum;
555 }
556
searchForPattern(const QStringList & patterns,const QStringList & words,const QString & fileName)557 bool HelpIndex::searchForPattern(const QStringList & patterns, const QStringList & words, const QString & fileName)
558 {
559 QUrl url(fileName);
560 QString fName = url.toLocalFile();
561 QFile file(fName);
562 if(!file.open(QFile::ReadOnly))
563 {
564 qWarning("Can't open file %s", qPrintable(fName));
565 return false;
566 }
567
568 wordNum = 3;
569 miniDict.clear();
570 for(auto&& word : words)
571 miniDict.insert(word, new PosEntry(0));
572
573 QTextStream s(&file);
574 QString text = s.readAll();
575 bool valid = true;
576 const QChar * buf = text.unicode();
577 QChar str[64];
578 QChar c = buf[0];
579 int j = 0;
580 int i = 0;
581 while(j < text.length())
582 {
583 if(c == QLatin1Char('<') || c == QLatin1Char('&'))
584 {
585 valid = false;
586 if(i > 1)
587 buildMiniDict(QString(str, i));
588 i = 0;
589 c = buf[++j];
590 continue;
591 }
592 if((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid)
593 {
594 valid = true;
595 c = buf[++j];
596 continue;
597 }
598 if(!valid)
599 {
600 c = buf[++j];
601 continue;
602 }
603 if((c.isLetterOrNumber() || c == QLatin1Char('_')) && i < 63)
604 {
605 str[i] = c.toLower();
606 ++i;
607 }
608 else
609 {
610 if(i > 1)
611 buildMiniDict(QString(str, i));
612 i = 0;
613 }
614 c = buf[++j];
615 }
616 if(i > 1)
617 buildMiniDict(QString(str, i));
618 file.close();
619
620 QStringList wordLst;
621 QList<uint> a;
622 for(auto&& pattern : patterns)
623 {
624 wordLst = pattern.split(QLatin1Char(' '));
625 a = miniDict[wordLst[0]]->positions;
626 for(int j = 1; j < (int)wordLst.count(); ++j)
627 {
628 auto b = miniDict[wordLst[j]]->positions;
629 auto aIt = a.begin();
630 while(aIt != a.end())
631 {
632 if(b.contains(*aIt + 1))
633 {
634 (*aIt)++;
635 ++aIt;
636 }
637 else
638 {
639 aIt = a.erase(aIt);
640 }
641 }
642 }
643 }
644 if(a.count())
645 return true;
646 return false;
647 }
648
649 QT_END_NAMESPACE
650