1 /* 2 Kchmviewer - a CHM and EPUB file viewer with broad language support 3 SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com 4 5 SPDX-License-Identifier: GPL-3.0-or-later 6 */ 7 8 #ifndef EBOOK_SEARCH_INDEX_H 9 #define EBOOK_SEARCH_INDEX_H 10 11 #include <QDataStream> 12 #include <QHash> 13 #include <QStringList> 14 #include <QUrl> 15 #include <QVector> 16 17 #include "helper_entitydecoder.h" 18 19 class EBook; 20 21 // This code is based on some pretty old version of Qt Assistant 22 namespace QtAs 23 { 24 struct Document { DocumentDocument25 Document(int d, int f) 26 : docNumber(d) 27 , frequency(f) 28 { 29 } DocumentDocument30 Document() 31 : docNumber(-1) 32 , frequency(0) 33 { 34 } 35 bool operator==(const Document doc) const 36 { 37 return docNumber == doc.docNumber; 38 } 39 40 bool operator<(const Document doc) const 41 { 42 return frequency > doc.frequency; 43 } 44 45 bool operator<=(const Document doc) const 46 { 47 return frequency >= doc.frequency; 48 } 49 50 bool operator>(const Document doc) const 51 { 52 return frequency < doc.frequency; 53 } 54 55 qint16 docNumber; 56 qint16 frequency; 57 }; 58 59 QDataStream &operator>>(QDataStream &s, Document &l); 60 QDataStream &operator<<(QDataStream &s, const Document l); 61 62 class Index : public QObject 63 { 64 Q_OBJECT 65 public: 66 Index(); 67 68 void writeDict(QDataStream &stream); 69 bool readDict(QDataStream &stream); 70 bool makeIndex(const QList<QUrl> &docs, EBook *chmFile); 71 QList<QUrl> query(const QStringList &, const QStringList &, const QStringList &, EBook *chmFile); getCharsSplit()72 QString getCharsSplit() const 73 { 74 return m_charssplit; 75 } getCharsPartOfWord()76 QString getCharsPartOfWord() const 77 { 78 return m_charsword; 79 } 80 81 signals: 82 void indexingProgress(int, const QString &); 83 84 public slots: 85 void setLastWinClosed(); 86 87 private: 88 struct Entry { EntryEntry89 explicit Entry(int d) 90 { 91 documents.append(Document(d, 1)); 92 } EntryEntry93 explicit Entry(const QVector<Document> &l) 94 : documents(l) 95 { 96 } 97 QVector<Document> documents; 98 }; 99 100 struct PosEntry { PosEntryPosEntry101 explicit PosEntry(int p) 102 { 103 positions.append(p); 104 } 105 QList<uint> positions; 106 }; 107 108 bool parseDocumentToStringlist(EBook *chmFile, const QUrl &filename, QStringList &tokenlist); 109 void insertInDict(const QString &, int); 110 111 QStringList getWildcardTerms(const QString &); 112 QStringList split(const QString &); 113 QList<Document> setupDummyTerm(const QStringList &); 114 bool searchForPhrases(const QStringList &phrases, const QStringList &words, const QUrl &filename, EBook *chmFile); 115 116 QList<QUrl> docList; 117 QHash<QString, Entry *> dict; 118 QHash<QString, PosEntry *> miniDict; 119 bool lastWindowClosed; 120 HelperEntityDecoder entityDecoder; 121 122 // Those characters are splitters (i.e. split the word), but added themselves into dictionary too. 123 // This makes the dictionary MUCH larger, but ensure that for the piece of "window->print" both 124 // search for "print" and "->print" will find it. 125 QString m_charssplit; 126 127 // Those characters are parts of word - for example, '_' is here, and search for _debug will find only _debug. 128 QString m_charsword; 129 }; 130 131 }; 132 133 Q_DECLARE_TYPEINFO(QtAs::Document, Q_MOVABLE_TYPE); 134 135 #endif // EBOOK_SEARCH_INDEX_H 136