1 /*
2     Kchmviewer - a CHM and EPUB file viewer with broad language support
3     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
4 
5     SPDX-License-Identifier: GPL-3.0-or-later
6 */
7 
8 #ifndef EBOOK_SEARCH_INDEX_H
9 #define EBOOK_SEARCH_INDEX_H
10 
11 #include <QDataStream>
12 #include <QHash>
13 #include <QStringList>
14 #include <QUrl>
15 #include <QVector>
16 
17 #include "helper_entitydecoder.h"
18 
19 class EBook;
20 
21 // This code is based on some pretty old version of Qt Assistant
22 namespace QtAs
23 {
24 struct Document {
DocumentDocument25     Document(int d, int f)
26         : docNumber(d)
27         , frequency(f)
28     {
29     }
DocumentDocument30     Document()
31         : docNumber(-1)
32         , frequency(0)
33     {
34     }
35     bool operator==(const Document doc) const
36     {
37         return docNumber == doc.docNumber;
38     }
39 
40     bool operator<(const Document doc) const
41     {
42         return frequency > doc.frequency;
43     }
44 
45     bool operator<=(const Document doc) const
46     {
47         return frequency >= doc.frequency;
48     }
49 
50     bool operator>(const Document doc) const
51     {
52         return frequency < doc.frequency;
53     }
54 
55     qint16 docNumber;
56     qint16 frequency;
57 };
58 
59 QDataStream &operator>>(QDataStream &s, Document &l);
60 QDataStream &operator<<(QDataStream &s, const Document l);
61 
62 class Index : public QObject
63 {
64     Q_OBJECT
65 public:
66     Index();
67 
68     void writeDict(QDataStream &stream);
69     bool readDict(QDataStream &stream);
70     bool makeIndex(const QList<QUrl> &docs, EBook *chmFile);
71     QList<QUrl> query(const QStringList &, const QStringList &, const QStringList &, EBook *chmFile);
getCharsSplit()72     QString getCharsSplit() const
73     {
74         return m_charssplit;
75     }
getCharsPartOfWord()76     QString getCharsPartOfWord() const
77     {
78         return m_charsword;
79     }
80 
81 signals:
82     void indexingProgress(int, const QString &);
83 
84 public slots:
85     void setLastWinClosed();
86 
87 private:
88     struct Entry {
EntryEntry89         explicit Entry(int d)
90         {
91             documents.append(Document(d, 1));
92         }
EntryEntry93         explicit Entry(const QVector<Document> &l)
94             : documents(l)
95         {
96         }
97         QVector<Document> documents;
98     };
99 
100     struct PosEntry {
PosEntryPosEntry101         explicit PosEntry(int p)
102         {
103             positions.append(p);
104         }
105         QList<uint> positions;
106     };
107 
108     bool parseDocumentToStringlist(EBook *chmFile, const QUrl &filename, QStringList &tokenlist);
109     void insertInDict(const QString &, int);
110 
111     QStringList getWildcardTerms(const QString &);
112     QStringList split(const QString &);
113     QList<Document> setupDummyTerm(const QStringList &);
114     bool searchForPhrases(const QStringList &phrases, const QStringList &words, const QUrl &filename, EBook *chmFile);
115 
116     QList<QUrl> docList;
117     QHash<QString, Entry *> dict;
118     QHash<QString, PosEntry *> miniDict;
119     bool lastWindowClosed;
120     HelperEntityDecoder entityDecoder;
121 
122     // Those characters are splitters (i.e. split the word), but added themselves into dictionary too.
123     // This makes the dictionary MUCH larger, but ensure that for the piece of "window->print" both
124     // search for "print" and "->print" will find it.
125     QString m_charssplit;
126 
127     // Those characters are parts of word - for example, '_' is here, and search for _debug will find only _debug.
128     QString m_charsword;
129 };
130 
131 };
132 
133 Q_DECLARE_TYPEINFO(QtAs::Document, Q_MOVABLE_TYPE);
134 
135 #endif // EBOOK_SEARCH_INDEX_H
136