1 /*
2 SPDX-FileCopyrightText: 2009-2021 Graeme Gott <graeme@gottcode.org>
3
4 SPDX-License-Identifier: GPL-3.0-or-later
5 */
6
7 #include "trie.h"
8
9 #include <QDataStream>
10 #include <QTextStream>
11
12 #include <queue>
13 #include <utility>
14
15 //-----------------------------------------------------------------------------
16
17 namespace
18 {
19
20 /**
21 * @brief The TrieGenerator class generates the optimized word list for the trie class.
22 */
23 class TrieGenerator
24 {
25 public:
26 /**
27 * Constructs a generator instance.
28 * @param key the letter contained by the generator instance
29 */
TrieGenerator(const QChar & key=QChar ())30 explicit TrieGenerator(const QChar& key = QChar())
31 : m_key(key)
32 , m_word(false)
33 , m_children(nullptr)
34 , m_next(nullptr)
35 , m_count(0)
36 {
37 }
38
39 /**
40 * Constructs a generator instance.
41 * @param word a list of letters to build child instances from
42 */
TrieGenerator(const QString & word)43 explicit TrieGenerator(const QString& word)
44 : m_word(false)
45 , m_children(nullptr)
46 , m_next(nullptr)
47 , m_count(0)
48 {
49 addWord(word, QStringList(word));
50 }
51
52 /**
53 * Constructs a generator instance.
54 * @param data contents of a word list file including alternate spellings
55 */
56 explicit TrieGenerator(const QByteArray& data);
57
58 /**
59 * Destroys the generator.
60 */
61 ~TrieGenerator();
62
63 /**
64 * Compiles a compressed list of the trie for faster access.
65 * @param [out] nodes the compressed list of nodes
66 * @param [out] spellings the list of spellings
67 */
68 void run(QVector<Trie::Node>& nodes, QStringList& spellings) const;
69
70 private:
71 /**
72 * Fetch a child generator matching a specific letter. Creates one if does not exist yet.
73 * @param letter the letter to add
74 * @return the child generator
75 */
76 TrieGenerator* addChild(const QChar& letter);
77
78 /**
79 * Adds a word by walking through the letters and fetching the next child of each letter.
80 * @param word the word to add
81 * @param spellings alternate spellings of the word to store
82 */
83 void addWord(const QString& word, const QStringList& spellings);
84
85 // Uncopyable
86 TrieGenerator(const TrieGenerator&) = delete;
87 TrieGenerator& operator=(const TrieGenerator&) = delete;
88
89 private:
90 QChar m_key; /**< letter represented by the generator */
91 bool m_word; /**< is this node a word */
92 TrieGenerator* m_children; /**< first child generator */
93 TrieGenerator* m_next; /**< first sibling generator */
94 int m_count; /**< how many children the generator has */
95 QStringList m_spellings; /**< alternate spellings of word */
96 };
97
98 //-----------------------------------------------------------------------------
99
TrieGenerator(const QByteArray & data)100 TrieGenerator::TrieGenerator(const QByteArray& data)
101 : m_word(false)
102 , m_children(nullptr)
103 , m_next(nullptr)
104 , m_count(0)
105 {
106 QTextStream stream(data);
107 #if (QT_VERSION < QT_VERSION_CHECK(6,0,0))
108 stream.setCodec("UTF-8");
109 #endif
110 while (!stream.atEnd()) {
111 #if (QT_VERSION >= QT_VERSION_CHECK(5,14,0))
112 QStringList spellings = stream.readLine().simplified().split(QChar(' '), Qt::SkipEmptyParts);
113 #else
114 QStringList spellings = stream.readLine().simplified().split(QChar(' '), QString::SkipEmptyParts);
115 #endif
116 if (spellings.isEmpty()) {
117 continue;
118 }
119
120 QString word = spellings.first().toUpper();
121 if (spellings.count() == 1) {
122 spellings[0] = word.toLower();
123 } else {
124 spellings.removeFirst();
125 }
126
127 if (word.length() >= 3 && word.length() <= 25) {
128 addWord(word, spellings);
129 }
130 }
131 }
132
133 //-----------------------------------------------------------------------------
134
~TrieGenerator()135 TrieGenerator::~TrieGenerator()
136 {
137 delete m_children;
138 delete m_next;
139 }
140
141 //-----------------------------------------------------------------------------
142
addChild(const QChar & letter)143 TrieGenerator* TrieGenerator::addChild(const QChar& letter)
144 {
145 if (!m_children) {
146 m_children = new TrieGenerator(letter);
147 m_count = 1;
148 return m_children;
149 }
150
151 TrieGenerator* previous = nullptr;
152 TrieGenerator* current = m_children;
153 while (current && current->m_key != letter) {
154 previous = current;
155 current = current->m_next;
156 }
157
158 if (!current) {
159 previous->m_next = current = new TrieGenerator(letter);
160 m_count++;
161 }
162 return current;
163 }
164
165 //-----------------------------------------------------------------------------
166
addWord(const QString & word,const QStringList & spellings)167 void TrieGenerator::addWord(const QString& word, const QStringList& spellings)
168 {
169 TrieGenerator* node = this;
170 for (const QChar& c : word) {
171 node = node->addChild(c);
172 }
173 node->m_word = true;
174 node->m_spellings = spellings;
175 }
176
177 //-----------------------------------------------------------------------------
178
run(QVector<Trie::Node> & nodes,QStringList & spellings) const179 void TrieGenerator::run(QVector<Trie::Node>& nodes, QStringList& spellings) const
180 {
181 std::queue<std::pair<const TrieGenerator*, int>> next;
182 nodes.append(Trie::Node());
183 next.emplace(this, 0);
184
185 while (!next.empty()) {
186 auto item = next.front();
187 next.pop();
188
189 const TrieGenerator* trie = item.first;
190 Trie::Node& node = nodes[item.second];
191 node.m_letter = trie->m_key;
192 node.m_word = spellings.count();
193 spellings += trie->m_spellings;
194 node.m_word_count = trie->m_spellings.count();
195 node.m_child_count = trie->m_count;
196
197 if (node.m_child_count) {
198 node.m_children = nodes.count();
199
200 trie = trie->m_children;
201 int count = node.m_child_count;
202 for (int i = 0; i < count; ++i) {
203 nodes.append(Trie::Node());
204 next.emplace(trie, nodes.count() - 1);
205 trie = trie->m_next;
206 }
207 }
208 }
209 }
210
211 }
212
213 //-----------------------------------------------------------------------------
214
Trie()215 Trie::Trie()
216 {
217 }
218
219 //-----------------------------------------------------------------------------
220
Trie(const QString & word)221 Trie::Trie(const QString& word)
222 {
223 TrieGenerator generator(word);
224 generator.run(m_nodes, m_spellings);
225 checkNodes();
226 }
227
228 //-----------------------------------------------------------------------------
229
Trie(const QByteArray & data)230 Trie::Trie(const QByteArray& data)
231 {
232 TrieGenerator generator(data);
233 generator.run(m_nodes, m_spellings);
234 checkNodes();
235 }
236
237 //-----------------------------------------------------------------------------
238
clear()239 void Trie::clear()
240 {
241 m_nodes.clear();
242 m_spellings.clear();
243 }
244
245 //-----------------------------------------------------------------------------
246
checkNodes()247 void Trie::checkNodes()
248 {
249 // Verify that no nodes reference outside list
250 const quint32 count = m_nodes.size();
251 for (const Node& node : qAsConst(m_nodes)) {
252 const quint32 start = node.m_children;
253 const quint32 end = start + node.m_child_count;
254 if ((start >= count) || (end > count)) {
255 clear();
256 break;
257 }
258 }
259 }
260
261 //-----------------------------------------------------------------------------
262
child(const QChar & letter,const Node * node) const263 const Trie::Node* Trie::child(const QChar& letter, const Node* node) const
264 {
265 const auto end = m_nodes.cbegin() + node->m_children + node->m_child_count;
266 for (auto i = m_nodes.cbegin() + node->m_children; i < end; ++i) {
267 if (*i == letter) {
268 return &*i;
269 }
270 }
271 return nullptr;
272 }
273
274 //-----------------------------------------------------------------------------
275
spellings(const QString & word,const QStringList & default_value) const276 QStringList Trie::spellings(const QString& word, const QStringList& default_value) const
277 {
278 const Trie::Node* node = &m_nodes[0];
279 const int length = word.length();
280 for (int i = 0; i < length; ++i) {
281 node = child(word.at(i), node);
282 if (!node) {
283 return default_value;
284 }
285 }
286 return m_spellings.mid(node->m_word, node->m_word_count);
287 }
288
289 //-----------------------------------------------------------------------------
290
operator <<(QDataStream & stream,const Trie & trie)291 QDataStream& operator<<(QDataStream& stream, const Trie& trie)
292 {
293 stream << trie.m_nodes << trie.m_spellings.join('\n');
294 return stream;
295 }
296
297 //-----------------------------------------------------------------------------
298
operator >>(QDataStream & stream,Trie & trie)299 QDataStream& operator>>(QDataStream& stream, Trie& trie)
300 {
301 QString spellings;
302 stream >> trie.m_nodes >> spellings;
303 trie.checkNodes();
304 if (!trie.isEmpty()) {
305 trie.m_spellings = spellings.split('\n');
306 }
307 return stream;
308 }
309
310 //-----------------------------------------------------------------------------
311
operator <<(QDataStream & stream,const Trie::Node & node)312 QDataStream& operator<<(QDataStream& stream, const Trie::Node& node)
313 {
314 stream << node.m_letter << node.m_word << node.m_children << node.m_word_count << node.m_child_count;
315 return stream;
316 }
317
318 //-----------------------------------------------------------------------------
319
operator >>(QDataStream & stream,Trie::Node & node)320 QDataStream& operator>>(QDataStream& stream, Trie::Node& node)
321 {
322 stream >> node.m_letter >> node.m_word >> node.m_children >> node.m_word_count >> node.m_child_count;
323 return stream;
324 }
325
326 //-----------------------------------------------------------------------------
327