1 /*
2 	SPDX-FileCopyrightText: 2009-2021 Graeme Gott <graeme@gottcode.org>
3 
4 	SPDX-License-Identifier: GPL-3.0-or-later
5 */
6 
7 #include "trie.h"
8 
9 #include <QDataStream>
10 #include <QTextStream>
11 
12 #include <queue>
13 #include <utility>
14 
15 //-----------------------------------------------------------------------------
16 
17 namespace
18 {
19 
20 /**
21  * @brief The TrieGenerator class generates the optimized word list for the trie class.
22  */
23 class TrieGenerator
24 {
25 public:
26 	/**
27 	 * Constructs a generator instance.
28 	 * @param key the letter contained by the generator instance
29 	 */
TrieGenerator(const QChar & key=QChar ())30 	explicit TrieGenerator(const QChar& key = QChar())
31 		: m_key(key)
32 		, m_word(false)
33 		, m_children(nullptr)
34 		, m_next(nullptr)
35 		, m_count(0)
36 	{
37 	}
38 
39 	/**
40 	 * Constructs a generator instance.
41 	 * @param word a list of letters to build child instances from
42 	 */
TrieGenerator(const QString & word)43 	explicit TrieGenerator(const QString& word)
44 		: m_word(false)
45 		, m_children(nullptr)
46 		, m_next(nullptr)
47 		, m_count(0)
48 	{
49 		addWord(word, QStringList(word));
50 	}
51 
52 	/**
53 	 * Constructs a generator instance.
54 	 * @param data contents of a word list file including alternate spellings
55 	 */
56 	explicit TrieGenerator(const QByteArray& data);
57 
58 	/**
59 	 * Destroys the generator.
60 	 */
61 	~TrieGenerator();
62 
63 	/**
64 	 * Compiles a compressed list of the trie for faster access.
65 	 * @param [out] nodes the compressed list of nodes
66 	 * @param [out] spellings the list of spellings
67 	 */
68 	void run(QVector<Trie::Node>& nodes, QStringList& spellings) const;
69 
70 private:
71 	/**
72 	 * Fetch a child generator matching a specific letter. Creates one if does not exist yet.
73 	 * @param letter the letter to add
74 	 * @return the child generator
75 	 */
76 	TrieGenerator* addChild(const QChar& letter);
77 
78 	/**
79 	 * Adds a word by walking through the letters and fetching the next child of each letter.
80 	 * @param word the word to add
81 	 * @param spellings alternate spellings of the word to store
82 	 */
83 	void addWord(const QString& word, const QStringList& spellings);
84 
85 	// Uncopyable
86 	TrieGenerator(const TrieGenerator&) = delete;
87 	TrieGenerator& operator=(const TrieGenerator&) = delete;
88 
89 private:
90 	QChar m_key; /**< letter represented by the generator */
91 	bool m_word; /**< is this node a word */
92 	TrieGenerator* m_children; /**< first child generator */
93 	TrieGenerator* m_next; /**< first sibling generator */
94 	int m_count; /**< how many children the generator has */
95 	QStringList m_spellings; /**< alternate spellings of word */
96 };
97 
98 //-----------------------------------------------------------------------------
99 
TrieGenerator(const QByteArray & data)100 TrieGenerator::TrieGenerator(const QByteArray& data)
101 	: m_word(false)
102 	, m_children(nullptr)
103 	, m_next(nullptr)
104 	, m_count(0)
105 {
106 	QTextStream stream(data);
107 #if (QT_VERSION < QT_VERSION_CHECK(6,0,0))
108 	stream.setCodec("UTF-8");
109 #endif
110 	while (!stream.atEnd()) {
111 #if (QT_VERSION >= QT_VERSION_CHECK(5,14,0))
112 		QStringList spellings = stream.readLine().simplified().split(QChar(' '), Qt::SkipEmptyParts);
113 #else
114 		QStringList spellings = stream.readLine().simplified().split(QChar(' '), QString::SkipEmptyParts);
115 #endif
116 		if (spellings.isEmpty()) {
117 			continue;
118 		}
119 
120 		QString word = spellings.first().toUpper();
121 		if (spellings.count() == 1) {
122 			spellings[0] = word.toLower();
123 		} else {
124 			spellings.removeFirst();
125 		}
126 
127 		if (word.length() >= 3 && word.length() <= 25) {
128 			addWord(word, spellings);
129 		}
130 	}
131 }
132 
133 //-----------------------------------------------------------------------------
134 
~TrieGenerator()135 TrieGenerator::~TrieGenerator()
136 {
137 	delete m_children;
138 	delete m_next;
139 }
140 
141 //-----------------------------------------------------------------------------
142 
addChild(const QChar & letter)143 TrieGenerator* TrieGenerator::addChild(const QChar& letter)
144 {
145 	if (!m_children) {
146 		m_children = new TrieGenerator(letter);
147 		m_count = 1;
148 		return m_children;
149 	}
150 
151 	TrieGenerator* previous = nullptr;
152 	TrieGenerator* current = m_children;
153 	while (current && current->m_key != letter) {
154 		previous = current;
155 		current = current->m_next;
156 	}
157 
158 	if (!current) {
159 		previous->m_next = current = new TrieGenerator(letter);
160 		m_count++;
161 	}
162 	return current;
163 }
164 
165 //-----------------------------------------------------------------------------
166 
addWord(const QString & word,const QStringList & spellings)167 void TrieGenerator::addWord(const QString& word, const QStringList& spellings)
168 {
169 	TrieGenerator* node = this;
170 	for (const QChar& c : word) {
171 		node = node->addChild(c);
172 	}
173 	node->m_word = true;
174 	node->m_spellings = spellings;
175 }
176 
177 //-----------------------------------------------------------------------------
178 
run(QVector<Trie::Node> & nodes,QStringList & spellings) const179 void TrieGenerator::run(QVector<Trie::Node>& nodes, QStringList& spellings) const
180 {
181 	std::queue<std::pair<const TrieGenerator*, int>> next;
182 	nodes.append(Trie::Node());
183 	next.emplace(this, 0);
184 
185 	while (!next.empty()) {
186 		auto item = next.front();
187 		next.pop();
188 
189 		const TrieGenerator* trie = item.first;
190 		Trie::Node& node = nodes[item.second];
191 		node.m_letter = trie->m_key;
192 		node.m_word = spellings.count();
193 		spellings += trie->m_spellings;
194 		node.m_word_count = trie->m_spellings.count();
195 		node.m_child_count = trie->m_count;
196 
197 		if (node.m_child_count) {
198 			node.m_children = nodes.count();
199 
200 			trie = trie->m_children;
201 			int count = node.m_child_count;
202 			for (int i = 0; i < count; ++i) {
203 				nodes.append(Trie::Node());
204 				next.emplace(trie, nodes.count() - 1);
205 				trie = trie->m_next;
206 			}
207 		}
208 	}
209 }
210 
211 }
212 
213 //-----------------------------------------------------------------------------
214 
Trie()215 Trie::Trie()
216 {
217 }
218 
219 //-----------------------------------------------------------------------------
220 
Trie(const QString & word)221 Trie::Trie(const QString& word)
222 {
223 	TrieGenerator generator(word);
224 	generator.run(m_nodes, m_spellings);
225 	checkNodes();
226 }
227 
228 //-----------------------------------------------------------------------------
229 
Trie(const QByteArray & data)230 Trie::Trie(const QByteArray& data)
231 {
232 	TrieGenerator generator(data);
233 	generator.run(m_nodes, m_spellings);
234 	checkNodes();
235 }
236 
237 //-----------------------------------------------------------------------------
238 
clear()239 void Trie::clear()
240 {
241 	m_nodes.clear();
242 	m_spellings.clear();
243 }
244 
245 //-----------------------------------------------------------------------------
246 
checkNodes()247 void Trie::checkNodes()
248 {
249 	// Verify that no nodes reference outside list
250 	const quint32 count = m_nodes.size();
251 	for (const Node& node : qAsConst(m_nodes)) {
252 		const quint32 start = node.m_children;
253 		const quint32 end = start + node.m_child_count;
254 		if ((start >= count) || (end > count)) {
255 			clear();
256 			break;
257 		}
258 	}
259 }
260 
261 //-----------------------------------------------------------------------------
262 
child(const QChar & letter,const Node * node) const263 const Trie::Node* Trie::child(const QChar& letter, const Node* node) const
264 {
265 	const auto end = m_nodes.cbegin() + node->m_children + node->m_child_count;
266 	for (auto i = m_nodes.cbegin() + node->m_children; i < end; ++i) {
267 		if (*i == letter) {
268 			return &*i;
269 		}
270 	}
271 	return nullptr;
272 }
273 
274 //-----------------------------------------------------------------------------
275 
spellings(const QString & word,const QStringList & default_value) const276 QStringList Trie::spellings(const QString& word, const QStringList& default_value) const
277 {
278 	const Trie::Node* node = &m_nodes[0];
279 	const int length = word.length();
280 	for (int i = 0; i < length; ++i) {
281 		node = child(word.at(i), node);
282 		if (!node) {
283 			return default_value;
284 		}
285 	}
286 	return m_spellings.mid(node->m_word, node->m_word_count);
287 }
288 
289 //-----------------------------------------------------------------------------
290 
operator <<(QDataStream & stream,const Trie & trie)291 QDataStream& operator<<(QDataStream& stream, const Trie& trie)
292 {
293 	stream << trie.m_nodes << trie.m_spellings.join('\n');
294 	return stream;
295 }
296 
297 //-----------------------------------------------------------------------------
298 
operator >>(QDataStream & stream,Trie & trie)299 QDataStream& operator>>(QDataStream& stream, Trie& trie)
300 {
301 	QString spellings;
302 	stream >> trie.m_nodes >> spellings;
303 	trie.checkNodes();
304 	if (!trie.isEmpty()) {
305 		trie.m_spellings = spellings.split('\n');
306 	}
307 	return stream;
308 }
309 
310 //-----------------------------------------------------------------------------
311 
operator <<(QDataStream & stream,const Trie::Node & node)312 QDataStream& operator<<(QDataStream& stream, const Trie::Node& node)
313 {
314 	stream << node.m_letter << node.m_word << node.m_children << node.m_word_count << node.m_child_count;
315 	return stream;
316 }
317 
318 //-----------------------------------------------------------------------------
319 
operator >>(QDataStream & stream,Trie::Node & node)320 QDataStream& operator>>(QDataStream& stream, Trie::Node& node)
321 {
322 	stream >> node.m_letter >> node.m_word >> node.m_children >> node.m_word_count >> node.m_child_count;
323 	return stream;
324 }
325 
326 //-----------------------------------------------------------------------------
327