1 /*
2 SPDX-FileCopyrightText: 2013-2019 Andreas Cord-Landwehr <cordlandwehr@kde.org>
3
4 SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
5 */
6
7 #include "courseparser.h"
8 #include "artikulate_debug.h"
9 #include "core/ieditablecourse.h"
10 #include "core/language.h"
11 #include "core/phoneme.h"
12 #include "core/phrase.h"
13 #include "core/unit.h"
14
15 #include <KTar>
16 #include <QDir>
17 #include <QDomDocument>
18 #include <QFile>
19 #include <QFileInfo>
20 #include <QXmlSchema>
21 #include <QXmlSchemaValidator>
22 #include <QXmlStreamReader>
23
loadXmlSchema(const QString & schemeName)24 QXmlSchema CourseParser::loadXmlSchema(const QString &schemeName)
25 {
26 QString relPath = QStringLiteral(":/artikulate/schemes/%1.xsd").arg(schemeName);
27 QUrl file = QUrl::fromLocalFile(relPath);
28
29 QXmlSchema schema;
30 if (file.isEmpty() || schema.load(file) == false) {
31 qCWarning(ARTIKULATE_PARSER()) << "Schema at file " << file.toLocalFile() << " is invalid.";
32 }
33 return schema;
34 }
35
loadDomDocument(const QUrl & path,const QXmlSchema & schema)36 QDomDocument CourseParser::loadDomDocument(const QUrl &path, const QXmlSchema &schema)
37 {
38 QDomDocument document;
39 QXmlSchemaValidator validator(schema);
40 if (!validator.validate(path)) {
41 qCWarning(ARTIKULATE_PARSER()) << "Schema is not valid, aborting loading of XML document:" << path.toLocalFile();
42 return document;
43 }
44
45 QString errorMsg;
46 QFile file(path.toLocalFile());
47 if (file.open(QIODevice::ReadOnly)) {
48 if (!document.setContent(&file, &errorMsg)) {
49 qCWarning(ARTIKULATE_PARSER()) << errorMsg;
50 }
51 } else {
52 qCWarning(ARTIKULATE_PARSER()) << "Could not open XML document " << path.toLocalFile() << " for reading, aborting.";
53 }
54 return document;
55 }
56
parseUnits(const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool skipIncomplete)57 std::vector<std::shared_ptr<Unit>> CourseParser::parseUnits(const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool skipIncomplete)
58 {
59 std::vector<std::shared_ptr<Unit>> units;
60
61 QFileInfo info(path.toLocalFile());
62 if (!info.exists()) {
63 qCCritical(ARTIKULATE_PARSER()()) << "No course file available at location" << path.toLocalFile();
64 return units;
65 }
66
67 QXmlStreamReader xml;
68 QFile file(path.toLocalFile());
69 if (file.open(QIODevice::ReadOnly)) {
70 xml.setDevice(&file);
71 xml.readNextStartElement();
72
73 while (!xml.atEnd() && !xml.hasError()) {
74 bool elementOk {false};
75 QXmlStreamReader::TokenType token = xml.readNext();
76
77 if (token == QXmlStreamReader::StartDocument) {
78 continue;
79 }
80 if (token == QXmlStreamReader::StartElement) {
81 if (xml.name() == "units") {
82 continue;
83 } else if (xml.name() == "unit") {
84 auto unit = parseUnit(xml, path, phonemes, skipIncomplete, elementOk);
85 if (elementOk) {
86 units.push_back(std::move(unit));
87 }
88 }
89 }
90 }
91 if (xml.hasError()) {
92 qCCritical(ARTIKULATE_PARSER()) << "Error occurred when reading Course XML file:" << path.toLocalFile();
93 }
94 } else {
95 qCCritical(ARTIKULATE_PARSER()) << "Could not open course file" << path.toLocalFile();
96 }
97 xml.clear();
98 file.close();
99
100 return units;
101 }
102
parseUnit(QXmlStreamReader & xml,const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool skipIncomplete,bool & ok)103 std::shared_ptr<Unit> CourseParser::parseUnit(QXmlStreamReader &xml, const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool skipIncomplete, bool &ok)
104 {
105 std::shared_ptr<Unit> unit = Unit::create();
106 ok = true;
107
108 if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "unit") {
109 qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'unit' element, aborting here";
110 return unit;
111 }
112
113 xml.readNext();
114 while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "unit")) {
115 if (xml.tokenType() == QXmlStreamReader::StartElement) {
116 bool elementOk {false};
117 if (xml.name() == "id") {
118 unit->setId(parseElement(xml, elementOk));
119 ok &= elementOk;
120 } else if (xml.name() == "foreignId") {
121 unit->setForeignId(parseElement(xml, elementOk));
122 ok &= elementOk;
123 } else if (xml.name() == "title") {
124 unit->setTitle(parseElement(xml, elementOk));
125 ok &= elementOk;
126 } else if (xml.name() == "phrases") {
127 // nothing to do
128 } else if (xml.name() == "phrase") {
129 auto phrase = parsePhrase(xml, path, phonemes, elementOk);
130 if (elementOk && (!skipIncomplete || !phrase->soundFileUrl().isEmpty())) {
131 unit->addPhrase(phrase, unit->phrases().size());
132 }
133 ok &= elementOk;
134 } else {
135 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
136 }
137 }
138 xml.readNext();
139 }
140 if (!ok) {
141 qCWarning(ARTIKULATE_PARSER()) << "Errors occurred while parsing unit" << unit->title() << unit->id();
142 }
143 return unit;
144 }
145
parsePhrase(QXmlStreamReader & xml,const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool & ok)146 std::shared_ptr<Phrase> CourseParser::parsePhrase(QXmlStreamReader &xml, const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool &ok)
147 {
148 std::shared_ptr<Phrase> phrase = Phrase::create();
149 ok = true;
150
151 if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "phrase") {
152 qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'phrase' element, aborting here";
153 ok = false;
154 return phrase;
155 }
156
157 xml.readNext();
158 while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "phrase")) {
159 if (xml.tokenType() == QXmlStreamReader::StartElement) {
160 bool elementOk {false};
161 if (xml.name() == "id") {
162 phrase->setId(parseElement(xml, elementOk));
163 ok &= elementOk;
164 } else if (xml.name() == "foreignId") {
165 phrase->setForeignId(parseElement(xml, elementOk));
166 ok &= elementOk;
167 } else if (xml.name() == "text") {
168 phrase->setText(parseElement(xml, elementOk));
169 ok &= elementOk;
170 } else if (xml.name() == "i18nText") {
171 phrase->seti18nText(parseElement(xml, elementOk));
172 ok &= elementOk;
173 } else if (xml.name() == "soundFile") {
174 QString fileName = parseElement(xml, elementOk);
175 if (!fileName.isEmpty()) {
176 phrase->setSound(QUrl::fromLocalFile(path.adjusted(QUrl::RemoveFilename | QUrl::StripTrailingSlash).path() + '/' + fileName));
177 }
178 ok &= elementOk;
179 } else if (xml.name() == "phonemes") {
180 auto parsedPhonemeIds = parsePhonemeIds(xml, elementOk);
181 for (auto phoneme : phonemes) {
182 if (parsedPhonemeIds.contains(phoneme->id())) {
183 phrase->addPhoneme(phoneme.get());
184 }
185 }
186 ok &= elementOk;
187 } else if (xml.name() == "type") {
188 const QString type = parseElement(xml, elementOk);
189 if (type == "word") {
190 phrase->setType(IPhrase::Type::Word);
191 } else if (type == "expression") {
192 phrase->setType(IPhrase::Type::Expression);
193 } else if (type == "sentence") {
194 phrase->setType(IPhrase::Type::Sentence);
195 } else if (type == "paragraph") {
196 phrase->setType(IPhrase::Type::Paragraph);
197 }
198 ok &= elementOk;
199 } else if (xml.name() == "editState") {
200 const QString type = parseElement(xml, elementOk);
201 if (type == "translated") {
202 phrase->setEditState(Phrase::EditState::Translated);
203 } else if (type == "completed") {
204 phrase->setEditState(Phrase::EditState::Completed);
205 } else if (type == "unknown") {
206 phrase->setEditState(Phrase::EditState::Completed);
207 }
208 ok &= elementOk;
209 } else {
210 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
211 }
212 }
213 xml.readNext();
214 }
215 if (!ok) {
216 qCWarning(ARTIKULATE_PARSER()) << "Errors occurred while parsing phrase" << phrase->text() << phrase->id();
217 }
218 return phrase;
219 }
220
parsePhonemeIds(QXmlStreamReader & xml,bool & ok)221 QStringList CourseParser::parsePhonemeIds(QXmlStreamReader &xml, bool &ok)
222 {
223 QStringList ids;
224 ok = true;
225
226 if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "phonemes") {
227 qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'phonemes' element, aborting here";
228 ok = false;
229 return ids;
230 }
231
232 xml.readNext();
233 while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "phonemes")) {
234 xml.readNext();
235 if (xml.tokenType() == QXmlStreamReader::StartElement) {
236 if (xml.name() == "phonemeID") {
237 bool elementOk {false};
238 ids.append(parseElement(xml, elementOk));
239 ok &= elementOk;
240 } else {
241 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
242 }
243 }
244 }
245 return ids;
246 }
247
parseElement(QXmlStreamReader & xml,bool & ok)248 QString CourseParser::parseElement(QXmlStreamReader &xml, bool &ok)
249 {
250 ok = true;
251 if (xml.tokenType() != QXmlStreamReader::StartElement) {
252 qCCritical(ARTIKULATE_PARSER()) << "Parsing element that does not start with a start element";
253 ok = false;
254 return QString();
255 }
256
257 QString elementName = xml.name().toString();
258 xml.readNext();
259
260 // qCDebug(ARTIKULATE_PARSER()) << "parsed: " << elementName << " / " << xml.text().toString();
261 return xml.text().toString();
262 }
263
serializedDocument(std::shared_ptr<IEditableCourse> course,bool trainingExport)264 QDomDocument CourseParser::serializedDocument(std::shared_ptr<IEditableCourse> course, bool trainingExport)
265 {
266 QDomDocument document;
267 // prepare xml header
268 QDomProcessingInstruction header = document.createProcessingInstruction(QStringLiteral("xml"), QStringLiteral("version=\"1.0\""));
269 document.appendChild(header);
270
271 // create main element
272 QDomElement root = document.createElement(QStringLiteral("course"));
273 document.appendChild(root);
274
275 QDomElement idElement = document.createElement(QStringLiteral("id"));
276 QDomElement titleElement = document.createElement(QStringLiteral("title"));
277 QDomElement descriptionElement = document.createElement(QStringLiteral("description"));
278 QDomElement languageElement = document.createElement(QStringLiteral("language"));
279
280 idElement.appendChild(document.createTextNode(course->id()));
281 titleElement.appendChild(document.createTextNode(course->title()));
282 descriptionElement.appendChild(document.createTextNode(course->description()));
283 languageElement.appendChild(document.createTextNode(course->id()));
284
285 QDomElement unitListElement = document.createElement(QStringLiteral("units"));
286 // create units
287 for (auto unit : course->units()) {
288 QDomElement unitElement = document.createElement(QStringLiteral("unit"));
289
290 QDomElement unitIdElement = document.createElement(QStringLiteral("id"));
291 QDomElement unitTitleElement = document.createElement(QStringLiteral("title"));
292 QDomElement unitPhraseListElement = document.createElement(QStringLiteral("phrases"));
293 unitIdElement.appendChild(document.createTextNode(unit->id()));
294 unitTitleElement.appendChild(document.createTextNode(unit->title()));
295
296 // construct phrases
297 for (auto &phrase : unit->phrases()) {
298 if (trainingExport && phrase->soundFileUrl().isEmpty()) {
299 continue;
300 }
301 unitPhraseListElement.appendChild(serializedPhrase(std::static_pointer_cast<IEditablePhrase>(phrase), document));
302 }
303
304 if (trainingExport && unitPhraseListElement.childNodes().isEmpty()) {
305 continue;
306 }
307
308 // construct the unit element
309 unitElement.appendChild(unitIdElement);
310 if (!unit->foreignId().isEmpty()) {
311 QDomElement unitForeignIdElement = document.createElement(QStringLiteral("foreignId"));
312 unitForeignIdElement.appendChild(document.createTextNode(unit->foreignId()));
313 unitElement.appendChild(unitForeignIdElement);
314 }
315 unitElement.appendChild(unitTitleElement);
316 unitElement.appendChild(unitPhraseListElement);
317
318 unitListElement.appendChild(unitElement);
319 }
320
321 root.appendChild(idElement);
322 if (!course->foreignId().isEmpty()) {
323 QDomElement courseForeignIdElement = document.createElement(QStringLiteral("foreignId"));
324 courseForeignIdElement.appendChild(document.createTextNode(course->foreignId()));
325 root.appendChild(courseForeignIdElement);
326 }
327 root.appendChild(titleElement);
328 root.appendChild(descriptionElement);
329 root.appendChild(languageElement);
330 root.appendChild(unitListElement);
331
332 return document;
333 }
334
serializedPhrase(std::shared_ptr<IEditablePhrase> phrase,QDomDocument & document)335 QDomElement CourseParser::serializedPhrase(std::shared_ptr<IEditablePhrase> phrase, QDomDocument &document)
336 {
337 QDomElement phraseElement = document.createElement(QStringLiteral("phrase"));
338 QDomElement phraseIdElement = document.createElement(QStringLiteral("id"));
339 QDomElement phraseTextElement = document.createElement(QStringLiteral("text"));
340 QDomElement phrasei18nTextElement = document.createElement(QStringLiteral("i18nText"));
341 QDomElement phraseSoundFileElement = document.createElement(QStringLiteral("soundFile"));
342 QDomElement phraseTypeElement = document.createElement(QStringLiteral("type"));
343 QDomElement phraseEditStateElement = document.createElement(QStringLiteral("editState"));
344 QDomElement phrasePhonemeListElement = document.createElement(QStringLiteral("phonemes"));
345
346 phraseIdElement.appendChild(document.createTextNode(phrase->id()));
347 phraseTextElement.appendChild(document.createTextNode(phrase->text()));
348 phrasei18nTextElement.appendChild(document.createTextNode(phrase->i18nText()));
349 phraseSoundFileElement.appendChild(document.createTextNode(phrase->sound().fileName()));
350 phraseTypeElement.appendChild(document.createTextNode(phrase->typeString()));
351 phraseEditStateElement.appendChild(document.createTextNode(phrase->editStateString()));
352
353 // add phonemes
354 for (auto &phoneme : phrase->phonemes()) {
355 QDomElement phonemeElement = document.createElement(QStringLiteral("phonemeID"));
356 phonemeElement.appendChild(document.createTextNode(phoneme->id()));
357 phrasePhonemeListElement.appendChild(phonemeElement);
358 }
359
360 phraseElement.appendChild(phraseIdElement);
361 if (!phrase->foreignId().isEmpty()) {
362 QDomElement phraseForeignIdElement = document.createElement(QStringLiteral("foreignId"));
363 phraseForeignIdElement.appendChild(document.createTextNode(phrase->foreignId()));
364 phraseElement.appendChild(phraseForeignIdElement);
365 }
366 phraseElement.appendChild(phraseTextElement);
367 phraseElement.appendChild(phrasei18nTextElement);
368 phraseElement.appendChild(phraseSoundFileElement);
369 phraseElement.appendChild(phraseTypeElement);
370 phraseElement.appendChild(phraseEditStateElement);
371 phraseElement.appendChild(phrasePhonemeListElement);
372
373 return phraseElement;
374 }
375
exportCourseToGhnsPackage(std::shared_ptr<IEditableCourse> course,const QString & exportPath)376 bool CourseParser::exportCourseToGhnsPackage(std::shared_ptr<IEditableCourse> course, const QString &exportPath)
377 {
378 // filename
379 const QString fileName = course->id() + ".tar.bz2";
380 KTar tar = KTar(exportPath + '/' + fileName, QStringLiteral("application/x-bzip"));
381 if (!tar.open(QIODevice::WriteOnly)) {
382 qCWarning(ARTIKULATE_CORE()) << "Unable to open tar file" << exportPath + '/' + fileName << "in write mode, aborting.";
383 return false;
384 }
385
386 for (auto &unit : course->units()) {
387 for (auto &phrase : unit->phrases()) {
388 if (QFile::exists(phrase->soundFileUrl())) {
389 tar.addLocalFile(phrase->soundFileUrl(), phrase->id() + ".ogg");
390 }
391 }
392 }
393
394 tar.writeFile(course->id() + ".xml", CourseParser::serializedDocument(course, true).toByteArray());
395
396 tar.close();
397 return true;
398 }
399