1 /*
2     SPDX-FileCopyrightText: 2013-2019 Andreas Cord-Landwehr <cordlandwehr@kde.org>
3 
4     SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
5 */
6 
7 #include "courseparser.h"
8 #include "artikulate_debug.h"
9 #include "core/ieditablecourse.h"
10 #include "core/language.h"
11 #include "core/phoneme.h"
12 #include "core/phrase.h"
13 #include "core/unit.h"
14 
15 #include <KTar>
16 #include <QDir>
17 #include <QDomDocument>
18 #include <QFile>
19 #include <QFileInfo>
20 #include <QXmlSchema>
21 #include <QXmlSchemaValidator>
22 #include <QXmlStreamReader>
23 
loadXmlSchema(const QString & schemeName)24 QXmlSchema CourseParser::loadXmlSchema(const QString &schemeName)
25 {
26     QString relPath = QStringLiteral(":/artikulate/schemes/%1.xsd").arg(schemeName);
27     QUrl file = QUrl::fromLocalFile(relPath);
28 
29     QXmlSchema schema;
30     if (file.isEmpty() || schema.load(file) == false) {
31         qCWarning(ARTIKULATE_PARSER()) << "Schema at file " << file.toLocalFile() << " is invalid.";
32     }
33     return schema;
34 }
35 
loadDomDocument(const QUrl & path,const QXmlSchema & schema)36 QDomDocument CourseParser::loadDomDocument(const QUrl &path, const QXmlSchema &schema)
37 {
38     QDomDocument document;
39     QXmlSchemaValidator validator(schema);
40     if (!validator.validate(path)) {
41         qCWarning(ARTIKULATE_PARSER()) << "Schema is not valid, aborting loading of XML document:" << path.toLocalFile();
42         return document;
43     }
44 
45     QString errorMsg;
46     QFile file(path.toLocalFile());
47     if (file.open(QIODevice::ReadOnly)) {
48         if (!document.setContent(&file, &errorMsg)) {
49             qCWarning(ARTIKULATE_PARSER()) << errorMsg;
50         }
51     } else {
52         qCWarning(ARTIKULATE_PARSER()) << "Could not open XML document " << path.toLocalFile() << " for reading, aborting.";
53     }
54     return document;
55 }
56 
parseUnits(const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool skipIncomplete)57 std::vector<std::shared_ptr<Unit>> CourseParser::parseUnits(const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool skipIncomplete)
58 {
59     std::vector<std::shared_ptr<Unit>> units;
60 
61     QFileInfo info(path.toLocalFile());
62     if (!info.exists()) {
63         qCCritical(ARTIKULATE_PARSER()()) << "No course file available at location" << path.toLocalFile();
64         return units;
65     }
66 
67     QXmlStreamReader xml;
68     QFile file(path.toLocalFile());
69     if (file.open(QIODevice::ReadOnly)) {
70         xml.setDevice(&file);
71         xml.readNextStartElement();
72 
73         while (!xml.atEnd() && !xml.hasError()) {
74             bool elementOk {false};
75             QXmlStreamReader::TokenType token = xml.readNext();
76 
77             if (token == QXmlStreamReader::StartDocument) {
78                 continue;
79             }
80             if (token == QXmlStreamReader::StartElement) {
81                 if (xml.name() == "units") {
82                     continue;
83                 } else if (xml.name() == "unit") {
84                     auto unit = parseUnit(xml, path, phonemes, skipIncomplete, elementOk);
85                     if (elementOk) {
86                         units.push_back(std::move(unit));
87                     }
88                 }
89             }
90         }
91         if (xml.hasError()) {
92             qCCritical(ARTIKULATE_PARSER()) << "Error occurred when reading Course XML file:" << path.toLocalFile();
93         }
94     } else {
95         qCCritical(ARTIKULATE_PARSER()) << "Could not open course file" << path.toLocalFile();
96     }
97     xml.clear();
98     file.close();
99 
100     return units;
101 }
102 
parseUnit(QXmlStreamReader & xml,const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool skipIncomplete,bool & ok)103 std::shared_ptr<Unit> CourseParser::parseUnit(QXmlStreamReader &xml, const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool skipIncomplete, bool &ok)
104 {
105     std::shared_ptr<Unit> unit = Unit::create();
106     ok = true;
107 
108     if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "unit") {
109         qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'unit' element, aborting here";
110         return unit;
111     }
112 
113     xml.readNext();
114     while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "unit")) {
115         if (xml.tokenType() == QXmlStreamReader::StartElement) {
116             bool elementOk {false};
117             if (xml.name() == "id") {
118                 unit->setId(parseElement(xml, elementOk));
119                 ok &= elementOk;
120             } else if (xml.name() == "foreignId") {
121                 unit->setForeignId(parseElement(xml, elementOk));
122                 ok &= elementOk;
123             } else if (xml.name() == "title") {
124                 unit->setTitle(parseElement(xml, elementOk));
125                 ok &= elementOk;
126             } else if (xml.name() == "phrases") {
127                 // nothing to do
128             } else if (xml.name() == "phrase") {
129                 auto phrase = parsePhrase(xml, path, phonemes, elementOk);
130                 if (elementOk && (!skipIncomplete || !phrase->soundFileUrl().isEmpty())) {
131                     unit->addPhrase(phrase, unit->phrases().size());
132                 }
133                 ok &= elementOk;
134             } else {
135                 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
136             }
137         }
138         xml.readNext();
139     }
140     if (!ok) {
141         qCWarning(ARTIKULATE_PARSER()) << "Errors occurred while parsing unit" << unit->title() << unit->id();
142     }
143     return unit;
144 }
145 
parsePhrase(QXmlStreamReader & xml,const QUrl & path,QVector<std::shared_ptr<Phoneme>> phonemes,bool & ok)146 std::shared_ptr<Phrase> CourseParser::parsePhrase(QXmlStreamReader &xml, const QUrl &path, QVector<std::shared_ptr<Phoneme>> phonemes, bool &ok)
147 {
148     std::shared_ptr<Phrase> phrase = Phrase::create();
149     ok = true;
150 
151     if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "phrase") {
152         qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'phrase' element, aborting here";
153         ok = false;
154         return phrase;
155     }
156 
157     xml.readNext();
158     while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "phrase")) {
159         if (xml.tokenType() == QXmlStreamReader::StartElement) {
160             bool elementOk {false};
161             if (xml.name() == "id") {
162                 phrase->setId(parseElement(xml, elementOk));
163                 ok &= elementOk;
164             } else if (xml.name() == "foreignId") {
165                 phrase->setForeignId(parseElement(xml, elementOk));
166                 ok &= elementOk;
167             } else if (xml.name() == "text") {
168                 phrase->setText(parseElement(xml, elementOk));
169                 ok &= elementOk;
170             } else if (xml.name() == "i18nText") {
171                 phrase->seti18nText(parseElement(xml, elementOk));
172                 ok &= elementOk;
173             } else if (xml.name() == "soundFile") {
174                 QString fileName = parseElement(xml, elementOk);
175                 if (!fileName.isEmpty()) {
176                     phrase->setSound(QUrl::fromLocalFile(path.adjusted(QUrl::RemoveFilename | QUrl::StripTrailingSlash).path() + '/' + fileName));
177                 }
178                 ok &= elementOk;
179             } else if (xml.name() == "phonemes") {
180                 auto parsedPhonemeIds = parsePhonemeIds(xml, elementOk);
181                 for (auto phoneme : phonemes) {
182                     if (parsedPhonemeIds.contains(phoneme->id())) {
183                         phrase->addPhoneme(phoneme.get());
184                     }
185                 }
186                 ok &= elementOk;
187             } else if (xml.name() == "type") {
188                 const QString type = parseElement(xml, elementOk);
189                 if (type == "word") {
190                     phrase->setType(IPhrase::Type::Word);
191                 } else if (type == "expression") {
192                     phrase->setType(IPhrase::Type::Expression);
193                 } else if (type == "sentence") {
194                     phrase->setType(IPhrase::Type::Sentence);
195                 } else if (type == "paragraph") {
196                     phrase->setType(IPhrase::Type::Paragraph);
197                 }
198                 ok &= elementOk;
199             } else if (xml.name() == "editState") {
200                 const QString type = parseElement(xml, elementOk);
201                 if (type == "translated") {
202                     phrase->setEditState(Phrase::EditState::Translated);
203                 } else if (type == "completed") {
204                     phrase->setEditState(Phrase::EditState::Completed);
205                 } else if (type == "unknown") {
206                     phrase->setEditState(Phrase::EditState::Completed);
207                 }
208                 ok &= elementOk;
209             } else {
210                 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
211             }
212         }
213         xml.readNext();
214     }
215     if (!ok) {
216         qCWarning(ARTIKULATE_PARSER()) << "Errors occurred while parsing phrase" << phrase->text() << phrase->id();
217     }
218     return phrase;
219 }
220 
parsePhonemeIds(QXmlStreamReader & xml,bool & ok)221 QStringList CourseParser::parsePhonemeIds(QXmlStreamReader &xml, bool &ok)
222 {
223     QStringList ids;
224     ok = true;
225 
226     if (xml.tokenType() != QXmlStreamReader::StartElement && xml.name() == "phonemes") {
227         qCWarning(ARTIKULATE_PARSER()) << "Expected to parse 'phonemes' element, aborting here";
228         ok = false;
229         return ids;
230     }
231 
232     xml.readNext();
233     while (!(xml.tokenType() == QXmlStreamReader::EndElement && xml.name() == "phonemes")) {
234         xml.readNext();
235         if (xml.tokenType() == QXmlStreamReader::StartElement) {
236             if (xml.name() == "phonemeID") {
237                 bool elementOk {false};
238                 ids.append(parseElement(xml, elementOk));
239                 ok &= elementOk;
240             } else {
241                 qCWarning(ARTIKULATE_PARSER()) << "Skipping unknown token" << xml.name();
242             }
243         }
244     }
245     return ids;
246 }
247 
parseElement(QXmlStreamReader & xml,bool & ok)248 QString CourseParser::parseElement(QXmlStreamReader &xml, bool &ok)
249 {
250     ok = true;
251     if (xml.tokenType() != QXmlStreamReader::StartElement) {
252         qCCritical(ARTIKULATE_PARSER()) << "Parsing element that does not start with a start element";
253         ok = false;
254         return QString();
255     }
256 
257     QString elementName = xml.name().toString();
258     xml.readNext();
259 
260     // qCDebug(ARTIKULATE_PARSER()) << "parsed: " << elementName << " / " << xml.text().toString();
261     return xml.text().toString();
262 }
263 
serializedDocument(std::shared_ptr<IEditableCourse> course,bool trainingExport)264 QDomDocument CourseParser::serializedDocument(std::shared_ptr<IEditableCourse> course, bool trainingExport)
265 {
266     QDomDocument document;
267     // prepare xml header
268     QDomProcessingInstruction header = document.createProcessingInstruction(QStringLiteral("xml"), QStringLiteral("version=\"1.0\""));
269     document.appendChild(header);
270 
271     // create main element
272     QDomElement root = document.createElement(QStringLiteral("course"));
273     document.appendChild(root);
274 
275     QDomElement idElement = document.createElement(QStringLiteral("id"));
276     QDomElement titleElement = document.createElement(QStringLiteral("title"));
277     QDomElement descriptionElement = document.createElement(QStringLiteral("description"));
278     QDomElement languageElement = document.createElement(QStringLiteral("language"));
279 
280     idElement.appendChild(document.createTextNode(course->id()));
281     titleElement.appendChild(document.createTextNode(course->title()));
282     descriptionElement.appendChild(document.createTextNode(course->description()));
283     languageElement.appendChild(document.createTextNode(course->id()));
284 
285     QDomElement unitListElement = document.createElement(QStringLiteral("units"));
286     // create units
287     for (auto unit : course->units()) {
288         QDomElement unitElement = document.createElement(QStringLiteral("unit"));
289 
290         QDomElement unitIdElement = document.createElement(QStringLiteral("id"));
291         QDomElement unitTitleElement = document.createElement(QStringLiteral("title"));
292         QDomElement unitPhraseListElement = document.createElement(QStringLiteral("phrases"));
293         unitIdElement.appendChild(document.createTextNode(unit->id()));
294         unitTitleElement.appendChild(document.createTextNode(unit->title()));
295 
296         // construct phrases
297         for (auto &phrase : unit->phrases()) {
298             if (trainingExport && phrase->soundFileUrl().isEmpty()) {
299                 continue;
300             }
301             unitPhraseListElement.appendChild(serializedPhrase(std::static_pointer_cast<IEditablePhrase>(phrase), document));
302         }
303 
304         if (trainingExport && unitPhraseListElement.childNodes().isEmpty()) {
305             continue;
306         }
307 
308         // construct the unit element
309         unitElement.appendChild(unitIdElement);
310         if (!unit->foreignId().isEmpty()) {
311             QDomElement unitForeignIdElement = document.createElement(QStringLiteral("foreignId"));
312             unitForeignIdElement.appendChild(document.createTextNode(unit->foreignId()));
313             unitElement.appendChild(unitForeignIdElement);
314         }
315         unitElement.appendChild(unitTitleElement);
316         unitElement.appendChild(unitPhraseListElement);
317 
318         unitListElement.appendChild(unitElement);
319     }
320 
321     root.appendChild(idElement);
322     if (!course->foreignId().isEmpty()) {
323         QDomElement courseForeignIdElement = document.createElement(QStringLiteral("foreignId"));
324         courseForeignIdElement.appendChild(document.createTextNode(course->foreignId()));
325         root.appendChild(courseForeignIdElement);
326     }
327     root.appendChild(titleElement);
328     root.appendChild(descriptionElement);
329     root.appendChild(languageElement);
330     root.appendChild(unitListElement);
331 
332     return document;
333 }
334 
serializedPhrase(std::shared_ptr<IEditablePhrase> phrase,QDomDocument & document)335 QDomElement CourseParser::serializedPhrase(std::shared_ptr<IEditablePhrase> phrase, QDomDocument &document)
336 {
337     QDomElement phraseElement = document.createElement(QStringLiteral("phrase"));
338     QDomElement phraseIdElement = document.createElement(QStringLiteral("id"));
339     QDomElement phraseTextElement = document.createElement(QStringLiteral("text"));
340     QDomElement phrasei18nTextElement = document.createElement(QStringLiteral("i18nText"));
341     QDomElement phraseSoundFileElement = document.createElement(QStringLiteral("soundFile"));
342     QDomElement phraseTypeElement = document.createElement(QStringLiteral("type"));
343     QDomElement phraseEditStateElement = document.createElement(QStringLiteral("editState"));
344     QDomElement phrasePhonemeListElement = document.createElement(QStringLiteral("phonemes"));
345 
346     phraseIdElement.appendChild(document.createTextNode(phrase->id()));
347     phraseTextElement.appendChild(document.createTextNode(phrase->text()));
348     phrasei18nTextElement.appendChild(document.createTextNode(phrase->i18nText()));
349     phraseSoundFileElement.appendChild(document.createTextNode(phrase->sound().fileName()));
350     phraseTypeElement.appendChild(document.createTextNode(phrase->typeString()));
351     phraseEditStateElement.appendChild(document.createTextNode(phrase->editStateString()));
352 
353     // add phonemes
354     for (auto &phoneme : phrase->phonemes()) {
355         QDomElement phonemeElement = document.createElement(QStringLiteral("phonemeID"));
356         phonemeElement.appendChild(document.createTextNode(phoneme->id()));
357         phrasePhonemeListElement.appendChild(phonemeElement);
358     }
359 
360     phraseElement.appendChild(phraseIdElement);
361     if (!phrase->foreignId().isEmpty()) {
362         QDomElement phraseForeignIdElement = document.createElement(QStringLiteral("foreignId"));
363         phraseForeignIdElement.appendChild(document.createTextNode(phrase->foreignId()));
364         phraseElement.appendChild(phraseForeignIdElement);
365     }
366     phraseElement.appendChild(phraseTextElement);
367     phraseElement.appendChild(phrasei18nTextElement);
368     phraseElement.appendChild(phraseSoundFileElement);
369     phraseElement.appendChild(phraseTypeElement);
370     phraseElement.appendChild(phraseEditStateElement);
371     phraseElement.appendChild(phrasePhonemeListElement);
372 
373     return phraseElement;
374 }
375 
exportCourseToGhnsPackage(std::shared_ptr<IEditableCourse> course,const QString & exportPath)376 bool CourseParser::exportCourseToGhnsPackage(std::shared_ptr<IEditableCourse> course, const QString &exportPath)
377 {
378     // filename
379     const QString fileName = course->id() + ".tar.bz2";
380     KTar tar = KTar(exportPath + '/' + fileName, QStringLiteral("application/x-bzip"));
381     if (!tar.open(QIODevice::WriteOnly)) {
382         qCWarning(ARTIKULATE_CORE()) << "Unable to open tar file" << exportPath + '/' + fileName << "in write mode, aborting.";
383         return false;
384     }
385 
386     for (auto &unit : course->units()) {
387         for (auto &phrase : unit->phrases()) {
388             if (QFile::exists(phrase->soundFileUrl())) {
389                 tar.addLocalFile(phrase->soundFileUrl(), phrase->id() + ".ogg");
390             }
391         }
392     }
393 
394     tar.writeFile(course->id() + ".xml", CourseParser::serializedDocument(course, true).toByteArray());
395 
396     tar.close();
397     return true;
398 }
399