1 // For license of this file, see <project-root-folder>/LICENSE.md.
2 
3 #include "services/standard/feedparser.h"
4 
5 #include "exceptions/applicationexception.h"
6 #include "miscellaneous/application.h"
7 #include "network-web/webfactory.h"
8 
9 #include <QDebug>
10 #include <QRegularExpression>
11 
12 #include <utility>
13 
FeedParser(QString data)14 FeedParser::FeedParser(QString data) : m_xmlData(std::move(data)), m_mrssNamespace(QSL("http://search.yahoo.com/mrss/")) {
15   QString error;
16 
17   if (!m_xml.setContent(m_xmlData, true, &error)) {
18     throw ApplicationException(QObject::tr("XML problem: %1").arg(error));
19   }
20 }
21 
messages()22 QList<Message> FeedParser::messages() {
23   QString feed_author = feedAuthor();
24   QList<Message> messages;
25   QDateTime current_time = QDateTime::currentDateTime();
26 
27   // Pull out all messages.
28   QDomNodeList messages_in_xml = messageElements();
29 
30   for (int i = 0; i < messages_in_xml.size(); i++) {
31     QDomNode message_item = messages_in_xml.item(i);
32 
33     try {
34       Message new_message = extractMessage(message_item.toElement(), current_time);
35 
36       if (new_message.m_author.isEmpty() && !feed_author.isEmpty()) {
37         new_message.m_author = feed_author;
38       }
39 
40       new_message.m_url = new_message.m_url.replace(QRegularExpression(QSL("[\\t\\n]")), QString());
41 
42       messages.append(new_message);
43     }
44     catch (const ApplicationException& ex) {
45       qDebugNN << LOGSEC_CORE
46                << "Problem when extracting message: "
47                << ex.message();
48     }
49   }
50 
51   return messages;
52 }
53 
mrssGetEnclosures(const QDomElement & msg_element) const54 QList<Enclosure> FeedParser::mrssGetEnclosures(const QDomElement& msg_element) const {
55   QList<Enclosure> enclosures;
56   auto content_list = msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("content"));
57 
58   for (int i = 0; i < content_list.size(); i++) {
59     QDomElement elem_content = content_list.at(i).toElement();
60     QString url = elem_content.attribute(QSL("url"));
61     QString type = elem_content.attribute(QSL("type"));
62 
63     if (!url.isEmpty() && !type.isEmpty()) {
64       enclosures.append(Enclosure(url, type));
65     }
66   }
67 
68   auto thumbnail_list = msg_element.elementsByTagNameNS(m_mrssNamespace, QSL("thumbnail"));
69 
70   for (int i = 0; i < thumbnail_list.size(); i++) {
71     QDomElement elem_content = thumbnail_list.at(i).toElement();
72     QString url = elem_content.attribute(QSL("url"));
73 
74     if (!url.isEmpty()) {
75       enclosures.append(Enclosure(url, QSL("image/png")));
76     }
77   }
78 
79   return enclosures;
80 }
81 
// Returns the text of the first element named xml_path (in the MRSS namespace)
// located via elementsByTagNameNS() on the given message element.
//
// When nothing matches, the lookup degrades to a null element whose text is
// handed back as-is.
QString FeedParser::mrssTextFromPath(const QDomElement& msg_element, const QString& xml_path) const {
  const auto matches = msg_element.elementsByTagNameNS(m_mrssNamespace, xml_path);
  const QDomElement first_match = matches.at(0).toElement();

  return first_match.text();
}
87 
rawXmlChild(const QDomElement & container) const88 QString FeedParser::rawXmlChild(const QDomElement& container) const {
89   QString raw;
90   auto children = container.childNodes();
91 
92   for (int i = 0; i < children.size(); i++) {
93     if (children.at(i).isCDATASection()) {
94       raw += children.at(i).toCDATASection().data();
95     }
96     else {
97       QString raw_ch;
98       QTextStream str(&raw_ch);
99 
100       children.at(i).save(str, 0);
101       raw += qApp->web()->unescapeHtml(raw_ch);
102     }
103   }
104 
105   return raw;
106 }
107 
textsFromPath(const QDomElement & element,const QString & namespace_uri,const QString & xml_path,bool only_first) const108 QStringList FeedParser::textsFromPath(const QDomElement& element, const QString& namespace_uri,
109                                       const QString& xml_path, bool only_first) const {
110   QStringList paths = xml_path.split('/');
111   QStringList result;
112   QList<QDomElement> current_elements;
113 
114   current_elements.append(element);
115 
116   while (!paths.isEmpty()) {
117     QList<QDomElement> next_elements;
118     QString next_local_name = paths.takeFirst();
119 
120     for (const QDomElement& elem : current_elements) {
121       QDomNodeList elements = elem.elementsByTagNameNS(namespace_uri, next_local_name);
122 
123       for (int i = 0; i < elements.size(); i++) {
124         next_elements.append(elements.at(i).toElement());
125 
126         if (only_first) {
127           break;
128         }
129       }
130 
131       if (next_elements.size() == 1 && only_first) {
132         break;
133       }
134     }
135 
136     current_elements = next_elements;
137   }
138 
139   if (!current_elements.isEmpty()) {
140     for (const QDomElement& elem : qAsConst(current_elements)) {
141       result.append(elem.text());
142     }
143   }
144 
145   return result;
146 }
147 
feedAuthor() const148 QString FeedParser::feedAuthor() const {
149   return QL1S("");
150 }
151