1 /*
2 This file is part of the syndication library
3 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7
8 #include <rss2/category.h>
9 #include <rss2/cloud.h>
10 #include <rss2/document.h>
11 #include <rss2/image.h>
12 #include <rss2/item.h>
13 #include <rss2/textinput.h>
14
15 #include <constants.h>
16 #include <documentvisitor.h>
17 #include <tools.h>
18
19 #include <QDomDocument>
20 #include <QList>
21 #include <QSet>
22 #include <QString>
23
24 #include <vector>
25
26 namespace Syndication
27 {
28 namespace RSS2
29 {
30 class Document::DocumentPrivate
31 {
32 public:
DocumentPrivate()33 DocumentPrivate()
34 : itemDescriptionIsCDATA(false)
35 , itemDescriptionContainsMarkup(false)
36 , itemDescGuessed(false)
37 , itemTitleIsCDATA(false)
38 , itemTitleContainsMarkup(false)
39 , itemTitlesGuessed(false)
40 {
41 }
42 mutable bool itemDescriptionIsCDATA;
43 mutable bool itemDescriptionContainsMarkup;
44 mutable bool itemDescGuessed;
45 mutable bool itemTitleIsCDATA;
46 mutable bool itemTitleContainsMarkup;
47 mutable bool itemTitlesGuessed;
48 };
49
Document(const QDomElement & element)50 Document::Document(const QDomElement &element)
51 : SpecificDocument()
52 , ElementWrapper(element)
53 , d(new DocumentPrivate)
54 {
55 }
56
fromXML(const QDomDocument & doc)57 Document Document::fromXML(const QDomDocument &doc)
58 {
59 QDomNode channelNode = doc.namedItem(QStringLiteral("rss")).namedItem(QStringLiteral("channel"));
60
61 return Document(channelNode.toElement());
62 }
63
Document()64 Document::Document()
65 : SpecificDocument()
66 , ElementWrapper()
67 , d(new DocumentPrivate)
68 {
69 }
70
// Copy constructor: copies both base sub-objects and takes over other's
// private data handle, so the copy sees the same cached format-guess state.
Document::Document(const Document &other)
    : SpecificDocument(other)
    , ElementWrapper(other)
    , d(other.d)
{
}
77
~Document()78 Document::~Document()
79 {
80 }
81
// Assignment: copies the wrapped element via the ElementWrapper base, then
// takes over other's private data handle.
Document &Document::operator=(const Document &other)
{
    ElementWrapper::operator=(other);

    d = other.d;

    return *this;
}
isValid() const88 bool Document::isValid() const
89 {
90 return !isNull();
91 }
92
title() const93 QString Document::title() const
94 {
95 return extractElementTextNS(QString(), QStringLiteral("title"));
96 }
97
link() const98 QString Document::link() const
99 {
100 return extractElementTextNS(QString(), QStringLiteral("link"));
101 }
102
description() const103 QString Document::description() const
104 {
105 const QString desc = extractElementTextNS(QString(), QStringLiteral("description"));
106 return normalize(desc);
107 }
108
language() const109 QString Document::language() const
110 {
111 const QString lang = extractElementTextNS(QString(), QStringLiteral("language"));
112
113 if (!lang.isNull()) {
114 return lang;
115 } else {
116 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("language"));
117 }
118 }
119
copyright() const120 QString Document::copyright() const
121 {
122 const QString rights = extractElementTextNS(QString(), QStringLiteral("copyright"));
123 if (!rights.isNull()) {
124 return rights;
125 } else {
126 // if <copyright> is not provided, use <dc:rights>
127 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("rights"));
128 }
129 }
130
managingEditor() const131 QString Document::managingEditor() const
132 {
133 return extractElementTextNS(QString(), QStringLiteral("managingEditor"));
134 }
135
webMaster() const136 QString Document::webMaster() const
137 {
138 return extractElementTextNS(QString(), QStringLiteral("webMaster"));
139 }
140
pubDate() const141 time_t Document::pubDate() const
142 {
143 QString str = extractElementTextNS(QString(), QStringLiteral("pubDate"));
144
145 if (!str.isNull()) {
146 return parseDate(str, RFCDate);
147 } else {
148 // if there is no pubDate, check for dc:date
149 str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date"));
150 return parseDate(str, ISODate);
151 }
152 }
153
lastBuildDate() const154 time_t Document::lastBuildDate() const
155 {
156 const QString str = extractElementTextNS(QString(), QStringLiteral("lastBuildDate"));
157
158 return parseDate(str, RFCDate);
159 }
160
categories() const161 QList<Category> Document::categories() const
162 {
163 QList<Category> categories;
164
165 QList<QDomElement> catNodes = elementsByTagNameNS(QString(), QStringLiteral("category"));
166 categories.reserve(catNodes.count());
167 QList<QDomElement>::ConstIterator it = catNodes.constBegin();
168 QList<QDomElement>::ConstIterator end(catNodes.constEnd());
169 for (; it != end; ++it) {
170 categories.append(Category(*it));
171 }
172
173 return categories;
174 }
175
generator() const176 QString Document::generator() const
177 {
178 return extractElementTextNS(QString(), QStringLiteral("generator"));
179 }
180
docs() const181 QString Document::docs() const
182 {
183 return extractElementTextNS(QString(), QStringLiteral("docs"));
184 }
185
cloud() const186 Cloud Document::cloud() const
187 {
188 return Cloud(firstElementByTagNameNS(QString(), QStringLiteral("cloud")));
189 }
190
ttl() const191 int Document::ttl() const
192 {
193 bool ok;
194 int c;
195
196 QString text = extractElementTextNS(QString(), QStringLiteral("ttl"));
197 c = text.toInt(&ok);
198 return ok ? c : 0;
199 }
200
image() const201 Image Document::image() const
202 {
203 return Image(firstElementByTagNameNS(QString(), QStringLiteral("image")));
204 }
205
textInput() const206 TextInput Document::textInput() const
207 {
208 TextInput ti = TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textInput")));
209
210 if (!ti.isNull()) {
211 return ti;
212 }
213
214 // Netscape's version of RSS 0.91 has textinput, not textInput
215 return TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textinput")));
216 }
217
skipHours() const218 QSet<int> Document::skipHours() const
219 {
220 QSet<int> skipHours;
221 QDomElement skipHoursNode = firstElementByTagNameNS(QString(), QStringLiteral("skipHours"));
222 if (!skipHoursNode.isNull()) {
223 ElementWrapper skipHoursWrapper(skipHoursNode);
224 bool ok = false;
225 QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(QString(), QStringLiteral("hour"));
226 QList<QDomElement>::ConstIterator it = hours.constBegin();
227 const QList<QDomElement>::ConstIterator end(hours.constEnd());
228 for (; it != end; ++it) {
229 int h = (*it).text().toInt(&ok);
230 if (ok) {
231 skipHours.insert(h);
232 }
233 }
234 }
235
236 return skipHours;
237 }
238
skipDays() const239 QSet<Document::DayOfWeek> Document::skipDays() const
240 {
241 QSet<DayOfWeek> skipDays;
242 QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QStringLiteral("skipDays"));
243 if (!skipDaysNode.isNull()) {
244 ElementWrapper skipDaysWrapper(skipDaysNode);
245 QHash<QString, DayOfWeek> weekDays;
246
247 weekDays[QStringLiteral("Monday")] = Monday;
248 weekDays[QStringLiteral("Tuesday")] = Tuesday;
249 weekDays[QStringLiteral("Wednesday")] = Wednesday;
250 weekDays[QStringLiteral("Thursday")] = Thursday;
251 weekDays[QStringLiteral("Friday")] = Friday;
252 weekDays[QStringLiteral("Saturday")] = Saturday;
253 weekDays[QStringLiteral("Sunday")] = Sunday;
254
255 QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QStringLiteral("day"));
256 const QList<QDomElement>::ConstIterator daysEnd(days.constEnd());
257 for (QList<QDomElement>::ConstIterator it = days.constBegin(); it != daysEnd; ++it) {
258 if (weekDays.contains((*it).text())) {
259 skipDays.insert(weekDays[(*it).text()]);
260 }
261 }
262 }
263
264 return skipDays;
265 }
266
items() const267 QList<Item> Document::items() const
268 {
269 QList<Item> items;
270
271 QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QStringLiteral("item"));
272
273 DocumentPtr doccpy(new Document(*this));
274 items.reserve(itemNodes.count());
275
276 const QList<QDomElement>::ConstIterator end(itemNodes.constEnd());
277 for (QList<QDomElement>::ConstIterator it = itemNodes.constBegin(); it != end; ++it) {
278 items.append(Item(*it, doccpy));
279 }
280
281 return items;
282 }
unhandledElements() const283 QList<QDomElement> Document::unhandledElements() const
284 {
285 // TODO: do not hardcode this list here
286 static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList
287 if (handled.empty()) {
288 handled.reserve(22);
289 handled.push_back(ElementType(QStringLiteral("title")));
290 handled.push_back(ElementType(QStringLiteral("link")));
291 handled.push_back(ElementType(QStringLiteral("description")));
292 handled.push_back(ElementType(QStringLiteral("language")));
293 handled.push_back(ElementType(QStringLiteral("copyright")));
294 handled.push_back(ElementType(QStringLiteral("managingEditor")));
295 handled.push_back(ElementType(QStringLiteral("webMaster")));
296 handled.push_back(ElementType(QStringLiteral("pubDate")));
297 handled.push_back(ElementType(QStringLiteral("lastBuildDate")));
298 handled.push_back(ElementType(QStringLiteral("skipDays")));
299 handled.push_back(ElementType(QStringLiteral("skipHours")));
300 handled.push_back(ElementType(QStringLiteral("item")));
301 handled.push_back(ElementType(QStringLiteral("textinput")));
302 handled.push_back(ElementType(QStringLiteral("textInput")));
303 handled.push_back(ElementType(QStringLiteral("image")));
304 handled.push_back(ElementType(QStringLiteral("ttl")));
305 handled.push_back(ElementType(QStringLiteral("generator")));
306 handled.push_back(ElementType(QStringLiteral("docs")));
307 handled.push_back(ElementType(QStringLiteral("cloud")));
308 handled.push_back(ElementType(QStringLiteral("language"), dublinCoreNamespace()));
309 handled.push_back(ElementType(QStringLiteral("rights"), dublinCoreNamespace()));
310 handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace()));
311 }
312
313 QList<QDomElement> notHandled;
314
315 QDomNodeList children = element().childNodes();
316 const int numChildren = children.size();
317 for (int i = 0; i < numChildren; ++i) {
318 QDomElement el = children.at(i).toElement();
319 if (!el.isNull() //
320 && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) {
321 notHandled.append(el);
322 }
323 }
324
325 return notHandled;
326 }
327
debugInfo() const328 QString Document::debugInfo() const
329 {
330 QString info;
331 info += QLatin1String("### Document: ###################\n");
332 if (!title().isNull()) {
333 info += QLatin1String("title: #") + title() + QLatin1String("#\n");
334 }
335 if (!description().isNull()) {
336 info += QLatin1String("description: #") + description() + QLatin1String("#\n");
337 }
338 if (!link().isNull()) {
339 info += QLatin1String("link: #") + link() + QLatin1String("#\n");
340 }
341 if (!language().isNull()) {
342 info += QLatin1String("language: #") + language() + QLatin1String("#\n");
343 }
344 if (!copyright().isNull()) {
345 info += QLatin1String("copyright: #") + copyright() + QLatin1String("#\n");
346 }
347 if (!managingEditor().isNull()) {
348 info += QLatin1String("managingEditor: #") + managingEditor() + QLatin1String("#\n");
349 }
350 if (!webMaster().isNull()) {
351 info += QLatin1String("webMaster: #") + webMaster() + QLatin1String("#\n");
352 }
353
354 QString dpubdate = dateTimeToString(pubDate());
355 if (!dpubdate.isNull()) {
356 info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n");
357 }
358
359 QString dlastbuilddate = dateTimeToString(lastBuildDate());
360 if (!dlastbuilddate.isNull()) {
361 info += QLatin1String("lastBuildDate: #") + dlastbuilddate + QLatin1String("#\n");
362 }
363
364 if (!textInput().isNull()) {
365 info += textInput().debugInfo();
366 }
367 if (!cloud().isNull()) {
368 info += cloud().debugInfo();
369 }
370 if (!image().isNull()) {
371 info += image().debugInfo();
372 }
373
374 QList<Category> cats = categories();
375
376 const QList<Category>::ConstIterator end(cats.constEnd());
377 for (QList<Category>::ConstIterator it = cats.constBegin(); it != end; ++it) {
378 info += (*it).debugInfo();
379 }
380 QList<Item> litems = items();
381 const QList<Item>::ConstIterator itEnd(litems.constEnd());
382 for (QList<Item>::ConstIterator it = litems.constBegin(); it != itEnd; ++it) {
383 info += (*it).debugInfo();
384 }
385 info += QLatin1String("### Document end ################\n");
386 return info;
387 }
388
getItemTitleFormatInfo(bool * isCDATA,bool * containsMarkup) const389 void Document::getItemTitleFormatInfo(bool *isCDATA, bool *containsMarkup) const
390 {
391 if (!d->itemTitlesGuessed) {
392 QString titles;
393 QList<Item> litems = items();
394
395 if (litems.isEmpty()) {
396 d->itemTitlesGuessed = true;
397 return;
398 }
399
400 QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("title"));
401 d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection();
402
403 int nmax = litems.size() < 10 ? litems.size() : 10; // we check a maximum of 10 items
404 int i = 0;
405
406 QList<Item>::ConstIterator it = litems.constBegin();
407
408 while (i < nmax) {
409 titles += (*it).originalTitle();
410 ++it;
411 ++i;
412 }
413
414 d->itemTitleContainsMarkup = stringContainsMarkup(titles);
415 d->itemTitlesGuessed = true;
416 }
417
418 if (isCDATA != nullptr) {
419 *isCDATA = d->itemTitleIsCDATA;
420 }
421 if (containsMarkup != nullptr) {
422 *containsMarkup = d->itemTitleContainsMarkup;
423 }
424 }
425
getItemDescriptionFormatInfo(bool * isCDATA,bool * containsMarkup) const426 void Document::getItemDescriptionFormatInfo(bool *isCDATA, bool *containsMarkup) const
427 {
428 if (!d->itemDescGuessed) {
429 QString desc;
430 QList<Item> litems = items();
431
432 if (litems.isEmpty()) {
433 d->itemDescGuessed = true;
434 return;
435 }
436
437 QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("description"));
438 d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection();
439
440 int nmax = litems.size() < 10 ? litems.size() : 10; // we check a maximum of 10 items
441 int i = 0;
442
443 QList<Item>::ConstIterator it = litems.constBegin();
444
445 while (i < nmax) {
446 desc += (*it).originalDescription();
447 ++it;
448 ++i;
449 }
450
451 d->itemDescriptionContainsMarkup = stringContainsMarkup(desc);
452 d->itemDescGuessed = true;
453 }
454
455 if (isCDATA != nullptr) {
456 *isCDATA = d->itemDescriptionIsCDATA;
457 }
458 if (containsMarkup != nullptr) {
459 *containsMarkup = d->itemDescriptionContainsMarkup;
460 }
461 }
462
accept(DocumentVisitor * visitor)463 bool Document::accept(DocumentVisitor *visitor)
464 {
465 return visitor->visitRSS2Document(this);
466 }
467
468 } // namespace RSS2
469 } // namespace Syndication
470