1 /*
2  * Bittorrent Client using Qt and libtorrent.
3  * Copyright (C) 2015, 2017  Vladimir Golovnev <glassez@yandex.ru>
4  * Copyright (C) 2010  Christophe Dumez <chris@qbittorrent.org>
5  * Copyright (C) 2010  Arnaud Demaiziere <arnaud@qbittorrent.org>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  *
21  * In addition, as a special exception, the copyright holders give permission to
22  * link this program with the OpenSSL project's "OpenSSL" library (or with
23  * modified versions of it that use the same license as the "OpenSSL" library),
24  * and distribute the linked executables. You must obey the GNU General Public
25  * License in all respects for all of the code used other than "OpenSSL".  If you
26  * modify file(s), you may extend this exception to your version of the file(s),
27  * but you are not obligated to do so. If you do not wish to do so, delete this
28  * exception statement from your version.
29  */
30 
31 #include "rss_feed.h"
32 
33 #include <algorithm>
34 #include <vector>
35 
36 #include <QDir>
37 #include <QJsonArray>
38 #include <QJsonDocument>
39 #include <QJsonObject>
40 #include <QJsonValue>
41 #include <QUrl>
42 
43 #include "base/asyncfilestorage.h"
44 #include "base/global.h"
45 #include "base/logger.h"
46 #include "base/net/downloadmanager.h"
47 #include "base/profile.h"
48 #include "base/utils/fs.h"
49 #include "rss_article.h"
50 #include "rss_parser.h"
51 #include "rss_session.h"
52 
// JSON object keys used by Feed::toJsonValue() when serializing a feed.
// KEY_UID and KEY_URL are always written.
const QString KEY_UID(QStringLiteral("uid"));
const QString KEY_URL(QStringLiteral("url"));
// The remaining keys are written only when the feed is serialized together with its data.
const QString KEY_TITLE(QStringLiteral("title"));
const QString KEY_LASTBUILDDATE(QStringLiteral("lastBuildDate"));
const QString KEY_ISLOADING(QStringLiteral("isLoading"));
const QString KEY_HASERROR(QStringLiteral("hasError"));
const QString KEY_ARTICLES(QStringLiteral("articles"));

// Lets the definitions below omit the RSS:: qualifier.
using namespace RSS;
62 
Feed(const QUuid & uid,const QString & url,const QString & path,Session * session)63 Feed::Feed(const QUuid &uid, const QString &url, const QString &path, Session *session)
64     : Item(path)
65     , m_session(session)
66     , m_uid(uid)
67     , m_url(url)
68 {
69     m_dataFileName = QString::fromLatin1(m_uid.toRfc4122().toHex()) + QLatin1String(".json");
70 
71     // Move to new file naming scheme (since v4.1.2)
72     const QString legacyFilename
73     {Utils::Fs::toValidFileSystemName(m_url, false, QLatin1String("_"))
74                 + QLatin1String(".json")};
75     const QDir storageDir {m_session->dataFileStorage()->storageDir()};
76     if (!QFile::exists(storageDir.absoluteFilePath(m_dataFileName)))
77         QFile::rename(storageDir.absoluteFilePath(legacyFilename), storageDir.absoluteFilePath(m_dataFileName));
78 
79     m_parser = new Private::Parser(m_lastBuildDate);
80     m_parser->moveToThread(m_session->workingThread());
81     connect(this, &Feed::destroyed, m_parser, &Private::Parser::deleteLater);
82     connect(m_parser, &Private::Parser::finished, this, &Feed::handleParsingFinished);
83 
84     connect(m_session, &Session::maxArticlesPerFeedChanged, this, &Feed::handleMaxArticlesPerFeedChanged);
85 
86     if (m_session->isProcessingEnabled())
87         downloadIcon();
88     else
89         connect(m_session, &Session::processingStateChanged, this, &Feed::handleSessionProcessingEnabledChanged);
90 
91     Net::DownloadManager::instance()->registerSequentialService(Net::ServiceID::fromURL(m_url));
92 
93     load();
94 }
95 
// Announces the destruction via aboutToBeDestroyed() and then asks Utils::Fs
// to remove the file at `m_iconPath` (the path is set when the icon download succeeds).
Feed::~Feed()
{
    emit aboutToBeDestroyed(this);
    Utils::Fs::forceRemove(m_iconPath);
}
101 
// Returns a copy of the date-ordered article list (`m_articlesByDate`).
QList<Article *> Feed::articles() const
{
    return m_articlesByDate;
}
106 
markAsRead()107 void Feed::markAsRead()
108 {
109     const int oldUnreadCount = m_unreadCount;
110     for (Article *article : asConst(m_articles))
111     {
112         if (!article->isRead())
113         {
114             article->disconnect(this);
115             article->markAsRead();
116             --m_unreadCount;
117             emit articleRead(article);
118         }
119     }
120 
121     if (m_unreadCount != oldUnreadCount)
122     {
123         m_dirty = true;
124         store();
125         emit unreadCountChanged(this);
126     }
127 }
128 
refresh()129 void Feed::refresh()
130 {
131     if (m_downloadHandler)
132         m_downloadHandler->cancel();
133 
134     // NOTE: Should we allow manually refreshing for disabled session?
135 
136     m_downloadHandler = Net::DownloadManager::instance()->download(m_url);
137     connect(m_downloadHandler, &Net::DownloadHandler::finished, this, &Feed::handleDownloadFinished);
138 
139     m_isLoading = true;
140     emit stateChanged(this);
141 }
142 
// Returns the unique identifier this feed was constructed with.
QUuid Feed::uid() const
{
    return m_uid;
}
147 
// Returns the URL this feed was constructed with.
QString Feed::url() const
{
    return m_url;
}
152 
// Returns the feed title; it is updated from the parsing result in handleParsingFinished().
QString Feed::title() const
{
    return m_title;
}
157 
// Returns true from the moment refresh() is called until the download fails
// or the parsing of the downloaded data has finished.
bool Feed::isLoading() const
{
    return m_isLoading;
}
162 
// Returns the last build date taken from the most recent parsing result that provided one.
QString Feed::lastBuildDate() const
{
    return m_lastBuildDate;
}
167 
// Returns the number of articles of this feed that are currently counted as unread.
int Feed::unreadCount() const
{
    return m_unreadCount;
}
172 
articleByGUID(const QString & guid) const173 Article *Feed::articleByGUID(const QString &guid) const
174 {
175     return m_articles.value(guid);
176 }
177 
handleMaxArticlesPerFeedChanged(const int n)178 void Feed::handleMaxArticlesPerFeedChanged(const int n)
179 {
180     while (m_articlesByDate.size() > n)
181         removeOldestArticle();
182     // We don't need store articles here
183 }
184 
handleIconDownloadFinished(const Net::DownloadResult & result)185 void Feed::handleIconDownloadFinished(const Net::DownloadResult &result)
186 {
187     if (result.status == Net::DownloadStatus::Success)
188     {
189         m_iconPath = Utils::Fs::toUniformPath(result.filePath);
190         emit iconLoaded(this);
191     }
192 }
193 
// Returns true if the last download failed or the last parsing reported an error.
bool Feed::hasError() const
{
    return m_hasError;
}
198 
handleDownloadFinished(const Net::DownloadResult & result)199 void Feed::handleDownloadFinished(const Net::DownloadResult &result)
200 {
201     m_downloadHandler = nullptr; // will be deleted by DownloadManager later
202 
203     if (result.status == Net::DownloadStatus::Success)
204     {
205         LogMsg(tr("RSS feed at '%1' is successfully downloaded. Starting to parse it.")
206                 .arg(result.url));
207         // Parse the download RSS
208         m_parser->parse(result.data);
209     }
210     else
211     {
212         m_isLoading = false;
213         m_hasError = true;
214 
215         LogMsg(tr("Failed to download RSS feed at '%1'. Reason: %2")
216                .arg(result.url, result.errorString), Log::WARNING);
217 
218         emit stateChanged(this);
219     }
220 }
221 
handleParsingFinished(const RSS::Private::ParsingResult & result)222 void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result)
223 {
224     m_hasError = !result.error.isEmpty();
225 
226     if (!result.title.isEmpty() && (title() != result.title))
227     {
228         m_title = result.title;
229         m_dirty = true;
230         emit titleChanged(this);
231     }
232 
233     if (!result.lastBuildDate.isEmpty())
234     {
235         m_lastBuildDate = result.lastBuildDate;
236         m_dirty = true;
237     }
238 
239     // For some reason, the RSS feed may contain malformed XML data and it may not be
240     // successfully parsed by the XML parser. We are still trying to load as many articles
241     // as possible until we encounter corrupted data. So we can have some articles here
242     // even in case of parsing error.
243     const int newArticlesCount = updateArticles(result.articles);
244     store();
245 
246     if (m_hasError)
247     {
248         LogMsg(tr("Failed to parse RSS feed at '%1'. Reason: %2").arg(m_url, result.error)
249                , Log::WARNING);
250     }
251     LogMsg(tr("RSS feed at '%1' updated. Added %2 new articles.")
252            .arg(url(), QString::number(newArticlesCount)));
253 
254     m_isLoading = false;
255     emit stateChanged(this);
256 }
257 
load()258 void Feed::load()
259 {
260     QFile file(m_session->dataFileStorage()->storageDir().absoluteFilePath(m_dataFileName));
261 
262     if (!file.exists())
263     {
264         loadArticlesLegacy();
265         m_dirty = true;
266         store(); // convert to new format
267     }
268     else if (file.open(QFile::ReadOnly))
269     {
270         loadArticles(file.readAll());
271         file.close();
272     }
273     else
274     {
275         LogMsg(tr("Couldn't read RSS Session data from %1. Error: %2")
276                .arg(m_dataFileName, file.errorString())
277                , Log::WARNING);
278     }
279 }
280 
loadArticles(const QByteArray & data)281 void Feed::loadArticles(const QByteArray &data)
282 {
283     QJsonParseError jsonError;
284     const QJsonDocument jsonDoc = QJsonDocument::fromJson(data, &jsonError);
285     if (jsonError.error != QJsonParseError::NoError)
286     {
287         LogMsg(tr("Couldn't parse RSS Session data. Error: %1").arg(jsonError.errorString())
288                , Log::WARNING);
289         return;
290     }
291 
292     if (!jsonDoc.isArray())
293     {
294         LogMsg(tr("Couldn't load RSS Session data. Invalid data format."), Log::WARNING);
295         return;
296     }
297 
298     const QJsonArray jsonArr = jsonDoc.array();
299     int i = -1;
300     for (const QJsonValue &jsonVal : jsonArr)
301     {
302         ++i;
303         if (!jsonVal.isObject())
304         {
305             LogMsg(tr("Couldn't load RSS article '%1#%2'. Invalid data format.").arg(m_url).arg(i)
306                    , Log::WARNING);
307             continue;
308         }
309 
310         try
311         {
312             auto article = new Article(this, jsonVal.toObject());
313             if (!addArticle(article))
314                 delete article;
315         }
316         catch (const RuntimeError &) {}
317     }
318 }
319 
loadArticlesLegacy()320 void Feed::loadArticlesLegacy()
321 {
322     const SettingsPtr qBTRSSFeeds = Profile::instance()->applicationSettings(QStringLiteral("qBittorrent-rss-feeds"));
323     const QVariantHash allOldItems = qBTRSSFeeds->value("old_items").toHash();
324 
325     for (const QVariant &var : asConst(allOldItems.value(m_url).toList()))
326     {
327         auto hash = var.toHash();
328         // update legacy keys
329         hash[Article::KeyLink] = hash.take(QLatin1String("news_link"));
330         hash[Article::KeyTorrentURL] = hash.take(QLatin1String("torrent_url"));
331         hash[Article::KeyIsRead] = hash.take(QLatin1String("read"));
332         try
333         {
334             auto article = new Article(this, hash);
335             if (!addArticle(article))
336                 delete article;
337         }
338         catch (const RuntimeError &) {}
339     }
340 }
341 
store()342 void Feed::store()
343 {
344     if (!m_dirty) return;
345 
346     m_dirty = false;
347     m_savingTimer.stop();
348 
349     QJsonArray jsonArr;
350     for (Article *article :asConst(m_articles))
351         jsonArr << article->toJsonObject();
352 
353     m_session->dataFileStorage()->store(m_dataFileName, QJsonDocument(jsonArr).toJson());
354 }
355 
storeDeferred()356 void Feed::storeDeferred()
357 {
358     if (!m_savingTimer.isActive())
359         m_savingTimer.start(5 * 1000, this);
360 }
361 
addArticle(Article * article)362 bool Feed::addArticle(Article *article)
363 {
364     Q_ASSERT(article);
365     Q_ASSERT(!m_articles.contains(article->guid()));
366 
367     // Insertion sort
368     const int maxArticles = m_session->maxArticlesPerFeed();
369     const auto lowerBound = std::lower_bound(m_articlesByDate.begin(), m_articlesByDate.end()
370                                        , article->date(), Article::articleDateRecentThan);
371     if ((lowerBound - m_articlesByDate.begin()) >= maxArticles)
372         return false; // we reach max articles
373 
374     m_articles[article->guid()] = article;
375     m_articlesByDate.insert(lowerBound, article);
376     if (!article->isRead())
377     {
378         increaseUnreadCount();
379         connect(article, &Article::read, this, &Feed::handleArticleRead);
380     }
381 
382     m_dirty = true;
383     emit newArticle(article);
384 
385     if (m_articlesByDate.size() > maxArticles)
386         removeOldestArticle();
387 
388     return true;
389 }
390 
removeOldestArticle()391 void Feed::removeOldestArticle()
392 {
393     auto oldestArticle = m_articlesByDate.last();
394     emit articleAboutToBeRemoved(oldestArticle);
395 
396     m_articles.remove(oldestArticle->guid());
397     m_articlesByDate.removeLast();
398     const bool isRead = oldestArticle->isRead();
399     delete oldestArticle;
400 
401     if (!isRead)
402         decreaseUnreadCount();
403 }
404 
// Increments the unread articles counter and emits unreadCountChanged().
void Feed::increaseUnreadCount()
{
    ++m_unreadCount;
    emit unreadCountChanged(this);
}
410 
// Decrements the unread articles counter and emits unreadCountChanged().
// The counter is expected to be positive (checked by an assertion).
void Feed::decreaseUnreadCount()
{
    Q_ASSERT(m_unreadCount > 0);

    --m_unreadCount;
    emit unreadCountChanged(this);
}
418 
downloadIcon()419 void Feed::downloadIcon()
420 {
421     // Download the RSS Feed icon
422     // XXX: This works for most sites but it is not perfect
423     const QUrl url(m_url);
424     const auto iconUrl = QString::fromLatin1("%1://%2/favicon.ico").arg(url.scheme(), url.host());
425     Net::DownloadManager::instance()->download(
426             Net::DownloadRequest(iconUrl).saveToFile(true)
427                 , this, &Feed::handleIconDownloadFinished);
428 }
429 
updateArticles(const QList<QVariantHash> & loadedArticles)430 int Feed::updateArticles(const QList<QVariantHash> &loadedArticles)
431 {
432     if (loadedArticles.empty())
433         return 0;
434 
435     QDateTime dummyPubDate {QDateTime::currentDateTime()};
436     QVector<QVariantHash> newArticles;
437     newArticles.reserve(loadedArticles.size());
438     for (QVariantHash article : loadedArticles)
439     {
440         // If article has no publication date we use feed update time as a fallback.
441         // To prevent processing of "out-of-limit" articles we must not assign dates
442         // that are earlier than the dates of existing articles.
443         const Article *existingArticle = articleByGUID(article[Article::KeyId].toString());
444         if (existingArticle)
445         {
446             dummyPubDate = existingArticle->date().addMSecs(-1);
447             continue;
448         }
449 
450         QVariant &articleDate = article[Article::KeyDate];
451         if (!articleDate.toDateTime().isValid())
452             articleDate = dummyPubDate;
453 
454         newArticles.append(article);
455     }
456 
457     if (newArticles.empty())
458         return 0;
459 
460     using ArticleSortAdaptor = QPair<QDateTime, const QVariantHash *>;
461     std::vector<ArticleSortAdaptor> sortData;
462     const QList<Article *> existingArticles = articles();
463     sortData.reserve(existingArticles.size() + newArticles.size());
464     std::transform(existingArticles.begin(), existingArticles.end(), std::back_inserter(sortData)
465                    , [](const Article *article)
466     {
467         return qMakePair(article->date(), nullptr);
468     });
469     std::transform(newArticles.begin(), newArticles.end(), std::back_inserter(sortData)
470                    , [](const QVariantHash &article)
471     {
472         return qMakePair(article[Article::KeyDate].toDateTime(), &article);
473     });
474 
475     // Sort article list in reverse chronological order
476     std::sort(sortData.begin(), sortData.end()
477               , [](const ArticleSortAdaptor &a1, const ArticleSortAdaptor &a2)
478     {
479         return (a1.first > a2.first);
480     });
481 
482     if (sortData.size() > static_cast<uint>(m_session->maxArticlesPerFeed()))
483         sortData.resize(m_session->maxArticlesPerFeed());
484 
485     int newArticlesCount = 0;
486     std::for_each(sortData.crbegin(), sortData.crend(), [this, &newArticlesCount](const ArticleSortAdaptor &a)
487     {
488         if (a.second)
489         {
490             addArticle(new Article {this, *a.second});
491             ++newArticlesCount;
492         }
493     });
494 
495     return newArticlesCount;
496 }
497 
// Returns the path of the downloaded icon file; the path is set only
// after an icon download has succeeded.
QString Feed::iconPath() const
{
    return m_iconPath;
}
502 
toJsonValue(const bool withData) const503 QJsonValue Feed::toJsonValue(const bool withData) const
504 {
505     QJsonObject jsonObj;
506     jsonObj.insert(KEY_UID, uid().toString());
507     jsonObj.insert(KEY_URL, url());
508 
509     if (withData)
510     {
511         jsonObj.insert(KEY_TITLE, title());
512         jsonObj.insert(KEY_LASTBUILDDATE, lastBuildDate());
513         jsonObj.insert(KEY_ISLOADING, isLoading());
514         jsonObj.insert(KEY_HASERROR, hasError());
515 
516         QJsonArray jsonArr;
517         for (Article *article : asConst(m_articles))
518             jsonArr << article->toJsonObject();
519         jsonObj.insert(KEY_ARTICLES, jsonArr);
520     }
521 
522     return jsonObj;
523 }
524 
handleSessionProcessingEnabledChanged(const bool enabled)525 void Feed::handleSessionProcessingEnabledChanged(const bool enabled)
526 {
527     if (enabled)
528     {
529         downloadIcon();
530         disconnect(m_session, &Session::processingStateChanged
531                    , this, &Feed::handleSessionProcessingEnabledChanged);
532     }
533 }
534 
// Slot for Article::read (connected by addArticle() for unread articles).
// Updates the unread articles counter, announces the change via articleRead()
// and schedules a deferred save.
void Feed::handleArticleRead(Article *article)
{
    // Drop the article -> feed connections: the article is read now
    article->disconnect(this);
    decreaseUnreadCount();
    emit articleRead(article);
    // will be stored deferred
    m_dirty = true;
    storeDeferred();
}
544 
cleanup()545 void Feed::cleanup()
546 {
547     Utils::Fs::forceRemove(m_session->dataFileStorage()->storageDir().absoluteFilePath(m_dataFileName));
548 }
549 
timerEvent(QTimerEvent * event)550 void Feed::timerEvent(QTimerEvent *event)
551 {
552     Q_UNUSED(event);
553     store();
554 }
555