1 #include "rssfeed.h"
2
#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cstring>
#include <curl/curl.h>
#include <functional>
#include <iostream>
#include <langinfo.h>
#include <random>
#include <sstream>
#include <sys/utsname.h>
#include <string.h>
#include <time.h>
15
16 #include "cache.h"
17 #include "config.h"
18 #include "configcontainer.h"
19 #include "confighandlerexception.h"
20 #include "dbexception.h"
21 #include "htmlrenderer.h"
22 #include "logger.h"
23 #include "scopemeasure.h"
24 #include "strprintf.h"
25 #include "tagsouppullparser.h"
26 #include "utils.h"
27
28 namespace newsboat {
29
RssFeed(Cache * c)30 RssFeed::RssFeed(Cache* c)
31 : pubDate_(0)
32 , ch(c)
33 , search_feed(false)
34 , is_rtl_(false)
35 , idx(0)
36 , order(0)
37 , status_(DlStatus::SUCCESS)
38 {
39 }
40
~RssFeed()41 RssFeed::~RssFeed()
42 {
43 }
44
unread_item_count() const45 unsigned int RssFeed::unread_item_count() const
46 {
47 std::lock_guard<std::mutex> lock(item_mutex);
48 return std::count_if(items_.begin(),
49 items_.end(),
50 [](const std::shared_ptr<RssItem>& item) {
51 return item->unread();
52 });
53 }
54
matches_tag(const std::string & tag)55 bool RssFeed::matches_tag(const std::string& tag)
56 {
57 return std::find_if(
58 tags_.begin(), tags_.end(), [&](const std::string& t) {
59 return tag == t;
60 }) != tags_.end();
61 }
62
get_firsttag()63 std::string RssFeed::get_firsttag()
64 {
65 for (const auto& t : tags_) {
66 if (t.substr(0, 1) != "~") {
67 return t;
68 }
69 }
70 return "";
71 }
72
get_tags() const73 std::string RssFeed::get_tags() const
74 {
75 std::string tags;
76 for (const auto& t : tags_) {
77 if (t.substr(0, 1) != "~" && t.substr(0, 1) != "!") {
78 tags.append(t);
79 tags.append(" ");
80 }
81 }
82 return tags;
83 }
84
set_tags(const std::vector<std::string> & tags)85 void RssFeed::set_tags(const std::vector<std::string>& tags)
86 {
87 tags_ = tags;
88 }
89
title() const90 std::string RssFeed::title() const
91 {
92 bool found_title = false;
93 std::string alt_title;
94 for (const auto& tag : tags_) {
95 if (tag.substr(0, 1) == "~") {
96 found_title = true;
97 alt_title = tag.substr(1, tag.length() - 1);
98 break;
99 }
100 }
101 return found_title
102 ? alt_title
103 : utils::utf8_to_locale(title_);
104 }
105
hidden() const106 bool RssFeed::hidden() const
107 {
108 return std::any_of(tags_.begin(),
109 tags_.end(),
110 [](const std::string& tag) {
111 return tag.substr(0, 1) == "!";
112 });
113 }
114
get_item_by_guid(const std::string & guid)115 std::shared_ptr<RssItem> RssFeed::get_item_by_guid(const std::string& guid)
116 {
117 std::lock_guard<std::mutex> lock(item_mutex);
118 return get_item_by_guid_unlocked(guid);
119 }
120
get_item_by_guid_unlocked(const std::string & guid)121 std::shared_ptr<RssItem> RssFeed::get_item_by_guid_unlocked(
122 const std::string& guid)
123 {
124 auto it = items_guid_map.find(guid);
125 if (it != items_guid_map.end()) {
126 return it->second;
127 }
128 LOG(Level::DEBUG,
129 "RssFeed::get_item_by_guid_unlocked: hit dummy item!");
130 LOG(Level::DEBUG,
131 "RssFeed::get_item_by_guid_unlocked: items_guid_map.size = %" PRIu64,
132 static_cast<uint64_t>(items_guid_map.size()));
133
134 // should never happen!
135 return std::shared_ptr<RssItem>(new RssItem(ch));
136 }
137
attribute_value(const std::string & attribname) const138 nonstd::optional<std::string> RssFeed::attribute_value(const std::string&
139 attribname) const
140 {
141 if (attribname == "feedtitle") {
142 return title();
143 } else if (attribname == "description") {
144 return utils::utf8_to_locale(description());
145 } else if (attribname == "feedlink") {
146 return link();
147 } else if (attribname == "feeddate") {
148 return pubDate();
149 } else if (attribname == "rssurl") {
150 return rssurl();
151 } else if (attribname == "unread_count") {
152 return std::to_string(unread_item_count());
153 } else if (attribname == "total_count") {
154 return std::to_string(items_.size());
155 } else if (attribname == "tags") {
156 return get_tags();
157 } else if (attribname == "feedindex") {
158 return std::to_string(idx);
159 }
160 return nonstd::nullopt;
161 }
162
update_items(std::vector<std::shared_ptr<RssFeed>> feeds)163 void RssFeed::update_items(std::vector<std::shared_ptr<RssFeed>> feeds)
164 {
165 std::lock_guard<std::mutex> lock(item_mutex);
166 if (query.empty()) {
167 return;
168 }
169
170 LOG(Level::DEBUG, "RssFeed::update_items: query = `%s'", query);
171
172 ScopeMeasure sm("RssFeed::update_items");
173
174 Matcher m(query);
175
176 items_.clear();
177 items_guid_map.clear();
178
179 for (const auto& feed : feeds) {
180 if (feed->is_query_feed()) {
181 // don't fetch items from other query feeds!
182 continue;
183 }
184 for (const auto& item : feed->items()) {
185 if (!item->deleted() && m.matches(item.get())) {
186 LOG(Level::DEBUG, "RssFeed::update_items: Matcher matches!");
187 item->set_feedptr(feed);
188 items_.push_back(item);
189 items_guid_map[item->guid()] = item;
190 }
191 }
192 }
193
194 sm.stopover("matching");
195
196 std::sort(items_.begin(), items_.end());
197
198 sm.stopover("sorting");
199 }
200
set_rssurl(const std::string & u)201 void RssFeed::set_rssurl(const std::string& u)
202 {
203 rssurl_ = u;
204 if (utils::is_query_url(u)) {
205 /* Query string looks like this:
206 *
207 * query:Title:unread = "yes" and age between 0:7
208 *
209 * So we split by colons to get title and the query itself. */
210 const auto tokens = utils::tokenize(u, ":");
211
212 if (tokens.size() < 3) {
213 throw _s("too few arguments");
214 }
215
216 /* "Between" operator requires a range, which contains a colon.
217 * Since we've been tokenizing by colon, we might've
218 * inadertently split the query itself. Let's reconstruct it! */
219 auto query = tokens[2];
220 for (auto it = tokens.begin() + 3; it != tokens.end(); ++it) {
221 query += ":";
222 query += *it;
223 }
224 // Have to check if the result is a valid query, just in case
225 Matcher m;
226 if (!m.parse(query)) {
227 throw strprintf::fmt(
228 _("`%s' is not a valid filter expression"),
229 query);
230 }
231
232 LOG(Level::DEBUG,
233 "RssFeed::set_rssurl: query name = `%s' expr = `%s'",
234 tokens[1],
235 query);
236
237 set_title(tokens[1]);
238 set_query(query);
239 }
240 }
241
sort(const ArticleSortStrategy & sort_strategy)242 void RssFeed::sort(const ArticleSortStrategy& sort_strategy)
243 {
244 std::lock_guard<std::mutex> lock(item_mutex);
245 sort_unlocked(sort_strategy);
246 }
247
sort_unlocked(const ArticleSortStrategy & sort_strategy)248 void RssFeed::sort_unlocked(const ArticleSortStrategy& sort_strategy)
249 {
250 switch (sort_strategy.sm) {
251 case ArtSortMethod::TITLE:
252 std::stable_sort(items_.begin(),
253 items_.end(),
254 [&](const std::shared_ptr<RssItem>& a,
255 const std::shared_ptr<RssItem>& b) {
256 const auto cmp = utils::strnaturalcmp(utils::utf8_to_locale(a->title()),
257 utils::utf8_to_locale(b->title()));
258 return sort_strategy.sd == SortDirection::DESC ? (cmp > 0) : (cmp < 0);
259 });
260 break;
261 case ArtSortMethod::FLAGS:
262 std::stable_sort(items_.begin(),
263 items_.end(),
264 [&](const std::shared_ptr<RssItem>& a,
265 const std::shared_ptr<RssItem>& b) {
266 return sort_strategy.sd ==
267 SortDirection::DESC
268 ? (strcmp(a->flags().c_str(),
269 b->flags().c_str()) > 0)
270 : (strcmp(a->flags().c_str(),
271 b->flags().c_str()) < 0);
272 });
273 break;
274 case ArtSortMethod::AUTHOR:
275 std::stable_sort(items_.begin(),
276 items_.end(),
277 [&](const std::shared_ptr<RssItem>& a,
278 const std::shared_ptr<RssItem>& b) {
279 const auto author_a = utils::utf8_to_locale(a->author());
280 const auto author_b = utils::utf8_to_locale(b->author());
281 const auto cmp = strcmp(author_a.c_str(), author_b.c_str());
282 return sort_strategy.sd == SortDirection::DESC ? (cmp > 0) : (cmp < 0);
283 });
284 break;
285 case ArtSortMethod::LINK:
286 std::stable_sort(items_.begin(),
287 items_.end(),
288 [&](const std::shared_ptr<RssItem>& a,
289 const std::shared_ptr<RssItem>& b) {
290 return sort_strategy.sd ==
291 SortDirection::DESC
292 ? (strcmp(a->link().c_str(),
293 b->link().c_str()) > 0)
294 : (strcmp(a->link().c_str(),
295 b->link().c_str()) < 0);
296 });
297 break;
298 case ArtSortMethod::GUID:
299 std::stable_sort(items_.begin(),
300 items_.end(),
301 [&](const std::shared_ptr<RssItem>& a,
302 const std::shared_ptr<RssItem>& b) {
303 return sort_strategy.sd ==
304 SortDirection::DESC
305 ? (strcmp(a->guid().c_str(),
306 b->guid().c_str()) > 0)
307 : (strcmp(a->guid().c_str(),
308 b->guid().c_str()) < 0);
309 });
310 break;
311 case ArtSortMethod::DATE:
312 std::stable_sort(items_.begin(),
313 items_.end(),
314 [&](const std::shared_ptr<RssItem>& a,
315 const std::shared_ptr<RssItem>& b) {
316 // date is descending by default
317 return sort_strategy.sd == SortDirection::ASC
318 ? (a->pubDate_timestamp() >
319 b->pubDate_timestamp())
320 : (a->pubDate_timestamp() <
321 b->pubDate_timestamp());
322 });
323 break;
324 case ArtSortMethod::RANDOM:
325 std::random_shuffle(items_.begin(), items_.end());
326 break;
327 }
328 }
329
purge_deleted_items()330 void RssFeed::purge_deleted_items()
331 {
332 std::lock_guard<std::mutex> lock(item_mutex);
333 ScopeMeasure m1("RssFeed::purge_deleted_items");
334
335 // Purge in items_guid_map
336 {
337 std::lock_guard<std::mutex> lock2(items_guid_map_mutex);
338 for (const auto& item : items_) {
339 if (item->deleted()) {
340 items_guid_map.erase(item->guid());
341 }
342 }
343 }
344
345 items_.erase(std::remove_if(items_.begin(),
346 items_.end(),
347 [](const std::shared_ptr<RssItem> item) {
348 return item->deleted();
349 }),
350 items_.end());
351 }
352
set_feedptrs(std::shared_ptr<RssFeed> self)353 void RssFeed::set_feedptrs(std::shared_ptr<RssFeed> self)
354 {
355 std::lock_guard<std::mutex> lock(item_mutex);
356 for (const auto& item : items_) {
357 item->set_feedptr(self);
358 }
359 }
360
get_status()361 std::string RssFeed::get_status()
362 {
363 std::lock_guard<std::mutex> guard(status_mutex_);
364
365 switch (status_) {
366 case DlStatus::SUCCESS:
367 return " ";
368 case DlStatus::TO_BE_DOWNLOADED:
369 return "_";
370 case DlStatus::DURING_DOWNLOAD:
371 return ".";
372 case DlStatus::DL_ERROR:
373 return "x";
374 }
375 return "?";
376 }
377
unload()378 void RssFeed::unload()
379 {
380 std::lock_guard<std::mutex> lock(item_mutex);
381 for (const auto& item : items_) {
382 item->unload();
383 }
384 }
385
load()386 void RssFeed::load()
387 {
388 std::lock_guard<std::mutex> lock(item_mutex);
389 ch->fetch_descriptions(this);
390 }
391
mark_all_items_read()392 void RssFeed::mark_all_items_read()
393 {
394 std::lock_guard<std::mutex> lock(item_mutex);
395 for (const auto& item : items_) {
396 item->set_unread_nowrite(false);
397 }
398 }
399
400 } // namespace newsboat
401