#include "rssfeed.h"

#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cstring>
#include <curl/curl.h>
#include <functional>
#include <iostream>
#include <langinfo.h>
#include <random>
#include <sstream>
#include <sys/utsname.h>
#include <string.h>
#include <time.h>

#include "cache.h"
#include "config.h"
#include "configcontainer.h"
#include "confighandlerexception.h"
#include "dbexception.h"
#include "htmlrenderer.h"
#include "logger.h"
#include "scopemeasure.h"
#include "strprintf.h"
#include "tagsouppullparser.h"
#include "utils.h"
27 
28 namespace newsboat {
29 
RssFeed(Cache * c)30 RssFeed::RssFeed(Cache* c)
31 	: pubDate_(0)
32 	, ch(c)
33 	, search_feed(false)
34 	, is_rtl_(false)
35 	, idx(0)
36 	, order(0)
37 	, status_(DlStatus::SUCCESS)
38 {
39 }
40 
~RssFeed()41 RssFeed::~RssFeed()
42 {
43 }
44 
unread_item_count() const45 unsigned int RssFeed::unread_item_count() const
46 {
47 	std::lock_guard<std::mutex> lock(item_mutex);
48 	return std::count_if(items_.begin(),
49 			items_.end(),
50 	[](const std::shared_ptr<RssItem>& item) {
51 		return item->unread();
52 	});
53 }
54 
matches_tag(const std::string & tag)55 bool RssFeed::matches_tag(const std::string& tag)
56 {
57 	return std::find_if(
58 	tags_.begin(), tags_.end(), [&](const std::string& t) {
59 		return tag == t;
60 	}) != tags_.end();
61 }
62 
get_firsttag()63 std::string RssFeed::get_firsttag()
64 {
65 	for (const auto& t : tags_) {
66 		if (t.substr(0, 1) != "~") {
67 			return t;
68 		}
69 	}
70 	return "";
71 }
72 
get_tags() const73 std::string RssFeed::get_tags() const
74 {
75 	std::string tags;
76 	for (const auto& t : tags_) {
77 		if (t.substr(0, 1) != "~" && t.substr(0, 1) != "!") {
78 			tags.append(t);
79 			tags.append(" ");
80 		}
81 	}
82 	return tags;
83 }
84 
set_tags(const std::vector<std::string> & tags)85 void RssFeed::set_tags(const std::vector<std::string>& tags)
86 {
87 	tags_ = tags;
88 }
89 
title() const90 std::string RssFeed::title() const
91 {
92 	bool found_title = false;
93 	std::string alt_title;
94 	for (const auto& tag : tags_) {
95 		if (tag.substr(0, 1) == "~") {
96 			found_title = true;
97 			alt_title = tag.substr(1, tag.length() - 1);
98 			break;
99 		}
100 	}
101 	return found_title
102 		? alt_title
103 		: utils::utf8_to_locale(title_);
104 }
105 
hidden() const106 bool RssFeed::hidden() const
107 {
108 	return std::any_of(tags_.begin(),
109 			tags_.end(),
110 	[](const std::string& tag) {
111 		return tag.substr(0, 1) == "!";
112 	});
113 }
114 
get_item_by_guid(const std::string & guid)115 std::shared_ptr<RssItem> RssFeed::get_item_by_guid(const std::string& guid)
116 {
117 	std::lock_guard<std::mutex> lock(item_mutex);
118 	return get_item_by_guid_unlocked(guid);
119 }
120 
get_item_by_guid_unlocked(const std::string & guid)121 std::shared_ptr<RssItem> RssFeed::get_item_by_guid_unlocked(
122 	const std::string& guid)
123 {
124 	auto it = items_guid_map.find(guid);
125 	if (it != items_guid_map.end()) {
126 		return it->second;
127 	}
128 	LOG(Level::DEBUG,
129 		"RssFeed::get_item_by_guid_unlocked: hit dummy item!");
130 	LOG(Level::DEBUG,
131 		"RssFeed::get_item_by_guid_unlocked: items_guid_map.size = %" PRIu64,
132 		static_cast<uint64_t>(items_guid_map.size()));
133 
134 	// should never happen!
135 	return std::shared_ptr<RssItem>(new RssItem(ch));
136 }
137 
attribute_value(const std::string & attribname) const138 nonstd::optional<std::string> RssFeed::attribute_value(const std::string&
139 	attribname) const
140 {
141 	if (attribname == "feedtitle") {
142 		return title();
143 	} else if (attribname == "description") {
144 		return utils::utf8_to_locale(description());
145 	} else if (attribname == "feedlink") {
146 		return link();
147 	} else if (attribname == "feeddate") {
148 		return pubDate();
149 	} else if (attribname == "rssurl") {
150 		return rssurl();
151 	} else if (attribname == "unread_count") {
152 		return std::to_string(unread_item_count());
153 	} else if (attribname == "total_count") {
154 		return std::to_string(items_.size());
155 	} else if (attribname == "tags") {
156 		return get_tags();
157 	} else if (attribname == "feedindex") {
158 		return std::to_string(idx);
159 	}
160 	return nonstd::nullopt;
161 }
162 
update_items(std::vector<std::shared_ptr<RssFeed>> feeds)163 void RssFeed::update_items(std::vector<std::shared_ptr<RssFeed>> feeds)
164 {
165 	std::lock_guard<std::mutex> lock(item_mutex);
166 	if (query.empty()) {
167 		return;
168 	}
169 
170 	LOG(Level::DEBUG, "RssFeed::update_items: query = `%s'", query);
171 
172 	ScopeMeasure sm("RssFeed::update_items");
173 
174 	Matcher m(query);
175 
176 	items_.clear();
177 	items_guid_map.clear();
178 
179 	for (const auto& feed : feeds) {
180 		if (feed->is_query_feed()) {
181 			// don't fetch items from other query feeds!
182 			continue;
183 		}
184 		for (const auto& item : feed->items()) {
185 			if (!item->deleted() && m.matches(item.get())) {
186 				LOG(Level::DEBUG, "RssFeed::update_items: Matcher matches!");
187 				item->set_feedptr(feed);
188 				items_.push_back(item);
189 				items_guid_map[item->guid()] = item;
190 			}
191 		}
192 	}
193 
194 	sm.stopover("matching");
195 
196 	std::sort(items_.begin(), items_.end());
197 
198 	sm.stopover("sorting");
199 }
200 
set_rssurl(const std::string & u)201 void RssFeed::set_rssurl(const std::string& u)
202 {
203 	rssurl_ = u;
204 	if (utils::is_query_url(u)) {
205 		/* Query string looks like this:
206 		 *
207 		 * query:Title:unread = "yes" and age between 0:7
208 		 *
209 		 * So we split by colons to get title and the query itself. */
210 		const auto tokens = utils::tokenize(u, ":");
211 
212 		if (tokens.size() < 3) {
213 			throw _s("too few arguments");
214 		}
215 
216 		/* "Between" operator requires a range, which contains a colon.
217 		 * Since we've been tokenizing by colon, we might've
218 		 * inadertently split the query itself. Let's reconstruct it! */
219 		auto query = tokens[2];
220 		for (auto it = tokens.begin() + 3; it != tokens.end(); ++it) {
221 			query += ":";
222 			query += *it;
223 		}
224 		// Have to check if the result is a valid query, just in case
225 		Matcher m;
226 		if (!m.parse(query)) {
227 			throw strprintf::fmt(
228 				_("`%s' is not a valid filter expression"),
229 				query);
230 		}
231 
232 		LOG(Level::DEBUG,
233 			"RssFeed::set_rssurl: query name = `%s' expr = `%s'",
234 			tokens[1],
235 			query);
236 
237 		set_title(tokens[1]);
238 		set_query(query);
239 	}
240 }
241 
sort(const ArticleSortStrategy & sort_strategy)242 void RssFeed::sort(const ArticleSortStrategy& sort_strategy)
243 {
244 	std::lock_guard<std::mutex> lock(item_mutex);
245 	sort_unlocked(sort_strategy);
246 }
247 
sort_unlocked(const ArticleSortStrategy & sort_strategy)248 void RssFeed::sort_unlocked(const ArticleSortStrategy& sort_strategy)
249 {
250 	switch (sort_strategy.sm) {
251 	case ArtSortMethod::TITLE:
252 		std::stable_sort(items_.begin(),
253 			items_.end(),
254 			[&](const std::shared_ptr<RssItem>& a,
255 		const std::shared_ptr<RssItem>& b) {
256 			const auto cmp = utils::strnaturalcmp(utils::utf8_to_locale(a->title()),
257 					utils::utf8_to_locale(b->title()));
258 			return sort_strategy.sd == SortDirection::DESC ? (cmp > 0) : (cmp < 0);
259 		});
260 		break;
261 	case ArtSortMethod::FLAGS:
262 		std::stable_sort(items_.begin(),
263 			items_.end(),
264 			[&](const std::shared_ptr<RssItem>& a,
265 		const std::shared_ptr<RssItem>& b) {
266 			return sort_strategy.sd ==
267 				SortDirection::DESC
268 				? (strcmp(a->flags().c_str(),
269 						b->flags().c_str()) > 0)
270 				: (strcmp(a->flags().c_str(),
271 						b->flags().c_str()) < 0);
272 		});
273 		break;
274 	case ArtSortMethod::AUTHOR:
275 		std::stable_sort(items_.begin(),
276 			items_.end(),
277 			[&](const std::shared_ptr<RssItem>& a,
278 		const std::shared_ptr<RssItem>& b) {
279 			const auto author_a = utils::utf8_to_locale(a->author());
280 			const auto author_b = utils::utf8_to_locale(b->author());
281 			const auto cmp = strcmp(author_a.c_str(), author_b.c_str());
282 			return sort_strategy.sd == SortDirection::DESC ? (cmp > 0) : (cmp < 0);
283 		});
284 		break;
285 	case ArtSortMethod::LINK:
286 		std::stable_sort(items_.begin(),
287 			items_.end(),
288 			[&](const std::shared_ptr<RssItem>& a,
289 		const std::shared_ptr<RssItem>& b) {
290 			return sort_strategy.sd ==
291 				SortDirection::DESC
292 				? (strcmp(a->link().c_str(),
293 						b->link().c_str()) > 0)
294 				: (strcmp(a->link().c_str(),
295 						b->link().c_str()) < 0);
296 		});
297 		break;
298 	case ArtSortMethod::GUID:
299 		std::stable_sort(items_.begin(),
300 			items_.end(),
301 			[&](const std::shared_ptr<RssItem>& a,
302 		const std::shared_ptr<RssItem>& b) {
303 			return sort_strategy.sd ==
304 				SortDirection::DESC
305 				? (strcmp(a->guid().c_str(),
306 						b->guid().c_str()) > 0)
307 				: (strcmp(a->guid().c_str(),
308 						b->guid().c_str()) < 0);
309 		});
310 		break;
311 	case ArtSortMethod::DATE:
312 		std::stable_sort(items_.begin(),
313 			items_.end(),
314 			[&](const std::shared_ptr<RssItem>& a,
315 		const std::shared_ptr<RssItem>& b) {
316 			// date is descending by default
317 			return sort_strategy.sd == SortDirection::ASC
318 				? (a->pubDate_timestamp() >
319 					b->pubDate_timestamp())
320 				: (a->pubDate_timestamp() <
321 					b->pubDate_timestamp());
322 		});
323 		break;
324 	case ArtSortMethod::RANDOM:
325 		std::random_shuffle(items_.begin(), items_.end());
326 		break;
327 	}
328 }
329 
purge_deleted_items()330 void RssFeed::purge_deleted_items()
331 {
332 	std::lock_guard<std::mutex> lock(item_mutex);
333 	ScopeMeasure m1("RssFeed::purge_deleted_items");
334 
335 	// Purge in items_guid_map
336 	{
337 		std::lock_guard<std::mutex> lock2(items_guid_map_mutex);
338 		for (const auto& item : items_) {
339 			if (item->deleted()) {
340 				items_guid_map.erase(item->guid());
341 			}
342 		}
343 	}
344 
345 	items_.erase(std::remove_if(items_.begin(),
346 			items_.end(),
347 	[](const std::shared_ptr<RssItem> item) {
348 		return item->deleted();
349 	}),
350 	items_.end());
351 }
352 
set_feedptrs(std::shared_ptr<RssFeed> self)353 void RssFeed::set_feedptrs(std::shared_ptr<RssFeed> self)
354 {
355 	std::lock_guard<std::mutex> lock(item_mutex);
356 	for (const auto& item : items_) {
357 		item->set_feedptr(self);
358 	}
359 }
360 
get_status()361 std::string RssFeed::get_status()
362 {
363 	std::lock_guard<std::mutex> guard(status_mutex_);
364 
365 	switch (status_) {
366 	case DlStatus::SUCCESS:
367 		return " ";
368 	case DlStatus::TO_BE_DOWNLOADED:
369 		return "_";
370 	case DlStatus::DURING_DOWNLOAD:
371 		return ".";
372 	case DlStatus::DL_ERROR:
373 		return "x";
374 	}
375 	return "?";
376 }
377 
unload()378 void RssFeed::unload()
379 {
380 	std::lock_guard<std::mutex> lock(item_mutex);
381 	for (const auto& item : items_) {
382 		item->unload();
383 	}
384 }
385 
load()386 void RssFeed::load()
387 {
388 	std::lock_guard<std::mutex> lock(item_mutex);
389 	ch->fetch_descriptions(this);
390 }
391 
mark_all_items_read()392 void RssFeed::mark_all_items_read()
393 {
394 	std::lock_guard<std::mutex> lock(item_mutex);
395 	for (const auto& item : items_) {
396 		item->set_unread_nowrite(false);
397 	}
398 }
399 
400 } // namespace newsboat
401