1 #include "cache.h"
2 
3 #include <cassert>
4 #include <cinttypes>
5 #include <cstdlib>
6 #include <cstring>
7 #include <fstream>
8 #include <iostream>
9 #include <sqlite3.h>
10 #include <sstream>
11 #include <time.h>
12 
13 #include "config.h"
14 #include "configcontainer.h"
15 #include "controller.h"
16 #include "dbexception.h"
17 #include "logger.h"
18 #include "matcherexception.h"
19 #include "rssfeed.h"
20 #include "scopemeasure.h"
21 #include "strprintf.h"
22 #include "utils.h"
23 
24 namespace newsboat {
25 
run_sql_impl(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument,bool do_throw)26 inline void Cache::run_sql_impl(const std::string& query,
27 	int (*callback)(void*, int, char**, char**),
28 	void* callback_argument,
29 	bool do_throw)
30 {
31 	LOG(Level::DEBUG, "running query: %s", query);
32 	const int rc = sqlite3_exec(
33 			db, query.c_str(), callback, callback_argument, nullptr);
34 	if (rc != SQLITE_OK) {
35 		const std::string message = "query \"%s\" failed: (%d) %s";
36 		LOG(Level::CRITICAL, message, query, rc, sqlite3_errstr(rc));
37 		if (do_throw) {
38 			throw DbException(db);
39 		}
40 	}
41 }
42 
run_sql(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument)43 void Cache::run_sql(const std::string& query,
44 	int (*callback)(void*, int, char**, char**),
45 	void* callback_argument)
46 {
47 	run_sql_impl(query, callback, callback_argument, true);
48 }
49 
run_sql_nothrow(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument)50 void Cache::run_sql_nothrow(const std::string& query,
51 	int (*callback)(void*, int, char**, char**),
52 	void* callback_argument)
53 {
54 	run_sql_impl(query, callback, callback_argument, false);
55 }
56 
57 struct CbHandler {
CbHandlernewsboat::CbHandler58 	CbHandler()
59 		: c(-1)
60 	{
61 	}
set_countnewsboat::CbHandler62 	void set_count(int i)
63 	{
64 		c = i;
65 	}
countnewsboat::CbHandler66 	int count()
67 	{
68 		return c;
69 	}
70 
71 private:
72 	int c;
73 };
74 
75 struct HeaderValues {
76 	time_t lastmodified;
77 	std::string etag;
78 };
79 
count_callback(void * handler,int argc,char ** argv,char **)80 static int count_callback(void* handler, int argc, char** argv,
81 	char** /* azColName */)
82 {
83 	CbHandler* cbh = static_cast<CbHandler*>(handler);
84 
85 	if (argc > 0) {
86 		std::istringstream is(argv[0]);
87 		int x;
88 		is >> x;
89 		cbh->set_count(x);
90 	}
91 
92 	return 0;
93 }
94 
single_string_callback(void * handler,int argc,char ** argv,char **)95 static int single_string_callback(void* handler,
96 	int argc,
97 	char** argv,
98 	char** /* azColName */)
99 {
100 	std::string* value = reinterpret_cast<std::string*>(handler);
101 	if (argc > 0 && argv[0]) {
102 		*value = argv[0];
103 	}
104 	return 0;
105 }
106 
rssfeed_callback(void * myfeed,int argc,char ** argv,char **)107 static int rssfeed_callback(void* myfeed, int argc, char** argv,
108 	char** /* azColName */)
109 {
110 	std::shared_ptr<RssFeed>* feed =
111 		static_cast<std::shared_ptr<RssFeed>*>(myfeed);
112 	// normaly, this shouldn't happen, but we keep the assert()s here
113 	// nevertheless
114 	assert(argc == 3);
115 	assert(argv[0] != nullptr);
116 	assert(argv[1] != nullptr);
117 	assert(argv[2] != nullptr);
118 	(*feed)->set_title(argv[0]);
119 	(*feed)->set_link(argv[1]);
120 	(*feed)->set_rtl(strcmp(argv[2], "1") == 0);
121 	LOG(Level::INFO,
122 		"rssfeed_callback: title = %s link = %s is_rtl = %s",
123 		argv[0],
124 		argv[1],
125 		argv[2]);
126 	return 0;
127 }
128 
lastmodified_callback(void * handler,int argc,char ** argv,char **)129 static int lastmodified_callback(void* handler,
130 	int argc,
131 	char** argv,
132 	char** /* azColName */)
133 {
134 	HeaderValues* result = static_cast<HeaderValues*>(handler);
135 	assert(argc == 2);
136 	assert(result != nullptr);
137 	if (argv[0]) {
138 		std::istringstream is(argv[0]);
139 		is >> result->lastmodified;
140 	} else {
141 		result->lastmodified = 0;
142 	}
143 	if (argv[1]) {
144 		result->etag = argv[1];
145 	} else {
146 		result->etag = "";
147 	}
148 	LOG(Level::INFO,
149 		"lastmodified_callback: lastmodified = %" PRId64 " etag = %s",
150 		// On GCC, `time_t` is `long int`, which is at least 32 bits long
151 		// according to the spec. On x86_64, it's actually 64 bits. Thus,
152 		// casting to int64_t is either a no-op, or an up-cast which are always
153 		// safe.
154 		static_cast<int64_t>(result->lastmodified),
155 		result->etag);
156 	return 0;
157 }
158 
vectorofstring_callback(void * vp,int argc,char ** argv,char **)159 static int vectorofstring_callback(void* vp, int argc, char** argv,
160 	char** /* azColName */)
161 {
162 	std::vector<std::string>* vectorptr =
163 		static_cast<std::vector<std::string>*>(vp);
164 	assert(argc == 1);
165 	assert(argv[0] != nullptr);
166 	vectorptr->push_back(std::string(argv[0]));
167 	LOG(Level::INFO, "vectorofstring_callback: element = %s", argv[0]);
168 	return 0;
169 }
170 
rssitem_callback(void * myfeed,int argc,char ** argv,char **)171 static int rssitem_callback(void* myfeed, int argc, char** argv,
172 	char** /* azColName */)
173 {
174 	std::shared_ptr<RssFeed>* feed =
175 		static_cast<std::shared_ptr<RssFeed>*>(myfeed);
176 	assert(argc == 13);
177 	std::shared_ptr<RssItem> item(new RssItem(nullptr));
178 	item->set_guid(argv[0]);
179 	item->set_title(argv[1]);
180 	item->set_author(argv[2]);
181 	item->set_link(argv[3]);
182 
183 	std::istringstream is(argv[4]);
184 	time_t t;
185 	is >> t;
186 	item->set_pubDate(t);
187 
188 	item->set_size(utils::to_u(argv[5]));
189 	item->set_unread((std::string("1") == argv[6]));
190 
191 	item->set_feedurl(argv[7]);
192 
193 	item->set_enclosure_url(argv[8] ? argv[8] : "");
194 	item->set_enclosure_type(argv[9] ? argv[9] : "");
195 	item->set_enqueued((std::string("1") == (argv[10] ? argv[10] : "")));
196 	item->set_flags(argv[11] ? argv[11] : "");
197 	item->set_base(argv[12] ? argv[12] : "");
198 
199 	//(*feed)->items().push_back(item);
200 	(*feed)->add_item(item);
201 	return 0;
202 }
203 
fill_content_callback(void * myfeed,int argc,char ** argv,char **)204 static int fill_content_callback(void* myfeed,
205 	int argc,
206 	char** argv,
207 	char** /* azColName */)
208 {
209 	RssFeed* feed = static_cast<RssFeed*>(myfeed);
210 	assert(argc == 3);
211 	if (argv[0]) {
212 		std::shared_ptr<RssItem> item =
213 			feed->get_item_by_guid_unlocked(argv[0]);
214 		item->set_description(argv[1] ? argv[1] : "", argv[2] ? argv[2] : "");
215 	}
216 	return 0;
217 }
218 
search_item_callback(void * myfeed,int argc,char ** argv,char **)219 static int search_item_callback(void* myfeed,
220 	int argc,
221 	char** argv,
222 	char** /* azColName */)
223 {
224 	std::vector<std::shared_ptr<RssItem>>* items =
225 			static_cast<std::vector<std::shared_ptr<RssItem>>*>(myfeed);
226 	assert(argc == 13);
227 	std::shared_ptr<RssItem> item(new RssItem(nullptr));
228 	item->set_guid(argv[0]);
229 	item->set_title(argv[1]);
230 	item->set_author(argv[2]);
231 	item->set_link(argv[3]);
232 
233 	std::istringstream is(argv[4]);
234 	time_t t;
235 	is >> t;
236 	item->set_pubDate(t);
237 
238 	item->set_size(utils::to_u(argv[5]));
239 	item->set_unread((std::string("1") == argv[6]));
240 	item->set_feedurl(argv[7]);
241 
242 	item->set_enclosure_url(argv[8] ? argv[8] : "");
243 	item->set_enclosure_type(argv[9] ? argv[9] : "");
244 	item->set_enqueued((std::string("1") == argv[10]));
245 	item->set_flags(argv[11] ? argv[11] : "");
246 	item->set_base(argv[12] ? argv[12] : "");
247 
248 	items->push_back(item);
249 	return 0;
250 }
251 
guid_callback(void * myguids,int argc,char ** argv,char **)252 static int guid_callback(void* myguids, int argc, char** argv,
253 	char** /* azColName */)
254 {
255 	auto* guids = static_cast<std::unordered_set<std::string>*>(myguids);
256 	assert(argc == 1);
257 	guids->emplace(argv[0]);
258 	return 0;
259 }
260 
Cache(const std::string & cachefile,ConfigContainer * c)261 Cache::Cache(const std::string& cachefile, ConfigContainer* c)
262 	: db(0)
263 	, cfg(c)
264 {
265 	const int error = sqlite3_open(cachefile.c_str(), &db);
266 	if (error != SQLITE_OK) {
267 		LOG(Level::ERROR,
268 			"couldn't sqlite3_open(%s): error = %d",
269 			cachefile,
270 			error);
271 		throw DbException(db);
272 	}
273 
274 	populate_tables();
275 	set_pragmas();
276 
277 	clean_old_articles();
278 
279 	// we need to manually lock all DB operations because SQLite has no
280 	// explicit support for multithreading.
281 }
282 
~Cache()283 Cache::~Cache()
284 {
285 	sqlite3_close(db);
286 }
287 
set_pragmas()288 void Cache::set_pragmas()
289 {
290 	// first, we need to swithc off synchronous writing as it's slow as hell
291 	run_sql("PRAGMA synchronous = OFF;");
292 
293 	// then we disable case-sensitive matching for the LIKE operator in
294 	// SQLite, for search operations
295 	run_sql("PRAGMA case_sensitive_like=OFF;");
296 }
297 
298 static const schema_patches schemaPatches{
299 	{	{2, 10},
300 		{
301 			"CREATE TABLE rss_feed ( "
302 			" rssurl VARCHAR(1024) PRIMARY KEY NOT NULL, "
303 			" url VARCHAR(1024) NOT NULL, "
304 			" title VARCHAR(1024) NOT NULL ); ",
305 
306 			"CREATE TABLE rss_item ( "
307 			" id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "
308 			" guid VARCHAR(64) NOT NULL, "
309 			" title VARCHAR(1024) NOT NULL, "
310 			" author VARCHAR(1024) NOT NULL, "
311 			" url VARCHAR(1024) NOT NULL, "
312 			" feedurl VARCHAR(1024) NOT NULL, "
313 			" pubDate INTEGER NOT NULL, "
314 			" content VARCHAR(65535) NOT NULL,"
315 			" unread INTEGER(1) NOT NULL );",
316 
317 			"CREATE TABLE google_replay ( "
318 			" id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "
319 			" guid VARCHAR(64) NOT NULL, "
320 			" state INTEGER NOT NULL, "
321 			" ts INTEGER NOT NULL );",
322 
323 			/* we need to do these ALTER TABLE statements because we
324 			 * need to store additional data for the podcast support
325 			 */
326 			"ALTER TABLE rss_item ADD enclosure_url VARCHAR(1024);",
327 
328 			"ALTER TABLE rss_item ADD enclosure_type "
329 			"VARCHAR(1024);",
330 
331 			"ALTER TABLE rss_item ADD enqueued INTEGER(1) NOT NULL "
332 			"DEFAULT 0;",
333 
334 			"ALTER TABLE rss_item ADD flags VARCHAR(52);",
335 
336 			/* create indexes to speed up certain queries */
337 			"CREATE INDEX IF NOT EXISTS idx_rssurl ON "
338 			"rss_feed(rssurl);",
339 
340 			"CREATE INDEX IF NOT EXISTS idx_guid ON "
341 			"rss_item(guid);",
342 
343 			"CREATE INDEX IF NOT EXISTS idx_feedurl ON "
344 			"rss_item(feedurl);",
345 			/* we analyse the indices for better statistics */
346 			"ANALYZE;",
347 
348 			"ALTER TABLE rss_feed ADD lastmodified INTEGER(11) NOT "
349 			"NULL "
350 			"DEFAULT 0;",
351 
352 			"CREATE INDEX IF NOT EXISTS idx_lastmodified ON "
353 			"rss_feed(lastmodified);",
354 
355 			"ALTER TABLE rss_item ADD deleted INTEGER(1) NOT NULL "
356 			"DEFAULT "
357 			"0;",
358 
359 			"CREATE INDEX IF NOT EXISTS idx_deleted ON "
360 			"rss_item(deleted);",
361 
362 			"ALTER TABLE rss_feed ADD is_rtl INTEGER(1) NOT NULL "
363 			"DEFAULT "
364 			"0;",
365 
366 			"ALTER TABLE rss_feed ADD etag VARCHAR(128) NOT NULL "
367 			"DEFAULT "
368 			"\"\";",
369 
370 			"ALTER TABLE rss_item ADD base VARCHAR(128) NOT NULL "
371 			"DEFAULT "
372 			"\"\";",
373 		}
374 	},
375 	{	{2, 11},
376 		{
377 			"CREATE TABLE metadata ( "
378 			" db_schema_version_major INTEGER NOT NULL, "
379 			" db_schema_version_minor INTEGER NOT NULL );",
380 
381 			"INSERT INTO metadata VALUES ( 2, 11 );"
382 		}
383 	},
384 	{	{2, 22},
385 		{
386 			"UPDATE metadata SET db_schema_version_major = 2, db_schema_version_minor = 22;",
387 
388 			"ALTER TABLE rss_item ADD COLUMN content_mime_type VARCHAR(255) NOT NULL DEFAULT \"\";"
389 		}
390 	}
391 
392 	// Note: schema changes should use the version number of the release that introduced them.
393 };
394 
populate_tables()395 void Cache::populate_tables()
396 {
397 	const SchemaVersion version = get_schema_version();
398 	LOG(Level::INFO,
399 		"Cache::populate_tables: DB schema version %u.%u",
400 		version.major,
401 		version.minor);
402 
403 	if (version.major > utils::newsboat_version_major()) {
404 		const std::string msg =
405 			"Database schema isn't supported because it's too new";
406 		LOG(Level::ERROR, msg);
407 		throw std::runtime_error(msg);
408 	}
409 
410 	auto patches_it = schemaPatches.cbegin();
411 
412 	// rewind to the first patch that should be applied
413 	while (patches_it != schemaPatches.cend() &&
414 		patches_it->first <= version) {
415 		++patches_it;
416 	}
417 
418 	for (; patches_it != schemaPatches.cend(); ++patches_it) {
419 		const SchemaVersion patch_version = patches_it->first;
420 		LOG(Level::INFO,
421 			"Cache::populate_tables: applying DB schema patches "
422 			"for version %u.%u",
423 			patch_version.major,
424 			patch_version.minor);
425 		for (const auto& query : patches_it->second) {
426 			run_sql_nothrow(query);
427 		}
428 	}
429 }
430 
fetch_lastmodified(const std::string & feedurl,time_t & t,std::string & etag)431 void Cache::fetch_lastmodified(const std::string& feedurl,
432 	time_t& t,
433 	std::string& etag)
434 {
435 	std::lock_guard<std::mutex> lock(mtx);
436 	std::string query = prepare_query(
437 			"SELECT lastmodified, etag FROM rss_feed WHERE rssurl = '%q';",
438 			feedurl);
439 	HeaderValues result = {0, ""};
440 	run_sql(query, lastmodified_callback, &result);
441 	t = result.lastmodified;
442 	etag = result.etag;
443 	LOG(Level::DEBUG,
444 		"Cache::fetch_lastmodified: t = %" PRId64 " etag = %s",
445 		// On GCC, `time_t` is `long int`, which is at least 32 bits. On
446 		// x86_64, it's 64 bits. Thus, this cast is either a no-op, or an
447 		// up-cast which is always safe.
448 		static_cast<int64_t>(t),
449 		etag);
450 }
451 
update_lastmodified(const std::string & feedurl,time_t t,const std::string & etag)452 void Cache::update_lastmodified(const std::string& feedurl,
453 	time_t t,
454 	const std::string& etag)
455 {
456 	if (t == 0 && etag.length() == 0) {
457 		LOG(Level::INFO,
458 			"Cache::update_lastmodified: both time and etag are "
459 			"empty, not updating anything");
460 		return;
461 	}
462 	std::lock_guard<std::mutex> lock(mtx);
463 	std::string query = "UPDATE rss_feed SET ";
464 	if (t > 0) {
465 		query.append(prepare_query("lastmodified = '%d'", t));
466 	}
467 	if (etag.length() > 0) {
468 		query.append(prepare_query("%c etag = %s",
469 				(t > 0 ? ',' : ' '),
470 				prepare_query("'%q'", etag)));
471 	}
472 	query.append(" WHERE rssurl = ");
473 	query.append(prepare_query("'%q'", feedurl));
474 	run_sql_nothrow(query);
475 }
476 
mark_item_deleted(const std::string & guid,bool b)477 void Cache::mark_item_deleted(const std::string& guid, bool b)
478 {
479 	std::lock_guard<std::mutex> lock(mtx);
480 	std::string query = prepare_query(
481 			"UPDATE rss_item SET deleted = %u WHERE guid = '%q'",
482 			b ? 1 : 0,
483 			guid);
484 	run_sql_nothrow(query);
485 }
486 
mark_feed_items_deleted(const std::string & feedurl)487 void Cache::mark_feed_items_deleted(const std::string& feedurl)
488 {
489 	std::lock_guard<std::mutex> lock(mtx);
490 	std::string query = prepare_query(
491 			"UPDATE rss_item SET deleted = 1 WHERE feedurl = '%s';",
492 			feedurl);
493 	run_sql_nothrow(query);
494 }
495 
496 // this function writes an RssFeed including all RssItems to the database
externalize_rssfeed(std::shared_ptr<RssFeed> feed,bool reset_unread)497 void Cache::externalize_rssfeed(std::shared_ptr<RssFeed> feed,
498 	bool reset_unread)
499 {
500 	ScopeMeasure m1("Cache::externalize_feed");
501 	if (feed->is_query_feed()) {
502 		return;
503 	}
504 
505 	std::lock_guard<std::mutex> lock(mtx);
506 	std::lock_guard<std::mutex> feedlock(feed->item_mutex);
507 	// scope_transaction dbtrans(db);
508 
509 	CbHandler count_cbh;
510 	auto query = prepare_query(
511 			"SELECT count(*) FROM rss_feed WHERE rssurl = '%q';",
512 			feed->rssurl());
513 	run_sql(query, count_callback, &count_cbh);
514 
515 	const int count = count_cbh.count();
516 	LOG(Level::DEBUG,
517 		"Cache::externalize_rss_feed: rss_feeds with rssurl = '%s': "
518 		"found "
519 		"%d",
520 		feed->rssurl(),
521 		count);
522 	if (count > 0) {
523 		const std::string updatequery = prepare_query(
524 				"UPDATE rss_feed "
525 				"SET title = '%q', url = '%q', is_rtl = %u "
526 				"WHERE rssurl = '%q';",
527 				feed->title_raw(),
528 				feed->link(),
529 				feed->is_rtl() ? 1 : 0,
530 				feed->rssurl());
531 		run_sql(updatequery);
532 	} else {
533 		const std::string insertquery = prepare_query(
534 				"INSERT INTO rss_feed (rssurl, url, title, is_rtl) "
535 				"VALUES ( '%q', '%q', '%q', %u );",
536 				feed->rssurl(),
537 				feed->link(),
538 				feed->title_raw(),
539 				feed->is_rtl() ? 1 : 0);
540 		run_sql(insertquery);
541 	}
542 
543 	const unsigned int max_items = cfg->get_configvalue_as_int("max-items");
544 
545 	LOG(Level::INFO,
546 		"Cache::externalize_feed: max_items = %u "
547 		"feed.total_item_count() = "
548 		"%u",
549 		max_items,
550 		feed->total_item_count());
551 
552 	if (max_items > 0 && feed->total_item_count() > max_items) {
553 		feed->erase_items(
554 			feed->items().begin() + max_items, feed->items().end());
555 	}
556 
557 	const unsigned int days = cfg->get_configvalue_as_int("keep-articles-days");
558 	const time_t old_time = time(nullptr) - days * 24 * 60 * 60;
559 
560 	// the reverse iterator is there for the sorting foo below (think about
561 	// it)
562 	for (auto it = feed->items().rbegin(); it != feed->items().rend();
563 		++it) {
564 		if (days == 0 || (*it)->pubDate_timestamp() >= old_time)
565 			update_rssitem_unlocked(
566 				*it, feed->rssurl(), reset_unread);
567 	}
568 }
569 
570 // this function reads an RssFeed including all of its RssItems.
571 // the feed parameter needs to have the rssurl member set.
internalize_rssfeed(std::string rssurl,RssIgnores * ign)572 std::shared_ptr<RssFeed> Cache::internalize_rssfeed(std::string rssurl,
573 	RssIgnores* ign)
574 {
575 	ScopeMeasure m1("Cache::internalize_rssfeed");
576 
577 	std::shared_ptr<RssFeed> feed(new RssFeed(this));
578 	feed->set_rssurl(rssurl);
579 
580 	if (utils::is_query_url(rssurl)) {
581 		return feed;
582 	}
583 
584 	std::lock_guard<std::mutex> lock(mtx);
585 	std::lock_guard<std::mutex> feedlock(feed->item_mutex);
586 
587 	/* first, we check whether the feed is there at all */
588 	std::string query = prepare_query(
589 			"SELECT count(*) FROM rss_feed WHERE rssurl = '%q';", rssurl);
590 	CbHandler count_cbh;
591 	run_sql(query, count_callback, &count_cbh);
592 
593 	if (count_cbh.count() == 0) {
594 		return feed;
595 	}
596 
597 	/* then we first read the feed from the database */
598 	query = prepare_query(
599 			"SELECT title, url, is_rtl FROM rss_feed WHERE rssurl = '%q';",
600 			rssurl);
601 	run_sql(query, rssfeed_callback, &feed);
602 
603 	/* ...and then the associated items */
604 	query = prepare_query(
605 			"SELECT guid, title, author, url, pubDate, length(content), "
606 			"unread, "
607 			"feedurl, enclosure_url, enclosure_type, enqueued, flags, base "
608 			"FROM rss_item "
609 			"WHERE feedurl = '%q' "
610 			"AND deleted = 0 "
611 			"ORDER BY pubDate DESC, id DESC;",
612 			rssurl);
613 	run_sql(query, rssitem_callback, &feed);
614 
615 	auto feed_weak_ptr = std::weak_ptr<RssFeed>(feed);
616 	for (const auto& item : feed->items()) {
617 		item->set_cache(this);
618 		item->set_feedptr(feed_weak_ptr);
619 		item->set_feedurl(feed->rssurl());
620 	}
621 
622 	if (ign != nullptr) {
623 		auto& items = feed->items();
624 		items.erase(
625 			std::remove_if(
626 				items.begin(),
627 				items.end(),
628 		[&](std::shared_ptr<RssItem> item) -> bool {
629 			try
630 			{
631 				return ign->matches(item.get());
632 			} catch (const MatcherException& ex)
633 			{
634 				LOG(Level::DEBUG,
635 					"oops, Matcher exception: %s",
636 					ex.what());
637 				return false;
638 			}
639 		}),
640 		items.end());
641 	}
642 
643 	const unsigned int max_items = cfg->get_configvalue_as_int("max-items");
644 
645 	if (max_items > 0 && feed->total_item_count() > max_items) {
646 		std::vector<std::shared_ptr<RssItem>> flagged_items;
647 		for (unsigned int j = max_items; j < feed->total_item_count();
648 			++j) {
649 			if (feed->items()[j]->flags().length() == 0) {
650 				delete_item(feed->items()[j]);
651 			} else {
652 				flagged_items.push_back(feed->items()[j]);
653 			}
654 		}
655 
656 		auto it = feed->items().begin() + max_items;
657 		feed->erase_items(
658 			it, feed->items().end()); // delete old entries
659 
660 		// if some flagged articles were saved, append them
661 		feed->add_items(flagged_items);
662 	}
663 	feed->sort_unlocked(cfg->get_article_sort_strategy());
664 	return feed;
665 }
666 
search_for_items(const std::string & querystr,const std::string & feedurl)667 std::vector<std::shared_ptr<RssItem>> Cache::search_for_items(
668 		const std::string& querystr, const std::string& feedurl)
669 {
670 	assert(!utils::is_query_url(feedurl));
671 	std::string query;
672 	std::vector<std::shared_ptr<RssItem>> items;
673 
674 	std::lock_guard<std::mutex> lock(mtx);
675 	if (feedurl.length() > 0) {
676 		query = prepare_query(
677 				"SELECT guid, title, author, url, pubDate, "
678 				"length(content), "
679 				"unread, feedurl, enclosure_url, enclosure_type, "
680 				"enqueued, flags, base "
681 				"FROM rss_item "
682 				"WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
683 				"AND feedurl = '%q' "
684 				"AND deleted = 0 "
685 				"ORDER BY pubDate DESC, id DESC;",
686 				querystr,
687 				querystr,
688 				feedurl);
689 	} else {
690 		query = prepare_query(
691 				"SELECT guid, title, author, url, pubDate, "
692 				"length(content), "
693 				"unread, feedurl, enclosure_url, enclosure_type, "
694 				"enqueued, flags, base "
695 				"FROM rss_item "
696 				"WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
697 				"AND deleted = 0 "
698 				"ORDER BY pubDate DESC,  id DESC;",
699 				querystr,
700 				querystr);
701 	}
702 
703 	run_sql(query, search_item_callback, &items);
704 	for (const auto& item : items) {
705 		item->set_cache(this);
706 	}
707 
708 	return items;
709 }
710 
search_in_items(const std::string & querystr,const std::unordered_set<std::string> & guids)711 std::unordered_set<std::string> Cache::search_in_items(
712 	const std::string& querystr,
713 	const std::unordered_set<std::string>& guids)
714 {
715 	std::string list = "(";
716 
717 	for (const auto& guid : guids) {
718 		list.append(prepare_query("%Q, ", guid));
719 	}
720 	list.append("'')");
721 
722 	std::string query = prepare_query(
723 			"SELECT guid "
724 			"FROM rss_item "
725 			"WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
726 			"AND guid IN %s;",
727 			querystr,
728 			querystr,
729 			list);
730 
731 	std::unordered_set<std::string> items;
732 	std::lock_guard<std::mutex> lock(mtx);
733 	run_sql(query, guid_callback, &items);
734 	return items;
735 }
736 
delete_item(const std::shared_ptr<RssItem> & item)737 void Cache::delete_item(const std::shared_ptr<RssItem>& item)
738 {
739 	const std::string query = prepare_query(
740 			"DELETE FROM rss_item WHERE guid = '%q';", item->guid());
741 	run_sql(query);
742 }
743 
do_vacuum()744 void Cache::do_vacuum()
745 {
746 	std::lock_guard<std::mutex> lock(mtx);
747 	run_sql("VACUUM;");
748 }
749 
cleanup_cache(std::vector<std::shared_ptr<RssFeed>> feeds,bool always_clean)750 void Cache::cleanup_cache(std::vector<std::shared_ptr<RssFeed>> feeds,
751 	bool always_clean)
752 {
753 	// we don't use the std::lock_guard<> here... see comments below
754 	mtx.lock();
755 
756 	/*
757 	 * cache cleanup means that all entries in both the RssFeed and
758 	 * RssItem tables that are associated with an RSS feed URL that is not
759 	 * contained in the current configuration are deleted. Such entries are
760 	 * the result when a user deletes one or more lines in the urls
761 	 * configuration file. We then assume that the user isn't interested
762 	 * anymore in reading this feed, and delete all associated entries
763 	 * because they would be non-accessible.
764 	 *
765 	 * The behaviour whether the cleanup is done or not is configurable via
766 	 * the configuration file.
767 	 */
768 	if (always_clean || cfg->get_configvalue_as_bool("cleanup-on-quit")) {
769 		LOG(Level::DEBUG, "Cache::cleanup_cache: cleaning up cache...");
770 		std::string list = "(";
771 
772 		for (const auto& feed : feeds) {
773 			std::string name =
774 				prepare_query("'%q'", feed->rssurl());
775 			list.append(name);
776 			list.append(", ");
777 		}
778 		list.append("'')");
779 
780 		std::string cleanup_rss_feeds_statement(
781 			"DELETE FROM rss_feed WHERE rssurl NOT IN ");
782 		cleanup_rss_feeds_statement.append(list);
783 		cleanup_rss_feeds_statement.push_back(';');
784 
785 		std::string cleanup_rss_items_statement(
786 			"DELETE FROM rss_item WHERE feedurl NOT IN ");
787 		cleanup_rss_items_statement.append(list);
788 		cleanup_rss_items_statement.push_back(';');
789 
790 		std::string cleanup_read_items_statement(
791 			"UPDATE rss_item SET deleted = 1 WHERE unread = 0");
792 
793 		run_sql(cleanup_rss_feeds_statement);
794 		run_sql(cleanup_rss_items_statement);
795 		if (cfg->get_configvalue_as_bool(
796 				"delete-read-articles-on-quit")) {
797 			run_sql(cleanup_read_items_statement);
798 		}
799 
800 		// WARNING: THE MISSING UNLOCK OPERATION IS MISSING FOR A
801 		// PURPOSE! It's missing so that no database operation can occur
802 		// after the cache cleanup! mtx->unlock();
803 	} else {
804 		LOG(Level::DEBUG,
805 			"Cache::cleanup_cache: NOT cleaning up cache...");
806 	}
807 }
808 
update_rssitem_unlocked(std::shared_ptr<RssItem> item,const std::string & feedurl,bool reset_unread)809 void Cache::update_rssitem_unlocked(std::shared_ptr<RssItem> item,
810 	const std::string& feedurl,
811 	bool reset_unread)
812 {
813 	std::string query = prepare_query(
814 			"SELECT count(*) FROM rss_item WHERE guid = '%q';",
815 			item->guid());
816 	CbHandler count_cbh;
817 	run_sql(query, count_callback, &count_cbh);
818 	const auto description = item->description();
819 	if (count_cbh.count() > 0) {
820 		if (reset_unread) {
821 			std::string content;
822 			query = prepare_query(
823 					"SELECT content FROM rss_item WHERE guid = "
824 					"'%q';",
825 					item->guid());
826 			run_sql(query, single_string_callback, &content);
827 			if (content != description.text) {
828 				LOG(Level::DEBUG,
829 					"Cache::update_rssitem_unlocked: '%s' "
830 					"is "
831 					"different from '%s'",
832 					content,
833 					description.text);
834 				query = prepare_query(
835 						"UPDATE rss_item SET unread = 1 WHERE "
836 						"guid = '%q';",
837 						item->guid());
838 				run_sql(query);
839 			}
840 		}
841 		std::string update;
842 		if (item->override_unread()) {
843 			update = prepare_query(
844 					"UPDATE rss_item "
845 					"SET title = '%q', author = '%q', url = '%q', "
846 					"feedurl = '%q', "
847 					"content = '%q', content_mime_type = '%q', enclosure_url = '%q', "
848 					"enclosure_type = '%q', base = '%q', unread = "
849 					"'%d' "
850 					"WHERE guid = '%q'",
851 					item->title(),
852 					item->author(),
853 					item->link(),
854 					feedurl,
855 					description.text,
856 					description.mime,
857 					item->enclosure_url(),
858 					item->enclosure_type(),
859 					item->get_base(),
860 					(item->unread() ? 1 : 0),
861 					item->guid());
862 		} else {
863 			update = prepare_query(
864 					"UPDATE rss_item "
865 					"SET title = '%q', author = '%q', url = '%q', "
866 					"feedurl = '%q', "
867 					"content = '%q', content_mime_type = '%q', enclosure_url = '%q', "
868 					"enclosure_type = '%q', base = '%q' "
869 					"WHERE guid = '%q'",
870 					item->title(),
871 					item->author(),
872 					item->link(),
873 					feedurl,
874 					description.text,
875 					description.mime,
876 					item->enclosure_url(),
877 					item->enclosure_type(),
878 					item->get_base(),
879 					item->guid());
880 		}
881 		run_sql(update);
882 	} else {
883 		std::string insert = prepare_query(
884 				"INSERT INTO rss_item (guid, title, author, url, "
885 				"feedurl, "
886 				"pubDate, content, content_mime_type, unread, enclosure_url, "
887 				"enclosure_type, enqueued, base) "
888 				"VALUES "
889 				"('%q','%q','%q','%q','%q','%u','%q','%q','%d','%q','%q',%d,"
890 				" "
891 				"'%q')",
892 				item->guid(),
893 				item->title(),
894 				item->author(),
895 				item->link(),
896 				feedurl,
897 				item->pubDate_timestamp(),
898 				description.text,
899 				description.mime,
900 				(item->unread() ? 1 : 0),
901 				item->enclosure_url(),
902 				item->enclosure_type(),
903 				item->enqueued() ? 1 : 0,
904 				item->get_base());
905 		run_sql(insert);
906 	}
907 }
908 
mark_all_read(std::shared_ptr<RssFeed> feed)909 void Cache::mark_all_read(std::shared_ptr<RssFeed> feed)
910 {
911 	std::lock_guard<std::mutex> lock(mtx);
912 	std::lock_guard<std::mutex> itemlock(feed->item_mutex);
913 	std::string query =
914 		"UPDATE rss_item SET unread = '0' WHERE unread != '0' AND guid "
915 		"IN (";
916 
917 	for (const auto& item : feed->items()) {
918 		query.append(prepare_query("'%q',", item->guid()));
919 	}
920 	query.append("'');");
921 
922 	run_sql(query);
923 }
924 
925 /* this function marks all RssItems (optionally of a certain feed url) as read
926  */
mark_all_read(const std::string & feedurl)927 void Cache::mark_all_read(const std::string& feedurl)
928 {
929 	std::lock_guard<std::mutex> lock(mtx);
930 
931 	std::string query;
932 	if (feedurl.length() > 0) {
933 		query = prepare_query(
934 				"UPDATE rss_item "
935 				"SET unread = '0' "
936 				"WHERE unread != '0' "
937 				"AND feedurl = '%q';",
938 				feedurl);
939 	} else {
940 		query = prepare_query(
941 				"UPDATE rss_item "
942 				"SET unread = '0' "
943 				"WHERE unread != '0';");
944 	}
945 	run_sql(query);
946 }
947 
update_rssitem_unread_and_enqueued(RssItem * item,const std::string &)948 void Cache::update_rssitem_unread_and_enqueued(RssItem* item,
949 	const std::string& /* feedurl */)
950 {
951 	std::lock_guard<std::mutex> lock(mtx);
952 
953 	const auto query = prepare_query(
954 			"UPDATE rss_item "
955 			"SET unread = '%d', enqueued = '%d' "
956 			"WHERE guid = '%q'",
957 			item->unread() ? 1 : 0,
958 			item->enqueued() ? 1 : 0,
959 			item->guid());
960 	run_sql(query);
961 }
962 
963 /* this function updates the unread and enqueued flags */
update_rssitem_unread_and_enqueued(std::shared_ptr<RssItem> item,const std::string & feedurl)964 void Cache::update_rssitem_unread_and_enqueued(std::shared_ptr<RssItem> item,
965 	const std::string& feedurl)
966 {
967 	update_rssitem_unread_and_enqueued(item.get(), feedurl);
968 }
969 
970 /* helper function to wrap std::string around the sqlite3_*mprintf function */
prepare_query(const std::string & format)971 std::string Cache::prepare_query(const std::string& format)
972 {
973 	return format;
974 }
975 
976 template<typename... Args>
prepare_query(const std::string & format,const std::string & argument,Args...args)977 std::string Cache::prepare_query(const std::string& format,
978 	const std::string& argument,
979 	Args... args)
980 {
981 	return prepare_query(format, argument.c_str(), args...);
982 }
983 
984 template<typename T, typename... Args>
prepare_query(const std::string & format,const T & argument,Args...args)985 std::string Cache::prepare_query(const std::string& format, const T& argument,
986 	Args... args)
987 {
988 	std::string local_format, remaining_format;
989 	std::tie(local_format, remaining_format) =
990 		strprintf::split_format(format);
991 
992 	char* piece = sqlite3_mprintf(local_format.c_str(), argument);
993 	std::string result;
994 	if (piece) {
995 		result = piece;
996 		sqlite3_free(piece);
997 	}
998 
999 	return result + prepare_query(remaining_format, args...);
1000 }
1001 
update_rssitem_flags(RssItem * item)1002 void Cache::update_rssitem_flags(RssItem* item)
1003 {
1004 	std::lock_guard<std::mutex> lock(mtx);
1005 
1006 	const std::string update = prepare_query(
1007 			"UPDATE rss_item SET flags = '%q' WHERE guid = '%q';",
1008 			item->flags(),
1009 			item->guid());
1010 
1011 	run_sql(update);
1012 }
1013 
remove_old_deleted_items(RssFeed * feed)1014 void Cache::remove_old_deleted_items(RssFeed* feed)
1015 {
1016 	ScopeMeasure m1("Cache::remove_old_deleted_items");
1017 
1018 	std::lock_guard<std::mutex> cache_lock(mtx);
1019 	std::lock_guard<std::mutex> feed_lock(feed->item_mutex);
1020 
1021 	std::vector<std::string> guids;
1022 	for (const auto& item : feed->items()) {
1023 		guids.push_back(item->guid());
1024 	}
1025 
1026 	if (guids.empty()) {
1027 		LOG(Level::DEBUG,
1028 			"Cache::remove_old_deleted_items: not cleaning up "
1029 			"anything because last reload brought no new items "
1030 			"(detected no changes)");
1031 		return;
1032 	}
1033 	std::string guidset = "(";
1034 	for (const auto& guid : guids) {
1035 		guidset.append(prepare_query("'%q', ", guid));
1036 	}
1037 	guidset.append("'')");
1038 	const std::string query = prepare_query(
1039 			"DELETE FROM rss_item "
1040 			"WHERE feedurl = '%q' "
1041 			"AND deleted = 1 "
1042 			"AND guid NOT IN %s;",
1043 			feed->rssurl(),
1044 			guidset);
1045 	run_sql(query);
1046 }
1047 
mark_items_read_by_guid(const std::vector<std::string> & guids)1048 void Cache::mark_items_read_by_guid(const std::vector<std::string>& guids)
1049 {
1050 	ScopeMeasure m1("Cache::mark_items_read_by_guid");
1051 	std::string guidset("(");
1052 	for (const auto& guid : guids) {
1053 		guidset.append(prepare_query("'%q', ", guid));
1054 	}
1055 	guidset.append("'')");
1056 
1057 	const std::string updatequery = prepare_query(
1058 			"UPDATE rss_item SET unread = 0 WHERE unread = 1 AND guid IN "
1059 			"%s;",
1060 			guidset);
1061 
1062 	std::lock_guard<std::mutex> lock(mtx);
1063 	run_sql(updatequery);
1064 }
1065 
get_read_item_guids()1066 std::vector<std::string> Cache::get_read_item_guids()
1067 {
1068 	std::vector<std::string> guids;
1069 	const std::string query = "SELECT guid FROM rss_item WHERE unread = 0;";
1070 
1071 	std::lock_guard<std::mutex> lock(mtx);
1072 	run_sql(query, vectorofstring_callback, &guids);
1073 
1074 	return guids;
1075 }
1076 
clean_old_articles()1077 void Cache::clean_old_articles()
1078 {
1079 	std::lock_guard<std::mutex> lock(mtx);
1080 
1081 	const unsigned int days = cfg->get_configvalue_as_int("keep-articles-days");
1082 	if (days > 0) {
1083 		const time_t old_date = time(nullptr) - days * 24 * 60 * 60;
1084 
1085 		const std::string query(prepare_query(
1086 				"DELETE FROM rss_item WHERE pubDate < %d", old_date));
1087 		LOG(Level::DEBUG,
1088 			"Cache::clean_old_articles: about to delete articles "
1089 			"with a pubDate older than %" PRId64,
1090 			// On GCC, `time_t` is `long int`, which is at least 32 bits long
1091 			// according to the spec. On x86_64, it's actually 64 bits. Thus,
1092 			// casting to int64_t is either a no-op, or an up-cast which are
1093 			// always safe.
1094 			static_cast<int64_t>(old_date));
1095 		run_sql(query);
1096 	} else {
1097 		LOG(Level::DEBUG,
1098 			"Cache::clean_old_articles, days == 0, not cleaning up "
1099 			"anything");
1100 	}
1101 }
1102 
fetch_descriptions(RssFeed * feed)1103 void Cache::fetch_descriptions(RssFeed* feed)
1104 {
1105 	std::vector<std::string> guids;
1106 	for (const auto& item : feed->items()) {
1107 		guids.push_back(prepare_query("'%q'", item->guid()));
1108 	}
1109 	const std::string in_clause = utils::join(guids, ", ");
1110 
1111 	const std::string query = prepare_query(
1112 			"SELECT guid, content, content_mime_type FROM rss_item WHERE guid IN (%s);",
1113 			in_clause);
1114 
1115 	run_sql(query, fill_content_callback, feed);
1116 }
1117 
fetch_description(const RssItem & item)1118 std::string Cache::fetch_description(const RssItem& item)
1119 {
1120 	const std::string in_clause = prepare_query("'%q'", item.guid());
1121 
1122 	const std::string query = prepare_query(
1123 			"SELECT content FROM rss_item WHERE guid = %s;",
1124 			in_clause);
1125 
1126 	std::string description;
1127 	auto store_description = [](void* d, int, char** argv, char**) -> int {
1128 		auto& desc = *static_cast<std::string*>(d);
1129 		desc = argv[0] ? argv[0] : "";
1130 		return 0;
1131 	};
1132 
1133 	run_sql(query, store_description, &description);
1134 	return description;
1135 }
1136 
get_schema_version()1137 SchemaVersion Cache::get_schema_version()
1138 {
1139 	sqlite3_stmt* stmt{};
1140 	SchemaVersion result;
1141 
1142 	int rc = sqlite3_prepare_v2(db,
1143 			"SELECT db_schema_version_major, db_schema_version_minor "
1144 			"FROM metadata",
1145 			-1,
1146 			&stmt,
1147 			nullptr);
1148 
1149 	if (rc != SQLITE_OK) {
1150 		// I'm pretty sure the query above is correct, and the only way
1151 		// it can fail is when metadata table is not present in the DB.
1152 		// That means we're dealing with an empty cache file, or one
1153 		// that was created by an older version of Newsboat.
1154 		result = unknown_version;
1155 	} else {
1156 		rc = sqlite3_step(stmt);
1157 		if (rc != SQLITE_ROW) {
1158 			// table is empty. Technically, this is impossible, but
1159 			// is easy enough to fix - just re-create the db
1160 			result = unknown_version;
1161 		} else {
1162 			// row is available, let's grab it!
1163 			result.major = sqlite3_column_int(stmt, 0);
1164 			result.minor = sqlite3_column_int(stmt, 1);
1165 
1166 			assert(sqlite3_step(stmt) == SQLITE_DONE);
1167 		}
1168 	}
1169 
1170 	sqlite3_finalize(stmt);
1171 	return result;
1172 }
1173 
1174 } // namespace newsboat
1175