1 #include "cache.h"
2
3 #include <cassert>
4 #include <cinttypes>
5 #include <cstdlib>
6 #include <cstring>
7 #include <fstream>
8 #include <iostream>
9 #include <sqlite3.h>
10 #include <sstream>
11 #include <time.h>
12
13 #include "config.h"
14 #include "configcontainer.h"
15 #include "controller.h"
16 #include "dbexception.h"
17 #include "logger.h"
18 #include "matcherexception.h"
19 #include "rssfeed.h"
20 #include "scopemeasure.h"
21 #include "strprintf.h"
22 #include "utils.h"
23
24 namespace newsboat {
25
run_sql_impl(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument,bool do_throw)26 inline void Cache::run_sql_impl(const std::string& query,
27 int (*callback)(void*, int, char**, char**),
28 void* callback_argument,
29 bool do_throw)
30 {
31 LOG(Level::DEBUG, "running query: %s", query);
32 const int rc = sqlite3_exec(
33 db, query.c_str(), callback, callback_argument, nullptr);
34 if (rc != SQLITE_OK) {
35 const std::string message = "query \"%s\" failed: (%d) %s";
36 LOG(Level::CRITICAL, message, query, rc, sqlite3_errstr(rc));
37 if (do_throw) {
38 throw DbException(db);
39 }
40 }
41 }
42
run_sql(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument)43 void Cache::run_sql(const std::string& query,
44 int (*callback)(void*, int, char**, char**),
45 void* callback_argument)
46 {
47 run_sql_impl(query, callback, callback_argument, true);
48 }
49
run_sql_nothrow(const std::string & query,int (* callback)(void *,int,char **,char **),void * callback_argument)50 void Cache::run_sql_nothrow(const std::string& query,
51 int (*callback)(void*, int, char**, char**),
52 void* callback_argument)
53 {
54 run_sql_impl(query, callback, callback_argument, false);
55 }
56
57 struct CbHandler {
CbHandlernewsboat::CbHandler58 CbHandler()
59 : c(-1)
60 {
61 }
set_countnewsboat::CbHandler62 void set_count(int i)
63 {
64 c = i;
65 }
countnewsboat::CbHandler66 int count()
67 {
68 return c;
69 }
70
71 private:
72 int c;
73 };
74
75 struct HeaderValues {
76 time_t lastmodified;
77 std::string etag;
78 };
79
count_callback(void * handler,int argc,char ** argv,char **)80 static int count_callback(void* handler, int argc, char** argv,
81 char** /* azColName */)
82 {
83 CbHandler* cbh = static_cast<CbHandler*>(handler);
84
85 if (argc > 0) {
86 std::istringstream is(argv[0]);
87 int x;
88 is >> x;
89 cbh->set_count(x);
90 }
91
92 return 0;
93 }
94
single_string_callback(void * handler,int argc,char ** argv,char **)95 static int single_string_callback(void* handler,
96 int argc,
97 char** argv,
98 char** /* azColName */)
99 {
100 std::string* value = reinterpret_cast<std::string*>(handler);
101 if (argc > 0 && argv[0]) {
102 *value = argv[0];
103 }
104 return 0;
105 }
106
rssfeed_callback(void * myfeed,int argc,char ** argv,char **)107 static int rssfeed_callback(void* myfeed, int argc, char** argv,
108 char** /* azColName */)
109 {
110 std::shared_ptr<RssFeed>* feed =
111 static_cast<std::shared_ptr<RssFeed>*>(myfeed);
112 // normaly, this shouldn't happen, but we keep the assert()s here
113 // nevertheless
114 assert(argc == 3);
115 assert(argv[0] != nullptr);
116 assert(argv[1] != nullptr);
117 assert(argv[2] != nullptr);
118 (*feed)->set_title(argv[0]);
119 (*feed)->set_link(argv[1]);
120 (*feed)->set_rtl(strcmp(argv[2], "1") == 0);
121 LOG(Level::INFO,
122 "rssfeed_callback: title = %s link = %s is_rtl = %s",
123 argv[0],
124 argv[1],
125 argv[2]);
126 return 0;
127 }
128
lastmodified_callback(void * handler,int argc,char ** argv,char **)129 static int lastmodified_callback(void* handler,
130 int argc,
131 char** argv,
132 char** /* azColName */)
133 {
134 HeaderValues* result = static_cast<HeaderValues*>(handler);
135 assert(argc == 2);
136 assert(result != nullptr);
137 if (argv[0]) {
138 std::istringstream is(argv[0]);
139 is >> result->lastmodified;
140 } else {
141 result->lastmodified = 0;
142 }
143 if (argv[1]) {
144 result->etag = argv[1];
145 } else {
146 result->etag = "";
147 }
148 LOG(Level::INFO,
149 "lastmodified_callback: lastmodified = %" PRId64 " etag = %s",
150 // On GCC, `time_t` is `long int`, which is at least 32 bits long
151 // according to the spec. On x86_64, it's actually 64 bits. Thus,
152 // casting to int64_t is either a no-op, or an up-cast which are always
153 // safe.
154 static_cast<int64_t>(result->lastmodified),
155 result->etag);
156 return 0;
157 }
158
vectorofstring_callback(void * vp,int argc,char ** argv,char **)159 static int vectorofstring_callback(void* vp, int argc, char** argv,
160 char** /* azColName */)
161 {
162 std::vector<std::string>* vectorptr =
163 static_cast<std::vector<std::string>*>(vp);
164 assert(argc == 1);
165 assert(argv[0] != nullptr);
166 vectorptr->push_back(std::string(argv[0]));
167 LOG(Level::INFO, "vectorofstring_callback: element = %s", argv[0]);
168 return 0;
169 }
170
rssitem_callback(void * myfeed,int argc,char ** argv,char **)171 static int rssitem_callback(void* myfeed, int argc, char** argv,
172 char** /* azColName */)
173 {
174 std::shared_ptr<RssFeed>* feed =
175 static_cast<std::shared_ptr<RssFeed>*>(myfeed);
176 assert(argc == 13);
177 std::shared_ptr<RssItem> item(new RssItem(nullptr));
178 item->set_guid(argv[0]);
179 item->set_title(argv[1]);
180 item->set_author(argv[2]);
181 item->set_link(argv[3]);
182
183 std::istringstream is(argv[4]);
184 time_t t;
185 is >> t;
186 item->set_pubDate(t);
187
188 item->set_size(utils::to_u(argv[5]));
189 item->set_unread((std::string("1") == argv[6]));
190
191 item->set_feedurl(argv[7]);
192
193 item->set_enclosure_url(argv[8] ? argv[8] : "");
194 item->set_enclosure_type(argv[9] ? argv[9] : "");
195 item->set_enqueued((std::string("1") == (argv[10] ? argv[10] : "")));
196 item->set_flags(argv[11] ? argv[11] : "");
197 item->set_base(argv[12] ? argv[12] : "");
198
199 //(*feed)->items().push_back(item);
200 (*feed)->add_item(item);
201 return 0;
202 }
203
fill_content_callback(void * myfeed,int argc,char ** argv,char **)204 static int fill_content_callback(void* myfeed,
205 int argc,
206 char** argv,
207 char** /* azColName */)
208 {
209 RssFeed* feed = static_cast<RssFeed*>(myfeed);
210 assert(argc == 3);
211 if (argv[0]) {
212 std::shared_ptr<RssItem> item =
213 feed->get_item_by_guid_unlocked(argv[0]);
214 item->set_description(argv[1] ? argv[1] : "", argv[2] ? argv[2] : "");
215 }
216 return 0;
217 }
218
search_item_callback(void * myfeed,int argc,char ** argv,char **)219 static int search_item_callback(void* myfeed,
220 int argc,
221 char** argv,
222 char** /* azColName */)
223 {
224 std::vector<std::shared_ptr<RssItem>>* items =
225 static_cast<std::vector<std::shared_ptr<RssItem>>*>(myfeed);
226 assert(argc == 13);
227 std::shared_ptr<RssItem> item(new RssItem(nullptr));
228 item->set_guid(argv[0]);
229 item->set_title(argv[1]);
230 item->set_author(argv[2]);
231 item->set_link(argv[3]);
232
233 std::istringstream is(argv[4]);
234 time_t t;
235 is >> t;
236 item->set_pubDate(t);
237
238 item->set_size(utils::to_u(argv[5]));
239 item->set_unread((std::string("1") == argv[6]));
240 item->set_feedurl(argv[7]);
241
242 item->set_enclosure_url(argv[8] ? argv[8] : "");
243 item->set_enclosure_type(argv[9] ? argv[9] : "");
244 item->set_enqueued((std::string("1") == argv[10]));
245 item->set_flags(argv[11] ? argv[11] : "");
246 item->set_base(argv[12] ? argv[12] : "");
247
248 items->push_back(item);
249 return 0;
250 }
251
guid_callback(void * myguids,int argc,char ** argv,char **)252 static int guid_callback(void* myguids, int argc, char** argv,
253 char** /* azColName */)
254 {
255 auto* guids = static_cast<std::unordered_set<std::string>*>(myguids);
256 assert(argc == 1);
257 guids->emplace(argv[0]);
258 return 0;
259 }
260
Cache(const std::string & cachefile,ConfigContainer * c)261 Cache::Cache(const std::string& cachefile, ConfigContainer* c)
262 : db(0)
263 , cfg(c)
264 {
265 const int error = sqlite3_open(cachefile.c_str(), &db);
266 if (error != SQLITE_OK) {
267 LOG(Level::ERROR,
268 "couldn't sqlite3_open(%s): error = %d",
269 cachefile,
270 error);
271 throw DbException(db);
272 }
273
274 populate_tables();
275 set_pragmas();
276
277 clean_old_articles();
278
279 // we need to manually lock all DB operations because SQLite has no
280 // explicit support for multithreading.
281 }
282
~Cache()283 Cache::~Cache()
284 {
285 sqlite3_close(db);
286 }
287
set_pragmas()288 void Cache::set_pragmas()
289 {
290 // first, we need to swithc off synchronous writing as it's slow as hell
291 run_sql("PRAGMA synchronous = OFF;");
292
293 // then we disable case-sensitive matching for the LIKE operator in
294 // SQLite, for search operations
295 run_sql("PRAGMA case_sensitive_like=OFF;");
296 }
297
298 static const schema_patches schemaPatches{
299 { {2, 10},
300 {
301 "CREATE TABLE rss_feed ( "
302 " rssurl VARCHAR(1024) PRIMARY KEY NOT NULL, "
303 " url VARCHAR(1024) NOT NULL, "
304 " title VARCHAR(1024) NOT NULL ); ",
305
306 "CREATE TABLE rss_item ( "
307 " id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "
308 " guid VARCHAR(64) NOT NULL, "
309 " title VARCHAR(1024) NOT NULL, "
310 " author VARCHAR(1024) NOT NULL, "
311 " url VARCHAR(1024) NOT NULL, "
312 " feedurl VARCHAR(1024) NOT NULL, "
313 " pubDate INTEGER NOT NULL, "
314 " content VARCHAR(65535) NOT NULL,"
315 " unread INTEGER(1) NOT NULL );",
316
317 "CREATE TABLE google_replay ( "
318 " id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "
319 " guid VARCHAR(64) NOT NULL, "
320 " state INTEGER NOT NULL, "
321 " ts INTEGER NOT NULL );",
322
323 /* we need to do these ALTER TABLE statements because we
324 * need to store additional data for the podcast support
325 */
326 "ALTER TABLE rss_item ADD enclosure_url VARCHAR(1024);",
327
328 "ALTER TABLE rss_item ADD enclosure_type "
329 "VARCHAR(1024);",
330
331 "ALTER TABLE rss_item ADD enqueued INTEGER(1) NOT NULL "
332 "DEFAULT 0;",
333
334 "ALTER TABLE rss_item ADD flags VARCHAR(52);",
335
336 /* create indexes to speed up certain queries */
337 "CREATE INDEX IF NOT EXISTS idx_rssurl ON "
338 "rss_feed(rssurl);",
339
340 "CREATE INDEX IF NOT EXISTS idx_guid ON "
341 "rss_item(guid);",
342
343 "CREATE INDEX IF NOT EXISTS idx_feedurl ON "
344 "rss_item(feedurl);",
345 /* we analyse the indices for better statistics */
346 "ANALYZE;",
347
348 "ALTER TABLE rss_feed ADD lastmodified INTEGER(11) NOT "
349 "NULL "
350 "DEFAULT 0;",
351
352 "CREATE INDEX IF NOT EXISTS idx_lastmodified ON "
353 "rss_feed(lastmodified);",
354
355 "ALTER TABLE rss_item ADD deleted INTEGER(1) NOT NULL "
356 "DEFAULT "
357 "0;",
358
359 "CREATE INDEX IF NOT EXISTS idx_deleted ON "
360 "rss_item(deleted);",
361
362 "ALTER TABLE rss_feed ADD is_rtl INTEGER(1) NOT NULL "
363 "DEFAULT "
364 "0;",
365
366 "ALTER TABLE rss_feed ADD etag VARCHAR(128) NOT NULL "
367 "DEFAULT "
368 "\"\";",
369
370 "ALTER TABLE rss_item ADD base VARCHAR(128) NOT NULL "
371 "DEFAULT "
372 "\"\";",
373 }
374 },
375 { {2, 11},
376 {
377 "CREATE TABLE metadata ( "
378 " db_schema_version_major INTEGER NOT NULL, "
379 " db_schema_version_minor INTEGER NOT NULL );",
380
381 "INSERT INTO metadata VALUES ( 2, 11 );"
382 }
383 },
384 { {2, 22},
385 {
386 "UPDATE metadata SET db_schema_version_major = 2, db_schema_version_minor = 22;",
387
388 "ALTER TABLE rss_item ADD COLUMN content_mime_type VARCHAR(255) NOT NULL DEFAULT \"\";"
389 }
390 }
391
392 // Note: schema changes should use the version number of the release that introduced them.
393 };
394
populate_tables()395 void Cache::populate_tables()
396 {
397 const SchemaVersion version = get_schema_version();
398 LOG(Level::INFO,
399 "Cache::populate_tables: DB schema version %u.%u",
400 version.major,
401 version.minor);
402
403 if (version.major > utils::newsboat_version_major()) {
404 const std::string msg =
405 "Database schema isn't supported because it's too new";
406 LOG(Level::ERROR, msg);
407 throw std::runtime_error(msg);
408 }
409
410 auto patches_it = schemaPatches.cbegin();
411
412 // rewind to the first patch that should be applied
413 while (patches_it != schemaPatches.cend() &&
414 patches_it->first <= version) {
415 ++patches_it;
416 }
417
418 for (; patches_it != schemaPatches.cend(); ++patches_it) {
419 const SchemaVersion patch_version = patches_it->first;
420 LOG(Level::INFO,
421 "Cache::populate_tables: applying DB schema patches "
422 "for version %u.%u",
423 patch_version.major,
424 patch_version.minor);
425 for (const auto& query : patches_it->second) {
426 run_sql_nothrow(query);
427 }
428 }
429 }
430
fetch_lastmodified(const std::string & feedurl,time_t & t,std::string & etag)431 void Cache::fetch_lastmodified(const std::string& feedurl,
432 time_t& t,
433 std::string& etag)
434 {
435 std::lock_guard<std::mutex> lock(mtx);
436 std::string query = prepare_query(
437 "SELECT lastmodified, etag FROM rss_feed WHERE rssurl = '%q';",
438 feedurl);
439 HeaderValues result = {0, ""};
440 run_sql(query, lastmodified_callback, &result);
441 t = result.lastmodified;
442 etag = result.etag;
443 LOG(Level::DEBUG,
444 "Cache::fetch_lastmodified: t = %" PRId64 " etag = %s",
445 // On GCC, `time_t` is `long int`, which is at least 32 bits. On
446 // x86_64, it's 64 bits. Thus, this cast is either a no-op, or an
447 // up-cast which is always safe.
448 static_cast<int64_t>(t),
449 etag);
450 }
451
update_lastmodified(const std::string & feedurl,time_t t,const std::string & etag)452 void Cache::update_lastmodified(const std::string& feedurl,
453 time_t t,
454 const std::string& etag)
455 {
456 if (t == 0 && etag.length() == 0) {
457 LOG(Level::INFO,
458 "Cache::update_lastmodified: both time and etag are "
459 "empty, not updating anything");
460 return;
461 }
462 std::lock_guard<std::mutex> lock(mtx);
463 std::string query = "UPDATE rss_feed SET ";
464 if (t > 0) {
465 query.append(prepare_query("lastmodified = '%d'", t));
466 }
467 if (etag.length() > 0) {
468 query.append(prepare_query("%c etag = %s",
469 (t > 0 ? ',' : ' '),
470 prepare_query("'%q'", etag)));
471 }
472 query.append(" WHERE rssurl = ");
473 query.append(prepare_query("'%q'", feedurl));
474 run_sql_nothrow(query);
475 }
476
mark_item_deleted(const std::string & guid,bool b)477 void Cache::mark_item_deleted(const std::string& guid, bool b)
478 {
479 std::lock_guard<std::mutex> lock(mtx);
480 std::string query = prepare_query(
481 "UPDATE rss_item SET deleted = %u WHERE guid = '%q'",
482 b ? 1 : 0,
483 guid);
484 run_sql_nothrow(query);
485 }
486
mark_feed_items_deleted(const std::string & feedurl)487 void Cache::mark_feed_items_deleted(const std::string& feedurl)
488 {
489 std::lock_guard<std::mutex> lock(mtx);
490 std::string query = prepare_query(
491 "UPDATE rss_item SET deleted = 1 WHERE feedurl = '%s';",
492 feedurl);
493 run_sql_nothrow(query);
494 }
495
496 // this function writes an RssFeed including all RssItems to the database
externalize_rssfeed(std::shared_ptr<RssFeed> feed,bool reset_unread)497 void Cache::externalize_rssfeed(std::shared_ptr<RssFeed> feed,
498 bool reset_unread)
499 {
500 ScopeMeasure m1("Cache::externalize_feed");
501 if (feed->is_query_feed()) {
502 return;
503 }
504
505 std::lock_guard<std::mutex> lock(mtx);
506 std::lock_guard<std::mutex> feedlock(feed->item_mutex);
507 // scope_transaction dbtrans(db);
508
509 CbHandler count_cbh;
510 auto query = prepare_query(
511 "SELECT count(*) FROM rss_feed WHERE rssurl = '%q';",
512 feed->rssurl());
513 run_sql(query, count_callback, &count_cbh);
514
515 const int count = count_cbh.count();
516 LOG(Level::DEBUG,
517 "Cache::externalize_rss_feed: rss_feeds with rssurl = '%s': "
518 "found "
519 "%d",
520 feed->rssurl(),
521 count);
522 if (count > 0) {
523 const std::string updatequery = prepare_query(
524 "UPDATE rss_feed "
525 "SET title = '%q', url = '%q', is_rtl = %u "
526 "WHERE rssurl = '%q';",
527 feed->title_raw(),
528 feed->link(),
529 feed->is_rtl() ? 1 : 0,
530 feed->rssurl());
531 run_sql(updatequery);
532 } else {
533 const std::string insertquery = prepare_query(
534 "INSERT INTO rss_feed (rssurl, url, title, is_rtl) "
535 "VALUES ( '%q', '%q', '%q', %u );",
536 feed->rssurl(),
537 feed->link(),
538 feed->title_raw(),
539 feed->is_rtl() ? 1 : 0);
540 run_sql(insertquery);
541 }
542
543 const unsigned int max_items = cfg->get_configvalue_as_int("max-items");
544
545 LOG(Level::INFO,
546 "Cache::externalize_feed: max_items = %u "
547 "feed.total_item_count() = "
548 "%u",
549 max_items,
550 feed->total_item_count());
551
552 if (max_items > 0 && feed->total_item_count() > max_items) {
553 feed->erase_items(
554 feed->items().begin() + max_items, feed->items().end());
555 }
556
557 const unsigned int days = cfg->get_configvalue_as_int("keep-articles-days");
558 const time_t old_time = time(nullptr) - days * 24 * 60 * 60;
559
560 // the reverse iterator is there for the sorting foo below (think about
561 // it)
562 for (auto it = feed->items().rbegin(); it != feed->items().rend();
563 ++it) {
564 if (days == 0 || (*it)->pubDate_timestamp() >= old_time)
565 update_rssitem_unlocked(
566 *it, feed->rssurl(), reset_unread);
567 }
568 }
569
570 // this function reads an RssFeed including all of its RssItems.
571 // the feed parameter needs to have the rssurl member set.
internalize_rssfeed(std::string rssurl,RssIgnores * ign)572 std::shared_ptr<RssFeed> Cache::internalize_rssfeed(std::string rssurl,
573 RssIgnores* ign)
574 {
575 ScopeMeasure m1("Cache::internalize_rssfeed");
576
577 std::shared_ptr<RssFeed> feed(new RssFeed(this));
578 feed->set_rssurl(rssurl);
579
580 if (utils::is_query_url(rssurl)) {
581 return feed;
582 }
583
584 std::lock_guard<std::mutex> lock(mtx);
585 std::lock_guard<std::mutex> feedlock(feed->item_mutex);
586
587 /* first, we check whether the feed is there at all */
588 std::string query = prepare_query(
589 "SELECT count(*) FROM rss_feed WHERE rssurl = '%q';", rssurl);
590 CbHandler count_cbh;
591 run_sql(query, count_callback, &count_cbh);
592
593 if (count_cbh.count() == 0) {
594 return feed;
595 }
596
597 /* then we first read the feed from the database */
598 query = prepare_query(
599 "SELECT title, url, is_rtl FROM rss_feed WHERE rssurl = '%q';",
600 rssurl);
601 run_sql(query, rssfeed_callback, &feed);
602
603 /* ...and then the associated items */
604 query = prepare_query(
605 "SELECT guid, title, author, url, pubDate, length(content), "
606 "unread, "
607 "feedurl, enclosure_url, enclosure_type, enqueued, flags, base "
608 "FROM rss_item "
609 "WHERE feedurl = '%q' "
610 "AND deleted = 0 "
611 "ORDER BY pubDate DESC, id DESC;",
612 rssurl);
613 run_sql(query, rssitem_callback, &feed);
614
615 auto feed_weak_ptr = std::weak_ptr<RssFeed>(feed);
616 for (const auto& item : feed->items()) {
617 item->set_cache(this);
618 item->set_feedptr(feed_weak_ptr);
619 item->set_feedurl(feed->rssurl());
620 }
621
622 if (ign != nullptr) {
623 auto& items = feed->items();
624 items.erase(
625 std::remove_if(
626 items.begin(),
627 items.end(),
628 [&](std::shared_ptr<RssItem> item) -> bool {
629 try
630 {
631 return ign->matches(item.get());
632 } catch (const MatcherException& ex)
633 {
634 LOG(Level::DEBUG,
635 "oops, Matcher exception: %s",
636 ex.what());
637 return false;
638 }
639 }),
640 items.end());
641 }
642
643 const unsigned int max_items = cfg->get_configvalue_as_int("max-items");
644
645 if (max_items > 0 && feed->total_item_count() > max_items) {
646 std::vector<std::shared_ptr<RssItem>> flagged_items;
647 for (unsigned int j = max_items; j < feed->total_item_count();
648 ++j) {
649 if (feed->items()[j]->flags().length() == 0) {
650 delete_item(feed->items()[j]);
651 } else {
652 flagged_items.push_back(feed->items()[j]);
653 }
654 }
655
656 auto it = feed->items().begin() + max_items;
657 feed->erase_items(
658 it, feed->items().end()); // delete old entries
659
660 // if some flagged articles were saved, append them
661 feed->add_items(flagged_items);
662 }
663 feed->sort_unlocked(cfg->get_article_sort_strategy());
664 return feed;
665 }
666
search_for_items(const std::string & querystr,const std::string & feedurl)667 std::vector<std::shared_ptr<RssItem>> Cache::search_for_items(
668 const std::string& querystr, const std::string& feedurl)
669 {
670 assert(!utils::is_query_url(feedurl));
671 std::string query;
672 std::vector<std::shared_ptr<RssItem>> items;
673
674 std::lock_guard<std::mutex> lock(mtx);
675 if (feedurl.length() > 0) {
676 query = prepare_query(
677 "SELECT guid, title, author, url, pubDate, "
678 "length(content), "
679 "unread, feedurl, enclosure_url, enclosure_type, "
680 "enqueued, flags, base "
681 "FROM rss_item "
682 "WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
683 "AND feedurl = '%q' "
684 "AND deleted = 0 "
685 "ORDER BY pubDate DESC, id DESC;",
686 querystr,
687 querystr,
688 feedurl);
689 } else {
690 query = prepare_query(
691 "SELECT guid, title, author, url, pubDate, "
692 "length(content), "
693 "unread, feedurl, enclosure_url, enclosure_type, "
694 "enqueued, flags, base "
695 "FROM rss_item "
696 "WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
697 "AND deleted = 0 "
698 "ORDER BY pubDate DESC, id DESC;",
699 querystr,
700 querystr);
701 }
702
703 run_sql(query, search_item_callback, &items);
704 for (const auto& item : items) {
705 item->set_cache(this);
706 }
707
708 return items;
709 }
710
search_in_items(const std::string & querystr,const std::unordered_set<std::string> & guids)711 std::unordered_set<std::string> Cache::search_in_items(
712 const std::string& querystr,
713 const std::unordered_set<std::string>& guids)
714 {
715 std::string list = "(";
716
717 for (const auto& guid : guids) {
718 list.append(prepare_query("%Q, ", guid));
719 }
720 list.append("'')");
721
722 std::string query = prepare_query(
723 "SELECT guid "
724 "FROM rss_item "
725 "WHERE (title LIKE '%%%q%%' OR content LIKE '%%%q%%') "
726 "AND guid IN %s;",
727 querystr,
728 querystr,
729 list);
730
731 std::unordered_set<std::string> items;
732 std::lock_guard<std::mutex> lock(mtx);
733 run_sql(query, guid_callback, &items);
734 return items;
735 }
736
delete_item(const std::shared_ptr<RssItem> & item)737 void Cache::delete_item(const std::shared_ptr<RssItem>& item)
738 {
739 const std::string query = prepare_query(
740 "DELETE FROM rss_item WHERE guid = '%q';", item->guid());
741 run_sql(query);
742 }
743
do_vacuum()744 void Cache::do_vacuum()
745 {
746 std::lock_guard<std::mutex> lock(mtx);
747 run_sql("VACUUM;");
748 }
749
cleanup_cache(std::vector<std::shared_ptr<RssFeed>> feeds,bool always_clean)750 void Cache::cleanup_cache(std::vector<std::shared_ptr<RssFeed>> feeds,
751 bool always_clean)
752 {
753 // we don't use the std::lock_guard<> here... see comments below
754 mtx.lock();
755
756 /*
757 * cache cleanup means that all entries in both the RssFeed and
758 * RssItem tables that are associated with an RSS feed URL that is not
759 * contained in the current configuration are deleted. Such entries are
760 * the result when a user deletes one or more lines in the urls
761 * configuration file. We then assume that the user isn't interested
762 * anymore in reading this feed, and delete all associated entries
763 * because they would be non-accessible.
764 *
765 * The behaviour whether the cleanup is done or not is configurable via
766 * the configuration file.
767 */
768 if (always_clean || cfg->get_configvalue_as_bool("cleanup-on-quit")) {
769 LOG(Level::DEBUG, "Cache::cleanup_cache: cleaning up cache...");
770 std::string list = "(";
771
772 for (const auto& feed : feeds) {
773 std::string name =
774 prepare_query("'%q'", feed->rssurl());
775 list.append(name);
776 list.append(", ");
777 }
778 list.append("'')");
779
780 std::string cleanup_rss_feeds_statement(
781 "DELETE FROM rss_feed WHERE rssurl NOT IN ");
782 cleanup_rss_feeds_statement.append(list);
783 cleanup_rss_feeds_statement.push_back(';');
784
785 std::string cleanup_rss_items_statement(
786 "DELETE FROM rss_item WHERE feedurl NOT IN ");
787 cleanup_rss_items_statement.append(list);
788 cleanup_rss_items_statement.push_back(';');
789
790 std::string cleanup_read_items_statement(
791 "UPDATE rss_item SET deleted = 1 WHERE unread = 0");
792
793 run_sql(cleanup_rss_feeds_statement);
794 run_sql(cleanup_rss_items_statement);
795 if (cfg->get_configvalue_as_bool(
796 "delete-read-articles-on-quit")) {
797 run_sql(cleanup_read_items_statement);
798 }
799
800 // WARNING: THE MISSING UNLOCK OPERATION IS MISSING FOR A
801 // PURPOSE! It's missing so that no database operation can occur
802 // after the cache cleanup! mtx->unlock();
803 } else {
804 LOG(Level::DEBUG,
805 "Cache::cleanup_cache: NOT cleaning up cache...");
806 }
807 }
808
update_rssitem_unlocked(std::shared_ptr<RssItem> item,const std::string & feedurl,bool reset_unread)809 void Cache::update_rssitem_unlocked(std::shared_ptr<RssItem> item,
810 const std::string& feedurl,
811 bool reset_unread)
812 {
813 std::string query = prepare_query(
814 "SELECT count(*) FROM rss_item WHERE guid = '%q';",
815 item->guid());
816 CbHandler count_cbh;
817 run_sql(query, count_callback, &count_cbh);
818 const auto description = item->description();
819 if (count_cbh.count() > 0) {
820 if (reset_unread) {
821 std::string content;
822 query = prepare_query(
823 "SELECT content FROM rss_item WHERE guid = "
824 "'%q';",
825 item->guid());
826 run_sql(query, single_string_callback, &content);
827 if (content != description.text) {
828 LOG(Level::DEBUG,
829 "Cache::update_rssitem_unlocked: '%s' "
830 "is "
831 "different from '%s'",
832 content,
833 description.text);
834 query = prepare_query(
835 "UPDATE rss_item SET unread = 1 WHERE "
836 "guid = '%q';",
837 item->guid());
838 run_sql(query);
839 }
840 }
841 std::string update;
842 if (item->override_unread()) {
843 update = prepare_query(
844 "UPDATE rss_item "
845 "SET title = '%q', author = '%q', url = '%q', "
846 "feedurl = '%q', "
847 "content = '%q', content_mime_type = '%q', enclosure_url = '%q', "
848 "enclosure_type = '%q', base = '%q', unread = "
849 "'%d' "
850 "WHERE guid = '%q'",
851 item->title(),
852 item->author(),
853 item->link(),
854 feedurl,
855 description.text,
856 description.mime,
857 item->enclosure_url(),
858 item->enclosure_type(),
859 item->get_base(),
860 (item->unread() ? 1 : 0),
861 item->guid());
862 } else {
863 update = prepare_query(
864 "UPDATE rss_item "
865 "SET title = '%q', author = '%q', url = '%q', "
866 "feedurl = '%q', "
867 "content = '%q', content_mime_type = '%q', enclosure_url = '%q', "
868 "enclosure_type = '%q', base = '%q' "
869 "WHERE guid = '%q'",
870 item->title(),
871 item->author(),
872 item->link(),
873 feedurl,
874 description.text,
875 description.mime,
876 item->enclosure_url(),
877 item->enclosure_type(),
878 item->get_base(),
879 item->guid());
880 }
881 run_sql(update);
882 } else {
883 std::string insert = prepare_query(
884 "INSERT INTO rss_item (guid, title, author, url, "
885 "feedurl, "
886 "pubDate, content, content_mime_type, unread, enclosure_url, "
887 "enclosure_type, enqueued, base) "
888 "VALUES "
889 "('%q','%q','%q','%q','%q','%u','%q','%q','%d','%q','%q',%d,"
890 " "
891 "'%q')",
892 item->guid(),
893 item->title(),
894 item->author(),
895 item->link(),
896 feedurl,
897 item->pubDate_timestamp(),
898 description.text,
899 description.mime,
900 (item->unread() ? 1 : 0),
901 item->enclosure_url(),
902 item->enclosure_type(),
903 item->enqueued() ? 1 : 0,
904 item->get_base());
905 run_sql(insert);
906 }
907 }
908
mark_all_read(std::shared_ptr<RssFeed> feed)909 void Cache::mark_all_read(std::shared_ptr<RssFeed> feed)
910 {
911 std::lock_guard<std::mutex> lock(mtx);
912 std::lock_guard<std::mutex> itemlock(feed->item_mutex);
913 std::string query =
914 "UPDATE rss_item SET unread = '0' WHERE unread != '0' AND guid "
915 "IN (";
916
917 for (const auto& item : feed->items()) {
918 query.append(prepare_query("'%q',", item->guid()));
919 }
920 query.append("'');");
921
922 run_sql(query);
923 }
924
925 /* this function marks all RssItems (optionally of a certain feed url) as read
926 */
mark_all_read(const std::string & feedurl)927 void Cache::mark_all_read(const std::string& feedurl)
928 {
929 std::lock_guard<std::mutex> lock(mtx);
930
931 std::string query;
932 if (feedurl.length() > 0) {
933 query = prepare_query(
934 "UPDATE rss_item "
935 "SET unread = '0' "
936 "WHERE unread != '0' "
937 "AND feedurl = '%q';",
938 feedurl);
939 } else {
940 query = prepare_query(
941 "UPDATE rss_item "
942 "SET unread = '0' "
943 "WHERE unread != '0';");
944 }
945 run_sql(query);
946 }
947
update_rssitem_unread_and_enqueued(RssItem * item,const std::string &)948 void Cache::update_rssitem_unread_and_enqueued(RssItem* item,
949 const std::string& /* feedurl */)
950 {
951 std::lock_guard<std::mutex> lock(mtx);
952
953 const auto query = prepare_query(
954 "UPDATE rss_item "
955 "SET unread = '%d', enqueued = '%d' "
956 "WHERE guid = '%q'",
957 item->unread() ? 1 : 0,
958 item->enqueued() ? 1 : 0,
959 item->guid());
960 run_sql(query);
961 }
962
963 /* this function updates the unread and enqueued flags */
update_rssitem_unread_and_enqueued(std::shared_ptr<RssItem> item,const std::string & feedurl)964 void Cache::update_rssitem_unread_and_enqueued(std::shared_ptr<RssItem> item,
965 const std::string& feedurl)
966 {
967 update_rssitem_unread_and_enqueued(item.get(), feedurl);
968 }
969
970 /* helper function to wrap std::string around the sqlite3_*mprintf function */
prepare_query(const std::string & format)971 std::string Cache::prepare_query(const std::string& format)
972 {
973 return format;
974 }
975
976 template<typename... Args>
prepare_query(const std::string & format,const std::string & argument,Args...args)977 std::string Cache::prepare_query(const std::string& format,
978 const std::string& argument,
979 Args... args)
980 {
981 return prepare_query(format, argument.c_str(), args...);
982 }
983
984 template<typename T, typename... Args>
prepare_query(const std::string & format,const T & argument,Args...args)985 std::string Cache::prepare_query(const std::string& format, const T& argument,
986 Args... args)
987 {
988 std::string local_format, remaining_format;
989 std::tie(local_format, remaining_format) =
990 strprintf::split_format(format);
991
992 char* piece = sqlite3_mprintf(local_format.c_str(), argument);
993 std::string result;
994 if (piece) {
995 result = piece;
996 sqlite3_free(piece);
997 }
998
999 return result + prepare_query(remaining_format, args...);
1000 }
1001
update_rssitem_flags(RssItem * item)1002 void Cache::update_rssitem_flags(RssItem* item)
1003 {
1004 std::lock_guard<std::mutex> lock(mtx);
1005
1006 const std::string update = prepare_query(
1007 "UPDATE rss_item SET flags = '%q' WHERE guid = '%q';",
1008 item->flags(),
1009 item->guid());
1010
1011 run_sql(update);
1012 }
1013
remove_old_deleted_items(RssFeed * feed)1014 void Cache::remove_old_deleted_items(RssFeed* feed)
1015 {
1016 ScopeMeasure m1("Cache::remove_old_deleted_items");
1017
1018 std::lock_guard<std::mutex> cache_lock(mtx);
1019 std::lock_guard<std::mutex> feed_lock(feed->item_mutex);
1020
1021 std::vector<std::string> guids;
1022 for (const auto& item : feed->items()) {
1023 guids.push_back(item->guid());
1024 }
1025
1026 if (guids.empty()) {
1027 LOG(Level::DEBUG,
1028 "Cache::remove_old_deleted_items: not cleaning up "
1029 "anything because last reload brought no new items "
1030 "(detected no changes)");
1031 return;
1032 }
1033 std::string guidset = "(";
1034 for (const auto& guid : guids) {
1035 guidset.append(prepare_query("'%q', ", guid));
1036 }
1037 guidset.append("'')");
1038 const std::string query = prepare_query(
1039 "DELETE FROM rss_item "
1040 "WHERE feedurl = '%q' "
1041 "AND deleted = 1 "
1042 "AND guid NOT IN %s;",
1043 feed->rssurl(),
1044 guidset);
1045 run_sql(query);
1046 }
1047
mark_items_read_by_guid(const std::vector<std::string> & guids)1048 void Cache::mark_items_read_by_guid(const std::vector<std::string>& guids)
1049 {
1050 ScopeMeasure m1("Cache::mark_items_read_by_guid");
1051 std::string guidset("(");
1052 for (const auto& guid : guids) {
1053 guidset.append(prepare_query("'%q', ", guid));
1054 }
1055 guidset.append("'')");
1056
1057 const std::string updatequery = prepare_query(
1058 "UPDATE rss_item SET unread = 0 WHERE unread = 1 AND guid IN "
1059 "%s;",
1060 guidset);
1061
1062 std::lock_guard<std::mutex> lock(mtx);
1063 run_sql(updatequery);
1064 }
1065
get_read_item_guids()1066 std::vector<std::string> Cache::get_read_item_guids()
1067 {
1068 std::vector<std::string> guids;
1069 const std::string query = "SELECT guid FROM rss_item WHERE unread = 0;";
1070
1071 std::lock_guard<std::mutex> lock(mtx);
1072 run_sql(query, vectorofstring_callback, &guids);
1073
1074 return guids;
1075 }
1076
clean_old_articles()1077 void Cache::clean_old_articles()
1078 {
1079 std::lock_guard<std::mutex> lock(mtx);
1080
1081 const unsigned int days = cfg->get_configvalue_as_int("keep-articles-days");
1082 if (days > 0) {
1083 const time_t old_date = time(nullptr) - days * 24 * 60 * 60;
1084
1085 const std::string query(prepare_query(
1086 "DELETE FROM rss_item WHERE pubDate < %d", old_date));
1087 LOG(Level::DEBUG,
1088 "Cache::clean_old_articles: about to delete articles "
1089 "with a pubDate older than %" PRId64,
1090 // On GCC, `time_t` is `long int`, which is at least 32 bits long
1091 // according to the spec. On x86_64, it's actually 64 bits. Thus,
1092 // casting to int64_t is either a no-op, or an up-cast which are
1093 // always safe.
1094 static_cast<int64_t>(old_date));
1095 run_sql(query);
1096 } else {
1097 LOG(Level::DEBUG,
1098 "Cache::clean_old_articles, days == 0, not cleaning up "
1099 "anything");
1100 }
1101 }
1102
fetch_descriptions(RssFeed * feed)1103 void Cache::fetch_descriptions(RssFeed* feed)
1104 {
1105 std::vector<std::string> guids;
1106 for (const auto& item : feed->items()) {
1107 guids.push_back(prepare_query("'%q'", item->guid()));
1108 }
1109 const std::string in_clause = utils::join(guids, ", ");
1110
1111 const std::string query = prepare_query(
1112 "SELECT guid, content, content_mime_type FROM rss_item WHERE guid IN (%s);",
1113 in_clause);
1114
1115 run_sql(query, fill_content_callback, feed);
1116 }
1117
fetch_description(const RssItem & item)1118 std::string Cache::fetch_description(const RssItem& item)
1119 {
1120 const std::string in_clause = prepare_query("'%q'", item.guid());
1121
1122 const std::string query = prepare_query(
1123 "SELECT content FROM rss_item WHERE guid = %s;",
1124 in_clause);
1125
1126 std::string description;
1127 auto store_description = [](void* d, int, char** argv, char**) -> int {
1128 auto& desc = *static_cast<std::string*>(d);
1129 desc = argv[0] ? argv[0] : "";
1130 return 0;
1131 };
1132
1133 run_sql(query, store_description, &description);
1134 return description;
1135 }
1136
get_schema_version()1137 SchemaVersion Cache::get_schema_version()
1138 {
1139 sqlite3_stmt* stmt{};
1140 SchemaVersion result;
1141
1142 int rc = sqlite3_prepare_v2(db,
1143 "SELECT db_schema_version_major, db_schema_version_minor "
1144 "FROM metadata",
1145 -1,
1146 &stmt,
1147 nullptr);
1148
1149 if (rc != SQLITE_OK) {
1150 // I'm pretty sure the query above is correct, and the only way
1151 // it can fail is when metadata table is not present in the DB.
1152 // That means we're dealing with an empty cache file, or one
1153 // that was created by an older version of Newsboat.
1154 result = unknown_version;
1155 } else {
1156 rc = sqlite3_step(stmt);
1157 if (rc != SQLITE_ROW) {
1158 // table is empty. Technically, this is impossible, but
1159 // is easy enough to fix - just re-create the db
1160 result = unknown_version;
1161 } else {
1162 // row is available, let's grab it!
1163 result.major = sqlite3_column_int(stmt, 0);
1164 result.minor = sqlite3_column_int(stmt, 1);
1165
1166 assert(sqlite3_step(stmt) == SQLITE_DONE);
1167 }
1168 }
1169
1170 sqlite3_finalize(stmt);
1171 return result;
1172 }
1173
1174 } // namespace newsboat
1175