1 /*
2 * Copyright (c) 2012-2018 Nikola Kolev <koue@chaosophia.net>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 *
29 */
30
#include <cez_fossil.h>
#include <curl/curl.h>
#include <curl/easy.h>
#include <sqlite3.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

#include "rss.h"
42
43 #define RSSROLL_VERSION "rssroll/0.6.2"
44
45 int debug = 0;
46
47 /* rss database store */
48 Global g;
49
/*
 * curl write callback: append the received chunk to the stdio stream that
 * was registered with CURLOPT_WRITEDATA.
 *
 * ptr    - start of the received data
 * size   - size of one element
 * nmemb  - number of elements
 * stream - destination, a FILE * passed through as void *
 *
 * Returns the number of bytes stored.  libcurl compares that value with
 * size * nmemb and treats any difference as a write error.
 */
static size_t
write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
	/*
	 * fwrite() reports complete elements, not bytes, and its result is a
	 * size_t: keep the full width and scale it back to bytes.
	 */
	size_t items = fwrite(ptr, size, nmemb, (FILE *)stream);

	return (items * size);
}
57
/*
 * Add a new item into the feeds table.
 *
 * chan_id    - id of the channel the item belongs to
 * item_url   - link of the item
 * item_title - title of the item
 * item_desc  - description of the item
 * item_date  - publication date of the item
 *
 * The row is inserted with modified = 0 and a notice naming the link is
 * printed on stdout.
 */
void
add_feed(int chan_id, char *item_url, char *item_title, char *item_desc,
    time_t item_date)
{
	dmsg(0, "%s: %s", __func__, item_url);
	/*
	 * %ld consumes a long from the argument list and time_t is not
	 * guaranteed to be one, so convert explicitly.
	 * NOTE(review): %q presumably quotes the string for use inside an
	 * SQL literal, as in SQLite's printf - confirm against cez_fossil.
	 */
	db_multi_exec("INSERT INTO feeds (chanid, modified, link, title, "
	    "description, pubdate) "
	    "VALUES (%d, 0, '%q', '%q', '%q', '%ld')",
	    chan_id, item_url, item_title, item_desc, (long)item_date);
	printf("New feed has been added %s.\n", item_url);
}
70
/*
 * Check whether an item is already stored for a channel.
 *
 * chan_id      - id of the channel
 * item_link    - link of the item
 * item_pubdate - publication date of the item
 *
 * Returns 1 when the feeds table already has a row with the same pubdate,
 * chanid and link; nothing is modified in that case.  Returns 0 otherwise,
 * after setting the modified column of the channel to the current time.
 * The item itself is not inserted here, that is left to the caller.
 */
int
check_link(int chan_id, char *item_link, time_t item_pubdate)
{
	int result;

	dmsg(0, "check_link");
	/* time_t values are converted because %ld consumes a long */
	result = db_int(0, "SELECT id FROM feeds WHERE pubdate = '%ld' "
	    "AND chanid = '%d' AND link = '%q'",
	    (long)item_pubdate, chan_id, item_link);
	if (result) {
		dmsg(0, "record has been found.");
		/*
		 * Don't do anything; if changed posts should be updated,
		 * do it here.
		 */
		return (1);
	}
	/* update last modified time of the channel */
	db_multi_exec("UPDATE channels SET modified = '%ld' WHERE id = '%d'",
	    (long)time(NULL), chan_id);
	return (0);
}
93
94 /* parse content of the rss */
95 void
parse_body(int chan_id,char * rssfile)96 parse_body(int chan_id, char *rssfile)
97 {
98 int i;
99 st_rss_t *rss = NULL;
100
101 dmsg(0,"parse_body.");
102
103 if ((rss = rss_open(rssfile)) == NULL) {
104 printf("rss id [%d] cannot be parsed.\n", chan_id);
105 return;
106 }
107 dmsg(0,"items - %d", rss->item_count);
108
109 /* check in reverse order, first feed has been added last to the rss */
110 for (i = (rss->item_count - 1); i > -1; i--) {
111 if(check_link(chan_id, rss->item[i].url, rss->item[i].date) == 0) {
112 add_feed(chan_id, rss->item[i].url, rss->item[i].title,
113 rss->item[i].desc, rss->item[i].date);
114 }
115 }
116 rss_close(rss);
117 }
118
119 /* fetch rss file */
120 void
fetch_channel(int chan_id,time_t chan_modified,const char * chan_link)121 fetch_channel(int chan_id, time_t chan_modified, const char *chan_link)
122 {
123 CURL *curl_handle;
124 struct curl_slist *if_chan_modified = NULL;
125 FILE *bodyfile;
126 char chan_last_modified_time[64], fn[]="/tmp/rssroll.tmp.XXXXXXXXXX";
127 long http_code = 0;
128 int fd;
129
130 dmsg(0,"%s: %d, %ld, %s", __func__, chan_id, chan_modified, chan_link);
131
132 strftime(chan_last_modified_time, sizeof(chan_last_modified_time),
133 "If-Modified-Since: %a, %d %b %Y %T %Z", localtime(&chan_modified));
134 curl_global_init(CURL_GLOBAL_ALL);
135 curl_handle = curl_easy_init();
136 if_chan_modified = curl_slist_append(if_chan_modified,
137 chan_last_modified_time);
138 curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, RSSROLL_VERSION);
139 curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, if_chan_modified);
140 curl_easy_setopt(curl_handle, CURLOPT_URL, chan_link);
141 curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
142 curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
143
144 if ((fd = mkstemp(fn)) == -1) {
145 fprintf(stderr, "%s: Cannot create temp file: %s\n", __func__,
146 chan_link);
147 goto cleanup;
148 }
149 if (fchmod(fd, 0600)) {
150 fprintf(stderr, "%s: Cannot set permission to temp file: %s\n",
151 __func__, chan_link);
152 goto done;
153 }
154
155 if ((bodyfile = fdopen(fd, "w")) == NULL) {
156 fprintf(stderr, "%s: Cannot open file for parsing: %s\n",
157 __func__, chan_link);
158 goto done;
159 }
160 curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, bodyfile);
161 curl_easy_perform(curl_handle);
162 curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_code);
163 fclose(bodyfile);
164 if (http_code != 304) {
165 parse_body(chan_id, fn);
166 } else {
167 dmsg(0,"%s: id - %d, link - %s has not been changed.",
168 __func__, chan_id, chan_link);
169 }
170 done:
171 unlink(fn);
172 cleanup:
173 dmsg(0, "%s: cleanup", __func__);
174 unlink(fn);
175 curl_slist_free_all(if_chan_modified);
176 curl_easy_cleanup(curl_handle);
177 curl_global_cleanup();
178 }
179
/* Print the command line synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	extern char *__progname;
	const char *self = __progname;

	fprintf(stderr, "Usage: %s [-v] [-d database]\n", self);
	exit(1);
}
187
188 int
main(int argc,char ** argv)189 main(int argc, char** argv)
190 {
191
192 int ch;
193 const char *dbname = "/var/db/rssroll.db";
194 Stmt q;
195
196 while ((ch = getopt(argc, argv, "d:v")) != -1) {
197 switch (ch) {
198 case 'd':
199 dbname = optarg;
200 break;
201 case 'v':
202 debug++;
203 break;
204 default:
205 usage();
206 }
207 }
208 if (argc != optind) {
209 usage();
210 }
211 if (access(dbname, R_OK)) {
212 fprintf(stderr, "Cannot read database file: %s!\n", dbname);
213 return (1);
214 }
215 if (sqlite3_open(dbname, &g.db) != SQLITE_OK) {
216 fprintf(stderr, "Cannot open database file: %s\n", dbname);
217 return (1);
218 }
219 dmsg(0,"database successfully loaded.");
220 db_prepare(&q, "SELECT id, modified, link FROM channels");
221 while (db_step(&q)==SQLITE_ROW) {
222 fetch_channel(db_column_int(&q, 0), (time_t)db_column_int64(&q, 1),
223 db_column_text(&q, 2));
224 }
225 db_finalize(&q);
226 sqlite3_close(g.db);
227 dmsg(0,"database successfully closed.");
228 return (0);
229 }
230