1 /*
2  * Copyright (c) 2012-2018 Nikola Kolev <koue@chaosophia.net>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  *    - Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *    - Redistributions in binary form must reproduce the above
12  *      copyright notice, this list of conditions and the following
13  *      disclaimer in the documentation and/or other materials provided
14  *      with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  * POSSIBILITY OF SUCH DAMAGE.
28  *
29  */
30 
31 #include <cez_fossil.h>
32 #include <curl/curl.h>
33 #include <curl/easy.h>
34 #include <sqlite3.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <sys/stat.h>
39 #include <unistd.h>
40 
41 #include "rss.h"
42 
/* sent as the User-Agent string of every request made by fetch_channel() */
#define RSSROLL_VERSION	"rssroll/0.6.2"

/* incremented once for every -v given on the command line (see main()) */
int debug = 0;

/* rss database store; g.db receives the handle from sqlite3_open() in main() */
Global g;
49 
/*
 * curl write function
 *
 * Installed through CURLOPT_WRITEFUNCTION.  Writes nmemb items of size
 * bytes each from ptr to the stdio stream passed in as stream and returns
 * the count reported by fwrite().  The count stays a size_t all the way so
 * it is never narrowed through an int before being handed back.
 */
static size_t
write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
	return fwrite(ptr, size, nmemb, (FILE *)stream);
}
57 
/*
 * add new item into the database
 *
 * Inserts one row into the feeds table for channel chan_id holding the
 * item's link, title, description and publication date (the modified
 * column is stored as 0), then reports the new link on stdout.
 */
void
add_feed(int chan_id, char *item_url, char *item_title, char *item_desc,
    time_t item_date)
{
	dmsg(0, "%s: %s", __func__, item_url);
	/*
	 * %ld consumes a long from the variable argument list; time_t is not
	 * guaranteed to be that type, so convert explicitly.
	 */
	db_multi_exec("INSERT INTO feeds (chanid, modified, link, title, "
					"description, pubdate) "
			"VALUES (%d, 0, '%q', '%q', '%q', '%ld')",
			 chan_id, item_url, item_title, item_desc,
			 (long)item_date);
	printf("New feed has been added %s.\n", item_url);
}
70 
/*
 * checks if the feed url appears into the database
 *
 * Returns 1 when the feeds table already holds a row with the same channel
 * id, link and publication date.  Otherwise the channel's modified column
 * is set to the current time and 0 is returned; storing the item itself is
 * left to the caller (parse_body() does it through add_feed()).
 */
int
check_link(int chan_id, char *item_link, time_t item_pubdate)
{
	int result;

	dmsg(0, "check_link");
	/* time_t values are cast because %ld consumes a long */
	result = db_int(0, "SELECT id FROM feeds WHERE pubdate = '%ld' "
				"AND chanid = '%d' AND link = '%q'",
					 (long)item_pubdate, chan_id, item_link);
	if (result) {
		dmsg(0, "record has been found.");
		return (1); /* Don't do anything ;
			     If you want to update changed post do it here */
	}
	/* update last modified  time of the channel */
	db_multi_exec("UPDATE channels SET modified = '%ld' WHERE id = '%d'",
	    (long)time(NULL), chan_id);
	/* 0 tells the caller that the item still has to be added */
	return (0);
}
93 
94 /* parse content of the rss */
95 void
parse_body(int chan_id,char * rssfile)96 parse_body(int chan_id, char *rssfile)
97 {
98 	int i;
99 	st_rss_t *rss = NULL;
100 
101 	dmsg(0,"parse_body.");
102 
103 	if ((rss = rss_open(rssfile)) == NULL) {
104 		printf("rss id [%d] cannot be parsed.\n", chan_id);
105 		return;
106 	}
107 	dmsg(0,"items - %d", rss->item_count);
108 
109 	/* check in reverse order, first feed has been added last to the rss */
110 	for (i = (rss->item_count - 1); i > -1; i--) {
111 		if(check_link(chan_id, rss->item[i].url, rss->item[i].date) == 0) {
112 			add_feed(chan_id, rss->item[i].url, rss->item[i].title,
113 			    rss->item[i].desc, rss->item[i].date);
114 		}
115 	}
116 	rss_close(rss);
117 }
118 
119 /* fetch rss file */
120 void
fetch_channel(int chan_id,time_t chan_modified,const char * chan_link)121 fetch_channel(int chan_id, time_t chan_modified, const char *chan_link)
122 {
123 	CURL *curl_handle;
124 	struct curl_slist *if_chan_modified = NULL;
125 	FILE *bodyfile;
126 	char chan_last_modified_time[64], fn[]="/tmp/rssroll.tmp.XXXXXXXXXX";
127 	long	http_code = 0;
128 	int fd;
129 
130 	dmsg(0,"%s: %d, %ld, %s", __func__, chan_id, chan_modified, chan_link);
131 
132 	strftime(chan_last_modified_time, sizeof(chan_last_modified_time),
133 	    "If-Modified-Since: %a, %d %b %Y %T %Z", localtime(&chan_modified));
134 	curl_global_init(CURL_GLOBAL_ALL);
135 	curl_handle = curl_easy_init();
136 	if_chan_modified = curl_slist_append(if_chan_modified,
137 	    chan_last_modified_time);
138 	curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, RSSROLL_VERSION);
139 	curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, if_chan_modified);
140 	curl_easy_setopt(curl_handle, CURLOPT_URL, chan_link);
141 	curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
142 	curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
143 
144 	if ((fd = mkstemp(fn)) == -1) {
145 		fprintf(stderr, "%s: Cannot create temp file: %s\n", __func__,
146 		    chan_link);
147 		goto cleanup;
148 	}
149 	if (fchmod(fd, 0600)) {
150 		fprintf(stderr, "%s: Cannot set permission to temp file: %s\n",
151 		    __func__,  chan_link);
152 		goto done;
153 	}
154 
155 	if ((bodyfile = fdopen(fd, "w")) == NULL) {
156 		fprintf(stderr, "%s: Cannot open file for parsing: %s\n",
157 		    __func__, chan_link);
158 		goto done;
159 	}
160 	curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, bodyfile);
161 	curl_easy_perform(curl_handle);
162 	curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_code);
163 	fclose(bodyfile);
164 	if (http_code != 304) {
165 		parse_body(chan_id, fn);
166 	} else {
167 		dmsg(0,"%s: id - %d, link - %s has not been changed.",
168 		    __func__, chan_id, chan_link);
169 	}
170 done:
171 	unlink(fn);
172 cleanup:
173 	dmsg(0, "%s: cleanup", __func__);
174 	unlink(fn);
175 	curl_slist_free_all(if_chan_modified);
176 	curl_easy_cleanup(curl_handle);
177 	curl_global_cleanup();
178 }
179 
/*
 * Print the command line synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	extern	char *__progname;
	const char *name = __progname;

	fprintf(stderr, "Usage: %s [-v] [-d database]\n", name);
	exit(1);
}
187 
188 int
main(int argc,char ** argv)189 main(int argc, char** argv)
190 {
191 
192 	int ch;
193 	const char *dbname = "/var/db/rssroll.db";
194 	Stmt q;
195 
196 	while ((ch = getopt(argc, argv, "d:v")) != -1) {
197 		switch (ch) {
198 			case 'd':
199 				dbname = optarg;
200 				break;
201 			case 'v':
202 				debug++;
203 				break;
204 			default:
205 				usage();
206 		}
207 	}
208 	if (argc != optind) {
209 		usage();
210 	}
211 	if (access(dbname, R_OK)) {
212 		fprintf(stderr, "Cannot read database file: %s!\n", dbname);
213 		return (1);
214 	}
215 	if (sqlite3_open(dbname, &g.db) != SQLITE_OK) {
216 		fprintf(stderr, "Cannot open database file: %s\n", dbname);
217 		return (1);
218 	}
219 	dmsg(0,"database successfully loaded.");
220 	db_prepare(&q, "SELECT id, modified, link FROM channels");
221 	while (db_step(&q)==SQLITE_ROW) {
222 		fetch_channel(db_column_int(&q, 0), (time_t)db_column_int64(&q, 1),
223 		    db_column_text(&q, 2));
224 	}
225 	db_finalize(&q);
226 	sqlite3_close(g.db);
227 	dmsg(0,"database successfully closed.");
228 	return (0);
229 }
230