1 /*
2 Copyright (C) 2005-2017 Marius L. Jøhndal
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18 */
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif /* HAVE_CONFIG_H */
23
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <glib.h>
28 #include <glib/gprintf.h>
29 #include "libxmlutil.h"
30 #include "urlget.h"
31 #include "channel.h"
32 #include "rss.h"
33 #include "utils.h"
34 #include "progress.h"
35 #include "filenames.h"
36
/* Forward declaration: channel_update() calls this helper before its
   definition further down in this file. The definition spells the return
   type gboolean; the result is used as a truth value. */
static int _enclosure_pattern_match(enclosure_filter *filter,
                                    const enclosure *enclosure);
39
_enclosure_iterator(const void * user_data,int i,const xmlNode * node)40 static void _enclosure_iterator(const void *user_data, int i, const xmlNode *node)
41 {
42 const char *downloadtime;
43
44 channel *c = (channel *)user_data;
45
46 downloadtime = libxmlutil_attr_as_string(node, "downloadtime");
47
48 if (downloadtime)
49 downloadtime = g_strdup(downloadtime);
50 else
51 downloadtime = get_rfc822_time();
52
53 g_hash_table_insert(c->downloaded_enclosures,
54 (gpointer)libxmlutil_attr_as_string(node, "url"),
55 (gpointer)downloadtime);
56 }
57
channel_new(const char * url,const char * channel_file,const char * spool_directory,const char * filename_pattern,int resume)58 channel *channel_new(const char *url, const char *channel_file,
59 const char *spool_directory,
60 const char *filename_pattern,
61 int resume)
62 {
63 channel *c;
64 xmlDocPtr doc;
65 xmlNode *root_element = NULL;
66 const char *s;
67
68 c = (channel *)malloc(sizeof(struct _channel));
69 c->url = g_strdup(url);
70 c->channel_filename = g_strdup(channel_file);
71 c->spool_directory = g_strdup(spool_directory);
72 c->filename_pattern = g_strdup(filename_pattern);
73 // c->resume = resume;
74 c->rss_last_fetched = NULL;
75 c->downloaded_enclosures = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, g_free);
76
77 if (g_file_test(c->channel_filename, G_FILE_TEST_EXISTS)) {
78 doc = xmlReadFile(c->channel_filename, NULL, 0);
79
80 if (!doc) {
81 g_fprintf(stderr, "Error parsing channel file %s.\n", c->channel_filename);
82 return NULL;
83 }
84
85 root_element = xmlDocGetRootElement(doc);
86
87 if (!root_element) {
88 xmlFreeDoc(doc);
89
90 g_fprintf(stderr, "Error parsing channel file %s.\n", c->channel_filename);
91 return NULL;
92 }
93
94 /* Fetch channel attributes. */
95 s = libxmlutil_attr_as_string(root_element, "rsslastfetched");
96
97 if (s)
98 c->rss_last_fetched = g_strdup(s);
99
100 /* Iterate encolsure elements. */
101 libxmlutil_iterate_by_tag_name(root_element, "enclosure", c, _enclosure_iterator);
102
103 xmlFreeDoc(doc);
104 }
105
106 return c;
107 }
108
/* GHFunc used with g_hash_table_foreach(): writes one downloaded enclosure
   as an <enclosure> element to the stream passed in user_data.

   key is the enclosure URL and value is its download time, which may be
   NULL. Both are escaped with g_markup_escape_text() because they are
   written inside double-quoted XML attribute values. */
static void _cast_channel_save_downloaded_enclosure(gpointer key, gpointer value,
                                                    gpointer user_data)
{
  FILE *f = (FILE *)user_data;
  gchar *escaped_url = g_markup_escape_text(key, -1);

  if (value) {
    gchar *escaped_time = g_markup_escape_text(value, -1);

    g_fprintf(f, " <enclosure url=\"%s\" downloadtime=\"%s\"/>\n",
              escaped_url, escaped_time);

    g_free(escaped_time);
  } else
    g_fprintf(f, " <enclosure url=\"%s\"/>\n", escaped_url);

  g_free(escaped_url);
}
123
/* Writer callback handed to write_by_temporary_file(): serialises the
   channel passed in user_data to the stream f as an XML document.

   The root <channel> element carries an rsslastfetched attribute when the
   channel has one, followed by one <enclosure> element per entry in the
   table of downloaded enclosures. The debug argument is not used. Always
   returns 0. */
static int _cast_channel_save_channel(FILE *f, gpointer user_data, int debug)
{
  channel *c = (channel *)user_data;

  g_fprintf(f, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");

  if (c->rss_last_fetched) {
    /* The value ends up inside a quoted attribute, so escape it. */
    gchar *escaped_time = g_markup_escape_text(c->rss_last_fetched, -1);

    g_fprintf(f, "<channel version=\"1.0\" rsslastfetched=\"%s\">\n", escaped_time);

    g_free(escaped_time);
  } else
    g_fprintf(f, "<channel version=\"1.0\">\n");

  g_hash_table_foreach(c->downloaded_enclosures, _cast_channel_save_downloaded_enclosure, f);

  g_fprintf(f, "</channel>\n");

  return 0;
}
141
/* Save the channel's state to its channel file. The actual serialisation
   is done by _cast_channel_save_channel(), which write_by_temporary_file()
   invokes with the channel as user data. Whatever write_by_temporary_file()
   returns is not inspected here. */
static void _cast_channel_save(channel *c, int debug)
{
  write_by_temporary_file(c->channel_filename, _cast_channel_save_channel, c, NULL, debug);
}
146
/* Release a channel created by channel_new(), including the table of
   downloaded enclosures and every string the channel owns. Passing NULL is
   allowed and does nothing. */
void channel_free(channel *c)
{
  if (!c)
    return;

  g_hash_table_destroy(c->downloaded_enclosures);
  /* rss_last_fetched is a g_strdup() copy set by channel_new() and
     channel_update(); it may be NULL, which g_free() accepts. */
  g_free(c->rss_last_fetched);
  g_free(c->spool_directory);
  g_free(c->channel_filename);
  g_free(c->url);
  g_free(c->filename_pattern);
  free(c);
}
156
/* Write callback passed to urlget_buffer(): appends a chunk of downloaded
   data to the stdio stream given in user_data. Returns the number of items
   (of the given size) that fwrite() reports as written. */
static size_t _enclosure_urlget_cb(void *buffer, size_t size, size_t nmemb, void *user_data)
{
  FILE *out = user_data;
  size_t items_written = fwrite(buffer, size, nmemb, out);

  return items_written;
}
163
_get_rss(channel * c,void * user_data,channel_callback cb,int debug)164 static rss_file *_get_rss(channel *c, void *user_data, channel_callback cb, int debug)
165 {
166 rss_file *f;
167
168 if (cb)
169 cb(user_data, CCA_RSS_DOWNLOAD_START, NULL, NULL, NULL);
170
171 if (!strncmp("http://", c->url, strlen("http://"))
172 || !strncmp("https://", c->url, strlen("https://")))
173 f = rss_open_url(c->url, debug);
174 else
175 f = rss_open_file(c->url);
176
177 if (cb)
178 cb(user_data, CCA_RSS_DOWNLOAD_END, &(f->channel_info), NULL, NULL);
179
180 return f;
181 }
182
/* Download the enclosure of a single RSS item into the spool directory.

   The target file name is built by build_enclosure_filename(). If that file
   already exists, the download is refused unless resume is non-zero, in
   which case data is appended starting at the file's current size. If cb is
   non-NULL it is notified with CCA_ENCLOSURE_DOWNLOAD_START and
   CCA_ENCLOSURE_DOWNLOAD_END around the transfer. A progress bar is shown
   during the transfer when show_progress_bar is non-zero.

   Returns 0 on success and 1 if the spool directory is missing, the file
   exists and resuming is not allowed, the file cannot be opened, the
   transfer fails, or the file cannot be closed cleanly. */
static int _do_download(channel *c, channel_info *channel_info, rss_item *item,
                        void *user_data, channel_callback cb, int resume,
                        int debug, int show_progress_bar)
{
  int download_failed = 0;
  long resume_from = 0; /* 0 selects non-append mode below. */
  gchar *enclosure_full_filename;
  FILE *enclosure_file;
  struct stat fileinfo;
  progress_bar *pb = NULL;

  /* Check that the spool directory exists. */
  if (!g_file_test(c->spool_directory, G_FILE_TEST_IS_DIR)) {
    g_fprintf(stderr, "Spool directory %s not found.\n", c->spool_directory);
    return 1;
  }

  /* Build enclosure filename. */
  enclosure_full_filename = build_enclosure_filename(c->spool_directory,
                                                     c->filename_pattern, channel_info, item);

  if (g_file_test(enclosure_full_filename, G_FILE_TEST_EXISTS)) {
    /* A file with the same filename already exists. If the user has asked us
       to resume downloads, we should append to the file. Otherwise we should
       refuse to continue. If the feed uses the same filename for each
       enclosure, running in append mode will corrupt existing files. There is
       probably no practical way to avoid this, and the issue is documented in
       castget(1) and castgetrc(5). */
    if (!resume) {
      g_fprintf(stderr, "Enclosure file %s already exists.\n", enclosure_full_filename);
      g_free(enclosure_full_filename);
      return 1;
    }

    /* Set resume offset to the size of the file as it is now. The offset
       stays 0 (non-append mode) if the size is zero or stat() fails. */
    if (0 == stat(enclosure_full_filename, &fileinfo))
      resume_from = fileinfo.st_size;
  }

  enclosure_file = fopen(enclosure_full_filename, resume_from ? "ab" : "wb");

  if (!enclosure_file) {
    g_fprintf(stderr, "Error opening enclosure file %s.\n", enclosure_full_filename);
    g_free(enclosure_full_filename);
    return 1;
  }

  if (cb)
    cb(user_data, CCA_ENCLOSURE_DOWNLOAD_START, channel_info, item->enclosure, enclosure_full_filename);

  if (show_progress_bar)
    pb = progress_bar_new(resume_from);

  if (urlget_buffer(item->enclosure->url, enclosure_file, _enclosure_urlget_cb, resume_from, debug, pb)) {
    g_fprintf(stderr, "Error downloading enclosure from %s.\n", item->enclosure->url);

    download_failed = 1;
  }

  if (pb)
    progress_bar_free(pb);

  /* fclose() flushes buffered data. If it fails, the file on disk is
     incomplete even though the transfer itself succeeded, so report it. */
  if (fclose(enclosure_file) != 0) {
    g_fprintf(stderr, "Error writing enclosure file %s.\n", enclosure_full_filename);

    download_failed = 1;
  }

  if (cb)
    cb(user_data, CCA_ENCLOSURE_DOWNLOAD_END, channel_info, item->enclosure, enclosure_full_filename);

  g_free(enclosure_full_filename);

  return download_failed;
}
263
/* "Catch up" on an enclosure without downloading it: if a callback is
   given, it receives the same CCA_ENCLOSURE_DOWNLOAD_START and
   CCA_ENCLOSURE_DOWNLOAD_END notifications as a real download, but with a
   NULL filename. The channel argument is not used. Always returns 0. */
static int _do_catchup(channel *c, channel_info *channel_info, rss_item *item,
                       void *user_data, channel_callback cb)
{
  if (!cb)
    return 0;

  cb(user_data, CCA_ENCLOSURE_DOWNLOAD_START, channel_info, item->enclosure, NULL);
  cb(user_data, CCA_ENCLOSURE_DOWNLOAD_END, channel_info, item->enclosure, NULL);

  return 0;
}
275
/* Fetch the channel's RSS feed and process every enclosure that is not yet
   recorded as downloaded and that passes the optional filter.

   no_download   - notify the callback only (see _do_catchup()) instead of
                   downloading.
   no_mark_read  - do not record processed enclosures or the fetch time, and
                   do not rewrite the channel file.
   first_only    - stop after the first enclosure that was processed.
   resume, debug, show_progress_bar - passed through to _do_download().
   filter        - may be NULL, in which case every enclosure matches.

   Processing stops at the first enclosure whose download fails. Returns 1
   if the feed could not be retrieved and 0 otherwise, including when a
   download failed. */
int channel_update(channel *c, void *user_data, channel_callback cb,
                   int no_download, int no_mark_read, int first_only,
                   int resume, enclosure_filter *filter, int debug,
                   int show_progress_bar)
{
  int i, download_failed;
  rss_file *f;

  /* Retrieve the RSS file. */
  f = _get_rss(c, user_data, cb, debug);

  if (!f)
    return 1;

  /* Check enclosures in RSS file. */
  for (i = 0; i < f->num_items; i++) {
    rss_item *item = f->items[i];

    /* Items without an enclosure have nothing to download. */
    if (!item->enclosure)
      continue;

    /* Skip enclosures already recorded as downloaded. */
    if (g_hash_table_lookup_extended(c->downloaded_enclosures, item->enclosure->url, NULL, NULL))
      continue;

    /* Skip enclosures rejected by the filter, if there is one. */
    if (filter && !_enclosure_pattern_match(filter, item->enclosure))
      continue;

    if (no_download)
      download_failed = _do_catchup(c, &(f->channel_info), item, user_data, cb);
    else
      download_failed = _do_download(c, &(f->channel_info), item, user_data, cb, resume, debug, show_progress_bar);

    if (download_failed)
      break;

    if (!no_mark_read) {
      /* Mark enclosure as downloaded and immediately save channel
         file to ensure that it reflects the change.
         NOTE(review): the key is the URL string held by the rss_file, not
         a copy; confirm it stays valid for as long as the table is used
         after rss_close(). */
      g_hash_table_insert(c->downloaded_enclosures, item->enclosure->url,
                          (gpointer)get_rfc822_time());

      _cast_channel_save(c, debug);
    }

    /* If we have been instructed to deal only with the first
       available enclosure, it is time to break out of the loop. */
    if (first_only)
      break;
  }

  if (!no_mark_read) {
    /* Update the RSS last fetched time and save the channel file again.
       g_free() accepts NULL, so no guard is needed. */
    g_free(c->rss_last_fetched);
    c->rss_last_fetched = g_strdup(f->fetched_time);

    _cast_channel_save(c, debug);
  }

  rss_close(f);

  return 0;
}
339
340 /* Match the (file) name of an enclosure against a regexp. Letters
341 in the pattern match both upper and lower case letters if
342 'caseless' is TRUE. Returns TRUE if the pattern matches, FALSE
343 otherwise. */
_enclosure_pattern_match(enclosure_filter * filter,const enclosure * enclosure)344 static gboolean _enclosure_pattern_match(enclosure_filter *filter,
345 const enclosure *enclosure)
346 {
347 GError *error = NULL;
348 GRegexCompileFlags compile_options = 0;
349 GRegexMatchFlags match_options = 0;
350 GRegex *regex;
351 gboolean match;
352
353 g_assert(filter);
354 g_assert(filter->pattern);
355 g_assert(enclosure);
356
357 if (filter->caseless)
358 compile_options |= G_REGEX_CASELESS;
359
360 regex = g_regex_new(filter->pattern, compile_options, match_options,
361 &error);
362
363 if (error) {
364 fprintf(stderr, "Error compiling regular expression %s: %s\n",
365 filter->pattern, error->message);
366 g_error_free(error);
367 return FALSE;
368 }
369
370 match = g_regex_match(regex, enclosure->url, match_options, NULL);
371
372 g_regex_unref(regex);
373
374 return match;
375 }
376
enclosure_filter_new(const gchar * pattern,gboolean caseless)377 enclosure_filter *enclosure_filter_new(const gchar *pattern,
378 gboolean caseless)
379 {
380 enclosure_filter *e = g_malloc(sizeof(struct _enclosure_filter));
381
382 g_assert(pattern);
383
384 e->pattern = g_strdup(pattern);
385 e->caseless = caseless;
386
387 return e;
388 }
389
/* Release a filter created by enclosure_filter_new() together with its
   pattern copy. Passing NULL is allowed and does nothing. */
void enclosure_filter_free(enclosure_filter *e)
{
  if (!e)
    return;

  g_free(e->pattern);
  g_free(e);
}
395