1 /**
2  * @file feed.c  feed node and subscription type
3  *
4  * Copyright (C) 2003-2017 Lars Windolf <lars.windolf@gmx.de>
5  * Copyright (C) 2004-2006 Nathan J. Conrad <t98502@users.sourceforge.net>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 
22 #include "feed.h"
23 
24 #include <string.h>
25 
26 #include "conf.h"
27 #include "common.h"
28 #include "db.h"
29 #include "debug.h"
30 #include "favicon.h"
31 #include "feedlist.h"
32 #include "html.h"
33 #include "itemlist.h"
34 #include "metadata.h"
35 #include "node.h"
36 #include "render.h"
37 #include "update.h"
38 #include "xml.h"
39 #include "ui/icons.h"
40 #include "ui/liferea_shell.h"
41 #include "ui/subscription_dialog.h"
42 #include "ui/feed_list_node.h"
43 
44 feedPtr
feed_new(void)45 feed_new (void)
46 {
47 	feedPtr		feed;
48 
49 	feed = g_new0 (struct feed, 1);
50 
51 	feed->cacheLimit = CACHE_DEFAULT;
52 	feed->valid = TRUE;
53 
54 	return feed;
55 }
56 
57 static void
feed_import(nodePtr node,nodePtr parent,xmlNodePtr xml,gboolean trusted)58 feed_import (nodePtr node, nodePtr parent, xmlNodePtr xml, gboolean trusted)
59 {
60 	gchar		*cacheLimitStr, *title;
61 	gchar		*tmp;
62 	feedPtr		feed = NULL;
63 
64 	xmlChar	*typeStr = xmlGetProp (xml, BAD_CAST"type");
65 
66 	feed = feed_new ();
67 	feed->fhp = feed_type_str_to_fhp (typeStr);
68 	xmlFree (typeStr);
69 
70 	node_set_data (node, feed);
71 	node_set_subscription (node, subscription_import (xml, trusted));
72 
73 	/* Set the feed cache limit */
74 	cacheLimitStr = xmlGetProp (xml, BAD_CAST "cacheLimit");
75 	if (cacheLimitStr && !xmlStrcmp (cacheLimitStr, "unlimited"))
76 		feed->cacheLimit = CACHE_UNLIMITED;
77 	else
78 		feed->cacheLimit = common_parse_long (cacheLimitStr, CACHE_DEFAULT);
79 	xmlFree (cacheLimitStr);
80 
81 	/* enclosure auto download flag */
82 	tmp = xmlGetProp (xml, BAD_CAST"encAutoDownload");
83 	if (tmp && !xmlStrcmp (tmp, BAD_CAST"true"))
84 		feed->encAutoDownload = TRUE;
85 	xmlFree (tmp);
86 
87 	/* comment feed handling flag */
88 	tmp = xmlGetProp (xml, BAD_CAST"ignoreComments");
89 	if (tmp && !xmlStrcmp (tmp, BAD_CAST"true"))
90 		feed->ignoreComments = TRUE;
91 	xmlFree (tmp);
92 
93 	tmp = xmlGetProp (xml, BAD_CAST"markAsRead");
94 	if (tmp && !xmlStrcmp (tmp, BAD_CAST"true"))
95 		feed->markAsRead = TRUE;
96 	xmlFree (tmp);
97 
98 	tmp = xmlGetProp (xml, BAD_CAST"html5Extract");
99 	if (tmp && !xmlStrcmp (tmp, BAD_CAST"true"))
100 		feed->html5Extract = TRUE;
101 	xmlFree (tmp);
102 
103 	title = xmlGetProp (xml, BAD_CAST"title");
104 	if (!title || !xmlStrcmp (title, BAD_CAST"")) {
105 		if (title)
106 			xmlFree (title);
107 		title = xmlGetProp (xml, BAD_CAST"text");
108 	}
109 
110 	node_set_title (node, title);
111 	xmlFree (title);
112 
113 	if (node->subscription)
114 		debug4 (DEBUG_CACHE, "import feed: title=%s source=%s typeStr=%s interval=%d",
115 		        node_get_title (node),
116 	        	subscription_get_source (node->subscription),
117 		        typeStr,
118 		        subscription_get_update_interval (node->subscription));
119 }
120 
121 static void
feed_export(nodePtr node,xmlNodePtr xml,gboolean trusted)122 feed_export (nodePtr node, xmlNodePtr xml, gboolean trusted)
123 {
124 	feedPtr feed = (feedPtr) node->data;
125 	gchar *cacheLimit = NULL;
126 
127 	if (node->subscription)
128 		subscription_export (node->subscription, xml, trusted);
129 
130 	if (trusted) {
131 		if (feed->cacheLimit >= 0)
132 			cacheLimit = g_strdup_printf ("%d", feed->cacheLimit);
133 		if (feed->cacheLimit == CACHE_UNLIMITED)
134 			cacheLimit = g_strdup ("unlimited");
135 		if (cacheLimit)
136 			xmlNewProp (xml, BAD_CAST"cacheLimit", BAD_CAST cacheLimit);
137 
138 		if (feed->encAutoDownload)
139 			xmlNewProp (xml, BAD_CAST"encAutoDownload", BAD_CAST"true");
140 
141 		if (feed->ignoreComments)
142 			xmlNewProp (xml, BAD_CAST"ignoreComments", BAD_CAST"true");
143 
144 		if (feed->markAsRead)
145 			xmlNewProp (xml, BAD_CAST"markAsRead", BAD_CAST"true");
146 
147 		if (feed->html5Extract)
148 			xmlNewProp (xml, BAD_CAST"html5Extract", BAD_CAST"true");
149 	}
150 
151 	if (node->subscription)
152 		debug3 (DEBUG_CACHE, "adding feed: source=%s interval=%d cacheLimit=%s",
153 		        subscription_get_source (node->subscription),
154 			subscription_get_update_interval (node->subscription),
155 		        (cacheLimit != NULL ? cacheLimit : ""));
156 	g_free (cacheLimit);
157 }
158 
159 static void
feed_add_xml_attributes(nodePtr node,xmlNodePtr feedNode)160 feed_add_xml_attributes (nodePtr node, xmlNodePtr feedNode)
161 {
162 	feedPtr	feed = (feedPtr)node->data;
163 	gchar	*tmp;
164 
165 	xmlNewTextChild (feedNode, NULL, "feedId", node_get_id (node));
166 	xmlNewTextChild (feedNode, NULL, "feedTitle", node_get_title (node));
167 
168 	if (node->subscription)
169 		subscription_to_xml (node->subscription, feedNode);
170 
171 	tmp = g_strdup_printf("%d", node->available?1:0);
172 	xmlNewTextChild(feedNode, NULL, "feedStatus", tmp);
173 	g_free(tmp);
174 
175 	tmp = g_strdup_printf("file://%s", node_get_favicon_file (node));
176 	xmlNewTextChild(feedNode, NULL, "favicon", tmp);
177 	g_free(tmp);
178 
179 	if(feed->parseErrors && (strlen(feed->parseErrors->str) > 0))
180 		xmlNewTextChild(feedNode, NULL, "parseError", feed->parseErrors->str);
181 }
182 
183 xmlDocPtr
feed_to_xml(nodePtr node,xmlNodePtr feedNode)184 feed_to_xml (nodePtr node, xmlNodePtr feedNode)
185 {
186 	xmlDocPtr	doc = NULL;
187 
188 	if (!feedNode) {
189 		doc = xmlNewDoc ("1.0");
190 		feedNode = xmlNewDocNode (doc, NULL, "feed", NULL);
191 		xmlDocSetRootElement (doc, feedNode);
192 	}
193 	feed_add_xml_attributes (node, feedNode);
194 
195 	return doc;
196 }
197 
198 guint
feed_get_max_item_count(nodePtr node)199 feed_get_max_item_count (nodePtr node)
200 {
201 	gint	default_max_items;
202 	feedPtr	feed = (feedPtr)node->data;
203 
204 	switch (feed->cacheLimit) {
205 		case CACHE_DEFAULT:
206 			conf_get_int_value (DEFAULT_MAX_ITEMS, &default_max_items);
207 			return default_max_items;
208 			break;
209 		case CACHE_DISABLE:
210 		case CACHE_UNLIMITED:
211 			return G_MAXUINT;
212 			break;
213 		default:
214 			return feed->cacheLimit;
215 			break;
216 	}
217 }
218 
219 // HTML5 Headline enrichment
220 
221 static void
feed_enrich_item_cb(const struct updateResult * const result,gpointer userdata,updateFlags flags)222 feed_enrich_item_cb (const struct updateResult * const result, gpointer userdata, updateFlags flags) {
223 	itemPtr item;
224 	gchar	*article;
225 
226 	if (!result->data || result->httpstatus >= 400)
227 		return;
228 
229 	item = item_load (GPOINTER_TO_UINT (userdata));
230 	if (!item)
231 		return;
232 
233 	article = html_get_article (result->data, result->source);
234 
235 	if (article)
236 		article = xhtml_strip_dhtml (article);
237 	if (article) {
238 		// Enable AMP images by replacing <amg-img> by <img>
239 		gchar *tmp = g_strjoinv("<img", g_strsplit(article, "<amp-img", 0));
240 		g_free (article);
241 		article = tmp;
242 
243 		metadata_list_set (&(item->metadata), "richContent", article);
244 		db_item_update (item);
245 		itemlist_update_item (item);
246 		g_free (article);
247 	} else {
248 		// If there is no HTML5 article try to fetch AMP source if there is one
249 		gchar *ampurl = html_get_amp_url (result->data);
250 		if (ampurl) {
251 			updateRequestPtr request;
252 
253 			debug3 (DEBUG_HTML, "Fetching AMP HTML %ld %s : %s", item->id, item->title, ampurl);
254 			request = update_request_new ();
255 			update_request_set_source (request, ampurl);
256 			// Explicitely do not pass proxy/auth options to Google
257 			request->options = g_new0 (struct updateOptions, 1);
258 			update_execute_request (NULL, request, feed_enrich_item_cb, item, 0);
259 		}
260 	}
261 	item_unload (item);
262 }
263 
264 /**
265  * Checks content of an items source and tries to crawl content
266  */
267 void
feed_enrich_item(subscriptionPtr subscription,itemPtr item)268 feed_enrich_item (subscriptionPtr subscription, itemPtr item)
269 {
270 	updateRequestPtr request;
271 
272 	if (!item->source)
273 		return;
274 
275 	// Don't enrich twice
276 	if (NULL != metadata_list_get (item->metadata, "richContent"))
277 		return;
278 
279 	// Fetch item->link document and try to parse it as XHTML
280 	debug3 (DEBUG_HTML, "Fetching HTML5 %ld %s : %s", item->id, item->title, item->source);
281 	request = update_request_new ();
282 	update_request_set_source (request, item->source);
283 
284 	// Pass options of parent feed (e.g. password, proxy...)
285 	request->options = update_options_copy (subscription->updateOptions);
286 
287 	update_execute_request (subscription, request, feed_enrich_item_cb, GUINT_TO_POINTER (item->id), 0);
288 }
289 
290 
291 /* implementation of subscription type interface */
292 
293 static void
feed_process_update_result(subscriptionPtr subscription,const struct updateResult * const result,updateFlags flags)294 feed_process_update_result (subscriptionPtr subscription, const struct updateResult * const result, updateFlags flags)
295 {
296 	feedParserCtxtPtr	ctxt;
297 	nodePtr			node = subscription->node;
298 	feedPtr			feed = (feedPtr)node->data;
299 
300 	debug_enter ("feed_process_update_result");
301 
302 	if (result->data) {
303 		/* parse the new downloaded feed into feed and itemSet */
304 		ctxt = feed_create_parser_ctxt ();
305 		ctxt->feed = feed;
306 		ctxt->data = result->data;
307 		ctxt->dataLength = result->size;
308 		ctxt->subscription = subscription;
309 
310 		/* try to parse the feed */
311 		feed_parse (ctxt);
312 
313 		if (ctxt->failed) {
314 			/* No feed found, display an error */
315 			node->available = FALSE;
316 
317 			g_string_prepend (feed->parseErrors, _("<p>Could not detect the type of this feed! Please check if the source really points to a resource provided in one of the supported syndication formats!</p>"
318 			                                       "XML Parser Output:<br /><div class='xmlparseroutput'>"));
319 			g_string_append (feed->parseErrors, "</div>");
320 		} else if (!ctxt->failed && !ctxt->feed->fhp) {
321 			/* There's a feed but no Handler. This means autodiscovery
322 			 * found a feed, but we still need to download it.
323 			 * An update should be in progress that will process it */
324 		} else {
325 			/* Feed found, process it */
326 			itemSetPtr	itemSet;
327 			gboolean	html5_enabled;
328 
329 			node->available = TRUE;
330 
331 			/* merge the resulting items into the node's item set */
332 			itemSet = node_get_itemset (node);
333 			node->newCount = itemset_merge_items (itemSet, ctxt->items, ctxt->feed->valid, ctxt->feed->markAsRead);
334 			itemlist_merge_itemset (itemSet);
335 			itemset_free (itemSet);
336 
337 			/* restore user defined properties if necessary */
338 			if ((flags & FEED_REQ_RESET_TITLE) && ctxt->title)
339 				node_set_title (node, ctxt->title);
340 
341 			if (flags > 0)
342 				db_subscription_update (subscription);
343 
344 			liferea_shell_set_status_bar (_("\"%s\" updated..."), node_get_title (node));
345 		}
346 
347 		feed_free_parser_ctxt (ctxt);
348 	} else {
349 		node->available = FALSE;
350 
351 		liferea_shell_set_status_bar (_("\"%s\" is not available"), node_get_title (node));
352 	}
353 
354 	feed_list_node_update (node->id);
355 
356 	debug_exit ("feed_process_update_result");
357 }
358 
359 static gboolean
feed_prepare_update_request(subscriptionPtr subscription,struct updateRequest * request)360 feed_prepare_update_request (subscriptionPtr subscription, struct updateRequest *request)
361 {
362 	/* Nothing to do. Feeds require no subscription extra handling. */
363 
364 	return TRUE;
365 }
366 
367 /* implementation of the node type interface */
368 
369 static itemSetPtr
feed_load(nodePtr node)370 feed_load (nodePtr node)
371 {
372 	return db_itemset_load(node->id);
373 }
374 
375 static void
feed_save(nodePtr node)376 feed_save (nodePtr node)
377 {
378 	/* Nothing to do. Feeds do not have any UI states */
379 }
380 
381 static void
feed_update_counters(nodePtr node)382 feed_update_counters (nodePtr node)
383 {
384 	node->itemCount = db_itemset_get_item_count (node->id);
385 	node->unreadCount = db_itemset_get_unread_count (node->id);
386 }
387 
388 static void
feed_remove(nodePtr node)389 feed_remove (nodePtr node)
390 {
391 	feed_list_node_remove_node (node);
392 
393 	favicon_remove_from_cache (node->id);
394 	db_subscription_remove (node->id);
395 }
396 
397 static const gchar *
feed_get_direction(nodePtr feed)398 feed_get_direction(nodePtr feed)
399 {
400 	if (node_get_title (feed))
401 		return (common_get_text_direction (node_get_title (feed)));
402 	else
403 		return ("ltr");
404 }
405 
406 static gchar *
feed_render(nodePtr node)407 feed_render (nodePtr node)
408 {
409 	gchar		*output = NULL;
410 	xmlDocPtr	doc;
411 	renderParamPtr	params;
412 	const gchar     *text_direction = NULL;
413 
414 	text_direction = feed_get_direction (node);
415 	params = render_parameter_new ();
416 	render_parameter_add (params, "appDirection='%s'", common_get_app_direction ());
417 	render_parameter_add (params, "txtDirection='%s'", text_direction);
418 
419 	doc = feed_to_xml (node, NULL);
420 	output = render_xml (doc, "feed", params);
421 	xmlFreeDoc (doc);
422 
423 	return output;
424 }
425 
426 static gboolean
feed_add(void)427 feed_add (void)
428 {
429 	subscription_dialog_new ();
430 	return TRUE;
431 }
432 
433 static void
feed_properties(nodePtr node)434 feed_properties (nodePtr node)
435 {
436 	subscription_prop_dialog_new (node->subscription);
437 }
438 
439 static void
feed_free(nodePtr node)440 feed_free (nodePtr node)
441 {
442 	feedPtr	feed = (feedPtr)node->data;
443 
444 	if (feed->parseErrors)
445 		g_string_free (feed->parseErrors, TRUE);
446 	g_free (feed);
447 }
448 
449 subscriptionTypePtr
feed_get_subscription_type(void)450 feed_get_subscription_type (void)
451 {
452 	static struct subscriptionType sti = {
453 		feed_prepare_update_request,
454 		feed_process_update_result
455 	};
456 
457 	return &sti;
458 }
459 
460 nodeTypePtr
feed_get_node_type(void)461 feed_get_node_type (void)
462 {
463 	static struct nodeType nti = {
464 		NODE_CAPABILITY_SHOW_UNREAD_COUNT |
465 		NODE_CAPABILITY_UPDATE |
466 		NODE_CAPABILITY_UPDATE_FAVICON |
467 		NODE_CAPABILITY_EXPORT,
468 		"feed",		/* not used, feed format ids are used instead */
469 		NULL,
470 		feed_import,
471 		feed_export,
472 		feed_load,
473 		feed_save,
474 		feed_update_counters,
475 		feed_remove,
476 		feed_render,
477 		feed_add,
478 		feed_properties,
479 		feed_free
480 	};
481 	nti.icon = icon_get (ICON_DEFAULT);
482 
483 	return &nti;
484 }
485