1<?php
2/**
3 * news_admin - import RSS and Atom feeds
4 *
5 * @link http://www.egroupware.org
6 * @author Ralf Becker <RalfBecker-AT-outdoor-training.de>
7 * @package news_admin
8 * @copyright (c) 2007-16 by Ralf Becker <RalfBecker-AT-outdoor-training.de>
9 * @license http://opensource.org/licenses/gpl-license.php GPL - GNU General Public License
10 * @version $Id$
11 */
12
13use EGroupware\Api;
14
/**
 * Import RSS and Atom feeds via PEAR's XML_Feed_Parser class
 *
 * The importer fetches the feed configured as "import_url" of a news category,
 * parses it and stores every entry through the news_admin business object.
 */
class news_admin_import
{
	/**
	 * Reference to the news_admins's bo
	 *
	 * @var news_admin_bo
	 */
	var $bonews;

	/**
	 * Constructor
	 *
	 * @param news_admin_bo $bonews =null business object to use, a new one is created if omitted
	 */
	function __construct($bonews=null)
	{
		$this->bonews = is_null($bonews) ? new news_admin_bo() : $bonews;
	}

	/**
	 * Read the feed of the given URL
	 *
	 * Only http, https and ftp URLs are fetched, everything else is refused.
	 * If the downloaded document can not be parsed and no explicit $context was
	 * given, the download is retried once with a browser-like user agent.
	 *
	 * @param string $url
	 * @param array $context =null stream-context options, default GET with an Accept-Language header
	 * @return XML_Feed_Parser|boolean false on error
	 */
	function read($url, array $context=null)
	{
		// preferences might not be available, fall back to English in that case
		$default_lang = isset($GLOBALS['egw']->preferences->default['common']['lang']) ?
			$GLOBALS['egw']->preferences->default['common']['lang'] : null;
		$default_context = array(
			'method'=>'GET',
			// quality values require the "q=" prefix to form a valid header
			'header' => 'Accept-Language: '.($default_lang ? $default_lang : 'en').';q=0.8,en;q=0.2',
		);
		// security: only allow remote schemes, never local files or other stream wrappers
		$parts = parse_url($url);
		$scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : '';
		if (!in_array($scheme, array('http','https','ftp'), true)) return false;

		if (!($feed_xml = file_get_contents($url, false,
			Api\Framework::proxy_context(null, null, $context ? $context : $default_context))) ||
			!@include_once('XML/Feed/Parser.php'))
		{
			return false;
		}
		$matches = null;
		// if the xml-file specifies an encoding, convert it to our own encoding
		// ([^>]*? keeps the match inside the XML declaration, even if the whole feed is a single line)
		if (preg_match('/<\?xml[^>]*?encoding="([^"]+)"/i', $feed_xml, $matches) && $matches[1])
		{
			$feed_xml = preg_replace('/(<\?xml[^>]*?encoding=")[^"]+"/i',
				'${1}'.Api\Translation::charset().'"',
				Api\Translation::convert($feed_xml, $matches[1]), 1);
		}
		// stop "unsupported encoding" warnings: mask out E_WARNING only (bitwise NOT, not logical NOT)
		$level = error_reporting();
		error_reporting($level & ~E_WARNING);
		try {
			$parser = new XML_Feed_Parser($feed_xml);
		}
		catch (XML_Feed_Parser_Exception $e)
		{
			unset($e);	// not used
			if (!$context)
			{
				// Try again with a user agent
				$parser = $this->read($url, array('user_agent' => 'Mozilla/5.0')+$default_context);
			}
			else
			{
				$parser = false;
			}
		}
		error_reporting($level);

		return $parser;
	}

	/**
	 * Import the feed of one category
	 *
	 * If the category has a "keep_imported" value >= 0, stored news beyond that
	 * number (newest first) are deleted after the import. With a value of 0
	 * entries still present in the feed are kept.
	 *
	 * @param int $cat_id
	 * @return array|boolean array(total imported, newly imported, deleted) or false on error
	 */
	function import($cat_id)
	{
		if (($cat = $this->bonews->read_cat($cat_id)) === false) return false;
		if (! ($url = $cat['import_url'])) return false;
		if (!isset($cat['keep_imported'])) $cat['keep_imported'] = -1; // keep all was the default.

		$parser = $this->read($url);

		if (!is_object($parser)) return false;

		$imported = $newly = $deleted = 0;

		// collect the ids of all stored news exceeding the number of entries to keep
		$news_delete = array();
		if ($cat['keep_imported'] >= 0)
		{
			$count = 0;
			foreach((array)$this->bonews->search(array('cat_id' => $cat_id),array('news_id'),'news_date DESC') as $news)
			{
				if (++$count > $cat['keep_imported'])
				{
					$news_delete[$news['news_id']] = true;
				}
			}
		}

		foreach ($parser as $entry)
		{
			$content_is_html = $entry->content && strip_tags($entry->content) != $entry->content;

			/* Update comma to the %ASCII encoding in the link to cope with the etemplate display.
			 * This cannot be done inside the eTemplate as this does not know the content type
			 * (URL / HTML / Plain Text) when converting variables to values in etemplate:expand_name()
			 */
			$entry->link = str_replace(',', '%2C', $entry->link);

			// the link identifies an entry: it is stored as teaser for html content, as content otherwise
			$check = array(
				'cat_id' => $cat_id,
				($content_is_html ? 'news_teaser' : 'news_content') => $entry->link,
			);
			if (($newsitem = $this->bonews->read($check)))
			{
				if (0 == $cat['keep_imported'])
				{
					unset($news_delete[$newsitem['news_id']]);
				}
			}
			else
			{
				$this->bonews->init();
				++$newly;
			}
			// prefer the updated timestamp over the publication date, convert textual dates
			if (!($date = $entry->updated)) $date = $entry->pubDate;
			if ($date && !is_numeric($date))
			{
				$date = strtotime($date);
			}
			$item = array(
				'cat_id' => $cat_id,
				'news_date' => $date,
				'news_headline' => $entry->title,
				'news_content' => $content_is_html ? $entry->content : $entry->link,
				'news_is_html' => $content_is_html ? -2 : -1,		// -1 = only link
				'news_teaser' =>  $content_is_html ? $entry->link : ($entry->summary != $entry->title ? $entry->summary : NULL),
				'news_submittedby' => 0,
			);
			// save() returns a falsy value on success
			if (!$this->bonews->save($item, true))
			{
				++$imported;
			}
		}

		// cope with huge number of news rows to delete triggering SQL error:
		// "Got a packet bigger than 'max_allowed_packet' bytes" (of default 1MB) by deleting chunks of 1000 rows
		foreach (array_chunk(array_keys($news_delete), 1000) as $ids)
		{
			$deleted += $this->bonews->delete(array($this->bonews->autoinc_id => $ids));
		}
		/* Update the category timestamp on successful import */
		$cat['import_timestamp'] = $this->bonews->now;
		$this->bonews->save_cat($cat);

		return array($imported,$newly,$deleted);
	}

	/**
	 * Import all Api\Categories, called via the async timed service
	 *
	 * A category is imported if it has an import URL and the current hour is a
	 * multiple of its import frequency.
	 */
	function async_import()
	{
		$cats = $nul = null;
		if (!$this->bonews->get_cats(array(
			'num_rows' => 999,
			'start' => 0,
		),$cats,$nul,true)) return;

		$hour = (int)date('H');
		foreach($cats as $cat)
		{
			// cast first: a value like '0.5' is truthy but would cause a modulo by zero
			$frequency = isset($cat['import_frequency']) ? (int)$cat['import_frequency'] : 0;

			if (!empty($cat['import_url']) && $frequency && !($hour % $frequency))
			{
				$this->import($cat['id']);
			}
		}
	}
}
212