<?php
/**
 * news_admin - import RSS and Atom feeds
 *
 * @link http://www.egroupware.org
 * @author Ralf Becker <RalfBecker-AT-outdoor-training.de>
 * @package news_admin
 * @copyright (c) 2007-16 by Ralf Becker <RalfBecker-AT-outdoor-training.de>
 * @license http://opensource.org/licenses/gpl-license.php GPL - GNU General Public License
 * @version $Id$
 */

use EGroupware\Api;

/**
 * Import RSS and Atom feeds via PEAR's XML_Feed_Parser class
 */
class news_admin_import
{
	/**
	 * Reference to the news_admin's bo
	 *
	 * @var news_admin_bo
	 */
	public $bonews;

	/**
	 * Constructor
	 *
	 * @param news_admin_bo $bonews =null bo object to use, a new news_admin_bo is created if none is given
	 */
	function __construct($bonews=null)
	{
		$this->bonews = is_null($bonews) ? new news_admin_bo() : $bonews;
	}

	/**
	 * Read the feed of the given URL
	 *
	 * Only http, https and ftp URLs are fetched. If parsing fails and no explicit $context
	 * was given, the download is retried once with a browser-like user agent.
	 *
	 * @param string $url
	 * @param array $context =null stream-context options, default a GET request with an Accept-Language header
	 * @return XML_Feed_Parser|boolean false on error
	 */
	function read($url, array $context=null)
	{
		// security: never fetch local files or other stream-wrappers
		$parts = parse_url($url);
		if (!isset($parts['scheme']) || !in_array($parts['scheme'], array('http','https','ftp'), true))
		{
			return false;
		}
		$default_lang = $GLOBALS['egw']->preferences->default['common']['lang'];
		$default_context = array(
			'method' => 'GET',
			// quality values need the "q=" prefix, otherwise the header is malformed
			'header' => 'Accept-Language: '.($default_lang ? $default_lang : 'en').';q=0.8,en;q=0.2',
		);

		if (!($feed_xml = file_get_contents($url, false,
			Api\Framework::proxy_context(null, null, $context ? $context : $default_context))) ||
			!@include_once('XML/Feed/Parser.php'))
		{
			return false;
		}
		$matches = null;
		// if the xml-file specifies an encoding, convert it to our own encoding
		// ([^>]* keeps the match inside the XML declaration, even if the whole feed is on one line)
		if (preg_match('/<\?xml[^>]*encoding="([^"]+)"/i', $feed_xml, $matches) && $matches[1])
		{
			$feed_xml = preg_replace('/(<\?xml[^>]*encoding=")[^"]+"/i', '${1}'.Api\Translation::charset().'"',
				Api\Translation::convert($feed_xml, $matches[1]), 1);
		}
		// stop "unsupported encoding" warnings: mask out E_WARNING only
		// (bitwise ~, a logical ! would switch off ALL error reporting)
		$level = error_reporting();
		error_reporting($level & ~E_WARNING);
		try {
			$parser = new XML_Feed_Parser($feed_xml);
		}
		catch (XML_Feed_Parser_Exception $e)
		{
			unset($e);	// not used
			if (!$context)
			{
				// Try again with a user agent
				$context = array('user_agent' => 'Mozilla/5.0')+$default_context;
				$parser = $this->read($url, $context);
			}
			else
			{
				$parser = false;
			}
		}
		error_reporting($level);

		return $parser;
	}

	/**
	 * Import the feed of one category
	 *
	 * If the category's keep_imported is >= 0, existing news of the category beyond the
	 * keep_imported newest ones are deleted after the import. With keep_imported == 0
	 * news still contained in the feed are kept. Without keep_imported nothing is deleted.
	 *
	 * @param int $cat_id
	 * @return array|boolean array(total imported, newly imported, deleted) or false on error
	 */
	function import($cat_id)
	{
		if (($cat = $this->bonews->read_cat($cat_id)) === false) return false;
		if (empty($cat['import_url'])) return false;
		if (!isset($cat['keep_imported'])) $cat['keep_imported'] = -1;	// keep all was the default.

		$parser = $this->read($cat['import_url']);

		if (!is_object($parser)) return false;

		$imported = $newly = 0;

		// news_id => true of all existing news exceeding the number of news to keep
		$news_delete = array();
		if ($cat['keep_imported'] >= 0)
		{
			$check = array('cat_id' => $cat_id);
			$count = 0;
			foreach((array)$this->bonews->search($check, array('news_id'), 'news_date DESC') as $news)
			{
				if (++$count > $cat['keep_imported'])
				{
					$news_delete[$news['news_id']] = true;
				}
			}
		}

		foreach ($parser as $entry)
		{
			$content = $entry->content;
			$content_is_html = $content && strip_tags($content) != $content;

			/* Update comma to the %ASCII encoding in the link to cope with the etemplate display.
			 * This cannot be done inside the eTemplate as this does not know the content type
			 * (URL / HTML / Plain Text) when converting variables to values in etemplate:expand_name()
			 */
			$link = str_replace(',', '%2C', $entry->link);

			// the link identifies an already imported entry: it is stored as teaser for
			// html entries and as content for link-only entries
			$check = array('cat_id' => $cat_id);
			$check[$content_is_html ? 'news_teaser' : 'news_content'] = $link;

			if (($newsitem = $this->bonews->read($check)))
			{
				if (0 == $cat['keep_imported'])
				{
					unset($news_delete[$newsitem['news_id']]);
				}
			}
			else
			{
				$this->bonews->init();
				++$newly;
			}
			if ((($date = $entry->updated) || ($date = $entry->pubDate)) && !is_numeric($date))
			{
				$date = strtotime($date);
			}
			$title = $entry->title;
			if ($content_is_html)
			{
				$teaser = $link;
			}
			else
			{
				// only use the summary as teaser, if it differs from the headline
				$summary = $entry->summary;
				$teaser = $summary != $title ? $summary : null;
			}
			$item = array(
				'cat_id' => $cat_id,
				'news_date' => $date,
				'news_headline' => $title,
				'news_content' => $content_is_html ? $content : $link,
				'news_is_html' => $content_is_html ? -2 : -1,	// -1 = only link
				'news_teaser' => $teaser,
				'news_submittedby' => 0,
			);
			// save returns a false value on success
			if (!$this->bonews->save($item, true))
			{
				++$imported;
			}
		}

		$deleted = 0;
		// cope with huge number of news rows to delete triggering SQL error:
		// "Got a packet bigger than 'max_allowed_packet' bytes" (of default 1MB) by deleting chunks of 1000 rows
		foreach (array_chunk(array_keys($news_delete), 1000) as $ids)
		{
			$deleted += $this->bonews->delete(array($this->bonews->autoinc_id => $ids));
		}
		/* Update the category timestamp on successful import */
		$cat['import_timestamp'] = $this->bonews->now;
		$this->bonews->save_cat($cat);

		return array($imported, $newly, $deleted);
	}

	/**
	 * Import all Api\Categories, called via the async timed service
	 *
	 * A category is imported if it has an import_url and the current hour
	 * is a multiple of its import_frequency.
	 */
	function async_import()
	{
		$cats = $nul = null;
		if (!$this->bonews->get_cats(array(
			'num_rows' => 999,
			'start' => 0,
		),$cats,$nul,true)) return;

		$hour = (int)date('H');
		foreach($cats as $cat)
		{
			// cast first: a frequency evaluating to 0 (eg. '0.5') must not cause a modulo by zero
			$frequency = isset($cat['import_frequency']) ? (int)$cat['import_frequency'] : 0;

			if (!empty($cat['import_url']) && $frequency && !($hour % $frequency))
			{
				$this->import($cat['id']);
			}
		}
	}
}