1<?php
2/**
3 * SimplePie
4 *
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
7 *
8 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
13 *
14 * 	* Redistributions of source code must retain the above copyright notice, this list of
15 * 	  conditions and the following disclaimer.
16 *
17 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
18 * 	  of conditions and the following disclaimer in the documentation and/or other materials
19 * 	  provided with the distribution.
20 *
21 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * 	  to endorse or promote products derived from this software without specific prior
23 * 	  written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * @package SimplePie
36 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
37 * @author Ryan Parman
38 * @author Sam Sneddon
39 * @author Ryan McCue
40 * @link http://simplepie.org/ SimplePie
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
42 */
43
44/**
45 * Used for feed auto-discovery
46 *
47 *
48 * This class can be overloaded with {@see SimplePie::set_locator_class()}
49 *
50 * @package SimplePie
51 */
class SimplePie_Locator
{
	/**
	 * User agent forwarded to every File created while probing candidate URLs.
	 *
	 * @var string|null
	 */
	public $useragent;

	/**
	 * Timeout forwarded to every File created while probing candidate URLs.
	 *
	 * @var int
	 */
	public $timeout;

	/**
	 * The document that is inspected for feed links.
	 *
	 * @var SimplePie_File
	 */
	public $file;

	/**
	 * Absolutized anchor URLs on the same authority as the inspected document
	 * (filled by {@see get_links()}).
	 *
	 * @var string[]
	 */
	public $local = array();

	/**
	 * Absolutized anchor URLs on a different authority than the inspected
	 * document (filled by {@see get_links()}).
	 *
	 * @var string[]
	 */
	public $elsewhere = array();

	/**
	 * Not used by any method of this class; kept for backwards compatibility.
	 *
	 * @var array
	 */
	public $cached_entities = array();

	/**
	 * URL of the inspected document (set by {@see get_base()}).
	 *
	 * @var string
	 */
	public $http_base;

	/**
	 * Base URL for relative links: the first usable <base href>, otherwise the
	 * same value as $http_base (set by {@see get_base()}).
	 *
	 * @var string
	 */
	public $base;

	/**
	 * Line number of the <base> element that supplied $base, 0 when none did.
	 *
	 * @var int
	 */
	public $base_location = 0;

	/**
	 * Number of candidate URLs fetched so far by this instance.
	 *
	 * @var int
	 */
	public $checked_feeds = 0;

	/**
	 * Probing stops once $checked_feeds equals this value.
	 *
	 * @var int
	 */
	public $max_checked_feeds = 10;

	/**
	 * Forwarded unchanged to every File created while probing candidate URLs.
	 *
	 * @var bool
	 */
	public $force_fsockopen = false;

	/**
	 * Forwarded unchanged to every File created while probing candidate URLs.
	 *
	 * @var array
	 */
	public $curl_options = array();

	/**
	 * Parsed HTML of the inspected document, or null when the DOM extension is
	 * not available. Declared explicitly so that assigning it in the
	 * constructor does not create a dynamic property.
	 *
	 * @var DOMDocument|null
	 */
	public $dom;

	/**
	 * Factory/service registry, injected through {@see set_registry()}.
	 *
	 * @var SimplePie_Registry
	 */
	protected $registry;

	/**
	 * Stores the probing options and parses the document body as HTML.
	 *
	 * @param SimplePie_File $file              Document to inspect
	 * @param int            $timeout           Timeout for candidate requests
	 * @param string|null    $useragent         User agent for candidate requests
	 * @param int            $max_checked_feeds Maximum number of candidates to fetch
	 * @param bool           $force_fsockopen   Forwarded to created File objects
	 * @param array          $curl_options      Forwarded to created File objects
	 */
	public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = array())
	{
		$this->file = $file;
		$this->useragent = $useragent;
		$this->timeout = $timeout;
		$this->max_checked_feeds = $max_checked_feeds;
		$this->force_fsockopen = $force_fsockopen;
		$this->curl_options = $curl_options;

		if (!class_exists('DOMDocument'))
		{
			$this->dom = null;
			return;
		}

		$this->dom = new DOMDocument();

		// DOMDocument::loadHTML() throws a ValueError for an empty string on
		// PHP 8, and an exception is not caught by the temporary error handler
		// below. An empty body simply leaves an empty document, which makes
		// every later lookup find nothing.
		$body = (string) $this->file->body;
		if ($body === '')
		{
			return;
		}

		// Real-world HTML is rarely valid; silence the parser's warnings.
		set_error_handler(array('SimplePie_Misc', 'silence_errors'));
		$this->dom->loadHTML($body);
		restore_error_handler();
	}

	/**
	 * Injects the registry used to create File/sniffer objects and to call
	 * the Misc helpers.
	 *
	 * @param SimplePie_Registry $registry
	 */
	public function set_registry(SimplePie_Registry $registry)
	{
		$this->registry = $registry;
	}

	/**
	 * Locates a feed, starting from the inspected document.
	 *
	 * The document itself is returned when it already is a feed. Otherwise
	 * the strategies enabled in $type are tried in this order: autodiscovery,
	 * local links by extension, local links by URL content, remote links by
	 * extension, remote links by URL content.
	 *
	 * @param int        $type    Bitmask of SIMPLEPIE_LOCATOR_* constants
	 * @param array|null $working Receives the list of feeds found by the
	 *                            strategy that succeeded
	 * @return SimplePie_File|null First feed found, null when there is none
	 * @throws SimplePie_Exception When a strategy is requested but no DOM is available
	 */
	public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
	{
		if ($this->is_feed($this->file))
		{
			return $this->file;
		}

		// A remote document that is not HTML has no links worth following.
		if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
		{
			$sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
			if ($sniffer->get_type() !== 'text/html')
			{
				return null;
			}
		}

		if ($type & ~SIMPLEPIE_LOCATOR_NONE)
		{
			$this->get_base();
		}

		if (($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery()))
		{
			return $working[0];
		}

		$link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
		if (($type & $link_strategies) && $this->get_links())
		{
			if (($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local)))
			{
				return $working[0];
			}

			if (($type & SIMPLEPIE_LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local)))
			{
				return $working[0];
			}

			if (($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere)))
			{
				return $working[0];
			}

			if (($type & SIMPLEPIE_LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere)))
			{
				return $working[0];
			}
		}

		return null;
	}

	/**
	 * Tells whether a fetched file should be treated as a feed.
	 *
	 * Remote files are judged by their sniffed content type; local files are
	 * always accepted; anything else is rejected.
	 *
	 * @param SimplePie_File $file
	 * @param bool           $check_html Also accept text/html
	 * @return bool
	 */
	public function is_feed($file, $check_html = false)
	{
		if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
		{
			$sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
			$sniffed = $sniffer->get_type();

			$mime_types = array(
				'application/rss+xml',
				'application/rdf+xml',
				'text/rdf',
				'application/atom+xml',
				'text/xml',
				'application/xml',
				'application/x-rss+xml',
			);
			if ($check_html)
			{
				$mime_types[] = 'text/html';
			}

			return in_array($sniffed, $mime_types, true);
		}

		if ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
		{
			return true;
		}

		return false;
	}

	/**
	 * Determines the base URL for relative links.
	 *
	 * Sets $http_base to the document URL and $base to the first <base href>
	 * that can be absolutized, falling back to the document URL. The line of
	 * that <base> element is remembered so links that appear before it can be
	 * resolved against the document URL instead.
	 *
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function get_base()
	{
		$this->require_dom();

		$this->http_base = $this->file->url;
		$this->base = $this->http_base;

		foreach ($this->dom->getElementsByTagName('base') as $element)
		{
			if (!$element->hasAttribute('href'))
			{
				continue;
			}

			$base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
			if ($base === false)
			{
				continue;
			}

			$this->base = $base;
			$this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
			break;
		}
	}

	/**
	 * Collects feeds advertised through rel attributes on <link>, <a> and
	 * <area> elements, in that order.
	 *
	 * @return SimplePie_File[]|null List of feeds, null when none was found
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function autodiscovery()
	{
		$done = array();
		$feeds = array();

		// Each call returns the feeds it was given plus the ones it found.
		foreach (array('link', 'a', 'area') as $tag)
		{
			$feeds = $this->search_elements_by_tag($tag, $done, $feeds);
		}

		if (!empty($feeds))
		{
			return array_values($feeds);
		}

		return null;
	}

	/**
	 * Probes the feed links found on all elements with the given tag name.
	 *
	 * @param string $name  Tag name to scan
	 * @param array  $done  URLs that have already been fetched; extended in place
	 * @param array  $feeds Feeds found so far, keyed by URL
	 * @return array $feeds plus the feeds found on this tag, keyed by URL
	 * @throws SimplePie_Exception When no DOM is available
	 */
	protected function search_elements_by_tag($name, &$done, $feeds)
	{
		$this->require_dom();

		foreach ($this->dom->getElementsByTagName($name) as $link)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}
			if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
			{
				continue;
			}

			$rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel')))));
			$line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

			$href = $this->absolutize_href(trim($link->getAttribute('href')), $this->base_location < $line);
			if ($href === false)
			{
				continue;
			}

			// Never fetch the same URL twice. Only URLs that were actually
			// attempted are remembered, so an unrelated earlier link to the
			// same address (e.g. rel="canonical") cannot hide a feed link.
			if (in_array($href, $done, true) || isset($feeds[$href]))
			{
				continue;
			}
			if (!$this->is_feed_link($link, $rel))
			{
				continue;
			}

			$done[] = $href;
			$this->checked_feeds++;

			$feed = $this->fetch_candidate($href, true);
			if ($feed !== null)
			{
				$feeds[$href] = $feed;
			}
		}

		return $feeds;
	}

	/**
	 * Sorts the document's anchors into $local and $elsewhere.
	 *
	 * Only anchors without a scheme, or with an http, https or feed scheme,
	 * are considered. An anchor is local when its href has no authority or
	 * the same authority as the inspected document.
	 *
	 * @return true|null True when at least one link was collected
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function get_links()
	{
		$this->require_dom();

		// Parsed URL of the inspected document; resolved on first use only.
		$current = null;

		foreach ($this->dom->getElementsByTagName('a') as $link)
		{
			if (!$link->hasAttribute('href'))
			{
				continue;
			}

			$raw = trim($link->getAttribute('href'));
			$parsed = $this->registry->call('Misc', 'parse_url', array($raw));
			if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
			{
				continue;
			}

			$after_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
			$href = $this->absolutize_href($raw, $after_base);
			if ($href === false)
			{
				continue;
			}

			if ($current === null)
			{
				$current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
			}

			if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
			{
				$this->local[] = $href;
			}
			else
			{
				$this->elsewhere[] = $href;
			}
		}

		$this->local = array_unique($this->local);
		$this->elsewhere = array_unique($this->elsewhere);

		if (!empty($this->local) || !empty($this->elsewhere))
		{
			return true;
		}

		return null;
	}

	/**
	 * Returns the absolutized href of the first <a> or <link> element whose
	 * rel attribute contains the given token.
	 *
	 * Only hrefs without a scheme or with an http/https scheme are
	 * considered. Links whose href cannot be absolutized are skipped.
	 *
	 * @param string $rel Lower-case rel token to look for
	 * @return string|null
	 * @throws SimplePie_Exception When DOMDocument or DOMXpath is not available
	 */
	public function get_rel_link($rel)
	{
		$this->require_dom();

		if (!class_exists('DOMXpath'))
		{
			throw new SimplePie_Exception('DOMXpath not found, unable to use get_rel_link');
		}

		$xpath = new DOMXpath($this->dom);
		$query = '//a[@rel and @href] | //link[@rel and @href]';

		foreach ($xpath->query($query) as $link)
		{
			$raw = trim($link->getAttribute('href'));
			$parsed = $this->registry->call('Misc', 'parse_url', array($raw));
			if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme']))
			{
				continue;
			}

			$after_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
			$href = $this->absolutize_href($raw, $after_base);
			if ($href === false)
			{
				// One unusable link must not end the search for the others.
				continue;
			}

			// Same tokenizer as autodiscovery, so tabs, newlines and repeated
			// spaces in the rel attribute are handled alike.
			$rel_values = $this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel'))));
			if (in_array($rel, $rel_values, true))
			{
				return $href;
			}
		}

		return null;
	}

	/**
	 * Probes the URLs that end in .rss, .rdf, .atom or .xml.
	 *
	 * URLs that were probed without success are removed from $array.
	 *
	 * @param array $array Candidate URLs; modified in place
	 * @return SimplePie_File[]|null Single-element list with the first feed found
	 */
	public function extension(&$array)
	{
		$extensions = array('.rss', '.rdf', '.atom', '.xml');

		foreach ($array as $key => $value)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}

			// strrchr() returns false when the URL contains no dot.
			$suffix = strtolower((string) strrchr($value, '.'));
			if (!in_array($suffix, $extensions, true))
			{
				continue;
			}

			$this->checked_feeds++;

			$feed = $this->fetch_candidate($value);
			if ($feed !== null)
			{
				return array($feed);
			}

			unset($array[$key]);
		}

		return null;
	}

	/**
	 * Probes the URLs that contain "feed", "rss", "rdf", "atom" or "xml".
	 *
	 * URLs that were probed without success are removed from $array.
	 *
	 * @param array $array Candidate URLs; modified in place
	 * @return SimplePie_File[]|null Single-element list with the first feed found
	 */
	public function body(&$array)
	{
		foreach ($array as $key => $value)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}
			if (!preg_match('/(feed|rss|rdf|atom|xml)/i', $value))
			{
				continue;
			}

			$this->checked_feeds++;

			$feed = $this->fetch_candidate($value);
			if ($feed !== null)
			{
				return array($feed);
			}

			unset($array[$key]);
		}

		return null;
	}

	/**
	 * Guards every method that needs the parsed document.
	 *
	 * @throws SimplePie_Exception When no DOM is available
	 */
	protected function require_dom()
	{
		if ($this->dom === null)
		{
			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
		}
	}

	/**
	 * Resolves an href against the <base> URL or the document URL.
	 *
	 * @param string $href       Trimmed href attribute value
	 * @param bool   $after_base Whether the link appears after the <base> element
	 * @return string|false Result of Misc::absolutize_url
	 */
	protected function absolutize_href($href, $after_base)
	{
		$relative_to = $after_base ? $this->base : $this->http_base;

		return $this->registry->call('Misc', 'absolutize_url', array($href, $relative_to));
	}

	/**
	 * Tells whether an element advertises a feed through its rel/type.
	 *
	 * That is the case for rel="feed", and for rel="alternate" without
	 * "stylesheet" when the type is text/html, application/rss+xml or
	 * application/atom+xml.
	 *
	 * @param DOMElement $link
	 * @param string[]   $rel  Lower-case rel tokens of the element
	 * @return bool
	 */
	protected function is_feed_link($link, $rel)
	{
		if (in_array('feed', $rel, true))
		{
			return true;
		}

		if (!in_array('alternate', $rel, true) || in_array('stylesheet', $rel, true) || !$link->hasAttribute('type'))
		{
			return false;
		}

		$mime = strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type'))));

		return in_array($mime, array('text/html', 'application/rss+xml', 'application/atom+xml'), true);
	}

	/**
	 * Fetches a candidate URL and returns it when it is a usable feed.
	 *
	 * A candidate is usable when the request succeeded, the file is either
	 * not remote or has status 200 or 207-299, and {@see is_feed()} accepts it.
	 *
	 * @param string $url
	 * @param bool   $check_html Passed on to {@see is_feed()}
	 * @return SimplePie_File|null
	 */
	protected function fetch_candidate($url, $check_html = false)
	{
		$headers = array(
			'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
		);
		$feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));

		if (!$feed->success)
		{
			return null;
		}

		// The parentheses matter: "===" binds tighter than "&", so without
		// them the flag test is always false.
		$is_remote = ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) !== 0;
		if ($is_remote)
		{
			$status_ok = $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
			if (!$status_ok)
			{
				return null;
			}
		}

		return $this->is_feed($feed, $check_html) ? $feed : null;
	}
}
425