1<?php
2/**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license   http://framework.zend.com/license/new-bsd New BSD License
8 */
9
10namespace Zend\Feed\Reader\Feed;
11
12use DateTime;
13use DOMDocument;
14use Zend\Feed\Reader;
15use Zend\Feed\Reader\Collection;
16use Zend\Feed\Reader\Exception;
17
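/**
 * Feed-level reader for RSS documents: feeds rooted at /rss/channel as well as
 * the RDF-based RSS 1.0 and RSS 0.90 formats, with the DublinCore and Atom
 * extensions used as additional data sources.
 */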
20class Rss extends AbstractFeed
21{
22    /**
23     * Constructor
24     *
25     * @param  DOMDocument $dom
26     * @param  string $type
27     */
28    public function __construct(DOMDocument $dom, $type = null)
29    {
30        parent::__construct($dom, $type);
31
32        $manager = Reader\Reader::getExtensionManager();
33
34        $feed = $manager->get('DublinCore\Feed');
35        $feed->setDomDocument($dom);
36        $feed->setType($this->data['type']);
37        $feed->setXpath($this->xpath);
38        $this->extensions['DublinCore\Feed'] = $feed;
39
40        $feed = $manager->get('Atom\Feed');
41        $feed->setDomDocument($dom);
42        $feed->setType($this->data['type']);
43        $feed->setXpath($this->xpath);
44        $this->extensions['Atom\Feed'] = $feed;
45
46        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
47            && $this->getType() !== Reader\Reader::TYPE_RSS_090
48        ) {
49            $xpathPrefix = '/rss/channel';
50        } else {
51            $xpathPrefix = '/rdf:RDF/rss:channel';
52        }
53        foreach ($this->extensions as $extension) {
54            $extension->setXpathPrefix($xpathPrefix);
55        }
56    }
57
58    /**
59     * Get a single author
60     *
61     * @param  int $index
62     * @return string|null
63     */
64    public function getAuthor($index = 0)
65    {
66        $authors = $this->getAuthors();
67
68        if (isset($authors[$index])) {
69            return $authors[$index];
70        }
71
72        return;
73    }
74
75    /**
76     * Get an array with feed authors
77     *
78     * @return array
79     */
80    public function getAuthors()
81    {
82        if (array_key_exists('authors', $this->data)) {
83            return $this->data['authors'];
84        }
85
86        $authors = array();
87        $authorsDc = $this->getExtension('DublinCore')->getAuthors();
88        if (!empty($authorsDc)) {
89            foreach ($authorsDc as $author) {
90                $authors[] = array(
91                    'name' => $author['name']
92                );
93            }
94        }
95
96        /**
97         * Technically RSS doesn't specific author element use at the feed level
98         * but it's supported on a "just in case" basis.
99         */
100        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
101        && $this->getType() !== Reader\Reader::TYPE_RSS_090) {
102            $list = $this->xpath->query('//author');
103        } else {
104            $list = $this->xpath->query('//rss:author');
105        }
106        if ($list->length) {
107            foreach ($list as $author) {
108                $string = trim($author->nodeValue);
109                $data = array();
110                // Pretty rough parsing - but it's a catchall
111                if (preg_match("/^.*@[^ ]*/", $string, $matches)) {
112                    $data['email'] = trim($matches[0]);
113                    if (preg_match("/\((.*)\)$/", $string, $matches)) {
114                        $data['name'] = $matches[1];
115                    }
116                    $authors[] = $data;
117                }
118            }
119        }
120
121        if (count($authors) == 0) {
122            $authors = $this->getExtension('Atom')->getAuthors();
123        } else {
124            $authors = new Reader\Collection\Author(
125                Reader\Reader::arrayUnique($authors)
126            );
127        }
128
129        if (count($authors) == 0) {
130            $authors = null;
131        }
132
133        $this->data['authors'] = $authors;
134
135        return $this->data['authors'];
136    }
137
138    /**
139     * Get the copyright entry
140     *
141     * @return string|null
142     */
143    public function getCopyright()
144    {
145        if (array_key_exists('copyright', $this->data)) {
146            return $this->data['copyright'];
147        }
148
149        $copyright = null;
150
151        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
152            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
153            $copyright = $this->xpath->evaluate('string(/rss/channel/copyright)');
154        }
155
156        if (!$copyright && $this->getExtension('DublinCore') !== null) {
157            $copyright = $this->getExtension('DublinCore')->getCopyright();
158        }
159
160        if (empty($copyright)) {
161            $copyright = $this->getExtension('Atom')->getCopyright();
162        }
163
164        if (!$copyright) {
165            $copyright = null;
166        }
167
168        $this->data['copyright'] = $copyright;
169
170        return $this->data['copyright'];
171    }
172
173    /**
174     * Get the feed creation date
175     *
176     * @return string|null
177     */
178    public function getDateCreated()
179    {
180        return $this->getDateModified();
181    }
182
183    /**
184     * Get the feed modification date
185     *
186     * @return DateTime
187     * @throws Exception\RuntimeException
188     */
189    public function getDateModified()
190    {
191        if (array_key_exists('datemodified', $this->data)) {
192            return $this->data['datemodified'];
193        }
194
195        $date = null;
196
197        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
198            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
199            $dateModified = $this->xpath->evaluate('string(/rss/channel/pubDate)');
200            if (!$dateModified) {
201                $dateModified = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
202            }
203            if ($dateModified) {
204                $dateModifiedParsed = strtotime($dateModified);
205                if ($dateModifiedParsed) {
206                    $date = new DateTime('@' . $dateModifiedParsed);
207                } else {
208                    $dateStandards = array(DateTime::RSS, DateTime::RFC822,
209                                           DateTime::RFC2822, null);
210                    foreach ($dateStandards as $standard) {
211                        try {
212                            $date = DateTime::createFromFormat($standard, $dateModified);
213                            break;
214                        } catch (\Exception $e) {
215                            if ($standard === null) {
216                                throw new Exception\RuntimeException(
217                                    'Could not load date due to unrecognised'
218                                    .' format (should follow RFC 822 or 2822):'
219                                    . $e->getMessage(),
220                                    0, $e
221                                );
222                            }
223                        }
224                    }
225                }
226            }
227        }
228
229        if (!$date) {
230            $date = $this->getExtension('DublinCore')->getDate();
231        }
232
233        if (!$date) {
234            $date = $this->getExtension('Atom')->getDateModified();
235        }
236
237        if (!$date) {
238            $date = null;
239        }
240
241        $this->data['datemodified'] = $date;
242
243        return $this->data['datemodified'];
244    }
245
246    /**
247     * Get the feed lastBuild date
248     *
249     * @throws Exception\RuntimeException
250     * @return DateTime
251     */
252    public function getLastBuildDate()
253    {
254        if (array_key_exists('lastBuildDate', $this->data)) {
255            return $this->data['lastBuildDate'];
256        }
257
258        $date = null;
259
260        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
261            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
262            $lastBuildDate = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
263            if ($lastBuildDate) {
264                $lastBuildDateParsed = strtotime($lastBuildDate);
265                if ($lastBuildDateParsed) {
266                    $date = new DateTime('@' . $lastBuildDateParsed);
267                } else {
268                    $dateStandards = array(DateTime::RSS, DateTime::RFC822,
269                                           DateTime::RFC2822, null);
270                    foreach ($dateStandards as $standard) {
271                        try {
272                            $date = DateTime::createFromFormat($standard, $lastBuildDateParsed);
273                            break;
274                        } catch (\Exception $e) {
275                            if ($standard === null) {
276                                throw new Exception\RuntimeException(
277                                    'Could not load date due to unrecognised'
278                                    .' format (should follow RFC 822 or 2822):'
279                                    . $e->getMessage(),
280                                    0, $e
281                                );
282                            }
283                        }
284                    }
285                }
286            }
287        }
288
289        if (!$date) {
290            $date = null;
291        }
292
293        $this->data['lastBuildDate'] = $date;
294
295        return $this->data['lastBuildDate'];
296    }
297
298    /**
299     * Get the feed description
300     *
301     * @return string|null
302     */
303    public function getDescription()
304    {
305        if (array_key_exists('description', $this->data)) {
306            return $this->data['description'];
307        }
308
309        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
310            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
311            $description = $this->xpath->evaluate('string(/rss/channel/description)');
312        } else {
313            $description = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:description)');
314        }
315
316        if (!$description && $this->getExtension('DublinCore') !== null) {
317            $description = $this->getExtension('DublinCore')->getDescription();
318        }
319
320        if (empty($description)) {
321            $description = $this->getExtension('Atom')->getDescription();
322        }
323
324        if (!$description) {
325            $description = null;
326        }
327
328        $this->data['description'] = $description;
329
330        return $this->data['description'];
331    }
332
333    /**
334     * Get the feed ID
335     *
336     * @return string|null
337     */
338    public function getId()
339    {
340        if (array_key_exists('id', $this->data)) {
341            return $this->data['id'];
342        }
343
344        $id = null;
345
346        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
347            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
348            $id = $this->xpath->evaluate('string(/rss/channel/guid)');
349        }
350
351        if (!$id && $this->getExtension('DublinCore') !== null) {
352            $id = $this->getExtension('DublinCore')->getId();
353        }
354
355        if (empty($id)) {
356            $id = $this->getExtension('Atom')->getId();
357        }
358
359        if (!$id) {
360            if ($this->getLink()) {
361                $id = $this->getLink();
362            } elseif ($this->getTitle()) {
363                $id = $this->getTitle();
364            } else {
365                $id = null;
366            }
367        }
368
369        $this->data['id'] = $id;
370
371        return $this->data['id'];
372    }
373
374    /**
375     * Get the feed image data
376     *
377     * @return array|null
378     */
379    public function getImage()
380    {
381        if (array_key_exists('image', $this->data)) {
382            return $this->data['image'];
383        }
384
385        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
386            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
387            $list = $this->xpath->query('/rss/channel/image');
388            $prefix = '/rss/channel/image[1]';
389        } else {
390            $list = $this->xpath->query('/rdf:RDF/rss:channel/rss:image');
391            $prefix = '/rdf:RDF/rss:channel/rss:image[1]';
392        }
393        if ($list->length > 0) {
394            $image = array();
395            $value = $this->xpath->evaluate('string(' . $prefix . '/url)');
396            if ($value) {
397                $image['uri'] = $value;
398            }
399            $value = $this->xpath->evaluate('string(' . $prefix . '/link)');
400            if ($value) {
401                $image['link'] = $value;
402            }
403            $value = $this->xpath->evaluate('string(' . $prefix . '/title)');
404            if ($value) {
405                $image['title'] = $value;
406            }
407            $value = $this->xpath->evaluate('string(' . $prefix . '/height)');
408            if ($value) {
409                $image['height'] = $value;
410            }
411            $value = $this->xpath->evaluate('string(' . $prefix . '/width)');
412            if ($value) {
413                $image['width'] = $value;
414            }
415            $value = $this->xpath->evaluate('string(' . $prefix . '/description)');
416            if ($value) {
417                $image['description'] = $value;
418            }
419        } else {
420            $image = null;
421        }
422
423        $this->data['image'] = $image;
424
425        return $this->data['image'];
426    }
427
428    /**
429     * Get the feed language
430     *
431     * @return string|null
432     */
433    public function getLanguage()
434    {
435        if (array_key_exists('language', $this->data)) {
436            return $this->data['language'];
437        }
438
439        $language = null;
440
441        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
442            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
443            $language = $this->xpath->evaluate('string(/rss/channel/language)');
444        }
445
446        if (!$language && $this->getExtension('DublinCore') !== null) {
447            $language = $this->getExtension('DublinCore')->getLanguage();
448        }
449
450        if (empty($language)) {
451            $language = $this->getExtension('Atom')->getLanguage();
452        }
453
454        if (!$language) {
455            $language = $this->xpath->evaluate('string(//@xml:lang[1])');
456        }
457
458        if (!$language) {
459            $language = null;
460        }
461
462        $this->data['language'] = $language;
463
464        return $this->data['language'];
465    }
466
467    /**
468     * Get a link to the feed
469     *
470     * @return string|null
471     */
472    public function getLink()
473    {
474        if (array_key_exists('link', $this->data)) {
475            return $this->data['link'];
476        }
477
478        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
479            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
480            $link = $this->xpath->evaluate('string(/rss/channel/link)');
481        } else {
482            $link = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:link)');
483        }
484
485        if (empty($link)) {
486            $link = $this->getExtension('Atom')->getLink();
487        }
488
489        if (!$link) {
490            $link = null;
491        }
492
493        $this->data['link'] = $link;
494
495        return $this->data['link'];
496    }
497
498    /**
499     * Get a link to the feed XML
500     *
501     * @return string|null
502     */
503    public function getFeedLink()
504    {
505        if (array_key_exists('feedlink', $this->data)) {
506            return $this->data['feedlink'];
507        }
508
509        $link = $this->getExtension('Atom')->getFeedLink();
510
511        if ($link === null || empty($link)) {
512            $link = $this->getOriginalSourceUri();
513        }
514
515        $this->data['feedlink'] = $link;
516
517        return $this->data['feedlink'];
518    }
519
520    /**
521     * Get the feed generator entry
522     *
523     * @return string|null
524     */
525    public function getGenerator()
526    {
527        if (array_key_exists('generator', $this->data)) {
528            return $this->data['generator'];
529        }
530
531        $generator = null;
532
533        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
534            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
535            $generator = $this->xpath->evaluate('string(/rss/channel/generator)');
536        }
537
538        if (!$generator) {
539            if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
540            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
541                $generator = $this->xpath->evaluate('string(/rss/channel/atom:generator)');
542            } else {
543                $generator = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
544            }
545        }
546
547        if (empty($generator)) {
548            $generator = $this->getExtension('Atom')->getGenerator();
549        }
550
551        if (!$generator) {
552            $generator = null;
553        }
554
555        $this->data['generator'] = $generator;
556
557        return $this->data['generator'];
558    }
559
560    /**
561     * Get the feed title
562     *
563     * @return string|null
564     */
565    public function getTitle()
566    {
567        if (array_key_exists('title', $this->data)) {
568            return $this->data['title'];
569        }
570
571        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
572            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
573            $title = $this->xpath->evaluate('string(/rss/channel/title)');
574        } else {
575            $title = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:title)');
576        }
577
578        if (!$title && $this->getExtension('DublinCore') !== null) {
579            $title = $this->getExtension('DublinCore')->getTitle();
580        }
581
582        if (!$title) {
583            $title = $this->getExtension('Atom')->getTitle();
584        }
585
586        if (!$title) {
587            $title = null;
588        }
589
590        $this->data['title'] = $title;
591
592        return $this->data['title'];
593    }
594
595    /**
596     * Get an array of any supported Pusubhubbub endpoints
597     *
598     * @return array|null
599     */
600    public function getHubs()
601    {
602        if (array_key_exists('hubs', $this->data)) {
603            return $this->data['hubs'];
604        }
605
606        $hubs = $this->getExtension('Atom')->getHubs();
607
608        if (empty($hubs)) {
609            $hubs = null;
610        } else {
611            $hubs = array_unique($hubs);
612        }
613
614        $this->data['hubs'] = $hubs;
615
616        return $this->data['hubs'];
617    }
618
619    /**
620     * Get all categories
621     *
622     * @return Reader\Collection\Category
623     */
624    public function getCategories()
625    {
626        if (array_key_exists('categories', $this->data)) {
627            return $this->data['categories'];
628        }
629
630        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 &&
631            $this->getType() !== Reader\Reader::TYPE_RSS_090) {
632            $list = $this->xpath->query('/rss/channel//category');
633        } else {
634            $list = $this->xpath->query('/rdf:RDF/rss:channel//rss:category');
635        }
636
637        if ($list->length) {
638            $categoryCollection = new Collection\Category;
639            foreach ($list as $category) {
640                $categoryCollection[] = array(
641                    'term' => $category->nodeValue,
642                    'scheme' => $category->getAttribute('domain'),
643                    'label' => $category->nodeValue,
644                );
645            }
646        } else {
647            $categoryCollection = $this->getExtension('DublinCore')->getCategories();
648        }
649
650        if (count($categoryCollection) == 0) {
651            $categoryCollection = $this->getExtension('Atom')->getCategories();
652        }
653
654        $this->data['categories'] = $categoryCollection;
655
656        return $this->data['categories'];
657    }
658
659    /**
660     * Read all entries to the internal entries array
661     *
662     */
663    protected function indexEntries()
664    {
665        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 && $this->getType() !== Reader\Reader::TYPE_RSS_090) {
666            $entries = $this->xpath->evaluate('//item');
667        } else {
668            $entries = $this->xpath->evaluate('//rss:item');
669        }
670
671        foreach ($entries as $index => $entry) {
672            $this->entries[$index] = $entry;
673        }
674    }
675
676    /**
677     * Register the default namespaces for the current feed format
678     *
679     */
680    protected function registerNamespaces()
681    {
682        switch ($this->data['type']) {
683            case Reader\Reader::TYPE_RSS_10:
684                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
685                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_10);
686                break;
687
688            case Reader\Reader::TYPE_RSS_090:
689                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
690                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_090);
691                break;
692        }
693    }
694}
695