1<?php
2
3/**
4 * @see       https://github.com/laminas/laminas-feed for the canonical source repository
5 * @copyright https://github.com/laminas/laminas-feed/blob/master/COPYRIGHT.md
6 * @license   https://github.com/laminas/laminas-feed/blob/master/LICENSE.md New BSD License
7 */
8
9namespace Laminas\Feed\Reader\Feed;
10
11use DateTime;
12use DOMDocument;
13use Laminas\Feed\Reader;
14use Laminas\Feed\Reader\Collection;
15use Laminas\Feed\Reader\Exception;
16
17class Rss extends AbstractFeed
18{
19    /**
20     * @param null|string $type
21     */
22    public function __construct(DOMDocument $dom, $type = null)
23    {
24        parent::__construct($dom, $type);
25
26        $manager = Reader\Reader::getExtensionManager();
27
28        $feed = $manager->get('DublinCore\Feed');
29        $feed->setDomDocument($dom);
30        $feed->setType($this->data['type']);
31        $feed->setXpath($this->xpath);
32        $this->extensions['DublinCore\Feed'] = $feed;
33
34        $feed = $manager->get('Atom\Feed');
35        $feed->setDomDocument($dom);
36        $feed->setType($this->data['type']);
37        $feed->setXpath($this->xpath);
38        $this->extensions['Atom\Feed'] = $feed;
39
40        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
41            && $this->getType() !== Reader\Reader::TYPE_RSS_090
42        ) {
43            $xpathPrefix = '/rss/channel';
44        } else {
45            $xpathPrefix = '/rdf:RDF/rss:channel';
46        }
47        foreach ($this->extensions as $extension) {
48            $extension->setXpathPrefix($xpathPrefix);
49        }
50    }
51
52    /**
53     * Get a single author
54     *
55     * @param  int $index
56     * @return null|string
57     */
58    public function getAuthor($index = 0)
59    {
60        $authors = $this->getAuthors();
61
62        if (isset($authors[$index])) {
63            return $authors[$index];
64        }
65
66        return;
67    }
68
69    /**
70     * Get an array with feed authors
71     *
72     * @return array
73     */
74    public function getAuthors()
75    {
76        if (array_key_exists('authors', $this->data)) {
77            return $this->data['authors'];
78        }
79
80        $authors   = [];
81        $authorsDc = $this->getExtension('DublinCore')->getAuthors();
82        if (! empty($authorsDc)) {
83            foreach ($authorsDc as $author) {
84                $authors[] = [
85                    'name' => $author['name'],
86                ];
87            }
88        }
89
90        /**
91         * Technically RSS doesn't specific author element use at the feed level
92         * but it's supported on a "just in case" basis.
93         */
94        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
95            && $this->getType() !== Reader\Reader::TYPE_RSS_090
96        ) {
97            $list = $this->xpath->query('//author');
98        } else {
99            $list = $this->xpath->query('//rss:author');
100        }
101        if ($list->length) {
102            foreach ($list as $author) {
103                $string = trim($author->nodeValue);
104                $data   = [];
105                // Pretty rough parsing - but it's a catchall
106                if (preg_match('/^.*@[^ ]*/', $string, $matches)) {
107                    $data['email'] = trim($matches[0]);
108                    if (preg_match('/\((.*)\)$/', $string, $matches)) {
109                        $data['name'] = $matches[1];
110                    }
111                    $authors[] = $data;
112                }
113            }
114        }
115
116        if (count($authors) === 0) {
117            $authors = $this->getExtension('Atom')->getAuthors();
118        } else {
119            $authors = new Reader\Collection\Author(
120                Reader\Reader::arrayUnique($authors)
121            );
122        }
123
124        if (count($authors) === 0) {
125            $authors = null;
126        }
127
128        $this->data['authors'] = $authors;
129
130        return $this->data['authors'];
131    }
132
133    /**
134     * Get the copyright entry
135     *
136     * @return null|string
137     */
138    public function getCopyright()
139    {
140        if (array_key_exists('copyright', $this->data)) {
141            return $this->data['copyright'];
142        }
143
144        $copyright = null;
145
146        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
147            && $this->getType() !== Reader\Reader::TYPE_RSS_090
148        ) {
149            $copyright = $this->xpath->evaluate('string(/rss/channel/copyright)');
150        }
151
152        if (! $copyright && $this->getExtension('DublinCore') !== null) {
153            $copyright = $this->getExtension('DublinCore')->getCopyright();
154        }
155
156        if (empty($copyright)) {
157            $copyright = $this->getExtension('Atom')->getCopyright();
158        }
159
160        if (! $copyright) {
161            $copyright = null;
162        }
163
164        $this->data['copyright'] = $copyright;
165
166        return $this->data['copyright'];
167    }
168
169    /**
170     * Get the feed creation date
171     *
172     * @return null|DateTime
173     */
174    public function getDateCreated()
175    {
176        return $this->getDateModified();
177    }
178
179    /**
180     * Get the feed modification date
181     *
182     * @return DateTime
183     * @throws Exception\RuntimeException
184     */
185    public function getDateModified()
186    {
187        if (array_key_exists('datemodified', $this->data)) {
188            return $this->data['datemodified'];
189        }
190
191        $date = null;
192
193        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
194            && $this->getType() !== Reader\Reader::TYPE_RSS_090
195        ) {
196            $dateModified = $this->xpath->evaluate('string(/rss/channel/pubDate)');
197            if (! $dateModified) {
198                $dateModified = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
199            }
200            if ($dateModified) {
201                $dateModifiedParsed = strtotime($dateModified);
202                if ($dateModifiedParsed) {
203                    $date = new DateTime('@' . $dateModifiedParsed);
204                } else {
205                    $dateStandards = [
206                        DateTime::RSS,
207                        DateTime::RFC822,
208                        DateTime::RFC2822,
209                        null,
210                    ];
211                    foreach ($dateStandards as $standard) {
212                        try {
213                            $date = DateTime::createFromFormat($standard, $dateModified);
214                            break;
215                        } catch (\Exception $e) {
216                            if ($standard === null) {
217                                throw new Exception\RuntimeException(
218                                    'Could not load date due to unrecognised format'
219                                    . ' (should follow RFC 822 or 2822): ' . $e->getMessage(),
220                                    0,
221                                    $e
222                                );
223                            }
224                        }
225                    }
226                }
227            }
228        }
229
230        if (! $date) {
231            $date = $this->getExtension('DublinCore')->getDate();
232        }
233
234        if (! $date) {
235            $date = $this->getExtension('Atom')->getDateModified();
236        }
237
238        if (! $date) {
239            $date = null;
240        }
241
242        $this->data['datemodified'] = $date;
243
244        return $this->data['datemodified'];
245    }
246
247    /**
248     * Get the feed lastBuild date
249     *
250     * @return DateTime
251     * @throws Exception\RuntimeException
252     */
253    public function getLastBuildDate()
254    {
255        if (array_key_exists('lastBuildDate', $this->data)) {
256            return $this->data['lastBuildDate'];
257        }
258
259        $date = null;
260
261        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
262            && $this->getType() !== Reader\Reader::TYPE_RSS_090
263        ) {
264            $lastBuildDate = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
265            if ($lastBuildDate) {
266                $lastBuildDateParsed = strtotime($lastBuildDate);
267                if ($lastBuildDateParsed) {
268                    $date = new DateTime('@' . $lastBuildDateParsed);
269                } else {
270                    $dateStandards = [
271                        DateTime::RSS,
272                        DateTime::RFC822,
273                        DateTime::RFC2822,
274                        null,
275                    ];
276                    foreach ($dateStandards as $standard) {
277                        try {
278                            $date = DateTime::createFromFormat($standard, $lastBuildDateParsed);
279                            break;
280                        } catch (\Exception $e) {
281                            if ($standard === null) {
282                                throw new Exception\RuntimeException(
283                                    'Could not load date due to unrecognised format'
284                                    . ' (should follow RFC 822 or 2822): ' . $e->getMessage(),
285                                    0,
286                                    $e
287                                );
288                            }
289                        }
290                    }
291                }
292            }
293        }
294
295        if (! $date) {
296            $date = null;
297        }
298
299        $this->data['lastBuildDate'] = $date;
300
301        return $this->data['lastBuildDate'];
302    }
303
304    /**
305     * Get the feed description
306     *
307     * @return null|string
308     */
309    public function getDescription()
310    {
311        if (array_key_exists('description', $this->data)) {
312            return $this->data['description'];
313        }
314
315        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
316            && $this->getType() !== Reader\Reader::TYPE_RSS_090
317        ) {
318            $description = $this->xpath->evaluate('string(/rss/channel/description)');
319        } else {
320            $description = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:description)');
321        }
322
323        if (! $description && $this->getExtension('DublinCore') !== null) {
324            $description = $this->getExtension('DublinCore')->getDescription();
325        }
326
327        if (empty($description)) {
328            $description = $this->getExtension('Atom')->getDescription();
329        }
330
331        if (! $description) {
332            $description = null;
333        }
334
335        $this->data['description'] = $description;
336
337        return $this->data['description'];
338    }
339
340    /**
341     * Get the feed ID
342     *
343     * @return null|string
344     */
345    public function getId()
346    {
347        if (array_key_exists('id', $this->data)) {
348            return $this->data['id'];
349        }
350
351        $id = null;
352
353        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
354            && $this->getType() !== Reader\Reader::TYPE_RSS_090
355        ) {
356            $id = $this->xpath->evaluate('string(/rss/channel/guid)');
357        }
358
359        if (! $id && $this->getExtension('DublinCore') !== null) {
360            $id = $this->getExtension('DublinCore')->getId();
361        }
362
363        if (empty($id)) {
364            $id = $this->getExtension('Atom')->getId();
365        }
366
367        if (! $id) {
368            if ($this->getLink()) {
369                $id = $this->getLink();
370            } elseif ($this->getTitle()) {
371                $id = $this->getTitle();
372            } else {
373                $id = null;
374            }
375        }
376
377        $this->data['id'] = $id;
378
379        return $this->data['id'];
380    }
381
382    /**
383     * Get the feed image data
384     *
385     * @return null|array
386     */
387    public function getImage()
388    {
389        if (array_key_exists('image', $this->data)) {
390            return $this->data['image'];
391        }
392
393        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
394            && $this->getType() !== Reader\Reader::TYPE_RSS_090
395        ) {
396            $list   = $this->xpath->query('/rss/channel/image');
397            $prefix = '/rss/channel/image[1]';
398        } else {
399            $list   = $this->xpath->query('/rdf:RDF/rss:channel/rss:image');
400            $prefix = '/rdf:RDF/rss:channel/rss:image[1]';
401        }
402        if ($list->length > 0) {
403            $image = [];
404            $value = $this->xpath->evaluate('string(' . $prefix . '/url)');
405            if ($value) {
406                $image['uri'] = $value;
407            }
408            $value = $this->xpath->evaluate('string(' . $prefix . '/link)');
409            if ($value) {
410                $image['link'] = $value;
411            }
412            $value = $this->xpath->evaluate('string(' . $prefix . '/title)');
413            if ($value) {
414                $image['title'] = $value;
415            }
416            $value = $this->xpath->evaluate('string(' . $prefix . '/height)');
417            if ($value) {
418                $image['height'] = $value;
419            }
420            $value = $this->xpath->evaluate('string(' . $prefix . '/width)');
421            if ($value) {
422                $image['width'] = $value;
423            }
424            $value = $this->xpath->evaluate('string(' . $prefix . '/description)');
425            if ($value) {
426                $image['description'] = $value;
427            }
428        } else {
429            $image = null;
430        }
431
432        $this->data['image'] = $image;
433
434        return $this->data['image'];
435    }
436
437    /**
438     * Get the feed language
439     *
440     * @return null|string
441     */
442    public function getLanguage()
443    {
444        if (array_key_exists('language', $this->data)) {
445            return $this->data['language'];
446        }
447
448        $language = null;
449
450        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
451            && $this->getType() !== Reader\Reader::TYPE_RSS_090
452        ) {
453            $language = $this->xpath->evaluate('string(/rss/channel/language)');
454        }
455
456        if (! $language && $this->getExtension('DublinCore') !== null) {
457            $language = $this->getExtension('DublinCore')->getLanguage();
458        }
459
460        if (empty($language)) {
461            $language = $this->getExtension('Atom')->getLanguage();
462        }
463
464        if (! $language) {
465            $language = $this->xpath->evaluate('string(//@xml:lang[1])');
466        }
467
468        if (! $language) {
469            $language = null;
470        }
471
472        $this->data['language'] = $language;
473
474        return $this->data['language'];
475    }
476
477    /**
478     * Get a link to the feed
479     *
480     * @return null|string
481     */
482    public function getLink()
483    {
484        if (array_key_exists('link', $this->data)) {
485            return $this->data['link'];
486        }
487
488        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
489            && $this->getType() !== Reader\Reader::TYPE_RSS_090
490        ) {
491            $link = $this->xpath->evaluate('string(/rss/channel/link)');
492        } else {
493            $link = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:link)');
494        }
495
496        if (empty($link)) {
497            $link = $this->getExtension('Atom')->getLink();
498        }
499
500        if (! $link) {
501            $link = null;
502        }
503
504        $this->data['link'] = $link;
505
506        return $this->data['link'];
507    }
508
509    /**
510     * Get a link to the feed XML
511     *
512     * @return null|string
513     */
514    public function getFeedLink()
515    {
516        if (array_key_exists('feedlink', $this->data)) {
517            return $this->data['feedlink'];
518        }
519
520        $link = $this->getExtension('Atom')->getFeedLink();
521
522        if ($link === null || empty($link)) {
523            $link = $this->getOriginalSourceUri();
524        }
525
526        $this->data['feedlink'] = $link;
527
528        return $this->data['feedlink'];
529    }
530
531    /**
532     * Get the feed generator entry
533     *
534     * @return null|string
535     */
536    public function getGenerator()
537    {
538        if (array_key_exists('generator', $this->data)) {
539            return $this->data['generator'];
540        }
541
542        $generator = null;
543
544        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
545            && $this->getType() !== Reader\Reader::TYPE_RSS_090
546        ) {
547            $generator = $this->xpath->evaluate('string(/rss/channel/generator)');
548        }
549
550        if (! $generator) {
551            if ($this->getType() !== Reader\Reader::TYPE_RSS_10
552                && $this->getType() !== Reader\Reader::TYPE_RSS_090
553            ) {
554                $generator = $this->xpath->evaluate('string(/rss/channel/atom:generator)');
555            } else {
556                $generator = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
557            }
558        }
559
560        if (empty($generator)) {
561            $generator = $this->getExtension('Atom')->getGenerator();
562        }
563
564        if (! $generator) {
565            $generator = null;
566        }
567
568        $this->data['generator'] = $generator;
569
570        return $this->data['generator'];
571    }
572
573    /**
574     * Get the feed title
575     *
576     * @return null|string
577     */
578    public function getTitle()
579    {
580        if (array_key_exists('title', $this->data)) {
581            return $this->data['title'];
582        }
583
584        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
585            && $this->getType() !== Reader\Reader::TYPE_RSS_090
586        ) {
587            $title = $this->xpath->evaluate('string(/rss/channel/title)');
588        } else {
589            $title = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:title)');
590        }
591
592        if (! $title && $this->getExtension('DublinCore') !== null) {
593            $title = $this->getExtension('DublinCore')->getTitle();
594        }
595
596        if (! $title) {
597            $title = $this->getExtension('Atom')->getTitle();
598        }
599
600        if (! $title) {
601            $title = null;
602        }
603
604        $this->data['title'] = $title;
605
606        return $this->data['title'];
607    }
608
609    /**
610     * Get an array of any supported Pusubhubbub endpoints
611     *
612     * @return null|array
613     */
614    public function getHubs()
615    {
616        if (array_key_exists('hubs', $this->data)) {
617            return $this->data['hubs'];
618        }
619
620        $hubs = $this->getExtension('Atom')->getHubs();
621
622        if (empty($hubs)) {
623            $hubs = null;
624        } else {
625            $hubs = array_unique($hubs);
626        }
627
628        $this->data['hubs'] = $hubs;
629
630        return $this->data['hubs'];
631    }
632
633    /**
634     * Get all categories
635     *
636     * @return Reader\Collection\Category
637     */
638    public function getCategories()
639    {
640        if (array_key_exists('categories', $this->data)) {
641            return $this->data['categories'];
642        }
643
644        if ($this->getType() !== Reader\Reader::TYPE_RSS_10
645            && $this->getType() !== Reader\Reader::TYPE_RSS_090
646        ) {
647            $list = $this->xpath->query('/rss/channel//category');
648        } else {
649            $list = $this->xpath->query('/rdf:RDF/rss:channel//rss:category');
650        }
651
652        if ($list->length) {
653            $categoryCollection = new Collection\Category();
654            foreach ($list as $category) {
655                $categoryCollection[] = [
656                    'term'   => $category->nodeValue,
657                    'scheme' => $category->getAttribute('domain'),
658                    'label'  => $category->nodeValue,
659                ];
660            }
661        } else {
662            $categoryCollection = $this->getExtension('DublinCore')->getCategories();
663        }
664
665        if (count($categoryCollection) === 0) {
666            $categoryCollection = $this->getExtension('Atom')->getCategories();
667        }
668
669        $this->data['categories'] = $categoryCollection;
670
671        return $this->data['categories'];
672    }
673
674    /**
675     * Read all entries to the internal entries array
676     */
677    protected function indexEntries()
678    {
679        if ($this->getType() !== Reader\Reader::TYPE_RSS_10 && $this->getType() !== Reader\Reader::TYPE_RSS_090) {
680            $entries = $this->xpath->evaluate('//item');
681        } else {
682            $entries = $this->xpath->evaluate('//rss:item');
683        }
684
685        foreach ($entries as $index => $entry) {
686            $this->entries[$index] = $entry;
687        }
688    }
689
690    /**
691     * Register the default namespaces for the current feed format
692     */
693    protected function registerNamespaces()
694    {
695        switch ($this->data['type']) {
696            case Reader\Reader::TYPE_RSS_10:
697                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
698                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_10);
699                break;
700
701            case Reader\Reader::TYPE_RSS_090:
702                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
703                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_090);
704                break;
705        }
706    }
707}
708