<?php

/**
 * @see       https://github.com/laminas/laminas-feed for the canonical source repository
 * @copyright https://github.com/laminas/laminas-feed/blob/master/COPYRIGHT.md
 * @license   https://github.com/laminas/laminas-feed/blob/master/LICENSE.md New BSD License
 */

namespace Laminas\Feed\Reader\Feed;

use DateTime;
use DOMDocument;
use Laminas\Feed\Reader;
use Laminas\Feed\Reader\Collection;
use Laminas\Feed\Reader\Exception;

/**
 * Feed-level reader for RSS documents.
 *
 * Two document layouts are handled: feeds typed RSS 1.0 / RSS 0.90 are read
 * through the namespaced "/rdf:RDF/rss:channel" path, every other type through
 * the plain "/rss/channel" path. Values missing from the native RSS elements
 * are looked up in the DublinCore and Atom feed extensions. Every getter
 * caches its result in $this->data.
 */
class Rss extends AbstractFeed
{
    /**
     * Register the DublinCore and Atom feed extensions and point every
     * registered extension at the channel element of this feed.
     *
     * @param null|string $type
     */
    public function __construct(DOMDocument $dom, $type = null)
    {
        parent::__construct($dom, $type);

        $manager = Reader\Reader::getExtensionManager();

        foreach (['DublinCore\Feed', 'Atom\Feed'] as $name) {
            $extension = $manager->get($name);
            $extension->setDomDocument($dom);
            $extension->setType($this->data['type']);
            $extension->setXpath($this->xpath);
            $this->extensions[$name] = $extension;
        }

        $xpathPrefix = $this->isRdfFeed() ? '/rdf:RDF/rss:channel' : '/rss/channel';

        // Applies to every extension held by the feed, not only the two added above.
        foreach ($this->extensions as $extension) {
            $extension->setXpathPrefix($xpathPrefix);
        }
    }

    /**
     * Get a single author
     *
     * @param  int $index Position in the list returned by getAuthors()
     * @return null|array The author at that position, or null when there is none
     */
    public function getAuthor($index = 0)
    {
        $authors = $this->getAuthors();

        if (isset($authors[$index])) {
            return $authors[$index];
        }

        return null;
    }

    /**
     * Get the feed authors
     *
     * Authors are collected from the DublinCore extension and from any
     * "author" element in the document; only when both yield nothing is the
     * Atom extension consulted.
     *
     * @return null|Reader\Collection\Author Null when no author was found
     */
    public function getAuthors()
    {
        if (array_key_exists('authors', $this->data)) {
            return $this->data['authors'];
        }

        $authors   = [];
        $authorsDc = $this->getExtension('DublinCore')->getAuthors();
        if (! empty($authorsDc)) {
            foreach ($authorsDc as $author) {
                $authors[] = [
                    'name' => $author['name'],
                ];
            }
        }

        /**
         * Technically RSS doesn't specify author element use at the feed level
         * but it's supported on a "just in case" basis.
         */
        $list = $this->xpath->query($this->isRdfFeed() ? '//rss:author' : '//author');

        if ($list->length) {
            foreach ($list as $author) {
                $string = trim($author->nodeValue);
                $data   = [];
                // Pretty rough parsing - but it's a catchall.
                // Elements without an "@" are ignored entirely.
                if (preg_match('/^.*@[^ ]*/', $string, $matches)) {
                    $data['email'] = trim($matches[0]);
                    // Optional trailing "(Name)" part
                    if (preg_match('/\((.*)\)$/', $string, $matches)) {
                        $data['name'] = $matches[1];
                    }
                    $authors[] = $data;
                }
            }
        }

        if (count($authors) === 0) {
            $authors = $this->getExtension('Atom')->getAuthors();
        } else {
            $authors = new Reader\Collection\Author(
                Reader\Reader::arrayUnique($authors)
            );
        }

        if (count($authors) === 0) {
            $authors = null;
        }

        $this->data['authors'] = $authors;

        return $this->data['authors'];
    }

    /**
     * Get the copyright entry
     *
     * Lookup order: RSS "copyright" element (non-RDF feeds only), DublinCore
     * extension, Atom extension.
     *
     * @return null|string
     */
    public function getCopyright()
    {
        if (array_key_exists('copyright', $this->data)) {
            return $this->data['copyright'];
        }

        $copyright = null;

        if (! $this->isRdfFeed()) {
            $copyright = $this->xpath->evaluate('string(/rss/channel/copyright)');
        }

        if (! $copyright && $this->getExtension('DublinCore') !== null) {
            $copyright = $this->getExtension('DublinCore')->getCopyright();
        }

        if (empty($copyright)) {
            $copyright = $this->getExtension('Atom')->getCopyright();
        }

        if (! $copyright) {
            $copyright = null;
        }

        $this->data['copyright'] = $copyright;

        return $this->data['copyright'];
    }

    /**
     * Get the feed creation date
     *
     * RSS has no separate creation date here; this is an alias of
     * getDateModified().
     *
     * @return null|DateTime
     */
    public function getDateCreated()
    {
        return $this->getDateModified();
    }

    /**
     * Get the feed modification date
     *
     * For non-RDF feeds the "pubDate" element is used, falling back to
     * "lastBuildDate". When neither yields a parseable date (or the feed is
     * RDF-based) the DublinCore and then the Atom extension are consulted.
     *
     * @return null|DateTime
     */
    public function getDateModified()
    {
        if (array_key_exists('datemodified', $this->data)) {
            return $this->data['datemodified'];
        }

        $date = null;

        if (! $this->isRdfFeed()) {
            $dateModified = $this->xpath->evaluate('string(/rss/channel/pubDate)');
            if (! $dateModified) {
                $dateModified = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            }
            if ($dateModified) {
                $date = $this->parseDate($dateModified);
            }
        }

        if (! $date) {
            $date = $this->getExtension('DublinCore')->getDate();
        }

        if (! $date) {
            $date = $this->getExtension('Atom')->getDateModified();
        }

        if (! $date) {
            $date = null;
        }

        $this->data['datemodified'] = $date;

        return $this->data['datemodified'];
    }

    /**
     * Get the feed lastBuild date
     *
     * Only the RSS "lastBuildDate" element of non-RDF feeds is read; no
     * extension fallback is applied.
     *
     * @return null|DateTime
     */
    public function getLastBuildDate()
    {
        if (array_key_exists('lastBuildDate', $this->data)) {
            return $this->data['lastBuildDate'];
        }

        $date = null;

        if (! $this->isRdfFeed()) {
            $lastBuildDate = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            if ($lastBuildDate) {
                $date = $this->parseDate($lastBuildDate);
            }
        }

        $this->data['lastBuildDate'] = $date;

        return $this->data['lastBuildDate'];
    }

    /**
     * Get the feed description
     *
     * Lookup order: RSS "description" element, DublinCore extension, Atom
     * extension.
     *
     * @return null|string
     */
    public function getDescription()
    {
        if (array_key_exists('description', $this->data)) {
            return $this->data['description'];
        }

        if ($this->isRdfFeed()) {
            $description = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:description)');
        } else {
            $description = $this->xpath->evaluate('string(/rss/channel/description)');
        }

        if (! $description && $this->getExtension('DublinCore') !== null) {
            $description = $this->getExtension('DublinCore')->getDescription();
        }

        if (empty($description)) {
            $description = $this->getExtension('Atom')->getDescription();
        }

        if (! $description) {
            $description = null;
        }

        $this->data['description'] = $description;

        return $this->data['description'];
    }

    /**
     * Get the feed ID
     *
     * Lookup order: RSS "guid" element (non-RDF feeds only), DublinCore
     * extension, Atom extension, then the feed link and finally the feed
     * title as last-resort identifiers.
     *
     * @return null|string
     */
    public function getId()
    {
        if (array_key_exists('id', $this->data)) {
            return $this->data['id'];
        }

        $id = null;

        if (! $this->isRdfFeed()) {
            $id = $this->xpath->evaluate('string(/rss/channel/guid)');
        }

        if (! $id && $this->getExtension('DublinCore') !== null) {
            $id = $this->getExtension('DublinCore')->getId();
        }

        if (empty($id)) {
            $id = $this->getExtension('Atom')->getId();
        }

        if (! $id) {
            if ($this->getLink()) {
                $id = $this->getLink();
            } elseif ($this->getTitle()) {
                $id = $this->getTitle();
            } else {
                $id = null;
            }
        }

        $this->data['id'] = $id;

        return $this->data['id'];
    }

    /**
     * Get the feed image data
     *
     * The result may contain the keys "uri", "link", "title", "height",
     * "width" and "description"; a key is present only when the matching
     * child element has a non-empty value.
     *
     * @return null|array Null when the channel has no image element
     */
    public function getImage()
    {
        if (array_key_exists('image', $this->data)) {
            return $this->data['image'];
        }

        if ($this->isRdfFeed()) {
            $list   = $this->xpath->query('/rdf:RDF/rss:channel/rss:image');
            $prefix = '/rdf:RDF/rss:channel/rss:image[1]';
        } else {
            $list   = $this->xpath->query('/rss/channel/image');
            $prefix = '/rss/channel/image[1]';
        }

        if ($list->length > 0) {
            // Child element name => key in the returned array.
            // NOTE(review): the child names carry no "rss:" prefix, so for
            // RDF-based feeds they only match un-namespaced children - confirm
            // this is intended.
            $fields = [
                'url'         => 'uri',
                'link'        => 'link',
                'title'       => 'title',
                'height'      => 'height',
                'width'       => 'width',
                'description' => 'description',
            ];

            $image = [];
            foreach ($fields as $element => $key) {
                $value = $this->xpath->evaluate('string(' . $prefix . '/' . $element . ')');
                if ($value) {
                    $image[$key] = $value;
                }
            }
        } else {
            $image = null;
        }

        $this->data['image'] = $image;

        return $this->data['image'];
    }

    /**
     * Get the feed language
     *
     * Lookup order: RSS "language" element (non-RDF feeds only), DublinCore
     * extension, Atom extension, then any xml:lang attribute in the document.
     *
     * @return null|string
     */
    public function getLanguage()
    {
        if (array_key_exists('language', $this->data)) {
            return $this->data['language'];
        }

        $language = null;

        if (! $this->isRdfFeed()) {
            $language = $this->xpath->evaluate('string(/rss/channel/language)');
        }

        if (! $language && $this->getExtension('DublinCore') !== null) {
            $language = $this->getExtension('DublinCore')->getLanguage();
        }

        if (empty($language)) {
            $language = $this->getExtension('Atom')->getLanguage();
        }

        if (! $language) {
            $language = $this->xpath->evaluate('string(//@xml:lang[1])');
        }

        if (! $language) {
            $language = null;
        }

        $this->data['language'] = $language;

        return $this->data['language'];
    }

    /**
     * Get a link to the feed
     *
     * Lookup order: RSS "link" element, Atom extension.
     *
     * @return null|string
     */
    public function getLink()
    {
        if (array_key_exists('link', $this->data)) {
            return $this->data['link'];
        }

        if ($this->isRdfFeed()) {
            $link = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:link)');
        } else {
            $link = $this->xpath->evaluate('string(/rss/channel/link)');
        }

        if (empty($link)) {
            $link = $this->getExtension('Atom')->getLink();
        }

        if (! $link) {
            $link = null;
        }

        $this->data['link'] = $link;

        return $this->data['link'];
    }

    /**
     * Get a link to the feed XML
     *
     * Taken from the Atom extension; when that yields nothing the original
     * source URI of the feed is used instead.
     *
     * @return null|string
     */
    public function getFeedLink()
    {
        if (array_key_exists('feedlink', $this->data)) {
            return $this->data['feedlink'];
        }

        $link = $this->getExtension('Atom')->getFeedLink();

        if (empty($link)) {
            $link = $this->getOriginalSourceUri();
        }

        $this->data['feedlink'] = $link;

        return $this->data['feedlink'];
    }

    /**
     * Get the feed generator entry
     *
     * Lookup order: RSS "generator" element (non-RDF feeds only), an
     * "atom:generator" child of the channel, Atom extension.
     *
     * @return null|string
     */
    public function getGenerator()
    {
        if (array_key_exists('generator', $this->data)) {
            return $this->data['generator'];
        }

        $generator = null;

        if (! $this->isRdfFeed()) {
            $generator = $this->xpath->evaluate('string(/rss/channel/generator)');
        }

        if (! $generator) {
            if ($this->isRdfFeed()) {
                $generator = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
            } else {
                $generator = $this->xpath->evaluate('string(/rss/channel/atom:generator)');
            }
        }

        if (empty($generator)) {
            $generator = $this->getExtension('Atom')->getGenerator();
        }

        if (! $generator) {
            $generator = null;
        }

        $this->data['generator'] = $generator;

        return $this->data['generator'];
    }

    /**
     * Get the feed title
     *
     * Lookup order: RSS "title" element, DublinCore extension, Atom extension.
     *
     * @return null|string
     */
    public function getTitle()
    {
        if (array_key_exists('title', $this->data)) {
            return $this->data['title'];
        }

        if ($this->isRdfFeed()) {
            $title = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:title)');
        } else {
            $title = $this->xpath->evaluate('string(/rss/channel/title)');
        }

        if (! $title && $this->getExtension('DublinCore') !== null) {
            $title = $this->getExtension('DublinCore')->getTitle();
        }

        if (! $title) {
            $title = $this->getExtension('Atom')->getTitle();
        }

        if (! $title) {
            $title = null;
        }

        $this->data['title'] = $title;

        return $this->data['title'];
    }

    /**
     * Get an array of any supported PubSubHubbub endpoints
     *
     * The endpoints come from the Atom extension; duplicates are removed.
     *
     * @return null|array Null when no hub is advertised
     */
    public function getHubs()
    {
        if (array_key_exists('hubs', $this->data)) {
            return $this->data['hubs'];
        }

        $hubs = $this->getExtension('Atom')->getHubs();

        if (empty($hubs)) {
            $hubs = null;
        } else {
            $hubs = array_unique($hubs);
        }

        $this->data['hubs'] = $hubs;

        return $this->data['hubs'];
    }

    /**
     * Get all categories
     *
     * Category elements anywhere below the channel are used when present
     * (the element text serves as both term and label, the "domain"
     * attribute as scheme); otherwise the DublinCore extension and then the
     * Atom extension are consulted.
     *
     * @return Reader\Collection\Category
     */
    public function getCategories()
    {
        if (array_key_exists('categories', $this->data)) {
            return $this->data['categories'];
        }

        if ($this->isRdfFeed()) {
            $list = $this->xpath->query('/rdf:RDF/rss:channel//rss:category');
        } else {
            $list = $this->xpath->query('/rss/channel//category');
        }

        if ($list->length) {
            $categoryCollection = new Collection\Category();
            foreach ($list as $category) {
                $categoryCollection[] = [
                    'term'   => $category->nodeValue,
                    'scheme' => $category->getAttribute('domain'),
                    'label'  => $category->nodeValue,
                ];
            }
        } else {
            $categoryCollection = $this->getExtension('DublinCore')->getCategories();
        }

        if (count($categoryCollection) === 0) {
            $categoryCollection = $this->getExtension('Atom')->getCategories();
        }

        $this->data['categories'] = $categoryCollection;

        return $this->data['categories'];
    }

    /**
     * Read all entries to the internal entries array
     */
    protected function indexEntries()
    {
        $entries = $this->xpath->evaluate($this->isRdfFeed() ? '//rss:item' : '//item');

        foreach ($entries as $index => $entry) {
            $this->entries[$index] = $entry;
        }
    }

    /**
     * Register the default namespaces for the current feed format
     *
     * Only RSS 1.0 and RSS 0.90 need the "rdf" and "rss" prefixes; nothing is
     * registered for other feed types.
     */
    protected function registerNamespaces()
    {
        switch ($this->data['type']) {
            case Reader\Reader::TYPE_RSS_10:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_10);
                break;

            case Reader\Reader::TYPE_RSS_090:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_090);
                break;
        }
    }

    /**
     * Whether the feed uses the RDF-based layout (RSS 1.0 or RSS 0.90), i.e.
     * the "/rdf:RDF/rss:channel" document structure.
     *
     * @return bool
     */
    private function isRdfFeed()
    {
        $type = $this->getType();

        return $type === Reader\Reader::TYPE_RSS_10
            || $type === Reader\Reader::TYPE_RSS_090;
    }

    /**
     * Convert a raw RSS date string into a DateTime instance.
     *
     * strtotime() is tried first. When it cannot interpret the value, the
     * RSS, RFC 822 and RFC 2822 formats are each attempted in turn;
     * DateTime::createFromFormat() signals failure by returning false, so its
     * return value is checked rather than relying on an exception.
     *
     * @param  string $value Raw element text
     * @return null|DateTime Null when the value matches none of the formats
     */
    private function parseDate($value)
    {
        $value     = trim($value);
        $timestamp = strtotime($value);

        // Compare against false explicitly: 0 is a valid timestamp.
        if ($timestamp !== false) {
            return new DateTime('@' . $timestamp);
        }

        $formats = [
            DateTime::RSS,
            DateTime::RFC822,
            DateTime::RFC2822,
        ];

        foreach ($formats as $format) {
            $date = DateTime::createFromFormat($format, $value);
            if ($date instanceof DateTime) {
                return $date;
            }
        }

        return null;
    }
}