<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */

namespace Zend\Feed\Reader\Feed;

use DateTime;
use DOMDocument;
use Zend\Feed\Reader;
use Zend\Feed\Reader\Collection;
use Zend\Feed\Reader\Exception;

/**
 * Feed-level reader for RSS documents.
 *
 * Feeds typed as RSS 1.0 or RSS 0.90 are queried below
 * "/rdf:RDF/rss:channel"; every other type is queried below "/rss/channel".
 * Values that are missing from the native elements are looked up through the
 * DublinCore and Atom feed extensions registered by the constructor.
 *
 * Each getter stores its result in $this->data and returns the stored value
 * on subsequent calls.
 */
class Rss extends AbstractFeed
{
    /**
     * Constructor
     *
     * Registers the DublinCore and Atom feed extensions and gives every
     * registered extension the XPath prefix matching the feed type.
     *
     * @param DOMDocument $dom
     * @param string      $type
     */
    public function __construct(DOMDocument $dom, $type = null)
    {
        parent::__construct($dom, $type);

        $manager = Reader\Reader::getExtensionManager();

        foreach (array('DublinCore\Feed', 'Atom\Feed') as $name) {
            $extension = $manager->get($name);
            $extension->setDomDocument($dom);
            $extension->setType($this->data['type']);
            $extension->setXpath($this->xpath);
            $this->extensions[$name] = $extension;
        }

        $xpathPrefix = $this->isRdfFeed() ? '/rdf:RDF/rss:channel' : '/rss/channel';

        foreach ($this->extensions as $extension) {
            $extension->setXpathPrefix($xpathPrefix);
        }
    }

    /**
     * Get a single author
     *
     * @param  int $index Position in the list returned by getAuthors()
     * @return array|null The author at $index, or null when there is none
     */
    public function getAuthor($index = 0)
    {
        $authors = $this->getAuthors();

        if (isset($authors[$index])) {
            return $authors[$index];
        }

        return null;
    }

    /**
     * Get the feed authors
     *
     * Authors are collected from the DublinCore extension (name only) and
     * from author elements anywhere in the document. When neither yields a
     * result, the value returned by the Atom extension is used as-is.
     *
     * @return Collection\Author|null Null when no author could be found
     */
    public function getAuthors()
    {
        if (array_key_exists('authors', $this->data)) {
            return $this->data['authors'];
        }

        $authors = array();

        $authorsDc = $this->getExtension('DublinCore')->getAuthors();
        if (!empty($authorsDc)) {
            foreach ($authorsDc as $author) {
                $authors[] = array(
                    'name' => $author['name'],
                );
            }
        }

        /**
         * Technically RSS doesn't specify author element use at the feed
         * level, but it's supported on a "just in case" basis.
         */
        $list = $this->xpath->query($this->isRdfFeed() ? '//rss:author' : '//author');

        foreach ($list as $author) {
            $string = trim($author->nodeValue);
            $data   = array();
            // Pretty rough parsing - but it's a catchall. Only values that
            // contain an "@" are kept; a trailing "(...)" supplies the name.
            if (preg_match("/^.*@[^ ]*/", $string, $matches)) {
                $data['email'] = trim($matches[0]);
                if (preg_match("/\((.*)\)$/", $string, $matches)) {
                    $data['name'] = $matches[1];
                }
                $authors[] = $data;
            }
        }

        if (count($authors) == 0) {
            $authors = $this->getExtension('Atom')->getAuthors();
        } else {
            $authors = new Collection\Author(
                Reader\Reader::arrayUnique($authors)
            );
        }

        // Guard before count(): the Atom extension's return value is not
        // visible here and count(null) is a TypeError on PHP 8.
        if (!$authors || count($authors) == 0) {
            $authors = null;
        }

        $this->data['authors'] = $authors;

        return $this->data['authors'];
    }

    /**
     * Get the copyright entry
     *
     * Lookup order: the channel's copyright element (not for RSS 1.0/0.90),
     * the DublinCore extension, then the Atom extension.
     *
     * @return string|null
     */
    public function getCopyright()
    {
        if (array_key_exists('copyright', $this->data)) {
            return $this->data['copyright'];
        }

        $copyright = null;

        if (!$this->isRdfFeed()) {
            $copyright = $this->xpath->evaluate('string(/rss/channel/copyright)');
        }

        if (!$copyright && $this->getExtension('DublinCore') !== null) {
            $copyright = $this->getExtension('DublinCore')->getCopyright();
        }

        if (empty($copyright)) {
            $copyright = $this->getExtension('Atom')->getCopyright();
        }

        if (!$copyright) {
            $copyright = null;
        }

        $this->data['copyright'] = $copyright;

        return $this->data['copyright'];
    }

    /**
     * Get the feed creation date
     *
     * This is an alias of getDateModified().
     *
     * @return DateTime|null
     */
    public function getDateCreated()
    {
        return $this->getDateModified();
    }

    /**
     * Get the feed modification date
     *
     * For feeds other than RSS 1.0/0.90 the channel's pubDate is used, then
     * its lastBuildDate. A missing or unparseable value falls through to the
     * DublinCore extension and finally to the Atom extension.
     *
     * @return DateTime|null Null when no date could be determined
     */
    public function getDateModified()
    {
        if (array_key_exists('datemodified', $this->data)) {
            return $this->data['datemodified'];
        }

        $date = null;

        if (!$this->isRdfFeed()) {
            $dateModified = $this->xpath->evaluate('string(/rss/channel/pubDate)');
            if (!$dateModified) {
                $dateModified = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            }
            if ($dateModified) {
                $date = $this->parseDateString($dateModified);
            }
        }

        if (!$date) {
            $date = $this->getExtension('DublinCore')->getDate();
        }

        if (!$date) {
            $date = $this->getExtension('Atom')->getDateModified();
        }

        if (!$date) {
            $date = null;
        }

        $this->data['datemodified'] = $date;

        return $this->data['datemodified'];
    }

    /**
     * Get the feed lastBuild date
     *
     * Only the channel's lastBuildDate element is consulted, and only for
     * feeds other than RSS 1.0/0.90; no extension fallback is used.
     *
     * @return DateTime|null Null when the element is absent or unparseable
     */
    public function getLastBuildDate()
    {
        if (array_key_exists('lastBuildDate', $this->data)) {
            return $this->data['lastBuildDate'];
        }

        $date = null;

        if (!$this->isRdfFeed()) {
            $lastBuildDate = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            if ($lastBuildDate) {
                $date = $this->parseDateString($lastBuildDate);
            }
        }

        $this->data['lastBuildDate'] = $date;

        return $this->data['lastBuildDate'];
    }

    /**
     * Get the feed description
     *
     * Lookup order: the channel's description element, the DublinCore
     * extension, then the Atom extension.
     *
     * @return string|null
     */
    public function getDescription()
    {
        if (array_key_exists('description', $this->data)) {
            return $this->data['description'];
        }

        if ($this->isRdfFeed()) {
            $description = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:description)');
        } else {
            $description = $this->xpath->evaluate('string(/rss/channel/description)');
        }

        if (!$description && $this->getExtension('DublinCore') !== null) {
            $description = $this->getExtension('DublinCore')->getDescription();
        }

        if (empty($description)) {
            $description = $this->getExtension('Atom')->getDescription();
        }

        if (!$description) {
            $description = null;
        }

        $this->data['description'] = $description;

        return $this->data['description'];
    }

    /**
     * Get the feed ID
     *
     * Lookup order: the channel's guid element (not for RSS 1.0/0.90), the
     * DublinCore extension, the Atom extension, the feed link and finally
     * the feed title.
     *
     * @return string|null
     */
    public function getId()
    {
        if (array_key_exists('id', $this->data)) {
            return $this->data['id'];
        }

        $id = null;

        if (!$this->isRdfFeed()) {
            $id = $this->xpath->evaluate('string(/rss/channel/guid)');
        }

        if (!$id && $this->getExtension('DublinCore') !== null) {
            $id = $this->getExtension('DublinCore')->getId();
        }

        if (empty($id)) {
            $id = $this->getExtension('Atom')->getId();
        }

        if (!$id) {
            if ($this->getLink()) {
                $id = $this->getLink();
            } elseif ($this->getTitle()) {
                $id = $this->getTitle();
            } else {
                $id = null;
            }
        }

        $this->data['id'] = $id;

        return $this->data['id'];
    }

    /**
     * Get the feed image data
     *
     * The result may contain the keys "uri", "link", "title", "height",
     * "width" and "description"; a key is only present when the matching
     * element has a non-empty value.
     *
     * @return array|null Null when the channel has no image element
     */
    public function getImage()
    {
        if (array_key_exists('image', $this->data)) {
            return $this->data['image'];
        }

        if ($this->isRdfFeed()) {
            $list   = $this->xpath->query('/rdf:RDF/rss:channel/rss:image');
            $prefix = '/rdf:RDF/rss:channel/rss:image[1]';
        } else {
            $list   = $this->xpath->query('/rss/channel/image');
            $prefix = '/rss/channel/image[1]';
        }

        if ($list->length > 0) {
            // Result key => child element name, in the order the keys are added.
            // NOTE(review): the child names carry no "rss:" prefix even in the
            // RDF branch - confirm this matches namespaced RSS 1.0/0.90 input.
            $children = array(
                'uri'         => 'url',
                'link'        => 'link',
                'title'       => 'title',
                'height'      => 'height',
                'width'       => 'width',
                'description' => 'description',
            );

            $image = array();
            foreach ($children as $key => $element) {
                $value = $this->xpath->evaluate('string(' . $prefix . '/' . $element . ')');
                if ($value) {
                    $image[$key] = $value;
                }
            }
        } else {
            $image = null;
        }

        $this->data['image'] = $image;

        return $this->data['image'];
    }

    /**
     * Get the feed language
     *
     * Lookup order: the channel's language element (not for RSS 1.0/0.90),
     * the DublinCore extension, the Atom extension, then the first xml:lang
     * attribute found in the document.
     *
     * @return string|null
     */
    public function getLanguage()
    {
        if (array_key_exists('language', $this->data)) {
            return $this->data['language'];
        }

        $language = null;

        if (!$this->isRdfFeed()) {
            $language = $this->xpath->evaluate('string(/rss/channel/language)');
        }

        if (!$language && $this->getExtension('DublinCore') !== null) {
            $language = $this->getExtension('DublinCore')->getLanguage();
        }

        if (empty($language)) {
            $language = $this->getExtension('Atom')->getLanguage();
        }

        if (!$language) {
            $language = $this->xpath->evaluate('string(//@xml:lang[1])');
        }

        if (!$language) {
            $language = null;
        }

        $this->data['language'] = $language;

        return $this->data['language'];
    }

    /**
     * Get a link to the feed
     *
     * Uses the channel's link element and falls back to the Atom extension.
     *
     * @return string|null
     */
    public function getLink()
    {
        if (array_key_exists('link', $this->data)) {
            return $this->data['link'];
        }

        if ($this->isRdfFeed()) {
            $link = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:link)');
        } else {
            $link = $this->xpath->evaluate('string(/rss/channel/link)');
        }

        if (empty($link)) {
            $link = $this->getExtension('Atom')->getLink();
        }

        if (!$link) {
            $link = null;
        }

        $this->data['link'] = $link;

        return $this->data['link'];
    }

    /**
     * Get a link to the feed XML
     *
     * Uses the Atom extension and falls back to getOriginalSourceUri().
     *
     * @return string|null
     */
    public function getFeedLink()
    {
        if (array_key_exists('feedlink', $this->data)) {
            return $this->data['feedlink'];
        }

        $link = $this->getExtension('Atom')->getFeedLink();

        if (empty($link)) {
            $link = $this->getOriginalSourceUri();
        }

        $this->data['feedlink'] = $link;

        return $this->data['feedlink'];
    }

    /**
     * Get the feed generator entry
     *
     * Lookup order: the channel's generator element (not for RSS 1.0/0.90),
     * an atom:generator element inside the channel, then the Atom extension.
     *
     * @return string|null
     */
    public function getGenerator()
    {
        if (array_key_exists('generator', $this->data)) {
            return $this->data['generator'];
        }

        $generator = null;

        if (!$this->isRdfFeed()) {
            $generator = $this->xpath->evaluate('string(/rss/channel/generator)');
        }

        if (!$generator) {
            if ($this->isRdfFeed()) {
                $generator = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
            } else {
                $generator = $this->xpath->evaluate('string(/rss/channel/atom:generator)');
            }
        }

        if (empty($generator)) {
            $generator = $this->getExtension('Atom')->getGenerator();
        }

        if (!$generator) {
            $generator = null;
        }

        $this->data['generator'] = $generator;

        return $this->data['generator'];
    }

    /**
     * Get the feed title
     *
     * Lookup order: the channel's title element, the DublinCore extension,
     * then the Atom extension.
     *
     * @return string|null
     */
    public function getTitle()
    {
        if (array_key_exists('title', $this->data)) {
            return $this->data['title'];
        }

        if ($this->isRdfFeed()) {
            $title = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:title)');
        } else {
            $title = $this->xpath->evaluate('string(/rss/channel/title)');
        }

        if (!$title && $this->getExtension('DublinCore') !== null) {
            $title = $this->getExtension('DublinCore')->getTitle();
        }

        if (!$title) {
            $title = $this->getExtension('Atom')->getTitle();
        }

        if (!$title) {
            $title = null;
        }

        $this->data['title'] = $title;

        return $this->data['title'];
    }

    /**
     * Get an array of any supported PubSubHubbub endpoints
     *
     * The endpoints come from the Atom extension; duplicates are removed.
     *
     * @return array|null Null when no endpoint is reported
     */
    public function getHubs()
    {
        if (array_key_exists('hubs', $this->data)) {
            return $this->data['hubs'];
        }

        $hubs = $this->getExtension('Atom')->getHubs();

        if (empty($hubs)) {
            $hubs = null;
        } else {
            $hubs = array_unique($hubs);
        }

        $this->data['hubs'] = $hubs;

        return $this->data['hubs'];
    }

    /**
     * Get all categories
     *
     * Category elements anywhere below the channel are used first; each
     * entry gets the node value as "term" and "label" and the "domain"
     * attribute as "scheme". Without such elements the DublinCore extension
     * is asked, and when the result is still empty the Atom extension's
     * return value is used as-is.
     *
     * @return Collection\Category
     */
    public function getCategories()
    {
        if (array_key_exists('categories', $this->data)) {
            return $this->data['categories'];
        }

        if ($this->isRdfFeed()) {
            $list = $this->xpath->query('/rdf:RDF/rss:channel//rss:category');
        } else {
            $list = $this->xpath->query('/rss/channel//category');
        }

        if ($list->length) {
            $categoryCollection = new Collection\Category;
            foreach ($list as $category) {
                $categoryCollection[] = array(
                    'term'   => $category->nodeValue,
                    'scheme' => $category->getAttribute('domain'),
                    'label'  => $category->nodeValue,
                );
            }
        } else {
            $categoryCollection = $this->getExtension('DublinCore')->getCategories();
        }

        // Guard before count(): the extension's return value is not visible
        // here and count(null) is a TypeError on PHP 8.
        if (!$categoryCollection || count($categoryCollection) == 0) {
            $categoryCollection = $this->getExtension('Atom')->getCategories();
        }

        $this->data['categories'] = $categoryCollection;

        return $this->data['categories'];
    }

    /**
     * Read all entries to the internal entries array
     *
     * @return void
     */
    protected function indexEntries()
    {
        $entries = $this->xpath->evaluate($this->isRdfFeed() ? '//rss:item' : '//item');

        foreach ($entries as $index => $entry) {
            $this->entries[$index] = $entry;
        }
    }

    /**
     * Register the default namespaces for the current feed format
     *
     * Only RSS 1.0 and RSS 0.90 register anything: the "rdf" prefix plus the
     * "rss" prefix bound to the namespace of the respective version.
     *
     * @return void
     */
    protected function registerNamespaces()
    {
        switch ($this->data['type']) {
            case Reader\Reader::TYPE_RSS_10:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_10);
                break;

            case Reader\Reader::TYPE_RSS_090:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_090);
                break;
        }
    }

    /**
     * Whether the feed type is RSS 1.0 or RSS 0.90, i.e. one of the types
     * that are queried with the "rdf:"/"rss:" prefixed XPath expressions.
     *
     * @return bool
     */
    private function isRdfFeed()
    {
        $type = $this->getType();

        return $type === Reader\Reader::TYPE_RSS_10
            || $type === Reader\Reader::TYPE_RSS_090;
    }

    /**
     * Convert a date string taken from the feed into a DateTime.
     *
     * strtotime() is tried first; when it rejects the value, the RSS,
     * RFC 822 and RFC 2822 formats are tried in that order.
     *
     * @param  string $dateString Raw element value
     * @return DateTime|null Null when the value matches none of the formats
     */
    private function parseDateString($dateString)
    {
        $dateString = trim($dateString);

        $timestamp = strtotime($dateString);
        // Compare against false explicitly: 0 is a valid timestamp.
        if ($timestamp !== false) {
            return new DateTime('@' . $timestamp);
        }

        $formats = array(DateTime::RSS, DateTime::RFC822, DateTime::RFC2822);
        foreach ($formats as $format) {
            // createFromFormat() reports failure by returning false, it does
            // not throw, so the result has to be checked per format.
            $date = DateTime::createFromFormat($format, $dateString);
            if ($date instanceof DateTime) {
                return $date;
            }
        }

        return null;
    }
}