<?php

###############################################################################
# Gregarius - A PHP based RSS aggregator.
# Copyright (C) 2003 - 2006 Marco Bonetti
#
###############################################################################
# This program is free software and open source software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
# http://www.gnu.org/licenses/gpl.html
#
###############################################################################
# E-mail: mbonetti at gmail dot com
# Web page: http://gregarius.net/
#
###############################################################################


/**
 * Returns the 'meta.lastupdate' property stored under the '__meta__' key.
 */
function getLastModif() {
    return getProperty('__meta__', 'meta.lastupdate');
}

/**
 * Builds an ETag value: the md5 of the last-modification property
 * concatenated with the current script name.
 */
function getETag() {
    return md5(getLastModif() . $_SERVER['PHP_SELF']);
}

/**
 * Reports an error.
 *
 * @param string $message  the error message
 * @param mixed  $severity severity passed through to the global rss object
 * @param bool   $render   when true the message is echoed right away as an
 *                         HTML paragraph and nothing is recorded
 */
function rss_error($message, $severity = RSS_ERROR_ERROR, $render = false) {
    if ($render) {
        echo "<p class=\"error\">$message</p>\n";
        return;
    }

    // Lazily load the class file that is expected to set up $GLOBALS['rss'].
    if (!isset($GLOBALS['rss'])) {
        rss_require('cls/rss.php');
    }

    $GLOBALS['rss'] -> error($message, $severity);
}

/**
 * This function checks whether a URI exists.
 *
 * NOTE(review): despite its name this returns whatever getUrl() returns for
 * the URI (fetched with a 255 byte limit), not a numeric status code.
 */
function getHttpResponseCode($forUri) {
    return getUrl($forUri, 255);
}

/**
 * Issues a plain HTTP/1.0 GET for $link and extracts the Content-Type
 * response header.
 *
 * @param string $link        URL to probe
 * @param string $contentType (out) receives the header value when found
 * @return bool true when a Content-Type header was seen, false otherwise
 */
function getContentType($link, & $contentType) {
    $url_parts = @ parse_url($link);
    if (empty ($url_parts["host"])) {
        return (false);
    }

    $documentpath = !empty ($url_parts["path"]) ? $url_parts["path"] : "/";
    if (!empty ($url_parts["query"])) {
        $documentpath .= "?" . $url_parts["query"];
    }
    $host = $url_parts["host"];
    $port = (array_key_exists('port', $url_parts) ? $url_parts["port"] : "80");

    $socket = @ fsockopen($host, $port, $errno, $errstr, 30);
    if (!$socket) {
        return (false);
    }

    $ret = false;
    fwrite($socket, "GET " . $documentpath . " HTTP/1.0\r\nHost: $host\r\n\r\n");
    while (!feof($socket)) {
        $line = fgets($socket, 100);
        if ($line === false) {
            // read error or timeout: stop instead of spinning on feof()
            break;
        }
        if (preg_match("/Content-Type: (.*)/i", $line, $matches)) {
            // "." also matches the "\r" of the CRLF line ending: strip it
            $contentType = trim($matches[1]);
            $ret = true;
            break;
        }
    }

    // the connection is no longer needed, release it
    fclose($socket);

    return $ret;
}

/**
 * Basically strips folder resources from URIs.
 * http://pear.php.net/package/HTTP_Client/ --> pear.php.net/
 *
 * @param string $url  the URL to reduce
 * @param string $host (out) receives the host part, always with a trailing
 *                     slash and without the scheme
 * @return int result of preg_match(): 1 when a host was found, 0 otherwise
 */
function get_host($url, & $host) {
    // accept https as well, otherwise "https:" would be taken for the host
    $ret = preg_match("/^(https?:\/\/)?([^\/]+)/i", $url, $matches);
    $host = $ret ? $matches[2] : "";

    //ensure we have a slash
    if (substr($host, -1) != "/") {
        $host .= "/";
    }

    return $ret;
}

/**
 * Builds a title out of an already encoded string.
 *
 * @param mixed $title a string, or an array of strings, appended to the
 *                     configured site title, each prefixed with TITLE_SEP
 * @return string
 */
function makeTitle($title) {
    // Let us find out if the user has set a title.
    $userTitle = _TITLE_;
    if (getConfig('rss.output.title')) {
        $userTitle = getConfig('rss.output.title');
    }
    $ret = "" . $userTitle . "";
    if ($title) {
        if (is_array($title)) {
            foreach ($title as $token) {
                $ret .= " " . TITLE_SEP . " " . $token;
            }
        } else {
            $ret .= " " . TITLE_SEP . " " . $title;
        }
    }
    return $ret;
}

/**
 * Update the given feed(s).
 *
 * @param mixed $id numeric channel id to update a single feed; anything else
 *                  updates every channel that is not flagged as deleted
 * @return array two elements: a status (the feed's rss_origin or the magpie
 *               error for a single feed, -1 otherwise) and the list of ids
 *               of the items inserted or updated
 */
function update($id) {
    $kses_allowed = getConfig('rss.input.allowed'); //getAllowedTags();
    $updatedIds = array ();
    $singleFeed = ($id != "" && is_numeric($id));

    // stays false when the query yields no channel at all
    $rss = false;

    $sql = "select id, url, title, mode from " . getTable("channels");
    if ($singleFeed) {
        $sql .= " where id=$id";
        $sql .= " and not(mode & " . RSS_MODE_DELETED_STATE . ") ";
    } else {
        $sql .= " where not(mode & " . RSS_MODE_DELETED_STATE . ") ";
    }

    if (getConfig('rss.config.absoluteordering')) {
        $sql .= " order by parent, position";
    } else {
        $sql .= " order by parent, title";
    }

    $res = rss_query($sql);
    while (list ($cid, $url, $title, $mode) = rss_fetch_row($res)) {

        // suppress warnings because Magpie is rather noisy
        $old_level = error_reporting(E_ERROR);
        $rss = fetch_rss($url);

        //reset
        error_reporting($old_level);

        if (!$rss && $singleFeed) {
            return array (magpie_error(), array ());
        }
        elseif (!$rss || !($rss->rss_origin & MAGPIE_FEED_ORIGIN_HTTP_200) ) {
            continue; // no need to do anything if we do not get a 200 OK from the feed
        }

        // base URL for items in this feed.
        if (array_key_exists('link', $rss->channel)) {
            $baseUrl = $rss->channel['link'];
        } else {
            $baseUrl = $url; // The feed is invalid
        }

        // Keep track of guids we've handled, because some feeds (hello,
        // Technorati!) have this insane habit of serving the same item
        // twice in the same feed.
        $guids = array();

        // Allow updates in this feed?
        $allowUpdates = getProperty($cid, 'rss.input.allowupdates');
        if ($allowUpdates === null) {
            $allowUpdates = getConfig('rss.input.allowupdates');
        }

        $itemIdsInFeed = array(); // This variable will store the item id's of the elements in the feed
        foreach ($rss->items as $item) {

            $item = rss_plugin_hook('rss.plugins.rssitem', $item);
            // a plugin might delete this item
            if (!isset($item)) {
                continue;
            }

            // item title: strip out html tags
            $title = array_key_exists('title', $item) ? strip_tags($item['title']) : "";
            //$title = str_replace('& ', '& ', $title);

            $description = "";
            // item content, if any
            if (array_key_exists('content', $item) && is_array($item['content']) && array_key_exists('encoded', $item['content'])) {
                $description = $item['content']['encoded'];
            }
            elseif (array_key_exists('description', $item)) {
                $description = $item['description'];
            }
            elseif (array_key_exists('atom_content', $item)) {
                $description = $item['atom_content'];
            }
            elseif (array_key_exists('summary', $item)) {
                $description = $item['summary'];
            }
            else {
                $description = "";
            }

            $md5sum = "";
            $guid = "";

            if (array_key_exists('guid', $item) && $item['guid'] != "") {
                $guid = $item['guid'];
            }
            elseif (array_key_exists('id', $item) && $item['id'] != "") {
                $guid = $item['id'];
            }
            $guid = trim($guid);
            $guid = rss_real_escape_string($guid);

            // skip this one if it's an in-feed-dupe
            if ($guid && isset($guids[$guid])) {
                continue;
            }
            elseif ($guid) {
                $guids[$guid] = true;
            }

            if ($description != "") {
                // the checksum is taken before any filtering happens
                $md5sum = md5($description);
                $description = kses($description, $kses_allowed); // strip out tags

                if ($baseUrl != "") {
                    $description = relative_to_absolute($description, $baseUrl);
                }
            }

            // Now let plugins modify the description
            $description = rss_plugin_hook('rss.plugins.import.description', $description);

            // link
            if (array_key_exists('link', $item) && $item['link'] != "") {
                $url = $item['link'];
            }
            elseif (array_key_exists('guid', $item) && $item['guid'] != "") {
                $url = $item['guid'];
            }
            elseif (array_key_exists('link_', $item) && $item['link_'] != "") {
                $url = $item['link_'];
            }
            else {
                // fall back to something basic
                $url = md5($title);
            }

            // make sure the url is properly escaped
            $url = htmlentities($url, ENT_QUOTES );

            $url = rss_real_escape_string($url);

            // author
            if (array_key_exists('dc', $item) && array_key_exists('creator', $item['dc'])) {
                // RSS 1.0
                $author = $item['dc']['creator'];
            } else if (array_key_exists('author_name', $item)) {
                // Atom 0.3
                $author = $item['author_name'];
            } else {
                $author = "";
            }

            $author = trim(strip_tags($author));

            // pubdate
            $cDate = -1;
            if (array_key_exists('dc', $item) && array_key_exists('date', $item['dc'])) {
                // RSS 1.0
                $cDate = parse_w3cdtf($item['dc']['date']);
            }
            elseif (array_key_exists('pubdate', $item)) {
                // RSS 2.0 (?)
                // We use the second param of strtotime here as a workaround
                // of a PHP bug with strtotime. If the pubdate field doesn't
                // contain seconds, the strtotime function will use the current
                // time to fill in seconds in PHP4. This interferes with the
                // update mechanism of gregarius. See ticket #328 for the full
                // gory details. Giving a known date as a second param to
                // strtotime fixes this problem, hence the 0 here.
                $cDate = strtotime($item['pubdate'], 0);
            }
            elseif (array_key_exists('published', $item)) {
                // atom 1.0
                $cDate = parse_iso8601($item['published']);
            }
            elseif (array_key_exists('issued', $item)) {
                //Atom, alternative
                $cDate = parse_iso8601($item['issued']);
            }
            elseif (array_key_exists('updated', $item)) {
                //Atom, alternative
                $cDate = parse_iso8601($item['updated']);
            }
            elseif (array_key_exists('created', $item)) {
                // atom 0.3
                $cDate = parse_iso8601($item['created']);
            }

            // enclosure
            if (array_key_exists('enclosure@url', $item) ) {
                $enclosure = $item['enclosure@url'];
                // If the enclosure is an image, append it to the content
                // but only if it isn't there yet
                // (strict comparison: a match at offset 0 is still a match)
                if ($enclosure &&
                        array_key_exists('enclosure@type', $item) &&
                        preg_match('#image/(png|gif|jpe?g)#', $item['enclosure@type']) &&
                        (strpos($description, $enclosure) === false)) {
                    $description = '<img src="' . $enclosure . '" alt="" />' . $description;
                    $enclosure = '';
                }
            } else {
                $enclosure = "";
            }

            // drop items with an url exceeding our column length: we couldn't provide a
            // valid link back anyway.
            if (strlen($url) >= 255) {
                continue;
            }

            $dbtitle = rss_real_escape_string($title);
            if (strlen($dbtitle) >= 255) {
                $dbtitle = substr($dbtitle, 0, 254);
            }

            if ($cDate > 0) {
                $sec = "FROM_UNIXTIME($cDate)";
            } else {
                $sec = "null";
            }

            // check whether we already have this item
            if ($guid) {
                $sql = "select id,unread, md5sum, guid, pubdate from " . getTable("item")
                       . " where cid=$cid and guid='$guid'";
            } else {
                $sql = "select id,unread, md5sum, guid, pubdate from " . getTable("item")
                       . " where cid=$cid and url='$url' and title='$dbtitle'"
                       . " and (pubdate is NULL OR pubdate=$sec)";
            }

            $subres = rss_query($sql);
            list ($indb, $state, $dbmd5sum, $dbGuid, $dbPubDate) = rss_fetch_row($subres);

            if ($indb) {
                $itemIdsInFeed[] = $indb;
                if (!($state & RSS_MODE_DELETED_STATE) && $md5sum != $dbmd5sum) {
                    // the md5sums do not match.
                    if ($allowUpdates) { // Are we allowed update items in the db?
                        list ($cid, $indb, $description) =
                            rss_plugin_hook('rss.plugins.items.updated', array ($cid, $indb, $description));

                        $sql = "update " . getTable("item")
                               . " set " . " description='" . rss_real_escape_string($description) . "', "
                               . " unread = unread | " . RSS_MODE_UNREAD_STATE
                               . ", md5sum='$md5sum'" . " where cid=$cid and id=$indb";

                        rss_query($sql);
                        $updatedIds[] = $indb;
                        continue;
                    }
                }
            } else { // $indb = "" . This must be new item then. In you go.

                list ($cid, $dbtitle, $url, $description) =
                    rss_plugin_hook('rss.plugins.items.new', array ($cid, $dbtitle, $url, $description));

                $sql = "insert into " . getTable("item")
                       . " (cid, added, title, url, enclosure,"
                       . " description, author, unread, pubdate, md5sum, guid) "
                       . " values (" . "$cid, now(), '$dbtitle', "
                       . " '$url', '" . rss_real_escape_string($enclosure) . "', '"
                       . rss_real_escape_string($description) . "', '"
                       . rss_real_escape_string($author) . "', "
                       . "$mode, $sec, '$md5sum', '$guid')";

                rss_query($sql);

                $newIid = rss_insert_id();
                $itemIdsInFeed[] = $newIid;
                $updatedIds[] = $newIid;
                rss_plugin_hook('rss.plugins.items.newiid', array($newIid, $item, $cid));
            } // end handling of this item

        } // end handling of all the items in this feed
        $sql = "update " . getTable("channels") . " set " . " itemsincache = '"
               . serialize($itemIdsInFeed) . "' where id=$cid";
        rss_query($sql);

    } // end handling all the feeds we were asked to handle

    if ($singleFeed) {
        if ($rss) {
            // when everything went well, return the error code
            // and numer of new items
            return array ($rss->rss_origin, $updatedIds);
        } else {
            return array (-1, array ());
        }
    } else {
        return array (-1, $updatedIds);
    }
}

/**
 * Returns the id of the first folder (by position) having an empty name,
 * or 0 when there is none.
 */
function getRootFolder() {
    // explicit space before "where": do not depend on what getTable() appends
    $sql = "select id from " . getTable("folders") . " where name = '' order by position asc limit 1";
    list($root) = rss_fetch_row(rss_query($sql));

    if (!$root) {
        $root = 0;
    }

    return $root;
}

/**
 * Subscribes to a new channel.
 *
 * @param string $url      feed URL
 * @param int    $folderid parent folder id; a false value means root folder
 * @param string $title_   optional title overriding the feed's own
 * @param string $descr_   optional description overriding the feed's own
 * @param string $tags     optional tags submitted for the new channel
 * @return array (new channel id, "") on success;
 *               (-2, message) for fatal errors (bad arguments, duplicate);
 *               (-1, message) when the feed could not be fetched or parsed
 */
function add_channel($url, $folderid = 0, $title_=null,$descr_=null,$tags = null) {
    if (!$url || strlen($url) <= 7) {
        return array (-2, "Invalid URL $url");
    }
    if (!is_numeric($folderid)) {
        return array (-2, "Invalid folderid $folderid");
    }

    // NOTE(review): as written this str_replace() is a no-op (search and
    // replacement are identical). It may have been meant to decode an HTML
    // entity - confirm against the upstream source before changing it.
    $url = sanitize(str_replace('&','&',$url), RSS_SANITIZER_URL);

    $urlDB = rss_real_escape_string($url); //htmlentities($url);

    $res = rss_query("select count(*) as channel_exists from " . getTable("channels") . " where url='$urlDB'");
    list ($channel_exists) = rss_fetch_row($res);
    if ($channel_exists > 0) {
        // fatal
        return array (-2, "Looks like you are already subscribed to this channel");
    }

    $res = rss_query("select 1+max(position) as np from " . getTable("channels"));
    list ($np) = rss_fetch_row($res);

    if (!$np) {
        $np = "0";
    }

    // Here we go!
    //error_reporting(E_ALL);
    $old_level = error_reporting(E_ERROR);
    $rss = fetch_rss($url);
    error_reporting($old_level);

    if (!$rss) {
        global $MAGPIE_ERROR;
        $retError = "I'm sorry, I couldn't retrieve <a href=\"$url\">$url</a>.";
        if ($MAGPIE_ERROR) {
            $retError .= "\n<br />$MAGPIE_ERROR\n";
        }
        // non-fatal, will look further
        return array (-1, $retError);
    }

    if ($title_) {
        $title = rss_real_escape_string($title_);
    }
    elseif (is_object($rss) && array_key_exists('title#', $rss->channel)) {
        if (array_key_exists('title', $rss->channel)) {
            $title = rss_real_escape_string($rss->channel['title']);
        } else {
            $title = " ";
        }
    }
    else {
        $title = "";
    }

    if (is_object($rss) && array_key_exists('link', $rss->channel)) {
        $siteurl = rss_real_escape_string(htmlentities($rss->channel['link']));
    } else {
        $siteurl = "";
    }

    // honour an hourly syndication hint of the feed, expressed in minutes
    $refreshinterval = 0;
    if (is_object($rss) && array_key_exists('syn', $rss->channel)) {
        $syn = $rss->channel['syn'];

        if (array_key_exists('updateperiod', $syn)) {
            if ("hourly" == $syn['updateperiod']) {
                if (array_key_exists('updatefrequency', $syn)) {
                    $refreshinterval = 60 * $syn['updatefrequency'];
                }
            }
        }
    }

    if ($descr_) {
        $descr = rss_real_escape_string($descr_);
    }
    elseif (is_object($rss) && array_key_exists('description', $rss->channel)) {
        $descr = rss_real_escape_string($rss->channel['description']);
    }
    else {
        $descr = "";
    }

    //lets see if this server has a favicon
    $icon = "";
    if (getConfig('rss.output.showfavicons')) {
        // if we got nothing so far, lets try to fall back to
        // favicons
        if ($icon == "" && $siteurl != "") {
            $match = get_host($siteurl, $host);
            $uri = "http://" . $host . "favicon.ico";
            if ($match && getContentType($uri, $contentType)) {
                if (preg_match("/image\/x-icon/", $contentType)) {
                    $icon = $uri;
                }
            }
        }
    }

    // URLs carrying user:password credentials are flagged as private
    $private = preg_match('|(https?://)([^:]+:[^@]+@)(.+)$|', $url);

    if ($title == "") {
        // non-fatal, will look further
        return array (-1, "I'm sorry, I couldn't extract a valid RSS feed from <a href=\"$url\">$url</a>.");
    }

    $title = strip_tags($title);
    $descr = strip_tags($descr);

    // add channel to root folder by default
    if (!$folderid) {
        $folderid = getRootFolder();
    }

    list($title, $urlDB, $siteurl, $folderid, $descr, $icon) =
        rss_plugin_hook('rss.plugins.feed.new',
                        array ($title, $urlDB, $siteurl, $folderid, $descr, $icon));

    $mode = RSS_MODE_UNREAD_STATE;
    if ($private) {
        $mode |= RSS_MODE_PRIVATE_STATE;
    }

    $sql = "insert into " . getTable("channels")
           . " (title, url, siteurl, parent, descr, dateadded, icon, position, mode, daterefreshed)"
           . " values ('$title', '$urlDB', '$siteurl', $folderid, '$descr', now(), '$icon', $np, $mode, '0000-00-00 00:00:00')";

    rss_query($sql);
    $newid = rss_insert_id();

    if ($icon && cacheFavicon($icon)) {
        rss_query("update " . getTable("channels") . " set icon='blob:" . $icon . "'"
                  . " where id=$newid");
    }

    if ($tags != "") {
        __exp__submitTag($newid, $tags, "'channel'");
    }

    if (false == empty($refreshinterval)) {
        setProperty($newid, 'rss.config.refreshinterval', $refreshinterval);
    }

    return array ($newid, "");
}

/**
 * Replaces relative urls with absolute ones for anchors and images
 * Credits: Julien Mudry
 *
 * Only href="/..." and src="/..." attributes are rewritten; they get the
 * scheme and host of $feed_url prepended.
 *
 * @param string $content  markup to rewrite
 * @param string $feed_url URL providing scheme and host
 * @return string
 */
function relative_to_absolute($content, $feed_url) {
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '') {
        return $content;
    }

    if (!isset($protocol[0])) {
        return $content;
    }

    $prefix = $protocol[0] . $server_url . '/';
    $new_content = preg_replace('/href="\//', 'href="' . $prefix, $content);
    $new_content = preg_replace('/src="\//', 'src="' . $prefix, $new_content);
    return $new_content;
}

/**
 * parse an ISO 8601 date, losely based on parse_w3cdtf from MagpieRSS
 *
 * @param string $date_str the date to parse
 * @return int unix timestamp, or -1 when the string is not recognised
 */
function parse_iso8601($date_str) {
    # regex to match wc3dtf
    # groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes,
    #         6 seconds with their optional leading colon, 7 seconds,
    #         8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z"
    $pat = "/(\d{4})-?(\d{2})-?(\d{2})T?(\d{2}):?(\d{2})(:?(\d{2}\.?\d*))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if (!preg_match($pat, $date_str, $match)) {
        return -1;
    }

    // The seconds live in group 7: group 6 may start with a colon, which
    // would make intval() yield 0.
    $seconds = isset($match[7]) ? intval($match[7]) : 0;

    // calc epoch for current date assuming GMT
    $epoch = gmmktime(intval($match[4]), intval($match[5]), $seconds,
                      intval($match[2]), intval($match[3]), intval($match[1]));

    $offset = 0;
    if (isset($match[11]) && $match[11] == 'Z') {
        // zulu time, aka GMT
    } else {
        $tz_mod  = isset($match[8]) ? $match[8] : '';
        $tz_hour = isset($match[9]) ? intval($match[9]) : 0;
        $tz_min  = isset($match[10]) ? intval($match[10]) : 0;

        $offset_secs = (($tz_hour * 60) + $tz_min) * 60;

        // is timezone ahead of GMT? then subtract offset
        if ($tz_mod == '+') {
            $offset_secs = $offset_secs * -1;
        }

        $offset = $offset_secs;
    }

    return $epoch + $offset;
}

/**
 * Returns the relative path of the install dir, e.g:
 * http://host.com/thing/ -> "/thing/"
 * http://host.com/ -> "/"
 *
 * The install dir is computed once and cached; $path is appended to it.
 */
function getPath($path='') {
    static $ret;
    if ($ret === NULL) {
        $ret = dirname($_SERVER['PHP_SELF']);
        // eregi() is gone as of PHP 7: use a case-insensitive PCRE suffix test
        if (defined('RSS_FILE_LOCATION')
                && preg_match('/' . preg_quote(RSS_FILE_LOCATION, '/') . '$/i', $ret)) {
            $ret = substr($ret, 0, strlen($ret) - strlen(RSS_FILE_LOCATION));
        }
        if (substr($ret, -1) == "\\") { // Take off trailing backslash
            $ret = substr($ret, 0, -1);
        }
        if (substr($ret, -1) != "/") { // Add a frontslash
            $ret .= "/";
        }
    }
    return $ret . $path;
}
$dummy = getPath();

/**
 * builds an url for an archive link
 *
 * @param int    $ts      timestamp selecting the year/month(/day)
 * @param string $channel channel path segment, used with mod_rewrite urls
 * @param mixed  $cid     channel id, used with query-string urls
 * @param bool   $dayView whether the link points to a single day
 * @return string
 */
function makeArchiveUrl($ts, $channel, $cid, $dayView) {
    $ret = getPath();
    if (getConfig('rss.output.usemodrewrite')) {
        if ($channel) {
            $ret .= "$channel/";
        }
        $ret .= rss_date(($dayView ? 'Y/m/d/' : 'Y/m/'), $ts, false);
    } else {
        $ret .= "feed.php?";
        if ($cid) {
            $ret .= "channel=$cid&";
        }
        $ret .= "y=" . rss_date('Y', $ts, false)
                . "&m=" . rss_date('m', $ts, false)
                . ($dayView ? ("&d=" . rss_date('d', $ts, false)) : "");
    }
    return $ret;
}

/**
 * Fetches a remote URL and returns the content
 *
 * Anything that is an existing file, or that has neither scheme nor host,
 * is read from the local filesystem instead.
 *
 * @param string $url    URL or local path
 * @param int    $maxlen when non-zero, set as the HTTP client's maxlength
 * @return string
 */
function getUrl($url, $maxlen = 0) {
    //Bug: in windows, scheme returned by parse_url contains the drive letter
    //of the file so a test like !isset(scheme) does not work
    //maybe it would be better to only use is_file() which only detect
    //local files?
    $urlParts = parse_url($url);
    if (@is_file($url) || (!isset($urlParts['scheme']) && !isset($urlParts['host'])) ) {
        //local file!
        $c = "";
        $h = @fopen($url, "r");
        if ($h) {
            while (!feof($h)) {
                $c .= @fread($h, 8192);
            }
            // only close what was actually opened
            fclose($h);
        }
        return $c;
    }

    rss_require('extlib/Snoopy.class.inc');
    $client = new Snoopy();
    $client->agent = MAGPIE_USER_AGENT;
    $client->use_gzip = getConfig('rss.output.compression');

    if ($maxlen) {
        $client->maxlength = $maxlen;
    }
    @ $client->fetch($url);
    return $client->results;
}

/**
 * Feed Autodiscovery
 *
 * returns an array of all (hopefully) rss/atom/rdf feeds in the document,
 * pointed by $url.
 * See http://diveintomark.org/archives/2002/06/02/important_change_to_the_link_tag
 *
 * @param string $url URL of a web document containing <link> elements
 * @return array Array of feed URLs
 */
function extractFeeds($url) {
    rss_require('extlib/uri_util.php');
    $cnt = getUrl($url);
    $ret = array ();
    //find all link tags
    if (preg_match_all('|<link\s+\w*=["\'][^"\']+["\']+[^>]*>|Uis', $cnt, $res)) {
        // foreach instead of each(), which no longer exists in PHP 8
        foreach ($res[0] as $match) {
            // we only want '<link alternate=...'
            if (strpos(strtolower($match), 'alternate') &&
                    !strpos(strtolower($match), 'stylesheet') && // extract the attributes
                    preg_match_all('|([a-zA-Z]*)=["\']([^"\']*)|', $match, $res2, PREG_SET_ORDER)) {
                $tmp = array ();
                //populate the return array: attr_name => attr_value
                foreach ($res2 as $match2) {
                    $attr = strtolower(trim($match2[1]));
                    $val = trim($match2[2]);
                    // make sure we have absolute URI's
                    if ($attr == "href") {
                        $val = absolute_uri($val, $url);
                    }
                    $tmp[$attr] = $val;
                }
                $ret[] = $tmp;
            }
        }
    }
    return $ret;
}

/**
 * Applies stripslashes() repeatedly until the string does not change anymore.
 */
function real_strip_slashes($string) {
    if (stripslashes($string) == $string) {
        return $string;
    }
    return real_strip_slashes(stripslashes($string));
}

/**
 * htmlspecialchars() without quote conversion, using the configured output
 * encoding (or DEFAULT_OUTPUT_ENCODING when none is configured).
 */
function rss_htmlspecialchars($in) {
    return htmlspecialchars($in, ENT_NOQUOTES,
                            (getConfig('rss.output.encoding') ? getConfig('rss.output.encoding') : DEFAULT_OUTPUT_ENCODING));
}

/**
 * Returns up to $count leading whitespace-terminated words of $text followed
 * by '...', with line breaks turned into spaces and tags stripped. Yields an
 * empty string when the text holds no whitespace-terminated word.
 */
function firstNwords($text, $count=7) {
    $new = "";
    $expr = '/(.+?\s+){1,' . $count . '}/';
    if ( preg_match($expr, $text, $matches) ) {
        $result = $matches[0] . '...';
        $new = preg_replace('/(\r\n|\r|\n)/', ' ', $result);
        $new = strip_tags($new);
    }
    return $new;
}

/**
 * Truncates a piece of html without cutting inside a tag.
 * Props: mr at bbp dot biz - http://ch2.php.net/substr
 *
 * @param string $posttext       the html to shorten
 * @param int    $minimum_length The approximate length you want the
 *                               concatenated text to be
 * @param int    $length_offset  The variation in how long the text can be:
 *                               with the defaults the text length will be
 *                               between 200 and 200-20=180 characters and
 *                               the character where the last tag ends
 * @param bool   $cut_words      allow cutting in the middle of a word
 * @param bool   $dots           append '...' to a truncated text
 * @return string
 */
function html_substr($posttext, $minimum_length = 200, $length_offset = 20, $cut_words = FALSE, $dots = TRUE) {

    // Reset tag counter & quote checker
    $tag_counter = 0;
    $quotes_on = FALSE;
    $total = strlen($posttext);

    // Check if the text is too long
    if ($total <= $minimum_length) {
        return $posttext;
    }

    // Reset the char counter and pass through (part of) the entire text
    $c = 0;
    for ($i = 0; $i < $total; $i++) {
        // Load the current character and the next one
        // if the string has not arrived at the last character
        $current_char = substr($posttext, $i, 1);
        if ($i < $total - 1) {
            $next_char = substr($posttext, $i + 1, 1);
        } else {
            $next_char = "";
        }
        // First check if quotes are on
        if (!$quotes_on) {
            // Check if it's a tag
            // On a "<" add 3 if it's an opening tag (like <a href...)
            // or add only 1 if it's an ending tag (like </a>)
            if ($current_char == '<') {
                if ($next_char == '/') {
                    $tag_counter += 1;
                } else {
                    $tag_counter += 3;
                }
            }
            // Slash signifies an ending (like </a> or ... />)
            // substract 2
            if ($current_char == '/' && $tag_counter <> 0) {
                $tag_counter -= 2;
            }
            // On a ">" substract 1
            if ($current_char == '>') {
                $tag_counter -= 1;
            }
            // If quotes are encountered, start ignoring the tags
            // (for directory slashes)
            if ($current_char == '"') {
                $quotes_on = TRUE;
            }
        } else {
            // IF quotes are encountered again, turn it back off
            if ($current_char == '"') {
                $quotes_on = FALSE;
            }
        }

        // Count only the chars outside html tags
        if ($tag_counter == 2 || $tag_counter == 0) {
            $c++;
        }

        // Check if the counter has reached the minimum length yet,
        // then wait for the tag_counter to become 0, and chop the string there
        if ($c > $minimum_length - $length_offset && $tag_counter == 0 && ($next_char == ' ' || $cut_words == TRUE)) {
            $posttext = substr($posttext, 0, $i + 1);
            if ($dots) {
                $posttext .= '...';
            }
            return $posttext;
        }
    }
    return $posttext;
}


/**
 * Echoes the "Show items" form, which posts back to the current request.
 *
 * @param array $args (current SHOW_WHAT value, show-private flag)
 */
function showViewForm($args) { //$curValue, $show_private) {
    list($curValue, $show_private) = $args;

    // REQUEST_URI is client-controlled: escape it before it lands in an
    // attribute value
    $action = htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES);

    // post back to self, we should be able to handle the request, shouldn't we.
    echo "\n<form action=\"" . $action . "\" method=\"post\" id=\"frmShow\">\n"
        . "<p><label for=\"" . SHOW_WHAT . "\">" . __('Show items: ') . "</label>\n"
        . "<select name=\"" . SHOW_WHAT . "\" id=\"" . SHOW_WHAT . "\" " . " onchange=\"document.getElementById('frmShow').submit();\">\n"
        . "\t<option value=\"" . SHOW_UNREAD_ONLY . "\"" . (SHOW_UNREAD_ONLY == $curValue ? " selected=\"selected\"" : "") . ">" . __('Unread only') . "</option>\n"
        . "\t<option value=\"" . SHOW_READ_AND_UNREAD . "\"" . (SHOW_READ_AND_UNREAD == $curValue ? " selected=\"selected\"" : "") . ">" . __('Read and unread') . "</option>\n"
        . "</select>"
        . "</p>\n";
    /*
    if(isLoggedIn()) {
        echo "<p><label for=\"chkPrivate\">".__('Show Private:')."</label>\n"
        ."<input type=\"checkbox\" name=\"chkPrivate\" id=\"chkPrivate\" value=\"1\" onchange=\"if(false == document.getElementById('chkPrivate').checked) { document.getElementById('chkPrivate').value = 0; document.getElementById('chkPrivate').checked = true; } document.getElementById('frmShow').submit();\"" . (1 == $show_private ? " checked" : "") . ">\n"
        ."</p>\n";
    }
    */
    echo "</form>\n";
}


/**
 * Counts the unread, non-deleted items of non-deleted channels, optionally
 * restricted to one channel ($cid) or, failing that, one folder ($fid).
 * Results are cached per ($cid, $fid) pair for the duration of the request.
 */
function getUnreadCount($cid, $fid) {
    static $_uccache = array();
    $key_ = "key $cid $fid key";
    if (isset($_uccache[$key_])) {
        return $_uccache[$key_];
    }

    // explicit space before the alias: do not depend on what getTable() appends
    $sql = "select count(*) from " . getTable("item") . " i "
           . "inner join " . getTable('channels') . " c on c.id = i.cid "
           . " where i.unread & " . RSS_MODE_UNREAD_STATE . " and not(i.unread & " .
           RSS_MODE_DELETED_STATE . ") "
           . " and not(c.mode & " . RSS_MODE_DELETED_STATE . ") ";

    if (hidePrivate()) {
        $sql .= " and not(i.unread & " . RSS_MODE_PRIVATE_STATE . ")";
    }

    if ($cid) {
        $sql .= " and c.id=$cid ";
    }
    elseif ($fid) {
        $sql .= " and c.parent=$fid ";
    }

    $res = rss_query($sql);

    list ($_uccache[$key_]) = rss_fetch_row($res);
    return $_uccache[$key_];
}

/**
 * strftime() wrapper returning utf-8, optionally shifted by the configured
 * timezone offset (in hours).
 */
function rss_locale_date ($fmt, $ts, $addTZOffset = true) {

    if (isset($_SERVER["WINDIR"])) {
        //%e doesnt' exists under windows!
        $fmt = str_replace("%e", "%#d", $fmt);
    }

    if ($addTZOffset) {
        return utf8_encode(strftime($fmt, $ts + 3600 * getConfig('rss.config.tzoffset')));
    }
    return utf8_encode(strftime($fmt, $ts));
}

/**
 * date() wrapper, optionally shifted by the configured timezone offset
 * (in hours).
 */
function rss_date($fmt, $ts, $addTZOffset = true) {
    if ($addTZOffset) {
        return date($fmt, $ts + 3600 * getConfig('rss.config.tzoffset'));
    }
    return date($fmt, $ts);
}

/**
 * Forwards a profiling message to the global rss object, but only when
 * PROFILING is defined and true and that object offers a _pf() method.
 */
function _pf($msg) {
    if (defined('PROFILING') && PROFILING && isset($GLOBALS['rss']) && method_exists($GLOBALS['rss'], "_pf")) {
        $GLOBALS['rss'] -> _pf($msg);
    }
}


/**
 * Returns the scheme prefix ("http://" or "https://") of the current request:
 * RSS_SERVER_PROTO when defined, otherwise guessed from the server port.
 */
function guessTransportProto() {

    if (defined ('RSS_SERVER_PROTO')) {
        return RSS_SERVER_PROTO;
    }

    if (array_key_exists("SERVER_PORT", $_SERVER) && $_SERVER["SERVER_PORT"] == 443) {
        return "https://";
    }

    // plain http, or best effort when the port is unknown
    return "http://";
}

/**
 * Sends a Location header pointing to $url, relative to the install dir.
 */
function rss_redirect($url = "") {
    header("Location: " .
           (guessTransportProto() . $_SERVER['HTTP_HOST'] . getPath() . $url));
}

/*
fixes #117.
http://www.php.net/manual/en/function.getallheaders.php
*/
/**
 * Collects the $_SERVER entries whose key contains "HTTP_", keyed by
 * whatever follows that marker.
 */
function rss_getallheaders() {
    $headers = array();
    foreach ($_SERVER as $h => $v) {
        // ereg() is gone as of PHP 7; same unanchored match with PCRE
        if (preg_match('/HTTP_(.+)/', $h, $hp)) {
            $headers[$hp[1]] = $v;
        }
    }
    return $headers;
}

// moved from ajax.php
/**
 * Replaces the tags of an entity with the given space-separated list.
 *
 * @param mixed  $id   id of the tagged entity
 * @param string $tags space-separated tags
 * @param string $type quoted sql literal naming the entity type
 * @return string "$id," followed by the space-separated tags actually stored
 */
function __exp__submitTag($id,$tags,$type = "'item'") {
    $tags = strip_tags($tags);
    $ftags = utf8_encode( preg_replace(ALLOWED_TAGS_REGEXP, '', trim($tags)));
    $tarr = array_slice(explode(" ", $ftags), 0, MAX_TAGS_PER_ITEM);
    $ftags = implode(" ", __priv__updateTags($id, $tarr, $type));
    return "$id," . $ftags;
}

/**
 * Drops every tag association of the entity and re-creates one per given
 * tag, inserting tags that do not exist yet.
 *
 * @return array sorted list of the tags that were stored
 */
function __priv__updateTags($fid,$tags,$type) {
    rss_query("delete from " . getTable('metatag')
              . " where fid=$fid and ttype=$type");
    $ret = array();
    foreach ($tags as $tag) {
        $ttag = trim($tag);
        if ($ttag == "" || in_array($ttag, $ret)) {
            continue;
        }
        $ttag = sanitize($ttag,
                         RSS_SANITIZER_NO_SPACES | RSS_SANITIZER_SIMPLE_SQL
                        );

        rss_query( "insert into " . getTable('tag')
                   . " (tag) values ('$ttag')", false );
        $tid = 0;
        if (rss_is_sql_error(RSS_SQL_ERROR_DUPLICATE_ROW)) {
            // the tag exists already: look its id up
            list($tid) = rss_fetch_row(rss_query("select id from "
                                                 . getTable('tag') . " where tag='$ttag'"));
        } else {
            $tid = rss_insert_id();
        }
        if ($tid) {
            rss_query( "insert into " . getTable('metatag')
                       . " (fid,tid,ttype,tdate) values ($fid,$tid,$type,now())" );
            if (rss_is_sql_error(RSS_SQL_ERROR_NO_ERROR)) {
                $ret[] = $ttag;
            }
        }
    }

    rss_invalidate_cache();

    sort($ret);
    return $ret;
}



/**
 * this was taken straight from WordPress
 *
 * Percent-encodes the bytes of multi-byte utf-8 sequences (lower-case hex)
 * and leaves ascii bytes untouched.
 */
function utf8_uri_encode( $utf8_string ) {
    $unicode = '';
    $values = array();
    $num_octets = 1;
    $length = strlen( $utf8_string );

    for ($i = 0; $i < $length; $i++ ) {

        $value = ord( $utf8_string[ $i ] );

        if ( $value < 128 ) {
            $unicode .= chr($value);
            continue;
        }

        if ( count( $values ) == 0 ) {
            // the lead byte tells the sequence length; 4 byte sequences
            // (lead byte >= 240) need their own case or the bytes that
            // follow get misaligned
            if ( $value < 224 ) {
                $num_octets = 2;
            } elseif ( $value < 240 ) {
                $num_octets = 3;
            } else {
                $num_octets = 4;
            }
        }

        $values[] = $value;

        if ( count( $values ) == $num_octets ) {
            foreach ( $values as $octet ) {
                $unicode .= '%' . dechex($octet);
            }

            $values = array();
            $num_octets = 1;
        }
    }

    return $unicode;
}

/*
// Deprecated in favor of the new core.php functionalities
function ETagHandler($key) {
    // This function should be used inline for speed. However if you have already
    // included util.php you might as well use it.
    if (array_key_exists('HTTP_IF_NONE_MATCH',$_SERVER) &&
        $_SERVER['HTTP_IF_NONE_MATCH'] == $key) {
        header("HTTP/1.1 304 Not Modified");
        flush();
        exit();
    } else {
        header("ETag: $key");
        // ob_start('ob_gzhandler');
    }
}
*/
//these two eval_ functions taken from the comments at http://us3.php.net/eval
/*
function eval_mixed_helper($arr) {
    return ("echo stripslashes(\"".addslashes($arr[1])."\");");
}

function eval_mixed($string) {
    $string = "<? ?>".$string."<? ?>";
    $string = preg_replace("/<\?=\s+(.*?)\s+\?>/", "<? echo $1; ?>", $string);
    $string = str_replace('?>', '', str_replace( array('<?php', '<?'), '', preg_replace_callback( "/\?>((.|\n)*?)<\?(php)?/","eval_mixed_helper",$string) ) );
    return $string;
}
*/

/**
 * Returns $prefix followed by the subversion revision found in
 * GREGARIUS_HOME/.svn/dir-wcprops, or an empty string when it can't be
 * determined. The first answer is cached for the rest of the request.
 */
function rss_svn_rev($prefix='.') {
    static $ret;
    if ($ret !== NULL) {
        return $ret;
    }

    // default for a missing file as well as for an unparsable one
    $ret = "";
    if (file_exists(GREGARIUS_HOME . '.svn/dir-wcprops')) {
        $raw = getUrl(GREGARIUS_HOME . '.svn/dir-wcprops');
        if ($raw && preg_match('#ver/([0-9]+)/#', $raw, $matches) && isset($matches[1])) {
            $ret = $prefix . $matches[1];
        }
    }
    return $ret;
}

/**
 * Downloads a favicon and stores it in the cache table, replacing any
 * previous entry for the same url.
 *
 * @param string $icon absolute http(s) url of the icon
 * @return bool whether the icon was stored
 */
function cacheFavicon($icon) {
    // Make sure only real favicons get fetched into the DB
    if (! preg_match('#^https?://.+$#', $icon)) {
        return false;
    }

    $icon_ = rss_real_escape_string($icon);
    $binIcon = getUrl($icon);
    if (!$binIcon) {
        return false;
    }

    $sql = "delete from " . getTable('cache')
           . " where cachetype='icon' and cachekey='$icon_'";
    rss_query($sql);
    $sql = "insert into " . getTable('cache')
           . "(cachekey,timestamp,cachetype,data) values "
           . "('$icon_',now(),'icon','" . rss_real_escape_string($binIcon) . "')";
    rss_query($sql);
    return rss_is_sql_error(RSS_SQL_ERROR_NO_ERROR);
}

/**
 * Filters a string according to a bitmask of RSS_SANITIZER_* rules. The
 * rules are applied in the order they appear below.
 *
 * @param string $input the raw value
 * @param int    $rules bitwise OR of RSS_SANITIZER_* constants
 * @return string
 */
function sanitize($input, $rules = 0) {
    $ret = $input;
    if ($rules & RSS_SANITIZER_SIMPLE_SQL) {
        $ret = rss_real_escape_string($ret);
    }
    if ($rules & RSS_SANITIZER_NO_SPACES) {
        $ret = preg_replace('#\s#', '', $ret);
        // also strip out SQL comments
        $ret = preg_replace('#/\*.*\*/#', '', $ret);
    }
    if ($rules & RSS_SANITIZER_NUMERIC) {
        $ret = preg_replace('#[^0-9\.-]#', '', $ret);
    }
    if ($rules & RSS_SANITIZER_CHARACTERS) {
        $ret = preg_replace('#[^a-zA-Z]#', '', $ret);
    }
    if ($rules & RSS_SANITIZER_CHARACTERS_EXT) {
        $ret = preg_replace('#[^a-zA-Z_]#', '', $ret);
    }
    if ($rules & RSS_SANITIZER_WORDS) {
        $ret = preg_replace('#[^a-zA-Z0-9\-\._]#', '', $ret);
    }
    if ($rules & RSS_SANITIZER_URL) {
        // filter out "unsafe" characters: {,},|,\,^,<,>
        $ret = preg_replace('#[{}\|\\\^<>]#', '', $ret);
    }
    return $ret;
}

/**
 * Tells whether private items must be hidden: always for visitors that are
 * not logged in, otherwise according to rss_user_show_private().
 */
function hidePrivate() {
    $ret = 0;

    if (isLoggedIn()) {
        $ret = rss_user_show_private();
    }

    return !$ret;
}

/**
 * Tells whether the current user has at least the RSS_USER_LEVEL_PRIVATE
 * level, loading the user class on first use.
 */
function isLoggedIn() {
    if (!isset($GLOBALS['rssuser'])) {
        require_once('cls/user.php');
    }

    return rss_user_check_user_level(RSS_USER_LEVEL_PRIVATE);
}

// Send a crappy 404 (to save bandwidth) for webbots
function rss_404() {
    header("HTTP/1.1 404 Not Found");
    echo "404 Page Not Found\n";
}

/**
 * Turns a title into an uri fragment: blanks and the characters
 * # % & / + ' " ? are replaced by $sep, multi-byte characters are
 * percent-encoded.
 */
function rss_uri($title, $sep=RSS_URI_SEPARATOR) {
    return utf8_uri_encode(preg_replace('#[ \#%&/\+\'"\?]#', $sep, $title));
}