1<?php
2
3###############################################################################
4# Gregarius - A PHP based RSS aggregator.
5# Copyright (C) 2003 - 2006 Marco Bonetti
6#
7###############################################################################
8# This program is free software and open source software; you can redistribute
9# it and/or modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the License,
11# or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful, but WITHOUT
14# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16# more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
21# http://www.gnu.org/licenses/gpl.html
22#
23###############################################################################
24# E-mail:      mbonetti at gmail dot com
25# Web page:    http://gregarius.net/
26#
27###############################################################################
28
29
30
/**
 * Returns the 'meta.lastupdate' property stored under the '__meta__' reference.
 *
 * @return mixed whatever getProperty() yields for that key
 */
function getLastModif() {
    $lastUpdate = getProperty('__meta__', 'meta.lastupdate');
    return $lastUpdate;
}
34
/**
 * Builds an ETag value: the MD5 hash of the last-modification marker
 * concatenated with the path of the running script.
 *
 * @return string 32-character hexadecimal MD5 digest
 */
function getETag() {
    $seed = getLastModif() . $_SERVER['PHP_SELF'];
    return md5($seed);
}
38
39
/**
 * Reports an error, either by printing it right away or by handing it to
 * the global 'rss' object.
 *
 * @param string $message  error text; it is emitted as-is (no escaping)
 * @param mixed  $severity severity passed on to the global object's error()
 * @param bool   $render   when true, echo the message in an error paragraph
 *                         and do nothing else
 */
function rss_error($message, $severity = RSS_ERROR_ERROR, $render = false) {
    if (!$render) {
        // Load the class file on demand when the global object is not set yet.
        if (!isset($GLOBALS['rss'])) {
            rss_require('cls/rss.php');
        }
        $GLOBALS['rss']->error($message, $severity);
        return;
    }

    echo '<p class="error">' . $message . "</p>\n";
}
52
/**
 * Fetches $forUri through getUrl(), passing 255 as the maximum length, and
 * returns whatever getUrl() returned.
 *
 * NOTE(review): despite the name, the visible code returns fetched content
 * rather than a numeric status code -- confirm what callers expect.
 *
 * @param string $forUri the URI to probe
 * @return mixed result of getUrl()
 */
function getHttpResponseCode($forUri) {
    $maxBytes = 255;
    return getUrl($forUri, $maxBytes);
}
57
/**
 * Issues a plain HTTP/1.0 GET for $link and extracts the Content-Type
 * response header.
 *
 * Only the response headers are inspected: reading stops at the first
 * Content-Type header or at the blank line that ends the header section.
 * The socket is always closed before returning.
 *
 * NOTE(review): the request is always sent unencrypted, to port 80 unless
 * the URL names a port; https:// links are not handled specially.
 *
 * @param string $link        absolute URL to probe
 * @param string $contentType (out) trimmed header value, set only on success
 * @return bool true when a Content-Type header was found, false otherwise
 */
function getContentType($link, & $contentType) {
    $url_parts = @ parse_url($link);
    if (empty ($url_parts["host"])) {
        return false;
    }

    $documentpath = !empty ($url_parts["path"]) ? $url_parts["path"] : "/";
    if (!empty ($url_parts["query"])) {
        $documentpath .= "?".$url_parts["query"];
    }
    $host = $url_parts["host"];
    $port = array_key_exists('port', $url_parts) ? (int) $url_parts["port"] : 80;

    $socket = @ fsockopen($host, $port, $errno, $errstr, 30);
    if (!$socket) {
        return false;
    }

    $ret = false;
    fwrite($socket, "GET ".$documentpath." HTTP/1.0\r\nHost: $host\r\n\r\n");
    while (!feof($socket)) {
        // Read whole lines so that a long header is never split in two.
        $line = fgets($socket);
        if ($line === false || trim($line) === '') {
            // Read error, or the empty line that terminates the headers:
            // there is no point in scanning the response body.
            break;
        }
        if (preg_match("/^Content-Type:\s*(.*)/i", $line, $matches)) {
            // Drop the trailing CR/LF left over from the header line.
            $contentType = trim($matches[1]);
            $ret = true;
            break;
        }
    }
    fclose($socket);

    return $ret;
}
92
/**
 * Strips the scheme and any path from a URL, leaving "host/".
 * http://pear.php.net/package/HTTP_Client/ --> pear.php.net/
 *
 * Both http:// and https:// prefixes are recognised; a URL without a scheme
 * is treated as starting with the host.
 *
 * @param string $url  URL to reduce
 * @param string $host (out) host name followed by a slash; just "/" when
 *                     no host could be extracted
 * @return int 1 when a host was found, 0 otherwise (preg_match() result)
 */
function get_host($url, & $host) {
    $ret = preg_match("/^(https?:\/\/)?([^\/]+)/i", $url, $matches);
    // Do not touch $matches when nothing matched (e.g. an empty URL).
    $host = $ret ? $matches[2] : "";

    //ensure we have a slash
    if (substr($host, -1) != "/") {
        $host .= "/";
    }

    return $ret;
}
106
/**
 * Assembles a page title from already encoded parts.
 *
 * The title starts with the configured 'rss.output.title' (or _TITLE_ when
 * that option is empty); every given part is appended, each preceded by
 * TITLE_SEP surrounded by single spaces.
 *
 * @param string|array $title one title part, or a list of parts
 * @return string the assembled title
 */
function makeTitle($title) {
    // Default first, then let the user's configured title override it.
    $heading = _TITLE_;
    $configured = getConfig('rss.output.title');
    if ($configured) {
        $heading = $configured;
    }

    $ret = "". $heading ."";
    if (!$title) {
        return $ret;
    }

    $parts = is_array($title) ? $title : array($title);
    foreach ($parts as $part) {
        $ret .= " ".TITLE_SEP." ".$part;
    }
    return $ret;
}
128
/**
 * Refreshes the items of one feed, or of every feed that is not flagged
 * as deleted.
 *
 * Each selected channel is fetched with fetch_rss(). Every item is
 * normalised (title, description, guid, link, author, date, enclosure),
 * looked up in the item table and then either inserted or -- when its
 * content hash changed and updates are allowed -- updated. Afterwards the
 * ids of the items seen in the feed are serialized into the channel's
 * itemsincache column.
 *
 * @param mixed $id numeric channel id to refresh a single feed; any other
 *                  value refreshes all feeds
 * @return array two elements, a status and a list of new/updated item ids:
 *   - single feed, fetch failed:     array(magpie_error(), array())
 *   - single feed, feed was fetched: array($rss->rss_origin, $ids)
 *   - single feed, nothing fetched:  array(-1, array())
 *   - all feeds:                     array(-1, $ids)
 */
function update($id) {
    $kses_allowed = getConfig('rss.input.allowed'); //getAllowedTags();
    $updatedIds = array ();
    $singleFeed = ($id != "" && is_numeric($id));

    // Defined up front: it is inspected after the loop even when the query
    // returned no channel at all.
    $rss = false;

    $sql = "select id, url, title, mode from ".getTable("channels");
    if ($singleFeed) {
        $sql .= " where id=$id";
        $sql .= " and not(mode & ".RSS_MODE_DELETED_STATE.") ";
    } else {
        $sql .= " where not(mode & ".RSS_MODE_DELETED_STATE.") ";
    }

    if (getConfig('rss.config.absoluteordering')) {
        $sql .= " order by parent, position";
    } else {
        $sql .= " order by parent, title";
    }

    $res = rss_query($sql);
    while (list ($cid, $url, $title, $mode) = rss_fetch_row($res)) {

        // suppress warnings because Magpie is rather noisy
        $old_level = error_reporting(E_ERROR);
        $rss = fetch_rss($url);

        //reset
        error_reporting($old_level);

        if (!$rss && $singleFeed) {
            return array (magpie_error(), array ());
        }
        elseif (!$rss || !($rss->rss_origin & MAGPIE_FEED_ORIGIN_HTTP_200) ) {
            continue; // no need to do anything if we do not get a 200 OK from the feed
        }

        // base URL for items in this feed.
        if (array_key_exists('link', $rss->channel)) {
            $baseUrl = $rss->channel['link'];
        } else {
            $baseUrl = $url; // The feed is invalid
        }

        // Keep track of guids we've handled, because some feeds (hello,
        // Technorati!) have this insane habit of serving the same item
        // twice in the same feed.
        $guids = array();

        // Allow updates in this feed? The per-feed property wins over the
        // global configuration.
        $allowUpdates = getProperty($cid,'rss.input.allowupdates');
        if ($allowUpdates === null) {
            $allowUpdates = getConfig('rss.input.allowupdates');
        }

        $itemIdsInFeed = array(); // item ids of the elements currently in the feed

        // Note: $title and $url are reused for the item's values from here on.
        foreach ($rss->items as $item) {

            $item = rss_plugin_hook('rss.plugins.rssitem', $item);
            // a plugin might delete this item
            if (!isset($item)) {
                continue;
            }

            // item title: strip out html tags
            $title = array_key_exists('title', $item) ? strip_tags($item['title']) : "";

            // item content, if any; first available source wins
            if (array_key_exists('content', $item) && is_array($item['content']) && array_key_exists('encoded', $item['content'])) {
                $description = $item['content']['encoded'];
            }
            elseif (array_key_exists('description', $item)) {
                $description = $item['description'];
            }
            elseif (array_key_exists('atom_content', $item)) {
                $description = $item['atom_content'];
            }
            elseif (array_key_exists('summary', $item)) {
                $description = $item['summary'];
            }
            else {
                $description = "";
            }

            $md5sum = "";
            $guid = "";

            if (array_key_exists('guid', $item) && $item['guid'] != "") {
                $guid = $item['guid'];
            }
            elseif (array_key_exists('id', $item) && $item['id'] != "") {
                $guid = $item['id'];
            }
            $guid = trim($guid);
            $guid = rss_real_escape_string($guid);

            // skip this one if it's an in-feed-dupe
            if ($guid && isset($guids[$guid])) {
                continue;
            }
            elseif ($guid) {
                $guids[$guid] = true;
            }

            if ($description != "") {
                // The hash is taken over the raw description, before filtering.
                $md5sum = md5($description);
                $description = kses($description, $kses_allowed); // strip out tags

                if ($baseUrl != "") {
                    $description = relative_to_absolute($description, $baseUrl);
                }
            }

            // Now let plugins modify the description
            $description = rss_plugin_hook('rss.plugins.import.description', $description);

            // link
            if (array_key_exists('link', $item) && $item['link'] != "") {
                $url = $item['link'];
            }
            elseif (array_key_exists('guid', $item) && $item['guid'] != "") {
                $url = $item['guid'];
            }
            elseif (array_key_exists('link_', $item) && $item['link_'] != "") {
                $url = $item['link_'];
            }
            else {
                // fall back to something basic
                $url = md5($title);
            }

            // make sure the url is properly escaped
            $url = htmlentities($url, ENT_QUOTES );

            $url = rss_real_escape_string($url);

            // author
            if (array_key_exists('dc', $item) && array_key_exists('creator', $item['dc'])) {
                // RSS 1.0
                $author = $item['dc']['creator'];
            } else if (array_key_exists('author_name', $item)) {
                // Atom 0.3
                $author = $item['author_name'];
            } else {
                $author = "";
            }

            $author = trim(strip_tags($author));

            // pubdate
            $cDate = -1;
            if (array_key_exists('dc', $item) && array_key_exists('date', $item['dc'])) {
                // RSS 1.0
                $cDate = parse_w3cdtf($item['dc']['date']);
            }
            elseif (array_key_exists('pubdate', $item)) {
                // RSS 2.0 (?)
                // We use the second param of strtotime here as a workaround
                // of a PHP bug with strtotime. If the pubdate field doesn't
                // contain seconds, the strtotime function will use the current
                // time to fill in seconds in PHP4. This interferes with the
                // update mechanism of gregarius. See ticket #328 for the full
                // gory details. Giving a known date as a second param to
                // strtotime fixes this problem, hence the 0 here.
                $cDate = strtotime($item['pubdate'], 0);
            }
            elseif (array_key_exists('published',$item)) {
                // atom 1.0
                $cDate = parse_iso8601($item['published']);
            }
            elseif (array_key_exists('issued', $item)) {
                //Atom, alternative
                $cDate = parse_iso8601($item['issued']);
            }
            elseif (array_key_exists('updated', $item)) {
                //Atom, alternative
                $cDate = parse_iso8601($item['updated']);
            }
            elseif (array_key_exists('created', $item)) {
                // atom 0.3
                $cDate = parse_iso8601($item['created']);
            }

            // enclosure
            if (array_key_exists('enclosure@url', $item) ) {
                $enclosure = $item['enclosure@url'];
                // If the enclosure is an image, prepend it to the content
                // but only if it isn't there yet. strpos() is compared
                // strictly so that a hit at offset 0 counts as "present".
                if ($enclosure &&
                        array_key_exists('enclosure@type', $item) &&
                        preg_match('#image/(png|gif|jpe?g)#', $item['enclosure@type']) &&
                        strpos($description, $enclosure) === false) {
                    $description = '<img src="'.$enclosure.'" alt="" />' . $description;
                    $enclosure = '';
                }
            } else {
                $enclosure = "";
            }

            // drop items with an url exceeding our column length: we couldn't provide a
            // valid link back anyway.
            if (strlen($url) >= 255) {
                continue;
            }

            // Truncate BEFORE escaping: cutting an already escaped string
            // could split an escape sequence and leave a dangling backslash
            // in the SQL literal.
            if (strlen($title) >= 255) {
                $title = substr($title, 0, 254);
            }
            $dbtitle = rss_real_escape_string($title);

            if ($cDate > 0) {
                $sec = "FROM_UNIXTIME($cDate)";
            } else {
                $sec = "null";
            }

            // check whether we already have this item
            if ($guid) {
                $sql = "select id,unread, md5sum, guid, pubdate from ".getTable("item")
                       ." where cid=$cid and guid='$guid'";
            } else {
                $sql = "select id,unread, md5sum, guid, pubdate from ".getTable("item")
                       ." where cid=$cid and url='$url' and title='$dbtitle'"
                       ." and (pubdate is NULL OR pubdate=$sec)";
            }

            $subres = rss_query($sql);
            list ($indb, $state, $dbmd5sum, $dbGuid, $dbPubDate) = rss_fetch_row($subres);

            if ($indb) {
                $itemIdsInFeed[] = $indb;
                // Known item: rewrite it only when its content hash changed,
                // it is not flagged deleted, and updates are allowed.
                if (!($state & RSS_MODE_DELETED_STATE) && $md5sum != $dbmd5sum && $allowUpdates) {
                    list ($cid, $indb, $description) =
                        rss_plugin_hook('rss.plugins.items.updated', array ($cid, $indb, $description));

                    $sql = "update ".getTable("item")
                           ." set "." description='".rss_real_escape_string($description)."', "
                           ." unread = unread | ".RSS_MODE_UNREAD_STATE
                           .", md5sum='$md5sum'" . " where cid=$cid and id=$indb";

                    rss_query($sql);
                    $updatedIds[] = $indb;
                }
            } else { // $indb = "" . This must be new item then. In you go.

                list ($cid, $dbtitle, $url, $description) =
                    rss_plugin_hook('rss.plugins.items.new', array ($cid, $dbtitle, $url, $description));

                $sql = "insert into ".getTable("item")
                       ." (cid, added, title, url, enclosure,"
                       ." description, author, unread, pubdate, md5sum, guid) "
                       ." values ("."$cid, now(), '$dbtitle', "
                       ." '$url', '".rss_real_escape_string($enclosure)."', '"
                       .rss_real_escape_string($description)."', '"
                       .rss_real_escape_string($author)."', "
                       ."$mode, $sec, '$md5sum', '$guid')";

                rss_query($sql);

                $newIid = rss_insert_id();
                $itemIdsInFeed[] = $newIid;
                $updatedIds[] = $newIid;
                rss_plugin_hook('rss.plugins.items.newiid',array($newIid,$item,$cid));
            } // end handling of this item

        } // end handling of all the items in this feed

        $sql = "update " .getTable("channels") . " set "." itemsincache = '"
               . serialize($itemIdsInFeed) . "' where id=$cid";
        rss_query($sql);

    } // end handling all the feeds we were asked to handle

    if (!$singleFeed) {
        return array (-1, $updatedIds);
    }
    if ($rss) {
        // when everything went well, return the feed's origin code
        // and the ids of the new/updated items
        return array ($rss->rss_origin, $updatedIds);
    }
    return array (-1, array ());
}
421
/**
 * Looks up the id of the root folder: the first folder (by position) whose
 * name is the empty string.
 *
 * @return mixed the folder id, or 0 when no such folder exists
 */
function getRootFolder() {
    // The explicit space before "where" keeps the statement valid whether
    // or not getTable() pads the table name with whitespace.
    $sql = "select id from ".getTable("folders")." where name = '' order by position asc limit 1";
    $row = rss_fetch_row(rss_query($sql));

    if (!is_array($row) || empty($row[0])) {
        return 0;
    }
    return $row[0];
}
432
/**
 * Subscribes to a new feed.
 *
 * The URL is sanitized and checked against existing subscriptions, the feed
 * is fetched with fetch_rss(), and a row is inserted into the channels
 * table. Optionally a favicon is looked up, tags are attached and a refresh
 * interval taken from the feed's syndication hints is stored.
 *
 * @param string      $url      feed URL
 * @param int         $folderid target folder; 0 selects the root folder
 * @param string|null $title_   title overriding the one found in the feed
 * @param string|null $descr_   description overriding the feed's own
 * @param string|null $tags     tags to attach to the new channel
 * @return array array($newChannelId, "") on success;
 *               array(-2, $message) for fatal problems (bad arguments,
 *               already subscribed);
 *               array(-1, $message) when no feed could be retrieved or
 *               parsed. Messages are HTML.
 */
function add_channel($url, $folderid = 0, $title_=null,$descr_=null,$tags = null) {
    // The messages below are HTML, so user-supplied values are escaped
    // before being embedded in them.
    if (!$url || strlen($url) <= 7) {
        return array (-2, "Invalid URL " . htmlspecialchars((string) $url, ENT_QUOTES));
    }
    if (!is_numeric($folderid)) {
        return array (-2, "Invalid folderid " . htmlspecialchars((string) $folderid, ENT_QUOTES));
    }

    $url = sanitize(str_replace('&amp;','&',$url), RSS_SANITIZER_URL);
    $safeUrl = htmlspecialchars((string) $url, ENT_QUOTES);

    $urlDB = rss_real_escape_string($url); //htmlentities($url);

    $res = rss_query("select count(*) as channel_exists from ".getTable("channels")." where url='$urlDB'");
    list ($channel_exists) = rss_fetch_row($res);
    if ($channel_exists > 0) {
        // fatal
        return array (-2, "Looks like you are already subscribed to this channel");
    }

    $res = rss_query("select 1+max(position) as np from ".getTable("channels"));
    list ($np) = rss_fetch_row($res);

    if (!$np) {
        $np = "0";
    }

    // Here we go! Magpie is noisy, so only real errors are reported.
    $old_level = error_reporting(E_ERROR);
    $rss = fetch_rss($url);
    error_reporting($old_level);

    if (!$rss) {
        global $MAGPIE_ERROR;
        $retError = "I'm sorry, I couldn't retrieve <a href=\"$safeUrl\">$safeUrl</a>.";
        if ($MAGPIE_ERROR) {
            $retError .= "\n<br />$MAGPIE_ERROR\n";
        }
        // non-fatal, will look further
        return array (-1, $retError);
    }

    $channel = is_object($rss) ? $rss->channel : array();

    if ($title_) {
        $title = rss_real_escape_string($title_);
    }
    elseif (array_key_exists('title#', $channel)) {
        if (array_key_exists('title', $channel)) {
            $title = rss_real_escape_string($channel['title']);
        } else {
            $title = " ";
        }
    }
    else {
        $title = "";
    }

    if (array_key_exists('link', $channel)) {
        $siteurl = rss_real_escape_string(htmlentities($channel['link']));
    } else {
        $siteurl = "";
    }

    // Refresh interval from the feed's syndication hints; only the
    // "hourly" period is honoured. The frequency comes from the feed, so
    // it is validated before being used in arithmetic.
    $refreshinterval = 0;
    if (array_key_exists('syn', $channel) && is_array($channel['syn'])) {
        $syn = $channel['syn'];
        if (array_key_exists('updateperiod', $syn)
                && "hourly" == $syn['updateperiod']
                && array_key_exists('updatefrequency', $syn)
                && is_numeric($syn['updatefrequency'])
                && $syn['updatefrequency'] > 0) {
            $refreshinterval = (int) round(60 * $syn['updatefrequency']);
        }
    }

    if ($descr_) {
        $descr = rss_real_escape_string($descr_);
    }
    elseif (array_key_exists('description', $channel)) {
        $descr = rss_real_escape_string($channel['description']);
    }
    else {
        $descr = "";
    }

    //lets see if this server has a favicon
    $icon = "";
    if (getConfig('rss.output.showfavicons') && $siteurl != "") {
        $match = get_host($siteurl, $host);
        $uri = "http://".$host."favicon.ico";
        if ($match && getContentType($uri, $contentType)) {
            if (preg_match("/image\/x-icon/", $contentType)) {
                $icon = $uri;
            }
        }
    }

    // URLs carrying credentials (user:password@host) mark the feed private.
    $private = preg_match('|(https?://)([^:]+:[^@]+@)(.+)$|',$url);

    if ($title == "") {
        // non-fatal, will look further
        return array (-1, "I'm sorry, I couldn't extract a valid RSS feed from <a href=\"$safeUrl\">$safeUrl</a>.");
    }

    $title = strip_tags($title);
    $descr = strip_tags($descr);

    // add channel to root folder by default
    if (!$folderid) {
        $folderid = getRootFolder();
    }

    list($title,$urlDB,$siteurl,$folderid,$descr,$icon) =
        rss_plugin_hook('rss.plugins.feed.new',
                        array ($title,$urlDB,$siteurl,$folderid,$descr,$icon));

    $mode = RSS_MODE_UNREAD_STATE;
    if ($private) {
        $mode |= RSS_MODE_PRIVATE_STATE;
    }

    $sql = "insert into ".getTable("channels")
           ." (title, url, siteurl, parent, descr, dateadded, icon, position, mode, daterefreshed)"
           ." values ('$title', '$urlDB', '$siteurl', $folderid, '$descr', now(), '$icon', $np, $mode, '0000-00-00 00:00:00')";

    rss_query($sql);
    $newid = rss_insert_id();

    if ($icon && cacheFavicon($icon)) {
        rss_query("update " . getTable("channels") . " set icon='blob:".$icon."'"
                  ." where id=$newid");
    }

    if ($tags != "") {
        __exp__submitTag($newid,$tags,"'channel'");
    }

    if ($refreshinterval > 0) {
        setProperty($newid, 'rss.config.refreshinterval', $refreshinterval);
    }

    return array ($newid, "");
}
581
/**
 * Replaces root-relative urls with absolute ones for anchors and images.
 * Credits: Julien Mudry
 *
 * Every href="/..." or src="/..." attribute (single or double quoted) is
 * prefixed with the scheme and host of $feed_url. Protocol-relative URLs
 * ("//host/...") are left alone, since they already name their own host.
 *
 * @param string $content  HTML fragment to rewrite
 * @param string $feed_url URL providing scheme and host; when it has no
 *                         http, https or ftp scheme, or no host, the content
 *                         is returned unchanged
 * @return string the rewritten fragment
 */
function relative_to_absolute($content, $feed_url) {
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '') {
        return $content;
    }
    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    // Escape "\" and "$" so that the base can never be read as a
    // back-reference inside the replacement string.
    $base = addcslashes($protocol[0].$server_url, '\\$');

    // (?!\/) keeps protocol-relative URLs such as src="//cdn.example/x" intact.
    return preg_replace('/(href|src)=(["\'])\/(?!\/)/', '${1}=${2}'.$base.'/', $content);
}
603
/**
 * Parses an ISO 8601 date-time, loosely based on parse_w3cdtf from MagpieRSS.
 *
 * Accepts the extended and the basic notation (separators are optional),
 * optional seconds with an optional fraction (the fraction is ignored), and
 * an optional "Z" or +hh:mm / -hh:mm zone designator. A time part is
 * required. Without a zone designator the time is taken as GMT.
 *
 * @param string $date_str the date to parse
 * @return int Unix timestamp, or -1 when the string does not match
 */
function parse_iso8601($date_str) {
    // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes,
    //         6 seconds incl. optional colon, 7 seconds proper,
    //         8 zone sign, 9 zone hours, 10 zone minutes, 11 "Z"
    $pat = "/(\d{4})-?(\d{2})-?(\d{2})T?(\d{2}):?(\d{2})(:?(\d{2}\.?\d*))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if (!preg_match($pat, $date_str, $match)) {
        return -1;
    }

    // Group 7, not 6: group 6 may start with ":" and intval(":05") is 0,
    // which silently dropped the seconds.
    $seconds = isset($match[7]) ? intval($match[7]) : 0;

    // calc epoch for the given date assuming GMT
    $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds,
                      (int) $match[2], (int) $match[3], (int) $match[1]);

    // "Z" (zulu time, aka GMT) and a missing designator both mean: no offset.
    $offset = 0;
    if (isset($match[8]) && $match[8] !== '') {
        $tz_hour = isset($match[9]) ? (int) $match[9] : 0;
        $tz_min = isset($match[10]) ? (int) $match[10] : 0;
        $offset_secs = (($tz_hour * 60) + $tz_min) * 60;

        // is timezone ahead of GMT? then subtract offset
        $offset = ($match[8] == '+') ? -$offset_secs : $offset_secs;
    }

    return $epoch + $offset;
}
652
/**
 * Returns the relative path of the install dir, e.g:
 * http://host.com/thing/ -> "/thing/"
 * http://host.com/ -> "/"
 *
 * The base path is derived once from $_SERVER['PHP_SELF'] and cached in a
 * static variable for the rest of the request. When RSS_FILE_LOCATION is
 * defined and the script directory ends with it (case-insensitively), that
 * suffix is removed.
 *
 * @param string $path optional path appended to the install dir
 * @return string install dir, always ending in "/", followed by $path
 */
function getPath($path='') {
    static $ret;
    if ($ret === NULL) {
        $ret = dirname($_SERVER['PHP_SELF']);

        // Plain case-insensitive suffix test; eregi() no longer exists
        // as of PHP 7.0.
        if (defined('RSS_FILE_LOCATION')) {
            $suffix = (string) RSS_FILE_LOCATION;
            $suffixLen = strlen($suffix);
            if ($suffixLen > 0
                    && strlen($ret) >= $suffixLen
                    && strcasecmp(substr($ret, -$suffixLen), $suffix) === 0) {
                $ret = (string) substr($ret, 0, strlen($ret) - $suffixLen);
            }
        }
        if (substr($ret, -1) == "\\") { // Take off trailing backslash
            $ret = (string) substr($ret, 0, -1);
        }
        if (substr($ret, -1) != "/") {  // Add a frontslash
            $ret .= "/";
        }
    }
    return $ret . $path;
}
// Call getPath() once while this file is loaded; getPath() caches its base
// path in a static variable on first use. $dummy is not used in the code
// visible here -- presumably this only primes that cache (TODO confirm).
$dummy = getPath();
676
/**
 * Builds the URL of an archive page.
 *
 * With 'rss.output.usemodrewrite' enabled the result is
 * <install path>[<channel>/]Y/m/[d/]; otherwise it is a feed.php query
 * string carrying y, m and optionally d and channel.
 *
 * @param int    $ts      timestamp of the archive period
 * @param string $channel channel name used in rewritten URLs (may be empty)
 * @param mixed  $cid     channel id used in query-string URLs (may be empty)
 * @param bool   $dayView link to a single day instead of a month
 * @return string the archive URL ("&" is emitted as "&amp;")
 */
function makeArchiveUrl($ts, $channel, $cid, $dayView) {
    $base = getPath();

    if (getConfig('rss.output.usemodrewrite')) {
        $prefix = $channel ? "$channel/" : "";
        $format = $dayView ? 'Y/m/d/' : 'Y/m/';
        return $base . $prefix . rss_date($format, $ts, false);
    }

    $query = "feed.php?";
    if ($cid) {
        $query .= "channel=$cid&amp;";
    }
    $query .= "y=" . rss_date('Y', $ts, false);
    $query .= "&amp;m=" . rss_date('m', $ts, false);
    if ($dayView) {
        $query .= "&amp;d=" . rss_date('d', $ts, false);
    }
    return $base . $query;
}
698
/**
 * Fetches a URL, or reads a local file, and returns the content.
 *
 * Anything that is an existing file, or that parse_url() reports as having
 * neither scheme nor host, is read from the local filesystem; everything
 * else is fetched with Snoopy.
 *
 * NOTE(review): because local paths are accepted, callers must never pass
 * an unchecked, user-supplied value here.
 *
 * @param string $url    remote URL or local file name
 * @param int    $maxlen maximum length handed to Snoopy for remote fetches;
 *                       0 keeps Snoopy's default. Ignored for local files.
 * @return mixed file content ("" when the file cannot be opened), or the
 *               Snoopy result for remote URLs
 */
function getUrl($url, $maxlen = 0) {
    //Bug: in windows, scheme returned by parse_url contains the drive letter
    //of the file so a test like !isset(scheme) does not work
    //maybe it would be better to only use is_file() which only detect
    //local files?
    $urlParts = parse_url($url);
    if (@is_file($url) || (!isset($urlParts['scheme']) && !isset($urlParts['host'])) ) {
        //local file!
        $c = "";
        $h = @fopen($url, "r");
        if ($h) {
            while (!feof($h)) {
                $chunk = @fread($h, 8192);
                if ($chunk === false) {
                    break; // read error: return what we have so far
                }
                $c .= $chunk;
            }
            // Close only a handle that was really opened: fclose(false)
            // throws a TypeError on PHP 8, which "@" cannot silence.
            fclose($h);
        }
        return $c;
    }

    rss_require('extlib/Snoopy.class.inc');
    $client = new Snoopy();
    $client->agent = MAGPIE_USER_AGENT;
    $client->use_gzip = getConfig('rss.output.compression');

    if ($maxlen) {
        $client->maxlength = $maxlen;
    }
    @ $client->fetch($url);
    return $client->results;
}
732
/**
 * Feed Autodiscovery
 *
 * Returns the attributes of every <link> element of the document at $url
 * that mentions "alternate" and does not mention "stylesheet" -- hopefully
 * the rss/atom/rdf feeds advertised by that document.
 * See http://diveintomark.org/archives/2002/06/02/important_change_to_the_link_tag
 *
 * @param string $url URL of a web document containing <link> elements
 * @return array one entry per matching <link>: an array mapping lower-cased
 *               attribute names to values, with href made absolute
 */
function extractFeeds($url) {
    rss_require('extlib/uri_util.php');
    $cnt = getUrl($url);
    $ret = array ();

    //find all link tags
    if (!preg_match_all('|<link\s+\w*=["\'][^"\']+["\']+[^>]*>|Uis', $cnt, $res)) {
        return $ret;
    }

    // foreach instead of each(), which no longer exists as of PHP 8.0
    foreach ($res[0] as $match) {
        $lowered = strtolower($match);
        // we only want '<link alternate=...'
        if (strpos($lowered, 'alternate') === false || strpos($lowered, 'stylesheet') !== false) {
            continue;
        }
        // extract the attributes
        if (!preg_match_all('|([a-zA-Z]*)=["\']([^"\']*)|', $match, $res2, PREG_SET_ORDER)) {
            continue;
        }

        //populate the return array: attr_name => attr_value
        $tmp = array ();
        foreach ($res2 as $match2) {
            $attr = strtolower(trim($match2[1]));
            $val = trim($match2[2]);
            // make sure we have absolute URI's
            if ($attr == "href") {
                $val = absolute_uri($val, $url);
            }
            $tmp[$attr] = $val;
        }
        $ret[] = $tmp;
    }
    return $ret;
}
771
/**
 * Applies stripslashes() repeatedly until the value stops changing, so
 * multiply escaped input is fully unescaped.
 *
 * @param string $string possibly (multiply) slash-escaped text
 * @return string the text once stripslashes() has no further effect
 */
function real_strip_slashes($string) {
    $current = $string;
    while (true) {
        $stripped = stripslashes($current);
        if ($stripped == $current) {
            return $current;
        }
        $current = $stripped;
    }
}
778
/**
 * htmlspecialchars() wrapper that leaves quotes untouched (ENT_NOQUOTES)
 * and uses the configured output encoding, falling back to
 * DEFAULT_OUTPUT_ENCODING when 'rss.output.encoding' is not set.
 *
 * @param string $in text to escape
 * @return string the escaped text
 */
function rss_htmlspecialchars($in) {
    $encoding = getConfig('rss.output.encoding');
    if (!$encoding) {
        $encoding = DEFAULT_OUTPUT_ENCODING;
    }
    return htmlspecialchars($in, ENT_NOQUOTES, $encoding);
}
783
/**
 * Returns the leading words of a text followed by "...".
 *
 * A word is a run of characters followed by whitespace; at most $count of
 * them are kept. Line breaks become spaces and HTML tags are removed.
 *
 * @param string $text  source text
 * @param int    $count maximum number of words to keep
 * @return string the shortened text, or "" when $text contains no
 *                whitespace-terminated word
 */
function firstNwords($text, $count=7) {
    $pattern = '/(.+?\s+){1,' . $count . '}/';
    if (!preg_match($pattern, $text, $found)) {
        return "";
    }

    $oneLine = preg_replace('/(\r\n|\r|\n)/', ' ', $found[0] . '...');
    return strip_tags($oneLine);
}
794
/**
 * Shortens a piece of HTML without cutting inside a tag.
 * Props: mr at bbp dot biz - http://ch2.php.net/substr
 *
 * Characters are counted only while the scanner is outside markup. Once
 * more than ($minimum_length - $length_offset) of them have been seen, the
 * text is cut at the first position where no tag is open and -- unless
 * $cut_words is set -- the next character is a space.
 *
 * @param string $posttext       HTML to shorten
 * @param int    $minimum_length approximate target length; texts that are
 *                               not longer (in bytes) are returned unchanged
 * @param int    $length_offset  how far below $minimum_length a cut may occur
 * @param bool   $cut_words      allow cutting in the middle of a word
 * @param bool   $dots           append "..." to a shortened text
 * @return string the (possibly shortened) text
 */
function html_substr($posttext, $minimum_length = 200, $length_offset = 20, $cut_words = FALSE, $dots = TRUE) {
    $total = strlen($posttext);
    if ($total <= $minimum_length) {
        // Short enough already.
        return $posttext;
    }

    $tagState = 0;      // 0: outside markup, 2: inside an element's content
    $inQuotes = FALSE;  // inside a double-quoted attribute value
    $visible = 0;       // characters counted so far
    $threshold = $minimum_length - $length_offset;

    for ($pos = 0; $pos < $total; $pos++) {
        $char = substr($posttext, $pos, 1);
        $following = ($pos < $total - 1) ? substr($posttext, $pos + 1, 1) : "";

        if ($inQuotes) {
            // Ignore everything inside quotes (e.g. slashes in paths)
            // until the closing quote shows up.
            if ($char == '"') {
                $inQuotes = FALSE;
            }
        } else {
            // "<" adds 3 for an opening tag (<a href...) but only 1 for a
            // closing tag (</a>).
            if ($char == '<') {
                $tagState += ($following == '/') ? 1 : 3;
            }
            // A slash marks an ending (</a> or ... />): subtract 2.
            if ($char == '/' && $tagState <> 0) {
                $tagState -= 2;
            }
            // ">" subtracts 1.
            if ($char == '>') {
                $tagState -= 1;
            }
            if ($char == '"') {
                $inQuotes = TRUE;
            }
        }

        // Count only the chars outside html tags
        if ($tagState == 2 || $tagState == 0) {
            $visible++;
        }

        // Enough text seen and no tag open: chop here.
        if ($visible > $threshold && $tagState == 0 && ($following == ' ' || $cut_words == TRUE)) {
            $shortened = substr($posttext, 0, $pos + 1);
            return $dots ? $shortened . '...' : $shortened;
        }
    }

    return $posttext;
}
870
871
/**
 * Prints the "Show items" filter form (unread only / read and unread).
 *
 * The form posts back to the current request URI and submits itself through
 * an inline onchange handler as soon as another option is picked. The option
 * whose value equals the current selection is pre-selected.
 *
 * @param array $args two-element list: the currently selected SHOW_* value
 *                    and the "show private" flag (the flag is only consumed
 *                    by the disabled block further down)
 */
function showViewForm($args) {
    list($curValue, $show_private) = $args;

    // REQUEST_URI is controlled by the client: escape it before it lands in
    // an HTML attribute so a crafted URL cannot break out of action="...".
    $action = htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES);

    // post back to self, we should be able to handle the request, shouldn't we.
    echo "\n<form action=\"". $action ."\" method=\"post\" id=\"frmShow\">\n"
    ."<p><label for=\"".SHOW_WHAT."\">".__('Show items: ')."</label>\n"
    ."<select name=\"".SHOW_WHAT."\" id=\"".SHOW_WHAT."\" "." onchange=\"document.getElementById('frmShow').submit();\">\n"
    ."\t<option value=\"".SHOW_UNREAD_ONLY."\"" . (SHOW_UNREAD_ONLY == $curValue ? " selected=\"selected\"" : "") . ">".__('Unread only')."</option>\n"
    ."\t<option value=\"".SHOW_READ_AND_UNREAD."\"" . (SHOW_READ_AND_UNREAD == $curValue ? " selected=\"selected\"" : "") . ">".__('Read and unread')."</option>\n"
    ."</select>"
    ."</p>\n";
    /*
    		Disabled: "show private" checkbox for logged-in users.

    		if(isLoggedIn()) {
    			echo "<p><label for=\"chkPrivate\">".__('Show Private:')."</label>\n"
    			."<input type=\"checkbox\" name=\"chkPrivate\" id=\"chkPrivate\" value=\"1\" onchange=\"if(false == document.getElementById('chkPrivate').checked) { document.getElementById('chkPrivate').value = 0; document.getElementById('chkPrivate').checked = true; } document.getElementById('frmShow').submit();\"" . (1 == $show_private ? " checked" : "") . ">\n"
    			."</p>\n";
    		}
    		*/
    echo "</form>\n";
}
892
893
/**
 * Counts unread items, optionally restricted to one channel or one parent.
 *
 * An item is counted when its `unread` field has RSS_MODE_UNREAD_STATE set
 * and RSS_MODE_DELETED_STATE clear, and its channel's `mode` does not have
 * RSS_MODE_DELETED_STATE set. When hidePrivate() is true, items flagged with
 * RSS_MODE_PRIVATE_STATE are left out as well.
 *
 * Results are kept in a static array keyed by the ($cid, $fid) pair, so a
 * repeated call with the same arguments does not hit the database again.
 *
 * @param mixed $cid channel id; when truthy the count is limited to c.id
 * @param mixed $fid id matched against channels.parent; only looked at when
 *                   $cid is falsy
 * @return mixed the count as returned by the database layer
 */
function getUnreadCount($cid, $fid) {
    static $_uccache = array();
    $key_ = "key $cid $fid key";
    if (isset($_uccache[$key_])) {
        return $_uccache[$key_];
    }

    // The alias is separated by an explicit space so the statement stays
    // valid whether or not getTable() pads its return value.
    $sql = "select count(*) from " . getTable("item") ." i "
           ."inner join " . getTable('channels')." c on c.id = i.cid "
           ." where i.unread & ".RSS_MODE_UNREAD_STATE. " and not(i.unread & " .
           RSS_MODE_DELETED_STATE .") "
           ." and not(c.mode & ".RSS_MODE_DELETED_STATE.") ";

    if (hidePrivate()) {
        $sql .= " and not(i.unread & ".RSS_MODE_PRIVATE_STATE.")";
    }

    // Ids are compared numerically; the integer cast keeps arbitrary text
    // out of the statement.
    if ($cid) {
        $sql .= " and c.id=" . (int) $cid . " ";
    }
    elseif ($fid) {
        $sql .= " and c.parent=" . (int) $fid . " ";
    }

    $res = rss_query($sql);

    list ($_uccache[$key_]) = rss_fetch_row($res);
    return $_uccache[$key_];
}
923
/**
 * Formats a timestamp with strftime() and passes the result through
 * utf8_encode().
 *
 * @param string $fmt         strftime() format string
 * @param int    $ts          Unix timestamp
 * @param bool   $addTZOffset when true, the 'rss.config.tzoffset' setting
 *                            (in hours) is added to $ts before formatting
 * @return string the formatted date
 */
function rss_locale_date ($fmt, $ts, $addTZOffset = true) {
    $onWindows = isset($_SERVER["WINDIR"]);
    if ($onWindows) {
        // %e is not available on Windows; %#d is used in its place
        $fmt = str_replace("%e", "%#d", $fmt);
    }

    $when = $ts;
    if ($addTZOffset) {
        $when = $ts + 3600 * getConfig('rss.config.tzoffset');
    }

    $formatted = strftime($fmt, $when);
    return utf8_encode($formatted);
}
936
/**
 * Formats a timestamp with date().
 *
 * @param string $fmt         date() format string
 * @param int    $ts          Unix timestamp
 * @param bool   $addTZOffset when true, the 'rss.config.tzoffset' setting
 *                            (in hours) is added to $ts before formatting
 * @return string the formatted date
 */
function rss_date($fmt, $ts, $addTZOffset = true) {
    if (!$addTZOffset) {
        return date($fmt, $ts);
    }

    $shifted = $ts + 3600 * getConfig('rss.config.tzoffset');
    return date($fmt, $shifted);
}
944
/**
 * Forwards a profiling message to the global 'rss' object.
 *
 * Nothing happens unless the PROFILING constant is defined and truthy,
 * $GLOBALS['rss'] is set, and that object has a _pf() method.
 *
 * @param string $msg the message to record
 */
function _pf($msg) {
    if (!defined('PROFILING') || !PROFILING) {
        return;
    }
    if (!isset($GLOBALS['rss'])) {
        return;
    }
    if (method_exists($GLOBALS['rss'], "_pf")) {
        $GLOBALS['rss'] -> _pf($msg);
    }
}
950
951
/**
 * Guesses the URL scheme prefix ("http://" or "https://") of the current
 * request.
 *
 * Order of precedence:
 *  1. the RSS_SERVER_PROTO constant, when defined, is returned as is;
 *  2. a non-empty $_SERVER['HTTPS'] other than "off" means https, so TLS on
 *     a non-standard port is recognised too;
 *  3. $_SERVER['SERVER_PORT'] equal to 443 means https;
 *  4. everything else falls back to http.
 *
 * @return string the scheme including the trailing "://"
 */
function guessTransportProto() {

    if (defined ('RSS_SERVER_PROTO')) {
        return RSS_SERVER_PROTO;
    }

    // Some servers report plain HTTP as HTTPS=off rather than leaving the
    // entry unset, hence the explicit comparison.
    if (!empty($_SERVER["HTTPS"]) && strtolower($_SERVER["HTTPS"]) != 'off') {
        return "https://";
    }

    if (isset($_SERVER["SERVER_PORT"]) && $_SERVER["SERVER_PORT"] == 443) {
        return "https://";
    }

    // best effort
    return "http://";
}
970
/**
 * Sends a Location header pointing at a URL inside this installation.
 *
 * The target is assembled from guessTransportProto(), the request's
 * HTTP_HOST, getPath() and the given suffix. Only the header is sent; the
 * function returns normally afterwards.
 *
 * @param string $url suffix appended after getPath()
 */
function rss_redirect($url = "") {
    $target = guessTransportProto() . $_SERVER['HTTP_HOST'] . getPath() . $url;
    header("Location: " . $target);
}
975
/**
 * Stand-in for getallheaders() built from $_SERVER (fixes #117).
 * http://www.php.net/manual/en/function.getallheaders.php
 *
 * Every $_SERVER entry whose key starts with "HTTP_" is returned under that
 * key with the prefix removed; the rest of the key is left untouched (e.g.
 * HTTP_IF_NONE_MATCH becomes IF_NONE_MATCH). Keys that merely contain
 * "HTTP_" somewhere in the middle are ignored.
 *
 * @return array header name (without the HTTP_ prefix) => value
 */
function rss_getallheaders() {
    $headers = array();
    foreach($_SERVER as $h=>$v) {
        // preg_match replaces ereg(), which no longer exists in PHP 7+
        if(preg_match('/^HTTP_(.+)$/s',$h,$hp)) {
            $headers[$hp[1]]=$v;
        }
    }
    return $headers;
}
989
/**
 * Replaces the tags attached to an entity with the ones given as a
 * space-separated string. (Moved here from ajax.php.)
 *
 * The input is stripped of markup, trimmed, cleaned of everything
 * ALLOWED_TAGS_REGEXP matches, run through utf8_encode() and cut down to at
 * most MAX_TAGS_PER_ITEM entries before being handed to
 * __priv__updateTags().
 *
 * @param mixed  $id   id of the tagged entity
 * @param string $tags space-separated tag list
 * @param string $type type literal forwarded to __priv__updateTags(); the
 *                     default already carries its SQL quotes
 * @return string "<id>,<stored tags separated by spaces>"
 */
function __exp__submitTag($id,$tags,$type = "'item'") {
    $plain = trim(strip_tags($tags));
    $filtered = utf8_encode(preg_replace(ALLOWED_TAGS_REGEXP, '', $plain));
    $candidates = array_slice(explode(" ", $filtered), 0, MAX_TAGS_PER_ITEM);
    $stored = __priv__updateTags($id, $candidates, $type);
    return "$id,". implode(" ", $stored);
}
998
/**
 * Replaces all tags attached to one entity.
 *
 * Existing metatag rows for ($fid, $type) are deleted first. Each tag is
 * then sanitized, inserted into the tag table (or looked up when the insert
 * reports a duplicate row) and linked through a new metatag row. Finally the
 * cache is invalidated.
 *
 * @param mixed  $fid  id of the tagged entity; used as an integer
 * @param array  $tags candidate tag strings
 * @param string $type entity type, by convention already wrapped in single
 *                     quotes (e.g. "'item'")
 * @return array the sanitized tags that were linked successfully, sorted
 */
function __priv__updateTags($fid,$tags,$type) {
    // Both values end up inside SQL statements and may originate from a
    // client request: force the id to an integer and rebuild the type as a
    // properly escaped string literal.
    $fid = (int) $fid;
    $type = "'" . sanitize(trim($type, "'\""), RSS_SANITIZER_SIMPLE_SQL) . "'";

    rss_query("delete from " .getTable('metatag')
              . " where fid=$fid and ttype=$type");
    $ret = array();
    foreach($tags as $tag) {
        $ttag = sanitize(trim($tag),
                         RSS_SANITIZER_NO_SPACES | RSS_SANITIZER_SIMPLE_SQL
                        );

        // Compare the sanitized form, since that is what $ret holds, and
        // compare strictly so numeric look-alikes ("10" vs "1e1") are kept
        // apart. Tags that sanitize to nothing are skipped.
        if ($ttag == "" || in_array($ttag,$ret,true)) {
            continue;
        }

        rss_query( "insert into ". getTable('tag')
                   . " (tag) values ('$ttag')", false );
        $tid = 0;
        if(rss_is_sql_error(RSS_SQL_ERROR_DUPLICATE_ROW)) {
            // the tag already exists: fetch its id instead
            list($tid)=rss_fetch_row(rss_query("select id from "
                                               .getTable('tag') . " where tag='$ttag'"));
        } else {
            $tid = rss_insert_id();
        }
        if ($tid) {
            rss_query( "insert into ". getTable('metatag')
                       . " (fid,tid,ttype,tdate) values ($fid,$tid,$type,now())" );
            if (rss_is_sql_error(RSS_SQL_ERROR_NO_ERROR)) {
                $ret[] = $ttag;
            }
        }
    }

    rss_invalidate_cache();

    sort($ret);
    return $ret;
}
1035
1036
1037
/**
 * Percent-encodes every non-ASCII byte of a string (originally taken from
 * WordPress).
 *
 * Bytes below 128 are copied unchanged; every other byte becomes "%" followed
 * by its lowercase hexadecimal value. Encoding byte by byte handles UTF-8
 * sequences of any length, including four-byte ones, and never drops
 * incomplete sequences.
 *
 * @param string $utf8_string the text to encode
 * @return string the encoded text
 */
function utf8_uri_encode( $utf8_string ) {
    $utf8_string = (string) $utf8_string;
    $encoded = '';
    $length = strlen( $utf8_string );

    for ($i = 0; $i < $length; $i++ ) {
        $value = ord( $utf8_string[ $i ] );

        if ( $value < 128 ) {
            $encoded .= $utf8_string[ $i ];
        } else {
            // values >= 128 always yield exactly two hex digits
            $encoded .= '%' . dechex( $value );
        }
    }

    return $encoded;
}
1073
1074/*
1075// Deprecated in favor of the new core.php functionalities
1076function ETagHandler($key) {
1077    // This function should be used inline for speed. However if you have already
1078    // included util.php you might as well use it.
1079    if (array_key_exists('HTTP_IF_NONE_MATCH',$_SERVER) &&
1080            $_SERVER['HTTP_IF_NONE_MATCH'] == $key) {
1081        header("HTTP/1.1 304 Not Modified");
1082        flush();
1083        exit();
1084    } else {
1085        header("ETag: $key");
1086        // ob_start('ob_gzhandler');
1087    }
1088}
1089*/
1090//these two eval_ functions taken from the comments at http://us3.php.net/eval
1091/*
1092function eval_mixed_helper($arr) {
1093    return ("echo stripslashes(\"".addslashes($arr[1])."\");");
1094}
1095
1096function eval_mixed($string) {
1097    $string = "<? ?>".$string."<? ?>";
1098    $string = preg_replace("/<\?=\s+(.*?)\s+\?>/", "<? echo $1; ?>", $string);
1099    $string = str_replace('?>', '', str_replace( array('<?php', '<?'), '', preg_replace_callback( "/\?>((.|\n)*?)<\?(php)?/","eval_mixed_helper",$string) ) );
1100    return $string;
1101}
1102*/
1103
/**
 * Returns the Subversion revision of the installation, if one can be found.
 *
 * The revision is taken from the first "ver/<digits>/" occurrence in
 * GREGARIUS_HOME/.svn/dir-wcprops, read through getUrl(). The outcome of the
 * first call is kept in a static variable and returned by every later call,
 * whatever $prefix those calls pass.
 *
 * @param string $prefix text placed in front of the revision number
 * @return string $prefix followed by the revision, or "" when the file is
 *                missing or holds no revision
 */
function rss_svn_rev($prefix='.') {
    static $ret = NULL;
    // strict comparison, so that a memoised "" also counts as a cache hit
    if ($ret !== NULL) {
        return $ret;
    }

    $ret = "";
    $wcprops = GREGARIUS_HOME .'.svn/dir-wcprops';
    if (file_exists($wcprops)) {
        $raw=getUrl($wcprops);
        if ($raw && preg_match('#ver/([0-9]+)/#',$raw,$matches)) {
            $ret = $prefix . $matches[1];
        }
    }
    return $ret;
}
1119
/**
 * Downloads a favicon and stores it in the cache table.
 *
 * Only http:// and https:// URLs are accepted. Any existing 'icon' cache row
 * for the same URL is deleted before the fresh data is inserted.
 *
 * @param string $icon favicon URL
 * @return mixed false when the URL is rejected or nothing could be fetched,
 *               otherwise the result of
 *               rss_is_sql_error(RSS_SQL_ERROR_NO_ERROR) after the insert
 */
function cacheFavicon($icon) {
    // Make sure only real favicons get fetched into the DB
    $isHttpUrl = preg_match('#^https?://.+$#', $icon);
    if (!$isHttpUrl) {
        return false;
    }

    $cacheKey = rss_real_escape_string($icon);
    $payload = getUrl($icon);
    if (!$payload) {
        return false;
    }

    // drop the stale entry first
    rss_query("delete from " . getTable('cache')
              ." where cachetype='icon' and cachekey='$cacheKey'");

    // then store the freshly fetched data
    rss_query("insert into ". getTable('cache')
              ."(cachekey,timestamp,cachetype,data) values "
              ."('$cacheKey',now(),'icon','".rss_real_escape_string($payload)."')");

    return rss_is_sql_error(RSS_SQL_ERROR_NO_ERROR);
}
1140
/**
 * Cleans a value according to a bitmask of RSS_SANITIZER_* rules.
 *
 * When RSS_SANITIZER_SIMPLE_SQL is set the value is first passed through
 * rss_real_escape_string(). After that, every remaining rule present in
 * $rules removes the characters described below, in this order:
 *
 *  - NO_SPACES:      all whitespace, then anything between "/*" and the
 *                    last closing comment marker
 *  - NUMERIC:        everything but digits, "." and "-"
 *  - CHARACTERS:     everything but ASCII letters
 *  - CHARACTERS_EXT: everything but ASCII letters and "_"
 *  - WORDS:          everything but ASCII letters, digits, "-", "." and "_"
 *  - URL:            the characters { } | \ ^ < >
 *
 * @param mixed $input the value to clean
 * @param int   $rules bitwise OR of RSS_SANITIZER_* constants
 * @return mixed the cleaned value; $input itself when no rule applies
 */
function sanitize($input, $rules = 0) {
    $ret = $input;

    if ($rules & RSS_SANITIZER_SIMPLE_SQL) {
        $ret = rss_real_escape_string($ret);
    }

    // flag => pattern pairs, applied top to bottom
    $filters = array(
        array(RSS_SANITIZER_NO_SPACES,       '#\s#'),
        // also strip out SQL comments
        array(RSS_SANITIZER_NO_SPACES,       '#/\*.*\*/#'),
        array(RSS_SANITIZER_NUMERIC,         '#[^0-9\.-]#'),
        array(RSS_SANITIZER_CHARACTERS,      '#[^a-zA-Z]#'),
        array(RSS_SANITIZER_CHARACTERS_EXT,  '#[^a-zA-Z_]#'),
        array(RSS_SANITIZER_WORDS,           '#[^a-zA-Z0-9\-\._]#'),
        // filter out "unsafe" characters: {,},|,\,^,<,>
        array(RSS_SANITIZER_URL,             '#[{}\|\\\^<>]#'),
    );

    foreach ($filters as $filter) {
        list($flag, $pattern) = $filter;
        if ($rules & $flag) {
            $ret = preg_replace($pattern, '', $ret);
        }
    }

    return $ret;
}
1169
/**
 * Tells whether private content has to be hidden for the current request.
 *
 * @return bool true when nobody is logged in, otherwise the negation of
 *              rss_user_show_private()
 */
function hidePrivate() {
    if (!isLoggedIn()) {
        return true;
    }

    return !rss_user_show_private();
}
1179
/**
 * Checks whether the current user reaches the RSS_USER_LEVEL_PRIVATE level.
 *
 * cls/user.php is pulled in first when $GLOBALS['rssuser'] is not set yet.
 *
 * @return mixed whatever rss_user_check_user_level() reports
 */
function isLoggedIn() {
    $userAvailable = isset($GLOBALS['rssuser']);
    if (!$userAvailable) {
        require_once('cls/user.php');
    }

    $granted = rss_user_check_user_level(RSS_USER_LEVEL_PRIVATE);
    return $granted;
}
1187
/**
 * Answers with a bare-bones 404: a status line plus a single line of text,
 * kept tiny to save bandwidth on web bots.
 */
function rss_404() {
    $statusLine = "HTTP/1.1 404 Not Found";
    header($statusLine);
    print "404 Page Not Found\n";
}
1193
/**
 * Turns a title into a string usable inside a URI.
 *
 * Each space, #, %, &, /, +, single quote, double quote and question mark is
 * replaced by $sep; the result is then passed through utf8_uri_encode().
 *
 * @param string $title the text to convert
 * @param string $sep   replacement for the characters listed above
 * @return string the converted text
 */
function rss_uri($title, $sep=RSS_URI_SEPARATOR) {
    $flattened = preg_replace('#[ \#%&/\+\'"\?]#', $sep, $title);
    return utf8_uri_encode($flattened);
}
1197?>
1198