1<?php
2/**
3 * The MetaData Renderer
4 *
5 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
7 * $persistent.
8 *
9 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
10 *
11 * @author Esther Brunner <wikidesign@gmail.com>
12 */
13class Doku_Renderer_metadata extends Doku_Renderer
14{
    /** the approximate byte length to capture for the abstract; cdata() stops collecting once it is exceeded */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract; longer abstracts are cut in document_end() */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids, emptied in document_start() */
    protected $headers = array();

    /** @var string temporary $doc store, holds the abstract text while a footnote is being rendered */
    protected $store = '';

    /** @var string keeps the first image reference, written to the 'firstimage' relation in document_end() */
    protected $firstimage = '';

    /**
     * @var bool whether or not data is being captured for the abstract, public to be accessible by plugins;
     *           switched off while inside a footnote
     */
    public $capturing = true;

    /**
     * @var bool true while more data for the abstract is still wanted; cdata() sets it to false as soon as
     *           more than ABSTRACT_LEN bytes were captured
     */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;
44
    /**
     * Returns the format produced by this renderer.
     *
     * @return string always the literal 'metadata'
     */
    public function getFormat()
    {
        return 'metadata';
    }
54
55    /**
56     * Initialize the document
57     *
58     * Sets up some of the persistent info about the page if it doesn't exist, yet.
59     */
60    public function document_start()
61    {
62        global $ID;
63
64        $this->headers = array();
65
66        // external pages are missing create date
67        if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
68            $this->persistent['date']['created'] = filectime(wikiFN($ID));
69        }
70        if (!isset($this->persistent['user'])) {
71            $this->persistent['user'] = '';
72        }
73        if (!isset($this->persistent['creator'])) {
74            $this->persistent['creator'] = '';
75        }
76        // reset metadata to persistent values
77        $this->meta = $this->persistent;
78    }
79
80    /**
81     * Finalize the document
82     *
83     * Stores collected data in the metadata
84     */
85    public function document_end()
86    {
87        global $ID;
88
89        // store internal info in metadata (notoc,nocache)
90        $this->meta['internal'] = $this->info;
91
92        if (!isset($this->meta['description']['abstract'])) {
93            // cut off too long abstracts
94            $this->doc = trim($this->doc);
95            if (strlen($this->doc) > self::ABSTRACT_MAX) {
96                $this->doc = \dokuwiki\Utf8\PhpString::substr($this->doc, 0, self::ABSTRACT_MAX).'…';
97            }
98            $this->meta['description']['abstract'] = $this->doc;
99        }
100
101        $this->meta['relation']['firstimage'] = $this->firstimage;
102
103        if (!isset($this->meta['date']['modified'])) {
104            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
105        }
106    }
107
108    /**
109     * Render plain text data
110     *
111     * This function takes care of the amount captured data and will stop capturing when
112     * enough abstract data is available
113     *
114     * @param $text
115     */
116    public function cdata($text)
117    {
118        if (!$this->capture || !$this->capturing) {
119            return;
120        }
121
122        $this->doc .= $text;
123
124        $this->captured += strlen($text);
125        if ($this->captured > self::ABSTRACT_LEN) {
126            $this->capture = false;
127        }
128    }
129
130    /**
131     * Add an item to the TOC
132     *
133     * @param string $id       the hash link
134     * @param string $text     the text to display
135     * @param int    $level    the nesting level
136     */
137    public function toc_additem($id, $text, $level)
138    {
139        global $conf;
140
141        //only add items within configured levels
142        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
143            // the TOC is one of our standard ul list arrays ;-)
144            $this->meta['description']['tableofcontents'][] = array(
145                'hid'   => $id,
146                'title' => $text,
147                'type'  => 'ul',
148                'level' => $level - $conf['toptoclevel'] + 1
149            );
150        }
151    }
152
153    /**
154     * Render a heading
155     *
156     * @param string $text  the text to display
157     * @param int    $level header level
158     * @param int    $pos   byte position in the original source
159     */
160    public function header($text, $level, $pos)
161    {
162        if (!isset($this->meta['title'])) {
163            $this->meta['title'] = $text;
164        }
165
166        // add the header to the TOC
167        $hid = $this->_headerToLink($text, true);
168        $this->toc_additem($hid, $text, $level);
169
170        // add to summary
171        $this->cdata(DOKU_LF.$text.DOKU_LF);
172    }
173
    /**
     * Open a paragraph
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function p_open()
    {
        $this->cdata(DOKU_LF);
    }
181
    /**
     * Close a paragraph
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function p_close()
    {
        $this->cdata(DOKU_LF);
    }
189
    /**
     * Create a line break
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function linebreak()
    {
        $this->cdata(DOKU_LF);
    }
197
198    /**
199     * Create a horizontal line
200     */
201    public function hr()
202    {
203        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
204    }
205
206    /**
207     * Callback for footnote start syntax
208     *
209     * All following content will go to the footnote instead of
210     * the document. To achieve this the previous rendered content
211     * is moved to $store and $doc is cleared
212     *
213     * @author Andreas Gohr <andi@splitbrain.org>
214     */
215    public function footnote_open()
216    {
217        if ($this->capture) {
218            // move current content to store
219            // this is required to ensure safe behaviour of plugins accessed within footnotes
220            $this->store = $this->doc;
221            $this->doc   = '';
222
223            // disable capturing
224            $this->capturing = false;
225        }
226    }
227
228    /**
229     * Callback for footnote end syntax
230     *
231     * All content rendered whilst within footnote syntax mode is discarded,
232     * the previously rendered content is restored and capturing is re-enabled.
233     *
234     * @author Andreas Gohr
235     */
236    public function footnote_close()
237    {
238        if ($this->capture) {
239            // re-enable capturing
240            $this->capturing = true;
241            // restore previously rendered content
242            $this->doc   = $this->store;
243            $this->store = '';
244        }
245    }
246
    /**
     * Open an unordered list
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function listu_open()
    {
        $this->cdata(DOKU_LF);
    }
254
    /**
     * Open an ordered list
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function listo_open()
    {
        $this->cdata(DOKU_LF);
    }
262
263    /**
264     * Open a list item
265     *
266     * @param int $level the nesting level
267     * @param bool $node true when a node; false when a leaf
268     */
269    public function listitem_open($level, $node=false)
270    {
271        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
272    }
273
    /**
     * Close a list item
     *
     * Adds a single DOKU_LF to the abstract via cdata().
     */
    public function listitem_close()
    {
        $this->cdata(DOKU_LF);
    }
281
    /**
     * Output preformatted text
     *
     * The text is passed to cdata() unchanged.
     *
     * @param string $text
     */
    public function preformatted($text)
    {
        $this->cdata($text);
    }
291
292    /**
293     * Start a block quote
294     */
295    public function quote_open()
296    {
297        $this->cdata(DOKU_LF.DOKU_TAB.'"');
298    }
299
300    /**
301     * Stop a block quote
302     */
303    public function quote_close()
304    {
305        $this->cdata('"'.DOKU_LF);
306    }
307
308    /**
309     * Display text as file content, optionally syntax highlighted
310     *
311     * @param string $text text to show
312     * @param string $lang programming language to use for syntax highlighting
313     * @param string $file file path label
314     */
315    public function file($text, $lang = null, $file = null)
316    {
317        $this->cdata(DOKU_LF.$text.DOKU_LF);
318    }
319
320    /**
321     * Display text as code content, optionally syntax highlighted
322     *
323     * @param string $text     text to show
324     * @param string $language programming language to use for syntax highlighting
325     * @param string $file     file path label
326     */
327    public function code($text, $language = null, $file = null)
328    {
329        $this->cdata(DOKU_LF.$text.DOKU_LF);
330    }
331
    /**
     * Format an acronym
     *
     * The acronym is passed to cdata() as is; no explanation is looked up here.
     *
     * @param string $acronym
     */
    public function acronym($acronym)
    {
        $this->cdata($acronym);
    }
343
    /**
     * Format a smiley
     *
     * The smiley text is passed to cdata() as is; no image is looked up here.
     *
     * @param string $smiley
     */
    public function smiley($smiley)
    {
        $this->cdata($smiley);
    }
355
    /**
     * Format an entity
     *
     * Entities are basically small text replacements. Here the entity is passed
     * to cdata() as is; no replacement is looked up.
     *
     * @param string $entity
     */
    public function entity($entity)
    {
        $this->cdata($entity);
    }
369
370    /**
371     * Typographically format a multiply sign
372     *
373     * Example: ($x=640, $y=480) should result in "640×480"
374     *
375     * @param string|int $x first value
376     * @param string|int $y second value
377     */
378    public function multiplyentity($x, $y)
379    {
380        $this->cdata($x.'×'.$y);
381    }
382
383    /**
384     * Render an opening single quote char (language specific)
385     */
386    public function singlequoteopening()
387    {
388        global $lang;
389        $this->cdata($lang['singlequoteopening']);
390    }
391
392    /**
393     * Render a closing single quote char (language specific)
394     */
395    public function singlequoteclosing()
396    {
397        global $lang;
398        $this->cdata($lang['singlequoteclosing']);
399    }
400
401    /**
402     * Render an apostrophe char (language specific)
403     */
404    public function apostrophe()
405    {
406        global $lang;
407        $this->cdata($lang['apostrophe']);
408    }
409
410    /**
411     * Render an opening double quote char (language specific)
412     */
413    public function doublequoteopening()
414    {
415        global $lang;
416        $this->cdata($lang['doublequoteopening']);
417    }
418
419    /**
420     * Render an closinging double quote char (language specific)
421     */
422    public function doublequoteclosing()
423    {
424        global $lang;
425        $this->cdata($lang['doublequoteclosing']);
426    }
427
    /**
     * Render a CamelCase link
     *
     * Handled as an internal link that uses the link name as both page id and link title.
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link)
    {
        $this->internallink($link, $link);
    }
438
439    /**
440     * Render a page local link
441     *
442     * @param string $hash hash link identifier
443     * @param string $name name for the link
444     */
445    public function locallink($hash, $name = null)
446    {
447        if (is_array($name)) {
448            $this->_firstimage($name['src']);
449            if ($name['type'] == 'internalmedia') {
450                $this->_recordMediaUsage($name['src']);
451            }
452        }
453    }
454
455    /**
456     * keep track of internal links in $this->meta['relation']['references']
457     *
458     * @param string            $id   page ID to link to. eg. 'wiki:syntax'
459     * @param string|array|null $name name for the link, array for media file
460     */
461    public function internallink($id, $name = null)
462    {
463        global $ID;
464
465        if (is_array($name)) {
466            $this->_firstimage($name['src']);
467            if ($name['type'] == 'internalmedia') {
468                $this->_recordMediaUsage($name['src']);
469            }
470        }
471
472        $parts = explode('?', $id, 2);
473        if (count($parts) === 2) {
474            $id = $parts[0];
475        }
476
477        $default = $this->_simpleTitle($id);
478
479        // first resolve and clean up the $id
480        resolve_pageid(getNS($ID), $id, $exists);
481        @list($page) = explode('#', $id, 2);
482
483        // set metadata
484        $this->meta['relation']['references'][$page] = $exists;
485        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
486        // p_set_metadata($id, $data);
487
488        // add link title to summary
489        if ($this->capture) {
490            $name = $this->_getLinkTitle($name, $default, $id);
491            $this->doc .= $name;
492        }
493    }
494
495    /**
496     * Render an external link
497     *
498     * @param string            $url  full URL with scheme
499     * @param string|array|null $name name for the link, array for media file
500     */
501    public function externallink($url, $name = null)
502    {
503        if (is_array($name)) {
504            $this->_firstimage($name['src']);
505            if ($name['type'] == 'internalmedia') {
506                $this->_recordMediaUsage($name['src']);
507            }
508        }
509
510        if ($this->capture) {
511            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
512        }
513    }
514
515    /**
516     * Render an interwiki link
517     *
518     * You may want to use $this->_resolveInterWiki() here
519     *
520     * @param string       $match     original link - probably not much use
521     * @param string|array $name      name for the link, array for media file
522     * @param string       $wikiName  indentifier (shortcut) for the remote wiki
523     * @param string       $wikiUri   the fragment parsed from the original link
524     */
525    public function interwikilink($match, $name, $wikiName, $wikiUri)
526    {
527        if (is_array($name)) {
528            $this->_firstimage($name['src']);
529            if ($name['type'] == 'internalmedia') {
530                $this->_recordMediaUsage($name['src']);
531            }
532        }
533
534        if ($this->capture) {
535            list($wikiUri) = explode('#', $wikiUri, 2);
536            $name = $this->_getLinkTitle($name, $wikiUri);
537            $this->doc .= $name;
538        }
539    }
540
541    /**
542     * Link to windows share
543     *
544     * @param string       $url  the link
545     * @param string|array $name name for the link, array for media file
546     */
547    public function windowssharelink($url, $name = null)
548    {
549        if (is_array($name)) {
550            $this->_firstimage($name['src']);
551            if ($name['type'] == 'internalmedia') {
552                $this->_recordMediaUsage($name['src']);
553            }
554        }
555
556        if ($this->capture) {
557            if ($name) {
558                $this->doc .= $name;
559            } else {
560                $this->doc .= '<'.$url.'>';
561            }
562        }
563    }
564
565    /**
566     * Render a linked E-Mail Address
567     *
568     * Should honor $conf['mailguard'] setting
569     *
570     * @param string       $address Email-Address
571     * @param string|array $name    name for the link, array for media file
572     */
573    public function emaillink($address, $name = null)
574    {
575        if (is_array($name)) {
576            $this->_firstimage($name['src']);
577            if ($name['type'] == 'internalmedia') {
578                $this->_recordMediaUsage($name['src']);
579            }
580        }
581
582        if ($this->capture) {
583            if ($name) {
584                $this->doc .= $name;
585            } else {
586                $this->doc .= '<'.$address.'>';
587            }
588        }
589    }
590
591    /**
592     * Render an internal media file
593     *
594     * @param string $src     media ID
595     * @param string $title   descriptive text
596     * @param string $align   left|center|right
597     * @param int    $width   width of media in pixel
598     * @param int    $height  height of media in pixel
599     * @param string $cache   cache|recache|nocache
600     * @param string $linking linkonly|detail|nolink
601     */
602    public function internalmedia($src, $title = null, $align = null, $width = null,
603                           $height = null, $cache = null, $linking = null)
604    {
605        if ($this->capture && $title) {
606            $this->doc .= '['.$title.']';
607        }
608        $this->_firstimage($src);
609        $this->_recordMediaUsage($src);
610    }
611
612    /**
613     * Render an external media file
614     *
615     * @param string $src     full media URL
616     * @param string $title   descriptive text
617     * @param string $align   left|center|right
618     * @param int    $width   width of media in pixel
619     * @param int    $height  height of media in pixel
620     * @param string $cache   cache|recache|nocache
621     * @param string $linking linkonly|detail|nolink
622     */
623    public function externalmedia($src, $title = null, $align = null, $width = null,
624                           $height = null, $cache = null, $linking = null)
625    {
626        if ($this->capture && $title) {
627            $this->doc .= '['.$title.']';
628        }
629        $this->_firstimage($src);
630    }
631
632    /**
633     * Render the output of an RSS feed
634     *
635     * @param string $url    URL of the feed
636     * @param array  $params Finetuning of the output
637     */
638    public function rss($url, $params)
639    {
640        $this->meta['relation']['haspart'][$url] = true;
641
642        $this->meta['date']['valid']['age'] =
643            isset($this->meta['date']['valid']['age']) ?
644                min($this->meta['date']['valid']['age'], $params['refresh']) :
645                $params['refresh'];
646    }
647
648    #region Utils
649
650    /**
651     * Removes any Namespace from the given name but keeps
652     * casing and special chars
653     *
654     * @author Andreas Gohr <andi@splitbrain.org>
655     *
656     * @param string $name
657     *
658     * @return mixed|string
659     */
660    public function _simpleTitle($name)
661    {
662        global $conf;
663
664        if (is_array($name)) {
665            return '';
666        }
667
668        if ($conf['useslash']) {
669            $nssep = '[:;/]';
670        } else {
671            $nssep = '[:;]';
672        }
673        $name = preg_replace('!.*'.$nssep.'!', '', $name);
674        //if there is a hash we use the anchor name only
675        $name = preg_replace('!.*#!', '', $name);
676        return $name;
677    }
678
679    /**
680     * Construct a title and handle images in titles
681     *
682     * @author Harry Fuecks <hfuecks@gmail.com>
683     * @param string|array|null $title    either string title or media array
684     * @param string            $default  default title if nothing else is found
685     * @param null|string       $id       linked page id (used to extract title from first heading)
686     * @return string title text
687     */
688    public function _getLinkTitle($title, $default, $id = null)
689    {
690        if (is_array($title)) {
691            if ($title['title']) {
692                return '['.$title['title'].']';
693            } else {
694                return $default;
695            }
696        } elseif (is_null($title) || trim($title) == '') {
697            if (useHeading('content') && $id) {
698                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
699                if ($heading) {
700                    return $heading;
701                }
702            }
703            return $default;
704        } else {
705            return $title;
706        }
707    }
708
709    /**
710     * Remember first image
711     *
712     * @param string $src image URL or ID
713     */
714    protected function _firstimage($src)
715    {
716        global $ID;
717
718        if ($this->firstimage) {
719            return;
720        }
721
722        list($src) = explode('#', $src, 2);
723        if (!media_isexternal($src)) {
724            resolve_mediaid(getNS($ID), $src, $exists);
725        }
726        if (preg_match('/.(jpe?g|gif|png)$/i', $src)) {
727            $this->firstimage = $src;
728        }
729    }
730
731    /**
732     * Store list of used media files in metadata
733     *
734     * @param string $src media ID
735     */
736    protected function _recordMediaUsage($src)
737    {
738        global $ID;
739
740        list ($src) = explode('#', $src, 2);
741        if (media_isexternal($src)) {
742            return;
743        }
744        resolve_mediaid(getNS($ID), $src, $exists);
745        $this->meta['relation']['media'][$src] = $exists;
746    }
747
748    #endregion
749}
750
751//Setup VIM: ex: et ts=4 :
752