1<?php
2
// Network-path references (a host but no scheme, e.g. //example.com/foo) are not resolved; they are returned unchanged.
4
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
    /**
     * Filter name.
     * @type string
     */
    public $name = 'MakeAbsolute';

    /**
     * Base URI that references are resolved against (fragment stripped).
     * Stays null when no base is configured, in which case filter() is a no-op.
     * @type HTMLPurifier_URI|null
     */
    protected $base;

    /**
     * Path segments of the base URI with the last segment discarded and
     * dot-segments already collapsed; relative paths are appended to this.
     * @type array
     */
    protected $basePathStack = array();

    /**
     * Reads the base URI from the URI definition and pre-computes its
     * collapsed path stack.
     *
     * Emits an E_USER_WARNING and returns false when the definition has no
     * base URI.
     *
     * @param HTMLPurifier_Config $config
     * @return bool True when a base URI is available, false otherwise.
     */
    public function prepare($config)
    {
        $def = $config->getDefinition('URI');
        if (is_null($def->base)) {
            $this->base = null;
            trigger_error(
                'URI.MakeAbsolute is being ignored due to lack of ' .
                'value for URI.Base configuration',
                E_USER_WARNING
            );
            return false;
        }
        // Work on a copy: stripping the fragment below must not modify the
        // URI object held by the definition.
        $this->base = clone $def->base;
        $this->base->fragment = null; // fragment is invalid for base URI
        $stack = explode('/', $this->base->path);
        array_pop($stack); // discard last segment
        $this->basePathStack = $this->_collapseStack($stack); // do pre-parsing
        return true;
    }

    /**
     * Resolves a URI reference against the base URI, in place.
     *
     * URIs that already carry a host, and URIs whose explicit scheme is
     * non-hierarchical, are left untouched. A completely empty reference is
     * replaced by a copy of the base URI.
     *
     * @param HTMLPurifier_URI $uri URI to resolve; modified or replaced in place.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False when the URI has an explicit scheme (and no host)
     *              for which no scheme object is available; true otherwise.
     */
    public function filter(&$uri, $config, $context)
    {
        if (is_null($this->base)) {
            return true;
        } // abort early
        if ($uri->path === '' && is_null($uri->scheme) &&
            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)) {
            // reference to current document
            $uri = clone $this->base;
            return true;
        }
        if (!is_null($uri->scheme)) {
            // absolute URI already: don't change
            if (!is_null($uri->host)) {
                return true;
            }
            $scheme_obj = $uri->getSchemeObj($config, $context);
            if (!$scheme_obj) {
                // scheme not recognized
                return false;
            }
            if (!$scheme_obj->hierarchical) {
                // non-hierarchical URI with explicit scheme, don't change
                return true;
            }
            // special case: explicit hierarchical scheme but no authority;
            // fall through and resolve it like a relative reference
        }
        if (!is_null($uri->host)) {
            // network path, don't bother
            return true;
        }
        if ($uri->path === '') {
            $uri->path = $this->base->path;
            // RFC 3986 section 5.2.2: with an empty path and no query of its
            // own, the reference inherits the base query as well
            if (is_null($uri->query)) {
                $uri->query = $this->base->query;
            }
        } elseif ($uri->path[0] !== '/') {
            // relative path, needs more complicated processing
            $stack = explode('/', $uri->path);
            $new_stack = array_merge($this->basePathStack, $stack);
            if ($new_stack[0] !== '' && !is_null($this->base->host)) {
                // base has an authority, so the merged path must be rooted
                array_unshift($new_stack, '');
            }
            $new_stack = $this->_collapseStack($new_stack);
            $uri->path = implode('/', $new_stack);
        } else {
            // absolute path, but still we should collapse
            $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
        }
        // re-combine: the scheme always comes from the base; the remaining
        // authority parts are only filled in when the reference lacks them
        $uri->scheme = $this->base->scheme;
        if (is_null($uri->userinfo)) {
            $uri->userinfo = $this->base->userinfo;
        }
        if (is_null($uri->host)) {
            $uri->host = $this->base->host;
        }
        if (is_null($uri->port)) {
            $uri->port = $this->base->port;
        }
        return true;
    }

    /**
     * Resolve dots and double-dots in a path stack
     *
     * Empty segments that are neither first nor last (duplicated slashes)
     * are dropped. When the final segment processed is "." or "..", an empty
     * segment is appended so the imploded path ends with a slash.
     *
     * @param array $stack Path segments, as produced by explode('/', $path).
     * @return array Collapsed path segments.
     */
    private function _collapseStack($stack)
    {
        $result = array();
        $is_folder = false;
        $count = count($stack);
        for ($i = 0; $i < $count; $i++) {
            $segment = $stack[$i];
            $is_folder = false;
            // absorb an internally duplicated slash
            if ($segment === '' && $i > 0 && $i + 1 < $count) {
                continue;
            }
            if ($segment === '..') {
                if (empty($result)) {
                    // relative path, preserve the double-dots
                    $result[] = '..';
                } else {
                    $removed = array_pop($result);
                    if ($removed === '' && empty($result)) {
                        // error case: attempted to back out too far:
                        // restore the leading slash
                        $result[] = '';
                    } elseif ($removed === '..') {
                        // cannot remove .. with ..: put the popped one back
                        // and keep the current one too, otherwise "../.."
                        // would wrongly shrink to ".."
                        $result[] = '..';
                        $result[] = '..';
                    }
                }
                $is_folder = true;
                continue;
            }
            if ($segment === '.') {
                // silently absorb
                $is_folder = true;
                continue;
            }
            $result[] = $segment;
        }
        if ($is_folder) {
            // path ended on a dot-segment: it denotes a directory
            $result[] = '';
        }
        return $result;
    }
}
157
158// vim: et sw=4 sts=4
159