1<?php
2
3/**
4 * HTML Purifier's internal representation of a URI.
5 * @note
6 *      Internal data-structures are completely escaped. If the data needs
7 *      to be used in a non-URI context (which is very unlikely), be sure
8 *      to decode it first. The URI may not necessarily be well-formed until
9 *      validate() is called.
10 */
class HTMLPurifier_URI
{
    /**
     * Lower-cased scheme (e.g. "http"), or null when the URI has none.
     * @type string|null
     */
    public $scheme;

    /**
     * User information component, or null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means
     * an empty authority (these render differently, see toString()).
     * @type string|null
     */
    public $host;

    /**
     * Port number, or null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"), or null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading "#"), or null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string|null $scheme
     * @param string|null $userinfo
     * @param string|null $host
     * @param int|string|null $port
     * @param string $path
     * @param string|null $query
     * @param string|null $fragment
     * @note Automatically normalizes scheme (to lower case) and port
     *       (to an integer); all other components are stored as given.
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // only pay for strtolower() when the scheme is not already lower case
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if no scheme object could be
     *         obtained (unknown scheme, or no usable default scheme).
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                if ($def->defaultScheme !== null) {
                    // something funky happened to the default scheme object
                    trigger_error(
                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
                        E_USER_WARNING
                    );
                } // suppress error if it's null
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this implementation always returns true; invalid components
     *         are cleared or percent-encoded rather than rejected).
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                // an invalid host is dropped entirely rather than failing the URI
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // (Explicit parentheses: "no host" covers both null and the empty
        // string, and only matters when a scheme is actually present.)
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        if (is_null($this->path)) {
            // Treat a missing path as empty so the string offset access
            // and string functions below never operate on null.
            $this->path = '';
        }
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This can happen when the host got stripped out
                    // above: without an authority, a path starting with
                    // "//" would be read back as one, so discard it.
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment must not keep a literal ':' (it is
                // absent from this encoder's preserved set), otherwise it
                // could be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme object may be unavailable (getSchemeObj()
        // guards against the same thing). In that case there is no
        // "secure" baseline to compare against, so the URI is not
        // rejected on security grounds; the explicit check avoids
        // reading a property of a non-object.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
315
316// vim: et sw=4 sts=4
317