1<?php
2
3/**
4 * Class that handles operations involving percent-encoding in URIs.
5 *
6 * @warning
7 *      Be careful when reusing instances of PercentEncoder. The object
8 *      you use for normalize() SHOULD NOT be used for encode(), or
9 *      vice-versa.
10 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values that must pass through encode() untouched.
     * Keys are integer byte values (as returned by ord()); every value is true.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved bytes: ASCII digits, ASCII letters,
     * '-', '.', '_' and '~', plus every byte of the optional $preserve string.
     * @param string|false $preserve Extra characters to leave unescaped, or
     *      false (the default) to preserve only the built-in set.
     */
    public function __construct($preserve = false)
    {
        // Inclusive byte ranges that are always preserved.
        $ranges = array(
            array(48, 57),  // digits      0-9
            array(65, 90),  // upper-case  A-Z
            array(97, 122), // lower-case  a-z
        );
        foreach ($ranges as $range) {
            for ($code = $range[0]; $code <= $range[1]; $code++) {
                $this->preserve[$code] = true;
            }
        }

        // Stand-alone punctuation: dash, period, underscore, tilde.
        foreach (array(45, 46, 95, 126) as $code) {
            $this->preserve[$code] = true;
        }

        // Nothing more to register unless the caller supplied extras.
        if ($preserve === false) {
            return;
        }

        // Register each byte of the caller-supplied string.
        $count = strlen($preserve);
        $pos = 0;
        while ($pos < $count) {
            $this->preserve[ord($preserve[$pos])] = true;
            $pos++;
        }
    }

    /**
     * Our replacement for urlencode: every byte is percent-encoded as
     * upper-case hex unless it is a literal '%' or is listed in the
     * preserve table built by the constructor.
     * @note
     *      Assumes the string has already been normalized, so that every
     *      percent escape sequence in it is valid. A '%' is never
     *      re-escaped, whether or not it appears in $preserve.
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $encoded = '';
        $length = strlen($string);
        for ($pos = 0; $pos < $length; $pos++) {
            $char = $string[$pos];
            $code = ord($char);
            // Existing escapes and preserved bytes are copied verbatim.
            if ($char === '%' || isset($this->preserve[$code])) {
                $encoded .= $char;
                continue;
            }
            $encoded .= sprintf('%%%02X', $code);
        }
        return $encoded;
    }

    /**
     * Repairs percent-encoding: escapes of preserved bytes are decoded,
     * the remaining escapes get upper-case hex digits, and any '%' that
     * is not followed by two hex digits is itself escaped as '%25'.
     * @warning The set of decoded bytes comes from $preserve, including
     *          any extras passed to the constructor, even though the
     *          usual desired behavior is not to decode those. Be careful
     *          when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }

        // Everything before the first '%' is copied as-is; each later
        // segment is the text that followed one '%' in the input.
        $segments = explode('%', $string);
        $result = array_shift($segments);

        foreach ($segments as $segment) {
            // Too short or not hex: the '%' was a stray literal, escape it.
            if (strlen($segment) < 2 || !ctype_xdigit(substr($segment, 0, 2))) {
                $result .= '%25' . $segment;
                continue;
            }

            $hex = substr($segment, 0, 2);
            $tail = substr($segment, 2);
            $code = hexdec($hex);

            $result .= isset($this->preserve[$code])
                ? chr($code) . $tail
                : '%' . strtoupper($hex) . $tail;
        }

        return $result;
    }
}
110
111// vim: et sw=4 sts=4
112