1<?php
2
3/**
4 * Base class for all validating attribute definitions.
5 *
6 * This family of classes forms the core for not only HTML attribute validation,
7 * but also any sort of string that needs to be validated or cleaned (which
8 * means CSS properties and composite definitions are defined here too).
9 * Besides defining (through code) what precisely makes the string valid,
10 * subclasses are also responsible for cleaning the code if possible.
11 */
12
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage return
     * and tab is replaced with a single space. (Line feeds are replaced
     * rather than dropped.) While most useful for HTML attributes specified
     * as CDATA, it can also be applied to most CSS values.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to normalize.
     * @return string Trimmed string with inner newlines/tabs turned into spaces.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Both the three-argument (rgb/hsl) and the four-argument (rgba/hsla)
     * functional notations are compacted, including when they appear
     * together in the same string.
     *
     * @param string $string a CSS colour definition
     * @return string
     */
    protected function mungeRgb($string)
    {
        // One numeric argument: optional surrounding whitespace, an integer
        // or decimal number and an optional percent sign. Contributes three
        // capture groups, of which the first holds the whole argument.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // Four-argument forms first. Capture groups: 1 is the function name,
        // 2/5/8/11 are the arguments.
        $munged = preg_replace(
            '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
            '\1(\2,\5,\8,\11)',
            $string
        );
        if ($munged === null) {
            // PCRE failure: hand back preg_replace()'s null result instead
            // of feeding it into the second pass.
            return $munged;
        }

        // Three-argument forms. This pass cannot touch the output of the
        // previous one: the name must be directly followed by "(", so
        // "rgb(" / "hsl(" never match inside "rgba(" / "hsla(".
        return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $munged);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * A backslash followed by one to six hex digits is replaced by the
     * character HTMLPurifier_Encoder::unichr() produces for that code; the
     * escape is dropped entirely when HTMLPurifier_Encoder::cleanUTF8()
     * rejects that character. One whitespace character directly after the
     * hex digits is consumed as the escape terminator. A backslash followed
     * by a line feed is removed together with the line feed, a backslash
     * before any other character yields that character, and a lone trailing
     * backslash is kept.
     *
     * @param string $string CSS string that may contain backslash escapes
     * @return string The string with escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Backslash is the last byte: nothing to escape.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect up to six hex digits.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now sits on the first byte after the hex digits.
                    // If that byte is whitespace it terminates the escape
                    // and is skipped by the loop increment; otherwise it
                    // belongs to the following text, so step back one to
                    // let the loop increment land on it. This has to happen
                    // before the validity check below, so that a rejected
                    // escape does not swallow the character after it.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped line feed: drop both the backslash and it.
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
143
144// vim: et sw=4 sts=4
145