<?php

/**
 * Base class for all validating attribute definitions.
 *
 * This family of classes forms the core for not only HTML attribute validation,
 * but also any sort of string that needs to be validated or cleaned (which
 * means CSS properties and composite definitions are defined here too).
 * Besides defining (through code) what precisely makes the string valid,
 * subclasses are also responsible for cleaning the code if possible.
 */

abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     * @note The return contract is defined by the concrete subclasses and is
     *       not visible here; presumably the cleaned string, or a failure
     *       marker when the input cannot be salvaged -- TODO confirm.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string along the lines of
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage return
     * and tab is replaced with a single space. (The specification says line
     * feeds should be ignored; this implementation turns them into spaces
     * instead.) While most useful for HTML attributes specified as CDATA, it
     * can also be applied to most CSS values.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7. However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string Raw attribute or property value.
     * @return string The trimmed, whitespace-normalized value.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // Default implementation: $string is ignored and this very object is
        // handed back as a flyweight. If $string has an effect on the returned
        // object (i.e. you need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * The four-argument forms (rgba/hsla) and the three-argument forms
     * (rgb/hsl) are both normalized, including when they occur together in
     * one string. The two patterns cannot interfere with each other because
     * the literal "rgb(" / "hsl(" never occurs inside "rgba(" / "hsla(".
     *
     * @param string $string a CSS colour definition
     * @return string The input with whitespace stripped from inside matching
     *                colour functions; the input unchanged if the regular
     *                expression engine reports an error.
     */
    protected function mungeRgb($string)
    {
        // One numeric component: optional surrounding whitespace, an integer
        // or decimal number, and an optional percent sign. Each use adds three
        // capture groups, which is why the replacements skip by three.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        $patterns = array(
            '/(rgba|hsla)\(' . $p . ',' . $p . ',' . $p . ',' . $p . '\)/',
            '/(rgb|hsl)\(' . $p . ',' . $p . ',' . $p . '\)/',
        );
        $replacements = array(
            '\1(\2,\5,\8,\11)',
            '\1(\2,\5,\8)',
        );

        $munged = preg_replace($patterns, $replacements, $string);

        // preg_replace() yields null on a PCRE failure; honour the documented
        // string return type by falling back to the untouched input.
        return $munged === null ? $string : $munged;
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handling of a backslash depends on what follows it:
     *  - one to six hexadecimal digits: the digits are read as a code point
     *    and converted with HTMLPurifier_Encoder::unichr(). The result is
     *    dropped if HTMLPurifier_Encoder::cleanUTF8() reduces it to the empty
     *    string. A single whitespace character directly after the digits is
     *    consumed as the escape's terminator;
     *  - a line feed: both the backslash and the line feed are removed;
     *  - any other character: that character is emitted literally;
     *  - nothing (backslash is the last byte): the backslash is kept.
     *
     * @param string $string CSS string that may contain backslash escapes.
     * @return string The string with escapes expanded.
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Lone trailing backslash: keep it verbatim.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect at most six hex digits in total.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    // $i now sits on the first byte after the hex run. Only a
                    // whitespace terminator belongs to the escape; anything
                    // else must be re-read by the outer loop, so step back to
                    // cancel the upcoming $i++. This has to happen even when
                    // the decoded character is rejected below, otherwise the
                    // byte following a bad escape would be silently lost.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped line feed: a line continuation, emit nothing.
                    continue;
                }
                // Any other escaped character falls through and is appended
                // literally, without the backslash.
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}

// vim: et sw=4 sts=4