1<?php
2
3/**
4 * @file
5 * This file was auto-generated by generate-includes.php and includes all of
6 * the core files required by HTML Purifier. Use this if performance is a
7 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
8 * FILE, changes will be overwritten the next time the script is run.
9 *
10 * @version 4.10.0
11 *
12 * @warning
13 *      You must *not* include any other HTML Purifier files before this file,
14 *      because 'require' not 'require_once' is used.
15 *
16 * @warning
17 *      This file requires that the include path contains the HTML Purifier
18 *      library directory; this is not auto-set.
19 */
20
21
22
23/*! @mainpage
24 *
25 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
26 * HTML and rigorously test, validate and filter it into a version that
27 * is safe for output onto webpages. It achieves this by:
28 *
29 *  -# Lexing (parsing into tokens) the document,
30 *  -# Executing various strategies on the tokens:
31 *      -# Removing all elements not in the whitelist,
32 *      -# Making the tokens well-formed,
33 *      -# Fixing the nesting of the nodes, and
34 *      -# Validating attributes of the nodes; and
35 *  -# Generating HTML from the purified tokens.
36 *
37 * However, most users will only need to interface with the HTMLPurifier
38 * and HTMLPurifier_Config.
39 */
40
41/*
42    HTML Purifier 4.10.0 - Standards Compliant HTML Filtering
43    Copyright (C) 2006-2008 Edward Z. Yang
44
45    This library is free software; you can redistribute it and/or
46    modify it under the terms of the GNU Lesser General Public
47    License as published by the Free Software Foundation; either
48    version 2.1 of the License, or (at your option) any later version.
49
50    This library is distributed in the hope that it will be useful,
51    but WITHOUT ANY WARRANTY; without even the implied warranty of
52    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
53    Lesser General Public License for more details.
54
55    You should have received a copy of the GNU Lesser General Public
56    License along with this library; if not, write to the Free Software
57    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
58 */
59
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.10.0';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.10.0';

    /**
     * Global configuration object.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility.
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier.
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of the last run purification.
     *
     * After purify() this is a single HTMLPurifier_Context. After
     * purifyArray() it is an array of contexts that uses the same keys
     * (and the same nesting) as the array that was purified.
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *                for all instances of the purifier, if omitted, a default
     *                configuration is supplied (which can be overridden on a
     *                per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve.
     *
     * Deprecated: every call raises an E_USER_WARNING. The filter is still
     * registered and is run after the configuration-driven filters.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: enabled Filter.* flags first, then Filter.Custom
        // objects, then anything registered through addFilter()
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // dotted keys are not mapped to a filter class
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
            $html = $filters[$i]->preFilter($html, $config, $context);
        }

        // purified HTML
        $html =
            $this->generator->generateFromTokens(
            // list of tokens
                $this->strategy->execute(
                // list of un-purified tokens
                    $lexer->tokenizeHTML(
                    // un-purified HTML
                        $html,
                        $config,
                        $context
                    ),
                    $config,
                    $context
                )
            );

        // post-filters run in the reverse order of the pre-filters
        for ($i = $filter_size - 1; $i >= 0; $i--) {
            $html = $filters[$i]->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // kept as a reference assignment so that each run rebinds the
        // property instead of writing through an existing reference
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets.
     *
     * Elements that are themselves arrays are purified recursively, so
     * nested structures keep their shape. Keys are preserved at every
     * level. Afterwards $this->context holds an array of contexts with
     * the same keys and nesting as the input.
     *
     * @param array $array_of_html Array of html snippets (may be nested)
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, same keys and nesting as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                // descend instead of handing an array to purify(), which
                // only knows how to deal with strings
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // either the context of the snippet just purified or, for a
            // nested array, the array of contexts the recursive call left
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        // a truthy prototype always replaces whatever instance is stored
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return HTMLPurifier::instance($prototype);
    }
}
311
312
313
314
315
/**
 * Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
 * and back again.
 *
 * @note This transformation is not an equivalence.  We mutate the input
 * token stream to make it so; see all [MUT] markers in code.
 */
class HTMLPurifier_Arborize
{
    /**
     * Builds a node tree out of a flat token list.
     *
     * The root of the tree is a node created from the definition's
     * info_parent. End tokens close the innermost open node and copy
     * their position/armor onto it; every other token becomes a child of
     * the innermost open node, and start tokens additionally open a new
     * level.
     *
     * @param HTMLPurifier_Token[] $tokens Tokens to arrange (mutated, see [MUT])
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context Not used by this method
     * @return HTMLPurifier_Node Root node
     */
    public static function arborize($tokens, $config, $context)
    {
        $htmlDefinition = $config->getHTMLDefinition();
        $rootToken = new HTMLPurifier_Token_Start($htmlDefinition->info_parent);
        $open = array($rootToken->toNode());
        foreach ($tokens as $token) {
            $token->skip = null; // [MUT]
            $token->carryover = null; // [MUT]
            if (!($token instanceof HTMLPurifier_Token_End)) {
                $child = $token->toNode();
                $open[count($open) - 1]->children[] = $child;
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $open[] = $child;
                }
                continue;
            }
            // closing tag: finish the innermost open node
            $token->start = null; // [MUT]
            $closed = array_pop($open);
            //assert($closed->name === $token->name);
            //assert(empty($token->attr));
            $closed->endCol = $token->col;
            $closed->endLine = $token->line;
            $closed->endArmor = $token->armor;
        }
        //assert(count($open) == 1);
        return $open[0];
    }

    /**
     * Turns a node tree back into a flat token list.
     *
     * The start token of the node passed in is not emitted (only levels
     * below it are), and closing tokens recorded at level 0 are never
     * flushed, so the result describes the contents of $node.
     *
     * @param HTMLPurifier_Node $node Root node
     * @param HTMLPurifier_Config $config Not used by this method
     * @param HTMLPurifier_Context $context Not used by this method
     * @return HTMLPurifier_Token[]
     */
    public static function flatten($node, $config, $context)
    {
        $depth = 0;
        $queues = array($depth => new HTMLPurifier_Queue(array($node)));
        $pendingEnds = array();
        $out = array();
        do {
            while (!$queues[$depth]->isEmpty()) {
                $current = $queues[$depth]->shift(); // FIFO
                list($startToken, $endToken) = $current->toTokenPair();
                if ($depth > 0) {
                    $out[] = $startToken;
                }
                if ($endToken !== null) {
                    $pendingEnds[$depth][] = $endToken;
                }
                if ($current instanceof HTMLPurifier_Node_Element) {
                    // descend: the children are handled before the
                    // remaining siblings of the current level
                    $depth++;
                    $queues[$depth] = new HTMLPurifier_Queue();
                    foreach ($current->children as $child) {
                        $queues[$depth]->push($child);
                    }
                }
            }
            // current level exhausted: go back up and emit the closing
            // tokens that were waiting there
            $depth--;
            if ($depth && isset($pendingEnds[$depth])) {
                while ($closing = array_pop($pendingEnds[$depth])) {
                    $out[] = $closing;
                }
            }
        } while ($depth > 0);
        return $out;
    }
}
385
386
387
/**
 * Defines common attribute collections that modules reference
 */

class HTMLPurifier_AttrCollections
{

    /**
     * Associative array of attribute collections, indexed by name.
     * @type array
     */
    public $info = array();

    /**
     * Gathers the attribute collections declared by the given modules and
     * fully expands them; simply delegates to doConstruct().
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
     */
    public function __construct($attr_types, $modules)
    {
        $this->doConstruct($attr_types, $modules);
    }

    /**
     * Does the actual construction work: first merges every module's
     * attr_collections into $this->info, then resolves inclusions and
     * string identifiers for each collection.
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
     */
    public function doConstruct($attr_types, $modules)
    {
        // phase 1: collect what the modules declare
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $collName => $collection) {
                if (!isset($this->info[$collName])) {
                    $this->info[$collName] = array();
                }
                foreach ($collection as $attrName => $definition) {
                    // index 0 is the list of included collections; lists
                    // from several modules are concatenated, everything
                    // else is overwritten by the later module
                    $mergeIncludes = ($attrName === 0) && isset($this->info[$collName][$attrName]);
                    $this->info[$collName][$attrName] = $mergeIncludes
                        ? array_merge($this->info[$collName][$attrName], $definition)
                        : $definition;
                }
            }
        }
        // phase 2: expand each collection in place
        foreach (array_keys($this->info) as $collName) {
            // pull in attributes of included collections
            $this->performInclusions($this->info[$collName]);
            // turn string identifiers into attribute definition objects
            $this->expandIdentifiers($this->info[$collName], $attr_types);
        }
    }

    /**
     * Resolves the inclusion list stored at index 0 of an attribute
     * array: attributes of every (transitively) included collection are
     * copied in unless the key already exists, then index 0 is removed.
     * @param array &$attr Reference to attribute array
     */
    public function performInclusions(&$attr)
    {
        if (!isset($attr[0])) {
            return;
        }
        $pending = $attr[0];
        $visited = array(); // recursion guard
        // $pending may grow while we walk it, hence the index loop
        for ($i = 0; isset($pending[$i]); $i++) {
            $included = $pending[$i];
            if (isset($visited[$included])) {
                continue;
            }
            $visited[$included] = true;
            if (!isset($this->info[$included])) {
                continue;
            }
            foreach ($this->info[$included] as $key => $value) {
                // existing keys win; this also keeps our own index 0 intact
                if (!isset($attr[$key])) {
                    $attr[$key] = $value;
                }
            }
            if (isset($this->info[$included][0])) {
                // queue the inclusions of the included collection
                $pending = array_merge($pending, $this->info[$included][0]);
            }
        }
        unset($attr[0]);
    }

    /**
     * Replaces the string identifiers in an attribute array with the
     * objects HTMLPurifier_AttrTypes returns for them. A '*' in a key
     * marks the attribute as required and is stripped from the key;
     * entries that are false or that resolve to nothing are removed.
     * @param array &$attr Reference to attribute array
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     */
    public function expandIdentifiers(&$attr, $attr_types)
    {
        // renamed keys are re-added to the array; remember what has been
        // handled so nothing is expanded twice
        $done = array();

        foreach ($attr as $name => $definition) {
            // index 0 holds inclusions, not an attribute
            if ($name === 0 || isset($done[$name])) {
                continue;
            }

            $required = (strpos($name, '*') !== false);
            if ($required) {
                // store the definition under the star-less name
                unset($attr[$name]);
                $name = trim($name, '*');
                $attr[$name] = $definition;
            }

            $done[$name] = true;

            if (is_object($definition)) {
                // already a literal object: only update the required
                // flag, never downgrade it
                $attr[$name]->required = ($required || $attr[$name]->required);
                continue;
            }

            if ($definition === false) {
                unset($attr[$name]);
                continue;
            }

            $resolved = $attr_types->get($definition);
            if (!$resolved) {
                unset($attr[$name]);
                continue;
            }
            $attr[$name] = $resolved;
            $attr[$name]->required = $required;
        }
    }
}
532
533
534
535
536
/**
 * Base class for all validating attribute definitions.
 *
 * This family of classes forms the core for not only HTML attribute validation,
 * but also any sort of string that needs to be validated or cleaned (which
 * means CSS properties and composite definitions are defined here too).
 * Besides defining (through code) what precisely makes the string valid,
 * subclasses are also responsible for cleaning the code if possible.
 */

abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method process a string in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
     * leading and trailing whitespace, and replacing line feeds,
     * carriage returns and tabs with spaces.  While most useful for HTML
     * attributes specified as CDATA, it can also be applied to most CSS
     * values.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to normalize
     * @return string
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Both the four-argument forms (rgba/hsla) and the three-argument
     * forms (rgb/hsl) are handled, including when they occur together in
     * one value.
     *
     * @param string $string a CSS colour definition
     * @return string|null Munged string; null if the regex engine fails
     */
    protected function mungeRgb($string)
    {
        // one numeric argument, optionally fractional and/or a percentage,
        // with optional surrounding whitespace (three capture groups)
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        $munged = preg_replace(
            '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
            '\1(\2,\5,\8,\11)',
            $string
        );
        if ($munged === null) {
            // PCRE failure; report it the way preg_replace() does
            return null;
        }

        // "rgb\(" cannot match inside "rgba(", so this second pass never
        // touches what the first pass produced; it only picks up plain
        // rgb()/hsl() functions, which used to be skipped whenever the
        // value also contained an rgba()/hsla() function
        return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $munged);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * A backslash followed by one to six hex digits is replaced by the
     * corresponding character (dropped if it does not survive
     * HTMLPurifier_Encoder::cleanUTF8()); a single whitespace character
     * directly after such an escape is consumed. A backslash followed by
     * a newline is removed entirely. For any other escaped character only
     * the backslash is removed. A lone trailing backslash is kept.
     *
     * @param string $string Possibly escaped CSS string
     * @return string
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    $code = $string[$i];
                    // gather at most five further hex digits
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                        // NOTE(review): $i already points at the character
                        // after the hex run, so the loop increment skips
                        // that character even when it is not whitespace.
                        // Confirm whether this is intended before changing.
                        continue;
                    }
                    $ret .= $char;
                    // step back so the loop increment lands on the next
                    // character, unless it is whitespace terminating the
                    // escape (which is swallowed)
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
677
678
679
680
681
/**
 * Processes an entire attribute array for corrections needing multiple values.
 *
 * Occasionally, a certain attribute will need to be removed and popped onto
 * another value.  Instead of creating a complex return syntax for
 * HTMLPurifier_AttrDef, we just pass the whole attribute array to a
 * specialized object and have that do the special work.  That is the
 * family of HTMLPurifier_AttrTransform.
 *
 * An attribute transformation can be assigned to run before or after
 * HTMLPurifier_AttrDef validation.  See HTMLPurifier_HTMLDefinition for
 * more details.
 */

abstract class HTMLPurifier_AttrTransform
{

    /**
     * Abstract: makes changes to the attributes dependent on multiple values.
     *
     * @param array $attr Assoc array of attributes, usually from
     *              HTMLPurifier_Token_Tag::$attr
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
     * @return array Processed attribute array.
     */
    abstract public function transform($attr, $config, $context);

    /**
     * Puts the given CSS in front of the current style attribute value;
     * a missing (or null) style attribute is treated as an empty string.
     * @param array &$attr Attribute array to process (passed by reference)
     * @param string $css CSS to prepend
     */
    public function prependCSS(&$attr, $css)
    {
        $existing = '';
        if (isset($attr['style'])) {
            $existing = $attr['style'];
        }
        $attr['style'] = $css . $existing;
    }

    /**
     * Takes an attribute out of the array and hands back its value.
     * @param array &$attr Attribute array to process (passed by reference)
     * @param mixed $key Key of attribute to confiscate
     * @return mixed The removed value, or null when the key is not set
     */
    public function confiscateAttr(&$attr, $key)
    {
        if (isset($attr[$key])) {
            $taken = $attr[$key];
            unset($attr[$key]);
            return $taken;
        }
        return null;
    }
}
738
739
740
741
742
/**
 * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
 */
class HTMLPurifier_AttrTypes
{
    /**
     * Lookup array of attribute string identifiers to concrete implementations.
     * @type HTMLPurifier_AttrDef[]
     */
    protected $info = array();

    /**
     * Registers the default implementation for every built-in attribute
     * type name.
     */
    public function __construct()
    {
        // XXX This is kind of poor, since we don't actually /clone/
        // instances; instead, we use the supplied make() attribute. So,
        // the underlying class must know how to deal with arguments.
        // With the old implementation of Enum, that ignored its
        // arguments when handling a make dispatch, the IAlign
        // definition wouldn't work.
        $defaults = array(
            // pseudo-types, must be instantiated via shorthand
            'Enum' => new HTMLPurifier_AttrDef_Enum(),
            'Bool' => new HTMLPurifier_AttrDef_HTML_Bool(),

            'CDATA' => new HTMLPurifier_AttrDef_Text(),
            'ID' => new HTMLPurifier_AttrDef_HTML_ID(),
            'Length' => new HTMLPurifier_AttrDef_HTML_Length(),
            'MultiLength' => new HTMLPurifier_AttrDef_HTML_MultiLength(),
            'NMTOKENS' => new HTMLPurifier_AttrDef_HTML_Nmtokens(),
            'Pixels' => new HTMLPurifier_AttrDef_HTML_Pixels(),
            'Text' => new HTMLPurifier_AttrDef_Text(),
            'URI' => new HTMLPurifier_AttrDef_URI(),
            'LanguageCode' => new HTMLPurifier_AttrDef_Lang(),
            'Color' => new HTMLPurifier_AttrDef_HTML_Color(),
            'IAlign' => self::makeEnum('top,middle,bottom,left,right'),
            'LAlign' => self::makeEnum('top,bottom,left,right'),
            'FrameTarget' => new HTMLPurifier_AttrDef_HTML_FrameTarget(),

            // unimplemented aliases
            'ContentType' => new HTMLPurifier_AttrDef_Text(),
            'ContentTypes' => new HTMLPurifier_AttrDef_Text(),
            'Charsets' => new HTMLPurifier_AttrDef_Text(),
            'Character' => new HTMLPurifier_AttrDef_Text(),

            // "proprietary" types
            'Class' => new HTMLPurifier_AttrDef_HTML_Class(),

            // number is really a positive integer (one or more digits)
            // FIXME: ^^ not always, see start and value of list items
            'Number' => new HTMLPurifier_AttrDef_Integer(false, false, true),
        );

        // assigned one by one so entries already present in $info that
        // are not listed above stay in place
        foreach ($defaults as $name => $impl) {
            $this->info[$name] = $impl;
        }
    }

    /**
     * Wraps an Enum definition for the comma-separated values in a Clone
     * definition.
     * @param string $in Comma-separated list of allowed values
     * @return HTMLPurifier_AttrDef_Clone
     */
    private static function makeEnum($in)
    {
        $allowed = explode(',', $in);
        $enum = new HTMLPurifier_AttrDef_Enum($allowed);
        return new HTMLPurifier_AttrDef_Clone($enum);
    }

    /**
     * Retrieves a type
     *
     * Anything after the first '#' in $type is handed to the registered
     * implementation's make() method. Asking for an unknown type raises
     * an E_USER_ERROR.
     * @param string $type String type name
     * @return HTMLPurifier_AttrDef Object AttrDef for type
     */
    public function get($type)
    {
        // split off extra info tacked on after the type name, if any
        $string = '';
        if (strpos($type, '#') !== false) {
            list($type, $string) = explode('#', $type, 2);
        }

        if (isset($this->info[$type])) {
            return $this->info[$type]->make($string);
        }

        trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
        return;
    }

    /**
     * Sets a new implementation for a type
     * @param string $type String type name
     * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
     */
    public function set($type, $impl)
    {
        $this->info[$type] = $impl;
    }
}
835
836
837
838
839
/**
 * Validates the attributes of a token. Doesn't manage required attributes
 * very well. The only reason we factored this out was because RemoveForeignElements
 * also needed it besides ValidateAttributes.
 */
class HTMLPurifier_AttrValidator
{

    /**
     * Validates the attributes of a token, mutating it as necessary.
     *
     * Only start and empty tags are processed. For any other token the
     * method returns right after making sure an IDAccumulator is registered;
     * it does not register a CurrentToken entry for such tokens, so nothing
     * stale is left behind in the context.
     *
     * For start/empty tags the attributes go through, in order: global
     * pre-transforms, element pre-transforms, per-attribute validation
     * (element definition first, then global definition, otherwise the
     * attribute is removed), global post-transforms and element
     * post-transforms. The token's attr array is replaced once, at the end.
     *
     * @param HTMLPurifier_Token $token Token to validate.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     */
    public function validateToken($token, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);

        // initialize IDAccumulator if necessary
        $ok =& $context->get('IDAccumulator', true);
        if (!$ok) {
            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
            $context->register('IDAccumulator', $id_accumulator);
        }

        // Bail out BEFORE touching CurrentToken: registering it and then
        // returning here would leave a stale entry in the context.
        if (!$token instanceof HTMLPurifier_Token_Start &&
            !$token instanceof HTMLPurifier_Token_Empty
        ) {
            return;
        }

        // initialize CurrentToken if necessary; remember (via the falsy
        // $current_token) that we own it so it can be destroyed at the end
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) {
            $context->register('CurrentToken', $token);
        }

        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;

        // don't update token until the very end, to ensure an atomic update
        $attr = $token->attr;

        // do global transformations (pre)
        // nothing currently utilizes this
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_pre,
            $attr,
            $config,
            $context,
            $e
        );

        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_pre,
            $attr,
            $config,
            $context,
            $e
        );

        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;

        // CurrentAttr is registered by reference, so it follows the loop
        // variable below
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);

        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {

            // call the definition
            if (isset($defs[$attr_key])) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this element.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate(
                        $value,
                        $config,
                        $context
                    );
                }
            } elseif (isset($d_defs[$attr_key])) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate(
                    $value,
                    $config,
                    $context
                );
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }

            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should replaced
                // with more specific ones when possible
                if ($e) {
                    $e->send(E_ERROR, 'AttrValidator: Attribute removed');
                }

                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.

                // simple substitution
                $attr[$attr_key] = $result;
            }
            // any other result (e.g. true) leaves the attribute as it is

            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overriden,
            // others would prepend themselves).
        }

        $context->destroy('CurrentAttr');

        // post transforms

        // global (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_post,
            $attr,
            $config,
            $context,
            $e
        );

        // local (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_post,
            $attr,
            $config,
            $context,
            $e
        );

        $token->attr = $attr;

        // destroy CurrentToken if we made it ourselves
        if (!$current_token) {
            $context->destroy('CurrentToken');
        }
    }

    /**
     * Runs a list of attribute transforms over an attribute array, one after
     * the other, reporting every transform that changed something.
     *
     * @param array $transforms Objects exposing transform($attr, $config, $context)
     * @param array $attr Attributes to feed into the first transform
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     * @param mixed $e Error collector, or a falsy value to skip reporting
     * @return array Attributes as returned by the last transform (the input
     *         unchanged when the list is empty)
     */
    private function applyTransforms($transforms, $attr, $config, $context, $e)
    {
        foreach ($transforms as $transform) {
            // keep the pre-transform state in $o for the change report
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && $attr != $o) {
                $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
            }
        }
        return $attr;
    }

}
1014
1015
1016
1017
1018
// Constants are slow, so we use as few as possible. The prefix is defined
// only once; when it is, it is also put at the front of the include path.
if (defined('HTMLPURIFIER_PREFIX') === false) {
    define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
    set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
}

// Accommodations for PHP versions earlier than 5.0.2 (no PHP_EOL there).
// Borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (defined('PHP_EOL') === false) {
    // pick the line ending from the first three letters of the OS name
    if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
        define('PHP_EOL', "\r\n");
    } elseif (strtoupper(substr(PHP_OS, 0, 3)) == 'DAR') {
        define('PHP_EOL', "\r");
    } else {
        define('PHP_EOL', "\n");
    }
}
1039
/**
 * Bootstrap helper holding HTML Purifier's meta-functionality: resolving
 * class names to files, autoloading them, and registering the autoloader.
 *
 * @note
 *      This class may be used without any other files from HTML Purifier.
 */
class HTMLPurifier_Bootstrap
{

    /**
     * Autoload callback: loads the file that belongs to an HTML Purifier class.
     * @param string $class Class to load
     * @return bool True when a file was found and included, false otherwise
     */
    public static function autoload($class)
    {
        $path = self::getPath($class);
        if ($path) {
            // Technically speaking, it should be ok and more efficient to
            // just do 'require', but Antonio Parraga reports that with
            // Zend extensions such as Zend debugger and APC, this invariant
            // may be broken.  Since we have efficient alternatives, pay
            // the cost here and avoid the bug.
            require_once HTMLPURIFIER_PREFIX . '/' . $path;
            return true;
        }
        return false;
    }

    /**
     * Maps a class name to its file, relative to HTMLPURIFIER_PREFIX.
     *
     * Language classes ("HTMLPurifier_Language_*") live in
     * HTMLPurifier/Language/classes/ with underscores of the language code
     * turned into dashes; every other class maps underscores to directory
     * separators.
     *
     * @param string $class Class path to get
     * @return string|bool Relative path, or false when the class name does
     *         not start with "HTMLPurifier" or the file does not exist
     */
    public static function getPath($class)
    {
        if (strncmp('HTMLPurifier', $class, 12) !== 0) {
            return false;
        }

        // Custom implementations
        $is_language = strncmp('HTMLPurifier_Language_', $class, 22) === 0;
        $relative = $is_language
            ? 'HTMLPurifier/Language/classes/' . str_replace('_', '-', substr($class, 22)) . '.php'
            : str_replace('_', '/', $class) . '.php';

        return file_exists(HTMLPURIFIER_PREFIX . '/' . $relative) ? $relative : false;
    }

    /**
     * "Pre-registers" our autoloader on the SPL stack, i.e. puts it in
     * front of the autoloaders that are already registered.
     */
    public static function registerAutoload()
    {
        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
        $funcs = spl_autoload_functions();

        if ($funcs === false) {
            // no stack reported: a plain registration is all that is needed
            spl_autoload_register($autoload);
            return;
        }
        if (!function_exists('spl_autoload_unregister')) {
            return;
        }
        if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
            // prepend flag exists, no need for shenanigans
            spl_autoload_register($autoload, true, true);
            return;
        }

        // Older PHP: unregister everything, register ourselves, then put
        // the previous autoloaders back behind us.
        $buggy  = version_compare(PHP_VERSION, '5.2.11', '<');
        $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
            version_compare(PHP_VERSION, '5.1.0', '>=');
        foreach ($funcs as $func) {
            if ($buggy && is_array($func)) {
                // :TRICKY: There are some compatibility issues and some
                // places where we need to error out
                $reflector = new ReflectionMethod($func[0], $func[1]);
                if (!$reflector->isStatic()) {
                    // the line breaks and indentation inside this literal
                    // are part of the message; do not re-indent them
                    throw new Exception(
                        'HTML Purifier autoloader registrar is not compatible
                                with non-static object methods due to PHP Bug #44144;
                                Please do not use HTMLPurifier.autoload.php (or any
                                file that includes this file); instead, place the code:
                                spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
                                after your own autoloaders.'
                    );
                }
                // Suprisingly, spl_autoload_register supports the
                // Class::staticMethod callback format, although call_user_func doesn't
                if ($compat) {
                    $func = implode('::', $func);
                }
            }
            spl_autoload_unregister($func);
        }
        spl_autoload_register($autoload);
        foreach ($funcs as $func) {
            spl_autoload_register($func);
        }
    }
}
1140
1141
1142
1143
1144
/**
 * Base class for definition datatype objects. It provides the one-time
 * setup() entry point; subclasses supply the actual work in doSetup().
 */
abstract class HTMLPurifier_Definition
{

    /**
     * Whether setup() has already been invoked on this object.
     * @type bool
     */
    public $setup = false;

    /**
     * If true, write out the final definition object to the cache after
     * setup.  This will be true only if all invocations to get a raw
     * definition object are also optimized.  This does not cause file
     * system thrashing because on subsequent calls the cached object
     * is used and any writes to the raw definition object are short
     * circuited.  See enduser-customize.html for the high-level
     * picture.
     * @type bool
     */
    public $optimized = null;

    /**
     * Kind of definition this object represents.
     * @type string
     */
    public $type;

    /**
     * Brings the definition object into its final form; this is the part
     * of initialisation the constructor does not do.
     * @param HTMLPurifier_Config $config
     */
    abstract protected function doSetup($config);

    /**
     * Runs doSetup() the first time it is called and does nothing on any
     * later call.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        if (!$this->setup) {
            // The flag is raised before doSetup() runs, so a setup() call
            // arriving while doSetup() is still executing returns at once.
            $this->setup = true;
            $this->doSetup($config);
        }
    }
}
1196
1197
1198
1199
1200
/**
 * Table of the CSS properties that are allowed, each mapped to the
 * attribute definition that validates its value.
 * @see HTMLPurifier_HTMLDefinition
 */
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{

    public $type = 'CSS';

    /**
     * Assoc array of attribute name to definition object.
     * @type HTMLPurifier_AttrDef[]
     */
    public $info = array();

    /**
     * Fills the info array.  The meat of this class.
     *
     * Statement order is significant in here: the shorthand definitions
     * that are handed $config ('font', 'border', ...) must be created after
     * the entries they build upon, and several properties deliberately
     * share one definition object.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $alignments = array('left', 'right', 'center', 'justify');
        $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum($alignments, false);

        // one enum instance shared by the four per-side style properties
        $border_style = new HTMLPurifier_AttrDef_Enum(
            array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge', 'inset', 'outset'),
            false
        );
        foreach (array('top', 'left', 'right', 'bottom') as $side) {
            $this->info['border-' . $side . '-style'] = $border_style;
        }
        $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);

        $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(array('none', 'left', 'right', 'both'), false);
        $this->info['float'] = new HTMLPurifier_AttrDef_Enum(array('none', 'left', 'right'), false);
        $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(array('normal', 'italic', 'oblique'), false);
        $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(array('normal', 'small-caps'), false);

        // "none" keyword or a URI; reused for both image properties
        $none_keyword = new HTMLPurifier_AttrDef_Enum(array('none'));
        $css_uri = new HTMLPurifier_AttrDef_CSS_URI();
        $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(array($none_keyword, $css_uri));

        $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(array('inside', 'outside'), false);
        $bullet_types = array(
            'disc', 'circle', 'square',
            'decimal',
            'lower-roman', 'upper-roman',
            'lower-alpha', 'upper-alpha',
            'none'
        );
        $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum($bullet_types, false);
        $this->info['list-style-image'] = $uri_or_none;

        // NOTE(review): takes $config like 'font' below; keep it after the
        // list-style-* entries in case it reads them - confirm
        $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);

        $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
            array('capitalize', 'uppercase', 'lowercase', 'none'),
            false
        );
        $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();

        $this->info['background-image'] = $uri_or_none;
        $repeat_modes = array('repeat', 'repeat-x', 'repeat-y', 'no-repeat');
        $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum($repeat_modes);
        $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(array('scroll', 'fixed'));
        $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();

        // "transparent" or a color; shared by background-color and the
        // per-side border colors
        $transparent_keyword = new HTMLPurifier_AttrDef_Enum(array('transparent'));
        $any_color = new HTMLPurifier_AttrDef_CSS_Color();
        $border_color = new HTMLPurifier_AttrDef_CSS_Composite(array($transparent_keyword, $any_color));
        $this->info['background-color'] = $border_color;
        foreach (array('right', 'left', 'bottom', 'top') as $side) {
            $this->info['border-' . $side . '-color'] = $border_color;
        }

        // NOTE(review): same caveat as 'list-style' - keep after the
        // background-* entries
        $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);

        $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);

        $width_keywords = new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick'));
        $width_length = new HTMLPurifier_AttrDef_CSS_Length('0'); //disallow negative
        $border_width = new HTMLPurifier_AttrDef_CSS_Composite(array($width_keywords, $width_length));
        foreach (array('right', 'left', 'bottom', 'top') as $side) {
            $this->info['border-' . $side . '-width'] = $border_width;
        }

        $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);

        // each spacing property gets its own "normal"-or-length definition
        foreach (array('letter-spacing', 'word-spacing') as $spacing) {
            $this->info[$spacing] = new HTMLPurifier_AttrDef_CSS_Composite(
                array(
                    new HTMLPurifier_AttrDef_Enum(array('normal')),
                    new HTMLPurifier_AttrDef_CSS_Length()
                )
            );
        }

        $size_keywords = new HTMLPurifier_AttrDef_Enum(
            array(
                'xx-small', 'x-small', 'small',
                'medium',
                'large', 'x-large', 'xx-large',
                'larger', 'smaller'
            )
        );
        $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                $size_keywords,
                new HTMLPurifier_AttrDef_CSS_Percentage(),
                new HTMLPurifier_AttrDef_CSS_Length()
            )
        );

        $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('normal')),
                new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true)
            )
        );

        $margin = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage(),
                new HTMLPurifier_AttrDef_Enum(array('auto'))
            )
        );
        foreach (array('right', 'left', 'bottom', 'top') as $side) {
            $this->info['margin-' . $side] = $margin;
        }

        $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);

        // non-negative
        $padding = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true)
            )
        );
        foreach (array('right', 'left', 'bottom', 'top') as $side) {
            $this->info['padding-' . $side] = $padding;
        }

        $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);

        $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage()
            )
        );

        $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true),
                new HTMLPurifier_AttrDef_Enum(array('auto'))
            )
        );
        $max = $config->get('CSS.MaxImgLength');

        if ($max === null) {
            $dimension = $trusted_wh;
        } else {
            // For img tags: a length capped at $max (or "auto")
            $img_wh = new HTMLPurifier_AttrDef_CSS_Composite(
                array(
                    new HTMLPurifier_AttrDef_CSS_Length('0', $max),
                    new HTMLPurifier_AttrDef_Enum(array('auto'))
                )
            );
            // For everyone else: $trusted_wh
            $dimension = new HTMLPurifier_AttrDef_Switch('img', $img_wh, $trusted_wh);
        }
        // all six dimension properties share the same definition object
        foreach (array('height', 'width', 'max-height', 'min-height', 'max-width', 'min-width') as $property) {
            $this->info[$property] = $dimension;
        }

        $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();

        $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();

        // this could use specialized code
        $weights = array(
            'normal', 'bold', 'bolder', 'lighter',
            '100', '200', '300', '400', '500', '600', '700', '800', '900'
        );
        $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum($weights, false);

        // MUST be called after other font properties, as it references
        // a CSSDefinition object
        $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);

        // same here
        $border = new HTMLPurifier_AttrDef_CSS_Border($config);
        foreach (array('border-right', 'border-left', 'border-top', 'border-bottom', 'border') as $property) {
            $this->info[$property] = $border;
        }

        $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array('collapse', 'separate'));

        $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array('top', 'bottom'));

        $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'fixed'));

        $alignment_keywords = new HTMLPurifier_AttrDef_Enum(
            array('baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom')
        );
        $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                $alignment_keywords,
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage()
            )
        );

        $spacing_length = new HTMLPurifier_AttrDef_CSS_Length();
        $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple($spacing_length, 2);

        // These CSS properties don't work on many browsers, but we live
        // in THE FUTURE!
        $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
            array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
        );

        // optional property groups, switched on through configuration
        if ($config->get('CSS.Proprietary')) {
            $this->doSetupProprietary($config);
        }

        if ($config->get('CSS.AllowTricky')) {
            $this->doSetupTricky($config);
        }

        if ($config->get('CSS.Trusted')) {
            $this->doSetupTrusted($config);
        }

        $allow_important = $config->get('CSS.AllowImportant');
        // wrap all attr-defs with decorator that handles !important
        foreach ($this->info as $property => $definition) {
            $this->info[$property] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator(
                $definition,
                $allow_important
            );
        }

        $this->setupConfigStuff($config);
    }

    /**
     * Adds the properties enabled by CSS.Proprietary.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetupProprietary($config)
    {
        // Internet Explorer only scrollbar colors (one definition object each)
        foreach (array('arrow', 'base', 'darkshadow', 'face', 'highlight', 'shadow') as $part) {
            $this->info['scrollbar-' . $part . '-color'] = new HTMLPurifier_AttrDef_CSS_Color();
        }

        // vendor specific prefixes of opacity
        $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
        $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();

        // only opacity, for now
        $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();

        // more CSS3
        $page_break = new HTMLPurifier_AttrDef_Enum(array('auto', 'always', 'avoid', 'left', 'right'));
        $this->info['page-break-before'] = $page_break;
        $this->info['page-break-after'] = $page_break;
        $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));

        $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
                new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
            )
        );

        // the four corners share one definition taking up to two values
        $corner_radius = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
        foreach (array('bottom-left', 'bottom-right', 'top-right', 'top-left') as $corner) {
            $this->info['border-' . $corner . '-radius'] = $corner_radius;
        }
        // TODO: support SLASH syntax
        $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
    }

    /**
     * Adds the properties enabled by CSS.AllowTricky.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetupTricky($config)
    {
        $display_modes = array(
            'inline',
            'block',
            'list-item',
            'run-in',
            'compact',
            'marker',
            'table',
            'inline-block',
            'inline-table',
            'table-row-group',
            'table-header-group',
            'table-footer-group',
            'table-row',
            'table-column-group',
            'table-column',
            'table-cell',
            'table-caption',
            'none'
        );
        $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_modes);
        $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'collapse'));
        $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
        $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    /**
     * Adds the properties enabled by CSS.Trusted.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetupTrusted($config)
    {
        $this->info['position'] = new HTMLPurifier_AttrDef_Enum(
            array('static', 'relative', 'absolute', 'fixed')
        );

        // the four offsets share one length / percentage / "auto" definition
        $offset = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage(),
                new HTMLPurifier_AttrDef_Enum(array('auto')),
            )
        );
        foreach (array('bottom', 'right', 'left', 'top') as $edge) {
            $this->info[$edge] = $offset;
        }

        $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Integer(),
                new HTMLPurifier_AttrDef_Enum(array('auto')),
            )
        );
    }

    /**
     * Applies the CSS.AllowedProperties and CSS.ForbiddenProperties
     * directives to the info array. Based off of
     * HTMLPurifier_HTMLDefinition.
     *
     * With an allow-list set, every property missing from it is dropped and
     * an E_USER_WARNING is triggered for each listed name that is not a
     * known property. With a deny-list set, every listed property is dropped.
     *
     * @param HTMLPurifier_Config $config
     * @todo Refactor duplicate elements into common class (probably using
     *       composition, not inheritance).
     */
    protected function setupConfigStuff($config)
    {
        // setup allowed elements
        $support = "(for information on implementing this, see the support forums) ";

        $allowed_properties = $config->get('CSS.AllowedProperties');
        if ($allowed_properties !== null) {
            foreach ($this->info as $property => $unused) {
                if (!isset($allowed_properties[$property])) {
                    unset($this->info[$property]);
                }
                // whatever is left in the list afterwards is unsupported
                unset($allowed_properties[$property]);
            }
            // emit errors
            foreach ($allowed_properties as $unsupported => $unused) {
                // :TODO: Is this htmlspecialchars() call really necessary?
                $escaped = htmlspecialchars($unsupported);
                trigger_error("Style attribute '$escaped' is not supported $support", E_USER_WARNING);
            }
        }

        $forbidden_properties = $config->get('CSS.ForbiddenProperties');
        if ($forbidden_properties === null) {
            return;
        }
        foreach ($this->info as $property => $unused) {
            if (isset($forbidden_properties[$property])) {
                unset($this->info[$property]);
            }
        }
    }
}
1688
1689
1690
1691
1692
/**
 * Base class for child definitions: describes which child nodes are
 * allowed and validates a list of nodes against that description.
 */
abstract class HTMLPurifier_ChildDef
{
    /**
     * Type of child definition, usually the lowercased right-most part of
     * the class name. Occasionally used to provide context.
     * @type string
     */
    public $type;

    /**
     * Whether an empty array of children is acceptable.
     *
     * Needed for redundant checking: a change affecting a child node may
     * cause its parent node to become disallowed.
     * @type bool
     */
    public $allow_empty;

    /**
     * Lookup array of every element this definition could possibly allow.
     * @type array
     */
    public $elements = array();

    /**
     * Validates nodes according to the definition and reports the
     * modification to make.
     *
     * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
     * @param HTMLPurifier_Config $config HTMLPurifier_Config object
     * @param HTMLPurifier_Context $context HTMLPurifier_Context object
     * @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children
     */
    abstract public function validateChildren($children, $config, $context);

    /**
     * Returns the element lookup of this definition.
     *
     * This base implementation hands back $elements unchanged and does
     * not consult $config.
     *
     * @param HTMLPurifier_Config $config HTMLPurifier_Config object
     * @return array
     */
    public function getAllowedElements($config)
    {
        return $this->elements;
    }
}
1741
1742
1743
1744
1745
1746/**
1747 * Configuration object that triggers customizable behavior.
1748 *
1749 * @warning This class is strongly defined: that means that the class
1750 *          will fail if an undefined directive is retrieved or set.
1751 *
1752 * @note Many classes that could (although many times don't) use the
1753 *       configuration object make it a mandatory parameter.  This is
1754 *       because a configuration object should always be forwarded,
1755 *       otherwise, you run the risk of missing a parameter and then
1756 *       being stumped when a configuration directive doesn't work.
1757 *
1758 * @todo Reconsider some of the public member variables
1759 */
1760class HTMLPurifier_Config
1761{
1762
/**
 * Version of HTML Purifier this configuration class ships with.
 * @type string
 */
public $version = '4.10.0';

/**
 * Whether the object should be finalized automatically when a read
 * operation is performed.
 * @type bool
 */
public $autoFinalize = true;

// protected member variables

/**
 * Per-namespace serials, indexed by namespace name.
 * @see getSerial() for more info.
 * @type string[]
 */
protected $serials = array();

/**
 * Serial for the entire configuration object; computed lazily.
 * @type string
 */
protected $serial = null;

/**
 * Parser used to validate and convert directive values.
 * @type HTMLPurifier_VarParser_Flexible
 */
protected $parser = null;

/**
 * The HTMLPurifier_ConfigSchema that values are checked against.
 * @type HTMLPurifier_ConfigSchema
 * @note This is public for introspective purposes. Please don't
 *       abuse!
 */
public $def = null;

/**
 * Definitions held in memory, indexed by definition type.
 * @type HTMLPurifier_Definition[]
 */
protected $definitions = null;

/**
 * Whether or not this config has been finalized.
 * @type bool
 */
protected $finalized = false;

/**
 * Property list holding the configuration directives.
 * @type array
 */
protected $plist = null;

/**
 * True while a set() is being performed on behalf of an alias lookup.
 * @type bool
 */
private $aliasMode = null;

/**
 * Set to false if you do not want line and file numbers in errors
 * (useful when unit testing).  This will also compress some errors
 * and exceptions.
 * @type bool
 */
public $chatty = true;

/**
 * Current lock; while set, only gets to this namespace are allowed.
 * @type string
 */
private $lock = null;
1842
/**
 * Builds a configuration on top of a schema.
 *
 * The new property list inherits from $parent; when no parent is given,
 * the schema's defaultPlist is used as the parent instead.
 *
 * @param HTMLPurifier_ConfigSchema $definition ConfigSchema that defines
 * what directives are allowed.
 * @param HTMLPurifier_PropertyList $parent
 */
public function __construct($definition, $parent = null)
{
    if (!$parent) {
        // no explicit parent: fall back to the schema defaults
        $parent = $definition->defaultPlist;
    }
    $this->plist = new HTMLPurifier_PropertyList($parent);
    // the schema is kept around for later directive checks
    $this->def = $definition;
    $this->parser = new HTMLPurifier_VarParser_Flexible();
}
1856
/**
 * Convenience constructor that builds a config object from a mixed value.
 *
 * An existing HTMLPurifier_Config is returned untouched. Otherwise a new
 * object is created (from $schema when given, else via createDefault())
 * and then populated: a string is passed to loadIni(), an array to
 * loadArray(); any other value leaves the new object unpopulated.
 *
 * @param mixed $config Variable that defines the state of the config
 *                      object. Can be: a HTMLPurifier_Config() object,
 *                      an array of directives based on loadArray(),
 *                      or a string filename of an ini file.
 * @param HTMLPurifier_ConfigSchema $schema Schema object
 * @return HTMLPurifier_Config Configured object
 */
public static function create($config, $schema = null)
{
    if ($config instanceof HTMLPurifier_Config) {
        // pass-through
        return $config;
    }

    $instance = $schema
        ? new HTMLPurifier_Config($schema)
        : HTMLPurifier_Config::createDefault();

    if (is_string($config)) {
        $instance->loadIni($config);
    } elseif (is_array($config)) {
        $instance->loadArray($config);
    }

    return $instance;
}
1882
/**
 * Creates a new config object whose property list inherits from the
 * property list of an existing config; the schema is shared.
 *
 * @param HTMLPurifier_Config $config Configuration object to inherit from.
 * @return HTMLPurifier_Config object with $config as its parent.
 */
public static function inherit(HTMLPurifier_Config $config)
{
    $schema = $config->def;
    $parentPlist = $config->plist;
    return new HTMLPurifier_Config($schema, $parentPlist);
}
1892
/**
 * Convenience constructor: builds a configuration object on top of the
 * schema returned by HTMLPurifier_ConfigSchema::instance().
 *
 * @return HTMLPurifier_Config default object.
 */
public static function createDefault()
{
    return new HTMLPurifier_Config(HTMLPurifier_ConfigSchema::instance());
}
1903
/**
 * Retrieves a value from the configuration.
 *
 * Reports an error through triggerError() and returns null when the
 * directive is not defined in the schema, when it is an alias, or when
 * a lock is active for a different namespace.
 *
 * @param string $key String key, in Namespace.Directive form
 * @param mixed $a Deprecated: directive name when $key holds only the
 *                 namespace; the two are joined with a dot
 *
 * @return mixed
 */
public function get($key, $a = null)
{
    if ($a !== null) {
        $this->triggerError(
            "Using deprecated API: use \$config->get('$key.$a') instead",
            E_USER_WARNING
        );
        $key = "$key.$a";
    }

    if (!$this->finalized) {
        $this->autoFinalize();
    }

    $directives = $this->def->info;
    if (!isset($directives[$key])) {
        // can't add % due to SimpleTest bug
        $this->triggerError(
            'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
            E_USER_WARNING
        );
        return null;
    }

    $entry = $directives[$key];
    if (isset($entry->isAlias)) {
        $this->triggerError(
            'Cannot get value from aliased directive, use real name ' . $entry->key,
            E_USER_ERROR
        );
        return null;
    }

    if ($this->lock) {
        // while a lock is active only its own namespace may be read
        $segments = explode('.', $key);
        $ns = $segments[0];
        if ($ns !== $this->lock) {
            $this->triggerError(
                'Cannot get value of namespace ' . $ns . ' when lock for ' .
                $this->lock .
                ' is active, this probably indicates a Definition setup method ' .
                'is accessing directives that are not within its namespace',
                E_USER_ERROR
            );
            return null;
        }
    }

    return $this->plist->get($key);
}
1955
/**
 * Retrieves an array of directive => value pairs for one namespace.
 *
 * Reports an error through triggerError() and returns null when the
 * namespace is not present in getAll().
 *
 * @param string $namespace String namespace
 *
 * @return array
 */
public function getBatch($namespace)
{
    if (!$this->finalized) {
        $this->autoFinalize();
    }

    $everything = $this->getAll();
    if (isset($everything[$namespace])) {
        return $everything[$namespace];
    }

    $this->triggerError(
        'Cannot retrieve undefined namespace ' .
        htmlspecialchars($namespace),
        E_USER_WARNING
    );
    return null;
}
1979
/**
 * Returns a SHA-1 signature of one namespace of the configuration.
 *
 * The signature is computed once and then served from $serials until
 * that entry is emptied again.
 *
 * @param string $namespace Namespace to get serial for
 *
 * @return string
 * @note DefinitionRev is handled specially and is removed from the batch
 *       before hashing!
 */
public function getBatchSerial($namespace)
{
    if (!empty($this->serials[$namespace])) {
        return $this->serials[$namespace];
    }

    $batch = $this->getBatch($namespace);
    unset($batch['DefinitionRev']);
    $signature = sha1(serialize($batch));
    $this->serials[$namespace] = $signature;

    return $signature;
}
1999
/**
 * Returns a SHA-1 signature for the entire configuration object, i.e.
 * the hash of the serialized result of getAll(). The value is computed
 * on first use and cached in $serial.
 *
 * @return string
 */
public function getSerial()
{
    if (!empty($this->serial)) {
        return $this->serial;
    }

    $signature = sha1(serialize($this->getAll()));
    $this->serial = $signature;

    return $signature;
}
2013
/**
 * Retrieves all directives, organized by namespace.
 *
 * Each "Namespace.Directive" name of the squashed property list is split
 * at its first dot; the result maps namespace => directive => value.
 *
 * @return array
 * @warning This is a pretty inefficient function, avoid if you can
 */
public function getAll()
{
    if (!$this->finalized) {
        $this->autoFinalize();
    }

    $grouped = array();
    foreach ($this->plist->squash() as $name => $value) {
        $pieces = explode('.', $name, 2);
        $grouped[$pieces[0]][$pieces[1]] = $value;
    }

    return $grouped;
}
2031
/**
 * Sets a value to configuration.
 *
 * The value is validated against the schema entry for $key: it is run
 * through the variable parser for the directive's type, value aliases
 * are resolved, and it is checked against the list of allowed values
 * when the schema declares one. Any failure is reported through
 * triggerError() and leaves the configuration untouched.
 *
 * Setting a directive that is an alias forwards the value to the real
 * directive and then emits a notice naming the preferred directive.
 *
 * @param string $key key, in Namespace.Directive form
 * @param mixed $value value
 * @param mixed $a Deprecated: when $key contains no dot it is taken as
 *                 the namespace, $value as the directive name and $a as
 *                 the actual value
 */
public function set($key, $value, $a = null)
{
    if (strpos($key, '.') === false) {
        // deprecated three-argument form: set($namespace, $directive, $value)
        $namespace = $key;
        $directive = $value;
        $value = $a;
        $key = "$key.$directive";
        $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
    } else {
        list($namespace) = explode('.', $key);
    }
    if ($this->isFinalized('Cannot set directive after finalization')) {
        return;
    }
    if (!isset($this->def->info[$key])) {
        $this->triggerError(
            'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
            E_USER_WARNING
        );
        return;
    }
    $def = $this->def->info[$key];

    if (isset($def->isAlias)) {
        if ($this->aliasMode) {
            // an alias that points at another alias is a schema defect
            $this->triggerError(
                'Double-aliases not allowed, please fix '.
                'ConfigSchema bug with ' . $key,
                E_USER_ERROR
            );
            return;
        }
        $this->aliasMode = true;
        $this->set($def->key, $value);
        $this->aliasMode = false;
        $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
        return;
    }

    // Raw type might be negative when using the fully optimized form
    // of stdClass, which indicates allow_null == true
    $rtype = is_int($def) ? $def : $def->type;
    if ($rtype < 0) {
        $type = -$rtype;
        $allow_null = true;
    } else {
        $type = $rtype;
        $allow_null = isset($def->allow_null);
    }

    try {
        $value = $this->parser->parse($value, $type, $allow_null);
    } catch (HTMLPurifier_VarParserException $e) {
        $this->triggerError(
            'Value for ' . $key . ' is of invalid type, should be ' .
            HTMLPurifier_VarParser::getTypeName($type),
            E_USER_WARNING
        );
        return;
    }
    if (is_string($value) && is_object($def)) {
        // resolve value alias if defined
        if (isset($def->aliases[$value])) {
            $value = $def->aliases[$value];
        }
        // check to see if the value is allowed
        if (isset($def->allowed) && !isset($def->allowed[$value])) {
            $this->triggerError(
                'Value not supported, valid values are: ' .
                $this->_listify($def->allowed),
                E_USER_WARNING
            );
            return;
        }
    }
    $this->plist->set($key, $value);

    // reset definitions if the directives they depend on changed
    // this is a very costly process, so it's discouraged
    // with finalization
    if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
        $this->definitions[$namespace] = null;
    }

    // Invalidate cached signatures. The whole-configuration serial is
    // dropped along with the namespace one; otherwise getSerial() would
    // keep returning the signature computed before this change.
    $this->serials[$namespace] = false;
    $this->serial = null;
}
2125
/**
 * Convenience function for error reporting: joins the keys of a lookup
 * array into a comma-separated string.
 *
 * @param array $lookup
 *
 * @return string
 */
private function _listify($lookup)
{
    return implode(', ', array_keys($lookup));
}
2141
/**
 * Retrieves the HTML definition; shorthand for getDefinition('HTML', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawHTMLDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_HTMLDefinition
 */
public function getHTMLDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('HTML', $raw, $optimized);
    return $definition;
}
2160
/**
 * Retrieves the CSS definition; shorthand for getDefinition('CSS', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawCSSDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_CSSDefinition
 */
public function getCSSDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('CSS', $raw, $optimized);
    return $definition;
}
2179
/**
 * Retrieves the URI definition; shorthand for getDefinition('URI', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawURIDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_URIDefinition
 */
public function getURIDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('URI', $raw, $optimized);
    return $definition;
}
2198
/**
 * Retrieves a definition, from memory, from the definition cache, or by
 * creating a fresh one.
 *
 * With $raw = false the returned definition has been set up: an
 * in-memory definition is set up on demand, otherwise the cache is
 * consulted, and as a last resort a new definition is created, set up
 * and added to the cache.
 *
 * With $raw = true a definition that has not been set up is returned
 * for editing. When $optimized is also true, null is returned instead
 * whenever a set-up definition is already held in memory or found in
 * the cache, signalling that no raw edits are needed.
 *
 * @param string $type Type of definition: HTML, CSS, etc
 * @param bool $raw Whether or not definition should be returned raw
 * @param bool $optimized Only has an effect when $raw is true.  Whether
 *        or not to return null if the result is already present in
 *        the cache.  This is off by default for backwards
 *        compatibility reasons, but you need to do things this
 *        way in order to ensure that caching is done properly.
 *        Check out enduser-customize.html for more details.
 *        We probably won't ever change this default, as much as the
 *        maybe semantics is the "right thing to do."
 *
 * @throws HTMLPurifier_Exception when $optimized is true without $raw,
 *         when an optimized raw definition is requested without
 *         %$type.DefinitionID being set, when a raw definition is
 *         requested after setup, or when optimized and unoptimized raw
 *         retrievals are mixed
 * @return HTMLPurifier_Definition|null
 */
public function getDefinition($type, $raw = false, $optimized = false)
{
    if ($optimized && !$raw) {
        throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
    }
    if (!$this->finalized) {
        $this->autoFinalize();
    }
    // temporarily suspend locks, so we can handle recursive definition calls
    $lock = $this->lock;
    $this->lock = null;
    $factory = HTMLPurifier_DefinitionCacheFactory::instance();
    $cache = $factory->create($type, $this);
    $this->lock = $lock;

    if (!$raw) {
        // full definition
        // ---------------
        // check if definition is in memory
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            // check if the definition is setup
            if ($def->setup) {
                return $def;
            }
            $def->setup($this);
            if ($def->optimized) {
                $cache->add($def, $this);
            }
            return $def;
        }
        // check if definition is in cache
        $def = $cache->get($this);
        if ($def) {
            // definition in cache, save to memory and return it
            $this->definitions[$type] = $def;
            return $def;
        }
        // initialize it
        $def = $this->initDefinition($type);
        // set it up; the lock restricts get() to this definition's
        // namespace while setup runs
        $this->lock = $type;
        $def->setup($this);
        $this->lock = null;
        // save in cache
        $cache->add($def, $this);
        // return it
        return $def;
    }

    // raw definition
    // --------------
    // check preconditions
    $def = null;
    if ($optimized) {
        if (is_null($this->get($type . '.DefinitionID'))) {
            // fatally error out if definition ID not set
            throw new HTMLPurifier_Exception(
                "Cannot retrieve raw version without specifying %$type.DefinitionID"
            );
        }
    }
    if (!empty($this->definitions[$type])) {
        $def = $this->definitions[$type];
        if ($def->setup && !$optimized) {
            $extra = $this->chatty ?
                " (try moving this code block earlier in your initialization)" :
                "";
            throw new HTMLPurifier_Exception(
                "Cannot retrieve raw definition after it has already been setup" .
                $extra
            );
        }
        if ($def->optimized === null) {
            $extra = $this->chatty ? " (try flushing your cache)" : "";
            throw new HTMLPurifier_Exception(
                "Optimization status of definition is unknown" . $extra
            );
        }
        if ($def->optimized !== $optimized) {
            $msg = $optimized ? "optimized" : "unoptimized";
            $extra = $this->chatty ?
                " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
                : "";
            throw new HTMLPurifier_Exception(
                "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
            );
        }
    }
    // check if definition was in memory
    if ($def) {
        if ($def->setup) {
            // invariant: $optimized === true (checked above)
            return null;
        }
        return $def;
    }
    // if optimized, check if definition was in cache
    // (because we do the memory check first, this formulation
    // is prone to cache slamming, but I think
    // guaranteeing that either /all/ of the raw
    // setup code or /none/ of it is run is more important.)
    if ($optimized) {
        // This code path only gets run once; once we put
        // something in $definitions (which is guaranteed by the
        // trailing code), we always short-circuit above.
        $def = $cache->get($this);
        if ($def) {
            // save the full definition for later, but don't
            // return it yet
            $this->definitions[$type] = $def;
            return null;
        }
    }
    // check invariants for creation
    if (!$optimized) {
        if (!is_null($this->get($type . '.DefinitionID'))) {
            if ($this->chatty) {
                // The directive names are interpolated (double quotes)
                // so the warning names the actual namespace.
                $this->triggerError(
                    'Due to a documentation error in previous version of HTML Purifier, your ' .
                    'definitions are not being cached.  If this is OK, you can remove the ' .
                    "%$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, " .
                    'modify your code to use maybeGetRawDefinition, and test if the returned ' .
                    'value is null before making any edits (if it is null, that means that a ' .
                    'cached version is available, and no raw operations are necessary).  See ' .
                    '<a href="http://htmlpurifier.org/docs/enduser-customize.html#optimized">' .
                    'Customize</a> for more details',
                    E_USER_WARNING
                );
            } else {
                $this->triggerError(
                    "Useless DefinitionID declaration",
                    E_USER_WARNING
                );
            }
        }
    }
    // initialize it
    $def = $this->initDefinition($type);
    $def->optimized = $optimized;
    return $def;
}
2360
/**
 * Creates a fresh definition object of the requested type and stores it
 * in $definitions under that type.
 *
 * @param string $type What type of definition to create
 *
 * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
 * @throws HTMLPurifier_Exception when $type is not HTML, CSS or URI
 */
private function initDefinition($type)
{
    // quick checks failed, let's create the object
    switch ($type) {
        case 'HTML':
            $created = new HTMLPurifier_HTMLDefinition();
            break;
        case 'CSS':
            $created = new HTMLPurifier_CSSDefinition();
            break;
        case 'URI':
            $created = new HTMLPurifier_URIDefinition();
            break;
        default:
            throw new HTMLPurifier_Exception(
                "Definition of $type type not supported"
            );
    }

    $this->definitions[$type] = $created;
    return $created;
}
2386
/**
 * Requests the raw, optimized definition of the given type; shorthand
 * for getDefinition($name, true, true).
 *
 * @param string $name Type of definition: HTML, CSS, etc
 *
 * @return HTMLPurifier_Definition|null
 */
public function maybeGetRawDefinition($name)
{
    $definition = $this->getDefinition($name, true, true);
    return $definition;
}
2391
/**
 * Requests the raw, optimized HTML definition; shorthand for
 * getDefinition('HTML', true, true).
 *
 * @return HTMLPurifier_HTMLDefinition
 */
public function maybeGetRawHTMLDefinition()
{
    $definition = $this->getDefinition('HTML', true, true);
    return $definition;
}
2399
/**
 * Requests the raw, optimized CSS definition; shorthand for
 * getDefinition('CSS', true, true).
 *
 * @return HTMLPurifier_CSSDefinition
 */
public function maybeGetRawCSSDefinition()
{
    $definition = $this->getDefinition('CSS', true, true);
    return $definition;
}
2407
/**
 * Requests the raw, optimized URI definition; shorthand for
 * getDefinition('URI', true, true).
 *
 * @return HTMLPurifier_URIDefinition
 */
public function maybeGetRawURIDefinition()
{
    $definition = $this->getDefinition('URI', true, true);
    return $definition;
}
2415
/**
 * Loads configuration values from an array. Two layouts are accepted
 * per entry:
 *   Namespace.Directive => Value
 *   Namespace => array(Directive => Value, ...)
 * Underscores in the top-level keys are converted to dots first.
 *
 * @param array $config_array Configuration associative array
 */
public function loadArray($config_array)
{
    if ($this->isFinalized('Cannot load directives after finalization')) {
        return;
    }

    foreach ($config_array as $rawKey => $value) {
        $dotted = str_replace('_', '.', $rawKey);

        if (strpos($dotted, '.') === false) {
            // bare namespace: the value is a directive => value map
            foreach ($value as $directive => $directiveValue) {
                $this->set($dotted .'.'. $directive, $directiveValue);
            }
            continue;
        }

        $this->set($dotted, $value);
    }
}
2440
/**
 * Returns a list of array(namespace, directive) for all directives
 * that are allowed in a web-form context as per an allowed
 * namespaces/directives list.
 *
 * Entries of $allowed containing a dot name a single directive; a
 * leading '-' on such an entry excludes that directive. Entries without
 * a dot allow a whole namespace. Aliases and the DefinitionID /
 * DefinitionRev directives are never returned.
 *
 * @param array $allowed List of allowed namespaces/directives; true
 *              allows everything, a string is treated as a one-item list
 * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 *
 * @return array
 */
public static function getAllowedDirectivesForForm($allowed, $schema = null)
{
    if (!$schema) {
        $schema = HTMLPurifier_ConfigSchema::instance();
    }

    $restricted = ($allowed !== true);
    $allowed_ns = array();
    $allowed_directives = array();
    $blacklisted_directives = array();

    if ($restricted) {
        if (is_string($allowed)) {
            $allowed = array($allowed);
        }
        foreach ($allowed as $entry) {
            if (strpos($entry, '.') === false) {
                // namespace
                $allowed_ns[$entry] = true;
                continue;
            }
            // directive
            if ($entry[0] == '-') {
                $blacklisted_directives[substr($entry, 1)] = true;
            } else {
                $allowed_directives[$entry] = true;
            }
        }
    }

    $result = array();
    foreach ($schema->info as $key => $def) {
        list($ns, $directive) = explode('.', $key, 2);
        $qualified = "$ns.$directive";

        if ($restricted) {
            if (isset($blacklisted_directives[$qualified])) {
                continue;
            }
            $explicitlyAllowed = isset($allowed_directives[$qualified]);
            if (!$explicitlyAllowed && !isset($allowed_ns[$ns])) {
                continue;
            }
        }
        if (isset($def->isAlias)) {
            continue;
        }
        if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
            continue;
        }

        $result[] = array($ns, $directive);
    }

    return $result;
}
2498
/**
 * Creates a configuration from values in $_GET/$_POST that were posted
 * via ConfigForm: the input is filtered by prepareArrayFromForm() and
 * the result is handed to create().
 *
 * @param array $array $_GET or $_POST array to import
 * @param string|bool $index Index/name that the config variables are in
 * @param array|bool $allowed List of allowed namespaces/directives
 * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 *
 * @return mixed
 */
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
{
    return HTMLPurifier_Config::create(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema),
        $schema
    );
}
2517
/**
 * Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
 *
 * The input is filtered by prepareArrayFromForm() against this object's
 * own schema and then applied with loadArray().
 *
 * @param array $array $_GET or $_POST array to import
 * @param string|bool $index Index/name that the config variables are in
 * @param array|bool $allowed List of allowed namespaces/directives
 * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 */
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
{
    $this->loadArray(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def)
    );
}
2531
2532    /**
2533     * Prepares an array from a form into something usable for the more
2534     * strict parts of HTMLPurifier_Config
2535     *
2536     * @param array $array $_GET or $_POST array to import
2537     * @param string|bool $index Index/name that the config variables are in
2538     * @param array|bool $allowed List of allowed namespaces/directives
2539     * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
2540     * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
2541     *
2542     * @return array
2543     */
2544    public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
2545    {
2546        if ($index !== false) {
2547            $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
2548        }
2549
2550        $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
2551        $ret = array();
2552        foreach ($allowed as $key) {
2553            list($ns, $directive) = $key;
2554            $skey = "$ns.$directive";
2555            if (!empty($array["Null_$skey"])) {
2556                $ret[$ns][$directive] = null;
2557                continue;
2558            }
2559            if (!isset($array[$skey])) {
2560                continue;
2561            }
2562            $ret[$ns][$directive] = $array[$skey];
2563        }
2564        return $ret;
2565    }
2566
2567    /**
2568     * Loads configuration values from an ini file
2569     *
2570     * @param string $filename Name of ini file
2571     */
2572    public function loadIni($filename)
2573    {
2574        if ($this->isFinalized('Cannot load directives after finalization')) {
2575            return;
2576        }
2577        $array = parse_ini_file($filename, true);
2578        $this->loadArray($array);
2579    }
2580
2581    /**
2582     * Checks whether or not the configuration object is finalized.
2583     *
2584     * @param string|bool $error String error message, or false for no error
2585     *
2586     * @return bool
2587     */
2588    public function isFinalized($error = false)
2589    {
2590        if ($this->finalized && $error) {
2591            $this->triggerError($error, E_USER_ERROR);
2592        }
2593        return $this->finalized;
2594    }
2595
2596    /**
2597     * Finalizes configuration only if auto finalize is on and not
2598     * already finalized
2599     */
2600    public function autoFinalize()
2601    {
2602        if ($this->autoFinalize) {
2603            $this->finalize();
2604        } else {
2605            $this->plist->squash(true);
2606        }
2607    }
2608
2609    /**
2610     * Finalizes a configuration object, prohibiting further change
2611     */
2612    public function finalize()
2613    {
2614        $this->finalized = true;
2615        $this->parser = null;
2616    }
2617
2618    /**
2619     * Produces a nicely formatted error message by supplying the
2620     * stack frame information OUTSIDE of HTMLPurifier_Config.
2621     *
2622     * @param string $msg An error message
2623     * @param int $no An error number
2624     */
2625    protected function triggerError($msg, $no)
2626    {
2627        // determine previous stack frame
2628        $extra = '';
2629        if ($this->chatty) {
2630            $trace = debug_backtrace();
2631            // zip(tail(trace), trace) -- but PHP is not Haskell har har
2632            for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
2633                // XXX this is not correct on some versions of HTML Purifier
2634                if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
2635                    continue;
2636                }
2637                $frame = $trace[$i];
2638                $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
2639                break;
2640            }
2641        }
2642        trigger_error($msg . $extra, $no);
2643    }
2644
2645    /**
2646     * Returns a serialized form of the configuration object that can
2647     * be reconstituted.
2648     *
2649     * @return string
2650     */
2651    public function serialize()
2652    {
2653        $this->getDefinition('HTML');
2654        $this->getDefinition('CSS');
2655        $this->getDefinition('URI');
2656        return serialize($this);
2657    }
2658
2659}
2660
2661
2662
2663
2664
2665/**
2666 * Configuration definition, defines directives and their defaults.
2667 */
class HTMLPurifier_ConfigSchema
{
    /**
     * Defaults of the directives and namespaces.
     * @type array
     * @note This shares the exact same structure as HTMLPurifier_Config::$conf
     */
    public $defaults = array();

    /**
     * The default property list. Do not edit this property list.
     * @type HTMLPurifier_PropertyList
     */
    public $defaultPlist;

    /**
     * Definition of the directives.
     * The structure of this is:
     *
     *  array(
     *      'Namespace.Directive' => new stdClass(),
     *  )
     *
     * The stdClass may have the following properties:
     *
     *  - If isAlias isn't set:
     *      - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
     *      - allow_null: If set, this directive allows null values
     *      - aliases: If set, an associative array of value aliases to real values
     *      - allowed: If set, a lookup array of allowed (string) values
     *  - If isAlias is set:
     *      - key: Full name of the directive this directive aliases to
     *
     * In certain degenerate cases, stdClass will actually be an integer. In
     * that case, the value is equivalent to an stdClass with the type
     * property set to the integer. If the integer is negative, type is
     * equal to the absolute value of integer, and allow_null is true.
     *
     * This class is friendly with HTMLPurifier_Config. If you need introspection
     * about the schema, you're better off using the ConfigSchema_Interchange,
     * which uses more memory but has much richer information.
     * @type array
     */
    public $info = array();

    /**
     * Application-wide singleton
     * @type HTMLPurifier_ConfigSchema
     */
    protected static $singleton;

    public function __construct()
    {
        $this->defaultPlist = new HTMLPurifier_PropertyList();
    }

    /**
     * Unserializes the default ConfigSchema.
     * @note Triggers E_USER_ERROR when the unserialized result is falsy.
     * @return HTMLPurifier_ConfigSchema
     */
    public static function makeFromSerial()
    {
        $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
        $r = unserialize($contents);
        if (!$r) {
            // The hash lets the reporter tell which copy of the file was read.
            $hash = sha1($contents);
            trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
        }
        return $r;
    }

    /**
     * Retrieves an instance of the application-wide configuration definition.
     * @param HTMLPurifier_ConfigSchema|bool $prototype Schema to install as
     *      the singleton, or true to force a reload from the serialized
     *      default; null (the default) leaves an existing singleton alone.
     * @return HTMLPurifier_ConfigSchema
     */
    public static function instance($prototype = null)
    {
        // The reload request has to be tested first: true is also "not
        // null", so checking for a prototype first would store the boolean
        // itself as the singleton.
        if ($prototype === true) {
            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
        } elseif ($prototype !== null) {
            HTMLPurifier_ConfigSchema::$singleton = $prototype;
        } elseif (HTMLPurifier_ConfigSchema::$singleton === null) {
            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
        }
        return HTMLPurifier_ConfigSchema::$singleton;
    }

    /**
     * Defines a directive for configuration
     * @warning Will fail if directive's namespace is not defined.
     * @warning This method's signature is slightly different from the legacy
     *          define() static method! Beware!
     * @param string $key Name of directive
     * @param mixed $default Default value of directive
     * @param string|int $type Allowed type of the directive, either as an
     *      integer or as a name looked up in HTMLPurifier_VarParser::$types
     * @param bool $allow_null Whether or not to allow null values
     */
    public function add($key, $default, $type, $allow_null)
    {
        $obj = new stdClass();
        $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
        // allow_null is only ever present when true; postProcess() relies on
        // its mere presence.
        if ($allow_null) {
            $obj->allow_null = true;
        }
        $this->info[$key] = $obj;
        $this->defaults[$key] = $default;
        $this->defaultPlist->set($key, $default);
    }

    /**
     * Defines a directive value alias.
     *
     * Directive value aliases are convenient for developers because it lets
     * them set a directive to several values and get the same result.
     * @param string $key Name of Directive
     * @param array $aliases Hash of aliased values to the real alias
     */
    public function addValueAliases($key, $aliases)
    {
        if (!isset($this->info[$key]->aliases)) {
            $this->info[$key]->aliases = array();
        }
        // Later registrations overwrite earlier ones for the same alias.
        foreach ($aliases as $alias => $real) {
            $this->info[$key]->aliases[$alias] = $real;
        }
    }

    /**
     * Defines a set of allowed values for a directive.
     * @warning This is slightly different from the corresponding static
     *          method definition.
     * @param string $key Name of directive
     * @param array $allowed Lookup array of allowed values
     */
    public function addAllowedValues($key, $allowed)
    {
        $this->info[$key]->allowed = $allowed;
    }

    /**
     * Defines a directive alias for backwards compatibility
     * @param string $key Directive that will be aliased
     * @param string $new_key Directive that the alias will be to
     */
    public function addAlias($key, $new_key)
    {
        $obj = new stdClass;
        $obj->key = $new_key;
        $obj->isAlias = true;
        $this->info[$key] = $obj;
    }

    /**
     * Replaces any stdClass that only has the type property with type integer
     * (negated when allow_null is the only other property).
     * @note Entries that are not objects, i.e. ones that were already
     *       collapsed, are left alone, so calling this more than once is safe.
     */
    public function postProcess()
    {
        foreach ($this->info as $key => $v) {
            if (!is_object($v)) {
                // Already an integer from an earlier pass; reading ->type
                // from it would replace the entry with null.
                continue;
            }
            $properties = count((array) $v);
            if ($properties == 1) {
                $this->info[$key] = $v->type;
            } elseif ($properties == 2 && isset($v->allow_null)) {
                $this->info[$key] = -$v->type;
            }
        }
    }
}
2837
2838
2839
2840
2841
2842/**
2843 * @todo Unit test
2844 */
class HTMLPurifier_ContentSets
{

    /**
     * List of content set strings (pipe separators) indexed by name.
     * @type array
     */
    public $info = array();

    /**
     * List of content set lookups (element => true) indexed by name.
     * @type array
     * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
     */
    public $lookup = array();

    /**
     * Synchronized list of defined content sets (keys of info).
     * @type array
     */
    protected $keys = array();
    /**
     * Synchronized list of defined content values (values of info).
     * @type array
     */
    protected $values = array();

    /**
     * Merges in module's content sets, expands identifiers in the content
     * sets and populates the keys, values and lookup member variables.
     * @param HTMLPurifier_HTMLModule[] $modules List of HTMLPurifier_HTMLModule
     *      (a single non-array value is wrapped into a one-element list)
     */
    public function __construct($modules)
    {
        if (!is_array($modules)) {
            $modules = array($modules);
        }
        // populate content_sets based on module hints
        // sorry, no way of overloading
        foreach ($modules as $module) {
            foreach ($module->content_sets as $key => $value) {
                $temp = $this->convertToLookup($value);
                if (isset($this->lookup[$key])) {
                    // add it into the existing content set
                    $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
                } else {
                    $this->lookup[$key] = $temp;
                }
            }
        }
        // Replace every member that is itself the name of a content set by
        // that set's members; repeat until a full pass changes nothing, so
        // that nested references are expanded as well.
        $old_lookup = false;
        while ($old_lookup !== $this->lookup) {
            $old_lookup = $this->lookup;
            foreach ($this->lookup as $i => $set) {
                $add = array();
                foreach ($set as $element => $x) {
                    if (isset($this->lookup[$element])) {
                        $add += $this->lookup[$element];
                        unset($this->lookup[$i][$element]);
                    }
                }
                $this->lookup[$i] += $add;
            }
        }

        foreach ($this->lookup as $key => $lookup) {
            $this->info[$key] = implode(' | ', array_keys($lookup));
        }
        $this->keys   = array_keys($this->info);
        $this->values = array_values($this->info);
    }

    /**
     * Accepts a definition; generates and assigns a ChildDef for it
     * @note A string content_model has every content set name in it replaced
     *       by that set's expanded element list before the ChildDef is built.
     * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef reference
     * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
     */
    public function generateChildDef(&$def, $module)
    {
        if (!empty($def->child)) { // already done!
            return;
        }
        $content_model = $def->content_model;
        // With no content sets defined the alternation would be empty and
        // match the empty string at every word boundary, so there is
        // nothing to substitute and the replacement is skipped entirely.
        if (is_string($content_model) && !empty($this->keys)) {
            // Quote the names so that one containing a regex metacharacter
            // or the delimiter cannot corrupt the pattern.
            $names = array();
            foreach ($this->keys as $key) {
                $names[] = preg_quote($key, '/');
            }
            $def->content_model = preg_replace_callback(
                '/\b(' . implode('|', $names) . ')\b/',
                array($this, 'generateChildDefCallback'),
                $content_model
            );
        }
        $def->child = $this->getChildDef($def, $module);
    }

    /**
     * Callback for generateChildDef(): maps a matched content set name to
     * its expanded, pipe-separated element list.
     * @param array $matches Match array supplied by preg_replace_callback
     * @return string
     */
    public function generateChildDefCallback($matches)
    {
        return $this->info[$matches[0]];
    }

    /**
     * Instantiates a ChildDef based on content_model and content_model_type
     * member variables in HTMLPurifier_ElementDef
     * @note This will also defer to modules for custom HTMLPurifier_ChildDef
     *       subclasses that need content set expansion
     * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef to have ChildDef extracted
     * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
     * @return HTMLPurifier_ChildDef|bool ChildDef corresponding to ElementDef,
     *      or false (after an E_USER_ERROR) when none could be determined
     */
    public function getChildDef($def, $module)
    {
        $value = $def->content_model;
        if (is_object($value)) {
            // Tolerated, but reported: the object is returned unchanged.
            trigger_error(
                'Literal object child definitions should be stored in '.
                'ElementDef->child not ElementDef->content_model',
                E_USER_NOTICE
            );
            return $value;
        }
        switch ($def->content_model_type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($value);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($value);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($value);
        }
        // defer to its module
        $return = false;
        if ($module->defines_child_def) { // save a func call
            $return = $module->getChildDef($def);
        }
        if ($return !== false) {
            return $return;
        }
        // error-out
        trigger_error(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
        return false;
    }

    /**
     * Converts a string list of elements separated by pipes into
     * a lookup array.
     * @note All spaces are stripped before the string is split.
     * @param string $string List of elements
     * @return array Lookup array of elements
     */
    protected function convertToLookup($string)
    {
        $array = explode('|', str_replace(' ', '', $string));
        $ret = array();
        foreach ($array as $k) {
            $ret[$k] = true;
        }
        return $ret;
    }
}
3008
3009
3010
3011
3012
3013/**
3014 * Registry object that contains information about the current context.
3015 * @warning Is a bit buggy when variables are set to null: it thinks
3016 *          they don't exist! So use false instead, please.
3017 * @note Since the variables Context deals with may not be objects,
3018 *       references are very important here! Do not remove!
3019 */
class HTMLPurifier_Context
{

    /**
     * Name => reference map of everything currently registered.
     * @type array
     */
    private $_storage = array();

    /**
     * Binds a variable (by reference) to a name in the context.
     * @note Re-using a name that is already taken raises E_USER_ERROR and
     *       leaves the existing binding untouched.
     * @param string $name String name
     * @param mixed $ref Reference to variable to be registered
     */
    public function register($name, &$ref)
    {
        if (!array_key_exists($name, $this->_storage)) {
            $this->_storage[$name] =& $ref;
            return;
        }
        trigger_error(
            "Name $name produces collision, cannot re-register",
            E_USER_ERROR
        );
    }

    /**
     * Fetches the reference registered under a name.
     * @param string $name String name
     * @param bool $ignore_error When true, an unknown name is not reported
     * @return mixed The registered reference, or a reference to null when
     *      the name is unknown
     */
    public function &get($name, $ignore_error = false)
    {
        if (array_key_exists($name, $this->_storage)) {
            return $this->_storage[$name];
        }
        if (!$ignore_error) {
            trigger_error(
                "Attempted to retrieve non-existent variable $name",
                E_USER_ERROR
            );
        }
        // A by-reference return needs an actual variable to point at.
        $missing = null;
        return $missing;
    }

    /**
     * Removes a name from the context.
     * @note An unknown name raises E_USER_ERROR.
     * @param string $name String name
     */
    public function destroy($name)
    {
        if (array_key_exists($name, $this->_storage)) {
            unset($this->_storage[$name]);
            return;
        }
        trigger_error(
            "Attempted to destroy non-existent variable $name",
            E_USER_ERROR
        );
    }

    /**
     * Tells whether a name is currently registered.
     * @param string $name String name
     * @return bool
     */
    public function exists($name)
    {
        return array_key_exists($name, $this->_storage);
    }

    /**
     * Registers every entry of an associative array under its key.
     * @param array $context_array Assoc array of variables to load
     */
    public function loadArray($context_array)
    {
        // register() takes its value by reference, so the array slot itself
        // is passed rather than a loop copy of the value.
        foreach (array_keys($context_array) as $name) {
            $this->register($name, $context_array[$name]);
        }
    }
}
3104
3105
3106
3107
3108
3109/**
3110 * Abstract class representing Definition cache managers that implements
3111 * useful common methods and is a factory.
3112 * @todo Create a separate maintenance file advanced users can use to
3113 *       cache their custom HTMLDefinition, which can be loaded
3114 *       via a configuration directive
3115 * @todo Implement memcached
3116 */
abstract class HTMLPurifier_DefinitionCache
{
    /**
     * Type of definition this cache instance handles.
     * @type string
     */
    public $type;

    /**
     * @param string $type Type of definition objects this instance of the
     *      cache will handle.
     */
    public function __construct($type)
    {
        $this->type = $type;
    }

    /**
     * Builds the cache key for a configuration: version, batch serial and
     * definition revision, joined by commas.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @return string
     */
    public function generateKey($config)
    {
        $parts = array(
            $config->version, // possibly replace with function calls
            $config->getBatchSerial($this->type),
            $config->get($this->type . '.DefinitionRev'),
        );
        return implode(',', $parts);
    }

    /**
     * Decides whether a key is stale relative to the configuration's
     * version and revision number.
     * @param string $key Key to test
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against
     * @return bool
     */
    public function isOld($key, $config)
    {
        // A well-formed key has three comma-separated fields; anything
        // shorter is treated as old.
        $fields = explode(',', $key, 3);
        if (count($fields) < 3) {
            return true;
        }
        // version mismatch, is always old
        if (version_compare($fields[0], $config->version) != 0) {
            return true;
        }
        // versions match: old only when the serial is the same and the
        // stored revision is behind the configured one
        return $fields[1] == $config->getBatchSerial($this->type) &&
            $fields[2] < $config->get($this->type . '.DefinitionRev');
    }

    /**
     * Verifies that a definition's type matches the type of this cache.
     * @note Triggers an error on mismatch
     * @param HTMLPurifier_Definition $def Definition object to check
     * @return bool true if good, false if not
     */
    public function checkDefType($def)
    {
        if ($def->type === $this->type) {
            return true;
        }
        trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
        return false;
    }

    /**
     * Adds a definition object to the cache
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     */
    abstract public function add($def, $config);

    /**
     * Unconditionally saves a definition object to the cache
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     */
    abstract public function set($def, $config);

    /**
     * Replaces an object in the cache
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     */
    abstract public function replace($def, $config);

    /**
     * Retrieves a definition object from the cache
     * @param HTMLPurifier_Config $config
     */
    abstract public function get($config);

    /**
     * Removes a definition object from the cache
     * @param HTMLPurifier_Config $config
     */
    abstract public function remove($config);

    /**
     * Clears all objects from cache
     * @param HTMLPurifier_Config $config
     */
    abstract public function flush($config);

    /**
     * Clears all expired (older version or revision) objects from cache
     * @note Be careful implementing this method as flush. Flush must
     *       not interfere with other Definition types, and cleanup()
     *       should not be repeatedly called by userland code.
     * @param HTMLPurifier_Config $config
     */
    abstract public function cleanup($config);
}
3234
3235
3236
3237
3238
3239/**
3240 * Responsible for creating definition caches.
3241 */
class HTMLPurifier_DefinitionCacheFactory
{
    /**
     * Cache objects already built by create(), indexed first by
     * implementation name and then by definition type.
     * @type array
     */
    protected $caches = array('Serializer' => array());

    /**
     * Registered implementations: short name => class name.
     * @type array
     */
    protected $implementations = array();

    /**
     * Decorators applied to every newly created cache, indexed by name.
     * @type HTMLPurifier_DefinitionCache_Decorator[]
     */
    protected $decorators = array();

    /**
     * Initialize default decorators
     */
    public function setup()
    {
        $this->addDecorator('Cleanup');
    }

    /**
     * Retrieves an instance of global definition cache factory.
     * @param HTMLPurifier_DefinitionCacheFactory|bool $prototype Factory to
     *      install as the global instance, or true to discard the current
     *      one and build a fresh default factory; null (the default) leaves
     *      an existing instance alone.
     * @return HTMLPurifier_DefinitionCacheFactory
     */
    public static function instance($prototype = null)
    {
        static $instance;
        // The reset request has to be tested first: true is also "not
        // null", so checking for a prototype first would store the boolean
        // itself as the global instance.
        if ($prototype === true || ($prototype === null && $instance === null)) {
            $instance = new HTMLPurifier_DefinitionCacheFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Registers a new definition cache object
     * @param string $short Short name of cache object, for reference
     * @param string $long Full class name of cache object, for construction
     */
    public function register($short, $long)
    {
        $this->implementations[$short] = $long;
    }

    /**
     * Factory method that creates a cache object based on configuration
     * @note Reads the implementation name from Cache.DefinitionImpl: null
     *       yields a Null cache, an unregistered name falls back to the
     *       Serializer cache (with a warning unless the name is
     *       'Serializer'). Results other than the Null cache are memoized
     *       per implementation and type.
     * @param string $type Name of definitions handled by cache
     * @param HTMLPurifier_Config $config Config instance
     * @return mixed
     */
    public function create($type, $config)
    {
        $method = $config->get('Cache.DefinitionImpl');
        if ($method === null) {
            return new HTMLPurifier_DefinitionCache_Null($type);
        }
        if (!empty($this->caches[$method][$type])) {
            return $this->caches[$method][$type];
        }
        // class_exists() is called with autoloading disabled, so only
        // classes that are already loaded are accepted.
        if (isset($this->implementations[$method]) &&
            class_exists($class = $this->implementations[$method], false)) {
            $cache = new $class($type);
        } else {
            if ($method != 'Serializer') {
                trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
            }
            $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
        }
        // Wrap the cache in each registered decorator in turn.
        foreach ($this->decorators as $decorator) {
            $new_cache = $decorator->decorate($cache);
            // prevent infinite recursion in PHP 4
            unset($cache);
            $cache = $new_cache;
        }
        $this->caches[$method][$type] = $cache;
        return $this->caches[$method][$type];
    }

    /**
     * Registers a decorator to add to all new cache objects
     * @note A string is expanded to the class name
     *       HTMLPurifier_DefinitionCache_Decorator_<string> and instantiated.
     * @param HTMLPurifier_DefinitionCache_Decorator|string $decorator An instance or the name of a decorator
     */
    public function addDecorator($decorator)
    {
        if (is_string($decorator)) {
            $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
            $decorator = new $class;
        }
        // Indexed by name, so a decorator with the same name replaces the
        // previous one.
        $this->decorators[$decorator->name] = $decorator;
    }
}
3341
3342
3343
3344
3345
3346/**
3347 * Represents a document type, contains information on which modules
3348 * need to be loaded.
3349 * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
3350 *       If structure changes, please update that function.
3351 */
class HTMLPurifier_Doctype
{
    /**
     * Complete name of the doctype.
     * @type string
     */
    public $name;

    /**
     * Standard modules this doctype uses, given either as string
     * identifiers or as literal module objects.
     * @type array
     */
    public $modules = array();

    /**
     * Modules used for tidying up code.
     * @type array
     */
    public $tidyModules = array();

    /**
     * Whether the language is derived from XML (i.e. XHTML).
     * @type bool
     */
    public $xml = true;

    /**
     * Alternative names for this doctype.
     * @type array
     */
    public $aliases = array();

    /**
     * Public identifier of the DTD.
     * @type string
     */
    public $dtdPublic;

    /**
     * System identifier of the DTD.
     * @type string
     */
    public $dtdSystem;

    /**
     * Stores each argument in the matching public property without any
     * validation or conversion.
     * @param string $name Complete name of the doctype
     * @param bool $xml Whether the language is derived from XML
     * @param array $modules Standard modules
     * @param array $tidyModules Tidy modules
     * @param array $aliases Alternative names
     * @param string $dtd_public Public DTD identifier
     * @param string $dtd_system System DTD identifier
     */
    public function __construct(
        $name = null,
        $xml = true,
        $modules = array(),
        $tidyModules = array(),
        $aliases = array(),
        $dtd_public = null,
        $dtd_system = null
    ) {
        // identity
        $this->name = $name;
        $this->aliases = $aliases;
        $this->xml = $xml;
        // module lists
        $this->modules = $modules;
        $this->tidyModules = $tidyModules;
        // DTD identifiers
        $this->dtdPublic = $dtd_public;
        $this->dtdSystem = $dtd_system;
    }
}
3415
3416
3417
3418
3419
class HTMLPurifier_DoctypeRegistry
{

    /**
     * Registered doctype objects, keyed by their canonical name.
     * @type array
     */
    protected $doctypes;

    /**
     * Maps every known alias to the canonical name of its doctype.
     * @type array
     */
    protected $aliases;

    /**
     * Adds a doctype to the registry and wires up its aliases.
     * @note Either pass a ready-made doctype object as $doctype (the
     *       remaining parameters are then not used to build anything), or
     *       pass a name plus the parameters needed to construct one.
     * @note Scalar values given for $modules, $tidy_modules or $aliases
     *       are wrapped into one-element arrays.
     * @param string $doctype Name of doctype or literal doctype object
     * @param bool $xml
     * @param array $modules Modules doctype will load
     * @param array $tidy_modules Modules doctype will load for certain modes
     * @param array $aliases Alias names for doctype
     * @param string $dtd_public
     * @param string $dtd_system
     * @return HTMLPurifier_Doctype Editable registered doctype
     */
    public function register(
        $doctype,
        $xml = true,
        $modules = array(),
        $tidy_modules = array(),
        $aliases = array(),
        $dtd_public = null,
        $dtd_system = null
    ) {
        $modules = is_array($modules) ? $modules : array($modules);
        $tidy_modules = is_array($tidy_modules) ? $tidy_modules : array($tidy_modules);
        $aliases = is_array($aliases) ? $aliases : array($aliases);

        if (!is_object($doctype)) {
            $doctype = new HTMLPurifier_Doctype(
                $doctype,
                $xml,
                $modules,
                $tidy_modules,
                $aliases,
                $dtd_public,
                $dtd_system
            );
        }

        $name = $doctype->name;
        $this->doctypes[$name] = $doctype;

        // An alias never shadows a doctype registered under that very name.
        foreach ($doctype->aliases as $alias) {
            if (!isset($this->doctypes[$alias])) {
                $this->aliases[$alias] = $name;
            }
        }

        // The name now denotes a real doctype, so it stops being an alias.
        if (isset($this->aliases[$name])) {
            unset($this->aliases[$name]);
        }

        return $doctype;
    }

    /**
     * Looks up a registered doctype by name or alias.
     * @note Aliases are resolved to their canonical name first.
     * @note When possible, use the more fully-featured make()
     * @note An unknown name raises E_USER_ERROR; if the error handler lets
     *       execution continue, a fresh unregistered doctype carrying
     *       that name is returned.
     * @param string $doctype Name of doctype
     * @return HTMLPurifier_Doctype Editable doctype object
     */
    public function get($doctype)
    {
        $resolved = isset($this->aliases[$doctype]) ? $this->aliases[$doctype] : $doctype;

        if (isset($this->doctypes[$resolved])) {
            return $this->doctypes[$resolved];
        }

        trigger_error('Doctype ' . htmlspecialchars($resolved) . ' does not exist', E_USER_ERROR);
        return new HTMLPurifier_Doctype($resolved);
    }

    /**
     * Builds the doctype selected by a configuration object.
     * @note The result is a clone of the registered doctype, so config can
     *       hold on to its own copy (this is necessary in order to tell
     *       Generator whether or not the current document is XML
     *       based or not).
     * @param HTMLPurifier_Config $config
     * @return HTMLPurifier_Doctype
     */
    public function make($config)
    {
        $selected = $this->getDoctypeFromConfig($config);
        $registered = $this->get($selected);
        return clone $registered;
    }

    /**
     * Works out which doctype name a configuration object asks for.
     *
     * %HTML.Doctype wins, then %HTML.CustomDoctype; when both are empty the
     * name is assembled from the legacy %HTML.XHTML and %HTML.Strict flags.
     * @param HTMLPurifier_Config $config
     * @return string
     */
    public function getDoctypeFromConfig($config)
    {
        // recommended directives, in order of precedence
        foreach (array('HTML.Doctype', 'HTML.CustomDoctype') as $directive) {
            $explicit = $config->get($directive);
            if (!empty($explicit)) {
                return $explicit;
            }
        }

        // backwards-compatibility
        $family = $config->get('HTML.XHTML') ? 'XHTML 1.0' : 'HTML 4.01';
        $flavour = $config->get('HTML.Strict') ? ' Strict' : ' Transitional';
        return $family . $flavour;
    }
}
3558
3559
3560
3561
3562
3563/**
3564 * Structure that stores an HTML element definition. Used by
3565 * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
3566 * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
3567 *       Please update that class too.
3568 * @warning If you add new properties to this class, you MUST update
3569 *          the mergeIn() method.
3570 */
class HTMLPurifier_ElementDef
{
    /**
     * Does the definition work by itself, or is it created solely
     * for the purpose of merging into another definition?
     * @type bool
     */
    public $standalone = true;

    /**
     * Associative array of attribute name to HTMLPurifier_AttrDef.
     * @type array
     * @note Before being processed by HTMLPurifier_AttrCollections
     *       when modules are finalized during
     *       HTMLPurifier_HTMLDefinition->setup(), this array may also
     *       contain an array at index 0 that indicates which attribute
     *       collections to load into the full array. It may also
     *       contain string identifiers in lieu of HTMLPurifier_AttrDef,
     *       see HTMLPurifier_AttrTypes on how they are expanded during
     *       HTMLPurifier_HTMLDefinition->setup() processing.
     */
    public $attr = array();

    // XXX: Design note: currently, it's not possible to override
    // previously defined AttrTransforms without messing around with
    // the final generated config. This is by design; a previous version
    // used an associated list of attr_transform, but it was extremely
    // easy to accidentally override other attribute transforms by
    // forgetting to specify an index (and just using 0.)  While we
    // could check this by checking the index number and complaining,
    // there is a second problem which is that it is not at all easy to
    // tell when something is getting overridden. Combine this with a
    // codebase where this isn't really being used, and it's perfect for
    // nuking.

    /**
     * List of tags HTMLPurifier_AttrTransform to be done before validation.
     * @type array
     */
    public $attr_transform_pre = array();

    /**
     * List of tags HTMLPurifier_AttrTransform to be done after validation.
     * @type array
     */
    public $attr_transform_post = array();

    /**
     * HTMLPurifier_ChildDef of this tag.
     * @type HTMLPurifier_ChildDef
     * @note mergeIn() resets this to false whenever it takes over a new
     *       content model or content model type.
     */
    public $child;

    /**
     * Abstract string representation of internal ChildDef rules.
     * @see HTMLPurifier_ContentSets for how this is parsed and then transformed
     * into an HTMLPurifier_ChildDef.
     * @warning This is a temporary variable that is not available after
     *      being processed by HTMLDefinition
     * @type string
     */
    public $content_model;

    /**
     * Value of $child->type, used to determine which ChildDef to use,
     * used in combination with $content_model.
     * @warning This must be lowercase
     * @warning This is a temporary variable that is not available after
     *      being processed by HTMLDefinition
     * @type string
     */
    public $content_model_type;

    /**
     * Does the element have a content model (#PCDATA | Inline)*? This
     * is important for chameleon ins and del processing in
     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
     * have to worry about this one.
     * @type bool
     */
    public $descendants_are_inline = false;

    /**
     * List of the names of required attributes this element has.
     * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement()
     * @type array
     */
    public $required_attr = array();

    /**
     * Lookup table of tags excluded from all descendants of this tag.
     * @type array
     * @note SGML permits exclusions for all descendants, but this is
     *       not possible with DTDs or XML Schemas. W3C has elected to
     *       use complicated compositions of content_models to simulate
     *       exclusion for children, but we go the simpler, SGML-style
     *       route of flat-out exclusions, which correctly apply to
     *       all descendants and not just children. Note that the XHTML
     *       Modularization Abstract Modules are blithely unaware of such
     *       distinctions.
     */
    public $excludes = array();

    /**
     * This tag is explicitly auto-closed by the following tags.
     * @type array
     */
    public $autoclose = array();

    /**
     * If a foreign element is found in this element, test if it is
     * allowed by this sub-element; if it is, instead of closing the
     * current element, place it inside this element.
     * @type string
     */
    public $wrap;

    /**
     * Whether or not this is a formatting element affected by the
     * "Active Formatting Elements" algorithm.
     * @type bool
     */
    public $formatting;

    /**
     * Low-level factory constructor for creating new standalone element defs
     * @param string $content_model Value stored in $content_model
     * @param string $content_model_type Value stored in $content_model_type
     * @param array $attr Value stored in $attr
     * @return HTMLPurifier_ElementDef New definition; every other property
     *         keeps its declared default
     */
    public static function create($content_model, $content_model_type, $attr)
    {
        $def = new HTMLPurifier_ElementDef();
        $def->content_model = $content_model;
        $def->content_model_type = $content_model_type;
        $def->attr = $attr;
        return $def;
    }

    /**
     * Merges the values of another element definition into this one.
     * Values from the new element def take precedence if a value is
     * not mergeable.
     *
     * What is merged, and how:
     *  - attr: the include list at index 0 is appended to; any other key
     *    is overwritten, or removed when the incoming value is false.
     *  - excludes: overwritten per key, removed when the value is false.
     *  - attr_transform_pre / attr_transform_post: combined with
     *    array_merge().
     *  - content_model: taken over when non-empty; the token "#SUPER" in
     *    the incoming model is replaced by this definition's current model.
     *  - content_model_type, child, formatting: taken over when set.
     *  - descendants_are_inline: taken over only when truthy.
     * No other property is touched.
     * @param HTMLPurifier_ElementDef $def
     */
    public function mergeIn($def)
    {
        // later keys takes precedence
        foreach ($def->attr as $k => $v) {
            if ($k === 0) {
                // merge in the includes
                // sorry, no way to override an include
                foreach ($v as $v2) {
                    $this->attr[0][] = $v2;
                }
                continue;
            }
            if ($v === false) {
                if (isset($this->attr[$k])) {
                    unset($this->attr[$k]);
                }
                continue;
            }
            $this->attr[$k] = $v;
        }
        $this->_mergeAssocArray($this->excludes, $def->excludes);
        $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
        $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);

        if (!empty($def->content_model)) {
            // $this->content_model is null until something assigns it, and
            // passing null as the replacement to str_replace() is
            // deprecated as of PHP 8.1. The cast yields the same result
            // (null has always been treated as an empty string here).
            $this->content_model =
                str_replace("#SUPER", (string)$this->content_model, $def->content_model);
            // the old ChildDef no longer matches the new model
            $this->child = false;
        }
        if (!empty($def->content_model_type)) {
            $this->content_model_type = $def->content_model_type;
            $this->child = false;
        }
        // an explicit ChildDef on the incoming definition wins over the
        // resets performed above
        if (!is_null($def->child)) {
            $this->child = $def->child;
        }
        if (!is_null($def->formatting)) {
            $this->formatting = $def->formatting;
        }
        if ($def->descendants_are_inline) {
            $this->descendants_are_inline = $def->descendants_are_inline;
        }
    }

    /**
     * Merges one array into another, removes values which equal false
     * @param $a1 Array by reference that is merged into
     * @param $a2 Array that merges into $a1
     * @note A value that is identical to false in $a2 deletes the matching
     *       key from $a1 instead of being stored.
     */
    private function _mergeAssocArray(&$a1, $a2)
    {
        foreach ($a2 as $k => $v) {
            if ($v === false) {
                if (isset($a1[$k])) {
                    unset($a1[$k]);
                }
                continue;
            }
            $a1[$k] = $v;
        }
    }
}
3775
3776
3777
3778
3779
3780/**
3781 * A UTF-8 specific character encoder that handles cleaning and transforming.
3782 * @note All functions in this class should be static.
3783 */
3784class HTMLPurifier_Encoder
3785{
3786
3787    /**
3788     * Constructor throws fatal error if you attempt to instantiate class
3789     */
3790    private function __construct()
3791    {
3792        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
3793    }
3794
3795    /**
3796     * Error-handler that mutes errors, alternative to shut-up operator.
3797     */
3798    public static function muteErrorHandler()
3799    {
3800    }
3801
3802    /**
3803     * iconv wrapper which mutes errors, but doesn't work around bugs.
3804     * @param string $in Input encoding
3805     * @param string $out Output encoding
3806     * @param string $text The text to convert
3807     * @return string
3808     */
3809    public static function unsafeIconv($in, $out, $text)
3810    {
3811        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
3812        $r = iconv($in, $out, $text);
3813        restore_error_handler();
3814        return $r;
3815    }
3816
3817    /**
3818     * iconv wrapper which mutes errors and works around bugs.
3819     * @param string $in Input encoding
3820     * @param string $out Output encoding
3821     * @param string $text The text to convert
3822     * @param int $max_chunk_size
3823     * @return string
3824     */
3825    public static function iconv($in, $out, $text, $max_chunk_size = 8000)
3826    {
3827        $code = self::testIconvTruncateBug();
3828        if ($code == self::ICONV_OK) {
3829            return self::unsafeIconv($in, $out, $text);
3830        } elseif ($code == self::ICONV_TRUNCATES) {
3831            // we can only work around this if the input character set
3832            // is utf-8
3833            if ($in == 'utf-8') {
3834                if ($max_chunk_size < 4) {
3835                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
3836                    return false;
3837                }
3838                // split into 8000 byte chunks, but be careful to handle
3839                // multibyte boundaries properly
3840                if (($c = strlen($text)) <= $max_chunk_size) {
3841                    return self::unsafeIconv($in, $out, $text);
3842                }
3843                $r = '';
3844                $i = 0;
3845                while (true) {
3846                    if ($i + $max_chunk_size >= $c) {
3847                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
3848                        break;
3849                    }
3850                    // wibble the boundary
3851                    if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
3852                        $chunk_size = $max_chunk_size;
3853                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
3854                        $chunk_size = $max_chunk_size - 1;
3855                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
3856                        $chunk_size = $max_chunk_size - 2;
3857                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
3858                        $chunk_size = $max_chunk_size - 3;
3859                    } else {
3860                        return false; // rather confusing UTF-8...
3861                    }
3862                    $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
3863                    $r .= self::unsafeIconv($in, $out, $chunk);
3864                    $i += $chunk_size;
3865                }
3866                return $r;
3867            } else {
3868                return false;
3869            }
3870        } else {
3871            return false;
3872        }
3873    }
3874
3875    /**
3876     * Cleans a UTF-8 string for well-formedness and SGML validity
3877     *
3878     * It will parse according to UTF-8 and return a valid UTF8 string, with
3879     * non-SGML codepoints excluded.
3880     *
3881     * Specifically, it will permit:
3882     * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
3883     * Source: https://www.w3.org/TR/REC-xml/#NT-Char
3884     * Arguably this function should be modernized to the HTML5 set
3885     * of allowed characters:
3886     * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
3887     * which simultaneously expand and restrict the set of allowed characters.
3888     *
3889     * @param string $str The string to clean
3890     * @param bool $force_php
3891     * @return string
3892     *
3893     * @note Just for reference, the non-SGML code points are 0 to 31 and
3894     *       127 to 159, inclusive.  However, we allow code points 9, 10
3895     *       and 13, which are the tab, line feed and carriage return
3896     *       respectively. 128 and above the code points map to multibyte
3897     *       UTF-8 representations.
3898     *
3899     * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
3900     *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
3901     *       LGPL license.  Notes on what changed are inside, but in general,
3902     *       the original code transformed UTF-8 text into an array of integer
3903     *       Unicode codepoints. Understandably, transforming that back to
3904     *       a string would be somewhat expensive, so the function was modded to
3905     *       directly operate on the string.  However, this discourages code
3906     *       reuse, and the logic enumerated here would be useful for any
3907     *       function that needs to be able to understand UTF-8 characters.
3908     *       As of right now, only smart lossless character encoding converters
3909     *       would need that, and I'm probably not going to implement them.
3910     */
3911    public static function cleanUTF8($str, $force_php = false)
3912    {
3913        // UTF-8 validity is checked since PHP 4.3.5
3914        // This is an optimization: if the string is already valid UTF-8, no
3915        // need to do PHP stuff. 99% of the time, this will be the case.
3916        if (preg_match(
3917            '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
3918            $str
3919        )) {
3920            return $str;
3921        }
3922
3923        $mState = 0; // cached expected number of octets after the current octet
3924        // until the beginning of the next UTF8 character sequence
3925        $mUcs4  = 0; // cached Unicode character
3926        $mBytes = 1; // cached expected number of octets in the current sequence
3927
3928        // original code involved an $out that was an array of Unicode
3929        // codepoints.  Instead of having to convert back into UTF-8, we've
3930        // decided to directly append valid UTF-8 characters onto a string
3931        // $out once they're done.  $char accumulates raw bytes, while $mUcs4
3932        // turns into the Unicode code point, so there's some redundancy.
3933
3934        $out = '';
3935        $char = '';
3936
3937        $len = strlen($str);
3938        for ($i = 0; $i < $len; $i++) {
3939            $in = ord($str[$i]);
3940            $char .= $str[$i]; // append byte to char
3941            if (0 == $mState) {
3942                // When mState is zero we expect either a US-ASCII character
3943                // or a multi-octet sequence.
3944                if (0 == (0x80 & ($in))) {
3945                    // US-ASCII, pass straight through.
3946                    if (($in <= 31 || $in == 127) &&
3947                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
3948                    ) {
3949                        // control characters, remove
3950                    } else {
3951                        $out .= $char;
3952                    }
3953                    // reset
3954                    $char = '';
3955                    $mBytes = 1;
3956                } elseif (0xC0 == (0xE0 & ($in))) {
3957                    // First octet of 2 octet sequence
3958                    $mUcs4 = ($in);
3959                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
3960                    $mState = 1;
3961                    $mBytes = 2;
3962                } elseif (0xE0 == (0xF0 & ($in))) {
3963                    // First octet of 3 octet sequence
3964                    $mUcs4 = ($in);
3965                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
3966                    $mState = 2;
3967                    $mBytes = 3;
3968                } elseif (0xF0 == (0xF8 & ($in))) {
3969                    // First octet of 4 octet sequence
3970                    $mUcs4 = ($in);
3971                    $mUcs4 = ($mUcs4 & 0x07) << 18;
3972                    $mState = 3;
3973                    $mBytes = 4;
3974                } elseif (0xF8 == (0xFC & ($in))) {
3975                    // First octet of 5 octet sequence.
3976                    //
3977                    // This is illegal because the encoded codepoint must be
3978                    // either:
3979                    // (a) not the shortest form or
3980                    // (b) outside the Unicode range of 0-0x10FFFF.
3981                    // Rather than trying to resynchronize, we will carry on
3982                    // until the end of the sequence and let the later error
3983                    // handling code catch it.
3984                    $mUcs4 = ($in);
3985                    $mUcs4 = ($mUcs4 & 0x03) << 24;
3986                    $mState = 4;
3987                    $mBytes = 5;
3988                } elseif (0xFC == (0xFE & ($in))) {
3989                    // First octet of 6 octet sequence, see comments for 5
3990                    // octet sequence.
3991                    $mUcs4 = ($in);
3992                    $mUcs4 = ($mUcs4 & 1) << 30;
3993                    $mState = 5;
3994                    $mBytes = 6;
3995                } else {
3996                    // Current octet is neither in the US-ASCII range nor a
3997                    // legal first octet of a multi-octet sequence.
3998                    $mState = 0;
3999                    $mUcs4  = 0;
4000                    $mBytes = 1;
4001                    $char = '';
4002                }
4003            } else {
4004                // When mState is non-zero, we expect a continuation of the
4005                // multi-octet sequence
4006                if (0x80 == (0xC0 & ($in))) {
4007                    // Legal continuation.
4008                    $shift = ($mState - 1) * 6;
4009                    $tmp = $in;
4010                    $tmp = ($tmp & 0x0000003F) << $shift;
4011                    $mUcs4 |= $tmp;
4012
4013                    if (0 == --$mState) {
4014                        // End of the multi-octet sequence. mUcs4 now contains
4015                        // the final Unicode codepoint to be output
4016
4017                        // Check for illegal sequences and codepoints.
4018
4019                        // From Unicode 3.1, non-shortest form is illegal
4020                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
4021                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
4022                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
4023                            (4 < $mBytes) ||
4024                            // From Unicode 3.2, surrogate characters = illegal
4025                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
4026                            // Codepoints outside the Unicode range are illegal
4027                            ($mUcs4 > 0x10FFFF)
4028                        ) {
4029
4030                        } elseif (0xFEFF != $mUcs4 && // omit BOM
4031                            // check for valid Char unicode codepoints
4032                            (
4033                                0x9 == $mUcs4 ||
4034                                0xA == $mUcs4 ||
4035                                0xD == $mUcs4 ||
4036                                (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
4037                                // 7F-9F is not strictly prohibited by XML,
4038                                // but it is non-SGML, and thus we don't allow it
4039                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
4040                                (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
4041                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
4042                            )
4043                        ) {
4044                            $out .= $char;
4045                        }
4046                        // initialize UTF8 cache (reset)
4047                        $mState = 0;
4048                        $mUcs4  = 0;
4049                        $mBytes = 1;
4050                        $char = '';
4051                    }
4052                } else {
4053                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
4054                    // Incomplete multi-octet sequence.
4055                    // used to result in complete fail, but we'll reset
4056                    $mState = 0;
4057                    $mUcs4  = 0;
4058                    $mBytes = 1;
4059                    $char ='';
4060                }
4061            }
4062        }
4063        return $out;
4064    }
4065
4066    /**
4067     * Translates a Unicode codepoint into its corresponding UTF-8 character.
4068     * @note Based on Feyd's function at
4069     *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
4070     *       which is in public domain.
4071     * @note While we're going to do code point parsing anyway, a good
4072     *       optimization would be to refuse to translate code points that
4073     *       are non-SGML characters.  However, this could lead to duplication.
4074     * @note This is very similar to the unichr function in
4075     *       maintenance/generate-entity-file.php (although this is superior,
4076     *       due to its sanity checks).
4077     */
4078
4079    // +----------+----------+----------+----------+
4080    // | 33222222 | 22221111 | 111111   |          |
4081    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
4082    // +----------+----------+----------+----------+
4083    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
4084    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
4085    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
4086    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
4087    // +----------+----------+----------+----------+
4088    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
4089    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
4090    // +----------+----------+----------+----------+
4091
4092    public static function unichr($code)
4093    {
4094        if ($code > 1114111 or $code < 0 or
4095            ($code >= 55296 and $code <= 57343) ) {
4096            // bits are set outside the "valid" range as defined
4097            // by UNICODE 4.1.0
4098            return '';
4099        }
4100
4101        $x = $y = $z = $w = 0;
4102        if ($code < 128) {
4103            // regular ASCII character
4104            $x = $code;
4105        } else {
4106            // set up bits for UTF-8
4107            $x = ($code & 63) | 128;
4108            if ($code < 2048) {
4109                $y = (($code & 2047) >> 6) | 192;
4110            } else {
4111                $y = (($code & 4032) >> 6) | 128;
4112                if ($code < 65536) {
4113                    $z = (($code >> 12) & 15) | 224;
4114                } else {
4115                    $z = (($code >> 12) & 63) | 128;
4116                    $w = (($code >> 18) & 7)  | 240;
4117                }
4118            }
4119        }
4120        // set up the actual character
4121        $ret = '';
4122        if ($w) {
4123            $ret .= chr($w);
4124        }
4125        if ($z) {
4126            $ret .= chr($z);
4127        }
4128        if ($y) {
4129            $ret .= chr($y);
4130        }
4131        $ret .= chr($x);
4132
4133        return $ret;
4134    }
4135
4136    /**
4137     * @return bool
4138     */
4139    public static function iconvAvailable()
4140    {
4141        static $iconv = null;
4142        if ($iconv === null) {
4143            $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
4144        }
4145        return $iconv;
4146    }
4147
4148    /**
4149     * Convert a string to UTF-8 based on configuration.
4150     * @param string $str The string to convert
4151     * @param HTMLPurifier_Config $config
4152     * @param HTMLPurifier_Context $context
4153     * @return string
4154     */
4155    public static function convertToUTF8($str, $config, $context)
4156    {
4157        $encoding = $config->get('Core.Encoding');
4158        if ($encoding === 'utf-8') {
4159            return $str;
4160        }
4161        static $iconv = null;
4162        if ($iconv === null) {
4163            $iconv = self::iconvAvailable();
4164        }
4165        if ($iconv && !$config->get('Test.ForceNoIconv')) {
4166            // unaffected by bugs, since UTF-8 support all characters
4167            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
4168            if ($str === false) {
4169                // $encoding is not a valid encoding
4170                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
4171                return '';
4172            }
4173            // If the string is bjorked by Shift_JIS or a similar encoding
4174            // that doesn't support all of ASCII, convert the naughty
4175            // characters to their true byte-wise ASCII/UTF-8 equivalents.
4176            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
4177            return $str;
4178        } elseif ($encoding === 'iso-8859-1') {
4179            $str = utf8_encode($str);
4180            return $str;
4181        }
4182        $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
4183        if ($bug == self::ICONV_OK) {
4184            trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
4185        } else {
4186            trigger_error(
4187                'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
4188                'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
4189                E_USER_ERROR
4190            );
4191        }
4192    }
4193
4194    /**
4195     * Converts a string from UTF-8 based on configuration.
4196     * @param string $str The string to convert
4197     * @param HTMLPurifier_Config $config
4198     * @param HTMLPurifier_Context $context
4199     * @return string
4200     * @note Currently, this is a lossy conversion, with unexpressable
4201     *       characters being omitted.
4202     */
4203    public static function convertFromUTF8($str, $config, $context)
4204    {
4205        $encoding = $config->get('Core.Encoding');
4206        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
4207            $str = self::convertToASCIIDumbLossless($str);
4208        }
4209        if ($encoding === 'utf-8') {
4210            return $str;
4211        }
4212        static $iconv = null;
4213        if ($iconv === null) {
4214            $iconv = self::iconvAvailable();
4215        }
4216        if ($iconv && !$config->get('Test.ForceNoIconv')) {
4217            // Undo our previous fix in convertToUTF8, otherwise iconv will barf
4218            $ascii_fix = self::testEncodingSupportsASCII($encoding);
4219            if (!$escape && !empty($ascii_fix)) {
4220                $clear_fix = array();
4221                foreach ($ascii_fix as $utf8 => $native) {
4222                    $clear_fix[$utf8] = '';
4223                }
4224                $str = strtr($str, $clear_fix);
4225            }
4226            $str = strtr($str, array_flip($ascii_fix));
4227            // Normal stuff
4228            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
4229            return $str;
4230        } elseif ($encoding === 'iso-8859-1') {
4231            $str = utf8_decode($str);
4232            return $str;
4233        }
4234        trigger_error('Encoding not supported', E_USER_ERROR);
4235        // You might be tempted to assume that the ASCII representation
4236        // might be OK, however, this is *not* universally true over all
4237        // encodings.  So we take the conservative route here, rather
4238        // than forcibly turn on %Core.EscapeNonASCIICharacters
4239    }
4240
4241    /**
4242     * Lossless (character-wise) conversion of HTML to ASCII
4243     * @param string $str UTF-8 string to be converted to ASCII
4244     * @return string ASCII encoded string with non-ASCII character entity-ized
4245     * @warning Adapted from MediaWiki, claiming fair use: this is a common
4246     *       algorithm. If you disagree with this license fudgery,
4247     *       implement it yourself.
4248     * @note Uses decimal numeric entities since they are best supported.
4249     * @note This is a DUMB function: it has no concept of keeping
4250     *       character entities that the projected character encoding
4251     *       can allow. We could possibly implement a smart version
4252     *       but that would require it to also know which Unicode
4253     *       codepoints the charset supported (not an easy task).
     * @note Somewhat similar to cleanUTF8(), but it assumes that $str is
     *       well-formed UTF-8
4256     */
4257    public static function convertToASCIIDumbLossless($str)
4258    {
4259        $bytesleft = 0;
4260        $result = '';
4261        $working = 0;
4262        $len = strlen($str);
4263        for ($i = 0; $i < $len; $i++) {
4264            $bytevalue = ord($str[$i]);
4265            if ($bytevalue <= 0x7F) { //0xxx xxxx
4266                $result .= chr($bytevalue);
4267                $bytesleft = 0;
4268            } elseif ($bytevalue <= 0xBF) { //10xx xxxx
4269                $working = $working << 6;
4270                $working += ($bytevalue & 0x3F);
4271                $bytesleft--;
4272                if ($bytesleft <= 0) {
4273                    $result .= "&#" . $working . ";";
4274                }
4275            } elseif ($bytevalue <= 0xDF) { //110x xxxx
4276                $working = $bytevalue & 0x1F;
4277                $bytesleft = 1;
4278            } elseif ($bytevalue <= 0xEF) { //1110 xxxx
4279                $working = $bytevalue & 0x0F;
4280                $bytesleft = 2;
4281            } else { //1111 0xxx
4282                $working = $bytevalue & 0x07;
4283                $bytesleft = 3;
4284            }
4285        }
4286        return $result;
4287    }
4288
    /**
     * No bugs detected in iconv.
     * Reported by testIconvTruncateBug() when its probe (a two-byte UTF-8
     * character followed by 9000 'a's) converts to exactly 9000 bytes.
     */
    const ICONV_OK = 0;

    /**
     * Iconv truncates output if converting from UTF-8 to another
     * character set with //IGNORE, and a non-encodable character is found.
     * Reported by testIconvTruncateBug() when the probe comes back shorter
     * than 9000 bytes.
     */
    const ICONV_TRUNCATES = 1;

    /**
     * Iconv does not support //IGNORE, making it unusable for
     * transcoding purposes.
     * Reported by testIconvTruncateBug() when the probe conversion
     * returns false.
     */
    const ICONV_UNUSABLE = 2;
4299
4300    /**
4301     * glibc iconv has a known bug where it doesn't handle the magic
4302     * //IGNORE stanza correctly.  In particular, rather than ignore
4303     * characters, it will return an EILSEQ after consuming some number
4304     * of characters, and expect you to restart iconv as if it were
4305     * an E2BIG.  Old versions of PHP did not respect the errno, and
4306     * returned the fragment, so as a result you would see iconv
4307     * mysteriously truncating output. We can work around this by
4308     * manually chopping our input into segments of about 8000
4309     * characters, as long as PHP ignores the error code.  If PHP starts
4310     * paying attention to the error code, iconv becomes unusable.
4311     *
4312     * @return int Error code indicating severity of bug.
4313     */
4314    public static function testIconvTruncateBug()
4315    {
4316        static $code = null;
4317        if ($code === null) {
4318            // better not use iconv, otherwise infinite loop!
4319            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
4320            if ($r === false) {
4321                $code = self::ICONV_UNUSABLE;
4322            } elseif (($c = strlen($r)) < 9000) {
4323                $code = self::ICONV_TRUNCATES;
4324            } elseif ($c > 9000) {
4325                trigger_error(
4326                    'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
4327                    'include your iconv version as per phpversion()',
4328                    E_USER_ERROR
4329                );
4330            } else {
4331                $code = self::ICONV_OK;
4332            }
4333        }
4334        return $code;
4335    }
4336
4337    /**
4338     * This expensive function tests whether or not a given character
4339     * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
4340     * fail this test, and require special processing. Variable width
4341     * encodings shouldn't ever fail.
4342     *
4343     * @param string $encoding Encoding name to test, as per iconv format
4344     * @param bool $bypass Whether or not to bypass the precompiled arrays.
     * @return array|false Map of UTF-8 characters to their corresponding ASCII,
     *      which can be used to "undo" any overzealous iconv action, or false
     *      if iconv cannot convert to $encoding at all.
4347     */
4348    public static function testEncodingSupportsASCII($encoding, $bypass = false)
4349    {
4350        // All calls to iconv here are unsafe, proof by case analysis:
4351        // If ICONV_OK, no difference.
4352        // If ICONV_TRUNCATE, all calls involve one character inputs,
4353        // so bug is not triggered.
4354        // If ICONV_UNUSABLE, this call is irrelevant
4355        static $encodings = array();
4356        if (!$bypass) {
4357            if (isset($encodings[$encoding])) {
4358                return $encodings[$encoding];
4359            }
4360            $lenc = strtolower($encoding);
4361            switch ($lenc) {
4362                case 'shift_jis':
4363                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
4364                case 'johab':
4365                    return array("\xE2\x82\xA9" => '\\');
4366            }
4367            if (strpos($lenc, 'iso-8859-') === 0) {
4368                return array();
4369            }
4370        }
4371        $ret = array();
4372        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
4373            return false;
4374        }
4375        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
4376            $c = chr($i); // UTF-8 char
4377            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
4378            if ($r === '' ||
4379                // This line is needed for iconv implementations that do not
4380                // omit characters that do not exist in the target character set
4381                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
4382            ) {
4383                // Reverse engineer: what's the UTF-8 equiv of this byte
4384                // sequence? This assumes that there's no variable width
4385                // encoding that doesn't support ASCII.
4386                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
4387            }
4388        }
4389        $encodings[$encoding] = $ret;
4390        return $ret;
4391    }
4392}
4393
4394
4395
4396
4397
4398/**
4399 * Object that provides entity lookup table from entity name to character
4400 */
class HTMLPurifier_EntityLookup
{
    /**
     * Map of entity name to the character it stands for; filled by setup().
     * @type array
     */
    public $table;

    /**
     * Loads the lookup table by unserializing the contents of a file.
     * @param string|bool $file Path of the serialized table. Any falsy value
     *       selects the entities.ser file below HTMLPURIFIER_PREFIX.
     * @note The serialized contents are versioned, but were generated
     *       using the maintenance script generate_entity_file.php
     * @warning This is not in constructor to help enforce the Singleton
     */
    public function setup($file = false)
    {
        $path = $file
            ? $file
            : HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
        $this->table = unserialize(file_get_contents($path));
    }

    /**
     * Returns the shared lookup object, creating and loading it on first use.
     * @param bool|HTMLPurifier_EntityLookup $prototype When truthy, this
     *       object replaces the shared instance and is returned.
     * @return HTMLPurifier_EntityLookup
     */
    public static function instance($prototype = false)
    {
        // no references, since PHP doesn't copy unless modified
        static $instance = null;

        if ($prototype) {
            $instance = $prototype;
            return $instance;
        }
        if (!$instance) {
            $instance = new self();
            $instance->setup();
        }
        return $instance;
    }
}
4442
4443
4444
4445
4446
4447// if want to implement error collecting here, we'll need to use some sort
4448// of global data (probably trigger_error) because it's impossible to pass
4449// $config or $context to the callback functions.
4450
4451/**
 * Handles referencing and dereferencing character entities
4453 */
class HTMLPurifier_EntityParser
{

    /**
     * Entity lookup table, fetched lazily the first time a named entity
     * has to be resolved.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Regex used by substituteTextEntities().
     * @type string
     */
    protected $_textEntitiesRegex;

    /**
     * Regex used by substituteAttrEntities().
     * @type string
     */
    protected $_attrEntitiesRegex;

    /**
     * Regex matching '&' followed by one of the semicolon-optional entity
     * names. Three empty groups keep the name in capture group 4.
     * @type string
     */
    protected $_semiOptionalPrefixRegex;

    /**
     * Builds the three entity regexes from a shared list of
     * semicolon-optional entity names.
     */
    public function __construct()
    {
        // From
        // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
        $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";

        // Alternatives shared by the text and attribute regexes:
        //   group 1: hex, group 2: dec, group 3: name (mandatory semicolon)
        // NB: order matters: match semicolon preferentially
        $strict_forms =
            '[#]x([a-fA-F0-9]+);?|' .
            '[#]0*(\d+);?|' .
            '([A-Za-z_:][A-Za-z0-9.\-_:]*);|';

        // NB: three empty captures to put the fourth match in the right
        // place
        $this->_semiOptionalPrefixRegex = '/&()()()(' . $semi_optional . ')/';

        // group 4: name (optional semicolon)
        $this->_textEntitiesRegex =
            '/&(?:' . $strict_forms . '(' . $semi_optional . '))/';

        // Same, but don't match the semicolon-less form if trailing is
        // equals or alphanumeric (URL like)
        $this->_attrEntitiesRegex =
            '/&(?:' . $strict_forms . '(' . $semi_optional . ')(?![=;A-Za-z0-9]))/';
    }

    /**
     * Substitute entities with the parsed equivalents.  Use this on
     * textual data in an HTML document (as opposed to attributes.)
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteTextEntities($string)
    {
        $callback = array($this, 'entityCallback');
        return preg_replace_callback($this->_textEntitiesRegex, $callback, $string);
    }

    /**
     * Substitute entities with the parsed equivalents.  Use this on
     * attribute contents in documents.
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteAttrEntities($string)
    {
        $callback = array($this, 'entityCallback');
        return preg_replace_callback($this->_attrEntitiesRegex, $callback, $string);
    }

    /**
     * Callback for substituteTextEntities() and substituteAttrEntities()
     * that does the work.
     *
     * @param array $matches  PCRE matches array, with 0 the entire match, and
     *                  index 1 (hex value), 2 (dec value), 3 (name with
     *                  semicolon) or 4 (semicolon-optional name) set.
     * @return string Replacement string.
     */
    protected function entityCallback($matches)
    {
        $whole = $matches[0];
        $hex = isset($matches[1]) ? $matches[1] : null;
        $dec = isset($matches[2]) ? $matches[2] : null;

        if ($hex !== null && $hex !== '') {
            return HTMLPurifier_Encoder::unichr(hexdec($hex));
        }
        if ($dec !== null && $dec !== '') {
            return HTMLPurifier_Encoder::unichr((int) $dec);
        }

        // Named entity: prefer group 3, fall back to group 4.
        $strict_name_matched = !empty($matches[3]);
        if ($strict_name_matched) {
            $name = $matches[3];
        } else {
            $name = isset($matches[4]) ? $matches[4] : null;
        }

        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }

        // exact match didn't match anything, so test if
        // any of the semicolon optional match the prefix.
        // Test that this is an EXACT match is important to
        // prevent infinite loop
        if ($strict_name_matched) {
            return preg_replace_callback(
                $this->_semiOptionalPrefixRegex,
                array($this, 'entityCallback'),
                $whole
            );
        }
        return $whole;
    }

    // LEGACY CODE BELOW

    /**
     * Callback regex string for parsing entities.
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
    //     1. hex             2. dec      3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str = array(
        34 => '"',
        38 => '&',
        39 => "'",
        60 => '<',
        62 => '>'
    );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * @type array
     */
    protected $_special_ent2dec = array(
        'quot' => 34,
        'amp'  => 38,
        'lt'   => 60,
        'gt'   => 62
    );

    /**
     * Substitutes non-special entities with their parsed equivalents. Since
     * running this whenever you have parsed character is t3h 5uck, we run
     * it before everything else.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        $callback = array($this, 'nonSpecialEntityCallback');
        return preg_replace_callback($this->_substituteEntitiesRegex, $callback, $string);
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @param array $matches  PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $whole = $matches[0];

        if (isset($whole[1]) && $whole[1] === '#') {
            // Numeric reference: an 'x' right after '&#' marks hex.
            $is_hex = isset($whole[2]) && $whole[2] === 'x';
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $whole;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $whole;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $whole;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        $callback = array($this, 'specialEntityCallback');
        return preg_replace_callback($this->_substituteEntitiesRegex, $callback, $string);
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches  PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function specialEntityCallback($matches)
    {
        $whole = $matches[0];

        if (isset($whole[1]) && $whole[1] === '#') {
            $is_hex = isset($whole[2]) && $whole[2] === 'x';
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            if (isset($this->_special_dec2str[$code])) {
                return $this->_special_dec2str[$code];
            }
            return $whole;
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $this->_special_dec2str[$this->_special_ent2dec[$name]];
        }
        return $whole;
    }
}
4728
4729
4730
4731
4732
4733/**
4734 * Error collection class that enables HTML Purifier to report HTML
4735 * problems back to the user
4736 */
class HTMLPurifier_ErrorCollector
{

    /**
     * Identifiers for the returned error array. These are purposely numeric
     * so list() can be used.
     */
    const LINENO   = 0;
    const SEVERITY = 1;
    const MESSAGE  = 2;
    const CHILDREN = 3;

    /**
     * Flat list of recorded errors; bound by reference to $_stacks[0].
     * @type array
     */
    protected $errors;

    /**
     * List that send() appends to; bound by reference to $_stacks[0].
     * @type array
     */
    protected $_current;

    /**
     * @type array
     */
    protected $_stacks = array(array());

    /**
     * @type HTMLPurifier_Language
     */
    protected $locale;

    /**
     * Created anew on every getHTMLFormatted() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * @type HTMLPurifier_Context
     */
    protected $context;

    /**
     * Error structs indexed as [line][column]; key -1 holds the struct for
     * errors that have no integer line/column.
     * @type array
     */
    protected $lines = array();

    /**
     * @param HTMLPurifier_Context $context Must provide 'Locale'.
     */
    public function __construct($context)
    {
        $this->context   = $context;
        $this->locale    =& $context->get('Locale');
        $this->errors    =& $this->_stacks[0];
        $this->_current  =& $this->_stacks[0];
    }

    /**
     * Sends an error message to the collector for later use
     * @param int $severity Error severity, PHP error style (don't use E_USER_)
     * @param string $msg Error message text
     * @note Any further arguments are handed to the locale as message
     *       parameters, numbered from 1.
     */
    public function send($severity, $msg)
    {
        // Drop $severity, then $msg, leaving the extras keyed 1..n.
        $all = func_get_args();
        $args = array_slice($all, 1);
        unset($args[0]);

        $token = $this->context->get('CurrentToken', true);
        if ($token) {
            $line = $token->line;
            $col  = $token->col;
        } else {
            $line = $this->context->get('CurrentLine', true);
            $col  = $this->context->get('CurrentCol', true);
        }
        $attr = $this->context->get('CurrentAttr', true);

        // perform special substitutions, also add custom parameters
        $subst = array();
        if ($token !== null) {
            $args['CurrentToken'] = $token;
        }
        if ($attr !== null) {
            $subst['$CurrentAttr.Name'] = $attr;
            if (isset($token->attr[$attr])) {
                $subst['$CurrentAttr.Value'] = $token->attr[$attr];
            }
        }

        $msg = empty($args)
            ? $this->locale->getMessage($msg)
            : $this->locale->formatMessage($msg, $args);

        if (!empty($subst)) {
            $msg = strtr($msg, $subst);
        }

        // (numerically indexed)
        $this->_current[] = array(
            self::LINENO   => $line,
            self::SEVERITY => $severity,
            self::MESSAGE  => $msg,
            self::CHILDREN => array()
        );

        // NEW CODE BELOW ...
        // Top-level errors are either:
        //  TOKEN type, if $value is set appropriately, or
        //  "syntax" type, if $value is null
        $new_struct = new HTMLPurifier_ErrorStruct();
        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
        if ($token) {
            $new_struct->value = clone $token;
        }

        if (is_int($line) && is_int($col)) {
            // Reuse the struct already registered at this position, if any.
            if (!isset($this->lines[$line][$col])) {
                $this->lines[$line][$col] = $new_struct;
            }
            $struct = $this->lines[$line][$col];
            // These ksorts may present a performance problem
            ksort($this->lines[$line], SORT_NUMERIC);
        } else {
            // No usable position: everything is pooled under key -1.
            if (!isset($this->lines[-1])) {
                $this->lines[-1] = $new_struct;
            }
            $struct = $this->lines[-1];
        }
        ksort($this->lines, SORT_NUMERIC);

        // Now, check if we need to operate on a lower structure
        if (!empty($attr)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
            if (!$struct->value) {
                $struct->value = array($attr, 'PUT VALUE HERE');
            }
        }
        // $cssprop is never assigned in this method, so this branch is
        // currently dormant.
        if (!empty($cssprop)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
            if (!$struct->value) {
                // if we tokenize CSS this might be a little more difficult to do
                $struct->value = array($cssprop, 'PUT VALUE HERE');
            }
        }

        // Ok, structs are all setup, now time to register the error
        $struct->addError($severity, $msg);
    }

    /**
     * Retrieves raw error data for custom formatter to use
     * @return array
     */
    public function getRaw()
    {
        return $this->errors;
    }

    /**
     * Default HTML formatting implementation for error messages
     * @param HTMLPurifier_Config $config Configuration, vital for HTML output nature
     * @param array $errors Errors array; only consulted to decide whether
     *       the "no errors" message is returned. Defaults to the
     *       collector's own list.
     * @return string
     */
    public function getHTMLFormatted($config, $errors = null)
    {
        $this->generator = new HTMLPurifier_Generator($config, $this->context);
        if ($errors === null) {
            $errors = $this->errors;
        }

        // 'At line' message needs to be removed

        // generation code for new structure goes here. It needs to be recursive.
        $ret = array();
        foreach ($this->lines as $line => $col_array) {
            if ($line == -1) {
                // position-less errors are rendered last, below
                continue;
            }
            foreach ($col_array as $col => $struct) {
                $this->_renderStruct($ret, $struct, $line, $col);
            }
        }
        if (isset($this->lines[-1])) {
            $this->_renderStruct($ret, $this->lines[-1]);
        }

        if (empty($errors)) {
            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
        }
        return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
    }

    /**
     * Appends one HTML fragment per error of $struct and of its descendant
     * structs to $ret.
     * @param array $ret Output list, appended to by reference
     * @param HTMLPurifier_ErrorStruct $struct
     * @param int|null $line
     * @param int|null $col
     */
    private function _renderStruct(&$ret, $struct, $line = null, $col = null)
    {
        $stack = array($struct);
        $context_stack = array(array());
        while ($current = array_pop($stack)) {
            $context = array_pop($context_stack);
            foreach ($current->errors as $entry) {
                list($severity, $msg) = $entry;
                // W3C uses an icon to indicate the severity of the error.
                $label = $this->locale->getErrorName($severity);
                if (!is_null($line) && !is_null($col)) {
                    $location = "<em class=\"location\">Line $line, Column $col: </em> ";
                } else {
                    $location = '<em class="location">End of Document: </em> ';
                }
                // Here, have a marker for the character on the column appropriate.
                // Be sure to clip extremely long lines.
                $ret[] = '<div>'
                    . "<span class=\"error e$severity\"><strong>$label</strong></span> "
                    . $location
                    . '<strong class="description">' . $this->generator->escape($msg) . '</strong> '
                    . '</div>';
            }
            foreach ($current->children as $array) {
                $context[] = $current;
                $stack = array_merge($stack, array_reverse($array, true));
                for ($i = count($array); $i > 0; $i--) {
                    $context_stack[] = $context;
                }
            }
        }
    }
}
4973
4974
4975
4976
4977
4978/**
4979 * Records errors for particular segments of an HTML document such as tokens,
4980 * attributes or CSS properties. They can contain error structs (which apply
4981 * to components of what they represent), but their main purpose is to hold
4982 * errors applying to whatever struct is being used.
4983 */
class HTMLPurifier_ErrorStruct
{

    /**
     * Possible values for $children first-key. Note that top-level structures
     * are automatically token-level.
     */
    const TOKEN     = 0;
    const ATTR      = 1;
    const CSSPROP   = 2;

    /**
     * Type of this struct: one of the TOKEN, ATTR or CSSPROP constants.
     * @type int
     */
    public $type;

    /**
     * Value of the struct we are recording errors for. There are various
     * values for this:
     *  - TOKEN: Instance of HTMLPurifier_Token
     *  - ATTR: array('attr-name', 'value')
     *  - CSSPROP: array('prop-name', 'value')
     * @type mixed
     */
    public $value;

    /**
     * Errors registered for this structure, each stored as
     * array($severity, $message).
     * @type array
     */
    public $errors = array();

    /**
     * Child ErrorStructs that are from this structure. For example, a TOKEN
     * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
     * array in structure: [TYPE]['identifier']
     * @type array
     */
    public $children = array();

    /**
     * Returns the child struct filed under [$type][$id], creating it (with
     * its type set to $type) the first time it is requested.
     * @param int $type One of the TOKEN, ATTR or CSSPROP constants
     * @param string $id
     * @return HTMLPurifier_ErrorStruct
     */
    public function getChild($type, $id)
    {
        if (isset($this->children[$type][$id])) {
            return $this->children[$type][$id];
        }
        $child = new self();
        $child->type = $type;
        $this->children[$type][$id] = $child;
        return $child;
    }

    /**
     * Records one error against this struct.
     * @param int $severity
     * @param string $message
     */
    public function addError($severity, $message)
    {
        $entry = array($severity, $message);
        $this->errors[] = $entry;
    }
}
5048
5049
5050
5051
5052
5053/**
5054 * Global exception class for HTML Purifier; any exceptions we throw
5055 * are from here.
5056 */
class HTMLPurifier_Exception extends Exception
{
    // Intentionally empty: adds nothing to Exception beyond a distinct
    // class name that callers can catch.
}
5061
5062
5063
5064
5065
5066/**
5067 * Represents a pre or post processing filter on HTML Purifier's output
5068 *
5069 * Sometimes, a little ad-hoc fixing of HTML has to be done before
 * it gets sent through HTML Purifier: you can use filters to achieve
5071 * this effect. For instance, YouTube videos can be preserved using
5072 * this manner. You could have used a decorator for this task, but
5073 * PHP's support for them is not terribly robust, so we're going
5074 * to just loop through the filters.
5075 *
5076 * Filters should be exited first in, last out. If there are three filters,
5077 * named 1, 2 and 3, the order of execution should go 1->preFilter,
5078 * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
5079 * 1->postFilter.
5080 *
5081 * @note Methods are not declared abstract as it is perfectly legitimate
5082 *       for an implementation not to want anything to happen on a step
5083 */
5084
class HTMLPurifier_Filter
{

    /**
     * Identifier for this filter.
     * @type string
     */
    public $name;

    /**
     * Hook invoked on the HTML before it is purified. The base
     * implementation is a no-op that hands the input back untouched.
     *
     * @param string $html HTML about to be purified
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string The (here: unmodified) HTML
     */
    public function preFilter($html, $config, $context)
    {
        // Default: pass through unchanged; subclasses override as needed.
        return $html;
    }

    /**
     * Hook invoked on the HTML after it has been purified. The base
     * implementation is a no-op that hands the input back untouched.
     *
     * @param string $html Purified HTML
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string The (here: unmodified) HTML
     */
    public function postFilter($html, $config, $context)
    {
        // Default: pass through unchanged; subclasses override as needed.
        return $html;
    }
}
5118
5119
5120
5121
5122
5123/**
5124 * Generates HTML from tokens.
5125 * @todo Refactor interface so that configuration/context is determined
5126 *       upon instantiation, no need for messy generateFromTokens() calls
5127 * @todo Make some of the more internal functions protected, and have
5128 *       unit tests work around that
5129 */
class HTMLPurifier_Generator
{

    /**
     * Whether or not generator should produce XML output.
     * @type bool
     */
    private $_xhtml = true;

    /**
     * :HACK: Whether or not generator should comment the insides of <script> tags.
     * @type bool
     */
    private $_scriptFix = false;

    /**
     * Cache of HTMLDefinition during HTML output to determine whether or
     * not attributes should be minimized.
     * @type HTMLPurifier_HTMLDefinition
     */
    private $_def;

    /**
     * Cache of %Output.SortAttr.
     * @type bool
     */
    private $_sortAttr;

    /**
     * Cache of %Output.FlashCompat.
     * @type bool
     */
    private $_flashCompat;

    /**
     * Cache of %Output.FixInnerHTML.
     * @type bool
     */
    private $_innerHTMLFix;

    /**
     * Stack for keeping track of object information when outputting IE
     * compatibility code. One entry is pushed per open <object> and popped
     * again when the matching </object> is generated.
     * @type array
     */
    private $_flashStack = array();

    /**
     * Configuration for the generator
     * @type HTMLPurifier_Config
     */
    protected $config;

    /**
     * Caches the output-related directives and the HTML definition.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context Accepted but not read here.
     */
    public function __construct($config, $context)
    {
        $this->config = $config;
        $this->_scriptFix = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr = $config->get('Output.SortAttr');
        $this->_flashCompat = $config->get('Output.FlashCompat');
        $this->_def = $config->getHTMLDefinition();
        $this->_xhtml = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
     * @return string Generated HTML; the empty string for an empty list
     */
    public function generateFromTokens($tokens)
    {
        if (!$tokens) {
            return '';
        }

        // Basic algorithm: serialize the tokens one after another.
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            if ($this->_scriptFix && $tokens[$i]->name === 'script'
                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
                // script special case
                // the contents of the script block must be ONE token
                // for this to work: emit the start tag, then the wrapped
                // contents, and let the regular line below emit the end tag.
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup (only when the extension is present and requested)
        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
            $tidy = new Tidy;
            $tidy->parseString(
                $html,
                array(
                    'indent'=> true,
                    'output-xhtml' => $this->_xhtml,
                    'show-body-only' => true,
                    'indent-spaces' => 2,
                    'wrap' => 68,
                ),
                'utf8'
            );
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        if ($this->config->get('Core.NormalizeNewlines')) {
            $nl = $this->config->get('Output.Newline');
            if ($nl === null) {
                $nl = PHP_EOL;
            }
            if ($nl !== "\n") {
                $html = str_replace("\n", $nl, $html);
            }
        }
        return $html;
    }

    /**
     * Generates HTML from a single token.
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string Generated HTML; the empty string (plus an
     *         E_USER_WARNING) when $token is not an HTMLPurifier_Token, and
     *         the empty string for token classes not handled here.
     */
    public function generateFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';

        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat && $token->name == "object") {
                // remember the open <object> so nested <param>s can be
                // associated with it
                $flash = new stdClass();
                $flash->attr = $token->attr;
                $flash->param = array();
                $this->_flashStack[] = $flash;
            }
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

        } elseif ($token instanceof HTMLPurifier_Token_End) {
            if ($this->_flashCompat && $token->name == "object" && !empty($this->_flashStack)) {
                // Balance the push done for the start tag. No extra markup
                // is emitted for now, but without the pop the stack would
                // only ever grow and later <param>s would be attributed to
                // an object that has already been closed.
                array_pop($this->_flashStack);
            }
            return '</' . $token->name . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            if ($this->_flashCompat && $token->name == "param"
                && !empty($this->_flashStack) && isset($token->attr['name'])) {
                // isset() guards: a param may lack name and/or value, and
                // reading a missing key raises a warning on PHP 8.
                $current = end($this->_flashStack);
                $current->param[$token->attr['name']] =
                    isset($token->attr['value']) ? $token->attr['value'] : null;
            }
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
                . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Text) {
            // quotes need no escaping outside of attribute values
            return $this->escape($token->data, ENT_NOQUOTES);

        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        } else {
            return '';

        }
    }

    /**
     * Special case processor for the contents of script tags
     * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
     * @return string The text wrapped in a comment/CDATA guard; non-text
     *         tokens are delegated to generateFromToken().
     * @warning This runs into problems if there's already a literal
     *          --> somewhere inside the script contents.
     */
    public function generateScriptFromToken($token)
    {
        if (!$token instanceof HTMLPurifier_Token_Text) {
            return $this->generateFromToken($token);
        }
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        // Drop a trailing "//" (and trailing whitespace) from the data
        // before it is wrapped in the guard below.
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates attribute declarations from attribute array.
     * @note This does not include the leading or trailing space.
     * @param array $assoc_array_of_attributes Attribute array
     * @param string $element Name of element attributes are for, used to check
     *        attribute minimization.
     * @return string Generated HTML fragment for insertion.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = '')
    {
        $html = '';
        if ($this->_sortAttr) {
            ksort($assoc_array_of_attributes);
        }
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) {
                    continue;
                }
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            // Workaround for Internet Explorer innerHTML bug.
            // Essentially, Internet Explorer, when calculating
            // innerHTML, omits quotes if there are no instances of
            // angled brackets, quotes or spaces.  However, when parsing
            // HTML (for example, when you assign to innerHTML), it
            // treats backticks as quotes.  Thus,
            //      <img alt="``" />
            // becomes
            //      <img alt=`` />
            // becomes
            //      <img alt='' />
            // Fortunately, all we need to do is trigger an appropriate
            // quoting style, which we do by adding an extra space.
            // This also is consistent with the W3C spec, which states
            // that user agents may ignore leading or trailing
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable).  Still, we have a configuration knob for
            // this, since this transformation is not necessary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix && strpos($value, '`') !== false) {
                // check if correct quoting style would not already be
                // triggered
                if (strcspn($value, '"\' <>') === strlen($value)) {
                    // protect!
                    $value .= ' ';
                }
            }
            $html .= $key.'="'.$this->escape($value).'" ';
        }
        // every attribute was appended with a trailing separator space
        return rtrim($html);
    }

    /**
     * Escapes raw text data.
     * @todo This really ought to be protected, but until we have a facility
     *       for properly generating HTML here w/o using tokens, it stays
     *       public.
     * @param string $string String data to escape for HTML.
     * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
     *               permissible for non-attribute output. Defaults to
     *               ENT_COMPAT when null.
     * @return string escaped data.
     */
    public function escape($string, $quote = null)
    {
        // Workaround for APC bug on Mac Leopard reported by sidepodcast
        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
        // (the default is resolved here instead of in the signature)
        if ($quote === null) {
            $quote = ENT_COMPAT;
        }
        return htmlspecialchars($string, $quote, 'UTF-8');
    }
}
5405
5406
5407
5408
5409
5410/**
5411 * Definition of the purified HTML that describes allowed children,
5412 * attributes, and many other things.
5413 *
5414 * Conventions:
5415 *
5416 * All member variables that are prefixed with info
5417 * (including the main $info array) are used by HTML Purifier internals
5418 * and should not be directly edited when customizing the HTMLDefinition.
5419 * They can usually be set via configuration directives or custom
5420 * modules.
5421 *
5422 * On the other hand, member variables without the info prefix are used
5423 * internally by the HTMLDefinition and MUST NOT be used by other HTML
5424 * Purifier internals. Many of them, however, are public, and may be
5425 * edited by userspace code to tweak the behavior of HTMLDefinition.
5426 *
5427 * @note This class is inspected by Printer_HTMLDefinition; please
5428 *       update that class if things here change.
5429 *
5430 * @warning Directives that change this object's structure must be in
5431 *          the HTML or Attr namespace!
5432 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef.
     * @type HTMLPurifier_ElementDef[]
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     * @type array
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     * @type string
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     * @type HTMLPurifier_ElementDef
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context.
     * @type string
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     * @type array
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     * @type HTMLPurifier_Injector[]
     */
    public $info_injector = array();

    /**
     * Doctype object
     * @type HTMLPurifier_Doctype
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param string $element_name Element name to add attribute to
     * @param string $attr_name Name of attribute
     * @param mixed $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def)
    {
        $module = $this->getAnonymousModule();
        // Reuse the anonymous module's entry for the element if it already
        // has one; otherwise register a blank one to hang the attribute on.
        if (isset($module->info[$element_name])) {
            $element = $module->info[$element_name];
        } else {
            $element = $module->addBlankElement($element_name);
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition. All arguments are
     * forwarded unchanged to the anonymous module's addElement(), and its
     * return value is returned.
     * @see HTMLPurifier_HTMLModule::addElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        return $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        return $this->getAnonymousModule()->addBlankElement($element_name);
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module. The module is created on first use and named 'Anonymous'.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        if (!$this->_anonModule) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created module collecting the user's raw customizations;
     * handed to the manager (and unset) by processModules().
     */
    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls data out of the module manager, applies
     * the configuration, then drops the manager and per-element content
     * model fields.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if ($this->_anonModule) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Every one of these module properties is merged into the
        // same-named property of this definition with identical rules:
        // a value of false removes the key, anything else overwrites it.
        $merged_properties = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );
        foreach ($this->manager->modules as $module) {
            foreach ($merged_properties as $property) {
                foreach ($module->$property as $k => $v) {
                    if ($v === false) {
                        unset($this->{$property}[$k]);
                    } else {
                        $this->{$property}[$k] = $v;
                    }
                }
            }
        }
        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * In order: block wrapper (%HTML.BlockWrapper), parent element
     * (%HTML.Parent), element/attribute whitelists (%HTML.AllowedElements,
     * %HTML.AllowedAttributes, falling back to %HTML.Allowed), blacklists
     * (%HTML.ForbiddenElements, %HTML.ForbiddenAttributes) and finally
     * pruning of injectors whose checkNeeded() does not return false.
     * Problems are reported through trigger_error().
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // if the error handler returns, fall back to the current parent
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            // neither explicit list given: try the combined TinyMCE-style one
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // tick off the name; whatever is left over was requested
                // but is not a known element
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // Copy that gets whittled down to the entries matching nothing.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // every spelling under which this attribute may have
                    // been allowed
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors for allowed-attribute entries that matched nothing
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                $bits = preg_split('/[.@]/', $elattr, 2);
                if (count($bits) === 2 && $bits[0] !== '*') {
                    // element-specific entry: "tag.attr" or "tag@attr"
                    $element = htmlspecialchars($bits[0]);
                    $attribute = htmlspecialchars($bits[1]);
                    if (!isset($this->info[$element])) {
                        // NOTE: raised at trigger_error()'s default level,
                        // unlike the sibling warnings; left as is for
                        // backward compatibility.
                        trigger_error(
                            "Cannot allow attribute '$attribute' if element " .
                            "'$element' is not allowed/supported $support"
                        );
                    } else {
                        trigger_error(
                            "Attribute '$attribute' in element '$element' not supported $support",
                            E_USER_WARNING
                        );
                    }
                    continue;
                }
                // Global entry: bare "attr", or wildcard "*.attr"/"*@attr".
                // The attribute name is always the LAST piece; reading the
                // first piece would report '*' for the wildcard forms.
                $attribute = htmlspecialchars($bits[count($bits) - 1]);
                trigger_error(
                    "Global attribute '$attribute' is not ".
                    "supported in any elements $support",
                    E_USER_WARNING
                );
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // warn about the unsupported "*.attr" spelling
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     * Entries are separated by commas and/or line breaks; spaces and tabs
     * are ignored.
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param string $list String list to parse
     * @return array Two-element array: lookup of element names (name => true)
     *      and lookup of attributes keyed "element.attr" (=> true). The
     *      element "*" contributes attributes but is not itself listed.
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list)
    {
        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) {
                continue;
            }
            // remove TinyMCE element control characters
            // (a '[' at position 0 is deliberately treated like "no '['")
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') {
                $elements[$element] = true;
            }
            if (!$attr) {
                continue;
            }
            $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
            foreach (explode('|', $attr) as $key) {
                $attributes["$element.$key"] = true;
            }
        }
        return array($elements, $attributes);
    }
}
5899
5900
5901
5902
5903
5904/**
5905 * Represents an XHTML 1.1 module, with information on elements, tags
5906 * and attributes.
5907 * @note Even though this is technically XHTML 1.1, it is also used for
5908 *       regular HTML parsing. We are using modulization as a convenient
5909 *       way to represent the internals of HTMLDefinition, and our
5910 *       implementation is by no means conforming and does not directly
5911 *       use the normative DTDs or XML schemas.
5912 * @note The public variables in a module should almost directly
5913 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
5914 *       However, the prefix info carries no special meaning in these
5915 *       objects (include it anyway if that's the correspondence though).
5916 * @todo Consider making some member functions protected
5917 */
5918
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its pre-requisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool This base implementation always
     *         returns false; overriding modules return a child definition.
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     *              A non-string value is stored directly as the element's
     *              child definition instead of being parsed.
     * @param array $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If the element is already defined in this module, an error
     * is triggered and the existing definition is returned untouched.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated as a ' | '-separated string.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  The shorthands 'Empty', 'Inline' and 'Flow' are
     *                  also recognized.
     * @return array Two-element list: (content model type, content model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first colon separates type from model; a string
     *       without any colon yields an empty content model.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Limit the split to two pieces so that the model keeps any further
        // colons, and guard the second piece so that a colon-less string
        // does not raise an undefined-offset notice/warning.
        $parts = explode(':', $contents, 2);
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = isset($parts[1]) ? trim($parts[1]) : '';
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under index 0 of $attr.
     * @param array $attr Reference to attr array to modify
     * @param array $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        // A string first argument means the variadic calling form was used.
        if (is_string($list)) {
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
6184
6185
6186
6187
6188
class HTMLPurifier_HTMLModuleManager
{

    /**
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Instance of current doctype, as produced by $doctypes->make() during
     * setup(). Its modules and tidyModules members drive module loading.
     * @type object
     */
    public $doctype;

    /**
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_HTMLModule instances,
     * indexed by module's name. This array is usually lazy loaded, but a
     * user can overload a module by pre-emptively registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * List of names of extra modules that were added by the user
     * using addModule(). These get unconditionally merged into the current doctype, whatever
     * it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of modules that have
     * definitions for the element; this array is dynamically filled.
     * @type array
     */
    public $elementLookup = array();

    /**
     * List of prefixes we should use for registering small names.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, unsafe elements and attributes will be allowed.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable attribute-type and doctype registries and
     * registers the built-in HTML 4.01 / XHTML 1.x doctypes together with
     * the module lists each of them uses.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note This function will not call autoload, you must instantiate
     *       (and thus invoke) autoload outside the method.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Check all prefixes with $name in order they were added
     *            (HTMLPurifier_HTMLModule_ is the first prefix by default)
     *          - Check for literal object name
     *          - Trigger an E_USER_ERROR
     *       If your object name collides with an internal class, specify
     *       your module manually. All modules must have been included
     *       externally: registerModule will not perform inclusions for you!
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        // an unannounced overload is reported but still takes effect
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     * @param string|HTMLPurifier_HTMLModule $module Module name or instance;
     *        instances are remembered by their name.
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param string $prefix Class name prefix to try, in order of addition
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules are kept regardless of the allow-list
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }
        // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
        // so that its post-attr-transform gets run afterwards.
        if ($config->get('HTML.TargetNoreferrer')) {
            $modules[] = 'TargetNoreferrer';
        }
        if ($config->get('HTML.TargetNoopener')) {
            $modules[] = 'TargetNoopener';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: instantiate those given by short name
        // and re-index every module's injector list by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
        // content set assembly deals with all possible modules,
        // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param string|HTMLPurifier_HTMLModule $module Module name, or a module
     *        instance. Instances are always (re-)registered and are then
     *        tracked under their name, since objects cannot be array keys.
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            $this->registerModule($module);
            if (empty($module->name)) {
                // registerModule() has already reported the nameless module
                return;
            }
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     * @return Array of HTMLPurifier_ElementDef
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                 if you want the full version of an element
     * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef, or
     *         false when no usable (standalone) definition exists
     * @note You may notice that modules are getting iterated over twice (once
     *       in getElements() and once here). getElements() walks the modules
     *       only to discover element names, while this method walks the
     *       modules recorded in $elementLookup for one name in order to
     *       merge their definitions.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}
6652
6653
6654
6655
6656
6657/**
6658 * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
6659 * @note In Slashdot-speak, dupe means duplicate.
6660 * @note The default constructor does not accept $config or $context objects:
 *       you must use the static build() factory method to perform initialization.
6662 */
class HTMLPurifier_IDAccumulator
{

    /**
     * Set of IDs seen so far, stored as id => true.
     * @public
     */
    public $ids = array();

    /**
     * Factory: creates an accumulator pre-seeded with the IDs listed in
     * the Attr.IDBlacklist configuration directive.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     *        (accepted for interface symmetry; not consulted here)
     * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
     */
    public static function build($config, $context)
    {
        $blacklist = $config->get('Attr.IDBlacklist');
        $accumulator = new self();
        $accumulator->load($blacklist);
        return $accumulator;
    }

    /**
     * Records an ID unless it has been seen before.
     * @param string $id ID to be added.
     * @return bool true when the ID was newly recorded, false on a dupe
     */
    public function add($id)
    {
        $isNew = !isset($this->ids[$id]);
        if ($isNew) {
            $this->ids[$id] = true;
        }
        return $isNew;
    }

    /**
     * Marks every ID in the given list as seen.
     * @param $array_of_ids Array of IDs to load
     * @note Duplicates are harmless: an ID already present is simply
     *       marked again.
     */
    public function load($array_of_ids)
    {
        foreach ($array_of_ids as $seen) {
            $this->ids[$seen] = true;
        }
    }
}
6710
6711
6712
6713
6714
6715/**
6716 * Injects tokens into the document while parsing for well-formedness.
6717 * This enables "formatter-like" functionality such as auto-paragraphing,
6718 * smiley-ification and linkification to take place.
6719 *
6720 * A note on how handlers create changes; this is done by assigning a new
6721 * value to the $token reference. These values can take a variety of forms and
 * are best described in the HTMLPurifier_Strategy_MakeWellFormed->processToken()
6723 * documentation.
6724 *
6725 * @todo Allow injectors to request a re-run on their output. This
6726 *       would help if an operation is recursive.
6727 */
abstract class HTMLPurifier_Injector
{

    /**
     * Human-friendly injector name, used in error messages.
     * @type string
     */
    public $name;

    /**
     * HTML definition obtained from the config in prepare().
     * @type HTMLPurifier_HTMLDefinition
     */
    protected $htmlDefinition;

    /**
     * Bound by reference to the context's CurrentNesting variable: the list
     * of tokens we are currently "inside", outermost first.
     * @type array
     */
    protected $currentNesting;

    /**
     * Bound by reference to the context's CurrentToken variable.
     * @type HTMLPurifier_Token
     */
    protected $currentToken;

    /**
     * Bound by reference to the context's InputZipper variable.
     * @type HTMLPurifier_Zipper
     */
    protected $inputZipper;

    /**
     * Elements and attributes this injector produces, which the definition
     * therefore has to allow. Shape:
     * array('element' => array('attr', 'attr2'), 'element2')
     * @type array
     */
    public $needed = array();

    /**
     * Pending relative rewind distance, or false when no rewind is requested.
     * @type bool|int
     */
    protected $rewindOffset = false;

    /**
     * Requests that processing be re-run from an earlier spot. Handy after
     * deleting a node when earlier nodes may be affected by the change.
     * Other injectors are not affected by the rewind; careless use can
     * loop forever.
     * @param bool|int $offset
     * @warning HTML Purifier will prevent you from fast-forwarding with this
     *          function.
     */
    public function rewindOffset($offset)
    {
        $this->rewindOffset = $offset;
    }

    /**
     * Hands out the pending rewind offset and clears it in the same step.
     * @return bool|int
     */
    public function getRewindOffset()
    {
        $pending = $this->rewindOffset;
        $this->rewindOffset = false;
        return $pending;
    }

    /**
     * Wires the injector up to the config and context objects so that it
     * can hold references to the variables it works with. Also verifies,
     * through checkNeeded(), that the HTML environment supports this
     * injector.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();
        // Some unit tests ignore a failure here and still exercise
        // checkNeeded, so the definition is assigned before the check.
        $missing = $this->checkNeeded($config);
        if ($missing === false) {
            $this->currentNesting =& $context->get('CurrentNesting');
            $this->currentToken   =& $context->get('CurrentToken');
            $this->inputZipper    =& $context->get('InputZipper');
        }
        return $missing;
    }

    /**
     * Verifies that every element/attribute listed in $needed is allowed by
     * the HTML definition (e.g. the auto-paragraphing injector is pointless
     * when p tags are not allowed).
     * @param HTMLPurifier_Config $config
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function checkNeeded($config)
    {
        $definition = $config->getHTMLDefinition();
        foreach ($this->needed as $key => $value) {
            // A bare list entry carries the element name as its value;
            // a keyed entry carries it as the key, with attributes as value.
            $tag = is_int($key) ? $value : $key;
            if (!isset($definition->info[$tag])) {
                return $tag;
            }
            if (is_array($value)) {
                foreach ($value as $attr) {
                    if (!isset($definition->info[$tag]->attr[$attr])) {
                        return "$tag.$attr";
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tells whether the current context node permits a given element.
     * @param string $name Name of element to test for
     * @return bool True if element is allowed, false if it is not
     */
    public function allowsElement($name)
    {
        $inside_element = !empty($this->currentNesting);
        if ($inside_element) {
            // Peek at the innermost open element: pop it, then put it back.
            $innermost = array_pop($this->currentNesting);
            $this->currentNesting[] = $innermost;
            $parent = $this->htmlDefinition->info[$innermost->name];
        } else {
            $parent = $this->htmlDefinition->info_parent_def;
        }
        if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
            return false;
        }
        if (!$inside_element) {
            return true;
        }
        // Any remaining ancestor (the direct parent was handled above) may
        // still exclude the element.
        $depth = count($this->currentNesting) - 2;
        while ($depth >= 0) {
            $ancestor = $this->currentNesting[$depth];
            $ancestor_def = $this->htmlDefinition->info[$ancestor->name];
            if (isset($ancestor_def->excludes[$name])) {
                return false;
            }
            $depth--;
        }
        return true;
    }

    /**
     * Iterator step over the tokens following the current one; walks the
     * zipper's "back" array from its last index down to index 0.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable; null starts the iteration
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool False once the input is exhausted
     */
    protected function forward(&$i, &$current)
    {
        if ($i !== null) {
            $i--;
        } else {
            $i = count($this->inputZipper->back) - 1;
        }
        if ($i >= 0) {
            $current = $this->inputZipper->back[$i];
            return true;
        }
        return false;
    }

    /**
     * Like forward(), but additionally tracks start/end tag depth in
     * $nesting and stops at the first end tag seen while that depth is
     * zero or less.
     * @param int $i Current integer index variable; null starts the iteration
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @param int $nesting Depth counter; null is treated as 0
     * @return bool
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting)
    {
        if (!$this->forward($i, $current)) {
            return false;
        }
        if ($nesting === null) {
            $nesting = 0;
        }
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            if ($nesting <= 0) {
                return false;
            }
            $nesting--;
        }
        return true;
    }

    /**
     * Iterator step over the tokens preceding the current one; walks the
     * zipper's "front" array from its last index down to index 0.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable; null starts the iteration
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool False once the beginning of the input is reached
     */
    protected function backward(&$i, &$current)
    {
        if ($i !== null) {
            $i--;
        } else {
            $i = count($this->inputZipper->front) - 1;
        }
        if ($i >= 0) {
            $current = $this->inputZipper->front[$i];
            return true;
        }
        return false;
    }

    /**
     * Hook invoked for each text token; does nothing by default.
     */
    public function handleText(&$token)
    {
    }

    /**
     * Hook invoked for each start or empty token; does nothing by default.
     */
    public function handleElement(&$token)
    {
    }

    /**
     * Hook invoked for each end token; by default it only forwards the
     * token to the deprecated notifyEnd().
     */
    public function handleEnd(&$token)
    {
        $this->notifyEnd($token);
    }

    /**
     * Notification hook for end tokens; does nothing by default.
     * @param HTMLPurifier_Token $token Current token variable.
     * @note This differs from handlers in that the token is read-only
     * @deprecated
     */
    public function notifyEnd($token)
    {
    }
}
6994
6995
6996
6997
6998
6999/**
7000 * Represents a language and defines localizable string formatting and
7001 * other functions, as well as the localized messages for HTML Purifier.
7002 */
class HTMLPurifier_Language
{

    /**
     * ISO 639 code of this language, in its shortest possible form.
     * @type string
     */
    public $code = 'en';

    /**
     * Code of the language to fall back to, or false for none.
     * @type bool|string
     */
    public $fallback = false;

    /**
     * Localizable messages, keyed by message identifier.
     * @type array
     */
    public $messages = array();

    /**
     * Localizable error names, keyed by error number.
     * @type array
     */
    public $errorNames = array();

    /**
     * Set to true when no message file exists for the requested language
     * and English is used in its place. Inspect it if you want to tell
     * the user that the language is not supported.
     * @type bool
     */
    public $error = false;

    /**
     * Whether load() has already populated this object.
     * @type bool
     * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
     */
    public $_loaded = false;

    /**
     * @type HTMLPurifier_Config
     */
    protected $config;

    /**
     * @type HTMLPurifier_Context
     */
    protected $context;

    /**
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     */
    public function __construct($config, $context)
    {
        $this->config  = $config;
        $this->context = $context;
    }

    /**
     * Fills this object from the language factory's cache. Does nothing
     * when the object has already been loaded.
     * @note This is a lazy loader
     */
    public function load()
    {
        if (!$this->_loaded) {
            $factory = HTMLPurifier_LanguageFactory::instance();
            $factory->loadLanguage($this->code);
            // copy every key the factory knows about onto this object
            foreach ($factory->keys as $property) {
                $this->$property = $factory->cache[$this->code][$property];
            }
            $this->_loaded = true;
        }
    }

    /**
     * Looks up a localised message.
     * @param string $key string identifier of message
     * @return string localised message, or "[$key]" when there is none
     */
    public function getMessage($key)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->messages[$key]) ? $this->messages[$key] : "[$key]";
    }

    /**
     * Looks up a localised error name.
     * @param int $int error number, corresponding to PHP's error reporting
     * @return string localised name, or "[Error: $int]" when there is none
     */
    public function getErrorName($int)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->errorNames[$int]) ? $this->errorNames[$int] : "[Error: $int]";
    }

    /**
     * Joins a list into a human-readable string, using the localised
     * 'Item separator' between items and 'Item separator last' before the
     * final item.
     * @param array $array List indexed 0..n-1
     * @return string
     */
    public function listify($array)
    {
        $sep      = $this->getMessage('Item separator');
        $sep_last = $this->getMessage('Item separator last');
        $last_index = count($array) - 1;
        $joined = '';
        for ($pos = 0; $pos <= $last_index; $pos++) {
            if ($pos > 0) {
                $joined .= ($pos < $last_index) ? $sep : $sep_last;
            }
            $joined .= $array[$pos];
        }
        return $joined;
    }

    /**
     * Looks up a localised message and substitutes the given parameters.
     * For an argument with key N: scalars replace '$N'; lists replace '$N'
     * with their listified form; associative arrays provide '$N.Keys' and
     * '$N.Values'; tokens provide '$N.Name', '$N.Data', '$N.Serialized',
     * '$N.Compact' and '$N.Line'. Other objects are ignored.
     * @param string $key string identifier of message
     * @param array $args Parameters to substitute in
     * @return string localised message
     * @todo Implement conditionals? Right now, some messages make
     *     reference to line numbers, but those aren't always available
     */
    public function formatMessage($key, $args = array())
    {
        if (!$this->_loaded) {
            $this->load();
        }
        if (!isset($this->messages[$key])) {
            return "[$key]";
        }
        $template = $this->messages[$key];
        $subst = array();
        $generator = false;
        foreach ($args as $i => $value) {
            $placeholder = '$' . $i;

            if (is_array($value)) {
                $keys = array_keys($value);
                if (array_keys($keys) === $keys) {
                    // sequential keys: a plain list
                    $subst[$placeholder] = $this->listify($value);
                } else {
                    // associative array; there is no bare '$N' form for it
                    $subst[$placeholder . '.Keys'] = $this->listify($keys);
                    $subst[$placeholder . '.Values'] = $this->listify(array_values($value));
                }
                continue;
            }

            if (!is_object($value)) {
                $subst[$placeholder] = $value;
                continue;
            }

            if (!($value instanceof HTMLPurifier_Token)) {
                // objects other than tokens have no representation
                continue;
            }

            // factor this out some time
            if (!$generator) {
                $generator = $this->context->get('Generator');
            }
            if (isset($value->name)) {
                $subst[$placeholder . '.Name'] = $value->name;
            }
            if (isset($value->data)) {
                $subst[$placeholder . '.Data'] = $value->data;
            }
            $serialized = $generator->generateFromToken($value);
            $subst[$placeholder . '.Serialized'] = $serialized;
            // The compact form drops the attributes. A more elaborate
            // compaction could be introduced for all token types and may
            // then deserve a dedicated class.
            if (empty($value->attr)) {
                $subst[$placeholder . '.Compact'] = $serialized;
            } else {
                $stripped_token = clone $value;
                $stripped_token->attr = array();
                $subst[$placeholder . '.Compact'] = $generator->generateFromToken($stripped_token);
            }
            $subst[$placeholder . '.Line'] = $value->line ? $value->line : 'unknown';
        }
        return strtr($template, $subst);
    }
}
7199
7200
7201
7202
7203
7204/**
7205 * Class responsible for generating HTMLPurifier_Language objects, managing
7206 * caching and fallbacks.
7207 * @note Thanks to MediaWiki for the general logic, although this version
7208 *       has been entirely rewritten
7209 * @todo Serialized cache for languages
7210 */
class HTMLPurifier_LanguageFactory
{

    /**
     * Cache of language code information used to load HTMLPurifier_Language objects.
     * Structure is: $factory->cache[$language_code][$key] = $value
     * @type array
     */
    public $cache;

    /**
     * Valid keys in the HTMLPurifier_Language object. Designates which
     * variables to slurp out of a message file.
     * @type array
     */
    public $keys = array('fallback', 'messages', 'errorNames');

    /**
     * Instance to validate language codes.
     * @type HTMLPurifier_AttrDef_Lang
     *
     */
    protected $validator;

    /**
     * Base directory the language files are looked up under, without a
     * trailing slash. setup() assigns HTMLPURIFIER_PREFIX . '/HTMLPurifier'.
     * @type string
     */
    protected $dir;

    /**
     * Keys whose contents are a hash map and can be merged.
     * @type array
     */
    protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);

    /**
     * Keys whose contents are a list and can be merged (lookup array).
     * @type array
     */
    protected $mergeable_keys_list = array();

    /**
     * Retrieve sole instance of the factory.
     * @param HTMLPurifier_LanguageFactory $prototype Optional prototype to overload sole instance with,
     *                   or bool true to reset to default factory.
     * @return HTMLPurifier_LanguageFactory
     */
    public static function instance($prototype = null)
    {
        static $instance = null;
        if ($prototype === true) {
            // Explicit reset request. This has to be tested before the
            // generic "prototype given" case, otherwise the boolean itself
            // would be stored as the singleton.
            $instance = null;
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        if ($instance === null) {
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        }
        return $instance;
    }

    /**
     * Sets up the singleton, much like a constructor
     * @note Prevents people from getting this outside of the singleton
     */
    public function setup()
    {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
    }

    /**
     * Creates a language object, handles class fallbacks
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @param bool|string $code Code to override configuration with. Private parameter.
     * @return HTMLPurifier_Language
     */
    public function create($config, $context, $code = false)
    {
        // validate language code; without an override, %Core.Language is used
        if ($code === false) {
            $code = $config->get('Core.Language');
        }
        $code = $this->validator->validate($code, $config, $context);
        if ($code === false) {
            $code = 'en'; // malformed code becomes English
        }

        if ($code == 'en') {
            $lang = new HTMLPurifier_Language($config, $context);
        } else {
            $pcode = str_replace('-', '_', $code); // make valid PHP classname
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
            if (file_exists($file) || class_exists($class, false)) {
                $lang = new $class($config, $context);
            } else {
                // No dedicated class: build the fallback language's object
                // instead. The fallback lookup goes through loadLanguage(),
                // which reports circular fallback declarations.
                $raw_fallback = $this->getFallbackFor($code);
                $fallback = $raw_fallback ? $raw_fallback : 'en';
                $lang = $this->create($config, $context, $fallback);
                if (!$raw_fallback) {
                    // nothing at all is known about the language
                    $lang->error = true;
                }
            }
        }
        // the object always reports the code that was asked for
        $lang->code = $code;
        return $lang;
    }

    /**
     * Returns the fallback language for language
     * @note Loads the original language into cache
     * @param string $code language code
     * @return string|bool
     */
    public function getFallbackFor($code)
    {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads language into the cache, handles message file and fallbacks.
     * A message file is a PHP file that assigns $fallback, $messages and
     * $errorNames; it is included in this method's scope.
     * @param string $code language code
     */
    public function loadLanguage($code)
    {
        // Recursion guard: codes whose fallback chain is being resolved
        // right now. Entries are removed again once the chain is done, so
        // that loading the same language through another factory instance
        // later on is not mistaken for a cycle.
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) {
            return;
        }

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (file_exists($filename)) {
            include $filename;
            $cache = compact($this->keys);
        } else {
            // no message file: will rely solely on fallback
            $cache = array();
        }

        // load fallback localisation
        if (!empty($fallback)) {

            // infinite recursion guard
            if (isset($languages_seen[$code])) {
                trigger_error(
                    'Circular fallback reference in language ' .
                    $code,
                    E_USER_ERROR
                );
                // reached only when an error handler lets execution continue
                $fallback = 'en';
            }
            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            $fallback_cache = $this->cache[$fallback];

            // the chain below this language has been resolved
            unset($languages_seen[$code]);

            // merge fallback with current language
            foreach ($this->keys as $key) {
                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        // own entries win, fallback fills the gaps
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]);
                    }
                } else {
                    $cache[$key] = $fallback_cache[$key];
                }
            }
        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;
        return;
    }
}
7409
7410
7411
7412
7413
7414/**
7415 * Represents a measurable length, with a string numeric magnitude
7416 * and a unit. This object is immutable.
7417 */
class HTMLPurifier_Length
{

    /**
     * String numeric magnitude.
     * @type string
     */
    protected $n;

    /**
     * String unit. False is permitted if $n = 0.
     * @type string|bool
     */
    protected $unit;

    /**
     * Whether or not this length is valid. Null if not calculated yet.
     * @type bool
     */
    protected $isValid;

    /**
     * Array Lookup array of units recognized by CSS 3
     * @type array
     */
    protected static $allowedUnits = array(
        'em' => true, 'ex' => true, 'px' => true, 'in' => true,
        'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true,
        'ch' => true, 'rem' => true, 'vw' => true, 'vh' => true,
        'vmin' => true, 'vmax' => true
    );

    /**
     * @param string $n Magnitude
     * @param bool|string $u Unit
     */
    public function __construct($n = '0', $u = false)
    {
        $this->n = (string) $n;
        $this->unit = $u !== false ? (string) $u : false;
    }

    /**
     * Splits a string into its leading numeric part (digits, '.', '+', '-')
     * and the remaining unit. An existing length object is returned as is.
     * @param string $s Unit string, like '2em' or '3.4in'
     * @return HTMLPurifier_Length
     * @warning Does not perform validation.
     */
    public static function make($s)
    {
        if ($s instanceof HTMLPurifier_Length) {
            return $s;
        }
        $n_length = strspn($s, '1234567890.+-');
        $n = substr($s, 0, $n_length);
        $unit = substr($s, $n_length);
        if ($unit === '') {
            $unit = false;
        }
        return new HTMLPurifier_Length($n, $unit);
    }

    /**
     * Validates the number and unit. As a side effect the unit is
     * lower-cased and the magnitude is replaced by the form returned by
     * the CSS number validator.
     * @return bool
     */
    protected function validate()
    {
        // Special case: signed zero is plain zero
        if ($this->n === '+0' || $this->n === '-0') {
            $this->n = '0';
        }
        // unit-less zero is the only length allowed to omit its unit
        if ($this->n === '0' && $this->unit === false) {
            return true;
        }
        if ($this->unit === false) {
            // Any other magnitude needs a unit. Rejecting here also keeps
            // the boolean out of the string functions below.
            return false;
        }
        if (!ctype_lower($this->unit)) {
            $this->unit = strtolower($this->unit);
        }
        if (!isset(self::$allowedUnits[$this->unit])) {
            return false;
        }
        // Hack: borrow the CSS number validator for the magnitude
        $def = new HTMLPurifier_AttrDef_CSS_Number();
        $result = $def->validate($this->n, false, false);
        if ($result === false) {
            return false;
        }
        $this->n = $result;
        return true;
    }

    /**
     * Returns string representation of number.
     * @return string|bool Magnitude followed by unit, or false if this
     *         length is not valid
     */
    public function toString()
    {
        if (!$this->isValid()) {
            return false;
        }
        return $this->n . $this->unit;
    }

    /**
     * Retrieves string numeric magnitude.
     * @return string
     */
    public function getN()
    {
        return $this->n;
    }

    /**
     * Retrieves string unit.
     * @return string|bool Unit, or false if there is none
     */
    public function getUnit()
    {
        return $this->unit;
    }

    /**
     * Returns true if this length unit is valid. The result of the first
     * validation is remembered.
     * @return bool
     */
    public function isValid()
    {
        if ($this->isValid === null) {
            $this->isValid = $this->validate();
        }
        return $this->isValid;
    }

    /**
     * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal.
     * When the units differ, $l is first converted to this length's unit.
     * @param HTMLPurifier_Length $l
     * @return int|bool 1, -1 or 0; false if $l is false, if the unit
     *         conversion fails or if a magnitude is not numeric
     * @warning If both values are too large or small, this calculation will
     *          not work properly
     */
    public function compareTo($l)
    {
        if ($l === false) {
            return false;
        }
        if ($l->unit !== $this->unit) {
            $converter = new HTMLPurifier_UnitConverter();
            $l = $converter->convert($l, $this->unit);
            if ($l === false) {
                return false;
            }
        }
        // make() does not validate, so the magnitudes may be arbitrary text
        if (!is_numeric($this->n) || !is_numeric($l->n)) {
            return false;
        }
        // reduce the difference to its sign, as documented
        $difference = $this->n - $l->n;
        if ($difference > 0) {
            return 1;
        }
        if ($difference < 0) {
            return -1;
        }
        return 0;
    }
}
7572
7573
7574
7575
7576
7577/**
7578 * Forgivingly lexes HTML (SGML-style) markup into tokens.
7579 *
7580 * A lexer parses a string of SGML-style markup and converts them into
7581 * corresponding tokens.  It doesn't check for well-formedness, although its
7582 * internal mechanism may make this automatic (such as the case of
7583 * HTMLPurifier_Lexer_DOMLex).  There are several implementations to choose
7584 * from.
7585 *
7586 * A lexer is HTML-oriented: it might work with XML, but it's not
7587 * recommended, as we adhere to a subset of the specification for optimization
7588 * reasons. This might change in the future. Also, most tokenizers are not
7589 * expected to handle DTDs or PIs.
7590 *
7591 * This class should not be directly instantiated, but you may use create() to
7592 * retrieve a default copy of the lexer.  Being a supertype, this class
7593 * does not actually define any implementation, but offers commonly used
7594 * convenience functions for subclasses.
7595 *
7596 * @note The unit tests will instantiate this class for testing purposes, as
7597 *       many of the utility functions require a class to be instantiated.
7598 *       This means that, even though this class is not runnable, it will
7599 *       not be declared abstract.
7600 *
7601 * @par
7602 *
7603 * @note
7604 * We use tokens rather than create a DOM representation because DOM would:
7605 *
7606 * @par
7607 *  -# Require more processing and memory to create,
7608 *  -# Is not streamable, and
7609 *  -# Has the entire document structure (html and body not needed).
7610 *
7611 * @par
7612 * However, DOM is helpful in that it makes it easy to move around nodes
7613 * without a lot of lookaheads to see when a tag is closed. This is a
7614 * limitation of the token system and some workarounds would be nice.
7615 */
7616class HTMLPurifier_Lexer
7617{
7618
7619    /**
7620     * Whether or not this lexer implements line-number/column-number tracking.
7621     * If it does, set to true.
7622     */
7623    public $tracksLineNumbers = false;
7624
7625    // -- STATIC ----------------------------------------------------------
7626
7627    /**
7628     * Retrieves or sets the default Lexer as a Prototype Factory.
7629     *
7630     * By default HTMLPurifier_Lexer_DOMLex will be returned. There are
7631     * a few exceptions involving special features that only DirectLex
7632     * implements.
7633     *
7634     * @note The behavior of this class has changed, rather than accepting
7635     *       a prototype object, it now accepts a configuration object.
7636     *       To specify your own prototype, set %Core.LexerImpl to it.
7637     *       This change in behavior de-singletonizes the lexer object.
7638     *
7639     * @param HTMLPurifier_Config $config
7640     * @return HTMLPurifier_Lexer
7641     * @throws HTMLPurifier_Exception
7642     */
7643    public static function create($config)
7644    {
7645        if (!($config instanceof HTMLPurifier_Config)) {
7646            $lexer = $config;
7647            trigger_error(
7648                "Passing a prototype to
7649                HTMLPurifier_Lexer::create() is deprecated, please instead
7650                use %Core.LexerImpl",
7651                E_USER_WARNING
7652            );
7653        } else {
7654            $lexer = $config->get('Core.LexerImpl');
7655        }
7656
7657        $needs_tracking =
7658            $config->get('Core.MaintainLineNumbers') ||
7659            $config->get('Core.CollectErrors');
7660
7661        $inst = null;
7662        if (is_object($lexer)) {
7663            $inst = $lexer;
7664        } else {
7665            if (is_null($lexer)) {
7666                do {
7667                    // auto-detection algorithm
7668                    if ($needs_tracking) {
7669                        $lexer = 'DirectLex';
7670                        break;
7671                    }
7672
7673                    if (class_exists('DOMDocument', false) &&
7674                        method_exists('DOMDocument', 'loadHTML') &&
7675                        !extension_loaded('domxml')
7676                    ) {
7677                        // check for DOM support, because while it's part of the
7678                        // core, it can be disabled compile time. Also, the PECL
7679                        // domxml extension overrides the default DOM, and is evil
7680                        // and nasty and we shan't bother to support it
7681                        $lexer = 'DOMLex';
7682                    } else {
7683                        $lexer = 'DirectLex';
7684                    }
7685                } while (0);
7686            } // do..while so we can break
7687
7688            // instantiate recognized string names
7689            switch ($lexer) {
7690                case 'DOMLex':
7691                    $inst = new HTMLPurifier_Lexer_DOMLex();
7692                    break;
7693                case 'DirectLex':
7694                    $inst = new HTMLPurifier_Lexer_DirectLex();
7695                    break;
7696                case 'PH5P':
7697                    $inst = new HTMLPurifier_Lexer_PH5P();
7698                    break;
7699                default:
7700                    throw new HTMLPurifier_Exception(
7701                        "Cannot instantiate unrecognized Lexer type " .
7702                        htmlspecialchars($lexer)
7703                    );
7704            }
7705        }
7706
7707        if (!$inst) {
7708            throw new HTMLPurifier_Exception('No lexer was instantiated');
7709        }
7710
7711        // once PHP DOM implements native line numbers, or we
7712        // hack out something using XSLT, remove this stipulation
7713        if ($needs_tracking && !$inst->tracksLineNumbers) {
7714            throw new HTMLPurifier_Exception(
7715                'Cannot use lexer that does not support line numbers with ' .
7716                'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'
7717            );
7718        }
7719
7720        return $inst;
7721
7722    }
7723
7724    // -- CONVENIENCE MEMBERS ---------------------------------------------
7725
    /**
     * Creates the entity parser instance that parseData() and normalize()
     * delegate entity substitution to.
     */
    public function __construct()
    {
        $this->_entity_parser = new HTMLPurifier_EntityParser();
    }
7730
    /**
     * Most common entity to raw value conversion table for special entities.
     *
     * parseData() hands this table to strtr() as a cheap first decoding
     * pass; the entity parser is only consulted for what is left over.
     * The apostrophe is listed in three numeric spellings.
     * @type array
     */
    protected $_special_entity2str =
        array(
            '&quot;' => '"',
            '&amp;' => '&',
            '&lt;' => '<',
            '&gt;' => '>',
            '&#39;' => "'",
            '&#039;' => "'",
            '&#x27;' => "'"
        );
7745
7746    public function parseText($string, $config) {
7747        return $this->parseData($string, false, $config);
7748    }
7749
7750    public function parseAttr($string, $config) {
7751        return $this->parseData($string, true, $config);
7752    }
7753
7754    /**
7755     * Parses special entities into the proper characters.
7756     *
7757     * This string will translate escaped versions of the special characters
7758     * into the correct ones.
7759     *
7760     * @param string $string String character data to be parsed.
7761     * @return string Parsed character data.
7762     */
7763    public function parseData($string, $is_attr, $config)
7764    {
7765        // following functions require at least one character
7766        if ($string === '') {
7767            return '';
7768        }
7769
7770        // subtracts amps that cannot possibly be escaped
7771        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
7772            ($string[strlen($string) - 1] === '&' ? 1 : 0);
7773
7774        if (!$num_amp) {
7775            return $string;
7776        } // abort if no entities
7777        $num_esc_amp = substr_count($string, '&amp;');
7778        $string = strtr($string, $this->_special_entity2str);
7779
7780        // code duplication for sake of optimization, see above
7781        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
7782            ($string[strlen($string) - 1] === '&' ? 1 : 0);
7783
7784        if ($num_amp_2 <= $num_esc_amp) {
7785            return $string;
7786        }
7787
7788        // hmm... now we have some uncommon entities. Use the callback.
7789        if ($config->get('Core.LegacyEntityDecoder')) {
7790            $string = $this->_entity_parser->substituteSpecialEntities($string);
7791        } else {
7792            if ($is_attr) {
7793                $string = $this->_entity_parser->substituteAttrEntities($string);
7794            } else {
7795                $string = $this->_entity_parser->substituteTextEntities($string);
7796            }
7797        }
7798        return $string;
7799    }
7800
    /**
     * Lexes an HTML string into tokens.
     *
     * This base implementation does no lexing: it only raises an
     * E_USER_ERROR. NOTE(review): presumably every concrete lexer
     * overrides this method - confirm against the subclasses.
     * @param string $string HTML to lex.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] array representation of HTML.
     */
    public function tokenizeHTML($string, $config, $context)
    {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }
7812
7813    /**
7814     * Translates CDATA sections into regular sections (through escaping).
7815     * @param string $string HTML string to process.
7816     * @return string HTML with CDATA sections escaped.
7817     */
7818    protected static function escapeCDATA($string)
7819    {
7820        return preg_replace_callback(
7821            '/<!\[CDATA\[(.+?)\]\]>/s',
7822            array('HTMLPurifier_Lexer', 'CDATACallback'),
7823            $string
7824        );
7825    }
7826
7827    /**
7828     * Special CDATA case that is especially convoluted for <script>
7829     * @param string $string HTML string to process.
7830     * @return string HTML with CDATA sections escaped.
7831     */
7832    protected static function escapeCommentedCDATA($string)
7833    {
7834        return preg_replace_callback(
7835            '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
7836            array('HTMLPurifier_Lexer', 'CDATACallback'),
7837            $string
7838        );
7839    }
7840
7841    /**
7842     * Special Internet Explorer conditional comments should be removed.
7843     * @param string $string HTML string to process.
7844     * @return string HTML with conditional comments removed.
7845     */
7846    protected static function removeIEConditional($string)
7847    {
7848        return preg_replace(
7849            '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
7850            '',
7851            $string
7852        );
7853    }
7854
7855    /**
7856     * Callback function for escapeCDATA() that does the work.
7857     *
7858     * @warning Though this is public in order to let the callback happen,
7859     *          calling it directly is not recommended.
7860     * @param array $matches PCRE matches array, with index 0 the entire match
7861     *                  and 1 the inside of the CDATA section.
7862     * @return string Escaped internals of the CDATA section.
7863     */
7864    protected static function CDATACallback($matches)
7865    {
7866        // not exactly sure why the character set is needed, but whatever
7867        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
7868    }
7869
7870    /**
7871     * Takes a piece of HTML and normalizes it by converting entities, fixing
7872     * encoding, extracting bits, and other good stuff.
7873     * @param string $html HTML.
7874     * @param HTMLPurifier_Config $config
7875     * @param HTMLPurifier_Context $context
7876     * @return string
7877     * @todo Consider making protected
7878     */
7879    public function normalize($html, $config, $context)
7880    {
7881        // normalize newlines to \n
7882        if ($config->get('Core.NormalizeNewlines')) {
7883            $html = str_replace("\r\n", "\n", $html);
7884            $html = str_replace("\r", "\n", $html);
7885        }
7886
7887        if ($config->get('HTML.Trusted')) {
7888            // escape convoluted CDATA
7889            $html = $this->escapeCommentedCDATA($html);
7890        }
7891
7892        // escape CDATA
7893        $html = $this->escapeCDATA($html);
7894
7895        $html = $this->removeIEConditional($html);
7896
7897        // extract body from document if applicable
7898        if ($config->get('Core.ConvertDocumentToFragment')) {
7899            $e = false;
7900            if ($config->get('Core.CollectErrors')) {
7901                $e =& $context->get('ErrorCollector');
7902            }
7903            $new_html = $this->extractBody($html);
7904            if ($e && $new_html != $html) {
7905                $e->send(E_WARNING, 'Lexer: Extracted body');
7906            }
7907            $html = $new_html;
7908        }
7909
7910        // expand entities that aren't the big five
7911        if ($config->get('Core.LegacyEntityDecoder')) {
7912            $html = $this->_entity_parser->substituteNonSpecialEntities($html);
7913        }
7914
7915        // clean into wellformed UTF-8 string for an SGML context: this has
7916        // to be done after entity expansion because the entities sometimes
7917        // represent non-SGML characters (horror, horror!)
7918        $html = HTMLPurifier_Encoder::cleanUTF8($html);
7919
7920        // if processing instructions are to removed, remove them now
7921        if ($config->get('Core.RemoveProcessingInstructions')) {
7922            $html = preg_replace('#<\?.+?\?>#s', '', $html);
7923        }
7924
7925        $hidden_elements = $config->get('Core.HiddenElements');
7926        if ($config->get('Core.AggressivelyRemoveScript') &&
7927            !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
7928                || empty($hidden_elements["script"]))) {
7929            $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html);
7930        }
7931
7932        return $html;
7933    }
7934
7935    /**
7936     * Takes a string of HTML (fragment or document) and returns the content
7937     * @todo Consider making protected
7938     */
7939    public function extractBody($html)
7940    {
7941        $matches = array();
7942        $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches);
7943        if ($result) {
7944            // Make sure it's not in a comment
7945            $comment_start = strrpos($matches[1], '<!--');
7946            $comment_end   = strrpos($matches[1], '-->');
7947            if ($comment_start === false ||
7948                ($comment_end !== false && $comment_end > $comment_start)) {
7949                return $matches[2];
7950            }
7951        }
7952        return $html;
7953    }
7954}
7955
7956
7957
7958
7959
7960/**
7961 * Abstract base node class that all others inherit from.
7962 *
7963 * Why do we not use the DOM extension?  (1) It is not always available,
7964 * (2) it has funny constraints on the data it can represent,
7965 * whereas we want a maximally flexible representation, and (3) its
7966 * interface is a bit cumbersome.
7967 */
abstract class HTMLPurifier_Node
{
    /**
     * Line number of the start token in the source document.
     * @type int
     */
    public $line;

    /**
     * Column number of the start token in the source document. Null if unknown.
     * @type int
     */
    public $col;

    /**
     * Lookup array of processing that this token is exempt from.
     * Currently, valid values are "ValidateAttributes".
     * @type array
     */
    public $armor = array();

    /**
     * When true, this node should be ignored as non-existent.
     *
     * Who is responsible for ignoring dead nodes?  FixNesting is
     * responsible for removing them before passing on to child
     * validators.
     * @type bool
     */
    public $dead = false;

    /**
     * Returns a pair of start and end tokens, where the end token
     * is null if it is not necessary. Does not include children.
     * @return array
     */
    abstract public function toTokenPair();
}
8005
8006
8007
8008
8009
8010/**
8011 * Class that handles operations involving percent-encoding in URIs.
8012 *
8013 * @warning
8014 *      Be careful when reusing instances of PercentEncoder. The object
8015 *      you use for normalize() SHOULD NOT be used for encode(), or
8016 *      vice-versa.
8017 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table keyed by byte value: characters that encode() leaves
     * unescaped and that normalize() decodes when it finds them escaped.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the lookup table of characters to leave alone.
     * @param string|bool $preserve Extra characters to preserve, given as
     *                    a plain string; false (the default) adds none.
     */
    public function __construct($preserve = false)
    {
        // unreserved characters, ought to const-ify:
        // digits, upper-case, lower-case, then - . _ ~
        $unreserved = array_merge(
            range(48, 57),
            range(65, 90),
            range(97, 122),
            array(45, 46, 95, 126)
        );
        foreach ($unreserved as $code) {
            $this->preserve[$code] = true;
        }

        // extra letters not to escape
        if ($preserve !== false) {
            $count = strlen($preserve);
            for ($pos = 0; $pos < $count; $pos++) {
                $this->preserve[ord($preserve[$pos])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: escapes every byte that is neither in
     * the preserve table nor a percent sign.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $encoded = '';
        $count = strlen($string);
        for ($pos = 0; $pos < $count; $pos++) {
            $char = $string[$pos];
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $encoded .= $char;
            } else {
                $encoded .= sprintf('%%%02X', ord($char));
            }
        }
        return $encoded;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * A percent sign not followed by two hex digits becomes %25; escapes of
     * preserved characters are decoded; all other escapes are upper-cased.
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }
        $segments = explode('%', $string);
        // everything before the first percent sign passes through untouched
        $normalized = array_shift($segments);
        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                // not a valid escape: escape the percent sign itself
                $normalized .= '%25' . $segment;
                continue;
            }
            $tail = substr($segment, 2);
            $code = hexdec($hex);
            if (isset($this->preserve[$code])) {
                $normalized .= chr($code) . $tail;
            } else {
                $normalized .= '%' . strtoupper($hex) . $tail;
            }
        }
        return $normalized;
    }
}
8117
8118
8119
8120
8121
8122/**
8123 * Generic property list implementation
8124 */
class HTMLPurifier_PropertyList
{
    /**
     * Internal data-structure for properties.
     * @type array
     */
    protected $data = array();

    /**
     * Parent plist.
     * @type HTMLPurifier_PropertyList
     */
    protected $parent;

    /**
     * Result of the last squash(), or null when it has to be rebuilt.
     * @type array
     */
    protected $cache;

    /**
     * @param HTMLPurifier_PropertyList $parent Parent plist
     */
    public function __construct($parent = null)
    {
        $this->parent = $parent;
    }

    /**
     * Recursively retrieves the value for a key
     * @param string $name
     * @return mixed Value stored in this plist or, failing that, in the
     *               nearest ancestor that has the key.
     * @throws HTMLPurifier_Exception When no plist in the chain has the key.
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->data[$name];
        }
        // possible performance bottleneck, convert to iterative if necessary
        if ($this->parent) {
            return $this->parent->get($name);
        }
        throw new HTMLPurifier_Exception("Key '$name' not found");
    }

    /**
     * Sets the value of a key, for this plist
     * @param string $name
     * @param mixed $value
     */
    public function set($name, $value)
    {
        $this->data[$name] = $value;
        // the squashed view no longer matches the data
        $this->cache = null;
    }

    /**
     * Returns true if a given key exists in this plist itself; parents
     * are not consulted.
     * @param string $name
     * @return bool
     */
    public function has($name)
    {
        return array_key_exists($name, $this->data);
    }

    /**
     * Resets a value to the value of its parent, usually the default. If
     * no value is specified, the entire plist is reset.
     * @param string $name
     */
    public function reset($name = null)
    {
        // strict check: only an omitted/null name clears everything, so
        // that keys such as '' or 0 can be reset individually
        if ($name === null) {
            $this->data = array();
        } else {
            unset($this->data[$name]);
        }
        $this->cache = null;
    }

    /**
     * Squashes this property list and all of its property lists into a single
     * array, and returns the array. This value is cached by default.
     *
     * The cache is dropped whenever this plist itself is modified. Changes
     * made to a parent after this plist was squashed are only picked up
     * when $force is true.
     * @param bool $force If true, ignores the cache and regenerates the array.
     * @return array
     */
    public function squash($force = false)
    {
        if ($this->cache !== null && !$force) {
            return $this->cache;
        }
        if ($this->parent) {
            return $this->cache = array_merge($this->parent->squash($force), $this->data);
        } else {
            return $this->cache = $this->data;
        }
    }

    /**
     * Returns the parent plist.
     * @return HTMLPurifier_PropertyList
     */
    public function getParent()
    {
        return $this->parent;
    }

    /**
     * Sets the parent plist.
     * @param HTMLPurifier_PropertyList $plist Parent plist
     */
    public function setParent($plist)
    {
        $this->parent = $plist;
        // inherited values may have changed
        $this->cache = null;
    }
}
8240
8241
8242
8243
8244
8245/**
8246 * Property list iterator. Do not instantiate this class directly.
8247 */
class HTMLPurifier_PropertyListIterator extends FilterIterator
{

    /**
     * Length of the prefix filter; 0 when no filtering takes place.
     * @type int
     */
    protected $l;
    /**
     * Prefix a key must start with to be accepted, or null for no filter.
     * @type string
     */
    protected $filter;

    /**
     * @param Iterator $iterator Array of data to iterate over
     * @param string $filter Optional prefix; only entries whose key starts
     *                       with it are yielded. Null accepts every entry.
     */
    public function __construct(Iterator $iterator, $filter = null)
    {
        parent::__construct($iterator);
        // strlen(null) is deprecated as of PHP 8.1, so spell out the
        // "no filter" case instead of relying on null being coerced
        $this->l = ($filter === null) ? 0 : strlen($filter);
        $this->filter = $filter;
    }

    /**
     * Accepts the current entry when its key starts with the filter prefix.
     *
     * The attribute below silences the PHP 8.1+ return-type deprecation for
     * overriding FilterIterator::accept(); older PHP versions read that
     * line as a plain comment.
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function accept()
    {
        if ($this->l === 0) {
            // empty prefix: everything matches
            return true;
        }
        $key = $this->getInnerIterator()->key();
        return strncmp((string)$key, $this->filter, $this->l) === 0;
    }
}
8283
8284
8285
8286
8287
8288/**
8289 * A simple array-backed queue, based off of the classic Okasaki
8290 * persistent amortized queue.  The basic idea is to maintain two
8291 * stacks: an input stack and an output stack.  When the output
8292 * stack runs out, reverse the input stack and use it as the output
8293 * stack.
8294 *
8295 * We don't use the SPL implementation because it's only supported
8296 * on PHP 5.3 and later.
8297 *
8298 * Exercise: Prove that push/pop on this queue take amortized O(1) time.
8299 *
8300 * Exercise: Extend this queue to be a deque, while preserving amortized
8301 * O(1) time.  Some care must be taken on rebalancing to avoid quadratic
8302 * behaviour caused by repeatedly shuffling data from the input stack
8303 * to the output stack and back.
8304 */
class HTMLPurifier_Queue
{
    /**
     * Stack that receives newly pushed elements.
     * @type array
     */
    private $input;

    /**
     * Stack elements are shifted from; refilled from the reversed input
     * stack whenever it runs dry.
     * @type array
     */
    private $output;

    /**
     * @param array $input Initial contents; the first element is the
     *                     first one to be shifted off.
     */
    public function __construct($input = array())
    {
        $this->input = $input;
        $this->output = array();
    }

    /**
     * Shifts an element off the front of the queue.
     * @return mixed The oldest element, or null when the queue is empty.
     */
    public function shift()
    {
        if (!$this->output) {
            // output stack exhausted: flip the input stack over
            $this->output = array_reverse($this->input);
            $this->input = array();
        }
        return $this->output ? array_pop($this->output) : null;
    }

    /**
     * Appends an element to the back of the queue.
     * @param mixed $x
     */
    public function push($x)
    {
        array_push($this->input, $x);
    }

    /**
     * Checks if it's empty.
     * @return bool
     */
    public function isEmpty()
    {
        return !($this->input || $this->output);
    }
}
8342
8343
8344
8345/**
8346 * Supertype for classes that define a strategy for modifying/purifying tokens.
8347 *
8348 * While HTMLPurifier's core purpose is fixing HTML into something proper,
8349 * strategies provide plug points for extra configuration or even extra
8350 * features, such as custom tags, custom parsing of text, etc.
8351 */
8352
8353
abstract class HTMLPurifier_Strategy
{

    /**
     * Executes the strategy on the tokens.
     *
     * Declared abstract: every concrete strategy has to provide its own
     * implementation.
     *
     * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token objects to be operated on.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] Processed array of token objects.
     */
    abstract public function execute($tokens, $config, $context);
}
8367
8368
8369
8370
8371
8372/**
8373 * This is in almost every respect equivalent to an array except
8374 * that it keeps track of which keys were accessed.
8375 *
8376 * @warning For the sake of backwards compatibility with early versions
8377 *     of PHP 5, you must not use the $hash[$key] syntax; if you do
8378 *     our version of offsetGet is never called.
8379 */
class HTMLPurifier_StringHash extends ArrayObject
{
    /**
     * Lookup of every index requested through offsetGet() since
     * construction or the last resetAccessed() call.
     * @type array
     */
    protected $accessed = array();

    /**
     * Retrieves a value, and logs the access.
     *
     * The attribute below silences the PHP 8.1+ return-type deprecation for
     * overriding ArrayObject::offsetGet(); older PHP versions read that
     * line as a plain comment.
     * @param mixed $index
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($index)
    {
        $this->accessed[$index] = true;
        return parent::offsetGet($index);
    }

    /**
     * Returns a lookup array of all array indexes that have been accessed.
     * @return array in form array($index => true).
     */
    public function getAccessed()
    {
        return $this->accessed;
    }

    /**
     * Resets the access array.
     */
    public function resetAccessed()
    {
        $this->accessed = array();
    }
}
8415
8416
8417
8418
8419
8420/**
8421 * Parses string hash files. File format is as such:
8422 *
8423 *      DefaultKeyValue
8424 *      KEY: Value
8425 *      KEY2: Value2
8426 *      --MULTILINE-KEY--
8427 *      Multiline
8428 *      value.
8429 *
8430 * Which would output something similar to:
8431 *
8432 *      array(
8433 *          'ID' => 'DefaultKeyValue',
8434 *          'KEY' => 'Value',
8435 *          'KEY2' => 'Value2',
8436 *          'MULTILINE-KEY' => "Multiline\nvalue.\n",
8437 *      )
8438 *
8439 * We use this as an easy to use file-format for configuration schema
8440 * files, but the class itself is usage agnostic.
8441 *
8442 * You can use ---- to forcibly terminate parsing of a single string-hash;
8443 * this marker is used in multi string-hashes to delimit boundaries.
8444 */
class HTMLPurifier_StringHashParser
{

    /**
     * Key under which a line without a "KEY:" prefix is stored.
     * @type string
     */
    public $default = 'ID';

    /**
     * Parses a file that contains a single string-hash.
     * @param string $file
     * @return array|bool The parsed hash, or false when the file does not
     *                    exist or cannot be opened.
     */
    public function parseFile($file)
    {
        $fh = file_exists($file) ? fopen($file, 'r') : false;
        if (!$fh) {
            return false;
        }
        $hash = $this->parseHandle($fh);
        fclose($fh);
        return $hash;
    }

    /**
     * Parses a file that contains multiple string-hashes delimited by '----'
     * @param string $file
     * @return array|bool List of parsed hashes in file order, or false when
     *                    the file does not exist or cannot be opened.
     */
    public function parseMultiFile($file)
    {
        $fh = file_exists($file) ? fopen($file, 'r') : false;
        if (!$fh) {
            return false;
        }
        $hashes = array();
        // each call consumes one hash, up to its '----' line or the end
        while (!feof($fh)) {
            $hashes[] = $this->parseHandle($fh);
        }
        fclose($fh);
        return $hashes;
    }

    /**
     * Internal parser that accepts a file handle.
     *
     * Reads lines until end of file or a '----' line and returns the
     * key/value pairs found up to that point.
     * @note While it's possible to simulate in-memory parsing by using
     *       custom stream wrappers, if such a use-case arises we should
     *       factor out the file handle into its own class.
     * @param resource $fh File handle with pointer at start of valid string-hash
     *            block.
     * @return array
     */
    protected function parseHandle($fh)
    {
        // key of the multiline value being collected, false when none is
        $key = false;
        $hash = array();
        do {
            $line = fgets($fh);
            if ($line === false) {
                break;
            }
            $line = rtrim($line, "\n\r");

            // blank lines only count inside a multiline value
            if (!$key && $line === '') {
                continue;
            }
            // end of this string-hash
            if ($line === '----') {
                break;
            }
            // comment
            if (strncmp('--#', $line, 3) === 0) {
                continue;
            }
            // multiline declaration: later lines are appended to this key
            if (strncmp('--', $line, 2) === 0) {
                $key = trim($line, '- ');
                if (!isset($hash[$key])) {
                    $hash[$key] = '';
                }
                continue;
            }
            // body line of a multiline value
            if ($key) {
                $hash[$key] .= "$line\n";
                continue;
            }
            // single-line declaration, or a bare value for the default key
            if (strpos($line, ':') !== false) {
                $pair = explode(':', $line, 2);
                $hash[$pair[0]] = trim($pair[1]);
            } else {
                $hash[$this->default] = $line;
            }
        } while (!feof($fh));
        return $hash;
    }
}
8552
8553
8554
8555
8556
8557/**
8558 * Defines a mutation of an obsolete tag into a valid tag.
8559 */
abstract class HTMLPurifier_TagTransform
{

    /**
     * Tag name to transform the tag to.
     * @type string
     */
    public $transform_to;

    /**
     * Transforms the obsolete tag into the valid tag.
     * @param HTMLPurifier_Token_Tag $tag Tag to be transformed.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
     */
    abstract public function transform($tag, $config, $context);

    /**
     * Puts CSS in front of whatever the style attribute already holds;
     * the attribute is created when it is missing.
     * @warning Copied over from AttrTransform, be sure to keep in sync
     * @param array $attr Attribute array to process (passed by reference)
     * @param string $css CSS to prepend
     */
    protected function prependCSS(&$attr, $css)
    {
        $existing = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = $css . $existing;
    }
}
8590
8591
8592
8593
8594
8595/**
8596 * Abstract base token class that all others inherit from.
8597 */
8598abstract class HTMLPurifier_Token
8599{
8600    /**
8601     * Line number node was on in source document. Null if unknown.
8602     * @type int
8603     */
8604    public $line;
8605
8606    /**
8607     * Column of line node was on in source document. Null if unknown.
8608     * @type int
8609     */
8610    public $col;
8611
8612    /**
8613     * Lookup array of processing that this token is exempt from.
8614     * Currently, valid values are "ValidateAttributes" and
8615     * "MakeWellFormed_TagClosedError"
8616     * @type array
8617     */
8618    public $armor = array();
8619
8620    /**
8621     * Used during MakeWellFormed.  See Note [Injector skips]
8622     * @type
8623     */
8624    public $skip;
8625
8626    /**
8627     * @type
8628     */
8629    public $rewind;
8630
8631    /**
8632     * @type
8633     */
8634    public $carryover;
8635
8636    /**
8637     * @param string $n
8638     * @return null|string
8639     */
8640    public function __get($n)
8641    {
8642        if ($n === 'type') {
8643            trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
8644            switch (get_class($this)) {
8645                case 'HTMLPurifier_Token_Start':
8646                    return 'start';
8647                case 'HTMLPurifier_Token_Empty':
8648                    return 'empty';
8649                case 'HTMLPurifier_Token_End':
8650                    return 'end';
8651                case 'HTMLPurifier_Token_Text':
8652                    return 'text';
8653                case 'HTMLPurifier_Token_Comment':
8654                    return 'comment';
8655                default:
8656                    return null;
8657            }
8658        }
8659    }
8660
8661    /**
8662     * Sets the position of the token in the source document.
8663     * @param int $l
8664     * @param int $c
8665     */
8666    public function position($l = null, $c = null)
8667    {
8668        $this->line = $l;
8669        $this->col = $c;
8670    }
8671
8672    /**
8673     * Convenience function for DirectLex settings line/col position.
8674     * @param int $l
8675     * @param int $c
8676     */
8677    public function rawPosition($l, $c)
8678    {
8679        if ($c === -1) {
8680            $l++;
8681        }
8682        $this->line = $l;
8683        $this->col = $c;
8684    }
8685
8686    /**
8687     * Converts a token into its corresponding node.
8688     */
    // Declared abstract: no implementation is inherited from this class,
    // so every concrete token class has to provide its own conversion.
    abstract public function toNode();
8690}
8691
8692
8693
8694
8695
8696/**
8697 * Factory for token generation.
8698 *
8699 * @note Doing some benchmarking indicates that the new operator is much
8700 *       slower than the clone operator (even discounting the cost of the
8701 *       constructor).  This class is for that optimization.
 *       Other than that, there's not much point as we don't
8703 *       maintain parallel HTMLPurifier_Token hierarchies (the main reason why
8704 *       you'd want to use an abstract factory).
8705 * @todo Port DirectLex to use this
8706 */
class HTMLPurifier_TokenFactory
{
    // The p_ prefix stands for "prototype": one blank token of each kind
    // is built once and then cloned for every token that is requested.

    /**
     * Prototype cloned by createStart().
     * @type HTMLPurifier_Token_Start
     */
    private $p_start;

    /**
     * Prototype cloned by createEnd().
     * @type HTMLPurifier_Token_End
     */
    private $p_end;

    /**
     * Prototype cloned by createEmpty().
     * @type HTMLPurifier_Token_Empty
     */
    private $p_empty;

    /**
     * Prototype cloned by createText().
     * @type HTMLPurifier_Token_Text
     */
    private $p_text;

    /**
     * Prototype cloned by createComment().
     * @type HTMLPurifier_Token_Comment
     */
    private $p_comment;

    /**
     * Builds the blank prototype tokens that the create*() methods clone.
     */
    public function __construct()
    {
        $this->p_start = new HTMLPurifier_Token_Start('', array());
        $this->p_end = new HTMLPurifier_Token_End('');
        $this->p_empty = new HTMLPurifier_Token_Empty('', array());
        $this->p_text = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Produces a start-tag token by cloning the prototype and re-running
     * its constructor with the supplied values.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Start Freshly initialised clone
     */
    public function createStart($name, $attr = array())
    {
        $token = clone $this->p_start;
        // Re-invoking the constructor initialises the clone in place.
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Produces an end-tag token by cloning the prototype.
     * @param string $name Tag name
     * @return HTMLPurifier_Token_End Freshly initialised clone
     */
    public function createEnd($name)
    {
        $token = clone $this->p_end;
        $token->__construct($name);
        return $token;
    }

    /**
     * Produces an empty-tag token by cloning the prototype.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Empty Freshly initialised clone
     */
    public function createEmpty($name, $attr = array())
    {
        $token = clone $this->p_empty;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Produces a text token by cloning the prototype.
     * @param string $data Data of text token
     * @return HTMLPurifier_Token_Text Freshly initialised clone
     */
    public function createText($data)
    {
        $token = clone $this->p_text;
        $token->__construct($data);
        return $token;
    }

    /**
     * Produces a comment token by cloning the prototype.
     * @param string $data Data of comment token
     * @return HTMLPurifier_Token_Comment Freshly initialised clone
     */
    public function createComment($data)
    {
        $token = clone $this->p_comment;
        $token->__construct($data);
        return $token;
    }
}
8810
8811
8812
8813
8814
8815/**
8816 * HTML Purifier's internal representation of a URI.
8817 * @note
8818 *      Internal data-structures are completely escaped. If the data needs
8819 *      to be used in a non-URI context (which is very unlikely), be sure
8820 *      to decode it first. The URI may not necessarily be well-formed until
8821 *      validate() is called.
8822 */
class HTMLPurifier_URI
{
    /**
     * Scheme name, lowercased by the constructor; null when absent.
     * @type string
     */
    public $scheme;

    /**
     * User information part of the authority; null when absent.
     * @type string
     */
    public $userinfo;

    /**
     * Host part of the authority. Null means there is no authority at
     * all, an empty string means an empty authority; toString() renders
     * the two differently.
     * @type string
     */
    public $host;

    /**
     * Port number, cast to int by the constructor; null when absent.
     * @type int
     */
    public $port;

    /**
     * @type string
     */
    public $path;

    /**
     * Query without the leading '?'; null when absent.
     * @type string
     */
    public $query;

    /**
     * Fragment without the leading '#'; null when absent.
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lowercase.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when no scheme object could
     *         be obtained (a warning is raised only when a non-null
     *         default scheme could not be loaded).
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                if ($def->defaultScheme !== null) {
                    // something funky happened to the default scheme object
                    trigger_error(
                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
                        E_USER_WARNING
                    );
                } // suppress error if it's null
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this implementation repairs components in place and
     *         always returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The host alternatives are parenthesised explicitly: without
        // the parentheses && binds tighter than ||, so a blank host
        // entered this branch even when there was no scheme to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out,
                    // leaving a path that would be read as an authority:
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded without ':' in the safe
                // set, so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme object can be missing (for example when the
        // default scheme is null) even though this URI's own scheme
        // resolved. Guard the property read so that case no longer
        // dereferences null; a missing default is treated as not secure,
        // which is the result the unguarded read produced as well.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
9127
9128
9129
9130
9131
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{

    public $type = 'URI';

    /**
     * Active filters run by filter(), keyed by filter name.
     */
    protected $filters = array();

    /**
     * Active filters run by postFilter(), keyed by filter name.
     */
    protected $postFilters = array();

    /**
     * Filters known to this definition, keyed by filter name. Discarded
     * once setupFilters() has decided which of them become active.
     */
    protected $registeredFilters = array();

    /**
     * HTMLPurifier_URI object of the base specified at %URI.Base
     */
    public $base;

    /**
     * String host to consider "home" base, derived off of $base
     */
    public $host;

    /**
     * Name of default scheme based on %URI.DefaultScheme and %URI.Base
     */
    public $defaultScheme;

    /**
     * Registers the built-in URI filters; whether each one is actually
     * used is decided later by setupFilters().
     */
    public function __construct()
    {
        $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
        $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
        $this->registerFilter(new HTMLPurifier_URIFilter_DisableResources());
        $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
        $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
        $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
        $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
    }

    /**
     * Makes a filter known under its name without activating it; a
     * filter registered later under the same name replaces the earlier one.
     * @param HTMLPurifier_URIFilter $filter
     */
    public function registerFilter($filter)
    {
        $this->registeredFilters[$filter->name] = $filter;
    }

    /**
     * Prepares a filter and, unless preparation reports false, activates
     * it in the regular or the post chain depending on its $post flag.
     * @param HTMLPurifier_URIFilter $filter
     * @param HTMLPurifier_Config $config
     */
    public function addFilter($filter, $config)
    {
        // Only a strict false vetoes the filter; null is ok, for
        // backwards compat.
        if ($filter->prepare($config) === false) {
            return;
        }
        if ($filter->post) {
            $this->postFilters[$filter->name] = $filter;
        } else {
            $this->filters[$filter->name] = $filter;
        }
    }

    /**
     * Populates the member variables, then activates the filters.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->setupMemberVariables($config);
        $this->setupFilters($config);
    }

    /**
     * Activates every registered filter that is either flagged
     * always_load or whose %URI.<name> directive is neither false nor null.
     * @param HTMLPurifier_Config $config
     */
    protected function setupFilters($config)
    {
        foreach ($this->registeredFilters as $name => $filter) {
            if (!$filter->always_load) {
                $conf = $config->get('URI.' . $name);
                if ($conf === false || $conf === null) {
                    continue;
                }
            }
            $this->addFilter($filter, $config);
        }
        unset($this->registeredFilters);
    }

    /**
     * Derives $host, $base and $defaultScheme from %URI.Host, %URI.Base
     * and %URI.DefaultScheme. Values taken from the base URI only fill
     * in a host that was not configured explicitly.
     * @param HTMLPurifier_Config $config
     */
    protected function setupMemberVariables($config)
    {
        $this->host = $config->get('URI.Host');
        $base_uri = $config->get('URI.Base');
        if ($base_uri !== null) {
            $parser = new HTMLPurifier_URIParser();
            $this->base = $parser->parse($base_uri);
            $this->defaultScheme = $this->base->scheme;
            if ($this->host === null) {
                $this->host = $this->base->host;
            }
        }
        if ($this->defaultScheme === null) {
            $this->defaultScheme = $config->get('URI.DefaultScheme');
        }
    }

    /**
     * Looks up the scheme object for $defaultScheme in the scheme registry.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return mixed Whatever the registry returns for the default scheme
     */
    public function getDefaultScheme($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        return $registry->getScheme($this->defaultScheme, $config, $context);
    }

    /**
     * Runs the regular filter chain over the URI.
     * @param HTMLPurifier_URI $uri Passed by reference to each filter
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False as soon as one filter returns a falsy result
     */
    public function filter(&$uri, $config, $context)
    {
        return $this->runChain($this->filters, $uri, $config, $context);
    }

    /**
     * Runs the post filter chain over the URI.
     * @param HTMLPurifier_URI $uri Passed by reference to each filter
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False as soon as one filter returns a falsy result
     */
    public function postFilter(&$uri, $config, $context)
    {
        return $this->runChain($this->postFilters, $uri, $config, $context);
    }

    /**
     * Applies the given filters in order, stopping at the first one
     * whose result is falsy.
     * @param array $chain Filters to apply
     * @param HTMLPurifier_URI $uri Passed by reference to each filter
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    private function runChain($chain, &$uri, $config, $context)
    {
        foreach ($chain as $f) {
            if (!$f->filter($uri, $config, $context)) {
                return false;
            }
        }
        return true;
    }

}
9240
9241
9242
9243
9244
9245/**
9246 * Chainable filters for custom URI processing.
9247 *
9248 * These filters can perform custom actions on a URI filter object,
9249 * including transformation or blacklisting.  A filter named Foo
9250 * must have a corresponding configuration directive %URI.Foo,
9251 * unless always_load is specified to be true.
9252 *
9253 * The following contexts may be available while URIFilters are being
9254 * processed:
9255 *
9256 *      - EmbeddedURI: true if URI is an embedded resource that will
9257 *        be loaded automatically on page load
9258 *      - CurrentToken: a reference to the token that is currently
9259 *        being processed
9260 *      - CurrentAttr: the name of the attribute that is currently being
9261 *        processed
9262 *      - CurrentCSSProperty: the name of the CSS property that is
9263 *        currently being processed (if applicable)
9264 *
9265 * @warning This filter is called before scheme object validation occurs.
 *          Make sure, if you require a specific scheme object, that
 *          you check that it exists. This allows filters to convert
9268 *          proprietary URI schemes into regular ones.
9269 */
abstract class HTMLPurifier_URIFilter
{

    /**
     * Name that identifies this filter; it should be unique among the
     * filters in use. No default is provided here.
     * @type string
     */
    public $name;

    /**
     * Set to true to have this filter run after scheme validation
     * rather than before it.
     * @type bool
     */
    public $post = false;

    /**
     * Set to true to load this filter unconditionally, so that a filter
     * named Foo does not need a matching %URI.Foo directive.
     * @type bool
     */
    public $always_load = false;

    /**
     * Initialization hook for the filter. Returning false signals that
     * the filter should not be considered active; this default
     * implementation does nothing and reports true.
     * @param HTMLPurifier_Config $config
     * @return bool
     */
    public function prepare($config)
    {
        return true;
    }

    /**
     * Filters a URI object. Any changes are made directly on the URI
     * object, which is received by reference.
     * @param HTMLPurifier_URI $uri Reference to URI object variable
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True to continue processing, false to indicate that
     *         the URL is no good
     */
    abstract public function filter(&$uri, $config, $context);
}
9315
9316
9317
9318
9319
9320/**
9321 * Parses a URI into the components and fragment identifier as specified
9322 * by RFC 3986.
9323 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI|bool Representation of the URI, or false
     *         when the URI regexp does not match. The representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // separate out parts
        // The outer groups tested here include their delimiter (':',
        // '//', '?', '#'), so they can never be the string "0" and
        // empty() is a safe presence test for them.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // The host group carries no delimiter, so it must not be
            // tested with empty(): empty("0") is true, which turned the
            // host "0" into an empty host.
            $host       = isset($matches[3]) ? $matches[3] : '';
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
9387
9388
9389
9390
9391
9392/**
9393 * Validator for the components of a URI for a specific scheme
9394 */
abstract class HTMLPurifier_URIScheme
{

    /**
     * Default port of the scheme (integer). An explicit port equal to
     * this value is removed from the URI during validation.
     * @type int
     */
    public $default_port = null;

    /**
     * Whether a browser can locate URIs of this scheme: http and ftp
     * are accessible, mailto and news are not.
     * @type bool
     */
    public $browsable = false;

    /**
     * Whether data sent over this scheme is encrypted; https is
     * secure, http is not.
     * @type bool
     */
    public $secure = false;

    /**
     * Whether the URI always uses <hier_part>; resolves edge cases
     * when making relative URIs absolute.
     * @type bool
     */
    public $hierarchical = false;

    /**
     * Whether the hostname may be left out although the scheme is
     * given explicitly, as in file:///path/to/file. As of writing,
     * 'file' is the only scheme for which browsers support this properly.
     * @type bool
     */
    public $may_omit_host = false;

    /**
     * Scheme-specific validation of the URI components.
     * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool success or failure
     */
    abstract public function doValidate(&$uri, $config, $context);

    /**
     * Public entry point for validating the components of a URI. Applies
     * the scheme-independent defaults (default port removal, repair of
     * a missing or blank host) and then delegates to doValidate().
     * Don't overload this method.
     * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool success or failure
     */
    public function validate(&$uri, $config, $context)
    {
        if ($this->default_port == $uri->port) {
            $uri->port = null;
        }

        // kludge: browsers do funny things when the scheme but not the
        // authority is set
        $has_scheme = !is_null($uri->scheme);
        $host_blank = $uri->host === '';

        // if the scheme is present, a missing host is always in error
        // (unless this scheme is allowed to omit it)
        $scheme_without_host = !$this->may_omit_host &&
            $has_scheme &&
            ($host_blank || is_null($uri->host));

        // if the scheme is not present, a *blank* host is in error,
        // since this translates into '///path' which most browsers
        // interpret as being 'http://path'.
        $blank_host_without_scheme = !$has_scheme && $host_blank;

        if ($scheme_without_host || $blank_host_without_scheme) {
            if (!$has_scheme && substr($uri->path, 0, 2) != '//') {
                // Dropping the authority entirely is enough here.
                $uri->host = null;
            } else {
                // Either the scheme is present, or the URI is
                // '////path' and nullifying the host would change its
                // meaning. Try to insert a hostname manually instead.
                $host = $config->get('URI.Host');
                if (is_null($host)) {
                    // we can't do anything sensible, reject the URL.
                    return false;
                }
                $uri->host = $host;
            }
        }

        return $this->doValidate($uri, $config, $context);
    }
}
9490
9491
9492
9493
9494
9495/**
9496 * Registry for retrieving specific URI scheme validator objects.
9497 */
class HTMLPurifier_URISchemeRegistry
{

    /**
     * Retrieve sole instance of the registry.
     * @param HTMLPurifier_URISchemeRegistry $prototype Optional prototype to overload sole instance with,
     *                   or bool true to reset to default registry.
     * @return HTMLPurifier_URISchemeRegistry
     * @note Pass a registry object $prototype with a compatible interface and
     *       the function will copy it and return it all further times.
     */
    public static function instance($prototype = null)
    {
        static $instance = null;
        if ($prototype === true) {
            // Explicit reset. This has to be tested before the generic
            // non-null case, otherwise the boolean itself would be
            // stored and returned as the "registry".
            $instance = new HTMLPurifier_URISchemeRegistry();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        } elseif ($instance === null) {
            // First use: lazily create the default registry.
            $instance = new HTMLPurifier_URISchemeRegistry();
        }
        return $instance;
    }

    /**
     * Cache of retrieved schemes.
     * @type HTMLPurifier_URIScheme[]
     */
    protected $schemes = array();

    /**
     * Retrieves a scheme validator object
     * @param string $scheme String scheme name like http or mailto
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|null Scheme object, or null when the
     *         scheme is not allowed or no class exists for it
     */
    public function getScheme($scheme, $config, $context)
    {
        if (!$config) {
            $config = HTMLPurifier_Config::createDefault();
        }

        // important, otherwise attacker could include arbitrary file
        $allowed_schemes = $config->get('URI.AllowedSchemes');
        if (!$config->get('URI.OverrideAllowedSchemes') &&
            !isset($allowed_schemes[$scheme])
        ) {
            return null;
        }

        // Schemes that are already cached (including ones added through
        // register()) are served from here.
        if (isset($this->schemes[$scheme])) {
            return $this->schemes[$scheme];
        }
        // Nothing cached: only names on the allowed list may be turned
        // into a class name below.
        if (!isset($allowed_schemes[$scheme])) {
            return null;
        }

        $class = 'HTMLPurifier_URIScheme_' . $scheme;
        if (!class_exists($class)) {
            return null;
        }
        $this->schemes[$scheme] = new $class();
        return $this->schemes[$scheme];
    }

    /**
     * Registers a custom scheme to the cache, bypassing reflection.
     * @param string $scheme Scheme name
     * @param HTMLPurifier_URIScheme $scheme_obj
     */
    public function register($scheme, $scheme_obj)
    {
        $this->schemes[$scheme] = $scheme_obj;
    }
}
9572
9573
9574
9575
9576
9577/**
9578 * Class for converting between different unit-lengths as specified by
9579 * CSS.
9580 */
9581class HTMLPurifier_UnitConverter
9582{
9583
9584    const ENGLISH = 1;
9585    const METRIC = 2;
9586    const DIGITAL = 3;
9587
9588    /**
9589     * Units information array. Units are grouped into measuring systems
9590     * (English, Metric), and are assigned an integer representing
9591     * the conversion factor between that unit and the smallest unit in
9592     * the system. Numeric indexes are actually magical constants that
9593     * encode conversion data from one system to the next, with a O(n^2)
9594     * constraint on memory (this is generally not a problem, since
9595     * the number of measuring systems is small.)
9596     */
9597    protected static $units = array(
9598        self::ENGLISH => array(
9599            'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
9600            'pt' => 4,
9601            'pc' => 48,
9602            'in' => 288,
9603            self::METRIC => array('pt', '0.352777778', 'mm'),
9604        ),
9605        self::METRIC => array(
9606            'mm' => 1,
9607            'cm' => 10,
9608            self::ENGLISH => array('mm', '2.83464567', 'pt'),
9609        ),
9610    );
9611
9612    /**
9613     * Minimum bcmath precision for output.
9614     * @type int
9615     */
9616    protected $outputPrecision;
9617
9618    /**
9619     * Bcmath precision for internal calculations.
9620     * @type int
9621     */
9622    protected $internalPrecision;
9623
9624    /**
9625     * Whether or not BCMath is available.
9626     * @type bool
9627     */
9628    private $bcmath;
9629
9630    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false)
9631    {
9632        $this->outputPrecision = $output_precision;
9633        $this->internalPrecision = $internal_precision;
9634        $this->bcmath = !$force_no_bcmath && function_exists('bcmul');
9635    }
9636
9637    /**
9638     * Converts a length object of one unit into another unit.
9639     * @param HTMLPurifier_Length $length
9640     *      Instance of HTMLPurifier_Length to convert. You must validate()
9641     *      it before passing it here!
9642     * @param string $to_unit
9643     *      Unit to convert to.
9644     * @return HTMLPurifier_Length|bool
9645     * @note
9646     *      About precision: This conversion function pays very special
9647     *      attention to the incoming precision of values and attempts
9648     *      to maintain a number of significant figure. Results are
9649     *      fairly accurate up to nine digits. Some caveats:
9650     *          - If a number is zero-padded as a result of this significant
9651     *            figure tracking, the zeroes will be eliminated.
9652     *          - If a number contains less than four sigfigs ($outputPrecision)
9653     *            and this causes some decimals to be excluded, those
9654     *            decimals will be added on.
9655     */
9656    public function convert($length, $to_unit)
9657    {
9658        if (!$length->isValid()) {
9659            return false;
9660        }
9661
9662        $n = $length->getN();
9663        $unit = $length->getUnit();
9664
9665        if ($n === '0' || $unit === false) {
9666            return new HTMLPurifier_Length('0', false);
9667        }
9668
9669        $state = $dest_state = false;
9670        foreach (self::$units as $k => $x) {
9671            if (isset($x[$unit])) {
9672                $state = $k;
9673            }
9674            if (isset($x[$to_unit])) {
9675                $dest_state = $k;
9676            }
9677        }
9678        if (!$state || !$dest_state) {
9679            return false;
9680        }
9681
9682        // Some calculations about the initial precision of the number;
9683        // this will be useful when we need to do final rounding.
9684        $sigfigs = $this->getSigFigs($n);
9685        if ($sigfigs < $this->outputPrecision) {
9686            $sigfigs = $this->outputPrecision;
9687        }
9688
9689        // BCMath's internal precision deals only with decimals. Use
9690        // our default if the initial number has no decimals, or increase
9691        // it by how ever many decimals, thus, the number of guard digits
9692        // will always be greater than or equal to internalPrecision.
9693        $log = (int)floor(log(abs($n), 10));
9694        $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision
9695
9696        for ($i = 0; $i < 2; $i++) {
9697
9698            // Determine what unit IN THIS SYSTEM we need to convert to
9699            if ($dest_state === $state) {
9700                // Simple conversion
9701                $dest_unit = $to_unit;
9702            } else {
9703                // Convert to the smallest unit, pending a system shift
9704                $dest_unit = self::$units[$state][$dest_state][0];
9705            }
9706
9707            // Do the conversion if necessary
9708            if ($dest_unit !== $unit) {
9709                $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp);
9710                $n = $this->mul($n, $factor, $cp);
9711                $unit = $dest_unit;
9712            }
9713
9714            // Output was zero, so bail out early. Shouldn't ever happen.
9715            if ($n === '') {
9716                $n = '0';
9717                $unit = $to_unit;
9718                break;
9719            }
9720
9721            // It was a simple conversion, so bail out
9722            if ($dest_state === $state) {
9723                break;
9724            }
9725
9726            if ($i !== 0) {
9727                // Conversion failed! Apparently, the system we forwarded
9728                // to didn't have this unit. This should never happen!
9729                return false;
9730            }
9731
9732            // Pre-condition: $i == 0
9733
9734            // Perform conversion to next system of units
9735            $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp);
9736            $unit = self::$units[$state][$dest_state][2];
9737            $state = $dest_state;
9738
9739            // One more loop around to convert the unit in the new system.
9740
9741        }
9742
9743        // Post-condition: $unit == $to_unit
9744        if ($unit !== $to_unit) {
9745            return false;
9746        }
9747
9748        // Useful for debugging:
9749        //echo "<pre>n";
9750        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n";
9751
9752        $n = $this->round($n, $sigfigs);
9753        if (strpos($n, '.') !== false) {
9754            $n = rtrim($n, '0');
9755        }
9756        $n = rtrim($n, '.');
9757
9758        return new HTMLPurifier_Length($n, $unit);
9759    }
9760
9761    /**
9762     * Returns the number of significant figures in a string number.
9763     * @param string $n Decimal number
9764     * @return int number of sigfigs
9765     */
9766    public function getSigFigs($n)
9767    {
9768        $n = ltrim($n, '0+-');
9769        $dp = strpos($n, '.'); // decimal position
9770        if ($dp === false) {
9771            $sigfigs = strlen(rtrim($n, '0'));
9772        } else {
9773            $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
9774            if ($dp !== 0) {
9775                $sigfigs--;
9776            }
9777        }
9778        return $sigfigs;
9779    }
9780
9781    /**
9782     * Adds two numbers, using arbitrary precision when available.
9783     * @param string $s1
9784     * @param string $s2
9785     * @param int $scale
9786     * @return string
9787     */
9788    private function add($s1, $s2, $scale)
9789    {
9790        if ($this->bcmath) {
9791            return bcadd($s1, $s2, $scale);
9792        } else {
9793            return $this->scale((float)$s1 + (float)$s2, $scale);
9794        }
9795    }
9796
9797    /**
9798     * Multiples two numbers, using arbitrary precision when available.
9799     * @param string $s1
9800     * @param string $s2
9801     * @param int $scale
9802     * @return string
9803     */
9804    private function mul($s1, $s2, $scale)
9805    {
9806        if ($this->bcmath) {
9807            return bcmul($s1, $s2, $scale);
9808        } else {
9809            return $this->scale((float)$s1 * (float)$s2, $scale);
9810        }
9811    }
9812
9813    /**
9814     * Divides two numbers, using arbitrary precision when available.
9815     * @param string $s1
9816     * @param string $s2
9817     * @param int $scale
9818     * @return string
9819     */
9820    private function div($s1, $s2, $scale)
9821    {
9822        if ($this->bcmath) {
9823            return bcdiv($s1, $s2, $scale);
9824        } else {
9825            return $this->scale((float)$s1 / (float)$s2, $scale);
9826        }
9827    }
9828
9829    /**
9830     * Rounds a number according to the number of sigfigs it should have,
9831     * using arbitrary precision when available.
9832     * @param float $n
9833     * @param int $sigfigs
9834     * @return string
9835     */
9836    private function round($n, $sigfigs)
9837    {
9838        $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
9839        $rp = $sigfigs - $new_log - 1; // Number of decimal places needed
9840        $neg = $n < 0 ? '-' : ''; // Negative sign
9841        if ($this->bcmath) {
9842            if ($rp >= 0) {
9843                $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
9844                $n = bcdiv($n, '1', $rp);
9845            } else {
9846                // This algorithm partially depends on the standardized
9847                // form of numbers that comes out of bcmath.
9848                $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
9849                $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
9850            }
9851            return $n;
9852        } else {
9853            return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
9854        }
9855    }
9856
9857    /**
9858     * Scales a float to $scale digits right of decimal point, like BCMath.
9859     * @param float $r
9860     * @param int $scale
9861     * @return string
9862     */
9863    private function scale($r, $scale)
9864    {
9865        if ($scale < 0) {
9866            // The f sprintf type doesn't support negative numbers, so we
9867            // need to cludge things manually. First get the string.
9868            $r = sprintf('%.0f', (float)$r);
9869            // Due to floating point precision loss, $r will more than likely
9870            // look something like 4652999999999.9234. We grab one more digit
9871            // than we need to precise from $r and then use that to round
9872            // appropriately.
9873            $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1);
9874            // Now we return it, truncating the zero that was rounded off.
9875            return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
9876        }
9877        return sprintf('%.' . $scale . 'f', (float)$r);
9878    }
9879}
9880
9881
9882
9883
9884
9885/**
9886 * Parses string representations into their corresponding native PHP
9887 * variable type. The base implementation does a simple type-check.
9888 */
9889class HTMLPurifier_VarParser
9890{
9891
9892    const STRING = 1;
9893    const ISTRING = 2;
9894    const TEXT = 3;
9895    const ITEXT = 4;
9896    const INT = 5;
9897    const FLOAT = 6;
9898    const BOOL = 7;
9899    const LOOKUP = 8;
9900    const ALIST = 9;
9901    const HASH = 10;
9902    const MIXED = 11;
9903
9904    /**
9905     * Lookup table of allowed types. Mainly for backwards compatibility, but
9906     * also convenient for transforming string type names to the integer constants.
9907     */
9908    public static $types = array(
9909        'string' => self::STRING,
9910        'istring' => self::ISTRING,
9911        'text' => self::TEXT,
9912        'itext' => self::ITEXT,
9913        'int' => self::INT,
9914        'float' => self::FLOAT,
9915        'bool' => self::BOOL,
9916        'lookup' => self::LOOKUP,
9917        'list' => self::ALIST,
9918        'hash' => self::HASH,
9919        'mixed' => self::MIXED
9920    );
9921
9922    /**
9923     * Lookup table of types that are string, and can have aliases or
9924     * allowed value lists.
9925     */
9926    public static $stringTypes = array(
9927        self::STRING => true,
9928        self::ISTRING => true,
9929        self::TEXT => true,
9930        self::ITEXT => true,
9931    );
9932
9933    /**
9934     * Validate a variable according to type.
9935     * It may return NULL as a valid type if $allow_null is true.
9936     *
9937     * @param mixed $var Variable to validate
9938     * @param int $type Type of variable, see HTMLPurifier_VarParser->types
9939     * @param bool $allow_null Whether or not to permit null as a value
9940     * @return string Validated and type-coerced variable
9941     * @throws HTMLPurifier_VarParserException
9942     */
9943    final public function parse($var, $type, $allow_null = false)
9944    {
9945        if (is_string($type)) {
9946            if (!isset(HTMLPurifier_VarParser::$types[$type])) {
9947                throw new HTMLPurifier_VarParserException("Invalid type '$type'");
9948            } else {
9949                $type = HTMLPurifier_VarParser::$types[$type];
9950            }
9951        }
9952        $var = $this->parseImplementation($var, $type, $allow_null);
9953        if ($allow_null && $var === null) {
9954            return null;
9955        }
9956        // These are basic checks, to make sure nothing horribly wrong
9957        // happened in our implementations.
9958        switch ($type) {
9959            case (self::STRING):
9960            case (self::ISTRING):
9961            case (self::TEXT):
9962            case (self::ITEXT):
9963                if (!is_string($var)) {
9964                    break;
9965                }
9966                if ($type == self::ISTRING || $type == self::ITEXT) {
9967                    $var = strtolower($var);
9968                }
9969                return $var;
9970            case (self::INT):
9971                if (!is_int($var)) {
9972                    break;
9973                }
9974                return $var;
9975            case (self::FLOAT):
9976                if (!is_float($var)) {
9977                    break;
9978                }
9979                return $var;
9980            case (self::BOOL):
9981                if (!is_bool($var)) {
9982                    break;
9983                }
9984                return $var;
9985            case (self::LOOKUP):
9986            case (self::ALIST):
9987            case (self::HASH):
9988                if (!is_array($var)) {
9989                    break;
9990                }
9991                if ($type === self::LOOKUP) {
9992                    foreach ($var as $k) {
9993                        if ($k !== true) {
9994                            $this->error('Lookup table contains value other than true');
9995                        }
9996                    }
9997                } elseif ($type === self::ALIST) {
9998                    $keys = array_keys($var);
9999                    if (array_keys($keys) !== $keys) {
10000                        $this->error('Indices for list are not uniform');
10001                    }
10002                }
10003                return $var;
10004            case (self::MIXED):
10005                return $var;
10006            default:
10007                $this->errorInconsistent(get_class($this), $type);
10008        }
10009        $this->errorGeneric($var, $type);
10010    }
10011
10012    /**
10013     * Actually implements the parsing. Base implementation does not
10014     * do anything to $var. Subclasses should overload this!
10015     * @param mixed $var
10016     * @param int $type
10017     * @param bool $allow_null
10018     * @return string
10019     */
10020    protected function parseImplementation($var, $type, $allow_null)
10021    {
10022        return $var;
10023    }
10024
10025    /**
10026     * Throws an exception.
10027     * @throws HTMLPurifier_VarParserException
10028     */
10029    protected function error($msg)
10030    {
10031        throw new HTMLPurifier_VarParserException($msg);
10032    }
10033
10034    /**
10035     * Throws an inconsistency exception.
10036     * @note This should not ever be called. It would be called if we
10037     *       extend the allowed values of HTMLPurifier_VarParser without
10038     *       updating subclasses.
10039     * @param string $class
10040     * @param int $type
10041     * @throws HTMLPurifier_Exception
10042     */
10043    protected function errorInconsistent($class, $type)
10044    {
10045        throw new HTMLPurifier_Exception(
10046            "Inconsistency in $class: " . HTMLPurifier_VarParser::getTypeName($type) .
10047            " not implemented"
10048        );
10049    }
10050
10051    /**
10052     * Generic error for if a type didn't work.
10053     * @param mixed $var
10054     * @param int $type
10055     */
10056    protected function errorGeneric($var, $type)
10057    {
10058        $vtype = gettype($var);
10059        $this->error("Expected type " . HTMLPurifier_VarParser::getTypeName($type) . ", got $vtype");
10060    }
10061
10062    /**
10063     * @param int $type
10064     * @return string
10065     */
10066    public static function getTypeName($type)
10067    {
10068        static $lookup;
10069        if (!$lookup) {
10070            // Lazy load the alternative lookup table
10071            $lookup = array_flip(HTMLPurifier_VarParser::$types);
10072        }
10073        if (!isset($lookup[$type])) {
10074            return 'unknown';
10075        }
10076        return $lookup[$type];
10077    }
10078}
10079
10080
10081
10082
10083
10084/**
10085 * Exception type for HTMLPurifier_VarParser
10086 */
10087class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
10088{
10089
10090}
10091
10092
10093
10094
10095
10096/**
10097 * A zipper is a purely-functional data structure which contains
10098 * a focus that can be efficiently manipulated.  It is known as
10099 * a "one-hole context".  This mutable variant implements a zipper
10100 * for a list as a pair of two arrays, laid out as follows:
10101 *
10102 *      Base list: 1 2 3 4 [ ] 6 7 8 9
10103 *      Front list: 1 2 3 4
10104 *      Back list: 9 8 7 6
10105 *
10106 * User is expected to keep track of the "current element" and properly
10107 * fill it back in as necessary.  (ToDo: Maybe it's more user friendly
10108 * to implicitly track the current element?)
10109 *
10110 * Nota bene: the current class gets confused if you try to store NULLs
10111 * in the list.
10112 */
10113
10114class HTMLPurifier_Zipper
10115{
10116    public $front, $back;
10117
10118    public function __construct($front, $back) {
10119        $this->front = $front;
10120        $this->back = $back;
10121    }
10122
10123    /**
10124     * Creates a zipper from an array, with a hole in the
10125     * 0-index position.
10126     * @param Array to zipper-ify.
10127     * @return Tuple of zipper and element of first position.
10128     */
10129    static public function fromArray($array) {
10130        $z = new self(array(), array_reverse($array));
10131        $t = $z->delete(); // delete the "dummy hole"
10132        return array($z, $t);
10133    }
10134
10135    /**
10136     * Convert zipper back into a normal array, optionally filling in
10137     * the hole with a value. (Usually you should supply a $t, unless you
10138     * are at the end of the array.)
10139     */
10140    public function toArray($t = NULL) {
10141        $a = $this->front;
10142        if ($t !== NULL) $a[] = $t;
10143        for ($i = count($this->back)-1; $i >= 0; $i--) {
10144            $a[] = $this->back[$i];
10145        }
10146        return $a;
10147    }
10148
10149    /**
10150     * Move hole to the next element.
10151     * @param $t Element to fill hole with
10152     * @return Original contents of new hole.
10153     */
10154    public function next($t) {
10155        if ($t !== NULL) array_push($this->front, $t);
10156        return empty($this->back) ? NULL : array_pop($this->back);
10157    }
10158
10159    /**
10160     * Iterated hole advancement.
10161     * @param $t Element to fill hole with
10162     * @param $i How many forward to advance hole
10163     * @return Original contents of new hole, i away
10164     */
10165    public function advance($t, $n) {
10166        for ($i = 0; $i < $n; $i++) {
10167            $t = $this->next($t);
10168        }
10169        return $t;
10170    }
10171
10172    /**
10173     * Move hole to the previous element
10174     * @param $t Element to fill hole with
10175     * @return Original contents of new hole.
10176     */
10177    public function prev($t) {
10178        if ($t !== NULL) array_push($this->back, $t);
10179        return empty($this->front) ? NULL : array_pop($this->front);
10180    }
10181
10182    /**
10183     * Delete contents of current hole, shifting hole to
10184     * next element.
10185     * @return Original contents of new hole.
10186     */
10187    public function delete() {
10188        return empty($this->back) ? NULL : array_pop($this->back);
10189    }
10190
10191    /**
10192     * Returns true if we are at the end of the list.
10193     * @return bool
10194     */
10195    public function done() {
10196        return empty($this->back);
10197    }
10198
10199    /**
10200     * Insert element before hole.
10201     * @param Element to insert
10202     */
10203    public function insertBefore($t) {
10204        if ($t !== NULL) array_push($this->front, $t);
10205    }
10206
10207    /**
10208     * Insert element after hole.
10209     * @param Element to insert
10210     */
10211    public function insertAfter($t) {
10212        if ($t !== NULL) array_push($this->back, $t);
10213    }
10214
10215    /**
10216     * Splice in multiple elements at hole.  Functional specification
10217     * in terms of array_splice:
10218     *
10219     *      $arr1 = $arr;
10220     *      $old1 = array_splice($arr1, $i, $delete, $replacement);
10221     *
10222     *      list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
10223     *      $t = $z->advance($t, $i);
10224     *      list($old2, $t) = $z->splice($t, $delete, $replacement);
10225     *      $arr2 = $z->toArray($t);
10226     *
10227     *      assert($old1 === $old2);
10228     *      assert($arr1 === $arr2);
10229     *
10230     * NB: the absolute index location after this operation is
10231     * *unchanged!*
10232     *
10233     * @param Current contents of hole.
10234     */
10235    public function splice($t, $delete, $replacement) {
10236        // delete
10237        $old = array();
10238        $r = $t;
10239        for ($i = $delete; $i > 0; $i--) {
10240            $old[] = $r;
10241            $r = $this->delete();
10242        }
10243        // insert
10244        for ($i = count($replacement)-1; $i >= 0; $i--) {
10245            $this->insertAfter($r);
10246            $r = $replacement[$i];
10247        }
10248        return array($old, $r);
10249    }
10250}
10251
10252
10253
10254/**
10255 * Validates the HTML attribute style, otherwise known as CSS.
10256 * @note We don't implement the whole CSS specification, so it might be
10257 *       difficult to reuse this component in the context of validating
10258 *       actual stylesheet declarations.
10259 * @note If we were really serious about validating the CSS, we would
10260 *       tokenize the styles and then parse the tokens. Obviously, we
10261 *       are not doing that. Doing that could seriously harm performance,
10262 *       but would make these components a lot more viable for a CSS
10263 *       filtering solution.
10264 */
10265class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
10266{
10267
10268    /**
10269     * @param string $css
10270     * @param HTMLPurifier_Config $config
10271     * @param HTMLPurifier_Context $context
10272     * @return bool|string
10273     */
10274    public function validate($css, $config, $context)
10275    {
10276        $css = $this->parseCDATA($css);
10277
10278        $definition = $config->getCSSDefinition();
10279        $allow_duplicates = $config->get("CSS.AllowDuplicates");
10280
10281
10282        // According to the CSS2.1 spec, the places where a
10283        // non-delimiting semicolon can appear are in strings
10284        // escape sequences.   So here is some dumb hack to
10285        // handle quotes.
10286        $len = strlen($css);
10287        $accum = "";
10288        $declarations = array();
10289        $quoted = false;
10290        for ($i = 0; $i < $len; $i++) {
10291            $c = strcspn($css, ";'\"", $i);
10292            $accum .= substr($css, $i, $c);
10293            $i += $c;
10294            if ($i == $len) break;
10295            $d = $css[$i];
10296            if ($quoted) {
10297                $accum .= $d;
10298                if ($d == $quoted) {
10299                    $quoted = false;
10300                }
10301            } else {
10302                if ($d == ";") {
10303                    $declarations[] = $accum;
10304                    $accum = "";
10305                } else {
10306                    $accum .= $d;
10307                    $quoted = $d;
10308                }
10309            }
10310        }
10311        if ($accum != "") $declarations[] = $accum;
10312
10313        $propvalues = array();
10314        $new_declarations = '';
10315
10316        /**
10317         * Name of the current CSS property being validated.
10318         */
10319        $property = false;
10320        $context->register('CurrentCSSProperty', $property);
10321
10322        foreach ($declarations as $declaration) {
10323            if (!$declaration) {
10324                continue;
10325            }
10326            if (!strpos($declaration, ':')) {
10327                continue;
10328            }
10329            list($property, $value) = explode(':', $declaration, 2);
10330            $property = trim($property);
10331            $value = trim($value);
10332            $ok = false;
10333            do {
10334                if (isset($definition->info[$property])) {
10335                    $ok = true;
10336                    break;
10337                }
10338                if (ctype_lower($property)) {
10339                    break;
10340                }
10341                $property = strtolower($property);
10342                if (isset($definition->info[$property])) {
10343                    $ok = true;
10344                    break;
10345                }
10346            } while (0);
10347            if (!$ok) {
10348                continue;
10349            }
10350            // inefficient call, since the validator will do this again
10351            if (strtolower(trim($value)) !== 'inherit') {
10352                // inherit works for everything (but only on the base property)
10353                $result = $definition->info[$property]->validate(
10354                    $value,
10355                    $config,
10356                    $context
10357                );
10358            } else {
10359                $result = 'inherit';
10360            }
10361            if ($result === false) {
10362                continue;
10363            }
10364            if ($allow_duplicates) {
10365                $new_declarations .= "$property:$result;";
10366            } else {
10367                $propvalues[$property] = $result;
10368            }
10369        }
10370
10371        $context->destroy('CurrentCSSProperty');
10372
10373        // procedure does not write the new CSS simultaneously, so it's
10374        // slightly inefficient, but it's the only way of getting rid of
10375        // duplicates. Perhaps config to optimize it, but not now.
10376
10377        foreach ($propvalues as $prop => $value) {
10378            $new_declarations .= "$prop:$value;";
10379        }
10380
10381        return $new_declarations ? $new_declarations : false;
10382
10383    }
10384
10385}
10386
10387
10388
10389
10390
10391/**
10392 * Dummy AttrDef that mimics another AttrDef, BUT it generates clones
10393 * with make.
10394 */
10395class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
10396{
10397    /**
10398     * What we're cloning.
10399     * @type HTMLPurifier_AttrDef
10400     */
10401    protected $clone;
10402
10403    /**
10404     * @param HTMLPurifier_AttrDef $clone
10405     */
10406    public function __construct($clone)
10407    {
10408        $this->clone = $clone;
10409    }
10410
10411    /**
10412     * @param string $v
10413     * @param HTMLPurifier_Config $config
10414     * @param HTMLPurifier_Context $context
10415     * @return bool|string
10416     */
10417    public function validate($v, $config, $context)
10418    {
10419        return $this->clone->validate($v, $config, $context);
10420    }
10421
10422    /**
10423     * @param string $string
10424     * @return HTMLPurifier_AttrDef
10425     */
10426    public function make($string)
10427    {
10428        return clone $this->clone;
10429    }
10430}
10431
10432
10433
10434
10435
10436// Enum = Enumerated
10437/**
10438 * Validates a keyword against a list of valid values.
10439 * @warning The case-insensitive compare of this function uses PHP's
10440 *          built-in strtolower and ctype_lower functions, which may
10441 *          cause problems with international comparisons
10442 */
10443class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
10444{
10445
10446    /**
10447     * Lookup table of valid values.
10448     * @type array
10449     * @todo Make protected
10450     */
10451    public $valid_values = array();
10452
10453    /**
10454     * Bool indicating whether or not enumeration is case sensitive.
10455     * @note In general this is always case insensitive.
10456     */
10457    protected $case_sensitive = false; // values according to W3C spec
10458
10459    /**
10460     * @param array $valid_values List of valid values
10461     * @param bool $case_sensitive Whether or not case sensitive
10462     */
10463    public function __construct($valid_values = array(), $case_sensitive = false)
10464    {
10465        $this->valid_values = array_flip($valid_values);
10466        $this->case_sensitive = $case_sensitive;
10467    }
10468
10469    /**
10470     * @param string $string
10471     * @param HTMLPurifier_Config $config
10472     * @param HTMLPurifier_Context $context
10473     * @return bool|string
10474     */
10475    public function validate($string, $config, $context)
10476    {
10477        $string = trim($string);
10478        if (!$this->case_sensitive) {
10479            // we may want to do full case-insensitive libraries
10480            $string = ctype_lower($string) ? $string : strtolower($string);
10481        }
10482        $result = isset($this->valid_values[$string]);
10483
10484        return $result ? $string : false;
10485    }
10486
10487    /**
10488     * @param string $string In form of comma-delimited list of case-insensitive
10489     *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
10490     *      case sensitive
10491     * @return HTMLPurifier_AttrDef_Enum
10492     */
10493    public function make($string)
10494    {
10495        if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
10496            $string = substr($string, 2);
10497            $sensitive = true;
10498        } else {
10499            $sensitive = false;
10500        }
10501        $values = explode(',', $string);
10502        return new HTMLPurifier_AttrDef_Enum($values, $sensitive);
10503    }
10504}
10505
10506
10507
10508
10509
10510/**
10511 * Validates an integer.
10512 * @note While this class was modeled off the CSS definition, no currently
10513 *       allowed CSS uses this type.  The properties that do are: widows,
10514 *       orphans, z-index, counter-increment, counter-reset.  Some of the
10515 *       HTML attributes, however, find use for a non-negative version of this.
10516 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{

    /**
     * True when values below zero may pass validation.
     * @type bool
     */
    protected $negative = true;

    /**
     * True when the value zero may pass validation.
     * @type bool
     */
    protected $zero = true;

    /**
     * True when values above zero may pass validation.
     * @type bool
     */
    protected $positive = true;

    /**
     * Configures which sign classes this validator accepts.
     *
     * @param bool $negative Accept negative integers
     * @param bool $zero Accept zero
     * @param bool $positive Accept positive integers
     */
    public function __construct($negative = true, $zero = true, $positive = true)
    {
        $this->positive = $positive;
        $this->zero = $zero;
        $this->negative = $negative;
    }

    /**
     * Checks that the value is an optionally signed run of decimal digits
     * and that its sign class is one this validator accepts.
     *
     * A leading "+" is removed from the returned value and "-0" is returned
     * as "0"; otherwise the (CDATA-normalised) input is returned unchanged.
     *
     * @param string $integer
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The accepted value, or false when rejected
     */
    public function validate($integer, $config, $context)
    {
        $integer = $this->parseCDATA($integer);
        if ($integer === '') {
            return false;
        }

        // A plain integer typecast is deliberately avoided: some fringe
        // inputs must be rejected rather than coerced.

        // Separate the sign from the digits. A sign is only recognised when
        // its sign class is enabled; otherwise it stays in $digits and makes
        // the digit test below fail.
        $first = $integer[0];
        if ($this->negative && $first === '-') {
            $digits = substr($integer, 1);
            if ($digits === '0') {
                // negative zero is reported as plain zero
                $integer = '0';
            }
        } elseif ($this->positive && $first === '+') {
            // the plus sign is redundant, so it is dropped from the result
            $integer = substr($integer, 1);
            $digits = $integer;
        } else {
            $digits = $integer;
        }

        if (!ctype_digit($digits)) {
            return false;
        }

        // Range tests: reject the value if its sign class is switched off.
        $out_of_scope = (!$this->zero && $integer == 0)
            || (!$this->positive && $integer > 0)
            || (!$this->negative && $integer < 0);

        return $out_of_scope ? false : $integer;
    }
}
10597
10598
10599
10600
10601
10602/**
10603 * Validates the HTML attribute lang, effectively a language code.
10604 * @note Built according to RFC 3066, which obsoleted RFC 1766
10605 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * Validates a language tag made of "-"-separated subtags.
     *
     * The primary subtag must be "x", "i", or two to three letters;
     * otherwise the whole value is rejected. Later subtags must be one to
     * eight alphanumeric characters (a one-character second subtag must be
     * "x"). The tag is cut off at the first subtag breaking these rules,
     * and everything kept is lower-cased, except that a one-character
     * primary subtag is kept as is.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised tag, or false if the primary subtag is invalid
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        $subtags = explode('-', $string);

        // primary subtag
        $primary = $subtags[0];
        $primary_length = strlen($primary);
        if ($primary_length == 1) {
            if ($primary !== 'x' && $primary !== 'i') {
                return false;
            }
        } elseif ($primary_length == 2 || $primary_length == 3) {
            if (!ctype_alpha($primary)) {
                return false;
            }
            $primary = strtolower($primary);
        } else {
            // empty, or longer than three characters
            return false;
        }

        $new_string = $primary;

        // remaining subtags: stop at the first malformed one and return
        // what has been accepted so far
        $total = count($subtags);
        for ($i = 1; $i < $total; $i++) {
            $subtag = $subtags[$i];
            $length = strlen($subtag);

            $malformed = $length == 0 || $length > 8 || !ctype_alnum($subtag);
            if ($i == 1 && $length == 1 && $subtag !== 'x') {
                // the second subtag has one extra rule for single characters
                $malformed = true;
            }
            if ($malformed) {
                return $new_string;
            }

            $new_string .= '-' . strtolower($subtag);
        }

        return $new_string;
    }
}
10684
10685
10686
10687
10688
10689/**
10690 * Decorator that, depending on a token, switches between two definitions.
10691 */
class HTMLPurifier_AttrDef_Switch
{

    /**
     * Tag name that selects $withTag.
     * @type string
     */
    protected $tag;

    /**
     * Definition used when the current token's name equals $tag.
     * @type HTMLPurifier_AttrDef
     */
    protected $withTag;

    /**
     * Definition used for any other token, or when there is no token.
     * @type HTMLPurifier_AttrDef
     */
    protected $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Used if the token matches the tag
     * @param HTMLPurifier_AttrDef $without_tag Used if the token does not match, or is absent
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->withoutTag = $without_tag;
        $this->withTag = $with_tag;
        $this->tag = $tag;
    }

    /**
     * Delegates validation to one of the two wrapped definitions, chosen by
     * the name of the 'CurrentToken' entry of the context.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Whatever the selected definition returns
     */
    public function validate($string, $config, $context)
    {
        // second argument true: a missing CurrentToken is not treated as an error
        $token = $context->get('CurrentToken', true);

        $is_target = $token && $token->name === $this->tag;
        $delegate = $is_target ? $this->withTag : $this->withoutTag;

        return $delegate->validate($string, $config, $context);
    }
}
10738
10739
10740
10741
10742
10743/**
10744 * Validates arbitrary text according to the HTML spec.
10745 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{

    /**
     * Accepts any text; the value is only passed through the inherited
     * parseCDATA() helper and its result is returned as is.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $normalized = $this->parseCDATA($string);
        return $normalized;
    }
}
10760
10761
10762
10763
10764
10765/**
10766 * Validates a URI as defined by RFC 3986.
10767 * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
10768 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /**
     * Parser that turns the raw string into a URI object.
     * @type HTMLPurifier_URIParser
     */
    protected $parser;

    /**
     * Whether the attribute embeds the resource the URI points at.
     * @type bool
     */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
     */
    public function __construct($embeds_resource = false)
    {
        $this->embedsResource = (bool)$embeds_resource;
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Builds a URI definition from its string form.
     *
     * @param string $string "embedded" yields an embedding definition;
     *      any other value yields a non-embedding one
     * @return HTMLPurifier_AttrDef_URI
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * Parses the URI and runs it through generic validation, the configured
     * URI filters, scheme-specific validation and the post filters, in that
     * order. The first stage that fails rejects the URI.
     *
     * @param string $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string String form of the processed URI, or false when rejected
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        $parsed = $this->parser->parse($this->parseCDATA($uri));
        if ($parsed === false) {
            return false;
        }

        // Expose the embedded flag to the validators for the duration of
        // the checks; it is removed again below whatever the outcome.
        $context->register('EmbeddedURI', $this->embedsResource);

        $survived = false;

        // generic validation
        if ($parsed->validate($config, $context)) {
            // chained filtering
            $uri_def = $config->getDefinition('URI');
            if ($uri_def->filter($parsed, $config, $context)) {
                // scheme-specific validation: a scheme object is required,
                // and an embedded resource needs a browsable scheme
                $scheme_obj = $parsed->getSchemeObj($config, $context);
                $scheme_usable = $scheme_obj
                    && !($this->embedsResource && !$scheme_obj->browsable);

                if ($scheme_usable && $scheme_obj->validate($parsed, $config, $context)) {
                    // post chained filtering is the last hurdle
                    if ($uri_def->postFilter($parsed, $config, $context)) {
                        $survived = true;
                    }
                }
            }
        }

        $context->destroy('EmbeddedURI');

        // back to string on success
        return $survived ? $parsed->toString() : false;
    }
}
10872
10873
10874
10875
10876
10877/**
10878 * Validates a number as defined by the CSS spec.
10879 */
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{

    /**
     * When true, values with a leading minus sign are rejected.
     * @type bool
     */
    protected $non_negative = false;

    /**
     * @param bool $non_negative indicates whether negatives are forbidden
     */
    public function __construct($non_negative = false)
    {
        $this->non_negative = $non_negative;
    }

    /**
     * Validates and normalises a decimal number.
     *
     * The result has no "+" sign, no leading zeros in the integer part and
     * no trailing zeros in the fractional part; a value that reduces to
     * zero is returned as "0" without a sign.
     *
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string|bool Normalised number, or false when rejected
     * @warning Some contexts do not pass $config, $context. These
     *          variables should not be used without checking HTMLPurifier_Length
     */
    public function validate($number, $config, $context)
    {
        $number = $this->parseCDATA($number);

        if ($number === '') {
            return false;
        }
        if ($number === '0') {
            return '0';
        }

        // Strip a leading sign, remembering only a minus.
        $sign = '';
        $lead = $number[0];
        if ($lead === '-') {
            if ($this->non_negative) {
                return false;
            }
            $sign = '-';
        }
        if ($lead === '-' || $lead === '+') {
            $number = substr($number, 1);
        }

        // Plain integer: drop leading zeros.
        if (ctype_digit($number)) {
            $number = ltrim($number, '0');
            return $number === '' ? '0' : $sign . $number;
        }

        // Period is the only non-numeric character allowed
        if (strpos($number, '.') === false) {
            return false;
        }

        $pieces = explode('.', $number, 2);
        $left = $pieces[0];
        $right = $pieces[1];

        if ($left === '') {
            // a lone "." carries no digits at all
            if ($right === '') {
                return false;
            }
        } elseif (!ctype_digit($left)) {
            return false;
        }

        $left = ltrim($left, '0');
        $right = rtrim($right, '0');

        if ($right === '') {
            // nothing significant after the period: the value is an integer
            return $left === '' ? '0' : $sign . $left;
        }
        if (!ctype_digit($right)) {
            return false;
        }
        return $sign . $left . '.' . $right;
    }
}
10957
10958
10959
10960
10961
/**
 * Validates a CSS number and clamps it into the range 0 to 1.
 */
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{

    public function __construct()
    {
        // Negative input is let through the number validator on purpose:
        // opacity is non-negative, but out-of-range values are clamped in
        // validate() instead of being rejected.
        parent::__construct(false);
    }

    /**
     * @param string $number
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string|bool "0" for values below zero, "1" for values above
     *      one, the number as normalised by the parent otherwise, or false
     *      when the parent rejects the input
     */
    public function validate($number, $config, $context)
    {
        $parsed = parent::validate($number, $config, $context);
        if ($parsed === false) {
            return false;
        }

        $value = (float)$parsed;
        if ($value > 1.0) {
            return '1';
        }
        if ($value < 0.0) {
            return '0';
        }
        return $parsed;
    }
}
10992
10993
10994
10995
10996
10997/**
10998 * Validates shorthand CSS property background.
10999 * @warning Does not support url tokens that have internal spaces.
11000 */
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{

    /**
     * Validators of the individual background-* properties, keyed by
     * property name and taken from the CSS definition.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $components = array('color', 'image', 'repeat', 'attachment', 'position');
        foreach ($components as $component) {
            $property = 'background-' . $component;
            $this->info[$property] = $def->info[$property];
        }
    }

    /**
     * Splits the shorthand on spaces and assigns every piece to the first
     * component (color, image, repeat, attachment) that is still free and
     * accepts it. Pieces nobody claims are collected, in order, as the
     * position and validated together at the end.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted components joined by spaces, or false
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // munge rgb() decl if necessary
        $string = $this->mungeRgb($string);

        // Order matters: it is both the matching priority and the output
        // order. 'position' must stay last because it takes anything.
        $caught = array(
            'color' => false,
            'image' => false,
            'repeat' => false,
            'attachment' => false,
            'position' => false,
        );
        $catches = 0;

        // assumes URI doesn't have spaces in it
        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }
            foreach ($caught as $key => $status) {
                if ($key == 'position') {
                    // catch-all: accumulate the raw piece, validated later
                    $caught['position'] = ($status === false ? '' : $status) . $bit . ' ';
                    $catches++;
                    break;
                }
                if ($status !== false) {
                    // this component has already been filled
                    continue;
                }
                $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                if ($r === false) {
                    continue;
                }
                $caught[$key] = $r;
                $catches++;
                break;
            }
        }

        if (!$catches) {
            return false;
        }

        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->validate(
                $caught['position'],
                $config,
                $context
            );
        }

        // keep only the components that ended up with a value
        $ret = array();
        foreach ($caught as $value) {
            if ($value !== false) {
                $ret[] = $value;
            }
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
11104
11105
11106
11107
11108
11109/* W3C says:
11110    [ // adjective and number must be in correct order, even if
11111      // you could switch them without introducing ambiguity.
11112      // some browsers support that syntax
11113        [
11114            <percentage> | <length> | left | center | right
11115        ]
11116        [
11117            <percentage> | <length> | top | center | bottom
11118        ]?
11119    ] |
11120    [ // this signifies that the vertical and horizontal adjectives
11121      // can be arbitrarily ordered, however, there can only be two,
11122      // one of each, or none at all
11123        [
11124            left | center | right
11125        ] ||
11126        [
11127            top | center | bottom
11128        ]
11129    ]
11130    top, left = 0%
11131    center, (none) = 50%
11132    bottom, right = 100%
11133*/
11134
11135/* QuirksMode says:
11136    keyword + length/percentage must be ordered correctly, as per W3C
11137
11138    Internet Explorer and Opera, however, support arbitrary ordering. We
11139    should fix it up.
11140
11141    Minor issue though, not strictly necessary.
11142*/
11143
11144// control freaks may appreciate the ability to convert these to
11145// percentages or something, but it's not necessary
11146
11147/**
11148 * Validates the value of background-position.
11149 */
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{

    /**
     * Validator used to recognise length measures.
     * @type HTMLPurifier_AttrDef_CSS_Length
     */
    protected $length;

    /**
     * Validator used to recognise percentage measures.
     * @type HTMLPurifier_AttrDef_CSS_Percentage
     */
    protected $percentage;

    public function __construct()
    {
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
        $this->length = new HTMLPurifier_AttrDef_CSS_Length();
    }

    /**
     * Collects keywords and measures from the space-separated value and
     * re-emits at most two of them, horizontal component first.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised position, or false if nothing was recognised
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // keyword => axis; 'c' (center) gets its axis from where it appears
        $axis_of = array(
            'top' => 'v',
            'bottom' => 'v',
            'left' => 'h',
            'right' => 'h',
            'center' => 'c'
        );

        $keywords = array(
            'h' => false,  // left, right
            'v' => false,  // top, bottom
            'ch' => false, // center (first word)
            'cv' => false, // center (second word)
        );
        $measures = array();
        $recognised = 0;

        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            // test for keyword
            $lbit = strtolower($bit);
            if (isset($axis_of[$lbit])) {
                $slot = $axis_of[$lbit];
                if ($slot == 'c') {
                    // center is horizontal only if nothing was recognised before it
                    $slot = ($recognised == 0) ? 'ch' : 'cv';
                }
                $keywords[$slot] = $lbit;
                $recognised++;
            }

            // test for length
            $as_length = $this->length->validate($bit, $config, $context);
            if ($as_length !== false) {
                $measures[] = $as_length;
                $recognised++;
            }

            // test for percentage
            $as_percentage = $this->percentage->validate($bit, $config, $context);
            if ($as_percentage !== false) {
                $measures[] = $as_percentage;
                $recognised++;
            }
        }

        if (!$recognised) {
            // no valid values were caught
            return false;
        }

        $ret = array();

        // first component: horizontal keyword, else center, else a measure
        if ($keywords['h']) {
            $ret[] = $keywords['h'];
        } elseif ($keywords['ch']) {
            $ret[] = $keywords['ch'];
            // prevent re-use: center = center center
            $keywords['cv'] = false;
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        // second component: vertical keyword, else center, else a measure
        if ($keywords['v']) {
            $ret[] = $keywords['v'];
        } elseif ($keywords['cv']) {
            $ret[] = $keywords['cv'];
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
11262
11263
11264
11265
11266
11267/**
11268 * Validates the border property as defined by CSS.
11269 */
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{

    /**
     * Local copy of properties this property is shorthand for, keyed by
     * property name. The order is the order in which they are tried.
     * @type HTMLPurifier_AttrDef[]
     */
    protected $info = array();

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }

    /**
     * Validates the space-separated pieces of a border shorthand.
     *
     * Each piece is offered to the width, style and colour validators that
     * have not matched yet; the first one accepting it claims the piece.
     * Pieces that no validator accepts are dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted pieces joined by spaces, or false when
     *      nothing usable was found
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $string = $this->mungeRgb($string);
        $bits = explode(' ', $string);
        $done = array(); // segments we've finished
        $ret = ''; // return value
        foreach ($bits as $bit) {
            foreach ($this->info as $propname => $validator) {
                if (isset($done[$propname])) {
                    continue;
                }
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }

        $ret = rtrim($ret);
        // An empty result means no piece validated. Report that as a
        // failure, like the other shorthand validators in this file do,
        // instead of handing back an empty property value.
        if ($ret === '') {
            return false;
        }
        return $ret;
    }
}
11319
11320
11321
11322
11323
11324/**
11325 * Validates Color as defined by CSS.
11326 */
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{

    /**
     * Validator applied to the last argument of rgba() and hsla().
     * @type HTMLPurifier_AttrDef_CSS_AlphaValue
     */
    protected $alpha;

    public function __construct()
    {
        $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    /**
     * Validates a colour given as a configured keyword, as an
     * rgb()/rgba()/hsl()/hsla() function, or as a 3- or 6-digit
     * hexadecimal value.
     *
     * Keywords are replaced by their entry in Core.ColorKeywords.
     * Functional notations are rebuilt without whitespace, with every
     * component clamped into its allowed range. Hexadecimal values get a
     * leading "#" if they lack one.
     *
     * @param string $color
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised colour, or false when rejected
     */
    public function validate($color, $config, $context)
    {
        // NOTE(review): the keyword table is cached in a function-static,
        // so it is read once, from whichever config is passed first.
        static $colors = null;
        if ($colors === null) {
            $colors = $config->get('Core.ColorKeywords');
        }

        $color = trim($color);
        if ($color === '') {
            return false;
        }

        $lower = strtolower($color);
        if (isset($colors[$lower])) {
            return $colors[$lower];
        }

        if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
            // the first closing parenthesis must be the final character
            $length = strlen($color);
            if (strpos($color, ')') !== $length - 1) {
                return false;
            }

            // get used function : rgb, rgba, hsl or hsla
            $function = $matches[1];

            $parameters_size = 3;
            $alpha_channel = false;
            if (substr($function, -1) === 'a') {
                $parameters_size = 4;
                $alpha_channel = true;
            }

            /*
             * Allowed types for values :
             * parameter_position => [type => max_value]
             */
            $allowed_types = array(
                1 => array('percentage' => 100, 'integer' => 255),
                2 => array('percentage' => 100, 'integer' => 255),
                3 => array('percentage' => 100, 'integer' => 255),
            );
            // rgb components must all be integers or all be percentages
            $allow_different_types = false;

            if (strpos($function, 'hsl') !== false) {
                $allowed_types = array(
                    1 => array('integer' => 360),
                    2 => array('percentage' => 100),
                    3 => array('percentage' => 100),
                );
                $allow_different_types = true;
            }

            $values = trim(str_replace($function, '', $color), ' ()');

            $parts = explode(',', $values);
            if (count($parts) !== $parameters_size) {
                return false;
            }

            $type = false;
            $new_parts = array();
            $i = 0;

            foreach ($parts as $part) {
                $i++;
                $part = trim($part);

                if ($part === '') {
                    return false;
                }

                // different check for alpha channel
                if ($alpha_channel === true && $i === count($parts)) {
                    $result = $this->alpha->validate($part, $config, $context);

                    if ($result === false) {
                        return false;
                    }

                    $new_parts[] = (string)$result;
                    continue;
                }

                if (substr($part, -1) === '%') {
                    $current_type = 'percentage';
                } else {
                    $current_type = 'integer';
                }

                if (!array_key_exists($current_type, $allowed_types[$i])) {
                    return false;
                }

                if (!$type) {
                    $type = $current_type;
                }

                if ($allow_different_types === false && $type != $current_type) {
                    return false;
                }

                $max_value = $allowed_types[$i][$current_type];

                // The component is converted to a number *before* clamping.
                // Passing the raw string to min()/max() makes the result
                // depend on the engine's string/number comparison rules,
                // which differ between PHP 7 and PHP 8 for non-numeric
                // input; the explicit cast gives one result everywhere
                // (text without a leading number counts as 0).
                if ($current_type == 'integer') {
                    // Return value between range 0 -> $max_value
                    $new_parts[] = (int)max(min((float)$part, $max_value), 0);
                } elseif ($current_type == 'percentage') {
                    $percent = (float)rtrim($part, '%');
                    $new_parts[] = (float)max(min($percent, $max_value), 0) . '%';
                }
            }

            $new_values = implode(',', $new_parts);

            $color = $function . '(' . $new_values . ')';
        } else {
            // hexadecimal handling
            if ($color[0] === '#') {
                $hex = substr($color, 1);
            } else {
                $hex = $color;
                $color = '#' . $color;
            }
            $length = strlen($hex);
            if ($length !== 3 && $length !== 6) {
                return false;
            }
            if (!ctype_xdigit($hex)) {
                return false;
            }
        }
        return $color;
    }

}
11481
11482
11483
11484
11485
11486/**
11487 * Allows multiple validators to attempt to validate attribute.
11488 *
11489 * Composite is just what it sounds like: a composite of many validators.
11490 * This means that multiple HTMLPurifier_AttrDef objects will have a whack
11491 * at the string.  If one of them passes, that's what is returned.  This is
11492 * especially useful for CSS values, which often are a choice between
11493 * an enumerated set of predefined values or a flexible data type.
11494 */
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{

    /**
     * Validators to try, in order.
     * @type HTMLPurifier_AttrDef[]
     * @todo Make protected
     */
    public $defs;

    /**
     * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects
     */
    public function __construct($defs)
    {
        $this->defs = $defs;
    }

    /**
     * Offers the value to each validator in turn and returns the result of
     * the first one that does not reject it.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string First non-false result, or false if every validator rejects
     */
    public function validate($string, $config, $context)
    {
        foreach ($this->defs as $def) {
            $outcome = $def->validate($string, $config, $context);
            if ($outcome === false) {
                continue;
            }
            return $outcome;
        }
        return false;
    }
}
11530
11531
11532
11533
11534
11535/**
11536 * Decorator which enables CSS properties to be disabled for specific elements.
11537 */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition that does the actual validation.
     * @type HTMLPurifier_AttrDef
     */
    public $def;
    /**
     * Name of the element for which the value is always rejected.
     * @type string
     */
    public $element;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param string $element Element to deny
     */
    public function __construct($def, $element)
    {
        $this->element = $element;
        $this->def = $def;
    }

    /**
     * Rejects the value when the context's 'CurrentToken' is set and named
     * like $this->element; otherwise defers to the wrapped definition.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        // second argument true: a missing CurrentToken is not treated as an error
        $token = $context->get('CurrentToken', true);

        $denied = $token && $token->name == $this->element;

        return $denied ? false : $this->def->validate($string, $config, $context);
    }
}
11575
11576
11577
11578
11579
11580/**
11581 * Microsoft's proprietary filter: CSS property
11582 * @note Currently supports the alpha filter. In the future, this will
11583 *       probably need an extensible framework
11584 */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{
    /**
     * Validator applied to the value of the opacity parameter.
     * @type HTMLPurifier_AttrDef_Integer
     */
    protected $intValidator;

    public function __construct()
    {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * Validates a single alpha filter declaration.
     *
     * Accepts 'none', or one of the function names alpha, Alpha or
     * progid:DXImageTransform.Microsoft.Alpha followed by a parenthesised,
     * comma-separated parameter list.  Only the first valid 'opacity'
     * parameter is kept (clamped to 0..100); every other parameter is
     * silently dropped.
     *
     * @param string $value
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Rebuilt "function(params)" string, 'none', or
     *                     false when the function name is not recognised.
     */
    public function validate($value, $config, $context)
    {
        $value = $this->parseCDATA($value);
        if ($value === 'none') {
            return $value;
        }
        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
        ) {
            return false;
        }

        // Only look for parameters when an opening parenthesis exists;
        // otherwise the cursor would point past the end of the string.
        $parameters = '';
        if ($function_length < strlen($value)) {
            $cursor = $function_length + 1;
            $parameters_length = strcspn($value, ')', $cursor);
            $parameters = (string)substr($value, $cursor, $parameters_length);
        }

        $ret_params = array();
        $lookup = array();
        foreach (explode(',', $parameters) as $param) {
            $pair = explode('=', $param);
            if (count($pair) < 2) {
                // No '=' means there is no value to validate; skip the
                // parameter rather than destructuring a missing index.
                continue;
            }
            $key = trim($pair[0]);
            $param_value = trim($pair[1]);
            if (isset($lookup[$key])) {
                // a valid value for this key was already emitted
                continue;
            }
            if ($key !== 'opacity') {
                continue;
            }
            $param_value = $this->intValidator->validate($param_value, $config, $context);
            if ($param_value === false) {
                continue;
            }
            // clamp opacity into the 0..100 range
            $int = (int)$param_value;
            if ($int > 100) {
                $param_value = '100';
            }
            if ($int < 0) {
                $param_value = '0';
            }
            $ret_params[] = "$key=$param_value";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        return "$function($ret_parameters)";
    }
}
11653
11654
11655
11656
11657
11658/**
11659 * Validates shorthand CSS property font.
11660 */
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the individual font sub-properties, keyed by CSS
     * property name and copied from the CSS definition at construction.
     * @type HTMLPurifier_AttrDef[]
     * @note If we moved specific CSS property definitions to their own
     *       classes instead of having them be assembled at run time by
     *       CSSDefinition, this wouldn't be necessary.  We'd instantiate
     *       our own copies.
     */
    protected $info = array();

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $css = $config->getCSSDefinition();
        $properties = array(
            'font-style',
            'font-variant',
            'font-weight',
            'font-size',
            'line-height',
            'font-family',
        );
        foreach ($properties as $property) {
            $this->info[$property] = $css->info[$property];
        }
    }

    /**
     * Validates the font shorthand.
     *
     * A value is either a system font keyword, or a space-separated
     * sequence of: optional font-style / font-variant / font-weight (any
     * order), a mandatory font-size with optional "/line-height", and a
     * mandatory font-family taking up the remainder of the string.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        static $system_fonts = array(
            'caption' => true,
            'icon' => true,
            'menu' => true,
            'message-box' => true,
            'small-caption' => true,
            'status-bar' => true
        );

        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // a lone system font keyword is returned lower-cased
        $keyword = strtolower($string);
        if (isset($system_fonts[$keyword])) {
            return $keyword;
        }

        $tokens = explode(' ', $string);
        $total = count($tokens);
        $optional = array('font-style', 'font-variant', 'font-weight');
        $seen = array(); // optional properties matched so far
        $stage = 0;      // 0: optional properties, 1: size, 2: family
        $output = '';

        for ($i = 0; $i < $total; $i++) {
            $token = $tokens[$i];
            if ($token === '') {
                continue;
            }

            if ($stage === 0) {
                // try the optional properties that are still unmatched
                $r = false;
                foreach ($optional as $name) {
                    if (isset($seen[$name])) {
                        continue;
                    }
                    $r = $this->info[$name]->validate($token, $config, $context);
                    if ($r !== false) {
                        $output .= $r . ' ';
                        $seen[$name] = true;
                        break;
                    }
                }
                if (count($seen) >= 3) {
                    // all three caught, the next token must be the size
                    $stage = 1;
                }
                if ($r !== false) {
                    continue;
                }
                // nothing matched: treat this same token as the font-size
            }

            if ($stage < 2) {
                // font-size, possibly followed by "/line-height"
                $found_slash = false;
                if (strpos($token, '/') !== false) {
                    list($font_size, $line_height) = explode('/', $token);
                    if ($line_height === '') {
                        // the slash is followed by a space
                        $line_height = false;
                        $found_slash = true;
                    }
                } else {
                    $font_size = $token;
                    $line_height = false;
                }

                $r = $this->info['font-size']->validate($font_size, $config, $context);
                if ($r === false) {
                    return false;
                }
                $output .= $r;

                if ($line_height === false) {
                    // look ahead for a detached slash and/or line-height
                    for ($j = $i + 1; $j < $total; $j++) {
                        if ($tokens[$j] === '') {
                            continue;
                        }
                        if ($tokens[$j] === '/') {
                            if ($found_slash) {
                                return false;
                            }
                            $found_slash = true;
                            continue;
                        }
                        $line_height = $tokens[$j];
                        break;
                    }
                } else {
                    // size and line-height were in the same token
                    $found_slash = true;
                    $j = $i;
                }

                if ($found_slash) {
                    // consume the tokens that were looked at
                    $i = $j;
                    $r = $this->info['line-height']->validate($line_height, $config, $context);
                    if ($r !== false) {
                        $output .= '/' . $r;
                    }
                }
                $output .= ' ';
                $stage = 2;
                continue;
            }

            // everything from here to the end is the font-family
            $font_family = implode(' ', array_slice($tokens, $i, $total - $i));
            $r = $this->info['font-family']->validate($font_family, $config, $context);
            if ($r === false) {
                return false;
            }
            $output .= $r . ' ';
            return rtrim($output);
        }
        return false;
    }
}
11830
11831
11832
11833
11834
11835/**
11836 * Validates a font family list according to CSS spec
11837 */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{

    /**
     * Every byte allowed in a font name that is not purely alphanumeric:
     * underscore, dash, space, ASCII letters, ASCII digits and the bytes
     * 0x80-0xFF.  Used with strspn() in validate().
     * @type string
     */
    protected $mask = null;

    public function __construct()
    {
        // Build the mask from explicit ranges.  A loop of the form
        // for ($c = 'a'; $c <= 'z'; $c++) does not stop at 'z': PHP's
        // string increment turns 'z' into 'aa', which still compares
        // <= 'z', so it would keep appending two-letter strings up to
        // 'yz' and bloat the mask with duplicates.
        $this->mask = '_- '
            . implode('', range('a', 'z'))
            . implode('', range('A', 'Z'))
            . implode('', range('0', '9'));

        // special bytes used by UTF-8
        // We don't bother excluding invalid bytes in this range,
        // because our restriction to well-formed UTF-8 will
        // prevent these from ever occurring.
        $this->mask .= implode('', array_map('chr', range(0x80, 0xFF)));

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
          for (p = s1;;) {
            spanp = s2;
            do {
              if (*spanp == c || p == s1_end) {
                return p - s1;
              }
            } while (spanp++ < (s2_end - 1));
            c = *++p;
          }
         */
        // possible optimization: invert the mask.
    }

    /**
     * Validates a comma-separated font-family list.
     *
     * Each entry is trimmed and then handled as follows: generic family
     * names pass through unquoted; surrounding matching quotes are removed;
     * CSS escapes are expanded; entries not in CSS.AllowedFonts (when that
     * directive is set) are dropped; purely alphanumeric names are emitted
     * unquoted; remaining names are emitted single-quoted provided every
     * byte is in $this->mask.  Entries that fail are skipped, not fatal.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised list joined by ', ', or false when no
     *                     entry survives.
     */
    public function validate($string, $config, $context)
    {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach ($fonts as $font) {
            $font = trim($font);
            if ($font === '') {
                continue;
            }
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                if ($length <= 2) {
                    // nothing between the quotes (or a lone quote)
                    continue;
                }
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) {
                    // unbalanced quoting
                    continue;
                }
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "MS 明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote *cannot* be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping doesn't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // International (plus underscores): alphanumeric, spaces,
            // dashes, underscores and non-ASCII bytes.  Use of
            // str(c)spn assumes that the string was already well formed
            // Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        // strip the trailing ", " separator
        $final = rtrim($final, ', ');
        if ($final === '') {
            return false;
        }
        return $final;
    }

}
12050
12051
12052
12053
12054
12055/**
12056 * Validates based on {ident} CSS grammar production
12057 */
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{

    /**
     * Accepts the trimmed string when it is an optional leading dash, then
     * a letter or underscore, then any run of letters, digits, dashes and
     * underscores.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Trimmed identifier, or false when it does not match
     */
    public function validate($string, $config, $context)
    {
        $candidate = trim($string);

        // Deliberately a loose check: both '' and '0' convert to false and
        // neither is a valid identifier.
        if (!$candidate) {
            return false;
        }

        $matched = preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $candidate);
        return $matched ? $candidate : false;
    }
}
12083
12084
12085
12086
12087
12088/**
12089 * Decorator which enables !important to be used in CSS values.
12090 */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    /**
     * Wrapped definition that validates the value itself.
     * @type HTMLPurifier_AttrDef
     */
    public $def;
    /**
     * Whether a trailing !important is re-attached to valid values.
     * @type bool
     */
    public $allow;

    /**
     * @param HTMLPurifier_AttrDef $def Definition to wrap
     * @param bool $allow Whether or not to allow !important
     */
    public function __construct($def, $allow = false)
    {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * Intercepts and removes !important if necessary.
     *
     * A trailing "!important" (whitespace allowed between "!" and
     * "important") is stripped before the wrapped definition sees the
     * value.  It is appended again only when $this->allow is true and the
     * wrapped definition accepted the value.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Validated value (with ' !important' when kept),
     *                     or false when the wrapped definition rejects it.
     */
    public function validate($string, $config, $context)
    {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because we might want to restore important
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }
        $result = $this->def->validate($string, $config, $context);
        if ($result === false) {
            // Never decorate a rejection: concatenating onto false would
            // produce the string ' !important' and mask the failure.
            return false;
        }
        if ($this->allow && $is_important) {
            $result .= ' !important';
        }
        return $result;
    }
}
12140
12141
12142
12143
12144
12145/**
12146 * Represents a Length as defined by CSS.
12147 */
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{

    /**
     * Lower bound, or null when unbounded.
     * @type HTMLPurifier_Length|string
     */
    protected $min;

    /**
     * Upper bound, or null when unbounded.
     * @type HTMLPurifier_Length|string
     */
    protected $max;

    /**
     * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable.
     * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable.
     */
    public function __construct($min = null, $max = null)
    {
        $this->min = ($min === null) ? null : HTMLPurifier_Length::make($min);
        $this->max = ($max === null) ? null : HTMLPurifier_Length::make($max);
    }

    /**
     * Validates a CSS length and checks it against the configured bounds.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string String form of the length, or false when it is
     *                     invalid, out of bounds, or not comparable to a bound.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Shortcuts: the empty string is invalid, a bare '0' is valid as
        // is, and no other single character can be a length.
        if ($string === '') {
            return false;
        }
        if (strlen($string) === 1) {
            return ($string === '0') ? '0' : false;
        }

        $length = HTMLPurifier_Length::make($string);
        if (!$length->isValid()) {
            return false;
        }

        if ($this->min) {
            $order = $length->compareTo($this->min);
            // false signals that the comparison could not be made
            if ($order === false || $order < 0) {
                return false;
            }
        }
        if ($this->max) {
            $order = $length->compareTo($this->max);
            if ($order === false || $order > 0) {
                return false;
            }
        }
        return $length->toString();
    }
}
12218
12219
12220
12221
12222
12223/**
12224 * Validates shorthand CSS property list-style.
12225 * @warning Does not support url tokens that have internal spaces.
12226 */
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{

    /**
     * Local copy of validators, keyed by CSS property name.
     * @type HTMLPurifier_AttrDef[]
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    protected $info;

    /**
     * @param HTMLPurifier_Config $config
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type'] = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * Validates the list-style shorthand.
     *
     * The value is lower-cased (including any URL it contains) and split on
     * spaces.  Each token is offered to the type, position and image
     * validators, in that order, skipping those already satisfied.  The
     * result is rebuilt in the order type, image, position.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised shorthand, or false when no token is
     *                     accepted.
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', strtolower($string)); // bits to process

        $caught = array();
        $caught['type'] = false;
        $caught['position'] = false;
        $caught['image'] = false;

        $i = 0; // number of catches
        $none = false;

        foreach ($bits as $bit) {
            if ($i >= 3) {
                // Optimization: all three components are caught, so no
                // later token can match.  Stop scanning but still return
                // what was collected (a bare return here would yield null).
                break;
            }
            if ($bit === '') {
                continue;
            }
            foreach ($caught as $key => $status) {
                if ($status !== false) {
                    continue;
                }
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) {
                    continue;
                }
                if ($r === 'none') {
                    // 'none' is only accepted once, and never as the image
                    if ($none) {
                        continue;
                    } else {
                        $none = true;
                    }
                    if ($key == 'image') {
                        continue;
                    }
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) {
            return false;
        }

        $ret = array();

        // construct type
        if ($caught['type']) {
            $ret[] = $caught['type'];
        }

        // construct image
        if ($caught['image']) {
            $ret[] = $caught['image'];
        }

        // construct position
        if ($caught['position']) {
            $ret[] = $caught['position'];
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
12331
12332
12333
12334
12335
12336/**
12337 * Framework class for strings that involve multiple values.
12338 *
12339 * Certain CSS properties such as border-width and margin allow multiple
12340 * lengths to be specified.  This class can take a vanilla border-width
12341 * definition and multiply it, usually into a max of four.
12342 *
12343 * @note Even though the CSS specification isn't clear about it, inherit
12344 *       can only be used alone: it will never manifest as part of a multi
12345 *       shorthand declaration.  Thus, this class does not allow inherit.
12346 */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
    /**
     * Definition each individual value is validated against.
     * @type HTMLPurifier_AttrDef
     * @todo Make protected
     */
    public $single;

    /**
     * Upper limit on the number of values kept.
     * @todo Make protected
     */
    public $max;

    /**
     * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply
     * @param int $max Max number of values allowed (usually four)
     */
    public function __construct($single, $max = 4)
    {
        $this->single = $single;
        $this->max = $max;
    }

    /**
     * Validates each space-separated part with the single definition and
     * keeps the accepted results, stopping once $this->max have been kept.
     * Rejected parts are skipped rather than failing the whole value.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Accepted values joined by single spaces, or false
     *                     when none is accepted.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->mungeRgb($this->parseCDATA($string));
        if ($string === '') {
            return false;
        }

        $accepted = array();
        // parseCDATA replaced \r, \t and \n
        foreach (explode(' ', $string) as $part) {
            if (count($accepted) >= $this->max) {
                break;
            }
            if (ctype_space($part)) {
                continue;
            }
            $result = $this->single->validate($part, $config, $context);
            if ($result !== false) {
                $accepted[] = $result;
            }
        }

        if ($accepted === array()) {
            return false;
        }
        return rtrim(implode(' ', $accepted));
    }
}
12403
12404
12405
12406
12407
12408/**
12409 * Validates a Percentage as defined by the CSS spec.
12410 */
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{

    /**
     * Validator used for the numeric part in front of the percent sign.
     * @type HTMLPurifier_AttrDef_CSS_Number
     */
    protected $number_def;

    /**
     * @param bool $non_negative Passed straight to the number validator;
     *        true forbids negative values
     */
    public function __construct($non_negative = false)
    {
        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }

    /**
     * Accepts a value of the form "<number>%": the trailing percent sign is
     * required and whatever precedes it must pass the number validator.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The validated number followed by "%", or false
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Needs at least one character for the number plus the "%" itself.
        $size = strlen($string);
        if ($size < 2 || $string[$size - 1] !== '%') {
            return false;
        }

        $digits = substr($string, 0, $size - 1);
        $validated = $this->number_def->validate($digits, $config, $context);

        return ($validated === false) ? false : "$validated%";
    }
}
12458
12459
12460
12461
12462
12463/**
12464 * Validates the value for the CSS property text-decoration
12465 * @note This class could be generalized into a version that acts sort of
12466 *       like Enum except you can compound the allowed values.
12467 */
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{

    /**
     * Accepts either the single keyword "none" or a space-separated list of
     * decoration keywords. Unknown words in a list are dropped; the remaining
     * keywords are returned lower-cased in their original order.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised value, or false if no keyword survived
     */
    public function validate($string, $config, $context)
    {
        // Lookup table of the keywords that may be combined with each other.
        $combinable = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true,
        );

        $value = strtolower($this->parseCDATA($string));

        // "none" is only meaningful on its own.
        if ($value === 'none') {
            return $value;
        }

        $kept = array();
        foreach (explode(' ', $value) as $word) {
            if (isset($combinable[$word])) {
                $kept[] = $word;
            }
        }

        if (!$kept) {
            return false;
        }
        return implode(' ', $kept);
    }
}
12505
12506
12507
12508
12509
12510/**
12511 * Validates a URI in CSS syntax, which uses url('http://example.com')
12512 * @note While theoretically speaking a URI in a CSS document could
12513 *       be non-embedded, as of CSS2 there is no such usage so we're
12514 *       generalizing it. This may need to be changed in the future.
 * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
 *          the separator, you cannot put a literal semicolon in
 *          the URI. Try percent-encoding it in that case.
12518 */
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{

    public function __construct()
    {
        parent::__construct(true); // always embedded
    }

    /**
     * Extracts the URI from a CSS url(...) token, validates it with the
     * parent URI definition and re-emits it as url("...") with characters
     * that are risky inside a CSS URL removed or percent-encoded.
     *
     * @param string $uri_string Raw CSS value, expected to look like
     *        url(foo), url('foo') or url("foo")
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised url("...") token, or false if the value
     *         is not a well-formed url() token or the URI is rejected
     */
    public function validate($uri_string, $config, $context)
    {
        // parse the URI out of the string and then pass it onto
        // the parent object

        $uri_string = $this->parseCDATA($uri_string);
        if (strpos($uri_string, 'url(') !== 0) {
            return false;
        }
        $uri_string = substr($uri_string, 4);
        if (strlen($uri_string) == 0) {
            return false;
        }
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] != ')') {
            return false;
        }
        $uri = trim(substr($uri_string, 0, $new_length));

        if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
            $quote = $uri[0];
            $new_length = strlen($uri) - 1;
            // A quoted URI needs a separate opening and closing quote.
            // Without the length check a lone quote character would be
            // compared against itself and accepted as an empty quoted URI.
            if ($new_length < 1 || $uri[$new_length] !== $quote) {
                return false;
            }
            $uri = substr($uri, 1, $new_length - 1);
        }

        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) {
            return false;
        }

        // extra sanity check; should have been done by URI
        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

        // suspicious characters are ()'; we're going to percent encode
        // them for safety.
        $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);

        // there's an extra bug where ampersands lose their escaping on
        // an innerHTML cycle, so a very unlucky query parameter could
        // then change the meaning of the URL.  Unfortunately, there's
        // not much we can do about that...
        return "url(\"$result\")";
    }
}
12583
12584
12585
12586
12587
12588/**
12589 * Validates a boolean attribute
12590 */
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /**
     * Value handed back by validate(); make() sets it to the attribute name,
     * the default constructor leaves it as false.
     * @type string|bool
     */
    protected $name;

    /**
     * @type bool
     */
    public $minimized = true;

    /**
     * @param string|bool $name Attribute name, or false when none is bound
     */
    public function __construct($name = false)
    {
        $this->name = $name;
    }

    /**
     * Ignores the supplied value entirely and returns the stored name.
     *
     * @param string $string Unused
     * @param HTMLPurifier_Config $config Unused
     * @param HTMLPurifier_Context $context Unused
     * @return bool|string The name given at construction time
     */
    public function validate($string, $config, $context)
    {
        $canonical = $this->name;
        return $canonical;
    }

    /**
     * Builds a new definition bound to the given attribute name.
     *
     * @param string $string Name of attribute
     * @return HTMLPurifier_AttrDef_HTML_Bool
     */
    public function make($string)
    {
        $definition = new self($string);
        return $definition;
    }
}
12632
12633
12634
12635
12636
12637/**
12638 * Validates contents based on NMTOKENS attribute type.
12639 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Tokenises the value with split(), lets filter() discard tokens, and
     * joins whatever remains with single spaces.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Space-separated token list, or false if the value
     *         is empty, is "0", or yields no tokens
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);

        // Bail out early on the two strings PHP treats as false.
        if ($string === '' || $string === '0') {
            return false;
        }

        $tokens = $this->filter(
            $this->split($string, $config, $context),
            $config,
            $context
        );

        return $tokens ? implode(' ', $tokens) : false;
    }

    /**
     * Extracts the whitespace-delimited tokens that match the supported
     * name syntax; anything else in the string is ignored.
     *
     * Only ASCII letters, digits, "_" and "-" are supported; the original
     * author deliberately left out U+00A1-and-up codepoints and escapes.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function split($string, $config, $context)
    {
        // Each token must be preceded by whitespace or the string start and
        // followed by whitespace or the string end.
        $pattern = '/(?:(?<=\s)|\A)((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)(?:(?=\s)|\z)/';
        $found = array();
        preg_match_all($pattern, $string, $found);
        return $found[1];
    }

    /**
     * Hook for subclasses to drop tokens; the base implementation keeps
     * every token unchanged.
     *
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }
}
12703
12704
12705
12706
12707
12708/**
12709 * Implements special behavior for class attribute (normally NMTOKENS)
12710 */
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Uses the strict NMTOKENS tokeniser for the XHTML 1.1 and XHTML 2.0
     * doctypes and a plain whitespace split for every other doctype.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    protected function split($string, $config, $context)
    {
        // really, this twiddle should be lazy loaded
        $doctype = $config->getDefinition('HTML')->doctype->name;
        $strict = ($doctype == "XHTML 1.1" || $doctype == "XHTML 2.0");
        if (!$strict) {
            return preg_split('/\s+/', $string);
        }
        return parent::split($string, $config, $context);
    }

    /**
     * Keeps each class name that is allowed by %Attr.AllowedClasses (when
     * that directive is not null), is not listed in %Attr.ForbiddenClasses,
     * and has not already been kept.
     *
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        $whitelist = $config->get('Attr.AllowedClasses');
        $blacklist = $config->get('Attr.ForbiddenClasses');
        $kept = array();
        foreach ($tokens as $class) {
            if ($whitelist !== null && !isset($whitelist[$class])) {
                continue;
            }
            if (isset($blacklist[$class])) {
                continue;
            }
            // Linear duplicate scan kept on purpose: the original note says a
            // key-based lookup is unreliable because PHP casts the array key
            // -0 to 0.
            if (in_array($class, $kept, true)) {
                continue;
            }
            $kept[] = $class;
        }
        return $kept;
    }
}
12754
12755
12756
12757/**
12758 * Validates a color according to the HTML spec.
12759 */
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * Accepts a colour keyword from %Core.ColorKeywords or a 3- or 6-digit
     * hexadecimal colour with an optional leading "#". Keywords are replaced
     * by their configured value; hex colours are returned as "#" followed by
     * six digits (three-digit forms are expanded by doubling each digit).
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Normalised colour, or false if invalid
     */
    public function validate($string, $config, $context)
    {
        // Look the keyword table up on every call. A function-level static
        // cache would keep whatever table the first config supplied and
        // ignore %Core.ColorKeywords from any config used afterwards.
        $colors = $config->get('Core.ColorKeywords');

        $string = trim($string);

        if (empty($string)) {
            return false;
        }
        $lower = strtolower($string);
        if (isset($colors[$lower])) {
            return $colors[$lower];
        }
        if ($string[0] === '#') {
            $hex = substr($string, 1);
        } else {
            $hex = $string;
        }

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) {
            return false;
        }
        if (!ctype_xdigit($hex)) {
            return false;
        }
        if ($length === 3) {
            // expand shorthand: abc -> aabbcc
            $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
        }
        return "#$hex";
    }
}
12804
12805
12806
12807
12808
12809/**
12810 * Special-case enum attribute definition that lazy loads allowed frame targets
12811 */
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{

    /**
     * Allowed targets; false until the first validate() call fills it in
     * from the configuration.
     * @type array
     */
    public $valid_values = false;

    /**
     * @type bool
     */
    protected $case_sensitive = false;

    /**
     * Intentionally empty: the parent constructor is not invoked, the value
     * list is loaded on demand instead.
     */
    public function __construct()
    {
    }

    /**
     * Loads %Attr.AllowedFrameTargets on first use, then defers to the
     * parent enum validation.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $loaded = ($this->valid_values !== false);
        if (!$loaded) {
            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }
}
12843
12844
12845
12846
12847
12848/**
12849 * Validates the HTML attribute ID.
12850 * @warning Even though this is the id processor, it
12851 *          will ignore the directive Attr:IDBlacklist, since it will only
12852 *          go according to the ID accumulator. Since the accumulator is
12853 *          automatically generated, it will have already absorbed the
12854 *          blacklist. If you're hacking around, make sure you use load()!
12855 */
12856
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * True when the ID is being validated as part of a CSS selector; in
     * that mode %Attr.EnableID and the ID accumulator are not consulted.
     * @type bool
     */
    protected $selector;

    /**
     * @param bool $selector
     */
    public function __construct($selector = false)
    {
        $this->selector = $selector;
    }

    /**
     * Trims the ID, applies the configured prefix, rejects IDs already
     * present in the accumulator, checks the syntax (HTML5 or legacy rules
     * depending on %Attr.ID.HTML5) and the blacklist regexp, and finally
     * registers the ID with the accumulator. The accumulator steps are
     * skipped in selector mode.
     *
     * @param string $id
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The (possibly prefixed) ID, or false if rejected
     */
    public function validate($id, $config, $context)
    {
        $track = !$this->selector;

        if ($track && !$config->get('Attr.EnableID')) {
            return false;
        }

        $id = trim($id);
        if ($id === '') {
            return false;
        }

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix === '') {
            // a local prefix makes no sense without the global one
            if ($config->get('Attr.IDPrefixLocal') !== '') {
                trigger_error(
                    '%Attr.IDPrefixLocal cannot be used unless %Attr.IDPrefix is set',
                    E_USER_WARNING
                );
            }
        } else {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // only prepend when the ID does not already start with the prefix
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        }

        if ($track) {
            $accumulator =& $context->get('IDAccumulator');
            if (isset($accumulator->ids[$id])) {
                return false;
            }
        }

        if ($config->get('Attr.ID.HTML5') === true) {
            // HTML5 rule: no whitespace characters
            if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
                return false;
            }
        } elseif (!ctype_alpha($id)) {
            // Legacy rule for IDs that are not purely alphabetic: the first
            // character must be a letter, and stripping the permitted
            // characters from both ends must leave nothing behind (any
            // other character would stop the trim and survive).
            if (!ctype_alpha($id[0]) || trim($id, 'A..Za..z0..9:-._') !== '') {
                return false;
            }
        }

        $blacklist = $config->get('Attr.IDBlacklistRegexp');
        if ($blacklist && preg_match($blacklist, $id)) {
            return false;
        }

        if ($track) {
            $accumulator->add($id);
        }

        return $id;
    }
}
12957
12958
12959
12960
12961
12962/**
12963 * Validates an integer representation of pixels according to the HTML spec.
12964 */
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{

    /**
     * Largest value that will be returned; null disables the cap.
     * @type int
     */
    protected $max;

    /**
     * @param int $max
     */
    public function __construct($max = null)
    {
        $this->max = $max;
    }

    /**
     * Accepts a numeric value with an optional "px" suffix and returns it as
     * an integer string. Negative values become "0" and values above the
     * configured maximum are replaced by that maximum.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }
        if ($string === '0') {
            return '0';
        }

        $length = strlen($string);
        $suffix = substr($string, $length - 2);
        if ($suffix == 'px') {
            $string = substr($string, 0, $length - 2);
        }
        if (!is_numeric($string)) {
            return false;
        }

        $pixels = (int)$string;
        if ($pixels < 0) {
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        $over_cap = ($this->max !== null && $pixels > $this->max);

        return (string)($over_cap ? $this->max : $pixels);
    }

    /**
     * Creates a definition of the same concrete class with the maximum
     * parsed from the string; an empty string means no maximum.
     *
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $limit = ($string === '') ? null : (int)$string;
        $concrete = get_class($this);
        return new $concrete($limit);
    }
}
13034
13035
13036
13037
13038
13039/**
13040 * Validates the HTML type length (not to be confused with CSS's length).
13041 *
13042 * This accepts integer pixels or percentages as lengths for certain
13043 * HTML attributes.
13044 */
13045
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{

    /**
     * Tries the parent pixel validation first; if that fails, accepts a
     * numeric percentage and clamps it to the range 0%-100%.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_pixels = parent::validate($string, $config, $context);
        if ($as_pixels !== false) {
            return $as_pixels;
        }

        // Not a pixel value: the only remaining valid form is "<number>%".
        $cut = strlen($string) - 1;
        if ($string[$cut] !== '%') {
            return false;
        }

        $number = substr($string, 0, $cut);
        if (!is_numeric($number)) {
            return false;
        }

        $percent = max(0, min(100, (int)$number));
        return $percent . '%';
    }
}
13091
13092
13093
13094
13095
13096/**
13097 * Validates a rel/rev link attribute against a directive of allowed values
13098 * @note We cannot use Enum because link types allow multiple
13099 *       values.
13100 * @note Assumes link types are ASCII text
13101 */
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{

    /**
     * Suffix of the Attr.* directive holding the allowed link types
     * ("AllowedRel" or "AllowedRev").
     * @type string
     */
    protected $name;

    /**
     * Maps the attribute name to its configuration directive. An unknown
     * attribute name raises an E_USER_ERROR and leaves $name unset.
     *
     * @param string $name Either "rel" or "rev"
     */
    public function __construct($name)
    {
        $directives = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($directives[$name])) {
            $this->name = $directives[$name];
            return;
        }
        trigger_error(
            'Unrecognized attribute name for link relationship.',
            E_USER_ERROR
        );
    }

    /**
     * Lower-cases each space-separated link type, keeps those present in
     * the configured allow-list, and removes duplicates while preserving the
     * order of first appearance.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Space-separated allowed link types, or false if
     *         the allow-list is empty or nothing matched
     */
    public function validate($string, $config, $context)
    {
        $allowed = $config->get('Attr.' . $this->name);
        if (empty($allowed)) {
            return false;
        }

        // keyed by link type so that repeats collapse into one entry
        $kept = array();
        foreach (explode(' ', $this->parseCDATA($string)) as $raw) {
            $type = strtolower(trim($raw));
            if (isset($allowed[$type])) {
                $kept[$type] = true;
            }
        }

        if (!$kept) {
            return false;
        }
        return implode(' ', array_keys($kept));
    }
}
13164
13165
13166
13167
13168
13169/**
13170 * Validates a MultiLength as defined by the HTML spec.
13171 *
13172 * A multilength is either a integer (pixel count), a percentage, or
13173 * a relative number.
13174 */
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{

    /**
     * Tries the parent length validation (pixels or percentage) first; if
     * that fails, accepts a relative length of the form "<integer>*".
     * A missing factor or a factor of 1 is returned as "*", a factor of 0
     * as "0", and a negative factor is rejected.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $as_length = parent::validate($string, $config, $context);
        if ($as_length !== false) {
            return $as_length;
        }

        // Not a plain length: the only remaining valid form ends in "*".
        $cut = strlen($string) - 1;
        if ($string[$cut] !== '*') {
            return false;
        }

        $factor = substr($string, 0, $cut);
        if ($factor == '') {
            return '*';
        }
        if (!is_numeric($factor)) {
            return false;
        }

        $factor = (int)$factor;
        if ($factor < 0) {
            return false;
        }
        if ($factor === 0) {
            return '0';
        }
        // a factor of one is implied by a bare asterisk
        return ($factor === 1 ? '' : (string)$factor) . '*';
    }
}
13225
13226
13227
13228
13229
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{

    /**
     * Intended to unpack a mailbox into its display-name and address.
     * Not implemented yet: the method currently does nothing and returns
     * null.
     *
     * @param string $string
     * @return mixed
     */
    public function unpack($string)
    {
        // placeholder until an implementation exists
        return null;
    }

}
13244
13245// sub-implementations
13246
13247
13248
13249
13250
13251/**
13252 * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
13253 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * IPv6 sub-validator.
     * @type HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    public function __construct()
    {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * Validates a URI host. The checks are tried in this order:
     *
     *  1. the empty string (accepted as-is),
     *  2. a bracketed IPv6 literal,
     *  3. a dotted IPv4 address,
     *  4. an ASCII domain name,
     *  5. the result of converting the name to ASCII with idn_to_ascii()
     *     or, when that is unavailable and %Core.EnableIDNA is set, with
     *     Net_IDNA2.
     *
     * A domain name is only accepted when its top label is not made up
     * entirely of digits; this holds for both step 4 and step 5.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The (possibly punycoded) host, or false if invalid
     */
    public function validate($string, $config, $context)
    {
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }
        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
            //IPv6
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            if ($valid === false) {
                return false;
            }
            return '[' . $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) {
            return $ipv4;
        }

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode.

        // There is not a good sense in which underscores should be
        // allowed, since it's technically not! (And if you go as
        // far to allow everything as specified by the DNS spec...
        // well, that's literally everything, modulo some space limits
        // for the components and the overall name (which, by the way,
        // we are NOT checking!).  So we (arbitrarily) decide this:
        // let's allow underscores wherever we would have allowed
        // hyphens, if they are enabled.  This is a pretty good match
        // for browser behavior, for example, a large number of browsers
        // cannot handle foo_.example.com, but foo_bar.example.com is
        // fairly well supported.
        $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

        // Based off of RFC 1738, but amended so that
        // as per RFC 3696, the top label need only not be all numeric.
        // The productions describing this are:
        $an  = '[a-z0-9]';  // alphanum
        $and = "[a-z0-9-$underscore]"; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel = "$an(?:$and*$an)?";
        // AMENDED as per RFC 3696
        // toplabel    = alphanum | alphanum *( alphanum | "-" ) alphanum
        //      side condition: not all numeric
        $toplabel = "$an(?:$and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        // The D modifier makes "$" match only at the very end of the
        // subject; without it a host followed by a newline would match and
        // be returned with the newline still attached.
        $hostname = "/^(?:$domainlabel\.)*($toplabel)\.?$/iD";
        if (preg_match($hostname, $string, $matches)) {
            if (!ctype_digit($matches[1])) {
                return $string;
            }
        }

        // PHP 5.3 and later support this functionality natively
        if (function_exists('idn_to_ascii')) {
            if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) {
                $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
            } else {
                $string = idn_to_ascii($string);
            }
            // idn_to_ascii() reports failure with false; nothing left to try
            if ($string === false) {
                return false;
            }

            // Otherwise, if Net_IDNA2 support has been enabled, we can
            // support IRIs by punycoding the labels ourselves.
        } elseif ($config->get('Core.EnableIDNA')) {
            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
            // we need to encode each period separately
            $parts = explode('.', $string);
            try {
                $new_parts = array();
                foreach ($parts as $part) {
                    $encodable = false;
                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                        if (ord($part[$i]) > 0x7a) {
                            $encodable = true;
                            break;
                        }
                    }
                    if (!$encodable) {
                        $new_parts[] = $part;
                    } else {
                        $new_parts[] = $idna->encode($part);
                    }
                }
                $string = implode('.', $new_parts);
            } catch (Exception $e) {
                // XXX error reporting
            }
        }
        // Try again, applying the same "top label is not all numeric" side
        // condition as the first attempt so that the retry cannot accept a
        // name the first attempt deliberately rejected.
        if (preg_match($hostname, $string, $matches) && !ctype_digit($matches[1])) {
            return $string;
        }
        return false;
    }
}
13389
13390
13391
13392
13393
13394/**
13395 * Validates an IPv4 address
13396 * @author Feyd @ forums.devnetwork.net (public domain)
13397 */
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{

    /**
     * IPv4 regex, protected so that IPv6 can reuse it.
     * Empty until _loadRegex() has been called.
     * @type string
     */
    protected $ip4;

    /**
     * Accepts a dotted-quad IPv4 address whose four octets are each in the
     * range 0-255. The address is returned unchanged.
     *
     * @param string $aIP
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The address, or false if it is not valid
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }

        // Anchor the end with \z rather than $: by default $ also matches
        // just before a trailing newline, which would let "1.2.3.4\n"
        // through and return it with the newline included.
        if (preg_match('#^' . $this->ip4 . '\z#', $aIP)) {
            return $aIP;
        }
        return false;
    }

    /**
     * Lazy load function to prevent regex from being stuffed in
     * cache.
     */
    protected function _loadRegex()
    {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }
}
13435
13436
13437
13438
13439
13440/**
13441 * Validates an IPv6 address.
13442 * @author Feyd @ forums.devnetwork.net (public domain)
13443 * @note This function requires brackets to have been removed from address
13444 *       in URI.
13445 */
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{

    /**
     * Validates a textual IPv6 address, optionally followed by a prefix
     * length (/0 - /128) and optionally ending in a dotted-quad IPv4 tail.
     *
     * The address must expand to exactly eight groups of 1-4 hex digits.
     * A single "::" may stand in for one or more zero groups. Empty groups
     * (stray leading, trailing or tripled colons) are rejected.
     *
     * @param string $aIP
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The input unchanged when it is valid, false otherwise.
     */
    public function validate($aIP, $config, $context)
    {
        if (!$this->ip4) {
            $this->_loadRegex();
        }

        $original = $aIP;

        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128

        //      prefix check
        if (strpos($aIP, '/') !== false) {
            // \z instead of $ so a trailing newline cannot hide behind the anchor
            if (preg_match('#' . $pre . '\z#s', $aIP, $find)) {
                $aIP = substr($aIP, 0, 0 - strlen($find[0]));
                unset($find);
            } else {
                return false;
            }
        }

        //      IPv4-compatibility check
        // A dotted quad directly after a colon is rewritten as the two
        // 16-bit hex groups it represents (each octet zero-padded).
        if (preg_match('#(?<=:' . ')' . $this->ip4 . '\z#s', $aIP, $find)) {
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $aIP .= vsprintf('%02x%02x:%02x%02x', explode('.', $find[0]));
            unset($find);
        }

        //      compression check
        $halves = explode('::', $aIP);
        $c = count($halves);
        if ($c > 2) {
            // more than one "::" is ambiguous
            return false;
        } elseif ($c == 2) {
            // an empty side of "::" contributes no groups at all
            $first = ($halves[0] === '') ? array() : explode(':', $halves[0]);
            $second = ($halves[1] === '') ? array() : explode(':', $halves[1]);

            // "::" has to replace at least one group
            $missing = 8 - count($first) - count($second);
            if ($missing < 1) {
                return false;
            }

            $pieces = array_merge($first, array_fill(0, $missing, '0'), $second);
        } else {
            $pieces = explode(':', $halves[0]);
        }

        if (count($pieces) != 8) {
            return false;
        }

        //      All the pieces should be 16-bit hex strings. Are they?
        // Each group needs 1-4 hex digits; an empty group is not padded into validity.
        foreach ($pieces as $piece) {
            if (!preg_match('#^[0-9a-fA-F]{1,4}\z#', $piece)) {
                return false;
            }
        }
        return $original;
    }
}
13525
13526
13527
13528
13529
13530/**
13531 * Primitive email validation class based on the regexp found at
13532 * http://www.regular-expressions.info/email.html
13533 */
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{

    /**
     * Checks a bare address of the form local@domain.tld against a simple
     * pattern; surrounding whitespace is trimmed first.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The trimmed address when it matches, false otherwise.
     */
    public function validate($string, $config, $context)
    {
        // no support for named mailboxes i.e. "Bob <bob@example.com>"
        // that needs more percent encoding to be done
        if ($string == '') {
            return false;
        }
        $string = trim($string);
        // The top-level label may be 2-63 letters (63 is the DNS label
        // limit); the former cap of 4 rejected e.g. ".museum" and ".email".
        $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/i', $string);
        return $result ? $string : false;
    }
}
13555
13556
13557
13558
13559
13560/**
13561 * Pre-transform that changes proprietary background attribute to CSS.
13562 */
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform
{
    /**
     * Takes the "background" attribute, when present, and hands an
     * equivalent background-image declaration to prependCSS().
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['background'])) {
            // the value is placed inside url(...) as-is; nothing is validated here
            $image = $this->confiscateAttr($attr, 'background');
            $this->prependCSS($attr, 'background-image:url(' . $image . ');');
        }
        return $attr;
    }
}
13584
13585
13586
13587
13588
13589// this MUST be placed in post, as it assumes that any value in dir is valid
13590
13591/**
 * Post-transform that ensures that bdo tags have the dir attribute set.
13593 */
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{

    /**
     * Supplies the configured %Attr.DefaultTextDir as "dir" when the
     * attribute is absent; an existing value is left untouched.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['dir'])) {
            $attr['dir'] = $config->get('Attr.DefaultTextDir');
        }
        return $attr;
    }
}
13612
13613
13614
13615
13616
13617/**
13618 * Pre-transform that changes deprecated bgcolor attribute to CSS.
13619 */
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform
{
    /**
     * Takes the "bgcolor" attribute, when present, and hands an
     * equivalent background-color declaration to prependCSS().
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['bgcolor'])) {
            // the colour value is used verbatim; nothing is validated here
            $color = $this->confiscateAttr($attr, 'bgcolor');
            $this->prependCSS($attr, 'background-color:' . $color . ';');
        }
        return $attr;
    }
}
13641
13642
13643
13644
13645
13646/**
 * Pre-transform that converts a boolean attribute to fixed CSS
13648 */
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform
{
    /**
     * Attribute whose presence triggers the conversion.
     * @type string
     */
    protected $attr;

    /**
     * CSS text passed to prependCSS(); must end with a semicolon.
     * @type string
     */
    protected $css;

    /**
     * @param string $attr name of the boolean attribute to replace
     * @param string $css CSS declarations to emit in its place (needs semicolon)
     */
    public function __construct($attr, $css)
    {
        $this->css = $css;
        $this->attr = $attr;
    }

    /**
     * Drops the trigger attribute, if set, and passes the fixed CSS to
     * prependCSS(). The attribute's own value is ignored.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $trigger = $this->attr;
        if (isset($attr[$trigger])) {
            unset($attr[$trigger]);
            $this->prependCSS($attr, $this->css);
        }
        return $attr;
    }
}
13689
13690
13691
13692
13693
13694/**
13695 * Pre-transform that changes deprecated border attribute to CSS.
13696 */
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform
{
    /**
     * Takes the "border" attribute, when present, and hands a
     * "border:<value>px solid;" declaration to prependCSS().
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['border'])) {
            // the width is used verbatim; nothing is validated here
            $width = $this->confiscateAttr($attr, 'border');
            $this->prependCSS($attr, 'border:' . $width . 'px solid;');
        }
        return $attr;
    }
}
13716
13717
13718
13719
13720
13721/**
13722 * Generic pre-transform that converts an attribute with a fixed number of
13723 * values (enumerated) to CSS.
13724 */
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform
{
    /**
     * Attribute that is read and removed.
     * @type string
     */
    protected $attr;

    /**
     * Map from attribute value to the CSS text emitted for it.
     * @type array
     */
    protected $enumToCSS = array();

    /**
     * Whether values are looked up without being lower-cased first.
     * @type bool
     * @warning Currently can only be guaranteed to work with ASCII
     *          values.
     */
    protected $caseSensitive = false;

    /**
     * @param string $attr Attribute name to transform from
     * @param array $enum_to_css Lookup array of attribute values to CSS
     * @param bool $case_sensitive Case sensitivity indicator, default false
     */
    public function __construct($attr, $enum_to_css, $case_sensitive = false)
    {
        $this->caseSensitive = (bool)$case_sensitive;
        $this->enumToCSS = $enum_to_css;
        $this->attr = $attr;
    }

    /**
     * Removes the attribute and, when its trimmed value is a key of the
     * lookup table, passes the mapped CSS to prependCSS(). A value that
     * is not in the table is dropped without emitting any CSS.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        $key = trim($attr[$name]);
        if (!$this->caseSensitive) {
            $key = strtolower($key);
        }
        // the attribute goes away whether or not the value is recognised
        unset($attr[$name]);

        if (isset($this->enumToCSS[$key])) {
            $this->prependCSS($attr, $this->enumToCSS[$key]);
        }
        return $attr;
    }
}
13785
13786
13787
13788
13789
13790// must be called POST validation
13791
13792/**
13793 * Transform that supplies default values for the src and alt attributes
13794 * in img tags, as well as prevents the img tag from being removed
13795 * because of a missing alt tag. This needs to be registered as both
13796 * a pre and post attribute transform.
13797 */
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{

    /**
     * Fills in missing src and alt attributes.
     *
     * Without a src, nothing is changed when %Core.RemoveInvalidImg is on;
     * otherwise src becomes %Attr.DefaultInvalidImage. A missing alt is
     * then set to %Attr.DefaultInvalidImageAlt (src was missing), to
     * %Attr.DefaultImageAlt, or to the basename of src when that setting
     * is null.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $hadSrc = isset($attr['src']);
        if (!$hadSrc) {
            if ($config->get('Core.RemoveInvalidImg')) {
                return $attr;
            }
            $attr['src'] = $config->get('Attr.DefaultInvalidImage');
        }

        if (isset($attr['alt'])) {
            return $attr;
        }

        if (!$hadSrc) {
            $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
            return $attr;
        }

        $defaultAlt = $config->get('Attr.DefaultImageAlt');
        $attr['alt'] = ($defaultAlt === null) ? basename($attr['src']) : $defaultAlt;
        return $attr;
    }
}
13833
13834
13835
13836
13837
13838/**
13839 * Pre-transform that changes deprecated hspace and vspace attributes to CSS
13840 */
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform
{
    /**
     * Attribute handled by this instance.
     * @type string
     */
    protected $attr;

    /**
     * Margin sides affected by each supported attribute.
     * @type array
     */
    protected $css = array(
        'hspace' => array('left', 'right'),
        'vspace' => array('top', 'bottom')
    );

    /**
     * @param string $attr attribute to convert; raises an error via
     *        trigger_error() when it is not a key of $css
     */
    public function __construct($attr)
    {
        if (!isset($this->css[$attr])) {
            trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
        }
        $this->attr = $attr;
    }

    /**
     * Removes the attribute and passes one "margin-<side>:<value>px;"
     * declaration per affected side to prependCSS().
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        // the value is used verbatim; nothing is validated here
        $width = $this->confiscateAttr($attr, $name);

        // an unsupported attribute name is taken out but yields no CSS
        if (!isset($this->css[$name])) {
            return $attr;
        }

        $declarations = array();
        foreach ($this->css[$name] as $side) {
            $declarations[] = 'margin-' . $side . ':' . $width . 'px;';
        }
        $this->prependCSS($attr, implode('', $declarations));
        return $attr;
    }
}
13895
13896
13897
13898
13899
13900/**
13901 * Performs miscellaneous cross attribute validation and filtering for
13902 * input elements. This is meant to be a post-transform.
13903 */
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform
{
    /**
     * Validator applied to "size" on non-text, non-password inputs.
     * @type HTMLPurifier_AttrDef_HTML_Pixels
     */
    protected $pixels;

    public function __construct()
    {
        $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
    }

    /**
     * Removes or normalises attributes according to the input's type
     * (lower-cased; "text" when no type is given):
     *  - checked is kept only for radio and checkbox,
     *  - maxlength is kept only for text and password,
     *  - size on any other type is run through the pixels validator,
     *  - src is kept only for image,
     *  - radio and checkbox get an empty value when none is set.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $type = isset($attr['type']) ? strtolower($attr['type']) : 'text';
        $isToggle = ($type === 'radio' || $type === 'checkbox');
        $isTextual = ($type === 'text' || $type === 'password');

        if (!$isToggle && isset($attr['checked'])) {
            unset($attr['checked']);
        }
        if (!$isTextual && isset($attr['maxlength'])) {
            unset($attr['maxlength']);
        }
        if (!$isTextual && isset($attr['size'])) {
            $checked = $this->pixels->validate($attr['size'], $config, $context);
            if ($checked !== false) {
                $attr['size'] = $checked;
            } else {
                unset($attr['size']);
            }
        }
        if ($type !== 'image' && isset($attr['src'])) {
            unset($attr['src']);
        }
        if ($isToggle && !isset($attr['value'])) {
            $attr['value'] = '';
        }
        return $attr;
    }
}
13952
13953
13954
13955
13956
13957/**
13958 * Post-transform that copies lang's value to xml:lang (and vice-versa)
13959 * @note Theoretically speaking, this could be a pre-transform, but putting
13960 *       post is more efficient.
13961 */
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{

    /**
     * Keeps lang and xml:lang in step. When xml:lang is present it is
     * copied to lang (overwriting any existing lang); otherwise a present
     * lang is copied to xml:lang.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // false marks "not present"
        $xmlLang = false;
        if (isset($attr['xml:lang'])) {
            $xmlLang = $attr['xml:lang'];
        }
        $plainLang = false;
        if (isset($attr['lang'])) {
            $plainLang = $attr['lang'];
        }

        if ($xmlLang !== false) {
            $attr['lang'] = $xmlLang;
        } elseif ($plainLang !== false) {
            $attr['xml:lang'] = $plainLang;
        }
        return $attr;
    }
}
13984
13985
13986
13987
13988
13989/**
13990 * Class for handling width/height length attribute transformations to CSS
13991 */
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{

    /**
     * Attribute to read the length from.
     * @type string
     */
    protected $name;

    /**
     * CSS property the length is written to.
     * @type string
     */
    protected $cssName;

    /**
     * @param string $name attribute name
     * @param string $css_name CSS property name; the attribute name is
     *        used when this is omitted or empty
     */
    public function __construct($name, $css_name = null)
    {
        $this->name = $name;
        if ($css_name) {
            $this->cssName = $css_name;
        } else {
            $this->cssName = $name;
        }
    }

    /**
     * Removes the attribute and passes "<property>:<value>;" to
     * prependCSS(), appending "px" when the value consists of digits only.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $name = $this->name;
        if (isset($attr[$name])) {
            $value = $this->confiscateAttr($attr, $name);
            // a bare number is taken to be pixels
            $unit = ctype_digit($value) ? 'px' : '';
            $this->prependCSS($attr, $this->cssName . ':' . $value . $unit . ';');
        }
        return $attr;
    }
}
14030
14031
14032
14033
14034
14035/**
14036 * Pre-transform that changes deprecated name attribute to ID if necessary
14037 */
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{

    /**
     * Removes "name" and reuses its value as "id" unless an id is already
     * set. Does nothing when %HTML.Attr.Name.UseCDATA is enabled or when
     * there is no name.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // the relaxed definition of name leaves the attribute alone
        if ($config->get('HTML.Attr.Name.UseCDATA') || !isset($attr['name'])) {
            return $attr;
        }

        $name = $this->confiscateAttr($attr, 'name');
        if (!isset($attr['id'])) {
            $attr['id'] = $name;
        }
        return $attr;
    }
}
14064
14065
14066
14067
14068
14069/**
14070 * Post-transform that performs validation to the name attribute; if
14071 * it is present with an equivalent id attribute, it is passed through;
14072 * otherwise validation is performed.
14073 */
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{

    /**
     * Validator applied to name when it differs from id.
     * Declared explicitly: assigning an undeclared property is deprecated
     * as of PHP 8.2. Kept public to match the visibility the dynamic
     * property used to have.
     * @type HTMLPurifier_AttrDef_HTML_ID
     */
    public $idDef;

    public function __construct()
    {
        $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
    }

    /**
     * Leaves name alone when it is identical to id; otherwise runs it
     * through the ID validator, removing it on failure and storing the
     * validator's result on success.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['name'])) {
            return $attr;
        }
        $name = $attr['name'];
        if (isset($attr['id']) && $attr['id'] === $name) {
            return $attr;
        }
        $result = $this->idDef->validate($name, $config, $context);
        if ($result === false) {
            unset($attr['name']);
        } else {
            $attr['name'] = $result;
        }
        return $attr;
    }
}
14106
14107
14108
14109
14110
14111// must be called POST validation
14112
14113/**
14114 * Adds rel="nofollow" to all outbound links.  This transform is
14115 * only attached if Attr.Nofollow is TRUE.
14116 */
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
    /**
     * Parser used to break href into a URI object.
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    public function __construct()
    {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Adds the "nofollow" token to rel when href has a browsable scheme
     * and is not local. An existing rel keeps its tokens, and "nofollow"
     * is not added twice.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        // parse() and getSchemeObj() live outside this class; leave the
        // attributes untouched rather than dereference a non-object if
        // either of them fails to produce one.
        if (!is_object($url)) {
            return $attr;
        }
        $scheme = $url->getSchemeObj($config, $context);
        if (!is_object($scheme)) {
            return $attr;
        }

        if ($scheme->browsable && !$url->isLocal($config, $context)) {
            if (isset($attr['rel'])) {
                $rels = explode(' ', $attr['rel']);
                if (!in_array('nofollow', $rels, true)) {
                    $rels[] = 'nofollow';
                }
                $attr['rel'] = implode(' ', $rels);
            } else {
                $attr['rel'] = 'nofollow';
            }
        }
        return $attr;
    }
}
14159
14160
14161
14162
14163
/**
 * Forces fixed allowscriptaccess, allownetworking and type values onto
 * the attribute set it is applied to.
 */
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeEmbed";

    /**
     * Overwrites the three attributes unconditionally; any supplied
     * values for them are discarded.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $forced = array(
            'allowscriptaccess' => 'never',
            'allownetworking' => 'internal',
            'type' => 'application/x-shockwave-flash',
        );
        foreach ($forced as $key => $value) {
            $attr[$key] = $value;
        }
        return $attr;
    }
}
14186
14187
14188
14189
14190/**
14191 * Writes default type for all objects. Currently only supports flash.
14192 */
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeObject";

    /**
     * Sets type to the Flash MIME type when no type is given; an existing
     * type is left as it is.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'application/x-shockwave-flash';
        return $attr;
    }
}
14214
14215
14216
14217
14218
14219/**
14220 * Validates name/value pairs in param tags to be used in safe objects. This
14221 * will only allow name values it recognizes, and pre-fill certain attributes
14222 * with required values.
14223 *
14224 * @note
14225 *      This class only supports Flash. In the future, Quicktime support
14226 *      may be added.
14227 *
14228 * @warning
14229 *      This class expects an injector to add the necessary parameters tags.
14230 */
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
    /**
     * @type string
     */
    public $name = "SafeParam";

    /**
     * Validator for the movie/src value (constructed with its first
     * argument set to true, i.e. "embedded").
     * @type HTMLPurifier_AttrDef_URI
     */
    private $uri;

    /**
     * Validator for the wmode value.
     * Declared explicitly: assigning an undeclared property is deprecated
     * as of PHP 8.2. Kept public to match the visibility the dynamic
     * property used to have.
     * @type HTMLPurifier_AttrDef_Enum
     */
    public $wmode;

    public function __construct()
    {
        $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
        $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
    }

    /**
     * Rewrites a param's name/value pair according to its name: security
     * relevant parameters get fixed values, wmode and movie/src are
     * validated, flashvars passes through, and any unrecognised name has
     * both name and value set to null.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // If we add support for other objects, we'll need to alter the
        // transforms.
        switch ($attr['name']) {
            // application/x-shockwave-flash
            // Keep this synchronized with Injector/SafeObject.php
            case 'allowScriptAccess':
                $attr['value'] = 'never';
                break;
            case 'allowNetworking':
                $attr['value'] = 'internal';
                break;
            case 'allowFullScreen':
                // anything other than the literal 'true' becomes 'false'
                if ($config->get('HTML.FlashAllowFullScreen')) {
                    $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
                } else {
                    $attr['value'] = 'false';
                }
                break;
            case 'wmode':
                $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
                break;
            case 'movie':
            case 'src':
                // both spellings are normalised to "movie"
                $attr['name'] = "movie";
                $attr['value'] = $this->uri->validate($attr['value'], $config, $context);
                break;
            case 'flashvars':
                // we're going to allow arbitrary inputs to the SWF, on
                // the reasoning that it could only hack the SWF, not us.
                break;
            // add other cases to support other param name/value pairs
            default:
                $attr['name'] = $attr['value'] = null;
        }
        return $attr;
    }
}
14294
14295
14296
14297
14298
14299/**
14300 * Implements required attribute stipulation for <script>
14301 */
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
    /**
     * Sets type to "text/javascript" when no type is given.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $attr['type'] = isset($attr['type']) ? $attr['type'] : 'text/javascript';
        return $attr;
    }
}
14318
14319
14320
14321
14322
14323// must be called POST validation
14324
14325/**
 * Adds target="_blank" to all outbound links.  This transform is
 * only attached if Attr.TargetBlank is TRUE.  This works regardless
 * of the value of Attr.AllowedFrameTargets.
14329 */
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
    /**
     * Parser used to break href into a URI object.
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    public function __construct()
    {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Sets target to "_blank" when href has a browsable scheme and is not
     * benign; any existing target is overwritten in that case.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        // parse() and getSchemeObj() live outside this class; leave the
        // attributes untouched rather than dereference a non-object if
        // either of them fails to produce one.
        if (!is_object($url)) {
            return $attr;
        }
        $scheme = $url->getSchemeObj($config, $context);
        if (!is_object($scheme)) {
            return $attr;
        }

        if ($scheme->browsable && !$url->isBenign($config, $context)) {
            $attr['target'] = '_blank';
        }
        return $attr;
    }
}
14364
14365
14366
14367
14368
14369// must be called POST validation
14370
14371/**
14372 * Adds rel="noopener" to any links which target a different window
14373 * than the current one.  This is used to prevent malicious websites
14374 * from silently replacing the original window, which could be used
14375 * to do phishing.
14376 * This transform is controlled by %HTML.TargetNoopener.
14377 */
class HTMLPurifier_AttrTransform_TargetNoopener extends HTMLPurifier_AttrTransform
{
    /**
     * Appends the "noopener" token to rel whenever a target attribute is
     * set and the token is not already there. An existing rel is rewritten
     * from its space-separated tokens even when nothing is added.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $hasRel = isset($attr['rel']);
        $tokens = $hasRel ? explode(' ', $attr['rel']) : array();

        if (isset($attr['target']) && !in_array('noopener', $tokens, true)) {
            $tokens[] = 'noopener';
        }

        // write rel back if it existed or if a token was just added
        if ($hasRel || count($tokens) > 0) {
            $attr['rel'] = implode(' ', $tokens);
        }

        return $attr;
    }
}
14403
14404
14405
14406
14407// must be called POST validation
14408
14409/**
14410 * Adds rel="noreferrer" to any links which target a different window
14411 * than the current one.  This is used to prevent malicious websites
14412 * from silently replacing the original window, which could be used
14413 * to do phishing.
14414 * This transform is controlled by %HTML.TargetNoreferrer.
14415 */
class HTMLPurifier_AttrTransform_TargetNoreferrer extends HTMLPurifier_AttrTransform
{
    /**
     * Appends the "noreferrer" token to rel whenever a target attribute is
     * set and the token is not already there. An existing rel is rewritten
     * from its space-separated tokens even when nothing is added.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        $hasRel = isset($attr['rel']);
        $tokens = $hasRel ? explode(' ', $attr['rel']) : array();

        if (isset($attr['target']) && !in_array('noreferrer', $tokens, true)) {
            $tokens[] = 'noreferrer';
        }

        // write rel back if it existed or if a token was just added
        if ($hasRel || count($tokens) > 0) {
            $attr['rel'] = implode(' ', $tokens);
        }

        return $attr;
    }
}
14441
14442
14443
14444
14445/**
14446 * Sets height/width defaults for <textarea>
14447 */
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{
    /**
     * Supplies cols="22" and rows="3" for whichever of the two is missing.
     *
     * @param array $attr
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function transform($attr, $config, $context)
    {
        // Calculated from Firefox
        $defaults = array('cols' => '22', 'rows' => '3');
        foreach ($defaults as $key => $value) {
            if (!isset($attr[$key])) {
                $attr[$key] = $value;
            }
        }
        return $attr;
    }
}
14468
14469
14470
14471
14472
14473/**
14474 * Definition that uses different definitions depending on context.
14475 *
14476 * The del and ins tags are notable because they allow different types of
14477 * elements depending on whether or not they're in a block or inline context.
14478 * Chameleon allows this behavior to happen by using two different
14479 * definitions depending on context.  While this somewhat generalized,
14480 * it is specifically intended for those two tags.
14481 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Definition consulted unless the context reports a block position.
     * Usually stricter.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $inline;

    /**
     * Definition consulted when the context's IsInline is exactly false.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $block;

    /**
     * @type string
     */
    public $type = 'chameleon';

    /**
     * Builds one optional child definition per context; the element list
     * of this definition is taken from the block one.
     *
     * @param array $inline List of elements to allow when inline.
     * @param array $block List of elements to allow when block.
     */
    public function __construct($inline, $block)
    {
        $inlineDef = new HTMLPurifier_ChildDef_Optional($inline);
        $blockDef = new HTMLPurifier_ChildDef_Optional($block);

        $this->inline = $inlineDef;
        $this->block = $blockDef;
        $this->elements = $blockDef->elements;
    }

    /**
     * Delegates to the block definition when the context's IsInline is
     * exactly false, and to the inline definition in every other case.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function validateChildren($children, $config, $context)
    {
        $delegate = ($context->get('IsInline') === false) ? $this->block : $this->inline;
        return $delegate->validateChildren($children, $config, $context);
    }
}
14536
14537
14538
14539
14540
14541/**
14542 * Custom validation class, accepts DTD child definitions
14543 *
14544 * @warning Currently this class is an all or nothing proposition, that is,
14545 *          it will only give a bool return value.
14546 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'custom';

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD.
     * @type string
     */
    public $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex.
     * @type string
     */
    private $_pcre_regex;

    /**
     * Stores the DTD pattern and immediately compiles it to a PCRE pattern.
     *
     * @param string $dtd_regex Allowed child pattern from the DTD
     */
    public function __construct($dtd_regex)
    {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     * and records every element name found in the pattern in $elements.
     */
    protected function _compileRegex()
    {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Wrap the pattern in parentheses unless it already starts with one.
        // The empty-string check keeps $raw[0] from being read on an empty
        // pattern, which would raise an uninitialized-offset notice/warning;
        // the outcome ("()") is the same as before.
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
        // DOING! Seriously: if there's problems, please report them.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-parenthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Tests the sequence of child names against the compiled pattern.
     *
     * The names of all non-whitespace children are joined with commas,
     * a leading comma is prepended, and the resulting string must match
     * the compiled pattern from start to end.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the children match the pattern
     */
    public function validateChildren($children, $config, $context)
    {
        $list_of_children = '';
        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                continue;
            }
            $list_of_children .= $node->name . ',';
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?' . $this->_pcre_regex . '$/',
                $list_of_children
            );
        // preg_match() yields 1, 0 or false; collapse to a plain boolean
        return (bool)$okay;
    }
}
14639
14640
14641
14642
14643
14644/**
14645 * Definition that disallows all elements.
14646 * @warning validateChildren() in this class is actually never called, because
14647 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
14648 *          before child definitions are parsed in earnest by
14649 *          HTMLPurifier_Strategy_FixNesting.
14650 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    /**
     * An element governed by this definition may have no children.
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'empty';

    /**
     * Nothing to configure.
     */
    public function __construct()
    {
    }

    /**
     * Discards whatever children were supplied.
     *
     * @param HTMLPurifier_Node[] $children Ignored
     * @param HTMLPurifier_Config $config Ignored
     * @param HTMLPurifier_Context $context Ignored
     * @return array Always an empty array
     */
    public function validateChildren($children, $config, $context)
    {
        $no_children = array();
        return $no_children;
    }
}
14678
14679
14680
14681
14682
14683/**
14684 * Definition for list containers ul and ol.
14685 *
14686 * What does this do?  The big thing is to handle ol/ul at the top
14687 * level of list nodes, which should be handled specially by /folding/
14688 * them into the previous list node.  We generally shouldn't ever
14689 * see other disallowed elements, because the autoclose behavior
14690 * in MakeWellFormed handles it.
14691 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'list';

    /**
     * Lookup of accepted child names. This is lying a little bit: ul and ol
     * are listed so that validateChildren() can handle them itself.
     * XXX: This whole business with 'wrap' is all a bit unsatisfactory
     * @type array
     */
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Flag reset at the start of every validateChildren() call. Declared
     * explicitly so the assignment does not create a dynamic property
     * (deprecated as of PHP 8.2); public to match the visibility the
     * dynamically created property had.
     * @type bool
     */
    public $whitespace = false;

    /**
     * Keeps li children (and whitespace) as they are and tucks every other
     * non-whitespace child into the most recent li, creating a new li when
     * none has been seen yet.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool New list of children, or false when the parent
     *         node should be deleted
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // if li is not allowed, delete parent node
        if (!isset($config->getHTMLDefinition()->info['li'])) {
            trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
            return false;
        }

        // the new set of children
        $result = array();

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // most recent li seen (or created); receives stray non-li children
        $current_li = null;

        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            if ($node->name === 'li') {
                // good
                $current_li = $node;
                $result[] = $node;
            } else {
                // we want to tuck this into the previous li
                // Invariant: we expect the node to be ol/ul
                // ToDo: Make this more robust in the case of not ol/ul
                // by distinguishing between existing li and li created
                // to handle non-list elements; non-list elements should
                // not be appended to an existing li; only li created
                // for non-list. This distinction is not currently made.
                if ($current_li === null) {
                    $current_li = new HTMLPurifier_Node_Element('li');
                    $result[] = $current_li;
                }
                $current_li->children[] = $node;
                $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
            }
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        return $result;
    }
}
14771
14772
14773
14774
14775
14776/**
14777 * Definition that allows a set of elements, but disallows empty children.
14778 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     * @type array
     */
    public $elements = array();

    /**
     * Whether or not the last passed node was all whitespace.
     * @type bool
     */
    protected $whitespace = false;

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'required';

    /**
     * Accepts either a '|'-separated string of names, a plain list of
     * names, or an already-built lookup array. Strings and plain lists
     * are converted to a name => true lookup with blank names dropped;
     * any other array is stored untouched.
     *
     * @param array|string $elements List of allowed element names (lowercase).
     */
    public function __construct($elements)
    {
        if (is_string($elements)) {
            $elements = explode('|', str_replace(' ', '', $elements));
        }

        // A plain list is recognised by its keys being 0..n-1 in order.
        $indices = array_keys($elements);
        if ($indices == array_keys($indices)) {
            $lookup = array();
            foreach (array_flip($elements) as $name => $unused) {
                if (!empty($name)) { // skip blank names
                    $lookup[$name] = true;
                }
            }
            $elements = $lookup;
        }

        $this->elements = $elements;
    }

    /**
     * Filters the children down to the allowed ones. A disallowed element
     * is replaced by its own children, which are then examined in turn;
     * other disallowed nodes are dropped.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Surviving children, or false when nothing
     *         survives or everything examined was whitespace
     */
    public function validateChildren($children, $config, $context)
    {
        // Reset the flag that subclasses read after a false return.
        $this->whitespace = false;

        // No children at all: the parent node is to be deleted.
        if (empty($children)) {
            return false;
        }

        $kept = array();

        // Text nodes only survive when #PCDATA is in the lookup.
        $text_ok = isset($this->elements['#PCDATA']);

        // Cleared as soon as one non-whitespace node is encountered.
        $only_whitespace = true;

        // Work stack, reversed so that array_pop() yields document order.
        $pending = array_reverse($children);
        while (!empty($pending)) {
            $current = array_pop($pending);

            if (!empty($current->is_whitespace)) {
                $kept[] = $current;
                continue;
            }
            $only_whitespace = false;

            if (isset($this->elements[$current->name])) {
                $kept[] = $current;
            } elseif ($text_ok && $current instanceof HTMLPurifier_Node_Text) {
                // special case text
                // XXX One of these ought to be redundant or something
                $kept[] = $current;
            } elseif ($current instanceof HTMLPurifier_Node_Element) {
                // Spill the element's children onto the stack, last child
                // first, so they are processed in their original order.
                // ToDo: Make configurable
                for ($i = count($current->children) - 1; $i >= 0; $i--) {
                    $pending[] = $current->children[$i];
                }
            }
            // anything else is silently dropped
        }

        if (empty($kept)) {
            return false;
        }
        if ($only_whitespace) {
            $this->whitespace = true;
            return false;
        }
        return $kept;
    }
}
14890
14891
14892
14893
14894
14895/**
 * Definition that allows a set of elements, and also allows the parent to
 * have no children at all.
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
 *       really, one shouldn't inherit from the other.  Only altered behavior
 *       is to overload a returned false with an array (or with true when
 *       there were no children to begin with).  Thus, it will never
 *       return false.
14901 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'optional';

    /**
     * Runs the parent validation and converts its false result into a
     * non-false value.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The parent's result when it is not false;
     *         otherwise true for empty input, the original children when
     *         the parent flagged them as all whitespace, or an empty array
     */
    public function validateChildren($children, $config, $context)
    {
        $validated = parent::validateChildren($children, $config, $context);
        if ($validated !== false) {
            return $validated;
        }

        // The parent rejected the children; we assume that $children
        // itself was not modified by the parent call.
        if (empty($children)) {
            return true;
        }
        return $this->whitespace ? $children : array();
    }
}
14936
14937
14938
14939
14940
14941/**
14942 * Takes the contents of blockquote when in strict and reformats for validation.
14943 */
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
    /**
     * The element lookup this definition was constructed with; captured
     * on first use by init().
     * @type array
     */
    protected $real_elements;

    /**
     * The 'Flow' content set plus #PCDATA; temporarily substituted for
     * $elements while the parent validation runs.
     * @type array
     */
    protected $fake_elements;

    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'strictblockquote';

    /**
     * Whether init() has already populated the two element lookups.
     * @type bool
     */
    protected $init = false;

    /**
     * @param HTMLPurifier_Config $config
     * @return array
     * @note We don't want MakeWellFormed to auto-close inline elements since
     *       they might be allowed.
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->fake_elements;
    }

    /**
     * Validates the children against the widened element set and then
     * groups every run of nodes that are not really allowed (non-whitespace
     * text, or elements missing from the real lookup) inside a block
     * wrapper element.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // trick the parent class into thinking it allows more
        $this->elements = $this->fake_elements;
        $result = parent::validateChildren($children, $config, $context);
        $this->elements = $this->real_elements;

        if ($result === false) {
            return array();
        }
        if ($result === true) {
            $result = $children;
        }

        $def = $config->getHTMLDefinition();
        // name of the element used to wrap stray content; read once here
        $block_wrap_name = $def->info_block_wrapper;
        // false while no wrapper is open, otherwise the open wrapper node
        $block_wrap = false;
        $ret = array();

        foreach ($result as $node) {
            if ($block_wrap === false) {
                // open a wrapper at the first node that is not really allowed
                if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
                    ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) {
                    $block_wrap = new HTMLPurifier_Node_Element($block_wrap_name);
                    $ret[] = $block_wrap;
                }
            } else {
                // a really-allowed element closes the open wrapper
                if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) {
                    $block_wrap = false;
                }
            }
            if ($block_wrap) {
                $block_wrap->children[] = $node;
            } else {
                $ret[] = $node;
            }
        }
        return $ret;
    }

    /**
     * Populates $real_elements and $fake_elements the first time it runs;
     * later calls do nothing.
     *
     * @param HTMLPurifier_Config $config
     */
    private function init($config)
    {
        if (!$this->init) {
            $def = $config->getHTMLDefinition();
            // allow all inline elements
            $this->real_elements = $this->elements;
            $this->fake_elements = $def->info_content_sets['Flow'];
            $this->fake_elements['#PCDATA'] = true;
            $this->init = true;
        }
    }
}
15047
15048
15049
15050
15051
15052/**
15053 * Definition for tables.  The general idea is to extract out all of the
15054 * essential bits, and then reconstruct it later.
15055 *
15056 * This is a bit confusing, because the DTDs and the W3C
15057 * validators seem to disagree on the appropriate definition. The
15058 * DTD claims:
15059 *
15060 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
15061 *
15062 * But actually, the HTML4 spec then has this to say:
15063 *
15064 *      The TBODY start tag is always required except when the table
15065 *      contains only one table body and no table head or foot sections.
15066 *      The TBODY end tag may always be safely omitted.
15067 *
15068 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
15069 * of on crack.
15070 *
15071 * The definition changed again in XHTML1.1; and in my opinion, this
15072 * formulation makes the most sense.
15073 *
15074 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
15075 *
15076 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
15077 * If we encounter a thead, tfoot or tbody, we are placed in the former
15078 * mode, and we *must* wrap any stray tr segments with a tbody. But if
15079 * we don't run into any of them, just have tr tags is OK.
15080 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children into caption, column, head, foot and body
     * buckets and re-emits them in that order. Whitespace and comment
     * nodes are attached to the bucket of the element seen before them.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Reordered children, or false when there are no
     *         children or no tr/tbody content was found
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
        // <tr>s with a <tbody>.

        // $ws_accum always aliases the bucket that trailing whitespace and
        // comments should currently be appended to.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever. (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmutate the first and last entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (never false), so test for emptiness
        if (!empty($cols)) {
            $ret = array_merge($ret, $cols);
        }
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            // NOTE(review): comment nodes can be appended to $content above;
            // unless their name is one of the cases below they are not
            // emitted in this branch, while the non-tbody branch keeps
            // them. Confirm this is intended.
            foreach ($content as $node) {
                switch ($node->name) {
                    case 'tbody':
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        //assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}
15272
15273
15274
15275
15276
/**
 * Definition cache that forwards every operation to another cache object
 * held in $cache.
 */
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
{

    /**
     * Cache object we are decorating
     * @type HTMLPurifier_DefinitionCache
     */
    public $cache;

    /**
     * The name of the decorator
     * @var string
     */
    public $name;

    /**
     * Nothing to set up; the wrapped cache is attached by decorate().
     */
    public function __construct()
    {
    }

    /**
     * Lazy decorator function: produces a copy of this decorator that is
     * bound to the given cache and takes over that cache's type.
     *
     * @param HTMLPurifier_DefinitionCache $cache Reference to cache object to decorate
     * @return HTMLPurifier_DefinitionCache_Decorator
     */
    public function decorate(&$cache)
    {
        $wrapper = $this->copy();
        $wrapper->type = $cache->type;
        // reference is necessary for mocks in PHP 4
        $wrapper->cache =& $cache;
        return $wrapper;
    }

    /**
     * Cross-compatible clone substitute
     * @return HTMLPurifier_DefinitionCache_Decorator
     */
    public function copy()
    {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator();
        return $fresh;
    }

    /**
     * Forwards to the wrapped cache's add().
     *
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function add($def, $config)
    {
        $inner = $this->cache;
        return $inner->add($def, $config);
    }

    /**
     * Forwards to the wrapped cache's set().
     *
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function set($def, $config)
    {
        $inner = $this->cache;
        return $inner->set($def, $config);
    }

    /**
     * Forwards to the wrapped cache's replace().
     *
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function replace($def, $config)
    {
        $inner = $this->cache;
        return $inner->replace($def, $config);
    }

    /**
     * Forwards to the wrapped cache's get().
     *
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function get($config)
    {
        $inner = $this->cache;
        return $inner->get($config);
    }

    /**
     * Forwards to the wrapped cache's remove().
     *
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function remove($config)
    {
        $inner = $this->cache;
        return $inner->remove($config);
    }

    /**
     * Forwards to the wrapped cache's flush().
     *
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function flush($config)
    {
        $inner = $this->cache;
        return $inner->flush($config);
    }

    /**
     * Forwards to the wrapped cache's cleanup().
     *
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the wrapped cache returns
     */
    public function cleanup($config)
    {
        $inner = $this->cache;
        return $inner->cleanup($config);
    }
}
15385
15386
15387
15388
15389
15390/**
15391 * Null cache object to use when no caching is on.
15392 */
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
{

    /**
     * Never finds anything.
     *
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function get($config)
    {
        return false;
    }

    /**
     * Stores nothing.
     *
     * @param HTMLPurifier_Definition $def Ignored
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function add($def, $config)
    {
        return false;
    }

    /**
     * Stores nothing.
     *
     * @param HTMLPurifier_Definition $def Ignored
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function set($def, $config)
    {
        return false;
    }

    /**
     * Stores nothing.
     *
     * @param HTMLPurifier_Definition $def Ignored
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function replace($def, $config)
    {
        return false;
    }

    /**
     * Has nothing to remove.
     *
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function remove($config)
    {
        return false;
    }

    /**
     * Has nothing to flush.
     *
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function flush($config)
    {
        return false;
    }

    /**
     * Has nothing to clean up.
     *
     * @param HTMLPurifier_Config $config Ignored
     * @return bool Always false
     */
    public function cleanup($config)
    {
        return false;
    }
}
15462
15463
15464
15465
15466
15467class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCache
15468{
15469
15470    /**
15471     * @param HTMLPurifier_Definition $def
15472     * @param HTMLPurifier_Config $config
     * @return int|bool|null Null when checkDefType() rejects $def
15474     */
15475    public function add($def, $config)
15476    {
15477        if (!$this->checkDefType($def)) {
15478            return;
15479        }
15480        $file = $this->generateFilePath($config);
15481        if (file_exists($file)) {
15482            return false;
15483        }
15484        if (!$this->_prepareDir($config)) {
15485            return false;
15486        }
15487        return $this->_write($file, serialize($def), $config);
15488    }
15489
15490    /**
15491     * @param HTMLPurifier_Definition $def
15492     * @param HTMLPurifier_Config $config
     * @return int|bool|null Null when checkDefType() rejects $def
15494     */
15495    public function set($def, $config)
15496    {
15497        if (!$this->checkDefType($def)) {
15498            return;
15499        }
15500        $file = $this->generateFilePath($config);
15501        if (!$this->_prepareDir($config)) {
15502            return false;
15503        }
15504        return $this->_write($file, serialize($def), $config);
15505    }
15506
15507    /**
15508     * @param HTMLPurifier_Definition $def
15509     * @param HTMLPurifier_Config $config
15510     * @return int|bool
15511     */
15512    public function replace($def, $config)
15513    {
15514        if (!$this->checkDefType($def)) {
15515            return;
15516        }
15517        $file = $this->generateFilePath($config);
15518        if (!file_exists($file)) {
15519            return false;
15520        }
15521        if (!$this->_prepareDir($config)) {
15522            return false;
15523        }
15524        return $this->_write($file, serialize($def), $config);
15525    }
15526
15527    /**
15528     * @param HTMLPurifier_Config $config
15529     * @return bool|HTMLPurifier_Config
15530     */
15531    public function get($config)
15532    {
15533        $file = $this->generateFilePath($config);
15534        if (!file_exists($file)) {
15535            return false;
15536        }
15537        return unserialize(file_get_contents($file));
15538    }
15539
15540    /**
15541     * @param HTMLPurifier_Config $config
15542     * @return bool
15543     */
15544    public function remove($config)
15545    {
15546        $file = $this->generateFilePath($config);
15547        if (!file_exists($file)) {
15548            return false;
15549        }
15550        return unlink($file);
15551    }
15552
15553    /**
15554     * @param HTMLPurifier_Config $config
15555     * @return bool
15556     */
15557    public function flush($config)
15558    {
15559        if (!$this->_prepareDir($config)) {
15560            return false;
15561        }
15562        $dir = $this->generateDirectoryPath($config);
15563        $dh = opendir($dir);
15564        // Apparently, on some versions of PHP, readdir will return
15565        // an empty string if you pass an invalid argument to readdir.
15566        // So you need this test.  See #49.
15567        if (false === $dh) {
15568            return false;
15569        }
15570        while (false !== ($filename = readdir($dh))) {
15571            if (empty($filename)) {
15572                continue;
15573            }
15574            if ($filename[0] === '.') {
15575                continue;
15576            }
15577            unlink($dir . '/' . $filename);
15578        }
15579        closedir($dh);
15580        return true;
15581    }
15582
15583    /**
15584     * @param HTMLPurifier_Config $config
15585     * @return bool
15586     */
15587    public function cleanup($config)
15588    {
15589        if (!$this->_prepareDir($config)) {
15590            return false;
15591        }
15592        $dir = $this->generateDirectoryPath($config);
15593        $dh = opendir($dir);
15594        // See #49 (and above).
15595        if (false === $dh) {
15596            return false;
15597        }
15598        while (false !== ($filename = readdir($dh))) {
15599            if (empty($filename)) {
15600                continue;
15601            }
15602            if ($filename[0] === '.') {
15603                continue;
15604            }
15605            $key = substr($filename, 0, strlen($filename) - 4);
15606            if ($this->isOld($key, $config)) {
15607                unlink($dir . '/' . $filename);
15608            }
15609        }
15610        closedir($dh);
15611        return true;
15612    }
15613
15614    /**
15615     * Generates the file path to the serial file corresponding to
15616     * the configuration and definition name
15617     * @param HTMLPurifier_Config $config
15618     * @return string
15619     * @todo Make protected
15620     */
15621    public function generateFilePath($config)
15622    {
15623        $key = $this->generateKey($config);
15624        return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
15625    }
15626
15627    /**
15628     * Generates the path to the directory contain this cache's serial files
15629     * @param HTMLPurifier_Config $config
15630     * @return string
15631     * @note No trailing slash
15632     * @todo Make protected
15633     */
15634    public function generateDirectoryPath($config)
15635    {
15636        $base = $this->generateBaseDirectoryPath($config);
15637        return $base . '/' . $this->type;
15638    }
15639
15640    /**
15641     * Generates path to base directory that contains all definition type
15642     * serials
15643     * @param HTMLPurifier_Config $config
15644     * @return mixed|string
15645     * @todo Make protected
15646     */
15647    public function generateBaseDirectoryPath($config)
15648    {
15649        $base = $config->get('Cache.SerializerPath');
15650        $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
15651        return $base;
15652    }
15653
15654    /**
15655     * Convenience wrapper function for file_put_contents
15656     * @param string $file File name to write to
15657     * @param string $data Data to write into file
15658     * @param HTMLPurifier_Config $config
15659     * @return int|bool Number of bytes written if success, or false if failure.
15660     */
15661    private function _write($file, $data, $config)
15662    {
15663        $result = file_put_contents($file, $data);
15664        if ($result !== false) {
15665            // set permissions of the new file (no execute)
15666            $chmod = $config->get('Cache.SerializerPermissions');
15667            if ($chmod !== null) {
15668                chmod($file, $chmod & 0666);
15669            }
15670        }
15671        return $result;
15672    }
15673
15674    /**
15675     * Prepares the directory that this type stores the serials in
15676     * @param HTMLPurifier_Config $config
15677     * @return bool True if successful
15678     */
15679    private function _prepareDir($config)
15680    {
15681        $directory = $this->generateDirectoryPath($config);
15682        $chmod = $config->get('Cache.SerializerPermissions');
15683        if ($chmod === null) {
15684            if (!@mkdir($directory) && !is_dir($directory)) {
15685                trigger_error(
15686                    'Could not create directory ' . $directory . '',
15687                    E_USER_WARNING
15688                );
15689                return false;
15690            }
15691            return true;
15692        }
15693        if (!is_dir($directory)) {
15694            $base = $this->generateBaseDirectoryPath($config);
15695            if (!is_dir($base)) {
15696                trigger_error(
15697                    'Base directory ' . $base . ' does not exist,
15698                    please create or change using %Cache.SerializerPath',
15699                    E_USER_WARNING
15700                );
15701                return false;
15702            } elseif (!$this->_testPermissions($base, $chmod)) {
15703                return false;
15704            }
15705            if (!@mkdir($directory, $chmod) && !is_dir($directory)) {
15706                trigger_error(
15707                    'Could not create directory ' . $directory . '',
15708                    E_USER_WARNING
15709                );
15710                return false;
15711            }
15712            if (!$this->_testPermissions($directory, $chmod)) {
15713                return false;
15714            }
15715        } elseif (!$this->_testPermissions($directory, $chmod)) {
15716            return false;
15717        }
15718        return true;
15719    }
15720
15721    /**
15722     * Tests permissions on a directory and throws out friendly
15723     * error messages and attempts to chmod it itself if possible
15724     * @param string $dir Directory path
15725     * @param int $chmod Permissions
15726     * @return bool True if directory is writable
15727     */
15728    private function _testPermissions($dir, $chmod)
15729    {
15730        // early abort, if it is writable, everything is hunky-dory
15731        if (is_writable($dir)) {
15732            return true;
15733        }
15734        if (!is_dir($dir)) {
15735            // generally, you'll want to handle this beforehand
15736            // so a more specific error message can be given
15737            trigger_error(
15738                'Directory ' . $dir . ' does not exist',
15739                E_USER_WARNING
15740            );
15741            return false;
15742        }
15743        if (function_exists('posix_getuid') && $chmod !== null) {
15744            // POSIX system, we can give more specific advice
15745            if (fileowner($dir) === posix_getuid()) {
15746                // we can chmod it ourselves
15747                $chmod = $chmod | 0700;
15748                if (chmod($dir, $chmod)) {
15749                    return true;
15750                }
15751            } elseif (filegroup($dir) === posix_getgid()) {
15752                $chmod = $chmod | 0070;
15753            } else {
15754                // PHP's probably running as nobody, so we'll
15755                // need to give global permissions
15756                $chmod = $chmod | 0777;
15757            }
15758            trigger_error(
15759                'Directory ' . $dir . ' not writable, ' .
15760                'please chmod to ' . decoct($chmod),
15761                E_USER_WARNING
15762            );
15763        } else {
15764            // generic error message
15765            trigger_error(
15766                'Directory ' . $dir . ' not writable, ' .
15767                'please alter file permissions',
15768                E_USER_WARNING
15769            );
15770        }
15771        return false;
15772    }
15773}
15774
15775
15776
15777
15778
15779/**
15780 * Definition cache decorator class that cleans up the cache
15781 * whenever there is a cache miss.
15782 */
class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends HTMLPurifier_DefinitionCache_Decorator
{
    /**
     * @type string
     */
    public $name = 'Cleanup';

    /**
     * Creates a fresh, unconfigured decorator of this class.
     * @return HTMLPurifier_DefinitionCache_Decorator_Cleanup
     */
    public function copy()
    {
        return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
    }

    /**
     * Delegates to the parent's add() and runs a cleanup when the result
     * is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's add() returned
     */
    public function add($def, $config)
    {
        return $this->cleanupIfFalsy(parent::add($def, $config), $config);
    }

    /**
     * Delegates to the parent's set() and runs a cleanup when the result
     * is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's set() returned
     */
    public function set($def, $config)
    {
        return $this->cleanupIfFalsy(parent::set($def, $config), $config);
    }

    /**
     * Delegates to the parent's replace() and runs a cleanup when the
     * result is falsy.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's replace() returned
     */
    public function replace($def, $config)
    {
        return $this->cleanupIfFalsy(parent::replace($def, $config), $config);
    }

    /**
     * Delegates to the parent's get() and runs a cleanup when the result
     * is falsy.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's get() returned
     */
    public function get($config)
    {
        return $this->cleanupIfFalsy(parent::get($config), $config);
    }

    /**
     * Triggers the parent's cleanup() when the given outcome is falsy and
     * hands the outcome back untouched.
     * @param mixed $outcome Result of the delegated cache operation
     * @param HTMLPurifier_Config $config
     * @return mixed The unchanged $outcome
     */
    private function cleanupIfFalsy($outcome, $config)
    {
        if (!$outcome) {
            parent::cleanup($config);
        }
        return $outcome;
    }
}
15853
15854
15855
15856
15857
15858/**
15859 * Definition cache decorator class that saves all cache retrievals
15860 * to PHP's memory; good for unit tests or circumstances where
15861 * there are lots of configuration objects floating around.
15862 */
class HTMLPurifier_DefinitionCache_Decorator_Memory extends HTMLPurifier_DefinitionCache_Decorator
{
    /**
     * In-memory copies of definitions, indexed by generateKey($config).
     * @type array
     */
    protected $definitions;

    /**
     * @type string
     */
    public $name = 'Memory';

    /**
     * Creates a fresh decorator of this class with no memoized entries.
     * @return HTMLPurifier_DefinitionCache_Decorator_Memory
     */
    public function copy()
    {
        return new HTMLPurifier_DefinitionCache_Decorator_Memory();
    }

    /**
     * Delegates to the parent's add() and memoizes $def when it succeeds.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's add() returned
     */
    public function add($def, $config)
    {
        $status = parent::add($def, $config);
        if ($status) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $status;
    }

    /**
     * Delegates to the parent's set() and memoizes $def when it succeeds.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's set() returned
     */
    public function set($def, $config)
    {
        $status = parent::set($def, $config);
        if ($status) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $status;
    }

    /**
     * Delegates to the parent's replace() and memoizes $def when it
     * succeeds.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's replace() returned
     */
    public function replace($def, $config)
    {
        $status = parent::replace($def, $config);
        if ($status) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $status;
    }

    /**
     * Returns the memoized entry for this configuration, asking the
     * parent (and memoizing its answer) on the first request.
     * @param HTMLPurifier_Config $config
     * @return mixed
     */
    public function get($config)
    {
        $key = $this->generateKey($config);
        if (isset($this->definitions[$key])) {
            return $this->definitions[$key];
        }
        $this->definitions[$key] = parent::get($config);
        return $this->definitions[$key];
    }

    /**
     * Delegates to the parent's remove() and forgets the memoized entry,
     * so a later get() cannot hand back a definition that was removed.
     * The entry is dropped even if the parent reports failure; the next
     * get() then simply consults the parent again.
     * NOTE(review): relies on the parent decorator implementing remove()
     * (not visible in this part of the file) -- confirm.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's remove() returned
     */
    public function remove($config)
    {
        $status = parent::remove($config);
        unset($this->definitions[$this->generateKey($config)]);
        return $status;
    }

    /**
     * Delegates to the parent's flush() and forgets every memoized entry,
     * so later get() calls cannot hand back flushed definitions.
     * NOTE(review): relies on the parent decorator implementing flush()
     * (not visible in this part of the file) -- confirm.
     * @param HTMLPurifier_Config $config
     * @return mixed Whatever the parent's flush() returned
     */
    public function flush($config)
    {
        $status = parent::flush($config);
        $this->definitions = array();
        return $status;
    }
}
15939
15940
15941
15942
15943
15944/**
15945 * XHTML 1.1 Bi-directional Text Module, defines elements that
15946 * declare directionality of content. Text Extension Module.
15947 */
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Bdo';

    /**
     * The 'dir' entry starts out as false and is replaced with the real
     * definition inside setup().
     * @type array
     */
    public $attr_collections = array(
        'I18N' => array('dir' => false)
    );

    /**
     * Registers the bdo element, attaches its post-transform and fills in
     * the I18N 'dir' attribute definition.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The Abstract Module specification has the attribute
        // inclusions wrong for bdo: bdo allows Lang
        $includes = array('Core', 'Lang');
        $attributes = array(
            'dir' => 'Enum#ltr,rtl', // required
        );
        $element = $this->addElement('bdo', 'Inline', 'Inline', $includes, $attributes);
        $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir();

        $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
    }
}
15984
15985
15986
15987
15988
/**
 * Module that declares no elements, only the attribute collections
 * 'Core', 'Lang', 'I18N' and 'Common'.
 */
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * @type string
     */
    public $name = 'CommonAttributes';

    /**
     * Attribute collections provided by this module.
     * NOTE(review): the lists stored under key 0 look like the names of
     * other collections to include (e.g. 'Common' pulling in 'Core' and
     * 'I18N') -- confirm against the code that consumes attr_collections.
     * @type array
     */
    public $attr_collections = array(
        'Core' => array(
            0 => array('Style'),
            // 'xml:space' => false,
            'class' => 'Class',
            'id' => 'ID',
            'title' => 'CDATA',
        ),
        // declared empty here
        'Lang' => array(),
        'I18N' => array(
            0 => array('Lang'), // proprietary, for xml:lang/lang
        ),
        'Common' => array(
            0 => array('Core', 'I18N')
        )
    );
}
16016
16017
16018
16019
16020
16021/**
16022 * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
16023 * Module.
16024 */
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Edit';

    /**
     * Registers the del and ins elements, which share one content model
     * and one attribute set.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $model = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
        $extra = array(
            'cite' => 'URI',
            // 'datetime' => 'Datetime', // not implemented
        );
        foreach (array('del', 'ins') as $tag) {
            $this->addElement($tag, 'Inline', $model, 'Common', $extra);
        }
    }

    // HTML 4.01 specifies that ins/del must not contain block
    // elements when used in an inline context; chameleon is
    // a complicated workaround to achieve this effect.

    // The content model reads "inline context ! block context"; the
    // exclamation mark is the separator that getChildDef() splits on.

    /**
     * @type bool
     */
    public $defines_child_def = true;

    /**
     * Builds the chameleon child definition from the two halves of the
     * element's content model; any other content model type is declined.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef_Chameleon|bool False when the content
     *      model type is not 'chameleon'.
     */
    public function getChildDef($def)
    {
        if ($def->content_model_type == 'chameleon') {
            $halves = explode('!', $def->content_model);
            return new HTMLPurifier_ChildDef_Chameleon($halves[0], $halves[1]);
        }
        return false;
    }
}
16072
16073
16074
16075
16076
16077/**
16078 * XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
16079 */
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
{
    /**
     * @type string
     */
    public $name = 'Forms';

    /**
     * @type bool
     */
    public $safe = false;

    /**
     * @type array
     */
    public $content_sets = array(
        'Block' => 'Form',
        'Inline' => 'Formctrl',
    );

    /**
     * Registers form, input, select, option, textarea, button, fieldset,
     * label, legend and optgroup, in that order, together with their
     * exclusions and attribute transforms.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // --- form -------------------------------------------------------
        $formAttr = array(
            'accept' => 'ContentTypes',
            'accept-charset' => 'Charsets',
            'action*' => 'URI',
            'method' => 'Enum#get,post',
            // really ContentType, but these two are the only ones used today
            'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
        );
        $form = $this->addElement(
            'form',
            'Form',
            'Required: Heading | List | Block | fieldset',
            'Common',
            $formAttr
        );
        $form->excludes = array('form' => true);

        // --- input ------------------------------------------------------
        $inputAttr = array(
            'accept' => 'ContentTypes',
            'accesskey' => 'Character',
            'alt' => 'Text',
            'checked' => 'Bool#checked',
            'disabled' => 'Bool#disabled',
            'maxlength' => 'Number',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'size' => 'Number',
            'src' => 'URI#embedded',
            'tabindex' => 'Number',
            'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
            'value' => 'CDATA',
        );
        $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', $inputAttr);
        $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();

        // --- select -----------------------------------------------------
        $selectAttr = array(
            'disabled' => 'Bool#disabled',
            'multiple' => 'Bool#multiple',
            'name' => 'CDATA',
            'size' => 'Number',
            'tabindex' => 'Number',
        );
        $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', $selectAttr);

        // --- option -----------------------------------------------------
        $optionAttr = array(
            'disabled' => 'Bool#disabled',
            'label' => 'Text',
            'selected' => 'Bool#selected',
            'value' => 'CDATA',
        );
        $this->addElement('option', false, 'Optional: #PCDATA', 'Common', $optionAttr);
        // It's illegal for there to be more than one selected, but not
        // be multiple. Also, no selected means undefined behavior. This might
        // be difficult to implement; perhaps an injector, or a context variable.

        // --- textarea ---------------------------------------------------
        $textareaAttr = array(
            'accesskey' => 'Character',
            'cols*' => 'Number',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'rows*' => 'Number',
            'tabindex' => 'Number',
        );
        $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', $textareaAttr);
        $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();

        // --- button -----------------------------------------------------
        $buttonAttr = array(
            'accesskey' => 'Character',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'tabindex' => 'Number',
            'type' => 'Enum#button,submit,reset',
            'value' => 'CDATA',
        );
        $button = $this->addElement(
            'button',
            'Formctrl',
            'Optional: #PCDATA | Heading | List | Block | Inline',
            'Common',
            $buttonAttr
        );

        // For exclusions, ideally we'd specify content sets, not literal elements
        $button->excludes = $this->makeLookup(
            // Form
            'form',
            'fieldset',
            // Formctrl
            'input',
            'select',
            'textarea',
            'label',
            'button',
            // as per HTML 4.01 spec, this is omitted by modularization
            'a',
            // legacy items
            'isindex',
            'iframe'
        );

        // Extra exclusion: img usemap="" is not permitted within this element.
        // We'll omit this for now, since we don't have any good way of
        // indicating it yet.

        // --- fieldset ---------------------------------------------------
        // This is HIGHLY user-unfriendly; we need a custom child-def for this
        $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');

        // --- label ------------------------------------------------------
        $labelAttr = array(
            'accesskey' => 'Character',
            // 'for' => 'IDREF', // IDREF not implemented, cannot allow
        );
        $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', $labelAttr);
        $label->excludes = array('label' => true);

        // --- legend -----------------------------------------------------
        $legendAttr = array(
            'accesskey' => 'Character',
        );
        $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', $legendAttr);

        // --- optgroup ---------------------------------------------------
        $optgroupAttr = array(
            'disabled' => 'Bool#disabled',
            'label*' => 'Text',
        );
        $this->addElement('optgroup', false, 'Required: option', 'Common', $optgroupAttr);
        // Don't forget an injector for <isindex>. This one's a little complex
        // because it maps to multiple elements.
    }
}
16263
16264
16265
16266
16267
16268/**
16269 * XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
16270 */
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Hypertext';

    /**
     * Registers the a element, flags it as formatting and excludes a
     * from its own content.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $linkAttr = array(
            // 'accesskey' => 'Character',
            // 'charset' => 'Charset',
            'href' => 'URI',
            // 'hreflang' => 'LanguageCode',
            'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
            'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
            // 'tabindex' => 'Number',
            // 'type' => 'ContentType',
        );
        $anchor = $this->addElement('a', 'Inline', 'Inline', 'Common', $linkAttr);
        $anchor->formatting = true;
        $anchor->excludes = array('a' => true);
    }
}
16304
16305
16306
16307
16308
16309/**
16310 * XHTML 1.1 Iframe Module provides inline frames.
16311 *
16312 * @note This module is not considered safe unless an Iframe
16313 * whitelisting mechanism is specified.  Currently, the only
16314 * such mechanism is %URL.SafeIframeRegexp
16315 */
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Iframe';

    /**
     * Unsafe by default; setup() flips this when %HTML.SafeIframe is set.
     * @type bool
     */
    public $safe = false;

    /**
     * Marks the module safe when %HTML.SafeIframe is enabled, then
     * registers the iframe element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $optedIn = $config->get('HTML.SafeIframe');
        if ($optedIn) {
            $this->safe = true;
        }

        $frameAttr = array(
            'src' => 'URI#embedded',
            'width' => 'Length',
            'height' => 'Length',
            'name' => 'ID',
            'scrolling' => 'Enum#yes,no,auto',
            'frameborder' => 'Enum#0,1',
            'longdesc' => 'URI',
            'marginheight' => 'Pixels',
            'marginwidth' => 'Pixels',
        );
        $this->addElement('iframe', 'Inline', 'Flow', 'Common', $frameAttr);
    }
}
16356
16357
16358
16359
16360
16361/**
16362 * XHTML 1.1 Image Module provides basic image embedding.
16363 * @note There is specialized code for removing empty images in
16364 *       HTMLPurifier_Strategy_RemoveForeignElements
16365 */
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{

    /**
     * @type string
     */
    public $name = 'Image';

    /**
     * Registers the img element. Width and height are pixel values capped
     * by %HTML.MaxImgLength, unless that directive is null or
     * %HTML.Trusted is on, in which case they become 'Length'.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $max = $config->get('HTML.MaxImgLength');
        // According to the spec, it's Length, but percents can
        // be abused, so we allow only Pixels.
        $cappedPixels = 'Pixels#' . $max;
        $imgAttr = array(
            'alt*' => 'Text',
            'height' => $cappedPixels,
            'width' => $cappedPixels,
            'longdesc' => 'URI',
            'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
        );
        $img = $this->addElement('img', 'Inline', 'Empty', 'Common', $imgAttr);

        if ($max === null || $config->get('HTML.Trusted')) {
            $img->attr['width'] = 'Length';
            $img->attr['height'] = 'Length';
        }

        // one transform instance is deliberately shared by the post and
        // pre transform lists
        $required = new HTMLPurifier_AttrTransform_ImgRequired();
        $img->attr_transform_post[] = $required;
        $img->attr_transform_pre[] = $required;
    }
}
16406
16407
16408
16409
16410
16411/**
16412 * XHTML 1.1 Legacy module defines elements that were previously
16413 * deprecated.
16414 *
16415 * @note Not all legacy elements have been implemented yet, which
16416 *       is a bit of a reverse problem as compared to browsers! In
16417 *       addition, this legacy module may implement a bit more than
16418 *       mandated by XHTML 1.1.
16419 *
16420 * This module can be used in combination with TransformToStrict in order
16421 * to transform as many deprecated elements as possible, but retain
16422 * questionably deprecated elements that do not have good alternatives
16423 * as well as transform elements that don't have an implementation.
16424 * See docs/ref-strictness.txt for more details.
16425 */
16426
16427class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
16428{
16429    /**
16430     * @type string
16431     */
16432    public $name = 'Legacy';
16433
16434    /**
16435     * @param HTMLPurifier_Config $config
16436     */
16437    public function setup($config)
16438    {
16439        $this->addElement(
16440            'basefont',
16441            'Inline',
16442            'Empty',
16443            null,
16444            array(
16445                'color' => 'Color',
16446                'face' => 'Text', // extremely broad, we should
16447                'size' => 'Text', // tighten it
16448                'id' => 'ID'
16449            )
16450        );
16451        $this->addElement('center', 'Block', 'Flow', 'Common');
16452        $this->addElement(
16453            'dir',
16454            'Block',
16455            'Required: li',
16456            'Common',
16457            array(
16458                'compact' => 'Bool#compact'
16459            )
16460        );
16461        $this->addElement(
16462            'font',
16463            'Inline',
16464            'Inline',
16465            array('Core', 'I18N'),
16466            array(
16467                'color' => 'Color',
16468                'face' => 'Text', // extremely broad, we should
16469                'size' => 'Text', // tighten it
16470            )
16471        );
16472        $this->addElement(
16473            'menu',
16474            'Block',
16475            'Required: li',
16476            'Common',
16477            array(
16478                'compact' => 'Bool#compact'
16479            )
16480        );
16481
16482        $s = $this->addElement('s', 'Inline', 'Inline', 'Common');
16483        $s->formatting = true;
16484
16485        $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common');
16486        $strike->formatting = true;
16487
16488        $u = $this->addElement('u', 'Inline', 'Inline', 'Common');
16489        $u->formatting = true;
16490
16491        // setup modifications to old elements
16492
16493        $align = 'Enum#left,right,center,justify';
16494
16495        $address = $this->addBlankElement('address');
16496        $address->content_model = 'Inline | #PCDATA | p';
16497        $address->content_model_type = 'optional';
16498        $address->child = false;
16499
16500        $blockquote = $this->addBlankElement('blockquote');
16501        $blockquote->content_model = 'Flow | #PCDATA';
16502        $blockquote->content_model_type = 'optional';
16503        $blockquote->child = false;
16504
16505        $br = $this->addBlankElement('br');
16506        $br->attr['clear'] = 'Enum#left,all,right,none';
16507
16508        $caption = $this->addBlankElement('caption');
16509        $caption->attr['align'] = 'Enum#top,bottom,left,right';
16510
16511        $div = $this->addBlankElement('div');
16512        $div->attr['align'] = $align;
16513
16514        $dl = $this->addBlankElement('dl');
16515        $dl->attr['compact'] = 'Bool#compact';
16516
16517        for ($i = 1; $i <= 6; $i++) {
16518            $h = $this->addBlankElement("h$i");
16519            $h->attr['align'] = $align;
16520        }
16521
16522        $hr = $this->addBlankElement('hr');
16523        $hr->attr['align'] = $align;
16524        $hr->attr['noshade'] = 'Bool#noshade';
16525        $hr->attr['size'] = 'Pixels';
16526        $hr->attr['width'] = 'Length';
16527
16528        $img = $this->addBlankElement('img');
16529        $img->attr['align'] = 'IAlign';
16530        $img->attr['border'] = 'Pixels';
16531        $img->attr['hspace'] = 'Pixels';
16532        $img->attr['vspace'] = 'Pixels';
16533
16534        // figure out this integer business
16535
16536        $li = $this->addBlankElement('li');
16537        $li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
16538        $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle';
16539
16540        $ol = $this->addBlankElement('ol');
16541        $ol->attr['compact'] = 'Bool#compact';
16542        $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
16543        $ol->attr['type'] = 'Enum#s:1,i,I,a,A';
16544
16545        $p = $this->addBlankElement('p');
16546        $p->attr['align'] = $align;
16547
16548        $pre = $this->addBlankElement('pre');
16549        $pre->attr['width'] = 'Number';
16550
16551        // script omitted
16552
16553        $table = $this->addBlankElement('table');
16554        $table->attr['align'] = 'Enum#left,center,right';
16555        $table->attr['bgcolor'] = 'Color';
16556
16557        $tr = $this->addBlankElement('tr');
16558        $tr->attr['bgcolor'] = 'Color';
16559
16560        $th = $this->addBlankElement('th');
16561        $th->attr['bgcolor'] = 'Color';
16562        $th->attr['height'] = 'Length';
16563        $th->attr['nowrap'] = 'Bool#nowrap';
16564        $th->attr['width'] = 'Length';
16565
16566        $td = $this->addBlankElement('td');
16567        $td->attr['bgcolor'] = 'Color';
16568        $td->attr['height'] = 'Length';
16569        $td->attr['nowrap'] = 'Bool#nowrap';
16570        $td->attr['width'] = 'Length';
16571
16572        $ul = $this->addBlankElement('ul');
16573        $ul->attr['compact'] = 'Bool#compact';
16574        $ul->attr['type'] = 'Enum#square,disc,circle';
16575
16576        // "safe" modifications to "unsafe" elements
16577        // WARNING: If you want to add support for an unsafe, legacy
16578        // attribute, make a new TrustedLegacy module with the trusted
16579        // bit set appropriately
16580
16581        $form = $this->addBlankElement('form');
16582        $form->content_model = 'Flow | #PCDATA';
16583        $form->content_model_type = 'optional';
16584        $form->attr['target'] = 'FrameTarget';
16585
16586        $input = $this->addBlankElement('input');
16587        $input->attr['align'] = 'IAlign';
16588
16589        $legend = $this->addBlankElement('legend');
16590        $legend->attr['align'] = 'LAlign';
16591    }
16592}
16593
16594
16595
16596
16597
16598/**
16599 * XHTML 1.1 List Module, defines list-oriented elements. Core Module.
16600 */
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'List';

    // The abstract schema describes the List content set as a full
    // "one or more" expression. It only ever appears inside an optional
    // declaration, though, so that subtlety is deliberately ignored here.
    // This could surprise a user who defines "List" and expects several
    // lists to be allowed, and the actual XML Schema may disagree, but
    // nested expressions of that kind are not supported without resorting
    // to the very inefficient Custom ChildDef.

    /**
     * Adds the List set to the Flow content set.
     * @type array
     */
    public $content_sets = array('Flow' => 'List');

    /**
     * Registers ol, ul, dl and their item elements.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Ordered and unordered lists are validated by the List ChildDef.
        // The 'wrap' property is consumed by MakeWellFormed; the original
        // authors consider this split unsatisfactory, since most of the
        // list handling now lives in the ChildDef and 'wrap' only needs to
        // carry enough information for autoclosing to work.
        foreach (array('ol', 'ul') as $tag) {
            $list = $this->addElement($tag, 'List', new HTMLPurifier_ChildDef_List(), 'Common');
            $list->wrap = 'li';
        }

        $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

        // Item elements belong to no content set (type is false).
        $item_contents = array(
            'li' => 'Flow',
            'dd' => 'Flow',
            'dt' => 'Inline',
        );
        foreach ($item_contents as $tag => $contents) {
            $this->addElement($tag, false, $contents, 'Common');
        }
    }
}
16645
16646
16647
16648
16649
/**
 * Module that declares the 'name' attribute (as CDATA) on a fixed list of
 * elements. Unless the HTML.Attr.Name.UseCDATA directive is enabled, each
 * of those elements also receives a NameSync post-transform.
 */
class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Name';

    /**
     * Adds the 'name' attribute to every element in the list below.
     * @param HTMLPurifier_Config $config Read for HTML.Attr.Name.UseCDATA
     */
    public function setup($config)
    {
        // The directive does not change while the loop runs, so it is
        // looked up once instead of once per element.
        $use_name_sync = !$config->get('HTML.Attr.Name.UseCDATA');

        $elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
        foreach ($elements as $name) {
            $element = $this->addBlankElement($name);
            $element->attr['name'] = 'CDATA';
            if ($use_name_sync) {
                // Each element gets its own transform instance.
                $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_NameSync();
            }
        }
    }
}
16672
16673
16674
16675
16676
16677/**
16678 * Module adds the nofollow attribute transformation to a tags.  It
16679 * is enabled by HTML.Nofollow
16680 */
class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Nofollow';

    /**
     * Appends the Nofollow attribute transform to the post-transforms of
     * the 'a' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $transform = new HTMLPurifier_AttrTransform_Nofollow();
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = $transform;
    }
}
16698
16699
16700
16701
16702
/**
 * Module that contributes a 'lang' attribute, validated as a language
 * code, to the Lang attribute collection. It declares no elements.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'NonXMLCommonAttributes';

    /**
     * Attribute collections contributed by this module.
     * @type array
     */
    public $attr_collections = array(
        'Lang' => array('lang' => 'LanguageCode'),
    );
}
16719
16720
16721
16722
16723
16724/**
16725 * XHTML 1.1 Object Module, defines elements for generic object inclusion
16726 * @warning Users will commonly use <embed> to cater to legacy browsers: this
16727 *      module does not allow this sort of behavior
16728 */
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Object';

    /**
     * This module is flagged as not safe.
     * @type bool
     */
    public $safe = false;

    /**
     * Registers the 'object' element and its 'param' child element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $object_attr = array(
            'archive' => 'URI',
            'classid' => 'URI',
            'codebase' => 'URI',
            'codetype' => 'Text',
            'data' => 'URI',
            'declare' => 'Bool#declare',
            'height' => 'Length',
            'name' => 'CDATA',
            'standby' => 'Text',
            'tabindex' => 'Number',
            'type' => 'ContentType',
            'width' => 'Length'
        );
        $this->addElement(
            'object',
            'Inline',
            'Optional: #PCDATA | Flow | param',
            'Common',
            $object_attr
        );

        // 'param' belongs to no content set and pulls in no attribute
        // collection; it is only reachable through object's content model.
        $param_attr = array(
            'id' => 'ID',
            'name*' => 'Text',
            'type' => 'Text',
            'value' => 'Text',
            'valuetype' => 'Enum#data,ref,object'
        );
        $this->addElement('param', false, 'Empty', null, $param_attr);
    }
}
16782
16783
16784
16785
16786
16787/**
16788 * XHTML 1.1 Presentation Module, defines simple presentation-related
16789 * markup. Text Extension Module.
16790 * @note The official XML Schema and DTD specs further divide this into
16791 *       two modules:
16792 *          - Block Presentation (hr)
16793 *          - Inline Presentation (b, big, i, small, sub, sup, tt)
16794 *       We have chosen not to heed this distinction, as content_sets
16795 *       provides satisfactory disambiguation.
16796 */
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Presentation';

    /**
     * Registers hr plus the inline presentation elements.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The only block-level element of the module.
        $this->addElement('hr', 'Block', 'Empty', 'Common');

        // Plain inline elements.
        foreach (array('sub', 'sup') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }

        // Inline elements that are also flagged as formatting.
        foreach (array('b', 'big', 'i', 'small', 'tt') as $tag) {
            $def = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $def->formatting = true;
        }
    }
}
16825
16826
16827
16828
16829
16830/**
16831 * Module defines proprietary tags and attributes in HTML.
16832 * @warning If this module is enabled, standards-compliance is off!
16833 */
class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Proprietary';

    /**
     * Registers the proprietary 'marquee' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $marquee_attr = array(
            // scrolling behaviour
            'direction' => 'Enum#left,right,up,down',
            'behavior' => 'Enum#alternate',
            // dimensions
            'width' => 'Length',
            'height' => 'Length',
            // numeric scrolling parameters
            'scrolldelay' => 'Number',
            'scrollamount' => 'Number',
            'loop' => 'Number',
            // appearance and spacing
            'bgcolor' => 'Color',
            'hspace' => 'Pixels',
            'vspace' => 'Pixels',
        );
        $this->addElement('marquee', 'Inline', 'Flow', 'Common', $marquee_attr);
    }
}
16866
16867
16868
16869
16870
16871/**
16872 * XHTML 1.1 Ruby Annotation Module, defines elements that indicate
 * short runs of text alongside base text for annotation or pronunciation.
16874 */
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Ruby';

    /**
     * Registers ruby and its component elements (rbc, rtc, rb, rt, rp).
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // The container accepts either the simple form
        // (rb, then rt or rp/rt/rp) or the complex form (rbc, rtc, rtc?).
        $ruby_model = 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))';
        $this->addElement('ruby', 'Inline', $ruby_model, 'Common');

        // Containers used by the complex form.
        $this->addElement('rbc', false, 'Required: rb', 'Common');
        $this->addElement('rtc', false, 'Required: rt', 'Common');

        // Both rb and rt list 'ruby' in their excludes.
        $no_ruby = array('ruby' => true);

        $base = $this->addElement('rb', false, 'Inline', 'Common');
        $base->excludes = $no_ruby;

        $text = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
        $text->excludes = $no_ruby;

        $this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
    }
}
16903
16904
16905
16906
16907
16908/**
16909 * A "safe" embed module. See SafeObject. This is a proprietary element.
16910 */
class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'SafeEmbed';

    /**
     * Registers 'embed' with a restricted attribute set and attaches the
     * SafeEmbed post-transform to it.
     * @param HTMLPurifier_Config $config Read for HTML.MaxImgLength
     */
    public function setup($config)
    {
        // Width and height share one Pixels type built from the
        // HTML.MaxImgLength directive.
        $pixels = 'Pixels#' . $config->get('HTML.MaxImgLength');

        $embed_attr = array(
            'src*' => 'URI#embedded',
            'type' => 'Enum#application/x-shockwave-flash',
            'width' => $pixels,
            'height' => $pixels,
            'allowscriptaccess' => 'Enum#never',
            'allownetworking' => 'Enum#internal',
            'flashvars' => 'Text',
            'wmode' => 'Enum#window,transparent,opaque',
            'name' => 'ID',
        );
        $def = $this->addElement('embed', 'Inline', 'Empty', 'Common', $embed_attr);
        $def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed();
    }
}
16944
16945
16946
16947
16948
16949/**
16950 * A "safe" object module. In theory, objects permitted by this module will
16951 * be safe, and untrusted users can be allowed to embed arbitrary flash objects
16952 * (maybe other types too, but only Flash is supported as of right now).
16953 * Highly experimental.
16954 */
class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'SafeObject';

    /**
     * Registers restricted 'object' and 'param' elements, attaches their
     * post-transforms and requests the SafeObject injector.
     * @param HTMLPurifier_Config $config Read for HTML.MaxImgLength
     */
    public function setup($config)
    {
        // NOTE: the element definitions below are not safe on their own;
        // the attribute transforms attached to them are a vital part of
        // ensuring safety.

        // Width and height share one Pixels type built from the
        // HTML.MaxImgLength directive.
        $pixels = 'Pixels#' . $config->get('HTML.MaxImgLength');

        // The only codebase value that is accepted.
        $codebase = new HTMLPurifier_AttrDef_Enum(
            array(
                'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0'
            )
        );

        $object_attr = array(
            // The spec does not technically require this value; it is
            // forced here regardless.
            'type' => 'Enum#application/x-shockwave-flash',
            'width' => $pixels,
            'height' => $pixels,
            'data' => 'URI#embedded',
            'codebase' => $codebase,
        );
        $object_def = $this->addElement(
            'object',
            'Inline',
            'Optional: param | Flow | #PCDATA',
            'Common',
            $object_attr
        );
        $object_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

        $param_attr = array(
            'id' => 'ID',
            'name*' => 'Text',
            'value' => 'Text'
        );
        $param_def = $this->addElement('param', false, 'Empty', false, $param_attr);
        $param_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam();

        $this->info_injector[] = 'SafeObject';
    }
}
17007
17008
17009
17010
17011
17012/**
17013 * A "safe" script module. No inline JS is allowed, and pointed to JS
17014 * files must match whitelist.
17015 */
class HTMLPurifier_HTMLModule_SafeScripting extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'SafeScripting';

    /**
     * Registers an empty 'script' element whose 'src' must be one of the
     * keys of the HTML.SafeScripting directive.
     * @param HTMLPurifier_Config $config Read for HTML.SafeScripting
     */
    public function setup($config)
    {
        // NOTE: the element definition below is not safe on its own; the
        // attribute transforms attached to it are a vital part of ensuring
        // safety.

        $whitelist = array_keys($config->get('HTML.SafeScripting'));

        $script_attr = array(
            // The spec does not technically require this value; it is
            // forced here regardless.
            'type' => 'Enum#text/javascript',
            'src*' => new HTMLPurifier_AttrDef_Enum($whitelist)
        );
        $def = $this->addElement('script', 'Inline', 'Empty', null, $script_attr);

        // A single transform instance is registered as both a pre- and a
        // post-transform.
        $required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $def->attr_transform_post[] = $required;
        $def->attr_transform_pre[] = $required;
    }
}
17048
17049
17050
17051
17052
17053/*
17054
17055WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
17056INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
17057
17058*/
17059
17060/**
17061 * XHTML 1.1 Scripting module, defines elements that are used to contain
17062 * information pertaining to executable scripts or the lack of support
17063 * for executable scripts.
17064 * @note This module does not contain inline scripting elements
17065 */
class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Scripting';

    /**
     * Elements defined by this module.
     * @type array
     */
    public $elements = array('script', 'noscript');

    /**
     * Both elements are added to the Block and the Inline content sets.
     * @type array
     */
    public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');

    /**
     * This module is flagged as not safe.
     * @type bool
     */
    public $safe = false;

    /**
     * Builds the 'noscript' and 'script' element definitions by hand and
     * stores them in $this->info.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // TODO: give noscript a custom child definition that auto-wraps
        // stray #PCDATA the way blockquote's custom definition does
        // (blockquote's cannot be reused as-is: its contents are optional
        // whereas noscript's are required).

        // TODO: move to the newer addElement() syntax; the main obstacle
        // is getting both content sets to work.

        // In theory noscript could be safe, but there is no apparent
        // reason to allow it.
        $noscript = new HTMLPurifier_ElementDef();
        $noscript->attr = array(0 => array('Common'));
        $noscript->content_model = 'Heading | List | Block';
        $noscript->content_model_type = 'required';
        $this->info['noscript'] = $noscript;

        $script = new HTMLPurifier_ElementDef();
        $script->attr = array(
            'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')),
            'src' => new HTMLPurifier_AttrDef_URI(true),
            'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript'))
        );
        $script->content_model = '#PCDATA';
        $script->content_model_type = 'optional';

        // A single transform instance is registered as both a pre- and a
        // post-transform.
        $required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $script->attr_transform_post[] = $required;
        $script->attr_transform_pre[] = $required;
        $this->info['script'] = $script;
    }
}
17122
17123
17124
17125
17126
17127/**
 * XHTML 1.1 Style Attribute Module, defines the style attribute and adds
 * it to the Core attribute collection.
17130 */
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'StyleAttribute';

    /**
     * Declares a Style collection and includes it in Core.
     * @type array
     */
    public $attr_collections = array(
        // This way of including the collection differs from the Abstract
        // Modules but agrees with the DTD and XML Schemas.
        'Style' => array('style' => false), // placeholder, replaced in setup()
        'Core' => array(0 => array('Style'))
    );

    /**
     * Replaces the 'style' placeholder with a CSS attribute definition.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $css = new HTMLPurifier_AttrDef_CSS();
        $this->attr_collections['Style']['style'] = $css;
    }
}
17156
17157
17158
17159
17160
17161/**
17162 * XHTML 1.1 Tables Module, fully defines accessible table elements.
17163 */
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Tables';

    /**
     * Registers table and all of its structural elements.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->addElement('caption', false, 'Inline', 'Common');

        $table_attr = array(
            'border' => 'Pixels',
            'cellpadding' => 'Length',
            'cellspacing' => 'Length',
            'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border',
            'rules' => 'Enum#none,groups,rows,cols,all',
            'summary' => 'Text',
            'width' => 'Length'
        );
        $this->addElement('table', 'Block', new HTMLPurifier_ChildDef_Table(), 'Common', $table_attr);

        // Alignment attributes shared by cells, rows, columns and row groups.
        $align_attr = array(
            'align' => 'Enum#left,center,right,justify,char',
            'charoff' => 'Length',
            'valign' => 'Enum#top,middle,bottom,baseline',
        );

        // Cells: cell-specific attributes first, alignment attributes after.
        $cell_attr = array(
            'abbr' => 'Text',
            'colspan' => 'Number',
            'rowspan' => 'Number',
            // As of HTML5 this attribute apparently only applies to 'th'.
            'scope' => 'Enum#row,col,rowgroup,colgroup',
        ) + $align_attr;
        foreach (array('td', 'th') as $tag) {
            $this->addElement($tag, false, 'Flow', 'Common', $cell_attr);
        }

        $this->addElement('tr', false, 'Required: td | th', 'Common', $align_attr);

        // Columns and column groups.
        $col_attr = array(
            'span' => 'Number',
            'width' => 'MultiLength',
        ) + $align_attr;
        $this->addElement('col', false, 'Empty', 'Common', $col_attr);
        $this->addElement('colgroup', false, 'Optional: col', 'Common', $col_attr);

        // Row groups.
        foreach (array('tbody', 'thead', 'tfoot') as $tag) {
            $this->addElement($tag, false, 'Required: tr', 'Common', $align_attr);
        }
    }
}
17232
17233
17234
17235
17236
17237/**
17238 * XHTML 1.1 Target Module, defines target attribute in link elements.
17239 */
class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Target';

    /**
     * Gives each listed element a 'target' attribute validated by the
     * FrameTarget attribute definition.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // Only 'a' is covered at present.
        foreach (array('a') as $tag) {
            $def = $this->addBlankElement($tag);
            // Assigns the whole attr array, not just a single key.
            $def->attr = array(
                'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget()
            );
        }
    }
}
17261
17262
17263
17264
17265
17266/**
17267 * Module adds the target=blank attribute transformation to a tags.  It
17268 * is enabled by HTML.TargetBlank
17269 */
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'TargetBlank';

    /**
     * Appends the TargetBlank attribute transform to the post-transforms
     * of the 'a' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $transform = new HTMLPurifier_AttrTransform_TargetBlank();
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = $transform;
    }
}
17286
17287
17288
17289
17290
17291/**
17292 * Module adds the target-based noopener attribute transformation to a tags.  It
17293 * is enabled by HTML.TargetNoopener
17294 */
class HTMLPurifier_HTMLModule_TargetNoopener extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'TargetNoopener';

    /**
     * Appends the TargetNoopener attribute transform to the
     * post-transforms of the 'a' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $transform = new HTMLPurifier_AttrTransform_TargetNoopener();
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = $transform;
    }
}
17310
17311
17312
17313/**
17314 * Module adds the target-based noreferrer attribute transformation to a tags.  It
17315 * is enabled by HTML.TargetNoreferrer
17316 */
class HTMLPurifier_HTMLModule_TargetNoreferrer extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'TargetNoreferrer';

    /**
     * Appends the TargetNoreferrer attribute transform to the
     * post-transforms of the 'a' element.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $transform = new HTMLPurifier_AttrTransform_TargetNoreferrer();
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = $transform;
    }
}
17332
17333
17334
17335/**
17336 * XHTML 1.1 Text Module, defines basic text containers. Core Module.
17337 * @note In the normative XML Schema specification, this module
17338 *       is further abstracted into the following modules:
17339 *          - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
17340 *          - Block Structural (div, p)
17341 *          - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
17342 *          - Inline Structural (br, span)
17343 *       This module, functionally, does not distinguish between these
17344 *       sub-modules, but the code is internally structured to reflect
17345 *       these distinctions.
17346 */
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
    /**
     * Module identifier.
     * @type string
     */
    public $name = 'Text';

    /**
     * Defines Flow as the combination of Heading, Block and Inline.
     * @type array
     */
    public $content_sets = array(
        'Flow' => 'Heading | Block | Inline'
    );

    /**
     * Registers the basic text containers, grouped by sub-module.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        // ---- Inline Phrasal ---------------------------------------------
        foreach (array('abbr', 'acronym', 'cite', 'dfn', 'kbd') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }
        $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
        foreach (array('samp', 'var') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }

        // These phrasal elements are also flagged as formatting.
        foreach (array('em', 'strong', 'code') as $tag) {
            $def = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $def->formatting = true;
        }

        // ---- Inline Structural ------------------------------------------
        $this->addElement('span', 'Inline', 'Inline', 'Common');
        $this->addElement('br', 'Inline', 'Empty', 'Core');

        // ---- Block Phrasal ----------------------------------------------
        $this->addElement('address', 'Block', 'Inline', 'Common');
        $this->addElement(
            'blockquote',
            'Block',
            'Optional: Heading | Block | List',
            'Common',
            array('cite' => 'URI')
        );

        // Elements listed in pre's excludes.
        $pre_def = $this->addElement('pre', 'Block', 'Inline', 'Common');
        $pre_def->excludes = $this->makeLookup('img', 'big', 'small', 'object', 'applet', 'font', 'basefont');

        foreach (range(1, 6) as $level) {
            $this->addElement('h' . $level, 'Heading', 'Inline', 'Common');
        }

        // ---- Block Structural -------------------------------------------
        // The autoclose table for 'p' is keyed by element name.
        $closes_p = array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul");
        $p_def = $this->addElement('p', 'Block', 'Inline', 'Common');
        $p_def->autoclose = array_flip($closes_p);

        $this->addElement('div', 'Block', 'Flow', 'Common');
    }
}
17418
17419
17420
17421
17422
17423/**
17424 * Abstract class for a set of proprietary modules that clean up (tidy)
17425 * poorly written HTML.
17426 * @todo Figure out how to protect some of these methods/properties
17427 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level.
     * @type array
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in.
     * Disabled by default (null means makeFixesForLevel() does nothing).
     * @type string
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel().
     * Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     * @type array
     */
    public $fixesForLevel = array(
        'light' => array(),
        'medium' => array(),
        'heavy' => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix survives when it is not listed in HTML.TidyRemove and is
     * either listed in HTML.TidyAdd or belongs to the configured
     * HTML.TidyLevel (or a lower level).
     *
     * @param HTMLPurifier_Config $config
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            if (isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * An unrecognized level raises an E_USER_WARNING and yields no fixes.
     *
     * @param string $level level identifier, see $levels for valid values
     * @return array Lookup table of fixes (fix name => true)
     */
    public function getFixesForLevel($level)
    {
        if ($level == $this->levels[0]) {
            return array();
        }
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }
        // The loop only runs to completion (without break) when no level
        // matched, in which case the counter has reached the level count.
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, every fix name is assigned to that level;
     * an unknown default level raises an E_USER_ERROR.
     *
     * @param array $fixes
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it
     * @param array $fixes Lookup table of fixes to activate
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: stored on the element
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // global transform: stored on the module itself
                        // under info_attr_transform_pre/post
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * The accepted shape is "element@attr#property", where every part is
     * optional: "font" is a tag transform, "img@align" an attribute
     * pre-transform, "@lang" a global attribute pre-transform, and
     * "blockquote#content_model_type" sets a property on the element.
     *
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependant, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);

    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes. It returns an empty array
     * (rather than nothing) so that setup() and populate() can iterate
     * over the result without raising a warning.
     *
     * @return array
     */
    public function makeFixes()
    {
        return array();
    }
}
17649
17650
17651
17652
17653
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'XMLCommonAttributes';

    /**
     * Contributes the xml:lang attribute, typed as LanguageCode, to the
     * 'Lang' attribute collection.
     * @type array
     */
    public $attr_collections = array(
        'Lang' => array('xml:lang' => 'LanguageCode'),
    );
}
17670
17671
17672
17673
17674
/**
 * Name is deprecated, but allowed in strict doctypes, so its fixes are
 * only enabled by default at the 'heavy' Tidy level.
 */
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tidy_Name';

    /**
     * Level that every fix of this module is filed under.
     * @type string
     */
    public $defaultLevel = 'heavy';

    /**
     * Registers the @name attribute transform for the a and img elements.
     *
     * Technically, @name is allowed even on strict, so authors may use it;
     * however, it's deprecated in future versions of XHTML.
     *
     * @return array
     */
    public function makeFixes()
    {
        // Both elements deliberately share one transform instance.
        $name_transform = new HTMLPurifier_AttrTransform_Name();

        return array(
            'a@name' => $name_transform,
            'img@name' => $name_transform,
        );
    }
}
17704
17705
17706
17707
17708
class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tidy_Proprietary';

    /**
     * Level that every fix of this module is filed under.
     * @type string
     */
    public $defaultLevel = 'light';

    /**
     * Registers transforms for the @background attribute on table-related
     * elements and for @height on table.
     * @return array
     */
    public function makeFixes()
    {
        $fixes = array();

        // Each element receives its own Background transform instance.
        $background_elements = array('table', 'td', 'th', 'tr', 'thead', 'tfoot', 'tbody');
        foreach ($background_elements as $element) {
            $fixes[$element . '@background'] = new HTMLPurifier_AttrTransform_Background();
        }

        $fixes['table@height'] = new HTMLPurifier_AttrTransform_Length('height');

        return $fixes;
    }
}
17739
17740
17741
17742
17743
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Builds the table of deprecated-markup fixes: tag transforms keyed by
     * element name and attribute transforms keyed by "element@attribute".
     *
     * Where several keys share one transform, they share a single object
     * instance. Insertion order is kept stable on purpose, since fixes
     * are later iterated in array order.
     *
     * @return array
     */
    public function makeFixes()
    {
        $fixes = array();

        // -- deprecated elements -----------------------------------------

        $fixes['font'] = new HTMLPurifier_TagTransform_Font();
        $fixes['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
        $fixes['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
        $fixes['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;');
        $fixes['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;');
        $fixes['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
        $fixes['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');

        // -- deprecated attributes ---------------------------------------

        // caption@align: we're following IE's behavior, not Firefox's, due
        // to the fact that no one supports caption-side:right, W3C included
        // (with CSS 2.1). This is a slightly unreasonable attribute!
        $caption_align = array(
            'left' => 'text-align:left;',
            'right' => 'text-align:right;',
            'top' => 'caption-side:top;',
            'bottom' => 'caption-side:bottom;' // not supported by IE
        );
        $fixes['caption@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $caption_align);

        // img@align
        $img_align = array(
            'left' => 'float:left;',
            'right' => 'float:right;',
            'top' => 'vertical-align:top;',
            'middle' => 'vertical-align:middle;',
            'bottom' => 'vertical-align:baseline;',
        );
        $fixes['img@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $img_align);

        // table@align
        $table_align = array(
            'left' => 'float:left;',
            'center' => 'margin-left:auto;margin-right:auto;',
            'right' => 'float:right;'
        );
        $fixes['table@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $table_align);

        // hr@align: we use both text-align and margin because these work
        // for different browsers (IE and Firefox, respectively) and the
        // melange makes for a pretty cross-compatible solution
        $hr_align = array(
            'left' => 'margin-left:0;margin-right:auto;text-align:left;',
            'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
            'right' => 'margin-left:auto;margin-right:0;text-align:right;'
        );
        $fixes['hr@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $hr_align);

        // @align for div, p and the headings: plain text-align mapping
        $text_align = array(
            'left' => 'text-align:left;',
            'right' => 'text-align:right;',
            'center' => 'text-align:center;',
            'justify' => 'text-align:justify;',
        );
        $text_align_fix = new HTMLPurifier_AttrTransform_EnumToCSS('align', $text_align);
        foreach (array('div', 'p', 'h6', 'h5', 'h4', 'h3', 'h2', 'h1') as $element) {
            $fixes[$element . '@align'] = $text_align_fix;
        }

        // @bgcolor for th, td, table (note: tr is not covered here)
        $bgcolor_fix = new HTMLPurifier_AttrTransform_BgColor();
        foreach (array('th', 'td', 'table') as $element) {
            $fixes[$element . '@bgcolor'] = $bgcolor_fix;
        }

        // img@border
        $fixes['img@border'] = new HTMLPurifier_AttrTransform_Border();

        // br@clear
        $br_clear = array(
            'left' => 'clear:left;',
            'right' => 'clear:right;',
            'all' => 'clear:both;',
            'none' => 'clear:none;',
        );
        $fixes['br@clear'] = new HTMLPurifier_AttrTransform_EnumToCSS('clear', $br_clear);

        // @height for th, td
        $cell_height_fix = new HTMLPurifier_AttrTransform_Length('height');
        foreach (array('th', 'td') as $element) {
            $fixes[$element . '@height'] = $cell_height_fix;
        }

        // img@hspace
        $fixes['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');

        // hr@noshade: this transformation is not precise but often good
        // enough; different browsers use different styles to designate
        // noshade
        $fixes['hr@noshade'] = new HTMLPurifier_AttrTransform_BoolToCSS(
            'noshade',
            'color:#808080;background-color:#808080;border:0;'
        );

        // @nowrap for th, td
        $nowrap_fix = new HTMLPurifier_AttrTransform_BoolToCSS('nowrap', 'white-space:nowrap;');
        foreach (array('th', 'td') as $element) {
            $fixes[$element . '@nowrap'] = $nowrap_fix;
        }

        // hr@size
        $fixes['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');

        // @type for ul, ol, li
        $ul_types = array(
            'disc' => 'list-style-type:disc;',
            'square' => 'list-style-type:square;',
            'circle' => 'list-style-type:circle;'
        );
        $ol_types = array(
            '1' => 'list-style-type:decimal;',
            'i' => 'list-style-type:lower-roman;',
            'I' => 'list-style-type:upper-roman;',
            'a' => 'list-style-type:lower-alpha;',
            'A' => 'list-style-type:upper-alpha;'
        );
        // Array union (not array_merge) so the numeric '1' key survives.
        $li_types = $ul_types + $ol_types;

        $fixes['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
        $fixes['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
        $fixes['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);

        // img@vspace
        $fixes['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');

        // @width for hr, th, td
        $width_fix = new HTMLPurifier_AttrTransform_Length('width');
        foreach (array('hr', 'th', 'td') as $element) {
            $fixes[$element . '@width'] = $width_fix;
        }

        return $fixes;
    }
}
17919
17920
17921
17922
17923
class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tidy_Strict';

    /**
     * Level that every fix of this module is filed under.
     * @type string
     */
    public $defaultLevel = 'light';

    /**
     * Flags that this module supplies its own child definition through
     * getChildDef().
     * @type bool
     */
    public $defines_child_def = true;

    /**
     * Extends the inherited fixes with one that marks blockquote's
     * content model type as 'strictblockquote'.
     * @return array
     */
    public function makeFixes()
    {
        $fixes = parent::makeFixes();
        $fixes['blockquote#content_model_type'] = 'strictblockquote';
        return $fixes;
    }

    /**
     * Resolves the 'strictblockquote' content model type to its child
     * definition; every other type is delegated to the parent class.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef_StrictBlockquote
     */
    public function getChildDef($def)
    {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return parent::getChildDef($def);
    }
}
17963
17964
17965
17966
17967
class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tidy_Transitional';

    /**
     * Level that every inherited fix is filed under.
     * @type string
     */
    public $defaultLevel = 'heavy';
}
17980
17981
17982
17983
17984
class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy
{
    /**
     * Identifier of this module.
     * @type string
     */
    public $name = 'Tidy_XHTML';

    /**
     * Level that every fix of this module is filed under.
     * @type string
     */
    public $defaultLevel = 'medium';

    /**
     * Registers a single transform for the lang attribute. The fix name
     * has no element part, so it is not tied to a particular element.
     * @return array
     */
    public function makeFixes()
    {
        return array(
            '@lang' => new HTMLPurifier_AttrTransform_Lang(),
        );
    }
}
18007
18008
18009
18010
18011
18012/**
18013 * Injector that auto paragraphs text in the root node based on
18014 * double-spacing.
18015 * @todo Ensure all states are unit tested, including variations as well.
18016 * @todo Make a graph of the flow control for this Injector.
18017 */
18018class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
18019{
18020    /**
18021     * @type string
18022     */
18023    public $name = 'AutoParagraph';
18024
18025    /**
18026     * @type array
18027     */
18028    public $needed = array('p');
18029
18030    /**
18031     * @return HTMLPurifier_Token_Start
18032     */
18033    private function _pStart()
18034    {
18035        $par = new HTMLPurifier_Token_Start('p');
18036        $par->armor['MakeWellFormed_TagClosedError'] = true;
18037        return $par;
18038    }
18039
18040    /**
18041     * @param HTMLPurifier_Token_Text $token
18042     */
18043    public function handleText(&$token)
18044    {
18045        $text = $token->data;
18046        // Does the current parent allow <p> tags?
18047        if ($this->allowsElement('p')) {
18048            if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
18049                // Note that we have differing behavior when dealing with text
18050                // in the anonymous root node, or a node inside the document.
18051                // If the text as a double-newline, the treatment is the same;
18052                // if it doesn't, see the next if-block if you're in the document.
18053
18054                $i = $nesting = null;
18055                if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
18056                    // State 1.1: ...    ^ (whitespace, then document end)
18057                    //               ----
18058                    // This is a degenerate case
18059                } else {
18060                    if (!$token->is_whitespace || $this->_isInline($current)) {
18061                        // State 1.2: PAR1
18062                        //            ----
18063
18064                        // State 1.3: PAR1\n\nPAR2
18065                        //            ------------
18066
18067                        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
18068                        //                 ------------
18069                        $token = array($this->_pStart());
18070                        $this->_splitText($text, $token);
18071                    } else {
18072                        // State 1.5: \n<hr />
18073                        //            --
18074                    }
18075                }
18076            } else {
18077                // State 2:   <div>PAR1... (similar to 1.4)
18078                //                 ----
18079
18080                // We're in an element that allows paragraph tags, but we're not
18081                // sure if we're going to need them.
18082                if ($this->_pLookAhead()) {
18083                    // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
18084                    //                 ----
18085                    // Note: This will always be the first child, since any
18086                    // previous inline element would have triggered this very
18087                    // same routine, and found the double newline. One possible
18088                    // exception would be a comment.
18089                    $token = array($this->_pStart(), $token);
18090                } else {
18091                    // State 2.2.1: <div>PAR1<div>
18092                    //                   ----
18093
18094                    // State 2.2.2: <div>PAR1<b>PAR1</b></div>
18095                    //                   ----
18096                }
18097            }
18098            // Is the current parent a <p> tag?
18099        } elseif (!empty($this->currentNesting) &&
18100            $this->currentNesting[count($this->currentNesting) - 1]->name == 'p') {
18101            // State 3.1: ...<p>PAR1
18102            //                  ----
18103
18104            // State 3.2: ...<p>PAR1\n\nPAR2
18105            //                  ------------
18106            $token = array();
18107            $this->_splitText($text, $token);
18108            // Abort!
18109        } else {
18110            // State 4.1: ...<b>PAR1
18111            //                  ----
18112
18113            // State 4.2: ...<b>PAR1\n\nPAR2
18114            //                  ------------
18115        }
18116    }
18117
18118    /**
18119     * @param HTMLPurifier_Token $token
18120     */
18121    public function handleElement(&$token)
18122    {
18123        // We don't have to check if we're already in a <p> tag for block
18124        // tokens, because the tag would have been autoclosed by MakeWellFormed.
18125        if ($this->allowsElement('p')) {
18126            if (!empty($this->currentNesting)) {
18127                if ($this->_isInline($token)) {
18128                    // State 1: <div>...<b>
18129                    //                  ---
18130                    // Check if this token is adjacent to the parent token
18131                    // (seek backwards until token isn't whitespace)
18132                    $i = null;
18133                    $this->backward($i, $prev);
18134
18135                    if (!$prev instanceof HTMLPurifier_Token_Start) {
18136                        // Token wasn't adjacent
18137                        if ($prev instanceof HTMLPurifier_Token_Text &&
18138                            substr($prev->data, -2) === "\n\n"
18139                        ) {
18140                            // State 1.1.4: <div><p>PAR1</p>\n\n<b>
18141                            //                                  ---
18142                            // Quite frankly, this should be handled by splitText
18143                            $token = array($this->_pStart(), $token);
18144                        } else {
18145                            // State 1.1.1: <div><p>PAR1</p><b>
18146                            //                              ---
18147                            // State 1.1.2: <div><br /><b>
18148                            //                         ---
18149                            // State 1.1.3: <div>PAR<b>
18150                            //                      ---
18151                        }
18152                    } else {
18153                        // State 1.2.1: <div><b>
18154                        //                   ---
18155                        // Lookahead to see if <p> is needed.
18156                        if ($this->_pLookAhead()) {
18157                            // State 1.3.1: <div><b>PAR1\n\nPAR2
18158                            //                   ---
18159                            $token = array($this->_pStart(), $token);
18160                        } else {
18161                            // State 1.3.2: <div><b>PAR1</b></div>
18162                            //                   ---
18163
18164                            // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
18165                            //                   ---
18166                        }
18167                    }
18168                } else {
18169                    // State 2.3: ...<div>
18170                    //               -----
18171                }
18172            } else {
18173                if ($this->_isInline($token)) {
18174                    // State 3.1: <b>
18175                    //            ---
18176                    // This is where the {p} tag is inserted, not reflected in
18177                    // inputTokens yet, however.
18178                    $token = array($this->_pStart(), $token);
18179                } else {
18180                    // State 3.2: <div>
18181                    //            -----
18182                }
18183
18184                $i = null;
18185                if ($this->backward($i, $prev)) {
18186                    if (!$prev instanceof HTMLPurifier_Token_Text) {
18187                        // State 3.1.1: ...</p>{p}<b>
18188                        //                        ---
18189                        // State 3.2.1: ...</p><div>
18190                        //                     -----
18191                        if (!is_array($token)) {
18192                            $token = array($token);
18193                        }
18194                        array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
18195                    } else {
18196                        // State 3.1.2: ...</p>\n\n{p}<b>
18197                        //                            ---
18198                        // State 3.2.2: ...</p>\n\n<div>
18199                        //                         -----
18200                        // Note: PAR<ELEM> cannot occur because PAR would have been
18201                        // wrapped in <p> tags.
18202                    }
18203                }
18204            }
18205        } else {
18206            // State 2.2: <ul><li>
18207            //                ----
18208            // State 2.4: <p><b>
18209            //               ---
18210        }
18211    }
18212
18213    /**
18214     * Splits up a text in paragraph tokens and appends them
18215     * to the result stream that will replace the original
18216     * @param string $data String text data that will be processed
18217     *    into paragraphs
18218     * @param HTMLPurifier_Token[] $result Reference to array of tokens that the
18219     *    tags will be appended onto
18220     */
18221    private function _splitText($data, &$result)
18222    {
18223        $raw_paragraphs = explode("\n\n", $data);
18224        $paragraphs = array(); // without empty paragraphs
18225        $needs_start = false;
18226        $needs_end = false;
18227
18228        $c = count($raw_paragraphs);
18229        if ($c == 1) {
18230            // There were no double-newlines, abort quickly. In theory this
18231            // should never happen.
18232            $result[] = new HTMLPurifier_Token_Text($data);
18233            return;
18234        }
18235        for ($i = 0; $i < $c; $i++) {
18236            $par = $raw_paragraphs[$i];
18237            if (trim($par) !== '') {
18238                $paragraphs[] = $par;
18239            } else {
18240                if ($i == 0) {
18241                    // Double newline at the front
18242                    if (empty($result)) {
18243                        // The empty result indicates that the AutoParagraph
18244                        // injector did not add any start paragraph tokens.
18245                        // This means that we have been in a paragraph for
18246                        // a while, and the newline means we should start a new one.
18247                        $result[] = new HTMLPurifier_Token_End('p');
18248                        $result[] = new HTMLPurifier_Token_Text("\n\n");
18249                        // However, the start token should only be added if
18250                        // there is more processing to be done (i.e. there are
18251                        // real paragraphs in here). If there are none, the
18252                        // next start paragraph tag will be handled by the
18253                        // next call to the injector
18254                        $needs_start = true;
18255                    } else {
18256                        // We just started a new paragraph!
18257                        // Reinstate a double-newline for presentation's sake, since
18258                        // it was in the source code.
18259                        array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
18260                    }
18261                } elseif ($i + 1 == $c) {
18262                    // Double newline at the end
18263                    // There should be a trailing </p> when we're finally done.
18264                    $needs_end = true;
18265                }
18266            }
18267        }
18268
18269        // Check if this was just a giant blob of whitespace. Move this earlier,
18270        // perhaps?
18271        if (empty($paragraphs)) {
18272            return;
18273        }
18274
18275        // Add the start tag indicated by \n\n at the beginning of $data
18276        if ($needs_start) {
18277            $result[] = $this->_pStart();
18278        }
18279
18280        // Append the paragraphs onto the result
18281        foreach ($paragraphs as $par) {
18282            $result[] = new HTMLPurifier_Token_Text($par);
18283            $result[] = new HTMLPurifier_Token_End('p');
18284            $result[] = new HTMLPurifier_Token_Text("\n\n");
18285            $result[] = $this->_pStart();
18286        }
18287
18288        // Remove trailing start token; Injector will handle this later if
18289        // it was indeed needed. This prevents from needing to do a lookahead,
18290        // at the cost of a lookbehind later.
18291        array_pop($result);
18292
18293        // If there is no need for an end tag, remove all of it and let
18294        // MakeWellFormed close it later.
18295        if (!$needs_end) {
18296            array_pop($result); // removes \n\n
18297            array_pop($result); // removes </p>
18298        }
18299    }
18300
18301    /**
18302     * Returns true if passed token is inline (and, ergo, allowed in
18303     * paragraph tags)
18304     * @param HTMLPurifier_Token $token
18305     * @return bool
18306     */
18307    private function _isInline($token)
18308    {
18309        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
18310    }
18311
18312    /**
18313     * Looks ahead in the token list and determines whether or not we need
18314     * to insert a <p> tag.
18315     * @return bool
18316     */
18317    private function _pLookAhead()
18318    {
18319        if ($this->currentToken instanceof HTMLPurifier_Token_Start) {
18320            $nesting = 1;
18321        } else {
18322            $nesting = 0;
18323        }
18324        $ok = false;
18325        $i = null;
18326        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
18327            $result = $this->_checkNeedsP($current);
18328            if ($result !== null) {
18329                $ok = $result;
18330                break;
18331            }
18332        }
18333        return $ok;
18334    }
18335
18336    /**
18337     * Determines if a particular token requires an earlier inline token
18338     * to get a paragraph. This should be used with _forwardUntilEndToken
18339     * @param HTMLPurifier_Token $current
18340     * @return bool
18341     */
18342    private function _checkNeedsP($current)
18343    {
18344        if ($current instanceof HTMLPurifier_Token_Start) {
18345            if (!$this->_isInline($current)) {
18346                // <div>PAR1<div>
18347                //      ----
18348                // Terminate early, since we hit a block element
18349                return false;
18350            }
18351        } elseif ($current instanceof HTMLPurifier_Token_Text) {
18352            if (strpos($current->data, "\n\n") !== false) {
18353                // <div>PAR1<b>PAR1\n\nPAR2
18354                //      ----
18355                return true;
18356            } else {
18357                // <div>PAR1<b>PAR1...
18358                //      ----
18359            }
18360        }
18361        return null;
18362    }
18363}
18364
18365
18366
18367
18368
18369/**
18370 * Injector that displays the URL of an anchor instead of linking to it, in addition to showing the text of the link.
18371 */
class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector
{
    /**
     * Name of this injector.
     * @type string
     */
    public $name = 'DisplayLinkURI';

    /**
     * Elements this injector requires.
     * @type array
     */
    public $needed = array('a');

    /**
     * Start and empty tags are left alone; all work happens in handleEnd().
     * @param $token
     */
    public function handleElement(&$token)
    {
    }

    /**
     * When the start tag paired with this end tag carries an href, strips
     * the href from that start tag and replaces the end tag with the end tag
     * followed by a text token holding the URL in parentheses.
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (!isset($token->start->attr['href'])) {
            // nothing to display
            return;
        }
        $href = $token->start->attr['href'];
        unset($token->start->attr['href']);
        $suffix = new HTMLPurifier_Token_Text(' (' . $href . ')');
        $token = array($token, $suffix);
    }
}
18405
18406
18407
18408
18409
18410/**
18411 * Injector that converts http, https and ftp text URLs to actual links.
18412 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'Linkify';

    /**
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Splits a text token around the URLs it contains and replaces it with
     * a sequence of text tokens and <a href="URL">URL</a> token triples.
     * The token is left untouched when <a> is not allowed at this point,
     * when the text contains no '://', or when the split itself fails.
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }

        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }

        // there is/are URL(s). Let's split the string.
        // We use this regex:
        // https://gist.github.com/gruber/249502
        // but with @cscott's backtracking fix and also
        // the Unicode characters un-Unicodified.
        $bits = preg_split(
            '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
            $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);

        if ($bits === false) {
            // preg_split() reports failure with false (for instance malformed
            // UTF-8 under the /u modifier, or a PCRE limit being hit). Keep
            // the original text token rather than emitting a bogus one.
            return;
        }

        $token = array();

        // With a single capture group and PREG_SPLIT_DELIM_CAPTURE the
        // pieces alternate: plain text, URL, plain text, URL, ...
        // $i = index
        // $c = count
        // $l = is link
        for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) {
            if (!$l) {
                if ($bits[$i] === '') {
                    // skip empty text between/around URLs
                    continue;
                }
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
            } else {
                $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i]));
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
                $token[] = new HTMLPurifier_Token_End('a');
            }
        }
    }
}
18470
18471
18472
18473
18474
18475/**
18476 * Injector that converts configuration directive syntax %Namespace.Directive
18477 * to links
18478 */
class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
{
    /**
     * Name of this injector.
     * @type string
     */
    public $name = 'PurifierLinkify';

    /**
     * URL template read from %AutoFormat.PurifierLinkify.DocURL; every '%s'
     * in it is replaced with the matched Namespace.Directive text.
     * @type string
     */
    public $docURL;

    /**
     * Elements (and attributes) this injector requires.
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Caches the documentation URL template, then defers to the parent.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string
     */
    public function prepare($config, $context)
    {
        $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL');
        return parent::prepare($config, $context);
    }

    /**
     * Replaces a text token containing %Namespace.Directive references with
     * a list of text tokens and <a> start/text/end triples.
     * @param HTMLPurifier_Token $token
     */
    public function handleText(&$token)
    {
        // Bail out when links are not allowed here or there is no '%' at all.
        if (!$this->allowsElement('a') || strpos($token->data, '%') === false) {
            return;
        }

        $pieces = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        $token = array();

        // Pieces alternate between plain text and captured directive names,
        // starting with plain text.
        $total = count($pieces);
        $isDirective = false;
        for ($pos = 0; $pos < $total; $pos++) {
            $piece = $pieces[$pos];
            if ($isDirective) {
                $href = str_replace('%s', $piece, $this->docURL);
                $token[] = new HTMLPurifier_Token_Start('a', array('href' => $href));
                $token[] = new HTMLPurifier_Token_Text('%' . $piece);
                $token[] = new HTMLPurifier_Token_End('a');
            } elseif ($piece !== '') {
                // Empty text fragments are dropped.
                $token[] = new HTMLPurifier_Token_Text($piece);
            }
            $isDirective = !$isDirective;
        }
    }
}
18542
18543
18544
18545
18546
/**
 * Injector that drops start tags whose content, up to the matching end tag,
 * consists only of whitespace (and, optionally, non-breaking spaces).
 * Behaviour is controlled by the %AutoFormat.RemoveEmpty.* directives read
 * in prepare().
 */
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{
    /**
     * Context handed to prepare(); passed on to the attribute validator.
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * Configuration handed to prepare(); passed on to the attribute validator.
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * Validator used to validate a candidate tag's attributes before the
     * keep/remove decision is made.
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Cached value of %AutoFormat.RemoveEmpty.RemoveNbsp
     * @type bool
     */
    private $removeNbsp;

    /**
     * Cached value of %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions; used as
     * a lookup keyed by element name.
     * @type array
     */
    private $removeNbspExceptions;

    /**
     * Cached contents of %AutoFormat.RemoveEmpty.Predicate, with every entry
     * normalised to an array of attribute names.
     * @type array
     */
    private $exclude;

    /**
     * Caches configuration values and creates the attribute validator.
     * Note that the result of parent::prepare() is not returned.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return void
     */
    public function prepare($config, $context)
    {
        parent::prepare($config, $context);
        $this->config = $config;
        $this->context = $context;
        $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
        $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
        $this->exclude = $config->get('AutoFormat.RemoveEmpty.Predicate');
        foreach ($this->exclude as $key => $attrs) {
            if (!is_array($attrs)) {
                // HACK, see HTMLPurifier/Printer/ConfigForm.php
                // A string entry is a ';'-separated attribute list.
                $this->exclude[$key] = explode(';', $attrs);
            }
        }
        $this->attrValidator = new HTMLPurifier_AttrValidator();
    }

    /**
     * Inspects a start tag and, if it turns out to be empty, replaces $token
     * with an integer and rewinds the injector offset.
     * Non-start tokens are ignored.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if (!$token instanceof HTMLPurifier_Token_Start) {
            return;
        }
        $next = false;
        $deleted = 1; // the current tag
        // Walk inputZipper->back from its last index towards 0 (presumably the
        // tokens following the current one, nearest first -- confirm against
        // HTMLPurifier_Zipper), counting every skipped token in $deleted.
        for ($i = count($this->inputZipper->back) - 1; $i >= 0; $i--, $deleted++) {
            $next = $this->inputZipper->back[$i];
            if ($next instanceof HTMLPurifier_Token_Text) {
                // Pure whitespace never makes an element non-empty.
                if ($next->is_whitespace) {
                    continue;
                }
                if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
                    // "\xC2\xA0" is the UTF-8 encoding of U+00A0 (NBSP); text
                    // made up solely of NBSP and whitespace is skipped too.
                    $plain = str_replace("\xC2\xA0", "", $next->data);
                    $isWsOrNbsp = $plain === '' || ctype_space($plain);
                    if ($isWsOrNbsp) {
                        continue;
                    }
                }
            }
            // First token that is not ignorable text: stop scanning.
            break;
        }
        // The element is a removal candidate when no token was seen at all or
        // when the scan stopped at an end tag of the same name.
        if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
            // Validate attributes now so the checks below see validated
            // attributes; the armor flag presumably prevents a second
            // validation pass later -- TODO confirm.
            $this->attrValidator->validateToken($token, $this->config, $this->context);
            $token->armor['ValidateAttributes'] = true;
            if (isset($this->exclude[$token->name])) {
                // Keep the element if every attribute listed for it in the
                // predicate is present.
                $r = true;
                foreach ($this->exclude[$token->name] as $elem) {
                    if (!isset($token->attr[$elem])) $r = false;
                }
                if ($r) return;
            }
            // Elements carrying an id or name attribute are always kept.
            if (isset($token->attr['id']) || isset($token->attr['name'])) {
                return;
            }
            // NOTE(review): an integer replacement presumably tells the
            // caller how many tokens to delete -- confirm against the
            // MakeWellFormed strategy.
            $token = $deleted + 1;
            // Count the whitespace-only text tokens at the start of
            // inputZipper->front; $b ends up as the number of such tokens.
            for ($b = 0, $c = count($this->inputZipper->front); $b < $c; $b++) {
                $prev = $this->inputZipper->front[$b];
                if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) {
                    continue;
                }
                break;
            }
            // This is safe because we removed the token that triggered this.
            $this->rewindOffset($b+$deleted);
            return;
        }
    }
}
18655
18656
18657
18658
18659
18660/**
18661 * Injector that removes spans with no attributes
18662 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    /**
     * Name of this injector.
     * @type string
     */
    public $name = 'RemoveSpansWithoutAttributes';

    /**
     * Elements this injector requires.
     * @type array
     */
    public $needed = array('span');

    /**
     * Validates span attributes ahead of the regular validation pass.
     * @type HTMLPurifier_AttrValidator
     */
    private $attrValidator;

    /**
     * Used by AttrValidator.
     * @type HTMLPurifier_Config
     */
    private $config;

    /**
     * Used by AttrValidator.
     * @type HTMLPurifier_Context
     */
    private $context;

    /**
     * Stores config and context, creates the validator and defers to the
     * parent implementation for the result.
     */
    public function prepare($config, $context)
    {
        $this->config = $config;
        $this->context = $context;
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        return parent::prepare($config, $context);
    }

    /**
     * Deletes an opening <span> that has no attributes left after validation
     * and flags its closing tag for deletion.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        $isOpeningSpan = $token->name === 'span' && $token instanceof HTMLPurifier_Token_Start;
        if (!$isOpeningSpan) {
            return;
        }

        // Attribute validation normally runs only after MakeWellFormed, so
        // it is done here up front: a span whose attributes are all stripped
        // has to go as well.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;

        $hasAttributes = !empty($token->attr);
        if ($hasAttributes) {
            return;
        }

        // Skip forward until the scan stops; $closer then holds the last
        // token that was visited.
        $depth = 0;
        $cursor = null;
        while ($this->forwardUntilEndToken($cursor, $closer, $depth)) {
            // nothing to do per token
        }

        if (!($closer instanceof HTMLPurifier_Token_End) || $closer->name !== 'span') {
            return;
        }

        // Mark closing span tag for deletion
        $closer->markForDeletion = true;
        // Delete open span tag
        $token = false;
    }

    /**
     * Deletes end tags previously flagged by handleElement().
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        if (!$token->markForDeletion) {
            return;
        }
        $token = false;
    }
}
18740
18741
18742
18743
18744
18745/**
18746 * Adds important param elements to inside of object in order to make
18747 * things safe.
18748 */
class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'SafeObject';

    /**
     * @type array
     */
    public $needed = array('object', 'param');

    /**
     * Object tokens currently open, innermost last.
     * @type array
     */
    protected $objectStack = array();

    /**
     * One entry per open object, parallel to $objectStack.
     * @type array
     */
    protected $paramStack = array();

    /**
     * Params injected right after every <object> start tag.
     * Keep this synchronized with AttrTransform/SafeParam.php.
     * @type array
     */
    protected $addParam = array(
        'allowScriptAccess' => 'never',
        'allowNetworking' => 'internal',
    );

    /**
     * Param names (compared lower-cased) that are passed through unchanged.
     * These are all lower-case keys.
     * @type array
     */
    protected $allowedParam = array(
        'wmode' => true,
        'movie' => true,
        'flashvars' => true,
        'src' => true,
        'allowfullscreen' => true, // if omitted, assume to be 'false'
    );

    /**
     * Defers to the parent; the parent's result is not returned.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return void
     */
    public function prepare($config, $context)
    {
        parent::prepare($config, $context);
    }

    /**
     * For <object>: records the token and appends the mandatory params.
     * For <param>: keeps it only when it sits directly inside an object and
     * its name is allowed; otherwise the token is deleted.
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if ($token->name == 'object') {
            $this->objectStack[] = $token;
            $this->paramStack[] = array();
            $new = array($token);
            foreach ($this->addParam as $name => $value) {
                $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value));
            }
            $token = $new;
        } elseif ($token->name == 'param') {
            $nest = count($this->currentNesting) - 1;
            if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') {
                $i = count($this->objectStack) - 1;
                if (!isset($token->attr['name'])) {
                    // A param without a name is useless; drop it.
                    $token = false;
                    return;
                }
                $n = $token->attr['name'];
                // We need this fix because YouTube doesn't supply a data
                // attribute, which we need if a type is specified. This is
                // *very* Flash specific.
                // Only copy when the object is actually tracked on our stack
                // and the param really has a value; otherwise a null "data"
                // attribute (and an undefined-index error) would result.
                if (isset($this->objectStack[$i]) &&
                    !isset($this->objectStack[$i]->attr['data']) &&
                    isset($token->attr['value']) &&
                    ($n == 'movie' || $n == 'src')
                ) {
                    $this->objectStack[$i]->attr['data'] = $token->attr['value'];
                }
                // Check if the parameter is the correct value but has not
                // already been added
                // NOTE(review): this compares the param *name* with the
                // required *value*, so the branch looks unreachable for the
                // current $addParam table; left unchanged because switching
                // to attr['value'] would alter which params are kept.
                if (!isset($this->paramStack[$i][$n]) &&
                    isset($this->addParam[$n]) &&
                    $token->attr['name'] === $this->addParam[$n]) {
                    // keep token, and add to param stack
                    $this->paramStack[$i][$n] = true;
                } elseif (isset($this->allowedParam[strtolower($n)])) {
                    // keep token, don't do anything to it
                    // (could possibly check for duplicates here)
                    // Note: In principle, parameters should be case sensitive.
                    // But it seems they are not really; so accept any case.
                } else {
                    $token = false;
                }
            } else {
                // not directly inside an object, DENY!
                $token = false;
            }
        }
    }

    /**
     * Pops the bookkeeping stacks when an </object> is encountered.
     * @param HTMLPurifier_Token $token
     */
    public function handleEnd(&$token)
    {
        // This is the WRONG way of handling the object and param stacks;
        // we should be inserting them directly on the relevant object tokens
        // so that the global stack handling handles it.
        if ($token->name == 'object') {
            array_pop($this->objectStack);
            array_pop($this->paramStack);
        }
    }
}
18865
18866
18867
18868
18869
18870/**
18871 * Parser that uses PHP 5's DOM extension (part of the core).
18872 *
18873 * In PHP 5, the DOM XML extension was revamped into DOM and added to the core.
18874 * It gives us a forgiving HTML parser, which we use to transform the HTML
 * into a DOM, and then into the tokens.  It is blazingly fast (for large
 * documents, it performs twenty times faster than
 * HTMLPurifier_Lexer_DirectLex), and is the default choice for PHP 5.
 *
 * @note Any empty elements will have empty tokens associated with them, even if
 * this is prohibited by the spec. This cannot be fixed until the spec
 * comes into play.
 *
 * @note PHP's DOM extension does not actually parse any entities, we use
 *       our own function to do that.
 *
 * @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
 *          If this is a huge problem, due to the fact that HTML is hand
 *          edited and you are unable to get a parser cache that caches
 *          the output of HTML Purifier while keeping the original HTML lying
 *          around, you may want to run Tidy on the resulting output or use
 *          HTMLPurifier_Lexer_DirectLex
18892 */
18893
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
{

    /**
     * Factory used to create every token this lexer emits.
     * @type HTMLPurifier_TokenFactory
     */
    private $factory;

    /**
     * Runs the parent constructor and creates the token factory.
     */
    public function __construct()
    {
        // setup the factory
        parent::__construct();
        $this->factory = new HTMLPurifier_TokenFactory();
    }

    /**
     * Tokenizes an HTML fragment by wrapping it in a full document, loading
     * that document with DOMDocument::loadHTML() and walking the resulting
     * tree.
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $html = $this->normalize($html, $config, $context);

        // attempt to armor stray angled brackets that cannot possibly
        // form tags and thus are probably being used as emoticons
        if ($config->get('Core.AggressivelyFixLt')) {
            $char = '[^a-z!\/]';
            $comment = "/<!--(.*?)(-->|\z)/is";
            // Protect ampersands inside comments first, so the undo step
            // below can tell them apart from the entities introduced here.
            $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
            // Repeat until nothing changes: one replacement can expose
            // another stray '<' directly behind it.
            do {
                $old = $html;
                $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
            } while ($html !== $old);
            $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
        }

        // preprocess html, essential for UTF-8
        $html = $this->wrapHTML($html, $config, $context);

        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // theoretically, the above has this covered

        // Silence whatever loadHTML() reports while parsing.
        set_error_handler(array($this, 'muteErrorHandler'));
        $doc->loadHTML($html);
        restore_error_handler();

        $body = $doc->getElementsByTagName('html')->item(0)-> // <html>
        getElementsByTagName('body')->item(0);  // <body>

        // The first <div> is the wrapper added by wrapHTML().
        $div = $body->getElementsByTagName('div')->item(0); // <div>
        $tokens = array();
        $this->tokenizeDOM($div, $tokens, $config);
        // If the div has a sibling, that means we tripped across
        // a premature </div> tag.  So remove the div we parsed,
        // and then tokenize the rest of body.  We can't tokenize
        // the sibling directly as we'll lose the tags in that case.
        if ($div->nextSibling) {
            $body->removeChild($div);
            $this->tokenizeDOM($body, $tokens, $config);
        }
        return $tokens;
    }

    /**
     * Iterative function that tokenizes a node, putting it into an accumulator.
     * To iterate is human, to recurse divine - L. Peter Deutsch
     *
     * Nodes are kept in one queue per nesting level. The node passed in sits
     * at level 0 and produces no start/end tag tokens of its own (tags are
     * only collected for levels above 0); its descendants do.
     * @param DOMNode $node DOMNode to be tokenized.
     * @param HTMLPurifier_Token[] $tokens   Array-list of already tokenized tokens.
     * @param HTMLPurifier_Config $config
     * @return void New tokens are appended to $tokens, which is passed by reference.
     */
    protected function tokenizeDOM($node, &$tokens, $config)
    {
        $level = 0;
        $nodes = array($level => new HTMLPurifier_Queue(array($node)));
        $closingNodes = array();
        do {
            while (!$nodes[$level]->isEmpty()) {
                $node = $nodes[$level]->shift(); // FIFO
                $collect = $level > 0 ? true : false;
                $needEndingTag = $this->createStartNode($node, $tokens, $collect, $config);
                if ($needEndingTag) {
                    // Remember the node so its end tag can be emitted once
                    // its children have been processed.
                    $closingNodes[$level][] = $node;
                }
                if ($node->childNodes && $node->childNodes->length) {
                    // Descend: queue all children one level deeper.
                    $level++;
                    $nodes[$level] = new HTMLPurifier_Queue();
                    foreach ($node->childNodes as $childNode) {
                        $nodes[$level]->push($childNode);
                    }
                }
            }
            // The queue of this level is exhausted: go back up and close the
            // pending nodes there (never at level 0).
            $level--;
            if ($level && isset($closingNodes[$level])) {
                while ($node = array_pop($closingNodes[$level])) {
                    $this->createEndNode($node, $tokens);
                }
            }
        } while ($level > 0);
    }

    /**
     * Portably retrieve the tag name of a node; deals with older versions
     * of libxml like 2.7.6
     * @param DOMNode $node
     * @return string|null The first available of tagName, nodeName and
     *                     localName, or null if the node has none of them.
     */
    protected function getTagName($node)
    {
        if (property_exists($node, 'tagName')) {
            return $node->tagName;
        } else if (property_exists($node, 'nodeName')) {
            return $node->nodeName;
        } else if (property_exists($node, 'localName')) {
            return $node->localName;
        }
        return null;
    }

    /**
     * Portably retrieve the data of a node; deals with older versions
     * of libxml like 2.7.6
     * @param DOMNode $node
     * @return string|null The first available of data, nodeValue and
     *                     textContent, or null if the node has none of them.
     */
    protected function getData($node)
    {
        if (property_exists($node, 'data')) {
            return $node->data;
        } else if (property_exists($node, 'nodeValue')) {
            return $node->nodeValue;
        } else if (property_exists($node, 'textContent')) {
            return $node->textContent;
        }
        return null;
    }


    /**
     * Emits the token(s) that open a node: a text or comment token for
     * text/CDATA/comment nodes, or an empty/start tag token for elements.
     * @param DOMNode $node DOMNode to be tokenized.
     * @param HTMLPurifier_Token[] $tokens   Array-list of already tokenized tokens.
     * @param bool $collect  Says whether or start and close are collected, set to
     *                    false at first recursion because it's the implicit DIV
     *                    tag you're dealing with.
     * @param HTMLPurifier_Config $config Passed on to parseText() for CDATA content.
     * @return bool if the token needs an endtoken
     * @todo data and tagName properties don't seem to exist in DOMNode?
     */
    protected function createStartNode($node, &$tokens, $collect, $config)
    {
        // intercept non element nodes. WE MUST catch all of them,
        // but we're not getting the character reference nodes because
        // those should have been preprocessed
        if ($node->nodeType === XML_TEXT_NODE) {
            $data = $this->getData($node); // Handle variable data property
            if ($data !== null) {
              $tokens[] = $this->factory->createText($data);
            }
            return false;
        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
            // undo libxml's special treatment of <script> and <style> tags
            $last = end($tokens);
            $data = $node->data;
            // (note $node->tagname is already normalized)
            if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
                // Strip a wrapping <!-- ... --> from the script/style body.
                $new_data = trim($data);
                if (substr($new_data, 0, 4) === '<!--') {
                    $data = substr($new_data, 4);
                    if (substr($data, -3) === '-->') {
                        $data = substr($data, 0, -3);
                    } else {
                        // Highly suspicious! Not sure what to do...
                    }
                }
            }
            $tokens[] = $this->factory->createText($this->parseText($data, $config));
            return false;
        } elseif ($node->nodeType === XML_COMMENT_NODE) {
            // this is code is only invoked for comments in script/style in versions
            // of libxml pre-2.6.28 (regular comments, of course, are still
            // handled regularly)
            $tokens[] = $this->factory->createComment($node->data);
            return false;
        } elseif ($node->nodeType !== XML_ELEMENT_NODE) {
            // not-well tested: there may be other nodes we have to grab
            return false;
        }
        $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
        $tag_name = $this->getTagName($node); // Handle variable tagName property
        if (empty($tag_name)) {
            // No usable tag name: emit nothing, but report whether there are
            // children.
            return (bool) $node->childNodes->length;
        }
        // We still have to make sure that the element actually IS empty
        if (!$node->childNodes->length) {
            if ($collect) {
                $tokens[] = $this->factory->createEmpty($tag_name, $attr);
            }
            return false;
        } else {
            if ($collect) {
                $tokens[] = $this->factory->createStart($tag_name, $attr);
            }
            return true;
        }
    }

    /**
     * Appends the end tag token for an element node.
     * @param DOMNode $node
     * @param HTMLPurifier_Token[] $tokens
     */
    protected function createEndNode($node, &$tokens)
    {
        $tag_name = $this->getTagName($node); // Handle variable tagName property
        $tokens[] = $this->factory->createEnd($tag_name);
    }

    /**
     * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
     *
     * @param DOMNamedNodeMap $node_map DOMNamedNodeMap of DOMAttr objects.
     * @return array Associative array of attributes.
     */
    protected function transformAttrToAssoc($node_map)
    {
        // NamedNodeMap is not documented very well, so we're using undocumented
        // features, namely, the fact that it implements Iterator and
        // has a ->length attribute
        if ($node_map->length === 0) {
            return array();
        }
        $array = array();
        foreach ($node_map as $attr) {
            $array[$attr->name] = $attr->value;
        }
        return $array;
    }

    /**
     * An error handler that mutes all errors
     * @param int $errno
     * @param string $errstr
     */
    public function muteErrorHandler($errno, $errstr)
    {
    }

    /**
     * Callback function for undoing escaping of stray angled brackets
     * in comments
     * @param array $matches Regex match: [1] is the comment body, [2] the
     *                       comment terminator (possibly empty).
     * @return string
     */
    public function callbackUndoCommentSubst($matches)
    {
        return '<!--' . strtr($matches[1], array('&amp;' => '&', '&lt;' => '<')) . $matches[2];
    }

    /**
     * Callback function that entity-izes ampersands in comments so that
     * callbackUndoCommentSubst doesn't clobber them
     * @param array $matches Regex match: [1] is the comment body, [2] the
     *                       comment terminator (possibly empty).
     * @return string
     */
    public function callbackArmorCommentEntities($matches)
    {
        return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
    }

    /**
     * Wraps an HTML fragment in the necessary HTML
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @param bool $use_div Whether to enclose the fragment in a <div> inside
     *                      the <body>.
     * @return string
     */
    protected function wrapHTML($html, $config, $context, $use_div = true)
    {
        $def = $config->getDefinition('HTML');
        $ret = '';

        // Emit a DOCTYPE only when the definition supplies a public or
        // system identifier.
        if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
            $ret .= '<!DOCTYPE html ';
            if (!empty($def->doctype->dtdPublic)) {
                $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" ';
            }
            if (!empty($def->doctype->dtdSystem)) {
                $ret .= '"' . $def->doctype->dtdSystem . '" ';
            }
            $ret .= '>';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        // No protection if $html contains a stray </div>!
        $ret .= '</head><body>';
        if ($use_div) $ret .= '<div>';
        $ret .= $html;
        if ($use_div) $ret .= '</div>';
        $ret .= '</body></html>';
        return $ret;
    }
}
19194
19195
19196
19197
19198
19199/**
19200 * Our in-house implementation of a parser.
19201 *
19202 * A pure PHP parser, DirectLex has absolutely no dependencies, making
19203 * it a reasonably good default for PHP4.  Written with efficiency in mind,
19204 * it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
19205 * pales in comparison to HTMLPurifier_Lexer_DOMLex.
19206 *
19207 * @todo Reread XML spec and document differences.
19208 */
19209class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
19210{
19211    /**
19212     * @type bool
19213     */
19214    public $tracksLineNumbers = true;
19215
19216    /**
19217     * Whitespace characters for str(c)spn.
19218     * @type string
19219     */
19220    protected $_whitespace = "\x20\x09\x0D\x0A";
19221
19222    /**
19223     * Callback function for script CDATA fudge
19224     * @param array $matches, in form of array(opening tag, contents, closing tag)
19225     * @return string
19226     */
19227    protected function scriptCallback($matches)
19228    {
19229        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3];
19230    }
19231
19232    /**
19233     * @param String $html
19234     * @param HTMLPurifier_Config $config
19235     * @param HTMLPurifier_Context $context
19236     * @return array|HTMLPurifier_Token[]
19237     */
19238    public function tokenizeHTML($html, $config, $context)
19239    {
19240        // special normalization for script tags without any armor
19241        // our "armor" heurstic is a < sign any number of whitespaces after
19242        // the first script tag
19243        if ($config->get('HTML.Trusted')) {
19244            $html = preg_replace_callback(
19245                '#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
19246                array($this, 'scriptCallback'),
19247                $html
19248            );
19249        }
19250
19251        $html = $this->normalize($html, $config, $context);
19252
19253        $cursor = 0; // our location in the text
19254        $inside_tag = false; // whether or not we're parsing the inside of a tag
19255        $array = array(); // result array
19256
19257        // This is also treated to mean maintain *column* numbers too
19258        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
19259
19260        if ($maintain_line_numbers === null) {
19261            // automatically determine line numbering by checking
19262            // if error collection is on
19263            $maintain_line_numbers = $config->get('Core.CollectErrors');
19264        }
19265
19266        if ($maintain_line_numbers) {
19267            $current_line = 1;
19268            $current_col = 0;
19269            $length = strlen($html);
19270        } else {
19271            $current_line = false;
19272            $current_col = false;
19273            $length = false;
19274        }
19275        $context->register('CurrentLine', $current_line);
19276        $context->register('CurrentCol', $current_col);
19277        $nl = "\n";
19278        // how often to manually recalculate. This will ALWAYS be right,
19279        // but it's pretty wasteful. Set to 0 to turn off
19280        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
19281
19282        $e = false;
19283        if ($config->get('Core.CollectErrors')) {
19284            $e =& $context->get('ErrorCollector');
19285        }
19286
19287        // for testing synchronization
19288        $loops = 0;
19289
19290        while (++$loops) {
19291            // $cursor is either at the start of a token, or inside of
19292            // a tag (i.e. there was a < immediately before it), as indicated
19293            // by $inside_tag
19294
19295            if ($maintain_line_numbers) {
19296                // $rcursor, however, is always at the start of a token.
19297                $rcursor = $cursor - (int)$inside_tag;
19298
19299                // Column number is cheap, so we calculate it every round.
19300                // We're interested at the *end* of the newline string, so
19301                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
19302                // from our "rcursor" position.
19303                $nl_pos = strrpos($html, $nl, $rcursor - $length);
19304                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
19305
19306                // recalculate lines
19307                if ($synchronize_interval && // synchronization is on
19308                    $cursor > 0 && // cursor is further than zero
19309                    $loops % $synchronize_interval === 0) { // time to synchronize!
19310                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
19311                }
19312            }
19313
19314            $position_next_lt = strpos($html, '<', $cursor);
19315            $position_next_gt = strpos($html, '>', $cursor);
19316
19317            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
19318            // special case to set up context
19319            if ($position_next_lt === $cursor) {
19320                $inside_tag = true;
19321                $cursor++;
19322            }
19323
19324            if (!$inside_tag && $position_next_lt !== false) {
19325                // We are not inside tag and there still is another tag to parse
19326                $token = new
19327                HTMLPurifier_Token_Text(
19328                    $this->parseText(
19329                        substr(
19330                            $html,
19331                            $cursor,
19332                            $position_next_lt - $cursor
19333                        ), $config
19334                    )
19335                );
19336                if ($maintain_line_numbers) {
19337                    $token->rawPosition($current_line, $current_col);
19338                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
19339                }
19340                $array[] = $token;
19341                $cursor = $position_next_lt + 1;
19342                $inside_tag = true;
19343                continue;
19344            } elseif (!$inside_tag) {
19345                // We are not inside tag but there are no more tags
19346                // If we're already at the end, break
19347                if ($cursor === strlen($html)) {
19348                    break;
19349                }
19350                // Create Text of rest of string
19351                $token = new
19352                HTMLPurifier_Token_Text(
19353                    $this->parseText(
19354                        substr(
19355                            $html,
19356                            $cursor
19357                        ), $config
19358                    )
19359                );
19360                if ($maintain_line_numbers) {
19361                    $token->rawPosition($current_line, $current_col);
19362                }
19363                $array[] = $token;
19364                break;
19365            } elseif ($inside_tag && $position_next_gt !== false) {
19366                // We are in tag and it is well formed
19367                // Grab the internals of the tag
19368                $strlen_segment = $position_next_gt - $cursor;
19369
19370                if ($strlen_segment < 1) {
19371                    // there's nothing to process!
19372                    $token = new HTMLPurifier_Token_Text('<');
19373                    $cursor++;
19374                    continue;
19375                }
19376
19377                $segment = substr($html, $cursor, $strlen_segment);
19378
19379                if ($segment === false) {
19380                    // somehow, we attempted to access beyond the end of
19381                    // the string, defense-in-depth, reported by Nate Abele
19382                    break;
19383                }
19384
19385                // Check if it's a comment
19386                if (substr($segment, 0, 3) === '!--') {
19387                    // re-determine segment length, looking for -->
19388                    $position_comment_end = strpos($html, '-->', $cursor);
19389                    if ($position_comment_end === false) {
19390                        // uh oh, we have a comment that extends to
19391                        // infinity. Can't be helped: set comment
19392                        // end position to end of string
19393                        if ($e) {
19394                            $e->send(E_WARNING, 'Lexer: Unclosed comment');
19395                        }
19396                        $position_comment_end = strlen($html);
19397                        $end = true;
19398                    } else {
19399                        $end = false;
19400                    }
19401                    $strlen_segment = $position_comment_end - $cursor;
19402                    $segment = substr($html, $cursor, $strlen_segment);
19403                    $token = new
19404                    HTMLPurifier_Token_Comment(
19405                        substr(
19406                            $segment,
19407                            3,
19408                            $strlen_segment - 3
19409                        )
19410                    );
19411                    if ($maintain_line_numbers) {
19412                        $token->rawPosition($current_line, $current_col);
19413                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
19414                    }
19415                    $array[] = $token;
19416                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
19417                    $inside_tag = false;
19418                    continue;
19419                }
19420
19421                // Check if it's an end tag
19422                $is_end_tag = (strpos($segment, '/') === 0);
19423                if ($is_end_tag) {
19424                    $type = substr($segment, 1);
19425                    $token = new HTMLPurifier_Token_End($type);
19426                    if ($maintain_line_numbers) {
19427                        $token->rawPosition($current_line, $current_col);
19428                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
19429                    }
19430                    $array[] = $token;
19431                    $inside_tag = false;
19432                    $cursor = $position_next_gt + 1;
19433                    continue;
19434                }
19435
19436                // Check leading character is alnum, if not, we may
19437                // have accidently grabbed an emoticon. Translate into
19438                // text and go our merry way
19439                if (!ctype_alpha($segment[0])) {
19440                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
19441                    if ($e) {
19442                        $e->send(E_NOTICE, 'Lexer: Unescaped lt');
19443                    }
19444                    $token = new HTMLPurifier_Token_Text('<');
19445                    if ($maintain_line_numbers) {
19446                        $token->rawPosition($current_line, $current_col);
19447                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
19448                    }
19449                    $array[] = $token;
19450                    $inside_tag = false;
19451                    continue;
19452                }
19453
19454                // Check if it is explicitly self closing, if so, remove
19455                // trailing slash. Remember, we could have a tag like <br>, so
19456                // any later token processing scripts must convert improperly
19457                // classified EmptyTags from StartTags.
19458                $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1);
19459                if ($is_self_closing) {
19460                    $strlen_segment--;
19461                    $segment = substr($segment, 0, $strlen_segment);
19462                }
19463
19464                // Check if there are any attributes
19465                $position_first_space = strcspn($segment, $this->_whitespace);
19466
19467                if ($position_first_space >= $strlen_segment) {
19468                    if ($is_self_closing) {
19469                        $token = new HTMLPurifier_Token_Empty($segment);
19470                    } else {
19471                        $token = new HTMLPurifier_Token_Start($segment);
19472                    }
19473                    if ($maintain_line_numbers) {
19474                        $token->rawPosition($current_line, $current_col);
19475                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
19476                    }
19477                    $array[] = $token;
19478                    $inside_tag = false;
19479                    $cursor = $position_next_gt + 1;
19480                    continue;
19481                }
19482
19483                // Grab out all the data
19484                $type = substr($segment, 0, $position_first_space);
19485                $attribute_string =
19486                    trim(
19487                        substr(
19488                            $segment,
19489                            $position_first_space
19490                        )
19491                    );
19492                if ($attribute_string) {
19493                    $attr = $this->parseAttributeString(
19494                        $attribute_string,
19495                        $config,
19496                        $context
19497                    );
19498                } else {
19499                    $attr = array();
19500                }
19501
19502                if ($is_self_closing) {
19503                    $token = new HTMLPurifier_Token_Empty($type, $attr);
19504                } else {
19505                    $token = new HTMLPurifier_Token_Start($type, $attr);
19506                }
19507                if ($maintain_line_numbers) {
19508                    $token->rawPosition($current_line, $current_col);
19509                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
19510                }
19511                $array[] = $token;
19512                $cursor = $position_next_gt + 1;
19513                $inside_tag = false;
19514                continue;
19515            } else {
19516                // inside tag, but there's no ending > sign
19517                if ($e) {
19518                    $e->send(E_WARNING, 'Lexer: Missing gt');
19519                }
19520                $token = new
19521                HTMLPurifier_Token_Text(
19522                    '<' .
19523                    $this->parseText(
19524                        substr($html, $cursor), $config
19525                    )
19526                );
19527                if ($maintain_line_numbers) {
19528                    $token->rawPosition($current_line, $current_col);
19529                }
19530                // no cursor scroll? Hmm...
19531                $array[] = $token;
19532                break;
19533            }
19534            break;
19535        }
19536
19537        $context->destroy('CurrentLine');
19538        $context->destroy('CurrentCol');
19539        return $array;
19540    }
19541
19542    /**
19543     * PHP 5.0.x compatible substr_count that implements offset and length
19544     * @param string $haystack
19545     * @param string $needle
19546     * @param int $offset
19547     * @param int $length
19548     * @return int
19549     */
19550    protected function substrCount($haystack, $needle, $offset, $length)
19551    {
19552        static $oldVersion;
19553        if ($oldVersion === null) {
19554            $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
19555        }
19556        if ($oldVersion) {
19557            $haystack = substr($haystack, $offset, $length);
19558            return substr_count($haystack, $needle);
19559        } else {
19560            return substr_count($haystack, $needle, $offset, $length);
19561        }
19562    }
19563
19564    /**
19565     * Takes the inside of an HTML tag and makes an assoc array of attributes.
19566     *
19567     * @param string $string Inside of tag excluding name.
19568     * @param HTMLPurifier_Config $config
19569     * @param HTMLPurifier_Context $context
19570     * @return array Assoc array of attributes.
19571     */
19572    public function parseAttributeString($string, $config, $context)
19573    {
19574        $string = (string)$string; // quick typecast
19575
19576        if ($string == '') {
19577            return array();
19578        } // no attributes
19579
19580        $e = false;
19581        if ($config->get('Core.CollectErrors')) {
19582            $e =& $context->get('ErrorCollector');
19583        }
19584
19585        // let's see if we can abort as quickly as possible
19586        // one equal sign, no spaces => one attribute
19587        $num_equal = substr_count($string, '=');
19588        $has_space = strpos($string, ' ');
19589        if ($num_equal === 0 && !$has_space) {
19590            // bool attribute
19591            return array($string => $string);
19592        } elseif ($num_equal === 1 && !$has_space) {
19593            // only one attribute
19594            list($key, $quoted_value) = explode('=', $string);
19595            $quoted_value = trim($quoted_value);
19596            if (!$key) {
19597                if ($e) {
19598                    $e->send(E_ERROR, 'Lexer: Missing attribute key');
19599                }
19600                return array();
19601            }
19602            if (!$quoted_value) {
19603                return array($key => '');
19604            }
19605            $first_char = @$quoted_value[0];
19606            $last_char = @$quoted_value[strlen($quoted_value) - 1];
19607
19608            $same_quote = ($first_char == $last_char);
19609            $open_quote = ($first_char == '"' || $first_char == "'");
19610
19611            if ($same_quote && $open_quote) {
19612                // well behaved
19613                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
19614            } else {
19615                // not well behaved
19616                if ($open_quote) {
19617                    if ($e) {
19618                        $e->send(E_ERROR, 'Lexer: Missing end quote');
19619                    }
19620                    $value = substr($quoted_value, 1);
19621                } else {
19622                    $value = $quoted_value;
19623                }
19624            }
19625            if ($value === false) {
19626                $value = '';
19627            }
19628            return array($key => $this->parseAttr($value, $config));
19629        }
19630
19631        // setup loop environment
19632        $array = array(); // return assoc array of attributes
19633        $cursor = 0; // current position in string (moves forward)
19634        $size = strlen($string); // size of the string (stays the same)
19635
19636        // if we have unquoted attributes, the parser expects a terminating
19637        // space, so let's guarantee that there's always a terminating space.
19638        $string .= ' ';
19639
19640        $old_cursor = -1;
19641        while ($cursor < $size) {
19642            if ($old_cursor >= $cursor) {
19643                throw new Exception("Infinite loop detected");
19644            }
19645            $old_cursor = $cursor;
19646
19647            $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
19648            // grab the key
19649
19650            $key_begin = $cursor; //we're currently at the start of the key
19651
19652            // scroll past all characters that are the key (not whitespace or =)
19653            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
19654
19655            $key_end = $cursor; // now at the end of the key
19656
19657            $key = substr($string, $key_begin, $key_end - $key_begin);
19658
19659            if (!$key) {
19660                if ($e) {
19661                    $e->send(E_ERROR, 'Lexer: Missing attribute key');
19662                }
19663                $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
19664                continue; // empty key
19665            }
19666
19667            // scroll past all whitespace
19668            $cursor += strspn($string, $this->_whitespace, $cursor);
19669
19670            if ($cursor >= $size) {
19671                $array[$key] = $key;
19672                break;
19673            }
19674
19675            // if the next character is an equal sign, we've got a regular
19676            // pair, otherwise, it's a bool attribute
19677            $first_char = @$string[$cursor];
19678
19679            if ($first_char == '=') {
19680                // key="value"
19681
19682                $cursor++;
19683                $cursor += strspn($string, $this->_whitespace, $cursor);
19684
19685                if ($cursor === false) {
19686                    $array[$key] = '';
19687                    break;
19688                }
19689
19690                // we might be in front of a quote right now
19691
19692                $char = @$string[$cursor];
19693
19694                if ($char == '"' || $char == "'") {
19695                    // it's quoted, end bound is $char
19696                    $cursor++;
19697                    $value_begin = $cursor;
19698                    $cursor = strpos($string, $char, $cursor);
19699                    $value_end = $cursor;
19700                } else {
19701                    // it's not quoted, end bound is whitespace
19702                    $value_begin = $cursor;
19703                    $cursor += strcspn($string, $this->_whitespace, $cursor);
19704                    $value_end = $cursor;
19705                }
19706
19707                // we reached a premature end
19708                if ($cursor === false) {
19709                    $cursor = $size;
19710                    $value_end = $cursor;
19711                }
19712
19713                $value = substr($string, $value_begin, $value_end - $value_begin);
19714                if ($value === false) {
19715                    $value = '';
19716                }
19717                $array[$key] = $this->parseAttr($value, $config);
19718                $cursor++;
19719            } else {
19720                // boolattr
19721                if ($key !== '') {
19722                    $array[$key] = $key;
19723                } else {
19724                    // purely theoretical
19725                    if ($e) {
19726                        $e->send(E_ERROR, 'Lexer: Missing attribute key');
19727                    }
19728                }
19729            }
19730        }
19731        return $array;
19732    }
19733}
19734
19735
19736
19737
19738
19739/**
19740 * Concrete comment node class.
19741 */
19742class HTMLPurifier_Node_Comment extends HTMLPurifier_Node
19743{
19744    /**
19745     * Character data within comment.
19746     * @type string
19747     */
19748    public $data;
19749
19750    /**
19751     * @type bool
19752     */
19753    public $is_whitespace = true;
19754
19755    /**
19756     * Transparent constructor.
19757     *
19758     * @param string $data String comment data.
19759     * @param int $line
19760     * @param int $col
19761     */
19762    public function __construct($data, $line = null, $col = null)
19763    {
19764        $this->data = $data;
19765        $this->line = $line;
19766        $this->col = $col;
19767    }
19768
19769    public function toTokenPair() {
19770        return array(new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col), null);
19771    }
19772}
19773
19774
19775
19776/**
19777 * Concrete element node class.
19778 */
19779class HTMLPurifier_Node_Element extends HTMLPurifier_Node
19780{
19781    /**
19782     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
19783     *
19784     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
19785     * be lower-casing them, but these tokens cater to HTML tags, which are
19786     * insensitive.
19787     * @type string
19788     */
19789    public $name;
19790
19791    /**
19792     * Associative array of the node's attributes.
19793     * @type array
19794     */
19795    public $attr = array();
19796
19797    /**
19798     * List of child elements.
19799     * @type array
19800     */
19801    public $children = array();
19802
19803    /**
19804     * Does this use the <a></a> form or the </a> form, i.e.
19805     * is it a pair of start/end tokens or an empty token.
19806     * @bool
19807     */
19808    public $empty = false;
19809
19810    public $endCol = null, $endLine = null, $endArmor = array();
19811
19812    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) {
19813        $this->name = $name;
19814        $this->attr = $attr;
19815        $this->line = $line;
19816        $this->col = $col;
19817        $this->armor = $armor;
19818    }
19819
19820    public function toTokenPair() {
19821        // XXX inefficiency here, normalization is not necessary
19822        if ($this->empty) {
19823            return array(new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor), null);
19824        } else {
19825            $start = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
19826            $end = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
19827            //$end->start = $start;
19828            return array($start, $end);
19829        }
19830    }
19831}
19832
19833
19834
19835
19836/**
19837 * Concrete text token class.
19838 *
19839 * Text tokens comprise of regular parsed character data (PCDATA) and raw
19840 * character data (from the CDATA sections). Internally, their
19841 * data is parsed with all entities expanded. Surprisingly, the text token
19842 * does have a "tag name" called #PCDATA, which is how the DTD represents it
19843 * in permissible child nodes.
19844 */
19845class HTMLPurifier_Node_Text extends HTMLPurifier_Node
19846{
19847
19848    /**
19849     * PCDATA tag name compatible with DTD, see
19850     * HTMLPurifier_ChildDef_Custom for details.
19851     * @type string
19852     */
19853    public $name = '#PCDATA';
19854
19855    /**
19856     * @type string
19857     */
19858    public $data;
19859    /**< Parsed character data of text. */
19860
19861    /**
19862     * @type bool
19863     */
19864    public $is_whitespace;
19865
19866    /**< Bool indicating if node is whitespace. */
19867
19868    /**
19869     * Constructor, accepts data and determines if it is whitespace.
19870     * @param string $data String parsed character data.
19871     * @param int $line
19872     * @param int $col
19873     */
19874    public function __construct($data, $is_whitespace, $line = null, $col = null)
19875    {
19876        $this->data = $data;
19877        $this->is_whitespace = $is_whitespace;
19878        $this->line = $line;
19879        $this->col = $col;
19880    }
19881
19882    public function toTokenPair() {
19883        return array(new HTMLPurifier_Token_Text($this->data, $this->line, $this->col), null);
19884    }
19885}
19886
19887
19888
19889
19890
19891/**
19892 * Composite strategy that runs multiple strategies on tokens.
19893 */
19894abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
19895{
19896
19897    /**
19898     * List of strategies to run tokens through.
19899     * @type HTMLPurifier_Strategy[]
19900     */
19901    protected $strategies = array();
19902
19903    /**
19904     * @param HTMLPurifier_Token[] $tokens
19905     * @param HTMLPurifier_Config $config
19906     * @param HTMLPurifier_Context $context
19907     * @return HTMLPurifier_Token[]
19908     */
19909    public function execute($tokens, $config, $context)
19910    {
19911        foreach ($this->strategies as $strategy) {
19912            $tokens = $strategy->execute($tokens, $config, $context);
19913        }
19914        return $tokens;
19915    }
19916}
19917
19918
19919
19920
19921
19922/**
19923 * Core strategy composed of the big four strategies.
19924 */
19925class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
19926{
19927    public function __construct()
19928    {
19929        $this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements();
19930        $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed();
19931        $this->strategies[] = new HTMLPurifier_Strategy_FixNesting();
19932        $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes();
19933    }
19934}
19935
19936
19937
19938
19939
/**
 * Takes a well formed list of tokens and fixes their nesting.
 *
 * HTML elements dictate which elements are allowed to be their children,
 * for example, you can't have a p tag in a span tag.  Other elements have
 * much more rigorous definitions: tables, for instance, require a specific
 * order for their elements.  There are also constraints not expressible by
 * document type definitions, such as the chameleon nature of ins/del
 * tags and global child exclusions.
 *
 * The first major objective of this strategy is to iterate through all
 * the nodes and determine whether or not their children conform to the
 * element's definition.  If they do not, the child definition may
 * optionally supply an amended list of elements that is valid or
 * require that the entire node be deleted (and the previous node
 * rescanned).
 *
 * The second objective is to ensure that explicitly excluded elements of
 * an element do not appear in its children.  Code that accomplishes this
 * task is pervasive through the strategy, though the two are distinct tasks
 * and could, theoretically, be separated (although it's not recommended).
 *
 * @note Whether or not unrecognized children are silently dropped or
 *       translated into text depends on the child definitions.
 *
 * @todo Enable nodes to be bubbled out of the structure.  This is
 *       easier with our new algorithm.
 */

class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{

    /**
     * Converts the tokens to a tree, validates every node's children
     * against its child definition in post-order (marking excluded or
     * invalid nodes as dead), and flattens the tree back into tokens.
     *
     * While running, the context variables 'IsInline', 'CurrentNode' and
     * 'CurrentToken' are registered; they are destroyed before returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {

        //####################################################################//
        // Pre-processing

        // O(n) pass to convert to a tree, so that we can efficiently
        // refer to substrings
        $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);

        // get a copy of the HTML definition
        $definition = $config->getHTMLDefinition();

        $excludes_enabled = !$config->get('Core.DisableExcludes');

        // setup the context variable 'IsInline', for chameleon processing
        // is 'false' when we are not inline, 'true' when it must always
        // be inline, and an integer when it is inline for a certain
        // branch of the document tree
        $is_inline = $definition->info_parent_def->descendants_are_inline;
        $context->register('IsInline', $is_inline);

        // setup error collector
        $e =& $context->get('ErrorCollector', true);

        //####################################################################//
        // Loop initialization

        // variable that contains the start token while we are processing
        // nodes. This enables error reporting to do its job
        $node = $top_node;
        // dummy token (only the first element of the pair is needed)
        list($token) = $node->toTokenPair();
        $context->register('CurrentNode', $node);
        $context->register('CurrentToken', $token);

        //####################################################################//
        // Loop

        // We need to implement a post-order traversal iteratively, to
        // avoid running into stack space limits.  This is pretty tricky
        // to reason about, so we just manually stack-ify the recursive
        // variant:
        //
        //  function f($node) {
        //      foreach ($node->children as $child) {
        //          f($child);
        //      }
        //      validate($node);
        //  }
        //
        // Thus, we will represent a stack frame as array($node,
        // $is_inline, $excludes, $ix), where $excludes is the set of
        // element names excluded at this node and $ix is the index of
        // the next child of $node that has yet to be visited.

        $parent_def = $definition->info_parent_def;
        $stack = array(
            array($top_node,
                $parent_def->descendants_are_inline,
                $parent_def->excludes, // exclusions
                0)
        );

        while (!empty($stack)) {
            // NB: $node, $is_inline and $token are registered in the
            // context, so assigning them here also updates the context
            list($node, $is_inline, $excludes, $ix) = array_pop($stack);
            // recursive call
            $go = false;
            // only the root frame leaves the stack empty once popped
            $def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name];
            while (isset($node->children[$ix])) {
                $child = $node->children[$ix++];
                if ($child instanceof HTMLPurifier_Node_Element) {
                    $go = true;
                    // resume this node at child $ix once $child is done
                    $stack[] = array($node, $is_inline, $excludes, $ix);
                    $stack[] = array($child,
                        // ToDo: I don't think it matters if it's def or
                        // child_def, but double check this...
                        $is_inline || $def->descendants_are_inline,
                        empty($def->excludes) ? $excludes
                            : array_merge($excludes, $def->excludes),
                        0);
                    break;
                }
            }
            if ($go) continue;
            list($token) = $node->toTokenPair();
            // base case
            if ($excludes_enabled && isset($excludes[$node->name])) {
                $node->dead = true;
                if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
            } else {
                // XXX I suppose it would be slightly more efficient to
                // avoid the allocation here and have children
                // strategies handle it
                $children = array();
                foreach ($node->children as $child) {
                    if (!$child->dead) $children[] = $child;
                }
                $result = $def->child->validateChildren($children, $config, $context);
                if ($result === true) {
                    // children are valid as-is; only the dead ones are dropped
                    $node->children = $children;
                } elseif ($result === false) {
                    $node->dead = true;
                    if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
                } else {
                    // the child definition supplied an amended child list
                    $node->children = $result;
                    if ($e) {
                        // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
                        if (empty($result) && !empty($children)) {
                            $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
                        } else if ($result != $children) {
                            $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
                        }
                    }
                }
            }
        }

        //####################################################################//
        // Post-processing

        // remove context variables
        $context->destroy('IsInline');
        $context->destroy('CurrentNode');
        $context->destroy('CurrentToken');

        //####################################################################//
        // Return

        // the root is always the last node validated, so $node is
        // $top_node at this point; name it explicitly
        return HTMLPurifier_Arborize::flatten($top_node, $config, $context);
    }
}
20117
20118
20119
20120
20121
20122/**
 * Takes tokens and makes them well-formed (balance end tags, etc.)
20124 *
20125 * Specification of the armor attributes this strategy uses:
20126 *
20127 *      - MakeWellFormed_TagClosedError: This armor field is used to
20128 *        suppress tag closed errors for certain tokens [TagClosedSuppress],
20129 *        in particular, if a tag was generated automatically by HTML
20130 *        Purifier, we may rely on our infrastructure to close it for us
20131 *        and shouldn't report an error to the user [TagClosedAuto].
20132 */
20133class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
20134{
20135
20136    /**
20137     * Array stream of tokens being processed.
20138     * @type HTMLPurifier_Token[]
20139     */
20140    protected $tokens;
20141
20142    /**
20143     * Current token.
20144     * @type HTMLPurifier_Token
20145     */
20146    protected $token;
20147
20148    /**
20149     * Zipper managing the true state.
20150     * @type HTMLPurifier_Zipper
20151     */
20152    protected $zipper;
20153
20154    /**
20155     * Current nesting of elements.
20156     * @type array
20157     */
20158    protected $stack;
20159
20160    /**
20161     * Injectors active in this stream processing.
20162     * @type HTMLPurifier_Injector[]
20163     */
20164    protected $injectors;
20165
20166    /**
20167     * Current instance of HTMLPurifier_Config.
20168     * @type HTMLPurifier_Config
20169     */
20170    protected $config;
20171
20172    /**
20173     * Current instance of HTMLPurifier_Context.
20174     * @type HTMLPurifier_Context
20175     */
20176    protected $context;
20177
20178    /**
20179     * @param HTMLPurifier_Token[] $tokens
20180     * @param HTMLPurifier_Config $config
20181     * @param HTMLPurifier_Context $context
20182     * @return HTMLPurifier_Token[]
20183     * @throws HTMLPurifier_Exception
20184     */
20185    public function execute($tokens, $config, $context)
20186    {
20187        $definition = $config->getHTMLDefinition();
20188
20189        // local variables
20190        $generator = new HTMLPurifier_Generator($config, $context);
20191        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
20192        // used for autoclose early abortion
20193        $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
20194        $e = $context->get('ErrorCollector', true);
20195        $i = false; // injector index
20196        list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
20197        if ($token === NULL) {
20198            return array();
20199        }
20200        $reprocess = false; // whether or not to reprocess the same token
20201        $stack = array();
20202
20203        // member variables
20204        $this->stack =& $stack;
20205        $this->tokens =& $tokens;
20206        $this->token =& $token;
20207        $this->zipper =& $zipper;
20208        $this->config = $config;
20209        $this->context = $context;
20210
20211        // context variables
20212        $context->register('CurrentNesting', $stack);
20213        $context->register('InputZipper', $zipper);
20214        $context->register('CurrentToken', $token);
20215
20216        // -- begin INJECTOR --
20217
20218        $this->injectors = array();
20219
20220        $injectors = $config->getBatch('AutoFormat');
20221        $def_injectors = $definition->info_injector;
20222        $custom_injectors = $injectors['Custom'];
20223        unset($injectors['Custom']); // special case
20224        foreach ($injectors as $injector => $b) {
20225            // XXX: Fix with a legitimate lookup table of enabled filters
20226            if (strpos($injector, '.') !== false) {
20227                continue;
20228            }
20229            $injector = "HTMLPurifier_Injector_$injector";
20230            if (!$b) {
20231                continue;
20232            }
20233            $this->injectors[] = new $injector;
20234        }
20235        foreach ($def_injectors as $injector) {
20236            // assumed to be objects
20237            $this->injectors[] = $injector;
20238        }
20239        foreach ($custom_injectors as $injector) {
20240            if (!$injector) {
20241                continue;
20242            }
20243            if (is_string($injector)) {
20244                $injector = "HTMLPurifier_Injector_$injector";
20245                $injector = new $injector;
20246            }
20247            $this->injectors[] = $injector;
20248        }
20249
20250        // give the injectors references to the definition and context
20251        // variables for performance reasons
20252        foreach ($this->injectors as $ix => $injector) {
20253            $error = $injector->prepare($config, $context);
20254            if (!$error) {
20255                continue;
20256            }
20257            array_splice($this->injectors, $ix, 1); // rm the injector
20258            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
20259        }
20260
20261        // -- end INJECTOR --
20262
20263        // a note on reprocessing:
20264        //      In order to reduce code duplication, whenever some code needs
20265        //      to make HTML changes in order to make things "correct", the
20266        //      new HTML gets sent through the purifier, regardless of its
20267        //      status. This means that if we add a start token, because it
20268        //      was totally necessary, we don't have to update nesting; we just
20269        //      punt ($reprocess = true; continue;) and it does that for us.
20270
20271        // isset is in loop because $tokens size changes during loop exec
20272        for (;;
20273            // only increment if we don't need to reprocess
20274            $reprocess ? $reprocess = false : $token = $zipper->next($token)) {
20275
20276            // check for a rewind
20277            if (is_int($i)) {
20278                // possibility: disable rewinding if the current token has a
20279                // rewind set on it already. This would offer protection from
20280                // infinite loop, but might hinder some advanced rewinding.
20281                $rewind_offset = $this->injectors[$i]->getRewindOffset();
20282                if (is_int($rewind_offset)) {
20283                    for ($j = 0; $j < $rewind_offset; $j++) {
20284                        if (empty($zipper->front)) break;
20285                        $token = $zipper->prev($token);
20286                        // indicate that other injectors should not process this token,
20287                        // but we need to reprocess it.  See Note [Injector skips]
20288                        unset($token->skip[$i]);
20289                        $token->rewind = $i;
20290                        if ($token instanceof HTMLPurifier_Token_Start) {
20291                            array_pop($this->stack);
20292                        } elseif ($token instanceof HTMLPurifier_Token_End) {
20293                            $this->stack[] = $token->start;
20294                        }
20295                    }
20296                }
20297                $i = false;
20298            }
20299
20300            // handle case of document end
20301            if ($token === NULL) {
20302                // kill processing if stack is empty
20303                if (empty($this->stack)) {
20304                    break;
20305                }
20306
20307                // peek
20308                $top_nesting = array_pop($this->stack);
20309                $this->stack[] = $top_nesting;
20310
20311                // send error [TagClosedSuppress]
20312                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
20313                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
20314                }
20315
20316                // append, don't splice, since this is the end
20317                $token = new HTMLPurifier_Token_End($top_nesting->name);
20318
20319                // punt!
20320                $reprocess = true;
20321                continue;
20322            }
20323
20324            //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
20325            //flush();
20326
20327            // quick-check: if it's not a tag, no need to process
20328            if (empty($token->is_tag)) {
20329                if ($token instanceof HTMLPurifier_Token_Text) {
20330                    foreach ($this->injectors as $i => $injector) {
20331                        if (isset($token->skip[$i])) {
20332                            // See Note [Injector skips]
20333                            continue;
20334                        }
20335                        if ($token->rewind !== null && $token->rewind !== $i) {
20336                            continue;
20337                        }
20338                        // XXX fuckup
20339                        $r = $token;
20340                        $injector->handleText($r);
20341                        $token = $this->processToken($r, $i);
20342                        $reprocess = true;
20343                        break;
20344                    }
20345                }
20346                // another possibility is a comment
20347                continue;
20348            }
20349
20350            if (isset($definition->info[$token->name])) {
20351                $type = $definition->info[$token->name]->child->type;
20352            } else {
20353                $type = false; // Type is unknown, treat accordingly
20354            }
20355
20356            // quick tag checks: anything that's *not* an end tag
20357            $ok = false;
20358            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
20359                // claims to be a start tag but is empty
20360                $token = new HTMLPurifier_Token_Empty(
20361                    $token->name,
20362                    $token->attr,
20363                    $token->line,
20364                    $token->col,
20365                    $token->armor
20366                );
20367                $ok = true;
20368            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
20369                // claims to be empty but really is a start tag
20370                // NB: this assignment is required
20371                $old_token = $token;
20372                $token = new HTMLPurifier_Token_End($token->name);
20373                $token = $this->insertBefore(
20374                    new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
20375                );
20376                // punt (since we had to modify the input stream in a non-trivial way)
20377                $reprocess = true;
20378                continue;
20379            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
20380                // real empty token
20381                $ok = true;
20382            } elseif ($token instanceof HTMLPurifier_Token_Start) {
20383                // start tag
20384
20385                // ...unless they also have to close their parent
20386                if (!empty($this->stack)) {
20387
20388                    // Performance note: you might think that it's rather
20389                    // inefficient, recalculating the autoclose information
20390                    // for every tag that a token closes (since when we
20391                    // do an autoclose, we push a new token into the
20392                    // stream and then /process/ that, before
20393                    // re-processing this token.)  But this is
20394                    // necessary, because an injector can make an
20395                    // arbitrary transformations to the autoclosing
20396                    // tokens we introduce, so things may have changed
20397                    // in the meantime.  Also, doing the inefficient thing is
20398                    // "easy" to reason about (for certain perverse definitions
20399                    // of "easy")
20400
20401                    $parent = array_pop($this->stack);
20402                    $this->stack[] = $parent;
20403
20404                    $parent_def = null;
20405                    $parent_elements = null;
20406                    $autoclose = false;
20407                    if (isset($definition->info[$parent->name])) {
20408                        $parent_def = $definition->info[$parent->name];
20409                        $parent_elements = $parent_def->child->getAllowedElements($config);
20410                        $autoclose = !isset($parent_elements[$token->name]);
20411                    }
20412
20413                    if ($autoclose && $definition->info[$token->name]->wrap) {
20414                        // Check if an element can be wrapped by another
20415                        // element to make it valid in a context (for
20416                        // example, <ul><ul> needs a <li> in between)
20417                        $wrapname = $definition->info[$token->name]->wrap;
20418                        $wrapdef = $definition->info[$wrapname];
20419                        $elements = $wrapdef->child->getAllowedElements($config);
20420                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
20421                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
20422                            $token = $this->insertBefore($newtoken);
20423                            $reprocess = true;
20424                            continue;
20425                        }
20426                    }
20427
20428                    $carryover = false;
20429                    if ($autoclose && $parent_def->formatting) {
20430                        $carryover = true;
20431                    }
20432
20433                    if ($autoclose) {
20434                        // check if this autoclose is doomed to fail
20435                        // (this rechecks $parent, which his harmless)
20436                        $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
20437                        if (!$autoclose_ok) {
20438                            foreach ($this->stack as $ancestor) {
20439                                $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
20440                                if (isset($elements[$token->name])) {
20441                                    $autoclose_ok = true;
20442                                    break;
20443                                }
20444                                if ($definition->info[$token->name]->wrap) {
20445                                    $wrapname = $definition->info[$token->name]->wrap;
20446                                    $wrapdef = $definition->info[$wrapname];
20447                                    $wrap_elements = $wrapdef->child->getAllowedElements($config);
20448                                    if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
20449                                        $autoclose_ok = true;
20450                                        break;
20451                                    }
20452                                }
20453                            }
20454                        }
20455                        if ($autoclose_ok) {
20456                            // errors need to be updated
20457                            $new_token = new HTMLPurifier_Token_End($parent->name);
20458                            $new_token->start = $parent;
20459                            // [TagClosedSuppress]
20460                            if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
20461                                if (!$carryover) {
20462                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
20463                                } else {
20464                                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
20465                                }
20466                            }
20467                            if ($carryover) {
20468                                $element = clone $parent;
20469                                // [TagClosedAuto]
20470                                $element->armor['MakeWellFormed_TagClosedError'] = true;
20471                                $element->carryover = true;
20472                                $token = $this->processToken(array($new_token, $token, $element));
20473                            } else {
20474                                $token = $this->insertBefore($new_token);
20475                            }
20476                        } else {
20477                            $token = $this->remove();
20478                        }
20479                        $reprocess = true;
20480                        continue;
20481                    }
20482
20483                }
20484                $ok = true;
20485            }
20486
20487            if ($ok) {
20488                foreach ($this->injectors as $i => $injector) {
20489                    if (isset($token->skip[$i])) {
20490                        // See Note [Injector skips]
20491                        continue;
20492                    }
20493                    if ($token->rewind !== null && $token->rewind !== $i) {
20494                        continue;
20495                    }
20496                    $r = $token;
20497                    $injector->handleElement($r);
20498                    $token = $this->processToken($r, $i);
20499                    $reprocess = true;
20500                    break;
20501                }
20502                if (!$reprocess) {
20503                    // ah, nothing interesting happened; do normal processing
20504                    if ($token instanceof HTMLPurifier_Token_Start) {
20505                        $this->stack[] = $token;
20506                    } elseif ($token instanceof HTMLPurifier_Token_End) {
20507                        throw new HTMLPurifier_Exception(
20508                            'Improper handling of end tag in start code; possible error in MakeWellFormed'
20509                        );
20510                    }
20511                }
20512                continue;
20513            }
20514
20515            // sanity check: we should be dealing with a closing tag
20516            if (!$token instanceof HTMLPurifier_Token_End) {
20517                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
20518            }
20519
20520            // make sure that we have something open
20521            if (empty($this->stack)) {
20522                if ($escape_invalid_tags) {
20523                    if ($e) {
20524                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
20525                    }
20526                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
20527                } else {
20528                    if ($e) {
20529                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
20530                    }
20531                    $token = $this->remove();
20532                }
20533                $reprocess = true;
20534                continue;
20535            }
20536
20537            // first, check for the simplest case: everything closes neatly.
20538            // Eventually, everything passes through here; if there are problems
20539            // we modify the input stream accordingly and then punt, so that
20540            // the tokens get processed again.
20541            $current_parent = array_pop($this->stack);
20542            if ($current_parent->name == $token->name) {
20543                $token->start = $current_parent;
20544                foreach ($this->injectors as $i => $injector) {
20545                    if (isset($token->skip[$i])) {
20546                        // See Note [Injector skips]
20547                        continue;
20548                    }
20549                    if ($token->rewind !== null && $token->rewind !== $i) {
20550                        continue;
20551                    }
20552                    $r = $token;
20553                    $injector->handleEnd($r);
20554                    $token = $this->processToken($r, $i);
20555                    $this->stack[] = $current_parent;
20556                    $reprocess = true;
20557                    break;
20558                }
20559                continue;
20560            }
20561
20562            // okay, so we're trying to close the wrong tag
20563
20564            // undo the pop previous pop
20565            $this->stack[] = $current_parent;
20566
20567            // scroll back the entire nest, trying to find our tag.
20568            // (feature could be to specify how far you'd like to go)
20569            $size = count($this->stack);
20570            // -2 because -1 is the last element, but we already checked that
20571            $skipped_tags = false;
20572            for ($j = $size - 2; $j >= 0; $j--) {
20573                if ($this->stack[$j]->name == $token->name) {
20574                    $skipped_tags = array_slice($this->stack, $j);
20575                    break;
20576                }
20577            }
20578
20579            // we didn't find the tag, so remove
20580            if ($skipped_tags === false) {
20581                if ($escape_invalid_tags) {
20582                    if ($e) {
20583                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
20584                    }
20585                    $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
20586                } else {
20587                    if ($e) {
20588                        $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
20589                    }
20590                    $token = $this->remove();
20591                }
20592                $reprocess = true;
20593                continue;
20594            }
20595
20596            // do errors, in REVERSE $j order: a,b,c with </a></b></c>
20597            $c = count($skipped_tags);
20598            if ($e) {
20599                for ($j = $c - 1; $j > 0; $j--) {
20600                    // notice we exclude $j == 0, i.e. the current ending tag, from
20601                    // the errors... [TagClosedSuppress]
20602                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
20603                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
20604                    }
20605                }
20606            }
20607
20608            // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
20609            $replace = array($token);
20610            for ($j = 1; $j < $c; $j++) {
20611                // ...as well as from the insertions
20612                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
20613                $new_token->start = $skipped_tags[$j];
20614                array_unshift($replace, $new_token);
20615                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
20616                    // [TagClosedAuto]
20617                    $element = clone $skipped_tags[$j];
20618                    $element->carryover = true;
20619                    $element->armor['MakeWellFormed_TagClosedError'] = true;
20620                    $replace[] = $element;
20621                }
20622            }
20623            $token = $this->processToken($replace);
20624            $reprocess = true;
20625            continue;
20626        }
20627
20628        $context->destroy('CurrentToken');
20629        $context->destroy('CurrentNesting');
20630        $context->destroy('InputZipper');
20631
20632        unset($this->injectors, $this->stack, $this->tokens);
20633        return $zipper->toArray($token);
20634    }
20635
20636    /**
20637     * Processes arbitrary token values for complicated substitution patterns.
20638     * In general:
20639     *
20640     * If $token is an array, it is a list of tokens to substitute for the
20641     * current token. These tokens then get individually processed. If there
20642     * is a leading integer in the list, that integer determines how many
20643     * tokens from the stream should be removed.
20644     *
20645     * If $token is a regular token, it is swapped with the current token.
20646     *
20647     * If $token is false, the current token is deleted.
20648     *
20649     * If $token is an integer, that number of tokens (with the first token
20650     * being the current one) will be deleted.
20651     *
20652     * @param HTMLPurifier_Token|array|int|bool $token Token substitution value
     * @param HTMLPurifier_Injector|int $injector Injector that performed the substitution; the
     *        default of -1 means that this is not an injector-related operation.
20655     * @throws HTMLPurifier_Exception
20656     */
    protected function processToken($token, $injector = -1)
    {
        // Zend OpCache miscompiles $token = array($token), so
        // avoid this pattern.  See: https://github.com/ezyang/htmlpurifier/issues/108

        // normalize forms of token
        // A single token object becomes "delete 1, insert this token".
        if (is_object($token)) {
            $tmp = $token;
            $token = array(1, $tmp);
        }
        // A bare integer becomes "delete that many tokens, insert nothing".
        if (is_int($token)) {
            $tmp = $token;
            $token = array($tmp);
        }
        // false becomes "delete 1, insert nothing".
        if ($token === false) {
            $token = array(1);
        }
        // Anything that is still not an array (e.g. true, null, a string) is rejected.
        if (!is_array($token)) {
            throw new HTMLPurifier_Exception('Invalid token type from injector');
        }
        // A list without a leading integer count implicitly replaces one token.
        if (!is_int($token[0])) {
            array_unshift($token, 1);
        }
        // A delete count of exactly 0 is refused.
        if ($token[0] === 0) {
            throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
        }

        // $token is now an array with the following form:
        // array(number nodes to delete, new node 1, new node 2, ...)

        // Peel off the delete count; $token now holds only the replacement tokens.
        $delete = array_shift($token);
        // The first element of the splice result is used below as the list of
        // removed tokens; the second is returned to the caller unchanged.
        list($old, $r) = $this->zipper->splice($this->token, $delete, $token);

        if ($injector > -1) {
            // See Note [Injector skips]
            // Determine appropriate skips.  Here's what the code does:
            //  *If* we deleted one or more tokens, copy the skips
            //  of those tokens into the skips of the new tokens (in $token).
            //  Also, mark the newly inserted tokens as having come from
            //  $injector.
            // Only the FIRST deleted token's skips are inherited; with no
            // deleted token the new tokens start from an empty skip list.
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

        return $r;

    }
20707
20708    /**
20709     * Inserts a token before the current token. Cursor now points to
20710     * this token.  You must reprocess after this.
20711     * @param HTMLPurifier_Token $token
20712     */
20713    private function insertBefore($token)
20714    {
20715        // NB not $this->zipper->insertBefore(), due to positioning
20716        // differences
20717        $splice = $this->zipper->splice($this->token, 0, array($token));
20718
20719        return $splice[1];
20720    }
20721
20722    /**
20723     * Removes current token. Cursor now points to new token occupying previously
20724     * occupied space.  You must reprocess after this.
20725     */
20726    private function remove()
20727    {
20728        return $this->zipper->delete();
20729    }
20730}
20731
20732// Note [Injector skips]
20733// ~~~~~~~~~~~~~~~~~~~~~
20734// When I originally designed this class, the idea behind the 'skip'
20735// property of HTMLPurifier_Token was to help avoid infinite loops
20736// in injector processing.  For example, suppose you wrote an injector
20737// that bolded swear words.  Naively, you might write it so that
20738// whenever you saw ****, you replaced it with <strong>****</strong>.
20739//
20740// When this happens, we will reprocess all of the tokens with the
20741// other injectors.  Now there is an opportunity for infinite loop:
20742// if we rerun the swear-word injector on these tokens, we might
20743// see **** and then reprocess again to get
20744// <strong><strong>****</strong></strong> ad infinitum.
20745//
20746// Thus, the idea of a skip is that once we process a token with
20747// an injector, we mark all of those tokens as having "come from"
20748// the injector, and we never run the injector again on these
20749// tokens.
20750//
20751// There were two more complications, however:
20752//
20753//  - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
20754//    you had <b><i></i></b>, after you removed the <i></i>, you
20755//    really would like this injector to go back and reprocess
20756//    the <b> tag, discovering that it is now empty and can be
20757//    removed.  So we reintroduced the possibility of infinite looping
20758//    by adding a "rewind" function, which let you go back to an
20759//    earlier point in the token stream and reprocess it with injectors.
20760//    Needless to say, we need to UN-skip the token so it gets
20761//    reprocessed.
20762//
//  - Suppose that you successfully process a token, replace it with
20764//    one with your skip mark, but now another injector wants to
20765//    process the skipped token with another token.  Should you continue
20766//    to skip that new token, or reprocess it?  If you reprocess,
20767//    you can end up with an infinite loop where one injector converts
20768//    <a> to <b>, and then another injector converts it back.  So
20769//    we inherit the skips, but for some reason, I thought that we
20770//    should inherit the skip from the first token of the token
20771//    that we deleted.  Why?  Well, it seems to work OK.
20772//
20773// If I were to redesign this functionality, I would absolutely not
20774// go about doing it this way: the semantics are just not very well
20775// defined, and in any case you probably wanted to operate on trees,
20776// not token streams.
20777
20778
20779
20780
20781
20782/**
20783 * Removes all unrecognized tags from the list of tokens.
20784 *
20785 * This strategy iterates through all the tokens and removes unrecognized
20786 * tokens. If a token is not recognized but a TagTransform is defined for
20787 * that element, the element will be transformed accordingly.
20788 */
20789
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to tokens the HTML definition knows about.
     *
     * Tag tokens are first passed through any registered tag transform, then
     * kept only if their element is defined (and its required attributes
     * survive validation). Unknown tags are either escaped to text
     * (Core.EscapeInvalidTags) or dropped; unknown "hidden" elements also
     * have the tokens that follow them dropped. Comments are textified,
     * cleaned up, or stripped. Text passes through; any other token type is
     * dropped.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        // tokens that survive filtering, in their original order
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        // true when either form of comment whitelist is configured
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // remove script contents compatibility
        // (an explicit true/false overrides the 'script' entry of the hidden
        // elements lookup; any other value leaves the lookup untouched)
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // removes tokens until it reaches a closing tag with its value
        // (the check below matches ANY tag token with that name, not only
        // an end tag; false means "not currently removing")
        $remove_until = false;

        // converts comments into text tokens when this is equal to a tag name
        $textify_comments = false;

        // $token doubles as the loop variable below, so the value registered
        // here as 'CurrentToken' is the token being processed
        $token = false;
        $context->register('CurrentToken', $token);

        // $e is the error collector, or false when error collection is off
        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                // drop everything that is not a tag named $remove_until
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }
            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    // remembered only for the error message
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                    info_tag_transform[$token->name]->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        // validate first, then check that every required
                        // attribute is still set on the token
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            // $name is the first required attribute found missing
                            if ($e) {
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $name
                                );
                            }
                            // drop the tag token itself
                            continue;
                        }
                        // attributes were validated above; flag the token so
                        // code that honors this armor can skip revalidation
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside an allowed hidden element,
                    // in which case comments are turned into text
                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag, generate HTML representation and insert in
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            // start dropping tokens until a matching tag shows up
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            // any other tag token of a hidden element stops the removal
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    // the unknown tag itself is never emitted
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                // textify comments in script tags when they are allowed
                if ($textify_comments !== false) {
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted || $check_comments) {
                    // always cleanup comments
                    $trailing_hyphen = false;
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        // (recorded only for error reporting)
                        if (substr($token->data, -1) == '-') {
                            $trailing_hyphen = true;
                        }
                    }
                    // strip trailing hyphens, then collapse every run of
                    // hyphens down to a single one
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    while (strpos($token->data, '--') !== false) {
                        $found_double_hyphen = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }
                    // keep the comment if trusted, or if its trimmed text is
                    // in the lookup or matches the regexp
                    if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
                        ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
                        // OK good
                        if ($e) {
                            if ($trailing_hyphen) {
                                $e->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($found_double_hyphen) {
                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    } else {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Text) {
                // text tokens pass through untouched
            } else {
                // any other kind of token is dropped silently
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }
        $context->destroy('CurrentToken');
        return $result;
    }
}
20985
20986
20987
20988
20989
20990/**
20991 * Validate all attributes in the tokens.
20992 */
20993
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{

    /**
     * Runs the attribute validator over every start and empty tag that has
     * not been armored against validation. Tokens are validated in place;
     * none are added or removed.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[] The same list that was passed in
     */
    public function execute($tokens, $config, $context)
    {
        $attr_validator = new HTMLPurifier_AttrValidator();

        // $current is also the loop variable, so the value registered as
        // 'CurrentToken' is the token being validated
        $current = false;
        $context->register('CurrentToken', $current);

        foreach ($tokens as $current) {
            // only start and empty tags carry attributes
            $carries_attributes = $current instanceof HTMLPurifier_Token_Start
                || $current instanceof HTMLPurifier_Token_Empty;

            // armored tokens are left alone
            if ($carries_attributes && empty($current->armor['ValidateAttributes'])) {
                // note that we have no facilities here for removing tokens
                $attr_validator->validateToken($current, $config, $context);
            }
        }

        $context->destroy('CurrentToken');
        return $tokens;
    }
}
21031
21032
21033
21034
21035
21036/**
21037 * Transforms FONT tags to the proper form (SPAN with CSS styling)
21038 *
21039 * This transformation takes the three proprietary attributes of FONT and
21040 * transforms them into their corresponding CSS attributes.  These are color,
21041 * face, and size.
21042 *
21043 * @note Size is an interesting case because it doesn't map cleanly to CSS.
21044 *       Thanks to
21045 *       http://style.cleverchimp.com/font_size_intervals/altintervals.html
21046 *       for reasonable mappings.
21047 * @warning This doesn't work completely correctly; specifically, this
21048 *          TagTransform operates before well-formedness is enforced, so
21049 *          the "active formatting elements" algorithm doesn't get applied.
21050 */
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{
    /**
     * Name of the tag that FONT is rewritten to.
     * @type string
     */
    public $transform_to = 'span';

    /**
     * Maps a (clamped) FONT size attribute value to a CSS font-size value.
     * @type array
     */
    protected $_size_lookup = array(
        '0' => 'xx-small',
        '1' => 'xx-small',
        '2' => 'small',
        '3' => 'medium',
        '4' => 'large',
        '5' => 'x-large',
        '6' => 'xx-large',
        '7' => '300%',
        '-1' => 'smaller',
        '-2' => '60%',
        '+1' => 'larger',
        '+2' => '150%',
        '+3' => '200%',
        '+4' => '300%'
    );

    /**
     * Returns a renamed clone of the tag. For non-end tags the color, face
     * and size attributes are removed and their CSS equivalents are placed
     * in front of any existing style attribute.
     *
     * @param HTMLPurifier_Token_Tag $tag
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token_Tag Clone of $tag; the input is not modified
     */
    public function transform($tag, $config, $context)
    {
        // end tags carry no attributes worth converting: rename and return
        if ($tag instanceof HTMLPurifier_Token_End) {
            $closing = clone $tag;
            $closing->name = $this->transform_to;
            return $closing;
        }

        $attributes = $tag->attr;
        $css = '';

        // color => CSS color
        if (isset($attributes['color'])) {
            $css .= 'color:' . $attributes['color'] . ';';
            unset($attributes['color']);
        }

        // face => CSS font-family
        if (isset($attributes['face'])) {
            $css .= 'font-family:' . $attributes['face'] . ';';
            unset($attributes['face']);
        }

        // size => CSS font-size, via the lookup table
        if (isset($attributes['size'])) {
            $raw_size = $attributes['size'];
            if ($raw_size !== '') {
                $numeric = (int)$raw_size;
                if ($raw_size[0] == '+' || $raw_size[0] == '-') {
                    // relative sizes are clamped to the range -2 .. +4
                    if ($numeric < -2) {
                        $raw_size = '-2';
                    } elseif ($numeric > 4) {
                        $raw_size = '+4';
                    }
                } elseif ($numeric > 7) {
                    // absolute sizes are capped at 7
                    $raw_size = '7';
                }
            }
            // values missing from the table produce no font-size at all
            if (isset($this->_size_lookup[$raw_size])) {
                $css .= 'font-size:' . $this->_size_lookup[$raw_size] . ';';
            }
            unset($attributes['size']);
        }

        // generated declarations go before whatever style was already there
        if ($css !== '') {
            if (isset($attributes['style'])) {
                $attributes['style'] = $css . $attributes['style'];
            } else {
                $attributes['style'] = $css;
            }
        }

        $replacement = clone $tag;
        $replacement->name = $this->transform_to;
        $replacement->attr = $attributes;

        return $replacement;
    }
}
21146
21147
21148
21149
21150
21151/**
21152 * Simple transformation, just change tag name to something else,
21153 * and possibly add some styling. This will cover most of the deprecated
21154 * tag cases.
21155 */
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{
    /**
     * CSS to prepend to transformed start/empty tags, or null for none.
     * @type string
     */
    protected $style;

    /**
     * @param string $transform_to Tag name to transform to.
     * @param string $style CSS style to add to the tag
     */
    public function __construct($transform_to, $style = null)
    {
        $this->style = $style;
        $this->transform_to = $transform_to;
    }

    /**
     * Returns a clone of the tag under the new name; start and empty tags
     * additionally get the configured style prepended.
     *
     * @param HTMLPurifier_Token_Tag $tag
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token_Tag Clone of $tag; the input is not modified
     */
    public function transform($tag, $config, $context)
    {
        $renamed = clone $tag;
        $renamed->name = $this->transform_to;

        // only start and empty tags get the style applied
        $takes_style = $renamed instanceof HTMLPurifier_Token_Start
            || $renamed instanceof HTMLPurifier_Token_Empty;

        if ($takes_style && $this->style !== null) {
            $this->prependCSS($renamed->attr, $this->style);
        }

        return $renamed;
    }
}
21191
21192
21193
21194
21195
21196/**
21197 * Concrete comment token class. Generally will be ignored.
21198 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /**
     * Text found inside the comment.
     * @type string
     */
    public $data;

    /**
     * Comments are always flagged as whitespace.
     * @type bool
     */
    public $is_whitespace = true;

    /**
     * Stores the given values as-is; no processing is performed.
     *
     * @param string $data String comment data.
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
    }

    /**
     * Builds the comment node carrying this token's data and position.
     */
    public function toNode() {
        $node = new HTMLPurifier_Node_Comment($this->data, $this->line, $this->col);
        return $node;
    }
}
21230
21231
21232
21233
21234
21235/**
21236 * Abstract class of a tag token (start, end or empty), and its behavior.
21237 */
abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     * @type bool
     */
    public $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * @type string
     */
    public $name;

    /**
     * Associative array of the tag's attributes.
     * @type array
     */
    public $attr = array();

    /**
     * Non-overloaded constructor, which lower-cases the passed tag name and
     * the attribute names.
     *
     * When two attribute names differ only by case, the value stored under
     * an already-present lower-case key wins; otherwise the first mixed-case
     * spelling encountered supplies the value.
     *
     * @param string $name String name.
     * @param array $attr Associative array of attributes.
     * @param int $line
     * @param int $col
     * @param array $armor
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        foreach ($attr as $key => $value) {
            // PHP stores integer-like array keys ("300") as ints. Passing an
            // int to ctype_lower() is deprecated since PHP 8.1 and, for
            // -128..255, is read as an ASCII code instead of as text; and
            // comparing the lower-cased string against the int key with !==
            // used to unset such attributes by accident. Work on the string
            // form of the key throughout.
            $key_string = (string)$key;
            // normalization only necessary when key is not lowercase
            if (!ctype_lower($key_string)) {
                $new_key = strtolower($key_string);
                if (!isset($attr[$new_key])) {
                    $attr[$new_key] = $attr[$key];
                }
                // drop the original spelling only if lower-casing changed it
                if ($new_key !== $key_string) {
                    unset($attr[$key]);
                }
            }
        }
        $this->attr = $attr;
        $this->line = $line;
        $this->col = $col;
        $this->armor = $armor;
    }

    /**
     * Builds the element node carrying this tag's name, attributes,
     * position and armor.
     */
    public function toNode() {
        return new HTMLPurifier_Node_Element($this->name, $this->attr, $this->line, $this->col, $this->armor);
    }
}
21299
21300
21301
21302
21303
21304/**
21305 * Concrete empty token class.
21306 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
    /**
     * Same node as the parent class builds, with its empty flag switched on.
     */
    public function toNode() {
        $node = parent::toNode();
        $node->empty = true;

        return $node;
    }
}
21315
21316
21317
21318
21319
21320/**
21321 * Concrete end token class.
21322 *
21323 * @warning This class accepts attributes even though end tags cannot. This
21324 * is for optimization reasons, as under normal circumstances, the Lexers
21325 * do not pass attributes.
21326 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    /**
     * The token that opened the node this end tag closes.
     * Filled in by MakeWellFormed; do not modify it yourself.
     * @type HTMLPurifier_Token
     */
    public $start;

    /**
     * End tags cannot be converted to nodes; calling this always throws.
     *
     * @throws Exception
     */
    public function toNode() {
        $unsupported = new Exception("HTMLPurifier_Token_End->toNode not supported!");
        throw $unsupported;
    }
}
21340
21341
21342
21343
21344
21345/**
21346 * Concrete start token class.
21347 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
    // Intentionally empty: everything is inherited from HTMLPurifier_Token_Tag.
    // The separate class exists so start tags can be told apart with instanceof.
}
21351
21352
21353
21354
21355
21356/**
21357 * Concrete text token class.
21358 *
21359 * Text tokens comprise of regular parsed character data (PCDATA) and raw
21360 * character data (from the CDATA sections). Internally, their
21361 * data is parsed with all entities expanded. Surprisingly, the text token
21362 * does have a "tag name" called #PCDATA, which is how the DTD represents it
21363 * in permissible child nodes.
21364 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{

    /**
     * PCDATA tag name compatible with DTD.
     * @type string
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     * @type string
     */
    public $data;

    /**
     * Bool indicating if node is whitespace.
     * @type bool
     */
    public $is_whitespace;

    /**
     * Stores the data and position, and records whether the data consists
     * solely of whitespace characters (as judged by ctype_space()).
     *
     * @param string $data String parsed character data.
     * @param int $line
     * @param int $col
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->line = $line;
        $this->col = $col;
        $this->data = $data;
        $this->is_whitespace = ctype_space($data);
    }

    /**
     * Builds the text node carrying this token's data, whitespace flag
     * and position.
     */
    public function toNode() {
        $node = new HTMLPurifier_Node_Text($this->data, $this->is_whitespace, $this->line, $this->col);
        return $node;
    }
}
21405
21406
21407
21408
21409
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableExternal';

    /**
     * Labels of our own host in reverse order (top-level domain first), or
     * false while no host is known.
     * @type array|bool
     */
    protected $ourHostParts = false;

    /**
     * Reads our host from the URI definition and stores its labels reversed.
     * When the definition has no host, $ourHostParts stays false.
     *
     * @param HTMLPurifier_Config $config
     * @return void
     */
    public function prepare($config)
    {
        $our_host = $config->getDefinition('URI')->host;
        if ($our_host !== null) {
            $this->ourHostParts = array_reverse(explode('.', $our_host));
        }
    }

    /**
     * Accepts URIs without a host, and URIs whose host is our host or one of
     * its subdomains; rejects everything else.
     *
     * @param HTMLPurifier_URI $uri Reference
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True to keep the URI, false to reject it
     */
    public function filter(&$uri, $config, $context)
    {
        // no host at all: nothing external about it
        if (is_null($uri->host)) {
            return true;
        }
        // we do not know our own host, so any host counts as external
        if ($this->ourHostParts === false) {
            return false;
        }
        // compare label by label, starting from the top-level domain
        $host_parts = array_reverse(explode('.', $uri->host));
        foreach ($this->ourHostParts as $i => $our_part) {
            if (!isset($host_parts[$i])) {
                return false;
            }
            // Strict comparison is required: with loose != PHP compares
            // numeric-looking labels as numbers ('1e3' == '1000', '01' == '1'),
            // which would let a foreign host pass as our own.
            if ($host_parts[$i] !== $our_part) {
                return false;
            }
        }
        return true;
    }
}
21460
21461
21462
21463
21464
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
    /**
     * @type string
     */
    public $name = 'DisableExternalResources';

    /**
     * Applies the parent's external-host check only when the context's
     * 'EmbeddedURI' value is truthy; every other URI is accepted.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);

        return $is_embedded ? parent::filter($uri, $config, $context) : true;
    }
}
21486
21487
21488
21489
21490
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
    /**
     * @type string
     */
    public $name = 'DisableResources';

    /**
     * Rejects the URI exactly when the context's 'EmbeddedURI' value is
     * truthy; the URI itself is not inspected.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        $is_embedded = $context->get('EmbeddedURI', true);

        return !$is_embedded;
    }
}
21509
21510
21511
21512
21513
21514// It's not clear to me whether or not Punycode means that hostnames
21515// do not have canonical forms anymore. As far as I can tell, it's
21516// not a problem (punycoding should be identity when no Unicode
21517// points are involved), but I'm not 100% sure
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
    /**
     * Identifier of this filter.
     * @type string
     */
    public $name = 'HostBlacklist';

    /**
     * Host fragments to reject, loaded from %URI.HostBlacklist in prepare().
     * @type array
     */
    protected $blacklist = array();

    /**
     * Loads the blacklist from the configuration.
     *
     * @param HTMLPurifier_Config $config
     * @return bool Always true.
     */
    public function prepare($config)
    {
        $this->blacklist = $config->get('URI.HostBlacklist');
        return true;
    }

    /**
     * Rejects the URI when its host contains any blacklisted fragment
     * (plain substring match).
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False if the host matches a blacklisted fragment, true otherwise.
     */
    public function filter(&$uri, $config, $context)
    {
        // A URI without a host has nothing to match against. Checking this
        // first also avoids handing null to strpos(), which is deprecated
        // as of PHP 8.1.
        if ($uri->host === null) {
            return true;
        }
        foreach ($this->blacklist as $blacklisted_host_fragment) {
            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}
21556
21557
21558
21559
21560
21561// does not support network paths
21562
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
    /**
     * Identifier of this filter.
     * @type string
     */
    public $name = 'MakeAbsolute';

    /**
     * Base URI object that relative URIs are resolved against; read from
     * the URI definition in prepare(). Null when no base is configured.
     * @type HTMLPurifier_URI
     */
    protected $base;

    /**
     * Path segments of the base URI with the last segment discarded and
     * dot segments already collapsed.
     * @type array
     */
    protected $basePathStack = array();

    /**
     * Reads the base URI from the URI definition and pre-computes its
     * path stack.
     *
     * @param HTMLPurifier_Config $config
     * @return bool False (with an E_USER_WARNING) when no base URI is
     *              configured, true otherwise.
     */
    public function prepare($config)
    {
        $def = $config->getDefinition('URI');
        $this->base = $def->base;
        if (is_null($this->base)) {
            trigger_error(
                'URI.MakeAbsolute is being ignored due to lack of ' .
                'value for URI.Base configuration',
                E_USER_WARNING
            );
            return false;
        }
        $this->base->fragment = null; // fragment is invalid for base URI
        $stack = explode('/', $this->base->path);
        array_pop($stack); // discard last segment
        $stack = $this->_collapseStack($stack); // do pre-parsing
        $this->basePathStack = $stack;
        return true;
    }

    /**
     * Resolves a relative URI against the configured base URI, in place.
     *
     * URIs that already carry a host, or whose scheme is non-hierarchical,
     * are left unchanged.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False only when the URI has a scheme without a host and
     *              no scheme object can be obtained for it; true otherwise.
     */
    public function filter(&$uri, $config, $context)
    {
        if (is_null($this->base)) {
            return true;
        } // abort early
        if ($uri->path === '' && is_null($uri->scheme) &&
            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)) {
            // reference to current document
            $uri = clone $this->base;
            return true;
        }
        if (!is_null($uri->scheme)) {
            // absolute URI already: don't change
            if (!is_null($uri->host)) {
                return true;
            }
            $scheme_obj = $uri->getSchemeObj($config, $context);
            if (!$scheme_obj) {
                // scheme not recognized
                return false;
            }
            if (!$scheme_obj->hierarchical) {
                // non-hierarchical URI with explicit scheme, don't change
                return true;
            }
            // special case: had a scheme but always is hierarchical and had no authority
        }
        if (!is_null($uri->host)) {
            // network path, don't bother
            return true;
        }
        if ($uri->path === '') {
            $uri->path = $this->base->path;
            // RFC 3986 section 5.2.2: a reference with an empty path and no
            // query of its own (e.g. "#frag") inherits the base query, so
            // the result still points at the same resource as the base.
            if (is_null($uri->query)) {
                $uri->query = $this->base->query;
            }
        } elseif ($uri->path[0] !== '/') {
            // relative path, needs more complicated processing
            $stack = explode('/', $uri->path);
            $new_stack = array_merge($this->basePathStack, $stack);
            if ($new_stack[0] !== '' && !is_null($this->base->host)) {
                // a base with a host needs a leading slash on the merged path
                array_unshift($new_stack, '');
            }
            $new_stack = $this->_collapseStack($new_stack);
            $uri->path = implode('/', $new_stack);
        } else {
            // absolute path, but still we should collapse
            $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
        }
        // re-combine
        $uri->scheme = $this->base->scheme;
        if (is_null($uri->userinfo)) {
            $uri->userinfo = $this->base->userinfo;
        }
        if (is_null($uri->host)) {
            $uri->host = $this->base->host;
        }
        if (is_null($uri->port)) {
            $uri->port = $this->base->port;
        }
        return true;
    }

    /**
     * Resolve dots and double-dots in a path stack
     *
     * A trailing '.' or '..' leaves an empty final segment so that the
     * imploded path ends in a slash.
     *
     * @param array $stack Path segments as produced by explode('/', $path)
     * @return array Collapsed list of segments
     */
    private function _collapseStack($stack)
    {
        $result = array();
        $is_folder = false;
        for ($i = 0; isset($stack[$i]); $i++) {
            $is_folder = false;
            // absorb an internally duplicated slash
            if ($stack[$i] == '' && $i && isset($stack[$i + 1])) {
                continue;
            }
            if ($stack[$i] == '..') {
                if (!empty($result)) {
                    $segment = array_pop($result);
                    if ($segment === '' && empty($result)) {
                        // error case: attempted to back out too far:
                        // restore the leading slash
                        $result[] = '';
                    } elseif ($segment === '..') {
                        // cannot remove .. with ..: put the popped one back
                        // AND keep the current one, otherwise "../.." would
                        // silently collapse to ".."
                        $result[] = '..';
                        $result[] = '..';
                    }
                } else {
                    // relative path, preserve the double-dots
                    $result[] = '..';
                }
                $is_folder = true;
                continue;
            }
            if ($stack[$i] == '.') {
                // silently absorb
                $is_folder = true;
                continue;
            }
            $result[] = $stack[$i];
        }
        if ($is_folder) {
            $result[] = '';
        }
        return $result;
    }
}
21715
21716
21717
21718
21719
class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
{
    /**
     * Identifier of this filter; also the suffix of the configuration
     * directive ('URI.' . $name) holding the munge target.
     * @type string
     */
    public $name = 'Munge';

    /**
     * @type bool
     */
    public $post = true;

    /**
     * Target URI template containing %-placeholders, read in prepare().
     * @type string
     */
    private $target;

    /**
     * Parser used to turn the substituted template back into a URI object.
     * @type HTMLPurifier_URIParser
     */
    private $parser;

    /**
     * Value of %URI.MungeResources; when false, embedded URIs are skipped.
     * @type bool
     */
    private $doEmbed;

    /**
     * Value of %URI.MungeSecretKey; when set, a %t HMAC placeholder is provided.
     * @type string
     */
    private $secretKey;

    /**
     * Placeholder => replacement map applied to the target template.
     * @type array
     */
    protected $replace = array();

    /**
     * Reads the munge configuration.
     *
     * @param HTMLPurifier_Config $config
     * @return bool Always true.
     * @throws Exception If a secret key is configured but hash_hmac() is unavailable.
     */
    public function prepare($config)
    {
        $this->target = $config->get('URI.' . $this->name);
        $this->parser = new HTMLPurifier_URIParser();
        $this->doEmbed = $config->get('URI.MungeResources');
        $this->secretKey = $config->get('URI.MungeSecretKey');
        if ($this->secretKey && !function_exists('hash_hmac')) {
            throw new Exception("Cannot use %URI.MungeSecretKey without hash_hmac support.");
        }
        return true;
    }

    /**
     * Replaces the URI with the configured target template, with the
     * placeholders filled in, unless the URI is exempt.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true; the URI is either rewritten or left as is.
     */
    public function filter(&$uri, $config, $context)
    {
        if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
            return true;
        }

        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return true;
        } // ignore unknown schemes, maybe another postfilter did it
        if (!$scheme_obj->browsable) {
            return true;
        } // ignore non-browseable schemes, since we can't munge those in a reasonable way
        if ($uri->isBenign($config, $context)) {
            return true;
        } // don't redirect if a benign URL

        $this->makeReplace($uri, $config, $context);
        // Several placeholders may be null or boolean (missing context
        // values). Cast to string before encoding: rawurlencode(null) is
        // deprecated as of PHP 8.1, while the cast yields the same output
        // ('' for null/false, '1' for true).
        foreach ($this->replace as $placeholder => $value) {
            $this->replace[$placeholder] = rawurlencode((string)$value);
        }

        $new_uri = strtr($this->target, $this->replace);
        $new_uri = $this->parser->parse($new_uri);
        // don't redirect if the target host is the same as the
        // starting host
        if ($uri->host === $new_uri->host) {
            return true;
        }
        $uri = $new_uri; // overwrite
        return true;
    }

    /**
     * Fills $this->replace with the raw (not yet URL-encoded) placeholder
     * values for the given URI and context.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     */
    protected function makeReplace($uri, $config, $context)
    {
        $string = $uri->toString();
        // always available
        $this->replace['%s'] = $string;
        $this->replace['%r'] = $context->get('EmbeddedURI', true);
        $token = $context->get('CurrentToken', true);
        $this->replace['%n'] = $token ? $token->name : null;
        $this->replace['%m'] = $context->get('CurrentAttr', true);
        $this->replace['%p'] = $context->get('CurrentCSSProperty', true);
        // not always available
        if ($this->secretKey) {
            $this->replace['%t'] = hash_hmac("sha256", $string, $this->secretKey);
        }
    }
}
21831
21832
21833
21834
21835
21836/**
21837 * Implements safety checks for safe iframes.
21838 *
21839 * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
21840 * works safely.
21841 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
    /**
     * Identifier of this filter.
     * @type string
     */
    public $name = 'SafeIframe';

    /**
     * @type bool
     */
    public $always_load = true;

    /**
     * Whitelist pattern from %URI.SafeIframeRegexp, or null when none is set.
     * @type string
     */
    protected $regexp = null;

    // XXX: The not so good bit about how this is all set up now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.
    /**
     * Loads the whitelist pattern from the configuration.
     *
     * @param HTMLPurifier_Config $config
     * @return bool Always true.
     */
    public function prepare($config)
    {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * Accepts an embedded iframe URI only if it matches the whitelist
     * pattern. URIs that are not embedded, or not on an iframe token, pass
     * through unchecked, as does everything when %HTML.SafeIframe is off.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function filter(&$uri, $config, $context)
    {
        // check if filter not applicable
        if (!$config->get('HTML.SafeIframe')) {
            return true;
        }
        // check if the filter should actually trigger
        if (!$context->get('EmbeddedURI', true)) {
            return true;
        }
        $token = $context->get('CurrentToken', true);
        if (!($token && $token->name == 'iframe')) {
            return true;
        }
        // check if we actually have some whitelists enabled
        if ($this->regexp === null) {
            return false;
        }
        // actually check the whitelists; preg_match() yields 1, 0 or false
        // (on a pattern error), so normalise to the documented bool and
        // treat an error as "not whitelisted"
        return preg_match($this->regexp, $uri->toString()) === 1;
    }
}
21900
21901
21902
21903
21904
21905/**
21906 * Implements data: URI for base64 encoded images supported by GD.
21907 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = true;

    /**
     * MIME types that may appear in a data URI, as a lookup array.
     * @type array
     */
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
    );
    // this is actually irrelevant since we only write out the path
    // component
    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Validates a data URI by decoding its payload, sniffing the real image
     * type from the bytes, and rewriting the path as
     * "<detected type>;base64,<payload>".
     *
     * The payload is written to a temporary file for type detection; the
     * file is removed again before this method returns.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool False when the declared or detected type is not allowed,
     *              the payload is shorter than 12 bytes, or the type cannot
     *              be detected; true after a successful rewrite.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $result = explode(',', $uri->path, 2);
        $is_base64 = false;
        $charset = null;
        $content_type = null;
        if (count($result) == 2) {
            list($metadata, $data) = $result;
            // do some legwork on the metadata
            $metas = explode(';', $metadata);
            while (!empty($metas)) {
                $cur = array_shift($metas);
                if ($cur === 'base64') {
                    // anything after the base64 marker is ignored
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) === 'charset=') {
                    // doesn't match if there are arbitrary spaces, but
                    // whatever dude
                    if ($charset !== null) {
                        continue;
                    } // garbage
                    // The charset is never used (plaintext payloads are not
                    // allowed); it is tracked only so that charset entries
                    // are not mistaken for a content type.
                    $charset = substr($cur, 8);
                } else {
                    if ($content_type !== null) {
                        continue;
                    } // garbage
                    $content_type = $cur;
                }
            }
        } else {
            $data = $result[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        $data = rawurldecode($data);
        if ($is_base64) {
            $raw_data = base64_decode($data);
        } else {
            $raw_data = $data;
        }
        if (strlen($raw_data) < 12) {
            // error; exif_imagetype throws exception with small files,
            // and this likely indicates a corrupt URI/failed parse anyway
            return false;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
        $have_exif = function_exists('exif_imagetype');
        if (!$have_exif && !function_exists('getimagesize')) {
            // Checked before the temporary file is created so that nothing
            // is left behind on disk.
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            // only reached when a custom error handler swallowed the error;
            // without a detector the payload cannot be trusted
            return false;
        }
        if (function_exists('sys_get_temp_dir')) {
            $file = tempnam(sys_get_temp_dir(), "");
        } else {
            $file = tempnam("/tmp", "");
        }
        if ($file === false) {
            // could not create a temporary file: unable to verify the payload
            return false;
        }
        if (file_put_contents($file, $raw_data) === false) {
            unlink($file);
            return false;
        }
        if ($have_exif) {
            $image_code = exif_imagetype($file);
            unlink($file);
        } else {
            // getimagesize() raises warnings on malformed data; mute them
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            unlink($file);
            if ($info === false) {
                return false;
            }
            $image_code = $info[2];
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) {
                return false;
            }
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * No-op error handler installed around getimagesize() to silence its
     * warnings.
     *
     * @param int $errno
     * @param string $errstr
     */
    public function muteErrorHandler($errno, $errstr)
    {
    }
}
22039
22040
22041
22042/**
22043 * Validates file as defined by RFC 1630 and RFC 1738.
22044 */
class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme
{
    /**
     * file:// URLs are generally not reachable from other machines, so
     * using one as e.g. an img src is incorrect.
     * @type bool
     */
    public $browsable = false;

    /**
     * Essentially the only URI scheme where omitting the host is the
     * norm, because referring to files on the local machine is so common.
     * Browsers on some operating systems do not even understand the
     * authority, though reportedly Windows uses it for network shares.
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Strips the components that have no meaning for file URIs.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        // userinfo: authentication is not supported
        // port:     file:// has no provisions for accessing the resource
        // query:    seems to work in Firefox but has no possible effect
        foreach (array('userinfo', 'port', 'query') as $component) {
            $uri->$component = null;
        }
        return true;
    }
}
22082
22083
22084
22085
22086
22087/**
22088 * Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
22089 */
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme
{
    /**
     * @type int
     */
    public $default_port = 21;

    /**
     * @type bool
     */
    public $browsable = true; // usually

    /**
     * @type bool
     */
    public $hierarchical = true;

    /**
     * Drops the query and normalises a trailing ";type=<code>" typecode.
     *
     * Only the text after the LAST semicolon is considered as a typecode.
     * A valid one (a, i or d) is kept verbatim; a "type=" declaration with
     * any other code is dropped; anything else is kept as part of the path.
     * Every semicolon that remains in the path is encoded as %3B.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        $uri->query = null;

        // typecode check: locate the last semicolon
        $split_at = strrpos($uri->path, ';');
        if ($split_at === false) {
            return true;
        }

        $trailer = substr($uri->path, $split_at + 1); // text after the semicolon
        $head = substr($uri->path, 0, $split_at);
        $suffix = '';

        $pieces = explode('=', $trailer, 2);
        if (count($pieces) === 2 && $pieces[0] === 'type') {
            // a type declaration: keep it only when the code is recognised
            if (in_array($pieces[1], array('a', 'i', 'd'), true)) {
                $suffix = ';type=' . $pieces[1];
            }
        } else {
            // not a type declaration: tack it back on, encoded
            $head .= '%3B' . $trailer;
        }

        $uri->path = str_replace(';', '%3B', $head) . $suffix;
        return true;
    }
}
22141
22142
22143
22144
22145
22146/**
22147 * Validates http (HyperText Transfer Protocol) as defined by RFC 2616
22148 */
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme
{
    /**
     * Default port for this scheme.
     * @type int
     */
    public $default_port = 80;

    /**
     * @type bool
     */
    public $browsable = true;

    /**
     * @type bool
     */
    public $hierarchical = true;

    /**
     * Scheme-specific validation: discards any userinfo component and
     * accepts the URI.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        // the userinfo component is always dropped for this scheme
        $uri->userinfo = null;
        return true;
    }
}
22178
22179
22180
22181
22182
22183/**
22184 * Validates https (Secure HTTP) according to http scheme.
22185 */
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http
{
    /**
     * Default port for this scheme; overrides the parent's 80.
     * @type int
     */
    public $default_port = 443;
    /**
     * Marks this scheme as secure.
     * @type bool
     */
    public $secure = true;
}
22197
22198
22199
22200
22201
22202// VERY RELAXED! Shouldn't cause problems, not even Firefox checks if the
22203// email is valid, but be careful!
22204
22205/**
22206 * Validates mailto (for E-mail) according to RFC 2368
22207 * @todo Validate the email address
22208 * @todo Filter allowed query parameters
22209 */
22210
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears the authority components (userinfo, host and port) and
     * accepts the URI. The path itself is not validated yet.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        foreach (array('userinfo', 'host', 'port') as $component) {
            $uri->$component = null;
        }
        // we need to validate path against RFC 2368's addr-spec
        return true;
    }
}
22238
22239
22240
22241
22242
22243/**
22244 * Validates news (Usenet) as defined by generic RFC 1738
22245 */
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears userinfo, host, port and query, leaving only the remaining
     * components (such as the path), and accepts the URI.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        foreach (array('userinfo', 'host', 'port', 'query') as $component) {
            $uri->$component = null;
        }
        // typecode check needed on path
        return true;
    }
}
22274
22275
22276
22277
22278
22279/**
22280 * Validates nntp (Network News Transfer Protocol) as defined by generic RFC 1738
22281 */
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme
{
    /**
     * @type int
     */
    public $default_port = 119;

    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * Clears the userinfo and query components and accepts the URI.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        foreach (array('userinfo', 'query') as $component) {
            $uri->$component = null;
        }
        return true;
    }
}
22307
22308
22309
22310
22311
22312/**
22313 * Validates tel (for phone numbers).
22314 *
22315 * The relevant specifications for this protocol are RFC 3966 and RFC 5341,
22316 * but this class takes a much simpler approach: we normalize phone
22317 * numbers so that they only include (possibly) a leading plus,
22318 * and then any number of digits and x'es.
22319 */
22320
class HTMLPurifier_URIScheme_tel extends HTMLPurifier_URIScheme
{
    /**
     * @type bool
     */
    public $browsable = false;

    /**
     * @type bool
     */
    public $may_omit_host = true;

    /**
     * Clears the authority components and normalises the phone number in
     * the path to an optional leading plus followed by digits and
     * lower-case x'es.
     *
     * @param HTMLPurifier_URI $uri
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context)
    {
        foreach (array('userinfo', 'host', 'port') as $component) {
            $uri->$component = null;
        }

        // Normalize e(x)tension markers to lower-case first ...
        $lowered = str_replace('X', 'x', $uri->path);
        // ... then delete everything that is neither a digit nor an x,
        // EXCEPT for a plus sign in the very first position.
        $uri->path = preg_replace('/(?!^\+)[^\dx]/', '', $lowered);

        return true;
    }
}
22354
22355
22356
22357
22358
22359/**
22360 * Performs safe variable parsing based on types which can be used by
22361 * users. This may not be able to represent all possible data inputs,
22362 * however.
22363 */
class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
{
    /**
     * Leniently converts $var towards the requested type, accepting
     * convenient string spellings (digit strings, "on"/"off",
     * comma- or newline-separated lists, "key:value" pairs).
     *
     * Values that are not recognised by the scalar cases are returned
     * unchanged rather than rejected here.
     *
     * @param mixed $var Value to convert
     * @param int $type One of the type constants referenced in the switch below
     * @param bool $allow_null Whether a null $var is passed through as null
     * @return array|bool|float|int|mixed|null|string
     * @throws HTMLPurifier_VarParserException
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        if ($allow_null && $var === null) {
            return null;
        }
        switch ($type) {
            // Note: if code "breaks" from the switch, it triggers a generic
            // exception to be thrown. Specific errors can be specifically
            // done here.
            case self::MIXED:
            case self::ISTRING:
            case self::STRING:
            case self::TEXT:
            case self::ITEXT:
                // passed through untouched
                return $var;
            case self::INT:
                // only strings made up purely of digits are converted
                // (ctype_digit() rejects signs); anything else is returned as is
                if (is_string($var) && ctype_digit($var)) {
                    $var = (int)$var;
                }
                return $var;
            case self::FLOAT:
                // numeric strings and integers are widened to float
                if ((is_string($var) && is_numeric($var)) || is_int($var)) {
                    $var = (float)$var;
                }
                return $var;
            case self::BOOL:
                if (is_int($var) && ($var === 0 || $var === 1)) {
                    $var = (bool)$var;
                } elseif (is_string($var)) {
                    // NOTE: these are loose comparisons between two strings,
                    // so numeric-looking spellings such as '01' also equal '1'
                    if ($var == 'on' || $var == 'true' || $var == '1') {
                        $var = true;
                    } elseif ($var == 'off' || $var == 'false' || $var == '0') {
                        $var = false;
                    } else {
                        throw new HTMLPurifier_VarParserException("Unrecognized value '$var' for $type");
                    }
                }
                return $var;
            case self::ALIST:
            case self::HASH:
            case self::LOOKUP:
                if (is_string($var)) {
                    // special case: technically, this is an array with
                    // a single empty string item, but having an empty
                    // array is more intuitive
                    if ($var == '') {
                        return array();
                    }
                    if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
                        // simplistic string to array method that only works
                        // for simple lists of tag names or alphanumeric characters
                        $var = explode(',', $var);
                    } else {
                        // multi-line input: split on commas and on runs of line breaks
                        $var = preg_split('/(,|[\n\r]+)/', $var);
                    }
                    // remove spaces
                    foreach ($var as $i => $j) {
                        $var[$i] = trim($j);
                    }
                    if ($type === self::HASH) {
                        // key:value,key2:value2
                        $nvar = array();
                        foreach ($var as $keypair) {
                            $c = explode(':', $keypair, 2);
                            if (!isset($c[1])) {
                                // entry without a colon: silently skipped
                                continue;
                            }
                            $nvar[trim($c[0])] = trim($c[1]);
                        }
                        $var = $nvar;
                    }
                }
                if (!is_array($var)) {
                    break;
                }
                $keys = array_keys($var);
                // true when the keys are exactly 0..n-1, i.e. $var is a plain list
                if ($keys === array_keys($keys)) {
                    if ($type == self::ALIST) {
                        return $var;
                    } elseif ($type == self::LOOKUP) {
                        // turn the list of values into a value => true lookup
                        $new = array();
                        foreach ($var as $key) {
                            $new[$key] = true;
                        }
                        return $new;
                    } else {
                        // a plain list was given where a hash is required
                        break;
                    }
                }
                // from here on $var has non-sequential keys
                if ($type === self::ALIST) {
                    trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
                    return array_values($var);
                }
                if ($type === self::LOOKUP) {
                    // warn about, then coerce, every value that is not true
                    foreach ($var as $key => $value) {
                        if ($value !== true) {
                            trigger_error(
                                "Lookup array has non-true value at key '$key'; " .
                                "maybe your input array was not indexed numerically",
                                E_USER_WARNING
                            );
                        }
                        $var[$key] = true;
                    }
                }
                return $var;
            default:
                // NOTE(review): errorInconsistent() is defined outside this
                // class; presumably it throws -- confirm in the parent
                $this->errorInconsistent(__CLASS__, $type);
        }
        // reached by every "break" above
        $this->errorGeneric($var, $type);
    }
}
22485
22486
22487
22488
22489
22490/**
22491 * This variable parser uses PHP's internal code engine. Because it does
22492 * this, it can represent all inputs; however, it is dangerous and cannot
22493 * be used by users.
22494 */
class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
{

    /**
     * Evaluates $var as a PHP expression; $type and $allow_null are not
     * consulted here.
     *
     * @param mixed $var
     * @param int $type
     * @param bool $allow_null
     * @return null|string
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        return $this->evalExpression($var);
    }

    /**
     * Evaluates a PHP expression and returns its value.
     *
     * SECURITY: this runs eval() on $expr. It must only ever be given
     * trusted input; never route user-supplied data through it.
     *
     * @param string $expr PHP expression, without the trailing semicolon
     * @return mixed Value the expression evaluates to
     * @throws HTMLPurifier_VarParserException If the expression cannot be parsed
     */
    protected function evalExpression($expr)
    {
        $var = null;
        try {
            $result = eval("\$var = $expr;");
        } catch (ParseError $e) {
            // PHP 7+ reports a malformed expression by throwing ParseError
            // instead of returning false; translate it into the documented
            // exception type. On PHP 5 this clause simply never matches.
            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
        }
        if ($result === false) {
            // PHP 5 behaviour: eval() returns false on a parse error
            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
        }
        return $var;
    }
}
22524
22525
22526
22527