1<?php
2
/**
 * Represents an XHTML 1.1 module, with information on elements, tags
 * and attributes.
 * @note Even though this is technically XHTML 1.1, it is also used for
 *       regular HTML parsing. We are using modularization as a convenient
 *       way to represent the internals of HTMLDefinition, and our
 *       implementation is by no means conforming and does not directly
 *       use the normative DTDs or XML schemas.
 * @note The public variables in a module should almost directly
 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 *       However, the prefix info carries no special meaning in these
 *       objects (include it anyway if that's the correspondence though).
 * @todo Consider making some member functions protected
 */
17
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation does not define any child definitions and
     * always returns false; modules that override it should also set
     * $defines_child_def to true.
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool Child definition, or false if this
     *         module does not provide one
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model; yields two nulls when $contents is not a string
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions (stored under key 0 of $attr)
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets, unless the caller opted out with a falsy $type
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element.
     *
     * If the element is already defined in this module, nothing is
     * overwritten: an error is raised through trigger_error() and the
     * existing definition is returned instead.
     *
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created (or pre-existing) element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated into a single ' | '-separated string per
     * content set.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  or one of the shorthands 'Empty', 'Inline', 'Flow'.
     * @return array Two-element list: array($content_model_type, $content_model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first colon separates the type from the model, so the
     *       model itself may contain colons. If there is no colon at all,
     *       the whole string is treated as the type and the model is ''.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Limit the split to two pieces: an unlimited explode() would drop
        // everything after a second colon once unpacked into two variables.
        $parts = explode(':', $contents, 2);
        $content_model_type = strtolower(trim($parts[0]));
        // A missing colon leaves no second piece; fall back to an empty model
        // instead of reading an undefined offset and trimming null.
        $content_model = isset($parts[1]) ? trim($parts[1]) : '';
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under key 0 of $attr,
     * replacing whatever was there before.
     * @param array $attr Reference to attr array to modify
     * @param array|string $attr_includes Array of includes / string include to merge in;
     *        an empty non-array value is treated as "no includes"
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param array|string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument; this form is selected when
     *       the first argument is a string.
     * @note Null entries are skipped.
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            // variadic form: every argument becomes a key
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing; it is a hook
     * for subclasses.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
283
284// vim: et sw=4 sts=4
285