1<?php
2
3/*
4htmLawed 1.2.5, 24 September 2019
5Copyright Santosh Patnaik
6Dual licensed with LGPL 3 and GPL 2+
7A PHP Labware internal utility - www.bioinformatics.org/phplabware/internal_utilities/htmLawed
8
9See htmLawed_README.txt/htm
10*/
11
/**
 * Filter the markup in $t according to configuration $C and attribute specification $S.
 *
 * Processing order: every configuration value is normalised to a known form; the
 * configuration and spec are published through $GLOBALS['C'] and $GLOBALS['S'] for the
 * hl_* callbacks; control characters are stripped; Windows-1252 "smart" characters are
 * optionally translated; comments/CDATA sections go through hl_cmtcd() and entities through
 * hl_ent(); the optional hook runs; every tag is passed to hl_tag(); the result is optionally
 * balanced by hl_bal() and tidied by hl_tidy().
 *
 * Side effects: $GLOBALS['C'] and $GLOBALS['S'] are overwritten for the duration of the call
 * and put back afterwards only if they were set before; $GLOBALS['hl_Ids'] is created when
 * 'unique_ids' is active and it does not exist yet; when 'show_setting' holds a valid
 * variable name, the final config/spec/time are stored in the global of that name.
 *
 * @param string       $t Text to filter.
 * @param array|mixed  $C Configuration array; any non-array value means "all defaults".
 * @param array|string $S Attribute specification; a non-array value is handed to hl_spec().
 *
 * @return string The filtered text.
 */
function htmLawed($t, $C = 1, $S = array()) {
    $C = is_array($C) ? $C : array();
    // 'valid_xhtml' only fills in the three settings below when the caller left them unset/empty
    if (!empty($C['valid_xhtml'])) {
        $C['elements'] = empty($C['elements']) ? '*-acronym-big-center-dir-font-isindex-s-strike-tt' : $C['elements'];
        $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
        $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
    }
    // config eles: start from the full list of known elements
    $e = array('a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'dd' => 1, 'del' => 1, 'details' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1, 'em' => 1, 'embed' => 1, 'fieldset' => 1, 'figcaption' => 1, 'figure' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1, 'kbd' => 1, 'keygen' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'link' => 1, 'main' => 1, 'map' => 1, 'mark' => 1, 'menu' => 1, 'meta' => 1, 'meter' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'output' => 1, 'p' => 1, 'param' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'small' => 1, 'source' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'style' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'time' => 1, 'tr' => 1, 'track' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1); // 118 incl. deprecated & some Ruby

    // 'safe' removes these elements from the starting list
    if (!empty($C['safe'])) {
        unset($e['applet'], $e['audio'], $e['canvas'], $e['embed'], $e['iframe'], $e['object'], $e['script'], $e['video']);
    }
    // 'elements' (whitespace ignored): '-*' = none; no '*' = comma-separated whitelist;
    // otherwise '*' followed by +name / -name adjustments to the starting list
    $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
    if ($x == '-*') {
        $e = array();
    } elseif (strpos($x, '*') === false) {
        $e = array_flip(explode(',', $x));
    } else {
        if (isset($x[1])) {
            preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
            // Flatten the match sets to plain strings such as '+name' / '-name'
            for ($i = count($m); --$i >= 0;) {
                $m[$i] = $m[$i][0];
            }
            foreach ($m as $v) {
                if ($v[0] == '+') {
                    $e[substr($v, 1)] = 1;
                }
                // A '-name' is ignored when the same name is also listed with '+'
                if ($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'.$v, $m)) {
                    unset($e[$v]);
                }
            }
        }
    }
    $C['elements'] = &$e;
    // config attrs: a leading '*' selects the '-'-separated form ('data-' is shielded from the
    // split by swapping it to '/' and back); otherwise a comma-separated list, plus 'on*' when 'safe'
    $x = !empty($C['deny_attribute']) ? strtolower(str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute'])) : '';
    $x = array_flip((isset($x[0]) && $x[0] == '*') ? str_replace('/', 'data-', explode('-', str_replace('data-', '/', $x))) : explode(',', $x.(!empty($C['safe']) ? ',on*' : '')));
    $C['deny_attribute'] = $x;
    // config URLs: 'attribute: scheme, scheme; attribute: ...' with '*' as the catch-all attribute
    $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet'.(empty($C['safe']) ? ', app, javascript; *: data, javascript, ' : '; *:').'file, http, https';
    $C['schemes'] = array();
    foreach (explode(';', trim(str_replace(array(' ', "\t", "\r", "\n"), '', $x), ';')) as $v) {
        // Pad to two items so that an entry without ':' is skipped quietly instead of making
        // list() read a missing offset (a notice/warning, depending on the PHP version)
        list($x, $x2) = array_pad(explode(':', $v, 2), 2, null);
        if ($x2) {
            $C['schemes'][$x] = array_flip(explode(',', $x2));
        }
    }
    if (!isset($C['schemes']['*'])) {
        $C['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
        if (empty($C['safe'])) {
            $C['schemes']['*'] += array('data' => 1, 'javascript' => 1);
        }
    }
    // With 'safe' and no explicit rule for 'style', store the '!' marker for that attribute
    if (!empty($C['safe']) && empty($C['schemes']['style'])) {
        $C['schemes']['style'] = array('!' => 1);
    }
    $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
    // Without a base URL of the form scheme://host/optional/path/ both URL settings are switched off
    if (!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
        $C['base_url'] = $C['abs_url'] = 0;
    }
    // config rest: give every remaining setting a defined value
    $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
    $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
    $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
    $C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
    $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
    $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
    $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
    $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
    $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
    $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
    $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
    $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
    $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
    $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
    $C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
    $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
    $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
    $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
    $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
    $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
    $C['unique_ids'] = isset($C['unique_ids']) && (!preg_match('`\W`', $C['unique_ids'])) ? $C['unique_ids'] : 1;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

    // Publish config and spec as globals for the callbacks, remembering any values already there
    if (isset($GLOBALS['C'])) {
        $reC = $GLOBALS['C'];
    }
    $GLOBALS['C'] = $C;
    $S = is_array($S) ? $S : hl_spec($S);
    if (isset($GLOBALS['S'])) {
        $reS = $GLOBALS['S'];
    }
    $GLOBALS['S'] = $S;

    // Remove control characters other than tab, line feed and carriage return
    $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
    if ($C['clean_ms_char']) {
        // Bytes 0x7f-0x9f: replaced by entities or dropped; the quote characters become
        // entities when the setting is 1 and plain ASCII quotes otherwise
        $x = array("\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;', "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;', "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;', "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '', "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;', "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;', "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;');
        $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;') : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $t = strtr($t, $x);
    }
    // Comments and CDATA sections are rewritten by hl_cmtcd() before tags are looked at
    if ($C['cdata'] or $C['comment']) {
        $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $t);
    }
    // Escape every '&', then let hl_ent() decide which entity-like sequences get a live '&' back
    $t = preg_replace_callback('`&amp;([a-zA-Z][a-zA-Z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $t));
    if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }
    if ($C['hook']) {
        $t = $C['hook']($t, $C, $S);
    }
    // Expose the finalised settings in a global only when the requested name is a plain identifier
    if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
        $GLOBALS[$C['show_setting']] = array('config' => $C, 'spec' => $S, 'time' => microtime());
    }
    // main: every '<...>' run (and every stray '>') goes through hl_tag()
    $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
    $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
    // Drop the \x01/\x02 markers and turn \x03, \x04, \x05 back into '&', '<', '>'
    $t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
    $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
    unset($C, $e);
    // Put back the globals that existed before this call
    if (isset($reC)) {
        $GLOBALS['C'] = $reC;
    }
    if (isset($reS)) {
        $GLOBALS['S'] = $reS;
    }

    return $t;
}
143
/**
 * Check an attribute value against the rules given for that attribute in the spec.
 *
 * For accesskey, class, itemtype and rel the value is split on spaces, and for srcset on
 * commas; each item is trimmed and checked on its own. Any other attribute is checked as a
 * single trimmed value. An item that is empty after trimming skips all rules and is kept.
 *
 * Recognised rule keys in $p: maxlen, minlen (byte length), maxval, minval (value cast to
 * float), match, nomatch (regular expressions), oneof, noneof ('|'-separated choices) and
 * default. The oneof/noneof choices are compared as exact strings, so a numeric look-alike
 * such as '1.0' does not count as the choice '1'.
 *
 * @param string $a Attribute name.
 * @param string $t Attribute value.
 * @param array  $p Rules for the attribute, keyed by rule name.
 *
 * @return mixed The accepted item(s), joined by ' ' (or ', ' for srcset); when nothing is
 *               left, $p['default'] if it is set, otherwise 0.
 */
function hl_attrval($a, $t, $p) {
    // check attr val against $S
    static $ma = array('accesskey', 'class', 'itemtype', 'rel'); // Space-separated multi-value attributes
    $s = in_array($a, $ma) ? ' ' : ($a == 'srcset' ? ',' : '');
    $r = array();
    $t = !empty($s) ? explode($s, $t) : array($t);
    foreach ($t as $tv) {
        $o = 1;
        $tv = trim($tv);
        $l = strlen($tv);
        // Rules apply to non-empty items only; the first failing rule rejects the item
        if ($l) {
            foreach ($p as $k => $v) {
                switch ($k) {
                    case 'maxlen':
                        if ($l > $v) {
                            $o = 0;
                        }
                        break;
                    case 'minlen':
                        if ($l < $v) {
                            $o = 0;
                        }
                        break;
                    case 'maxval':
                        if ((float)($tv) > $v) {
                            $o = 0;
                        }
                        break;
                    case 'minval':
                        if ((float)($tv) < $v) {
                            $o = 0;
                        }
                        break;
                    case 'match':
                        if (!preg_match($v, $tv)) {
                            $o = 0;
                        }
                        break;
                    case 'nomatch':
                        if (preg_match($v, $tv)) {
                            $o = 0;
                        }
                        break;
                    case 'oneof':
                        // Strict search: with '==' two numeric strings are compared by value,
                        // which let e.g. '1.0' or '01' pass for the choice '1'
                        $o = in_array($tv, explode('|', $v), true) ? 1 : 0;
                        break;
                    case 'noneof':
                        $o = in_array($tv, explode('|', $v), true) ? 0 : 1;
                        break;
                    default:
                        break;
                }
                if (!$o) {
                    break;
                }
            }
        }
        if ($o) {
            $r[] = $tv;
        }
    }
    // srcset candidates are written back as a comma followed by a space
    if ($s == ',') {
        $s = ', ';
    }
    $r = implode($s, $r);

    return isset($r[0]) ? $r : (isset($p['default']) ? $p['default'] : 0);
}
227
/**
 * Balance tags and enforce parent/child content rules.
 *
 * The text is split on '<'; each piece that starts with a tag (optional '/', a name made of
 * a-z and 1-6, then everything up to '>') is checked against the element that is currently
 * open, or against $in when nothing is open. Output is collected through output buffering.
 * $q is the stack of open non-empty elements; missing closing tags are supplied, including
 * at the end of the text.
 *
 * $do selects what happens to disallowed tags and to text in a place that allows no text:
 *  - 1: a disallowed tag is always written out entity-escaped; 3 or 5: only where text
 *    (#pcdata) is allowed; any other value: the tag is dropped;
 *  - below 3: the text after a tag is always written; otherwise only where text is allowed,
 *    and above 4 disallowed text is reduced to its whitespace (comment placeholders are kept).
 *
 * Reads $GLOBALS['C']['direct_list_nest'] to decide whether lists may be direct children of lists.
 *
 * @param string $t  Markup to balance.
 * @param int    $do Handling of disallowed tags/content (see above).
 * @param string $in Element to treat as the parent of $t; unknown names fall back to 'div'.
 *
 * @return string The balanced markup; for an empty-element parent, $t with '<' and '>'
 *                escaped, or '' when $do is 0.
 */
function hl_bal($t, $do = 1, $in = 'div') {
    // balance tags
    // by content
    $cB = array('form' => 1, 'map' => 1, 'noscript' => 1); // Block
    $cE = array('area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1); // Empty
    $cF = array('a' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'button' => 1, 'blockquote' => 1, 'canvas' => 1, 'del' => 1, 'details' => 1, 'div' => 1, 'dd' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'header' => 1, 'iframe' => 1, 'ins' => 1, 'li' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'section' => 1, 'style' => 1, 'td' => 1, 'th' => 1, 'video' => 1); // Flow; later context-wise dynamic move of ins & del to $cI
    $cI = array('abbr' => 1, 'acronym' => 1, 'address' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'caption' => 1, 'cite' => 1, 'code' => 1, 'data' => 1, 'datalist' => 1, 'dfn' => 1, 'dt' => 1, 'em' => 1, 'figcaption' => 1, 'font' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hgroup' => 1, 'i' => 1, 'kbd' => 1, 'label' => 1, 'legend' => 1, 'mark' => 1, 'meter' => 1, 'output' => 1, 'p' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rt' => 1, 's' => 1, 'samp' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1); // Inline
    $cN = array('a' => array('a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1), 'address' => array('address' => 1, 'article' => 1, 'aside' => 1, 'header' => 1, 'keygen' => 1, 'footer' => 1, 'nav' => 1, 'section' => 1), 'button' => array('a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1), 'fieldset' => array('fieldset' => 1), 'footer' => array('header' => 1, 'footer' => 1), 'form' => array('form' => 1), 'header' => array('header' => 1, 'footer' => 1), 'label' => array('label' => 1), 'main' => array('main' => 1), 'meter' => array('meter' => 1), 'noscript' => array('script' => 1), 'pre' => array('big' => 1, 'font' => 1, 'img' => 1, 'object' => 1, 'script' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1), 'progress' => array('progress' => 1), 'rb' => array('ruby' => 1), 'rt' => array('ruby' => 1), 'time' => array('time' => 1)); // Illegal
    $cN2 = array_keys($cN);
    $cS = array('colgroup' => array('col' => 1), 'datalist' => array('option' => 1), 'dir' => array('li' => 1), 'dl' => array('dd' => 1, 'dt' => 1), 'hgroup' => array('h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1), 'menu' => array('li' => 1), 'ol' => array('li' => 1), 'optgroup' => array('option' => 1), 'option' => array('#pcdata' => 1), 'rbc' => array('rb' => 1), 'rp' => array('#pcdata' => 1), 'rtc' => array('rt' => 1), 'ruby' => array('rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1), 'select' => array('optgroup' => 1, 'option' => 1), 'script' => array('#pcdata' => 1), 'table' => array('caption' => 1, 'col' => 1, 'colgroup' => 1, 'tfoot' => 1, 'tbody' => 1, 'tr' => 1, 'thead' => 1), 'tbody' => array('tr' => 1), 'tfoot' => array('tr' => 1), 'textarea' => array('#pcdata' => 1), 'thead' => array('tr' => 1), 'tr' => array('td' => 1, 'th' => 1), 'ul' => array('li' => 1)); // Specific - immediate parent-child
    if ($GLOBALS['C']['direct_list_nest']) {
        $cS['ol'] = $cS['ul'] = $cS['menu'] += array('menu' => 1, 'ol' => 1, 'ul' => 1);
    }
    $cO = array('address' => array('p' => 1), 'applet' => array('param' => 1), 'audio' => array('source' => 1, 'track' => 1), 'blockquote' => array('script' => 1), 'details' => array('summary' => 1), 'fieldset' => array('legend' => 1, '#pcdata' => 1), 'figure' => array('figcaption' => 1), 'form' => array('script' => 1), 'map' => array('area' => 1), 'object' => array('param' => 1, 'embed' => 1), 'video' => array('source' => 1, 'track' => 1)); // Other
    $cT = array('colgroup' => 1, 'dd' => 1, 'dt' => 1, 'li' => 1, 'option' => 1, 'p' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1); // Omitable closing
    // block/inline type; a/ins/del both type; #pcdata: text
    $eB = array('a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'del' => 1, 'details' => 1, 'dir' => 1, 'dl' => 1, 'div' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'ins' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hr' => 1, 'isindex' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'section' => 1, 'style' => 1, 'table' => 1, 'ul' => 1);
    $eI = array('#pcdata' => 1, 'a' => 1, 'abbr' => 1, 'acronym' => 1, 'applet' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'cite' => 1, 'code' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'del' => 1, 'dfn' => 1, 'em' => 1, 'embed' => 1, 'figcaption' => 1, 'font' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'kbd' => 1, 'label' => 1, 'link' => 1, 'map' => 1, 'mark' => 1, 'meta' => 1, 'meter' => 1, 'object' => 1, 'output' => 1, 'progress' => 1, 'q' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'select' => 1, 'script' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1);
    $eN = array('a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'big' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'meter' => 1, 'nav' => 1, 'object' => 1, 'progress' => 1, 'ruby' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1); // Exclude from specific ele; $cN values
    $eO = array('area' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'dd' => 1, 'dt' => 1, 'hgroup' => 1, 'keygen' => 1, 'legend' => 1, 'li' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'script' => 1, 'source' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'thead' => 1, 'th' => 1, 'tr' => 1, 'track' => 1); // Missing in $eB & $eI
    $eF = $eB + $eI;

    // $in sets allowed child
    $in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
    // An empty-element parent can hold no markup: escape everything, or return nothing when $do is 0
    if (isset($cE[$in])) {
        return !$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
    }
    // Start from an empty allow-list: 'center' and 'applet' pass the check above but are in none
    // of $cS/$cI/$cF/$cB, which used to leave $inOk undefined (and made the '+' with
    // $cO['applet'] below a fatal "unsupported operand types" error)
    $inOk = array();
    if (isset($cS[$in])) {
        $inOk = $cS[$in];
    } elseif (isset($cI[$in])) {
        $inOk = $eI;
        $cI['del'] = 1;
        $cI['ins'] = 1;
    } elseif (isset($cF[$in])) {
        $inOk = $eF;
        unset($cI['del'], $cI['ins']);
    } elseif (isset($cB[$in])) {
        $inOk = $eB;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($cO[$in])) {
        $inOk = $inOk + $cO[$in];
    }
    if (isset($cN[$in])) {
        $inOk = array_diff_assoc($inOk, $cN[$in]);
    }

    $t = explode('<', $t);
    $ok = $q = array(); // $q seq list of open non-empty ele
    ob_start();

    // Each pass first writes out what the previous pass left pending in $e (a rejected tag) and
    // $x (the text that followed the previous tag), then parses the tag at the start of $t[$i]
    for ($i = -1, $ci = count($t); ++$i < $ci;) {
        // allowed $ok in parent $p
        if ($ql = count($q)) {
            $p = array_pop($q);
            $q[] = $p;
            if (isset($cS[$p])) {
                $ok = $cS[$p];
            } elseif (isset($cI[$p])) {
                $ok = $eI;
                $cI['del'] = 1;
                $cI['ins'] = 1;
            } elseif (isset($cF[$p])) {
                $ok = $eF;
                unset($cI['del'], $cI['ins']);
            } elseif (isset($cB[$p])) {
                $ok = $eB;
                unset($cI['del'], $cI['ins']);
            }
            if (isset($cO[$p])) {
                $ok = $ok + $cO[$p];
            }
            if (isset($cN[$p])) {
                $ok = array_diff_assoc($ok, $cN[$p]);
            }
        } else {
            $ok = $inOk;
            unset($cI['del'], $cI['ins']);
        }
        // bad tags, & ele content
        // $e is still set here only when the previous tag was rejected (accepted tags unset it)
        if (isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))) {
            echo '&lt;', $s, $e, $a, '&gt;';
        }
        if (isset($x[0])) {
            if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
                // Non-blank text directly inside a $cB parent is wrapped in a div
                echo '<div>', $x, '</div>';
            } elseif ($do < 3 or isset($ok['#pcdata'])) {
                echo $x;
            } elseif (strpos($x, "\x02\x04")) {
                // Text not allowed here: keep the \x01\x02...\x02\x01 delimited segments, drop the rest
                foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                    echo substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
                }
            } elseif ($do > 4) {
                echo preg_replace('`\S`', '', $x);
            }
        }
        // get markup
        if (!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)) {
            // No tag at the start of this piece: carry the whole piece forward as text
            $x = $t[$i];
            continue;
        }
        $s = null;
        $e = null;
        $a = null;
        $x = null;
        // $s: '/' for a closing tag, $e: element name, $a: rest of the tag, $x: text after it
        list($all, $s, $e, $a, $x) = $r;
        // close tag
        if ($s) {
            if (isset($cE[$e]) or !in_array($e, $q)) {
                continue;
            } // Empty/unopen
            if ($p == $e) {
                array_pop($q);
                echo '</', $e, '>';
                unset($e);
                continue;
            } // Last open
            $add = ''; // Nesting - close open tags that need to be
            for ($j = -1, $cj = count($q); ++$j < $cj;) {
                if (($d = array_pop($q)) == $e) {
                    break;
                } else {
                    $add .= "</{$d}>";
                }
            }
            echo $add, '</', $e, '>';
            unset($e);
            continue;
        }
        // open tag
        // $cB ele needs $eB ele as child
        if (isset($cB[$e]) && strlen(trim($x))) {
            // Re-queue as "<e ...>" followed by "<div>text" and process this index again
            $t[$i] = "{$e}{$a}>";
            array_splice($t, $i + 1, 0, 'div>'.$x);
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        if ((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])) {
            // Insert an opening div ahead of this tag and process this index again
            array_splice($t, $i, 0, 'div>');
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        // if no open ele, $in = parent; mostly immediate parent-child relation should hold
        if (!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)) {
            if (!isset($ok[$e])) {
                if ($ql && isset($cT[$p])) {
                    // Parent's closing tag may be omitted: close it and retry this tag one level up
                    echo '</', array_pop($q), '>';
                    unset($e, $x);
                    --$i;
                }
                continue;
            }
            // A span without attributes is not written and not pushed on the stack
            if ($e !== 'span' || !empty($a)) {
                if (!isset($cE[$e])) {
                    $q[] = $e;
                }
                echo '<', $e, $a, '>';
            }
            unset($e);
            continue;
        }
        // specific parent-child
        if (isset($cS[$p][$e])) {
            if (!isset($cE[$e])) {
                $q[] = $e;
            }
            echo '<', $e, $a, '>';
            unset($e);
            continue;
        }
        // nesting
        // Walk the open elements from the outermost one; at the first that cannot contain $e,
        // close it and everything opened after it
        $add = '';
        $q2 = array();
        for ($k = -1, $kc = count($q); ++$k < $kc;) {
            $d = $q[$k];
            $ok2 = array();
            if (isset($cS[$d])) {
                $q2[] = $d;
                continue;
            }
            $ok2 = isset($cI[$d]) ? $eI : $eF;
            if (isset($cO[$d])) {
                $ok2 = $ok2 + $cO[$d];
            }
            if (isset($cN[$d])) {
                $ok2 = array_diff_assoc($ok2, $cN[$d]);
            }
            if (!isset($ok2[$e])) {
                if (!$k && !isset($inOk[$e])) {
                    continue 2;
                }
                $add = "</{$d}>";
                for (; ++$k < $kc;) {
                    $add = "</{$q[$k]}>{$add}";
                }
                break;
            } else {
                $q2[] = $d;
            }
        }
        $q = $q2;
        if (!isset($cE[$e])) {
            $q[] = $e;
        }
        echo $add, '<', $e, $a, '>';
        unset($e);
        continue;
    }

    // end
    // Same flush as at the top of the loop, for whatever the last piece left in $e / $x
    if ($ql = count($q)) {
        $p = array_pop($q);
        $q[] = $p;
        if (isset($cS[$p])) {
            $ok = $cS[$p];
        } elseif (isset($cI[$p])) {
            $ok = $eI;
            $cI['del'] = 1;
            $cI['ins'] = 1;
        } elseif (isset($cF[$p])) {
            $ok = $eF;
            unset($cI['del'], $cI['ins']);
        } elseif (isset($cB[$p])) {
            $ok = $eB;
            unset($cI['del'], $cI['ins']);
        }
        if (isset($cO[$p])) {
            $ok = $ok + $cO[$p];
        }
        if (isset($cN[$p])) {
            $ok = array_diff_assoc($ok, $cN[$p]);
        }
    } else {
        $ok = $inOk;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))) {
        echo '&lt;', $s, $e, $a, '&gt;';
    }
    if (isset($x[0])) {
        if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
            echo '<div>', $x, '</div>';
        } elseif ($do < 3 or isset($ok['#pcdata'])) {
            echo $x;
        } elseif (strpos($x, "\x02\x04")) {
            foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                echo substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
            }
        } elseif ($do > 4) {
            echo preg_replace('`\S`', '', $x);
        }
    }
    // Close whatever is still open, innermost first
    while (!empty($q) && ($e = array_pop($q))) {
        echo '</', $e, '>';
    }
    $o = ob_get_contents();
    ob_end_clean();

    return $o;
}
492
function hl_cmtcd($t) {
    // Comment/CDATA section handler, written as a preg_replace_callback callback.
    // $t[0] is the complete '<!--...-->' or '<![CDATA[...]]>' match. The global setting
    // $C['comment'] or $C['cdata'] decides the result: 0 returns the match untouched, 1 removes
    // it, any other value returns it with '&', '<', '>' swapped for the placeholder bytes
    // \x03, \x04, \x05 and framed by \x01\x02 (comment) or \x01\x01 (CDATA) markers; with 2 the
    // inner '&', '<', '>' are turned into named entities first.
    global $C;
    $markup = $t[0];
    // The 4th character is '-' for a comment; anything else is taken to be a CDATA section
    $type = ($markup[3] == '-') ? 'comment' : 'cdata';
    $mode = $C[$type];
    if (!$mode) {
        return $markup;
    }
    if ($mode == 1) {
        return '';
    }
    if ($type == 'comment' && $mode < 4) {
        // Keep only the comment text, collapse runs of '-' to a single one, and make sure
        // the text ends with a space
        $markup = preg_replace('`--+`', '-', substr($markup, 4, -3));
        if (substr($markup, -1) != ' ') {
            $markup .= ' ';
        }
    } else {
        // Strip only the outer '<' and '>'
        // NOTE(review): a comment with a setting of 4 or more also lands here and keeps its own
        // '!--' / '--', which the comment frame below then adds a second time - confirm intended
        $markup = substr($markup, 1, -1);
    }
    if ($mode == 2) {
        $markup = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $markup);
    }
    if ($type == 'comment') {
        $framed = "\x01\x02\x04!--$markup--\x05\x02\x01";
    } else {
        $framed = "\x01\x01\x04$markup\x05\x01\x01";
    }

    return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), $framed);
}
514
function hl_ent($t) {
    // Entity handler, written as a preg_replace_callback callback.
    // $t[1] is the entity body without '&' and ';': a name, or '#' followed by decimal digits
    // or by x/X and hex digits. The result starts with '&', or with the byte \x06 when
    // $C['and_mark'] is on. Unknown names and rejected numeric references come back with
    // 'amp;' in front of the body, i.e. as plain text. Known names stay as they are, or become
    // numeric when $C['named_entity'] is off; $C['hexdec_entity'] picks decimal or hex output.
    global $C;
    // Names that are always kept as named entities
    static $U = array('quot' => 1, 'amp' => 1, 'lt' => 1, 'gt' => 1);
    // Other known names and their decimal code points
    static $N = array(
        'fnof' => '402', 'Alpha' => '913', 'Beta' => '914', 'Gamma' => '915', 'Delta' => '916', 'Epsilon' => '917', 'Zeta' => '918', 'Eta' => '919', 'Theta' => '920', 'Iota' => '921', 'Kappa' => '922', 'Lambda' => '923', 'Mu' => '924', 'Nu' => '925', 'Xi' => '926', 'Omicron' => '927', 'Pi' => '928', 'Rho' => '929', 'Sigma' => '931', 'Tau' => '932', 'Upsilon' => '933', 'Phi' => '934', 'Chi' => '935', 'Psi' => '936', 'Omega' => '937',
        'alpha' => '945', 'beta' => '946', 'gamma' => '947', 'delta' => '948', 'epsilon' => '949', 'zeta' => '950', 'eta' => '951', 'theta' => '952', 'iota' => '953', 'kappa' => '954', 'lambda' => '955', 'mu' => '956', 'nu' => '957', 'xi' => '958', 'omicron' => '959', 'pi' => '960', 'rho' => '961', 'sigmaf' => '962', 'sigma' => '963', 'tau' => '964', 'upsilon' => '965', 'phi' => '966', 'chi' => '967', 'psi' => '968', 'omega' => '969', 'thetasym' => '977', 'upsih' => '978', 'piv' => '982',
        'bull' => '8226', 'hellip' => '8230', 'prime' => '8242', 'Prime' => '8243', 'oline' => '8254', 'frasl' => '8260', 'weierp' => '8472', 'image' => '8465', 'real' => '8476', 'trade' => '8482', 'alefsym' => '8501', 'larr' => '8592', 'uarr' => '8593', 'rarr' => '8594', 'darr' => '8595', 'harr' => '8596', 'crarr' => '8629', 'lArr' => '8656', 'uArr' => '8657', 'rArr' => '8658', 'dArr' => '8659', 'hArr' => '8660',
        'forall' => '8704', 'part' => '8706', 'exist' => '8707', 'empty' => '8709', 'nabla' => '8711', 'isin' => '8712', 'notin' => '8713', 'ni' => '8715', 'prod' => '8719', 'sum' => '8721', 'minus' => '8722', 'lowast' => '8727', 'radic' => '8730', 'prop' => '8733', 'infin' => '8734', 'ang' => '8736', 'and' => '8743', 'or' => '8744', 'cap' => '8745', 'cup' => '8746', 'int' => '8747', 'there4' => '8756', 'sim' => '8764', 'cong' => '8773', 'asymp' => '8776', 'ne' => '8800', 'equiv' => '8801', 'le' => '8804', 'ge' => '8805', 'sub' => '8834', 'sup' => '8835', 'nsub' => '8836', 'sube' => '8838', 'supe' => '8839', 'oplus' => '8853', 'otimes' => '8855',
        'perp' => '8869', 'sdot' => '8901', 'lceil' => '8968', 'rceil' => '8969', 'lfloor' => '8970', 'rfloor' => '8971', 'lang' => '9001', 'rang' => '9002', 'loz' => '9674', 'spades' => '9824', 'clubs' => '9827', 'hearts' => '9829', 'diams' => '9830', 'apos' => '39', 'OElig' => '338', 'oelig' => '339', 'Scaron' => '352', 'scaron' => '353', 'Yuml' => '376', 'circ' => '710', 'tilde' => '732',
        'ensp' => '8194', 'emsp' => '8195', 'thinsp' => '8201', 'zwnj' => '8204', 'zwj' => '8205', 'lrm' => '8206', 'rlm' => '8207', 'ndash' => '8211', 'mdash' => '8212', 'lsquo' => '8216', 'rsquo' => '8217', 'sbquo' => '8218', 'ldquo' => '8220', 'rdquo' => '8221', 'bdquo' => '8222', 'dagger' => '8224', 'Dagger' => '8225', 'permil' => '8240', 'lsaquo' => '8249', 'rsaquo' => '8250', 'euro' => '8364',
        'nbsp' => '160', 'iexcl' => '161', 'cent' => '162', 'pound' => '163', 'curren' => '164', 'yen' => '165', 'brvbar' => '166', 'sect' => '167', 'uml' => '168', 'copy' => '169', 'ordf' => '170', 'laquo' => '171', 'not' => '172', 'shy' => '173', 'reg' => '174', 'macr' => '175', 'deg' => '176', 'plusmn' => '177', 'sup2' => '178', 'sup3' => '179', 'acute' => '180', 'micro' => '181', 'para' => '182', 'middot' => '183', 'cedil' => '184', 'sup1' => '185', 'ordm' => '186', 'raquo' => '187', 'frac14' => '188', 'frac12' => '189', 'frac34' => '190', 'iquest' => '191',
        'Agrave' => '192', 'Aacute' => '193', 'Acirc' => '194', 'Atilde' => '195', 'Auml' => '196', 'Aring' => '197', 'AElig' => '198', 'Ccedil' => '199', 'Egrave' => '200', 'Eacute' => '201', 'Ecirc' => '202', 'Euml' => '203', 'Igrave' => '204', 'Iacute' => '205', 'Icirc' => '206', 'Iuml' => '207', 'ETH' => '208', 'Ntilde' => '209', 'Ograve' => '210', 'Oacute' => '211', 'Ocirc' => '212', 'Otilde' => '213', 'Ouml' => '214', 'times' => '215', 'Oslash' => '216', 'Ugrave' => '217', 'Uacute' => '218', 'Ucirc' => '219', 'Uuml' => '220', 'Yacute' => '221', 'THORN' => '222', 'szlig' => '223',
        'agrave' => '224', 'aacute' => '225', 'acirc' => '226', 'atilde' => '227', 'auml' => '228', 'aring' => '229', 'aelig' => '230', 'ccedil' => '231', 'egrave' => '232', 'eacute' => '233', 'ecirc' => '234', 'euml' => '235', 'igrave' => '236', 'iacute' => '237', 'icirc' => '238', 'iuml' => '239', 'eth' => '240', 'ntilde' => '241', 'ograve' => '242', 'oacute' => '243', 'ocirc' => '244', 'otilde' => '245', 'ouml' => '246', 'divide' => '247', 'oslash' => '248', 'ugrave' => '249', 'uacute' => '250', 'ucirc' => '251', 'uuml' => '252', 'yacute' => '253', 'thorn' => '254', 'yuml' => '255',
    );
    $body = $t[1];
    $amp = $C['and_mark'] ? "\x06" : '&';

    // Named entity
    if ($body[0] != '#') {
        if (isset($U[$body])) {
            return $amp.$body.';';
        }
        if (!isset($N[$body])) {
            // Unknown name: leave it as text
            return $amp.'amp;'.$body.';';
        }
        if ($C['named_entity']) {
            return $amp.$body.';';
        }
        // Named entities are switched off: write the numeric form instead
        $num = $C['hexdec_entity'] > 1 ? 'x'.dechex($N[$body]) : $N[$body];

        return $amp.'#'.$num.';';
    }

    // Numeric reference: $ref is the part after '#'; a hex reference still carries its x/X
    $ref = substr($body, 1);
    $isDec = ctype_digit($ref);
    $n = $isDec ? intval($ref) : hexdec(substr($ref, 1));
    $rejected = $n < 9
        || ($n > 13 && $n < 32)
        || $n == 11
        || $n == 12
        || ($n > 126 && $n < 160 && $n != 133)
        || ($n > 55295 && ($n < 57344 || ($n > 64975 && $n < 64992) || $n == 65534 || $n == 65535 || $n > 1114111));
    if ($rejected) {
        return $amp."amp;#{$ref};";
    }
    // Decimal output for a decimal input unless hex is forced (setting above 1), and always
    // when the setting is 0; hex output otherwise
    $asDecimal = ($isDec && $C['hexdec_entity'] < 2) || !$C['hexdec_entity'];

    return $amp.'#'.($asDecimal ? $n : 'x'.dechex($n)).';';
}
530
function hl_prot($p, $c = null) {
    // Check a URL's scheme (protocol) against the allowed list and, if configured,
    // convert it between relative and absolute form.
    //
    // $p  The URL string. When $c is null, $p is instead a regex-callback match array
    //     whose [1] is text before the URL, [2] the URL and [3] text after it.
    // $c  Key into $C['schemes']; falls back to the '*' entry when the key is absent.
    //     Null selects the 'style' entry.
    //
    // Returns the (possibly rewritten) URL, prefixed with 'denied:' when its scheme is
    // not permitted, wrapped in the before/after text taken from the match array.
    global $C;
    $deny = 'denied:';
    $lead = '';
    $tail = '';
    if ($c == null) {
        $c = 'style';
        $lead = $p[1];
        $tail = $p[3];
        $p = trim($p[2]);
    }
    $allowed = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];

    // A '!' entry in the scheme list means every URL gets marked as denied
    $marked = (substr($p, 0, 7) == $deny);
    if (isset($allowed['!']) && !$marked) {
        $p = $deny.$p;
        $marked = true;
    }

    // Nothing more to check: any scheme is fine ('*'), the value is empty or starts with
    // a fragment, query or parameter character, or it already carries the denied marker
    if (isset($allowed['*']) or !strcspn($p, '#?;') or $marked) {
        return $lead.$p.$tail;
    }

    // Scheme present (colon may be written plainly, as an entity, %-encoded or CSS-escaped)
    // but not in the allowed list
    $hasScheme = preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m);
    if ($hasScheme && !isset($allowed[strtolower($m[1])])) {
        return $lead.$deny.$p.$tail;
    }

    if (!$C['abs_url']) {
        return $lead.$p.$tail;
    }

    // abs_url of -1: strip the base URL from values that begin with it
    if ($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0) {
        return $lead.substr($p, strlen($C['base_url'])).$tail;
    }

    // Values that already have a scheme are left alone
    if (!empty($m[1])) {
        return $lead.$p.$tail;
    }

    // Make the URL absolute
    $base = $C['base_url'];
    if (substr($p, 0, 2) == '//') {
        // Scheme-relative: borrow the base URL's scheme
        $p = substr($base, 0, strpos($base, ':') + 1).$p;
    } elseif ($p[0] == '/') {
        // Root-relative: prepend scheme and host of the base URL
        $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $base).$p;
    } elseif (strcspn($p, './')) {
        // Plain relative path: append to the base URL
        $p = $base.$p;
    } else {
        // Starts with '.' or '/': join with the base path, then resolve './' and '../'
        preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $base, $parts);
        $p = preg_replace('`(?<=/)\./`', '', $parts[2].$p);
        $upDir = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
        while (preg_match($upDir, $p)) {
            $p = preg_replace($upDir, '', $p);
        }
        $p = $parts[1].$p;
    }

    return $lead.$p.$tail;
}
575
function hl_regex($p) {
    // Check that $p is a usable PCRE pattern by trial-matching it against an empty string.
    //
    // $p  Candidate pattern, delimiters included.
    //
    // Returns 1 when the pattern is accepted, 0 when it is empty, not a string, or rejected
    // by preg_match().
    if (empty($p) or !is_string($p)) {
        return 0;
    }
    if ($v = function_exists('error_clear_last') && function_exists('error_get_last')) {
        error_clear_last();
    } else {
        // PHP without error_clear_last(): rely on track_errors/$php_errormsg instead
        if ($t = ini_get('track_errors')) {
            $o = isset($php_errormsg) ? $php_errormsg : null;
        } else {
            ini_set('track_errors', 1);
        }
        unset($php_errormsg);
    }
    // Keep the expected warning for a bad pattern off the page while probing
    if (($d = ini_get('display_errors'))) {
        ini_set('display_errors', 0);
    }
    // The return value is checked as well as the recorded error: when the application has
    // its own error handler that deals with the warning, nothing is recorded for
    // error_get_last(), yet preg_match() still returns false for a pattern it cannot use
    $res = preg_match($p, '');
    if ($v) {
        $r = ($res !== false && error_get_last() == null) ? 1 : 0;
    } else {
        $r = ($res !== false && !isset($php_errormsg)) ? 1 : 0;
        if ($t) {
            $php_errormsg = isset($o) ? $o : null;
        } else {
            ini_set('track_errors', 0);
        }
    }
    // Put back the caller's exact setting (it may be e.g. 'stderr'), not just 1
    if ($d) {
        ini_set('display_errors', $d);
    }

    return $r;
}
611
function hl_spec($t) {
    // Build the final $spec array from a spec string.
    //
    // The string is a ';'-separated list of "elements=attributes" entries; both sides are
    // ','-separated. An attribute written with a leading '-' is recorded as denied ('-*'
    // denies all). An attribute may carry rules as "attr(rule=value/rule=value)". A
    // double-quoted value may contain the delimiter characters ; | ~ space , / ( ) and,
    // written as `", a double quote. All other whitespace is discarded.
    //
    // Returns array(element => array(attr => 1 or array(rule => value), ...,
    // 'n' => array(deniedAttr => 1, ...))). Entries are read from last to first, and an
    // element named by several entries has them combined with array_merge().
    $s = array();
    if (!function_exists('hl_aux1')) {
        // Callback for the quoted-value pass: swap delimiter characters inside a "..."
        // value for placeholder bytes \x01-\x08, turn `" into ", drop the outer quotes
        function hl_aux1($m) {
            return substr(str_replace(array(';', '|', '~', ' ', ',', '/', '(', ')', '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'), $m[0]), 1, -1);
        }
    }
    $t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_aux1', trim($t)));
    for ($i = count(($t = explode(';', $t))); --$i >= 0;) {
        $w = $t[$i];
        // Need "something=something"
        if (empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e + 1)))) {
            continue;
        }
        $y = $n = array(); // Permitted attr (with any rules), denied attr
        foreach (explode(',', $a) as $v) {
            if (!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)) {
                continue;
            }
            if (($x = strtolower($m[1])) == '-*') {
                $n['*'] = 1;
                continue;
            }
            if ($x[0] == '-') {
                $n[substr($x, 1)] = 1;
                continue;
            }
            if (!isset($m[2])) {
                $y[$x] = 1;
                continue;
            }
            foreach (explode('/', $m[2]) as $rule) {
                // Malformed rule (empty, no name before '=', or name under 5 chars): the
                // attribute is permitted without rules. Stop here -- writing a later rule
                // into the integer 1 was a no-op with a warning up to PHP 7 and is a fatal
                // Error from PHP 8, so the outcome is the same minus the failure.
                if (empty($rule) or ($p = strpos($rule, '=')) == 0 or $p < 5) {
                    $y[$x] = 1;
                    break;
                }
                // Same situation when an earlier item already listed this attribute
                // without rules
                if (isset($y[$x]) && !is_array($y[$x])) {
                    break;
                }
                // Store the rule, putting the protected delimiter characters back
                $y[$x][strtolower(substr($rule, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(';', '|', '~', ' ', ',', '/', '(', ')'), substr($rule, $p + 1));
            }
            // Discard regex rules that hl_regex() does not accept
            if (isset($y[$x]['match']) && !hl_regex($y[$x]['match'])) {
                unset($y[$x]['match']);
            }
            if (isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])) {
                unset($y[$x]['nomatch']);
            }
        }
        if (!count($y) && !count($n)) {
            continue;
        }
        // Apply what was collected to every element named on the left of '='
        foreach (explode(',', substr($w, 0, $e)) as $v) {
            if (!strlen(($v = strtolower($v)))) {
                continue;
            }
            if (count($y)) {
                if (!isset($s[$v])) {
                    $s[$v] = $y;
                } else {
                    $s[$v] = array_merge($s[$v], $y);
                }
            }
            if (count($n)) {
                if (!isset($s[$v]['n'])) {
                    $s[$v]['n'] = $n;
                } else {
                    $s[$v]['n'] = array_merge($s[$v]['n'], $n);
                }
            }
        }
    }

    return $s;
}
683
684function hl_tag($t) {
685    // tag/attribute handler
686    global $C;
687    $t = $t[0];
688    // invalid < >
689    if ($t == '< ') {
690        return '&lt; ';
691    }
692    if ($t == '>') {
693        return '&gt;';
694    }
695    if (!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)) {
696        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
697    } elseif (!isset($C['elements'][($e = strtolower($m[2]))])) {
698        return ($C['keep_bad'] % 2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '';
699    }
700    // attr string
701    $a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
702    // tag transform
703    static $eD = array('acronym' => 1, 'applet' => 1, 'big' => 1, 'center' => 1, 'dir' => 1, 'font' => 1, 'isindex' => 1, 's' => 1, 'strike' => 1, 'tt' => 1); // Deprecated
704    if ($C['make_tag_strict'] && isset($eD[$e])) {
705        $trt = hl_tag2($e, $a, $C['make_tag_strict']);
706        if (!$e) {
707            return ($C['keep_bad'] % 2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '';
708        }
709    }
710    // close tag
711    static $eE = array('area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1); // Empty ele
712    if (!empty($m[1])) {
713        return !isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad']) % 2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
714    }
715
716    // open tag & attr
717    static $aN = array('abbr' => array('td' => 1, 'th' => 1), 'accept' => array('form' => 1, 'input' => 1), 'accept-charset' => array('form' => 1), 'action' => array('form' => 1), 'align' => array('applet' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'div' => 1, 'embed' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'allowfullscreen' => array('iframe' => 1), 'alt' => array('applet' => 1, 'area' => 1, 'img' => 1, 'input' => 1), 'archive' => array('applet' => 1, 'object' => 1), 'async' => array('script' => 1), 'autocomplete' => array('form' => 1, 'input' => 1), 'autofocus' => array('button' => 1, 'input' => 1, 'keygen' => 1, 'select' => 1, 'textarea' => 1), 'autoplay' => array('audio' => 1, 'video' => 1), 'axis' => array('td' => 1, 'th' => 1), 'bgcolor' => array('embed' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1), 'border' => array('img' => 1, 'object' => 1, 'table' => 1), 'bordercolor' => array('table' => 1, 'td' => 1, 'tr' => 1), 'cellpadding' => array('table' => 1), 'cellspacing' => array('table' => 1), 'challenge' => array('keygen' => 1), 'char' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'charoff' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'charset' => array('a' => 1, 'script' => 1), 'checked' => array('command' => 1, 'input' => 1), 'cite' => array('blockquote' => 1, 'del' => 1, 'ins' => 1, 'q' => 1), 'classid' => array('object' => 1), 'clear' => array('br' => 1), 'code' => array('applet' => 1), 'codebase' => array('applet' => 1, 'object' => 1), 'codetype' => array('object' => 1), 'color' => array('font' => 1), 'cols' => array('textarea' => 1), 'colspan' => array('td' => 1, 'th' => 1), 'compact' => 
array('dir' => 1, 'dl' => 1, 'menu' => 1, 'ol' => 1, 'ul' => 1), 'content' => array('meta' => 1), 'controls' => array('audio' => 1, 'video' => 1), 'coords' => array('a' => 1, 'area' => 1), 'crossorigin' => array('img' => 1), 'data' => array('object' => 1), 'datetime' => array('del' => 1, 'ins' => 1, 'time' => 1), 'declare' => array('object' => 1), 'default' => array('track' => 1), 'defer' => array('script' => 1), 'dirname' => array('input' => 1, 'textarea' => 1), 'disabled' => array('button' => 1, 'command' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'optgroup' => 1, 'option' => 1, 'select' => 1, 'textarea' => 1), 'download' => array('a' => 1), 'enctype' => array('form' => 1), 'face' => array('font' => 1), 'flashvars' => array('embed' => 1), 'for' => array('label' => 1, 'output' => 1), 'form' => array('button' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'object' => 1, 'output' => 1, 'select' => 1, 'textarea' => 1), 'formaction' => array('button' => 1, 'input' => 1), 'formenctype' => array('button' => 1, 'input' => 1), 'formmethod' => array('button' => 1, 'input' => 1), 'formnovalidate' => array('button' => 1, 'input' => 1), 'formtarget' => array('button' => 1, 'input' => 1), 'frame' => array('table' => 1), 'frameborder' => array('iframe' => 1), 'headers' => array('td' => 1, 'th' => 1), 'height' => array('applet' => 1, 'canvas' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'td' => 1, 'th' => 1, 'video' => 1), 'high' => array('meter' => 1), 'href' => array('a' => 1, 'area' => 1, 'link' => 1), 'hreflang' => array('a' => 1, 'area' => 1, 'link' => 1), 'hspace' => array('applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1), 'icon' => array('command' => 1), 'ismap' => array('img' => 1, 'input' => 1), 'keyparams' => array('keygen' => 1), 'keytype' => array('keygen' => 1), 'kind' => array('track' => 1), 'label' => array('command' => 1, 'menu' => 1, 'option' => 1, 'optgroup' => 1, 'track' => 1), 'language' => 
array('script' => 1), 'list' => array('input' => 1), 'longdesc' => array('img' => 1, 'iframe' => 1), 'loop' => array('audio' => 1, 'video' => 1), 'low' => array('meter' => 1), 'marginheight' => array('iframe' => 1), 'marginwidth' => array('iframe' => 1), 'max' => array('input' => 1, 'meter' => 1, 'progress' => 1), 'maxlength' => array('input' => 1, 'textarea' => 1), 'media' => array('a' => 1, 'area' => 1, 'link' => 1, 'source' => 1, 'style' => 1), 'mediagroup' => array('audio' => 1, 'video' => 1), 'method' => array('form' => 1), 'min' => array('input' => 1, 'meter' => 1), 'model' => array('embed' => 1), 'multiple' => array('input' => 1, 'select' => 1), 'muted' => array('audio' => 1, 'video' => 1), 'name' => array('a' => 1, 'applet' => 1, 'button' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'map' => 1, 'object' => 1, 'output' => 1, 'param' => 1, 'select' => 1, 'textarea' => 1), 'nohref' => array('area' => 1), 'noshade' => array('hr' => 1), 'novalidate' => array('form' => 1), 'nowrap' => array('td' => 1, 'th' => 1), 'object' => array('applet' => 1), 'open' => array('details' => 1), 'optimum' => array('meter' => 1), 'pattern' => array('input' => 1), 'ping' => array('a' => 1, 'area' => 1), 'placeholder' => array('input' => 1, 'textarea' => 1), 'pluginspage' => array('embed' => 1), 'pluginurl' => array('embed' => 1), 'poster' => array('video' => 1), 'pqg' => array('keygen' => 1), 'preload' => array('audio' => 1, 'video' => 1), 'prompt' => array('isindex' => 1), 'pubdate' => array('time' => 1), 'radiogroup' => array('command' => 1), 'readonly' => array('input' => 1, 'textarea' => 1), 'rel' => array('a' => 1, 'area' => 1, 'link' => 1), 'required' => array('input' => 1, 'select' => 1, 'textarea' => 1), 'rev' => array('a' => 1), 'reversed' => array('ol' => 1), 'rows' => array('textarea' => 1), 'rowspan' => array('td' => 1, 'th' => 1), 'rules' => array('table' => 1), 'sandbox' => array('iframe' => 1), 'scope' => 
array('td' => 1, 'th' => 1), 'scoped' => array('style' => 1), 'scrolling' => array('iframe' => 1), 'seamless' => array('iframe' => 1), 'selected' => array('option' => 1), 'shape' => array('a' => 1, 'area' => 1), 'size' => array('font' => 1, 'hr' => 1, 'input' => 1, 'select' => 1), 'sizes' => array('link' => 1), 'span' => array('col' => 1, 'colgroup' => 1), 'src' => array('audio' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'script' => 1, 'source' => 1, 'track' => 1, 'video' => 1), 'srcdoc' => array('iframe' => 1), 'srclang' => array('track' => 1), 'srcset' => array('img' => 1), 'standby' => array('object' => 1), 'start' => array('ol' => 1), 'step' => array('input' => 1), 'summary' => array('table' => 1), 'target' => array('a' => 1, 'area' => 1, 'form' => 1), 'type' => array('a' => 1, 'area' => 1, 'button' => 1, 'command' => 1, 'embed' => 1, 'input' => 1, 'li' => 1, 'link' => 1, 'menu' => 1, 'object' => 1, 'ol' => 1, 'param' => 1, 'script' => 1, 'source' => 1, 'style' => 1, 'ul' => 1), 'typemustmatch' => array('object' => 1), 'usemap' => array('img' => 1, 'input' => 1, 'object' => 1), 'valign' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'value' => array('button' => 1, 'data' => 1, 'input' => 1, 'li' => 1, 'meter' => 1, 'option' => 1, 'param' => 1, 'progress' => 1), 'valuetype' => array('param' => 1), 'vspace' => array('applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1), 'width' => array('applet' => 1, 'canvas' => 1, 'col' => 1, 'colgroup' => 1, 'embed' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'video' => 1), 'wmode' => array('embed' => 1), 'wrap' => array('textarea' => 1)); // Ele-specific
718    static $aNA = array('aria-activedescendant' => 1, 'aria-atomic' => 1, 'aria-autocomplete' => 1, 'aria-busy' => 1, 'aria-checked' => 1, 'aria-controls' => 1, 'aria-describedby' => 1, 'aria-disabled' => 1, 'aria-dropeffect' => 1, 'aria-expanded' => 1, 'aria-flowto' => 1, 'aria-grabbed' => 1, 'aria-haspopup' => 1, 'aria-hidden' => 1, 'aria-invalid' => 1, 'aria-label' => 1, 'aria-labelledby' => 1, 'aria-level' => 1, 'aria-live' => 1, 'aria-multiline' => 1, 'aria-multiselectable' => 1, 'aria-orientation' => 1, 'aria-owns' => 1, 'aria-posinset' => 1, 'aria-pressed' => 1, 'aria-readonly' => 1, 'aria-relevant' => 1, 'aria-required' => 1, 'aria-selected' => 1, 'aria-setsize' => 1, 'aria-sort' => 1, 'aria-valuemax' => 1, 'aria-valuemin' => 1, 'aria-valuenow' => 1, 'aria-valuetext' => 1); // ARIA
719    static $aNE = array('allowfullscreen' => 1, 'checkbox' => 1, 'checked' => 1, 'command' => 1, 'compact' => 1, 'declare' => 1, 'defer' => 1, 'default' => 1, 'disabled' => 1, 'hidden' => 1, 'inert' => 1, 'ismap' => 1, 'itemscope' => 1, 'multiple' => 1, 'nohref' => 1, 'noresize' => 1, 'noshade' => 1, 'nowrap' => 1, 'open' => 1, 'radio' => 1, 'readonly' => 1, 'required' => 1, 'reversed' => 1, 'selected' => 1); // Empty
720    static $aNO = array('onabort' => 1, 'onblur' => 1, 'oncanplay' => 1, 'oncanplaythrough' => 1, 'onchange' => 1, 'onclick' => 1, 'oncontextmenu' => 1, 'oncopy' => 1, 'oncuechange' => 1, 'oncut' => 1, 'ondblclick' => 1, 'ondrag' => 1, 'ondragend' => 1, 'ondragenter' => 1, 'ondragleave' => 1, 'ondragover' => 1, 'ondragstart' => 1, 'ondrop' => 1, 'ondurationchange' => 1, 'onemptied' => 1, 'onended' => 1, 'onerror' => 1, 'onfocus' => 1, 'onformchange' => 1, 'onforminput' => 1, 'oninput' => 1, 'oninvalid' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1, 'onload' => 1, 'onloadeddata' => 1, 'onloadedmetadata' => 1, 'onloadstart' => 1, 'onlostpointercapture' => 1, 'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1, 'onmouseup' => 1, 'onmousewheel' => 1, 'onpaste' => 1, 'onpause' => 1, 'onplay' => 1, 'onplaying' => 1, 'onpointercancel' => 1, 'ongotpointercapture' => 1, 'onpointerdown' => 1, 'onpointerenter' => 1, 'onpointerleave' => 1, 'onpointermove' => 1, 'onpointerout' => 1, 'onpointerover' => 1, 'onpointerup' => 1, 'onprogress' => 1, 'onratechange' => 1, 'onreadystatechange' => 1, 'onreset' => 1, 'onsearch' => 1, 'onscroll' => 1, 'onseeked' => 1, 'onseeking' => 1, 'onselect' => 1, 'onshow' => 1, 'onstalled' => 1, 'onsubmit' => 1, 'onsuspend' => 1, 'ontimeupdate' => 1, 'ontoggle' => 1, 'ontouchcancel' => 1, 'ontouchend' => 1, 'ontouchmove' => 1, 'ontouchstart' => 1, 'onvolumechange' => 1, 'onwaiting' => 1, 'onwheel' => 1); // Event
721    static $aNP = array('action' => 1, 'cite' => 1, 'classid' => 1, 'codebase' => 1, 'data' => 1, 'href' => 1, 'itemtype' => 1, 'longdesc' => 1, 'model' => 1, 'pluginspage' => 1, 'pluginurl' => 1, 'src' => 1, 'srcset' => 1, 'usemap' => 1); // Need scheme check; excludes style, on*
722    static $aNU = array('accesskey' => 1, 'class' => 1, 'contenteditable' => 1, 'contextmenu' => 1, 'dir' => 1, 'draggable' => 1, 'dropzone' => 1, 'hidden' => 1, 'id' => 1, 'inert' => 1, 'itemid' => 1, 'itemprop' => 1, 'itemref' => 1, 'itemscope' => 1, 'itemtype' => 1, 'lang' => 1, 'role' => 1, 'spellcheck' => 1, 'style' => 1, 'tabindex' => 1, 'title' => 1, 'translate' => 1, 'xmlns' => 1, 'xml:base' => 1, 'xml:lang' => 1, 'xml:space' => 1); // Univ; excludes on*, aria*
723
724    if ($C['lc_std_val']) {
725        // predef attr vals for $eAL & $aNE ele
726        static $aNL = array('all' => 1, 'auto' => 1, 'baseline' => 1, 'bottom' => 1, 'button' => 1, 'captions' => 1, 'center' => 1, 'chapters' => 1, 'char' => 1, 'checkbox' => 1, 'circle' => 1, 'col' => 1, 'colgroup' => 1, 'color' => 1, 'cols' => 1, 'data' => 1, 'date' => 1, 'datetime' => 1, 'datetime-local' => 1, 'default' => 1, 'descriptions' => 1, 'email' => 1, 'file' => 1, 'get' => 1, 'groups' => 1, 'hidden' => 1, 'image' => 1, 'justify' => 1, 'left' => 1, 'ltr' => 1, 'metadata' => 1, 'middle' => 1, 'month' => 1, 'none' => 1, 'number' => 1, 'object' => 1, 'password' => 1, 'poly' => 1, 'post' => 1, 'preserve' => 1, 'radio' => 1, 'range' => 1, 'rect' => 1, 'ref' => 1, 'reset' => 1, 'right' => 1, 'row' => 1, 'rowgroup' => 1, 'rows' => 1, 'rtl' => 1, 'search' => 1, 'submit' => 1, 'subtitles' => 1, 'tel' => 1, 'text' => 1, 'time' => 1, 'top' => 1, 'url' => 1, 'week' => 1);
727        static $eAL = array('a' => 1, 'area' => 1, 'bdo' => 1, 'button' => 1, 'col' => 1, 'fieldset' => 1, 'form' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'script' => 1, 'select' => 1, 'table' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1, 'track' => 1, 'xml:space' => 1);
728        $lcase = isset($eAL[$e]) ? 1 : 0;
729    }
730
731    $depTr = 0;
732    if ($C['no_deprecated_attr']) {
733        // depr attr:applicable ele
734        static $aND = array('align' => array('caption' => 1, 'div' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1), 'bgcolor' => array('table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1), 'border' => array('object' => 1), 'bordercolor' => array('table' => 1, 'td' => 1, 'tr' => 1), 'cellspacing' => array('table' => 1), 'clear' => array('br' => 1), 'compact' => array('dl' => 1, 'ol' => 1, 'ul' => 1), 'height' => array('td' => 1, 'th' => 1), 'hspace' => array('img' => 1, 'object' => 1), 'language' => array('script' => 1), 'name' => array('a' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'map' => 1), 'noshade' => array('hr' => 1), 'nowrap' => array('td' => 1, 'th' => 1), 'size' => array('hr' => 1), 'vspace' => array('img' => 1, 'object' => 1), 'width' => array('hr' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1));
735        static $eAD = array('a' => 1, 'br' => 1, 'caption' => 1, 'div' => 1, 'dl' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'map' => 1, 'object' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'script' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1, 'ul' => 1);
736        $depTr = isset($eAD[$e]) ? 1 : 0;
737    }
738
739    // attr name-vals
740    if (strpos($a, "\x01") !== false) {
741        $a = preg_replace('`\x01[^\x01]*\x01`', '', $a);
742    } // No comment/CDATA sec
743    $mode = 0;
744    $a = trim($a, ' /');
745    $aA = array();
746    while (strlen($a)) {
747        $w = 0;
748        switch ($mode) {
749            case 0: // Name
750                if (preg_match('`^[a-zA-Z][^\s=/]+`', $a, $m)) {
751                    $nm = strtolower($m[0]);
752                    $w = $mode = 1;
753                    $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
754                }
755                break;
756            case 1:
757                if ($a[0] == '=') { // =
758                    $w = 1;
759                    $mode = 2;
760                    $a = ltrim($a, '= ');
761                } else { // No val
762                    $w = 1;
763                    $mode = 0;
764                    $a = ltrim($a);
765                    $aA[$nm] = '';
766                }
767                break;
768            case 2: // Val
769                if (preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)) {
770                    $a = ltrim($m[2]);
771                    $m = $m[1];
772                    $w = 1;
773                    $mode = 0;
774                    $aA[$nm] = trim(str_replace('<', '&lt;', ($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m));
775                }
776                break;
777        }
778        if ($w == 0) { // Parse errs, deal with space, " & '
779            $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
780            $mode = 0;
781        }
782    }
783    if ($mode == 1) {
784        $aA[$nm] = '';
785    }
786
787    // clean attrs
788    global $S;
789    $rl = isset($S[$e]) ? $S[$e] : array();
790    $a = array();
791    $nfr = 0;
792    $d = $C['deny_attribute'];
793    foreach ($aA as $k => $v) {
794        if (((isset($d['*']) ? isset($d[$k]) : !isset($d[$k])) && (isset($aN[$k][$e]) or isset($aNU[$k]) or (isset($aNO[$k]) && !isset($d['on*'])) or (isset($aNA[$k]) && !isset($d['aria*'])) or (!isset($d['data*']) && preg_match('`data-((?!xml)[^:]+$)`', $k))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) {
795            if (isset($aNE[$k])) {
796                $v = $k;
797            } elseif (!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')) { // Rather loose but ?not cause issues
798                $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
799            }
800            if ($k == 'style' && !$C['style_pass']) {
801                if (false !== strpos($v, '&#')) {
802                    static $sC = array('&#x20;' => ' ', '&#32;' => ' ', '&#x45;' => 'e', '&#69;' => 'e', '&#x65;' => 'e', '&#101;' => 'e', '&#x58;' => 'x', '&#88;' => 'x', '&#x78;' => 'x', '&#120;' => 'x', '&#x50;' => 'p', '&#80;' => 'p', '&#x70;' => 'p', '&#112;' => 'p', '&#x53;' => 's', '&#83;' => 's', '&#x73;' => 's', '&#115;' => 's', '&#x49;' => 'i', '&#73;' => 'i', '&#x69;' => 'i', '&#105;' => 'i', '&#x4f;' => 'o', '&#79;' => 'o', '&#x6f;' => 'o', '&#111;' => 'o', '&#x4e;' => 'n', '&#78;' => 'n', '&#x6e;' => 'n', '&#110;' => 'n', '&#x55;' => 'u', '&#85;' => 'u', '&#x75;' => 'u', '&#117;' => 'u', '&#x52;' => 'r', '&#82;' => 'r', '&#x72;' => 'r', '&#114;' => 'r', '&#x4c;' => 'l', '&#76;' => 'l', '&#x6c;' => 'l', '&#108;' => 'l', '&#x28;' => '(', '&#40;' => '(', '&#x29;' => ')', '&#41;' => ')', '&#x20;' => ':', '&#32;' => ':', '&#x22;' => '"', '&#34;' => '"', '&#x27;' => "'", '&#39;' => "'", '&#x2f;' => '/', '&#47;' => '/', '&#x2a;' => '*', '&#42;' => '*', '&#x5c;' => '\\', '&#92;' => '\\');
803                    $v = strtr($v, $sC);
804                }
805                $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v);
806                $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
807            } elseif (isset($aNP[$k]) or isset($aNO[$k])) {
808                $v = str_replace('­', ' ', (strpos($v, '&') !== false ? str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "­" or hyphen or something else depending on viewing software
809                if ($k == 'srcset') {
810                    $v2 = '';
811                    foreach (explode(',', $v) as $k1 => $v1) {
812                        $v1 = explode(' ', ltrim($v1), 2);
813                        $k1 = isset($v1[1]) ? trim($v1[1]) : '';
814                        $v1 = trim($v1[0]);
815                        if (isset($v1[0])) {
816                            $v2 .= hl_prot($v1, $k).(empty($k1) ? '' : ' '.$k1).', ';
817                        }
818                    }
819                    $v = trim($v2, ', ');
820                }
821                if ($k == 'itemtype') {
822                    $v2 = '';
823                    foreach (explode(' ', $v) as $v1) {
824                        if (isset($v1[0])) {
825                            $v2 .= hl_prot($v1, $k).' ';
826                        }
827                    }
828                    $v = trim($v2, ' ');
829                } else {
830                    $v = hl_prot($v, $k);
831                }
832                if ($k == 'href') { // X-spam
833                    if ($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0) {
834                        $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
835                    } elseif ($C['anti_link_spam']) {
836                        $r1 = $C['anti_link_spam'][1];
837                        if (!empty($r1) && preg_match($r1, $v)) {
838                            continue;
839                        }
840                        $r0 = $C['anti_link_spam'][0];
841                        if (!empty($r0) && preg_match($r0, $v)) {
842                            if (isset($a['rel'])) {
843                                if (!preg_match('`\bnofollow\b`i', $a['rel'])) {
844                                    $a['rel'] .= ' nofollow';
845                                }
846                            } elseif (isset($aA['rel'])) {
847                                if (!preg_match('`\bnofollow\b`i', $aA['rel'])) {
848                                    $nfr = 1;
849                                }
850                            } else {
851                                $a['rel'] = 'nofollow';
852                            }
853                        }
854                    }
855                }
856            }
857            if (isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($k, $v, $rl[$k])) === 0) {
858                continue;
859            }
860            $a[$k] = str_replace('"', '&quot;', $v);
861        }
862    }
863    if ($nfr) {
864        $a['rel'] = isset($a['rel']) ? $a['rel'].' nofollow' : 'nofollow';
865    }
866
867    // rqd attr
868    static $eAR = array('area' => array('alt' => 'area'), 'bdo' => array('dir' => 'ltr'), 'command' => array('label' => ''), 'form' => array('action' => ''), 'img' => array('src' => '', 'alt' => 'image'), 'map' => array('name' => ''), 'optgroup' => array('label' => ''), 'param' => array('name' => ''), 'style' => array('scoped' => ''), 'textarea' => array('rows' => '10', 'cols' => '50'));
869    if (isset($eAR[$e])) {
870        foreach ($eAR[$e] as $k => $v) {
871            if (!isset($a[$k])) {
872                $a[$k] = isset($v[0]) ? $v : $k;
873            }
874        }
875    }
876
877    // depr attr
878    if ($depTr) {
879        $c = array();
880        foreach ($a as $k => $v) {
881            if ($k == 'style' or !isset($aND[$k][$e])) {
882                continue;
883            }
884            $v = str_replace(array('\\', ':', ';', '&#'), '', $v);
885            if ($k == 'align') {
886                unset($a['align']);
887                if ($e == 'img' && ($v == 'left' or $v == 'right')) {
888                    $c[] = 'float: '.$v;
889                } elseif (($e == 'div' or $e == 'table') && $v == 'center') {
890                    $c[] = 'margin: auto';
891                } else {
892                    $c[] = 'text-align: '.$v;
893                }
894            } elseif ($k == 'bgcolor') {
895                unset($a['bgcolor']);
896                $c[] = 'background-color: '.$v;
897            } elseif ($k == 'border') {
898                unset($a['border']);
899                $c[] = "border: {$v}px";
900            } elseif ($k == 'bordercolor') {
901                unset($a['bordercolor']);
902                $c[] = 'border-color: '.$v;
903            } elseif ($k == 'cellspacing') {
904                unset($a['cellspacing']);
905                $c[] = "border-spacing: {$v}px";
906            } elseif ($k == 'clear') {
907                unset($a['clear']);
908                $c[] = 'clear: '.($v != 'all' ? $v : 'both');
909            } elseif ($k == 'compact') {
910                unset($a['compact']);
911                $c[] = 'font-size: 85%';
912            } elseif ($k == 'height' or $k == 'width') {
913                unset($a[$k]);
914                $c[] = $k.': '.($v[0] != '*' ? $v.(ctype_digit($v) ? 'px' : '') : 'auto');
915            } elseif ($k == 'hspace') {
916                unset($a['hspace']);
917                $c[] = "margin-left: {$v}px; margin-right: {$v}px";
918            } elseif ($k == 'language' && !isset($a['type'])) {
919                unset($a['language']);
920                $a['type'] = 'text/'.strtolower($v);
921            } elseif ($k == 'name') {
922                if ($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')) {
923                    unset($a['name']);
924                }
925                if (!isset($a['id']) && !preg_match('`\W`', $v)) {
926                    $a['id'] = $v;
927                }
928            } elseif ($k == 'noshade') {
929                unset($a['noshade']);
930                $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
931            } elseif ($k == 'nowrap') {
932                unset($a['nowrap']);
933                $c[] = 'white-space: nowrap';
934            } elseif ($k == 'size') {
935                unset($a['size']);
936                $c[] = 'size: '.$v.'px';
937            } elseif ($k == 'vspace') {
938                unset($a['vspace']);
939                $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
940            }
941        }
942        if (count($c)) {
943            $c = implode('; ', $c);
944            $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;').'; '.$c.';' : $c.';';
945        }
946    }
947    // unique ID
948    if ($C['unique_ids'] && isset($a['id'])) {
949        if (preg_match('`\s`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)) {
950            unset($a['id']);
951        } else {
952            while (isset($GLOBALS['hl_Ids'][$id])) {
953                $id = $C['unique_ids'].$id;
954            }
955            $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
956        }
957    }
958    // xml:lang
959    if ($C['xml:lang'] && isset($a['lang'])) {
960        $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
961        if ($C['xml:lang'] == 2) {
962            unset($a['lang']);
963        }
964    }
965    // for transformed tag
966    if (!empty($trt)) {
967        $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;').'; '.$trt : $trt;
968    }
969    // return with empty ele /
970    if (empty($C['hook_tag'])) {
971        $aA = '';
972        foreach ($a as $k => $v) {
973            $aA .= " {$k}=\"{$v}\"";
974        }
975
976        return "<{$e}{$aA}".(isset($eE[$e]) ? ' /' : '').'>';
977    } else {
978        return $C['hook_tag']($e, $a);
979    }
980}
981
function hl_tag2(&$e, &$a, $t = 1) {
    // Replace a deprecated element with a current one.
    // $e (by reference): element name; overwritten with the replacement name,
    //     or with 0 when nothing matched and $t is 2
    // $a (by reference): attribute string; for 'font', the color/size/face
    //     attributes that were converted are cut out of it
    // $t: when 2, an element that is not handled here is rejected (0 returned)
    // Returns the CSS declarations to add to the replacement element's style
    // ('' if none), or 0 for a rejected element.

    // CSS font-size values for the values accepted in font's size attribute
    static $sizeCss = array('0' => 'xx-small', '1' => 'xx-small', '2' => 'small', '3' => 'medium', '4' => 'large', '5' => 'x-large', '6' => 'xx-large', '7' => '300%', '-1' => 'smaller', '-2' => '60%', '+1' => 'larger', '+2' => '150%', '+3' => '200%', '+4' => '300%');

    switch ($e) {
        case 'big':
            $e = 'span';

            return 'font-size: larger;';

        case 's':
        case 'strike':
            $e = 'span';

            return 'text-decoration: line-through;';

        case 'tt':
            $e = 'code';

            return '';

        case 'center':
            $e = 'div';

            return 'text-align: center;';

        case 'font':
            $css = '';
            // characters that must not survive inside a generated CSS value
            $unsafe = array('"', ';', ':');

            // color and size attributes, in whatever order they occur
            while (preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $hit)) {
                $a = str_replace($hit[0], ' ', $a);
                $value = trim($hit[4]);
                if (strtolower($hit[2]) == 'color') {
                    $css .= ' color: '.str_replace($unsafe, '\'', $value).';';
                } elseif (isset($sizeCss[$value])) {
                    // a size value not in the table is dropped
                    $css .= ' font-size: '.$sizeCss[$value].';';
                }
            }

            // face attribute: quoted form is tried first, then the bare-word form
            while (preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $hit) || preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $hit)) {
                $a = str_replace($hit[0], ' ', $a);
                $css .= ' font-family: '.str_replace($unsafe, '\'', trim($hit[3])).';';
            }

            $e = 'span';

            return ltrim(str_replace('<', '', $css));

        case 'acronym':
            $e = 'abbr';

            return '';

        case 'dir':
            $e = 'ul';

            return '';
    }

    // element is not one of those handled above
    if ($t != 2) {
        return '';
    }
    $e = 0;

    return 0;
}
1037
function hl_tidy($t, $w, $p) {
    // tidy/compact HTM
    // $t: markup to process
    // $w: -1 to only compact whitespace; otherwise a spec such as '2s0n', read as:
    //     't' anywhere selects tab indentation (space otherwise); the first digit is
    //     the number of indent characters per level (without a digit: 1 tab or
    //     2 spaces); a digit 1-9 right after 't' or 's' is the starting indent level;
    //     'r' switches line breaks to "\r", or to "\r\n" when 'n' is present as well
    // $p: name of the parent element of $t
    // Returns the processed markup.

    // Markup directly inside these parents is returned untouched. The name has to
    // match exactly: searching for "$p," in a comma-separated list can never find
    // the last entry ('textarea') and wrongly accepts fragments such as 'e'.
    if (in_array($p, array('pre', 'script', 'textarea'), true)) {
        return $t;
    }
    if (!function_exists('hl_aux2')) {
        // swap markup characters and whitespace inside a protected section for
        // control-character placeholders so the steps below leave them alone
        function hl_aux2($m) {
            return $m[1].str_replace(array('<', '>', "\n", "\r", "\t", ' '), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]).$m[4];
        }
    }
    $placeholders = array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07");
    $originals = array('<', '>', "\n", "\r", "\t", ' ');

    // protect the content of CDATA sections, comments, pre, script and textarea
    $t = preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), 'hl_aux2', $t);
    // move whitespace that follows an opening tag in front of the tag, collapse
    // every whitespace run to one space, then drop a space left after an opening tag
    $t = preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), $t);

    $w = strtolower((string) $w);
    if ($w == -1) {
        // compact only
        return str_replace($placeholders, $originals, $t);
    }

    // indent unit and starting indent level from the spec
    $s = strpos(" $w", 't') ? "\t" : ' ';
    $s = preg_match('`\d`', $w, $m) ? str_repeat($s, (int) $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2));
    $N = preg_match('`[ts]([1-9])`', $w, $m) ? (int) $m[1] : 0;

    // element groups, each laid out differently in the loop below
    $a = array('br' => 1);
    $b = array('button' => 1, 'command' => 1, 'input' => 1, 'option' => 1, 'param' => 1, 'track' => 1);
    $c = array('audio' => 1, 'canvas' => 1, 'caption' => 1, 'dd' => 1, 'dt' => 1, 'figcaption' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'isindex' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'object' => 1, 'p' => 1, 'pre' => 1, 'style' => 1, 'summary' => 1, 'td' => 1, 'textarea' => 1, 'th' => 1, 'video' => 1);
    $d = array('address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'colgroup' => 1, 'datalist' => 1, 'details' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'iframe' => 1, 'main' => 1, 'map' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'optgroup' => 1, 'rbc' => 1, 'rtc' => 1, 'ruby' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'table' => 1, 'tbody' => 1, 'tfoot' => 1, 'thead' => 1, 'tr' => 1, 'ul' => 1);

    $T = explode('<', $t);
    $X = 1;
    // The output is built in an output buffer. When a closing tag of a $d element
    // turns up at indent level 0, the attempt is discarded and repeated with a
    // starting level that is one higher.
    while ($X) {
        $n = $N;
        $t = $T;
        ob_start();
        if (isset($d[$p])) {
            echo str_repeat($s, ++$n);
        }
        // text in front of the first tag
        echo ltrim(array_shift($t));
        foreach ($t as $chunk) {
            // $e: tag content, $r: text after the tag. Splitting at the first '>'
            // only keeps any later '>' in the text instead of dropping what follows
            // it; padding covers a chunk that has no '>' at all.
            list($e, $r) = array_pad(explode('>', $chunk, 2), 2, '');
            // $x: 0 closing tag, 1 self-closed tag, 2 opening tag, -1 '<!...' construct
            $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1));
            // $y: element name, or 0 for a '<!...' construct
            $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
            $e = "<$e>";
            if (isset($d[$y])) {
                // tag gets a line of its own and changes the indent level
                if (!$x) {
                    if ($n) {
                        echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);
                    } else {
                        ++$N;
                        ob_end_clean();
                        continue 2;
                    }
                } else {
                    echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));
                }
                echo $r;
                continue;
            }
            $f = "\n".str_repeat($s, $n);
            if (isset($c[$y])) {
                // line break before the opening tag and after the closing tag
                if (!$x) {
                    echo $e, $f, $r;
                } else {
                    echo $f, $e, $r;
                }
            } elseif (isset($b[$y])) {
                echo $f, $e, $r;
            } elseif (isset($a[$y])) {
                echo $e, $f, $r;
            } elseif (!$y) {
                echo $f, $e, $f, $r;
            } else {
                // any other element stays inline
                echo $e, $r;
            }
        }
        $X = 0;
    }
    // merge blank lines and remove a space next to a line break
    $t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()));
    ob_end_clean();
    if (($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)) {
        $t = str_replace("\n", $l, $t);
    }

    // put the protected characters back
    return str_replace($placeholders, $originals, $t);
}
1117
function hl_version() {
    // release number of this htmLawed copy, as a string
    $release = '1.2.5';

    return $release;
}
1122