<?php
// $Id: bbcode-filter.inc,v 1.66 2008/11/30 08:50:08 naudefj Exp $

/**
 * Convert BBCode markup in a text to HTML.
 *
 * Processing order matters and is kept from the original implementation:
 * script-tag neutralising, [code] extraction, line/paragraph breaks,
 * tag balancing for [quote]/[list], nestable tags ([size], [color], [font],
 * [list]), the flat tag table, simple string tags, e-mail tags, automatic
 * link detection and finally the optional rel="nofollow" decoration.
 *
 * Relies on t() and variable_get() being defined by the including code.
 *
 * @param $body
 *   Text containing BBCode. Taken by reference; the converted text is also
 *   returned.
 * @param $format
 *   Input format id, used as suffix of the settings read via variable_get().
 *
 * @return
 *   The HTML produced from $body.
 */
function _bbcode_filter_process(&$body, $format = -1) {

  $quote_text = t('Quote:');
  $quote_user = t('\\1 wrote:');

  // Encode all script tags to prevent XSS html injection attacks.
  // The replacement must use entities, otherwise the tags survive unchanged.
  $body = preg_replace(array('#<script([^>]*)>#i', '#</script([^>]*)>#i'), array('&lt;script\\1&gt;', '&lt;/script\\1&gt;'), $body);

  // Find all [code] tags and check if they contain a newline. If we find a newline,
  // that [code] should be rendered as a block, otherwise it will still be inline
  $mode = variable_get("bbcode_paragraph_breaks_$format", 2);
  $pre = array(); $i = 0;
  if (preg_match_all('#\[code(?::\w+)?\](.*?)\[/code(?::\w+)?\]#si', $body, $code_tags, PREG_SET_ORDER)) {
    foreach ($code_tags as $code_tag) {
      // Show markup inside [code] literally instead of letting the browser interpret it
      $code_tag[1] = str_replace(array('<', '>'), array('&lt;', '&gt;'), $code_tag[1]);
      if (strpos($code_tag[1], "\n") === FALSE) {
        $body = str_replace($code_tag[0], '<code class="bb-code">'. $code_tag[1] .'</code>', $body);
      }
      elseif ($mode) {
        // Strip preformatted code blocks from text during line break processing, replaced below
        $body = str_replace($code_tag[0], "***pRe_sTrInG$i***", $body);
        $pre[$i++] = '<pre class="bb-code-block">'. $code_tag[1] .'</pre>';
      }
      else {
        $body = str_replace($code_tag[0], '<pre class="bb-code-block">'. $code_tag[1] .'</pre>', $body);
      }
    }
  }

  // Apply line and paragraph breaks (skipping preformatted code)
  if ($mode) {

    if ($mode == 1) { // Line breaks only (starting with PHP 4.0.5, nl2br() is XHTML compliant)
      $body = nl2br($body);
    }

    if ($mode == 2) { // Line and paragraph breaks (may not always be XHTML compliant)
      $body = preg_replace("/(\r\n|\n|\r)/", "\n", $body);
      $body = preg_replace("/\n\n+/", "\n\n", $body);
      $parts = explode("\n\n", $body);
      foreach ($parts as $k => $part) {
        // No linebreaks if paragraph starts with an HTML tag
        if (!preg_match('/^<.*>/', $part)) {
          $part = nl2br($part);
        }

        // Some tags should not be in paragraph blocks
        if (!preg_match('/^(?:<|\[)(?:table|list|ol|ul|pre|select|form|blockquote|hr)/i', $part)) {
          $part = '<p>'. $part .'</p>';
        }
        $parts[$k] = $part;
      }
      $body = implode("\n\n", $parts);
    }

    // Reinsert preformatted code blocks
    foreach ($pre as $i => $code_tag) {
      $body = str_replace("***pRe_sTrInG$i***", $code_tag, $body);
    }
  }

  // Replace any improper quote tags with proper quote tags
  $body = str_replace('[quote/]', '[/quote]', $body);

  // Add closing tags to prevent users from disrupting your site's HTML
  // (required for nestable tags only: [list] and [quote])
  preg_match_all('/\[quote/i', $body, $matches);
  $opentags = count($matches['0']);
  preg_match_all('/\[\/quote\]/i', $body, $matches);
  $unclosed = $opentags - count($matches['0']);
  for ($i = 0; $i < $unclosed; $i++) {
    $body .= '[/quote]';
  }
  // Also add opening tags, if needed
  for ($i = $unclosed; $i < 0; $i++) {
    $body = '[quote]' . $body;
  }
  preg_match_all('/\[list/i', $body, $matches);
  $opentags = count($matches['0']);
  preg_match_all('/\[\/list\]/i', $body, $matches);
  $unclosed = $opentags - count($matches['0']);
  for ($i = 0; $i < $unclosed; $i++) {
    $body .= '[/list]';
  }
  for ($i = $unclosed; $i < 0; $i++) {
    $body = '[list]' . $body;
  }

  // begin processing for [size]
  if (stristr($body, '[size=') !== FALSE) { // prevent useless processing
    $arr = array(
      'tag'      => 'size',
      'pattern'  => '#\[\x07=([\d]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si',
      'callback' => '_bbcode_size_callback',
      'text'     => $body);
    $body = _bbcode_replace_nest_tag($arr);
  } // end processing for [size]

  // begin processing for [color]
  if (stristr($body, '[color=') !== FALSE) { // prevent useless processing
    $arr = array(
      'tag'         => 'color',
      'pattern'     => '#\[\x07=([\#\w]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si',
      'replacement' => '<span style="color:$1">$2</span>',
      'text'        => $body);
    $body = _bbcode_replace_nest_tag($arr);
  } // end processing for [color]

  // begin processing for [font]
  if (stristr($body, '[font=') !== FALSE) { // prevent useless processing
    $arr = array(
      'tag'         => 'font',
      'pattern'     => '#\[\x07=([\w\s]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si',
      'replacement' => '<span style="font-family:$1">$2</span>',
      'text'        => $body);
    $body = _bbcode_replace_nest_tag($arr);
  } // end processing for [font]

  // begin processing for [list] and [*]
  if (stristr($body, '[list') !== FALSE) { // prevent useless processing
    // Mark every list tag with \x07 so that "[^\x07]*" can be used to find
    // the innermost open/close pair.
    $body = preg_replace('#(\[[/]*)list(.*?\])#si', "$1\x07$2", $body);

    // replace to <li> tags - [*]..[*]|[*]..[/list]
    $body = preg_replace('#\[\*(?::\w+)?\]([^\x07]*?)(?=\s*?(\[\*(?::\w+)?\]|\[/\x07(?::\w+)?\]))#si', '<li>$1</li>', $body);
    // add </li> tags to nested <li> - [/list]..[/list]
    $body = preg_replace('#(\[/\x07(?::\w+)?\])(?=[^\x07]*?\[/\x07(?::\w+)?\])#si', '$1</li>', $body);
    // add </li> tags to nested <li> - [/list]..[*]..[list]
    $body = preg_replace('#(\[/\x07(?::\w+)?\])(?=[^\x07]*?\[\*(?::\w+)?\][^\x07]*?\[\x07.*(?::\w+)?\])#si', '$1</li>', $body);
    // replace to <li> tags for nested <li> - [*]..[list]
    $body = preg_replace('#\[\*(?::\w+)?\]([^\x07]*)?(?=\[\x07.*(?::\w+)?\])#si', '<li>$1', $body);

    // replace to <ol>/<ul> and </ol>/</ul> tags, innermost lists first;
    // each pass unwraps one nesting level.
    $list_pattern = "#\[\x07[=]*((?-i)[cds1aAiI])*(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si";
    while (preg_match($list_pattern, $body)) {
      $body = preg_replace_callback($list_pattern, '_bbcode_list_callback', $body);
    }

    // remove <br /> tags
    $body = preg_replace('#(<[/]*([uo]l|li).*>.*)<br />#i', '$1', $body);
  } // end processing for [list] and [*]

  // [index] is generated from the text as it is *before* any of the tags
  // below are converted, so keep a snapshot.
  $index_source = $body;

  // Implement [notag]
  $body = preg_replace_callback('#\[notag(?::\w+)?\](.*?)\[/notag(?::\w+)?\]#si', '_bbcode_notag_callback', $body);

  // Headings and indexes - articles will almost always need them
  $body = preg_replace_callback('#\[h([1-6])(?::\w+)?\](.*?)\[/h[1-6](?::\w+)?\]#si', '_bbcode_heading_callback', $body);
  $body = _bbcode_replace_index($body, $index_source);

  // Define BBCode tags (first part: everything that precedes [php])
  $preg = array(
    // Font, text and alignment
    '#\[align=(\w+)(?::\w+)?\](.*?)\[/align(?::\w+)?\]#si'         => '<span style="text-align:\\1">\\2</span>',
    '#\[float=(left|right)(?::\w+)?\](.*?)\[/float(?::\w+)?\]#si'  => '<span style="float:\\1">\\2</span>',
    '#\[justify(?::\w+)?\](.*?)\[/justify(?::\w+)?\]#si'           => '<div style="text-align:justify;">\\1</div>',
    '#\[(b|strong)(?::\w+)?\](.*?)\[/(b|strong)(?::\w+)?\]#si'     => '<span style="font-weight:bold">\\2</span>',
    '#\[(i|em)(?::\w+)?\](.*?)\[/(i|em)(?::\w+)?\]#si'             => '<span style="font-style:italic">\\2</span>',
    '#\[u(?::\w+)?\](.*?)\[/u(?::\w+)?\]#si'                       => '<span style="text-decoration:underline">\\1</span>',
    '#\[s(?::\w+)?\](.*?)\[/s(?::\w+)?\]#si'                       => '<s>\\1</s>',
    '#\[sup(?::\w+)?\](.*?)\[/sup(?::\w+)?\]#si'                   => '<sup>\\1</sup>',
    '#\[sub(?::\w+)?\](.*?)\[/sub(?::\w+)?\]#si'                   => '<sub>\\1</sub>',
    '#\[center(?::\w+)?\](.*?)\[/center(?::\w+)?\]#si'             => '<div style="text-align:center">\\1</div>',
    '#\[left(?::\w+)?\](.*?)\[/left(?::\w+)?\]#si'                 => '<div style="text-align:left">\\1</div>',
    '#\[right(?::\w+)?\](.*?)\[/right(?::\w+)?\]#si'               => '<div style="text-align:right">\\1</div>',

    // Links without a protocol, with a protocol, and with good looking text
    '#\[url(?::\w+)?\]www\.([\w:;&,%+~!=@\/\.\-\#\?]+?)\[/url(?::\w+)?\]#si'  => '<a href="http://www.\\1" class="bb-url">\\1</a>',
    '#\[url(?::\w+)?\]([\w:;&,%+~!=@\/\.\-\#\?]+?)\[/url(?::\w+)?\]#si'       => '<a href="\\1" class="bb-url">\\1</a>',
    '#\[url=www\.([\w:;&,%+~!=@\/\.\-\#\?]+?)\](.*?)\[/url(?::\w+)?\]#si'     => '<a href="http://www.\\1" class="bb-url">\\2</a>',
    '#\[url=([\w:;&,%+~!=@\/\.\-\#\?]+?)\](.*?)\[/url(?::\w+)?\]#si'          => '<a href="\\1" class="bb-url">\\2</a>',

    // Anchor tags for linking within documents
    '#\[anchor=(\w+)(?::\w+)?\](.*?)\[/anchor(?::\w+)?\]#si' => '<a name="\\1">\\2</a>',

    // Images without or with client-side sizing
    '#\[img(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si'                                 => '<img src="\\1" alt="" class="bb-image" />',
    '#\[img=(\d+)x(\d+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si'                     => '<img width="\\1" height="\\2" alt="" src="\\3" class="bb-image" />',
    '#\[img=([\w\s:;,\.\-\'\(\)]+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si'          => '<img alt="\\1" src="\\2" class="bb-image" />',
    '#\[img align=(left|right|center)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si'       => '<img src="\\2" alt="" align="\\1" class="bb-image" />',

    // Flash animations and other special effects
    '#\[flash=(\d+)x(\d+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/flash(?::\w+)?\]#si' => '<object type="application/x-shockwave-flash" data="\\3" width="\\1" height="\\2"><param name="movie" value="\\3" /></object>',

    // Acronyms & abbreviations - show description when mouse moves over tag.
    // The hyphen is escaped: an unescaped "-" after \s is rejected by PCRE2.
    '#\[acronym=([\w\s\-,\.]+)(?::\w+)?\](.*?)\[/acronym(?::\w+)?\]#si' => '<acronym title="\\1">\\2</acronym>',
    '#\[abbr=([\w\s\-,\.]+)(?::\w+)?\](.*?)\[/abbr(?::\w+)?\]#si'       => '<abbr title="\\1">\\2</abbr>',

    // Quoting with or without specifying the source. The source may be
    // wrapped in ", ' or an already-encoded &quot;.
    '#\[quote(?::\w+)?\]#i'                                        => '<div class="bb-quote">'.$quote_text.'<blockquote class="bb-quote-body">',
    '#\[quote=(?:&quot;|"|\')?(.*?)["\']?(?:&quot;|"|\')?\]#i'     => '<div class="bb-quote"><b>'.$quote_user.'</b><blockquote class="bb-quote-body">',
    '#\[/quote(?::\w+)?\]#si'                                      => '</blockquote></div>',
  );
  $body = preg_replace(array_keys($preg), array_values($preg), $body);

  // PHP code blocks (syntax highlighted)
  $body = preg_replace_callback('#\[php(?::\w+)?\](?:[\r\n])*(.*?)\[/php(?::\w+)?\]#si', '_bbcode_php_callback', $body);

  // Define BBCode tags (second part: everything that follows [php])
  $preg = array(
    // Links to popular sites
    '#\[google(?::\w+)?\]([\w\s-]+?)\[/google(?::\w+)?\]#si'       => '<a href="http://www.google.com/search?q=\\1">\\1</a>',
    '#\[wikipedia(?::\w+)?\]([\w\s-]+?)\[/wikipedia(?::\w+)?\]#si' => '<a href="http://www.wikipedia.org/wiki/\\1">\\1</a>',
    '#\[youtube\]([0-9a-zA-Z_\-]+)\[/youtube\]#si'                 => '<object width="425" height="366"><param name="movie" value="http://www.youtube.com/v/\\1"></param><embed src="http://www.youtube.com/v/\\1" type="application/x-shockwave-flash" width="425" height="366"></embed></object>',

    // Table tags
    '#\[table\](.+?)\[/table\]#si'                            => '<table class="bb-table">\\1</table>',
    '#\[(row|r|tr)\](.+?)\[/(row|r|tr)\]#si'                  => '<tr>\\2</tr>',
    '#\[(row|r|tr) color=([\#\w]+)\](.+?)\[/(row|r|tr)\]#si'  => '<tr bgcolor=\\2>\\3</tr>',
    '#\[(header|head|h)\](.+?)\[/(header|head|h)\]#si'        => '<th>\\2</th>',
    '#\[(col|c|td)\](.+?)\[/(col|c|td)\]#si'                  => '<td valign="top">\\2</td>',

    // Cleanup table output (td, th and tr tags)
    '#<([\/]?)t([dhr])><br />#si' => '<\\1t\\2>',
    '#<table(.+?)><br />#si'      => '<table\\1>',
  );
  $body = preg_replace(array_keys($preg), array_values($preg), $body);

  // Simple replacements (str_replace is faster than preg_replace)
  $str = array(
    // Horizontal delimiter
    '[hr]' => '<hr class="bb-hr" />',
    // Force line break
    '[br]' => '<br class="bb-br" />',
    // Force space (a plain space would be collapsed by the browser)
    '[sp]' => '&nbsp;',
  );
  $body = str_replace(array_keys($str), array_values($str), $body);

  // We cannot evaluate the variable in callback function because
  // there is no way to pass the $format variable
  $encode_mailto = variable_get("bbcode_encode_mailto_$format", 1);
  if ($encode_mailto) {
    // Replacing email addresses with encoded html
    $body = preg_replace_callback('#\[email(?::\w+)?\]([\w\.\-\+~@]+)\[/email(?::\w+)?\]#si', '_bbcode_encode_mailto', $body);
    $body = preg_replace_callback('#\[email=(.*?)(?::\w+)?\](.*?)\[/email(?::\w+)?\]#si', '_bbcode_encode_mailto', $body);
  }
  else {
    $body = preg_replace(
      array('#\[email(?::\w+)?\](.*?)\[/email(?::\w+)?\]#si','#\[email=(.*?)(?::\w+)?\]([\w\s]+)\[/email(?::\w+)?\]#si'),
      array('<a href="mailto:\\1" class="bb-email">\\1</a>', '<a href="mailto:\\1" class="bb-email">\\2</a>'),
      $body);
  }

  // Turns web and e-mail addresses into clickable links
  if (variable_get("bbcode_make_links_$format", 1)) {

    // pad with a space so we can match things at the start of the 1st line
    $ret = ' ' . $body;
    // padding to already filtered links
    $ret = preg_replace('#(<a.+>)(.+</a>)#i', "$1\x07$2", $ret);

    // matches an "xxx://yyyy" URL at the start of a line, or after a space.
    // xxxx can only be alpha characters.
    // yyyy is anything up to the first space, newline, comma, double quote or <
    $ret = preg_replace('#(?<=^|[\t\r\n >\(\[\]\|])([a-z]+?://[\w\-]+\.([\w\-]+\.)*\w+(:[0-9]+)?(/[^ "\'\(\n\r\t<\)\[\]\|]*)?)((?<![,\.])|(?!\s))#i', '<a href="\1">\1</a>', $ret);

    // matches a "www|ftp.xxxx.yyyy[/zzzz]" kinda lazy URL thing
    // Must contain at least 2 dots. xxxx contains either alphanum, or "-"
    // zzzz is optional.. will contain everything up to the first space, newline,
    // comma, double quote or <.
    $ret = preg_replace('#([\t\r\n >\(\[\|])(www|ftp)\.(([\w\-]+\.)*[\w]+(:[0-9]+)?(/[^ \"\'\(\n\r\t<\)\[\]\|]*)?)#i', '\1<a href="http://\2.\3">\2.\3</a>', $ret);

    // matches an email@domain type address at the start of a line, or after a space.
    // Note: Only the followed chars are valid; alphanums, "-", "_" and or ".".
    if ($encode_mailto) {
      $ret = preg_replace_callback("#([\t\r\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", '_bbcode_encode_mailto', $ret);
    }
    else {
      $ret = preg_replace('#([\t\r\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i', '\\1<a href="mailto:\\2@\\3">\\2@\\3</a>', $ret);
    }

    // Remove our padding
    $ret = str_replace("\x07", '', $ret);
    $body = substr($ret, 1);
  }

  if (variable_get("bbcode_filter_nofollow_$format", 0)) {
    // Require whitespace after "<a" so <abbr>/<acronym> are left alone.
    $body = preg_replace('#<a(\s[^>]*)>#i', '<a\\1 rel="nofollow">', $body);
  }

  return $body;
}

/**
 * Build the HTML for one heading, wrapped in a named anchor.
 *
 * The anchor name is $text with whitespace runs turned into "_" and every
 * remaining non-word character removed.
 *
 * @param $level
 *   Heading level (1-6).
 * @param $text
 *   Heading text.
 *
 * @return
 *   "<hN><a name="...">text</a></hN>".
 */
function _bbcode_generate_heading($level, $text) {
  $anchor = preg_replace('/([\s]+)/', '_', $text);
  $anchor = preg_replace('/([\W]+)/', '', $anchor);
  return '<h'. $level .'><a name="'. $anchor .'">'. $text .'</a></h'. $level .'>';
}

/**
 * Build a table of contents from the [hN] tags found in $body.
 *
 * Anchors are derived exactly as in _bbcode_generate_heading(), so the
 * entries link to the headings that function renders.
 *
 * @param $body
 *   Text still containing the [h1]..[h6] BBCode tags.
 * @param $tag
 *   List element to use: 'ol' or 'ul'.
 *
 * @return
 *   HTML list markup.
 */
function _bbcode_generate_index($body, $tag = 'ol') {
  $level = 0;
  $index = '<'. $tag .">\n";
  $close_tags = 0;

  if (preg_match_all('#\[h([1-6]).*?\](.*?)\[/h([1-6]).*?\]#si', $body, $head_tags, PREG_SET_ORDER)) {
    foreach ($head_tags as $head_tag) {
      // The first heading defines the top level of the index
      if ($level == 0) $level = $head_tag[1];
      $anchor = preg_replace('/([\s]+)/', '_', $head_tag[2]);
      $anchor = preg_replace('/([\W]+)/', '', $anchor);

      if ($head_tag[1] > $level) {
        // Deeper heading: open a nested list
        $index .= '<'. $tag .">\n";
        $close_tags++;
      }
      elseif ($head_tag[1] < $level) {
        // Shallower heading: close every nested list that is still open
        while ($close_tags > 0) {
          $index .= '</'. $tag .">\n";
          $close_tags--;
        }
      }
      $index .= '<li><a href="#'. $anchor .'">'. $head_tag[2] ."</a>\n";
      $level = $head_tag[1];
    }
  }
  // ">= 0" on purpose: also closes the outermost list opened at the top
  while ($close_tags >= 0) {
    $index .= '</'. $tag .">\n";
    $close_tags--;
  }
  return $index;
}

/**
 * Replace [index], [index/] and [index style=ol|ul] with a generated index.
 *
 * @param $text
 *   Text in which the [index] tags are replaced.
 * @param $source
 *   Text the index is generated from (must still contain the [hN] tags).
 *
 * @return
 *   $text with every index tag replaced.
 */
function _bbcode_replace_index($text, $source) {
  // [index] and [index/]: splitting and re-joining inserts the index
  // literally, without any replacement-string escaping concerns.
  $parts = preg_split('#\[index\s*/?\]#si', $text);
  if ($parts !== FALSE && count($parts) > 1) {
    $text = implode(_bbcode_generate_index($source), $parts);
  }

  // [index style=ol|ul]: with DELIM_CAPTURE the odd entries hold the style.
  $parts = preg_split('#\[index style=(ol|ul)\]#si', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  if ($parts !== FALSE && count($parts) > 1) {
    $text = $parts[0];
    for ($i = 1; $i + 1 < count($parts); $i += 2) {
      $text .= _bbcode_generate_index($source, $parts[$i]) . $parts[$i + 1];
    }
  }
  return $text;
}

/**
 * preg_replace_callback() handler that renders an obfuscated mailto link.
 *
 * The link is written by a small script whose source is percent-hex encoded,
 * so the address does not appear in clear text in the page source.
 *
 * Handles three match layouts:
 * - [1] address                          ([email]a@b[/email])
 * - [1] address, [2] link text           ([email=a@b]text[/email])
 * - [1] leading whitespace, [2] local part, [3] domain (auto-detected address)
 *
 * NOTE(review): for [email=...] both groups are arbitrary user text and are
 * inserted into the generated JavaScript/HTML unescaped - confirm that an
 * earlier filter sanitises them.
 *
 * @param $matches
 *   Match array supplied by preg_replace_callback().
 *
 * @return
 *   Replacement HTML.
 */
function _bbcode_encode_mailto($matches) {
  $auto_detected = isset($matches[3]);

  if ($auto_detected) {
    $link = 'document.write(\'<a href="mailto:' . $matches[2].'@'.$matches[3] . '">' . $matches[2].'@'.$matches[3] . '</a>\');';
  }
  else {
    $link = 'document.write(\'<a href="mailto:' . $matches[1] . '" class="bb-email">' . (isset($matches[2]) ? $matches[2] : $matches[1]) . '</a>\');';
  }

  $js_encode = '';
  $length = strlen($link);
  for ($x = 0; $x < $length; $x++) {
    // Square-bracket offsets work on every PHP version; "{}" is gone in PHP 8
    $js_encode .= '%' . bin2hex($link[$x]);
  }

  $link = '<script type="text/javascript">eval(unescape(\''.$js_encode.'\'))</script>';
  if ($auto_detected) {
    // Put back the whitespace the pattern consumed in front of the address
    $link = $matches[1] . $link;
  }

  return $link;
}

/**
 * preg_replace_callback() handler for [notag]: neutralise "[", "]" and "@".
 *
 * The characters are turned into numeric entities so that later BBCode and
 * e-mail patterns no longer match inside the tag.
 *
 * @param $matches
 *   Match array; [1] is the text between the tags.
 */
function _bbcode_notag_callback($matches) {
  return str_replace(array('[', ']', '@'), array('&#91;', '&#93;', '&#64;'), $matches[1]);
}

/**
 * Neutralise BBCode inside slash-escaped text.
 *
 * Kept for callers that pass addslashes()-style text; the filter itself uses
 * _bbcode_notag_callback() on the raw match.
 *
 * @param $text
 *   Slash-escaped text.
 */
function _bbcode_notag_tag($text = NULL) {
  return _bbcode_notag_callback(array(1 => stripslashes((string) $text)));
}

/**
 * preg_replace_callback() handler for [php]: syntax highlight the code.
 *
 * @param $matches
 *   Match array; [1] is the code between the tags.
 */
function _bbcode_php_callback($matches) {
  // <br /> tags were added by the line break processing, not by the author
  return '<pre>'. highlight_string(str_replace('<br />', '', $matches[1]), true) .'</pre>';
}

/**
 * Syntax highlight slash-escaped PHP code.
 *
 * Kept for callers that pass addslashes()-style text; the filter itself uses
 * _bbcode_php_callback() on the raw match.
 *
 * @param $text
 *   Slash-escaped code.
 */
function _bbcode_php_tag($text = NULL) {
  return _bbcode_php_callback(array(1 => stripslashes((string) $text)));
}

/**
 * preg_replace_callback() handler for heading tags.
 *
 * @param $matches
 *   Match array; [1] is the level, [2] the heading text.
 */
function _bbcode_heading_callback($matches) {
  return _bbcode_generate_heading($matches[1], $matches[2]);
}

/**
 * preg_replace_callback() handler for [size=N].
 *
 * @param $matches
 *   Match array; [1] is the requested size, [2] the enclosed text.
 */
function _bbcode_size_callback($matches) {
  return '<span style="font-size:'. _bbcode_round_size_val($matches[1]) .'px">'. $matches[2] .'</span>';
}

/**
 * preg_replace_callback() handler for one innermost [list]...[/list] pair.
 *
 * @param $matches
 *   Match array; [1] is the list type letter ('' when none was given),
 *   [2] the list content.
 */
function _bbcode_list_callback($matches) {
  static $l_type = array(
    ''  => array('style' => 'circle',      'tag' => 'ul'),
    'c' => array('style' => 'circle',      'tag' => 'ul'),
    'd' => array('style' => 'disc',        'tag' => 'ul'),
    's' => array('style' => 'square',      'tag' => 'ul'),
    '1' => array('style' => 'decimal',     'tag' => 'ol'),
    'a' => array('style' => 'lower-alpha', 'tag' => 'ol'),
    'A' => array('style' => 'upper-alpha', 'tag' => 'ol'),
    'i' => array('style' => 'lower-roman', 'tag' => 'ol'),
    'I' => array('style' => 'upper-roman', 'tag' => 'ol'),
  );
  $type = $l_type[$matches[1]];
  return '<'. $type['tag'] .' class="bb-list" style="list-style-type:'. $type['style'] .';">'. $matches[2] .'</'. $type['tag'] .'>';
}

/**
 * Clamp a font size to the range 6..48.
 *
 * @param $size
 *   Requested size.
 *
 * @return
 *   6 if $size is below 6, 48 if it is above 48, otherwise $size unchanged.
 */
function _bbcode_round_size_val($size) {
  if ($size < 6) {
    return 6;
  }
  if ($size > 48) {
    return 48;
  }
  return $size;
}

/**
 * Convert a nestable tag, innermost occurrences first.
 *
 * Every occurrence of the tag name inside "[...]" is first replaced by the
 * marker \x07; the pattern then uses "[^\x07]*" to match only pairs that do
 * not contain another marked tag. The replacement is repeated until no pair
 * is left.
 *
 * @param $arr
 *   Array with the keys:
 *   - 'tag': tag name, e.g. 'color'.
 *   - 'pattern': regular expression written against the \x07 marker.
 *   - 'replacement': replacement string for preg_replace(), or
 *   - 'callback': callable for preg_replace_callback(); takes precedence
 *     over 'replacement' when present.
 *   - 'text': text to process.
 *
 * @return
 *   The processed text.
 */
function _bbcode_replace_nest_tag($arr = NULL) {
  $text = preg_replace('#(\[[/]*)'. $arr['tag'] .'(.*?\])#si', "$1\x07$2", $arr['text']);
  $use_callback = isset($arr['callback']);

  while (preg_match($arr['pattern'], $text)) {
    if ($use_callback) {
      $text = preg_replace_callback($arr['pattern'], $arr['callback'], $text);
    }
    else {
      $text = preg_replace($arr['pattern'], $arr['replacement'], $text);
    }
  }
  return $text;
}