1<?php
2// $Id: bbcode-filter.inc,v 1.66 2008/11/30 08:50:08 naudefj Exp $
3
/**
 * Converts the BBCode markup found in a text into HTML.
 *
 * The steps run in a fixed order that later steps rely on:
 *  1. <script> tags are entity-encoded.
 *  2. [code] blocks are rendered (multi-line blocks are parked behind
 *     placeholders so that line break processing does not touch them).
 *  3. Line/paragraph breaks are applied according to the
 *     "bbcode_paragraph_breaks_$format" setting.
 *  4. Unbalanced [quote] and [list] tags are balanced.
 *  5. Nestable tags ([size], [color], [font], [list]) are replaced.
 *  6. All remaining tags are replaced, then e-mail links, automatic links
 *     and rel="nofollow" are handled according to the per-format settings.
 *
 * Tags that need PHP logic are replaced with preg_replace_callback() and
 * closures (PHP 5.3+). The /e pattern modifier that was used for this before
 * no longer exists in PHP 7, where it made preg_replace() return NULL and
 * thereby wiped the whole text.
 *
 * @param $body
 *   The text to filter. It is taken by reference, so the caller's variable
 *   ends up holding the filtered text as well.
 * @param $format
 *   Input format id; only used to build the names of the settings read with
 *   variable_get().
 * @return
 *   The filtered text.
 */
function _bbcode_filter_process(&$body, $format = -1) {

  $quote_text = t('Quote:');
  // The \1 is a regex back reference: it is filled in with the quoted user
  // name when the [quote=...] pattern below is replaced.
  $quote_user = t('\\1 wrote:');

  // Encode all script tags to prevent XSS html injection attacks
  $body = preg_replace(array('#<script([^>]*)>#i', '#</script([^>]*)>#i'), array('&lt;script\\1&gt;', '&lt;/script\\1&gt;'), $body);

  // Find all [code] tags and check if they contain a newline. If we find a newline,
  // that [code] should be rendered as a block, otherwise it will still be inline
  $mode = variable_get("bbcode_paragraph_breaks_$format", 2);
  $pre = array(); $i = 0;
  if (preg_match_all('#\[code(?::\w+)?\](.*?)\[/code(?::\w+)?\]#si', $body, $code_tags, PREG_SET_ORDER)) {
    foreach ($code_tags as $code_tag) {
      $code_tag[1] = str_replace(array('<', '>'), array('&lt;', '&gt;'), $code_tag[1]);
      if (strpos($code_tag[1], "\n") === FALSE) {
        $body = str_replace($code_tag[0], '<code class="bb-code">'. $code_tag[1] .'</code>', $body);
      }
      elseif ($mode) {
        // Strip preformatted code blocks from text during line break processing, replaced below
        $body = str_replace($code_tag[0], "***pRe_sTrInG$i***", $body);
        $pre[$i++] = '<pre class="bb-code-block">'. $code_tag[1] .'</pre>';
      }
      else {
        $body = str_replace($code_tag[0], '<pre class="bb-code-block">'. $code_tag[1] .'</pre>', $body);
      }
    }
  }

  // Apply line and paragraph breaks (skipping preformatted code)
  if ($mode) {

    // Line breaks only (starting with PHP 4.0.5, nl2br() is XHTML compliant)
    if ($mode == 1) {
      $body = nl2br($body);
    }

    // Line and paragraph breaks (may not always be XHTML compliant)
    if ($mode == 2) {
      // Normalize line endings, then collapse runs of blank lines so that
      // every paragraph is separated by exactly one blank line
      $body  = preg_replace("/(\r\n|\n|\r)/", "\n", $body);
      $body  = preg_replace("/\n\n+/", "\n\n", $body);
      $parts = explode("\n\n", $body);
      for ($i=0; $i<sizeof($parts); $i++) {
         // No linebreaks if paragraph starts with an HTML tag
         if ( !preg_match('/^<.*>/', $parts[$i]) ) {
           $parts[$i] = nl2br($parts[$i]);
         }

         // Some tags should not be in paragraph blocks
         if ( !preg_match('/^(?:<|\[)(?:table|list|ol|ul|pre|select|form|blockquote|hr)/i', $parts[$i]) ) {
           $parts[$i] = '<p>'. $parts[$i] .'</p>';
         }
      }
      $body = implode("\n\n", $parts);
    }

    // Reinsert preformatted code blocks
    foreach ($pre as $i => $code_tag) {
       $body = str_replace("***pRe_sTrInG$i***", $code_tag, $body);
    }
  }

  // Replace any improper quote tags with proper quote tags
  $body = str_replace('[quote/]', '[/quote]', $body);

  // Add closing tags to prevent users from disrupting your site's HTML
  // (required for nestable tags only: [list] and [quote])
  preg_match_all('/\[quote/i', $body, $matches);
  $opentags = count($matches['0']);
  preg_match_all('/\[\/quote\]/i', $body, $matches);
  $unclosed = $opentags - count($matches['0']);
  for ($i = 0; $i < $unclosed; $i++) {
    $body .= '[/quote]';
  }
  // Also add opening tags, if needed ($unclosed is negative in that case)
  for ($i = $unclosed; $i < 0; $i++) {
    $body = '[quote]' . $body;
  }
  preg_match_all('/\[list/i', $body, $matches);
  $opentags = count($matches['0']);
  preg_match_all('/\[\/list\]/i', $body, $matches);
  $unclosed = $opentags - count($matches['0']);
  for ($i = 0; $i < $unclosed; $i++) {
    $body .= '[/list]';
  }
  for ($i = $unclosed; $i < 0; $i++) {
    $body = '[list]' . $body;
  }

  // begin processing for [size]
  if (stristr($body, '[size=') !== FALSE) { // prevent useless processing
    // The tag name is swapped for a \x07 marker so that "[^\x07]*" in the
    // pattern can only match content without another [size] tag inside it.
    // Nested tags are therefore resolved from the innermost one outwards,
    // one nesting level per loop iteration.
    $size_pattern = '#\[\x07=([\d]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si';
    $body = preg_replace('#(\[[/]*)size(.*?\])#si', "$1\x07$2", $body);
    while (preg_match($size_pattern, $body)) {
      $body = preg_replace_callback($size_pattern, function ($m) {
        return '<span style="font-size:'. _bbcode_round_size_val($m[1]) .'px">'. $m[2] .'</span>';
      }, $body);
    }
  } // end processing for [size]

  // begin processing for [color]
  if (stristr($body, '[color=') !== FALSE) { // prevent useless processing
    $arr = array(
      'tag'         => 'color',
      'pattern'     => '#\[\x07=([\#\w]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si',
      'replacement' => '<span style="color:$1">$2</span>',
      'text'        => $body);
    $body = _bbcode_replace_nest_tag($arr);
  } // end processing for [color]

  // begin processing for [font]
  if (stristr($body, '[font=') !== FALSE) { // prevent useless processing
    $arr = array(
      'tag'         => 'font',
      'pattern'     => '#\[\x07=([\w\s]+)(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si',
      'replacement' => '<span style="font-family:$1">$2</span>',
      'text'        => $body);
    $body = _bbcode_replace_nest_tag($arr);
  } // end processing for [font]

  // begin processing for [list] and [*]
  if (stristr($body, '[list') !== FALSE) { // prevent useless processing
    // List type letter => CSS list style and HTML tag. The NULL key (which
    // PHP stores as the empty string) covers a plain [list] without a type.
    $l_type = array(
      NULL   => array('style' => 'circle', 'tag' => 'ul'),
      'c'    => array('style' => 'circle', 'tag' => 'ul'),
      'd'    => array('style' => 'disc', 'tag' => 'ul'),
      's'    => array('style' => 'square', 'tag' => 'ul'),
      '1'    => array('style' => 'decimal', 'tag' => 'ol'),
      'a'    => array('style' => 'lower-alpha', 'tag' => 'ol'),
      'A'    => array('style' => 'upper-alpha', 'tag' => 'ol'),
      'i'    => array('style' => 'lower-roman', 'tag' => 'ol'),
      'I'    => array('style' => 'upper-roman', 'tag' => 'ol')
      );
    $body = preg_replace('#(\[[/]*)list(.*?\])#si', "$1\x07$2", $body);

    // replace to <li> tags - [*]..[*]|[*]..[/list]
    $body = preg_replace('#\[\*(?::\w+)?\]([^\x07]*?)(?=\s*?(\[\*(?::\w+)?\]|\[/\x07(?::\w+)?\]))#si', '<li>$1</li>', $body);
    // add </li> tags to nested <li> - [/list]..[/list]
    $body = preg_replace('#(\[/\x07(?::\w+)?\])(?=[^\x07]*?\[/\x07(?::\w+)?\])#si', '$1</li>', $body);
    // add </li> tags to nested <li> - [/list]..[*]..[list]
    $body = preg_replace('#(\[/\x07(?::\w+)?\])(?=[^\x07]*?\[\*(?::\w+)?\][^\x07]*?\[\x07.*(?::\w+)?\])#si', '$1</li>', $body);
    // replace to <li> tags for nested <li> - [*]..[list]
    $body = preg_replace('#\[\*(?::\w+)?\]([^\x07]*)?(?=\[\x07.*(?::\w+)?\])#si', '<li>$1', $body);

    // replace to <ol>/<ul> and </ol>/</ul> tags, innermost list first
    $list_pattern = "#\[\x07[=]*((?-i)[cds1aAiI])*(?::\w+)?\]([^\x07]*)\[/\x07(?::\w+)?\]#si";
    while (preg_match($list_pattern, $body)) {
      $body = preg_replace_callback($list_pattern, function ($m) use ($l_type) {
        // $m[1] is the empty string when no list type was given
        $list = $l_type[$m[1]];
        return '<'. $list['tag'] .' class="bb-list" style="list-style-type:'. $list['style'] .';">'. $m[2] .'</'. $list['tag'] .'>';
      }, $body);
    }

    // remove <br /> tags
    $body = preg_replace('#(<[/]*([uo]l|li).*>.*)<br />#i', '$1', $body);
  } // end processing for [list] and [*]

  // [index] is generated from the [hN] tags, so it needs the text as it is
  // before the headings below are turned into HTML.
  $index_source = $body;

  // Implement [notag]. _bbcode_notag_tag() runs stripslashes() on its
  // argument, so the text is escaped first to get through unchanged.
  $body = preg_replace_callback('#\[notag(?::\w+)?\](.*?)\[/notag(?::\w+)?\]#si', function ($m) {
    return _bbcode_notag_tag(addslashes($m[1]));
  }, $body);

  // Headings and indexes - articles will almost always need them
  $body = preg_replace_callback('#\[h([1-6])(?::\w+)?\](.*?)\[/h[1-6](?::\w+)?\]#si', function ($m) {
    return _bbcode_generate_heading($m[1], $m[2]);
  }, $body);
  $body = preg_replace_callback('#\[index\s*/?\]#si', function ($m) use ($index_source) {
    return _bbcode_generate_index($index_source);
  }, $body);
  $body = preg_replace_callback('#\[index style=(ol|ul)\]#si', function ($m) use ($index_source) {
    return _bbcode_generate_index($index_source, $m[1]);
  }, $body);

  // Define BBCode tags (applied in the order listed)
  $preg = array(
    // Font, text and alignment
    '#\[align=(\w+)(?::\w+)?\](.*?)\[/align(?::\w+)?\]#si'   => '<span style="text-align:\\1">\\2</span>',
    '#\[float=(left|right)(?::\w+)?\](.*?)\[/float(?::\w+)?\]#si' => '<span style="float:\\1">\\2</span>',
    '#\[justify(?::\w+)?\](.*?)\[/justify(?::\w+)?\]#si'     => '<div style="text-align:justify;">\\1</div>',
    '#\[(b|strong)(?::\w+)?\](.*?)\[/(b|strong)(?::\w+)?\]#si' => '<span style="font-weight:bold">\\2</span>',
    '#\[(i|em)(?::\w+)?\](.*?)\[/(i|em)(?::\w+)?\]#si'       => '<span style="font-style:italic">\\2</span>',
    '#\[u(?::\w+)?\](.*?)\[/u(?::\w+)?\]#si'                 => '<span style="text-decoration:underline">\\1</span>',
    '#\[s(?::\w+)?\](.*?)\[/s(?::\w+)?\]#si'                 => '<s>\\1</s>',
    '#\[sup(?::\w+)?\](.*?)\[/sup(?::\w+)?\]#si'             => '<sup>\\1</sup>',
    '#\[sub(?::\w+)?\](.*?)\[/sub(?::\w+)?\]#si'             => '<sub>\\1</sub>',
    '#\[center(?::\w+)?\](.*?)\[/center(?::\w+)?\]#si'       => '<div style="text-align:center">\\1</div>',
    '#\[left(?::\w+)?\](.*?)\[/left(?::\w+)?\]#si'           => '<div style="text-align:left">\\1</div>',
    '#\[right(?::\w+)?\](.*?)\[/right(?::\w+)?\]#si'         => '<div style="text-align:right">\\1</div>',

    // Links without a protocol, with a protocol, and with good looking text
    '#\[url(?::\w+)?\]www\.([\w:;&,%+~!=@\/\.\-\#\?]+?)\[/url(?::\w+)?\]#si' => '<a href="http://www.\\1" class="bb-url">\\1</a>',
    '#\[url(?::\w+)?\]([\w:;&,%+~!=@\/\.\-\#\?]+?)\[/url(?::\w+)?\]#si'   => '<a href="\\1" class="bb-url">\\1</a>',
    '#\[url=www\.([\w:;&,%+~!=@\/\.\-\#\?]+?)\](.*?)\[/url(?::\w+)?\]#si' => '<a href="http://www.\\1" class="bb-url">\\2</a>',
    '#\[url=([\w:;&,%+~!=@\/\.\-\#\?]+?)\](.*?)\[/url(?::\w+)?\]#si'      => '<a href="\\1" class="bb-url">\\2</a>',

    // Anchor tags for linking within documents
    '#\[anchor=(\w+)(?::\w+)?\](.*?)\[/anchor(?::\w+)?\]#si' => '<a name="\\1">\\2</a>',

    // Images without or with client-side sizing
    '#\[img(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si' => '<img src="\\1" alt="" class="bb-image" />',
    '#\[img=(\d+)x(\d+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si' => '<img width="\\1" height="\\2" alt="" src="\\3" class="bb-image" />',
    '#\[img=([\w\s:;,\.\-\'\(\)]+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si' => '<img alt="\\1" src="\\2" class="bb-image" />',
    '#\[img align=(left|right|center)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/img(?::\w+)?\]#si' => '<img src="\\2" alt="" align="\\1" class="bb-image" />',

    // Flash animations and other special effects
    '#\[flash=(\d+)x(\d+)(?::\w+)?\]([\w:;&,~%+!=@\/\.\-\#\?]+)\[/flash(?::\w+)?\]#si' => '<object type="application/x-shockwave-flash" data="\\3" width="\\1" height="\\2"><param name="movie" value="\\3" /></object>',

    // Acronyms & abbreviations - show description when mouse moves over tag.
    // The hyphen is escaped: an unescaped "-" right after \s is rejected as
    // an invalid range by newer PCRE versions.
    '#\[acronym=([\w\s\-,\.]+)(?::\w+)?\](.*?)\[/acronym(?::\w+)?\]#si' => '<acronym title="\\1">\\2</acronym>',
    '#\[abbr=([\w\s\-,\.]+)(?::\w+)?\](.*?)\[/abbr(?::\w+)?\]#si'       => '<abbr title="\\1">\\2</abbr>',

    // Quoting with or without specifying the source
    '#\[quote(?::\w+)?\]#i'                                    => '<div class="bb-quote">'.$quote_text.'<blockquote class="bb-quote-body">',
    '#\[quote=(?:&quot;|"|\')?(.*?)["\']?(?:&quot;|"|\')?\]#i' => '<div class="bb-quote"><b>'.$quote_user.'</b><blockquote class="bb-quote-body">',
    '#\[/quote(?::\w+)?\]#si'                                  => '</blockquote></div>',
  );
  $body = preg_replace(array_keys($preg), array_values($preg), $body);

  // PHP code blocks (syntax highlighted). _bbcode_php_tag() runs
  // stripslashes() on its argument, so the code is escaped first to keep
  // its backslashes.
  $body = preg_replace_callback('#\[php(?::\w+)?\](?:[\r\n])*(.*?)\[/php(?::\w+)?\]#si', function ($m) {
    return _bbcode_php_tag(addslashes($m[1]));
  }, $body);

  // Tags that have to be processed after [php]
  $preg = array(
    // Links to popular sites
    '#\[google(?::\w+)?\]([\w\s-]+?)\[/google(?::\w+)?\]#si'       => '<a href="http://www.google.com/search?q=\\1">\\1</a>',
    '#\[wikipedia(?::\w+)?\]([\w\s-]+?)\[/wikipedia(?::\w+)?\]#si' => '<a href="http://www.wikipedia.org/wiki/\\1">\\1</a>',
    '#\[youtube\]([0-9a-zA-Z_\-]+)\[/youtube\]#si'                 => '<object width="425" height="366"><param name="movie" value="http://www.youtube.com/v/\\1"></param><embed src="http://www.youtube.com/v/\\1" type="application/x-shockwave-flash" width="425" height="366"></embed></object>',

    // Table tags
    '#\[table\](.+?)\[/table\]#si'                           => '<table class="bb-table">\\1</table>',
    '#\[(row|r|tr)\](.+?)\[/(row|r|tr)\]#si'                 => '<tr>\\2</tr>',
    '#\[(row|r|tr) color=([\#\w]+)\](.+?)\[/(row|r|tr)\]#si' => '<tr bgcolor=\\2>\\3</tr>',
    '#\[(header|head|h)\](.+?)\[/(header|head|h)\]#si'       => '<th>\\2</th>',
    '#\[(col|c|td)\](.+?)\[/(col|c|td)\]#si'                 => '<td valign="top">\\2</td>',

    // Cleanup table output (td, th and tr tags)
    '#<([\/]?)t([dhr])><br />#si'                           => '<\\1t\\2>',
    '#<table(.+?)><br />#si'                                => '<table\\1>',
  );
  $body = preg_replace(array_keys($preg), array_values($preg), $body);

  // Simple replacements (str_replace is faster than preg_replace)
  $str = array(
    // Horizontal delimiter
    '[hr]'   => '<hr class="bb-hr" />',
    // Force line break
    '[br]'   => '<br class="bb-br" />',
    // Force space
    '[sp]'   => '&nbsp;',
  );
  $body = str_replace(array_keys($str), array_values($str), $body);

  // We cannot evaluate the variable in callback function because
  // there is no way to pass the $format variable
  if (variable_get("bbcode_encode_mailto_$format", 1)) {
    // Replacing email addresses with encoded html
    $body = preg_replace_callback('#\[email(?::\w+)?\]([\w\.\-\+~@]+)\[/email(?::\w+)?\]#si', '_bbcode_encode_mailto', $body);
    $body = preg_replace_callback('#\[email=(.*?)(?::\w+)?\](.*?)\[/email(?::\w+)?\]#si', '_bbcode_encode_mailto', $body);
  }
  else {
    $body = preg_replace(
      array('#\[email(?::\w+)?\](.*?)\[/email(?::\w+)?\]#si','#\[email=(.*?)(?::\w+)?\]([\w\s]+)\[/email(?::\w+)?\]#si'),
      array('<a href="mailto:\\1" class="bb-email">\\1</a>', '<a href="mailto:\\1" class="bb-email">\\2</a>'),
      $body);
  }

  // Turns web and e-mail addresses into clickable links
  if (variable_get("bbcode_make_links_$format", 1)) {

    // pad with a space so we can match things at the start of the 1st line
    $ret = ' ' . $body;
    // padding to already filtered links
    $ret = preg_replace('#(<a.+>)(.+</a>)#i', "$1\x07$2", $ret);

    // matches an "xxx://yyyy" URL at the start of a line, or after a space.
    // xxxx can only be alpha characters.
    // yyyy is anything up to the first space, newline, comma, double quote or <
    $ret = preg_replace('#(?<=^|[\t\r\n >\(\[\]\|])([a-z]+?://[\w\-]+\.([\w\-]+\.)*\w+(:[0-9]+)?(/[^ "\'\(\n\r\t<\)\[\]\|]*)?)((?<![,\.])|(?!\s))#i', '<a href="\1">\1</a>', $ret);

    // matches a "www|ftp.xxxx.yyyy[/zzzz]" kinda lazy URL thing
    // Must contain at least 2 dots. xxxx contains either alphanum, or "-"
    // zzzz is optional.. will contain everything up to the first space, newline,
    // comma, double quote or <.
    $ret = preg_replace('#([\t\r\n >\(\[\|])(www|ftp)\.(([\w\-]+\.)*[\w]+(:[0-9]+)?(/[^ \"\'\(\n\r\t<\)\[\]\|]*)?)#i', '\1<a href="http://\2.\3">\2.\3</a>', $ret);

    // matches an email@domain type address at the start of a line, or after a space.
    // Note: Only the following chars are valid; alphanums, "-", "_" and or ".".
    if (variable_get("bbcode_encode_mailto_$format", 1)) {
      $ret = preg_replace_callback("#([\t\r\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i", '_bbcode_encode_mailto', $ret);
    }
    else {
      $ret = preg_replace('#([\t\r\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i', '\\1<a href="mailto:\\2@\\3">\\2@\\3</a>', $ret);
    }

    // Remove our padding
    $ret = str_replace("\x07", '', $ret);
    $body = substr($ret, 1);
  }

  if (variable_get("bbcode_filter_nofollow_$format", 0)) {
    $body = preg_replace('#<a([^>]+)>#i', '<a\\1 rel="nofollow">', $body);
  }

  return $body;
}
283
/**
 * Renders a [hN] heading as an HTML heading wrapped around a named anchor.
 *
 * The anchor name is derived from the heading text: every run of whitespace
 * becomes one underscore, then all remaining non-word characters are dropped.
 *
 * @param $level
 *   Heading level, used verbatim in the <hN> tag name.
 * @param $text
 *   Heading text, inserted verbatim into the output.
 * @return
 *   The HTML for the heading.
 */
function _bbcode_generate_heading($level, $text) {
  // preg_replace() applies the patterns one after the other
  $name = preg_replace(array('/([\s]+)/', '/([\W]+)/'), array('_', ''), $text);

  $opening = '<h'. $level .'>';
  $closing = '</h'. $level .'>';

  return $opening .'<a name="'. $name .'">'. $text .'</a>'. $closing;
}
289
/**
 * Builds a nested HTML list that links to every [hN] heading in a text.
 *
 * Anchor names are computed the same way as in _bbcode_generate_heading():
 * whitespace runs become underscores, other non-word characters are dropped.
 *
 * A deeper heading opens a nested list. A shallower heading closes only the
 * nested lists that are deeper than it, so after h1, h2, h3 a following h2
 * lands next to the first h2 instead of at the top level.
 *
 * List items are emitted without a closing </li>, as before.
 *
 * @param $body
 *   Text containing the [hN]...[/hN] BBCode headings.
 * @param $tag
 *   List element to use, 'ol' or 'ul'.
 * @return
 *   The HTML of the index. An empty list is returned when the text has no
 *   headings.
 */
function _bbcode_generate_index($body, $tag = 'ol') {
  $index = '<'. $tag .">\n";
  // Heading level of each list that is currently open, outermost first.
  // The levels in this stack are always strictly increasing.
  $open_levels = array();

  if (preg_match_all('#\[h([1-6]).*?\](.*?)\[/h([1-6]).*?\]#si', $body, $head_tags, PREG_SET_ORDER)) {
    foreach ($head_tags as $head_tag) {
      $level = (int) $head_tag[1];
      $anchor = preg_replace('/([\s]+)/', '_', $head_tag[2]);
      $anchor = preg_replace('/([\W]+)/', '',  $anchor);

      if (empty($open_levels)) {
        // The first heading defines the level of the outermost list
        $open_levels[] = $level;
      }

      if ($level > $open_levels[count($open_levels) - 1]) {
        // Deeper than the current list: open a nested list
        $index .= '<'. $tag .">\n";
        $open_levels[] = $level;
      }
      else {
        // Close nested lists for as long as the enclosing list is still at
        // or below this heading's level. The outermost list stays open.
        while (count($open_levels) > 1 && $open_levels[count($open_levels) - 2] >= $level) {
          array_pop($open_levels);
          $index .= '</'. $tag .">\n";
        }
        $open_levels[count($open_levels) - 1] = $level;
      }

      $index .= '<li><a href="#'. $anchor .'">'. $head_tag[2] ."</a>\n";
    }
  }

  // Close every list that is still open; the outermost list was opened
  // unconditionally, so at least one closing tag is always needed.
  for ($i = max(count($open_levels), 1); $i > 0; $i--) {
    $index .= '</'. $tag .">\n";
  }
  return $index;
}
325
/**
 * preg_replace_callback() callback: hides a mailto link from harvesters.
 *
 * The link is written by a small script whose source is stored with every
 * byte percent-encoded, so the address does not appear in the page source.
 *
 * Two match layouts are handled:
 *  - automatic links: $matches[1] is the whitespace in front of the address,
 *    $matches[2] the local part and $matches[3] the domain;
 *  - [email] tags: $matches[1] is the address and $matches[2], when present,
 *    the link text.
 *
 * NOTE(review): address and text are not HTML-escaped here; presumably the
 * text has been through an HTML filter before - confirm.
 *
 * @param $matches
 *   Match array as passed by preg_replace_callback().
 * @return
 *   A <script> element that writes the link; for automatic links it is
 *   preceded by the whitespace that was matched in front of the address.
 */
function _bbcode_encode_mailto($matches) {
  $auto_linked = isset($matches[3]);

  if ($auto_linked) {
    $address = $matches[2] .'@'. $matches[3];
    $html = '<a href="mailto:'. $address .'">'. $address .'</a>';
  }
  else {
    $label = isset($matches[2]) ? $matches[2] : $matches[1];
    $html = '<a href="mailto:'. $matches[1] .'" class="bb-email">'. $label .'</a>';
  }

  // The HTML ends up inside a single-quoted JavaScript string. Escape what
  // would end that string early or make it invalid, so that user supplied
  // text can neither break the script nor inject code into it.
  $html = strtr($html, array('\\' => '\\\\', "'" => "\\'", "\n" => '\\n', "\r" => '\\r'));
  $link = 'document.write(\''. $html .'\');';

  // Percent-encode every byte of the script
  $js_encode = '';
  $length = strlen($link);
  for ($x = 0; $x < $length; $x++) {
    $js_encode .= '%' . bin2hex($link[$x]);
  }

  $link = '<script type="text/javascript">eval(unescape(\''.$js_encode.'\'))</script>';
  if ($auto_linked) {
    // Put back the whitespace that the pattern consumed
    $link = $matches[1] . $link;
  }

  return $link;
}
342
/**
 * Neutralizes the content of a [notag] block.
 *
 * Square brackets and "@" are turned into numeric HTML entities, so the
 * content no longer matches any BBCode tag or e-mail pattern.
 *
 * @param $text
 *   Content of the [notag] block; stripslashes() is applied to it first.
 * @return
 *   The text with "[", "]" and "@" replaced by entities.
 */
function _bbcode_notag_tag($text = NULL) {
  $entities = array(
    '[' => '&#91;',
    ']' => '&#93;',
    '@' => '&#64;',
  );
  $plain = stripslashes($text);

  return str_replace(array_keys($entities), array_values($entities), $plain);
}
346
/**
 * Renders the content of a [php] block as syntax highlighted code.
 *
 * @param $text
 *   Content of the [php] block. stripslashes() is applied to it and every
 *   "<br />" is removed before highlighting.
 * @return
 *   The output of highlight_string() wrapped in a <pre> element.
 */
function _bbcode_php_tag($text = NULL) {
  $source = stripslashes($text);
  // Drop the line break tags from the code
  $source = str_replace('<br />', '', $source);

  // Second argument TRUE: return the markup instead of printing it
  $highlighted = highlight_string($source, true);

  return '<pre>'. $highlighted .'</pre>';
}
350
/**
 * Limits a font size to the range 6 to 48.
 *
 * @param $size
 *   Requested size.
 * @return
 *   6 when the size is below 6, 48 when it is above 48, otherwise the value
 *   that was passed in, unchanged.
 */
function _bbcode_round_size_val($size) {
  // Too small: use the lower limit
  if ($size < 6) {
    return 6;
  }

  // Too large: use the upper limit
  if ($size > 48) {
    return 48;
  }

  return $size;
}
359
/**
 * Replaces a BBCode tag that may be nested inside itself.
 *
 * The tag name is first swapped for a \x07 marker in every opening and
 * closing tag. The pattern is expected to exclude \x07 from the tag content,
 * so it can only match a pair without another such tag inside. Repeating the
 * replacement until nothing matches resolves the nesting from the innermost
 * pair outwards.
 *
 * Tags that the pattern never matches (for example an opening tag with an
 * invalid value) get their name back at the end, so no marker characters
 * leak into the returned text.
 *
 * @param $arr
 *   Array with the keys:
 *   - 'tag': tag name, e.g. 'color';
 *   - 'pattern': regular expression that uses \x07 in place of the tag name;
 *   - 'replacement': replacement string for preg_replace();
 *   - 'text': the text to process.
 * @return
 *   The processed text. When the tag markers cannot be inserted, the text is
 *   returned unchanged.
 */
function _bbcode_replace_nest_tag($arr = NULL) {
  $tag = $arr['tag'];

  $text = preg_replace('#(\[[/]*)'. preg_quote($tag, '#') .'(.*?\])#si', '$1'. "\x07" .'$2', $arr['text']);
  if ($text === NULL) {
    // The regex engine failed; leave the text as it was
    return $arr['text'];
  }

  do {
    $count = 0;
    $replaced = preg_replace($arr['pattern'], $arr['replacement'], $text, -1, $count);
    if ($replaced === NULL) {
      // Bad pattern or regex engine error: keep what we have so far
      // instead of throwing the text away
      break;
    }
    $text = $replaced;
  } while ($count > 0);

  // Give tags that were never matched their name back
  $restored = preg_replace('#(\[/*)\x07#', '${1}'. $tag, $text);
  return $restored === NULL ? $text : $restored;
}
368
369