1<?php
2/**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
11 *
12 *   This file is part of GeSHi.
13 *
14 *  GeSHi is free software; you can redistribute it and/or modify
15 *  it under the terms of the GNU General Public License as published by
16 *  the Free Software Foundation; either version 2 of the License, or
17 *  (at your option) any later version.
18 *
19 *  GeSHi is distributed in the hope that it will be useful,
20 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 *  GNU General Public License for more details.
23 *
24 *  You should have received a copy of the GNU General Public License
25 *  along with GeSHi; if not, write to the Free Software
26 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27 *
28 * @package    geshi
29 * @subpackage core
30 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
33 *
34 */
35
36//
37// GeSHi Constants
38// You should use these constant names in your programs instead of
39// their values - you never know when a value may change in a future
40// version
41//
42
/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.8.12');

// Define the root directory for the GeSHi code tree.
// The including script may define GESHI_ROOT itself beforehand; otherwise it
// defaults to the directory that contains this file (with a trailing separator).
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi: the "geshi" directory below GESHI_ROOT
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security.
// Off by default; the including script can define the constant as true beforehand.
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}
60
// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type - use with set_header_type()
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);
92
// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);
100
// Link style constants, one per CSS link pseudo-class
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');
118
/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** Used to work around missing PHP features.
    True unless the running PHP version is newer than 4.3.3, so despite the
    name it is also true for 4.3.3 itself. **/
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
154
/**
 * Make sure we can call stripos(): supply a replacement when PHP itself does
 * not provide one. The replacement performs a case-insensitive search for
 * $needle in $haystack, starting at $offset, and returns the position of the
 * first match relative to the start of the original $haystack, or false when
 * there is no match.
 **/
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // Number of leading bytes cut off before matching; added back to
            // the match position so the result refers to the full haystack.
            $skipped = 0;
            if (!is_null($offset)) {
                $offset = (int) $offset;
                // Negative offsets count from the end, mirroring substr()
                $skipped = ($offset < 0) ? max(0, strlen($haystack) + $offset) : $offset;
                $haystack = substr($haystack, $offset);
            }
            // The "i" modifier is what makes the search case-insensitive
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $skipped;
            }
            return false;
        }
    }
    else {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // preg_match() reports positions relative to the whole subject even
            // when a start offset is given, so no correction is needed here.
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
184
/** Some old PHP / PCRE versions only support a limited number of subpatterns
    in a regular expression; this is the limit GeSHi assumes.
    Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);
196
//Number format specification
//Every value below is a distinct power of two (presumably so that several
//formats can be OR-ed together as bit flags - confirm against the parser).
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a suffix of f, without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array
231
// Error detection - use these to analyse faults.
// These codes are stored in GeSHi::$error and are the keys of GeSHi::$error_messages.
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
246
247
248/**
249 * The GeSHi Class.
250 *
251 * Please refer to the documentation for GeSHi 1.0.X that is available
252 * at http://qbnz.com/highlighter/documentation.php for more information
253 * about how to use this class.
254 *
255 * @package   geshi
256 * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
257 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
258 */
259class GeSHi {
260    /**#@+
261     * @access private
262     */
263    /**
264     * The source code to highlight
265     * @var string
266     */
267    var $source = '';
268
269    /**
270     * The language to use when highlighting
271     * @var string
272     */
273    var $language = '';
274
275    /**
276     * The data for the language used
277     * @var array
278     */
279    var $language_data = array();
280
281    /**
282     * The path to the language files
283     * @var string
284     */
285    var $language_path = GESHI_LANG_ROOT;
286
    /**
     * The error state of the last operation: false when there is no error,
     * otherwise an error code (GESHI_ERROR_*) or an error message
     * @var int|string|false
     * @todo check err reporting works
     */
292    var $error = false;
293
294    /**
295     * Possible error messages
296     * @var array
297     */
298    var $error_messages = array(
299        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
300        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
301        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
302        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
303    );
304
305    /**
306     * Whether highlighting is strict or not
307     * @var boolean
308     */
309    var $strict_mode = false;
310
311    /**
312     * Whether to use CSS classes in output
313     * @var boolean
314     */
315    var $use_classes = false;
316
    /**
     * The type of header to use. Can be one of the following
     * values:
     *
     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: A "pre" wraps the lines when line numbers
     *   are enabled, or the whole code otherwise.
     * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.
     *
     * @var int
     */
327    var $header_type = GESHI_HEADER_PRE;
328
329    /**
330     * Array of permissions for which lexics should be highlighted
331     * @var array
332     */
333    var $lexic_permissions = array(
334        'KEYWORDS' =>    array(),
335        'COMMENTS' =>    array('MULTI' => true),
336        'REGEXPS' =>     array(),
337        'ESCAPE_CHAR' => true,
338        'BRACKETS' =>    true,
339        'SYMBOLS' =>     false,
340        'STRINGS' =>     true,
341        'NUMBERS' =>     true,
342        'METHODS' =>     true,
343        'SCRIPT' =>      true
344    );
345
346    /**
347     * The time it took to parse the code
348     * @var double
349     */
350    var $time = 0;
351
352    /**
353     * The content of the header block
354     * @var string
355     */
356    var $header_content = '';
357
358    /**
359     * The content of the footer block
360     * @var string
361     */
362    var $footer_content = '';
363
364    /**
365     * The style of the header block
366     * @var string
367     */
368    var $header_content_style = '';
369
370    /**
371     * The style of the footer block
372     * @var string
373     */
374    var $footer_content_style = '';
375
376    /**
377     * Tells if a block around the highlighted source should be forced
378     * if not using line numbering
379     * @var boolean
380     */
381    var $force_code_block = false;
382
383    /**
384     * The styles for hyperlinks in the code
385     * @var array
386     */
387    var $link_styles = array();
388
389    /**
390     * Whether important blocks should be recognised or not
391     * @var boolean
392     * @deprecated
393     * @todo REMOVE THIS FUNCTIONALITY!
394     */
395    var $enable_important_blocks = false;
396
397    /**
398     * Styles for important parts of the code
399     * @var string
400     * @deprecated
401     * @todo As above - rethink the whole idea of important blocks as it is buggy and
402     * will be hard to implement in 1.2
403     */
404    var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
405
406    /**
407     * Whether CSS IDs should be added to the code
408     * @var boolean
409     */
410    var $add_ids = false;
411
412    /**
413     * Lines that should be highlighted extra
414     * @var array
415     */
416    var $highlight_extra_lines = array();
417
418    /**
419     * Styles of lines that should be highlighted extra
420     * @var array
421     */
422    var $highlight_extra_lines_styles = array();
423
424    /**
425     * Styles of extra-highlighted lines
426     * @var string
427     */
428    var $highlight_extra_lines_style = 'background-color: #ffc;';
429
430    /**
431     * The line ending
432     * If null, nl2br() will be used on the result string.
433     * Otherwise, all instances of \n will be replaced with $line_ending
434     * @var string
435     */
436    var $line_ending = null;
437
438    /**
439     * Number at which line numbers should start at
440     * @var int
441     */
442    var $line_numbers_start = 1;
443
444    /**
445     * The overall style for this code block
446     * @var string
447     */
448    var $overall_style = 'font-family:monospace;';
449
450    /**
451     *  The style for the actual code
452     * @var string
453     */
454    var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
455
456    /**
457     * The overall class for this code block
458     * @var string
459     */
460    var $overall_class = '';
461
462    /**
463     * The overall ID for this code block
464     * @var string
465     */
466    var $overall_id = '';
467
468    /**
469     * Line number styles
470     * @var string
471     */
472    var $line_style1 = 'font-weight: normal; vertical-align:top;';
473
474    /**
475     * Line number styles for fancy lines
476     * @var string
477     */
478    var $line_style2 = 'font-weight: bold; vertical-align:top;';
479
480    /**
481     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
482     * @var string
483     */
484    var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
485
    /**
     * Flag for how line numbers are displayed
     * (one of the GESHI_*_LINE_NUMBERS constants)
     * @var int
     */
490    var $line_numbers = GESHI_NO_LINE_NUMBERS;
491
492    /**
493     * Flag to decide if multi line spans are allowed. Set it to false to make sure
494     * each tag is closed before and reopened after each linefeed.
495     * @var boolean
496     */
497    var $allow_multiline_span = true;
498
499    /**
500     * The "nth" value for fancy line highlighting
501     * @var int
502     */
503    var $line_nth_row = 0;
504
505    /**
506     * The size of tab stops
507     * @var int
508     */
509    var $tab_width = 8;
510
    /**
     * Should we use language-defined tab stop widths?
     * @var boolean
     */
515    var $use_language_tab_width = false;
516
517    /**
518     * Default target for keyword links
519     * @var string
520     */
521    var $link_target = '';
522
523    /**
524     * The encoding to use for entity encoding
525     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
526     * @var string
527     */
528    var $encoding = 'utf-8';
529
530    /**
531     * Should keywords be linked?
532     * @var boolean
533     */
534    var $keyword_links = true;
535
536    /**
537     * Currently loaded language file
538     * @var string
539     * @since 1.0.7.22
540     */
541    var $loaded_language = '';
542
543    /**
     * Whether the caches needed for parsing are built or not
545     *
546     * @var bool
547     * @since 1.0.8
548     */
549    var $parse_cache_built = false;
550
551    /**
552     * Work around for Suhosin Patch with disabled /e modifier
553     *
554     * Note from suhosins author in config file:
555     * <blockquote>
556     *   The /e modifier inside <code>preg_replace()</code> allows code execution.
557     *   Often it is the cause for remote code execution exploits. It is wise to
558     *   deactivate this feature and test where in the application it is used.
559     *   The developer using the /e modifier should be made aware that he should
560     *   use <code>preg_replace_callback()</code> instead
561     * </blockquote>
562     *
563     * @var array
564     * @since 1.0.8
565     */
566    var $_kw_replace_group = 0;
567    var $_rx_key = 0;
568
569    /**
570     * some "callback parameters" for handle_multiline_regexps
571     *
572     * @since 1.0.8
573     * @access private
574     * @var string
575     */
576    var $_hmr_before = '';
577    var $_hmr_replace = '';
578    var $_hmr_after = '';
579    var $_hmr_key = 0;
580
581    /**#@-*/
582
583    /**
584     * Creates a new GeSHi object, with source and language
585     *
586     * @param string The source code to highlight
587     * @param string The language to highlight the source with
588     * @param string The path to the language file directory. <b>This
589     *               is deprecated!</b> I've backported the auto path
590     *               detection from the 1.1.X dev branch, so now it
591     *               should be automatically set correctly. If you have
592     *               renamed the language directory however, you will
593     *               still need to set the path using this parameter or
594     *               {@link GeSHi->set_language_path()}
595     * @since 1.0.0
596     */
597    function __construct($source = '', $language = '', $path = '') {
598        if (!empty($source)) {
599            $this->set_source($source);
600        }
601        if (!empty($language)) {
602            $this->set_language($language);
603        }
604        $this->set_language_path($path);
605    }
606
607    /**
608     * Returns the version of GeSHi
609     *
610     * @return string
     * @since 1.0.8.11
612     */
613    function get_version()
614    {
615        return GESHI_VERSION;
616    }
617
618    /**
     * Returns an error message associated with the last GeSHi operation,
     * or false if no error has occurred
621     *
622     * @return string|false An error message if there has been an error, else false
623     * @since  1.0.0
624     */
625    function error() {
626        if ($this->error) {
627            //Put some template variables for debugging here ...
628            $debug_tpl_vars = array(
629                '{LANGUAGE}' => $this->language,
630                '{PATH}' => $this->language_path
631            );
632            $msg = str_replace(
633                array_keys($debug_tpl_vars),
634                array_values($debug_tpl_vars),
635                $this->error_messages[$this->error]);
636
637            return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
638        }
639        return false;
640    }
641
642    /**
643     * Gets a human-readable language name (thanks to Simon Patterson
644     * for the idea :))
645     *
646     * @return string The name for the current language
647     * @since  1.0.2
648     */
649    function get_language_name() {
650        if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
651            return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
652        }
653        return $this->language_data['LANG_NAME'];
654    }
655
656    /**
657     * Sets the source code for this object
658     *
659     * @param string The source code to highlight
660     * @since 1.0.0
661     */
662    function set_source($source) {
663        $this->source = $source;
664        $this->highlight_extra_lines = array();
665    }
666
667    /**
668     * Sets the language for this object
669     *
670     * @note since 1.0.8 this function won't reset language-settings by default anymore!
671     *       if you need this set $force_reset = true
672     *
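     * @param string  The name of the language to use
     * @param boolean If true, forget which language file is currently loaded
     *                so that the language file is loaded again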
674     * @since 1.0.0
675     */
676    function set_language($language, $force_reset = false) {
677        if ($force_reset) {
678            $this->loaded_language = false;
679        }
680
681        //Clean up the language name to prevent malicious code injection
682        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
683
684        $language = strtolower($language);
685
686        //Retreive the full filename
687        $file_name = $this->language_path . $language . '.php';
688        if ($file_name == $this->loaded_language) {
689            // this language is already loaded!
690            return;
691        }
692
693        $this->language = $language;
694
695        $this->error = false;
696        $this->strict_mode = GESHI_NEVER;
697
698        //Check if we can read the desired file
699        if (!is_readable($file_name)) {
700            $this->error = GESHI_ERROR_NO_SUCH_LANG;
701            return;
702        }
703
704        // Load the language for parsing
705        $this->load_language($file_name);
706    }
707
708    /**
709     * Sets the path to the directory containing the language files. Note
710     * that this path is relative to the directory of the script that included
711     * geshi.php, NOT geshi.php itself.
712     *
713     * @param string The path to the language directory
714     * @since 1.0.0
715     * @deprecated The path to the language files should now be automatically
716     *             detected, so this method should no longer be needed. The
717     *             1.1.X branch handles manual setting of the path differently
718     *             so this method will disappear in 1.2.0.
719     */
720    function set_language_path($path) {
721        if(strpos($path,':')) {
722            //Security Fix to prevent external directories using fopen wrappers.
723            if(DIRECTORY_SEPARATOR == "\\") {
724                if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
725                    return;
726                }
727            } else {
728                return;
729            }
730        }
731        if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
732            //Security Fix to prevent external directories using fopen wrappers.
733            return;
734        }
735        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
736            //Security Fix to prevent external directories using fopen wrappers.
737            return;
738        }
739        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
740            //Security Fix to prevent external directories using fopen wrappers.
741            return;
742        }
743        if ($path) {
744            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
745            $this->set_language($this->language); // otherwise set_language_path has no effect
746        }
747    }
748
749    /**
750     * Get supported langs or an associative array lang=>full_name.
     * @param boolean $full_names Whether to return an associative array
     *                            lang=>full_name instead of a plain list of langs
752     * @return array
753     */
754    function get_supported_languages($full_names=false)
755    {
756        // return array
757        $back = array();
758
759        // we walk the lang root
760        $dir = dir($this->language_path);
761
762        // foreach entry
763        while (false !== ($entry = $dir->read()))
764        {
765            $full_path = $this->language_path.$entry;
766
767            // Skip all dirs
768            if (is_dir($full_path)) {
769                continue;
770            }
771
772            // we only want lang.php files
773            if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
774                continue;
775            }
776
777            // Raw lang name is here
778            $langname = $matches[1];
779
780            // We want the fullname too?
781            if ($full_names === true)
782            {
783                if (false !== ($fullname = $this->get_language_fullname($langname)))
784                {
785                    $back[$langname] = $fullname; // we go associative
786                }
787            }
788            else
789            {
790                // just store raw langname
791                $back[] = $langname;
792            }
793        }
794
795        $dir->close();
796
797        return $back;
798    }
799
800    /**
801     * Get full_name for a lang or false.
802     * @param string $language short langname (html4strict for example)
803     * @return mixed
804     */
805    function get_language_fullname($language)
806    {
807        //Clean up the language name to prevent malicious code injection
808        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
809
810        $language = strtolower($language);
811
812        // get fullpath-filename for a langname
813        $fullpath = $this->language_path.$language.'.php';
814
815        // we need to get contents :S
816        if (false === ($data = file_get_contents($fullpath))) {
817            $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
818            return false;
819        }
820
821        // match the langname
822        if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
823            $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
824            return false;
825        }
826
827        // return fullname for langname
828        return stripcslashes($matches[1]);
829    }
830
831    /**
832     * Sets the type of header to be used.
833     *
     * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
835     * means more source code but more control over tab width and line-wrapping.
836     * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
837     * control. Default is GESHI_HEADER_PRE.
838     *
839     * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
840     * should be outputted.
841     *
842     * @param int The type of header to be used
843     * @since 1.0.0
844     */
845    function set_header_type($type) {
846        //Check if we got a valid header type
847        if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
848            GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
849            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
850            return;
851        }
852
853        //Set that new header type
854        $this->header_type = $type;
855    }
856
857    /**
858     * Sets the styles for the code that will be outputted
859     * when this object is parsed. The style should be a
860     * string of valid stylesheet declarations
861     *
862     * @param string  The overall style for the outputted code block
863     * @param boolean Whether to merge the styles with the current styles or not
864     * @since 1.0.0
865     */
866    function set_overall_style($style, $preserve_defaults = false) {
867        if (!$preserve_defaults) {
868            $this->overall_style = $style;
869        } else {
870            $this->overall_style .= $style;
871        }
872    }
873
874    /**
875     * Sets the overall classname for this block of code. This
876     * class can then be used in a stylesheet to style this object's
877     * output
878     *
879     * @param string The class name to use for this block of code
880     * @since 1.0.0
881     */
    function set_overall_class($class) {
        // Stored as given; no validation is performed here
        $this->overall_class = $class;
    }
885
886    /**
887     * Sets the overall id for this block of code. This id can then
888     * be used in a stylesheet to style this object's output
889     *
890     * @param string The ID to use for this block of code
891     * @since 1.0.0
892     */
    function set_overall_id($id) {
        // Stored as given; no validation is performed here
        $this->overall_id = $id;
    }
896
897    /**
898     * Sets whether CSS classes should be used to highlight the source. Default
899     * is off, calling this method with no arguments will turn it on
900     *
901     * @param boolean Whether to turn classes on or not
902     * @since 1.0.0
903     */
904    function enable_classes($flag = true) {
905        $this->use_classes = ($flag) ? true : false;
906    }
907
908    /**
909     * Sets the style for the actual code. This should be a string
910     * containing valid stylesheet declarations. If $preserve_defaults is
911     * true, then styles are merged with the default styles, with the
912     * user defined styles having priority
913     *
914     * Note: Use this method to override any style changes you made to
915     * the line numbers if you are using line numbers, else the line of
916     * code will have the same style as the line number! Consult the
917     * GeSHi documentation for more information about this.
918     *
919     * @param string  The style to use for actual code
920     * @param boolean Whether to merge the current styles with the new styles
921     * @since 1.0.2
922     */
923    function set_code_style($style, $preserve_defaults = false) {
924        if (!$preserve_defaults) {
925            $this->code_style = $style;
926        } else {
927            $this->code_style .= $style;
928        }
929    }
930
931    /**
932     * Sets the styles for the line numbers.
933     *
934     * @param string The style for the line numbers that are "normal"
935     * @param string|boolean If a string, this is the style of the line
936     *        numbers that are "fancy", otherwise if boolean then this
937     *        defines whether the normal styles should be merged with the
938     *        new normal styles or not
939     * @param boolean If set, is the flag for whether to merge the "fancy"
940     *        styles with the current styles or not
941     * @since 1.0.2
942     */
943    function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
944        //Check if we got 2 or three parameters
945        if (is_bool($style2)) {
946            $preserve_defaults = $style2;
947            $style2 = '';
948        }
949
950        //Actually set the new styles
951        if (!$preserve_defaults) {
952            $this->line_style1 = $style1;
953            $this->line_style2 = $style2;
954        } else {
955            $this->line_style1 .= $style1;
956            $this->line_style2 .= $style2;
957        }
958    }
959
960    /**
961     * Sets whether line numbers should be displayed.
962     *
963     * Valid values for the first parameter are:
964     *
965     *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
966     *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
967     *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
968     *
969     * For fancy line numbers, the second parameter is used to signal which lines
970     * are to be fancy. For example, if the value of this parameter is 5 then every
971     * 5th line will be fancy.
972     *
973     * @param int How line numbers should be displayed
974     * @param int Defines which lines are fancy
975     * @since 1.0.0
976     */
977    function enable_line_numbers($flag, $nth_row = 5) {
978        if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
979            && GESHI_FANCY_LINE_NUMBERS != $flag) {
980            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
981        }
982        $this->line_numbers = $flag;
983        $this->line_nth_row = $nth_row;
984    }
985
986    /**
     * Sets whether spans and other HTML markup generated by GeSHi can
988     * span over multiple lines or not. Defaults to true to reduce overhead.
989     * Set it to false if you want to manipulate the output or manually display
990     * the code in an ordered list.
991     *
     * @param boolean Whether multiline spans are allowed or not
993     * @since 1.0.7.22
994     */
995    function enable_multiline_span($flag) {
996        $this->allow_multiline_span = (bool) $flag;
997    }
998
999    /**
1000     * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
1001     *
1002     * @see enable_multiline_span
1003     * @return bool
1004     */
1005    function get_multiline_span() {
1006        return $this->allow_multiline_span;
1007    }
1008
1009    /**
1010     * Sets the style for a keyword group. If $preserve_defaults is
1011     * true, then styles are merged with the default styles, with the
1012     * user defined styles having priority
1013     *
1014     * @param int     The key of the keyword group to change the styles of
1015     * @param string  The style to make the keywords
1016     * @param boolean Whether to merge the new styles with the old or just
1017     *                to overwrite them
1018     * @since 1.0.0
1019     */
1020    function set_keyword_group_style($key, $style, $preserve_defaults = false) {
1021        //Set the style for this keyword group
1022        if (!$preserve_defaults) {
1023            $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1024        } else {
1025            $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1026        }
1027
1028        //Update the lexic permissions
1029        if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1030            $this->lexic_permissions['KEYWORDS'][$key] = true;
1031        }
1032    }
1033
1034    /**
1035     * Turns highlighting on/off for a keyword group
1036     *
1037     * @param int     The key of the keyword group to turn on or off
1038     * @param boolean Whether to turn highlighting for that group on or off
1039     * @since 1.0.0
1040     */
1041    function set_keyword_group_highlighting($key, $flag = true) {
1042        $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1043    }
1044
1045    /**
1046     * Sets the styles for comment groups.  If $preserve_defaults is
1047     * true, then styles are merged with the default styles, with the
1048     * user defined styles having priority
1049     *
1050     * @param int     The key of the comment group to change the styles of
1051     * @param string  The style to make the comments
1052     * @param boolean Whether to merge the new styles with the old or just
1053     *                to overwrite them
1054     * @since 1.0.0
1055     */
1056    function set_comments_style($key, $style, $preserve_defaults = false) {
1057        if (!$preserve_defaults) {
1058            $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1059        } else {
1060            $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1061        }
1062    }
1063
1064    /**
1065     * Turns highlighting on/off for comment groups
1066     *
1067     * @param int     The key of the comment group to turn on or off
1068     * @param boolean Whether to turn highlighting for that group on or off
1069     * @since 1.0.0
1070     */
1071    function set_comments_highlighting($key, $flag = true) {
1072        $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1073    }
1074
1075    /**
1076     * Sets the styles for escaped characters. If $preserve_defaults is
1077     * true, then styles are merged with the default styles, with the
1078     * user defined styles having priority
1079     *
1080     * @param string  The style to make the escape characters
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int     Tells the group of escape characters for which style should be set.
1083     * @since 1.0.0
1084     */
1085    function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1086        if (!$preserve_defaults) {
1087            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1088        } else {
1089            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1090        }
1091    }
1092
1093    /**
1094     * Turns highlighting on/off for escaped characters
1095     *
1096     * @param boolean Whether to turn highlighting for escape characters on or off
1097     * @since 1.0.0
1098     */
1099    function set_escape_characters_highlighting($flag = true) {
1100        $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1101    }
1102
1103    /**
1104     * Sets the styles for brackets. If $preserve_defaults is
1105     * true, then styles are merged with the default styles, with the
1106     * user defined styles having priority
1107     *
1108     * This method is DEPRECATED: use set_symbols_style instead.
1109     * This method will be removed in 1.2.X
1110     *
1111     * @param string  The style to make the brackets
1112     * @param boolean Whether to merge the new styles with the old or just
1113     *                to overwrite them
1114     * @since 1.0.0
1115     * @deprecated In favour of set_symbols_style
1116     */
1117    function set_brackets_style($style, $preserve_defaults = false) {
1118        if (!$preserve_defaults) {
1119            $this->language_data['STYLES']['BRACKETS'][0] = $style;
1120        } else {
1121            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1122        }
1123    }
1124
1125    /**
1126     * Turns highlighting on/off for brackets
1127     *
1128     * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
1130     *
1131     * @param boolean Whether to turn highlighting for brackets on or off
1132     * @since 1.0.0
1133     * @deprecated In favour of set_symbols_highlighting
1134     */
1135    function set_brackets_highlighting($flag) {
1136        $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1137    }
1138
1139    /**
1140     * Sets the styles for symbols. If $preserve_defaults is
1141     * true, then styles are merged with the default styles, with the
1142     * user defined styles having priority
1143     *
1144     * @param string  The style to make the symbols
1145     * @param boolean Whether to merge the new styles with the old or just
1146     *                to overwrite them
1147     * @param int     Tells the group of symbols for which style should be set.
1148     * @since 1.0.1
1149     */
1150    function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1151        // Update the style of symbols
1152        if (!$preserve_defaults) {
1153            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1154        } else {
1155            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1156        }
1157
1158        // For backward compatibility
1159        if (0 == $group) {
1160            $this->set_brackets_style ($style, $preserve_defaults);
1161        }
1162    }
1163
1164    /**
1165     * Turns highlighting on/off for symbols
1166     *
1167     * @param boolean Whether to turn highlighting for symbols on or off
1168     * @since 1.0.0
1169     */
1170    function set_symbols_highlighting($flag) {
1171        // Update lexic permissions for this symbol group
1172        $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1173
1174        // For backward compatibility
1175        $this->set_brackets_highlighting ($flag);
1176    }
1177
1178    /**
1179     * Sets the styles for strings. If $preserve_defaults is
1180     * true, then styles are merged with the default styles, with the
1181     * user defined styles having priority
1182     *
     * @param string  The style to make the strings
1184     * @param boolean Whether to merge the new styles with the old or just
1185     *                to overwrite them
1186     * @param int     Tells the group of strings for which style should be set.
1187     * @since 1.0.0
1188     */
1189    function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1190        if (!$preserve_defaults) {
1191            $this->language_data['STYLES']['STRINGS'][$group] = $style;
1192        } else {
1193            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1194        }
1195    }
1196
1197    /**
1198     * Turns highlighting on/off for strings
1199     *
1200     * @param boolean Whether to turn highlighting for strings on or off
1201     * @since 1.0.0
1202     */
1203    function set_strings_highlighting($flag) {
1204        $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1205    }
1206
1207    /**
1208     * Sets the styles for strict code blocks. If $preserve_defaults is
1209     * true, then styles are merged with the default styles, with the
1210     * user defined styles having priority
1211     *
1212     * @param string  The style to make the script blocks
1213     * @param boolean Whether to merge the new styles with the old or just
1214     *                to overwrite them
1215     * @param int     Tells the group of script blocks for which style should be set.
1216     * @since 1.0.8.4
1217     */
1218    function set_script_style($style, $preserve_defaults = false, $group = 0) {
1219        // Update the style of symbols
1220        if (!$preserve_defaults) {
1221            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1222        } else {
1223            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1224        }
1225    }
1226
1227    /**
1228     * Sets the styles for numbers. If $preserve_defaults is
1229     * true, then styles are merged with the default styles, with the
1230     * user defined styles having priority
1231     *
1232     * @param string  The style to make the numbers
1233     * @param boolean Whether to merge the new styles with the old or just
1234     *                to overwrite them
1235     * @param int     Tells the group of numbers for which style should be set.
1236     * @since 1.0.0
1237     */
1238    function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1239        if (!$preserve_defaults) {
1240            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1241        } else {
1242            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1243        }
1244    }
1245
1246    /**
1247     * Turns highlighting on/off for numbers
1248     *
1249     * @param boolean Whether to turn highlighting for numbers on or off
1250     * @since 1.0.0
1251     */
1252    function set_numbers_highlighting($flag) {
1253        $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1254    }
1255
1256    /**
1257     * Sets the styles for methods. $key is a number that references the
1258     * appropriate "object splitter" - see the language file for the language
1259     * you are highlighting to get this number. If $preserve_defaults is
1260     * true, then styles are merged with the default styles, with the
1261     * user defined styles having priority
1262     *
1263     * @param int     The key of the object splitter to change the styles of
1264     * @param string  The style to make the methods
1265     * @param boolean Whether to merge the new styles with the old or just
1266     *                to overwrite them
1267     * @since 1.0.0
1268     */
1269    function set_methods_style($key, $style, $preserve_defaults = false) {
1270        if (!$preserve_defaults) {
1271            $this->language_data['STYLES']['METHODS'][$key] = $style;
1272        } else {
1273            $this->language_data['STYLES']['METHODS'][$key] .= $style;
1274        }
1275    }
1276
1277    /**
1278     * Turns highlighting on/off for methods
1279     *
1280     * @param boolean Whether to turn highlighting for methods on or off
1281     * @since 1.0.0
1282     */
1283    function set_methods_highlighting($flag) {
1284        $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1285    }
1286
1287    /**
1288     * Sets the styles for regexps. If $preserve_defaults is
1289     * true, then styles are merged with the default styles, with the
1290     * user defined styles having priority
1291     *
     * @param int     The key of the regular expression group to change the styles of
     * @param string  The style to make the regular expression matches
1293     * @param boolean Whether to merge the new styles with the old or just
1294     *                to overwrite them
1295     * @since 1.0.0
1296     */
1297    function set_regexps_style($key, $style, $preserve_defaults = false) {
1298        if (!$preserve_defaults) {
1299            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1300        } else {
1301            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1302        }
1303    }
1304
1305    /**
1306     * Turns highlighting on/off for regexps
1307     *
1308     * @param int     The key of the regular expression group to turn on or off
1309     * @param boolean Whether to turn highlighting for the regular expression group on or off
1310     * @since 1.0.0
1311     */
1312    function set_regexps_highlighting($key, $flag) {
1313        $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1314    }
1315
1316    /**
1317     * Sets whether a set of keywords are checked for in a case sensitive manner
1318     *
1319     * @param int The key of the keyword group to change the case sensitivity of
1320     * @param boolean Whether to check in a case sensitive manner or not
1321     * @since 1.0.0
1322     */
1323    function set_case_sensitivity($key, $case) {
1324        $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1325    }
1326
1327    /**
1328     * Sets the case that keywords should use when found. Use the constants:
1329     *
1330     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1331     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1332     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1333     *
1334     * @param int A constant specifying what to do with matched keywords
1335     * @since 1.0.1
1336     */
1337    function set_case_keywords($case) {
1338        if (in_array($case, array(
1339            GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1340            $this->language_data['CASE_KEYWORDS'] = $case;
1341        }
1342    }
1343
1344    /**
1345     * Sets how many spaces a tab is substituted for
1346     *
     * Widths below one are replaced by the default width of 8
1348     *
1349     * @param int The tab width
1350     * @since 1.0.0
1351     */
1352    function set_tab_width($width) {
1353        $this->tab_width = intval($width);
1354
1355        //Check if it fit's the constraints:
1356        if ($this->tab_width < 1) {
1357            //Return it to the default
1358            $this->tab_width = 8;
1359        }
1360    }
1361
1362    /**
     * Sets whether or not to use tab-stop width specified by language
1364     *
1365     * @param boolean Whether to use language-specific tab-stop widths
1366     * @since 1.0.7.20
1367     */
1368    function set_use_language_tab_width($use) {
1369        $this->use_language_tab_width = (bool) $use;
1370    }
1371
1372    /**
1373     * Returns the tab width to use, based on the current language and user
1374     * preference
1375     *
1376     * @return int Tab width
1377     * @since 1.0.7.20
1378     */
1379    function get_real_tab_width() {
1380        if (!$this->use_language_tab_width ||
1381            !isset($this->language_data['TAB_WIDTH'])) {
1382            return $this->tab_width;
1383        } else {
1384            return $this->language_data['TAB_WIDTH'];
1385        }
1386    }
1387
1388    /**
1389     * Enables/disables strict highlighting. Default is off, calling this
1390     * method without parameters will turn it on. See documentation
1391     * for more details on strict mode and where to use it.
1392     *
1393     * @param boolean Whether to enable strict mode or not
1394     * @since 1.0.0
1395     */
1396    function enable_strict_mode($mode = true) {
1397        if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1398            $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1399        }
1400    }
1401
1402    /**
1403     * Disables all highlighting
1404     *
1405     * @since 1.0.0
1406     * @todo  Rewrite with array traversal
1407     * @deprecated In favour of enable_highlighting
1408     */
1409    function disable_highlighting() {
1410        $this->enable_highlighting(false);
1411    }
1412
1413    /**
1414     * Enables all highlighting
1415     *
1416     * The optional flag parameter was added in version 1.0.7.21 and can be used
1417     * to enable (true) or disable (false) all highlighting.
1418     *
1419     * @since 1.0.0
1420     * @param boolean A flag specifying whether to enable or disable all highlighting
1421     * @todo  Rewrite with array traversal
1422     */
1423    function enable_highlighting($flag = true) {
1424        $flag = $flag ? true : false;
1425        foreach ($this->lexic_permissions as $key => $value) {
1426            if (is_array($value)) {
1427                foreach ($value as $k => $v) {
1428                    $this->lexic_permissions[$key][$k] = $flag;
1429                }
1430            } else {
1431                $this->lexic_permissions[$key] = $flag;
1432            }
1433        }
1434
1435        // Context blocks
1436        $this->enable_important_blocks = $flag;
1437    }
1438
1439    /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or 'text' if no language could be found for the extension
1442     *
1443     * @param string The extension to get a language name for
1444     * @param array  A lookup array to use instead of the default one
1445     * @since 1.0.5
1446     * @todo Re-think about how this method works (maybe make it private and/or make it
1447     *       a extension->lang lookup?)
1448     * @todo static?
1449     */
1450    function get_language_name_from_extension( $extension, $lookup = array() ) {
1451        $extension = strtolower($extension);
1452
1453        if ( !is_array($lookup) || empty($lookup)) {
1454            $lookup = array(
1455                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1456                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1457                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1458                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1459                'abap' => array('abap'),
1460                'actionscript' => array('as'),
1461                'ada' => array('a', 'ada', 'adb', 'ads'),
1462                'apache' => array('conf'),
1463                'asm' => array('ash', 'asm', 'inc'),
1464                'asp' => array('asp'),
1465                'bash' => array('sh'),
1466                'bf' => array('bf'),
1467                'c' => array('c', 'h'),
1468                'c_mac' => array('c', 'h'),
1469                'caddcl' => array(),
1470                'cadlisp' => array(),
1471                'cdfg' => array('cdfg'),
1472                'cobol' => array('cbl'),
1473                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1474                'csharp' => array('cs'),
1475                'css' => array('css'),
1476                'd' => array('d'),
1477                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1478                'diff' => array('diff', 'patch'),
1479                'dos' => array('bat', 'cmd'),
1480                'gdb' => array('kcrash', 'crash', 'bt'),
1481                'gettext' => array('po', 'pot'),
1482                'gml' => array('gml'),
1483                'gnuplot' => array('plt'),
1484                'groovy' => array('groovy'),
1485                'haskell' => array('hs'),
1486                'haxe' => array('hx'),
1487                'html4strict' => array('html', 'htm'),
1488                'ini' => array('ini', 'desktop'),
1489                'java' => array('java'),
1490                'javascript' => array('js'),
1491                'klonec' => array('kl1'),
1492                'klonecpp' => array('klx'),
1493                'latex' => array('tex'),
1494                'lisp' => array('lisp'),
1495                'lua' => array('lua'),
1496                'matlab' => array('m'),
1497                'mpasm' => array(),
1498                'mysql' => array('sql'),
1499                'nsis' => array(),
1500                'objc' => array(),
1501                'oobas' => array(),
1502                'oracle8' => array(),
1503                'oracle10' => array(),
1504                'pascal' => array('pas'),
1505                'perl' => array('pl', 'pm'),
1506                'php' => array('php', 'php5', 'phtml', 'phps'),
1507                'povray' => array('pov'),
1508                'providex' => array('pvc', 'pvx'),
1509                'prolog' => array('pl'),
1510                'python' => array('py'),
1511                'qbasic' => array('bi'),
1512                'reg' => array('reg'),
1513                'ruby' => array('rb'),
1514                'sas' => array('sas'),
1515                'scala' => array('scala'),
1516                'scheme' => array('scm'),
1517                'scilab' => array('sci'),
1518                'smalltalk' => array('st'),
1519                'smarty' => array(),
1520                'tcl' => array('tcl'),
1521                'text' => array('txt'),
1522                'vb' => array('bas'),
1523                'vbnet' => array(),
1524                'visualfoxpro' => array(),
1525                'whitespace' => array('ws'),
1526                'xml' => array('xml', 'svg', 'xrc'),
1527                'z80' => array('z80', 'asm', 'inc')
1528            );
1529        }
1530
1531        foreach ($lookup as $lang => $extensions) {
1532            if (in_array($extension, $extensions)) {
1533                return $lang;
1534            }
1535        }
1536
1537        return 'text';
1538    }
1539
1540    /**
1541     * Given a file name, this method loads its contents in, and attempts
1542     * to set the language automatically. An optional lookup table can be
1543     * passed for looking up the language name. If not specified a default
1544     * table is used
1545     *
1546     * The language table is in the form
1547     * <pre>array(
1548     *   'lang_name' => array('extension', 'extension', ...),
1549     *   'lang_name' ...
1550     * );</pre>
1551     *
1552     * @param string The filename to load the source from
1553     * @param array  A lookup array to use instead of the default one
1554     * @todo Complete rethink of this and above method
1555     * @since 1.0.5
1556     */
1557    function load_from_file($file_name, $lookup = array()) {
1558        if (is_readable($file_name)) {
1559            $this->set_source(file_get_contents($file_name));
1560            $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1561        } else {
1562            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1563        }
1564    }
1565
1566    /**
1567     * Adds a keyword to a keyword group for highlighting
1568     *
1569     * @param int    The key of the keyword group to add the keyword to
1570     * @param string The word to add to the keyword group
1571     * @since 1.0.0
1572     */
1573    function add_keyword($key, $word) {
1574        if (!is_array($this->language_data['KEYWORDS'][$key])) {
1575            $this->language_data['KEYWORDS'][$key] = array();
1576        }
1577        if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1578            $this->language_data['KEYWORDS'][$key][] = $word;
1579
1580            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1581            if ($this->parse_cache_built) {
1582                $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1583                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1584            }
1585        }
1586    }
1587
1588    /**
1589     * Removes a keyword from a keyword group
1590     *
1591     * @param int    The key of the keyword group to remove the keyword from
1592     * @param string The word to remove from the keyword group
     * @param bool   Whether to automatically recompile the optimized regexp list or not.
1594     *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1595     *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1596     *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1597     *               it might be too expensive to recompile the regexp list for every removal if you want to
1598     *               remove a lot of keywords.
1599     * @since 1.0.0
1600     */
1601    function remove_keyword($key, $word, $recompile = true) {
1602        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1603        if ($key_to_remove !== false) {
1604            unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1605
1606            //NEW in 1.0.8, optionally recompile keyword group
1607            if ($recompile && $this->parse_cache_built) {
1608                $this->optimize_keyword_group($key);
1609            }
1610        }
1611    }
1612
1613    /**
1614     * Creates a new keyword group
1615     *
1616     * @param int    The key of the keyword group to create
1617     * @param string The styles for the keyword group
     * @param boolean Whether the keyword group is case sensitive or not
1619     * @param array  The words to use for the keyword group
1620     * @since 1.0.0
1621     */
1622    function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1623        $words = (array) $words;
1624        if  (empty($words)) {
1625            // empty word lists mess up highlighting
1626            return false;
1627        }
1628
1629        //Add the new keyword group internally
1630        $this->language_data['KEYWORDS'][$key] = $words;
1631        $this->lexic_permissions['KEYWORDS'][$key] = true;
1632        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1633        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1634
1635        //NEW in 1.0.8, cache keyword regexp
1636        if ($this->parse_cache_built) {
1637            $this->optimize_keyword_group($key);
1638        }
1639    }
1640
1641    /**
1642     * Removes a keyword group
1643     *
1644     * @param int    The key of the keyword group to remove
1645     * @since 1.0.0
1646     */
1647    function remove_keyword_group ($key) {
1648        //Remove the keyword group internally
1649        unset($this->language_data['KEYWORDS'][$key]);
1650        unset($this->lexic_permissions['KEYWORDS'][$key]);
1651        unset($this->language_data['CASE_SENSITIVE'][$key]);
1652        unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1653
1654        //NEW in 1.0.8
1655        unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1656    }
1657
1658    /**
1659     * compile optimized regexp list for keyword group
1660     *
1661     * @param int   The key of the keyword group to compile & optimize
1662     * @since 1.0.8
1663     */
1664    function optimize_keyword_group($key) {
1665        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1666            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1667        $space_as_whitespace = false;
1668        if(isset($this->language_data['PARSER_CONTROL'])) {
1669            if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1670                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1671                    $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1672                }
1673                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1674                    if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1675                        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1676                    }
1677                }
1678            }
1679        }
1680        if($space_as_whitespace) {
1681            foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1682                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1683                    str_replace(" ", "\\s+", $rxv);
1684            }
1685        }
1686    }
1687
1688    /**
1689     * Sets the content of the header block
1690     *
1691     * @param string The content of the header block
1692     * @since 1.0.2
1693     */
1694    function set_header_content($content) {
1695        $this->header_content = $content;
1696    }
1697
1698    /**
1699     * Sets the content of the footer block
1700     *
1701     * @param string The content of the footer block
1702     * @since 1.0.2
1703     */
1704    function set_footer_content($content) {
1705        $this->footer_content = $content;
1706    }
1707
1708    /**
1709     * Sets the style for the header content
1710     *
1711     * @param string The style for the header content
1712     * @since 1.0.2
1713     */
1714    function set_header_content_style($style) {
1715        $this->header_content_style = $style;
1716    }
1717
1718    /**
1719     * Sets the style for the footer content
1720     *
1721     * @param string The style for the footer content
1722     * @since 1.0.2
1723     */
1724    function set_footer_content_style($style) {
1725        $this->footer_content_style = $style;
1726    }
1727
1728    /**
1729     * Sets whether to force a surrounding block around
1730     * the highlighted code or not
1731     *
1732     * @param boolean Tells whether to enable or disable this feature
1733     * @since 1.0.7.20
1734     */
1735    function enable_inner_code_block($flag) {
1736        $this->force_code_block = (bool)$flag;
1737    }
1738
1739    /**
1740     * Sets the base URL to be used for keywords
1741     *
1742     * @param int The key of the keyword group to set the URL for
1743     * @param string The URL to set for the group. If {FNAME} is in
1744     *               the url somewhere, it is replaced by the keyword
1745     *               that the URL is being made for
1746     * @since 1.0.2
1747     */
1748    function set_url_for_keyword_group($group, $url) {
1749        $this->language_data['URLS'][$group] = $url;
1750    }
1751
1752    /**
1753     * Sets styles for links in code
1754     *
1755     * @param int A constant that specifies what state the style is being
1756     *            set for - e.g. :hover or :visited
1757     * @param string The styles to use for that state
1758     * @since 1.0.2
1759     */
1760    function set_link_styles($type, $styles) {
1761        $this->link_styles[$type] = $styles;
1762    }
1763
1764    /**
1765     * Sets the target for links in code
1766     *
1767     * @param string The target for links in the code, e.g. _blank
1768     * @since 1.0.3
1769     */
1770    function set_link_target($target) {
1771        if (!$target) {
1772            $this->link_target = '';
1773        } else {
1774            $this->link_target = ' target="' . $target . '"';
1775        }
1776    }
1777
1778    /**
1779     * Sets styles for important parts of the code
1780     *
1781     * @param string The styles to use on important parts of the code
1782     * @since 1.0.2
1783     */
1784    function set_important_styles($styles) {
1785        $this->important_styles = $styles;
1786    }
1787
1788    /**
1789     * Sets whether context-important blocks are highlighted
1790     *
1791     * @param boolean Tells whether to enable or disable highlighting of important blocks
1792     * @todo REMOVE THIS SHIZ FROM GESHI!
1793     * @deprecated
1794     * @since 1.0.2
1795     */
1796    function enable_important_blocks($flag) {
1797        $this->enable_important_blocks = ( $flag ) ? true : false;
1798    }
1799
1800    /**
1801     * Whether CSS IDs should be added to each line
1802     *
1803     * @param boolean If true, IDs will be added to each line.
1804     * @since 1.0.2
1805     */
1806    function enable_ids($flag = true) {
1807        $this->add_ids = ($flag) ? true : false;
1808    }
1809
1810    /**
1811     * Specifies which lines to highlight extra
1812     *
1813     * The extra style parameter was added in 1.0.7.21.
1814     *
1815     * @param mixed An array of line numbers to highlight, or just a line
1816     *              number on its own.
1817     * @param string A string specifying the style to use for this line.
1818     *              If null is specified, the default style is used.
1819     *              If false is specified, the line will be removed from
1820     *              special highlighting
1821     * @since 1.0.2
1822     * @todo  Some data replication here that could be cut down on
1823     */
1824    function highlight_lines_extra($lines, $style = null) {
1825        if (is_array($lines)) {
1826            //Split up the job using single lines at a time
1827            foreach ($lines as $line) {
1828                $this->highlight_lines_extra($line, $style);
1829            }
1830        } else {
1831            //Mark the line as being highlighted specially
1832            $lines = intval($lines);
1833            $this->highlight_extra_lines[$lines] = $lines;
1834
1835            //Decide on which style to use
1836            if ($style === null) { //Check if we should use default style
1837                unset($this->highlight_extra_lines_styles[$lines]);
1838            } elseif ($style === false) { //Check if to remove this line
1839                unset($this->highlight_extra_lines[$lines]);
1840                unset($this->highlight_extra_lines_styles[$lines]);
1841            } else {
1842                $this->highlight_extra_lines_styles[$lines] = $style;
1843            }
1844        }
1845    }
1846
1847    /**
1848     * Sets the style for extra-highlighted lines
1849     *
1850     * @param string The style for extra-highlighted lines
1851     * @since 1.0.2
1852     */
1853    function set_highlight_lines_extra_style($styles) {
1854        $this->highlight_extra_lines_style = $styles;
1855    }
1856
1857    /**
1858     * Sets the line-ending
1859     *
1860     * @param string The new line-ending
1861     * @since 1.0.2
1862     */
1863    function set_line_ending($line_ending) {
1864        $this->line_ending = (string)$line_ending;
1865    }
1866
1867    /**
1868     * Sets what number line numbers should start at. Should
1869     * be a positive integer, and will be converted to one.
1870     *
1871     * <b>Warning:</b> Using this method will add the "start"
1872     * attribute to the &lt;ol&gt; that is used for line numbering.
1873     * This is <b>not</b> valid XHTML strict, so if that's what you
1874     * care about then don't use this method. Firefox is getting
1875     * support for the CSS method of doing this in 1.1 and Opera
1876     * has support for the CSS method, but (of course) IE doesn't
1877     * so it's not worth doing it the CSS way yet.
1878     *
1879     * @param int The number to start line numbers at
1880     * @since 1.0.2
1881     */
1882    function start_line_numbers_at($number) {
1883        $this->line_numbers_start = abs(intval($number));
1884    }
1885
1886    /**
1887     * Sets the encoding used for htmlspecialchars(), for international
1888     * support.
1889     *
1890     * NOTE: This is not needed for now because htmlspecialchars() is not
1891     * being used (it has a security hole in PHP4 that has not been patched).
1892     * Maybe in a future version it may make a return for speed reasons, but
1893     * I doubt it.
1894     *
1895     * @param string The encoding to use for the source
1896     * @since 1.0.3
1897     */
1898    function set_encoding($encoding) {
1899        if ($encoding) {
1900          $this->encoding = strtolower($encoding);
1901        }
1902    }
1903
1904    /**
1905     * Turns linking of keywords on or off.
1906     *
1907     * @param boolean If true, links will be added to keywords
1908     * @since 1.0.2
1909     */
1910    function enable_keyword_links($enable = true) {
1911        $this->keyword_links = (bool) $enable;
1912    }
1913
1914    /**
1915     * Setup caches needed for styling. This is automatically called in
1916     * parse_code() and get_stylesheet() when appropriate. This function helps
1917     * stylesheet generators as they rely on some style information being
1918     * preprocessed
1919     *
1920     * @since 1.0.8
1921     * @access private
1922     */
1923    function build_style_cache() {
1924        //Build the style cache needed to highlight numbers appropriate
1925        if($this->lexic_permissions['NUMBERS']) {
1926            //First check what way highlighting information for numbers are given
1927            if(!isset($this->language_data['NUMBERS'])) {
1928                $this->language_data['NUMBERS'] = 0;
1929            }
1930
1931            if(is_array($this->language_data['NUMBERS'])) {
1932                $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1933            } else {
1934                $this->language_data['NUMBERS_CACHE'] = array();
1935                if(!$this->language_data['NUMBERS']) {
1936                    $this->language_data['NUMBERS'] =
1937                        GESHI_NUMBER_INT_BASIC |
1938                        GESHI_NUMBER_FLT_NONSCI;
1939                }
1940
1941                for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1942                    //Rearrange style indices if required ...
1943                    if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1944                        $this->language_data['STYLES']['NUMBERS'][$i] =
1945                            $this->language_data['STYLES']['NUMBERS'][1<<$i];
1946                        unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1947                    }
1948
1949                    //Check if this bit is set for highlighting
1950                    if($j&1) {
1951                        //So this bit is set ...
1952                        //Check if it belongs to group 0 or the actual stylegroup
1953                        if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1954                            $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1955                        } else {
1956                            if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1957                                $this->language_data['NUMBERS_CACHE'][0] = 0;
1958                            }
1959                            $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1960                        }
1961                    }
1962                }
1963            }
1964        }
1965    }
1966
1967    /**
1968     * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1969     * This function makes stylesheet generators much faster as they do not need these caches.
1970     *
1971     * @since 1.0.8
1972     * @access private
1973     */
1974    function build_parse_cache() {
1975        // cache symbol regexp
1976        //As this is a costy operation, we avoid doing it for multiple groups ...
1977        //Instead we perform it for all symbols at once.
1978        //
1979        //For this to work, we need to reorganize the data arrays.
1980        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1981            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1982
1983            $this->language_data['SYMBOL_DATA'] = array();
1984            $symbol_preg_multi = array(); // multi char symbols
1985            $symbol_preg_single = array(); // single char symbols
1986            foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1987                if (is_array($symbols)) {
1988                    foreach ($symbols as $sym) {
1989                        $sym = $this->hsc($sym);
1990                        if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1991                            $this->language_data['SYMBOL_DATA'][$sym] = $key;
1992                            if (isset($sym[1])) { // multiple chars
1993                                $symbol_preg_multi[] = preg_quote($sym, '/');
1994                            } else { // single char
1995                                if ($sym == '-') {
1996                                    // don't trigger range out of order error
1997                                    $symbol_preg_single[] = '\-';
1998                                } else {
1999                                    $symbol_preg_single[] = preg_quote($sym, '/');
2000                                }
2001                            }
2002                        }
2003                    }
2004                } else {
2005                    $symbols = $this->hsc($symbols);
2006                    if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
2007                        $this->language_data['SYMBOL_DATA'][$symbols] = 0;
2008                        if (isset($symbols[1])) { // multiple chars
2009                            $symbol_preg_multi[] = preg_quote($symbols, '/');
2010                        } elseif ($symbols == '-') {
2011                            // don't trigger range out of order error
2012                            $symbol_preg_single[] = '\-';
2013                        } else { // single char
2014                            $symbol_preg_single[] = preg_quote($symbols, '/');
2015                        }
2016                    }
2017                }
2018            }
2019
2020            //Now we have an array with each possible symbol as the key and the style as the actual data.
2021            //This way we can set the correct style just the moment we highlight ...
2022            //
2023            //Now we need to rewrite our array to get a search string that
2024            $symbol_preg = array();
2025            if (!empty($symbol_preg_multi)) {
2026                rsort($symbol_preg_multi);
2027                $symbol_preg[] = implode('|', $symbol_preg_multi);
2028            }
2029            if (!empty($symbol_preg_single)) {
2030                rsort($symbol_preg_single);
2031                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2032            }
2033            $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2034        }
2035
2036        // cache optimized regexp for keyword matching
2037        // remove old cache
2038        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2039        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2040            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2041                    $this->lexic_permissions['KEYWORDS'][$key]) {
2042                $this->optimize_keyword_group($key);
2043            }
2044        }
2045
2046        // brackets
2047        if ($this->lexic_permissions['BRACKETS']) {
2048            $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2049            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2050                $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2051                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2052                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2053                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2054                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2055                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2056                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2057                );
2058            }
2059            else {
2060                $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2061                    '<| class="br0">&#91;|>',
2062                    '<| class="br0">&#93;|>',
2063                    '<| class="br0">&#40;|>',
2064                    '<| class="br0">&#41;|>',
2065                    '<| class="br0">&#123;|>',
2066                    '<| class="br0">&#125;|>',
2067                );
2068            }
2069        }
2070
2071        //Build the parse cache needed to highlight numbers appropriate
2072        if($this->lexic_permissions['NUMBERS']) {
2073            //Check if the style rearrangements have been processed ...
2074            //This also does some preprocessing to check which style groups are useable ...
2075            if(!isset($this->language_data['NUMBERS_CACHE'])) {
2076                $this->build_style_cache();
2077            }
2078
2079            //Number format specification
2080            //All this formats are matched case-insensitively!
2081            static $numbers_format = array(
2082                GESHI_NUMBER_INT_BASIC =>
2083                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2084                GESHI_NUMBER_INT_CSTYLE =>
2085                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2086                GESHI_NUMBER_BIN_SUFFIX =>
2087                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2088                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2089                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2090                GESHI_NUMBER_BIN_PREFIX_0B =>
2091                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2092                GESHI_NUMBER_OCT_PREFIX =>
2093                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2094                GESHI_NUMBER_OCT_PREFIX_0O =>
2095                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2096                GESHI_NUMBER_OCT_PREFIX_AT =>
2097                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2098                GESHI_NUMBER_OCT_SUFFIX =>
2099                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2100                GESHI_NUMBER_HEX_PREFIX =>
2101                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2102                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2103                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2104                GESHI_NUMBER_HEX_SUFFIX =>
2105                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2106                GESHI_NUMBER_FLT_NONSCI =>
2107                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2108                GESHI_NUMBER_FLT_NONSCI_F =>
2109                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2110                GESHI_NUMBER_FLT_SCI_SHORT =>
2111                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2112                GESHI_NUMBER_FLT_SCI_ZERO =>
2113                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2114                );
2115
2116            //At this step we have an associative array with flag groups for a
2117            //specific style or an string denoting a regexp given its index.
2118            $this->language_data['NUMBERS_RXCACHE'] = array();
2119            foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2120                if(is_string($rxdata)) {
2121                    $regexp = $rxdata;
2122                } else {
2123                    //This is a bitfield of number flags to highlight:
2124                    //Build an array, implode them together and make this the actual RX
2125                    $rxuse = array();
2126                    for($i = 1; $i <= $rxdata; $i<<=1) {
2127                        if($rxdata & $i) {
2128                            $rxuse[] = $numbers_format[$i];
2129                        }
2130                    }
2131                    $regexp = implode("|", $rxuse);
2132                }
2133
2134                $this->language_data['NUMBERS_RXCACHE'][$key] =
2135                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
2136            }
2137
2138            if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2139                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2140            }
2141        }
2142
2143        $this->parse_cache_built = true;
2144    }
2145
2146    /**
2147     * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2149     *
2150     * This should only be called ONCE, cos it's SLOW! If you want to highlight
2151     * the same source multiple times, you're better off doing a whole lot of
2152     * str_replaces to replace the &lt;span&gt;s
2153     *
2154     * @since 1.0.0
2155     */
2156    function parse_code () {
2157        // Start the timer
2158        $start_time = microtime();
2159
2160        // Replace all newlines to a common form.
2161        $code = str_replace("\r\n", "\n", $this->source);
2162        $code = str_replace("\r", "\n", $code);
2163
2164        // Firstly, if there is an error, we won't highlight
2165        if ($this->error) {
2166            //Escape the source for output
2167            $result = $this->hsc($this->source);
2168
2169            //This fix is related to SF#1923020, but has to be applied regardless of
2170            //actually highlighting symbols.
2171            $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2172
2173            // Timing is irrelevant
2174            $this->set_time($start_time, $start_time);
2175            $this->finalise($result);
2176            return $result;
2177        }
2178
2179        // make sure the parse cache is up2date
2180        if (!$this->parse_cache_built) {
2181            $this->build_parse_cache();
2182        }
2183
2184        // Initialise various stuff
2185        $length           = strlen($code);
2186        $COMMENT_MATCHED  = false;
2187        $stuff_to_parse   = '';
2188        $endresult        = '';
2189
2190        // "Important" selections are handled like multiline comments
2191        // @todo GET RID OF THIS SHIZ
2192        if ($this->enable_important_blocks) {
2193            $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2194        }
2195
2196        if ($this->strict_mode) {
2197            // Break the source into bits. Each bit will be a portion of the code
2198            // within script delimiters - for example, HTML between < and >
2199            $k = 0;
2200            $parts = array();
2201            $matches = array();
2202            $next_match_pointer = null;
2203            // we use a copy to unset delimiters on demand (when they are not found)
2204            $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2205            $i = 0;
2206            while ($i < $length) {
2207                $next_match_pos = $length + 1; // never true
2208                foreach ($delim_copy as $dk => $delimiters) {
2209                    if(is_array($delimiters)) {
2210                        foreach ($delimiters as $open => $close) {
2211                            // make sure the cache is setup properly
2212                            if (!isset($matches[$dk][$open])) {
2213                                $matches[$dk][$open] = array(
2214                                    'next_match' => -1,
2215                                    'dk' => $dk,
2216
2217                                    'open' => $open, // needed for grouping of adjacent code blocks (see below)
2218                                    'open_strlen' => strlen($open),
2219
2220                                    'close' => $close,
2221                                    'close_strlen' => strlen($close),
2222                                );
2223                            }
2224                            // Get the next little bit for this opening string
2225                            if ($matches[$dk][$open]['next_match'] < $i) {
2226                                // only find the next pos if it was not already cached
2227                                $open_pos = strpos($code, $open, $i);
2228                                if ($open_pos === false) {
2229                                    // no match for this delimiter ever
2230                                    unset($delim_copy[$dk][$open]);
2231                                    continue;
2232                                }
2233                                $matches[$dk][$open]['next_match'] = $open_pos;
2234                            }
2235                            if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2236                                //So we got a new match, update the close_pos
2237                                $matches[$dk][$open]['close_pos'] =
2238                                    strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2239
2240                                $next_match_pointer =& $matches[$dk][$open];
2241                                $next_match_pos = $matches[$dk][$open]['next_match'];
2242                            }
2243                        }
2244                    } else {
2245                        //So we should match an RegExp as Strict Block ...
2246                        /**
2247                         * The value in $delimiters is expected to be an RegExp
2248                         * containing exactly 2 matching groups:
2249                         *  - Group 1 is the opener
2250                         *  - Group 2 is the closer
2251                         */
2252                        if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2253                            preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2254                            //We got a match ...
2255                            if(isset($matches_rx['start']) && isset($matches_rx['end']))
2256                            {
2257                                $matches[$dk] = array(
2258                                    'next_match' => $matches_rx['start'][1],
2259                                    'dk' => $dk,
2260
2261                                    'close_strlen' => strlen($matches_rx['end'][0]),
2262                                    'close_pos' => $matches_rx['end'][1],
2263                                    );
2264                            } else {
2265                                $matches[$dk] = array(
2266                                    'next_match' => $matches_rx[1][1],
2267                                    'dk' => $dk,
2268
2269                                    'close_strlen' => strlen($matches_rx[2][0]),
2270                                    'close_pos' => $matches_rx[2][1],
2271                                    );
2272                            }
2273                        } else {
2274                            // no match for this delimiter ever
2275                            unset($delim_copy[$dk]);
2276                            continue;
2277                        }
2278
2279                        if ($matches[$dk]['next_match'] <= $next_match_pos) {
2280                            $next_match_pointer =& $matches[$dk];
2281                            $next_match_pos = $matches[$dk]['next_match'];
2282                        }
2283                    }
2284                }
2285
2286                // non-highlightable text
2287                $parts[$k] = array(
2288                    1 => substr($code, $i, $next_match_pos - $i)
2289                );
2290                ++$k;
2291
2292                if ($next_match_pos > $length) {
2293                    // out of bounds means no next match was found
2294                    break;
2295                }
2296
2297                // highlightable code
2298                $parts[$k][0] = $next_match_pointer['dk'];
2299
2300                //Only combine for non-rx script blocks
2301                if(is_array($delim_copy[$next_match_pointer['dk']])) {
2302                    // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2303                    $i = $next_match_pos + $next_match_pointer['open_strlen'];
2304                    while (true) {
2305                        $close_pos = strpos($code, $next_match_pointer['close'], $i);
2306                        if ($close_pos == false) {
2307                            break;
2308                        }
2309                        $i = $close_pos + $next_match_pointer['close_strlen'];
2310                        if ($i == $length) {
2311                            break;
2312                        }
2313                        if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2314                            substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2315                            // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2316                            foreach ($matches as $submatches) {
2317                                foreach ($submatches as $match) {
2318                                    if ($match['next_match'] == $i) {
2319                                        // a different block already matches here!
2320                                        break 3;
2321                                    }
2322                                }
2323                            }
2324                        } else {
2325                            break;
2326                        }
2327                    }
2328                } else {
2329                    $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2330                    $i = $close_pos;
2331                }
2332
2333                if ($close_pos === false) {
2334                    // no closing delimiter found!
2335                    $parts[$k][1] = substr($code, $next_match_pos);
2336                    ++$k;
2337                    break;
2338                } else {
2339                    $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2340                    ++$k;
2341                }
2342            }
2343            unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2344            $num_parts = $k;
2345
2346            if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2347                // when we have only one part, we don't have anything to highlight at all.
2348                // if we have a "maybe" strict language, this should be handled as highlightable code
2349                $parts = array(
2350                    0 => array(
2351                        0 => '',
2352                        1 => ''
2353                    ),
2354                    1 => array(
2355                        0 => null,
2356                        1 => $parts[0][1]
2357                    )
2358                );
2359                $num_parts = 2;
2360            }
2361
2362        } else {
2363            // Not strict mode - simply dump the source into
2364            // the array at index 1 (the first highlightable block)
2365            $parts = array(
2366                0 => array(
2367                    0 => '',
2368                    1 => ''
2369                ),
2370                1 => array(
2371                    0 => null,
2372                    1 => $code
2373                )
2374            );
2375            $num_parts = 2;
2376        }
2377
2378        //Unset variables we won't need any longer
2379        unset($code);
2380
2381        //Preload some repeatedly used values regarding hardquotes ...
2382        $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2383        $hq_strlen = strlen($hq);
2384
2385        //Preload if line numbers are to be generated afterwards
2386        //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2387        $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2388            !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2389
2390        //preload the escape char for faster checking ...
2391        $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2392
2393        // this is used for single-line comments
2394        $sc_disallowed_before = "";
2395        $sc_disallowed_after = "";
2396
2397        if (isset($this->language_data['PARSER_CONTROL'])) {
2398            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2399                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2400                    $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2401                }
2402                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2403                    $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2404                }
2405            }
2406        }
2407
2408        //Fix for SF#1932083: Multichar Quotemarks unsupported
2409        $is_string_starter = array();
2410        if ($this->lexic_permissions['STRINGS']) {
2411            foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2412                if (!isset($is_string_starter[$quotemark[0]])) {
2413                    $is_string_starter[$quotemark[0]] = (string)$quotemark;
2414                } elseif (is_string($is_string_starter[$quotemark[0]])) {
2415                    $is_string_starter[$quotemark[0]] = array(
2416                        $is_string_starter[$quotemark[0]],
2417                        $quotemark);
2418                } else {
2419                    $is_string_starter[$quotemark[0]][] = $quotemark;
2420                }
2421            }
2422        }
2423
2424        // Now we go through each part. We know that even-indexed parts are
2425        // code that shouldn't be highlighted, and odd-indexed parts should
2426        // be highlighted
2427        for ($key = 0; $key < $num_parts; ++$key) {
2428            $STRICTATTRS = '';
2429
            // Even-indexed parts are not highlighted: pass them through hsc() and skip ahead
2431            if (!($key & 1)) {
2432                // Else not a block to highlight
2433                $endresult .= $this->hsc($parts[$key][1]);
2434                unset($parts[$key]);
2435                continue;
2436            }
2437
2438            $result = '';
2439            $part = $parts[$key][1];
2440
2441            $highlight_part = true;
2442            if ($this->strict_mode && !is_null($parts[$key][0])) {
2443                // get the class key for this block of code
2444                $script_key = $parts[$key][0];
2445                $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2446                if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2447                    $this->lexic_permissions['SCRIPT']) {
2448                    // Add a span element around the source to
2449                    // highlight the overall source block
2450                    if (!$this->use_classes &&
2451                        $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2452                        $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2453                    } else {
2454                        $attributes = ' class="sc' . $script_key . '"';
2455                    }
2456                    $result .= "<span$attributes>";
2457                    $STRICTATTRS = $attributes;
2458                }
2459            }
2460
2461            if ($highlight_part) {
2462                // Now, highlight the code in this block. This code
2463                // is really the engine of GeSHi (along with the method
2464                // parse_non_string_part).
2465
2466                // cache comment regexps incrementally
2467                $next_comment_regexp_key = '';
2468                $next_comment_regexp_pos = -1;
2469                $next_comment_multi_pos = -1;
2470                $next_comment_single_pos = -1;
2471                $comment_regexp_cache_per_key = array();
2472                $comment_multi_cache_per_key = array();
2473                $comment_single_cache_per_key = array();
2474                $next_open_comment_multi = '';
2475                $next_comment_single_key = '';
2476                $escape_regexp_cache_per_key = array();
2477                $next_escape_regexp_key = '';
2478                $next_escape_regexp_pos = -1;
2479
2480                $length = strlen($part);
2481                for ($i = 0; $i < $length; ++$i) {
2482                    // Get the next char
2483                    $char = $part[$i];
2484                    $char_len = 1;
2485
2486                    // update regexp comment cache if needed
2487                    if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2488                        $next_comment_regexp_pos = $length;
2489                        foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2490                            $match_i = false;
2491                            if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2492                                ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2493                                 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2494                                // we have already matched something
2495                                if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2496                                    // this comment is never matched
2497                                    continue;
2498                                }
2499                                $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2500                            } elseif (
2501                                //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2502                                (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2503                                (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2504                                ) {
2505                                $match_i = $match[0][1];
2506                                if (GESHI_PHP_PRE_433) {
2507                                    $match_i += $i;
2508                                }
2509
2510                                $comment_regexp_cache_per_key[$comment_key] = array(
2511                                    'key' => $comment_key,
2512                                    'length' => strlen($match[0][0]),
2513                                    'pos' => $match_i
2514                                );
2515                            } else {
2516                                $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517                                continue;
2518                            }
2519
2520                            if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521                                $next_comment_regexp_pos = $match_i;
2522                                $next_comment_regexp_key = $comment_key;
2523                                if ($match_i === $i) {
2524                                    break;
2525                                }
2526                            }
2527                        }
2528                    }
2529
2530                    $string_started = false;
2531
2532                    if (isset($is_string_starter[$char])) {
2533                        // Possibly the start of a new string ...
2534
2535                        //Check which starter it was ...
2536                        //Fix for SF#1932083: Multichar Quotemarks unsupported
2537                        if (is_array($is_string_starter[$char])) {
2538                            $char_new = '';
2539                            foreach ($is_string_starter[$char] as $testchar) {
2540                                if ($testchar === substr($part, $i, strlen($testchar)) &&
2541                                    strlen($testchar) > strlen($char_new)) {
2542                                    $char_new = $testchar;
2543                                    $string_started = true;
2544                                }
2545                            }
2546                            if ($string_started) {
2547                                $char = $char_new;
2548                            }
2549                        } else {
2550                            $testchar = $is_string_starter[$char];
2551                            if ($testchar === substr($part, $i, strlen($testchar))) {
2552                                $char = $testchar;
2553                                $string_started = true;
2554                            }
2555                        }
2556                        $char_len = strlen($char);
2557                    }
2558
2559                    if ($string_started && ($i != $next_comment_regexp_pos)) {
2560                        // Hand out the correct style information for this string
2561                        $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562                        if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563                            !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564                            $string_key = 0;
2565                        }
2566
2567                        // parse the stuff before this
2568                        $result .= $this->parse_non_string_part($stuff_to_parse);
2569                        $stuff_to_parse = '';
2570
2571                        if (!$this->use_classes) {
2572                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573                        } else {
2574                            $string_attributes = ' class="st'.$string_key.'"';
2575                        }
2576
2577                        // now handle the string
2578                        $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579                        $start = $i + $char_len;
2580                        $string_open = true;
2581
2582                        if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583                            $next_escape_regexp_pos = $length;
2584                        }
2585
2586                        do {
2587                            //Get the regular ending pos ...
2588                            $close_pos = strpos($part, $char, $start);
2589                            if(false === $close_pos) {
2590                                $close_pos = $length;
2591                            }
2592
2593                            if($this->lexic_permissions['ESCAPE_CHAR']) {
2594                                // update escape regexp cache if needed
2595                                if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596                                    $next_escape_regexp_pos = $length;
2597                                    foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598                                        $match_i = false;
2599                                        if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600                                            ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601                                             $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602                                            // we have already matched something
2603                                            if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
                                                // this escape regexp is never matched
2605                                                continue;
2606                                            }
2607                                            $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608                                        } elseif (
2609                                            //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2610                                            (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2611                                            (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2612                                            ) {
2613                                            $match_i = $match[0][1];
2614                                            if (GESHI_PHP_PRE_433) {
2615                                                $match_i += $start;
2616                                            }
2617
2618                                            $escape_regexp_cache_per_key[$escape_key] = array(
2619                                                'key' => $escape_key,
2620                                                'length' => strlen($match[0][0]),
2621                                                'pos' => $match_i
2622                                            );
2623                                        } else {
2624                                            $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2625                                            continue;
2626                                        }
2627
2628                                        if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2629                                            $next_escape_regexp_pos = $match_i;
2630                                            $next_escape_regexp_key = $escape_key;
2631                                            if ($match_i === $start) {
2632                                                break;
2633                                            }
2634                                        }
2635                                    }
2636                                }
2637
2638                                //Find the next simple escape position
2639                                if('' != $this->language_data['ESCAPE_CHAR']) {
2640                                    $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2641                                    if(false === $simple_escape) {
2642                                        $simple_escape = $length;
2643                                    }
2644                                } else {
2645                                    $simple_escape = $length;
2646                                }
2647                            } else {
2648                                $next_escape_regexp_pos = $length;
2649                                $simple_escape = $length;
2650                            }
2651
2652                            if($simple_escape < $next_escape_regexp_pos &&
2653                                $simple_escape < $length &&
2654                                $simple_escape < $close_pos) {
                                //The next escape sequence is a simple one ...
2656                                $es_pos = $simple_escape;
2657
2658                                //Add the stuff not in the string yet ...
2659                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2660
2661                                //Get the style for this escaped char ...
2662                                if (!$this->use_classes) {
2663                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2664                                } else {
2665                                    $escape_char_attributes = ' class="es0"';
2666                                }
2667
2668                                //Add the style for the escape char ...
2669                                $string .= "<span$escape_char_attributes>" .
2670                                    GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2671
2672                                //Get the byte AFTER the ESCAPE_CHAR we just found
2673                                $es_char = $part[$es_pos + 1];
2674                                if ($es_char == "\n") {
2675                                    // don't put a newline around newlines
2676                                    $string .= "</span>\n";
2677                                    $start = $es_pos + 2;
2678                                } elseif (ord($es_char) >= 128) {
                                    //This is a non-ASCII char (UTF8 or single byte)
2680                                    //This code tries to work around SF#2037598 ...
2681                                    if(function_exists('mb_substr')) {
2682                                        $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2683                                        $string .= $es_char_m . '</span>';
2684                                    } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2685                                        if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2686                                            "|\xE0[\xA0-\xBF][\x80-\xBF]".
2687                                            "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2688                                            "|\xED[\x80-\x9F][\x80-\xBF]".
2689                                            "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2690                                            "|[\xF1-\xF3][\x80-\xBF]{3}".
2691                                            "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2692                                            $part, $es_char_m, null, $es_pos + 1)) {
2693                                            $es_char_m = $es_char_m[0];
2694                                        } else {
2695                                            $es_char_m = $es_char;
2696                                        }
2697                                        $string .= $this->hsc($es_char_m) . '</span>';
2698                                    } else {
2699                                        $es_char_m = $this->hsc($es_char);
2700                                    }
2701                                    $start = $es_pos + strlen($es_char_m) + 1;
2702                                } else {
2703                                    $string .= $this->hsc($es_char) . '</span>';
2704                                    $start = $es_pos + 2;
2705                                }
2706                            } elseif ($next_escape_regexp_pos < $length &&
2707                                $next_escape_regexp_pos < $close_pos) {
2708                                $es_pos = $next_escape_regexp_pos;
2709                                //Add the stuff not in the string yet ...
2710                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2711
2712                                //Get the key and length of this match ...
2713                                $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2714                                $escape_str = substr($part, $es_pos, $escape['length']);
2715                                $escape_key = $escape['key'];
2716
2717                                //Get the style for this escaped char ...
2718                                if (!$this->use_classes) {
2719                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2720                                } else {
2721                                    $escape_char_attributes = ' class="es' . $escape_key . '"';
2722                                }
2723
2724                                //Add the style for the escape char ...
2725                                $string .= "<span$escape_char_attributes>" .
2726                                    $this->hsc($escape_str) . '</span>';
2727
2728                                $start = $es_pos + $escape['length'];
2729                            } else {
2730                                //Copy the remainder of the string ...
2731                                $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2732                                $start = $close_pos + $char_len;
2733                                $string_open = false;
2734                            }
2735                        } while($string_open);
2736
2737                        if ($check_linenumbers) {
                            // Are line numbers used? If so, we should end the string before
2739                            // the newline and begin it again (so when <li>s are put in the source
2740                            // remains XHTML compliant)
2741                            // note to self: This opens up possibility of config files specifying
2742                            // that languages can/cannot have multiline strings???
2743                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2744                        }
2745
2746                        $result .= $string;
2747                        $string = '';
2748                        $i = $start - 1;
2749                        continue;
2750                    } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2751                        substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2752                        // The start of a hard quoted string
2753                        if (!$this->use_classes) {
2754                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2755                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2756                        } else {
2757                            $string_attributes = ' class="st_h"';
2758                            $escape_char_attributes = ' class="es_h"';
2759                        }
2760                        // parse the stuff before this
2761                        $result .= $this->parse_non_string_part($stuff_to_parse);
2762                        $stuff_to_parse = '';
2763
2764                        // now handle the string
2765                        $string = '';
2766
2767                        // look for closing quote
2768                        $start = $i + $hq_strlen;
2769                        while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2770                            $start = $close_pos + 1;
2771                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2772                                (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2773                                // make sure this quote is not escaped
2774                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2775                                    if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
                                        // check whether this quote is escaped or if it is something like '\\'
2777                                        $escape_char_pos = $close_pos - 1;
2778                                        while ($escape_char_pos > 0
2779                                                && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2780                                            --$escape_char_pos;
2781                                        }
2782                                        if (($close_pos - $escape_char_pos) & 1) {
2783                                            // uneven number of escape chars => this quote is escaped
2784                                            continue 2;
2785                                        }
2786                                    }
2787                                }
2788                            }
2789
2790                            // found closing quote
2791                            break;
2792                        }
2793
2794                        //Found the closing delimiter?
2795                        if (!$close_pos) {
2796                            // span till the end of this $part when no closing delimiter is found
2797                            $close_pos = $length;
2798                        }
2799
2800                        //Get the actual string
2801                        $string = substr($part, $i, $close_pos - $i + 1);
2802                        $i = $close_pos;
2803
2804                        // handle escape chars and encode html chars
2805                        // (special because when we have escape chars within our string they may not be escaped)
2806                        if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2807                            $start = 0;
2808                            $new_string = '';
2809                            while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
                                // html escape stuff before
2811                                $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2812                                // check if this is a hard escape
2813                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2814                                    if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2815                                        // indeed, this is a hardescape
2816                                        $new_string .= "<span$escape_char_attributes>" .
2817                                            $this->hsc($hardescape) . '</span>';
2818                                        $start = $es_pos + strlen($hardescape);
2819                                        continue 2;
2820                                    }
2821                                }
2822                                // not a hard escape, but a normal escape
2823                                // they come in pairs of two
2824                                $c = 0;
2825                                while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2826                                    && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2827                                    && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2828                                    $c += 2;
2829                                }
2830                                if ($c) {
2831                                    $new_string .= "<span$escape_char_attributes>" .
2832                                        str_repeat($escaped_escape_char, $c) .
2833                                        '</span>';
2834                                    $start = $es_pos + $c;
2835                                } else {
2836                                    // this is just a single lonely escape char...
2837                                    $new_string .= $escaped_escape_char;
2838                                    $start = $es_pos + 1;
2839                                }
2840                            }
2841                            $string = $new_string . $this->hsc(substr($string, $start));
2842                        } else {
2843                            $string = $this->hsc($string);
2844                        }
2845
2846                        if ($check_linenumbers) {
                            // Are line numbers used? If so, we should end the string before
2848                            // the newline and begin it again (so when <li>s are put in the source
2849                            // remains XHTML compliant)
2850                            // note to self: This opens up possibility of config files specifying
2851                            // that languages can/cannot have multiline strings???
2852                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2853                        }
2854
2855                        $result .= "<span$string_attributes>" . $string . '</span>';
2856                        $string = '';
2857                        continue;
2858                    } else {
2859                        //Have a look for regexp comments
2860                        if ($i == $next_comment_regexp_pos) {
2861                            $COMMENT_MATCHED = true;
2862                            $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2863                            $test_str = $this->hsc(substr($part, $i, $comment['length']));
2864
2865                            //@todo If remove important do remove here
2866                            if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2867                                if (!$this->use_classes) {
2868                                    $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2869                                } else {
2870                                    $attributes = ' class="co' . $comment['key'] . '"';
2871                                }
2872
2873                                $test_str = "<span$attributes>" . $test_str . "</span>";
2874
2875                                // Short-cut through all the multiline code
2876                                if ($check_linenumbers) {
2877                                    // strreplace to put close span and open span around multiline newlines
2878                                    $test_str = str_replace(
2879                                        "\n", "</span>\n<span$attributes>",
2880                                        str_replace("\n ", "\n&nbsp;", $test_str)
2881                                    );
2882                                }
2883                            }
2884
2885                            $i += $comment['length'] - 1;
2886
2887                            // parse the rest
2888                            $result .= $this->parse_non_string_part($stuff_to_parse);
2889                            $stuff_to_parse = '';
2890                        }
2891
2892                        // If we haven't matched a regexp comment, try multi-line comments
2893                        if (!$COMMENT_MATCHED) {
2894                            // Is this a multiline comment?
2895                            if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2896                                $next_comment_multi_pos = $length;
2897                                foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2898                                    $match_i = false;
2899                                    if (isset($comment_multi_cache_per_key[$open]) &&
2900                                        ($comment_multi_cache_per_key[$open] >= $i ||
2901                                         $comment_multi_cache_per_key[$open] === false)) {
2902                                        // we have already matched something
2903                                        if ($comment_multi_cache_per_key[$open] === false) {
2904                                            // this comment is never matched
2905                                            continue;
2906                                        }
2907                                        $match_i = $comment_multi_cache_per_key[$open];
2908                                    } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2909                                        $comment_multi_cache_per_key[$open] = $match_i;
2910                                    } else {
2911                                        $comment_multi_cache_per_key[$open] = false;
2912                                        continue;
2913                                    }
2914                                    if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2915                                        $next_comment_multi_pos = $match_i;
2916                                        $next_open_comment_multi = $open;
2917                                        if ($match_i === $i) {
2918                                            break;
2919                                        }
2920                                    }
2921                                }
2922                            }
2923                            if ($i == $next_comment_multi_pos) {
2924                                $open = $next_open_comment_multi;
2925                                $close = $this->language_data['COMMENT_MULTI'][$open];
2926                                $open_strlen = strlen($open);
2927                                $close_strlen = strlen($close);
2928                                $COMMENT_MATCHED = true;
2929                                $test_str_match = $open;
2930                                //@todo If remove important do remove here
2931                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2932                                    $open == GESHI_START_IMPORTANT) {
2933                                    if ($open != GESHI_START_IMPORTANT) {
2934                                        if (!$this->use_classes) {
2935                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2936                                        } else {
2937                                            $attributes = ' class="coMULTI"';
2938                                        }
2939                                        $test_str = "<span$attributes>" . $this->hsc($open);
2940                                    } else {
2941                                        if (!$this->use_classes) {
2942                                            $attributes = ' style="' . $this->important_styles . '"';
2943                                        } else {
2944                                            $attributes = ' class="imp"';
2945                                        }
2946
2947                                        // We don't include the start of the comment if it's an
2948                                        // "important" part
2949                                        $test_str = "<span$attributes>";
2950                                    }
2951                                } else {
2952                                    $test_str = $this->hsc($open);
2953                                }
2954
2955                                $close_pos = strpos( $part, $close, $i + $open_strlen );
2956
2957                                if ($close_pos === false) {
2958                                    $close_pos = $length;
2959                                }
2960
2961                                // Short-cut through all the multiline code
2962                                $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2963                                if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2964                                    $test_str_match == GESHI_START_IMPORTANT) &&
2965                                    $check_linenumbers) {
2966
2967                                    // strreplace to put close span and open span around multiline newlines
2968                                    $test_str .= str_replace(
2969                                        "\n", "</span>\n<span$attributes>",
2970                                        str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2971                                    );
2972                                } else {
2973                                    $test_str .= $rest_of_comment;
2974                                }
2975
2976                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2977                                    $test_str_match == GESHI_START_IMPORTANT) {
2978                                    $test_str .= '</span>';
2979                                }
2980
2981                                $i = $close_pos + $close_strlen - 1;
2982
2983                                // parse the rest
2984                                $result .= $this->parse_non_string_part($stuff_to_parse);
2985                                $stuff_to_parse = '';
2986                            }
2987                        }
2988
2989                        // If we haven't matched a multiline comment, try single-line comments
2990                        if (!$COMMENT_MATCHED) {
2991                            // cache potential single line comment occurances
2992                            if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2993                                $next_comment_single_pos = $length;
2994                                foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2995                                    $match_i = false;
2996                                    if (isset($comment_single_cache_per_key[$comment_key]) &&
2997                                        ($comment_single_cache_per_key[$comment_key] >= $i ||
2998                                         $comment_single_cache_per_key[$comment_key] === false)) {
2999                                        // we have already matched something
3000                                        if ($comment_single_cache_per_key[$comment_key] === false) {
3001                                            // this comment is never matched
3002                                            continue;
3003                                        }
3004                                        $match_i = $comment_single_cache_per_key[$comment_key];
3005                                    } elseif (
3006                                        // case sensitive comments
3007                                        ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3008                                        ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3009                                        // non case sensitive
3010                                        (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3011                                          (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3012                                        $comment_single_cache_per_key[$comment_key] = $match_i;
3013                                    } else {
3014                                        $comment_single_cache_per_key[$comment_key] = false;
3015                                        continue;
3016                                    }
3017                                    if ($match_i !== false && $match_i < $next_comment_single_pos) {
3018                                        $next_comment_single_pos = $match_i;
3019                                        $next_comment_single_key = $comment_key;
3020                                        if ($match_i === $i) {
3021                                            break;
3022                                        }
3023                                    }
3024                                }
3025                            }
3026                            if ($next_comment_single_pos == $i) {
3027                                $comment_key = $next_comment_single_key;
3028                                $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3029                                $com_len = strlen($comment_mark);
3030
3031                                // This check will find special variables like $# in bash
3032                                // or compiler directives of Delphi beginning {$
3033                                if ((empty($sc_disallowed_before) || ($i == 0) ||
3034                                    (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3035                                    (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3036                                    (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3037                                {
3038                                    // this is a valid comment
3039                                    $COMMENT_MATCHED = true;
3040                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3041                                        if (!$this->use_classes) {
3042                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3043                                        } else {
3044                                            $attributes = ' class="co' . $comment_key . '"';
3045                                        }
3046                                        $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3047                                    } else {
3048                                        $test_str = $this->hsc($comment_mark);
3049                                    }
3050
3051                                    //Check if this comment is the last in the source
3052                                    $close_pos = strpos($part, "\n", $i);
3053                                    $oops = false;
3054                                    if ($close_pos === false) {
3055                                        $close_pos = $length;
3056                                        $oops = true;
3057                                    }
3058                                    $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3059                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3060                                        $test_str .= "</span>";
3061                                    }
3062
3063                                    // Take into account that the comment might be the last in the source
3064                                    if (!$oops) {
3065                                      $test_str .= "\n";
3066                                    }
3067
3068                                    $i = $close_pos;
3069
3070                                    // parse the rest
3071                                    $result .= $this->parse_non_string_part($stuff_to_parse);
3072                                    $stuff_to_parse = '';
3073                                }
3074                            }
3075                        }
3076                    }
3077
3078                    // Where are we adding this char?
3079                    if (!$COMMENT_MATCHED) {
3080                        $stuff_to_parse .= $char;
3081                    } else {
3082                        $result .= $test_str;
3083                        unset($test_str);
3084                        $COMMENT_MATCHED = false;
3085                    }
3086                }
3087                // Parse the last bit
3088                $result .= $this->parse_non_string_part($stuff_to_parse);
3089                $stuff_to_parse = '';
3090            } else {
3091                $result .= $this->hsc($part);
3092            }
3093            // Close the <span> that surrounds the block
3094            if ($STRICTATTRS != '') {
3095                $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3096                $result .= '</span>';
3097            }
3098
3099            $endresult .= $result;
3100            unset($part, $parts[$key], $result);
3101        }
3102
3103        //This fix is related to SF#1923020, but has to be applied regardless of
3104        //actually highlighting symbols.
3105        /** NOTE: memorypeak #3 */
3106        $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3107
3108//        // Parse the last stuff (redundant?)
3109//        $result .= $this->parse_non_string_part($stuff_to_parse);
3110
3111        // Lop off the very first and last spaces
3112//        $result = substr($result, 1, -1);
3113
3114        // We're finished: stop timing
3115        $this->set_time($start_time, microtime());
3116
3117        $this->finalise($endresult);
3118        return $endresult;
3119    }
3120
3121    /**
3122     * Swaps out spaces and tabs for HTML indentation. Not needed if
3123     * the code is in a pre block...
3124     *
3125     * @param  string The source to indent (reference!)
3126     * @since  1.0.0
3127     * @access private
3128     */
3129    function indent(&$result) {
3130        /// Replace tabs with the correct number of spaces
3131        if (false !== strpos($result, "\t")) {
3132            $lines = explode("\n", $result);
3133            $result = null;//Save memory while we process the lines individually
3134            $tab_width = $this->get_real_tab_width();
3135            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3136
3137            for ($key = 0, $n = count($lines); $key < $n; $key++) {
3138                $line = $lines[$key];
3139                if (false === strpos($line, "\t")) {
3140                    continue;
3141                }
3142
3143                $pos = 0;
3144                $length = strlen($line);
3145                $lines[$key] = ''; // reduce memory
3146
3147                $IN_TAG = false;
3148                for ($i = 0; $i < $length; ++$i) {
3149                    $char = $line[$i];
3150                    // Simple engine to work out whether we're in a tag.
3151                    // If we are we modify $pos. This is so we ignore HTML
3152                    // in the line and only workout the tab replacement
3153                    // via the actual content of the string
3154                    // This test could be improved to include strings in the
3155                    // html so that < or > would be allowed in user's styles
3156                    // (e.g. quotes: '<' '>'; or similar)
3157                    if ($IN_TAG) {
3158                        if ('>' == $char) {
3159                            $IN_TAG = false;
3160                        }
3161                        $lines[$key] .= $char;
3162                    } elseif ('<' == $char) {
3163                        $IN_TAG = true;
3164                        $lines[$key] .= '<';
3165                    } elseif ('&' == $char) {
3166                        $substr = substr($line, $i + 3, 5);
3167                        $posi = strpos($substr, ';');
3168                        if (false === $posi) {
3169                            ++$pos;
3170                        } else {
3171                            $pos -= $posi+2;
3172                        }
3173                        $lines[$key] .= $char;
3174                    } elseif ("\t" == $char) {
3175                        $str = '';
3176                        // OPTIMISE - move $strs out. Make an array:
3177                        // $tabs = array(
3178                        //  1 => '&nbsp;',
3179                        //  2 => '&nbsp; ',
3180                        //  3 => '&nbsp; &nbsp;' etc etc
3181                        // to use instead of building a string every time
3182                        $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3183                        if (($pos & 1) || 1 == $tab_end_width) {
3184                            $str .= substr($tab_string, 6, $tab_end_width);
3185                        } else {
3186                            $str .= substr($tab_string, 0, $tab_end_width+5);
3187                        }
3188                        $lines[$key] .= $str;
3189                        $pos += $tab_end_width;
3190
3191                        if (false === strpos($line, "\t", $i + 1)) {
3192                            $lines[$key] .= substr($line, $i + 1);
3193                            break;
3194                        }
3195                    } elseif (0 == $pos && ' ' == $char) {
3196                        $lines[$key] .= '&nbsp;';
3197                        ++$pos;
3198                    } else {
3199                        $lines[$key] .= $char;
3200                        ++$pos;
3201                    }
3202                }
3203            }
3204            $result = implode("\n", $lines);
3205            unset($lines);//We don't need the lines separated beyond this --- free them!
3206        }
3207        // Other whitespace
3208        // BenBE: Fix to reduce the number of replacements to be done
3209        $result = preg_replace('/^ /m', '&nbsp;', $result);
3210        $result = str_replace('  ', ' &nbsp;', $result);
3211
3212        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3213            if ($this->line_ending === null) {
3214                $result = nl2br($result);
3215            } else {
3216                $result = str_replace("\n", $this->line_ending, $result);
3217            }
3218        }
3219    }
3220
3221    /**
3222     * Changes the case of a keyword for those languages where a change is asked for
3223     *
3224     * @param  string The keyword to change the case of
3225     * @return string The keyword with its case changed
3226     * @since  1.0.0
3227     * @access private
3228     */
3229    function change_case($instr) {
3230        switch ($this->language_data['CASE_KEYWORDS']) {
3231            case GESHI_CAPS_UPPER:
3232                return strtoupper($instr);
3233            case GESHI_CAPS_LOWER:
3234                return strtolower($instr);
3235            default:
3236                return $instr;
3237        }
3238    }
3239
3240    /**
3241     * Handles replacements of keywords to include markup and links if requested
3242     *
     * @param  array  The regexp match array: index 0 is the full match, index 1 the matched keyword
     * @return string The HTML for the match found
3245     * @since  1.0.8
3246     * @access private
3247     *
3248     * @todo   Get rid of ender in keyword links
3249     */
3250    function handle_keyword_replace($match) {
3251        $k = $this->_kw_replace_group;
3252        $keyword = $match[0];
3253        $keyword_match = $match[1];
3254
3255        $before = '';
3256        $after = '';
3257
3258        if ($this->keyword_links) {
3259            // Keyword links have been ebabled
3260
3261            if (isset($this->language_data['URLS'][$k]) &&
3262                $this->language_data['URLS'][$k] != '') {
3263                // There is a base group for this keyword
3264
3265                // Old system: strtolower
3266                //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3267                // New system: get keyword from language file to get correct case
3268                if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3269                    strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3270                    foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3271                        if (strcasecmp($word, $keyword_match) == 0) {
3272                            break;
3273                        }
3274                    }
3275                } else {
3276                    $word = $keyword_match;
3277                }
3278
3279                $before = '<|UR1|"' .
3280                    str_replace(
3281                        array(
3282                            '{FNAME}',
3283                            '{FNAMEL}',
3284                            '{FNAMEU}',
3285                            '.'),
3286                        array(
3287                            str_replace('+', '%20', urlencode($this->hsc($word))),
3288                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3289                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3290                            '<DOT>'),
3291                        $this->language_data['URLS'][$k]
3292                    ) . '">';
3293                $after = '</a>';
3294            }
3295        }
3296
3297        return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3298    }
3299
3300    /**
3301     * handles regular expressions highlighting-definitions with callback functions
3302     *
3303     * @note this is a callback, don't use it directly
3304     *
3305     * @param array the matches array
     * @return string The style attribute followed by the matched text and the closing "|>" marker
3307     * @since 1.0.8
3308     * @access private
3309     */
3310    function handle_regexps_callback($matches) {
3311        // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3312        return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3313    }
3314
3315    /**
3316     * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3317     *
3318     * @note this is a callback, don't use it directly
3319     *
3320     * @param array the matches array
3321     * @return string
3322     * @since 1.0.8
3323     * @access private
3324     */
3325    function handle_multiline_regexps($matches) {
3326        $before = $this->_hmr_before;
3327        $after = $this->_hmr_after;
3328        if ($this->_hmr_replace) {
3329            $replace = $this->_hmr_replace;
3330            $search = array();
3331
3332            foreach (array_keys($matches) as $k) {
3333                $search[] = '\\' . $k;
3334            }
3335
3336            $before = str_replace($search, $matches, $before);
3337            $after = str_replace($search, $matches, $after);
3338            $replace = str_replace($search, $matches, $replace);
3339        } else {
3340            $replace = $matches[0];
3341        }
3342        return $before
3343                    . '<|!REG3XP' . $this->_hmr_key .'!>'
3344                        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3345                    . '|>'
3346              . $after;
3347    }
3348
3349    /**
3350     * Takes a string that has no strings or comments in it, and highlights
3351     * stuff like keywords, numbers and methods.
3352     *
3353     * @param string The string to parse for keyword, numbers etc.
3354     * @since 1.0.0
3355     * @access private
3356     * @todo BUGGY! Why? Why not build string and return?
3357     */
3358    function parse_non_string_part($stuff_to_parse) {
3359        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3360
3361        // Highlight keywords
3362        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3363        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3364        if ($this->lexic_permissions['STRINGS']) {
3365            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3366            $disallowed_before .= $quotemarks;
3367            $disallowed_after .= $quotemarks;
3368        }
3369        $disallowed_before .= "])";
3370        $disallowed_after .= "])";
3371
3372        $parser_control_pergroup = false;
3373        if (isset($this->language_data['PARSER_CONTROL'])) {
3374            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3375                $x = 0; // check wether per-keyword-group parser_control is enabled
3376                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3377                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3378                    ++$x;
3379                }
3380                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3381                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3382                    ++$x;
3383                }
3384                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3385            }
3386        }
3387
3388        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3389            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3390                $this->lexic_permissions['KEYWORDS'][$k]) {
3391
3392                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3393                $modifiers = $case_sensitive ? '' : 'i';
3394
3395                // NEW in 1.0.8 - per-keyword-group parser control
3396                $disallowed_before_local = $disallowed_before;
3397                $disallowed_after_local = $disallowed_after;
3398                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3399                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3400                        $disallowed_before_local =
3401                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3402                    }
3403
3404                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3405                        $disallowed_after_local =
3406                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3407                    }
3408                }
3409
3410                $this->_kw_replace_group = $k;
3411
3412                //NEW in 1.0.8, the cached regexp list
3413                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3414                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3415                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3416                    // Might make a more unique string for putting the number in soon
3417                    // Basically, we don't put the styles in yet because then the styles themselves will
3418                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3419                    $stuff_to_parse = preg_replace_callback(
3420                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3421                        array($this, 'handle_keyword_replace'),
3422                        $stuff_to_parse
3423                        );
3424                }
3425            }
3426        }
3427
3428        // Regular expressions
3429        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3430            if ($this->lexic_permissions['REGEXPS'][$key]) {
3431                if (is_array($regexp)) {
3432                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3433                        // produce valid HTML when we match multiple lines
3434                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
3435                        $this->_hmr_before = $regexp[GESHI_BEFORE];
3436                        $this->_hmr_key = $key;
3437                        $this->_hmr_after = $regexp[GESHI_AFTER];
3438                        $stuff_to_parse = preg_replace_callback(
3439                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3440                            array($this, 'handle_multiline_regexps'),
3441                            $stuff_to_parse);
3442                        $this->_hmr_replace = false;
3443                        $this->_hmr_before = '';
3444                        $this->_hmr_after = '';
3445                    } else {
3446                        $stuff_to_parse = preg_replace(
3447                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3448                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3449                            $stuff_to_parse);
3450                    }
3451                } else {
3452                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3453                        // produce valid HTML when we match multiple lines
3454                        $this->_hmr_key = $key;
3455                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3456                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3457                        $this->_hmr_key = '';
3458                    } else {
3459                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3460                    }
3461                }
3462            }
3463        }
3464
3465        // Highlight numbers. As of 1.0.8 we support different types of numbers
3466        $numbers_found = false;
3467
3468        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3469            $numbers_found = true;
3470
3471            //For each of the formats ...
3472            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3473                //Check if it should be highlighted ...
3474                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3475            }
3476        }
3477
3478        //
3479        // Now that's all done, replace /[number]/ with the correct styles
3480        //
3481        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3482            if (!$this->use_classes) {
3483                $attributes = ' style="' .
3484                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3485                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3486            } else {
3487                $attributes = ' class="kw' . $k . '"';
3488            }
3489            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3490        }
3491
3492        if ($numbers_found) {
3493            // Put number styles in
3494            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3495                //Commented out for now, as this needs some review ...
3496                //                if ($numbers_permissions & $id) {
3497                //Get the appropriate style ...
3498                //Checking for unset styles is done by the style cache builder ...
3499                if (!$this->use_classes) {
3500                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3501                } else {
3502                    $attributes = ' class="nu'.$id.'"';
3503                }
3504
3505                //Set in the correct styles ...
3506                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3507                //                }
3508            }
3509        }
3510
3511        // Highlight methods and fields in objects
3512        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3513            $oolang_spaces = "[\s]*";
3514            $oolang_before = "";
3515            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3516            if (isset($this->language_data['PARSER_CONTROL'])) {
3517                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3518                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3519                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3520                    }
3521                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3522                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3523                    }
3524                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3525                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3526                    }
3527                }
3528            }
3529
3530            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3531                if (false !== strpos($stuff_to_parse, $splitter)) {
3532                    if (!$this->use_classes) {
3533                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3534                    } else {
3535                        $attributes = ' class="me' . $key . '"';
3536                    }
3537                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3538                }
3539            }
3540        }
3541
3542        //
3543        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3544        // You try it, and see what happens ;)
3545        // TODO: Fix lexic permissions not converting entities if shouldn't
3546        // be highlighting regardless
3547        //
3548        if ($this->lexic_permissions['BRACKETS']) {
3549            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3550                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3551        }
3552
3553
3554        //FIX for symbol highlighting ...
3555        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3556            //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3557            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3558            $global_offset = 0;
3559            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3560                $symbol_match = $pot_symbols[$s_id][0][0];
3561                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3562                    // already highlighted blocks _must_ include either < or >
3563                    // so if this conditional applies, we have to skip this match
3564                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3565                    if(strpos($symbol_match, '<SEMI>') === false &&
3566                        strpos($symbol_match, '<PIPE>') === false) {
3567                        continue;
3568                    }
3569                }
3570
3571                // if we reach this point, we have a valid match which needs to be highlighted
3572
3573                $symbol_length = strlen($symbol_match);
3574                $symbol_offset = $pot_symbols[$s_id][0][1];
3575                unset($pot_symbols[$s_id]);
3576                $symbol_end = $symbol_length + $symbol_offset;
3577                $symbol_hl = "";
3578
3579                // if we have multiple styles, we have to handle them properly
3580                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3581                    $old_sym = -1;
3582                    // Split the current stuff to replace into its atomic symbols ...
3583                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3584                    foreach ($sym_match_syms[0] as $sym_ms) {
3585                        //Check if consequtive symbols belong to the same group to save output ...
3586                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3587                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3588                            if (-1 != $old_sym) {
3589                                $symbol_hl .= "|>";
3590                            }
3591                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3592                            if (!$this->use_classes) {
3593                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3594                            } else {
3595                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
3596                            }
3597                        }
3598                        $symbol_hl .= $sym_ms;
3599                    }
3600                    unset($sym_match_syms);
3601
3602                    //Close remaining tags and insert the replacement at the right position ...
3603                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
3604                    if (-1 != $old_sym) {
3605                        $symbol_hl .= "|>";
3606                    }
3607                } else {
3608                    if (!$this->use_classes) {
3609                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3610                    } else {
3611                        $symbol_hl = '<| class="sy0">';
3612                    }
3613                    $symbol_hl .= $symbol_match . '|>';
3614                }
3615
3616                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3617
3618                // since we replace old text with something of different size,
3619                // we'll have to keep track of the differences
3620                $global_offset += strlen($symbol_hl) - $symbol_length;
3621            }
3622        }
3623        //FIX for symbol highlighting ...
3624
3625        // Add class/style for regexps
3626        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3627            if ($this->lexic_permissions['REGEXPS'][$key]) {
3628                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3629                    $this->_rx_key = $key;
3630                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3631                        array($this, 'handle_regexps_callback'),
3632                        $stuff_to_parse);
3633                } else {
3634                    if (!$this->use_classes) {
3635                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3636                    } else {
3637                        if (is_array($this->language_data['REGEXPS'][$key]) &&
3638                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3639                            $attributes = ' class="' .
3640                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3641                        } else {
3642                           $attributes = ' class="re' . $key . '"';
3643                        }
3644                    }
3645                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3646                }
3647            }
3648        }
3649
3650        // Replace <DOT> with . for urls
3651        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3652        // Replace <|UR1| with <a href= for urls also
3653        if (isset($this->link_styles[GESHI_LINK])) {
3654            if ($this->use_classes) {
3655                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3656            } else {
3657                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3658            }
3659        } else {
3660            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3661        }
3662
3663        //
3664        // NOW we add the span thingy ;)
3665        //
3666
3667        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3668        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3669        return substr($stuff_to_parse, 1);
3670    }
3671
3672    /**
3673     * Sets the time taken to parse the code
3674     *
3675     * @param microtime The time when parsing started
3676     * @param microtime The time when parsing ended
3677     * @since 1.0.2
3678     * @access private
3679     */
3680    function set_time($start_time, $end_time) {
3681        $start = explode(' ', $start_time);
3682        $end = explode(' ', $end_time);
3683        $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3684    }
3685
3686    /**
3687     * Gets the time taken to parse the code
3688     *
3689     * @return double The time taken to parse the code
3690     * @since  1.0.2
3691     */
3692    function get_time() {
3693        return $this->time;
3694    }
3695
3696    /**
3697     * Merges arrays recursively, overwriting values of the first array with values of later arrays
3698     *
3699     * @since 1.0.8
3700     * @access private
3701     */
3702    function merge_arrays() {
3703        $arrays = func_get_args();
3704        $narrays = count($arrays);
3705
3706        // check arguments
3707        // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3708        for ($i = 0; $i < $narrays; $i ++) {
3709            if (!is_array($arrays[$i])) {
3710                // also array_merge_recursive returns nothing in this case
3711                trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3712                return false;
3713            }
3714        }
3715
3716        // the first array is in the output set in every case
3717        $ret = $arrays[0];
3718
3719        // merege $ret with the remaining arrays
3720        for ($i = 1; $i < $narrays; $i ++) {
3721            foreach ($arrays[$i] as $key => $value) {
3722                if (is_array($value) && isset($ret[$key])) {
3723                    // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3724                    // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3725                    $ret[$key] = $this->merge_arrays($ret[$key], $value);
3726                } else {
3727                    $ret[$key] = $value;
3728                }
3729            }
3730        }
3731
3732        return $ret;
3733    }
3734
3735    /**
3736     * Gets language information and stores it for later use
3737     *
3738     * @param string The filename of the language file you want to load
3739     * @since 1.0.0
3740     * @access private
3741     * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3742     */
3743    function load_language($file_name) {
3744        if ($file_name == $this->loaded_language) {
3745            // this file is already loaded!
3746            return;
3747        }
3748
3749        //Prepare some stuff before actually loading the language file
3750        $this->loaded_language = $file_name;
3751        $this->parse_cache_built = false;
3752        $this->enable_highlighting();
3753        $language_data = array();
3754
3755        //Load the language file
3756        require $file_name;
3757
3758        // Perhaps some checking might be added here later to check that
3759        // $language data is a valid thing but maybe not
3760        $this->language_data = $language_data;
3761
3762        // Set strict mode if should be set
3763        $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3764
3765        // Set permissions for all lexics to true
3766        // so they'll be highlighted by default
3767        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3768            if (!empty($this->language_data['KEYWORDS'][$key])) {
3769                $this->lexic_permissions['KEYWORDS'][$key] = true;
3770            } else {
3771                $this->lexic_permissions['KEYWORDS'][$key] = false;
3772            }
3773        }
3774
3775        foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3776            $this->lexic_permissions['COMMENTS'][$key] = true;
3777        }
3778        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3779            $this->lexic_permissions['REGEXPS'][$key] = true;
3780        }
3781
3782        // for BenBE and future code reviews:
3783        // we can use empty here since we only check for existance and emptiness of an array
3784        // if it is not an array at all but rather false or null this will work as intended as well
3785        // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3786        if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3787            foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3788                // it's either true or false and maybe is true as well
3789                $perm = $value !== GESHI_NEVER;
3790                if ($flag == 'ALL') {
3791                    $this->enable_highlighting($perm);
3792                    continue;
3793                }
3794                if (!isset($this->lexic_permissions[$flag])) {
3795                    // unknown lexic permission
3796                    continue;
3797                }
3798                if (is_array($this->lexic_permissions[$flag])) {
3799                    foreach ($this->lexic_permissions[$flag] as $key => $val) {
3800                        $this->lexic_permissions[$flag][$key] = $perm;
3801                    }
3802                } else {
3803                    $this->lexic_permissions[$flag] = $perm;
3804                }
3805            }
3806            unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3807        }
3808
3809        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3810        //You need to set one for HARDESCAPES only in this case.
3811        if(!isset($this->language_data['HARDCHAR'])) {
3812            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3813        }
3814
3815        //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3816        $style_filename = substr($file_name, 0, -4) . '.style.php';
3817        if (is_readable($style_filename)) {
3818            //Clear any style_data that could have been set before ...
3819            if (isset($style_data)) {
3820                unset($style_data);
3821            }
3822
3823            //Read the Style Information from the style file
3824            include $style_filename;
3825
3826            //Apply the new styles to our current language styles
3827            if (isset($style_data) && is_array($style_data)) {
3828                $this->language_data['STYLES'] =
3829                    $this->merge_arrays($this->language_data['STYLES'], $style_data);
3830            }
3831        }
3832    }
3833
3834    /**
3835     * Takes the parsed code and various options, and creates the HTML
3836     * surrounding it to make it look nice.
3837     *
3838     * @param  string The code already parsed (reference!)
3839     * @since  1.0.0
3840     * @access private
3841     */
3842    function finalise(&$parsed_code) {
3843        // Remove end parts of important declarations
3844        // This is BUGGY!! My fault for bad code: fix coming in 1.2
3845        // @todo Remove this crap
3846        if ($this->enable_important_blocks &&
3847            (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3848            $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3849        }
3850
3851        // Add HTML whitespace stuff if we're using the <div> header
3852        if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3853            $this->indent($parsed_code);
3854        }
3855
3856        // purge some unnecessary stuff
3857        /** NOTE: memorypeak #1 */
3858        $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3859
3860        // If we are using IDs for line numbers, there needs to be an overall
3861        // ID set to prevent collisions.
3862        if ($this->add_ids && !$this->overall_id) {
3863            $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3864        }
3865
3866        // Get code into lines
3867        /** NOTE: memorypeak #2 */
3868        $code = explode("\n", $parsed_code);
3869        $parsed_code = $this->header();
3870
3871        // If we're using line numbers, we insert <li>s and appropriate
3872        // markup to style them (otherwise we don't need to do anything)
3873        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3874            // If we're using the <pre> header, we shouldn't add newlines because
3875            // the <pre> will line-break them (and the <li>s already do this for us)
3876            $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3877
3878            // Set vars to defaults for following loop
3879            $i = 0;
3880
3881            // Foreach line...
3882            for ($i = 0, $n = count($code); $i < $n;) {
3883                //Reset the attributes for a new line ...
3884                $attrs = array();
3885
3886                // Make lines have at least one space in them if they're empty
3887                // BenBE: Checking emptiness using trim instead of relying on blanks
3888                if ('' == trim($code[$i])) {
3889                    $code[$i] = '&nbsp;';
3890                }
3891
3892                // If this is a "special line"...
3893                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3894                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3895                    // Set the attributes to style the line
3896                    if ($this->use_classes) {
3897                        //$attr = ' class="li2"';
3898                        $attrs['class'][] = 'li2';
3899                        $def_attr = ' class="de2"';
3900                    } else {
3901                        //$attr = ' style="' . $this->line_style2 . '"';
3902                        $attrs['style'][] = $this->line_style2;
3903                        // This style "covers up" the special styles set for special lines
3904                        // so that styles applied to special lines don't apply to the actual
3905                        // code on that line
3906                        $def_attr = ' style="' . $this->code_style . '"';
3907                    }
3908                } else {
3909                    if ($this->use_classes) {
3910                        //$attr = ' class="li1"';
3911                        $attrs['class'][] = 'li1';
3912                        $def_attr = ' class="de1"';
3913                    } else {
3914                        //$attr = ' style="' . $this->line_style1 . '"';
3915                        $attrs['style'][] = $this->line_style1;
3916                        $def_attr = ' style="' . $this->code_style . '"';
3917                    }
3918                }
3919
3920                //Check which type of tag to insert for this line
3921                if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3922                    $start = "<pre$def_attr>";
3923                    $end = '</pre>';
3924                } else {
3925                    // Span or div?
3926                    $start = "<div$def_attr>";
3927                    $end = '</div>';
3928                }
3929
3930                ++$i;
3931
3932                // Are we supposed to use ids? If so, add them
3933                if ($this->add_ids) {
3934                    $attrs['id'][] = "$this->overall_id-$i";
3935                }
3936
3937                //Is this some line with extra styles???
3938                if (in_array($i, $this->highlight_extra_lines)) {
3939                    if ($this->use_classes) {
3940                        if (isset($this->highlight_extra_lines_styles[$i])) {
3941                            $attrs['class'][] = "lx$i";
3942                        } else {
3943                            $attrs['class'][] = "ln-xtra";
3944                        }
3945                    } else {
3946                        array_push($attrs['style'], $this->get_line_style($i));
3947                    }
3948                }
3949
3950                // Add in the line surrounded by appropriate list HTML
3951                $attr_string = '';
3952                foreach ($attrs as $key => $attr) {
3953                    $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3954                }
3955
3956                $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3957                unset($code[$i - 1]);
3958            }
3959        } else {
3960            $n = count($code);
3961            if ($this->use_classes) {
3962                $attributes = ' class="de1"';
3963            } else {
3964                $attributes = ' style="'. $this->code_style .'"';
3965            }
3966            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3967                $parsed_code .= '<pre'. $attributes .'>';
3968            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3969                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3970                    if ($this->use_classes) {
3971                        $attrs = ' class="ln"';
3972                    } else {
3973                        $attrs = ' style="'. $this->table_linenumber_style .'"';
3974                    }
3975                    $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3976                    // get linenumbers
3977                    // we don't merge it with the for below, since it should be better for
3978                    // memory consumption this way
3979                    // @todo: but... actually it would still be somewhat nice to merge the two loops
3980                    //        the mem peaks are at different positions
3981                    for ($i = 0; $i < $n; ++$i) {
3982                        $close = 0;
3983                        // fancy lines
3984                        if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3985                            $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3986                            // Set the attributes to style the line
3987                            if ($this->use_classes) {
3988                                $parsed_code .= '<span class="xtra li2"><span class="de2">';
3989                            } else {
3990                                // This style "covers up" the special styles set for special lines
3991                                // so that styles applied to special lines don't apply to the actual
3992                                // code on that line
3993                                $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3994                                                  .'<span style="' . $this->code_style .'">';
3995                            }
3996                            $close += 2;
3997                        }
3998                        //Is this some line with extra styles???
3999                        if (in_array($i + 1, $this->highlight_extra_lines)) {
4000                            if ($this->use_classes) {
4001                                if (isset($this->highlight_extra_lines_styles[$i])) {
4002                                    $parsed_code .= "<span class=\"xtra lx$i\">";
4003                                } else {
4004                                    $parsed_code .= "<span class=\"xtra ln-xtra\">";
4005                                }
4006                            } else {
4007                                $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4008                            }
4009                            ++$close;
4010                        }
4011                        $parsed_code .= $this->line_numbers_start + $i;
4012                        if ($close) {
4013                            $parsed_code .= str_repeat('</span>', $close);
4014                        } elseif ($i != $n) {
4015                            $parsed_code .= "\n";
4016                        }
4017                    }
4018                    $parsed_code .= '</pre></td><td'.$attributes.'>';
4019                }
4020                $parsed_code .= '<pre'. $attributes .'>';
4021            }
4022            // No line numbers, but still need to handle highlighting lines extra.
4023            // Have to use divs so the full width of the code is highlighted
4024            $close = 0;
4025            for ($i = 0; $i < $n; ++$i) {
4026                // Make lines have at least one space in them if they're empty
4027                // BenBE: Checking emptiness using trim instead of relying on blanks
4028                if ('' == trim($code[$i])) {
4029                    $code[$i] = '&nbsp;';
4030                }
4031                // fancy lines
4032                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
4033                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
4034                    // Set the attributes to style the line
4035                    if ($this->use_classes) {
4036                        $parsed_code .= '<span class="xtra li2"><span class="de2">';
4037                    } else {
4038                        // This style "covers up" the special styles set for special lines
4039                        // so that styles applied to special lines don't apply to the actual
4040                        // code on that line
4041                        $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4042                                          .'<span style="' . $this->code_style .'">';
4043                    }
4044                    $close += 2;
4045                }
4046                //Is this some line with extra styles???
4047                if (in_array($i + 1, $this->highlight_extra_lines)) {
4048                    if ($this->use_classes) {
4049                        if (isset($this->highlight_extra_lines_styles[$i])) {
4050                            $parsed_code .= "<span class=\"xtra lx$i\">";
4051                        } else {
4052                            $parsed_code .= "<span class=\"xtra ln-xtra\">";
4053                        }
4054                    } else {
4055                        $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4056                    }
4057                    ++$close;
4058                }
4059
4060                $parsed_code .= $code[$i];
4061
4062                if ($close) {
4063                  $parsed_code .= str_repeat('</span>', $close);
4064                  $close = 0;
4065                }
4066                elseif ($i + 1 < $n) {
4067                    $parsed_code .= "\n";
4068                }
4069                unset($code[$i]);
4070            }
4071
4072            if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4073                $parsed_code .= '</pre>';
4074            }
4075            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4076                $parsed_code .= '</td>';
4077            }
4078        }
4079
4080        $parsed_code .= $this->footer();
4081    }
4082
4083    /**
4084     * Creates the header for the code block (with correct attributes)
4085     *
4086     * @return string The header for the code block
4087     * @since  1.0.0
4088     * @access private
4089     */
4090    function header() {
4091        // Get attributes needed
4092        /**
4093         * @todo   Document behaviour change - class is outputted regardless of whether
4094         *         we're using classes or not. Same with style
4095         */
4096        $attributes = ' class="' . $this->_genCSSName($this->language);
4097        if ($this->overall_class != '') {
4098            $attributes .= " ".$this->_genCSSName($this->overall_class);
4099        }
4100        $attributes .= '"';
4101
4102        if ($this->overall_id != '') {
4103            $attributes .= " id=\"{$this->overall_id}\"";
4104        }
4105        if ($this->overall_style != '' && !$this->use_classes) {
4106            $attributes .= ' style="' . $this->overall_style . '"';
4107        }
4108
4109        $ol_attributes = '';
4110
4111        if ($this->line_numbers_start != 1) {
4112            $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4113        }
4114
4115        // Get the header HTML
4116        $header = $this->header_content;
4117        if ($header) {
4118            if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4119                $header = str_replace("\n", '', $header);
4120            }
4121            $header = $this->replace_keywords($header);
4122
4123            if ($this->use_classes) {
4124                $attr = ' class="head"';
4125            } else {
4126                $attr = " style=\"{$this->header_content_style}\"";
4127            }
4128            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4129                $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4130            } else {
4131                $header = "<div$attr>$header</div>";
4132            }
4133        }
4134
4135        if (GESHI_HEADER_NONE == $this->header_type) {
4136            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4137                return "$header<ol$attributes$ol_attributes>";
4138            }
4139            return $header . ($this->force_code_block ? '<div>' : '');
4140        }
4141
4142        // Work out what to return and do it
4143        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4144            if ($this->header_type == GESHI_HEADER_PRE) {
4145                return "<pre$attributes>$header<ol$ol_attributes>";
4146            } elseif ($this->header_type == GESHI_HEADER_DIV ||
4147                $this->header_type == GESHI_HEADER_PRE_VALID) {
4148                return "<div$attributes>$header<ol$ol_attributes>";
4149            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4150                return "<table$attributes>$header<tbody><tr class=\"li1\">";
4151            }
4152        } else {
4153            if ($this->header_type == GESHI_HEADER_PRE) {
4154                return "<pre$attributes>$header"  .
4155                    ($this->force_code_block ? '<div>' : '');
4156            } else {
4157                return "<div$attributes>$header" .
4158                    ($this->force_code_block ? '<div>' : '');
4159            }
4160        }
4161    }
4162
4163    /**
4164     * Returns the footer for the code block.
4165     *
4166     * @return string The footer for the code block
4167     * @since  1.0.0
4168     * @access private
4169     */
4170    function footer() {
4171        $footer = $this->footer_content;
4172        if ($footer) {
4173            if ($this->header_type == GESHI_HEADER_PRE) {
4174                $footer = str_replace("\n", '', $footer);;
4175            }
4176            $footer = $this->replace_keywords($footer);
4177
4178            if ($this->use_classes) {
4179                $attr = ' class="foot"';
4180            } else {
4181                $attr = " style=\"{$this->footer_content_style}\"";
4182            }
4183            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4184                $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4185            } else {
4186                $footer = "<div$attr>$footer</div>";
4187            }
4188        }
4189
4190        if (GESHI_HEADER_NONE == $this->header_type) {
4191            return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4192        }
4193
4194        if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4195            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4196                return "</ol>$footer</div>";
4197            }
4198            return ($this->force_code_block ? '</div>' : '') .
4199                "$footer</div>";
4200        }
4201        elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4202            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4203                return "</tr></tbody>$footer</table>";
4204            }
4205            return ($this->force_code_block ? '</div>' : '') .
4206                "$footer</div>";
4207        }
4208        else {
4209            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4210                return "</ol>$footer</pre>";
4211            }
4212            return ($this->force_code_block ? '</div>' : '') .
4213                "$footer</pre>";
4214        }
4215    }
4216
4217    /**
4218     * Replaces certain keywords in the header and footer with
4219     * certain configuration values
4220     *
4221     * @param  string The header or footer content to do replacement on
4222     * @return string The header or footer with replaced keywords
4223     * @since  1.0.2
4224     * @access private
4225     */
4226    function replace_keywords($instr) {
4227        $keywords = $replacements = array();
4228
4229        $keywords[] = '<TIME>';
4230        $keywords[] = '{TIME}';
4231        $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4232
4233        $keywords[] = '<LANGUAGE>';
4234        $keywords[] = '{LANGUAGE}';
4235        $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4236
4237        $keywords[] = '<VERSION>';
4238        $keywords[] = '{VERSION}';
4239        $replacements[] = $replacements[] = GESHI_VERSION;
4240
4241        $keywords[] = '<SPEED>';
4242        $keywords[] = '{SPEED}';
4243        if ($time <= 0) {
4244            $speed = 'N/A';
4245        } else {
4246            $speed = strlen($this->source) / $time;
4247            if ($speed >= 1024) {
4248                $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4249            } else {
4250                $speed = sprintf("%.0f B/s", $speed);
4251            }
4252        }
4253        $replacements[] = $replacements[] = $speed;
4254
4255        return str_replace($keywords, $replacements, $instr);
4256    }
4257
4258    /**
4259     * Secure replacement for PHP built-in function htmlspecialchars().
4260     *
4261     * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4262     * for this replacement function.
4263     *
4264     * The INTERFACE for this function is almost the same as that for
4265     * htmlspecialchars(), with the same default for quote style; however, there
4266     * is no 'charset' parameter. The reason for this is as follows:
4267     *
4268     * The PHP docs say:
4269     *      "The third argument charset defines character set used in conversion."
4270     *
     * I suspect PHP's htmlspecialchars() is working at the byte-value level and
     * thus _needs_ to know (or assume) a character set because the special
     * characters to be replaced could exist at different code points in
     * different character sets. (If indeed htmlspecialchars() works at
     * byte-value level that goes some way towards explaining why the
     * vulnerability would exist in this function, too, and not only in
     * htmlentities() which certainly is working at byte-value level.)
4278     *
4279     * This replacement function however works at character level and should
4280     * therefore be "immune" to character set differences - so no charset
4281     * parameter is needed or provided. If a third parameter is passed, it will
4282     * be silently ignored.
4283     *
4284     * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4285     * of PHP's '&#039;' for a single quote: this provides compatibility with
4286     *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4287     * (see comment by mikiwoz at yahoo dot co dot uk on
4288     * http://php.net/htmlspecialchars); it also matches the entity definition
4289     * for XML 1.0
4290     * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4291     * Like PHP we use a numeric character reference instead of '&apos;' for the
4292     * single quote. For the other special characters we use the named entity
4293     * references, as PHP is doing.
4294     *
4295     * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4296     *
4297     * @license     http://www.gnu.org/copyleft/lgpl.html
4298     *              GNU Lesser General Public License
4299     * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4300     *              Wikka Development Team}
4301     *
4302     * @access      private
4303     * @param       string  $string string to be converted
4304     * @param       integer $quote_style
4305     *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4306     *                      - ENT_NOQUOTES: escapes only &, < and >
4307     *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4308     * @return      string  converted string
4309     * @since       1.0.7.18
4310     */
4311    function hsc($string, $quote_style = ENT_COMPAT) {
4312        // init
4313        static $aTransSpecchar = array(
4314            '&' => '&amp;',
4315            '"' => '&quot;',
4316            '<' => '&lt;',
4317            '>' => '&gt;',
4318
4319            //This fix is related to SF#1923020, but has to be applied
4320            //regardless of actually highlighting symbols.
4321
4322            //Circumvent a bug with symbol highlighting
4323            //This is required as ; would produce undesirable side-effects if it
4324            //was not to be processed as an entity.
4325            ';' => '<SEMI>', // Force ; to be processed as entity
4326            '|' => '<PIPE>' // Force | to be processed as entity
4327            );                      // ENT_COMPAT set
4328
4329        switch ($quote_style) {
4330            case ENT_NOQUOTES: // don't convert double quotes
4331                unset($aTransSpecchar['"']);
4332                break;
4333            case ENT_QUOTES: // convert single quotes as well
4334                $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4335                break;
4336        }
4337
4338        // return translated string
4339        return strtr($string, $aTransSpecchar);
4340    }
4341
4342    function _genCSSName($name){
4343        return (is_numeric($name[0]) ? '_' : '') . $name;
4344    }
4345
4346    /**
4347     * Returns a stylesheet for the highlighted code. If $economy mode
4348     * is true, we only return the stylesheet declarations that matter for
4349     * this code block instead of the whole thing
4350     *
4351     * @param  boolean Whether to use economy mode or not
4352     * @return string A stylesheet built on the data for the current language
4353     * @since  1.0.0
4354     */
4355    function get_stylesheet($economy_mode = true) {
4356        // If there's an error, chances are that the language file
4357        // won't have populated the language data file, so we can't
4358        // risk getting a stylesheet...
4359        if ($this->error) {
4360            return '';
4361        }
4362
4363        //Check if the style rearrangements have been processed ...
4364        //This also does some preprocessing to check which style groups are useable ...
4365        if(!isset($this->language_data['NUMBERS_CACHE'])) {
4366            $this->build_style_cache();
4367        }
4368
4369        // First, work out what the selector should be. If there's an ID,
4370        // that should be used, the same for a class. Otherwise, a selector
4371        // of '' means that these styles will be applied anywhere
4372        if ($this->overall_id) {
4373            $selector = '#' . $this->_genCSSName($this->overall_id);
4374        } else {
4375            $selector = '.' . $this->_genCSSName($this->language);
4376            if ($this->overall_class) {
4377                $selector .= '.' . $this->_genCSSName($this->overall_class);
4378            }
4379        }
4380        $selector .= ' ';
4381
4382        // Header of the stylesheet
4383        if (!$economy_mode) {
4384            $stylesheet = "/**\n".
4385                " * GeSHi Dynamically Generated Stylesheet\n".
4386                " * --------------------------------------\n".
4387                " * Dynamically generated stylesheet for {$this->language}\n".
4388                " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4389                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4390                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4391                " * --------------------------------------\n".
4392                " */\n";
4393        } else {
4394            $stylesheet = "/**\n".
4395                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4396                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4397                " */\n";
4398        }
4399
4400        // Set the <ol> to have no effect at all if there are line numbers
4401        // (<ol>s have margins that should be destroyed so all layout is
4402        // controlled by the set_overall_style method, which works on the
4403        // <pre> or <div> container). Additionally, set default styles for lines
4404        if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4405            //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4406            $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4407        }
4408
4409        // Add overall styles
4410        // note: neglect economy_mode, empty styles are meaningless
4411        if ($this->overall_style != '') {
4412            $stylesheet .= "$selector {{$this->overall_style}}\n";
4413        }
4414
4415        // Add styles for links
4416        // note: economy mode does not make _any_ sense here
4417        //       either the style is empty and thus no selector is needed
4418        //       or the appropriate key is given.
4419        foreach ($this->link_styles as $key => $style) {
4420            if ($style != '') {
4421                switch ($key) {
4422                    case GESHI_LINK:
4423                        $stylesheet .= "{$selector}a:link {{$style}}\n";
4424                        break;
4425                    case GESHI_HOVER:
4426                        $stylesheet .= "{$selector}a:hover {{$style}}\n";
4427                        break;
4428                    case GESHI_ACTIVE:
4429                        $stylesheet .= "{$selector}a:active {{$style}}\n";
4430                        break;
4431                    case GESHI_VISITED:
4432                        $stylesheet .= "{$selector}a:visited {{$style}}\n";
4433                        break;
4434                }
4435            }
4436        }
4437
4438        // Header and footer
4439        // note: neglect economy_mode, empty styles are meaningless
4440        if ($this->header_content_style != '') {
4441            $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4442        }
4443        if ($this->footer_content_style != '') {
4444            $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4445        }
4446
4447        // Styles for important stuff
4448        // note: neglect economy_mode, empty styles are meaningless
4449        if ($this->important_styles != '') {
4450            $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4451        }
4452
4453        // Simple line number styles
4454        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4455            $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4456        }
4457        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4458            $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4459        }
4460        // If there is a style set for fancy line numbers, echo it out
4461        if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4462            $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4463        }
4464
4465        // note: empty styles are meaningless
4466        foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4467            if ($styles != '' && (!$economy_mode ||
4468                (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4469                $this->lexic_permissions['KEYWORDS'][$group]))) {
4470                $stylesheet .= "$selector.kw$group {{$styles}}\n";
4471            }
4472        }
4473        foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4474            if ($styles != '' && (!$economy_mode ||
4475                (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4476                $this->lexic_permissions['COMMENTS'][$group]) ||
4477                (!empty($this->language_data['COMMENT_REGEXP']) &&
4478                !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4479                $stylesheet .= "$selector.co$group {{$styles}}\n";
4480            }
4481        }
4482        foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4483            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4484                // NEW: since 1.0.8 we have to handle hardescapes
4485                if ($group === 'HARD') {
4486                    $group = '_h';
4487                }
4488                $stylesheet .= "$selector.es$group {{$styles}}\n";
4489            }
4490        }
4491        foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4492            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4493                $stylesheet .= "$selector.br$group {{$styles}}\n";
4494            }
4495        }
4496        foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4497            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4498                $stylesheet .= "$selector.sy$group {{$styles}}\n";
4499            }
4500        }
4501        foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4502            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4503                // NEW: since 1.0.8 we have to handle hardquotes
4504                if ($group === 'HARD') {
4505                    $group = '_h';
4506                }
4507                $stylesheet .= "$selector.st$group {{$styles}}\n";
4508            }
4509        }
4510        foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4511            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4512                $stylesheet .= "$selector.nu$group {{$styles}}\n";
4513            }
4514        }
4515        foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4516            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4517                $stylesheet .= "$selector.me$group {{$styles}}\n";
4518            }
4519        }
4520        // note: neglect economy_mode, empty styles are meaningless
4521        foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4522            if ($styles != '') {
4523                $stylesheet .= "$selector.sc$group {{$styles}}\n";
4524            }
4525        }
4526        foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4527            if ($styles != '' && (!$economy_mode ||
4528                (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4529                $this->lexic_permissions['REGEXPS'][$group]))) {
4530                if (is_array($this->language_data['REGEXPS'][$group]) &&
4531                    array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4532                    $stylesheet .= "$selector.";
4533                    $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4534                    $stylesheet .= " {{$styles}}\n";
4535                } else {
4536                    $stylesheet .= "$selector.re$group {{$styles}}\n";
4537                }
4538            }
4539        }
4540        // Styles for lines being highlighted extra
4541        if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4542            $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4543        }
4544        $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4545        foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4546            $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4547        }
4548
4549        return $stylesheet;
4550    }
4551
4552    /**
     * Gets the style that is used for the specified line
4554     *
4555     * @param int The line number information is requested for
4556     * @access private
4557     * @since 1.0.7.21
4558     */
4559    function get_line_style($line) {
4560        //$style = null;
4561        $style = null;
4562        if (isset($this->highlight_extra_lines_styles[$line])) {
4563            $style = $this->highlight_extra_lines_styles[$line];
4564        } else { // if no "extra" style assigned
4565            $style = $this->highlight_extra_lines_style;
4566        }
4567
4568        return $style;
4569    }
4570
4571    /**
    * Creates an optimized list of regular expressions
    * from an array of strings.
4574    *
4575    * Example:
4576    * <code>$list = array('faa', 'foo', 'foobar');
4577    *          => string 'f(aa|oo(bar)?)'</code>
4578    *
4579    * @param $list array of (unquoted) strings
4580    * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings
4582    * @author Milian Wolff <mail@milianw.de>
4583    * @since 1.0.8
4584    * @access private
4585    */
    function optimize_regexp_list($list, $regexp_delimiter = '/') {
        // Characters with a special meaning in a regexp (plus the delimiter).
        // A key is never split so that one of the halves starts with one of
        // these (presumably to avoid tearing apart an escape sequence
        // produced by preg_quote() - confirm).
        $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        // Sorting puts entries with a common prefix next to each other; the
        // loop below only ever compares an entry with its predecessor.
        sort($list);
        // Result: one or more regexp strings; $list_key is the index of the
        // string currently being extended and $num_subpatterns the number of
        // '(?:' groups counted for it so far.
        $regexp_list = array('');
        $num_subpatterns = 0;
        $list_key = 0;

        // the tokens which we will use to generate the regexp list
        // (a nested array: key = string fragment, value = array of sub-tokens;
        // a '' => true sub-token marks the end of a complete entry)
        $tokens = array();
        // chain of token keys, one per nesting level, used by the previous entry
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                // -> flush the tokens collected so far into a new result entry
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            // reference to the token level the remaining part of $entry belongs to
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        continue 2;
                    }
                    // count the leading characters shared with the previous key
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character
                                // -> do not split; store the entry as a sibling instead.
                                // $prev_keys[$level] now equals $entry, so the next pass
                                // of the while loop hits the duplicate check above and
                                // moves on to the next list entry.
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                continue;
                            } else {
                                // relocate previous tokens
                                // (the old key is replaced by its common part, with the
                                // rest of the old key hanging below it)
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        // descend one level and continue with the unmatched rest of the entry
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        // too many subpatterns for the current regexp: start a new one
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        // append as a further alternative to the current regexp
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                // -> store the (rest of the) entry as a new token on this level
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            // drop the processed entry from the local copy of the list
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            // does not fit into the current regexp; only advance the index if
            // the current slot is already in use
            if ( !empty($regexp_list[$list_key]) ) {
              ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }
4698    /**
    * Creates the regexp string for a token array.
    * You should not call this function directly, @see $this->optimize_regexp_list().
    *
    * @param &$tokens array of tokens
    * @param $recursed bool to know whether we recursed or not
4704    * @return string
4705    * @author Milian Wolff <mail@milianw.de>
4706    * @since 1.0.8
4707    * @access private
4708    */
4709    function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4710        $list = '';
4711        foreach ($tokens as $token => $sub_tokens) {
4712            $list .= $token;
4713            $close_entry = isset($sub_tokens['']);
4714            unset($sub_tokens['']);
4715            if (!empty($sub_tokens)) {
4716                $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4717                if ($close_entry) {
4718                    // make sub_tokens optional
4719                    $list .= '?';
4720                }
4721            }
4722            $list .= '|';
4723        }
4724        if (!$recursed) {
4725            // do some optimizations
4726            // common trailing strings
4727            // BUGGY!
4728            //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4729            //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4730            // (?:p)? => p?
4731            $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4732            // (?:a|b|c|d|...)? => [abcd...]?
4733            // TODO: a|bb|c => [ac]|bb
4734            static $callback_2;
4735            if (!isset($callback_2)) {
4736                $callback_2 = function($matches) {
4737                    return '[' . str_replace('|', '', $matches[1]) . ']';
4738                };
4739            }
4740            $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4741        }
4742        // return $list without trailing pipe
4743        return substr($list, 0, -1);
4744    }
4745} // End Class GeSHi
4746
4747
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper to highlight a string in a single call, modelled
     * on PHP's highlight_string(). The output is wrapped in <code> tags and
     * generated with the GESHI_HEADER_NONE header type.
     *
     * @param string  $string   The code to highlight
     * @param string  $language The language to highlight the code in
     * @param string  $path     The path to the language files. This can be left
     *                          out: as from version 1.0.7 the path should be
     *                          detected automatically
     * @param boolean $return   Whether to return the result instead of echoing it
     * @return string|boolean   The highlighted code if $return is true. Otherwise
     *                          the code is echoed and the result is false if the
     *                          highlighter reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // report success the way highlight_string() does
        return !$highlighter->error();
    }
}
4776
4777