1<?php
2/**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
11 *
12 *   This file is part of GeSHi.
13 *
14 *  GeSHi is free software; you can redistribute it and/or modify
15 *  it under the terms of the GNU General Public License as published by
16 *  the Free Software Foundation; either version 2 of the License, or
17 *  (at your option) any later version.
18 *
19 *  GeSHi is distributed in the hope that it will be useful,
20 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 *  GNU General Public License for more details.
23 *
24 *  You should have received a copy of the GNU General Public License
25 *  along with GeSHi; if not, write to the Free Software
26 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27 *
28 * @package    geshi
29 * @subpackage core
30 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
32 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
33 */
34
35//
36// GeSHi Constants
37// You should use these constant names in your programs instead of
38// their values - you never know when a value may change in a future
39// version
40//
41
42/** The version of this GeSHi file */
43define('GESHI_VERSION', '1.0.9.0');
44
45// Define the root directory for the GeSHi code tree, unless the including script already did
46if (!defined('GESHI_ROOT')) {
47    /** The root directory for GeSHi: the directory of this file plus a trailing separator */
48    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
49}
50/** The language file directory for GeSHi (the "geshi" subdirectory of GESHI_ROOT)
51    @access private */
52define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
53
54// Define if GeSHi should be paranoid about security, unless the including script already did
55if (!defined('GESHI_SECURITY_PARANOID')) {
56    /** Tells GeSHi to be paranoid about security settings (when true, set_language_path() also rejects paths containing "/." or "..") */
57    define('GESHI_SECURITY_PARANOID', false);
58}
59
60// Line numbers - use with enable_line_numbers()
61/** Use no line numbers when building the result */
62define('GESHI_NO_LINE_NUMBERS', 0);
63/** Use normal line numbers when building the result */
64define('GESHI_NORMAL_LINE_NUMBERS', 1);
65/** Use fancy line numbers (every nth line is "fancy") when building the result */
66define('GESHI_FANCY_LINE_NUMBERS', 2);
67
68// Container HTML type - use with set_header_type()
69/** Use nothing to surround the source */
70define('GESHI_HEADER_NONE', 0);
71/** Use a "div" to surround the source */
72define('GESHI_HEADER_DIV', 1);
73/** Use a "pre" to surround the source */
74define('GESHI_HEADER_PRE', 2);
75/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
76define('GESHI_HEADER_PRE_VALID', 3);
77/**
78 * Use a "table" to surround the source:
79 *
80 *  <table>
81 *    <thead><tr><td colspan="2">$header</td></tr></thead>
82 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
83 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
84 *  </table>
85 *
86 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
87 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
88 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
89 */
90define('GESHI_HEADER_PRE_TABLE', 4);
91
92// Capitalisation constants
93/** Leave keywords found as the case that they are */
94define('GESHI_CAPS_NO_CHANGE', 0);
95/** Uppercase keywords found */
96define('GESHI_CAPS_UPPER', 1);
97/** Lowercase keywords found */
98define('GESHI_CAPS_LOWER', 2);
99
100// Link style constants
101/** Links in the source in the :link state */
102define('GESHI_LINK', 0);
103/** Links in the source in the :hover state */
104define('GESHI_HOVER', 1);
105/** Links in the source in the :active state */
106define('GESHI_ACTIVE', 2);
107/** Links in the source in the :visited state */
108define('GESHI_VISITED', 3);
109
110// Important string starter/finisher
111// Note that if you change these, they should be as-is: i.e., don't
112// write them as if they had been run through htmlentities()
113/** The starter for important parts of the source */
114define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
115/** The ender for important parts of the source */
116define('GESHI_END_IMPORTANT', '<END GeSHi>');
117
118/**#@+
119 *  @access private
120 */
121// When strict mode applies for a language
122/** Strict mode never applies (this is the most common) */
123define('GESHI_NEVER', 0);
124/** Strict mode *might* apply, and can be enabled or
125    disabled by {@link GeSHi->enable_strict_mode()} */
126define('GESHI_MAYBE', 1);
127/** Strict mode always applies */
128define('GESHI_ALWAYS', 2);
129
130// Advanced regexp handling constants, used in language files
131/** The key of the regex array defining what to search for */
132define('GESHI_SEARCH', 0);
133/** The key of the regex array defining what bracket group in a
134    matched search to use as a replacement */
135define('GESHI_REPLACE', 1);
136/** The key of the regex array defining any modifiers to the regular expression */
137define('GESHI_MODIFIERS', 2);
138/** The key of the regex array defining what bracket group in a
139    matched search to put before the replacement */
140define('GESHI_BEFORE', 3);
141/** The key of the regex array defining what bracket group in a
142    matched search to put after the replacement */
143define('GESHI_AFTER', 4);
144/** The key of the regex array defining a custom keyword to use
145    for this regexp's html tag class */
146define('GESHI_CLASS', 5);
147
148/** Used in language files to mark comments */
149define('GESHI_COMMENTS', 0);
150
151/** Some old PHP / PCRE versions only support a limited number of subpatterns in
152    regular expressions. Set this to false if your PCRE lib is up to date
153    @see GeSHi->optimize_regexp_list()
154    **/
155define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
156/** It is also important not to generate regular expressions that are too long.
157    Be generous here... but keep in mind that when reaching this limit we
158    still have to close open patterns. 12k should do just fine on a 16k limit.
159    @see GeSHi->optimize_regexp_list()
160    **/
161define('GESHI_MAX_PCRE_LENGTH', 12288);
162
163//Number format specification (each value is a distinct bit)
164/** Basic number format for integers */
165define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
166/** Enhanced number format for integers like seen in C */
167define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
168/** Number format to highlight binary numbers with a suffix "b" */
169define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
170/** Number format to highlight binary numbers with a prefix % */
171define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
172/** Number format to highlight binary numbers with a prefix 0b (C) */
173define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
174/** Number format to highlight octal numbers with a leading zero */
175define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
176/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
177define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
178/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
179define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
180/** Number format to highlight octal numbers with a suffix of o */
181define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
182/** Number format to highlight hex numbers with a prefix 0x */
183define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
184/** Number format to highlight hex numbers with a prefix $ */
185define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
186/** Number format to highlight hex numbers with a suffix of h */
187define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
188/** Number format to highlight floating-point numbers without support for scientific notation */
189define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
190/** Number format to highlight floating-point numbers with an "f" suffix, without support for scientific notation */
191define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
192/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
193define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
194/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
195define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
196//Custom formats are passed by RX array
197
198// Error detection - use these to analyse faults
199/** No sourcecode to highlight was specified (no message template exists for this code in GeSHi::$error_messages)
200 * @deprecated
201 */
202define('GESHI_ERROR_NO_INPUT', 1);
203/** The language specified does not exist */
204define('GESHI_ERROR_NO_SUCH_LANG', 2);
205/** GeSHi could not open a file for reading (generally a language file) */
206define('GESHI_ERROR_FILE_NOT_READABLE', 3);
207/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
208define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
209/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
210define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
211/**#@-*/
212
213
214/**
215 * The GeSHi Class.
216 *
217 * Please refer to the documentation for GeSHi 1.0.X that is available
218 * at http://qbnz.com/highlighter/documentation.php for more information
219 * about how to use this class.
220 *
221 * @package   geshi
222 * @author    Nigel McNie <nigel@geshi.org>
223 * @author    Benny Baumann <BenBE@omorphia.de>
224 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
225 */
226class GeSHi {
227
228    /**
229     * The source code to highlight
230     * @var string
231     */
232    protected $source = '';
233
234    /**
235     * The language to use when highlighting
236     * @var string
237     */
238    protected $language = '';
239
240    /**
241     * The data for the language used
242     * @var array
243     */
244    protected $language_data = array();
245
246    /**
247     * The path to the language files
248     * @var string
249     */
250    protected $language_path = GESHI_LANG_ROOT;
251
252    /**
253     * The last error: false if none, otherwise a GESHI_ERROR_* code (or a ready-made message string)
254     * @var int|string|false
255     * @todo check err reporting works
256     */
257    protected $error = false;
258
259    /**
260     * Possible error messages
261     * @var array
262     */
263    protected $error_messages = array(
264        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
265        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
266        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
267        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
268    );
269
270    /**
271     * Whether highlighting is strict or not
272     * @var boolean
273     */
274    protected $strict_mode = false;
275
276    /**
277     * Whether to use CSS classes in output
278     * @var boolean
279     */
280    protected $use_classes = false;
281
282    /**
283     * The type of header to use. Can be one of the following
284     * values:
285     *
286     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
287     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
288     * - GESHI_HEADER_NONE: No header is outputted.
289     *
290     * @var int
291     */
292    protected $header_type = GESHI_HEADER_PRE;
293
294    /**
295     * Array of permissions for which lexics should be highlighted
296     * @var array
297     */
298    protected $lexic_permissions = array(
299        'KEYWORDS' =>    array(),
300        'COMMENTS' =>    array('MULTI' => true),
301        'REGEXPS' =>     array(),
302        'ESCAPE_CHAR' => true,
303        'BRACKETS' =>    true,
304        'SYMBOLS' =>     false,
305        'STRINGS' =>     true,
306        'NUMBERS' =>     true,
307        'METHODS' =>     true,
308        'SCRIPT' =>      true
309    );
310
311    /**
312     * The time it took to parse the code
313     * @var double
314     */
315    protected $time = 0;
316
317    /**
318     * The content of the header block
319     * @var string
320     */
321    protected $header_content = '';
322
323    /**
324     * The content of the footer block
325     * @var string
326     */
327    protected $footer_content = '';
328
329    /**
330     * The style of the header block
331     * @var string
332     */
333    protected $header_content_style = '';
334
335    /**
336     * The style of the footer block
337     * @var string
338     */
339    protected $footer_content_style = '';
340
341    /**
342     * Tells if a block around the highlighted source should be forced
343     * if not using line numbering
344     * @var boolean
345     */
346    protected $force_code_block = false;
347
348    /**
349     * The styles for hyperlinks in the code
350     * @var array
351     */
352    protected $link_styles = array();
353
354    /**
355     * Whether important blocks should be recognised or not
356     * @var boolean
357     * @deprecated
358     * @todo REMOVE THIS FUNCTIONALITY!
359     */
360    protected $enable_important_blocks = false;
361
362    /**
363     * Styles for important parts of the code
364     * @var string
365     * @deprecated
366     * @todo As above - rethink the whole idea of important blocks as it is buggy and
367     * will be hard to implement in 1.2
368     */
369    protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
370
371    /**
372     * Whether CSS IDs should be added to the code
373     * @var boolean
374     */
375    protected $add_ids = false;
376
377    /**
378     * Lines that should be highlighted extra
379     * @var array
380     */
381    protected $highlight_extra_lines = array();
382
383    /**
384     * Styles of lines that should be highlighted extra
385     * @var array
386     */
387    protected $highlight_extra_lines_styles = array();
388
389    /**
390     * Styles of extra-highlighted lines
391     * @var string
392     */
393    protected $highlight_extra_lines_style = 'background-color: #ffc;';
394
395    /**
396     * The line ending
397     * If null, nl2br() will be used on the result string.
398     * Otherwise, all instances of \n will be replaced with $line_ending
399     * @var string
400     */
401    protected $line_ending = null;
402
403    /**
404     * Number at which line numbers should start at
405     * @var int
406     */
407    protected $line_numbers_start = 1;
408
409    /**
410     * The overall style for this code block
411     * @var string
412     */
413    protected $overall_style = 'font-family:monospace;';
414
415    /**
416     *  The style for the actual code
417     * @var string
418     */
419    protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
420
421    /**
422     * The overall class for this code block
423     * @var string
424     */
425    protected $overall_class = '';
426
427    /**
428     * The overall ID for this code block
429     * @var string
430     */
431    protected $overall_id = '';
432
433    /**
434     * Line number styles
435     * @var string
436     */
437    protected $line_style1 = 'font-weight: normal; vertical-align:top;';
438
439    /**
440     * Line number styles for fancy lines
441     * @var string
442     */
443    protected $line_style2 = 'font-weight: bold; vertical-align:top;';
444
445    /**
446     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
447     * @var string
448     */
449    protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
450
451    /**
452     * How line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
453     * @var int
454     */
455    protected $line_numbers = GESHI_NO_LINE_NUMBERS;
456
457    /**
458     * Flag to decide if multi line spans are allowed. Set it to false to make sure
459     * each tag is closed before and reopened after each linefeed.
460     * @var boolean
461     */
462    protected $allow_multiline_span = true;
463
464    /**
465     * The "nth" value for fancy line highlighting
466     * @var int
467     */
468    protected $line_nth_row = 0;
469
470    /**
471     * The size of tab stops
472     * @var int
473     */
474    protected $tab_width = 8;
475
476    /**
477     * Should we use language-defined tab stop widths?
478     * @var boolean
479     */
480    protected $use_language_tab_width = false;
481
482    /**
483     * Default target for keyword links
484     * @var string
485     */
486    protected $link_target = '';
487
488    /**
489     * The encoding to use for entity encoding
490     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
491     * @var string
492     */
493    protected $encoding = 'utf-8';
494
495    /**
496     * Should keywords be linked?
497     * @var boolean
498     */
499    protected $keyword_links = true;
500
501    /**
502     * Currently loaded language file
503     * @var    string
504     * @since 1.0.7.22
505     */
506    protected $loaded_language = '';
507
508    /**
509     * Whether the caches needed for parsing are built or not
510     *
511     * @var   bool
512     * @since 1.0.8
513     */
514    protected $parse_cache_built = false;
515
516    /**
517     * Work around for Suhosin Patch with disabled /e modifier
518     *
519     * Note from suhosins author in config file:
520     * <blockquote>
521     *   The /e modifier inside <code>preg_replace()</code> allows code execution.
522     *   Often it is the cause for remote code execution exploits. It is wise to
523     *   deactivate this feature and test where in the application it is used.
524     *   The developer using the /e modifier should be made aware that he should
525     *   use <code>preg_replace_callback()</code> instead
526     * </blockquote>
527     *
528     * @var   array
529     * @since 1.0.8
530     */
531    protected $_kw_replace_group = 0;
532    protected $_rx_key = 0;
533
534    /**
535     * some "callback parameters" for handle_multiline_regexps
536     *
537     * @since  1.0.8
538     * @access private
539     * @var    string
540     */
541    protected $_hmr_before = '';
542    protected $_hmr_replace = '';
543    protected $_hmr_after = '';
544    protected $_hmr_key = 0;
545
546    /**
547     * Creates a new GeSHi object, with source and language
548     *
549     * @param string $source   The source code to highlight
550     * @param string $language The language to highlight the source with
551     * @param string $path     The path to the language file directory. <b>This
552     *               is deprecated!</b> I've backported the auto path
553     *               detection from the 1.1.X dev branch, so now it
554     *               should be automatically set correctly. If you have
555     *               renamed the language directory however, you will
556     *               still need to set the path using this parameter or
557     *               {@link GeSHi->set_language_path()}
558     * @since 1.0.0
559     */
560    public function __construct($source = '', $language = '', $path = '') {
561        if ( is_string($source) && ($source !== '') ) {
562            $this->set_source($source);
563        }
564        if ( is_string($language) && ($language !== '') ) {
565            $this->set_language($language);
566        }
567        $this->set_language_path($path);
568    }
569
570    /**
571     * Returns the version of GeSHi
572     *
573     * @return string
574     * @since  1.0.8.11
575     */
576    public function get_version()
577    {
578        return GESHI_VERSION;
579    }
580
581    /**
582     * Returns an error message associated with the last GeSHi operation,
583     * or false if no error has occurred
584     *
585     * @return string|false An error message if there has been an error, else false
586     * @since  1.0.0
587     */
588    public function error() {
589        if ($this->error) {
590            //Put some template variables for debugging here ...
591            $debug_tpl_vars = array(
592                '{LANGUAGE}' => $this->language,
593                '{PATH}' => $this->language_path
594            );
595            $msg = str_replace(
596                array_keys($debug_tpl_vars),
597                array_values($debug_tpl_vars),
598                $this->error_messages[$this->error]);
599
600            return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
601        }
602        return false;
603    }
604
605    /**
606     * Gets a human-readable language name (thanks to Simon Patterson
607     * for the idea :))
608     *
609     * @return string The name for the current language
610     * @since  1.0.2
611     */
612    public function get_language_name() {
613        if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
614            return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
615        }
616        return $this->language_data['LANG_NAME'];
617    }
618
619    /**
620     * Sets the source code for this object
621     *
622     * @param string $source The source code to highlight
623     * @since 1.0.0
624     */
625    public function set_source($source) {
626        $this->source = $source;
627        $this->highlight_extra_lines = array();
628    }
629
630    /**
631     * Sets the language for this object
632     *
633     * @note since 1.0.8 this function won't reset language-settings by default anymore!
634     *       if you need this set $force_reset = true
635     *
636     * @param string $language    The name of the language to use
637     * @param bool   $force_reset
638     * @since 1.0.0
639     */
640    public function set_language($language, $force_reset = false) {
641        $this->error = false;
642        $this->strict_mode = GESHI_NEVER;
643
644        if ($force_reset) {
645            $this->loaded_language = false;
646        }
647
648        //Clean up the language name to prevent malicious code injection
649        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
650
651        $language = strtolower($language);
652
653        //Retreive the full filename
654        $file_name = $this->language_path . $language . '.php';
655        if ($file_name == $this->loaded_language) {
656            // this language is already loaded!
657            return;
658        }
659
660        $this->language = $language;
661
662        //Check if we can read the desired file
663        if (!is_readable($file_name)) {
664            $this->error = GESHI_ERROR_NO_SUCH_LANG;
665            return;
666        }
667
668        // Load the language for parsing
669        $this->load_language($file_name);
670    }
671
672    /**
673     * Sets the path to the directory containing the language files. Note
674     * that this path is relative to the directory of the script that included
675     * geshi.php, NOT geshi.php itself.
676     *
677     * @param string $path The path to the language directory
678     * @since 1.0.0
679     * @deprecated The path to the language files should now be automatically
680     *             detected, so this method should no longer be needed. The
681     *             1.1.X branch handles manual setting of the path differently
682     *             so this method will disappear in 1.2.0.
683     */
684    public function set_language_path($path) {
685        if(strpos($path,':')) {
686            //Security Fix to prevent external directories using fopen wrappers.
687            if(DIRECTORY_SEPARATOR == "\\") {
688                if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
689                    return;
690                }
691            } else {
692                return;
693            }
694        }
695        if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
696            //Security Fix to prevent external directories using fopen wrappers.
697            return;
698        }
699        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
700            //Security Fix to prevent external directories using fopen wrappers.
701            return;
702        }
703        if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
704            //Security Fix to prevent external directories using fopen wrappers.
705            return;
706        }
707        if ($path) {
708            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
709            $this->set_language($this->language); // otherwise set_language_path has no effect
710        }
711    }
712
713    /**
714     * Get supported langs or an associative array lang=>full_name.
715     * @param boolean $full_names
716     * @return array
717     */
718    public function get_supported_languages($full_names=false)
719    {
720        // return array
721        $back = array();
722
723        // we walk the lang root
724        $dir = dir($this->language_path);
725
726        // foreach entry
727        while (false !== ($entry = $dir->read()))
728        {
729            $full_path = $this->language_path.$entry;
730
731            // Skip all dirs
732            if (is_dir($full_path)) {
733                continue;
734            }
735
736            // we only want lang.php files
737            if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
738                continue;
739            }
740
741            // Raw lang name is here
742            $langname = $matches[1];
743
744            // We want the fullname too?
745            if ($full_names === true)
746            {
747                if (false !== ($fullname = $this->get_language_fullname($langname)))
748                {
749                    $back[$langname] = $fullname; // we go associative
750                }
751            }
752            else
753            {
754                // just store raw langname
755                $back[] = $langname;
756            }
757        }
758
759        $dir->close();
760
761        return $back;
762    }
763
764    /**
765     * Get full_name for a lang or false.
766     * @param string $language short langname (html4strict for example)
767     * @return mixed
768     */
769    public function get_language_fullname($language)
770    {
771        //Clean up the language name to prevent malicious code injection
772        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
773
774        $language = strtolower($language);
775
776        // get fullpath-filename for a langname
777        $fullpath = $this->language_path.$language.'.php';
778
779        // we need to get contents :S
780        if (false === ($data = file_get_contents($fullpath))) {
781            $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
782            return false;
783        }
784
785        // match the langname
786        if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
787            $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
788            return false;
789        }
790
791        // return fullname for langname
792        return stripcslashes($matches[1]);
793    }
794
795    /**
796     * Sets the type of header to be used.
797     *
798     * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
799     * means more source code but more control over tab width and line-wrapping.
800     * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
801     * control. Default is GESHI_HEADER_PRE.
802     *
803     * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
804     * should be outputted.
805     *
806     * @param int $type The type of header to be used
807     * @since 1.0.0
808     */
809    public function set_header_type($type) {
810        //Check if we got a valid header type
811        if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
812            GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
813            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
814            return;
815        }
816
817        //Set that new header type
818        $this->header_type = $type;
819    }
820
821    /**
822     * Sets the styles for the code that will be outputted
823     * when this object is parsed. The style should be a
824     * string of valid stylesheet declarations
825     *
826     * @param string  $style             The overall style for the outputted code block
827     * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not
828     * @since 1.0.0
829     */
830    public function set_overall_style($style, $preserve_defaults = false) {
831        if (!$preserve_defaults) {
832            $this->overall_style = $style;
833        } else {
834            $this->overall_style .= $style;
835        }
836    }
837
838    /**
839     * Sets the overall classname for this block of code. This
840     * class can then be used in a stylesheet to style this object's
841     * output
842     *
843     * @param string $class The class name to use for this block of code
844     * @since 1.0.0
845     */
846    public function set_overall_class($class) {
847        $this->overall_class = $class;
848    }
849
850    /**
851     * Sets the overall id for this block of code. This id can then
852     * be used in a stylesheet to style this object's output
853     *
854     * @param string $id The ID to use for this block of code
855     * @since 1.0.0
856     */
857    public function set_overall_id($id) {
858        $this->overall_id = $id;
859    }
860
861    /**
862     * Sets whether CSS classes should be used to highlight the source. Default
863     * is off, calling this method with no arguments will turn it on
864     *
865     * @param boolean $flag Whether to turn classes on or not
866     * @since 1.0.0
867     */
868    public function enable_classes($flag = true) {
869        $this->use_classes = ($flag) ? true : false;
870    }
871
872    /**
873     * Sets the style for the actual code. This should be a string
874     * containing valid stylesheet declarations. If $preserve_defaults is
875     * true, then styles are merged with the default styles, with the
876     * user defined styles having priority
877     *
878     * Note: Use this method to override any style changes you made to
879     * the line numbers if you are using line numbers, else the line of
880     * code will have the same style as the line number! Consult the
881     * GeSHi documentation for more information about this.
882     *
883     * @param string  $style             The style to use for actual code
884     * @param boolean $preserve_defaults Whether to merge the current styles with the new styles
885     * @since 1.0.2
886     */
887    public function set_code_style($style, $preserve_defaults = false) {
888        if (!$preserve_defaults) {
889            $this->code_style = $style;
890        } else {
891            $this->code_style .= $style;
892        }
893    }
894
895    /**
896     * Sets the styles for the line numbers.
897     *
898     * @param string         $style1 The style for the line numbers that are "normal"
899     * @param string|boolean $style2 If a string, this is the style of the line
900     *        numbers that are "fancy", otherwise if boolean then this
901     *        defines whether the normal styles should be merged with the
902     *        new normal styles or not
903     * @param boolean        $preserve_defaults If set, is the flag for whether to merge the "fancy"
904     *        styles with the current styles or not
905     * @since 1.0.2
906     */
907    public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
908        //Check if we got 2 or three parameters
909        if (is_bool($style2)) {
910            $preserve_defaults = $style2;
911            $style2 = '';
912        }
913
914        //Actually set the new styles
915        if (!$preserve_defaults) {
916            $this->line_style1 = $style1;
917            $this->line_style2 = $style2;
918        } else {
919            $this->line_style1 .= $style1;
920            $this->line_style2 .= $style2;
921        }
922    }
923
924    /**
925     * Sets whether line numbers should be displayed.
926     *
927     * Valid values for the first parameter are:
928     *
929     *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
930     *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
931     *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
932     *
933     * For fancy line numbers, the second parameter is used to signal which lines
934     * are to be fancy. For example, if the value of this parameter is 5 then every
935     * 5th line will be fancy.
936     *
937     * @param int $flag    How line numbers should be displayed
938     * @param int $nth_row Defines which lines are fancy
939     * @since 1.0.0
940     */
941    public function enable_line_numbers($flag, $nth_row = 5) {
942        if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
943            && GESHI_FANCY_LINE_NUMBERS != $flag) {
944            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
945        }
946        $this->line_numbers = $flag;
947        $this->line_nth_row = $nth_row;
948    }
949
950    /**
     * Sets whether spans and other HTML markup generated by GeSHi can
952     * span over multiple lines or not. Defaults to true to reduce overhead.
953     * Set it to false if you want to manipulate the output or manually display
954     * the code in an ordered list.
955     *
     * @param boolean $flag Whether multiline spans are allowed or not
957     * @since 1.0.7.22
958     */
959    public function enable_multiline_span($flag) {
960        $this->allow_multiline_span = (bool) $flag;
961    }
962
963    /**
964     * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
965     *
966     * @see enable_multiline_span
967     * @return bool
968     */
969    public function get_multiline_span() {
970        return $this->allow_multiline_span;
971    }
972
973    /**
974     * Sets the style for a keyword group. If $preserve_defaults is
975     * true, then styles are merged with the default styles, with the
976     * user defined styles having priority
977     *
978     * @param int     $key               The key of the keyword group to change the styles of
979     * @param string  $style             The style to make the keywords
980     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
981     *                to overwrite them
982     * @since 1.0.0
983     */
984    public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
985        //Set the style for this keyword group
986        if('*' == $key) {
987            foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) {
988                if (!$preserve_defaults) {
989                    $this->language_data['STYLES']['KEYWORDS'][$_key] = $style;
990                } else {
991                    $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style;
992                }
993            }
994        } else {
995            if (!$preserve_defaults) {
996                $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
997            } else {
998                $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
999            }
1000        }
1001
1002        //Update the lexic permissions
1003        if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1004            $this->lexic_permissions['KEYWORDS'][$key] = true;
1005        }
1006    }
1007
1008    /**
1009     * Turns highlighting on/off for a keyword group
1010     *
1011     * @param int     $key  The key of the keyword group to turn on or off
1012     * @param boolean $flag Whether to turn highlighting for that group on or off
1013     * @since 1.0.0
1014     */
1015    public function set_keyword_group_highlighting($key, $flag = true) {
1016        $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1017    }
1018
1019    /**
1020     * Sets the styles for comment groups.  If $preserve_defaults is
1021     * true, then styles are merged with the default styles, with the
1022     * user defined styles having priority
1023     *
1024     * @param int     $key               The key of the comment group to change the styles of
1025     * @param string  $style             The style to make the comments
1026     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1027     *                to overwrite them
1028     * @since 1.0.0
1029     */
1030    public function set_comments_style($key, $style, $preserve_defaults = false) {
1031        if('*' == $key) {
1032            foreach($this->language_data['STYLES']['COMMENTS'] as $_key => $_value) {
1033                if (!$preserve_defaults) {
1034                    $this->language_data['STYLES']['COMMENTS'][$_key] = $style;
1035                } else {
1036                    $this->language_data['STYLES']['COMMENTS'][$_key] .= $style;
1037                }
1038            }
1039        } else {
1040            if (!$preserve_defaults) {
1041                $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1042            } else {
1043                $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1044            }
1045        }
1046    }
1047
1048    /**
1049     * Turns highlighting on/off for comment groups
1050     *
1051     * @param int     $key  The key of the comment group to turn on or off
1052     * @param boolean $flag Whether to turn highlighting for that group on or off
1053     * @since 1.0.0
1054     */
1055    public function set_comments_highlighting($key, $flag = true) {
1056        $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1057    }
1058
1059    /**
1060     * Sets the styles for escaped characters. If $preserve_defaults is
1061     * true, then styles are merged with the default styles, with the
1062     * user defined styles having priority
1063     *
1064     * @param string  $style             The style to make the escape characters
1065     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1066     *                                   to overwrite them
     * @param int     $group             Tells the group of escape characters for which style should be set.
1068     * @since 1.0.0
1069     */
1070    public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1071        if (!$preserve_defaults) {
1072            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1073        } else {
1074            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1075        }
1076    }
1077
1078    /**
1079     * Turns highlighting on/off for escaped characters
1080     *
1081     * @param boolean $flag Whether to turn highlighting for escape characters on or off
1082     * @since 1.0.0
1083     */
1084    public function set_escape_characters_highlighting($flag = true) {
1085        $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1086    }
1087
1088    /**
1089     * Sets the styles for brackets. If $preserve_defaults is
1090     * true, then styles are merged with the default styles, with the
1091     * user defined styles having priority
1092     *
1093     * This method is DEPRECATED: use set_symbols_style instead.
1094     * This method will be removed in 1.2.X
1095     *
1096     * @param string  $style             The style to make the brackets
1097     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1098     *                to overwrite them
1099     * @since 1.0.0
1100     * @deprecated In favour of set_symbols_style
1101     */
1102    public function set_brackets_style($style, $preserve_defaults = false) {
1103        if (!$preserve_defaults) {
1104            $this->language_data['STYLES']['BRACKETS'][0] = $style;
1105        } else {
1106            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1107        }
1108    }
1109
1110    /**
1111     * Turns highlighting on/off for brackets
1112     *
1113     * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
1115     *
1116     * @param boolean $flag Whether to turn highlighting for brackets on or off
1117     * @since 1.0.0
1118     * @deprecated In favour of set_symbols_highlighting
1119     */
1120    public function set_brackets_highlighting($flag) {
1121        $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1122    }
1123
1124    /**
1125     * Sets the styles for symbols. If $preserve_defaults is
1126     * true, then styles are merged with the default styles, with the
1127     * user defined styles having priority
1128     *
1129     * @param string  $style             The style to make the symbols
1130     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1131     *                                   to overwrite them
1132     * @param int     $group             Tells the group of symbols for which style should be set.
1133     * @since 1.0.1
1134     */
1135    public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1136        // Update the style of symbols
1137        if (!$preserve_defaults) {
1138            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1139        } else {
1140            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1141        }
1142
1143        // For backward compatibility
1144        if (0 == $group) {
1145            $this->set_brackets_style ($style, $preserve_defaults);
1146        }
1147    }
1148
1149    /**
1150     * Turns highlighting on/off for symbols
1151     *
1152     * @param boolean $flag Whether to turn highlighting for symbols on or off
1153     * @since 1.0.0
1154     */
1155    public function set_symbols_highlighting($flag) {
1156        // Update lexic permissions for this symbol group
1157        $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1158
1159        // For backward compatibility
1160        $this->set_brackets_highlighting ($flag);
1161    }
1162
1163    /**
1164     * Sets the styles for strings. If $preserve_defaults is
1165     * true, then styles are merged with the default styles, with the
1166     * user defined styles having priority
1167     *
     * @param string  $style             The style to make the strings
1169     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1170     *                                   to overwrite them
1171     * @param int     $group             Tells the group of strings for which style should be set.
1172     * @since 1.0.0
1173     */
1174    public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1175        if (!$preserve_defaults) {
1176            $this->language_data['STYLES']['STRINGS'][$group] = $style;
1177        } else {
1178            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1179        }
1180    }
1181
1182    /**
1183     * Turns highlighting on/off for strings
1184     *
1185     * @param boolean $flag Whether to turn highlighting for strings on or off
1186     * @since 1.0.0
1187     */
1188    public function set_strings_highlighting($flag) {
1189        $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1190    }
1191
1192    /**
1193     * Sets the styles for strict code blocks. If $preserve_defaults is
1194     * true, then styles are merged with the default styles, with the
1195     * user defined styles having priority
1196     *
1197     * @param string  $style             The style to make the script blocks
1198     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1199     *                                   to overwrite them
1200     * @param int     $group             Tells the group of script blocks for which style should be set.
1201     * @since 1.0.8.4
1202     */
1203    public function set_script_style($style, $preserve_defaults = false, $group = 0) {
1204        // Update the style of symbols
1205        if (!$preserve_defaults) {
1206            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1207        } else {
1208            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1209        }
1210    }
1211
1212    /**
1213     * Sets the styles for numbers. If $preserve_defaults is
1214     * true, then styles are merged with the default styles, with the
1215     * user defined styles having priority
1216     *
1217     * @param string  $style             The style to make the numbers
1218     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1219     *                                   to overwrite them
1220     * @param int     $group             Tells the group of numbers for which style should be set.
1221     * @since 1.0.0
1222     */
1223    public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1224        if (!$preserve_defaults) {
1225            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1226        } else {
1227            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1228        }
1229    }
1230
1231    /**
1232     * Turns highlighting on/off for numbers
1233     *
1234     * @param boolean $flag Whether to turn highlighting for numbers on or off
1235     * @since 1.0.0
1236     */
1237    public function set_numbers_highlighting($flag) {
1238        $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1239    }
1240
1241    /**
1242     * Sets the styles for methods. $key is a number that references the
1243     * appropriate "object splitter" - see the language file for the language
1244     * you are highlighting to get this number. If $preserve_defaults is
1245     * true, then styles are merged with the default styles, with the
1246     * user defined styles having priority
1247     *
1248     * @param int     $key               The key of the object splitter to change the styles of
1249     * @param string  $style             The style to make the methods
1250     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
1251     *                                   to overwrite them
1252     * @since 1.0.0
1253     */
1254    public function set_methods_style($key, $style, $preserve_defaults = false) {
1255        if (!$preserve_defaults) {
1256            $this->language_data['STYLES']['METHODS'][$key] = $style;
1257        } else {
1258            $this->language_data['STYLES']['METHODS'][$key] .= $style;
1259        }
1260    }
1261
1262    /**
1263     * Turns highlighting on/off for methods
1264     *
1265     * @param boolean $flag Whether to turn highlighting for methods on or off
1266     * @since 1.0.0
1267     */
1268    public function set_methods_highlighting($flag) {
1269        $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1270    }
1271
1272    /**
1273     * Sets the styles for regexps. If $preserve_defaults is
1274     * true, then styles are merged with the default styles, with the
1275     * user defined styles having priority
1276     *
     * @param int     $key               The key of the regular expression group to change the styles of
     * @param string  $style             The style to make the regular expression matches
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
1282     * @since 1.0.0
1283     */
1284    public function set_regexps_style($key, $style, $preserve_defaults = false) {
1285        if (!$preserve_defaults) {
1286            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1287        } else {
1288            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1289        }
1290    }
1291
1292    /**
1293     * Turns highlighting on/off for regexps
1294     *
1295     * @param int     $key  The key of the regular expression group to turn on or off
1296     * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
1297     * @since 1.0.0
1298     */
1299    public function set_regexps_highlighting($key, $flag) {
1300        $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1301    }
1302
1303    /**
1304     * Sets whether a set of keywords are checked for in a case sensitive manner
1305     *
1306     * @param int     $key  The key of the keyword group to change the case sensitivity of
1307     * @param boolean $case Whether to check in a case sensitive manner or not
1308     * @since 1.0.0
1309     */
1310    public function set_case_sensitivity($key, $case) {
1311        $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1312    }
1313
1314    /**
1315     * Sets the case that keywords should use when found. Use the constants:
1316     *
1317     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1318     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1319     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1320     *
1321     * @param int $case A constant specifying what to do with matched keywords
1322     * @since 1.0.1
1323     */
1324    public function set_case_keywords($case) {
1325        if (in_array($case, array(
1326            GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1327            $this->language_data['CASE_KEYWORDS'] = $case;
1328        }
1329    }
1330
1331    /**
1332     * Sets how many spaces a tab is substituted for
1333     *
     * Widths below one (after integer conversion) are replaced by the default of 8
1335     *
1336     * @param int $width The tab width
1337     * @since 1.0.0
1338     */
1339    public function set_tab_width($width) {
1340        $this->tab_width = intval($width);
1341
1342        //Check if it fit's the constraints:
1343        if ($this->tab_width < 1) {
1344            //Return it to the default
1345            $this->tab_width = 8;
1346        }
1347    }
1348
1349    /**
     * Sets whether or not to use the tab-stop width specified by the language
1351     *
1352     * @param boolean $use Whether to use language-specific tab-stop widths
1353     * @since 1.0.7.20
1354     */
1355    public function set_use_language_tab_width($use) {
1356        $this->use_language_tab_width = (bool) $use;
1357    }
1358
1359    /**
1360     * Returns the tab width to use, based on the current language and user
1361     * preference
1362     *
1363     * @return int Tab width
1364     * @since 1.0.7.20
1365     */
1366    public function get_real_tab_width() {
1367        if (!$this->use_language_tab_width ||
1368            !isset($this->language_data['TAB_WIDTH'])) {
1369            return $this->tab_width;
1370        } else {
1371            return $this->language_data['TAB_WIDTH'];
1372        }
1373    }
1374
1375    /**
1376     * Enables/disables strict highlighting. Default is off, calling this
1377     * method without parameters will turn it on. See documentation
1378     * for more details on strict mode and where to use it.
1379     *
1380     * @param boolean $mode Whether to enable strict mode or not
1381     * @since 1.0.0
1382     */
1383    public function enable_strict_mode($mode = true) {
1384        if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1385            $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1386        }
1387    }
1388
1389    /**
1390     * Disables all highlighting
1391     *
1392     * @since 1.0.0
1393     * @todo  Rewrite with array traversal
1394     * @deprecated In favour of enable_highlighting
1395     */
1396    public function disable_highlighting() {
1397        $this->enable_highlighting(false);
1398    }
1399
1400    /**
1401     * Enables all highlighting
1402     *
1403     * The optional flag parameter was added in version 1.0.7.21 and can be used
1404     * to enable (true) or disable (false) all highlighting.
1405     *
1406     * @since 1.0.0
1407     * @param boolean $flag A flag specifying whether to enable or disable all highlighting
1408     * @todo  Rewrite with array traversal
1409     */
1410    public function enable_highlighting($flag = true) {
1411        $flag = $flag ? true : false;
1412        foreach ($this->lexic_permissions as $key => $value) {
1413            if (is_array($value)) {
1414                foreach ($value as $k => $v) {
1415                    $this->lexic_permissions[$key][$k] = $flag;
1416                }
1417            } else {
1418                $this->lexic_permissions[$key] = $flag;
1419            }
1420        }
1421
1422        // Context blocks
1423        $this->enable_important_blocks = $flag;
1424    }
1425
1426    /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or 'text' if no language is registered for that extension
1429     *
1430     * @param string $extension The extension to get a language name for
1431     * @param array  $lookup    A lookup array to use instead of the default one
1432     * @since 1.0.5
1433     * @todo Re-think about how this method works (maybe make it private and/or make it
1434     *       a extension->lang lookup?)
1435     * @return int|string
1436     */
1437    public static function get_language_name_from_extension( $extension, $lookup = array() ) {
1438        $extension = strtolower($extension);
1439
1440        if ( !is_array($lookup) || empty($lookup)) {
1441            $lookup = array(
1442                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1443                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1444                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1445                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1446                'abap' => array('abap'),
1447                'actionscript' => array('as'),
1448                'ada' => array('a', 'ada', 'adb', 'ads'),
1449                'apache' => array('conf'),
1450                'asm' => array('ash', 'asm', 'inc'),
1451                'asp' => array('asp'),
1452                'bash' => array('sh'),
1453                'bf' => array('bf'),
1454                'c' => array('c', 'h'),
1455                'c_mac' => array('c', 'h'),
1456                'caddcl' => array(),
1457                'cadlisp' => array(),
1458                'cdfg' => array('cdfg'),
1459                'cobol' => array('cbl'),
1460                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1461                'csharp' => array('cs'),
1462                'css' => array('css'),
1463                'd' => array('d'),
1464                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1465                'diff' => array('diff', 'patch'),
1466                'dos' => array('bat', 'cmd'),
1467                'gdb' => array('kcrash', 'crash', 'bt'),
1468                'gettext' => array('po', 'pot'),
1469                'gml' => array('gml'),
1470                'gnuplot' => array('plt'),
1471                'groovy' => array('groovy'),
1472                'haskell' => array('hs'),
1473                'haxe' => array('hx'),
1474                'html4strict' => array('html', 'htm'),
1475                'ini' => array('ini', 'desktop'),
1476                'java' => array('java'),
1477                'javascript' => array('js'),
1478                'klonec' => array('kl1'),
1479                'klonecpp' => array('klx'),
1480                'latex' => array('tex'),
1481                'lisp' => array('lisp'),
1482                'lua' => array('lua'),
1483                'matlab' => array('m'),
1484                'mpasm' => array(),
1485                'mysql' => array('sql'),
1486                'nsis' => array(),
1487                'objc' => array(),
1488                'oobas' => array(),
1489                'oracle8' => array(),
1490                'oracle10' => array(),
1491                'pascal' => array('pas'),
1492                'perl' => array('pl', 'pm'),
1493                'php' => array('php', 'php5', 'phtml', 'phps'),
1494                'povray' => array('pov'),
1495                'providex' => array('pvc', 'pvx'),
1496                'prolog' => array('pl'),
1497                'python' => array('py'),
1498                'qbasic' => array('bi'),
1499                'reg' => array('reg'),
1500                'ruby' => array('rb'),
1501                'sas' => array('sas'),
1502                'scala' => array('scala'),
1503                'scheme' => array('scm'),
1504                'scilab' => array('sci'),
1505                'smalltalk' => array('st'),
1506                'smarty' => array(),
1507                'tcl' => array('tcl'),
1508                'text' => array('txt'),
1509                'vb' => array('bas'),
1510                'vbnet' => array(),
1511                'visualfoxpro' => array(),
1512                'whitespace' => array('ws'),
1513                'xml' => array('xml', 'svg', 'xrc'),
1514                'z80' => array('z80', 'asm', 'inc')
1515            );
1516        }
1517
1518        foreach ($lookup as $lang => $extensions) {
1519            if (in_array($extension, $extensions)) {
1520                return $lang;
1521            }
1522        }
1523
1524        return 'text';
1525    }
1526
1527    /**
1528     * Given a file name, this method loads its contents in, and attempts
1529     * to set the language automatically. An optional lookup table can be
1530     * passed for looking up the language name. If not specified a default
1531     * table is used
1532     *
1533     * The language table is in the form
1534     * <pre>array(
1535     *   'lang_name' => array('extension', 'extension', ...),
1536     *   'lang_name' ...
1537     * );</pre>
1538     *
1539     * @param string $file_name The filename to load the source from
1540     * @param array  $lookup    A lookup array to use instead of the default one
1541     * @todo Complete rethink of this and above method
1542     * @since 1.0.5
1543     */
1544    public function load_from_file($file_name, $lookup = array()) {
1545        if (is_readable($file_name)) {
1546            $this->set_source(file_get_contents($file_name));
1547            $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1548        } else {
1549            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1550        }
1551    }
1552
1553    /**
1554     * Adds a keyword to a keyword group for highlighting
1555     *
1556     * @param int    $key  The key of the keyword group to add the keyword to
1557     * @param string $word The word to add to the keyword group
1558     * @since 1.0.0
1559     */
1560    public function add_keyword($key, $word) {
1561        if (!is_array($this->language_data['KEYWORDS'][$key])) {
1562            $this->language_data['KEYWORDS'][$key] = array();
1563        }
1564        if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1565            $this->language_data['KEYWORDS'][$key][] = $word;
1566
1567            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1568            if ($this->parse_cache_built) {
1569                $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1570                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1571            }
1572        }
1573    }
1574
1575    /**
1576     * Removes a keyword from a keyword group
1577     *
1578     * @param int    $key       The key of the keyword group to remove the keyword from
1579     * @param string $word      The word to remove from the keyword group
     * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
1581     *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1582     *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1583     *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1584     *               it might be too expensive to recompile the regexp list for every removal if you want to
1585     *               remove a lot of keywords.
1586     * @since 1.0.0
1587     */
1588    public function remove_keyword($key, $word, $recompile = true) {
1589        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1590        if ($key_to_remove !== false) {
1591            unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1592
1593            //NEW in 1.0.8, optionally recompile keyword group
1594            if ($recompile && $this->parse_cache_built) {
1595                $this->optimize_keyword_group($key);
1596            }
1597        }
1598    }
1599
1600    /**
1601     * Creates a new keyword group
1602     *
1603     * @param int     $key            The key of the keyword group to create
1604     * @param string  $styles         The styles for the keyword group
     * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
1606     * @param array   $words          The words to use for the keyword group
1607     * @since 1.0.0
1608     * @return bool
1609     */
1610    public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1611        $words = (array) $words;
1612        if  (empty($words)) {
1613            // empty word lists mess up highlighting
1614            return false;
1615        }
1616
1617        //Add the new keyword group internally
1618        $this->language_data['KEYWORDS'][$key] = $words;
1619        $this->lexic_permissions['KEYWORDS'][$key] = true;
1620        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1621        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1622
1623        //NEW in 1.0.8, cache keyword regexp
1624        if ($this->parse_cache_built) {
1625            $this->optimize_keyword_group($key);
1626        }
1627        return true;
1628    }
1629
1630    /**
1631     * Removes a keyword group
1632     *
1633     * @param int $key The key of the keyword group to remove
1634     * @since 1.0.0
1635     */
1636    public function remove_keyword_group ($key) {
1637        //Remove the keyword group internally
1638        unset($this->language_data['KEYWORDS'][$key]);
1639        unset($this->lexic_permissions['KEYWORDS'][$key]);
1640        unset($this->language_data['CASE_SENSITIVE'][$key]);
1641        unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1642
1643        //NEW in 1.0.8
1644        unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1645    }
1646
1647    /**
     * Compiles the optimized regexp list for a keyword group
1649     *
1650     * @param int $key The key of the keyword group to compile & optimize
1651     * @since 1.0.8
1652     */
1653    public function optimize_keyword_group($key) {
1654        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1655            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1656        $space_as_whitespace = false;
1657        if(isset($this->language_data['PARSER_CONTROL'])) {
1658            if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1659                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1660                    $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1661                }
1662                if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1663                    if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1664                        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1665                    }
1666                }
1667            }
1668        }
1669        if($space_as_whitespace) {
1670            foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1671                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1672                    str_replace(" ", "\\s+", $rxv);
1673            }
1674        }
1675    }
1676
1677    /**
1678     * Sets the content of the header block
1679     *
1680     * @param string $content The content of the header block
1681     * @since 1.0.2
1682     */
    public function set_header_content($content) {
        // Stored exactly as given; this setter neither validates nor escapes it
        $this->header_content = $content;
    }
1686
1687    /**
1688     * Sets the content of the footer block
1689     *
1690     * @param string $content The content of the footer block
1691     * @since 1.0.2
1692     */
    public function set_footer_content($content) {
        // Stored exactly as given; this setter neither validates nor escapes it
        $this->footer_content = $content;
    }
1696
1697    /**
1698     * Sets the style for the header content
1699     *
1700     * @param string $style The style for the header content
1701     * @since 1.0.2
1702     */
    public function set_header_content_style($style) {
        // Replaces any previously set header style; the string is kept verbatim
        $this->header_content_style = $style;
    }
1706
1707    /**
1708     * Sets the style for the footer content
1709     *
1710     * @param string $style The style for the footer content
1711     * @since 1.0.2
1712     */
    public function set_footer_content_style($style) {
        // Replaces any previously set footer style; the string is kept verbatim
        $this->footer_content_style = $style;
    }
1716
1717    /**
1718     * Sets whether to force a surrounding block around
1719     * the highlighted code or not
1720     *
1721     * @param boolean $flag Tells whether to enable or disable this feature
1722     * @since 1.0.7.20
1723     */
1724    public function enable_inner_code_block($flag) {
1725        $this->force_code_block = (bool)$flag;
1726    }
1727
1728    /**
1729     * Sets the base URL to be used for keywords
1730     *
1731     * @param int    $group The key of the keyword group to set the URL for
1732     * @param string $url   The URL to set for the group. If {FNAME} is in
1733     *                      the url somewhere, it is replaced by the keyword
1734     *                      that the URL is being made for
1735     * @since 1.0.2
1736     */
    public function set_url_for_keyword_group($group, $url) {
        // The URL template is stored untouched under the group's key; any
        // {FNAME} placeholder is not expanded here
        $this->language_data['URLS'][$group] = $url;
    }
1740
1741    /**
1742     * Sets styles for links in code
1743     *
1744     * @param int    $type   A constant that specifies what state the style is being
1745     *                       set for - e.g. :hover or :visited
1746     * @param string $styles The styles to use for that state
1747     * @since 1.0.2
1748     */
    public function set_link_styles($type, $styles) {
        // Keyed by link state, so a later call for the same $type overwrites
        // the earlier style
        $this->link_styles[$type] = $styles;
    }
1752
1753    /**
1754     * Sets the target for links in code
1755     *
1756     * @param string $target The target for links in the code, e.g. _blank
1757     * @since 1.0.3
1758     */
1759    public function set_link_target($target) {
1760        if (!$target) {
1761            $this->link_target = '';
1762        } else {
1763            $this->link_target = ' target="' . $target . '"';
1764        }
1765    }
1766
1767    /**
1768     * Sets styles for important parts of the code
1769     *
1770     * @param string $styles The styles to use on important parts of the code
1771     * @since 1.0.2
1772     */
    public function set_important_styles($styles) {
        // Replaces the current style string for important blocks as a whole
        $this->important_styles = $styles;
    }
1776
1777    /**
1778     * Sets whether context-important blocks are highlighted
1779     *
1780     * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
1781     * @todo REMOVE THIS SHIZ FROM GESHI!
1782     * @deprecated
1783     * @since 1.0.2
1784     */
1785    public function enable_important_blocks($flag) {
1786        $this->enable_important_blocks = ( $flag ) ? true : false;
1787    }
1788
1789    /**
1790     * Whether CSS IDs should be added to each line
1791     *
1792     * @param boolean $flag If true, IDs will be added to each line.
1793     * @since 1.0.2
1794     */
1795    public function enable_ids($flag = true) {
1796        $this->add_ids = ($flag) ? true : false;
1797    }
1798
1799    /**
1800     * Specifies which lines to highlight extra
1801     *
1802     * The extra style parameter was added in 1.0.7.21.
1803     *
1804     * @param mixed  $lines An array of line numbers to highlight, or just a line
1805     *                      number on its own.
1806     * @param string $style A string specifying the style to use for this line.
1807     *                      If null is specified, the default style is used.
1808     *                      If false is specified, the line will be removed from
1809     *                      special highlighting
1810     * @since 1.0.2
1811     * @todo  Some data replication here that could be cut down on
1812     */
1813    public function highlight_lines_extra($lines, $style = null) {
1814        if (is_array($lines)) {
1815            //Split up the job using single lines at a time
1816            foreach ($lines as $line) {
1817                $this->highlight_lines_extra($line, $style);
1818            }
1819        } else {
1820            //Mark the line as being highlighted specially
1821            $lines = intval($lines);
1822            $this->highlight_extra_lines[$lines] = $lines;
1823
1824            //Decide on which style to use
1825            if ($style === null) { //Check if we should use default style
1826                unset($this->highlight_extra_lines_styles[$lines]);
1827            } elseif ($style === false) { //Check if to remove this line
1828                unset($this->highlight_extra_lines[$lines]);
1829                unset($this->highlight_extra_lines_styles[$lines]);
1830            } else {
1831                $this->highlight_extra_lines_styles[$lines] = $style;
1832            }
1833        }
1834    }
1835
1836    /**
1837     * Sets the style for extra-highlighted lines
1838     *
1839     * @param string $styles The style for extra-highlighted lines
1840     * @since 1.0.2
1841     */
    public function set_highlight_lines_extra_style($styles) {
        // Replaces the single shared style string; per-line styles are kept in
        // a separate property and are not touched here
        $this->highlight_extra_lines_style = $styles;
    }
1845
1846    /**
1847     * Sets the line-ending
1848     *
1849     * @param string $line_ending The new line-ending
1850     * @since 1.0.2
1851     */
1852    public function set_line_ending($line_ending) {
1853        $this->line_ending = (string)$line_ending;
1854    }
1855
1856    /**
1857     * Sets what number line numbers should start at. Should
1858     * be a positive integer, and will be converted to one.
1859     *
1860     * <b>Warning:</b> Using this method will add the "start"
1861     * attribute to the &lt;ol&gt; that is used for line numbering.
1862     * This is <b>not</b> valid XHTML strict, so if that's what you
1863     * care about then don't use this method. Firefox is getting
1864     * support for the CSS method of doing this in 1.1 and Opera
1865     * has support for the CSS method, but (of course) IE doesn't
1866     * so it's not worth doing it the CSS way yet.
1867     *
1868     * @param int $number The number to start line numbers at
1869     * @since 1.0.2
1870     */
1871    public function start_line_numbers_at($number) {
1872        $this->line_numbers_start = abs(intval($number));
1873    }
1874
1875    /**
1876     * Sets the encoding used for htmlspecialchars(), for international
1877     * support.
1878     *
1879     * NOTE: This is not needed for now because htmlspecialchars() is not
1880     * being used (it has a security hole in PHP4 that has not been patched).
1881     * Maybe in a future version it may make a return for speed reasons, but
1882     * I doubt it.
1883     *
1884     * @param string $encoding The encoding to use for the source
1885     * @since 1.0.3
1886     */
1887    public function set_encoding($encoding) {
1888        if ($encoding) {
1889          $this->encoding = strtolower($encoding);
1890        }
1891    }
1892
1893    /**
1894     * Turns linking of keywords on or off.
1895     *
1896     * @param boolean $enable If true, links will be added to keywords
1897     * @since 1.0.2
1898     */
1899    public function enable_keyword_links($enable = true) {
1900        $this->keyword_links = (bool) $enable;
1901    }
1902
1903    /**
1904     * Setup caches needed for styling. This is automatically called in
1905     * parse_code() and get_stylesheet() when appropriate. This function helps
1906     * stylesheet generators as they rely on some style information being
1907     * preprocessed
1908     *
1909     * @since 1.0.8
1910     */
1911    protected function build_style_cache() {
1912        //Build the style cache needed to highlight numbers appropriate
1913        if($this->lexic_permissions['NUMBERS']) {
1914            //First check what way highlighting information for numbers are given
1915            if(!isset($this->language_data['NUMBERS'])) {
1916                $this->language_data['NUMBERS'] = 0;
1917            }
1918
1919            if(is_array($this->language_data['NUMBERS'])) {
1920                $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1921            } else {
1922                $this->language_data['NUMBERS_CACHE'] = array();
1923                if(!$this->language_data['NUMBERS']) {
1924                    $this->language_data['NUMBERS'] =
1925                        GESHI_NUMBER_INT_BASIC |
1926                        GESHI_NUMBER_FLT_NONSCI;
1927                }
1928
1929                for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1930                    //Rearrange style indices if required ...
1931                    if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1932                        $this->language_data['STYLES']['NUMBERS'][$i] =
1933                            $this->language_data['STYLES']['NUMBERS'][1<<$i];
1934                        unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1935                    }
1936
1937                    //Check if this bit is set for highlighting
1938                    if($j&1) {
1939                        //So this bit is set ...
1940                        //Check if it belongs to group 0 or the actual stylegroup
1941                        if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1942                            $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1943                        } else {
1944                            if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1945                                $this->language_data['NUMBERS_CACHE'][0] = 0;
1946                            }
1947                            $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1948                        }
1949                    }
1950                }
1951            }
1952        }
1953    }
1954
1955    /**
1956     * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1957     * This function makes stylesheet generators much faster as they do not need these caches.
1958     *
1959     * @since 1.0.8
1960     */
1961    protected function build_parse_cache() {
1962        // cache symbol regexp
1963        //As this is a costy operation, we avoid doing it for multiple groups ...
1964        //Instead we perform it for all symbols at once.
1965        //
1966        //For this to work, we need to reorganize the data arrays.
1967        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1968            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1969
1970            $this->language_data['SYMBOL_DATA'] = array();
1971            $symbol_preg_multi = array(); // multi char symbols
1972            $symbol_preg_single = array(); // single char symbols
1973            foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1974                if (is_array($symbols)) {
1975                    foreach ($symbols as $sym) {
1976                        $sym = $this->hsc($sym);
1977                        if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1978                            $this->language_data['SYMBOL_DATA'][$sym] = $key;
1979                            if (isset($sym[1])) { // multiple chars
1980                                $symbol_preg_multi[] = preg_quote($sym, '/');
1981                            } else { // single char
1982                                if ($sym == '-') {
1983                                    // don't trigger range out of order error
1984                                    $symbol_preg_single[] = '\-';
1985                                } else {
1986                                    $symbol_preg_single[] = preg_quote($sym, '/');
1987                                }
1988                            }
1989                        }
1990                    }
1991                } else {
1992                    $symbols = $this->hsc($symbols);
1993                    if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1994                        $this->language_data['SYMBOL_DATA'][$symbols] = 0;
1995                        if (isset($symbols[1])) { // multiple chars
1996                            $symbol_preg_multi[] = preg_quote($symbols, '/');
1997                        } elseif ($symbols == '-') {
1998                            // don't trigger range out of order error
1999                            $symbol_preg_single[] = '\-';
2000                        } else { // single char
2001                            $symbol_preg_single[] = preg_quote($symbols, '/');
2002                        }
2003                    }
2004                }
2005            }
2006
2007            //Now we have an array with each possible symbol as the key and the style as the actual data.
2008            //This way we can set the correct style just the moment we highlight ...
2009            //
2010            //Now we need to rewrite our array to get a search string that
2011            $symbol_preg = array();
2012            if (!empty($symbol_preg_multi)) {
2013                rsort($symbol_preg_multi);
2014                $symbol_preg[] = implode('|', $symbol_preg_multi);
2015            }
2016            if (!empty($symbol_preg_single)) {
2017                rsort($symbol_preg_single);
2018                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2019            }
2020            $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2021        }
2022
2023        // cache optimized regexp for keyword matching
2024        // remove old cache
2025        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2026        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2027            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2028                    $this->lexic_permissions['KEYWORDS'][$key]) {
2029                $this->optimize_keyword_group($key);
2030            }
2031        }
2032
2033        // brackets
2034        if ($this->lexic_permissions['BRACKETS']) {
2035            $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2036            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2037                $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2038                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2039                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2040                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2041                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2042                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2043                    '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2044                );
2045            }
2046            else {
2047                $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2048                    '<| class="br0">&#91;|>',
2049                    '<| class="br0">&#93;|>',
2050                    '<| class="br0">&#40;|>',
2051                    '<| class="br0">&#41;|>',
2052                    '<| class="br0">&#123;|>',
2053                    '<| class="br0">&#125;|>',
2054                );
2055            }
2056        }
2057
2058        //Build the parse cache needed to highlight numbers appropriate
2059        if($this->lexic_permissions['NUMBERS']) {
2060            //Check if the style rearrangements have been processed ...
2061            //This also does some preprocessing to check which style groups are useable ...
2062            if(!isset($this->language_data['NUMBERS_CACHE'])) {
2063                $this->build_style_cache();
2064            }
2065
2066            //Number format specification
2067            //All this formats are matched case-insensitively!
2068            static $numbers_format = array(
2069                GESHI_NUMBER_INT_BASIC =>
2070                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2071                GESHI_NUMBER_INT_CSTYLE =>
2072                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2073                GESHI_NUMBER_BIN_SUFFIX =>
2074                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2075                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2076                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2077                GESHI_NUMBER_BIN_PREFIX_0B =>
2078                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2079                GESHI_NUMBER_OCT_PREFIX =>
2080                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2081                GESHI_NUMBER_OCT_PREFIX_0O =>
2082                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2083                GESHI_NUMBER_OCT_PREFIX_AT =>
2084                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2085                GESHI_NUMBER_OCT_SUFFIX =>
2086                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2087                GESHI_NUMBER_HEX_PREFIX =>
2088                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2089                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2090                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2091                GESHI_NUMBER_HEX_SUFFIX =>
2092                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2093                GESHI_NUMBER_FLT_NONSCI =>
2094                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2095                GESHI_NUMBER_FLT_NONSCI_F =>
2096                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2097                GESHI_NUMBER_FLT_SCI_SHORT =>
2098                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2099                GESHI_NUMBER_FLT_SCI_ZERO =>
2100                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2101                );
2102
2103            //At this step we have an associative array with flag groups for a
2104            //specific style or an string denoting a regexp given its index.
2105            $this->language_data['NUMBERS_RXCACHE'] = array();
2106            foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2107                if(is_string($rxdata)) {
2108                    $regexp = $rxdata;
2109                } else {
2110                    //This is a bitfield of number flags to highlight:
2111                    //Build an array, implode them together and make this the actual RX
2112                    $rxuse = array();
2113                    for($i = 1; $i <= $rxdata; $i<<=1) {
2114                        if($rxdata & $i) {
2115                            $rxuse[] = $numbers_format[$i];
2116                        }
2117                    }
2118                    $regexp = implode("|", $rxuse);
2119                }
2120
2121                $this->language_data['NUMBERS_RXCACHE'][$key] =
2122                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
2123            }
2124
2125            if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2126                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2127            }
2128        }
2129
2130        $this->parse_cache_built = true;
2131    }
2132
2133    /**
2134     * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2136     *
2137     * This should only be called ONCE, cos it's SLOW! If you want to highlight
2138     * the same source multiple times, you're better off doing a whole lot of
2139     * str_replaces to replace the &lt;span&gt;s
2140     *
2141     * @since 1.0.0
2142     */
2143    public function parse_code () {
2144        // Start the timer
2145        $start_time = microtime();
2146
2147        // Replace all newlines to a common form.
2148        $code = str_replace("\r\n", "\n", $this->source);
2149        $code = str_replace("\r", "\n", $code);
2150
2151        // Firstly, if there is an error, we won't highlight
2152        if ($this->error) {
2153            //Escape the source for output
2154            $result = $this->hsc($this->source);
2155
2156            //This fix is related to SF#1923020, but has to be applied regardless of
2157            //actually highlighting symbols.
2158            $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2159
2160            // Timing is irrelevant
2161            $this->set_time($start_time, $start_time);
2162            $this->finalise($result);
2163            return $result;
2164        }
2165
2166        // make sure the parse cache is up2date
2167        if (!$this->parse_cache_built) {
2168            $this->build_parse_cache();
2169        }
2170
2171        // Initialise various stuff
2172        $length           = strlen($code);
2173        $COMMENT_MATCHED  = false;
2174        $stuff_to_parse   = '';
2175        $endresult        = '';
2176
2177        // "Important" selections are handled like multiline comments
2178        // @todo GET RID OF THIS SHIZ
2179        if ($this->enable_important_blocks) {
2180            $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2181        }
2182
2183        if ($this->strict_mode) {
2184            // Break the source into bits. Each bit will be a portion of the code
2185            // within script delimiters - for example, HTML between < and >
2186            $k = 0;
2187            $parts = array();
2188            $matches = array();
2189            $next_match_pointer = null;
2190            // we use a copy to unset delimiters on demand (when they are not found)
2191            $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2192            $i = 0;
2193            while ($i < $length) {
2194                $next_match_pos = $length + 1; // never true
2195                foreach ($delim_copy as $dk => $delimiters) {
2196                    if(is_array($delimiters)) {
2197                        foreach ($delimiters as $open => $close) {
2198                            // make sure the cache is setup properly
2199                            if (!isset($matches[$dk][$open])) {
2200                                $matches[$dk][$open] = array(
2201                                    'next_match' => -1,
2202                                    'dk' => $dk,
2203
2204                                    'open' => $open, // needed for grouping of adjacent code blocks (see below)
2205                                    'open_strlen' => strlen($open),
2206
2207                                    'close' => $close,
2208                                    'close_strlen' => strlen($close),
2209                                );
2210                            }
2211                            // Get the next little bit for this opening string
2212                            if ($matches[$dk][$open]['next_match'] < $i) {
2213                                // only find the next pos if it was not already cached
2214                                $open_pos = strpos($code, $open, $i);
2215                                if ($open_pos === false) {
2216                                    // no match for this delimiter ever
2217                                    unset($delim_copy[$dk][$open]);
2218                                    continue;
2219                                }
2220                                $matches[$dk][$open]['next_match'] = $open_pos;
2221                            }
2222                            if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2223                                //So we got a new match, update the close_pos
2224                                $matches[$dk][$open]['close_pos'] =
2225                                    strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2226
2227                                $next_match_pointer =& $matches[$dk][$open];
2228                                $next_match_pos = $matches[$dk][$open]['next_match'];
2229                            }
2230                        }
2231                    } else {
2232                        //So we should match an RegExp as Strict Block ...
2233                        /**
2234                         * The value in $delimiters is expected to be an RegExp
2235                         * containing exactly 2 matching groups:
2236                         *  - Group 1 is the opener
2237                         *  - Group 2 is the closer
2238                         */
2239                        if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2240                            //We got a match ...
2241                            if(isset($matches_rx['start']) && isset($matches_rx['end']))
2242                            {
2243                                $matches[$dk] = array(
2244                                    'next_match' => $matches_rx['start'][1],
2245                                    'dk' => $dk,
2246
2247                                    'close_strlen' => strlen($matches_rx['end'][0]),
2248                                    'close_pos' => $matches_rx['end'][1],
2249                                    );
2250                            } else {
2251                                $matches[$dk] = array(
2252                                    'next_match' => $matches_rx[1][1],
2253                                    'dk' => $dk,
2254
2255                                    'close_strlen' => strlen($matches_rx[2][0]),
2256                                    'close_pos' => $matches_rx[2][1],
2257                                    );
2258                            }
2259                        } else {
2260                            // no match for this delimiter ever
2261                            unset($delim_copy[$dk]);
2262                            continue;
2263                        }
2264
2265                        if ($matches[$dk]['next_match'] <= $next_match_pos) {
2266                            $next_match_pointer =& $matches[$dk];
2267                            $next_match_pos = $matches[$dk]['next_match'];
2268                        }
2269                    }
2270                }
2271
2272                // non-highlightable text
2273                $parts[$k] = array(
2274                    1 => substr($code, $i, $next_match_pos - $i)
2275                );
2276                ++$k;
2277
2278                if ($next_match_pos > $length) {
2279                    // out of bounds means no next match was found
2280                    break;
2281                }
2282
2283                // highlightable code
2284                $parts[$k][0] = $next_match_pointer['dk'];
2285
2286                //Only combine for non-rx script blocks
2287                if(is_array($delim_copy[$next_match_pointer['dk']])) {
2288                    // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2289                    $i = $next_match_pos + $next_match_pointer['open_strlen'];
2290                    while (true) {
2291                        $close_pos = strpos($code, $next_match_pointer['close'], $i);
2292                        if ($close_pos == false) {
2293                            break;
2294                        }
2295                        $i = $close_pos + $next_match_pointer['close_strlen'];
2296                        if ($i == $length) {
2297                            break;
2298                        }
2299                        if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2300                            substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2301                            // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2302                            foreach ($matches as $submatches) {
2303                                foreach ($submatches as $match) {
2304                                    if ($match['next_match'] == $i) {
2305                                        // a different block already matches here!
2306                                        break 3;
2307                                    }
2308                                }
2309                            }
2310                        } else {
2311                            break;
2312                        }
2313                    }
2314                } else {
2315                    $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2316                    $i = $close_pos;
2317                }
2318
2319                if ($close_pos === false) {
2320                    // no closing delimiter found!
2321                    $parts[$k][1] = substr($code, $next_match_pos);
2322                    ++$k;
2323                    break;
2324                } else {
2325                    $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2326                    ++$k;
2327                }
2328            }
2329            unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2330            $num_parts = $k;
2331
2332            if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2333                // when we have only one part, we don't have anything to highlight at all.
2334                // if we have a "maybe" strict language, this should be handled as highlightable code
2335                $parts = array(
2336                    0 => array(
2337                        0 => '',
2338                        1 => ''
2339                    ),
2340                    1 => array(
2341                        0 => null,
2342                        1 => $parts[0][1]
2343                    )
2344                );
2345                $num_parts = 2;
2346            }
2347
2348        } else {
2349            // Not strict mode - simply dump the source into
2350            // the array at index 1 (the first highlightable block)
2351            $parts = array(
2352                0 => array(
2353                    0 => '',
2354                    1 => ''
2355                ),
2356                1 => array(
2357                    0 => null,
2358                    1 => $code
2359                )
2360            );
2361            $num_parts = 2;
2362        }
2363
2364        //Unset variables we won't need any longer
2365        unset($code);
2366
2367        //Preload some repeatedly used values regarding hardquotes ...
2368        $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2369        $hq_strlen = strlen($hq);
2370
2371        //Preload if line numbers are to be generated afterwards
2372        //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2373        $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2374            !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2375
2376        //preload the escape char for faster checking ...
2377        $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2378
2379        // this is used for single-line comments
2380        $sc_disallowed_before = "";
2381        $sc_disallowed_after = "";
2382
2383        if (isset($this->language_data['PARSER_CONTROL'])) {
2384            if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2385                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2386                    $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2387                }
2388                if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2389                    $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2390                }
2391            }
2392        }
2393
2394        //Fix for SF#1932083: Multichar Quotemarks unsupported
2395        $is_string_starter = array();
2396        if ($this->lexic_permissions['STRINGS']) {
2397            foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2398                if (!isset($is_string_starter[$quotemark[0]])) {
2399                    $is_string_starter[$quotemark[0]] = (string)$quotemark;
2400                } elseif (is_string($is_string_starter[$quotemark[0]])) {
2401                    $is_string_starter[$quotemark[0]] = array(
2402                        $is_string_starter[$quotemark[0]],
2403                        $quotemark);
2404                } else {
2405                    $is_string_starter[$quotemark[0]][] = $quotemark;
2406                }
2407            }
2408        }
2409
2410        // Now we go through each part. We know that even-indexed parts are
2411        // code that shouldn't be highlighted, and odd-indexed parts should
2412        // be highlighted
2413        for ($key = 0; $key < $num_parts; ++$key) {
2414            $STRICTATTRS = '';
2415
2416            // Even-indexed parts are not highlighted: escape them via hsc() and move on
2417            if (!($key & 1)) {
2418                // Else not a block to highlight
2419                $endresult .= $this->hsc($parts[$key][1]);
2420                unset($parts[$key]);
2421                continue;
2422            }
2423
2424            $result = '';
2425            $part = $parts[$key][1];
2426
2427            $highlight_part = true;
2428            if ($this->strict_mode && !is_null($parts[$key][0])) {
2429                // get the class key for this block of code
2430                $script_key = $parts[$key][0];
2431                $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2432                if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2433                    $this->lexic_permissions['SCRIPT']) {
2434                    // Add a span element around the source to
2435                    // highlight the overall source block
2436                    if (!$this->use_classes &&
2437                        $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2438                        $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2439                    } else {
2440                        $attributes = ' class="sc' . $script_key . '"';
2441                    }
2442                    $result .= "<span$attributes>";
2443                    $STRICTATTRS = $attributes;
2444                }
2445            }
2446
2447            if ($highlight_part) {
2448                // Now, highlight the code in this block. This code
2449                // is really the engine of GeSHi (along with the method
2450                // parse_non_string_part).
2451
2452                // cache comment regexps incrementally
2453                $next_comment_regexp_key = '';
2454                $next_comment_regexp_pos = -1;
2455                $next_comment_multi_pos = -1;
2456                $next_comment_single_pos = -1;
2457                $comment_regexp_cache_per_key = array();
2458                $comment_multi_cache_per_key = array();
2459                $comment_single_cache_per_key = array();
2460                $next_open_comment_multi = '';
2461                $next_comment_single_key = '';
2462                $escape_regexp_cache_per_key = array();
2463                $next_escape_regexp_key = '';
2464                $next_escape_regexp_pos = -1;
2465
2466                $length = strlen($part);
2467                for ($i = 0; $i < $length; ++$i) {
2468                    // Get the next char
2469                    $char = $part[$i];
2470                    $char_len = 1;
2471
2472                    // update regexp comment cache if needed
2473                    if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2474                        $next_comment_regexp_pos = $length;
2475                        foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2476                            $match_i = false;
2477                            if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2478                                ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2479                                 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2480                                // we have already matched something
2481                                if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2482                                    // this comment is never matched
2483                                    continue;
2484                                }
2485                                $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2486                            } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2487                                $match_i = $match[0][1];
2488
2489                                $comment_regexp_cache_per_key[$comment_key] = array(
2490                                    'key' => $comment_key,
2491                                    'length' => strlen($match[0][0]),
2492                                    'pos' => $match_i
2493                                );
2494                            } else {
2495                                $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2496                                continue;
2497                            }
2498
2499                            if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2500                                $next_comment_regexp_pos = $match_i;
2501                                $next_comment_regexp_key = $comment_key;
2502                                if ($match_i === $i) {
2503                                    break;
2504                                }
2505                            }
2506                        }
2507                    }
2508
2509                    $string_started = false;
2510
2511                    if (isset($is_string_starter[$char])) {
2512                        // Possibly the start of a new string ...
2513
2514                        //Check which starter it was ...
2515                        //Fix for SF#1932083: Multichar Quotemarks unsupported
2516                        if (is_array($is_string_starter[$char])) {
2517                            $char_new = '';
2518                            foreach ($is_string_starter[$char] as $testchar) {
2519                                if ($testchar === substr($part, $i, strlen($testchar)) &&
2520                                    strlen($testchar) > strlen($char_new)) {
2521                                    $char_new = $testchar;
2522                                    $string_started = true;
2523                                }
2524                            }
2525                            if ($string_started) {
2526                                $char = $char_new;
2527                            }
2528                        } else {
2529                            $testchar = $is_string_starter[$char];
2530                            if ($testchar === substr($part, $i, strlen($testchar))) {
2531                                $char = $testchar;
2532                                $string_started = true;
2533                            }
2534                        }
2535                        $char_len = strlen($char);
2536                    }
2537
2538                    if ($string_started && ($i != $next_comment_regexp_pos)) {
2539                        // Hand out the correct style information for this string
2540                        $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2541                        if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2542                            !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2543                            $string_key = 0;
2544                        }
2545
2546                        // parse the stuff before this
2547                        $result .= $this->parse_non_string_part($stuff_to_parse);
2548                        $stuff_to_parse = '';
2549
2550                        if (!$this->use_classes) {
2551                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2552                        } else {
2553                            $string_attributes = ' class="st'.$string_key.'"';
2554                        }
2555
2556                        // now handle the string
2557                        $string = "<span$string_attributes>" . GeSHi::hsc($char);
2558                        $start = $i + $char_len;
2559                        $string_open = true;
2560
2561                        if(empty($this->language_data['ESCAPE_REGEXP'])) {
2562                            $next_escape_regexp_pos = $length;
2563                        }
2564
2565                        do {
2566                            //Get the regular ending pos ...
2567                            $close_pos = strpos($part, $char, $start);
2568                            if(false === $close_pos) {
2569                                $close_pos = $length;
2570                            }
2571
2572                            if($this->lexic_permissions['ESCAPE_CHAR']) {
2573                                // update escape regexp cache if needed
2574                                if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2575                                    $next_escape_regexp_pos = $length;
2576                                    foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2577                                        $match_i = false;
2578                                        if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2579                                            ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2580                                             $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2581                                            // we have already matched something
2582                                            if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2583                                                // this escape regexp never matches
2584                                                continue;
2585                                            }
2586                                            $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2587                                        } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2588                                            $match_i = $match[0][1];
2589
2590                                            $escape_regexp_cache_per_key[$escape_key] = array(
2591                                                'key' => $escape_key,
2592                                                'length' => strlen($match[0][0]),
2593                                                'pos' => $match_i
2594                                            );
2595                                        } else {
2596                                            $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2597                                            continue;
2598                                        }
2599
2600                                        if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2601                                            $next_escape_regexp_pos = $match_i;
2602                                            $next_escape_regexp_key = $escape_key;
2603                                            if ($match_i === $start) {
2604                                                break;
2605                                            }
2606                                        }
2607                                    }
2608                                }
2609
2610                                //Find the next simple escape position
2611                                if('' != $this->language_data['ESCAPE_CHAR']) {
2612                                    $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2613                                    if(false === $simple_escape) {
2614                                        $simple_escape = $length;
2615                                    }
2616                                } else {
2617                                    $simple_escape = $length;
2618                                }
2619                            } else {
2620                                $next_escape_regexp_pos = $length;
2621                                $simple_escape = $length;
2622                            }
2623
2624                            if($simple_escape < $next_escape_regexp_pos &&
2625                                $simple_escape < $length &&
2626                                $simple_escape < $close_pos) {
2627                                //The next escape sequence is a simple one ...
2628                                $es_pos = $simple_escape;
2629
2630                                //Add the stuff not in the string yet ...
2631                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2632
2633                                //Get the style for this escaped char ...
2634                                if (!$this->use_classes) {
2635                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2636                                } else {
2637                                    $escape_char_attributes = ' class="es0"';
2638                                }
2639
2640                                //Add the style for the escape char ...
2641                                $string .= "<span$escape_char_attributes>" .
2642                                    GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2643
2644                                //Get the byte AFTER the ESCAPE_CHAR we just found
2645                                $es_char = $part[$es_pos + 1];
2646                                if ($es_char == "\n") {
2647                                    // don't put a newline around newlines
2648                                    $string .= "</span>\n";
2649                                    $start = $es_pos + 2;
2650                                } elseif (ord($es_char) >= 128) {
2651                                    //This is an non-ASCII char (UTF8 or single byte)
2652                                    //This code tries to work around SF#2037598 ...
2653                                    if(function_exists('mb_substr')) {
2654                                        $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2655                                        $string .= $es_char_m . '</span>';
2656                                    } elseif ('utf-8' == $this->encoding) {
2657                                        if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2658                                            "|\xE0[\xA0-\xBF][\x80-\xBF]".
2659                                            "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2660                                            "|\xED[\x80-\x9F][\x80-\xBF]".
2661                                            "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2662                                            "|[\xF1-\xF3][\x80-\xBF]{3}".
2663                                            "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2664                                            $part, $es_char_m, null, $es_pos + 1)) {
2665                                            $es_char_m = $es_char_m[0];
2666                                        } else {
2667                                            $es_char_m = $es_char;
2668                                        }
2669                                        $string .= $this->hsc($es_char_m) . '</span>';
2670                                    } else {
2671                                        $es_char_m = $this->hsc($es_char);
2672                                    }
2673                                    $start = $es_pos + strlen($es_char_m) + 1;
2674                                } else {
2675                                    $string .= $this->hsc($es_char) . '</span>';
2676                                    $start = $es_pos + 2;
2677                                }
2678                            } elseif ($next_escape_regexp_pos < $length &&
2679                                $next_escape_regexp_pos < $close_pos) {
2680                                $es_pos = $next_escape_regexp_pos;
2681                                //Add the stuff not in the string yet ...
2682                                $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2683
2684                                //Get the key and length of this match ...
2685                                $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2686                                $escape_str = substr($part, $es_pos, $escape['length']);
2687                                $escape_key = $escape['key'];
2688
2689                                //Get the style for this escaped char ...
2690                                if (!$this->use_classes) {
2691                                    $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2692                                } else {
2693                                    $escape_char_attributes = ' class="es' . $escape_key . '"';
2694                                }
2695
2696                                //Add the style for the escape char ...
2697                                $string .= "<span$escape_char_attributes>" .
2698                                    $this->hsc($escape_str) . '</span>';
2699
2700                                $start = $es_pos + $escape['length'];
2701                            } else {
2702                                //Copy the remainder of the string ...
2703                                $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2704                                $start = $close_pos + $char_len;
2705                                $string_open = false;
2706                            }
2707                        } while($string_open);
2708
2709                        if ($check_linenumbers) {
2710                            // Are line numbers used? If so, we should end the string before
2711                            // the newline and begin it again (so when <li>s are put in the source
2712                            // remains XHTML compliant)
2713                            // note to self: This opens up possibility of config files specifying
2714                            // that languages can/cannot have multiline strings???
2715                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2716                        }
2717
2718                        $result .= $string;
2719                        $string = '';
2720                        $i = $start - 1;
2721                        continue;
2722                    } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2723                        substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2724                        // The start of a hard quoted string
2725                        if (!$this->use_classes) {
2726                            $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2727                            $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2728                        } else {
2729                            $string_attributes = ' class="st_h"';
2730                            $escape_char_attributes = ' class="es_h"';
2731                        }
2732                        // parse the stuff before this
2733                        $result .= $this->parse_non_string_part($stuff_to_parse);
2734                        $stuff_to_parse = '';
2735
2736                        // now handle the string
2737                        $string = '';
2738
2739                        // look for closing quote
2740                        $start = $i + $hq_strlen;
2741                        while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2742                            $start = $close_pos + 1;
2743                            if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2744                                (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2745                                // make sure this quote is not escaped
2746                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2747                                    if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2748                                        // check whether this quote is escaped or if it is something like '\\'
2749                                        $escape_char_pos = $close_pos - 1;
2750                                        while ($escape_char_pos > 0
2751                                                && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2752                                            --$escape_char_pos;
2753                                        }
2754                                        if (($close_pos - $escape_char_pos) & 1) {
2755                                            // uneven number of escape chars => this quote is escaped
2756                                            continue 2;
2757                                        }
2758                                    }
2759                                }
2760                            }
2761
2762                            // found closing quote
2763                            break;
2764                        }
2765
2766                        //Found the closing delimiter?
2767                        if (!$close_pos) {
2768                            // span till the end of this $part when no closing delimiter is found
2769                            $close_pos = $length;
2770                        }
2771
2772                        //Get the actual string
2773                        $string = substr($part, $i, $close_pos - $i + 1);
2774                        $i = $close_pos;
2775
2776                        // handle escape chars and encode html chars
2777                        // (special because when we have escape chars within our string they may not be escaped)
2778                        if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2779                            $start = 0;
2780                            $new_string = '';
2781                            while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2782                                // html escape stuff before
2783                                $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2784                                // check if this is a hard escape
2785                                foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2786                                    if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2787                                        // indeed, this is a hardescape
2788                                        $new_string .= "<span$escape_char_attributes>" .
2789                                            $this->hsc($hardescape) . '</span>';
2790                                        $start = $es_pos + strlen($hardescape);
2791                                        continue 2;
2792                                    }
2793                                }
2794                                // not a hard escape, but a normal escape
2795                                // they come in pairs of two
2796                                $c = 0;
2797                                while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2798                                    && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2799                                    && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2800                                    $c += 2;
2801                                }
2802                                if ($c) {
2803                                    $new_string .= "<span$escape_char_attributes>" .
2804                                        str_repeat($escaped_escape_char, $c) .
2805                                        '</span>';
2806                                    $start = $es_pos + $c;
2807                                } else {
2808                                    // this is just a single lonely escape char...
2809                                    $new_string .= $escaped_escape_char;
2810                                    $start = $es_pos + 1;
2811                                }
2812                            }
2813                            $string = $new_string . $this->hsc(substr($string, $start));
2814                        } else {
2815                            $string = $this->hsc($string);
2816                        }
2817
2818                        if ($check_linenumbers) {
2819                            // Are line numbers used? If so, we should end the string before
2820                            // the newline and begin it again (so when <li>s are put in the source
2821                            // remains XHTML compliant)
2822                            // note to self: This opens up possibility of config files specifying
2823                            // that languages can/cannot have multiline strings???
2824                            $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2825                        }
2826
2827                        $result .= "<span$string_attributes>" . $string . '</span>';
2828                        $string = '';
2829                        continue;
2830                    } else {
2831                        //Have a look for regexp comments
2832                        if ($i == $next_comment_regexp_pos) {
2833                            $COMMENT_MATCHED = true;
2834                            $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2835                            $test_str = $this->hsc(substr($part, $i, $comment['length']));
2836
2837                            //@todo If remove important do remove here
2838                            if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2839                                if (!$this->use_classes) {
2840                                    $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2841                                } else {
2842                                    $attributes = ' class="co' . $comment['key'] . '"';
2843                                }
2844
2845                                $test_str = "<span$attributes>" . $test_str . "</span>";
2846
2847                                // Short-cut through all the multiline code
2848                                if ($check_linenumbers) {
2849                                    // strreplace to put close span and open span around multiline newlines
2850                                    $test_str = str_replace(
2851                                        "\n", "</span>\n<span$attributes>",
2852                                        str_replace("\n ", "\n&nbsp;", $test_str)
2853                                    );
2854                                }
2855                            }
2856
2857                            $i += $comment['length'] - 1;
2858
2859                            // parse the rest
2860                            $result .= $this->parse_non_string_part($stuff_to_parse);
2861                            $stuff_to_parse = '';
2862                        }
2863
2864                        // If we haven't matched a regexp comment, try multi-line comments
2865                        if (!$COMMENT_MATCHED) {
2866                            // Is this a multiline comment?
2867                            if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2868                                $next_comment_multi_pos = $length;
2869                                foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2870                                    $match_i = false;
2871                                    if (isset($comment_multi_cache_per_key[$open]) &&
2872                                        ($comment_multi_cache_per_key[$open] >= $i ||
2873                                         $comment_multi_cache_per_key[$open] === false)) {
2874                                        // we have already matched something
2875                                        if ($comment_multi_cache_per_key[$open] === false) {
2876                                            // this comment is never matched
2877                                            continue;
2878                                        }
2879                                        $match_i = $comment_multi_cache_per_key[$open];
2880                                    } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2881                                        $comment_multi_cache_per_key[$open] = $match_i;
2882                                    } else {
2883                                        $comment_multi_cache_per_key[$open] = false;
2884                                        continue;
2885                                    }
2886                                    if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2887                                        $next_comment_multi_pos = $match_i;
2888                                        $next_open_comment_multi = $open;
2889                                        if ($match_i === $i) {
2890                                            break;
2891                                        }
2892                                    }
2893                                }
2894                            }
2895                            if ($i == $next_comment_multi_pos) {
2896                                $open = $next_open_comment_multi;
2897                                $close = $this->language_data['COMMENT_MULTI'][$open];
2898                                $open_strlen = strlen($open);
2899                                $close_strlen = strlen($close);
2900                                $COMMENT_MATCHED = true;
2901                                $test_str_match = $open;
2902                                //@todo If remove important do remove here
2903                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2904                                    $open == GESHI_START_IMPORTANT) {
2905                                    if ($open != GESHI_START_IMPORTANT) {
2906                                        if (!$this->use_classes) {
2907                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2908                                        } else {
2909                                            $attributes = ' class="coMULTI"';
2910                                        }
2911                                        $test_str = "<span$attributes>" . $this->hsc($open);
2912                                    } else {
2913                                        if (!$this->use_classes) {
2914                                            $attributes = ' style="' . $this->important_styles . '"';
2915                                        } else {
2916                                            $attributes = ' class="imp"';
2917                                        }
2918
2919                                        // We don't include the start of the comment if it's an
2920                                        // "important" part
2921                                        $test_str = "<span$attributes>";
2922                                    }
2923                                } else {
2924                                    $test_str = $this->hsc($open);
2925                                }
2926
2927                                $close_pos = strpos( $part, $close, $i + $open_strlen );
2928
2929                                if ($close_pos === false) {
2930                                    $close_pos = $length;
2931                                }
2932
2933                                // Short-cut through all the multiline code
2934                                $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2935                                if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2936                                    $test_str_match == GESHI_START_IMPORTANT) &&
2937                                    $check_linenumbers) {
2938
2939                                    // strreplace to put close span and open span around multiline newlines
2940                                    $test_str .= str_replace(
2941                                        "\n", "</span>\n<span$attributes>",
2942                                        str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2943                                    );
2944                                } else {
2945                                    $test_str .= $rest_of_comment;
2946                                }
2947
2948                                if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2949                                    $test_str_match == GESHI_START_IMPORTANT) {
2950                                    $test_str .= '</span>';
2951                                }
2952
2953                                $i = $close_pos + $close_strlen - 1;
2954
2955                                // parse the rest
2956                                $result .= $this->parse_non_string_part($stuff_to_parse);
2957                                $stuff_to_parse = '';
2958                            }
2959                        }
2960
2961                        // If we haven't matched a multiline comment, try single-line comments
2962                        if (!$COMMENT_MATCHED) {
                            // cache potential single-line comment occurrences
2964                            if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2965                                $next_comment_single_pos = $length;
2966                                foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2967                                    $match_i = false;
2968                                    if (isset($comment_single_cache_per_key[$comment_key]) &&
2969                                        ($comment_single_cache_per_key[$comment_key] >= $i ||
2970                                         $comment_single_cache_per_key[$comment_key] === false)) {
2971                                        // we have already matched something
2972                                        if ($comment_single_cache_per_key[$comment_key] === false) {
2973                                            // this comment is never matched
2974                                            continue;
2975                                        }
2976                                        $match_i = $comment_single_cache_per_key[$comment_key];
2977                                    } elseif (
2978                                        // case sensitive comments
2979                                        ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2980                                        ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2981                                        // non case sensitive
2982                                        (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2983                                          (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2984                                        $comment_single_cache_per_key[$comment_key] = $match_i;
2985                                    } else {
2986                                        $comment_single_cache_per_key[$comment_key] = false;
2987                                        continue;
2988                                    }
2989                                    if ($match_i !== false && $match_i < $next_comment_single_pos) {
2990                                        $next_comment_single_pos = $match_i;
2991                                        $next_comment_single_key = $comment_key;
2992                                        if ($match_i === $i) {
2993                                            break;
2994                                        }
2995                                    }
2996                                }
2997                            }
2998                            if ($next_comment_single_pos == $i) {
2999                                $comment_key = $next_comment_single_key;
3000                                $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3001                                $com_len = strlen($comment_mark);
3002
3003                                // This check will find special variables like $# in bash
3004                                // or compiler directives of Delphi beginning {$
3005                                if ((empty($sc_disallowed_before) || ($i == 0) ||
3006                                    (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3007                                    (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3008                                    (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3009                                {
3010                                    // this is a valid comment
3011                                    $COMMENT_MATCHED = true;
3012                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3013                                        if (!$this->use_classes) {
3014                                            $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3015                                        } else {
3016                                            $attributes = ' class="co' . $comment_key . '"';
3017                                        }
3018                                        $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3019                                    } else {
3020                                        $test_str = $this->hsc($comment_mark);
3021                                    }
3022
3023                                    //Check if this comment is the last in the source
3024                                    $close_pos = strpos($part, "\n", $i);
3025                                    $oops = false;
3026                                    if ($close_pos === false) {
3027                                        $close_pos = $length;
3028                                        $oops = true;
3029                                    }
3030                                    $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3031                                    if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3032                                        $test_str .= "</span>";
3033                                    }
3034
3035                                    // Take into account that the comment might be the last in the source
3036                                    if (!$oops) {
3037                                      $test_str .= "\n";
3038                                    }
3039
3040                                    $i = $close_pos;
3041
3042                                    // parse the rest
3043                                    $result .= $this->parse_non_string_part($stuff_to_parse);
3044                                    $stuff_to_parse = '';
3045                                }
3046                            }
3047                        }
3048                    }
3049
3050                    // Where are we adding this char?
3051                    if (!$COMMENT_MATCHED) {
3052                        $stuff_to_parse .= $char;
3053                    } else {
3054                        $result .= $test_str;
3055                        unset($test_str);
3056                        $COMMENT_MATCHED = false;
3057                    }
3058                }
3059                // Parse the last bit
3060                $result .= $this->parse_non_string_part($stuff_to_parse);
3061                $stuff_to_parse = '';
3062            } else {
3063                $result .= $this->hsc($part);
3064            }
3065            // Close the <span> that surrounds the block
3066            if ($STRICTATTRS != '') {
3067                $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3068                $result .= '</span>';
3069            }
3070
3071            $endresult .= $result;
3072            unset($part, $parts[$key], $result);
3073        }
3074
3075        //This fix is related to SF#1923020, but has to be applied regardless of
3076        //actually highlighting symbols.
3077        /** NOTE: memorypeak #3 */
3078        $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3079
3080//        // Parse the last stuff (redundant?)
3081//        $result .= $this->parse_non_string_part($stuff_to_parse);
3082
3083        // Lop off the very first and last spaces
3084//        $result = substr($result, 1, -1);
3085
3086        // We're finished: stop timing
3087        $this->set_time($start_time, microtime());
3088
3089        $this->finalise($endresult);
3090        return $endresult;
3091    }
3092
3093    /**
3094     * Swaps out spaces and tabs for HTML indentation. Not needed if
3095     * the code is in a pre block...
3096     *
3097     * @param  string $result The source to indent (reference!)
3098     * @since  1.0.0
3099     */
3100    protected function indent(&$result) {
3101        /// Replace tabs with the correct number of spaces
3102        if (false !== strpos($result, "\t")) {
3103            $lines = explode("\n", $result);
3104            $result = null;//Save memory while we process the lines individually
3105            $tab_width = $this->get_real_tab_width();
3106            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3107
3108            for ($key = 0, $n = count($lines); $key < $n; $key++) {
3109                $line = $lines[$key];
3110                if (false === strpos($line, "\t")) {
3111                    continue;
3112                }
3113
3114                $pos = 0;
3115                $length = strlen($line);
3116                $lines[$key] = ''; // reduce memory
3117
3118                $IN_TAG = false;
3119                for ($i = 0; $i < $length; ++$i) {
3120                    $char = $line[$i];
3121                    // Simple engine to work out whether we're in a tag.
3122                    // If we are we modify $pos. This is so we ignore HTML
3123                    // in the line and only workout the tab replacement
3124                    // via the actual content of the string
3125                    // This test could be improved to include strings in the
3126                    // html so that < or > would be allowed in user's styles
3127                    // (e.g. quotes: '<' '>'; or similar)
3128                    if ($IN_TAG) {
3129                        if ('>' == $char) {
3130                            $IN_TAG = false;
3131                        }
3132                        $lines[$key] .= $char;
3133                    } elseif ('<' == $char) {
3134                        $IN_TAG = true;
3135                        $lines[$key] .= '<';
3136                    } elseif ('&' == $char) {
3137                        $substr = substr($line, $i + 3, 5);
3138                        $posi = strpos($substr, ';');
3139                        if (false === $posi) {
3140                            ++$pos;
3141                        } else {
3142                            $pos -= $posi+2;
3143                        }
3144                        $lines[$key] .= $char;
3145                    } elseif ("\t" == $char) {
3146                        $str = '';
3147                        // OPTIMISE - move $strs out. Make an array:
3148                        // $tabs = array(
3149                        //  1 => '&nbsp;',
3150                        //  2 => '&nbsp; ',
3151                        //  3 => '&nbsp; &nbsp;' etc etc
3152                        // to use instead of building a string every time
3153                        $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3154                        if (($pos & 1) || 1 == $tab_end_width) {
3155                            $str .= substr($tab_string, 6, $tab_end_width);
3156                        } else {
3157                            $str .= substr($tab_string, 0, $tab_end_width+5);
3158                        }
3159                        $lines[$key] .= $str;
3160                        $pos += $tab_end_width;
3161
3162                        if (false === strpos($line, "\t", $i + 1)) {
3163                            $lines[$key] .= substr($line, $i + 1);
3164                            break;
3165                        }
3166                    } elseif (0 == $pos && ' ' == $char) {
3167                        $lines[$key] .= '&nbsp;';
3168                        ++$pos;
3169                    } else {
3170                        $lines[$key] .= $char;
3171                        ++$pos;
3172                    }
3173                }
3174            }
3175            $result = implode("\n", $lines);
3176            unset($lines);//We don't need the lines separated beyond this --- free them!
3177        }
3178        // Other whitespace
3179        // BenBE: Fix to reduce the number of replacements to be done
3180        $result = preg_replace('/^ /m', '&nbsp;', $result);
3181        $result = str_replace('  ', ' &nbsp;', $result);
3182
3183        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3184            if ($this->line_ending === null) {
3185                $result = nl2br($result);
3186            } else {
3187                $result = str_replace("\n", $this->line_ending, $result);
3188            }
3189        }
3190    }
3191
3192    /**
3193     * Changes the case of a keyword for those languages where a change is asked for
3194     *
3195     * @param  string $instr The keyword to change the case of
3196     * @return string The keyword with its case changed
3197     * @since  1.0.0
3198     */
3199    protected function change_case($instr) {
3200        switch ($this->language_data['CASE_KEYWORDS']) {
3201            case GESHI_CAPS_UPPER:
3202                return strtoupper($instr);
3203            case GESHI_CAPS_LOWER:
3204                return strtolower($instr);
3205            default:
3206                return $instr;
3207        }
3208    }
3209
3210    /**
3211     * Handles replacements of keywords to include markup and links if requested
3212     *
3213     * @param  string $match The keyword to add the Markup to
3214     * @return string The HTML for the match found
3215     * @since  1.0.8
3216     *
3217     * @todo   Get rid of ender in keyword links
3218     */
3219    protected function handle_keyword_replace($match) {
3220        $k = $this->_kw_replace_group;
3221        $keyword = $match[0];
3222        $keyword_match = $match[1];
3223
3224        $before = '';
3225        $after = '';
3226
3227        if ($this->keyword_links) {
3228            // Keyword links have been ebabled
3229
3230            if (isset($this->language_data['URLS'][$k]) &&
3231                $this->language_data['URLS'][$k] != '') {
3232                // There is a base group for this keyword
3233
3234                // Old system: strtolower
3235                //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3236                // New system: get keyword from language file to get correct case
3237                if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3238                    strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3239                    foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3240                        if (strcasecmp($word, $keyword_match) == 0) {
3241                            break;
3242                        }
3243                    }
3244                } else {
3245                    $word = $keyword_match;
3246                }
3247
3248                $before = '<|UR1|"' .
3249                    str_replace(
3250                        array(
3251                            '{FNAME}',
3252                            '{FNAMEL}',
3253                            '{FNAMEU}',
3254                            '{FNAMEUF}',
3255                            '.'),
3256                        array(
3257                            str_replace('+', '%20', urlencode($this->hsc($word))),
3258                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3259                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3260                            str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
3261                            '<DOT>'),
3262                        $this->language_data['URLS'][$k]
3263                    ) . '">';
3264                $after = '</a>';
3265            }
3266        }
3267
3268        return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3269    }
3270
3271    /**
3272     * handles regular expressions highlighting-definitions with callback functions
3273     *
3274     * @note this is a callback, don't use it directly
3275     *
3276     * @param array $matches the matches array
3277     * @return string The highlighted string
3278     * @since 1.0.8
3279     */
3280    protected function handle_regexps_callback($matches) {
3281        // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3282        return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3283    }
3284
3285    /**
3286     * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3287     *
3288     * @note this is a callback, don't use it directly
3289     *
3290     * @param array $matches the matches array
3291     * @return string
3292     * @since 1.0.8
3293     */
3294    protected function handle_multiline_regexps($matches) {
3295        $before = $this->_hmr_before;
3296        $after = $this->_hmr_after;
3297        if ($this->_hmr_replace) {
3298            $replace = $this->_hmr_replace;
3299            $search = array();
3300
3301            foreach (array_keys($matches) as $k) {
3302                $search[] = '\\' . $k;
3303            }
3304
3305            $before = str_replace($search, $matches, $before);
3306            $after = str_replace($search, $matches, $after);
3307            $replace = str_replace($search, $matches, $replace);
3308        } else {
3309            $replace = $matches[0];
3310        }
3311        return $before
3312                    . '<|!REG3XP' . $this->_hmr_key .'!>'
3313                        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3314                    . '|>'
3315              . $after;
3316    }
3317
3318    /**
3319     * Takes a string that has no strings or comments in it, and highlights
3320     * stuff like keywords, numbers and methods.
3321     *
3322     * @param string $stuff_to_parse The string to parse for keyword, numbers etc.
3323     * @since 1.0.0
3324     * @todo BUGGY! Why? Why not build string and return?
3325     * @return string
3326     */
3327    protected function parse_non_string_part($stuff_to_parse) {
3328        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3329
3330        // Highlight keywords
3331        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3332        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3333        if ($this->lexic_permissions['STRINGS']) {
3334            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3335            $disallowed_before .= $quotemarks;
3336            $disallowed_after .= $quotemarks;
3337        }
3338        $disallowed_before .= "])";
3339        $disallowed_after .= "])";
3340
3341        $parser_control_pergroup = false;
3342        if (isset($this->language_data['PARSER_CONTROL'])) {
3343            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3344                $x = 0; // check wether per-keyword-group parser_control is enabled
3345                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3346                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3347                    ++$x;
3348                }
3349                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3350                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3351                    ++$x;
3352                }
3353                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3354            }
3355        }
3356
3357        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3358            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3359                $this->lexic_permissions['KEYWORDS'][$k]) {
3360
3361                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3362                $modifiers = $case_sensitive ? '' : 'i';
3363
3364                // NEW in 1.0.8 - per-keyword-group parser control
3365                $disallowed_before_local = $disallowed_before;
3366                $disallowed_after_local = $disallowed_after;
3367                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3368                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3369                        $disallowed_before_local =
3370                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3371                    }
3372
3373                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3374                        $disallowed_after_local =
3375                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3376                    }
3377                }
3378
3379                $this->_kw_replace_group = $k;
3380
3381                //NEW in 1.0.8, the cached regexp list
3382                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3383                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3384                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3385                    // Might make a more unique string for putting the number in soon
3386                    // Basically, we don't put the styles in yet because then the styles themselves will
3387                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3388                    $stuff_to_parse = preg_replace_callback(
3389                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3390                        array($this, 'handle_keyword_replace'),
3391                        $stuff_to_parse
3392                        );
3393                }
3394            }
3395        }
3396
3397        // Regular expressions
3398        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3399            if ($this->lexic_permissions['REGEXPS'][$key]) {
3400                if (is_array($regexp)) {
3401                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3402                        // produce valid HTML when we match multiple lines
3403                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
3404                        $this->_hmr_before = $regexp[GESHI_BEFORE];
3405                        $this->_hmr_key = $key;
3406                        $this->_hmr_after = $regexp[GESHI_AFTER];
3407                        $stuff_to_parse = preg_replace_callback(
3408                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3409                            array($this, 'handle_multiline_regexps'),
3410                            $stuff_to_parse);
3411                        $this->_hmr_replace = false;
3412                        $this->_hmr_before = '';
3413                        $this->_hmr_after = '';
3414                    } else {
3415                        $stuff_to_parse = preg_replace(
3416                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3417                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3418                            $stuff_to_parse);
3419                    }
3420                } else {
3421                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3422                        // produce valid HTML when we match multiple lines
3423                        $this->_hmr_key = $key;
3424                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3425                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3426                        $this->_hmr_key = '';
3427                    } else {
3428                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3429                    }
3430                }
3431            }
3432        }
3433
3434        // Highlight numbers. As of 1.0.8 we support different types of numbers
3435        $numbers_found = false;
3436
3437        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3438            $numbers_found = true;
3439
3440            //For each of the formats ...
3441            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3442                //Check if it should be highlighted ...
3443                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3444            }
3445        }
3446
3447        //
3448        // Now that's all done, replace /[number]/ with the correct styles
3449        //
3450        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3451            if (!$this->use_classes) {
3452                $attributes = ' style="' .
3453                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3454                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3455            } else {
3456                $attributes = ' class="kw' . $k . '"';
3457            }
3458            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3459        }
3460
3461        if ($numbers_found) {
3462            // Put number styles in
3463            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3464                //Commented out for now, as this needs some review ...
3465                //                if ($numbers_permissions & $id) {
3466                //Get the appropriate style ...
3467                //Checking for unset styles is done by the style cache builder ...
3468                if (!$this->use_classes) {
3469                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3470                } else {
3471                    $attributes = ' class="nu'.$id.'"';
3472                }
3473
3474                //Set in the correct styles ...
3475                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3476                //                }
3477            }
3478        }
3479
3480        // Highlight methods and fields in objects
3481        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3482            $oolang_spaces = "[\s]*";
3483            $oolang_before = "";
3484            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3485            if (isset($this->language_data['PARSER_CONTROL'])) {
3486                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3487                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3488                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3489                    }
3490                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3491                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3492                    }
3493                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3494                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3495                    }
3496                }
3497            }
3498
3499            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3500                if (false !== strpos($stuff_to_parse, $splitter)) {
3501                    if (!$this->use_classes) {
3502                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3503                    } else {
3504                        $attributes = ' class="me' . $key . '"';
3505                    }
3506                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3507                }
3508            }
3509        }
3510
3511        //
3512        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3513        // You try it, and see what happens ;)
3514        // TODO: Fix lexic permissions not converting entities if shouldn't
3515        // be highlighting regardless
3516        //
3517        if ($this->lexic_permissions['BRACKETS']) {
3518            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3519                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3520        }
3521
3522
3523        //FIX for symbol highlighting ...
3524        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3525            //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3526            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3527            $global_offset = 0;
3528            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3529                $symbol_match = $pot_symbols[$s_id][0][0];
3530                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3531                    // already highlighted blocks _must_ include either < or >
3532                    // so if this conditional applies, we have to skip this match
3533                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3534                    if(strpos($symbol_match, '<SEMI>') === false &&
3535                        strpos($symbol_match, '<PIPE>') === false) {
3536                        continue;
3537                    }
3538                }
3539
3540                // if we reach this point, we have a valid match which needs to be highlighted
3541
3542                $symbol_length = strlen($symbol_match);
3543                $symbol_offset = $pot_symbols[$s_id][0][1];
3544                unset($pot_symbols[$s_id]);
3545                $symbol_hl = "";
3546
3547                // if we have multiple styles, we have to handle them properly
3548                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3549                    $old_sym = -1;
3550                    // Split the current stuff to replace into its atomic symbols ...
3551                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3552                    foreach ($sym_match_syms[0] as $sym_ms) {
3553                        //Check if consequtive symbols belong to the same group to save output ...
3554                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3555                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3556                            if (-1 != $old_sym) {
3557                                $symbol_hl .= "|>";
3558                            }
3559                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3560                            if (!$this->use_classes) {
3561                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3562                            } else {
3563                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
3564                            }
3565                        }
3566                        $symbol_hl .= $sym_ms;
3567                    }
3568                    unset($sym_match_syms);
3569
3570                    //Close remaining tags and insert the replacement at the right position ...
3571                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
3572                    if (-1 != $old_sym) {
3573                        $symbol_hl .= "|>";
3574                    }
3575                } else {
3576                    if (!$this->use_classes) {
3577                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3578                    } else {
3579                        $symbol_hl = '<| class="sy0">';
3580                    }
3581                    $symbol_hl .= $symbol_match . '|>';
3582                }
3583
3584                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3585
3586                // since we replace old text with something of different size,
3587                // we'll have to keep track of the differences
3588                $global_offset += strlen($symbol_hl) - $symbol_length;
3589            }
3590        }
3591        //FIX for symbol highlighting ...
3592
3593        // Add class/style for regexps
3594        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3595            if ($this->lexic_permissions['REGEXPS'][$key]) {
3596                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3597                    $this->_rx_key = $key;
3598                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3599                        array($this, 'handle_regexps_callback'),
3600                        $stuff_to_parse);
3601                } else {
3602                    if (!$this->use_classes) {
3603                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3604                    } else {
3605                        if (is_array($this->language_data['REGEXPS'][$key]) &&
3606                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3607                            $attributes = ' class="' .
3608                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3609                        } else {
3610                           $attributes = ' class="re' . $key . '"';
3611                        }
3612                    }
3613                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3614                }
3615            }
3616        }
3617
3618        // Replace <DOT> with . for urls
3619        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3620        // Replace <|UR1| with <a href= for urls also
3621        if (isset($this->link_styles[GESHI_LINK])) {
3622            if ($this->use_classes) {
3623                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3624            } else {
3625                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3626            }
3627        } else {
3628            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3629        }
3630
3631        //
3632        // NOW we add the span thingy ;)
3633        //
3634
3635        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3636        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3637        return substr($stuff_to_parse, 1);
3638    }
3639
3640    /**
3641     * Sets the time taken to parse the code
3642     *
3643     * @param string $start_time The time when parsing started as returned by @see microtime()
3644     * @param string $end_time   The time when parsing ended as returned by @see microtime()
3645     * @since 1.0.2
3646     */
3647    protected function set_time($start_time, $end_time) {
3648        $start = explode(' ', $start_time);
3649        $end = explode(' ', $end_time);
3650        $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3651    }
3652
3653    /**
3654     * Gets the time taken to parse the code
3655     *
3656     * @return double The time taken to parse the code
3657     * @since  1.0.2
3658     */
3659    public function get_time() {
3660        return $this->time;
3661    }
3662
3663    /**
3664     * Merges arrays recursively, overwriting values of the first array with values of later arrays
3665     *
3666     * @since 1.0.8
3667     */
3668    protected function merge_arrays() {
3669        $arrays = func_get_args();
3670        $narrays = count($arrays);
3671
3672        // check arguments
3673        // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3674        for ($i = 0; $i < $narrays; $i ++) {
3675            if (!is_array($arrays[$i])) {
3676                // also array_merge_recursive returns nothing in this case
3677                trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3678                return false;
3679            }
3680        }
3681
3682        // the first array is in the output set in every case
3683        $ret = $arrays[0];
3684
3685        // merege $ret with the remaining arrays
3686        for ($i = 1; $i < $narrays; $i ++) {
3687            foreach ($arrays[$i] as $key => $value) {
3688                if (is_array($value) && isset($ret[$key])) {
3689                    // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3690                    // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3691                    $ret[$key] = $this->merge_arrays($ret[$key], $value);
3692                } else {
3693                    $ret[$key] = $value;
3694                }
3695            }
3696        }
3697
3698        return $ret;
3699    }
3700
3701    /**
3702     * Gets language information and stores it for later use
3703     *
3704     * @param string $file_name The filename of the language file you want to load
3705     * @since 1.0.0
3706     * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3707     */
3708    protected function load_language($file_name) {
3709        if ($file_name == $this->loaded_language) {
3710            // this file is already loaded!
3711            return;
3712        }
3713
3714        //Prepare some stuff before actually loading the language file
3715        $this->loaded_language = $file_name;
3716        $this->parse_cache_built = false;
3717        $this->enable_highlighting();
3718        $language_data = array();
3719
3720        //Load the language file
3721        require $file_name;
3722
3723        // Perhaps some checking might be added here later to check that
3724        // $language data is a valid thing but maybe not
3725        $this->language_data = $language_data;
3726
3727        // Set strict mode if should be set
3728        $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3729
3730        // Set permissions for all lexics to true
3731        // so they'll be highlighted by default
3732        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3733            if (!empty($this->language_data['KEYWORDS'][$key])) {
3734                $this->lexic_permissions['KEYWORDS'][$key] = true;
3735            } else {
3736                $this->lexic_permissions['KEYWORDS'][$key] = false;
3737            }
3738        }
3739
3740        foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3741            $this->lexic_permissions['COMMENTS'][$key] = true;
3742        }
3743        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3744            $this->lexic_permissions['REGEXPS'][$key] = true;
3745        }
3746
3747        // for BenBE and future code reviews:
3748        // we can use empty here since we only check for existance and emptiness of an array
3749        // if it is not an array at all but rather false or null this will work as intended as well
3750        // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3751        if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3752            foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3753                // it's either true or false and maybe is true as well
3754                $perm = $value !== GESHI_NEVER;
3755                if ($flag == 'ALL') {
3756                    $this->enable_highlighting($perm);
3757                    continue;
3758                }
3759                if (!isset($this->lexic_permissions[$flag])) {
3760                    // unknown lexic permission
3761                    continue;
3762                }
3763                if (is_array($this->lexic_permissions[$flag])) {
3764                    foreach ($this->lexic_permissions[$flag] as $key => $val) {
3765                        $this->lexic_permissions[$flag][$key] = $perm;
3766                    }
3767                } else {
3768                    $this->lexic_permissions[$flag] = $perm;
3769                }
3770            }
3771            unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3772        }
3773
3774        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3775        //You need to set one for HARDESCAPES only in this case.
3776        if(!isset($this->language_data['HARDCHAR'])) {
3777            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3778        }
3779
3780        //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3781        $style_filename = substr($file_name, 0, -4) . '.style.php';
3782        if (is_readable($style_filename)) {
3783            //Clear any style_data that could have been set before ...
3784            if (isset($style_data)) {
3785                unset($style_data);
3786            }
3787
3788            //Read the Style Information from the style file
3789            include $style_filename;
3790
3791            //Apply the new styles to our current language styles
3792            if (isset($style_data) && is_array($style_data)) {
3793                $this->language_data['STYLES'] =
3794                    $this->merge_arrays($this->language_data['STYLES'], $style_data);
3795            }
3796        }
3797    }
3798
3799    /**
3800     * Takes the parsed code and various options, and creates the HTML
3801     * surrounding it to make it look nice.
3802     *
3803     * @param  string $parsed_code The code already parsed (reference!)
3804     * @since  1.0.0
3805     */
3806    protected function finalise(&$parsed_code) {
3807        // Remove end parts of important declarations
3808        // This is BUGGY!! My fault for bad code: fix coming in 1.2
3809        // @todo Remove this crap
3810        if ($this->enable_important_blocks &&
3811            (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3812            $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3813        }
3814
3815        // Add HTML whitespace stuff if we're using the <div> header
3816        if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3817            $this->indent($parsed_code);
3818        }
3819
3820        // purge some unnecessary stuff
3821        /** NOTE: memorypeak #1 */
3822        $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3823
3824        // If we are using IDs for line numbers, there needs to be an overall
3825        // ID set to prevent collisions.
3826        if ($this->add_ids && !$this->overall_id) {
3827            $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3828        }
3829
3830        // Get code into lines
3831        /** NOTE: memorypeak #2 */
3832        $code = explode("\n", $parsed_code);
3833        $parsed_code = $this->header();
3834
3835        // If we're using line numbers, we insert <li>s and appropriate
3836        // markup to style them (otherwise we don't need to do anything)
3837        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3838            // If we're using the <pre> header, we shouldn't add newlines because
3839            // the <pre> will line-break them (and the <li>s already do this for us)
3840            $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3841
3842            // Foreach line...
3843            for ($i = 0, $n = count($code); $i < $n;) {
3844                //Reset the attributes for a new line ...
3845                $attrs = array();
3846
3847                // Make lines have at least one space in them if they're empty
3848                // BenBE: Checking emptiness using trim instead of relying on blanks
3849                if ('' == trim($code[$i])) {
3850                    $code[$i] = '&nbsp;';
3851                }
3852
3853                // If this is a "special line"...
3854                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3855                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3856                    // Set the attributes to style the line
3857                    if ($this->use_classes) {
3858                        //$attr = ' class="li2"';
3859                        $attrs['class'][] = 'li2';
3860                        $def_attr = ' class="de2"';
3861                    } else {
3862                        //$attr = ' style="' . $this->line_style2 . '"';
3863                        $attrs['style'][] = $this->line_style2;
3864                        // This style "covers up" the special styles set for special lines
3865                        // so that styles applied to special lines don't apply to the actual
3866                        // code on that line
3867                        $def_attr = ' style="' . $this->code_style . '"';
3868                    }
3869                } else {
3870                    if ($this->use_classes) {
3871                        //$attr = ' class="li1"';
3872                        $attrs['class'][] = 'li1';
3873                        $def_attr = ' class="de1"';
3874                    } else {
3875                        //$attr = ' style="' . $this->line_style1 . '"';
3876                        $attrs['style'][] = $this->line_style1;
3877                        $def_attr = ' style="' . $this->code_style . '"';
3878                    }
3879                }
3880
3881                //Check which type of tag to insert for this line
3882                if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3883                    $start = "<pre$def_attr>";
3884                    $end = '</pre>';
3885                } else {
3886                    // Span or div?
3887                    $start = "<div$def_attr>";
3888                    $end = '</div>';
3889                }
3890
3891                ++$i;
3892
3893                // Are we supposed to use ids? If so, add them
3894                if ($this->add_ids) {
3895                    $attrs['id'][] = "$this->overall_id-$i";
3896                }
3897
3898                //Is this some line with extra styles???
3899                if (in_array($i, $this->highlight_extra_lines)) {
3900                    if ($this->use_classes) {
3901                        if (isset($this->highlight_extra_lines_styles[$i])) {
3902                            $attrs['class'][] = "lx$i";
3903                        } else {
3904                            $attrs['class'][] = "ln-xtra";
3905                        }
3906                    } else {
3907                        array_push($attrs['style'], $this->get_line_style($i));
3908                    }
3909                }
3910
3911                // Add in the line surrounded by appropriate list HTML
3912                $attr_string = '';
3913                foreach ($attrs as $key => $attr) {
3914                    $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3915                }
3916
3917                $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3918                unset($code[$i - 1]);
3919            }
3920        } else {
3921            $n = count($code);
3922            if ($this->use_classes) {
3923                $attributes = ' class="de1"';
3924            } else {
3925                $attributes = ' style="'. $this->code_style .'"';
3926            }
3927            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3928                $parsed_code .= '<pre'. $attributes .'>';
3929            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3930                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3931                    if ($this->use_classes) {
3932                        $attrs = ' class="ln"';
3933                    } else {
3934                        $attrs = ' style="'. $this->table_linenumber_style .'"';
3935                    }
3936                    $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3937                    // get linenumbers
3938                    // we don't merge it with the for below, since it should be better for
3939                    // memory consumption this way
3940                    // @todo: but... actually it would still be somewhat nice to merge the two loops
3941                    //        the mem peaks are at different positions
3942                    for ($i = 0; $i < $n; ++$i) {
3943                        $close = 0;
3944                        // fancy lines
3945                        if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3946                            $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3947                            // Set the attributes to style the line
3948                            if ($this->use_classes) {
3949                                $parsed_code .= '<span class="xtra li2"><span class="de2">';
3950                            } else {
3951                                // This style "covers up" the special styles set for special lines
3952                                // so that styles applied to special lines don't apply to the actual
3953                                // code on that line
3954                                $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3955                                                  .'<span style="' . $this->code_style .'">';
3956                            }
3957                            $close += 2;
3958                        }
3959                        //Is this some line with extra styles???
3960                        if (in_array($i + 1, $this->highlight_extra_lines)) {
3961                            if ($this->use_classes) {
3962                                if (isset($this->highlight_extra_lines_styles[$i])) {
3963                                    $parsed_code .= "<span class=\"xtra lx$i\">";
3964                                } else {
3965                                    $parsed_code .= "<span class=\"xtra ln-xtra\">";
3966                                }
3967                            } else {
3968                                $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3969                            }
3970                            ++$close;
3971                        }
3972                        $parsed_code .= $this->line_numbers_start + $i;
3973                        if ($close) {
3974                            $parsed_code .= str_repeat('</span>', $close);
3975                        } elseif ($i != $n) {
3976                            $parsed_code .= "\n";
3977                        }
3978                    }
3979                    $parsed_code .= '</pre></td><td'.$attributes.'>';
3980                }
3981                $parsed_code .= '<pre'. $attributes .'>';
3982            }
3983            // No line numbers, but still need to handle highlighting lines extra.
3984            // Have to use divs so the full width of the code is highlighted
3985            $close = 0;
3986            for ($i = 0; $i < $n; ++$i) {
3987                // Make lines have at least one space in them if they're empty
3988                // BenBE: Checking emptiness using trim instead of relying on blanks
3989                if ('' == trim($code[$i])) {
3990                    $code[$i] = '&nbsp;';
3991                }
3992                // fancy lines
3993                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3994                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3995                    // Set the attributes to style the line
3996                    if ($this->use_classes) {
3997                        $parsed_code .= '<span class="xtra li2"><span class="de2">';
3998                    } else {
3999                        // This style "covers up" the special styles set for special lines
4000                        // so that styles applied to special lines don't apply to the actual
4001                        // code on that line
4002                        $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
4003                                          .'<span style="' . $this->code_style .'">';
4004                    }
4005                    $close += 2;
4006                }
4007                //Is this some line with extra styles???
4008                if (in_array($i + 1, $this->highlight_extra_lines)) {
4009                    if ($this->use_classes) {
4010                        if (isset($this->highlight_extra_lines_styles[$i])) {
4011                            $parsed_code .= "<span class=\"xtra lx$i\">";
4012                        } else {
4013                            $parsed_code .= "<span class=\"xtra ln-xtra\">";
4014                        }
4015                    } else {
4016                        $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
4017                    }
4018                    ++$close;
4019                }
4020
4021                $parsed_code .= $code[$i];
4022
4023                if ($close) {
4024                  $parsed_code .= str_repeat('</span>', $close);
4025                  $close = 0;
4026                }
4027                elseif ($i + 1 < $n) {
4028                    $parsed_code .= "\n";
4029                }
4030                unset($code[$i]);
4031            }
4032
4033            if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
4034                $parsed_code .= '</pre>';
4035            }
4036            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4037                $parsed_code .= '</td>';
4038            }
4039        }
4040
4041        $parsed_code .= $this->footer();
4042    }
4043
4044    /**
4045     * Creates the header for the code block (with correct attributes)
4046     *
4047     * @return string The header for the code block
4048     * @since  1.0.0
4049     */
4050    protected function header() {
4051        // Get attributes needed
4052        /**
4053         * @todo   Document behaviour change - class is outputted regardless of whether
4054         *         we're using classes or not. Same with style
4055         */
4056        $attributes = ' class="' . $this->_genCSSName($this->language);
4057        if ($this->overall_class != '') {
4058            $attributes .= " ".$this->_genCSSName($this->overall_class);
4059        }
4060        $attributes .= '"';
4061
4062        if ($this->overall_id != '') {
4063            $attributes .= " id=\"{$this->overall_id}\"";
4064        }
4065        if ($this->overall_style != '' && !$this->use_classes) {
4066            $attributes .= ' style="' . $this->overall_style . '"';
4067        }
4068
4069        $ol_attributes = '';
4070
4071        if ($this->line_numbers_start != 1) {
4072            $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
4073        }
4074
4075        // Get the header HTML
4076        $header = $this->header_content;
4077        if ($header) {
4078            if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
4079                $header = str_replace("\n", '', $header);
4080            }
4081            $header = $this->replace_keywords($header);
4082
4083            if ($this->use_classes) {
4084                $attr = ' class="head"';
4085            } else {
4086                $attr = " style=\"{$this->header_content_style}\"";
4087            }
4088            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4089                $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4090            } else {
4091                $header = "<div$attr>$header</div>";
4092            }
4093        }
4094
4095        if (GESHI_HEADER_NONE == $this->header_type) {
4096            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4097                return "$header<ol$attributes$ol_attributes>";
4098            }
4099            return $header . ($this->force_code_block ? '<div>' : '');
4100        }
4101
4102        // Work out what to return and do it
4103        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4104            if ($this->header_type == GESHI_HEADER_PRE) {
4105                return "<pre$attributes>$header<ol$ol_attributes>";
4106            } elseif ($this->header_type == GESHI_HEADER_DIV ||
4107                $this->header_type == GESHI_HEADER_PRE_VALID) {
4108                return "<div$attributes>$header<ol$ol_attributes>";
4109            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4110                return "<table$attributes>$header<tbody><tr class=\"li1\">";
4111            }
4112        } else {
4113            if ($this->header_type == GESHI_HEADER_PRE) {
4114                return "<pre$attributes>$header"  .
4115                    ($this->force_code_block ? '<div>' : '');
4116            } else {
4117                return "<div$attributes>$header" .
4118                    ($this->force_code_block ? '<div>' : '');
4119            }
4120        }
4121    }
4122
4123    /**
4124     * Returns the footer for the code block.
4125     *
4126     * @return string The footer for the code block
4127     * @since  1.0.0
4128     */
4129    protected function footer() {
4130        $footer = $this->footer_content;
4131        if ($footer) {
4132            if ($this->header_type == GESHI_HEADER_PRE) {
4133                $footer = str_replace("\n", '', $footer);;
4134            }
4135            $footer = $this->replace_keywords($footer);
4136
4137            if ($this->use_classes) {
4138                $attr = ' class="foot"';
4139            } else {
4140                $attr = " style=\"{$this->footer_content_style}\"";
4141            }
4142            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4143                $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4144            } else {
4145                $footer = "<div$attr>$footer</div>";
4146            }
4147        }
4148
4149        if (GESHI_HEADER_NONE == $this->header_type) {
4150            return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4151        }
4152
4153        if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4154            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4155                return "</ol>$footer</div>";
4156            }
4157            return ($this->force_code_block ? '</div>' : '') .
4158                "$footer</div>";
4159        }
4160        elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4161            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4162                return "</tr></tbody>$footer</table>";
4163            }
4164            return ($this->force_code_block ? '</div>' : '') .
4165                "$footer</div>";
4166        }
4167        else {
4168            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4169                return "</ol>$footer</pre>";
4170            }
4171            return ($this->force_code_block ? '</div>' : '') .
4172                "$footer</pre>";
4173        }
4174    }
4175
4176    /**
4177     * Replaces certain keywords in the header and footer with
4178     * certain configuration values
4179     *
4180     * @param  string $instr The header or footer content to do replacement on
4181     * @return string The header or footer with replaced keywords
4182     * @since  1.0.2
4183     */
4184    protected function replace_keywords($instr) {
4185        $keywords = $replacements = array();
4186
4187        $keywords[] = '<TIME>';
4188        $keywords[] = '{TIME}';
4189        $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4190
4191        $keywords[] = '<LANGUAGE>';
4192        $keywords[] = '{LANGUAGE}';
4193        $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4194
4195        $keywords[] = '<VERSION>';
4196        $keywords[] = '{VERSION}';
4197        $replacements[] = $replacements[] = GESHI_VERSION;
4198
4199        $keywords[] = '<SPEED>';
4200        $keywords[] = '{SPEED}';
4201        if ($time <= 0) {
4202            $speed = 'N/A';
4203        } else {
4204            $speed = strlen($this->source) / $time;
4205            if ($speed >= 1024) {
4206                $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4207            } else {
4208                $speed = sprintf("%.0f B/s", $speed);
4209            }
4210        }
4211        $replacements[] = $replacements[] = $speed;
4212
4213        return str_replace($keywords, $replacements, $instr);
4214    }
4215
4216    /**
4217     * Secure replacement for PHP built-in function htmlspecialchars().
4218     *
4219     * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4220     * for this replacement function.
4221     *
4222     * The INTERFACE for this function is almost the same as that for
4223     * htmlspecialchars(), with the same default for quote style; however, there
4224     * is no 'charset' parameter. The reason for this is as follows:
4225     *
4226     * The PHP docs say:
4227     *      "The third argument charset defines character set used in conversion."
4228     *
     * I suspect PHP's htmlspecialchars() is working at the byte-value level and
     * thus _needs_ to know (or assume) a character set because the special
     * characters to be replaced could exist at different code points in
     * different character sets. (If indeed htmlspecialchars() works at
     * byte-value level that goes some way towards explaining why the
     * vulnerability would exist in this function, too, and not only in
     * htmlentities() which certainly is working at byte-value level.)
4236     *
4237     * This replacement function however works at character level and should
4238     * therefore be "immune" to character set differences - so no charset
4239     * parameter is needed or provided. If a third parameter is passed, it will
4240     * be silently ignored.
4241     *
4242     * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4243     * of PHP's '&#039;' for a single quote: this provides compatibility with
4244     *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4245     * (see comment by mikiwoz at yahoo dot co dot uk on
4246     * http://php.net/htmlspecialchars); it also matches the entity definition
4247     * for XML 1.0
4248     * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4249     * Like PHP we use a numeric character reference instead of '&apos;' for the
4250     * single quote. For the other special characters we use the named entity
4251     * references, as PHP is doing.
4252     *
4253     * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4254     *
4255     * @license     http://www.gnu.org/copyleft/lgpl.html
4256     *              GNU Lesser General Public License
4257     * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4258     *              Wikka Development Team}
4259     *
4260     * @param       string  $string string to be converted
4261     * @param       integer $quote_style
4262     *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4263     *                      - ENT_NOQUOTES: escapes only &, < and >
4264     *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4265     * @return      string  converted string
4266     * @since       1.0.7.18
4267     */
4268    protected function hsc($string, $quote_style = ENT_COMPAT) {
4269        // init
4270        static $aTransSpecchar = array(
4271            '&' => '&amp;',
4272            '"' => '&quot;',
4273            '<' => '&lt;',
4274            '>' => '&gt;',
4275
4276            //This fix is related to SF#1923020, but has to be applied
4277            //regardless of actually highlighting symbols.
4278
4279            //Circumvent a bug with symbol highlighting
4280            //This is required as ; would produce undesirable side-effects if it
4281            //was not to be processed as an entity.
4282            ';' => '<SEMI>', // Force ; to be processed as entity
4283            '|' => '<PIPE>' // Force | to be processed as entity
4284            );                      // ENT_COMPAT set
4285
4286        switch ($quote_style) {
4287            case ENT_NOQUOTES: // don't convert double quotes
4288                unset($aTransSpecchar['"']);
4289                break;
4290            case ENT_QUOTES: // convert single quotes as well
4291                $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4292                break;
4293        }
4294
4295        // return translated string
4296        return strtr($string, $aTransSpecchar);
4297    }
4298
4299    /**
4300     * Generate a CSS class name from a given string.
4301     * Prevents invalid CSS classes.
4302     *
4303     * @param string $name Proposed class name
4304     *
4305     * @return string Safe CSS class name
4306     */
4307    protected function _genCSSName($name) {
4308        return (is_numeric($name[0]) ? '_' : '') . $name;
4309    }
4310
4311    /**
4312     * Returns a stylesheet for the highlighted code. If $economy mode
4313     * is true, we only return the stylesheet declarations that matter for
4314     * this code block instead of the whole thing
4315     *
4316     * @param  boolean $economy_mode Whether to use economy mode or not
4317     * @return string A stylesheet built on the data for the current language
4318     * @since  1.0.0
4319     */
4320    public function get_stylesheet($economy_mode = true) {
4321        // If there's an error, chances are that the language file
4322        // won't have populated the language data file, so we can't
4323        // risk getting a stylesheet...
4324        if ($this->error) {
4325            return '';
4326        }
4327
4328        //Check if the style rearrangements have been processed ...
4329        //This also does some preprocessing to check which style groups are useable ...
4330        if(!isset($this->language_data['NUMBERS_CACHE'])) {
4331            $this->build_style_cache();
4332        }
4333
4334        // First, work out what the selector should be. If there's an ID,
4335        // that should be used, the same for a class. Otherwise, a selector
4336        // of '' means that these styles will be applied anywhere
4337        if ($this->overall_id) {
4338            $selector = '#' . $this->_genCSSName($this->overall_id);
4339        } else {
4340            $selector = '.' . $this->_genCSSName($this->language);
4341            if ($this->overall_class) {
4342                $selector .= '.' . $this->_genCSSName($this->overall_class);
4343            }
4344        }
4345        $selector .= ' ';
4346
4347        // Header of the stylesheet
4348        if (!$economy_mode) {
4349            $stylesheet = "/**\n".
4350                " * GeSHi Dynamically Generated Stylesheet\n".
4351                " * --------------------------------------\n".
4352                " * Dynamically generated stylesheet for {$this->language}\n".
4353                " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4354                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4355                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4356                " * --------------------------------------\n".
4357                " */\n";
4358        } else {
4359            $stylesheet = "/**\n".
4360                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
4361                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4362                " */\n";
4363        }
4364
4365        // Set the <ol> to have no effect at all if there are line numbers
4366        // (<ol>s have margins that should be destroyed so all layout is
4367        // controlled by the set_overall_style method, which works on the
4368        // <pre> or <div> container). Additionally, set default styles for lines
4369        if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4370            //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4371            $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4372        }
4373
4374        // Add overall styles
4375        // note: neglect economy_mode, empty styles are meaningless
4376        if ($this->overall_style != '') {
4377            $stylesheet .= "$selector {{$this->overall_style}}\n";
4378        }
4379
4380        // Add styles for links
4381        // note: economy mode does not make _any_ sense here
4382        //       either the style is empty and thus no selector is needed
4383        //       or the appropriate key is given.
4384        foreach ($this->link_styles as $key => $style) {
4385            if ($style != '') {
4386                switch ($key) {
4387                    case GESHI_LINK:
4388                        $stylesheet .= "{$selector}a:link {{$style}}\n";
4389                        break;
4390                    case GESHI_HOVER:
4391                        $stylesheet .= "{$selector}a:hover {{$style}}\n";
4392                        break;
4393                    case GESHI_ACTIVE:
4394                        $stylesheet .= "{$selector}a:active {{$style}}\n";
4395                        break;
4396                    case GESHI_VISITED:
4397                        $stylesheet .= "{$selector}a:visited {{$style}}\n";
4398                        break;
4399                }
4400            }
4401        }
4402
4403        // Header and footer
4404        // note: neglect economy_mode, empty styles are meaningless
4405        if ($this->header_content_style != '') {
4406            $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4407        }
4408        if ($this->footer_content_style != '') {
4409            $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4410        }
4411
4412        // Styles for important stuff
4413        // note: neglect economy_mode, empty styles are meaningless
4414        if ($this->important_styles != '') {
4415            $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4416        }
4417
4418        // Simple line number styles
4419        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4420            $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4421        }
4422        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4423            $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4424        }
4425        // If there is a style set for fancy line numbers, echo it out
4426        if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4427            $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4428        }
4429
4430        // note: empty styles are meaningless
4431        foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4432            if ($styles != '' && (!$economy_mode ||
4433                (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4434                $this->lexic_permissions['KEYWORDS'][$group]))) {
4435                $stylesheet .= "$selector.kw$group {{$styles}}\n";
4436            }
4437        }
4438        foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4439            if ($styles != '' && (!$economy_mode ||
4440                (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4441                $this->lexic_permissions['COMMENTS'][$group]) ||
4442                (!empty($this->language_data['COMMENT_REGEXP']) &&
4443                !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4444                $stylesheet .= "$selector.co$group {{$styles}}\n";
4445            }
4446        }
4447        foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4448            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4449                // NEW: since 1.0.8 we have to handle hardescapes
4450                if ($group === 'HARD') {
4451                    $group = '_h';
4452                }
4453                $stylesheet .= "$selector.es$group {{$styles}}\n";
4454            }
4455        }
4456        foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4457            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4458                $stylesheet .= "$selector.br$group {{$styles}}\n";
4459            }
4460        }
4461        foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4462            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4463                $stylesheet .= "$selector.sy$group {{$styles}}\n";
4464            }
4465        }
4466        foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4467            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4468                // NEW: since 1.0.8 we have to handle hardquotes
4469                if ($group === 'HARD') {
4470                    $group = '_h';
4471                }
4472                $stylesheet .= "$selector.st$group {{$styles}}\n";
4473            }
4474        }
4475        foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4476            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4477                $stylesheet .= "$selector.nu$group {{$styles}}\n";
4478            }
4479        }
4480        foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4481            if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4482                $stylesheet .= "$selector.me$group {{$styles}}\n";
4483            }
4484        }
4485        // note: neglect economy_mode, empty styles are meaningless
4486        foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4487            if ($styles != '') {
4488                $stylesheet .= "$selector.sc$group {{$styles}}\n";
4489            }
4490        }
4491        foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4492            if ($styles != '' && (!$economy_mode ||
4493                (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4494                $this->lexic_permissions['REGEXPS'][$group]))) {
4495                if (is_array($this->language_data['REGEXPS'][$group]) &&
4496                    array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4497                    $stylesheet .= "$selector.";
4498                    $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4499                    $stylesheet .= " {{$styles}}\n";
4500                } else {
4501                    $stylesheet .= "$selector.re$group {{$styles}}\n";
4502                }
4503            }
4504        }
4505        // Styles for lines being highlighted extra
4506        if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4507            $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4508        }
4509        $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4510        foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4511            $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4512        }
4513
4514        return $stylesheet;
4515    }
4516
4517    /**
     * Gets the style that is used for the specified line
     *
     * @param int $line The line number information is requested for
     * @return mixed The style registered for that line, or the default
     *               extra-line style if none is registered for it
     * @since 1.0.7.21
4522     */
4523    protected function get_line_style($line) {
4524        $style = null;
4525        if (isset($this->highlight_extra_lines_styles[$line])) {
4526            $style = $this->highlight_extra_lines_styles[$line];
4527        } else { // if no "extra" style assigned
4528            $style = $this->highlight_extra_lines_style;
4529        }
4530
4531        return $style;
4532    }
4533
4534    /**
    * Creates an optimized list of regular expressions from an array of
    * strings. Entries that share a common prefix are merged into
    * non-capturing groups.
    *
    * Example:
    * <code>$list = array('faa', 'foo', 'foobar');
    *          => array('f(?:aa|oo(?:bar)?)')</code>
    *
    * @param array  $list             array of (unquoted) strings
    * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings; the result is split into
    *               several entries when the length or subpattern limits are exceeded
4545    * @author Milian Wolff <mail@milianw.de>
4546    * @since 1.0.8
4547    */
    protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
        // Builds a prefix tree ($tokens) out of the sorted, quoted entries and
        // converts it to regexp strings, which are collected in $regexp_list
        // (the return value, an array of strings).

        // characters a key fragment must not start with when a key is split (see below)
        $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        // each entry is only compared with the key chain of the previously
        // processed entry, so entries with a common prefix have to be adjacent
        sort($list);
        // the result; $list_key is the index of the entry currently being filled
        $regexp_list = array('');
        // number of '(?:' groups in the current result entry
        $num_subpatterns = 0;
        $list_key = 0;

        // the tokens which we will use to generate the regexp list:
        // fragment => array of continuations, where the key '' marks that
        // the string ending at this point is a complete entry
        $tokens = array();
        // fragment chain (one fragment per nesting level) of the previous entry
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        // $cur_len sums up the fragment lengths added since the tokens were last flushed
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                // -> flush the tokens into a new result entry and start over
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            // nesting depth inside the token tree
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            // reference to the tree node the (rest of the) entry is inserted into
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        continue 2;
                    }
                    // count the leading characters shared with the previous key at this level
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            // -> the previous key would have to be split into
                            //    the shared part and its remainder
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character
                                // -> do not split, store the entry as a key of its own
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                // $prev_keys[$level] now equals $entry, so the
                                // duplicate check above moves on to the next list entry
                                continue;
                            } else {
                                // relocate previous tokens
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        // descend one level and go on with the unmatched rest of the entry
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        // subpattern limit would be exceeded -> start a new result entry
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        // append as a further alternative to the current result entry
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                // -> insert (the rest of) the entry as a new leaf at the current position
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            // the entry has been processed and is removed from the (local) list
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            // only move on to a new result entry if the current one is in use
            if ( !empty($regexp_list[$list_key]) ) {
              ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }
4660
4661    /**
    * Creates the appropriate regexp string from a token array.
    * You should not call this function directly, @see $this->optimize_regexp_list().
    *
    * @param array $tokens   array of tokens
    * @param bool  $recursed whether this is a recursive call
4667    * @return string
4668    * @author Milian Wolff <mail@milianw.de>
4669    * @since 1.0.8
4670    */
4671    protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4672        $list = '';
4673        foreach ($tokens as $token => $sub_tokens) {
4674            $list .= $token;
4675            $close_entry = isset($sub_tokens['']);
4676            unset($sub_tokens['']);
4677            if (!empty($sub_tokens)) {
4678                $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4679                if ($close_entry) {
4680                    // make sub_tokens optional
4681                    $list .= '?';
4682                }
4683            }
4684            $list .= '|';
4685        }
4686        if (!$recursed) {
4687            // do some optimizations
4688            // common trailing strings
4689            // BUGGY!
4690            //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4691            //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4692            // (?:p)? => p?
4693            $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4694            // (?:a|b|c|d|...)? => [abcd...]?
4695            // TODO: a|bb|c => [ac]|bb
4696            static $callback_2;
4697            if (!isset($callback_2)) {
4698                $callback_2 = function($matches) { return "[" . str_replace("|", "", $matches[1]) . "]"; };
4699            }
4700            $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4701        }
4702        // return $list without trailing pipe
4703        return substr($list, 0, -1);
4704    }
4705} // End Class GeSHi
4706
4707
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper to highlight a piece of code with a single call.
     * The code is highlighted without a container (GESHI_HEADER_NONE) and
     * wrapped in a <code> element.
     *
     * @param string $string   The code to highlight
     * @param string $language The language to highlight the code in
     * @param string $path     The path to the language files. Can be left out:
     *                         as of version 1.0.7 the path should be detected automatically
     * @param boolean $return  Whether to return the result or to echo it
     * @return string|boolean  The highlighted code if $return is true. Otherwise the
     *                         code is echoed and the result is false if the
     *                         highlighter reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';

        if ($return) {
            return $markup;
        }

        echo $markup;

        // in echo mode the return value only tells whether highlighting went fine
        return !$highlighter->error();
    }
}
4736