<?php
/**
 * GeSHi - Generic Syntax Highlighter
 *
 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
 * documentation at http://qbnz.com/highlighter/documentation.php for more
 * information about how to use this class.
 *
 * For changes, release notes, TODOs etc, see the relevant files in the docs/
 * directory.
 *
 * This file is part of GeSHi.
 *
 * GeSHi is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GeSHi is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GeSHi; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * @package geshi
 * @subpackage core
 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
 * @license http://gnu.org/copyleft/gpl.html GNU GPL
 */

//
// GeSHi Constants
// You should use these constant names in your programs instead of
// their values - you never know when a value may change in a future
// version
//

/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.9.0');

// Define the root directory for the GeSHi code tree
// (only when the including script has not already defined it)
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security
// (only when the including script has not already defined it)
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}

// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** Some old PHP / PCRE versions only support a limited number of subpatterns in
    regular expressions. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification
// (the values are distinct powers of two)
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f" and without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and no leading digit */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/


/**
 * The GeSHi Class.
 *
 * Please refer to the documentation for GeSHi 1.0.X that is available
 * at http://qbnz.com/highlighter/documentation.php for more information
 * about how to use this class.
 *
 * @package   geshi
 * @author    Nigel McNie <nigel@geshi.org>
 * @author    Benny Baumann <BenBE@omorphia.de>
 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2014 Benny Baumann
 */
class GeSHi {

    /**
     * The source code to highlight
     * @var string
     */
    protected $source = '';

    /**
     * The language to use when highlighting
     * @var string
     */
    protected $language = '';

    /**
     * The data for the language used
     * @var array
     */
    protected $language_data = array();

    /**
     * The path to the language files
     * @var string
     */
    protected $language_path = GESHI_LANG_ROOT;

    /**
     * The error state of the last operation: false when no error occurred,
     * otherwise one of the GESHI_ERROR_* codes (get_language_fullname()
     * stores a plain message string here instead of a code)
     * @var int|string|false
     * @todo check err reporting works
     */
    protected $error = false;

    /**
     * Possible error messages, keyed by GESHI_ERROR_* code. The {LANGUAGE}
     * and {PATH} placeholders are filled in by error()
     * @var array
     */
    protected $error_messages = array(
        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    );

    /**
     * Whether highlighting is strict or not
     * (set_language() resets this to GESHI_NEVER)
     * @var boolean|int
     */
    protected $strict_mode = false;

    /**
     * Whether to use CSS classes in output
     * @var boolean
     */
    protected $use_classes = false;

    /**
     * The type of header to use. Can be one of the following
     * values:
     *
     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     *
     * set_header_type() additionally accepts GESHI_HEADER_PRE_VALID and
     * GESHI_HEADER_PRE_TABLE.
     *
     * @var int
     */
    protected $header_type = GESHI_HEADER_PRE;

    /**
     * Array of permissions for which lexics should be highlighted
     * @var array
     */
    protected $lexic_permissions = array(
        'KEYWORDS' =>    array(),
        'COMMENTS' =>    array('MULTI' => true),
        'REGEXPS' =>     array(),
        'ESCAPE_CHAR' => true,
        'BRACKETS' =>    true,
        'SYMBOLS' =>     false,
        'STRINGS' =>     true,
        'NUMBERS' =>     true,
        'METHODS' =>     true,
        'SCRIPT' =>      true
    );

    /**
     * The time it took to parse the code
     * @var double
     */
    protected $time = 0;

    /**
     * The content of the header block
     * @var string
     */
    protected $header_content = '';

    /**
     * The content of the footer block
     * @var string
     */
    protected $footer_content = '';

    /**
     * The style of the header block
     * @var string
     */
    protected $header_content_style = '';

    /**
     * The style of the footer block
     * @var string
     */
    protected $footer_content_style = '';

    /**
     * Tells if a block around the highlighted source should be forced
     * if not using line numbering
     * @var boolean
     */
    protected $force_code_block = false;

    /**
     * The styles for hyperlinks in the code
     * @var array
     */
    protected $link_styles = array();

    /**
     * Whether important blocks should be recognised or not
     * @var boolean
     * @deprecated
     * @todo REMOVE THIS FUNCTIONALITY!
     */
    protected $enable_important_blocks = false;

    /**
     * Styles for important parts of the code
     * @var string
     * @deprecated
     * @todo As above - rethink the whole idea of important blocks as it is buggy and
     * will be hard to implement in 1.2
     */
    protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code

    /**
     * Whether CSS IDs should be added to the code
     * @var boolean
     */
    protected $add_ids = false;

    /**
     * Lines that should be highlighted extra
     * (emptied again by every set_source() call)
     * @var array
     */
    protected $highlight_extra_lines = array();

    /**
     * Styles of lines that should be highlighted extra
     * @var array
     */
    protected $highlight_extra_lines_styles = array();

    /**
     * Styles of extra-highlighted lines
     * @var string
     */
    protected $highlight_extra_lines_style = 'background-color: #ffc;';

    /**
     * The line ending
     * If null, nl2br() will be used on the result string.
     * Otherwise, all instances of \n will be replaced with $line_ending
     * @var string|null
     */
    protected $line_ending = null;

    /**
     * Number at which line numbers should start at
     * @var int
     */
    protected $line_numbers_start = 1;

    /**
     * The overall style for this code block
     * @var string
     */
    protected $overall_style = 'font-family:monospace;';

    /**
     * The style for the actual code
     * @var string
     */
    protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';

    /**
     * The overall class for this code block
     * @var string
     */
    protected $overall_class = '';

    /**
     * The overall ID for this code block
     * @var string
     */
    protected $overall_id = '';

    /**
     * Line number styles
     * @var string
     */
    protected $line_style1 = 'font-weight: normal; vertical-align:top;';

    /**
     * Line number styles for fancy lines
     * @var string
     */
    protected $line_style2 = 'font-weight: bold; vertical-align:top;';

    /**
     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
     * @var string
     */
    protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';

    /**
     * Flag for how line numbers are displayed: one of GESHI_NO_LINE_NUMBERS,
     * GESHI_NORMAL_LINE_NUMBERS or GESHI_FANCY_LINE_NUMBERS
     * @var int
     */
    protected $line_numbers = GESHI_NO_LINE_NUMBERS;

    /**
     * Flag to decide if multi line spans are allowed. Set it to false to make sure
     * each tag is closed before and reopened after each linefeed.
     * @var boolean
     */
    protected $allow_multiline_span = true;

    /**
     * The "nth" value for fancy line highlighting
     * @var int
     */
    protected $line_nth_row = 0;

    /**
     * The size of tab stops
     * @var int
     */
    protected $tab_width = 8;

    /**
     * Should we use language-defined tab stop widths?
478 * @var int 479 */ 480 protected $use_language_tab_width = false; 481 482 /** 483 * Default target for keyword links 484 * @var string 485 */ 486 protected $link_target = ''; 487 488 /** 489 * The encoding to use for entity encoding 490 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598) 491 * @var string 492 */ 493 protected $encoding = 'utf-8'; 494 495 /** 496 * Should keywords be linked? 497 * @var boolean 498 */ 499 protected $keyword_links = true; 500 501 /** 502 * Currently loaded language file 503 * @var string 504 * @since 1.0.7.22 505 */ 506 protected $loaded_language = ''; 507 508 /** 509 * Wether the caches needed for parsing are built or not 510 * 511 * @var bool 512 * @since 1.0.8 513 */ 514 protected $parse_cache_built = false; 515 516 /** 517 * Work around for Suhosin Patch with disabled /e modifier 518 * 519 * Note from suhosins author in config file: 520 * <blockquote> 521 * The /e modifier inside <code>preg_replace()</code> allows code execution. 522 * Often it is the cause for remote code execution exploits. It is wise to 523 * deactivate this feature and test where in the application it is used. 524 * The developer using the /e modifier should be made aware that he should 525 * use <code>preg_replace_callback()</code> instead 526 * </blockquote> 527 * 528 * @var array 529 * @since 1.0.8 530 */ 531 protected $_kw_replace_group = 0; 532 protected $_rx_key = 0; 533 534 /** 535 * some "callback parameters" for handle_multiline_regexps 536 * 537 * @since 1.0.8 538 * @access private 539 * @var string 540 */ 541 protected $_hmr_before = ''; 542 protected $_hmr_replace = ''; 543 protected $_hmr_after = ''; 544 protected $_hmr_key = 0; 545 546 /** 547 * Creates a new GeSHi object, with source and language 548 * 549 * @param string $source The source code to highlight 550 * @param string $language The language to highlight the source with 551 * @param string $path The path to the language file directory. 
<b>This 552 * is deprecated!</b> I've backported the auto path 553 * detection from the 1.1.X dev branch, so now it 554 * should be automatically set correctly. If you have 555 * renamed the language directory however, you will 556 * still need to set the path using this parameter or 557 * {@link GeSHi->set_language_path()} 558 * @since 1.0.0 559 */ 560 public function __construct($source = '', $language = '', $path = '') { 561 if ( is_string($source) && ($source !== '') ) { 562 $this->set_source($source); 563 } 564 if ( is_string($language) && ($language !== '') ) { 565 $this->set_language($language); 566 } 567 $this->set_language_path($path); 568 } 569 570 /** 571 * Returns the version of GeSHi 572 * 573 * @return string 574 * @since 1.0.8.11 575 */ 576 public function get_version() 577 { 578 return GESHI_VERSION; 579 } 580 581 /** 582 * Returns an error message associated with the last GeSHi operation, 583 * or false if no error has occurred 584 * 585 * @return string|false An error message if there has been an error, else false 586 * @since 1.0.0 587 */ 588 public function error() { 589 if ($this->error) { 590 //Put some template variables for debugging here ... 591 $debug_tpl_vars = array( 592 '{LANGUAGE}' => $this->language, 593 '{PATH}' => $this->language_path 594 ); 595 $msg = str_replace( 596 array_keys($debug_tpl_vars), 597 array_values($debug_tpl_vars), 598 $this->error_messages[$this->error]); 599 600 return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />"; 601 } 602 return false; 603 } 604 605 /** 606 * Gets a human-readable language name (thanks to Simon Patterson 607 * for the idea :)) 608 * 609 * @return string The name for the current language 610 * @since 1.0.2 611 */ 612 public function get_language_name() { 613 if (GESHI_ERROR_NO_SUCH_LANG == $this->error) { 614 return $this->language_data['LANG_NAME'] . 
' (Unknown Language)'; 615 } 616 return $this->language_data['LANG_NAME']; 617 } 618 619 /** 620 * Sets the source code for this object 621 * 622 * @param string $source The source code to highlight 623 * @since 1.0.0 624 */ 625 public function set_source($source) { 626 $this->source = $source; 627 $this->highlight_extra_lines = array(); 628 } 629 630 /** 631 * Sets the language for this object 632 * 633 * @note since 1.0.8 this function won't reset language-settings by default anymore! 634 * if you need this set $force_reset = true 635 * 636 * @param string $language The name of the language to use 637 * @param bool $force_reset 638 * @since 1.0.0 639 */ 640 public function set_language($language, $force_reset = false) { 641 $this->error = false; 642 $this->strict_mode = GESHI_NEVER; 643 644 if ($force_reset) { 645 $this->loaded_language = false; 646 } 647 648 //Clean up the language name to prevent malicious code injection 649 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language); 650 651 $language = strtolower($language); 652 653 //Retreive the full filename 654 $file_name = $this->language_path . $language . '.php'; 655 if ($file_name == $this->loaded_language) { 656 // this language is already loaded! 657 return; 658 } 659 660 $this->language = $language; 661 662 //Check if we can read the desired file 663 if (!is_readable($file_name)) { 664 $this->error = GESHI_ERROR_NO_SUCH_LANG; 665 return; 666 } 667 668 // Load the language for parsing 669 $this->load_language($file_name); 670 } 671 672 /** 673 * Sets the path to the directory containing the language files. Note 674 * that this path is relative to the directory of the script that included 675 * geshi.php, NOT geshi.php itself. 676 * 677 * @param string $path The path to the language directory 678 * @since 1.0.0 679 * @deprecated The path to the language files should now be automatically 680 * detected, so this method should no longer be needed. 
The 681 * 1.1.X branch handles manual setting of the path differently 682 * so this method will disappear in 1.2.0. 683 */ 684 public function set_language_path($path) { 685 if(strpos($path,':')) { 686 //Security Fix to prevent external directories using fopen wrappers. 687 if(DIRECTORY_SEPARATOR == "\\") { 688 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) { 689 return; 690 } 691 } else { 692 return; 693 } 694 } 695 if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) { 696 //Security Fix to prevent external directories using fopen wrappers. 697 return; 698 } 699 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) { 700 //Security Fix to prevent external directories using fopen wrappers. 701 return; 702 } 703 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) { 704 //Security Fix to prevent external directories using fopen wrappers. 705 return; 706 } 707 if ($path) { 708 $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/'; 709 $this->set_language($this->language); // otherwise set_language_path has no effect 710 } 711 } 712 713 /** 714 * Get supported langs or an associative array lang=>full_name. 715 * @param boolean $full_names 716 * @return array 717 */ 718 public function get_supported_languages($full_names=false) 719 { 720 // return array 721 $back = array(); 722 723 // we walk the lang root 724 $dir = dir($this->language_path); 725 726 // foreach entry 727 while (false !== ($entry = $dir->read())) 728 { 729 $full_path = $this->language_path.$entry; 730 731 // Skip all dirs 732 if (is_dir($full_path)) { 733 continue; 734 } 735 736 // we only want lang.php files 737 if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) { 738 continue; 739 } 740 741 // Raw lang name is here 742 $langname = $matches[1]; 743 744 // We want the fullname too? 
745 if ($full_names === true) 746 { 747 if (false !== ($fullname = $this->get_language_fullname($langname))) 748 { 749 $back[$langname] = $fullname; // we go associative 750 } 751 } 752 else 753 { 754 // just store raw langname 755 $back[] = $langname; 756 } 757 } 758 759 $dir->close(); 760 761 return $back; 762 } 763 764 /** 765 * Get full_name for a lang or false. 766 * @param string $language short langname (html4strict for example) 767 * @return mixed 768 */ 769 public function get_language_fullname($language) 770 { 771 //Clean up the language name to prevent malicious code injection 772 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language); 773 774 $language = strtolower($language); 775 776 // get fullpath-filename for a langname 777 $fullpath = $this->language_path.$language.'.php'; 778 779 // we need to get contents :S 780 if (false === ($data = file_get_contents($fullpath))) { 781 $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language); 782 return false; 783 } 784 785 // match the langname 786 if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) { 787 $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language); 788 return false; 789 } 790 791 // return fullname for langname 792 return stripcslashes($matches[1]); 793 } 794 795 /** 796 * Sets the type of header to be used. 797 * 798 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This 799 * means more source code but more control over tab width and line-wrapping. 800 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less 801 * control. Default is GESHI_HEADER_PRE. 802 * 803 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code 804 * should be outputted. 
805 * 806 * @param int $type The type of header to be used 807 * @since 1.0.0 808 */ 809 public function set_header_type($type) { 810 //Check if we got a valid header type 811 if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV, 812 GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) { 813 $this->error = GESHI_ERROR_INVALID_HEADER_TYPE; 814 return; 815 } 816 817 //Set that new header type 818 $this->header_type = $type; 819 } 820 821 /** 822 * Sets the styles for the code that will be outputted 823 * when this object is parsed. The style should be a 824 * string of valid stylesheet declarations 825 * 826 * @param string $style The overall style for the outputted code block 827 * @param boolean $preserve_defaults Whether to merge the styles with the current styles or not 828 * @since 1.0.0 829 */ 830 public function set_overall_style($style, $preserve_defaults = false) { 831 if (!$preserve_defaults) { 832 $this->overall_style = $style; 833 } else { 834 $this->overall_style .= $style; 835 } 836 } 837 838 /** 839 * Sets the overall classname for this block of code. This 840 * class can then be used in a stylesheet to style this object's 841 * output 842 * 843 * @param string $class The class name to use for this block of code 844 * @since 1.0.0 845 */ 846 public function set_overall_class($class) { 847 $this->overall_class = $class; 848 } 849 850 /** 851 * Sets the overall id for this block of code. This id can then 852 * be used in a stylesheet to style this object's output 853 * 854 * @param string $id The ID to use for this block of code 855 * @since 1.0.0 856 */ 857 public function set_overall_id($id) { 858 $this->overall_id = $id; 859 } 860 861 /** 862 * Sets whether CSS classes should be used to highlight the source. 
Default 863 * is off, calling this method with no arguments will turn it on 864 * 865 * @param boolean $flag Whether to turn classes on or not 866 * @since 1.0.0 867 */ 868 public function enable_classes($flag = true) { 869 $this->use_classes = ($flag) ? true : false; 870 } 871 872 /** 873 * Sets the style for the actual code. This should be a string 874 * containing valid stylesheet declarations. If $preserve_defaults is 875 * true, then styles are merged with the default styles, with the 876 * user defined styles having priority 877 * 878 * Note: Use this method to override any style changes you made to 879 * the line numbers if you are using line numbers, else the line of 880 * code will have the same style as the line number! Consult the 881 * GeSHi documentation for more information about this. 882 * 883 * @param string $style The style to use for actual code 884 * @param boolean $preserve_defaults Whether to merge the current styles with the new styles 885 * @since 1.0.2 886 */ 887 public function set_code_style($style, $preserve_defaults = false) { 888 if (!$preserve_defaults) { 889 $this->code_style = $style; 890 } else { 891 $this->code_style .= $style; 892 } 893 } 894 895 /** 896 * Sets the styles for the line numbers. 
897 * 898 * @param string $style1 The style for the line numbers that are "normal" 899 * @param string|boolean $style2 If a string, this is the style of the line 900 * numbers that are "fancy", otherwise if boolean then this 901 * defines whether the normal styles should be merged with the 902 * new normal styles or not 903 * @param boolean $preserve_defaults If set, is the flag for whether to merge the "fancy" 904 * styles with the current styles or not 905 * @since 1.0.2 906 */ 907 public function set_line_style($style1, $style2 = '', $preserve_defaults = false) { 908 //Check if we got 2 or three parameters 909 if (is_bool($style2)) { 910 $preserve_defaults = $style2; 911 $style2 = ''; 912 } 913 914 //Actually set the new styles 915 if (!$preserve_defaults) { 916 $this->line_style1 = $style1; 917 $this->line_style2 = $style2; 918 } else { 919 $this->line_style1 .= $style1; 920 $this->line_style2 .= $style2; 921 } 922 } 923 924 /** 925 * Sets whether line numbers should be displayed. 926 * 927 * Valid values for the first parameter are: 928 * 929 * - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed 930 * - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed 931 * - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed 932 * 933 * For fancy line numbers, the second parameter is used to signal which lines 934 * are to be fancy. For example, if the value of this parameter is 5 then every 935 * 5th line will be fancy. 
936 * 937 * @param int $flag How line numbers should be displayed 938 * @param int $nth_row Defines which lines are fancy 939 * @since 1.0.0 940 */ 941 public function enable_line_numbers($flag, $nth_row = 5) { 942 if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag 943 && GESHI_FANCY_LINE_NUMBERS != $flag) { 944 $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE; 945 } 946 $this->line_numbers = $flag; 947 $this->line_nth_row = $nth_row; 948 } 949 950 /** 951 * Sets wether spans and other HTML markup generated by GeSHi can 952 * span over multiple lines or not. Defaults to true to reduce overhead. 953 * Set it to false if you want to manipulate the output or manually display 954 * the code in an ordered list. 955 * 956 * @param boolean $flag Wether multiline spans are allowed or not 957 * @since 1.0.7.22 958 */ 959 public function enable_multiline_span($flag) { 960 $this->allow_multiline_span = (bool) $flag; 961 } 962 963 /** 964 * Get current setting for multiline spans, see GeSHi->enable_multiline_span(). 965 * 966 * @see enable_multiline_span 967 * @return bool 968 */ 969 public function get_multiline_span() { 970 return $this->allow_multiline_span; 971 } 972 973 /** 974 * Sets the style for a keyword group. 
If $preserve_defaults is 975 * true, then styles are merged with the default styles, with the 976 * user defined styles having priority 977 * 978 * @param int $key The key of the keyword group to change the styles of 979 * @param string $style The style to make the keywords 980 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just 981 * to overwrite them 982 * @since 1.0.0 983 */ 984 public function set_keyword_group_style($key, $style, $preserve_defaults = false) { 985 //Set the style for this keyword group 986 if('*' == $key) { 987 foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) { 988 if (!$preserve_defaults) { 989 $this->language_data['STYLES']['KEYWORDS'][$_key] = $style; 990 } else { 991 $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style; 992 } 993 } 994 } else { 995 if (!$preserve_defaults) { 996 $this->language_data['STYLES']['KEYWORDS'][$key] = $style; 997 } else { 998 $this->language_data['STYLES']['KEYWORDS'][$key] .= $style; 999 } 1000 } 1001 1002 //Update the lexic permissions 1003 if (!isset($this->lexic_permissions['KEYWORDS'][$key])) { 1004 $this->lexic_permissions['KEYWORDS'][$key] = true; 1005 } 1006 } 1007 1008 /** 1009 * Turns highlighting on/off for a keyword group 1010 * 1011 * @param int $key The key of the keyword group to turn on or off 1012 * @param boolean $flag Whether to turn highlighting for that group on or off 1013 * @since 1.0.0 1014 */ 1015 public function set_keyword_group_highlighting($key, $flag = true) { 1016 $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false; 1017 } 1018 1019 /** 1020 * Sets the styles for comment groups. 
If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * group, so the user defined declarations come last.
     *
     * Passing the string '*' as $key applies the style to every comment group
     * that currently has a style entry.
     *
     * @param int|string $key               The key of the comment group to change the styles of,
     *                                      or '*' for all groups
     * @param string     $style             The style to make the comments
     * @param boolean    $preserve_defaults Whether to merge the new styles with the old or just
     *                                      to overwrite them
     * @since 1.0.0
     */
    public function set_comments_style($key, $style, $preserve_defaults = false) {
        // Strict comparison on purpose: with "==" the integer key 0 is equal to
        // '*' on PHP < 8, which would restyle every group instead of group 0.
        if ('*' === $key) {
            $groups = isset($this->language_data['STYLES']['COMMENTS'])
                ? array_keys($this->language_data['STYLES']['COMMENTS'])
                : array();
        } else {
            $groups = array($key);
        }

        foreach ($groups as $group) {
            if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$group])) {
                $this->language_data['STYLES']['COMMENTS'][$group] .= $style;
            } else {
                // Also covers "preserve" on a group without a style yet, which
                // would otherwise append to an undefined index.
                $this->language_data['STYLES']['COMMENTS'][$group] = $style;
            }
        }
    }

    /**
     * Turns highlighting on/off for comment groups
     *
     * @param int     $key  The key of the comment group to turn on or off
     * @param boolean $flag Whether to turn highlighting for that group on or off
     * @since 1.0.0
     */
    public function set_comments_highlighting($key, $flag = true) {
        $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
    }

    /**
     * Sets the styles for escaped characters. If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * group, so the user defined declarations come last.
     *
     * @param string  $style             The style to make the escape characters
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @param int     $group             Tells the group of escape characters for which style should be set.
* @since 1.0.0
     */
    public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for escaped characters
     *
     * @param boolean $flag Whether to turn highlighting for escape characters on or off
     * @since 1.0.0
     */
    public function set_escape_characters_highlighting($flag = true) {
        $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
    }

    /**
     * Sets the styles for brackets. If $preserve_defaults is true, the new
     * style is appended to the style already stored, so the user defined
     * declarations come last.
     *
     * This method is DEPRECATED: use set_symbols_style instead.
     * This method will be removed in 1.2.X
     *
     * @param string  $style             The style to make the brackets
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @since 1.0.0
     * @deprecated In favour of set_symbols_style
     */
    public function set_brackets_style($style, $preserve_defaults = false) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['BRACKETS'][0] = $style;
        }
    }

    /**
     * Turns highlighting on/off for brackets
     *
     * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
     *
     * @param boolean $flag Whether to turn highlighting for brackets on or off
     * @since 1.0.0
     * @deprecated In favour of set_symbols_highlighting
     */
    public function set_brackets_highlighting($flag) {
        $this->lexic_permissions['BRACKETS'] = (bool) $flag;
    }

    /**
     * Sets the styles for symbols. If $preserve_defaults is true, the new
     * style is appended to the style already stored for the group, so the
     * user defined declarations come last.
     *
     * For group 0 the same style is also forwarded to set_brackets_style().
     *
     * @param string  $style             The style to make the symbols
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @param int     $group             Tells the group of symbols for which style should be set.
     * @since 1.0.1
     */
    public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of symbols
        if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
        }

        // For backward compatibility
        if (0 == $group) {
            $this->set_brackets_style($style, $preserve_defaults);
        }
    }

    /**
     * Turns highlighting on/off for symbols (and, for backward
     * compatibility, for brackets as well)
     *
     * @param boolean $flag Whether to turn highlighting for symbols on or off
     * @since 1.0.0
     */
    public function set_symbols_highlighting($flag) {
        // Update lexic permissions for this symbol group
        $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

        // For backward compatibility
        $this->set_brackets_highlighting($flag);
    }

    /**
     * Sets the styles for strings. If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * group, so the user defined declarations come last.
     *
     * @param string  $style             The style to make the strings
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @param int     $group             Tells the group of strings for which style should be set.
* @since 1.0.0
     */
    public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['STRINGS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for strings
     *
     * @param boolean $flag Whether to turn highlighting for strings on or off
     * @since 1.0.0
     */
    public function set_strings_highlighting($flag) {
        $this->lexic_permissions['STRINGS'] = (bool) $flag;
    }

    /**
     * Sets the styles for strict code blocks. If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * group, so the user defined declarations come last.
     *
     * @param string  $style             The style to make the script blocks
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @param int     $group             Tells the group of script blocks for which style should be set.
     * @since 1.0.8.4
     */
    public function set_script_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of script blocks
        if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
        }
    }

    /**
     * Sets the styles for numbers. If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * group, so the user defined declarations come last.
     *
     * @param string  $style             The style to make the numbers
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @param int     $group             Tells the group of numbers for which style should be set.
* @since 1.0.0
     */
    public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for numbers
     *
     * @param boolean $flag Whether to turn highlighting for numbers on or off
     * @since 1.0.0
     */
    public function set_numbers_highlighting($flag) {
        $this->lexic_permissions['NUMBERS'] = (bool) $flag;
    }

    /**
     * Sets the styles for methods. $key is a number that references the
     * appropriate "object splitter" - see the language file for the language
     * you are highlighting to get this number. If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * key, so the user defined declarations come last.
     *
     * @param int     $key               The key of the object splitter to change the styles of
     * @param string  $style             The style to make the methods
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @since 1.0.0
     */
    public function set_methods_style($key, $style, $preserve_defaults = false) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
            $this->language_data['STYLES']['METHODS'][$key] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['METHODS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for methods
     *
     * @param boolean $flag Whether to turn highlighting for methods on or off
     * @since 1.0.0
     */
    public function set_methods_highlighting($flag) {
        $this->lexic_permissions['METHODS'] = (bool) $flag;
    }

    /**
     * Sets the styles for regexps.
If $preserve_defaults is
     * true, the new style is appended to the style already stored for the
     * key, so the user defined declarations come last.
     *
     * @param int     $key               The key of the regular expression group to change the style of
     * @param string  $style             The style to make the regular expression matches
     * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
     *                                   to overwrite them
     * @since 1.0.0
     */
    public function set_regexps_style($key, $style, $preserve_defaults = false) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        } else {
            // Overwrite, or nothing stored yet to append to
            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for regexps
     *
     * @param int     $key  The key of the regular expression group to turn on or off
     * @param boolean $flag Whether to turn highlighting for the regular expression group on or off
     * @since 1.0.0
     */
    public function set_regexps_highlighting($key, $flag) {
        $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
    }

    /**
     * Sets whether a set of keywords are checked for in a case sensitive manner
     *
     * @param int     $key  The key of the keyword group to change the case sensitivity of
     * @param boolean $case Whether to check in a case sensitive manner or not
     * @since 1.0.0
     */
    public function set_case_sensitivity($key, $case) {
        $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
    }

    /**
     * Sets the case that keywords should use when found.
Use the constants:
     *
     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
     *
     * Any other value is silently ignored.
     *
     * @param int $case A constant specifying what to do with matched keywords
     * @since 1.0.1
     */
    public function set_case_keywords($case) {
        $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);

        if (!in_array($case, $allowed)) {
            return;
        }

        $this->language_data['CASE_KEYWORDS'] = $case;
    }

    /**
     * Sets how many spaces a tab is substituted for
     *
     * The value is converted to an integer; anything smaller than 1 is
     * replaced by a width of 8.
     *
     * @param int $width The tab width
     * @since 1.0.0
     */
    public function set_tab_width($width) {
        $requested = intval($width);

        // Out-of-range widths fall back to 8
        $this->tab_width = ($requested < 1) ? 8 : $requested;
    }

    /**
     * Sets whether or not to use the tab-stop width specified by the language
     *
     * @param boolean $use Whether to use language-specific tab-stop widths
     * @since 1.0.7.20
     */
    public function set_use_language_tab_width($use) {
        $this->use_language_tab_width = $use ? true : false;
    }

    /**
     * Returns the tab width to use: the language's TAB_WIDTH when language
     * specific widths are enabled and the language defines one, otherwise
     * the width configured on this object.
     *
     * @return int Tab width
     * @since 1.0.7.20
     */
    public function get_real_tab_width() {
        if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
            return $this->language_data['TAB_WIDTH'];
        }

        return $this->tab_width;
    }

    /**
     * Enables/disables strict highlighting. Default is off, calling this
     * method without parameters will turn it on. See documentation
     * for more details on strict mode and where to use it.
*
     * The setting is only changed when the loaded language declares
     * STRICT_MODE_APPLIES as GESHI_MAYBE.
     *
     * @param boolean $mode Whether to enable strict mode or not
     * @since 1.0.0
     */
    public function enable_strict_mode($mode = true) {
        if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
            return;
        }

        if ($mode) {
            $this->strict_mode = GESHI_ALWAYS;
        } else {
            $this->strict_mode = GESHI_NEVER;
        }
    }

    /**
     * Disables all highlighting
     *
     * @since 1.0.0
     * @todo  Rewrite with array traversal
     * @deprecated In favour of enable_highlighting
     */
    public function disable_highlighting() {
        $this->enable_highlighting(false);
    }

    /**
     * Enables all highlighting
     *
     * The optional flag parameter was added in version 1.0.7.21 and can be used
     * to enable (true) or disable (false) all highlighting.
     *
     * @since 1.0.0
     * @param boolean $flag A flag specifying whether to enable or disable all highlighting
     * @todo  Rewrite with array traversal
     */
    public function enable_highlighting($flag = true) {
        $enabled = (bool) $flag;

        foreach ($this->lexic_permissions as $type => $permission) {
            if (!is_array($permission)) {
                // Plain on/off switch for this element type
                $this->lexic_permissions[$type] = $enabled;
                continue;
            }

            // One switch per group of this element type
            foreach (array_keys($permission) as $group) {
                $this->lexic_permissions[$type][$group] = $enabled;
            }
        }

        // Context blocks
        $this->enable_important_blocks = $enabled;
    }

    /**
     * Given a file extension, this method returns the name of the first
     * language in the lookup table that lists it, or 'text' if no language
     * lists it
     *
     * @param string $extension The extension to get a language name for
     * @param array  $lookup    A lookup array to use instead of the default one
     * @since 1.0.5
     * @todo Re-think about how this method works (maybe make it private and/or make it
     *       a extension->lang lookup?)
* @return int|string The key of the first matching lookup entry, or 'text'
     *                    when nothing matches
     */
    public static function get_language_name_from_extension( $extension, $lookup = array() ) {
        // The extension is lowercased before matching, so lookup entries that
        // contain uppercase letters (e.g. 'C', 'HPP' below) can never match.
        $extension = strtolower((string) $extension);

        if ( !is_array($lookup) || empty($lookup)) {
            $lookup = array(
                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
                'abap' => array('abap'),
                'actionscript' => array('as'),
                'ada' => array('a', 'ada', 'adb', 'ads'),
                'apache' => array('conf'),
                'asm' => array('ash', 'asm', 'inc'),
                'asp' => array('asp'),
                'bash' => array('sh'),
                'bf' => array('bf'),
                'c' => array('c', 'h'),
                'c_mac' => array('c', 'h'),
                'caddcl' => array(),
                'cadlisp' => array(),
                'cdfg' => array('cdfg'),
                'cobol' => array('cbl'),
                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
                'csharp' => array('cs'),
                'css' => array('css'),
                'd' => array('d'),
                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
                'diff' => array('diff', 'patch'),
                'dos' => array('bat', 'cmd'),
                'gdb' => array('kcrash', 'crash', 'bt'),
                'gettext' => array('po', 'pot'),
                'gml' => array('gml'),
                'gnuplot' => array('plt'),
                'groovy' => array('groovy'),
                'haskell' => array('hs'),
                'haxe' => array('hx'),
                'html4strict' => array('html', 'htm'),
                'ini' => array('ini', 'desktop'),
                'java' => array('java'),
                'javascript' => array('js'),
                'klonec' => array('kl1'),
                'klonecpp' => array('klx'),
                'latex' => array('tex'),
                'lisp' => array('lisp'),
                'lua' => array('lua'),
                'matlab' => array('m'),
                'mpasm' => array(),
                'mysql' => array('sql'),
                'nsis' => array(),
                'objc' => array(),
                'oobas' => array(),
                'oracle8' => array(),
                'oracle10' => array(),
                'pascal' => array('pas'),
                'perl' => array('pl', 'pm'),
                'php' => array('php', 'php5', 'phtml', 'phps'),
                'povray' => array('pov'),
                'providex' => array('pvc', 'pvx'),
                'prolog' => array('pl'),
                'python' => array('py'),
                'qbasic' => array('bi'),
                'reg' => array('reg'),
                'ruby' => array('rb'),
                'sas' => array('sas'),
                'scala' => array('scala'),
                'scheme' => array('scm'),
                'scilab' => array('sci'),
                'smalltalk' => array('st'),
                'smarty' => array(),
                'tcl' => array('tcl'),
                'text' => array('txt'),
                'vb' => array('bas'),
                'vbnet' => array(),
                'visualfoxpro' => array(),
                'whitespace' => array('ws'),
                'xml' => array('xml', 'svg', 'xrc'),
                'z80' => array('z80', 'asm', 'inc')
            );
        }

        // First language listing the extension wins (table order matters).
        foreach ($lookup as $lang => $extensions) {
            // (array) tolerates a custom lookup that maps a language to a
            // single extension instead of a list.
            foreach ((array) $extensions as $candidate) {
                // Compare as strings: a loose comparison treats numeric
                // strings such as '1e1' and '10' as equal.
                if ((string) $candidate === $extension) {
                    return $lang;
                }
            }
        }

        return 'text';
    }

    /**
     * Given a file name, this method loads its contents in, and attempts
     * to set the language automatically. An optional lookup table can be
     * passed for looking up the language name. If not specified a default
     * table is used
     *
     * The language table is in the form
     * <pre>array(
     *   'lang_name' => array('extension', 'extension', ...),
     *   'lang_name' ...
* );</pre>
     *
     * If the file cannot be read, the error GESHI_ERROR_FILE_NOT_READABLE is
     * recorded and neither source nor language are changed.
     *
     * @param string $file_name The filename to load the source from
     * @param array  $lookup    A lookup array to use instead of the default one
     * @todo Complete rethink of this and above method
     * @since 1.0.5
     */
    public function load_from_file($file_name, $lookup = array()) {
        // is_readable() is also true for directories, which cannot be loaded
        if (!is_readable($file_name) || is_dir($file_name)) {
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        $contents = file_get_contents($file_name);
        if (false === $contents) {
            // The read itself failed; do not hand "false" on as source
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        $this->set_source($contents);

        // pathinfo() only looks at the last path component, so a dot in a
        // directory name is not mistaken for the start of the extension.
        $extension = pathinfo($file_name, PATHINFO_EXTENSION);
        $this->set_language(self::get_language_name_from_extension($extension, $lookup));
    }

    /**
     * Adds a keyword to a keyword group for highlighting
     *
     * A group that does not exist yet is created. Words already present in
     * the group are not added again.
     *
     * @param int    $key  The key of the keyword group to add the keyword to
     * @param string $word The word to add to the keyword group
     * @since 1.0.0
     */
    public function add_keyword($key, $word) {
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            $this->language_data['KEYWORDS'][$key] = array();
        }

        if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
            return;
        }

        $this->language_data['KEYWORDS'][$key][] = $word;

        if (!$this->parse_cache_built) {
            return;
        }

        if (!empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
            $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
        } elseif (!isset($this->lexic_permissions['KEYWORDS'][$key])
            || $this->lexic_permissions['KEYWORDS'][$key]) {
            // No cached list to append to (e.g. the group is new): compile one.
            // Groups whose highlighting is switched off stay uncached.
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Removes a keyword from a keyword group
     *
     * @param int    $key       The key of the keyword group to remove the keyword from
     * @param string $word      The word to remove from the keyword group
     * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
* Note: if you set this to false and @see GeSHi->parse_code() was already called once,
     *                          for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
     *                          or the removed keyword will stay in cache and still be highlighted! On the other hand
     *                          it might be too expensive to recompile the regexp list for every removal if you want to
     *                          remove a lot of keywords.
     * @since 1.0.0
     */
    public function remove_keyword($key, $word, $recompile = true) {
        // Nothing to remove from a group that does not exist; searching it
        // would fail because there is no array to search in.
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            return;
        }

        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
        if (false === $key_to_remove) {
            return;
        }

        unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

        //NEW in 1.0.8, optionally recompile keyword group
        if ($recompile && $this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Creates a new keyword group
     *
     * The group is only created when at least one word is given.
     *
     * @param int     $key            The key of the keyword group to create
     * @param string  $styles         The styles for the keyword group
     * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
     * @param array   $words          The words to use for the keyword group
     * @since 1.0.0
     * @return bool False if $words is empty (nothing is changed), true otherwise
     */
    public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
        $words = (array) $words;
        if (empty($words)) {
            // empty word lists mess up highlighting
            return false;
        }

        //Add the new keyword group internally
        $this->language_data['KEYWORDS'][$key] = $words;
        $this->lexic_permissions['KEYWORDS'][$key] = true;
        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

        //NEW in 1.0.8, cache keyword regexp
        if ($this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
        return true;
    }

    /**
     * Removes a keyword group
     *
     * @param int
$key The key of the keyword group to remove
     * @since 1.0.0
     */
    public function remove_keyword_group ($key) {
        //Drop the words, permission, case setting and style of the group ...
        unset(
            $this->language_data['KEYWORDS'][$key],
            $this->lexic_permissions['KEYWORDS'][$key],
            $this->language_data['CASE_SENSITIVE'][$key],
            $this->language_data['STYLES']['KEYWORDS'][$key]
        );

        //... and (NEW in 1.0.8) its cached regexp list
        unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
    }

    /**
     * Compiles the optimized regexp list for a keyword group and stores it
     * in the keyword cache.
     *
     * When PARSER_CONTROL enables SPACE_AS_WHITESPACE for keywords (globally
     * or for this group, the group setting taking precedence), every space
     * in the compiled expressions is replaced by "\s+".
     *
     * @param int $key The key of the keyword group to compile & optimize
     * @since 1.0.8
     */
    public function optimize_keyword_group($key) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

        $space_as_whitespace = false;

        // Setting for all keyword groups
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
        }

        // Setting for this group only, overrides the one above
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
        }

        if (!$space_as_whitespace) {
            return;
        }

        foreach (array_keys($this->language_data['CACHED_KEYWORD_LISTS'][$key]) as $index) {
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
                str_replace(" ", "\\s+", $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index]);
        }
    }

    /**
     * Sets the content of the header block
     *
     * @param string $content The content of the header block
     * @since 1.0.2
     */
    public function set_header_content($content) {
        $this->header_content = $content;
    }

    /**
     * Sets the content of the footer block
     *
     * @param string $content The content of the footer block
     * @since 1.0.2
     */
    public function set_footer_content($content) {
        $this->footer_content = $content;
    }

    /**
     * Sets the style for the header content
     *
     * @param string $style The style for the header content
     * @since 1.0.2
     */
    public function set_header_content_style($style) {
        $this->header_content_style = $style;
    }

    /**
     * Sets the style for the footer content
     *
     * @param string $style The style for the footer content
     * @since 1.0.2
     */
    public function set_footer_content_style($style) {
        $this->footer_content_style = $style;
    }

    /**
     * Sets whether to force a surrounding block around
     * the highlighted code or not
     *
     * @param boolean $flag Tells whether to enable or disable this feature
     * @since 1.0.7.20
     */
    public function enable_inner_code_block($flag) {
        $this->force_code_block = $flag ? true : false;
    }

    /**
     * Sets the base URL to be used for keywords
     *
     * @param int    $group The key of the keyword group to set the URL for
     * @param string $url   The URL to set for the group. If {FNAME} is in
     *                      the url somewhere, it is replaced by the keyword
     *                      that the URL is being made for
     * @since 1.0.2
     */
    public function set_url_for_keyword_group($group, $url) {
        $this->language_data['URLS'][$group] = $url;
    }

    /**
     * Sets styles for links in code
     *
     * @param int    $type   A constant that specifies what state the style is being
     *                       set for - e.g.
:hover or :visited
     * @param string $styles The styles to use for that state
     * @since 1.0.2
     */
    public function set_link_styles($type, $styles) {
        $this->link_styles[$type] = $styles;
    }

    /**
     * Sets the target for links in code
     *
     * An empty (falsy) target removes the attribute again.
     *
     * @param string $target The target for links in the code, e.g. _blank
     * @since 1.0.3
     */
    public function set_link_target($target) {
        // NOTE(review): $target is placed into the attribute verbatim, without
        // escaping - callers should only pass trusted values.
        $this->link_target = $target ? ' target="' . $target . '"' : '';
    }

    /**
     * Sets styles for important parts of the code
     *
     * @param string $styles The styles to use on important parts of the code
     * @since 1.0.2
     */
    public function set_important_styles($styles) {
        $this->important_styles = $styles;
    }

    /**
     * Sets whether context-important blocks are highlighted
     *
     * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
     * @todo Remove this from GeSHi
     * @deprecated
     * @since 1.0.2
     */
    public function enable_important_blocks($flag) {
        $this->enable_important_blocks = (bool) $flag;
    }

    /**
     * Whether CSS IDs should be added to each line
     *
     * @param boolean $flag If true, IDs will be added to each line.
     * @since 1.0.2
     */
    public function enable_ids($flag = true) {
        $this->add_ids = (bool) $flag;
    }

    /**
     * Specifies which lines to highlight extra
     *
     * The extra style parameter was added in 1.0.7.21.
     *
     * @param mixed  $lines An array of line numbers to highlight, or just a line
     *                      number on its own.
     * @param string $style A string specifying the style to use for this line.
     *                      If null is specified, the default style is used.
* If false is specified, the line will be removed from
     *                      special highlighting
     * @since 1.0.2
     * @todo  Some data replication here that could be cut down on
     */
    public function highlight_lines_extra($lines, $style = null) {
        if (is_array($lines)) {
            //Handle every entry of the list on its own
            foreach ($lines as $single_line) {
                $this->highlight_lines_extra($single_line, $style);
            }
            return;
        }

        //Mark the line as being highlighted specially
        $line_no = intval($lines);
        $this->highlight_extra_lines[$line_no] = $line_no;

        if (null === $style) {
            //Default style: forget any style stored for this line
            unset($this->highlight_extra_lines_styles[$line_no]);
            return;
        }

        if (false === $style) {
            //Take the line out of special highlighting again
            unset(
                $this->highlight_extra_lines[$line_no],
                $this->highlight_extra_lines_styles[$line_no]
            );
            return;
        }

        $this->highlight_extra_lines_styles[$line_no] = $style;
    }

    /**
     * Sets the style for extra-highlighted lines
     *
     * @param string $styles The style for extra-highlighted lines
     * @since 1.0.2
     */
    public function set_highlight_lines_extra_style($styles) {
        $this->highlight_extra_lines_style = $styles;
    }

    /**
     * Sets the line-ending
     *
     * @param string $line_ending The new line-ending (converted to a string)
     * @since 1.0.2
     */
    public function set_line_ending($line_ending) {
        $this->line_ending = strval($line_ending);
    }

    /**
     * Sets what number line numbers should start at. Should
     * be a positive integer, and will be converted to one.
     *
     * <b>Warning:</b> Using this method will add the "start"
     * attribute to the <ol> that is used for line numbering.
     * This is <b>not</b> valid XHTML strict, so if that's what you
     * care about then don't use this method.
Firefox is getting
     * support for the CSS method of doing this in 1.1 and Opera
     * has support for the CSS method, but (of course) IE doesn't
     * so it's not worth doing it the CSS way yet.
     *
     * @param int $number The number to start line numbers at
     * @since 1.0.2
     */
    public function start_line_numbers_at($number) {
        $start = intval($number);

        // A negative number is stored as its absolute value
        $this->line_numbers_start = abs($start);
    }

    /**
     * Sets the encoding used for htmlspecialchars(), for international
     * support.
     *
     * NOTE: This is not needed for now because htmlspecialchars() is not
     * being used (it has a security hole in PHP4 that has not been patched).
     * Maybe in a future version it may make a return for speed reasons, but
     * I doubt it.
     *
     * The name is stored in lowercase; an empty (falsy) value is ignored.
     *
     * @param string $encoding The encoding to use for the source
     * @since 1.0.3
     */
    public function set_encoding($encoding) {
        if (!$encoding) {
            return;
        }

        $this->encoding = strtolower($encoding);
    }

    /**
     * Turns linking of keywords on or off.
     *
     * @param boolean $enable If true, links will be added to keywords
     * @since 1.0.2
     */
    public function enable_keyword_links($enable = true) {
        $this->keyword_links = $enable ? true : false;
    }

    /**
     * Setup caches needed for styling. This is automatically called in
     * parse_code() and get_stylesheet() when appropriate.
This function helps
     * stylesheet generators as they rely on some style information being
     * preprocessed
     *
     * Only the number highlighting information is prepared here, and only
     * when number highlighting is enabled.
     *
     * @since 1.0.8
     */
    protected function build_style_cache() {
        //Nothing to prepare when numbers are not highlighted
        if (!$this->lexic_permissions['NUMBERS']) {
            return;
        }

        //First check what way highlighting information for numbers are given
        if (!isset($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS'] = 0;
        }

        //An array is taken over as the cache unchanged
        if (is_array($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
            return;
        }

        $this->language_data['NUMBERS_CACHE'] = array();

        //No flags given: fall back to basic integers and non-scientific floats
        if (!$this->language_data['NUMBERS']) {
            $this->language_data['NUMBERS'] =
                GESHI_NUMBER_INT_BASIC |
                GESHI_NUMBER_FLT_NONSCI;
        }

        //Walk through the flag bits, lowest bit first
        $bit = 0;
        $remaining = $this->language_data['NUMBERS'];
        while ($remaining > 0) {
            $mask = 1 << $bit;

            //Rearrange style indices if required: a style keyed by the flag
            //value is moved to the key of the bit position
            if (isset($this->language_data['STYLES']['NUMBERS'][$mask])) {
                $this->language_data['STYLES']['NUMBERS'][$bit] =
                    $this->language_data['STYLES']['NUMBERS'][$mask];
                unset($this->language_data['STYLES']['NUMBERS'][$mask]);
            }

            //Check if this bit is set for highlighting
            if ($remaining & 1) {
                if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                    //The flag has a style group of its own
                    $this->language_data['NUMBERS_CACHE'][$bit] = $mask;
                } else {
                    //No own style: collect the flag in group 0
                    if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                        $this->language_data['NUMBERS_CACHE'][0] = 0;
                    }
                    $this->language_data['NUMBERS_CACHE'][0] |= $mask;
                }
            }

            ++$bit;
            $remaining >>= 1;
        }
    }

    /**
     * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
     * This function makes stylesheet generators much faster as they do not need these caches.
*
     * Entries of $this->language_data written by this method:
     *  - MULTIPLE_SYMBOL_GROUPS, SYMBOL_DATA and SYMBOL_SEARCH, when symbol
     *    highlighting is enabled and the language defines symbols;
     *  - CACHED_KEYWORD_LISTS, which is reset to an empty array before
     *    optimize_keyword_group() is called for every keyword group that is
     *    not explicitly disabled;
     *  - CACHE_BRACKET_MATCH and CACHE_BRACKET_REPLACE, when bracket
     *    highlighting is enabled;
     *  - NUMBERS_RXCACHE and a default PARSER_CONTROL/NUMBERS/PRECHECK_RX,
     *    when number highlighting is enabled (build_style_cache() is run
     *    first if NUMBERS_CACHE does not exist yet).
     * Finally $this->parse_cache_built is set to true.
     *
     * @return void
     * @since 1.0.8
     */
    protected function build_parse_cache() {
        // cache symbol regexp
        //As this is a costly operation, we avoid doing it for multiple groups ...
        //Instead we perform it for all symbols at once.
        //
        //For this to work, we need to reorganize the data arrays.
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            // count() may only be given an array: PHP 8 throws a TypeError for
            // count(null), so a language without symbol styles must not reach it.
            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] =
                isset($this->language_data['STYLES']['SYMBOLS']) &&
                is_array($this->language_data['STYLES']['SYMBOLS']) &&
                count($this->language_data['STYLES']['SYMBOLS']) > 1;

            $this->language_data['SYMBOL_DATA'] = array();
            $symbol_preg_multi = array(); // multi char symbols
            $symbol_preg_single = array(); // single char symbols
            foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
                // A nested array is a style group identified by its key,
                // a bare symbol always belongs to style group 0.
                if (is_array($symbols)) {
                    $group = $key;
                    $symbol_list = $symbols;
                } else {
                    $group = 0;
                    $symbol_list = array($symbols);
                }

                foreach ($symbol_list as $sym) {
                    // Symbols are stored and searched in their HTML-escaped form
                    $sym = $this->hsc($sym);
                    if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                        // the first definition of a symbol wins
                        continue;
                    }
                    $this->language_data['SYMBOL_DATA'][$sym] = $group;

                    if (isset($sym[1])) { // multiple chars
                        $symbol_preg_multi[] = preg_quote($sym, '/');
                    } elseif ($sym == '-') {
                        // don't trigger range out of order error
                        $symbol_preg_single[] = '\-';
                    } else { // single char
                        $symbol_preg_single[] = preg_quote($sym, '/');
                    }
                }
            }

            //Now we have an array with each possible symbol as the key and the style as the actual data.
            //This way we can set the correct style just the moment we highlight ...
            //
            //What is left is to build one search string matching any symbol:
            //an alternation of the multi char symbols followed by a character
            //class holding the single char ones. The reverse sort places a
            //symbol ahead of any shorter symbol that is its prefix.
            $symbol_preg = array();
            if (!empty($symbol_preg_multi)) {
                rsort($symbol_preg_multi);
                $symbol_preg[] = implode('|', $symbol_preg_multi);
            }
            if (!empty($symbol_preg_single)) {
                rsort($symbol_preg_single);
                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
            }
            $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
        }

        // cache optimized regexp for keyword matching
        // remove old cache
        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
            // a group without an explicit permission entry counts as enabled
            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
                    $this->lexic_permissions['KEYWORDS'][$key]) {
                $this->optimize_keyword_group($key);
            }
        }

        // brackets
        if ($this->lexic_permissions['BRACKETS']) {
            $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

            // Inline style when classes are off and a bracket style exists,
            // the fixed class "br0" otherwise.
            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
                $bracket_attributes = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
            } else {
                $bracket_attributes = ' class="br0"';
            }

            // One replacement per bracket, in the same order as CACHE_BRACKET_MATCH.
            // "<|" and "|>" are the placeholder markers used throughout this cache;
            // presumably they are turned into real tags later on (not visible here).
            $this->language_data['CACHE_BRACKET_REPLACE'] = array();
            foreach ($this->language_data['CACHE_BRACKET_MATCH'] as $bracket) {
                $this->language_data['CACHE_BRACKET_REPLACE'][] =
                    '<|' . $bracket_attributes . '>' . $bracket . '|>';
            }
        }

        //Build the parse cache needed to highlight numbers appropriate
        if($this->lexic_permissions['NUMBERS']) {
            //Check if the style rearrangements have been processed ...
            //This also does some preprocessing to check which style groups are useable ...
            if(!isset($this->language_data['NUMBERS_CACHE'])) {
                $this->build_style_cache();
            }

            //Number format specification
            //All this formats are matched case-insensitively!
            static $numbers_format = array(
                GESHI_NUMBER_INT_BASIC =>
                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_INT_CSTYLE =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_0B =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_0O =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_AT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI_F =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_SHORT =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_ZERO =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
                );

            //At this step we have an associative array with flag groups for a
            //specific style or an string denoting a regexp given its index.
            $this->language_data['NUMBERS_RXCACHE'] = array();
            foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
                if(is_string($rxdata)) {
                    // the language supplied a ready-made regexp
                    $regexp = $rxdata;
                } else {
                    //This is a bitfield of number flags to highlight:
                    //Build an array, implode them together and make this the actual RX
                    $rxuse = array();
                    for($i = 1; $i <= $rxdata; $i<<=1) {
                        // A set bit without a format in the table is skipped: adding
                        // it would put an empty alternative into the regexp, and an
                        // empty alternative matches the empty string everywhere.
                        if(($rxdata & $i) && isset($numbers_format[$i])) {
                            $rxuse[] = $numbers_format[$i];
                        }
                    }
                    $regexp = implode("|", $rxuse);
                }

                // The surrounding lookarounds presumably keep the pattern from
                // matching inside markers that were inserted by earlier passes.
                $this->language_data['NUMBERS_RXCACHE'][$key] =
                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
            }

            // Default pre-check: a language that does not define its own gets "contains a digit"
            if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
            }
        }

        $this->parse_cache_built = true;
    }

    /**
     * Returns the code in $this->source, highlighted and surrounded by the
     * nessecary HTML.
2136 * 2137 * This should only be called ONCE, cos it's SLOW! If you want to highlight 2138 * the same source multiple times, you're better off doing a whole lot of 2139 * str_replaces to replace the <span>s 2140 * 2141 * @since 1.0.0 2142 */ 2143 public function parse_code () { 2144 // Start the timer 2145 $start_time = microtime(); 2146 2147 // Replace all newlines to a common form. 2148 $code = str_replace("\r\n", "\n", $this->source); 2149 $code = str_replace("\r", "\n", $code); 2150 2151 // Firstly, if there is an error, we won't highlight 2152 if ($this->error) { 2153 //Escape the source for output 2154 $result = $this->hsc($this->source); 2155 2156 //This fix is related to SF#1923020, but has to be applied regardless of 2157 //actually highlighting symbols. 2158 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result); 2159 2160 // Timing is irrelevant 2161 $this->set_time($start_time, $start_time); 2162 $this->finalise($result); 2163 return $result; 2164 } 2165 2166 // make sure the parse cache is up2date 2167 if (!$this->parse_cache_built) { 2168 $this->build_parse_cache(); 2169 } 2170 2171 // Initialise various stuff 2172 $length = strlen($code); 2173 $COMMENT_MATCHED = false; 2174 $stuff_to_parse = ''; 2175 $endresult = ''; 2176 2177 // "Important" selections are handled like multiline comments 2178 // @todo GET RID OF THIS SHIZ 2179 if ($this->enable_important_blocks) { 2180 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT; 2181 } 2182 2183 if ($this->strict_mode) { 2184 // Break the source into bits. 
Each bit will be a portion of the code 2185 // within script delimiters - for example, HTML between < and > 2186 $k = 0; 2187 $parts = array(); 2188 $matches = array(); 2189 $next_match_pointer = null; 2190 // we use a copy to unset delimiters on demand (when they are not found) 2191 $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; 2192 $i = 0; 2193 while ($i < $length) { 2194 $next_match_pos = $length + 1; // never true 2195 foreach ($delim_copy as $dk => $delimiters) { 2196 if(is_array($delimiters)) { 2197 foreach ($delimiters as $open => $close) { 2198 // make sure the cache is setup properly 2199 if (!isset($matches[$dk][$open])) { 2200 $matches[$dk][$open] = array( 2201 'next_match' => -1, 2202 'dk' => $dk, 2203 2204 'open' => $open, // needed for grouping of adjacent code blocks (see below) 2205 'open_strlen' => strlen($open), 2206 2207 'close' => $close, 2208 'close_strlen' => strlen($close), 2209 ); 2210 } 2211 // Get the next little bit for this opening string 2212 if ($matches[$dk][$open]['next_match'] < $i) { 2213 // only find the next pos if it was not already cached 2214 $open_pos = strpos($code, $open, $i); 2215 if ($open_pos === false) { 2216 // no match for this delimiter ever 2217 unset($delim_copy[$dk][$open]); 2218 continue; 2219 } 2220 $matches[$dk][$open]['next_match'] = $open_pos; 2221 } 2222 if ($matches[$dk][$open]['next_match'] < $next_match_pos) { 2223 //So we got a new match, update the close_pos 2224 $matches[$dk][$open]['close_pos'] = 2225 strpos($code, $close, $matches[$dk][$open]['next_match']+1); 2226 2227 $next_match_pointer =& $matches[$dk][$open]; 2228 $next_match_pos = $matches[$dk][$open]['next_match']; 2229 } 2230 } 2231 } else { 2232 //So we should match an RegExp as Strict Block ... 
2233 /** 2234 * The value in $delimiters is expected to be an RegExp 2235 * containing exactly 2 matching groups: 2236 * - Group 1 is the opener 2237 * - Group 2 is the closer 2238 */ 2239 if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) { 2240 //We got a match ... 2241 if(isset($matches_rx['start']) && isset($matches_rx['end'])) 2242 { 2243 $matches[$dk] = array( 2244 'next_match' => $matches_rx['start'][1], 2245 'dk' => $dk, 2246 2247 'close_strlen' => strlen($matches_rx['end'][0]), 2248 'close_pos' => $matches_rx['end'][1], 2249 ); 2250 } else { 2251 $matches[$dk] = array( 2252 'next_match' => $matches_rx[1][1], 2253 'dk' => $dk, 2254 2255 'close_strlen' => strlen($matches_rx[2][0]), 2256 'close_pos' => $matches_rx[2][1], 2257 ); 2258 } 2259 } else { 2260 // no match for this delimiter ever 2261 unset($delim_copy[$dk]); 2262 continue; 2263 } 2264 2265 if ($matches[$dk]['next_match'] <= $next_match_pos) { 2266 $next_match_pointer =& $matches[$dk]; 2267 $next_match_pos = $matches[$dk]['next_match']; 2268 } 2269 } 2270 } 2271 2272 // non-highlightable text 2273 $parts[$k] = array( 2274 1 => substr($code, $i, $next_match_pos - $i) 2275 ); 2276 ++$k; 2277 2278 if ($next_match_pos > $length) { 2279 // out of bounds means no next match was found 2280 break; 2281 } 2282 2283 // highlightable code 2284 $parts[$k][0] = $next_match_pointer['dk']; 2285 2286 //Only combine for non-rx script blocks 2287 if(is_array($delim_copy[$next_match_pointer['dk']])) { 2288 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three! 
2289 $i = $next_match_pos + $next_match_pointer['open_strlen']; 2290 while (true) { 2291 $close_pos = strpos($code, $next_match_pointer['close'], $i); 2292 if ($close_pos == false) { 2293 break; 2294 } 2295 $i = $close_pos + $next_match_pointer['close_strlen']; 2296 if ($i == $length) { 2297 break; 2298 } 2299 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 || 2300 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) { 2301 // merge adjacent but make sure we don't merge things like <tag><!-- comment --> 2302 foreach ($matches as $submatches) { 2303 foreach ($submatches as $match) { 2304 if ($match['next_match'] == $i) { 2305 // a different block already matches here! 2306 break 3; 2307 } 2308 } 2309 } 2310 } else { 2311 break; 2312 } 2313 } 2314 } else { 2315 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen']; 2316 $i = $close_pos; 2317 } 2318 2319 if ($close_pos === false) { 2320 // no closing delimiter found! 2321 $parts[$k][1] = substr($code, $next_match_pos); 2322 ++$k; 2323 break; 2324 } else { 2325 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos); 2326 ++$k; 2327 } 2328 } 2329 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); 2330 $num_parts = $k; 2331 2332 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { 2333 // when we have only one part, we don't have anything to highlight at all. 
2334 // if we have a "maybe" strict language, this should be handled as highlightable code 2335 $parts = array( 2336 0 => array( 2337 0 => '', 2338 1 => '' 2339 ), 2340 1 => array( 2341 0 => null, 2342 1 => $parts[0][1] 2343 ) 2344 ); 2345 $num_parts = 2; 2346 } 2347 2348 } else { 2349 // Not strict mode - simply dump the source into 2350 // the array at index 1 (the first highlightable block) 2351 $parts = array( 2352 0 => array( 2353 0 => '', 2354 1 => '' 2355 ), 2356 1 => array( 2357 0 => null, 2358 1 => $code 2359 ) 2360 ); 2361 $num_parts = 2; 2362 } 2363 2364 //Unset variables we won't need any longer 2365 unset($code); 2366 2367 //Preload some repeatedly used values regarding hardquotes ... 2368 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false; 2369 $hq_strlen = strlen($hq); 2370 2371 //Preload if line numbers are to be generated afterwards 2372 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398 2373 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || 2374 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span; 2375 2376 //preload the escape char for faster checking ... 
2377 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); 2378 2379 // this is used for single-line comments 2380 $sc_disallowed_before = ""; 2381 $sc_disallowed_after = ""; 2382 2383 if (isset($this->language_data['PARSER_CONTROL'])) { 2384 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { 2385 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) { 2386 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE']; 2387 } 2388 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) { 2389 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER']; 2390 } 2391 } 2392 } 2393 2394 //Fix for SF#1932083: Multichar Quotemarks unsupported 2395 $is_string_starter = array(); 2396 if ($this->lexic_permissions['STRINGS']) { 2397 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { 2398 if (!isset($is_string_starter[$quotemark[0]])) { 2399 $is_string_starter[$quotemark[0]] = (string)$quotemark; 2400 } elseif (is_string($is_string_starter[$quotemark[0]])) { 2401 $is_string_starter[$quotemark[0]] = array( 2402 $is_string_starter[$quotemark[0]], 2403 $quotemark); 2404 } else { 2405 $is_string_starter[$quotemark[0]][] = $quotemark; 2406 } 2407 } 2408 } 2409 2410 // Now we go through each part. We know that even-indexed parts are 2411 // code that shouldn't be highlighted, and odd-indexed parts should 2412 // be highlighted 2413 for ($key = 0; $key < $num_parts; ++$key) { 2414 $STRICTATTRS = ''; 2415 2416 // If this block should be highlighted... 
2417 if (!($key & 1)) { 2418 // Else not a block to highlight 2419 $endresult .= $this->hsc($parts[$key][1]); 2420 unset($parts[$key]); 2421 continue; 2422 } 2423 2424 $result = ''; 2425 $part = $parts[$key][1]; 2426 2427 $highlight_part = true; 2428 if ($this->strict_mode && !is_null($parts[$key][0])) { 2429 // get the class key for this block of code 2430 $script_key = $parts[$key][0]; 2431 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]; 2432 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' && 2433 $this->lexic_permissions['SCRIPT']) { 2434 // Add a span element around the source to 2435 // highlight the overall source block 2436 if (!$this->use_classes && 2437 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') { 2438 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"'; 2439 } else { 2440 $attributes = ' class="sc' . $script_key . '"'; 2441 } 2442 $result .= "<span$attributes>"; 2443 $STRICTATTRS = $attributes; 2444 } 2445 } 2446 2447 if ($highlight_part) { 2448 // Now, highlight the code in this block. This code 2449 // is really the engine of GeSHi (along with the method 2450 // parse_non_string_part). 
2451 2452 // cache comment regexps incrementally 2453 $next_comment_regexp_key = ''; 2454 $next_comment_regexp_pos = -1; 2455 $next_comment_multi_pos = -1; 2456 $next_comment_single_pos = -1; 2457 $comment_regexp_cache_per_key = array(); 2458 $comment_multi_cache_per_key = array(); 2459 $comment_single_cache_per_key = array(); 2460 $next_open_comment_multi = ''; 2461 $next_comment_single_key = ''; 2462 $escape_regexp_cache_per_key = array(); 2463 $next_escape_regexp_key = ''; 2464 $next_escape_regexp_pos = -1; 2465 2466 $length = strlen($part); 2467 for ($i = 0; $i < $length; ++$i) { 2468 // Get the next char 2469 $char = $part[$i]; 2470 $char_len = 1; 2471 2472 // update regexp comment cache if needed 2473 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) { 2474 $next_comment_regexp_pos = $length; 2475 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) { 2476 $match_i = false; 2477 if (isset($comment_regexp_cache_per_key[$comment_key]) && 2478 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i || 2479 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) { 2480 // we have already matched something 2481 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) { 2482 // this comment is never matched 2483 continue; 2484 } 2485 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos']; 2486 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) { 2487 $match_i = $match[0][1]; 2488 2489 $comment_regexp_cache_per_key[$comment_key] = array( 2490 'key' => $comment_key, 2491 'length' => strlen($match[0][0]), 2492 'pos' => $match_i 2493 ); 2494 } else { 2495 $comment_regexp_cache_per_key[$comment_key]['pos'] = false; 2496 continue; 2497 } 2498 2499 if ($match_i !== false && $match_i < $next_comment_regexp_pos) { 2500 $next_comment_regexp_pos = $match_i; 2501 $next_comment_regexp_key = $comment_key; 2502 if ($match_i === $i) { 2503 break; 2504 } 2505 } 2506 } 2507 } 2508 
2509 $string_started = false; 2510 2511 if (isset($is_string_starter[$char])) { 2512 // Possibly the start of a new string ... 2513 2514 //Check which starter it was ... 2515 //Fix for SF#1932083: Multichar Quotemarks unsupported 2516 if (is_array($is_string_starter[$char])) { 2517 $char_new = ''; 2518 foreach ($is_string_starter[$char] as $testchar) { 2519 if ($testchar === substr($part, $i, strlen($testchar)) && 2520 strlen($testchar) > strlen($char_new)) { 2521 $char_new = $testchar; 2522 $string_started = true; 2523 } 2524 } 2525 if ($string_started) { 2526 $char = $char_new; 2527 } 2528 } else { 2529 $testchar = $is_string_starter[$char]; 2530 if ($testchar === substr($part, $i, strlen($testchar))) { 2531 $char = $testchar; 2532 $string_started = true; 2533 } 2534 } 2535 $char_len = strlen($char); 2536 } 2537 2538 if ($string_started && ($i != $next_comment_regexp_pos)) { 2539 // Hand out the correct style information for this string 2540 $string_key = array_search($char, $this->language_data['QUOTEMARKS']); 2541 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) || 2542 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) { 2543 $string_key = 0; 2544 } 2545 2546 // parse the stuff before this 2547 $result .= $this->parse_non_string_part($stuff_to_parse); 2548 $stuff_to_parse = ''; 2549 2550 if (!$this->use_classes) { 2551 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"'; 2552 } else { 2553 $string_attributes = ' class="st'.$string_key.'"'; 2554 } 2555 2556 // now handle the string 2557 $string = "<span$string_attributes>" . GeSHi::hsc($char); 2558 $start = $i + $char_len; 2559 $string_open = true; 2560 2561 if(empty($this->language_data['ESCAPE_REGEXP'])) { 2562 $next_escape_regexp_pos = $length; 2563 } 2564 2565 do { 2566 //Get the regular ending pos ... 
2567 $close_pos = strpos($part, $char, $start); 2568 if(false === $close_pos) { 2569 $close_pos = $length; 2570 } 2571 2572 if($this->lexic_permissions['ESCAPE_CHAR']) { 2573 // update escape regexp cache if needed 2574 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) { 2575 $next_escape_regexp_pos = $length; 2576 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) { 2577 $match_i = false; 2578 if (isset($escape_regexp_cache_per_key[$escape_key]) && 2579 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start || 2580 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) { 2581 // we have already matched something 2582 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) { 2583 // this comment is never matched 2584 continue; 2585 } 2586 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos']; 2587 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) { 2588 $match_i = $match[0][1]; 2589 2590 $escape_regexp_cache_per_key[$escape_key] = array( 2591 'key' => $escape_key, 2592 'length' => strlen($match[0][0]), 2593 'pos' => $match_i 2594 ); 2595 } else { 2596 $escape_regexp_cache_per_key[$escape_key]['pos'] = false; 2597 continue; 2598 } 2599 2600 if ($match_i !== false && $match_i < $next_escape_regexp_pos) { 2601 $next_escape_regexp_pos = $match_i; 2602 $next_escape_regexp_key = $escape_key; 2603 if ($match_i === $start) { 2604 break; 2605 } 2606 } 2607 } 2608 } 2609 2610 //Find the next simple escape position 2611 if('' != $this->language_data['ESCAPE_CHAR']) { 2612 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start); 2613 if(false === $simple_escape) { 2614 $simple_escape = $length; 2615 } 2616 } else { 2617 $simple_escape = $length; 2618 } 2619 } else { 2620 $next_escape_regexp_pos = $length; 2621 $simple_escape = $length; 2622 } 2623 2624 if($simple_escape < $next_escape_regexp_pos && 2625 $simple_escape < $length && 2626 $simple_escape 
< $close_pos) { 2627 //The nexxt escape sequence is a simple one ... 2628 $es_pos = $simple_escape; 2629 2630 //Add the stuff not in the string yet ... 2631 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2632 2633 //Get the style for this escaped char ... 2634 if (!$this->use_classes) { 2635 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"'; 2636 } else { 2637 $escape_char_attributes = ' class="es0"'; 2638 } 2639 2640 //Add the style for the escape char ... 2641 $string .= "<span$escape_char_attributes>" . 2642 GeSHi::hsc($this->language_data['ESCAPE_CHAR']); 2643 2644 //Get the byte AFTER the ESCAPE_CHAR we just found 2645 $es_char = $part[$es_pos + 1]; 2646 if ($es_char == "\n") { 2647 // don't put a newline around newlines 2648 $string .= "</span>\n"; 2649 $start = $es_pos + 2; 2650 } elseif (ord($es_char) >= 128) { 2651 //This is an non-ASCII char (UTF8 or single byte) 2652 //This code tries to work around SF#2037598 ... 2653 if(function_exists('mb_substr')) { 2654 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding); 2655 $string .= $es_char_m . '</span>'; 2656 } elseif ('utf-8' == $this->encoding) { 2657 if(preg_match("/[\xC2-\xDF][\x80-\xBF]". 2658 "|\xE0[\xA0-\xBF][\x80-\xBF]". 2659 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}". 2660 "|\xED[\x80-\x9F][\x80-\xBF]". 2661 "|\xF0[\x90-\xBF][\x80-\xBF]{2}". 2662 "|[\xF1-\xF3][\x80-\xBF]{3}". 2663 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s", 2664 $part, $es_char_m, null, $es_pos + 1)) { 2665 $es_char_m = $es_char_m[0]; 2666 } else { 2667 $es_char_m = $es_char; 2668 } 2669 $string .= $this->hsc($es_char_m) . '</span>'; 2670 } else { 2671 $es_char_m = $this->hsc($es_char); 2672 } 2673 $start = $es_pos + strlen($es_char_m) + 1; 2674 } else { 2675 $string .= $this->hsc($es_char) . 
'</span>'; 2676 $start = $es_pos + 2; 2677 } 2678 } elseif ($next_escape_regexp_pos < $length && 2679 $next_escape_regexp_pos < $close_pos) { 2680 $es_pos = $next_escape_regexp_pos; 2681 //Add the stuff not in the string yet ... 2682 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2683 2684 //Get the key and length of this match ... 2685 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key]; 2686 $escape_str = substr($part, $es_pos, $escape['length']); 2687 $escape_key = $escape['key']; 2688 2689 //Get the style for this escaped char ... 2690 if (!$this->use_classes) { 2691 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"'; 2692 } else { 2693 $escape_char_attributes = ' class="es' . $escape_key . '"'; 2694 } 2695 2696 //Add the style for the escape char ... 2697 $string .= "<span$escape_char_attributes>" . 2698 $this->hsc($escape_str) . '</span>'; 2699 2700 $start = $es_pos + $escape['length']; 2701 } else { 2702 //Copy the remainder of the string ... 2703 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>'; 2704 $start = $close_pos + $char_len; 2705 $string_open = false; 2706 } 2707 } while($string_open); 2708 2709 if ($check_linenumbers) { 2710 // Are line numbers used? If, we should end the string before 2711 // the newline and begin it again (so when <li>s are put in the source 2712 // remains XHTML compliant) 2713 // note to self: This opens up possibility of config files specifying 2714 // that languages can/cannot have multiline strings??? 
2715 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2716 } 2717 2718 $result .= $string; 2719 $string = ''; 2720 $i = $start - 1; 2721 continue; 2722 } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char && 2723 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) { 2724 // The start of a hard quoted string 2725 if (!$this->use_classes) { 2726 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"'; 2727 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"'; 2728 } else { 2729 $string_attributes = ' class="st_h"'; 2730 $escape_char_attributes = ' class="es_h"'; 2731 } 2732 // parse the stuff before this 2733 $result .= $this->parse_non_string_part($stuff_to_parse); 2734 $stuff_to_parse = ''; 2735 2736 // now handle the string 2737 $string = ''; 2738 2739 // look for closing quote 2740 $start = $i + $hq_strlen; 2741 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) { 2742 $start = $close_pos + 1; 2743 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] && 2744 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape 2745 // make sure this quote is not escaped 2746 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2747 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) { 2748 // check wether this quote is escaped or if it is something like '\\' 2749 $escape_char_pos = $close_pos - 1; 2750 while ($escape_char_pos > 0 2751 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) { 2752 --$escape_char_pos; 2753 } 2754 if (($close_pos - $escape_char_pos) & 1) { 2755 // uneven number of escape chars => this quote is escaped 2756 continue 2; 2757 } 2758 } 2759 } 2760 } 2761 2762 // found closing quote 2763 break; 2764 } 2765 2766 //Found the closing delimiter? 
2767 if (!$close_pos) { 2768 // span till the end of this $part when no closing delimiter is found 2769 $close_pos = $length; 2770 } 2771 2772 //Get the actual string 2773 $string = substr($part, $i, $close_pos - $i + 1); 2774 $i = $close_pos; 2775 2776 // handle escape chars and encode html chars 2777 // (special because when we have escape chars within our string they may not be escaped) 2778 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) { 2779 $start = 0; 2780 $new_string = ''; 2781 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) { 2782 // hmtl escape stuff before 2783 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start)); 2784 // check if this is a hard escape 2785 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2786 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) { 2787 // indeed, this is a hardescape 2788 $new_string .= "<span$escape_char_attributes>" . 2789 $this->hsc($hardescape) . '</span>'; 2790 $start = $es_pos + strlen($hardescape); 2791 continue 2; 2792 } 2793 } 2794 // not a hard escape, but a normal escape 2795 // they come in pairs of two 2796 $c = 0; 2797 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1]) 2798 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR'] 2799 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) { 2800 $c += 2; 2801 } 2802 if ($c) { 2803 $new_string .= "<span$escape_char_attributes>" . 2804 str_repeat($escaped_escape_char, $c) . 2805 '</span>'; 2806 $start = $es_pos + $c; 2807 } else { 2808 // this is just a single lonely escape char... 2809 $new_string .= $escaped_escape_char; 2810 $start = $es_pos + 1; 2811 } 2812 } 2813 $string = $new_string . $this->hsc(substr($string, $start)); 2814 } else { 2815 $string = $this->hsc($string); 2816 } 2817 2818 if ($check_linenumbers) { 2819 // Are line numbers used? 
If, we should end the string before 2820 // the newline and begin it again (so when <li>s are put in the source 2821 // remains XHTML compliant) 2822 // note to self: This opens up possibility of config files specifying 2823 // that languages can/cannot have multiline strings??? 2824 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2825 } 2826 2827 $result .= "<span$string_attributes>" . $string . '</span>'; 2828 $string = ''; 2829 continue; 2830 } else { 2831 //Have a look for regexp comments 2832 if ($i == $next_comment_regexp_pos) { 2833 $COMMENT_MATCHED = true; 2834 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key]; 2835 $test_str = $this->hsc(substr($part, $i, $comment['length'])); 2836 2837 //@todo If remove important do remove here 2838 if ($this->lexic_permissions['COMMENTS']['MULTI']) { 2839 if (!$this->use_classes) { 2840 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"'; 2841 } else { 2842 $attributes = ' class="co' . $comment['key'] . '"'; 2843 } 2844 2845 $test_str = "<span$attributes>" . $test_str . "</span>"; 2846 2847 // Short-cut through all the multiline code 2848 if ($check_linenumbers) { 2849 // strreplace to put close span and open span around multiline newlines 2850 $test_str = str_replace( 2851 "\n", "</span>\n<span$attributes>", 2852 str_replace("\n ", "\n ", $test_str) 2853 ); 2854 } 2855 } 2856 2857 $i += $comment['length'] - 1; 2858 2859 // parse the rest 2860 $result .= $this->parse_non_string_part($stuff_to_parse); 2861 $stuff_to_parse = ''; 2862 } 2863 2864 // If we haven't matched a regexp comment, try multi-line comments 2865 if (!$COMMENT_MATCHED) { 2866 // Is this a multiline comment? 
2867 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) { 2868 $next_comment_multi_pos = $length; 2869 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) { 2870 $match_i = false; 2871 if (isset($comment_multi_cache_per_key[$open]) && 2872 ($comment_multi_cache_per_key[$open] >= $i || 2873 $comment_multi_cache_per_key[$open] === false)) { 2874 // we have already matched something 2875 if ($comment_multi_cache_per_key[$open] === false) { 2876 // this comment is never matched 2877 continue; 2878 } 2879 $match_i = $comment_multi_cache_per_key[$open]; 2880 } elseif (($match_i = stripos($part, $open, $i)) !== false) { 2881 $comment_multi_cache_per_key[$open] = $match_i; 2882 } else { 2883 $comment_multi_cache_per_key[$open] = false; 2884 continue; 2885 } 2886 if ($match_i !== false && $match_i < $next_comment_multi_pos) { 2887 $next_comment_multi_pos = $match_i; 2888 $next_open_comment_multi = $open; 2889 if ($match_i === $i) { 2890 break; 2891 } 2892 } 2893 } 2894 } 2895 if ($i == $next_comment_multi_pos) { 2896 $open = $next_open_comment_multi; 2897 $close = $this->language_data['COMMENT_MULTI'][$open]; 2898 $open_strlen = strlen($open); 2899 $close_strlen = strlen($close); 2900 $COMMENT_MATCHED = true; 2901 $test_str_match = $open; 2902 //@todo If remove important do remove here 2903 if ($this->lexic_permissions['COMMENTS']['MULTI'] || 2904 $open == GESHI_START_IMPORTANT) { 2905 if ($open != GESHI_START_IMPORTANT) { 2906 if (!$this->use_classes) { 2907 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"'; 2908 } else { 2909 $attributes = ' class="coMULTI"'; 2910 } 2911 $test_str = "<span$attributes>" . $this->hsc($open); 2912 } else { 2913 if (!$this->use_classes) { 2914 $attributes = ' style="' . $this->important_styles . 
'"'; 2915 } else { 2916 $attributes = ' class="imp"'; 2917 } 2918 2919 // We don't include the start of the comment if it's an 2920 // "important" part 2921 $test_str = "<span$attributes>"; 2922 } 2923 } else { 2924 $test_str = $this->hsc($open); 2925 } 2926 2927 $close_pos = strpos( $part, $close, $i + $open_strlen ); 2928 2929 if ($close_pos === false) { 2930 $close_pos = $length; 2931 } 2932 2933 // Short-cut through all the multiline code 2934 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen)); 2935 if (($this->lexic_permissions['COMMENTS']['MULTI'] || 2936 $test_str_match == GESHI_START_IMPORTANT) && 2937 $check_linenumbers) { 2938 2939 // strreplace to put close span and open span around multiline newlines 2940 $test_str .= str_replace( 2941 "\n", "</span>\n<span$attributes>", 2942 str_replace("\n ", "\n ", $rest_of_comment) 2943 ); 2944 } else { 2945 $test_str .= $rest_of_comment; 2946 } 2947 2948 if ($this->lexic_permissions['COMMENTS']['MULTI'] || 2949 $test_str_match == GESHI_START_IMPORTANT) { 2950 $test_str .= '</span>'; 2951 } 2952 2953 $i = $close_pos + $close_strlen - 1; 2954 2955 // parse the rest 2956 $result .= $this->parse_non_string_part($stuff_to_parse); 2957 $stuff_to_parse = ''; 2958 } 2959 } 2960 2961 // If we haven't matched a multiline comment, try single-line comments 2962 if (!$COMMENT_MATCHED) { 2963 // cache potential single line comment occurances 2964 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) { 2965 $next_comment_single_pos = $length; 2966 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) { 2967 $match_i = false; 2968 if (isset($comment_single_cache_per_key[$comment_key]) && 2969 ($comment_single_cache_per_key[$comment_key] >= $i || 2970 $comment_single_cache_per_key[$comment_key] === false)) { 2971 // we have already matched something 2972 if ($comment_single_cache_per_key[$comment_key] === false) { 
2973 // this comment is never matched 2974 continue; 2975 } 2976 $match_i = $comment_single_cache_per_key[$comment_key]; 2977 } elseif ( 2978 // case sensitive comments 2979 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 2980 ($match_i = stripos($part, $comment_mark, $i)) !== false) || 2981 // non case sensitive 2982 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 2983 (($match_i = strpos($part, $comment_mark, $i)) !== false))) { 2984 $comment_single_cache_per_key[$comment_key] = $match_i; 2985 } else { 2986 $comment_single_cache_per_key[$comment_key] = false; 2987 continue; 2988 } 2989 if ($match_i !== false && $match_i < $next_comment_single_pos) { 2990 $next_comment_single_pos = $match_i; 2991 $next_comment_single_key = $comment_key; 2992 if ($match_i === $i) { 2993 break; 2994 } 2995 } 2996 } 2997 } 2998 if ($next_comment_single_pos == $i) { 2999 $comment_key = $next_comment_single_key; 3000 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key]; 3001 $com_len = strlen($comment_mark); 3002 3003 // This check will find special variables like $# in bash 3004 // or compiler directives of Delphi beginning {$ 3005 if ((empty($sc_disallowed_before) || ($i == 0) || 3006 (false === strpos($sc_disallowed_before, $part[$i-1]))) && 3007 (empty($sc_disallowed_after) || ($length <= $i + $com_len) || 3008 (false === strpos($sc_disallowed_after, $part[$i + $com_len])))) 3009 { 3010 // this is a valid comment 3011 $COMMENT_MATCHED = true; 3012 if ($this->lexic_permissions['COMMENTS'][$comment_key]) { 3013 if (!$this->use_classes) { 3014 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"'; 3015 } else { 3016 $attributes = ' class="co' . $comment_key . '"'; 3017 } 3018 $test_str = "<span$attributes>" . 
$this->hsc($this->change_case($comment_mark)); 3019 } else { 3020 $test_str = $this->hsc($comment_mark); 3021 } 3022 3023 //Check if this comment is the last in the source 3024 $close_pos = strpos($part, "\n", $i); 3025 $oops = false; 3026 if ($close_pos === false) { 3027 $close_pos = $length; 3028 $oops = true; 3029 } 3030 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len)); 3031 if ($this->lexic_permissions['COMMENTS'][$comment_key]) { 3032 $test_str .= "</span>"; 3033 } 3034 3035 // Take into account that the comment might be the last in the source 3036 if (!$oops) { 3037 $test_str .= "\n"; 3038 } 3039 3040 $i = $close_pos; 3041 3042 // parse the rest 3043 $result .= $this->parse_non_string_part($stuff_to_parse); 3044 $stuff_to_parse = ''; 3045 } 3046 } 3047 } 3048 } 3049 3050 // Where are we adding this char? 3051 if (!$COMMENT_MATCHED) { 3052 $stuff_to_parse .= $char; 3053 } else { 3054 $result .= $test_str; 3055 unset($test_str); 3056 $COMMENT_MATCHED = false; 3057 } 3058 } 3059 // Parse the last bit 3060 $result .= $this->parse_non_string_part($stuff_to_parse); 3061 $stuff_to_parse = ''; 3062 } else { 3063 $result .= $this->hsc($part); 3064 } 3065 // Close the <span> that surrounds the block 3066 if ($STRICTATTRS != '') { 3067 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result); 3068 $result .= '</span>'; 3069 } 3070 3071 $endresult .= $result; 3072 unset($part, $parts[$key], $result); 3073 } 3074 3075 //This fix is related to SF#1923020, but has to be applied regardless of 3076 //actually highlighting symbols. 3077 /** NOTE: memorypeak #3 */ 3078 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult); 3079 3080// // Parse the last stuff (redundant?) 
//        $result .= $this->parse_non_string_part($stuff_to_parse);

        // Lop off the very first and last spaces
//        $result = substr($result, 1, -1);

        // We're finished: stop timing
        $this->set_time($start_time, microtime());

        $this->finalise($endresult);
        return $endresult;
    }

    /**
     * Swaps out spaces and tabs for HTML indentation. Not needed if
     * the code is in a pre block...
     *
     * Three passes are made over $result:
     *  1. Every line that contains a tab is rebuilt character by character.
     *     A column counter tracks the visible position: markup between "<"
     *     and ">" is copied without advancing it, and a short HTML entity
     *     is meant to advance it by one. Each tab is padded out to the next
     *     multiple of the real tab width.
     *  2. A space at the start of a line becomes "&nbsp;" and every pair of
     *     spaces becomes " &nbsp;", so that HTML rendering cannot collapse
     *     the indentation.
     *  3. When line numbers are off and the header type is not
     *     GESHI_HEADER_PRE_TABLE, newlines are converted with nl2br(), or
     *     replaced by $this->line_ending when that is not null.
     *
     * @param string $result The source to indent (reference!)
     * @since 1.0.0
     */
    protected function indent(&$result) {
        // The non-breaking-space entity. The substr() offsets used for tab
        // expansion depend on its length, so they are derived from it rather
        // than hard-coded.
        $nbsp = '&nbsp;';
        $nbsp_len = strlen($nbsp);

        /// Replace tabs with the correct number of spaces
        if (false !== strpos($result, "\t")) {
            $lines = explode("\n", $result);
            $result = null;//Save memory while we process the lines individually
            $tab_width = $this->get_real_tab_width();
            // One entity followed by enough plain spaces for a full-width tab;
            // the pieces needed for each tab are cut out of this string.
            $tab_string = $nbsp . str_repeat(' ', $tab_width);

            for ($key = 0, $n = count($lines); $key < $n; $key++) {
                $line = $lines[$key];
                if (false === strpos($line, "\t")) {
                    continue;
                }

                $pos = 0;
                $length = strlen($line);
                $lines[$key] = ''; // reduce memory

                $IN_TAG = false;
                for ($i = 0; $i < $length; ++$i) {
                    $char = $line[$i];
                    // Simple engine to work out whether we're in a tag.
                    // If we are we modify $pos. This is so we ignore HTML
                    // in the line and only workout the tab replacement
                    // via the actual content of the string
                    // This test could be improved to include strings in the
                    // html so that < or > would be allowed in user's styles
                    // (e.g. quotes: '<' '>'; or similar)
                    if ($IN_TAG) {
                        if ('>' == $char) {
                            $IN_TAG = false;
                        }
                        $lines[$key] .= $char;
                    } elseif ('<' == $char) {
                        $IN_TAG = true;
                        $lines[$key] .= '<';
                    } elseif ('&' == $char) {
                        // Start of an entity: look for its ";" a few bytes
                        // ahead and pre-subtract the bytes that the following
                        // iterations will add, so the entity nets one column.
                        $substr = substr($line, $i + 3, 5);
                        $posi = strpos($substr, ';');
                        if (false === $posi) {
                            ++$pos;
                        } else {
                            $pos -= $posi+2;
                        }
                        $lines[$key] .= $char;
                    } elseif ("\t" == $char) {
                        // Width needed to reach the next tab stop
                        $tab_end_width = $tab_width - ($pos % $tab_width);
                        if (($pos & 1) || 1 == $tab_end_width) {
                            // Plain spaces only: skip the leading entity
                            $str = substr($tab_string, $nbsp_len, $tab_end_width);
                        } else {
                            // The entity plus ($tab_end_width - 1) plain spaces
                            $str = substr($tab_string, 0, $tab_end_width + $nbsp_len - 1);
                        }
                        $lines[$key] .= $str;
                        $pos += $tab_end_width;

                        // No further tabs: copy the remainder in one go
                        if (false === strpos($line, "\t", $i + 1)) {
                            $lines[$key] .= substr($line, $i + 1);
                            break;
                        }
                    } elseif (0 == $pos && ' ' == $char) {
                        // A leading space would be dropped by the browser
                        $lines[$key] .= $nbsp;
                        ++$pos;
                    } else {
                        $lines[$key] .= $char;
                        ++$pos;
                    }
                }
            }
            $result = implode("\n", $lines);
            unset($lines);//We don't need the lines separated beyond this --- free them!
        }
        // Other whitespace
        // BenBE: Fix to reduce the number of replacements to be done
        $result = preg_replace('/^ /m', $nbsp, $result);
        $result = str_replace('  ', ' ' . $nbsp, $result);

        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
            if ($this->line_ending === null) {
                $result = nl2br($result);
            } else {
                $result = str_replace("\n", $this->line_ending, $result);
            }
        }
    }

    /**
     * Changes the case of a keyword for those languages where a change is asked for
     *
     * The mode is read from $this->language_data['CASE_KEYWORDS']:
     * GESHI_CAPS_UPPER upper-cases the text, GESHI_CAPS_LOWER lower-cases it
     * and any other value returns it untouched.
     *
     * @param string $instr The keyword to change the case of
     * @return string The keyword with its case changed
     * @since 1.0.0
     */
    protected function change_case($instr) {
        switch ($this->language_data['CASE_KEYWORDS']) {
            case GESHI_CAPS_UPPER:
                return strtoupper($instr);
            case GESHI_CAPS_LOWER:
                return strtolower($instr);
            default:
                return $instr;
        }
    }

    /**
     * Handles replacements of keywords to include markup and links if requested
     *
     * The keyword group is taken from $this->_kw_replace_group, which has to
     * be set before the replacement is started. The result is wrapped in
     * "<|/GROUP/>" ... "|>" placeholders rather than real markup.
     *
     * @note this is a callback, don't use it directly
     *
     * @param array $match The matches array: [0] is the whole match (case
     *                     adjusted and emitted), [1] the keyword itself
     *                     (used to build the link)
     * @return string The HTML for the match found
     * @since 1.0.8
     *
     * @todo Get rid of ender in keyword links
     */
    protected function handle_keyword_replace($match) {
        $k = $this->_kw_replace_group;
        $keyword = $match[0];
        $keyword_match = $match[1];

        $before = '';
        $after = '';

        if ($this->keyword_links) {
            // Keyword links have been enabled

            if (isset($this->language_data['URLS'][$k]) &&
                $this->language_data['URLS'][$k] != '') {
                // There is a base group for this keyword

                // Link to the text as it was matched unless a better spelling
                // is found below. This also covers the case where the lookup
                // finds nothing, which previously left $word pointing at the
                // last keyword of the group.
                $word = $keyword_match;

                // For case insensitive groups whose URL uses {FNAME}, take the
                // keyword from the language file to get the correct case
                if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                    strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                    foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                        if (strcasecmp($candidate, $keyword_match) == 0) {
                            $word = $candidate;
                            break;
                        }
                    }
                }

                // Dots in the URL are masked as <DOT> so that later
                // highlighting passes do not treat them as code
                $before = '<|UR1|"' .
                    str_replace(
                        array(
                            '{FNAME}',
                            '{FNAMEL}',
                            '{FNAMEU}',
                            '{FNAMEUF}',
                            '.'),
                        array(
                            str_replace('+', '%20', urlencode($this->hsc($word))),
                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                            str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
                            '<DOT>'),
                        $this->language_data['URLS'][$k]
                    ) . '">';
                $after = '</a>';
            }
        }

        return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
    }

    /**
     * handles regular expressions highlighting-definitions with callback functions
     *
     * The style callback registered for the regexp $this->_rx_key is called
     * with the matched text and its return value is used as the content of
     * the style attribute.
     *
     * @note this is a callback, don't use it directly
     *
     * @param array $matches the matches array
     * @return string The highlighted string
     * @since 1.0.8
     */
    protected function handle_regexps_callback($matches) {
        // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
        return ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
    }

    /**
     * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
     *
     * When $this->_hmr_replace is set, "\N" references in the before, after
     * and replace templates are expanded with the captured groups; otherwise
     * the whole match is used as the replacement. The replacement is wrapped
     * in "<|!REG3XPkey!>" ... "|>" placeholders, and the placeholder pair is
     * closed and reopened around every newline.
     *
     * @note this is a callback, don't use it directly
     *
     * @param array $matches the matches array
     * @return string
     * @since 1.0.8
     */
    protected function handle_multiline_regexps($matches) {
        $before = $this->_hmr_before;
        $after = $this->_hmr_after;
        if ($this->_hmr_replace) {
            // Map every "\N" reference to its captured text. strtr() tries the
            // longest key first and never rescans text it has inserted, so
            // "\10" is not mistaken for "\1" followed by "0", and captured
            // source code containing a backslash-digit is left alone.
            $pairs = array();
            foreach ($matches as $k => $text) {
                $pairs['\\' . $k] = $text;
            }

            $before = strtr($before, $pairs);
            $after = strtr($after, $pairs);
            $replace = strtr($this->_hmr_replace, $pairs);
        } else {
            $replace = $matches[0];
        }
        return $before
                    . '<|!REG3XP' . $this->_hmr_key .'!>'
                        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
                    . '|>'
              . $after;
    }

    /**
     * Takes a string that has no strings or comments in it, and highlights
     * stuff like keywords, numbers and methods.
     *
     * @param string $stuff_to_parse The string to parse for keyword, numbers etc.
     * @since 1.0.0
     * @todo BUGGY! Why? Why not build string and return?
     * @return string
     */
    protected function parse_non_string_part($stuff_to_parse) {
        $stuff_to_parse = ' ' .
$this->hsc($stuff_to_parse);

        // Overview: every stage below rewrites $stuff_to_parse in place and
        // only inserts "<|" ... "|>" placeholders. The real <span> tags are
        // substituted at the very end of this method, so that markup added by
        // one stage is not highlighted again by a later one.

        // Highlight keywords
        // Default look-behind / look-ahead classes: a keyword is not matched
        // when it is directly preceded / followed by one of these characters
        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
        if ($this->lexic_permissions['STRINGS']) {
            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
            $disallowed_before .= $quotemarks;
            $disallowed_after .= $quotemarks;
        }
        $disallowed_before .= "])";
        $disallowed_after .= "])";

        // A language file may replace both defaults globally and/or per
        // keyword group through PARSER_CONTROL
        $parser_control_pergroup = false;
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
                $x = 0; // check whether per-keyword-group parser_control is enabled
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                    ++$x;
                }
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                    ++$x;
                }
                // Any entry other than the two global overrides counted in $x
                // means per-group settings exist
                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
            }
        }

        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            // Groups without an explicit permission entry are highlighted
            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
                $this->lexic_permissions['KEYWORDS'][$k]) {

                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
                $modifiers = $case_sensitive ? '' : 'i';

                // NEW in 1.0.8 - per-keyword-group parser control
                $disallowed_before_local = $disallowed_before;
                $disallowed_after_local = $disallowed_after;
                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                        $disallowed_before_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                    }

                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                        $disallowed_after_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                    }
                }

                // Tells the handle_keyword_replace() callback which group it is working on
                $this->_kw_replace_group = $k;

                //NEW in 1.0.8, the cached regexp list
                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                    // Might make a more unique string for putting the number in soon
                    // Basically, we don't put the styles in yet because then the styles themselves will
                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                    $stuff_to_parse = preg_replace_callback(
                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
                        array($this, 'handle_keyword_replace'),
                        $stuff_to_parse
                        );
                }
            }
        }

        // Regular expressions
        // An entry is either an array (search, replace, modifiers, before,
        // after) or a plain pattern string. With line numbers enabled the
        // callback variant is used so that matches spanning several lines
        // are closed and reopened at every newline.
        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_array($regexp)) {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
                        $this->_hmr_before = $regexp[GESHI_BEFORE];
                        $this->_hmr_key = $key;
                        $this->_hmr_after = $regexp[GESHI_AFTER];
                        $stuff_to_parse = preg_replace_callback(
                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                            array($this, 'handle_multiline_regexps'),
                            $stuff_to_parse);
                        // reset the callback state again
                        $this->_hmr_replace = false;
                        $this->_hmr_before = '';
                        $this->_hmr_after = '';
                    } else {
                        $stuff_to_parse = preg_replace(
                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                            $stuff_to_parse);
                    }
                } else {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_key = $key;
                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                        $this->_hmr_key = '';
                    } else {
                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                    }
                }
            }
        }

        // Highlight numbers. As of 1.0.8 we support different types of numbers
        $numbers_found = false;

        // The precheck pattern is tested first so that the per-format
        // patterns only run when it matches
        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
            $numbers_found = true;

            //For each of the formats ...
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Check if it should be highlighted ...
                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
            }
        }

        //
        // Now that's all done, replace /[number]/ with the correct styles
        //
        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            if (!$this->use_classes) {
                $attributes = ' style="' .
                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
            } else {
                $attributes = ' class="kw' . $k . '"';
            }
            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
        }

        if ($numbers_found) {
            // Put number styles in
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Commented out for now, as this needs some review ...
                //                if ($numbers_permissions & $id) {
                    //Get the appropriate style ...
                        //Checking for unset styles is done by the style cache builder ...
                        if (!$this->use_classes) {
                            $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                        } else {
                            $attributes = ' class="nu'.$id.'"';
                        }

                        //Set in the correct styles ...
                        $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
                //                }
            }
        }

        // Highlight methods and fields in objects
        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
            // Defaults for what may precede the splitter, the whitespace after
            // it and the member name; each can be overridden by the language file
            $oolang_spaces = "[\s]*";
            $oolang_before = "";
            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
            if (isset($this->language_data['PARSER_CONTROL'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                    }
                }
            }

            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                // cheap substring test before running the regular expression
                if (false !== strpos($stuff_to_parse, $splitter)) {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                    } else {
                        $attributes = ' class="me' . $key . '"';
                    }
                    // only the member name (group 4) is wrapped in a placeholder
                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
                }
            }
        }

        //
        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
        // You try it, and see what happens ;)
        // TODO: Fix lexic permissions not converting entities if shouldn't
        // be highlighting regardless
        //
        if ($this->lexic_permissions['BRACKETS']) {
            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
        }


        //FIX for symbol highlighting ...
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
            // Running difference between offsets in the original string (as
            // reported by preg_match_all) and in the string being rewritten
            $global_offset = 0;
            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
                $symbol_match = $pot_symbols[$s_id][0][0];
                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                    // already highlighted blocks _must_ include either < or >
                    // so if this conditional applies, we have to skip this match
                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                    if(strpos($symbol_match, '<SEMI>') === false &&
                        strpos($symbol_match, '<PIPE>') === false) {
                        continue;
                    }
                }

                // if we reach this point, we have a valid match which needs to be highlighted

                $symbol_length = strlen($symbol_match);
                $symbol_offset = $pot_symbols[$s_id][0][1];
                unset($pot_symbols[$s_id]);
                $symbol_hl = "";

                // if we have multiple styles, we have to handle them properly
                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                    $old_sym = -1;
                    // Split the current stuff to replace into its atomic symbols ...
                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                    foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consecutive symbols belong to the same group to save output ...
                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                            if (-1 != $old_sym) {
                                $symbol_hl .= "|>";
                            }
                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                            if (!$this->use_classes) {
                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                            } else {
                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
                            }
                        }
                        $symbol_hl .= $sym_ms;
                    }
                    unset($sym_match_syms);

                    //Close remaining tags and insert the replacement at the right position ...
                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
                    if (-1 != $old_sym) {
                        $symbol_hl .= "|>";
                    }
                } else {
                    if (!$this->use_classes) {
                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                    } else {
                        $symbol_hl = '<| class="sy0">';
                    }
                    $symbol_hl .= $symbol_match . '|>';
                }

                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

                // since we replace old text with something of different size,
                // we'll have to keep track of the differences
                $global_offset += strlen($symbol_hl) - $symbol_length;
            }
        }
        //FIX for symbol highlighting ...

        // Add class/style for regexps
        // A callable style is evaluated per match through
        // handle_regexps_callback(); otherwise the marker is replaced with a
        // fixed style or class attribute.
        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                    $this->_rx_key = $key;
                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                        array($this, 'handle_regexps_callback'),
                        $stuff_to_parse);
                } else {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                    } else {
                        if (is_array($this->language_data['REGEXPS'][$key]) &&
                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                            $attributes = ' class="' .
                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                        } else {
                            $attributes = ' class="re' . $key . '"';
                        }
                    }
                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
                }
            }
        }

        // Replace <DOT> with . for urls
        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
        // Replace <|UR1| with <a href= for urls also
        if (isset($this->link_styles[GESHI_LINK])) {
            if ($this->use_classes) {
                $stuff_to_parse = str_replace('<|UR1|', '<a' .
$this->link_target . ' href=', $stuff_to_parse); 3624 } else { 3625 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse); 3626 } 3627 } else { 3628 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse); 3629 } 3630 3631 // 3632 // NOW we add the span thingy ;) 3633 // 3634 3635 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse); 3636 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse ); 3637 return substr($stuff_to_parse, 1); 3638 } 3639 3640 /** 3641 * Sets the time taken to parse the code 3642 * 3643 * @param string $start_time The time when parsing started as returned by @see microtime() 3644 * @param string $end_time The time when parsing ended as returned by @see microtime() 3645 * @since 1.0.2 3646 */ 3647 protected function set_time($start_time, $end_time) { 3648 $start = explode(' ', $start_time); 3649 $end = explode(' ', $end_time); 3650 $this->time = $end[0] + $end[1] - $start[0] - $start[1]; 3651 } 3652 3653 /** 3654 * Gets the time taken to parse the code 3655 * 3656 * @return double The time taken to parse the code 3657 * @since 1.0.2 3658 */ 3659 public function get_time() { 3660 return $this->time; 3661 } 3662 3663 /** 3664 * Merges arrays recursively, overwriting values of the first array with values of later arrays 3665 * 3666 * @since 1.0.8 3667 */ 3668 protected function merge_arrays() { 3669 $arrays = func_get_args(); 3670 $narrays = count($arrays); 3671 3672 // check arguments 3673 // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array) 3674 for ($i = 0; $i < $narrays; $i ++) { 3675 if (!is_array($arrays[$i])) { 3676 // also array_merge_recursive returns nothing in this case 3677 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! 
Returning false!', E_USER_WARNING); 3678 return false; 3679 } 3680 } 3681 3682 // the first array is in the output set in every case 3683 $ret = $arrays[0]; 3684 3685 // merege $ret with the remaining arrays 3686 for ($i = 1; $i < $narrays; $i ++) { 3687 foreach ($arrays[$i] as $key => $value) { 3688 if (is_array($value) && isset($ret[$key])) { 3689 // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays) 3690 // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false. 3691 $ret[$key] = $this->merge_arrays($ret[$key], $value); 3692 } else { 3693 $ret[$key] = $value; 3694 } 3695 } 3696 } 3697 3698 return $ret; 3699 } 3700 3701 /** 3702 * Gets language information and stores it for later use 3703 * 3704 * @param string $file_name The filename of the language file you want to load 3705 * @since 1.0.0 3706 * @todo Needs to load keys for lexic permissions for keywords, regexps etc 3707 */ 3708 protected function load_language($file_name) { 3709 if ($file_name == $this->loaded_language) { 3710 // this file is already loaded! 
3711 return; 3712 } 3713 3714 //Prepare some stuff before actually loading the language file 3715 $this->loaded_language = $file_name; 3716 $this->parse_cache_built = false; 3717 $this->enable_highlighting(); 3718 $language_data = array(); 3719 3720 //Load the language file 3721 require $file_name; 3722 3723 // Perhaps some checking might be added here later to check that 3724 // $language data is a valid thing but maybe not 3725 $this->language_data = $language_data; 3726 3727 // Set strict mode if should be set 3728 $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES']; 3729 3730 // Set permissions for all lexics to true 3731 // so they'll be highlighted by default 3732 foreach (array_keys($this->language_data['KEYWORDS']) as $key) { 3733 if (!empty($this->language_data['KEYWORDS'][$key])) { 3734 $this->lexic_permissions['KEYWORDS'][$key] = true; 3735 } else { 3736 $this->lexic_permissions['KEYWORDS'][$key] = false; 3737 } 3738 } 3739 3740 foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) { 3741 $this->lexic_permissions['COMMENTS'][$key] = true; 3742 } 3743 foreach (array_keys($this->language_data['REGEXPS']) as $key) { 3744 $this->lexic_permissions['REGEXPS'][$key] = true; 3745 } 3746 3747 // for BenBE and future code reviews: 3748 // we can use empty here since we only check for existance and emptiness of an array 3749 // if it is not an array at all but rather false or null this will work as intended as well 3750 // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice 3751 if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) { 3752 foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) { 3753 // it's either true or false and maybe is true as well 3754 $perm = $value !== GESHI_NEVER; 3755 if ($flag == 'ALL') { 3756 $this->enable_highlighting($perm); 3757 continue; 3758 } 3759 if (!isset($this->lexic_permissions[$flag])) { 3760 // unknown lexic permission 
continue;
                }
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach ($this->lexic_permissions[$flag] as $key => $val) {
                        $this->lexic_permissions[$flag][$key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
            unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
        }

        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
        //You need to set one for HARDESCAPES only in this case.
        if(!isset($this->language_data['HARDCHAR'])) {
            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
        }

        //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
        $style_filename = substr($file_name, 0, -4) . '.style.php';
        if (is_readable($style_filename)) {
            //Clear any style_data that could have been set before ...
            if (isset($style_data)) {
                unset($style_data);
            }

            //Read the Style Information from the style file
            include $style_filename;

            //Apply the new styles to our current language styles
            if (isset($style_data) && is_array($style_data)) {
                $this->language_data['STYLES'] =
                    $this->merge_arrays($this->language_data['STYLES'], $style_data);
            }
        }
    }

    /**
     * Takes the parsed code and various options, and creates the HTML
     * surrounding it to make it look nice.
     *
     * The highlighted code is split on "\n" and re-assembled in one of two
     * shapes:
     *  - line numbers enabled (and not GESHI_HEADER_PRE_TABLE): one <li> per
     *    line, each wrapping the line in a <div> or <pre>;
     *  - otherwise: the lines joined by "\n", preceded by a separate
     *    line-number column when GESHI_HEADER_PRE_TABLE is used with line numbers.
     * The result is prefixed with header() and suffixed with footer() and
     * written back into $parsed_code.
     *
     * @param string $parsed_code The code already parsed (reference!)
     * @since 1.0.0
     */
    protected function finalise(&$parsed_code) {
        // Remove end parts of important declarations
        // This is BUGGY!! My fault for bad code: fix coming in 1.2
        // @todo Remove this crap
        if ($this->enable_important_blocks &&
            (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
            $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
        }

        // Add HTML whitespace stuff if we're using the <div> header
        if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
            $this->indent($parsed_code);
        }

        // purge some unnecessary stuff
        /** NOTE: memorypeak #1 */
        $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

        // If we are using IDs for line numbers, there needs to be an overall
        // ID set to prevent collisions.
        if ($this->add_ids && !$this->overall_id) {
            $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
        }

        // Get code into lines
        /** NOTE: memorypeak #2 */
        $code = explode("\n", $parsed_code);
        $parsed_code = $this->header();

        // If we're using line numbers, we insert <li>s and appropriate
        // markup to style them (otherwise we don't need to do anything)
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
            // If we're using the <pre> header, we shouldn't add newlines because
            // the <pre> will line-break them (and the <li>s already do this for us)
            $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

            // Foreach line... ($i is advanced inside the body, so after the
            // increment it holds the 1-based line number)
            for ($i = 0, $n = count($code); $i < $n;) {
                //Reset the attributes for a new line ...
                $attrs = array();

                // Make lines have at least one space in them if they're empty
                // (a non-breaking space entity, so HTML whitespace collapsing
                // cannot remove it)
                // BenBE: Checking emptiness using trim instead of relying on blanks
                if ('' == trim($code[$i])) {
                    $code[$i] = '&nbsp;';
                }

                // If this is a "special line"...
                if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                    $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                    // Set the attributes to style the line
                    if ($this->use_classes) {
                        $attrs['class'][] = 'li2';
                        $def_attr = ' class="de2"';
                    } else {
                        $attrs['style'][] = $this->line_style2;
                        // This style "covers up" the special styles set for special lines
                        // so that styles applied to special lines don't apply to the actual
                        // code on that line
                        $def_attr = ' style="' . $this->code_style . '"';
                    }
                } else {
                    if ($this->use_classes) {
                        $attrs['class'][] = 'li1';
                        $def_attr = ' class="de1"';
                    } else {
                        $attrs['style'][] = $this->line_style1;
                        $def_attr = ' style="' . $this->code_style . '"';
                    }
                }

                //Check which type of tag to insert for this line
                if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                    $start = "<pre$def_attr>";
                    $end = '</pre>';
                } else {
                    // Span or div?
                    $start = "<div$def_attr>";
                    $end = '</div>';
                }

                ++$i;

                // Are we supposed to use ids? If so, add them
                if ($this->add_ids) {
                    $attrs['id'][] = "$this->overall_id-$i";
                }

                //Is this some line with extra styles???
                if (in_array($i, $this->highlight_extra_lines)) {
                    if ($this->use_classes) {
                        if (isset($this->highlight_extra_lines_styles[$i])) {
                            $attrs['class'][] = "lx$i";
                        } else {
                            $attrs['class'][] = "ln-xtra";
                        }
                    } else {
                        array_push($attrs['style'], $this->get_line_style($i));
                    }
                }

                // Add in the line surrounded by appropriate list HTML
                $attr_string = '';
                foreach ($attrs as $key => $attr) {
                    $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
                }

                $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
                // free each line as soon as it has been emitted
                unset($code[$i - 1]);
            }
        } else {
            $n = count($code);
            if ($this->use_classes) {
                $attributes = ' class="de1"';
            } else {
                $attributes = ' style="'. $this->code_style .'"';
            }
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $parsed_code .= '<pre'. $attributes .'>';
            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    if ($this->use_classes) {
                        $attrs = ' class="ln"';
                    } else {
                        $attrs = ' style="'. $this->table_linenumber_style .'"';
                    }
                    $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                    // get linenumbers
                    // we don't merge it with the for below, since it should be better for
                    // memory consumption this way
                    // @todo: but... actually it would still be somewhat nice to merge the two loops
                    //        the mem peaks are at different positions
                    for ($i = 0; $i < $n; ++$i) {
                        list($open, $close) = $this->_open_line_spans($i);
                        $parsed_code .= $open . ($this->line_numbers_start + $i);
                        if ($close) {
                            $parsed_code .= str_repeat('</span>', $close);
                        } elseif ($i + 1 < $n) {
                            // same rule as the code column below: no newline
                            // after the last line
                            $parsed_code .= "\n";
                        }
                    }
                    $parsed_code .= '</pre></td><td'.$attributes.'>';
                }
                $parsed_code .= '<pre'. $attributes .'>';
            }
            // No line numbers, but still need to handle highlighting lines extra.
            // Have to use divs so the full width of the code is highlighted
            for ($i = 0; $i < $n; ++$i) {
                // Make lines have at least one space in them if they're empty
                // BenBE: Checking emptiness using trim instead of relying on blanks
                if ('' == trim($code[$i])) {
                    $code[$i] = '&nbsp;';
                }

                list($open, $close) = $this->_open_line_spans($i);
                $parsed_code .= $open . $code[$i];

                if ($close) {
                    $parsed_code .= str_repeat('</span>', $close);
                } elseif ($i + 1 < $n) {
                    $parsed_code .= "\n";
                }
                unset($code[$i]);
            }

            if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
                $parsed_code .= '</pre>';
            }
            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                $parsed_code .= '</td>';
            }
        }

        $parsed_code .= $this->footer();
    }

    /**
     * Builds the opening <span> wrappers for one line in the output modes
     * that do not use <li> elements (used by finalise()).
     *
     * Extra-highlight styles are looked up by the 1-based line number
     * ($index + 1): that is the value stored in $this->highlight_extra_lines
     * and the key get_stylesheet() uses when it emits the ".lxN" rules.
     *
     * @param int $index Zero-based index of the line within the code block
     * @return array array(0 => opening HTML, 1 => number of </span> tags needed to close it)
     */
    protected function _open_line_spans($index) {
        $html  = '';
        $depth = 0;

        // fancy lines
        if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
            $index % $this->line_nth_row == ($this->line_nth_row - 1)) {
            // Set the attributes to style the line
            if ($this->use_classes) {
                $html .= '<span class="xtra li2"><span class="de2">';
            } else {
                // This style "covers up" the special styles set for special lines
                // so that styles applied to special lines don't apply to the actual
                // code on that line
                $html .= '<span style="display:block;' . $this->line_style2 . '">'
                    .'<span style="' . $this->code_style .'">';
            }
            $depth += 2;
        }

        //Is this some line with extra styles???
        $line_no = $index + 1;
        if (in_array($line_no, $this->highlight_extra_lines)) {
            if ($this->use_classes) {
                if (isset($this->highlight_extra_lines_styles[$line_no])) {
                    $html .= "<span class=\"xtra lx$line_no\">";
                } else {
                    $html .= "<span class=\"xtra ln-xtra\">";
                }
            } else {
                $html .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
            }
            ++$depth;
        }

        return array($html, $depth);
    }

    /**
     * Creates the header for the code block (with correct attributes)
     *
     * Returns the opening container tag (<pre>, <div>, <table> or a bare <ol>,
     * depending on the header type and line-number setting) together with the
     * optional user-supplied header content.
     *
     * @return string The header for the code block
     * @since  1.0.0
     */
    protected function header() {
        // Get attributes needed
        /**
         * @todo   Document behaviour change - class is outputted regardless of whether
         *         we're using classes or not. Same with style
         */
        $attributes = ' class="' . $this->_genCSSName($this->language);
        if ($this->overall_class != '') {
            $attributes .= " ".$this->_genCSSName($this->overall_class);
        }
        $attributes .= '"';

        if ($this->overall_id != '') {
            $attributes .= " id=\"{$this->overall_id}\"";
        }
        if ($this->overall_style != '' && !$this->use_classes) {
            $attributes .= ' style="' . $this->overall_style . '"';
        }

        $ol_attributes = '';

        if ($this->line_numbers_start != 1) {
            $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
        }

        // Get the header HTML
        $header = $this->header_content;
        if ($header) {
            if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
                $header = str_replace("\n", '', $header);
            }
            $header = $this->replace_keywords($header);

            if ($this->use_classes) {
                $attr = ' class="head"';
            } else {
                $attr = " style=\"{$this->header_content_style}\"";
            }
            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
            } else {
                $header = "<div$attr>$header</div>";
            }
        }

        if (GESHI_HEADER_NONE == $this->header_type) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                return "$header<ol$attributes$ol_attributes>";
            }
            return $header . ($this->force_code_block ? '<div>' : '');
        }

        // Work out what to return and do it
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            if ($this->header_type == GESHI_HEADER_PRE) {
                return "<pre$attributes>$header<ol$ol_attributes>";
            } elseif ($this->header_type == GESHI_HEADER_DIV ||
                $this->header_type == GESHI_HEADER_PRE_VALID) {
                return "<div$attributes>$header<ol$ol_attributes>";
            } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
                return "<table$attributes>$header<tbody><tr class=\"li1\">";
            }
        } else {
            if ($this->header_type == GESHI_HEADER_PRE) {
                return "<pre$attributes>$header" .
                    ($this->force_code_block ? '<div>' : '');
            } else {
                return "<div$attributes>$header" .
                    ($this->force_code_block ? '<div>' : '');
            }
        }
    }

    /**
     * Returns the footer for the code block.
*
     * Closes whatever container header() opened for the current header type
     * and line-number setting, and inserts the optional user-supplied footer
     * content just before the closing tag.
     *
     * @return string The footer for the code block
     * @since  1.0.0
     */
    protected function footer() {
        $footer = $this->footer_content;
        if ($footer) {
            if ($this->header_type == GESHI_HEADER_PRE) {
                $footer = str_replace("\n", '', $footer);
            }
            $footer = $this->replace_keywords($footer);

            if ($this->use_classes) {
                $attr = ' class="foot"';
            } else {
                $attr = " style=\"{$this->footer_content_style}\"";
            }
            if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                // the footer cell carries the footer class/style, mirroring
                // what header() does for the <thead> cell
                $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
            } else {
                $footer = "<div$attr>$footer</div>";
            }
        }

        if (GESHI_HEADER_NONE == $this->header_type) {
            return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
        }

        if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                return "</ol>$footer</div>";
            }
            return ($this->force_code_block ? '</div>' : '') .
                "$footer</div>";
        }
        elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                return "</tr></tbody>$footer</table>";
            }
            return ($this->force_code_block ? '</div>' : '') .
                "$footer</div>";
        }
        else {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                return "</ol>$footer</pre>";
            }
            return ($this->force_code_block ? '</div>' : '') .
                "$footer</pre>";
        }
    }

    /**
     * Replaces certain keywords in the header and footer with
     * certain configuration values
     *
     * Each keyword is recognised in two spellings, <KEYWORD> and {KEYWORD}:
     * TIME (result of get_time(), three decimals), LANGUAGE (the language
     * file's LANG_NAME), VERSION (GESHI_VERSION) and SPEED (source length
     * divided by the time, in B/s or KB/s, or "N/A" when the time is not positive).
     *
     * @param  string $instr The header or footer content to do replacement on
     * @return string The header or footer with replaced keywords
     * @since  1.0.2
     */
    protected function replace_keywords($instr) {
        $time = $this->get_time();

        if ($time <= 0) {
            $speed = 'N/A';
        } else {
            $speed = strlen($this->source) / $time;
            if ($speed >= 1024) {
                $speed = sprintf("%.2f KB/s", $speed / 1024.0);
            } else {
                $speed = sprintf("%.0f B/s", $speed);
            }
        }

        // keyword name => replacement value; the order is the order in which
        // str_replace() applies the substitutions
        $values = array(
            'TIME'     => number_format($time, 3),
            'LANGUAGE' => $this->language_data['LANG_NAME'],
            'VERSION'  => GESHI_VERSION,
            'SPEED'    => $speed
        );

        $keywords = $replacements = array();
        foreach ($values as $name => $value) {
            $keywords[]     = '<' . $name . '>';
            $replacements[] = $value;
            $keywords[]     = '{' . $name . '}';
            $replacements[] = $value;
        }

        return str_replace($keywords, $replacements, $instr);
    }

    /**
     * Secure replacement for PHP built-in function htmlspecialchars().
     *
     * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
     * for this replacement function.
     *
     * The INTERFACE for this function is almost the same as that for
     * htmlspecialchars(), with the same default for quote style; however, there
     * is no 'charset' parameter. The reason for this is as follows:
     *
     * The PHP docs say:
     *      "The third argument charset defines character set used in conversion."
     *
     * I suspect PHP's htmlspecialchars() is working at the byte-value level and
     * thus _needs_ to know (or assume) a character set because the special
     * characters to be replaced could exist at different code points in
     * different character sets. (If indeed htmlspecialchars() works at
     * byte-value level that goes some way towards explaining why the
     * vulnerability would exist in this function, too, and not only in
     * htmlentities() which certainly is working at byte-value level.)
     *
     * This replacement function however works at character level and should
     * therefore be "immune" to character set differences - so no charset
     * parameter is needed or provided. If a third parameter is passed, it will
     * be silently ignored.
     *
     * In the OUTPUT there is a minor difference in that we use '&#39;' instead
     * of PHP's '&#039;' for a single quote: this provides compatibility with
     *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
     * (see comment by mikiwoz at yahoo dot co dot uk on
     * http://php.net/htmlspecialchars); it also matches the entity definition
     * for XML 1.0
     * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
     * Like PHP we use a numeric character reference instead of '&apos;' for the
     * single quote. For the other special characters we use the named entity
     * references, as PHP is doing.
*
     * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
     *
     * @license     http://www.gnu.org/copyleft/lgpl.html
     *              GNU Lesser General Public License
     * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
     *              Wikka Development Team}
     *
     * @param   string  $string string to be converted
     * @param   integer $quote_style
     *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
     *                      - ENT_NOQUOTES: escapes only &, < and >
     *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
     * @return  string  converted string
     * @since   1.0.7.18
     */
    protected function hsc($string, $quote_style = ENT_COMPAT) {
        // Base translation table (the ENT_COMPAT set). It is static so it is
        // only built once, and it is never modified: the quote-style
        // adjustments below are made on a per-call copy, so one call cannot
        // change the result of a later call.
        static $aTransSpecchar = array(
            '&' => '&amp;',
            '"' => '&quot;',
            '<' => '&lt;',
            '>' => '&gt;',

            //This fix is related to SF#1923020, but has to be applied
            //regardless of actually highlighting symbols.

            //Circumvent a bug with symbol highlighting
            //This is required as ; would produce undesirable side-effects if it
            //was not to be processed as an entity.
            ';' => '<SEMI>', // Force ; to be processed as entity
            '|' => '<PIPE>' // Force | to be processed as entity
        );

        $table = $aTransSpecchar;
        switch ($quote_style) {
            case ENT_NOQUOTES: // don't convert double quotes
                unset($table['"']);
                break;
            case ENT_QUOTES: // convert single quotes as well
                $table["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
                break;
        }

        // return translated string
        return strtr($string, $table);
    }

    /**
     * Generate a CSS class name from a given string.
     * Prevents invalid CSS classes by prefixing an underscore when the name
     * starts with a digit.
     *
     * @param string $name Proposed class name
     *
     * @return string Safe CSS class name
     */
    protected function _genCSSName($name) {
        // isset() guards the offset access, so an empty name is returned
        // unchanged instead of triggering an "uninitialized string offset" error
        $needs_prefix = isset($name[0]) && is_numeric($name[0]);

        return ($needs_prefix ? '_' : '') . $name;
    }

    /**
     * Returns a stylesheet for the highlighted code. If $economy mode
     * is true, we only return the stylesheet declarations that matter for
     * this code block instead of the whole thing
     *
     * @param  boolean $economy_mode Whether to use economy mode or not
     * @return string A stylesheet built on the data for the current language
     * @since  1.0.0
     */
    public function get_stylesheet($economy_mode = true) {
        // If there's an error, chances are that the language file
        // won't have populated the language data file, so we can't
        // risk getting a stylesheet...
        if ($this->error) {
            return '';
        }

        //Check if the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups are useable ...
        if(!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // First, work out what the selector should be. If there's an ID,
        // that should be used, the same for a class. Otherwise, a selector
        // of '' means that these styles will be applied anywhere
        if ($this->overall_id) {
            $selector = '#' . $this->_genCSSName($this->overall_id);
        } else {
            $selector = '.' . $this->_genCSSName($this->language);
            if ($this->overall_class) {
                $selector .= '.' . $this->_genCSSName($this->overall_class);
            }
        }
        $selector .= ' ';

        // Header of the stylesheet
        if (!$economy_mode) {
            $stylesheet = "/**\n".
                " * GeSHi Dynamically Generated Stylesheet\n".
                " * --------------------------------------\n".
                " * Dynamically generated stylesheet for {$this->language}\n".
                " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
                " * --------------------------------------\n".
                " */\n";
        } else {
            $stylesheet = "/**\n".
                " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
                " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
                " */\n";
        }

        // Set the <ol> to have no effect at all if there are line numbers
        // (<ol>s have margins that should be destroyed so all layout is
        // controlled by the set_overall_style method, which works on the
        // <pre> or <div> container). Additionally, set default styles for lines
        if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
            $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
        }

        // Add overall styles
        // note: neglect economy_mode, empty styles are meaningless
        if ($this->overall_style != '') {
            $stylesheet .= "$selector {{$this->overall_style}}\n";
        }

        // Add styles for links
        // note: economy mode does not make _any_ sense here
        //       either the style is empty and thus no selector is needed
        //       or the appropriate key is given.
        foreach ($this->link_styles as $key => $style) {
            if ($style != '') {
                switch ($key) {
                    case GESHI_LINK:
                        $stylesheet .= "{$selector}a:link {{$style}}\n";
                        break;
                    case GESHI_HOVER:
                        $stylesheet .= "{$selector}a:hover {{$style}}\n";
                        break;
                    case GESHI_ACTIVE:
                        $stylesheet .= "{$selector}a:active {{$style}}\n";
                        break;
                    case GESHI_VISITED:
                        $stylesheet .= "{$selector}a:visited {{$style}}\n";
                        break;
                }
            }
        }

        // Header and footer
        // note: neglect economy_mode, empty styles are meaningless
        if ($this->header_content_style != '') {
            $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
        }
        if ($this->footer_content_style != '') {
            $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
        }

        // Styles for important stuff
        // note: neglect economy_mode, empty styles are meaningless
        if ($this->important_styles != '') {
            $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
        }

        // Simple line number styles
        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
            $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
        }
        if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
            $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
        }
        // If there is a style set for fancy line numbers, echo it out
        if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
            $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
        }

        // note: empty styles are meaningless
        foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
            if ($styles != '' && (!$economy_mode ||
                (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
                $this->lexic_permissions['KEYWORDS'][$group]))) {
                $stylesheet .= "$selector.kw$group {{$styles}}\n";
            }
        }
        foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
            if ($styles != '' && (!$economy_mode ||
                (isset($this->lexic_permissions['COMMENTS'][$group]) &&
                $this->lexic_permissions['COMMENTS'][$group]) ||
                (!empty($this->language_data['COMMENT_REGEXP']) &&
                !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
                $stylesheet .= "$selector.co$group {{$styles}}\n";
            }
        }

        // Lexics whose permission is a single flag rather than per group.
        // Each entry: style/permission key => array(CSS class prefix,
        // whether a 'HARD' group exists and is emitted as "_h").
        // The order of this table is the order of the rules in the output.
        $flag_lexics = array(
            'ESCAPE_CHAR' => array('es', true),  // NEW: since 1.0.8 we have to handle hardescapes
            'BRACKETS'    => array('br', false),
            'SYMBOLS'     => array('sy', false),
            'STRINGS'     => array('st', true),  // NEW: since 1.0.8 we have to handle hardquotes
            'NUMBERS'     => array('nu', false),
            'METHODS'     => array('me', false)
        );
        foreach ($flag_lexics as $lexic => $spec) {
            list($prefix, $has_hard_group) = $spec;
            foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
                if ($styles != '' && (!$economy_mode || $this->lexic_permissions[$lexic])) {
                    if ($has_hard_group && $group === 'HARD') {
                        $group = '_h';
                    }
                    $stylesheet .= "$selector.$prefix$group {{$styles}}\n";
                }
            }
        }

        // note: neglect economy_mode, empty styles are meaningless
        foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
            if ($styles != '') {
                $stylesheet .= "$selector.sc$group {{$styles}}\n";
            }
        }
        foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
            if ($styles != '' && (!$economy_mode ||
                (isset($this->lexic_permissions['REGEXPS'][$group]) &&
                $this->lexic_permissions['REGEXPS'][$group]))) {
                // a regexp may name its own CSS class instead of the default "reN"
                if (is_array($this->language_data['REGEXPS'][$group]) &&
                    array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
                    $stylesheet .= "$selector.";
                    $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
                    $stylesheet .= " {{$styles}}\n";
                } else {
                    $stylesheet .= "$selector.re$group {{$styles}}\n";
                }
            }
        }
        // Styles for lines being highlighted extra
        if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
            $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
        }
        $stylesheet .= "{$selector}span.xtra { display:block; }\n";
        foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
            $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
        }

        return $stylesheet;
    }

    /**
     * Gets the style that is used for the specified line
     *
     * @param  int $line The line number information is requested for
     * @return string The line's own extra style if one was registered,
     *                otherwise the default style for extra-highlighted lines
     * @since  1.0.7.21
     */
    protected function get_line_style($line) {
        if (isset($this->highlight_extra_lines_styles[$line])) {
            return $this->highlight_extra_lines_styles[$line];
        }

        // no "extra" style assigned to this particular line
        return $this->highlight_extra_lines_style;
    }

    /**
     * This function creates an optimized regular expression list
     * from an array of strings.
     *
     * The strings are sorted and merged into a prefix tree, so entries that
     * share a prefix share it in the pattern too. Conceptually:
     * <code>$list = array('faa', 'foo', 'foobar');
     *          => 'f(aa|oo(bar)?)'</code>
     * (the generated patterns use non-capturing groups, "(?:...)").
     *
     * The result is split over several patterns when a pattern grows beyond
     * GESHI_MAX_PCRE_LENGTH or would contain more than
     * GESHI_MAX_PCRE_SUBPATTERNS groups.
     *
     * @param array $list array of (unquoted) strings
     * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
     * @return array list of regular expression strings (always at least one element)
     * @author Milian Wolff <mail@milianw.de>
     * @since 1.0.8
     */
    protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
        $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        sort($list);
        $regexp_list = array('');
        $num_subpatterns = 0;
        $list_key = 0;

        // the tokens which we will use to generate the regexp list
        $tokens = array();
        // the chain of keys leading to the most recently inserted entry,
        // one key per nesting level of $tokens
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        continue 2;
                    }
                    // count the leading characters shared with the previous key
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character:
                                // do not split the key, store the entry whole.
                                // $prev_keys[$level] now equals $entry, so the next
                                // pass of this loop leaves through the duplicate
                                // check above.
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                continue;
                            } else {
                                // relocate previous tokens
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            if ( !empty($regexp_list[$list_key]) ) {
                ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }

    /**
     * this function creates the appropriate regexp string of an token array
     * you should not call this function directly, @see $this->optimize_regexp_list().
4664 * 4665 * @param array $tokens array of tokens 4666 * @param bool $recursed to know wether we recursed or not 4667 * @return string 4668 * @author Milian Wolff <mail@milianw.de> 4669 * @since 1.0.8 4670 */ 4671 protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) { 4672 $list = ''; 4673 foreach ($tokens as $token => $sub_tokens) { 4674 $list .= $token; 4675 $close_entry = isset($sub_tokens['']); 4676 unset($sub_tokens['']); 4677 if (!empty($sub_tokens)) { 4678 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')'; 4679 if ($close_entry) { 4680 // make sub_tokens optional 4681 $list .= '?'; 4682 } 4683 } 4684 $list .= '|'; 4685 } 4686 if (!$recursed) { 4687 // do some optimizations 4688 // common trailing strings 4689 // BUGGY! 4690 //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function( 4691 // '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list); 4692 // (?:p)? => p? 4693 $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list); 4694 // (?:a|b|c|d|...)? => [abcd...]? 4695 // TODO: a|bb|c => [ac]|bb 4696 static $callback_2; 4697 if (!isset($callback_2)) { 4698 $callback_2 = function($matches) { return "[" . str_replace("|", "", $matches[1]) . "]"; }; 4699 } 4700 $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list); 4701 } 4702 // return $list without trailing pipe 4703 return substr($list, 0, -1); 4704 } 4705} // End Class GeSHi 4706 4707 4708if (!function_exists('geshi_highlight')) { 4709 /** 4710 * Easy way to highlight stuff. Behaves just like highlight_string 4711 * 4712 * @param string $string The code to highlight 4713 * @param string $language The language to highlight the code in 4714 * @param string $path The path to the language files. 
You can leave this blank if you need 4715 * as from version 1.0.7 the path should be automatically detected 4716 * @param boolean $return Whether to return the result or to echo 4717 * @return string The code highlighted (if $return is true) 4718 * @since 1.0.2 4719 */ 4720 function geshi_highlight($string, $language, $path = null, $return = false) { 4721 $geshi = new GeSHi($string, $language, $path); 4722 $geshi->set_header_type(GESHI_HEADER_NONE); 4723 4724 if ($return) { 4725 return '<code>' . $geshi->parse_code() . '</code>'; 4726 } 4727 4728 echo '<code>' . $geshi->parse_code() . '</code>'; 4729 4730 if ($geshi->error()) { 4731 return false; 4732 } 4733 return true; 4734 } 4735} 4736