<?php

#
#
# Parsedown
# http://parsedown.org
#
# (c) Emanuil Rusev
# http://erusev.com
#
# For the full license information, view the LICENSE file that was distributed
# with this source code.
#
#

/**
 * Markdown-to-HTML converter.
 *
 * Input is processed in two passes: lines() groups the lines of a document
 * into blocks (headers, lists, code, ...) and line() converts the inline
 * markers inside a block's text. Block and inline handlers are looked up by
 * name ('block'.$type, 'inline'.$type) from the first character of the line
 * or excerpt, using the $BlockTypes and $InlineTypes maps.
 */
class Parsedown
{
    # ~

    const version = '1.7.3';

    # ~

    /**
     * Converts a Markdown document into HTML.
     *
     * Reference definitions collected by a previous call are discarded first.
     *
     * @param string $text Markdown source
     *
     * @return string HTML markup without leading or trailing line breaks
     */
    public function text($text)
    {
        # make sure no definitions are set
        $this->DefinitionData = array();

        # standardize line breaks
        $text = str_replace(array("\r\n", "\r"), "\n", $text);

        # remove surrounding line breaks
        $text = trim($text, "\n");

        # split text into lines
        $lines = explode("\n", $text);

        # iterate through lines to identify blocks
        $markup = $this->lines($lines);

        # trim line breaks
        $markup = trim($markup, "\n");

        return $markup;
    }

    #
    # Setters
    #

    /**
     * When enabled, every line break inside a text run becomes a <br />.
     *
     * @param bool $breaksEnabled
     *
     * @return $this
     */
    public function setBreaksEnabled($breaksEnabled)
    {
        $this->breaksEnabled = $breaksEnabled;

        return $this;
    }

    protected $breaksEnabled;

    /**
     * When enabled, HTML comments, block markup and inline markup are not
     * passed through as raw HTML.
     *
     * @param bool $markupEscaped
     *
     * @return $this
     */
    public function setMarkupEscaped($markupEscaped)
    {
        $this->markupEscaped = $markupEscaped;

        return $this;
    }

    protected $markupEscaped;

    /**
     * Controls whether bare http(s) URLs are turned into links
     * (only the strict value true enables linking).
     *
     * @param bool $urlsLinked
     *
     * @return $this
     */
    public function setUrlsLinked($urlsLinked)
    {
        $this->urlsLinked = $urlsLinked;

        return $this;
    }

    protected $urlsLinked = true;

    /**
     * In safe mode raw HTML is not passed through and every generated
     * element is run through sanitiseElement().
     *
     * @param bool $safeMode
     *
     * @return $this
     */
    public function setSafeMode($safeMode)
    {
        $this->safeMode = (bool) $safeMode;

        return $this;
    }

    protected $safeMode;

    # URL prefixes that are left untouched in safe mode
    protected $safeLinksWhitelist = array(
        'http://',
        'https://',
        'ftp://',
        'ftps://',
        'mailto:',
        'data:image/png;base64,',
        'data:image/gif;base64,',
        'data:image/jpeg;base64,',
        'irc:',
        'ircs:',
        'git:',
        'ssh:',
        'news:',
        'steam:',
    );

    #
    # Lines
    #

    # first character of a line => block types to try, in order
    protected $BlockTypes = array(
        '#' => array('Header'),
        '*' => array('Rule', 'List'),
        '+' => array('List'),
        '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
        '0' => array('List'),
        '1' => array('List'),
        '2' => array('List'),
        '3' => array('List'),
        '4' => array('List'),
        '5' => array('List'),
        '6' => array('List'),
        '7' => array('List'),
        '8' => array('List'),
        '9' => array('List'),
        ':' => array('Table'),
        '<' => array('Comment', 'Markup'),
        '=' => array('SetextHeader'),
        '>' => array('Quote'),
        '[' => array('Reference'),
        '_' => array('Rule'),
        '`' => array('FencedCode'),
        '|' => array('Table'),
        '~' => array('FencedCode'),
    );

    # ~

    # block types that are tried for every line, regardless of its marker
    protected $unmarkedBlockTypes = array(
        'Code',
    );

    #
    # Blocks
    #

    /**
     * Groups lines into blocks and renders the blocks to HTML.
     *
     * @param array $lines lines of text without line break characters
     *
     * @return string markup in which every block is preceded by "\n" and
     *                the whole result is followed by "\n"
     */
    protected function lines(array $lines)
    {
        $CurrentBlock = null;

        # the first entry pushed is always the initial (null) $CurrentBlock;
        # it is removed again before rendering
        $Blocks = array();

        foreach ($lines as $line)
        {
            if (chop($line) === '')
            {
                # a blank line only marks the open block as interrupted

                if (isset($CurrentBlock))
                {
                    $CurrentBlock['interrupted'] = true;
                }

                continue;
            }

            if (strpos($line, "\t") !== false)
            {
                # expand tabs to the next multiple of four columns

                $parts = explode("\t", $line);

                $line = $parts[0];

                unset($parts[0]);

                foreach ($parts as $part)
                {
                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;

                    $line .= str_repeat(' ', $shortage);
                    $line .= $part;
                }
            }

            $indent = 0;

            while (isset($line[$indent]) and $line[$indent] === ' ')
            {
                $indent ++;
            }

            $text = $indent > 0 ? substr($line, $indent) : $line;

            # ~

            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

            # ~

            if (isset($CurrentBlock['continuable']))
            {
                # give the open block the chance to consume the line

                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $CurrentBlock = $Block;

                    continue;
                }
                else
                {
                    if ($this->isBlockCompletable($CurrentBlock['type']))
                    {
                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
                    }
                }
            }

            # ~

            $marker = $text[0];

            # ~

            $blockTypes = $this->unmarkedBlockTypes;

            if (isset($this->BlockTypes[$marker]))
            {
                foreach ($this->BlockTypes[$marker] as $blockType)
                {
                    $blockTypes []= $blockType;
                }
            }

            #
            # ~

            foreach ($blockTypes as $blockType)
            {
                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $Block['type'] = $blockType;

                    # a handler that already set 'identified' has converted
                    # the current block in place, so it must not be stored twice
                    if ( ! isset($Block['identified']))
                    {
                        $Blocks []= $CurrentBlock;

                        $Block['identified'] = true;
                    }

                    if ($this->isBlockContinuable($blockType))
                    {
                        $Block['continuable'] = true;
                    }

                    $CurrentBlock = $Block;

                    continue 2;
                }
            }

            # ~

            # a block without 'type' is a paragraph; extend it unless a blank
            # line came in between

            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
            {
                $CurrentBlock['element']['text'] .= "\n".$text;
            }
            else
            {
                $Blocks []= $CurrentBlock;

                $CurrentBlock = $this->paragraph($Line);

                $CurrentBlock['identified'] = true;
            }
        }

        # ~

        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
        {
            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
        }

        # ~

        $Blocks []= $CurrentBlock;

        # drop the initial null placeholder
        unset($Blocks[0]);

        # ~

        $markup = '';

        foreach ($Blocks as $Block)
        {
            if (isset($Block['hidden']))
            {
                continue;
            }

            $markup .= "\n";
            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
        }

        $markup .= "\n";

        # ~

        return $markup;
    }

    /**
     * @param string $Type block type name
     *
     * @return bool whether a 'block<Type>Continue' method exists
     */
    protected function isBlockContinuable($Type)
    {
        return method_exists($this, 'block'.$Type.'Continue');
    }

    /**
     * @param string $Type block type name
     *
     * @return bool whether a 'block<Type>Complete' method exists
     */
    protected function isBlockCompletable($Type)
    {
        return method_exists($this, 'block'.$Type.'Complete');
    }

    #
    # Code

    /**
     * Starts an indented code block for a line indented by four or more
     * spaces, unless the line directly follows an uninterrupted paragraph.
     *
     * @param array      $Line  'body', 'indent' and 'text' of the line
     * @param array|null $Block the currently open block, if any
     *
     * @return array|null the new block, or null when the line does not match
     */
    protected function blockCode($Line, $Block = null)
    {
        if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
        {
            return;
        }

        if ($Line['indent'] >= 4)
        {
            $text = substr($Line['body'], 4);

            $Block = array(
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => array(
                        'name' => 'code',
                        'text' => $text,
                    ),
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends a further indented line to an indented code block; a preceding
     * blank line is preserved as an empty line.
     *
     * @return array|null the extended block, or null when the block ends
     */
    protected function blockCodeContinue($Line, $Block)
    {
        if ($Line['indent'] >= 4)
        {
            if (isset($Block['interrupted']))
            {
                $Block['element']['text']['text'] .= "\n";

                unset($Block['interrupted']);
            }

            $Block['element']['text']['text'] .= "\n";

            $text = substr($Line['body'], 4);

            $Block['element']['text']['text'] .= $text;

            return $Block;
        }
    }

    /**
     * Completion hook for indented code blocks. The code text needs no
     * post-processing here (escaping happens in element()), so the block is
     * returned as is; the method is kept because lines() calls it whenever
     * it exists.
     *
     * @return array the unchanged block
     */
    protected function blockCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Comment

    /**
     * Starts a raw HTML comment block for a line whose text begins with
     * "<!--"-style characters ('!', '-', '-' at offsets 1 to 3). Disabled
     * when markup is escaped or safe mode is on.
     *
     * @return array|null
     */
    protected function blockComment($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
        {
            $Block = array(
                'markup' => $Line['body'],
            );

            if (preg_match('/-->$/', $Line['text']))
            {
                $Block['closed'] = true;
            }

            return $Block;
        }
    }

    /**
     * Appends lines to an HTML comment block until a line ends with "-->".
     *
     * @return array|null null once the comment has been closed
     */
    protected function blockCommentContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        $Block['markup'] .= "\n" . $Line['body'];

        if (preg_match('/-->$/', $Line['text']))
        {
            $Block['closed'] = true;
        }

        return $Block;
    }

    #
    # Fenced Code

    /**
     * Starts a fenced code block (three or more of the line's first
     * character, optionally followed by an info string). The first word of
     * the info string becomes a "language-..." class on the code element.
     *
     * @return array|null
     */
    protected function blockFencedCode($Line)
    {
        if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
        {
            $Element = array(
                'name' => 'code',
                'text' => '',
            );

            if (isset($matches[1]))
            {
                /**
                 * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
                 * Every HTML element may have a class attribute specified.
                 * The attribute, if specified, must have a value that is a set
                 * of space-separated tokens representing the various classes
                 * that the element belongs to.
                 * [...]
                 * The space characters, for the purposes of this specification,
                 * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
                 * U+000D CARRIAGE RETURN (CR).
                 */
                $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));

                $class = 'language-'.$language;

                $Element['attributes'] = array(
                    'class' => $class,
                );
            }

            $Block = array(
                'char' => $Line['text'][0],
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => $Element,
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends lines to a fenced code block until a closing fence made of the
     * opening fence character is found.
     *
     * @return array|null null once the block has been completed
     */
    protected function blockFencedCodeContinue($Line, $Block)
    {
        if (isset($Block['complete']))
        {
            return;
        }

        if (isset($Block['interrupted']))
        {
            $Block['element']['text']['text'] .= "\n";

            unset($Block['interrupted']);
        }

        if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
        {
            # every appended line was prefixed with "\n"; drop the first one
            $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);

            $Block['complete'] = true;

            return $Block;
        }

        $Block['element']['text']['text'] .= "\n".$Line['body'];

        return $Block;
    }

    /**
     * Completion hook for fenced code blocks. No post-processing is needed
     * (escaping happens in element()); the method is kept because lines()
     * calls it whenever it exists.
     *
     * @return array the unchanged block
     */
    protected function blockFencedCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Header

    /**
     * Creates an ATX header (h1 to h6) from a line starting with '#'.
     * Lines with more than six leading '#' are not headers.
     *
     * @return array|null
     */
    protected function blockHeader($Line)
    {
        if (isset($Line['text'][1]))
        {
            $level = 1;

            while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
            {
                $level ++;
            }

            if ($level > 6)
            {
                return;
            }

            $text = trim($Line['text'], '# ');

            # $level is guaranteed to be within 1..6 at this point
            $Block = array(
                'element' => array(
                    'name' => 'h' . $level,
                    'text' => $text,
                    'handler' => 'line',
                ),
            );

            return $Block;
        }
    }

    #
    # List

    /**
     * Starts an unordered ('*', '+', '-') or ordered ("1.") list.
     * An ordered list that does not start at "1" gets a 'start' attribute.
     *
     * @return array|null
     */
    protected function blockList($Line)
    {
        # '*', '+' and '-' all sort at or below '-'; digits sort above it
        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'element' => array(
                    'name' => $name,
                    'handler' => 'elements',
                ),
            );

            if($name === 'ol')
            {
                $listStart = stristr($matches[0], '.', true);

                if($listStart !== '1')
                {
                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $matches[2],
                ),
            );

            # stored by reference so that lines appended to $Block['li'] later
            # also show up in the element tree
            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }
    }

    /**
     * Continues a list: starts a new item, skips reference definitions,
     * or appends the line to the current item.
     *
     * @return array|null null when the line does not belong to the list
     */
    protected function blockListContinue($Line, array $Block)
    {
        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['li']['text'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            # break the reference to the previous item before creating a new one
            unset($Block['li']);

            $text = isset($matches[1]) ? $matches[1] : '';

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $text,
                ),
            );

            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }

        if ($Line['text'][0] === '[' and $this->blockReference($Line))
        {
            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            return $Block;
        }

        if ($Line['indent'] > 0)
        {
            $Block['li']['text'] []= '';

            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            unset($Block['interrupted']);

            return $Block;
        }
    }

    /**
     * For a loose list, makes sure every item ends with an empty line so
     * that li() keeps the paragraph wrapper for all items.
     *
     * @return array
     */
    protected function blockListComplete(array $Block)
    {
        if (isset($Block['loose']))
        {
            foreach ($Block['element']['text'] as &$li)
            {
                if (end($li['text']) !== '')
                {
                    $li['text'] []= '';
                }
            }

            # release the loop reference so it cannot alias the last item
            unset($li);
        }

        return $Block;
    }

    #
    # Quote

    /**
     * Starts a blockquote from a line beginning with '>'.
     *
     * @return array|null
     */
    protected function blockQuote($Line)
    {
        if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'element' => array(
                    'name' => 'blockquote',
                    'handler' => 'lines',
                    'text' => (array) $matches[1],
                ),
            );

            return $Block;
        }
    }

    /**
     * Continues a blockquote with another '>' line, or with a plain line
     * when no blank line came in between.
     *
     * @return array|null
     */
    protected function blockQuoteContinue($Line, array $Block)
    {
        if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['element']['text'] []= '';

                unset($Block['interrupted']);
            }

            $Block['element']['text'] []= $matches[1];

            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $Block['element']['text'] []= $Line['text'];

            return $Block;
        }
    }

    #
    # Rule

    /**
     * Creates a horizontal rule from three or more identical marker
     * characters, optionally separated by spaces.
     *
     * @return array|null
     */
    protected function blockRule($Line)
    {
        if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
        {
            $Block = array(
                'element' => array(
                    'name' => 'hr'
                ),
            );

            return $Block;
        }
    }

    #
    # Setext

    /**
     * Turns the preceding uninterrupted paragraph into an h1 ('=' underline)
     * or h2 (any other underline character routed here) header.
     *
     * @return array|null
     */
    protected function blockSetextHeader($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        # the line must consist solely of its first character
        if (chop($Line['text'], $Line['text'][0]) === '')
        {
            $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';

            return $Block;
        }
    }

    #
    # Markup

    /**
     * Starts a raw HTML block for a line beginning with a block-level tag.
     * Disabled when markup is escaped or safe mode is on; text-level tags
     * are left to the inline parser.
     *
     * @return array|null
     */
    protected function blockMarkup($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
        {
            $element = strtolower($matches[1]);

            if (in_array($element, $this->textLevelElements))
            {
                return;
            }

            $Block = array(
                'name' => $matches[1],
                'depth' => 0,
                'markup' => $Line['text'],
            );

            $length = strlen($matches[0]);

            $remainder = substr($Line['text'], $length);

            # HTML tag names are case-insensitive, so the void check uses the
            # lower-cased name, like the text-level check above (otherwise
            # e.g. "<HR>" would never be closed and would swallow what follows)
            $isVoid = (isset($matches[2]) || in_array($element, $this->voidElements));

            if (trim($remainder) === '')
            {
                if ($isVoid)
                {
                    $Block['closed'] = true;

                    $Block['void'] = true;
                }
            }
            else
            {
                if ($isVoid)
                {
                    return;
                }

                if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
                {
                    $Block['closed'] = true;
                }
            }

            return $Block;
        }
    }

    /**
     * Appends lines to a raw HTML block, tracking nested opening tags of the
     * same name, until the matching closing tag ends a line.
     *
     * @return array|null null once the block has been closed
     */
    protected function blockMarkupContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
        {
            $Block['depth'] ++;
        }

        if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
        {
            if ($Block['depth'] > 0)
            {
                $Block['depth'] --;
            }
            else
            {
                $Block['closed'] = true;
            }
        }

        if (isset($Block['interrupted']))
        {
            $Block['markup'] .= "\n";

            unset($Block['interrupted']);
        }

        $Block['markup'] .= "\n".$Line['body'];

        return $Block;
    }

    #
    # Reference

    /**
     * Records a reference definition ("[id]: url "title"") under its
     * lower-cased id and produces a hidden block.
     *
     * @return array|null
     */
    protected function blockReference($Line)
    {
        if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
        {
            $id = strtolower($matches[1]);

            $Data = array(
                'url' => $matches[2],
                'title' => null,
            );

            if (isset($matches[3]))
            {
                $Data['title'] = $matches[3];
            }

            $this->DefinitionData['Reference'][$id] = $Data;

            $Block = array(
                'hidden' => true,
            );

            return $Block;
        }
    }

    #
    # Table

    /**
     * Turns the preceding uninterrupted paragraph into a table header when
     * it contains '|' and the current line is a divider row.
     *
     * @return array|null
     */
    protected function blockTable($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
        {
            $alignments = array();

            $divider = $Line['text'];

            $divider = trim($divider);
            $divider = trim($divider, '|');

            $dividerCells = explode('|', $divider);

            foreach ($dividerCells as $dividerCell)
            {
                $dividerCell = trim($dividerCell);

                if ($dividerCell === '')
                {
                    continue;
                }

                $alignment = null;

                if ($dividerCell[0] === ':')
                {
                    $alignment = 'left';
                }

                if (substr($dividerCell, - 1) === ':')
                {
                    $alignment = $alignment === 'left' ? 'center' : 'right';
                }

                $alignments []= $alignment;
            }

            # ~

            $HeaderElements = array();

            $header = $Block['element']['text'];

            $header = trim($header);
            $header = trim($header, '|');

            $headerCells = explode('|', $header);

            foreach ($headerCells as $index => $headerCell)
            {
                $headerCell = trim($headerCell);

                $HeaderElement = array(
                    'name' => 'th',
                    'text' => $headerCell,
                    'handler' => 'line',
                );

                if (isset($alignments[$index]))
                {
                    $alignment = $alignments[$index];

                    $HeaderElement['attributes'] = array(
                        'style' => 'text-align: '.$alignment.';',
                    );
                }

                $HeaderElements []= $HeaderElement;
            }

            # ~

            # 'identified' tells lines() that this block replaces the
            # paragraph instead of following it
            $Block = array(
                'alignments' => $alignments,
                'identified' => true,
                'element' => array(
                    'name' => 'table',
                    'handler' => 'elements',
                ),
            );

            $Block['element']['text'] []= array(
                'name' => 'thead',
                'handler' => 'elements',
            );

            $Block['element']['text'] []= array(
                'name' => 'tbody',
                'handler' => 'elements',
                'text' => array(),
            );

            $Block['element']['text'][0]['text'] []= array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $HeaderElements,
            );

            return $Block;
        }
    }

    /**
     * Adds a body row to a table for a line that contains '|'.
     *
     * @return array|null
     */
    protected function blockTableContinue($Line, array $Block)
    {
        if (isset($Block['interrupted']))
        {
            return;
        }

        # strpos() returning 0 is covered by the first comparison
        if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
        {
            $Elements = array();

            $row = $Line['text'];

            $row = trim($row);
            $row = trim($row, '|');

            preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);

            foreach ($matches[0] as $index => $cell)
            {
                $cell = trim($cell);

                $Element = array(
                    'name' => 'td',
                    'handler' => 'line',
                    'text' => $cell,
                );

                if (isset($Block['alignments'][$index]))
                {
                    $Element['attributes'] = array(
                        'style' => 'text-align: '.$Block['alignments'][$index].';',
                    );
                }

                $Elements []= $Element;
            }

            $Element = array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $Elements,
            );

            # index 1 is the tbody element created in blockTable()
            $Block['element']['text'][1]['text'] []= $Element;

            return $Block;
        }
    }

    #
    # ~
    #

    /**
     * Creates a paragraph block from a line.
     *
     * @return array
     */
    protected function paragraph($Line)
    {
        $Block = array(
            'element' => array(
                'name' => 'p',
                'text' => $Line['text'],
                'handler' => 'line',
            ),
        );

        return $Block;
    }

    #
    # Inline Elements
    #

    # marker character => inline types to try, in order
    protected $InlineTypes = array(
        '"' => array('SpecialCharacter'),
        '!' => array('Image'),
        '&' => array('SpecialCharacter'),
        '*' => array('Emphasis'),
        ':' => array('Url'),
        '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
        '>' => array('SpecialCharacter'),
        '[' => array('Link'),
        '_' => array('Emphasis'),
        '`' => array('Code'),
        '~' => array('Strikethrough'),
        '\\' => array('EscapeSequence'),
    );

    # ~

    protected $inlineMarkerList = '!"*_&[:<>`~\\';

    #
    # ~
    #

    /**
     * Converts the inline Markdown of a text run into HTML.
     *
     * @param string $text         text of a block
     * @param array  $nonNestables inline type names that must not be
     *                             recognised in this context
     *
     * @return string
     */
    public function line($text, $nonNestables=array())
    {
        $markup = '';

        # $excerpt is based on the first occurrence of a marker

        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
        {
            $marker = $excerpt[0];

            $markerPosition = strpos($text, $marker);

            $Excerpt = array('text' => $excerpt, 'context' => $text);

            foreach ($this->InlineTypes[$marker] as $inlineType)
            {
                # check to see if the current inline type is nestable in the current context

                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
                {
                    continue;
                }

                $Inline = $this->{'inline'.$inlineType}($Excerpt);

                if ( ! isset($Inline))
                {
                    continue;
                }

                # makes sure that the inline belongs to "our" marker

                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
                {
                    continue;
                }

                # sets a default inline position

                if ( ! isset($Inline['position']))
                {
                    $Inline['position'] = $markerPosition;
                }

                # cause the new element to 'inherit' our non nestables

                foreach ($nonNestables as $non_nestable)
                {
                    $Inline['element']['nonNestables'][] = $non_nestable;
                }

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $markup .= $this->unmarkedText($unmarkedText);

                # compile the inline
                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                continue 2;
            }

            # the marker does not belong to an inline

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $markup .= $this->unmarkedText($unmarkedText);

            $text = substr($text, $markerPosition + 1);
        }

        $markup .= $this->unmarkedText($text);

        return $markup;
    }

    #
    # ~
    #

    /**
     * Inline code span delimited by one or more backticks.
     *
     * @return array|null 'extent' (consumed bytes) and 'element'
     */
    protected function inlineCode($Excerpt)
    {
        $marker = $Excerpt['text'][0];

        if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
        {
            $text = $matches[2];
            $text = preg_replace("/[ ]*\n/", ' ', $text);

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'code',
                    'text' => $text,
                ),
            );
        }
    }

    /**
     * Autolink of the form <user@host>; "mailto:" is prepended to the href
     * when the address does not already carry it.
     *
     * @return array|null
     */
    protected function inlineEmailTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            if ( ! isset($matches[2]))
            {
                $url = 'mailto:' . $url;
            }

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $matches[1],
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    /**
     * Emphasis: a doubled marker yields <strong>, a single marker <em>.
     *
     * @return array|null
     */
    protected function inlineEmphasis($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        $marker = $Excerpt['text'][0];

        if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'strong';
        }
        elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'em';
        }
        else
        {
            return;
        }

        return array(
            'extent' => strlen($matches[0]),
            'element' => array(
                'name' => $emphasis,
                'handler' => 'line',
                'text' => $matches[1],
            ),
        );
    }

    /**
     * Backslash escape: emits the escaped character when it is one of
     * $specialCharacters.
     *
     * @return array|null
     */
    protected function inlineEscapeSequence($Excerpt)
    {
        if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
        {
            return array(
                'markup' => $Excerpt['text'][1],
                'extent' => 2,
            );
        }
    }

    /**
     * Image: a '!' followed by a link, rendered as <img> with the link text
     * as alt text and the link target as src.
     *
     * @return array|null
     */
    protected function inlineImage($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
        {
            return;
        }

        $Excerpt['text']= substr($Excerpt['text'], 1);

        $Link = $this->inlineLink($Excerpt);

        if ($Link === null)
        {
            return;
        }

        $Inline = array(
            # one extra byte for the leading '!'
            'extent' => $Link['extent'] + 1,
            'element' => array(
                'name' => 'img',
                'attributes' => array(
                    'src' => $Link['element']['attributes']['href'],
                    'alt' => $Link['element']['text'],
                ),
            ),
        );

        # carry over the remaining link attributes (e.g. title), but not href
        $Inline['element']['attributes'] += $Link['element']['attributes'];

        unset($Inline['element']['attributes']['href']);

        return $Inline;
    }

    /**
     * Inline link "[text](url "title")" or reference link "[text][id]" /
     * "[text]"; reference links resolve against the definitions collected
     * by blockReference().
     *
     * @return array|null
     */
    protected function inlineLink($Excerpt)
    {
        $Element = array(
            'name' => 'a',
            'handler' => 'line',
            'nonNestables' => array('Url', 'Link'),
            'text' => null,
            'attributes' => array(
                'href' => null,
                'title' => null,
            ),
        );

        $extent = 0;

        $remainder = $Excerpt['text'];

        if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
        {
            $Element['text'] = $matches[1];

            $extent += strlen($matches[0]);

            $remainder = substr($remainder, $extent);
        }
        else
        {
            return;
        }

        if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
        {
            $Element['attributes']['href'] = $matches[1];

            if (isset($matches[2]))
            {
                # strip the surrounding quotes
                $Element['attributes']['title'] = substr($matches[2], 1, - 1);
            }

            $extent += strlen($matches[0]);
        }
        else
        {
            if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
            {
                $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
                $definition = strtolower($definition);

                $extent += strlen($matches[0]);
            }
            else
            {
                $definition = strtolower($Element['text']);
            }

            if ( ! isset($this->DefinitionData['Reference'][$definition]))
            {
                return;
            }

            $Definition = $this->DefinitionData['Reference'][$definition];

            $Element['attributes']['href'] = $Definition['url'];
            $Element['attributes']['title'] = $Definition['title'];
        }

        return array(
            'extent' => $extent,
            'element' => $Element,
        );
    }

    /**
     * Passes inline HTML (closing tags, comments, opening tags) through
     * unchanged. Disabled when markup is escaped or safe mode is on.
     *
     * @return array|null
     */
    protected function inlineMarkup($Excerpt)
    {
        if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
        {
            return;
        }

        if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }
    }

    /**
     * Escapes '<', '>', '"' and any '&' that does not start an entity.
     *
     * @return array|null
     */
    protected function inlineSpecialCharacter($Excerpt)
    {
        if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
        {
            return array(
                'markup' => '&amp;',
                'extent' => 1,
            );
        }

        $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

        if (isset($SpecialCharacter[$Excerpt['text'][0]]))
        {
            return array(
                'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
                'extent' => 1,
            );
        }
    }

    /**
     * Strikethrough delimited by "~~", rendered as <del>.
     *
     * @return array|null
     */
    protected function inlineStrikethrough($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
        {
            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'del',
                    'text' => $matches[1],
                    'handler' => 'line',
                ),
            );
        }
    }

    /**
     * Bare http(s) URL. The marker is the ':' of the scheme, so the URL is
     * searched in the whole context and its start is reported via
     * 'position'.
     *
     * @return array|null
     */
    protected function inlineUrl($Excerpt)
    {
        if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
        {
            return;
        }

        if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
        {
            $url = $matches[0][0];

            $Inline = array(
                'extent' => strlen($matches[0][0]),
                'position' => $matches[0][1],
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );

            return $Inline;
        }
    }

    /**
     * Autolink of the form <scheme://...>.
     *
     * @return array|null
     */
    protected function inlineUrlTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    # ~

    /**
     * Handles line breaks in text that contains no inline markers: every
     * line break when breaks are enabled, otherwise only those preceded by
     * two or more spaces or by a backslash.
     *
     * @return string
     */
    protected function unmarkedText($text)
    {
        if ($this->breaksEnabled)
        {
            $text = preg_replace('/[ ]*\n/', "<br />\n", $text);
        }
        else
        {
            $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
            $text = str_replace(" \n", "\n", $text);
        }

        return $text;
    }

    #
    # Handlers
    #

    /**
     * Renders an element array ('name', optional 'attributes', 'text',
     * 'handler', 'nonNestables') to HTML. Text without a handler is escaped;
     * an element without text is rendered self-closing.
     *
     * @return string
     */
    protected function element(array $Element)
    {
        if ($this->safeMode)
        {
            $Element = $this->sanitiseElement($Element);
        }

        $markup = '<'.$Element['name'];

        if (isset($Element['attributes']))
        {
            foreach ($Element['attributes'] as $name => $value)
            {
                if ($value === null)
                {
                    continue;
                }

                $markup .= ' '.$name.'="'.self::escape($value).'"';
            }
        }

        if (isset($Element['text']))
        {
            $markup .= '>';

            if (!isset($Element['nonNestables']))
            {
                $Element['nonNestables'] = array();
            }

            if (isset($Element['handler']))
            {
                $markup .= $this->{$Element['handler']}($Element['text'], $Element['nonNestables']);
            }
            else
            {
                $markup .= self::escape($Element['text'], true);
            }

            $markup .= '</'.$Element['name'].'>';
        }
        else
        {
            $markup .= ' />';
        }

        return $markup;
    }

    /**
     * Renders a list of elements, each preceded by a line break.
     *
     * @return string
     */
    protected function elements(array $Elements)
    {
        $markup = '';

        foreach ($Elements as $Element)
        {
            $markup .= "\n" . $this->element($Element);
        }

        $markup .= "\n";

        return $markup;
    }

    # ~

    /**
     * Renders the lines of a list item. For a tight item (no empty line)
     * whose markup starts with a paragraph, the first <p>...</p> wrapper is
     * removed.
     *
     * @return string
     */
    protected function li($lines)
    {
        $markup = $this->lines($lines);

        $trimmedMarkup = trim($markup);

        if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '<p>')
        {
            $markup = $trimmedMarkup;
            $markup = substr($markup, 3);

            $position = strpos($markup, "</p>");

            $markup = substr_replace($markup, '', $position, 4);
        }

        return $markup;
    }

    #
    # Deprecated Methods
    #

    /**
     * Alias of text().
     *
     * @param string $text
     *
     * @return string
     */
    public function parse($text)
    {
        $markup = $this->text($text);

        return $markup;
    }

    /**
     * Safe-mode filter: neutralises unsafe URLs on <a>/<img> and drops
     * attributes with malformed names or names starting with "on".
     *
     * @return array the sanitised element
     */
    protected function sanitiseElement(array $Element)
    {
        static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
        static $safeUrlNameToAtt  = array(
            'a'   => 'href',
            'img' => 'src',
        );

        if (isset($safeUrlNameToAtt[$Element['name']]))
        {
            $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
        }

        if ( ! empty($Element['attributes']))
        {
            foreach ($Element['attributes'] as $att => $val)
            {
                # filter out badly parsed attribute
                if ( ! preg_match($goodAttribute, $att))
                {
                    unset($Element['attributes'][$att]);
                }
                # dump onevent attribute
                elseif (self::striAtStart($att, 'on'))
                {
                    unset($Element['attributes'][$att]);
                }
            }
        }

        return $Element;
    }

    /**
     * Leaves the attribute untouched when its value starts with a
     * whitelisted scheme; otherwise replaces every ':' with '%3A'.
     *
     * @return array
     */
    protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
    {
        foreach ($this->safeLinksWhitelist as $scheme)
        {
            if (self::striAtStart($Element['attributes'][$attribute], $scheme))
            {
                return $Element;
            }
        }

        $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);

        return $Element;
    }

    #
    # Static Methods
    #

    /**
     * HTML-escapes text as UTF-8.
     *
     * @param string $text
     * @param bool   $allowQuotes leave quotes unescaped (element content)
     *
     * @return string
     */
    protected static function escape($text, $allowQuotes = false)
    {
        return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
    }

    /**
     * Case-insensitive "starts with".
     *
     * @return bool
     */
    protected static function striAtStart($string, $needle)
    {
        $len = strlen($needle);

        if ($len > strlen($string))
        {
            return false;
        }
        else
        {
            return strtolower(substr($string, 0, $len)) === strtolower($needle);
        }
    }

    /**
     * Returns the instance registered under $name, creating it on first use.
     *
     * @param string $name
     *
     * @return static
     */
    public static function instance($name = 'default')
    {
        if (isset(self::$instances[$name]))
        {
            return self::$instances[$name];
        }

        $instance = new static();

        self::$instances[$name] = $instance;

        return $instance;
    }

    private static $instances = array();

    #
    # Fields
    #

    # definitions collected while parsing, e.g. ['Reference'][$id]
    protected $DefinitionData;

    #
    # Read-Only

    # characters that may be escaped with a backslash
    protected $specialCharacters = array(
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
    );

    protected $StrongRegex = array(
        '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
        '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
    );

    protected $EmRegex = array(
        '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
        '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
    );

    protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

    # elements without a closing tag (names in lower case)
    protected $voidElements = array(
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
    );

    # elements that are handled inline rather than as HTML blocks
    protected $textLevelElements = array(
        'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
        'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
        'i', 'rp', 'del', 'code',          'strike', 'marquee',
        'q', 'rt', 'ins', 'font',          'strong',
        's', 'tt', 'kbd', 'mark',
        'u', 'xm', 'sub', 'nobr',
                   'sup', 'ruby',
                   'var', 'span',
                   'wbr', 'time',
    );
}