<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Util;

use League\CommonMark\Cursor;

final class LinkParserHelper
{
    /**
     * Attempt to parse link destination
     *
     * The `<..>`-wrapped form is tried first. If that does not match and the
     * cursor sits on a `<`, parsing gives up; otherwise the destination is
     * scanned character by character. Either way the raw text is passed
     * through RegexHelper::unescape() and UrlEncoder::unescapeAndEncode().
     *
     * @param Cursor $cursor
     *
     * @return null|string The string, or null if no match
     */
    public static function parseLinkDestination(Cursor $cursor): ?string
    {
        $bracketed = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES);
        if ($bracketed !== null) {
            // Chop off surrounding <..>:
            return UrlEncoder::unescapeAndEncode(
                RegexHelper::unescape(\substr($bracketed, 1, -1))
            );
        }

        // A '<' that did not form a complete <..> destination is never
        // handed to the manual scanner.
        if ($cursor->getCharacter() === '<') {
            return null;
        }

        $destination = self::manuallyParseLinkDestination($cursor);
        if ($destination === null) {
            return null;
        }

        return UrlEncoder::unescapeAndEncode(
            RegexHelper::unescape($destination)
        );
    }

    /**
     * Attempt to parse a link label, including its surrounding square brackets
     *
     * On success the cursor is left after the closing bracket. If a label is
     * matched but rejected for being too long, the cursor is rewound to where
     * it was when this method was called.
     *
     * @param Cursor $cursor
     *
     * @return int Length of the matched label in characters (brackets included), or 0 if no match
     */
    public static function parseLinkLabel(Cursor $cursor): int
    {
        $stateBeforeLabel = $cursor->saveState();

        $match = $cursor->match('/^\[(?:[^\\\\\[\]]|\\\\.){0,1000}\]/');
        if ($match === null) {
            return 0;
        }

        $length = \mb_strlen($match, 'utf-8');

        // The pattern counts a backslash escape as a single repetition, so the
        // matched text can still be longer than the limit enforced here.
        if ($length > 1001) {
            // match() already consumed the text; undo that so a rejected
            // label does not eat input.
            $cursor->restoreState($stateBeforeLabel);

            return 0;
        }

        return $length;
    }

    /**
     * Attempt to parse link title (sans quotes)
     *
     * @param Cursor $cursor
     *
     * @return null|string The string, or null if no match
     */
    public static function parseLinkTitle(Cursor $cursor): ?string
    {
        $title = $cursor->match('/' . RegexHelper::PARTIAL_LINK_TITLE . '/');
        if ($title === null) {
            return null;
        }

        // Chop off quotes from title and unescape
        return RegexHelper::unescape(\substr($title, 1, -1));
    }

    /**
     * Scan a link destination that is not wrapped in <..>
     *
     * Characters are consumed until whitespace, an unmatched ')' or the end of
     * input is reached. Escapable backslash pairs are skipped as a unit and
     * parentheses must be balanced. When null is returned the cursor is put
     * back where it started.
     *
     * @param Cursor $cursor
     *
     * @return null|string The raw (still escaped) destination, or null if no match
     */
    private static function manuallyParseLinkDestination(Cursor $cursor): ?string
    {
        $oldPosition = $cursor->getPosition();
        $oldState = $cursor->saveState();

        $openParens = 0;
        while (($c = $cursor->getCharacter()) !== null) {
            if ($c === '\\' && $cursor->peek() !== null && RegexHelper::isEscapable($cursor->peek())) {
                // Skip the backslash together with the character it escapes
                $cursor->advanceBy(2);
            } elseif ($c === '(') {
                $cursor->advanceBy(1);
                $openParens++;
            } elseif ($c === ')') {
                if ($openParens < 1) {
                    // Unmatched ')': it ends the destination and is not consumed
                    break;
                }

                $cursor->advanceBy(1);
                $openParens--;
            } elseif (\preg_match(RegexHelper::REGEX_WHITESPACE_CHAR, $c) === 1) {
                break;
            } else {
                $cursor->advanceBy(1);
            }
        }

        // An empty destination is only accepted when the scan stopped on ')'
        $nothingConsumed = $cursor->getPosition() === $oldPosition && $c !== ')';

        if ($openParens !== 0 || $nothingConsumed) {
            // Do not leave the cursor part-way through a rejected destination
            $cursor->restoreState($oldState);

            return null;
        }

        // Rewind, then cover the whole destination with one advanceBy() call.
        // NOTE(review): presumably getPreviousText() returns the text spanned
        // by the most recent advance - confirm against Cursor.
        $newPos = $cursor->getPosition();
        $cursor->restoreState($oldState);

        $cursor->advanceBy($newPos - $cursor->getPosition());

        return $cursor->getPreviousText();
    }
}