<?php

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark;

use League\CommonMark\Block\Element\AbstractStringContainerBlock;
use League\CommonMark\Delimiter\Delimiter;
use League\CommonMark\Delimiter\Processor\DelimiterProcessorInterface;
use League\CommonMark\Inline\AdjacentTextMerger;
use League\CommonMark\Inline\Element\Text;
use League\CommonMark\Node\Node;
use League\CommonMark\Reference\ReferenceMapInterface;
use League\CommonMark\Util\RegexHelper;

/**
 * Walks the inline content of a string container block, handing each position
 * to the environment's inline parsers and delimiter processors and falling
 * back to plain text when nothing claims it.
 *
 * @internal
 */
final class InlineParserEngine
{
    /**
     * Source of the inline parsers, delimiter processors and the plain-text regex.
     *
     * The class is final, so nothing can inherit this property; it is private
     * rather than protected.
     *
     * @var EnvironmentInterface
     */
    private $environment;

    /**
     * @param EnvironmentInterface $environment Environment queried for parsers and delimiter processors
     */
    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Parses the inline content of $container, appending the resulting nodes to it.
     *
     * Runs until the cursor reports no current character, then processes the
     * collected delimiters and merges adjacent text children of the container.
     *
     * @param AbstractStringContainerBlock $container    Block whose content is parsed and which receives the inline nodes
     * @param ReferenceMapInterface        $referenceMap Reference map handed to the inline parser context
     *
     * @return void
     */
    public function parse(AbstractStringContainerBlock $container, ReferenceMapInterface $referenceMap)
    {
        $inlineParserContext = new InlineParserContext($container, $referenceMap);
        $cursor = $inlineParserContext->getCursor();

        while (($character = $cursor->getCharacter()) !== null) {
            if ($this->parseCharacter($character, $inlineParserContext)) {
                continue;
            }

            // Nothing handled this position: consume it as plain text
            $this->addPlainText($character, $container, $inlineParserContext);
        }

        $this->processInlines($inlineParserContext);

        AdjacentTextMerger::mergeChildNodes($container);
    }

    /**
     * Offers the current position to every inline parser registered for
     * $character, then to the delimiter processor for it (if there is one).
     *
     * @param string              $character           Character at the current cursor position
     * @param InlineParserContext $inlineParserContext Context shared by all inline parsers
     *
     * @return bool Whether we successfully parsed a character at that position
     */
    private function parseCharacter(string $character, InlineParserContext $inlineParserContext): bool
    {
        foreach ($this->environment->getInlineParsersForCharacter($character) as $parser) {
            if ($parser->parse($inlineParserContext)) {
                return true;
            }
        }

        $delimiterProcessor = $this->environment->getDelimiterProcessors()->getDelimiterProcessor($character);
        if (!$delimiterProcessor) {
            return false;
        }

        return $this->parseDelimiters($delimiterProcessor, $inlineParserContext);
    }

    /**
     * Consumes a run of identical delimiter characters at the cursor, appends
     * it to the container as a Text node flagged with 'delim', and pushes a
     * Delimiter onto the stack when the run can open or close.
     *
     * @param DelimiterProcessorInterface $delimiterProcessor Processor selected for the delimiter character
     * @param InlineParserContext         $inlineContext      Context providing the cursor, container and delimiter stack
     *
     * @return bool False when there is no current character or the run is
     *              shorter than the processor's minimum length (the cursor is
     *              not advanced in either case); true otherwise
     */
    private function parseDelimiters(DelimiterProcessorInterface $delimiterProcessor, InlineParserContext $inlineContext): bool
    {
        $cursor = $inlineContext->getCursor();
        $character = $cursor->getCharacter();

        // Without a current character there is no run to consume; bail out
        // instead of entering the counting loop with a null needle.
        // NOTE(review): peek() appears to return null out of bounds (see the
        // $charBefore handling below), in which case that loop would never end.
        if ($character === null) {
            return false;
        }

        // Treat the start of the input as if it were preceded by a newline
        $charBefore = $cursor->peek(-1);
        if ($charBefore === null) {
            $charBefore = "\n";
        }

        // Count how many times the delimiter character repeats from here
        $numDelims = 0;
        while ($cursor->peek($numDelims) === $character) {
            ++$numDelims;
        }

        if ($numDelims < $delimiterProcessor->getMinLength()) {
            return false;
        }

        $cursor->advanceBy($numDelims);

        // Treat the end of the input as if it were followed by a newline
        $charAfter = $cursor->getCharacter();
        if ($charAfter === null) {
            $charAfter = "\n";
        }

        list($canOpen, $canClose) = self::determineCanOpenOrClose($charBefore, $charAfter, $character, $delimiterProcessor);

        // The run is always emitted as text; the 'delim' flag keeps
        // addPlainText() from appending ordinary text onto this node.
        $node = new Text(\str_repeat($character, $numDelims), [
            'delim' => true,
        ]);
        $inlineContext->getContainer()->appendChild($node);

        // Track the run on the delimiter stack only if it can open or close
        if ($canOpen || $canClose) {
            $delimiter = new Delimiter($character, $numDelims, $node, $canOpen, $canClose);
            $inlineContext->getDelimiterStack()->push($delimiter);
        }

        return true;
    }

    /**
     * Processes every delimiter collected during parsing, then empties the stack.
     *
     * @param InlineParserContext $inlineParserContext Context holding the delimiter stack
     *
     * @return void
     */
    private function processInlines(InlineParserContext $inlineParserContext)
    {
        $delimiterStack = $inlineParserContext->getDelimiterStack();
        $delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());

        // Remove all delimiters
        $delimiterStack->removeAll();
    }

    /**
     * Adds text that no parser claimed, either by extending the container's
     * trailing Text node or by appending a new one.
     *
     * @param string              $character           Character at the current cursor position
     * @param Node                $container           Node receiving the text
     * @param InlineParserContext $inlineParserContext Context providing the cursor
     *
     * @return void
     */
    private function addPlainText(string $character, Node $container, InlineParserContext $inlineParserContext)
    {
        $cursor = $inlineParserContext->getCursor();

        // We reach here if none of the parsers can handle the input
        // Attempt to match multiple non-special characters at once
        $text = $cursor->match($this->environment->getInlineParserCharacterRegex());

        // This might fail if we're currently at a special character which wasn't parsed; if so, just add that character
        if ($text === null) {
            $cursor->advanceBy(1);
            $text = $character;
        }

        // Never append onto a delimiter run's Text node (flagged with 'delim')
        $lastInline = $container->lastChild();
        if ($lastInline instanceof Text && !isset($lastInline->data['delim'])) {
            $lastInline->append($text);

            return;
        }

        $container->appendChild(new Text($text));
    }

    /**
     * Decides whether a delimiter run may open and/or close, based on the
     * characters immediately before and after it.
     *
     * @param string                      $charBefore         Character preceding the run
     * @param string                      $charAfter          Character following the run
     * @param string                      $character          The delimiter character itself
     * @param DelimiterProcessorInterface $delimiterProcessor Processor supplying the opening/closing characters
     *
     * @return bool[] Two-element list: [canOpen, canClose]
     */
    private static function determineCanOpenOrClose(string $charBefore, string $charAfter, string $character, DelimiterProcessorInterface $delimiterProcessor): array
    {
        // preg_match() returns 1 on a match, 0 on no match and false on error;
        // only a definite match counts.
        $afterIsWhitespace = \preg_match(RegexHelper::REGEX_UNICODE_WHITESPACE_CHAR, $charAfter) === 1;
        $afterIsPunctuation = \preg_match(RegexHelper::REGEX_PUNCTUATION, $charAfter) === 1;
        $beforeIsWhitespace = \preg_match(RegexHelper::REGEX_UNICODE_WHITESPACE_CHAR, $charBefore) === 1;
        $beforeIsPunctuation = \preg_match(RegexHelper::REGEX_PUNCTUATION, $charBefore) === 1;

        // Left-flanking: not followed by whitespace, and if followed by
        // punctuation then preceded by whitespace or punctuation.
        $leftFlanking = !$afterIsWhitespace && (!$afterIsPunctuation || $beforeIsWhitespace || $beforeIsPunctuation);
        // Right-flanking: the mirror image of the rule above.
        $rightFlanking = !$beforeIsWhitespace && (!$beforeIsPunctuation || $afterIsWhitespace || $afterIsPunctuation);

        if ($character === '_') {
            // Underscore: a run that flanks on both sides may only open when
            // preceded by punctuation and only close when followed by punctuation.
            $canOpen = $leftFlanking && (!$rightFlanking || $beforeIsPunctuation);
            $canClose = $rightFlanking && (!$leftFlanking || $afterIsPunctuation);
        } else {
            // Other delimiters: flanking plus being the processor's opening/closing character
            $canOpen = $leftFlanking && $character === $delimiterProcessor->getOpeningCharacter();
            $canClose = $rightFlanking && $character === $delimiterProcessor->getClosingCharacter();
        }

        return [$canOpen, $canClose];
    }
}