tag from typical bodytext fields
 * are an example of this.
 * This class contains generic parsers for the most well-known types
 * which are default for most TYPO3 installations. Soft References can also be user-defined.
 * The Soft Reference parsers are used by the system to find these references and process them accordingly in import/export actions and copy operations.
 *
 * Example of usage
 * Soft References:
 * if ($conf['softref'] && (string)$value !== '') { // Check if a TCA configured field has softreferences defined (see TYPO3 Core API document)
 *     $softRefs = \TYPO3\CMS\Backend\Utility\BackendUtility::explodeSoftRefParserList($conf['softref']); // Explode the list of softreferences/parameters
 *     if ($softRefs !== FALSE) { // If there are soft references
 *         foreach($softRefs as $spKey => $spParams) { // Traverse soft references
 *             $softRefObj = \TYPO3\CMS\Backend\Utility\BackendUtility::softRefParserObj($spKey); // create / get object
 *             if (is_object($softRefObj)) { // If there was an object returned...:
 *                 $resultArray = $softRefObj->findRef($table, $field, $uid, $softRefValue, $spKey, $spParams); // Do processing
 *
 * Result Array:
 * The Result array should contain two keys: "content" and "elements".
 * "content" is a string containing the input content but possibly with tokens inside.
 * Tokens are strings like {softref:[tokenID]} which is a placeholder for a value extracted by a softref parser
 * For each token there MUST be an entry in the "elements" key which has a "subst" key defining the tokenID and the tokenValue. See below.
 * "elements" is an array where the keys are insignificant, but the values are arrays with these keys:
 *     "matchString" => The value of the match. This is only for informational purposes to show what was found.
 *     "error" => An error message can be set here, like "file not found" etc.
 *     "subst" => array( // If this array is found there MUST be a token in the output content as well!
 *         "tokenID" => The tokenID string corresponding to the token in output content, {softref:[tokenID]}. This is typically an md5 hash of a string defining uniquely the position of the element.
 *         "tokenValue" => The value that the token substitutes in the text. Basically, if this value is inserted instead of the token the content should match what was inputted originally.
 *         "type" => file / db / string / external = the type of substitution. "file" means it is a relative file [automatically mapped], "db" means a database record reference [automatically mapped], "string" means it is manually modified string content (e.g. an email address), "external" is used for URL link targets
 *         "relFileName" => (for "file" type): Relative filename. May not necessarily exist. This could be noticed in the error key.
 *         "recordRef" => (for "db" type) : Reference to DB record on the form [table]:[uid]. May not necessarily exist.
 *         "title" => Title of element (for backend information)
 *         "description" => Description of element (for backend information)
 *     )
 */
/**
 * Class for processing of the default soft reference types for CMS:
 *
 * - 'substitute' : A full field value targeted for manual substitution (for import /export features)
 * - 'notify' : Just report if a value is found, nothing more.
 * - 'images' : HTML tags for RTE images
 * - 'typolink' : references to page id or file, possibly with anchor/target, possibly comma-separated list.
 * - 'typolink_tag' : As typolink, but searching for <a> tags that encapsulate it.
* - 'email' : Email highlight
 * - 'url' : URL highlights (with a scheme)
 * - 'ext_fileref' : References to extension files ("EXT:..."), only reported, never substituted
 */
class SoftReferenceIndex implements SingletonInterface
{
    /**
     * Prefix used by makeTokenID(); rebuilt on every findRef() call from
     * table, uid, field, structure path and parser key.
     *
     * @var string
     */
    public $tokenID_basePrefix = '';

    /**
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * UID of the record currently processed by findRef(); 0 outside of a findRef() call.
     *
     * @var int
     */
    private $referenceUid = 0;

    /**
     * Table of the record currently processed by findRef(); empty outside of a findRef() call.
     *
     * @var string
     */
    private $referenceTable = '';

    public function __construct(EventDispatcherInterface $eventDispatcher)
    {
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Main function through which all processing happens
     *
     * @param string $table Database table name
     * @param string $field Field name for which processing occurs
     * @param int $uid UID of the record
     * @param string $content The content/value of the field
     * @param string $spKey The softlink parser key. This is only interesting if more than one parser is grouped in the same class. That is the case with this parser.
     * @param array $spParams Parameters of the softlink parser. Basically this is the content inside optional []-brackets after the softref keys. Parameters are exploded by ";"
     * @param string $structurePath If running from inside a FlexForm structure, this is the path of the tag.
     * @return array|bool|null Result array on positive matches, see description above. FALSE for an unknown parser key, otherwise null
     */
    public function findRef($table, $field, $uid, $content, $spKey, $spParams, $structurePath = '')
    {
        $this->referenceUid = $uid;
        $this->referenceTable = $table;
        $this->tokenID_basePrefix = $table . ':' . $uid . ':' . $field . ':' . $structurePath . ':' . $spKey;

        try {
            switch ($spKey) {
                case 'notify':
                    // Simple notification
                    $retVal = [
                        'elements' => [
                            [
                                'matchString' => $content,
                            ],
                        ],
                    ];
                    break;
                case 'substitute':
                    $tokenID = $this->makeTokenID();
                    $retVal = [
                        'content' => '{softref:' . $tokenID . '}',
                        'elements' => [
                            [
                                'matchString' => $content,
                                'subst' => [
                                    'type' => 'string',
                                    'tokenID' => $tokenID,
                                    'tokenValue' => $content,
                                ],
                            ],
                        ],
                    ];
                    break;
                case 'typolink':
                    $retVal = $this->findRef_typolink($content, $spParams);
                    break;
                case 'typolink_tag':
                    $retVal = $this->findRef_typolink_tag($content);
                    break;
                case 'ext_fileref':
                    $retVal = $this->findRef_extension_fileref($content);
                    break;
                case 'email':
                    $retVal = $this->findRef_email($content, $spParams);
                    break;
                case 'url':
                    $retVal = $this->findRef_url($content, $spParams);
                    break;
                default:
                    $retVal = false;
            }
        } finally {
            // This class is a singleton: always clear the per-call record context,
            // also when a parser throws (e.g. getTypoLinkParts() on a phar:// target).
            $this->referenceUid = 0;
            $this->referenceTable = '';
        }

        return $retVal;
    }

    /**
     * TypoLink value processing.
     * Will process input value as a TypoLink value.
     *
     * @param string $content The input content to analyze
     * @param array $spParams Parameters set for the softref parser key in TCA/columns. value "linkList" will split the string by comma before processing.
     * @return array|null Result array on positive matches, see description above. Otherwise null
     * @see \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::typolink()
     * @see getTypoLinkParts()
     */
    public function findRef_typolink($content, $spParams)
    {
        // First, split the input string by a comma if the "linkList" parameter is set.
        // An example: the link field for images in content elements of type "textpic" or "image".
        // This field CAN be configured to define a link per image, separated by comma.
        if (is_array($spParams) && in_array('linkList', $spParams, true)) {
            // Preserving whitespace on purpose.
            $linkElement = explode(',', $content);
        } else {
            // If only one element, just set in this array to make it easy below.
            $linkElement = [$content];
        }

        // Traverse the links now:
        $elements = [];
        foreach ($linkElement as $k => $typolinkValue) {
            $tLP = $this->getTypoLinkParts($typolinkValue);
            $linkElement[$k] = $this->setTypoLinkPartsElement($tLP, $elements, $typolinkValue, $k);
        }

        if (empty($elements)) {
            return null;
        }

        return [
            'content' => implode(',', $linkElement),
            'elements' => $elements,
        ];
    }

    /**
     * TypoLink tag processing.
     * Will search for <a> tags in the content string and process any found.
     *
     * @param string $content The input content to analyze
     * @return array|null Result array on positive matches, see description above. Otherwise null
     * @see \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::typolink()
     * @see getTypoLinkParts()
     */
    public function findRef_typolink_tag($content)
    {
        // Parse string for <a> tags:
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkTags = $htmlParser->splitTags('a', $content);

        // Traverse result; only the odd-indexed entries are inspected for an href attribute:
        $elements = [];
        foreach ($linkTags as $key => $foundValue) {
            if (!($key % 2)) {
                continue;
            }
            if (!preg_match('/href="([^"]+)"/', $foundValue, $matches)) {
                continue;
            }
            try {
                $linkDetails = $linkService->resolve($matches[1]);
                if ($linkDetails['type'] === LinkService::TYPE_FILE && preg_match('/file\?uid=(\d+)/', $matches[1], $fileIdMatch)) {
                    $token = $this->makeTokenID($key);
                    $elements[$key]['matchString'] = $linkTags[$key];
                    $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
                    $elements[$key]['subst'] = [
                        'type' => 'db',
                        'recordRef' => 'sys_file:' . $fileIdMatch[1],
                        'tokenID' => $token,
                        'tokenValue' => 'file:' . ($linkDetails['file'] instanceof File ? $linkDetails['file']->getUid() : $fileIdMatch[1]),
                    ];
                } elseif ($linkDetails['type'] === LinkService::TYPE_PAGE && preg_match('/page\?uid=(\d+)#?(\d+)?/', $matches[1], $pageAndAnchorMatches)) {
                    $token = $this->makeTokenID($key);
                    // Replacement for the href value; kept in a local so the $content parameter is not clobbered
                    $replacement = '{softref:' . $token . '}';
                    $elements[$key]['matchString'] = $linkTags[$key];
                    $elements[$key]['subst'] = [
                        'type' => 'db',
                        'recordRef' => 'pages:' . $linkDetails['pageuid'],
                        'tokenID' => $token,
                        'tokenValue' => $linkDetails['pageuid'],
                    ];
                    if (isset($pageAndAnchorMatches[2]) && $pageAndAnchorMatches[2] !== '') {
                        // Anchor is assumed to point to a content elements:
                        if (MathUtility::canBeInterpretedAsInteger($pageAndAnchorMatches[2])) {
                            // Initialize a new entry because we have a new relation:
                            $newTokenID = $this->makeTokenID('setTypoLinkPartsElement:anchor:' . $key);
                            $elements[$newTokenID . ':' . $key] = [];
                            $elements[$newTokenID . ':' . $key]['matchString'] = 'Anchor Content Element: ' . $pageAndAnchorMatches[2];
                            $replacement .= '#{softref:' . $newTokenID . '}';
                            $elements[$newTokenID . ':' . $key]['subst'] = [
                                'type' => 'db',
                                'recordRef' => 'tt_content:' . $pageAndAnchorMatches[2],
                                'tokenID' => $newTokenID,
                                'tokenValue' => $pageAndAnchorMatches[2],
                            ];
                        } else {
                            // Anchor is a hardcoded string
                            $replacement .= '#' . $pageAndAnchorMatches[2];
                        }
                    }
                    $linkTags[$key] = str_replace($matches[1], $replacement, $linkTags[$key]);
                } elseif ($linkDetails['type'] === LinkService::TYPE_URL) {
                    $token = $this->makeTokenID($key);
                    $elements[$key]['matchString'] = $linkTags[$key];
                    $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
                    $elements[$key]['subst'] = [
                        'type' => 'external',
                        'tokenID' => $token,
                        'tokenValue' => $linkDetails['url'],
                    ];
                } elseif ($linkDetails['type'] === LinkService::TYPE_EMAIL) {
                    $token = $this->makeTokenID($key);
                    $elements[$key]['matchString'] = $linkTags[$key];
                    $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
                    $elements[$key]['subst'] = [
                        'type' => 'string',
                        'tokenID' => $token,
                        'tokenValue' => $linkDetails['email'],
                    ];
                } elseif ($linkDetails['type'] === LinkService::TYPE_TELEPHONE) {
                    $token = $this->makeTokenID($key);
                    $elements[$key]['matchString'] = $linkTags[$key];
                    $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
                    $elements[$key]['subst'] = [
                        'type' => 'string',
                        'tokenID' => $token,
                        'tokenValue' => $linkDetails['telephone'],
                    ];
                }
            } catch (\Exception $e) {
                // skip invalid links
            }
        }

        if (empty($elements)) {
            return null;
        }

        return [
            'content' => implode('', $linkTags),
            'elements' => $elements,
        ];
    }

    /**
     * Finding email addresses in content and making them substitutable.
     *
     * @param string $content The input content to analyze
     * @param array $spParams Parameters set for the softref parser key in TCA/columns; a token is only inserted if it contains "subst"
     * @return array|null Result array on positive matches, see description above. Otherwise null
     */
    public function findRef_email($content, $spParams)
    {
        $elements = [];
        $substitute = is_array($spParams) && in_array('subst', $spParams, true);

        // The content is padded with one space on each side so matches at the very start/end
        // have a delimiter; the padding is cut off again below. preg_split() returns false on
        // failure, which is treated as "nothing found".
        $parts = preg_split('/([^[:alnum:]]+)([A-Za-z0-9\\._-]+[@][A-Za-z0-9\\._-]+[\\.].[A-Za-z0-9]+)/', ' ' . $content . ' ', 10000, PREG_SPLIT_DELIM_CAPTURE) ?: [];
        foreach ($parts as $idx => $value) {
            // Every third part (offset 2) is the captured email address
            if ($idx % 3 === 2) {
                $tokenID = $this->makeTokenID($idx);
                $elements[$idx] = [];
                $elements[$idx]['matchString'] = $value;
                if ($substitute) {
                    $parts[$idx] = '{softref:' . $tokenID . '}';
                    $elements[$idx]['subst'] = [
                        'type' => 'string',
                        'tokenID' => $tokenID,
                        'tokenValue' => $value,
                    ];
                }
            }
        }

        if (empty($elements)) {
            return null;
        }

        return [
            'content' => substr(implode('', $parts), 1, -1),
            'elements' => $elements,
        ];
    }

    /**
     * Finding URLs in content
     *
     * @param string $content The input content to analyze
     * @param array $spParams Parameters set for the softref parser key in TCA/columns; a token is only inserted if it contains "subst"
     * @return array|null Result array on positive matches, see description above. Otherwise null
     */
    public function findRef_url($content, $spParams)
    {
        $elements = [];
        $substitute = is_array($spParams) && in_array('subst', $spParams, true);

        // Same padding trick as in findRef_email(); preg_split() failure is treated as "nothing found".
        $parts = preg_split('/([^[:alnum:]"\']+)((https?|ftp):\\/\\/[^[:space:]"\'<>]*)([[:space:]])/', ' ' . $content . ' ', 10000, PREG_SPLIT_DELIM_CAPTURE) ?: [];
        foreach ($parts as $idx => $value) {
            // Offset 3 of each group of five is the captured scheme, which is already part of
            // the URL at offset 2 - drop it so it is not duplicated when imploding.
            if ($idx % 5 === 3) {
                unset($parts[$idx]);
            }
            // Offset 2 is the full URL
            if ($idx % 5 === 2) {
                $tokenID = $this->makeTokenID($idx);
                $elements[$idx] = [];
                $elements[$idx]['matchString'] = $value;
                if ($substitute) {
                    $parts[$idx] = '{softref:' . $tokenID . '}';
                    $elements[$idx]['subst'] = [
                        'type' => 'string',
                        'tokenID' => $tokenID,
                        'tokenValue' => $value,
                    ];
                }
            }
        }

        if (empty($elements)) {
            return null;
        }

        return [
            'content' => substr(implode('', $parts), 1, -1),
            'elements' => $elements,
        ];
    }

    /**
     * Finding reference to files from extensions in content, but only to notify about their existence. No substitution
     *
     * @param string $content The input content to analyze
     * @return array|null Result array on positive matches, see description above. Otherwise null
     */
    public function findRef_extension_fileref($content)
    {
        $elements = [];

        // Files starting with EXT:
        $parts = preg_split('/([^[:alnum:]"\']+)(EXT:[[:alnum:]_]+\\/[^[:space:]"\',]*)/', ' ' . $content . ' ', 10000, PREG_SPLIT_DELIM_CAPTURE) ?: [];
        foreach ($parts as $idx => $value) {
            // Every third part (offset 2) is the captured EXT: path; no token is created for it
            if ($idx % 3 === 2) {
                $elements[$idx] = [];
                $elements[$idx]['matchString'] = $value;
            }
        }

        if (empty($elements)) {
            return null;
        }

        return [
            'content' => substr(implode('', $parts), 1, -1),
            'elements' => $elements,
        ];
    }

    /*************************
     *
     * Helper functions
     *
     *************************/

    /**
     * Analyze content as a TypoLink value and return an array with properties.
     * The value is decoded with TypoLinkCodecService; its "url" part is then resolved
     * through the LinkService and the resolved link data is merged into the result.
     * The syntax of the [typolink] part is: [typolink] = [page id][,[type value]][#[anchor, if integer = tt_content uid]]
     * The extraction is based on how \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::typolink() behaves.
     *
     * @param string $typolinkValue TypoLink value.
     * @return array Array with the properties of the input link specified. The key "type" will reveal the type. If that is blank it could not be determined.
     * @throws \RuntimeException if the link target uses the phar:// scheme
     * @see \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::typolink()
     * @see setTypoLinkPartsElement()
     */
    public function getTypoLinkParts($typolinkValue)
    {
        $finalTagParts = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolinkValue);

        $link_param = $finalTagParts['url'];
        // we define various keys below, "url" might be misleading
        unset($finalTagParts['url']);

        if (stripos(rawurldecode(trim($link_param)), 'phar://') === 0) {
            throw new \RuntimeException(
                'phar scheme not allowed as soft reference target',
                1530030672
            );
        }

        $linkService = GeneralUtility::makeInstance(LinkService::class);
        try {
            $linkData = $linkService->resolve($link_param);
            switch ($linkData['type']) {
                case LinkService::TYPE_RECORD:
                    // The page TSconfig of the page holding the referencing record may map
                    // the link identifier to a table name.
                    $referencePageId = $this->referenceTable === 'pages'
                        ? $this->referenceUid
                        : (int)(BackendUtility::getRecord($this->referenceTable, $this->referenceUid)['pid'] ?? 0);
                    if ($referencePageId) {
                        $pageTsConfig = BackendUtility::getPagesTSconfig($referencePageId);
                        $table = $pageTsConfig['TCEMAIN.']['linkHandler.'][$linkData['identifier'] . '.']['configuration.']['table'] ?? $linkData['identifier'];
                    } else {
                        // Backwards compatibility for the old behaviour, where the identifier was saved as the table.
                        $table = $linkData['identifier'];
                    }
                    $finalTagParts['table'] = $table;
                    $finalTagParts['uid'] = $linkData['uid'];
                    break;
                case LinkService::TYPE_PAGE:
                    $linkData['pageuid'] = (int)$linkData['pageuid'];
                    if (isset($linkData['pagetype'])) {
                        $linkData['pagetype'] = (int)$linkData['pagetype'];
                    }
                    if (isset($linkData['fragment'])) {
                        $finalTagParts['anchor'] = $linkData['fragment'];
                    }
                    break;
                case LinkService::TYPE_FILE:
                case LinkService::TYPE_UNKNOWN:
                    if (isset($linkData['file'])) {
                        $finalTagParts['type'] = LinkService::TYPE_FILE;
                        $linkData['file'] = $linkData['file'] instanceof FileInterface ? $linkData['file']->getUid() : $linkData['file'];
                    } else {
                        // Fall back to a "uid" query argument of the link target
                        $pU = parse_url($link_param);
                        parse_str($pU['query'] ?? '', $query);
                        if (isset($query['uid'])) {
                            $finalTagParts['type'] = LinkService::TYPE_FILE;
                            $finalTagParts['file'] = (int)$query['uid'];
                        }
                    }
                    break;
            }
            // Keys of $linkData take precedence over equally named keys of $finalTagParts
            return array_merge($finalTagParts, $linkData);
        } catch (UnknownLinkHandlerException $e) {
            // Cannot handle anything
            return $finalTagParts;
        }
    }

    /**
     * Recompile a TypoLink value from the array of properties made with getTypoLinkParts() into an elements array
     *
     * @param array $tLP TypoLink properties
     * @param array $elements Array of elements to be modified with substitution / information entries.
     * @param string $content The content to process.
     * @param int $idx Index value of the found element - used to make unique but stable tokenID
     * @return string The input content, possibly containing tokens now according to the added substitution entries in $elements
     * @see getTypoLinkParts()
     */
    public function setTypoLinkPartsElement($tLP, &$elements, $content, $idx)
    {
        // Initialize, set basic values. In any case a link will be shown
        $tokenID = $this->makeTokenID('setTypoLinkPartsElement:' . $idx);
        $elementKey = $tokenID . ':' . $idx;
        $elements[$elementKey] = [];
        $elements[$elementKey]['matchString'] = $content;

        // Based on link type, maybe do more.
        // "type" may be missing when getTypoLinkParts() could not resolve the link.
        switch ((string)($tLP['type'] ?? '')) {
            case LinkService::TYPE_EMAIL:
                // Mail addresses can be substituted manually:
                $elements[$elementKey]['subst'] = [
                    'type' => 'string',
                    'tokenID' => $tokenID,
                    'tokenValue' => $tLP['email'],
                ];
                // Output content will be the token instead:
                $content = '{softref:' . $tokenID . '}';
                break;
            case LinkService::TYPE_TELEPHONE:
                // phone number can be substituted manually:
                $elements[$elementKey]['subst'] = [
                    'type' => 'string',
                    'tokenID' => $tokenID,
                    'tokenValue' => $tLP['telephone'],
                ];
                // Output content will be the token instead:
                $content = '{softref:' . $tokenID . '}';
                break;
            case LinkService::TYPE_URL:
                // URLs can be substituted manually
                $elements[$elementKey]['subst'] = [
                    'type' => 'external',
                    'tokenID' => $tokenID,
                    'tokenValue' => $tLP['url'],
                ];
                // Output content will be the token instead:
                $content = '{softref:' . $tokenID . '}';
                break;
            case LinkService::TYPE_FOLDER:
                // This is a link to a folder...
                unset($elements[$elementKey]);
                return $content;
            case LinkService::TYPE_FILE:
                // Process files referenced by their FAL uid
                if (isset($tLP['file'])) {
                    $fileId = $tLP['file'] instanceof FileInterface ? $tLP['file']->getUid() : $tLP['file'];
                    // Token and substitute value
                    $elements[$elementKey]['subst'] = [
                        'type' => 'db',
                        'recordRef' => 'sys_file:' . $fileId,
                        'tokenID' => $tokenID,
                        'tokenValue' => 'file:' . $fileId,
                    ];
                    // Output content will be the token instead:
                    $content = '{softref:' . $tokenID . '}';
                } elseif (!empty($tLP['identifier'])) {
                    // Identifier has the form [keyword]:[value]; only the value is of interest.
                    // An identifier without a colon yields an empty value.
                    $linkHandlerValue = explode(':', trim($tLP['identifier']), 2)[1] ?? '';
                    if (MathUtility::canBeInterpretedAsInteger($linkHandlerValue)) {
                        // Token and substitute value
                        $elements[$elementKey]['subst'] = [
                            'type' => 'db',
                            'recordRef' => 'sys_file:' . $linkHandlerValue,
                            'tokenID' => $tokenID,
                            'tokenValue' => $tLP['identifier'],
                        ];
                        // Output content will be the token instead:
                        $content = '{softref:' . $tokenID . '}';
                    } else {
                        // This is a link to a folder...
                        return $content;
                    }
                } else {
                    return $content;
                }
                break;
            case LinkService::TYPE_PAGE:
                // Rebuild page reference typolink part:
                $content = '';
                // Set page id:
                if ($tLP['pageuid']) {
                    $content .= '{softref:' . $tokenID . '}';
                    $elements[$elementKey]['subst'] = [
                        'type' => 'db',
                        'recordRef' => 'pages:' . $tLP['pageuid'],
                        'tokenID' => $tokenID,
                        'tokenValue' => $tLP['pageuid'],
                    ];
                }
                // Add type if applicable
                if ((string)($tLP['pagetype'] ?? '') !== '') {
                    $content .= ',' . $tLP['pagetype'];
                }
                // Add anchor if applicable
                if ((string)($tLP['anchor'] ?? '') !== '') {
                    // Anchor is assumed to point to a content elements:
                    if (MathUtility::canBeInterpretedAsInteger($tLP['anchor'])) {
                        // Initialize a new entry because we have a new relation:
                        $newTokenID = $this->makeTokenID('setTypoLinkPartsElement:anchor:' . $idx);
                        $elements[$newTokenID . ':' . $idx] = [];
                        $elements[$newTokenID . ':' . $idx]['matchString'] = 'Anchor Content Element: ' . $tLP['anchor'];
                        $content .= '#{softref:' . $newTokenID . '}';
                        $elements[$newTokenID . ':' . $idx]['subst'] = [
                            'type' => 'db',
                            'recordRef' => 'tt_content:' . $tLP['anchor'],
                            'tokenID' => $newTokenID,
                            'tokenValue' => $tLP['anchor'],
                        ];
                    } else {
                        // Anchor is a hardcoded string
                        $content .= '#' . $tLP['anchor'];
                    }
                }
                break;
            case LinkService::TYPE_RECORD:
                $elements[$elementKey]['subst'] = [
                    'type' => 'db',
                    'recordRef' => $tLP['table'] . ':' . $tLP['uid'],
                    'tokenID' => $tokenID,
                    'tokenValue' => $content,
                ];
                $content = '{softref:' . $tokenID . '}';
                break;
            default:
                // Unknown type: give event listeners the chance to handle the link
                $event = new AppendLinkHandlerElementsEvent($tLP, $content, $elements, $idx, $tokenID);
                $this->eventDispatcher->dispatch($event);

                $elements = $event->getElements();
                $tLP = $event->getLinkParts();
                $content = $event->getContent();

                if (!$event->isResolved()) {
                    $elements[$elementKey]['error'] = 'Couldn\'t decide typolink mode.';
                    return $content;
                }
        }

        // Finally, for all entries that was rebuild with tokens, add target, class, title and additionalParams in the end:
        $tLP['url'] = $content;
        $content = GeneralUtility::makeInstance(TypoLinkCodecService::class)->encode($tLP);

        // Return rebuilt typolink value:
        return $content;
    }

    /**
     * Make Token ID for input index.
     * The ID is the md5 hash of the current base prefix (set by findRef()) plus the given suffix.
     *
     * @param string $index Suffix value.
     * @return string Token ID
     */
    public function makeTokenID($index = '')
    {
        return md5($this->tokenID_basePrefix . ':' . $index);
    }
}