1<?php
2declare(strict_types = 1);
3namespace TYPO3\CMS\Core\DataHandling;
4
5/*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18use Doctrine\DBAL\Connection;
19use TYPO3\CMS\Backend\Utility\BackendUtility;
20use TYPO3\CMS\Core\Charset\CharsetConverter;
21use TYPO3\CMS\Core\Database\ConnectionPool;
22use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24use TYPO3\CMS\Core\DataHandling\Model\RecordState;
25use TYPO3\CMS\Core\DataHandling\Model\RecordStateFactory;
26use TYPO3\CMS\Core\Exception\SiteNotFoundException;
27use TYPO3\CMS\Core\Routing\SiteMatcher;
28use TYPO3\CMS\Core\Utility\GeneralUtility;
29use TYPO3\CMS\Core\Utility\MathUtility;
30use TYPO3\CMS\Core\Versioning\VersionState;
31
32/**
33 * Generates, sanitizes and validates slugs for a TCA field
34 */
35class SlugHelper
36{
    /**
     * Name of the TCA table this helper works on, as handed to the constructor.
     *
     * @var string
     */
    protected $tableName;

    /**
     * Name of the TCA field holding the slug, as handed to the constructor.
     *
     * @var string
     */
    protected $fieldName;

    /**
     * The workspace ID to be working on (0 is the constructor default).
     *
     * @var int
     */
    protected $workspaceId;

    /**
     * TCA configuration of the slug field. Keys read by this class are
     * "fallbackCharacter", "prependSlash" and "generatorOptions".
     *
     * @var array
     */
    protected $configuration = [];

    /**
     * Result of BackendUtility::isTableWorkspaceEnabled() for the table,
     * determined once in the constructor.
     *
     * @var bool
     */
    protected $workspaceEnabled;

    /**
     * Defines whether the slug field should start with "/".
     * For pages (due to rootline functionality), this is a must have, otherwise the root level page
     * would have an empty value.
     * For every other table/field the value is taken from the "prependSlash" configuration
     * option and defaults to false.
     *
     * @var bool
     */
    protected $prependSlashInSlug;
70
71    /**
72     * Slug constructor.
73     *
74     * @param string $tableName TCA table
75     * @param string $fieldName TCA field
76     * @param array $configuration TCA configuration of the field
77     * @param int $workspaceId the workspace ID to be working on.
78     */
79    public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
80    {
81        $this->tableName = $tableName;
82        $this->fieldName = $fieldName;
83        $this->configuration = $configuration;
84        $this->workspaceId = $workspaceId;
85
86        if ($this->tableName === 'pages' && $this->fieldName === 'slug') {
87            $this->prependSlashInSlug = true;
88        } else {
89            $this->prependSlashInSlug = $this->configuration['prependSlash'] ?? false;
90        }
91
92        $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);
93    }
94
95    /**
96     * Cleans a slug value so it is used directly in the path segment of a URL.
97     *
98     * @param string $slug
99     * @return string
100     */
101    public function sanitize(string $slug): string
102    {
103        // Convert to lowercase + remove tags
104        $slug = mb_strtolower($slug, 'utf-8');
105        $slug = strip_tags($slug);
106
107        // Convert some special tokens (space, "_" and "-") to the space character
108        $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
109        $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);
110
111        // Convert extended letters to ascii equivalents
112        // The specCharsToASCII() converts "€" to "EUR"
113        $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);
114
115        // Get rid of all invalid characters, but allow slashes
116        $slug = preg_replace('/[^\p{L}\p{M}0-9\/' . preg_quote($fallbackCharacter) . ']/u', '', $slug);
117
118        // Convert multiple fallback characters to a single one
119        if ($fallbackCharacter !== '') {
120            $slug = preg_replace('/' . preg_quote($fallbackCharacter) . '{2,}/', $fallbackCharacter, $slug);
121        }
122
123        // Ensure slug is lower cased after all replacement was done
124        $slug = mb_strtolower($slug, 'utf-8');
125        // Extract slug, thus it does not have wrapping fallback and slash characters
126        $extractedSlug = $this->extract($slug);
127        // Remove trailing and beginning slashes, except if the trailing slash was added, then we'll re-add it
128        $appendTrailingSlash = $extractedSlug !== '' && substr($slug, -1) === '/';
129        $slug = $extractedSlug . ($appendTrailingSlash ? '/' : '');
130        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
131            $slug = '/' . $slug;
132        }
133        return $slug;
134    }
135
136    /**
137     * Extracts payload of slug and removes wrapping delimiters,
138     * e.g. `/hello/world/` will become `hello/world`.
139     *
140     * @param string $slug
141     * @return string
142     */
143    public function extract(string $slug): string
144    {
145        // Convert some special tokens (space, "_" and "-") to the space character
146        $fallbackCharacter = $this->configuration['fallbackCharacter'] ?? '-';
147        return trim($slug, $fallbackCharacter . '/');
148    }
149
150    /**
151     * Used when no slug exists for a record
152     *
153     * @param array $recordData
154     * @param int $pid The uid of the page to generate the slug for
155     * @return string
156     */
157    public function generate(array $recordData, int $pid): string
158    {
159        if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
160            return '/';
161        }
162        $prefix = '';
163        if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
164            $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
165            $languageId = (int)($recordData[$languageFieldName] ?? 0);
166            $parentPageRecord = $this->resolveParentPageRecord($pid, $languageId);
167            if (is_array($parentPageRecord)) {
168                // If the parent page has a slug, use that instead of "re-generating" the slug from the parents' page title
169                if (!empty($parentPageRecord['slug'])) {
170                    $rootLineItemSlug = $parentPageRecord['slug'];
171                } else {
172                    $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
173                }
174                $rootLineItemSlug = trim($rootLineItemSlug, '/');
175                if (!empty($rootLineItemSlug)) {
176                    $prefix = $rootLineItemSlug;
177                }
178            }
179        }
180
181        $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
182        $slugParts = [];
183
184        $replaceConfiguration = $this->configuration['generatorOptions']['replacements'] ?? [];
185        foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldNameParts) {
186            if (is_string($fieldNameParts)) {
187                $fieldNameParts = GeneralUtility::trimExplode(',', $fieldNameParts);
188            }
189            foreach ($fieldNameParts as $fieldName) {
190                if (!empty($recordData[$fieldName])) {
191                    $pieceOfSlug = $recordData[$fieldName];
192                    $pieceOfSlug = str_replace(
193                        array_keys($replaceConfiguration),
194                        array_values($replaceConfiguration),
195                        $pieceOfSlug
196                    );
197                    $slugParts[] = $pieceOfSlug;
198                    break;
199                }
200            }
201        }
202        $slug = implode($fieldSeparator, $slugParts);
203        $slug = $this->sanitize($slug);
204        // No valid data found
205        if ($slug === '' || $slug === '/') {
206            $slug = 'default-' . GeneralUtility::shortMD5(json_encode($recordData));
207        }
208        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
209            $slug = '/' . $slug;
210        }
211        if (!empty($prefix)) {
212            $slug = $prefix . $slug;
213        }
214
215        // Hook for alternative ways of filling/modifying the slug data
216        foreach ($this->configuration['generatorOptions']['postModifiers'] ?? [] as $funcName) {
217            $hookParameters = [
218                'slug' => $slug,
219                'workspaceId' => $this->workspaceId,
220                'configuration' => $this->configuration,
221                'record' => $recordData,
222                'pid' => $pid,
223                'prefix' => $prefix,
224                'tableName' => $this->tableName,
225                'fieldName' => $this->fieldName,
226            ];
227            $slug = GeneralUtility::callUserFunction($funcName, $hookParameters, $this);
228        }
229        return $this->sanitize($slug);
230    }
231
232    /**
233     * Checks if there are other records with the same slug that are located on the same PID.
234     *
235     * @param string $slug
236     * @param RecordState $state
237     * @return bool
238     */
239    public function isUniqueInPid(string $slug, RecordState $state): bool
240    {
241        $pageId = (int)$state->resolveNodeIdentifier();
242        $recordId = $state->getSubject()->getIdentifier();
243        $languageId = $state->getContext()->getLanguageId();
244
245        if ($pageId < 0) {
246            $pageId = $this->resolveLivePageId($recordId);
247        }
248
249        $queryBuilder = $this->createPreparedQueryBuilder();
250        $this->applySlugConstraint($queryBuilder, $slug);
251        $this->applyPageIdConstraint($queryBuilder, $pageId);
252        $this->applyRecordConstraint($queryBuilder, $recordId);
253        $this->applyLanguageConstraint($queryBuilder, $languageId);
254        $this->applyWorkspaceConstraint($queryBuilder);
255        $statement = $queryBuilder->execute();
256
257        $records = $this->resolveVersionOverlays(
258            $statement->fetchAll()
259        );
260        return count($records) === 0;
261    }
262
263    /**
264     * Check if there are other records with the same slug that are located on the same site.
265     *
266     * @param string $slug
267     * @param RecordState $state
268     * @return bool
269     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
270     */
271    public function isUniqueInSite(string $slug, RecordState $state): bool
272    {
273        $pageId = $state->resolveNodeAggregateIdentifier();
274        $recordId = $state->getSubject()->getIdentifier();
275        $languageId = $state->getContext()->getLanguageId();
276
277        if (!MathUtility::canBeInterpretedAsInteger($pageId)) {
278            // If this is a new page, we use the parent page to resolve the site
279            $pageId = $state->getNode()->getIdentifier();
280        }
281        $pageId = (int)$pageId;
282
283        if ($pageId < 0) {
284            $pageId = $this->resolveLivePageId($recordId);
285        }
286
287        $queryBuilder = $this->createPreparedQueryBuilder();
288        $this->applySlugConstraint($queryBuilder, $slug);
289        $this->applyRecordConstraint($queryBuilder, $recordId);
290        $this->applyLanguageConstraint($queryBuilder, $languageId);
291        $this->applyWorkspaceConstraint($queryBuilder);
292        $statement = $queryBuilder->execute();
293
294        $records = $this->resolveVersionOverlays(
295            $statement->fetchAll()
296        );
297        if (count($records) === 0) {
298            return true;
299        }
300
301        // The installation contains at least ONE other record with the same slug
302        // Now find out if it is the same root page ID
303        $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
304        $siteMatcher->refresh();
305        $siteOfCurrentRecord = $siteMatcher->matchByPageId($pageId);
306        // TODO: Rather than silently ignoring this misconfiguration
307        // (when getting a PseudoSite or NullSite), a warning should
308        // be thrown here, or maybe even let the exception bubble up
309        // and catch it in places that uses this API
310        foreach ($records as $record) {
311            try {
312                $recordState = RecordStateFactory::forName($this->tableName)->fromArray($record);
313                $siteOfExistingRecord = $siteMatcher->matchByPageId(
314                    (int)$recordState->resolveNodeAggregateIdentifier()
315                );
316            } catch (SiteNotFoundException $exception) {
317                // In case not site is found, the record is not
318                // organized in any site or pseudo-site
319                continue;
320            }
321            if ($siteOfExistingRecord->getRootPageId() === $siteOfCurrentRecord->getRootPageId()) {
322                return false;
323            }
324        }
325
326        // Otherwise, everything is still fine
327        return true;
328    }
329
330    /**
331     * Check if there are other records with the same slug.
332     *
333     * @param string $slug
334     * @param RecordState $state
335     * @return bool
336     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
337     */
338    public function isUniqueInTable(string $slug, RecordState $state): bool
339    {
340        $recordId = $state->getSubject()->getIdentifier();
341        $languageId = $state->getContext()->getLanguageId();
342
343        $queryBuilder = $this->createPreparedQueryBuilder();
344        $this->applySlugConstraint($queryBuilder, $slug);
345        $this->applyRecordConstraint($queryBuilder, $recordId);
346        $this->applyLanguageConstraint($queryBuilder, $languageId);
347        $this->applyWorkspaceConstraint($queryBuilder);
348        $statement = $queryBuilder->execute();
349
350        $records = $this->resolveVersionOverlays(
351            $statement->fetchAll()
352        );
353
354        return count($records) === 0;
355    }
356
357    /**
358     * Generate a slug with a suffix "/mytitle-1" if that is in use already.
359     *
360     * @param string $slug proposed slug
361     * @param RecordState $state
362     * @return string
363     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
364     */
365    public function buildSlugForUniqueInSite(string $slug, RecordState $state): string
366    {
367        $slug = $this->sanitize($slug);
368        $rawValue = $this->extract($slug);
369        $newValue = $slug;
370        $counter = 0;
371        while (!$this->isUniqueInSite(
372            $newValue,
373            $state
374        ) && $counter++ < 100
375        ) {
376            $newValue = $this->sanitize($rawValue . '-' . $counter);
377        }
378        if ($counter === 100) {
379            $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
380        }
381        return $newValue;
382    }
383
384    /**
385     * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
386     *
387     * @param string $slug proposed slug
388     * @param RecordState $state
389     * @return string
390     */
391    public function buildSlugForUniqueInPid(string $slug, RecordState $state): string
392    {
393        $slug = $this->sanitize($slug);
394        $rawValue = $this->extract($slug);
395        $newValue = $slug;
396        $counter = 0;
397        while (!$this->isUniqueInPid(
398            $newValue,
399            $state
400        ) && $counter++ < 100
401        ) {
402            $newValue = $this->sanitize($rawValue . '-' . $counter);
403        }
404        if ($counter === 100) {
405            $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
406        }
407        return $newValue;
408    }
409
410    /**
411     * Generate a slug with a suffix "/mytitle-1" if that is in use already.
412     *
413     * @param string $slug proposed slug
414     * @param RecordState $state
415     * @return string
416     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
417     */
418    public function buildSlugForUniqueInTable(string $slug, RecordState $state): string
419    {
420        $slug = $this->sanitize($slug);
421        $rawValue = $this->extract($slug);
422        $newValue = $slug;
423        $counter = 0;
424        while (!$this->isUniqueInTable(
425            $newValue,
426            $state
427        ) && $counter++ < 100
428        ) {
429            $newValue = $this->sanitize($rawValue . '-' . $counter);
430        }
431        if ($counter === 100) {
432            $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
433        }
434        return $newValue;
435    }
436
437    /**
438     * @return QueryBuilder
439     */
440    protected function createPreparedQueryBuilder(): QueryBuilder
441    {
442        $fieldNames = ['uid', 'pid', $this->fieldName];
443        if ($this->workspaceEnabled) {
444            $fieldNames[] = 't3ver_state';
445        }
446        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
447        if (is_string($languageFieldName)) {
448            $fieldNames[] = $languageFieldName;
449        }
450        $languageParentFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['transOrigPointerField'] ?? null;
451        if (is_string($languageParentFieldName)) {
452            $fieldNames[] = $languageParentFieldName;
453        }
454
455        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
456        $queryBuilder->getRestrictions()
457            ->removeAll()
458            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
459        $queryBuilder
460            ->select(...$fieldNames)
461            ->from($this->tableName);
462        return $queryBuilder;
463    }
464
465    /**
466     * @param QueryBuilder $queryBuilder
467     */
468    protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
469    {
470        if (!$this->workspaceEnabled) {
471            return;
472        }
473
474        $workspaceIds = [0];
475        if ($this->workspaceId > 0) {
476            $workspaceIds[] = $this->workspaceId;
477        }
478        $queryBuilder->andWhere(
479            $queryBuilder->expr()->in(
480                't3ver_wsid',
481                $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
482            ),
483            $queryBuilder->expr()->neq(
484                'pid',
485                $queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT)
486            )
487        );
488    }
489
490    /**
491     * @param QueryBuilder $queryBuilder
492     * @param int $languageId
493     */
494    protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
495    {
496        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
497        if (!is_string($languageFieldName)) {
498            return;
499        }
500
501        // Only check records of the given language
502        $queryBuilder->andWhere(
503            $queryBuilder->expr()->eq(
504                $languageFieldName,
505                $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT)
506            )
507        );
508    }
509
510    /**
511     * @param QueryBuilder $queryBuilder
512     * @param string $slug
513     */
514    protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
515    {
516        $queryBuilder->where(
517            $queryBuilder->expr()->eq(
518                $this->fieldName,
519                $queryBuilder->createNamedParameter($slug)
520            )
521        );
522    }
523
524    /**
525     * @param QueryBuilder $queryBuilder
526     * @param int $pageId
527     */
528    protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
529    {
530        if ($pageId < 0) {
531            throw new \RuntimeException(
532                sprintf(
533                    'Page id must be positive "%d"',
534                    $pageId
535                ),
536                1534962573
537            );
538        }
539
540        $queryBuilder->andWhere(
541            $queryBuilder->expr()->eq(
542                'pid',
543                $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT)
544            )
545        );
546    }
547
548    /**
549     * @param QueryBuilder $queryBuilder
550     * @param string|int $recordId
551     */
552    protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
553    {
554        // Exclude the current record if it is an existing record
555        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
556            return;
557        }
558
559        $queryBuilder->andWhere(
560            $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($recordId, \PDO::PARAM_INT))
561        );
562        if ($this->workspaceId > 0 && $this->workspaceEnabled) {
563            $liveId = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
564            $queryBuilder->andWhere(
565                $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($liveId, \PDO::PARAM_INT))
566            );
567        }
568    }
569
570    /**
571     * @param int $recordId
572     * @return int
573     * @throws \RuntimeException
574     */
575    protected function resolveLivePageId($recordId): int
576    {
577        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
578            throw new \RuntimeException(
579                sprintf(
580                    'Cannot resolve live page id for non-numeric identifier "%s"',
581                    $recordId
582                ),
583                1534951024
584            );
585        }
586
587        $liveVersion = BackendUtility::getLiveVersionOfRecord(
588            $this->tableName,
589            $recordId,
590            'pid'
591        );
592
593        if (empty($liveVersion)) {
594            throw new \RuntimeException(
595                sprintf(
596                    'Cannot resolve live page id for record "%s:%d"',
597                    $this->tableName,
598                    $recordId
599                ),
600                1534951025
601            );
602        }
603
604        return (int)$liveVersion['pid'];
605    }
606
607    /**
608     * @param array $records
609     * @return array
610     */
611    protected function resolveVersionOverlays(array $records): array
612    {
613        if (!$this->workspaceEnabled) {
614            return $records;
615        }
616
617        return array_filter(
618            array_map(
619                function (array $record) {
620                    BackendUtility::workspaceOL(
621                        $this->tableName,
622                        $record,
623                        $this->workspaceId,
624                        true
625                    );
626                    if (VersionState::cast($record['t3ver_state'] ?? null)
627                        ->equals(VersionState::DELETE_PLACEHOLDER)) {
628                        return null;
629                    }
630                    return $record;
631                },
632                $records
633            )
634        );
635    }
636
637    /**
638     * Fetch a parent page, but exclude spacers, recyclers and sys-folders and all doktypes > 200
639     * @param int $pid
640     * @param int $languageId
641     * @return array|null
642     */
643    protected function resolveParentPageRecord(int $pid, int $languageId): ?array
644    {
645        $parentPageRecord = null;
646        $rootLine = BackendUtility::BEgetRootLine($pid, '', true, ['nav_title']);
647        do {
648            $parentPageRecord = array_shift($rootLine);
649            // do not use spacers (199), recyclers and folders and everything else
650        } while (!empty($rootLine) && (int)$parentPageRecord['doktype'] >= 199);
651        if ($languageId > 0) {
652            $languageIds = [$languageId];
653            $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
654            $siteMatcher->refresh();
655
656            try {
657                $site = $siteMatcher->matchByPageId($pid);
658                $siteLanguage = $site->getLanguageById($languageId);
659                $languageIds = array_merge($languageIds, $siteLanguage->getFallbackLanguageIds());
660            } catch (SiteNotFoundException | \InvalidArgumentException $e) {
661                // no site or requested language available - move on
662            }
663
664            foreach ($languageIds as $languageId) {
665                $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
666                if (!empty($localizedParentPageRecord)) {
667                    $parentPageRecord = reset($localizedParentPageRecord);
668                    break;
669                }
670            }
671        }
672        return $parentPageRecord;
673    }
674}
675