1<?php
2
3declare(strict_types=1);
4
5/*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18namespace TYPO3\CMS\Redirects\Service;
19
20use Psr\Http\Message\UriInterface;
21use Psr\Log\LoggerAwareInterface;
22use Psr\Log\LoggerAwareTrait;
23use TYPO3\CMS\Core\Context\Context;
24use TYPO3\CMS\Core\Domain\Repository\PageRepository;
25use TYPO3\CMS\Core\Http\Uri;
26use TYPO3\CMS\Core\LinkHandling\LinkService;
27use TYPO3\CMS\Core\Resource\Exception\InvalidPathException;
28use TYPO3\CMS\Core\Resource\File;
29use TYPO3\CMS\Core\Resource\Folder;
30use TYPO3\CMS\Core\Routing\PageArguments;
31use TYPO3\CMS\Core\Site\Entity\NullSite;
32use TYPO3\CMS\Core\Site\Entity\SiteInterface;
33use TYPO3\CMS\Core\Site\SiteFinder;
34use TYPO3\CMS\Core\Utility\GeneralUtility;
35use TYPO3\CMS\Core\Utility\HttpUtility;
36use TYPO3\CMS\Frontend\Authentication\FrontendUserAuthentication;
37use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
38use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
39use TYPO3\CMS\Frontend\Typolink\AbstractTypolinkBuilder;
40use TYPO3\CMS\Frontend\Typolink\UnableToLinkException;
41
/**
 * Creates a proper URL to redirect from a matched redirect of a request
 *
 * The service has two responsibilities:
 * - find the redirect record that applies to a requested domain/path/query (matchRedirect())
 * - turn the target of such a record into the final URI to redirect to (getTargetUrl())
 *
 * @internal due to some possible refactorings in TYPO3 v9
 */
class RedirectService implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * @var RedirectCacheService
     */
    protected $redirectCacheService;

    /**
     * @var LinkService
     */
    protected $linkService;

    /**
     * @var SiteFinder
     */
    protected $siteFinder;

    public function __construct(RedirectCacheService $redirectCacheService, LinkService $linkService, SiteFinder $siteFinder)
    {
        $this->redirectCacheService = $redirectCacheService;
        $this->linkService = $linkService;
        $this->siteFinder = $siteFinder;
    }

    /**
     * Checks against all available redirects "flat" or "regexp", and against starttime/endtime
     *
     * Redirects registered for the given domain are evaluated first, redirects registered
     * for any domain ("*") second. Within one domain the candidates are collected in this order:
     * flat path match, flat match including the query string (replaces the plain flat match),
     * regular expression matches (added to the candidates). The first active candidate wins.
     *
     * @param string $domain
     * @param string $path
     * @param string $query
     * @return array|null the matched redirect record, or null if no active redirect matches
     */
    public function matchRedirect(string $domain, string $path, string $query = '')
    {
        $allRedirects = $this->fetchRedirects();
        $path = rawurldecode($path);
        $pathWithoutTrailingSlash = rtrim($path, '/');
        $hasQuery = !empty($query);
        $queryString = ltrim($query, '?');

        // Check if the domain matches, or if there is a
        // redirect fitting for any domain
        foreach ([$domain, '*'] as $domainName) {
            if (empty($allRedirects[$domainName])) {
                continue;
            }
            $domainRedirects = $allRedirects[$domainName];

            $possibleRedirects = [];
            // check if a flat redirect matches, flat redirects are looked up with a trailing slash
            $flatPath = $pathWithoutTrailingSlash . '/';
            if (!empty($domainRedirects['flat'][$flatPath])) {
                $possibleRedirects = $domainRedirects['flat'][$flatPath];
            }
            // check if a flat redirect matches with the query applied,
            // first without and then with a slash in front of the query
            if ($hasQuery) {
                $pathWithQuery = $pathWithoutTrailingSlash . '?' . $queryString;
                $pathWithQueryAndSlash = $pathWithoutTrailingSlash . '/?' . $queryString;
                if (!empty($domainRedirects['respect_query_parameters'][$pathWithQuery])) {
                    $possibleRedirects = $domainRedirects['respect_query_parameters'][$pathWithQuery];
                } elseif (!empty($domainRedirects['respect_query_parameters'][$pathWithQueryAndSlash])) {
                    $possibleRedirects = $domainRedirects['respect_query_parameters'][$pathWithQueryAndSlash];
                }
            }
            // check all redirects that are registered as regex
            if (!empty($domainRedirects['regexp'])) {
                $regExpPath = $path;
                if ($hasQuery) {
                    $regExpPath .= '?' . $queryString;
                }
                foreach ($domainRedirects['regexp'] as $regexp => $regexpRedirects) {
                    // The pattern is editor supplied and may be broken. The warning of preg_match() is
                    // suppressed on purpose, the failure is detected through the return value and logged.
                    $matchResult = @preg_match((string)$regexp, $regExpPath);

                    // Log invalid regular expression
                    if ($matchResult === false) {
                        $this->logger->warning('Invalid regex in redirect', ['regex' => $regexp]);
                        continue;
                    }

                    // We need a second match run to evaluate against path only, even when query parameters were
                    // provided to ensure regexp without query parameters in mind are still processed.
                    // We need to do this only if there are query parameters in the request, otherwise first
                    // preg_match would have found it.
                    if ($matchResult === 0 && $hasQuery) {
                        $matchResult = preg_match((string)$regexp, $path);
                    }

                    if ($matchResult > 0) {
                        // array union: candidates already collected under the same key are kept
                        $possibleRedirects += $regexpRedirects;
                    }
                }
            }

            foreach ($possibleRedirects as $possibleRedirect) {
                // check starttime and endtime for all existing records
                if ($this->isRedirectActive($possibleRedirect)) {
                    return $possibleRedirect;
                }
            }
        }

        return null;
    }

    /**
     * Check if a redirect record matches the starttime and endtime and disable restrictions
     *
     * The record is compared against $GLOBALS['SIM_ACCESS_TIME']. An empty endtime means
     * that the redirect does not expire.
     *
     * @param array $redirectRecord
     *
     * @return bool whether the redirect is active and should be used for redirecting the current request
     */
    protected function isRedirectActive(array $redirectRecord): bool
    {
        if ($redirectRecord['disabled']) {
            return false;
        }
        $accessTime = $GLOBALS['SIM_ACCESS_TIME'];
        if ($redirectRecord['starttime'] > $accessTime) {
            return false;
        }
        return !$redirectRecord['endtime'] || $redirectRecord['endtime'] >= $accessTime;
    }

    /**
     * Fetches all redirects through the redirect cache service, grouped by the domain
     * does NOT take starttime/endtime into account, as it is cached.
     *
     * @return array
     */
    protected function fetchRedirects(): array
    {
        return $this->redirectCacheService->getRedirects();
    }

    /**
     * Resolves the target of a redirect with the LinkService and enriches the result with a "url"
     * for files and folders. Targets of unknown type starting with "/" are treated as relative URLs.
     *
     * @param string $redirectTarget
     *
     * @return array the link details from the linkService, an empty array if the target points to an invalid path
     */
    protected function resolveLinkDetailsFromLinkTarget(string $redirectTarget): array
    {
        try {
            $linkDetails = $this->linkService->resolve($redirectTarget);
            switch ($linkDetails['type']) {
                case LinkService::TYPE_URL:
                    // all set up, nothing to do
                    break;
                case LinkService::TYPE_FILE:
                    $file = $linkDetails['file'] ?? null;
                    if ($file instanceof File) {
                        $linkDetails['url'] = $file->getPublicUrl();
                    }
                    break;
                case LinkService::TYPE_FOLDER:
                    $folder = $linkDetails['folder'] ?? null;
                    if ($folder instanceof Folder) {
                        $linkDetails['url'] = $folder->getPublicUrl();
                    }
                    break;
                case LinkService::TYPE_UNKNOWN:
                    // If $redirectTarget could not be resolved, we can only assume $redirectTarget with leading '/'
                    // as relative redirect and try to resolve it with enriched information from current request.
                    // That ensures that regexp redirects ending in replaceRegExpCaptureGroup(), but also ensures
                    // that relative urls are not left as unknown file here.
                    if (str_starts_with($redirectTarget, '/')) {
                        $linkDetails = [
                            'type' => LinkService::TYPE_URL,
                            'url' => $redirectTarget,
                        ];
                    }
                    break;
                default:
                    // we have to return the link details without having a "URL" parameter
            }
        } catch (InvalidPathException $e) {
            return [];
        }

        return $linkDetails;
    }

    /**
     * Builds the URI a matched redirect record points to.
     *
     * Targets that resolve to a plain URL (files, folders, external or relative URLs) are built
     * directly. Everything else (e.g. pages or records) is handed over to the typolink builder
     * registered for the link type, which requires a frontend controller.
     *
     * @param array $matchedRedirect
     * @param array $queryParams
     * @param FrontendUserAuthentication $frontendUserAuthentication
     * @param UriInterface $uri
     * @param SiteInterface|null $site
     * @return UriInterface|null
     */
    public function getTargetUrl(array $matchedRedirect, array $queryParams, FrontendUserAuthentication $frontendUserAuthentication, UriInterface $uri, ?SiteInterface $site = null): ?UriInterface
    {
        $this->logger->debug('Found a redirect to process', $matchedRedirect);
        $linkParameterParts = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode((string)$matchedRedirect['target']);
        $redirectTarget = $linkParameterParts['url'];
        $linkDetails = $this->resolveLinkDetailsFromLinkTarget($redirectTarget);
        $this->logger->debug('Resolved link details for redirect', $linkDetails);

        $keepQueryParameters = !empty($matchedRedirect['keep_query_parameters']);
        // Additional parameters configured in the target overrule the ones of the request
        if ($keepQueryParameters && !empty($linkParameterParts['additionalParams'])) {
            $params = GeneralUtility::explodeUrl2Array($linkParameterParts['additionalParams']);
            foreach ($params as $key => $value) {
                $queryParams[$key] = $value;
            }
        }
        // Do this for files, folders, external URLs or relative urls
        if (!empty($linkDetails['url'])) {
            if ($matchedRedirect['is_regexp'] ?? false) {
                $linkDetails = $this->replaceRegExpCaptureGroup($matchedRedirect, $uri, $linkDetails);
            }

            $url = new Uri($linkDetails['url']);
            if (!empty($matchedRedirect['force_https'])) {
                $url = $url->withScheme('https');
            }
            if ($keepQueryParameters) {
                $url = $this->addQueryParams($queryParams, $url);
            }
            return $url;
        }
        // $linkDetails is an empty array if the target could not be resolved, so "type" may be missing
        $linkType = $linkDetails['type'] ?? null;
        if (($site === null || $site instanceof NullSite) && $linkType === 'page') {
            $site = $this->siteFinder->getSiteByPageId((int)($linkDetails['pageuid'] ?? 0));
        }
        // If it's a record or page, then boot up TSFE and use typolink
        return $this->getUriFromCustomLinkDetails($matchedRedirect, $frontendUserAuthentication, $site, $linkDetails, $queryParams);
    }

    /**
     * Adds query parameters to a Uri object
     *
     * @param array $queryParams
     * @param Uri $url
     * @return Uri
     */
    protected function addQueryParams(array $queryParams, Uri $url): Uri
    {
        // New query parameters overrule the ones that should be kept
        $newQueryParamString = $url->getQuery();
        if (!empty($newQueryParamString)) {
            $newQueryParams = [];
            parse_str($newQueryParamString, $newQueryParams);
            $queryParams = array_replace_recursive($queryParams, $newQueryParams);
        }
        $query = http_build_query($queryParams, '', '&', PHP_QUERY_RFC3986);
        // An empty result leaves the query of the given Uri untouched
        if ($query) {
            $url = $url->withQuery($query);
        }
        return $url;
    }

    /**
     * Called when TypoScript/TSFE is available, so typolink is used to generate the URL
     *
     * @param array $redirectRecord
     * @param FrontendUserAuthentication $frontendUserAuthentication
     * @param SiteInterface|null $site
     * @param array $linkDetails
     * @param array $queryParams
     * @return UriInterface|null null if no typolink builder is registered for the link type or no link could be built
     */
    protected function getUriFromCustomLinkDetails(array $redirectRecord, FrontendUserAuthentication $frontendUserAuthentication, ?SiteInterface $site, array $linkDetails, array $queryParams): ?UriInterface
    {
        if (!isset($linkDetails['type'], $GLOBALS['TYPO3_CONF_VARS']['FE']['typolinkBuilder'][$linkDetails['type']])) {
            return null;
        }
        $controller = $this->bootFrontendController($frontendUserAuthentication, $site, $queryParams);
        /** @var AbstractTypolinkBuilder $linkBuilder */
        $linkBuilder = GeneralUtility::makeInstance(
            $GLOBALS['TYPO3_CONF_VARS']['FE']['typolinkBuilder'][$linkDetails['type']],
            $controller->cObj,
            $controller
        );
        try {
            $configuration = [
                'parameter' => (string)$redirectRecord['target'],
                'forceAbsoluteUrl' => true,
                'linkAccessRestrictedPages' => true,
            ];
            if (!empty($redirectRecord['force_https'])) {
                $configuration['forceAbsoluteUrl.']['scheme'] = 'https';
            }
            if (!empty($redirectRecord['keep_query_parameters'])) {
                $configuration['additionalParams'] = HttpUtility::buildQueryString($queryParams, '&');
            }
            [$url] = $linkBuilder->build($linkDetails, '', '', $configuration);
            return new Uri($url);
        } catch (UnableToLinkException $e) {
            // This exception is also thrown by the DatabaseRecordTypolinkBuilder,
            // fall back to the last URL the content object has generated, if there is one
            $url = $controller->cObj->lastTypoLinkUrl;
            if (!empty($url)) {
                return new Uri($url);
            }
            return null;
        }
    }

    /**
     * Finishing booting up TSFE, after that the following properties are available.
     *
     * Instantiating is done by the middleware stack (see Configuration/RequestMiddlewares.php)
     *
     * - TSFE->fe_user
     * - TSFE->sys_page
     * - TSFE->tmpl
     * - TSFE->config
     * - TSFE->cObj
     *
     * So a link to a page can be generated.
     *
     * @param FrontendUserAuthentication $frontendUserAuthentication
     * @param SiteInterface|null $site
     * @param array $queryParams
     * @return TypoScriptFrontendController
     */
    protected function bootFrontendController(FrontendUserAuthentication $frontendUserAuthentication, ?SiteInterface $site, array $queryParams): TypoScriptFrontendController
    {
        // $GLOBALS['TSFE'] is not necessarily set at this point, so it must not be read unguarded
        if ($site !== null) {
            $pageId = $site->getRootPageId();
        } else {
            $pageId = !empty($GLOBALS['TSFE']) ? $GLOBALS['TSFE']->id : 0;
        }
        // $site is nullable, asking it for the default language unconditionally would be a fatal error.
        // NOTE(review): without a site, null is handed over as language - confirm that the controller
        // constructor of the targeted core version resolves site and language on its own in that case.
        $siteLanguage = $site !== null ? $site->getDefaultLanguage() : null;
        $controller = GeneralUtility::makeInstance(
            TypoScriptFrontendController::class,
            GeneralUtility::makeInstance(Context::class),
            $site,
            $siteLanguage,
            new PageArguments((int)$pageId, '0', [])
        );
        $controller->fe_user = $frontendUserAuthentication;
        $controller->fetch_the_id();
        $controller->calculateLinkVars($queryParams);
        $controller->getConfigArray();
        $controller->settingLanguage();
        $controller->newCObj();
        // Register the controller globally, but never replace a controller that is already in place
        if (!(($GLOBALS['TSFE'] ?? null) instanceof TypoScriptFrontendController)) {
            $GLOBALS['TSFE'] = $controller;
        }
        if (!$GLOBALS['TSFE']->sys_page instanceof PageRepository) {
            $GLOBALS['TSFE']->sys_page = GeneralUtility::makeInstance(PageRepository::class);
        }
        return $controller;
    }

    /**
     * Replaces the placeholders ("$0", "$1", "$name", ...) in the target URL of a regexp redirect
     * with the groups captured by matching the source path pattern against the requested URI.
     *
     * The query string of the request is only part of the matched subject if the redirect
     * respects query parameters. If the pattern does not match, the link details are returned unchanged.
     *
     * @param array $matchedRedirect
     * @param UriInterface $uri
     * @param array $linkDetails
     * @return array the link details with the placeholders in "url" replaced
     */
    protected function replaceRegExpCaptureGroup(array $matchedRedirect, UriInterface $uri, array $linkDetails): array
    {
        $uriToCheck = rawurldecode($uri->getPath());
        if (($matchedRedirect['respect_query_parameters'] ?? false) && $uri->getQuery()) {
            $uriToCheck .= '?' . rawurldecode($uri->getQuery());
        }
        $matches = [];
        $matchResult = preg_match($matchedRedirect['source_path'], $uriToCheck, $matches);
        if ($matchResult > 0) {
            $replacements = [];
            foreach ($matches as $key => $val) {
                // Unsafe regexp capturing group may lead to adding query parameters to result url, which we need
                // to prevent here, thus throwing everything beginning with ? away
                if (str_contains($val, '?')) {
                    $val = explode('?', $val, 2)[0];
                    $this->logger->warning(
                        sprintf(
                            'Unsafe captching group regex in redirect #%s, including query parameters in matched group',
                            $matchedRedirect['uid'] ?? 0
                        ),
                        ['regex' => $matchedRedirect['source_path']]
                    );
                }
                $replacements['$' . $key] = $val;
            }
            // All placeholders are replaced in one pass: strtr() prefers the longest placeholder, so "$1"
            // does not destroy "$10", and it never re-scans inserted values, so a captured value that
            // looks like a placeholder is not expanded a second time.
            $linkDetails['url'] = strtr($linkDetails['url'], $replacements);
        }
        return $linkDetails;
    }
}
417