1<?php
2
3declare(strict_types=1);
4
5/*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18namespace TYPO3\CMS\Core\Routing;
19
20use Psr\Http\Message\ServerRequestInterface;
21use Symfony\Component\Routing\Exception\NoConfigurationException;
22use Symfony\Component\Routing\Exception\ResourceNotFoundException;
23use Symfony\Component\Routing\Matcher\UrlMatcher;
24use Symfony\Component\Routing\RequestContext;
25use TYPO3\CMS\Core\Cache\CacheManager;
26use TYPO3\CMS\Core\Exception\SiteNotFoundException;
27use TYPO3\CMS\Core\Http\NormalizedParams;
28use TYPO3\CMS\Core\SingletonInterface;
29use TYPO3\CMS\Core\Site\Entity\NullSite;
30use TYPO3\CMS\Core\Site\Entity\SiteInterface;
31use TYPO3\CMS\Core\Site\Entity\SiteLanguage;
32use TYPO3\CMS\Core\Site\SiteFinder;
33use TYPO3\CMS\Core\Utility\GeneralUtility;
34use TYPO3\CMS\Core\Utility\RootlineUtility;
35
/**
 * Returns a site based on a given request.
 *
 * The main usage is the ->matchRequest() functionality, which receives a request object and boots up
 * Symfony Routing to find the proper route with its defaults / attributes.
 *
 * On top, this is also commonly used throughout TYPO3 to fetch a site by a given page ID via
 * ->matchByPageId().
 *
 * The concept of the SiteMatcher is to *resolve*, and not build URIs. On top, it is a facade to hide the
 * dependency to symfony and to not expose its logic.
 *
 * @internal Please note that the site matcher will probably cease to exist and be adapted to the SiteFinder concept when Pseudo-Site handling will be removed.
 */
class SiteMatcher implements SingletonInterface
{
    /**
     * @var SiteFinder
     */
    protected $finder;

    /**
     * Injects necessary objects.
     *
     * @param SiteFinder|null $finder Finder to use; when omitted, an instance is fetched via GeneralUtility::makeInstance()
     */
    public function __construct(?SiteFinder $finder = null)
    {
        $this->finder = $finder ?? GeneralUtility::makeInstance(SiteFinder::class);
    }

    /**
     * Only used when a page is moved but the pseudo site caches have this information hard-coded, so the caches
     * need to be flushed.
     *
     * @internal
     * @throws \TYPO3\CMS\Core\Cache\Exception\NoSuchCacheException
     */
    public function refresh()
    {
        // Ensure root line caches are flushed: first via RootlineUtility, then the "rootline" cache itself
        RootlineUtility::purgeCaches();
        GeneralUtility::makeInstance(CacheManager::class)->getCache('rootline')->flush();
    }

    /**
     * First, it is checked if an "id" GET/POST parameter is found.
     * If it is, we check for a valid site mounted there and - if an "L" parameter is given as well - for
     * the matching language of that site.
     *
     * If no language could be determined that way, the quest continues by matching the whole request URL
     * against all available site records (and their language prefixes).
     *
     * @param ServerRequestInterface $request
     * @return RouteResultInterface
     */
    public function matchRequest(ServerRequestInterface $request): RouteResultInterface
    {
        $site = new NullSite();
        $language = null;
        $defaultLanguage = null;

        // Non-scalar values (e.g. "?id[]=1") are treated as "not given" instead of being cast to 1
        $pageId = $this->resolveIntegerParameter($request, 'id') ?? 0;

        // First, check if we have a _GET/_POST parameter for "id", then a site information can be resolved based.
        if ($pageId > 0) {
            // Loop over the whole rootline without permissions to get the actual site information
            try {
                $site = $this->finder->getSiteByPageId($pageId);
                // If a "L" parameter is given, we take that one into account.
                $languageId = $this->resolveIntegerParameter($request, 'L');
                if ($languageId !== null) {
                    $language = $site->getLanguageById($languageId);
                } else {
                    // Use this later below
                    $defaultLanguage = $site->getDefaultLanguage();
                }
            } catch (SiteNotFoundException $e) {
                // No site found by the given page
            } catch (\InvalidArgumentException $e) {
                // The language fetched by getLanguageById() was not available, now the PSR-15 middleware
                // redirects to the default page.
            }
        }

        // If the URL path starts with the script name reported by NormalizedParams, that prefix is
        // replaced by the site path before the URL is matched.
        $uri = $request->getUri();
        if (!empty($uri->getPath())) {
            $normalizedParams = $request->getAttribute('normalizedParams');
            if ($normalizedParams instanceof NormalizedParams) {
                $urlPath = ltrim($uri->getPath(), '/');
                $scriptName = ltrim($normalizedParams->getScriptName(), '/');
                $scriptPath = ltrim($normalizedParams->getSitePath(), '/');
                if ($scriptName !== '' && str_starts_with($urlPath, $scriptName)) {
                    // str_starts_with() and substr() both work on bytes, so the offset has to be a byte
                    // length as well - mb_strlen() would cut at the wrong position for multibyte names.
                    $urlPath = '/' . $scriptPath . substr($urlPath, strlen($scriptName));
                    $uri = $uri->withPath($urlPath);
                }
            }
        }

        // No language found at this point means that the URL was not used with a valid "?id=1&L=2" parameter
        // which resulted in a site / language combination that was found. Now, the matching is done
        // on the incoming URL.
        if (!($language instanceof SiteLanguage)) {
            $collection = $this->getRouteCollectionForAllSites();
            $context = new RequestContext(
                '',
                $request->getMethod(),
                $this->hostToAscii($uri->getHost()),
                $uri->getScheme(),
                // Ports are only necessary for URL generation in Symfony which is not used by TYPO3
                80,
                443,
                $uri->getPath()
            );
            $matcher = new UrlMatcher($collection, $context);
            try {
                $result = $matcher->match($uri->getPath());
                return new SiteRouteResult(
                    $uri,
                    $result['site'],
                    // if no language is found, this usually results due to "/" called instead of "/fr/"
                    // but it could also be the reason that "/index.php?id=23" was called, so the default
                    // language is used as a fallback here then.
                    $result['language'] ?? $defaultLanguage,
                    $result['tail']
                );
            } catch (NoConfigurationException | ResourceNotFoundException $e) {
                // At this point we discard a possible found site via ?id=123
                // Because ?id=123 _can_ only work if the actual domain/site base works
                // so www.domain-without-site-configuration/index.php?id=123 (where 123 is a page referring
                // to a page within a site configuration) will never be resolved here properly
                $site = new NullSite();
            }
        }

        return new SiteRouteResult($uri, $site, $language);
    }

    /**
     * If a given page ID is handed in, a Site/NullSite is returned.
     *
     * A SiteNotFoundException raised by the SiteFinder is caught here and turned into a NullSite.
     *
     * @param int $pageId uid of a page in default language
     * @param array|null $rootLine an alternative root line, if already at hand.
     * @return SiteInterface
     */
    public function matchByPageId(int $pageId, ?array $rootLine = null): SiteInterface
    {
        try {
            return $this->finder->getSiteByPageId($pageId, $rootLine);
        } catch (SiteNotFoundException $e) {
            return new NullSite();
        }
    }

    /**
     * Returns a Symfony RouteCollection containing all routes to all sites.
     *
     * For each site one route for the site base and one route per site language base is created. Routes
     * are grouped by scheme + host and by path, so that they can be added in most-specific-first order.
     *
     * @return RouteCollection
     */
    protected function getRouteCollectionForAllSites(): RouteCollection
    {
        $groupedRoutes = [];
        foreach ($this->finder->getAllSites() as $site) {
            // Add the site as entrypoint
            // @todo Find a way to test only this basic route against chinese characters, as site languages kicking
            //       always in. Do the rawurldecode() here to be consistent with language preparations.
            $uri = $site->getBase();
            $route = new Route(
                (rawurldecode($uri->getPath()) ?: '/') . '{tail}',
                ['site' => $site, 'language' => null, 'tail' => ''],
                // array_filter() drops the "port" requirement when the base has no port
                array_filter(['tail' => '.*', 'port' => (string)$uri->getPort()]),
                ['utf8' => true],
                // @todo Verify if host should here covered with idn_to_ascii() to be consistent with preparation for languages.
                $uri->getHost() ?: '',
                $uri->getScheme() === '' ? [] : [$uri->getScheme()]
            );
            $identifier = 'site_' . $site->getIdentifier();
            // '-' is the placeholder for a missing scheme / host in the grouping key
            $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route;
            // Add all languages
            foreach ($site->getAllLanguages() as $siteLanguage) {
                $uri = $siteLanguage->getBase();
                $route = new Route(
                    (rawurldecode($uri->getPath()) ?: '/') . '{tail}',
                    ['site' => $site, 'language' => $siteLanguage, 'tail' => ''],
                    array_filter(['tail' => '.*', 'port' => (string)$uri->getPort()]),
                    ['utf8' => true],
                    $this->hostToAscii($uri->getHost()),
                    $uri->getScheme() === '' ? [] : [$uri->getScheme()]
                );
                $identifier = 'site_' . $site->getIdentifier() . '_' . $siteLanguage->getLanguageId();
                $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route;
            }
        }
        return $this->createRouteCollectionFromGroupedRoutes($groupedRoutes);
    }

    /**
     * As the {tail} parameter is greedy, it needs to be ensured that the one with the
     * most specific part matches first.
     *
     * All three levels (scheme + host, path, route identifier) are sorted by key in reverse order
     * before the routes are added to the collection.
     *
     * @param array $groupedRoutes Routes indexed by [scheme + host][path][identifier]
     * @return RouteCollection
     */
    protected function createRouteCollectionFromGroupedRoutes(array $groupedRoutes): RouteCollection
    {
        $collection = new RouteCollection();
        // Ensure more generic routes containing '-' in host identifier, processed at last
        krsort($groupedRoutes);
        foreach ($groupedRoutes as $groupedRoutesPerHost) {
            krsort($groupedRoutesPerHost);
            foreach ($groupedRoutesPerHost as $groupedRoutesPerPath) {
                krsort($groupedRoutesPerPath);
                foreach ($groupedRoutesPerPath as $identifier => $route) {
                    $collection->add($identifier, $route);
                }
            }
        }
        return $collection;
    }

    /**
     * Reads a GET/POST parameter and casts it to an integer.
     *
     * The query parameters take precedence over the parsed body. The parsed body is only consulted
     * when it is an array, as it may also be null or an object.
     *
     * @param ServerRequestInterface $request
     * @param string $name Name of the parameter, e.g. "id" or "L"
     * @return int|null The integer value, or null if the parameter is missing or not a scalar value
     */
    private function resolveIntegerParameter(ServerRequestInterface $request, string $name): ?int
    {
        $value = $request->getQueryParams()[$name] ?? null;
        if ($value === null) {
            $parsedBody = $request->getParsedBody();
            $value = is_array($parsedBody) ? ($parsedBody[$name] ?? null) : null;
        }
        // Arrays (e.g. "?id[]=1") would be cast to 1, so only scalar values are accepted
        return is_scalar($value) ? (int)$value : null;
    }

    /**
     * Converts a host name to its ASCII (punycode) representation.
     *
     * idn_to_ascii() rejects an empty domain (it returns false, newer PHP versions throw a ValueError
     * instead), so an empty host is passed through without calling it.
     *
     * @param string $host
     * @return string The converted host, or an empty string if the host is empty or cannot be converted
     */
    private function hostToAscii(string $host): string
    {
        return $host === '' ? '' : (string)idn_to_ascii($host);
    }
}
256