1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
27use Fisharebest\Webtrees\Registry;
28use Fisharebest\Webtrees\Family;
29use Fisharebest\Webtrees\FlashMessages;
30use Fisharebest\Webtrees\GedcomRecord;
31use Fisharebest\Webtrees\Html;
32use Fisharebest\Webtrees\I18N;
33use Fisharebest\Webtrees\Individual;
34use Fisharebest\Webtrees\Media;
35use Fisharebest\Webtrees\Note;
36use Fisharebest\Webtrees\Repository;
37use Fisharebest\Webtrees\Services\TreeService;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submitter;
40use Fisharebest\Webtrees\Tree;
41use Illuminate\Database\Capsule\Manager as DB;
42use Illuminate\Database\Query\Expression;
43use Illuminate\Support\Collection;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\ServerRequestInterface;
46use Psr\Http\Server\RequestHandlerInterface;
47
48use function app;
49use function assert;
50use function date;
51use function redirect;
52use function response;
53use function route;
54use function view;
55
56/**
57 * Class SiteMapModule
58 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks (14 * 24 * 60 * 60), assuming the cache lifetime is expressed in seconds.
    private const CACHE_LIFE = 1209600;

    // Priority passed to the sitemap file view, keyed by record type.
    // The keys also serve as the list of record types that may appear in a sitemap URL.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    /** @var TreeService */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three sitemap routes (stylesheet, index and per-volume file),
     * all of which are handled by this module's handle() method.
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $map = $router_container->getMap();

        $map->get('sitemap-style', '/sitemap.xsl', $this);
        $map->get('sitemap-index', '/sitemap.xml', $this);
        $map->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page, including links for submitting the
     * sitemap index to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: one "include_in_sitemap" preference per tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Casting a boolean to string gives '1' or '', which is what the readers compare against.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        // NOTE(review): cached sitemaps are not invalidated here, so a change may only
        // become visible once the cached copies expire - confirm whether that is intended.
        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a sitemap request to the stylesheet, the index or a sitemap file,
     * depending on the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = $request->getAttribute('route');
        assert($route instanceof Route);

        if ($route->name === 'sitemap-style') {
            return $this->xmlResponse(view('modules/sitemap/sitemap-xsl'));
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Wrap generated XML in an HTTP 200 response.
     *
     * @param string $content
     *
     * @return ResponseInterface
     */
    private function xmlResponse(string $content): ResponseInterface
    {
        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index, which is built
     * from the number of records of each type in every tree that has sitemaps enabled.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecordsByTree($tree_ids, 'families', 'f_file');
            $count_individuals  = $this->countRecordsByTree($tree_ids, 'individuals', 'i_file');
            $count_media        = $this->countRecordsByTree($tree_ids, 'media', 'm_file');
            $count_notes        = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree($tree_ids, 'sources', 's_file');
            $count_submitters   = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Count the records in one table, grouped by tree.
     *
     * @param Collection<int> $tree_ids    IDs of the trees to include
     * @param string          $table       Table holding the records
     * @param string          $file_column Column of $table holding the tree ID
     * @param string|null     $record_type Restrict to this value of "o_type" (only used with the "other" table)
     *
     * @return Collection<int> Number of records, keyed by tree name
     */
    private function countRecordsByTree(
        Collection $tree_ids,
        string $table,
        string $file_column,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $file_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one volume of the sitemap for a
     * given tree and record type.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is excluded from sitemaps, or the type or page is invalid
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $type = $request->getAttribute('type');
        $page = (int) $request->getAttribute('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Validate the URL parameters before they become part of a cache key,
        // so that bogus requests are rejected rather than cached.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Fetch one page of records of the given type, omitting those that
     * canShow(Auth::PRIV_PRIVATE) reports as hidden.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<GedcomRecord>
     *
     * @throws HttpNotFoundException If the record type is not supported
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * Fetch one page of raw database rows for a tree, ordered by record ID.
     *
     * @param Tree        $tree
     * @param string      $table       Table holding the records
     * @param string      $file_column Column of $table holding the tree ID
     * @param string      $id_column   Column of $table holding the record ID
     * @param int         $limit
     * @param int         $offset
     * @param string|null $record_type Restrict to this value of "o_type" (only used with the "other" table)
     *
     * @return Collection<object>
     */
    private function sitemapRows(
        Tree $tree,
        string $table,
        string $file_column,
        string $id_column,
        int $limit,
        int $offset,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->where($file_column, '=', $tree->id());

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->orderBy($id_column)
            ->skip($offset)
            ->take($limit)
            ->get();
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'families', 'f_file', 'f_id', $limit, $offset)
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'individuals', 'i_file', 'i_id', $limit, $offset)
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'media', 'm_file', 'm_id', $limit, $offset)
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o_file', 'o_id', $limit, $offset, Note::RECORD_TYPE)
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o_file', 'o_id', $limit, $offset, Repository::RECORD_TYPE)
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'sources', 's_file', 's_id', $limit, $offset)
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o_file', 'o_id', $limit, $offset, Submitter::RECORD_TYPE)
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}
517