1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16namespace Fisharebest\Webtrees\Module;
17
18use Fisharebest\Webtrees\Auth;
19use Fisharebest\Webtrees\Controller\PageController;
20use Fisharebest\Webtrees\Database;
21use Fisharebest\Webtrees\Filter;
22use Fisharebest\Webtrees\I18N;
23use Fisharebest\Webtrees\Individual;
24use Fisharebest\Webtrees\Media;
25use Fisharebest\Webtrees\Note;
26use Fisharebest\Webtrees\Repository;
27use Fisharebest\Webtrees\Source;
28use Fisharebest\Webtrees\Tree;
29
/**
 * Class SiteMapModule
 *
 * Publishes XML sitemaps (see http://www.sitemaps.org/) for the family trees
 * that an administrator has chosen to include. A single index file lists one
 * "volume" file per tree / record type / page of RECORDS_PER_VOLUME records.
 * Generated XML is stored in the module settings and reused for CACHE_LIFE
 * seconds.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface
{
    const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    const CACHE_LIFE         = 1209600; // Two weeks

    /** {@inheritdoc} */
    public function getTitle()
    {
        return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
    }

    /** {@inheritdoc} */
    public function getDescription()
    {
        return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * This is a general purpose hook, allowing modules to respond to routes
     * of the form module.php?mod=FOO&mod_action=BAR
     *
     * Supported actions are "admin" (configuration page) and "generate"
     * (serve the sitemap file named by the "file" query parameter).
     * Anything else is answered with a 404 status.
     *
     * @param string $mod_action
     */
    public function modAction($mod_action)
    {
        switch ($mod_action) {
            case 'admin':
                $this->admin();
                break;
            case 'generate':
                $this->generate(Filter::get('file'));
                break;
            default:
                http_response_code(404);
        }
    }

    /**
     * Generate an XML file.
     *
     * Accepts either "sitemap.xml" (the index) or a volume name of the form
     * "sitemap-<tree id>-<record type>-<volume>.xml". Any other name is
     * answered with a 404 status.
     *
     * @param string $file
     */
    private function generate($file)
    {
        $file = (string) $file;

        if ($file === 'sitemap.xml') {
            $this->generateIndex();
        } elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/D', $file, $match)) {
            // The dot is escaped so that only a literal ".xml" suffix matches, and the
            // D modifier stops "$" from also matching before a trailing newline.
            $this->generateFile($match[1], $match[2], $match[3]);
        } else {
            http_response_code(404);
        }
    }

    /**
     * The index file contains references to all the other files.
     * These files are the same for visitors/users/admins.
     */
    private function generateIndex()
    {
        // Check the cache
        $timestamp = $this->getSetting('sitemap.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting('sitemap.xml');
        } else {
            $url_prefix = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-';
            $lastmod    = '<lastmod>' . date('Y-m-d') . '</lastmod>';
            $data       = '';
            foreach (Tree::getAll() as $tree) {
                if (!$tree->getPreference('include_in_sitemap')) {
                    continue;
                }
                // Same order as the volume files have always been listed in.
                foreach (array('i', 's', 'r', 'n', 'm') as $rec_type) {
                    $volumes = $this->countVolumes($tree, $rec_type);
                    for ($i = 0; $i < $volumes; ++$i) {
                        $data .= '<sitemap><loc>' . $url_prefix . $tree->getTreeId() . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
                    }
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
            // Cache this data.
            $this->setSetting('sitemap.xml', $data);
            $this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
        }
        $this->sendXml($data);
    }

    /**
     * How many volume files are needed for one record type of one tree.
     *
     * Uses ceil() so that a record count which is an exact multiple of
     * RECORDS_PER_VOLUME does not produce an extra, empty volume.
     *
     * @param Tree   $tree
     * @param string $rec_type One of i, s, r, n, m
     *
     * @return int
     */
    private function countVolumes($tree, $rec_type)
    {
        switch ($rec_type) {
            case 'i':
                $sql = "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id";
                break;
            case 's':
                $sql = "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id";
                break;
            case 'r':
                $sql = "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'";
                break;
            case 'n':
                $sql = "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'";
                break;
            case 'm':
                $sql = "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id";
                break;
            default:
                return 0;
        }

        $n = (int) Database::prepare($sql)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();

        $volumes = (int) ceil($n / self::RECORDS_PER_VOLUME);

        // An included tree always gets at least one individuals volume, even when it has
        // no individuals: every volume file begins with the tree's home page URL.
        if ($rec_type === 'i' && $volumes < 1) {
            $volumes = 1;
        }

        return $volumes;
    }

    /**
     * A separate file for each family tree and each record type.
     * These files depend on access levels, so only cache for visitors.
     *
     * Answers with a 404 status when the tree is not available or has not
     * been selected for inclusion in the sitemaps.
     *
     * @param int    $ged_id
     * @param string $rec_type
     * @param string $volume
     */
    private function generateFile($ged_id, $rec_type, $volume)
    {
        $tree = Tree::findById($ged_id);
        // Only the index used to honour the "include_in_sitemap" preference, so volume
        // files of excluded trees could still be requested directly by URL.
        // NOTE(review): how Tree::findById() reports an unknown id is not visible here;
        // the falsy check only covers a null/false return.
        if (!$tree || !$tree->getPreference('include_in_sitemap')) {
            http_response_code(404);

            return;
        }

        $cache_key  = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
        $is_visitor = !Auth::check();

        // Check the cache
        $timestamp = $this->getSetting($cache_key . '.timestamp');
        if ($is_visitor && $timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting($cache_key . '.xml');
        } else {
            // Every volume starts with the home page of the tree.
            $data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
            foreach ($this->fetchRecords($tree, $rec_type, $volume) as $record) {
                if ($record->canShowName()) {
                    $data .= '<url>';
                    $data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
                    $chan = $record->getFirstFact('CHAN');
                    if ($chan) {
                        $date = $chan->getDate();
                        if ($date->isOK()) {
                            $data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
                        }
                    }
                    $data .= '</url>' . PHP_EOL;
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
            // Cache this data - but only for visitors, as we don’t want
            // visitors to see data created by signed-in users.
            if ($is_visitor) {
                $this->setSetting($cache_key . '.xml', $data);
                $this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
            }
        }
        $this->sendXml($data);
    }

    /**
     * Load one volume (page) of records of the given type from a tree.
     *
     * @param Tree   $tree
     * @param string $rec_type One of i, s, r, n, m
     * @param string $volume   Zero-based volume number
     *
     * @return array Record objects, ordered by xref
     */
    private function fetchRecords($tree, $rec_type, $volume)
    {
        $records = array();

        switch ($rec_type) {
            case 'i':
                $rows = $this->fetchVolumeRows(
                    "SELECT i_id AS xref, i_gedcom AS gedcom" .
                    " FROM `##individuals`" .
                    " WHERE i_file = :tree_id" .
                    " ORDER BY i_id",
                    $tree,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 's':
                $rows = $this->fetchVolumeRows(
                    "SELECT s_id AS xref, s_gedcom AS gedcom" .
                    " FROM `##sources`" .
                    " WHERE s_file = :tree_id" .
                    " ORDER BY s_id",
                    $tree,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'r':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'REPO'" .
                    " ORDER BY o_id",
                    $tree,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'n':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'NOTE'" .
                    " ORDER BY o_id",
                    $tree,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'm':
                $rows = $this->fetchVolumeRows(
                    "SELECT m_id AS xref, m_gedcom AS gedcom" .
                    " FROM `##media`" .
                    " WHERE m_file = :tree_id" .
                    " ORDER BY m_id",
                    $tree,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
        }

        return $records;
    }

    /**
     * Run a record query, restricted to the rows of a single volume.
     *
     * @param string $sql    Query with a :tree_id placeholder and an ORDER BY clause
     * @param Tree   $tree
     * @param string $volume Zero-based volume number
     *
     * @return array Rows with "xref" and "gedcom" properties
     */
    private function fetchVolumeRows($sql, $tree, $volume)
    {
        return Database::prepare(
            $sql . " LIMIT :limit OFFSET :offset"
        )->execute(array(
            'tree_id' => $tree->getTreeId(),
            'limit'   => self::RECORDS_PER_VOLUME,
            'offset'  => self::RECORDS_PER_VOLUME * $volume,
        ))->fetchAll();
    }

    /**
     * Send an XML document to the client.
     *
     * @param string $data
     */
    private function sendXml($data)
    {
        $data = (string) $data;

        header('Content-Type: application/xml');
        // strlen() counts bytes, which is what Content-Length requires.
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Edit the configuration
     *
     * Shows one checkbox per family tree. On save, stores the choice in each
     * tree's "include_in_sitemap" preference and deletes every cached sitemap
     * setting so that the files are regenerated on the next request.
     */
    private function admin()
    {
        $controller = new PageController;
        $controller
            ->restrictAccess(Auth::isAdmin())
            ->setPageTitle($this->getTitle())
            ->pageHeader();

        // Save the updated preferences
        // NOTE(review): no CSRF token check is visible on this POST handler - confirm
        // whether the framework validates tokens elsewhere.
        if (Filter::post('action') === 'save') {
            foreach (Tree::getAll() as $tree) {
                $tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
            }
            // Clear cache and force files to be regenerated
            Database::prepare(
                "DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
            )->execute();
        }

        $include_any = false;

        ?>
        <ol class="breadcrumb small">
            <li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
            <li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
            <li class="active"><?php echo $controller->getPageTitle(); ?></li>
        </ol>
        <h1><?php echo $controller->getPageTitle(); ?></h1>
        <?php

        echo
            '<p>',
            /* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
            I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.'),
            '</p>',
            '<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
            '<form method="post" action="module.php?mod=', $this->getName(), '&amp;mod_action=admin">',
            '<input type="hidden" name="action" value="save">';

        foreach (Tree::getAll() as $tree) {
            echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
            if ($tree->getPreference('include_in_sitemap')) {
                echo 'checked';
                $include_any = true;
            }
            echo '>', $tree->getTitleHtml(), '</label></div>';
        }

        echo
            '<input type="submit" value="', I18N::translate('save'), '">',
            '</form>',
            '<hr>';

        // The submission hints are only useful once at least one tree is included.
        if ($include_any) {
            // HTML-escaped form, for display on the page.
            $site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
            // URL-encoded form, for use as a query parameter of the ping URLs.
            $site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
            echo
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
                '<pre>Sitemap: ', $site_map_url1, '</pre>',
                '<hr>',
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
                '<ul>',
                // This list comes from http://en.wikipedia.org/wiki/Sitemaps
                '<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=', $site_map_url2, '">Bing</a></li>',
                '<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=', $site_map_url2, '">Google</a></li>',
                '</ul>';
        }
    }

    /** {@inheritdoc} */
    public function getConfigLink()
    {
        return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
    }
}
355