<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Controller\PageController;
use Fisharebest\Webtrees\Database;
use Fisharebest\Webtrees\Filter;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Tree;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index (sitemap.xml) plus one sitemap file per
 * tree / record type / volume, and provides an admin page to choose
 * which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface
{
    const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    const CACHE_LIFE         = 1209600; // Two weeks

    /** {@inheritdoc} */
    public function getTitle()
    {
        return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
    }

    /** {@inheritdoc} */
    public function getDescription()
    {
        return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * This is a general purpose hook, allowing modules to respond to routes
     * of the form module.php?mod=FOO&mod_action=BAR
     *
     * Unknown actions produce a 404 response.
     *
     * @param string $mod_action
     */
    public function modAction($mod_action)
    {
        switch ($mod_action) {
            case 'admin':
                $this->admin();
                break;
            case 'generate':
                $this->generate(Filter::get('file'));
                break;
            default:
                http_response_code(404);
        }
    }

    /**
     * Generate an XML file.
     *
     * Accepts either "sitemap.xml" (the index) or
     * "sitemap-<tree id>-<record type>-<volume>.xml"; anything else is a 404.
     *
     * @param string $file
     */
    private function generate($file)
    {
        if ($file === 'sitemap.xml') {
            $this->generateIndex();
        } elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/', $file, $match)) {
            // The dot is escaped so that only a literal ".xml" suffix matches.
            $this->generateFile($match[1], $match[2], $match[3]);
        } else {
            http_response_code(404);
        }
    }

    /**
     * The index file contains references to all the other files.
     * These files are the same for visitors/users/admins.
     *
     * The generated document is cached in the module settings for
     * CACHE_LIFE seconds.
     */
    private function generateIndex()
    {
        // Check the cache
        $timestamp = $this->getSetting('sitemap.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting('sitemap.xml');
        } else {
            $data    = '';
            $lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
            foreach (Tree::getAll() as $tree) {
                if (!$tree->getPreference('include_in_sitemap')) {
                    continue;
                }
                foreach ($this->countRecords($tree->getTreeId()) as $rec_type => $n) {
                    // Number of volumes needed to hold $n records. Using ceil()
                    // avoids advertising an extra, empty volume when $n is an
                    // exact multiple of RECORDS_PER_VOLUME.
                    $volumes = (int) ceil($n / self::RECORDS_PER_VOLUME);
                    if ($rec_type === 'i') {
                        // Every sitemap file also lists the tree's home page, so
                        // always publish at least one file per included tree.
                        $volumes = max(1, $volumes);
                    }
                    for ($i = 0; $i < $volumes; ++$i) {
                        // Ampersands must be entity-escaped inside XML.
                        $data .=
                            '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() .
                            '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-' . $rec_type . '-' . $i . '.xml</loc>' .
                            $lastmod . '</sitemap>' . PHP_EOL;
                    }
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
            // Cache this data.
            $this->setSetting('sitemap.xml', $data);
            $this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Count the records of each sitemap record type in one tree.
     *
     * @param int $tree_id
     *
     * @return int[] Record counts, keyed by record type letter, in the order i, s, r, n, m.
     */
    private function countRecords($tree_id)
    {
        $queries = array(
            'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
            's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
            'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
            'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
            'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
        );

        $counts = array();
        foreach ($queries as $rec_type => $sql) {
            $counts[$rec_type] = (int) Database::prepare($sql)->execute(array('tree_id' => $tree_id))->fetchOne();
        }

        return $counts;
    }

    /**
     * A separate file for each family tree and each record type.
     * These files depend on access levels, so only cache for visitors.
     *
     * Responds with 404 when the tree does not exist or is not included
     * in the sitemaps.
     *
     * @param int    $ged_id
     * @param string $rec_type One of i, s, r, n, m
     * @param string $volume
     */
    private function generateFile($ged_id, $rec_type, $volume)
    {
        // NOTE(review): assumes Tree::findById() reports an unknown ID either by
        // throwing \DomainException or by returning null - confirm against Tree.
        try {
            $tree = Tree::findById($ged_id);
        } catch (\DomainException $ex) {
            $tree = null;
        }
        // Only serve files for trees that the index would advertise.
        if ($tree === null || !$tree->getPreference('include_in_sitemap')) {
            http_response_code(404);

            return;
        }

        $setting_prefix = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;

        // Check the cache
        $timestamp = $this->getSetting($setting_prefix . '.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
            $data = $this->getSetting($setting_prefix . '.xml');
        } else {
            // Ampersands must be entity-escaped inside XML.
            $data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
            $records = array();
            switch ($rec_type) {
                case 'i':
                    $rows = $this->fetchVolumeRows(
                        "SELECT i_id AS xref, i_gedcom AS gedcom FROM `##individuals` WHERE i_file = :tree_id ORDER BY i_id",
                        $ged_id,
                        $volume
                    );
                    foreach ($rows as $row) {
                        $records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
                    }
                    break;
                case 's':
                    $rows = $this->fetchVolumeRows(
                        "SELECT s_id AS xref, s_gedcom AS gedcom FROM `##sources` WHERE s_file = :tree_id ORDER BY s_id",
                        $ged_id,
                        $volume
                    );
                    foreach ($rows as $row) {
                        $records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
                    }
                    break;
                case 'r':
                    $rows = $this->fetchVolumeRows(
                        "SELECT o_id AS xref, o_gedcom AS gedcom FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO' ORDER BY o_id",
                        $ged_id,
                        $volume
                    );
                    foreach ($rows as $row) {
                        $records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
                    }
                    break;
                case 'n':
                    $rows = $this->fetchVolumeRows(
                        "SELECT o_id AS xref, o_gedcom AS gedcom FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE' ORDER BY o_id",
                        $ged_id,
                        $volume
                    );
                    foreach ($rows as $row) {
                        $records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
                    }
                    break;
                case 'm':
                    $rows = $this->fetchVolumeRows(
                        "SELECT m_id AS xref, m_gedcom AS gedcom FROM `##media` WHERE m_file = :tree_id ORDER BY m_id",
                        $ged_id,
                        $volume
                    );
                    foreach ($rows as $row) {
                        $records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
                    }
                    break;
            }
            foreach ($records as $record) {
                if ($record->canShowName()) {
                    $data .= '<url>';
                    // NOTE(review): getHtmlUrl() is presumably already entity-escaped,
                    // so it is not escaped again here - confirm.
                    $data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
                    $chan = $record->getFirstFact('CHAN');
                    if ($chan) {
                        $date = $chan->getDate();
                        if ($date->isOK()) {
                            $data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
                        }
                    }
                    $data .= '</url>' . PHP_EOL;
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
            // Cache this data - but only for visitors, as we don’t want
            // visitors to see data created by signed-in users.
            if (!Auth::check()) {
                $this->setSetting($setting_prefix . '.xml', $data);
                $this->setSetting($setting_prefix . '.timestamp', WT_TIMESTAMP);
            }
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Fetch one volume (page) of rows for a record query.
     *
     * @param string $sql    A query with a :tree_id placeholder and an ORDER BY clause, without LIMIT/OFFSET
     * @param int    $ged_id
     * @param string $volume Zero-based volume number
     *
     * @return object[] Rows exposing "xref" and "gedcom" properties
     */
    private function fetchVolumeRows($sql, $ged_id, $volume)
    {
        return Database::prepare(
            $sql . " LIMIT :limit OFFSET :offset"
        )->execute(array(
            'tree_id' => $ged_id,
            'limit'   => self::RECORDS_PER_VOLUME,
            'offset'  => self::RECORDS_PER_VOLUME * $volume,
        ))->fetchAll();
    }

    /**
     * Edit the configuration
     *
     * Shows one checkbox per tree; on save, stores the choices and
     * deletes all cached sitemap data so the files are regenerated.
     */
    private function admin()
    {
        $controller = new PageController;
        $controller
            ->restrictAccess(Auth::isAdmin())
            ->setPageTitle($this->getTitle())
            ->pageHeader();

        // Save the updated preferences
        if (Filter::post('action') == 'save') {
            foreach (Tree::getAll() as $tree) {
                $tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
            }
            // Clear cache and force files to be regenerated
            Database::prepare(
                "DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
            )->execute();
        }

        $include_any = false;

        ?>
        <ol class="breadcrumb small">
            <li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
            <li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
            <li class="active"><?php echo $controller->getPageTitle(); ?></li>
        </ol>
        <h1><?php echo $controller->getPageTitle(); ?></h1>
        <?php

        echo
            '<p>',
            /* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
            I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
            '</p>',
            '<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
            // The ampersand is entity-escaped because it sits inside an HTML attribute.
            '<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
            '<input type="hidden" name="action" value="save">';
        foreach (Tree::getAll() as $tree) {
            echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
            if ($tree->getPreference('include_in_sitemap')) {
                echo 'checked';
                $include_any = true;
            }
            echo '>', $tree->getTitleHtml(), '</label></div>';
        }
        echo
            '<input type="submit" value="', I18N::translate('save'), '">',
            '</form>',
            '<hr>';

        if ($include_any) {
            // Plain URL for robots.txt, and a URL-encoded copy for use as a query parameter.
            $site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
            $site_map_url2 = rawurlencode($site_map_url1);
            echo
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
                // Escaped for display, so the browser renders the literal URL.
                '<pre>Sitemap: ', htmlspecialchars($site_map_url1), '</pre>',
                '<hr>',
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
                '<ul>',
                // This list comes from http://en.wikipedia.org/wiki/Sitemaps
                '<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
                '<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
                '</ul>';
        }
    }

    /** {@inheritdoc} */
    public function getConfigLink()
    {
        return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
    }
}