1<?php
2/**
3 * Periodic off-peak updating of the search index.
4 *
5 * Usage: php updateSearchIndex.php [-s START] [-e END] [-p POSFILE] [-l LOCKTIME] [-q]
6 * Where START is the starting timestamp
7 * END is the ending timestamp
8 * POSFILE is a file to load timestamps from and save them to, searchUpdate.WIKI_ID.pos by default
 * LOCKTIME is deprecated and has no effect (it was formerly the table lock time)
10 * -q means quiet
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License along
23 * with this program; if not, write to the Free Software Foundation, Inc.,
24 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 * http://www.gnu.org/copyleft/gpl.html
26 *
27 * @file
28 * @ingroup Maintenance
29 */
30
31use MediaWiki\MediaWikiServices;
32use MediaWiki\Revision\SlotRecord;
33
34require_once __DIR__ . '/Maintenance.php';
35
/**
 * Maintenance script for periodic off-peak updating of the search index.
 *
 * Re-indexes every page whose latest revision has a non-log recentchanges
 * entry inside the requested time window, then stores the end of that window
 * in a position file so a later run can pick up from there.
 *
 * @ingroup Maintenance
 */
class UpdateSearchIndex extends Maintenance {

	/**
	 * Declare the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script for periodic off-peak updating of the search index' );
		$this->addOption( 's', 'Starting timestamp', false, true );
		$this->addOption( 'e', 'Ending timestamp', false, true );
		$this->addOption(
			'p',
			'File for saving/loading timestamps, searchUpdate.WIKI_ID.pos by default',
			false,
			true
		);
		// Still accepted so that existing invocations passing -l keep working.
		$this->addOption(
			'l',
			'Deprecated, has no effect (formerly lock time)',
			false,
			true
		);
	}

	/**
	 * @return int Always Maintenance::DB_ADMIN
	 */
	public function getDbType() {
		return Maintenance::DB_ADMIN;
	}

	/**
	 * Work out the time window, update the index for it and save the new position.
	 *
	 * The start of the window is, in order of preference: the -s option, the
	 * timestamp stored in the position file, or 24 hours before now. The end of
	 * the window is the -e option or the current time.
	 */
	public function execute() {
		$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
		$posFile = $this->getOption( 'p', 'searchUpdate.' . rawurlencode( $dbDomain ) . '.pos' );
		$end = $this->getOption( 'e', wfTimestampNow() );

		if ( $this->hasOption( 's' ) ) {
			$start = $this->getOption( 's' );
		} else {
			$start = $this->readPositionFile( $posFile );
			if ( $start === null ) {
				// No usable saved position: fall back to the last 24 hours.
				$start = wfTimestamp( TS_MW, time() - 86400 );
			}
		}

		$this->doUpdateSearchIndex( $start, $end );

		// Remember where this run stopped. Strict comparison because writing
		// zero bytes is not a failure.
		if ( file_put_contents( $posFile, $end ) === false ) {
			$this->error( "*** Couldn't write to the $posFile!\n" );
		}
	}

	/**
	 * Read the timestamp saved in the position file.
	 *
	 * Surrounding whitespace is stripped so that a trailing newline (for
	 * example from a hand-edited file) is not passed on as part of the
	 * timestamp, and an empty or unreadable file is reported as "no position"
	 * instead of being used as the start of the window.
	 *
	 * @param string $posFile Path of the position file.
	 * @return string|null The stored timestamp, or null when the file cannot be
	 *  read or contains nothing but whitespace.
	 */
	private function readPositionFile( $posFile ) {
		if ( !is_readable( $posFile ) ) {
			return null;
		}

		$contents = file_get_contents( $posFile );
		if ( $contents === false ) {
			return null;
		}

		$contents = trim( $contents );

		return $contents === '' ? null : $contents;
	}

	/**
	 * Update the search index for all pages changed between two timestamps.
	 *
	 * @param string $start Start of the window, in any format accepted by the
	 *  database wrapper's timestamp() method.
	 * @param string $end End of the window, same format as $start.
	 */
	private function doUpdateSearchIndex( $start, $end ) {
		global $wgDisableSearchUpdate;

		// Force search updates on for this run, whatever the configured value is.
		$wgDisableSearchUpdate = false;

		$dbw = $this->getDB( DB_MASTER );

		$this->output( "Updating searchindex between $start and $end\n" );

		# Select entries from recentchanges which are on top and between the specified times
		$start = $dbw->timestamp( $start );
		$end = $dbw->timestamp( $end );

		$res = $dbw->select(
			[ 'recentchanges', 'page' ],
			'rc_cur_id',
			[
				// Log entries are excluded from the scan.
				'rc_type != ' . $dbw->addQuotes( RC_LOG ),
				'rc_timestamp BETWEEN ' . $dbw->addQuotes( $start ) . ' AND ' . $dbw->addQuotes( $end )
			],
			__METHOD__,
			[],
			[
				// "On top": the change must be the page's latest revision.
				'page' => [ 'JOIN', 'rc_cur_id=page_id AND rc_this_oldid=page_latest' ]
			]
		);

		foreach ( $res as $row ) {
			$this->updateSearchIndexForPage( (int)$row->rc_cur_id );
		}
		$this->output( "Done\n" );
	}

	/**
	 * Update the searchindex table for a given pageid
	 *
	 * Looks up the revision for the page with READ_LATEST, prints the page's
	 * prefixed DB key and runs a SearchUpdate on the content of its main slot.
	 * Nothing is printed or updated when no revision is found.
	 *
	 * @param int $pageId The page ID to update.
	 * @return null|string The prefixed DB key of the page, or null when no
	 *  revision was found for it.
	 */
	public function updateSearchIndexForPage( int $pageId ) {
		// Get current revision
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionByPageId( $pageId, 0, IDBAccessObject::READ_LATEST );
		$title = null;
		if ( $rev ) {
			$titleObj = Title::newFromLinkTarget( $rev->getPageAsLinkTarget() );
			$title = $titleObj->getPrefixedDBkey();
			$this->output( "$title..." );
			# Update searchindex
			$u = new SearchUpdate( $pageId, $titleObj, $rev->getContent( SlotRecord::MAIN ) );
			$u->doUpdate();
			$this->output( "\n" );
		}

		return $title;
	}
}
145
// Name this script's class in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN. That constant is defined outside this file;
// presumably the included file runs the class above when this script is the
// entry point (to be confirmed against Maintenance.php).
$maintClass = UpdateSearchIndex::class;
require_once RUN_MAINTENANCE_IF_MAIN;
148