1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup Maintenance
20 */
21
22use MediaWiki\MediaWikiServices;
23use MediaWiki\User\UserFactory;
24use MediaWiki\User\UserNameUtils;
25use Wikimedia\Rdbms\LBFactory;
26use Wikimedia\Rdbms\LoadBalancer;
27
28require_once __DIR__ . '/Maintenance.php';
29
30/**
31 * Maintenance script for finding and replacing invalid actor IDs, see T261325.
32 *
33 * @ingroup Maintenance
34 */
class FindMissingActors extends Maintenance {

	/**
	 * @var UserFactory|null
	 */
	private $userFactory;

	/**
	 * @var UserNameUtils|null
	 */
	private $userNameUtils;

	/**
	 * @var LoadBalancer|null
	 */
	private $loadBalancer;

	/**
	 * @var LBFactory|null
	 */
	private $lbFactory;

	/**
	 * Maps each value accepted by the --field option to a triple of
	 * [ table name, actor ID field, field used to identify rows in that table ].
	 */
	private const TABLES = [
		// 'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ], // not yet used in 1.35
		'revactor_actor' => [ 'revision_actor_temp', 'revactor_actor', 'revactor_rev' ],
		'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
		'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ], // no index on ipb_by_actor!
		'img_actor' => [ 'image', 'img_actor', 'img_name' ],
		'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
		'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
		'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
		'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
	];

	/**
	 * Declares the script description, its command line options
	 * (--field, --skip, --overwrite-with) and the default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Find and fix invalid actor IDs.' );
		$this->addOption( 'field', 'The name of a database field to process. '
			. 'Possible values: ' . implode( ', ', array_keys( self::TABLES ) ),
			 true, true );
		$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.',
			false, true );
		$this->addOption( 'overwrite-with', 'Replace missing actors with this user. '
			. 'Typically, this would be "Unknown user", but it could be any reserved '
			. 'system user (per $wgReservedUsernames) or locally registered user. '
			. 'If not given, invalid actors will only be listed, not fixed. '
			. 'You will be prompted for confirmation before data is written. ',
			 false, true );

		$this->setBatchSize( 1000 );
	}

	/**
	 * Sets the services used by this script.
	 *
	 * For each service, an explicitly passed instance takes precedence, then an
	 * instance set by an earlier call, and finally the instance provided by
	 * the global MediaWikiServices container.
	 *
	 * @param UserFactory|null $userFactory
	 * @param UserNameUtils|null $userNameUtils
	 * @param LoadBalancer|null $loadBalancer
	 * @param LBFactory|null $lbFactory
	 */
	public function initializeServices(
		?UserFactory $userFactory = null,
		?UserNameUtils $userNameUtils = null,
		?LoadBalancer $loadBalancer = null,
		?LBFactory $lbFactory = null
	) {
		$services = MediaWikiServices::getInstance();

		$this->userFactory = $userFactory ?? $this->userFactory ?? $services->getUserFactory();
		$this->userNameUtils = $userNameUtils ?? $this->userNameUtils ?? $services->getUserNameUtils();
		$this->loadBalancer = $loadBalancer ?? $this->loadBalancer ?? $services->getDBLoadBalancer();
		$this->lbFactory = $lbFactory ?? $this->lbFactory ?? $services->getDBLoadBalancerFactory();
	}

	/**
	 * Returns the actor ID of the user specified with the --overwrite-with option,
	 * or null if --overwrite-with is not set.
	 *
	 * Existing users and reserved system users are supported.
	 * If the user does not have an actor ID yet, one will be assigned.
	 *
	 * Reports a fatal error if the name is invalid, if it belongs to neither a
	 * registered user nor a system user, or if no actor ID could be acquired.
	 *
	 * @return int|null
	 */
	private function getNewActorId() {
		$name = $this->getOption( 'overwrite-with' );

		if ( $name === null ) {
			return null;
		}

		$user = $this->userFactory->newFromName( $name );

		if ( !$user ) {
			// $user is falsy here, so report the name that was given on the command line.
			$this->fatalError( "Not a valid user name: '$name'" );
		}

		$name = $this->userNameUtils->getCanonical( $name, UserNameUtils::RIGOR_NONE );

		if ( $user->isRegistered() ) {
			$this->output( "Using existing user: '$user'\n" );
		} elseif ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
			// Valid but not usable: treated as a reserved system user name.
			$this->output( "Using system user: '$name'\n" );
		} else {
			// A usable name without a registered account: refuse to use it.
			$this->fatalError( "Unknown user: '$name'" );
		}

		// Supply write connection to assign an actor ID if needed.
		$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );
		$actorId = $user->getActorId( $dbw );

		if ( !$actorId ) {
			$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
		}

		$this->output( "Replacement actor ID is $actorId.\n" );
		return $actorId;
	}

	/**
	 * @inheritDoc
	 */
	public function execute() {
		$this->initializeServices();

		$field = $this->getOption( 'field' );
		if ( !isset( self::TABLES[$field] ) ) {
			$this->fatalError( "Unknown field: $field.\n" );
		}

		$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
		$overwrite = $this->getNewActorId();

		$bad = $this->findBadActors( $field, $skip );

		// Only write when there is something to fix AND a replacement actor was requested.
		if ( $bad && $overwrite ) {
			$this->output( "\n" );
			$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
			$this->output( "Information about the invalid IDs will be lost!\n" );
			$this->output( "\n" );
			$confirm = $this->readconsole( 'Type "yes" to continue: ' );

			if ( $confirm === 'yes' ) {
				$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
			} else {
				$this->fatalError( 'Aborted.' );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Find rows that have bad actor IDs.
	 *
	 * At most one batch (see getBatchSize()) of rows is reported per call.
	 *
	 * @param string $field the database field in which to detect bad actor IDs.
	 * @param int[] $skip bad actor IDs not to replace.
	 *
	 * @return array a map of row IDs to the invalid actor ID found in that row,
	 *         identifying rows in which the actor ID needs to be replaced.
	 */
	private function findBadActors( $field, $skip ) {
		[ $table, $actorField, $idField ] = self::TABLES[$field];
		$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

		$dbr = $this->loadBalancer->getConnectionRef(
			DB_REPLICA,
			[ 'maintenance', 'vslow', 'slow' ]
		);

		/*
		We are building an SQL query like this one here, performing a left join
		to detect rows in $table that lack a matching row in the actor table.

		In this example, $field is 'log_actor', so $table is 'logging',
		$actorField is 'log_actor', and $idField is 'log_id'.
		Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

		SELECT log_actor, log_id
		FROM logging
		LEFT JOIN actor ON log_actor = actor_id
		WHERE actor_id IS NULL
		AND log_actor NOT IN (1, 2, 3, 4)
		LIMIT 1000;
		 */

		$conds = [ 'actor_id' => null ];

		if ( $skip ) {
			$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
		}

		$queryBuilder = $dbr->newSelectQueryBuilder();
		$queryBuilder->table( $table )
			->fields( [ $actorField, $idField ] )
			->conds( $conds )
			->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ );

		$res = $queryBuilder->fetchResultSet();
		$count = $res->numRows();

		$bad = [];

		if ( $count ) {
			$this->output( "\t\tID\tACTOR\n" );
		}

		foreach ( $res as $row ) {
			$id = $row->$idField;
			$actor = (int)( $row->$actorField );

			// Keyed by row ID; the caller uses the keys to select the rows to update.
			$bad[$id] = $actor;
			$this->output( "\t\t$id\t$actor\n" );
		}

		$this->output( "\tFound $count invalid actor IDs.\n" );

		if ( $count >= $this->getBatchSize() ) {
			$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
		}

		return $bad;
	}

	/**
	 * Overwrite the actor ID in a given set of rows.
	 *
	 * Performs a single UPDATE on the primary database and then waits for replication.
	 *
	 * @param string $field the database field in which to replace IDs.
	 * @param array $ids The row IDs of the rows in which the actor ID should be replaced
	 * @param int $overwrite The actor ID to write to the rows identified by $ids.
	 *
	 * @return int the number of rows affected by the update.
	 */
	private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
		[ $table, $actorField, $idField ] = self::TABLES[$field];

		$count = count( $ids );
		$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

		$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );

		$dbw->update( $table, [ $actorField => $overwrite ], [ $idField => $ids ], __METHOD__ );

		// From here on, $count is the number of rows actually changed, not the number requested.
		$count = $dbw->affectedRows();

		$this->lbFactory->waitForReplication();
		$this->output( "\tUpdated $count rows.\n" );

		return $count;
	}

}
282
// Name the class implementing this script, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (defined outside this file) - presumably it runs
// $maintClass when this file is the script being executed; confirm in Maintenance.php.
$maintClass = FindMissingActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;
285