<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\User\UserFactory;
use MediaWiki\User\UserNameUtils;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\Rdbms\LoadBalancer;

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script for finding and replacing invalid actor IDs, see T261325.
 *
 * An actor ID is considered invalid when a row references it but no row with
 * that actor_id exists in the actor table. Without --overwrite-with the script
 * only lists such rows; with it, the rows of the current batch are rewritten
 * after an interactive confirmation.
 *
 * @ingroup Maintenance
 */
class FindMissingActors extends Maintenance {

	/**
	 * @var UserFactory|null
	 */
	private $userFactory;

	/**
	 * @var UserNameUtils|null
	 */
	private $userNameUtils;

	/**
	 * @var LoadBalancer|null
	 */
	private $loadBalancer;

	/**
	 * @var LBFactory
	 */
	private $lbFactory;

	/**
	 * Supported values for --field, each mapped to
	 * [ table name, actor field, field used to identify a row ].
	 */
	private const TABLES = [
		// 'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ], // not yet used in 1.35
		'revactor_actor' => [ 'revision_actor_temp', 'revactor_actor', 'revactor_rev' ],
		'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
		'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ], // no index on ipb_by_actor!
		'img_actor' => [ 'image', 'img_actor', 'img_name' ],
		'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
		'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
		'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
		'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
	];

	/**
	 * Declares the command line options and sets the default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Find and fix invalid actor IDs.' );
		$this->addOption( 'field', 'The name of a database field to process. '
			. 'Possible values: ' . implode( ', ', array_keys( self::TABLES ) ),
			true, true );
		$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.',
			false, true );
		$this->addOption( 'overwrite-with', 'Replace missing actors with this user. '
			. 'Typically, this would be "Unknown user", but it could be any reserved '
			. 'system user (per $wgReservedUsernames) or locally registered user. '
			. 'If not given, invalid actors will only be listed, not fixed. '
			. 'You will be prompted for confirmation before data is written. ',
			false, true );

		$this->setBatchSize( 1000 );
	}

	/**
	 * Sets the services used by this script.
	 *
	 * For each service, an explicitly passed instance wins, then an instance
	 * set by an earlier call, and finally the one from MediaWikiServices.
	 *
	 * @param UserFactory|null $userFactory
	 * @param UserNameUtils|null $userNameUtils
	 * @param LoadBalancer|null $loadBalancer
	 * @param LBFactory|null $lbFactory
	 */
	public function initializeServices(
		?UserFactory $userFactory = null,
		?UserNameUtils $userNameUtils = null,
		?LoadBalancer $loadBalancer = null,
		?LBFactory $lbFactory = null
	) {
		$services = MediaWikiServices::getInstance();

		$this->userFactory = $userFactory ?? $this->userFactory ?? $services->getUserFactory();
		$this->userNameUtils = $userNameUtils ?? $this->userNameUtils ?? $services->getUserNameUtils();
		$this->loadBalancer = $loadBalancer ?? $this->loadBalancer ?? $services->getDBLoadBalancer();
		$this->lbFactory = $lbFactory ?? $this->lbFactory ?? $services->getDBLoadBalancerFactory();
	}

	/**
	 * Returns the actor ID of the user specified with the --overwrite-with option,
	 * or null if --overwrite-with is not set.
	 *
	 * Existing users and reserved system users are supported.
	 * If the user does not have an actor ID yet, one will be assigned.
	 *
	 * @return int|null
	 */
	private function getNewActorId() {
		$name = $this->getOption( 'overwrite-with' );

		if ( $name === null ) {
			return null;
		}

		$user = $this->userFactory->newFromName( $name );

		if ( !$user ) {
			// Report the name that was requested: $user is empty on this branch,
			// so interpolating it would print an empty string.
			$this->fatalError( "Not a valid user name: '$name'" );
		}

		$name = $this->userNameUtils->getCanonical( $name, UserNameUtils::RIGOR_NONE );

		if ( $user->isRegistered() ) {
			$this->output( "Using existing user: '$user'\n" );
		} elseif ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
			// Valid but not usable: treated as a reserved system user name.
			$this->output( "Using system user: '$name'\n" );
		} else {
			// Valid and usable, but not registered: refuse to use it.
			$this->fatalError( "Unknown user: '$name'" );
		}

		// Supply write connection to assign an actor ID if needed.
		$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );
		$actorId = $user->getActorId( $dbw );

		if ( !$actorId ) {
			$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
		}

		$this->output( "Replacement actor ID is $actorId.\n" );
		return $actorId;
	}

	/**
	 * @inheritDoc
	 */
	public function execute() {
		$this->initializeServices();

		$field = $this->getOption( 'field' );
		if ( !isset( self::TABLES[$field] ) ) {
			$this->fatalError( "Unknown field: $field.\n" );
		}

		$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
		$overwrite = $this->getNewActorId();

		$bad = $this->findBadActors( $field, $skip );

		// Only write when there is something to fix and a replacement was
		// requested, and only after the operator explicitly confirms.
		if ( $bad && $overwrite ) {
			$this->output( "\n" );
			$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
			$this->output( "Information about the invalid IDs will be lost!\n" );
			$this->output( "\n" );
			$confirm = $this->readconsole( 'Type "yes" to continue: ' );

			if ( $confirm === 'yes' ) {
				// $bad is keyed by row ID; the keys identify the rows to rewrite.
				$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
			} else {
				$this->fatalError( 'Aborted.' );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Find rows that have bad actor IDs.
	 *
	 * At most one batch (see getBatchSize()) of rows is returned per call.
	 *
	 * @param string $field the database field in which to detect bad actor IDs.
	 * @param int[] $skip bad actor IDs not to replace.
	 *
	 * @return array a map of row ID => bad actor ID, identifying rows in which
	 *         the actor ID needs to be replaced.
	 */
	private function findBadActors( $field, $skip ) {
		[ $table, $actorField, $idField ] = self::TABLES[$field];
		$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

		$dbr = $this->loadBalancer->getConnectionRef(
			DB_REPLICA,
			[ 'maintenance', 'vslow', 'slow' ]
		);

		/*
		We are building an SQL query like this one here, performing a left join
		to detect rows in $table that lack a matching row in the actor table.

		In this example, $field is 'log_actor', so $table is 'logging',
		$actorField is 'log_actor', and $idField is 'log_id'.
		Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

		SELECT log_actor, log_id
		FROM logging
		LEFT JOIN actor ON log_actor = actor_id
		WHERE actor_id IS NULL
		AND log_actor NOT IN (1, 2, 3, 4)
		LIMIT 1000;
		*/

		$conds = [ 'actor_id' => null ];

		if ( $skip ) {
			$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
		}

		$queryBuilder = $dbr->newSelectQueryBuilder();
		$queryBuilder->table( $table )
			->fields( [ $actorField, $idField ] )
			->conds( $conds )
			->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ );

		$res = $queryBuilder->fetchResultSet();
		$count = $res->numRows();

		$bad = [];

		if ( $count ) {
			$this->output( "\t\tID\tACTOR\n" );
		}

		foreach ( $res as $row ) {
			$id = $row->$idField;
			$actor = (int)( $row->$actorField );

			$bad[$id] = $actor;
			$this->output( "\t\t$id\t$actor\n" );
		}

		$this->output( "\tFound $count invalid actor IDs.\n" );

		if ( $count >= $this->getBatchSize() ) {
			// A full batch means there may be more rows than were fetched.
			$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
		}

		return $bad;
	}

	/**
	 * Overwrite the actor ID in a given set of rows.
	 *
	 * @param string $field the database field in which to replace IDs.
	 * @param array $ids The row IDs of the rows in which the actor ID should be replaced
	 * @param int $overwrite The actor ID to write to the rows identified by $ids.
	 *
	 * @return int the number of rows reported as affected by the update.
	 */
	private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
		[ $table, $actorField, $idField ] = self::TABLES[$field];

		if ( !$ids ) {
			// An empty ID list cannot form a meaningful IN() condition,
			// so there is nothing to update.
			return 0;
		}

		$count = count( $ids );
		$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

		$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );

		$dbw->update( $table, [ $actorField => $overwrite ], [ $idField => $ids ], __METHOD__ );

		// From here on, $count is the number of rows actually changed.
		$count = $dbw->affectedRows();

		$this->lbFactory->waitForReplication();
		$this->output( "\tUpdated $count rows.\n" );

		return $count;
	}

}

$maintClass = FindMissingActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;