<?php
/**
 * Refresh image metadata fields. See also rebuildImages.php
 *
 * Usage: php refreshImageMetadata.php
 *
 * Copyright © 2011 Brian Wolff
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Brian Wolff
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\IMaintainableDatabase;

/**
 * Maintenance script to refresh image metadata fields.
 *
 * Walks the image table in batches ordered by img_name, lets the local file
 * repo re-read metadata for rows it considers obsolete (or for every row when
 * --force is given), and prints a summary of what was refreshed.
 *
 * @ingroup Maintenance
 */
class RefreshImageMetadata extends Maintenance {

	/**
	 * Not assigned anywhere in this class; execute() works with a local
	 * connection handle instead. Kept because it is part of the protected
	 * interface visible to subclasses.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $dbw;

	/**
	 * Register the script description, default batch size and CLI options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Script to update image metadata records' );
		$this->setBatchSize( 200 );

		$this->addOption(
			'force',
			'Reload metadata from file even if the metadata looks ok',
			false,
			false,
			'f'
		);
		$this->addOption(
			'broken-only',
			'Only fix really broken records, leave old but still compatible records alone.'
		);
		$this->addOption(
			'verbose',
			'Output extra information about each upgraded/non-upgraded file.',
			false,
			false,
			'v'
		);
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );

		$this->addOption(
			'mediatype',
			'Only refresh files with this media type, e.g. BITMAP, UNKNOWN etc.',
			false,
			true
		);
		$this->addOption(
			'mime',
			"Only refresh files with this MIME type. Can accept wild-card 'image/*'. "
				. "Potentially inefficient unless 'mediatype' is also specified",
			false,
			true
		);
		$this->addOption(
			'metadata-contains',
			'(Inefficient!) Only refresh files where the img_metadata field '
				. 'contains this string. Can be used if its known a specific '
				. 'property was being extracted incorrectly.',
			false,
			true
		);
	}

	/**
	 * Run the refresh: iterate over matching image rows batch by batch,
	 * upgrade their metadata and report the totals.
	 */
	public function execute() {
		$force = $this->hasOption( 'force' );
		$brokenOnly = $this->hasOption( 'broken-only' );
		$verbose = $this->hasOption( 'verbose' );
		$start = $this->getOption( 'start', false );
		$this->setupParameters( $force, $brokenOnly );

		$upgraded = 0;
		$leftAlone = 0;
		$error = 0;

		$dbw = $this->getDB( DB_MASTER );
		$batchSize = $this->getBatchSize();
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		$conds = $this->getConditions( $dbw );

		// For the WHERE img_name > 'foo' condition that comes after doing a batch
		$conds2 = [];
		if ( $start !== false ) {
			$conds2[] = 'img_name >= ' . $dbw->addQuotes( $start );
		}

		$options = [
			'LIMIT' => $batchSize,
			'ORDER BY' => 'img_name ASC',
		];

		$fileQuery = LocalFile::getQueryInfo();
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

		do {
			$res = $dbw->select(
				$fileQuery['tables'],
				$fileQuery['fields'],
				array_merge( $conds, $conds2 ),
				__METHOD__,
				$options,
				$fileQuery['joins']
			);
			$numRows = $res->numRows();

			if ( $numRows > 0 ) {
				$row1 = $res->current();
				$this->output( "Processing next {$numRows} row(s) starting with {$row1->img_name}.\n" );
				$res->rewind();
			}

			// Name of the last row seen in this batch; stays null when the
			// batch is empty so that no continuation condition is built
			// from an undefined row.
			$lastName = null;

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				try {
					// LocalFile will upgrade immediately here if obsolete
					$file = $repo->newFileFromRow( $row );
					if ( $file->getUpgraded() ) {
						// File was upgraded.
						$upgraded++;
						if ( $this->warnIfMetadataShrank( $file, $row ) ) {
							$error++;
						} elseif ( $verbose ) {
							$this->output( "Refreshed File:{$row->img_name}.\n" );
						}
					} else {
						$leftAlone++;
						if ( $force ) {
							$file->upgradeRow();
							if ( $this->warnIfMetadataShrank( $file, $row, ' (forced)' ) ) {
								$error++;
							}
							if ( $verbose ) {
								$this->output( "Forcibly refreshed File:{$row->img_name}.\n" );
							}
						} elseif ( $verbose ) {
							$this->output( "Skipping File:{$row->img_name}.\n" );
						}
					}
				} catch ( Exception $e ) {
					// Best effort: report the failure and carry on with the next row.
					$this->output( "{$row->img_name} failed. {$e->getMessage()}\n" );
				}
			}

			if ( $lastName !== null ) {
				// Resume strictly after the last processed name in the next batch.
				$conds2 = [ 'img_name > ' . $dbw->addQuotes( $lastName ) ];
			}
			$lbFactory->waitForReplication();
			// A short batch means the table has been exhausted.
		} while ( $numRows === $batchSize );

		$total = $upgraded + $leftAlone;
		if ( $force ) {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded needed to be refreshed, $leftAlone did not need to "
				. "be but were refreshed anyways, and $error refreshes were suspicious.\n" );
		} else {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded were refreshed, $leftAlone were already up to date, "
				. "and $error refreshes were suspicious.\n" );
		}
	}

	/**
	 * Emit a warning when a file's refreshed metadata is noticeably shorter
	 * than what the database row held before.
	 *
	 * If after updating, the metadata is smaller than what it was before,
	 * that's probably not a good thing because we extract more data with
	 * time, not less. Thus this probably indicates an error of some sort, or
	 * at the very least is suspicious. The 5 byte tolerance is there just to
	 * weed out any inconsequential changes.
	 *
	 * @param File $file File object whose metadata has just been refreshed
	 * @param stdClass $row Image row as it was read before the refresh
	 * @param string $suffix Text appended to the warning, before the newline
	 * @return bool True if a warning was printed
	 */
	private function warnIfMetadataShrank( $file, $row, $suffix = '' ) {
		$newLength = strlen( $file->getMetadata() );
		$oldLength = strlen( $row->img_metadata );
		if ( $newLength >= $oldLength - 5 ) {
			return false;
		}

		$this->output(
			"Warning: File:{$row->img_name} used to have " .
			"$oldLength bytes of metadata but now has $newLength bytes.$suffix\n"
		);

		return true;
	}

	/**
	 * Build the batch-independent WHERE conditions from the --end, --mime,
	 * --mediatype and --metadata-contains options.
	 *
	 * @param IDatabase $dbw
	 * @return array
	 */
	private function getConditions( $dbw ) {
		$conds = [];

		$end = $this->getOption( 'end', false );
		$mime = $this->getOption( 'mime', false );
		$mediatype = $this->getOption( 'mediatype', false );
		$like = $this->getOption( 'metadata-contains', false );

		if ( $end !== false ) {
			$conds[] = 'img_name <= ' . $dbw->addQuotes( $end );
		}
		if ( $mime !== false ) {
			list( $major, $minor ) = File::splitMime( $mime );
			$conds['img_major_mime'] = $major;
			// A '*' minor type is the wild-card: match on the major type only.
			if ( $minor !== '*' ) {
				$conds['img_minor_mime'] = $minor;
			}
		}
		if ( $mediatype !== false ) {
			$conds['img_media_type'] = $mediatype;
		}
		// Compare explicitly rather than by truthiness so that a search
		// string of "0" is honoured; an empty string is still ignored.
		if ( $like !== false && $like !== '' ) {
			$conds[] = 'img_metadata ' . $dbw->buildLike( $dbw->anyString(), $like, $dbw->anyString() );
		}

		return $conds;
	}

	/**
	 * Validate the option combination and set $wgUpdateCompatibleMetadata
	 * for this run.
	 *
	 * @param bool $force
	 * @param bool $brokenOnly
	 */
	private function setupParameters( $force, $brokenOnly ) {
		global $wgUpdateCompatibleMetadata;

		// Reject the contradictory combination before touching global state.
		if ( $brokenOnly && $force ) {
			$this->fatalError( 'Cannot use --broken-only and --force together. ', 2 );
		}

		// With --broken-only, old-but-compatible records are left alone.
		$wgUpdateCompatibleMetadata = !$brokenOnly;
	}
}

$maintClass = RefreshImageMetadata::class;
require_once RUN_MAINTENANCE_IF_MAIN;