1<?php
2/**
3 * Refresh image metadata fields. See also rebuildImages.php
4 *
5 * Usage: php refreshImageMetadata.php
6 *
 * Copyright © 2011 Brian Wolff
8 * https://www.mediawiki.org/
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 * http://www.gnu.org/copyleft/gpl.html
24 *
25 * @file
26 * @author Brian Wolff
27 * @ingroup Maintenance
28 */
29
30require_once __DIR__ . '/Maintenance.php';
31
32use MediaWiki\MediaWikiServices;
33use Wikimedia\Rdbms\IDatabase;
34use Wikimedia\Rdbms\IMaintainableDatabase;
35
/**
 * Maintenance script to refresh image metadata fields.
 *
 * Walks the rows described by LocalFile::getQueryInfo() in img_name order,
 * one batch at a time. Building a LocalFile from a row upgrades obsolete
 * metadata; with --force the row is upgraded even when it looks current.
 *
 * @ingroup Maintenance
 */
class RefreshImageMetadata extends Maintenance {

	/**
	 * Not assigned anywhere in this class; execute() works with a local
	 * database handle instead.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $dbw;

	/**
	 * Declare the script description, default batch size and command line options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Script to update image metadata records' );
		$this->setBatchSize( 200 );

		$this->addOption(
			'force',
			'Reload metadata from file even if the metadata looks ok',
			false,
			false,
			'f'
		);
		$this->addOption(
			'broken-only',
			'Only fix really broken records, leave old but still compatible records alone.'
		);
		$this->addOption(
			'verbose',
			'Output extra information about each upgraded/non-upgraded file.',
			false,
			false,
			'v'
		);
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );

		$this->addOption(
			'mediatype',
			'Only refresh files with this media type, e.g. BITMAP, UNKNOWN etc.',
			false,
			true
		);
		$this->addOption(
			'mime',
			"Only refresh files with this MIME type. Can accept wild-card 'image/*'. "
				. "Potentially inefficient unless 'mediatype' is also specified",
			false,
			true
		);
		$this->addOption(
			'metadata-contains',
			'(Inefficient!) Only refresh files where the img_metadata field '
				. 'contains this string. Can be used if its known a specific '
				. 'property was being extracted incorrectly.',
			false,
			true
		);
	}

	/**
	 * Refresh the metadata of every selected file, batch by batch, and print
	 * a summary of how many files were refreshed, left alone or look suspicious.
	 */
	public function execute() {
		$force = $this->hasOption( 'force' );
		$brokenOnly = $this->hasOption( 'broken-only' );
		$verbose = $this->hasOption( 'verbose' );
		$start = $this->getOption( 'start', false );
		$this->setupParameters( $force, $brokenOnly );

		$upgraded = 0;
		$leftAlone = 0;
		$error = 0;

		$dbw = $this->getDB( DB_MASTER );
		$batchSize = $this->getBatchSize();
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
		$conds = $this->getConditions( $dbw );

		// Paging condition: --start (inclusive) for the first batch, then
		// "img_name > last processed name" for every batch after that.
		$conds2 = [];
		if ( $start !== false ) {
			$conds2[] = 'img_name >= ' . $dbw->addQuotes( $start );
		}

		$options = [
			'LIMIT' => $batchSize,
			'ORDER BY' => 'img_name ASC',
		];

		$fileQuery = LocalFile::getQueryInfo();
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

		do {
			$res = $dbw->select(
				$fileQuery['tables'],
				$fileQuery['fields'],
				array_merge( $conds, $conds2 ),
				__METHOD__,
				$options,
				$fileQuery['joins']
			);
			$numRows = $res->numRows();

			if ( $numRows > 0 ) {
				$row1 = $res->current();
				$this->output( "Processing next {$numRows} row(s) starting with {$row1->img_name}.\n" );
				$res->rewind();
			}

			// Name of the last row seen in this batch; stays null for an empty
			// batch so that no undefined row is dereferenced below.
			$lastName = null;

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				try {
					// LocalFile will upgrade immediately here if obsolete
					$file = $repo->newFileFromRow( $row );
					if ( $file->getUpgraded() ) {
						// File was upgraded.
						$upgraded++;
						if ( $this->warnIfMetadataShrank( $file, $row, false ) ) {
							$error++;
						} elseif ( $verbose ) {
							$this->output( "Refreshed File:{$row->img_name}.\n" );
						}
					} else {
						$leftAlone++;
						if ( $force ) {
							$file->upgradeRow();
							if ( $this->warnIfMetadataShrank( $file, $row, true ) ) {
								$error++;
							}
							if ( $verbose ) {
								$this->output( "Forcibly refreshed File:{$row->img_name}.\n" );
							}
						} elseif ( $verbose ) {
							$this->output( "Skipping File:{$row->img_name}.\n" );
						}
					}
				} catch ( Exception $e ) {
					// Report and carry on with the next file; one bad file
					// should not abort the whole run.
					$this->output( "{$row->img_name} failed. {$e->getMessage()}\n" );
				}
			}

			if ( $lastName !== null ) {
				$conds2 = [ 'img_name > ' . $dbw->addQuotes( $lastName ) ];
			}
			$lbFactory->waitForReplication();
			// A short (or empty) batch means there is nothing left to fetch.
		} while ( $numRows === $batchSize );

		$total = $upgraded + $leftAlone;
		if ( $force ) {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded needed to be refreshed, $leftAlone did not need to "
				. "be but were refreshed anyways, and $error refreshes were suspicious.\n" );
		} else {
			$this->output( "\nFinished refreshing file metadata for $total files. "
				. "$upgraded were refreshed, $leftAlone were already up to date, "
				. "and $error refreshes were suspicious.\n" );
		}
	}

	/**
	 * Print a warning when the refreshed metadata is noticeably shorter than
	 * what the database row held before.
	 *
	 * If after updating, the metadata is smaller than what it was before,
	 * that's probably not a good thing because we extract more data with time,
	 * not less. Thus this probably indicates an error of some sort, or at the
	 * very least is suspicious. The margin of 5 bytes is there just to weed
	 * out any inconsequential changes.
	 *
	 * @param File $file File object built from $row, after any upgrade
	 * @param stdClass $row Image table row as selected, with the old img_metadata
	 * @param bool $forced Whether the refresh was forced; appends " (forced)" to the warning
	 * @return bool True if a warning was printed
	 */
	private function warnIfMetadataShrank( $file, $row, $forced ) {
		$newLength = strlen( $file->getMetadata() );
		$oldLength = strlen( $row->img_metadata );
		if ( $newLength >= $oldLength - 5 ) {
			return false;
		}

		$suffix = $forced ? ' (forced)' : '';
		$this->output(
			"Warning: File:{$row->img_name} used to have " .
			"$oldLength bytes of metadata but now has $newLength bytes.{$suffix}\n"
		);

		return true;
	}

	/**
	 * Build the WHERE conditions derived from the --end, --mime, --mediatype
	 * and --metadata-contains options.
	 *
	 * @param IDatabase $dbw
	 * @return array
	 */
	private function getConditions( $dbw ) {
		$conds = [];

		$end = $this->getOption( 'end', false );
		$mime = $this->getOption( 'mime', false );
		$mediatype = $this->getOption( 'mediatype', false );
		$like = $this->getOption( 'metadata-contains', false );

		if ( $end !== false ) {
			$conds[] = 'img_name <= ' . $dbw->addQuotes( $end );
		}
		if ( $mime !== false ) {
			list( $major, $minor ) = File::splitMime( $mime );
			$conds['img_major_mime'] = $major;
			// A minor type of '*' is the wild-card: match on the major type only.
			if ( $minor !== '*' ) {
				$conds['img_minor_mime'] = $minor;
			}
		}
		if ( $mediatype !== false ) {
			$conds['img_media_type'] = $mediatype;
		}
		// Compare explicitly rather than by truthiness, so that a search
		// string such as "0" is not silently dropped.
		if ( $like !== false && $like !== '' ) {
			$conds[] = 'img_metadata ' . $dbw->buildLike( $dbw->anyString(), $like, $dbw->anyString() );
		}

		return $conds;
	}

	/**
	 * Reject incompatible options, then set $wgUpdateCompatibleMetadata for this run.
	 *
	 * @param bool $force
	 * @param bool $brokenOnly
	 */
	private function setupParameters( $force, $brokenOnly ) {
		global $wgUpdateCompatibleMetadata;

		// Validate before touching any global state.
		if ( $brokenOnly && $force ) {
			$this->fatalError( 'Cannot use --broken-only and --force together. ', 2 );
		}

		// With --broken-only, old but still compatible records are left alone.
		$wgUpdateCompatibleMetadata = !$brokenOnly;
	}
}
264
// Name the class to run, then hand control to the maintenance runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is expected to be defined by the
// Maintenance.php include at the top of this file — confirm.
$maintClass = RefreshImageMetadata::class;
require_once RUN_MAINTENANCE_IF_MAIN;
267