1<?php
2/**
3 * Base class for the backend of file upload.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Upload
22 */
23
24use MediaWiki\HookContainer\ProtectedHookAccessorTrait;
25use MediaWiki\MediaWikiServices;
26use MediaWiki\Permissions\Authority;
27use MediaWiki\Permissions\PermissionStatus;
28use MediaWiki\Shell\Shell;
29use MediaWiki\User\UserIdentity;
30
31/**
32 * @defgroup Upload Upload related
33 */
34
35/**
36 * @ingroup Upload
37 *
38 * UploadBase and subclasses are the backend of MediaWiki's file uploads.
39 * The frontends are formed by ApiUpload and SpecialUpload.
40 *
41 * @stable to extend
42 *
43 * @author Brion Vibber
44 * @author Bryan Tong Minh
45 * @author Michael Dale
46 */
47abstract class UploadBase {
48	use ProtectedHookAccessorTrait;
49
	/** @var string|null Local file system path to the file to upload (or a local copy) */
	protected $mTempPath;
	/** @var TempFSFile|null Wrapper to handle deleting the temp file */
	protected $tempFileObj;
	/** @var string|null Destination name as requested, before getTitle() filters it */
	protected $mDesiredDestName;
	/** @var string|null Name of the local file; filled in by validateName() on success */
	protected $mDestName;
	/** @var bool|null Flag passed to initializePathInfo(): remove the temporary file? */
	protected $mRemoveTempFile;
	/** @var string|null */
	protected $mSourceType;
	/**
	 * @var Title|bool|null Cached result of getTitle(): false while not yet
	 *  computed, null when the requested name was rejected
	 */
	protected $mTitle = false;
	/** @var int One of the error constants of this class; set by getTitle() when it rejects the name */
	protected $mTitleError = 0;
	/** @var string|null Sanitized file name computed by getTitle() */
	protected $mFilteredName;
	/** @var string|null Final file extension computed by getTitle() ('' if none) */
	protected $mFinalExtension;
	/** @var LocalFile|null */
	protected $mLocalFile;
	/** @var UploadStashFile|null */
	protected $mStashFile;
	/** @var int|null Size of the temporary file in bytes; see setTempFile() */
	protected $mFileSize;
	/** @var array|null File properties as computed by MWFileProps::getPropsFromPath() */
	protected $mFileProps;
	/** @var string[] Reported by validateName() under the 'blacklistedExt' key */
	protected $mBlackListedExtensions;
	/** @var bool|null Set to true by zipEntryCallback() when a .class entry is seen */
	protected $mJavaDetected;
	/** @var string|null */
	protected $mSVGNSError;
84
	/**
	 * @var string[] Names of XML character encodings.
	 *  NOTE(review): judging by the name, presumably the encodings treated as
	 *  safe when inspecting uploaded XML/SVG content; the code that reads this
	 *  list is not visible in this part of the file - confirm there.
	 */
	protected static $safeXmlEncodings = [
		'UTF-8',
		'ISO-8859-1',
		'ISO-8859-2',
		'UTF-16',
		'UTF-32',
		'WINDOWS-1250',
		'WINDOWS-1251',
		'WINDOWS-1252',
		'WINDOWS-1253',
		'WINDOWS-1254',
		'WINDOWS-1255',
		'WINDOWS-1256',
		'WINDOWS-1257',
		'WINDOWS-1258',
	];
101
	/*
	 * Verification status codes. verifyUpload() and validateName() report them
	 * under the 'status' key of their result array, and
	 * getVerificationErrorCode() maps the error values to short string codes.
	 * SUCCESS and OK are two names for the same value.
	 */
	public const SUCCESS = 0;
	public const OK = 0;
	public const EMPTY_FILE = 3;
	public const MIN_LENGTH_PARTNAME = 4;
	public const ILLEGAL_FILENAME = 5;
	public const OVERWRITE_EXISTING_FILE = 7; # Not used anymore; handled by verifyTitlePermissions()
	public const FILETYPE_MISSING = 8;
	public const FILETYPE_BADTYPE = 9;
	public const VERIFICATION_ERROR = 10;
	public const HOOK_ABORTED = 11;
	public const FILE_TOO_LARGE = 12;
	public const WINDOWS_NONASCII_FILENAME = 13;
	public const FILENAME_TOO_LONG = 14;
115
116	/**
117	 * @param int $error
118	 * @return string
119	 */
120	public function getVerificationErrorCode( $error ) {
121		$code_to_status = [
122			self::EMPTY_FILE => 'empty-file',
123			self::FILE_TOO_LARGE => 'file-too-large',
124			self::FILETYPE_MISSING => 'filetype-missing',
125			self::FILETYPE_BADTYPE => 'filetype-banned',
126			self::MIN_LENGTH_PARTNAME => 'filename-tooshort',
127			self::ILLEGAL_FILENAME => 'illegal-filename',
128			self::OVERWRITE_EXISTING_FILE => 'overwrite',
129			self::VERIFICATION_ERROR => 'verification-error',
130			self::HOOK_ABORTED => 'hookaborted',
131			self::WINDOWS_NONASCII_FILENAME => 'windows-nonascii-filename',
132			self::FILENAME_TOO_LONG => 'filename-toolong',
133		];
134		return $code_to_status[$error] ?? 'unknown-error';
135	}
136
	/**
	 * Returns true if uploads are enabled, i.e. both $wgEnableUploads and the
	 * PHP 'file_uploads' ini setting are on.
	 * Can be overridden by subclasses.
	 * @stable to override
	 * @return bool
	 */
	public static function isEnabled() {
		global $wgEnableUploads;

		return $wgEnableUploads && wfIniGetBool( 'file_uploads' );
	}
148
149	/**
150	 * Returns true if the user can use this upload module or else a string
151	 * identifying the missing permission.
152	 * Can be overridden by subclasses.
153	 *
154	 * @param Authority $performer
155	 * @return bool|string
156	 */
157	public static function isAllowed( Authority $performer ) {
158		foreach ( [ 'upload', 'edit' ] as $permission ) {
159			if ( !$performer->isAllowed( $permission ) ) {
160				return $permission;
161			}
162		}
163
164		return true;
165	}
166
	/**
	 * Returns true if the user has surpassed the upload rate limit, false otherwise.
	 *
	 * Delegates to $user->pingLimiter() with the 'upload' action.
	 *
	 * @param User $user
	 * @return bool
	 */
	public static function isThrottled( $user ) {
		return $user->pingLimiter( 'upload' );
	}
176
	/**
	 * @var string[] Upload handlers. Should probably just be a global.
	 *  createFromRequest() only accepts these source types when no hook
	 *  supplies a class; each one maps to a class named 'UploadFrom' . $type.
	 */
	private static $uploadHandlers = [ 'Stash', 'File', 'Url' ];
179
180	/**
181	 * Create a form of UploadBase depending on wpSourceType and initializes it
182	 *
183	 * @param WebRequest &$request
184	 * @param string|null $type
185	 * @return null|self
186	 */
187	public static function createFromRequest( &$request, $type = null ) {
188		$type = $type ?: $request->getVal( 'wpSourceType', 'File' );
189
190		if ( !$type ) {
191			return null;
192		}
193
194		// Get the upload class
195		$type = ucfirst( $type );
196
197		// Give hooks the chance to handle this request
198		/** @var self|null $className */
199		$className = null;
200		Hooks::runner()->onUploadCreateFromRequest( $type, $className );
201		if ( $className === null ) {
202			$className = 'UploadFrom' . $type;
203			wfDebug( __METHOD__ . ": class name: $className" );
204			if ( !in_array( $type, self::$uploadHandlers ) ) {
205				return null;
206			}
207		}
208
209		// Check whether this upload class is enabled
210		if ( !$className::isEnabled() ) {
211			return null;
212		}
213
214		// Check whether the request is valid
215		if ( !$className::isValidRequest( $request ) ) {
216			return null;
217		}
218
219		/** @var self $handler */
220		$handler = new $className;
221
222		$handler->initializeFromRequest( $request );
223
224		return $handler;
225	}
226
	/**
	 * Check whether a request is valid for this handler.
	 *
	 * The base implementation rejects every request; createFromRequest() calls
	 * this statically on the selected handler class.
	 *
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		return false;
	}
235
	/**
	 * Intentionally empty constructor; createFromRequest() instantiates
	 * handlers without arguments.
	 *
	 * @stable to call
	 */
	public function __construct() {
	}
241
	/**
	 * Returns the upload type. Should be overridden by child classes.
	 *
	 * @since 1.18
	 * @stable to override
	 * @return string|null The base implementation returns null
	 */
	public function getSourceType() {
		return null;
	}
252
253	/**
254	 * @param string $name The desired destination name
255	 * @param string $tempPath
256	 * @param int|null $fileSize
257	 * @param bool $removeTempFile (false) remove the temporary file?
258	 * @throws MWException
259	 */
260	public function initializePathInfo( $name, $tempPath, $fileSize, $removeTempFile = false ) {
261		$this->mDesiredDestName = $name;
262		if ( FileBackend::isStoragePath( $tempPath ) ) {
263			throw new MWException( __METHOD__ . " given storage path `$tempPath`." );
264		}
265
266		$this->setTempFile( $tempPath, $fileSize );
267		$this->mRemoveTempFile = $removeTempFile;
268	}
269
	/**
	 * Initialize from a WebRequest. Override this in a subclass.
	 *
	 * createFromRequest() calls this on the freshly constructed handler.
	 *
	 * @param WebRequest &$request
	 */
	abstract public function initializeFromRequest( &$request );
276
277	/**
278	 * @param string $tempPath File system path to temporary file containing the upload
279	 * @param int|null $fileSize
280	 */
281	protected function setTempFile( $tempPath, $fileSize = null ) {
282		$this->mTempPath = $tempPath;
283		$this->mFileSize = $fileSize ?: null;
284		if ( strlen( $this->mTempPath ) && file_exists( $this->mTempPath ) ) {
285			$this->tempFileObj = new TempFSFile( $this->mTempPath );
286			if ( !$fileSize ) {
287				$this->mFileSize = filesize( $this->mTempPath );
288			}
289		} else {
290			$this->tempFileObj = null;
291		}
292	}
293
	/**
	 * Fetch the file. Usually a no-op: the base implementation does nothing
	 * and reports success.
	 * @stable to override
	 * @return Status
	 */
	public function fetchFile() {
		return Status::newGood();
	}
302
	/**
	 * Return true if the file is empty, i.e. the recorded file size is zero
	 * or was never determined (null).
	 * @return bool
	 */
	public function isEmptyFile() {
		return empty( $this->mFileSize );
	}
310
	/**
	 * Return the file size
	 * @return int|null Size recorded by setTempFile(); null if none is known
	 */
	public function getFileSize() {
		return $this->mFileSize;
	}
318
	/**
	 * Get the base 36 SHA1 of the file at the temporary path.
	 * @stable to override
	 * @return string
	 */
	public function getTempFileSha1Base36() {
		return FSFile::getSha1Base36FromPath( $this->mTempPath );
	}
327
328	/**
329	 * @param string $srcPath The source path
330	 * @return string|bool The real path if it was a virtual URL Returns false on failure
331	 */
332	public function getRealPath( $srcPath ) {
333		$repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
334		if ( FileRepo::isVirtualUrl( $srcPath ) ) {
335			/** @todo Just make uploads work with storage paths UploadFromStash
336			 *  loads files via virtual URLs.
337			 */
338			$tmpFile = $repo->getLocalCopy( $srcPath );
339			if ( $tmpFile ) {
340				$tmpFile->bind( $this ); // keep alive with $this
341			}
342			$path = $tmpFile ? $tmpFile->getPath() : false;
343		} else {
344			$path = $srcPath;
345		}
346
347		return $path;
348	}
349
350	/**
351	 * Verify whether the upload is sane.
352	 *
353	 * Return a status array representing the outcome of the verification.
354	 * Possible keys are:
355	 * - 'status': set to self::OK in case of success, or to one of the error constants defined in
356	 *   this class in case of failure
357	 * - 'max': set to the maximum allowed file size ($wgMaxUploadSize) if the upload is too large
358	 * - 'details': set to error details if the file type is valid but contents are corrupt
359	 * - 'filtered': set to the sanitized file name if the requested file name is invalid
360	 * - 'finalExt': set to the file's file extension if it is not an allowed file extension
361	 * - 'blacklistedExt': set to the list of blacklisted file extensions if the current file extension
362	 *    is not allowed for uploads and the blacklist is not empty
363	 *
364	 * @stable to override
365	 * @return mixed[] array representing the result of the verification
366	 */
367	public function verifyUpload() {
368		/**
369		 * If there was no filename or a zero size given, give up quick.
370		 */
371		if ( $this->isEmptyFile() ) {
372			return [ 'status' => self::EMPTY_FILE ];
373		}
374
375		/**
376		 * Honor $wgMaxUploadSize
377		 */
378		$maxSize = self::getMaxUploadSize( $this->getSourceType() );
379		if ( $this->mFileSize > $maxSize ) {
380			return [
381				'status' => self::FILE_TOO_LARGE,
382				'max' => $maxSize,
383			];
384		}
385
386		/**
387		 * Look at the contents of the file; if we can recognize the
388		 * type but it's corrupt or data of the wrong type, we should
389		 * probably not accept it.
390		 */
391		$verification = $this->verifyFile();
392		if ( $verification !== true ) {
393			return [
394				'status' => self::VERIFICATION_ERROR,
395				'details' => $verification
396			];
397		}
398
399		/**
400		 * Make sure this file can be created
401		 */
402		$result = $this->validateName();
403		if ( $result !== true ) {
404			return $result;
405		}
406
407		return [ 'status' => self::OK ];
408	}
409
410	/**
411	 * Verify that the name is valid and, if necessary, that we can overwrite
412	 *
413	 * @return array|bool True if valid, otherwise an array with 'status'
414	 * and other keys
415	 */
416	public function validateName() {
417		$nt = $this->getTitle();
418		if ( $nt === null ) {
419			$result = [ 'status' => $this->mTitleError ];
420			if ( $this->mTitleError == self::ILLEGAL_FILENAME ) {
421				$result['filtered'] = $this->mFilteredName;
422			}
423			if ( $this->mTitleError == self::FILETYPE_BADTYPE ) {
424				$result['finalExt'] = $this->mFinalExtension;
425				if ( count( $this->mBlackListedExtensions ) ) {
426					$result['blacklistedExt'] = $this->mBlackListedExtensions;
427				}
428			}
429
430			return $result;
431		}
432		$this->mDestName = $this->getLocalFile()->getName();
433
434		return true;
435	}
436
437	/**
438	 * Verify the MIME type.
439	 *
440	 * @note Only checks that it is not an evil MIME. The "does it have
441	 *  correct extension given its MIME type?" check is in verifyFile.
442	 *  in `verifyFile()` that MIME type and file extension correlate.
443	 * @param string $mime Representing the MIME
444	 * @return array|bool True if the file is verified, an array otherwise
445	 */
446	protected function verifyMimeType( $mime ) {
447		global $wgVerifyMimeType, $wgVerifyMimeTypeIE;
448		if ( $wgVerifyMimeType ) {
449			wfDebug( "mime: <$mime> extension: <{$this->mFinalExtension}>" );
450			global $wgMimeTypeBlacklist;
451			if ( $this->checkFileExtension( $mime, $wgMimeTypeBlacklist ) ) {
452				return [ 'filetype-badmime', $mime ];
453			}
454
455			if ( $wgVerifyMimeTypeIE ) {
456				# Check what Internet Explorer would detect
457				$fp = fopen( $this->mTempPath, 'rb' );
458				if ( $fp ) {
459					$chunk = fread( $fp, 256 );
460					fclose( $fp );
461
462					$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
463					$extMime = $magic->getMimeTypeFromExtensionOrNull( (string)$this->mFinalExtension ) ?? '';
464					$ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime );
465					foreach ( $ieTypes as $ieType ) {
466						if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) {
467							return [ 'filetype-bad-ie-mime', $ieType ];
468						}
469					}
470				}
471			}
472		}
473
474		return true;
475	}
476
477	/**
478	 * Verifies that it's ok to include the uploaded file
479	 *
480	 * @return array|bool True of the file is verified, array otherwise.
481	 */
482	protected function verifyFile() {
483		global $wgVerifyMimeType, $wgDisableUploadScriptChecks;
484
485		$status = $this->verifyPartialFile();
486		if ( $status !== true ) {
487			return $status;
488		}
489
490		$mwProps = new MWFileProps( MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer() );
491		$this->mFileProps = $mwProps->getPropsFromPath( $this->mTempPath, $this->mFinalExtension );
492		$mime = $this->mFileProps['mime'];
493
494		if ( $wgVerifyMimeType ) {
495			# XXX: Missing extension will be caught by validateName() via getTitle()
496			if ( (string)$this->mFinalExtension !== '' &&
497				!$this->verifyExtension( $mime, $this->mFinalExtension )
498			) {
499				return [ 'filetype-mime-mismatch', $this->mFinalExtension, $mime ];
500			}
501		}
502
503		# check for htmlish code and javascript
504		if ( !$wgDisableUploadScriptChecks ) {
505			if ( $this->mFinalExtension == 'svg' || $mime == 'image/svg+xml' ) {
506				$svgStatus = $this->detectScriptInSvg( $this->mTempPath, false );
507				if ( $svgStatus !== false ) {
508					return $svgStatus;
509				}
510			}
511		}
512
513		$handler = MediaHandler::getHandler( $mime );
514		if ( $handler ) {
515			$handlerStatus = $handler->verifyUpload( $this->mTempPath );
516			if ( !$handlerStatus->isOK() ) {
517				$errors = $handlerStatus->getErrorsArray();
518
519				return reset( $errors );
520			}
521		}
522
523		$error = true;
524		$this->getHookRunner()->onUploadVerifyFile( $this, $mime, $error );
525		if ( $error !== true ) {
526			if ( !is_array( $error ) ) {
527				$error = [ $error ];
528			}
529			return $error;
530		}
531
532		wfDebug( __METHOD__ . ": all clear; passing." );
533
534		return true;
535	}
536
	/**
	 * A verification routine suitable for partial files
	 *
	 * Runs the blacklist checks, but not any checks that may
	 * assume the entire file is present.
	 *
	 * The checks run in this order: MIME type, script detection (including
	 * SVG), Java class files inside ZIP archives, virus scan. The first
	 * failing check determines the result. As a side effect the file
	 * properties are stored in $this->mFileProps.
	 *
	 * @return array|bool True if the file is valid, else an array with error message key.
	 */
	protected function verifyPartialFile() {
		global $wgAllowJavaUploads, $wgDisableUploadScriptChecks;

		# getTitle() sets some internal parameters like $this->mFinalExtension
		$this->getTitle();

		$mwProps = new MWFileProps( MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer() );
		$this->mFileProps = $mwProps->getPropsFromPath( $this->mTempPath, $this->mFinalExtension );

		# check MIME type, if desired
		# (this uses the 'file-mime' property, whereas verifyFile() uses 'mime')
		$mime = $this->mFileProps['file-mime'];
		$status = $this->verifyMimeType( $mime );
		if ( $status !== true ) {
			return $status;
		}

		# check for htmlish code and javascript
		if ( !$wgDisableUploadScriptChecks ) {
			if ( self::detectScript( $this->mTempPath, $mime, $this->mFinalExtension ) ) {
				return [ 'uploadscripted' ];
			}
			if ( $this->mFinalExtension == 'svg' || $mime == 'image/svg+xml' ) {
				# true: the file may be incomplete
				$svgStatus = $this->detectScriptInSvg( $this->mTempPath, true );
				if ( $svgStatus !== false ) {
					return $svgStatus;
				}
			}
		}

		# Check for Java applets, which if uploaded can bypass cross-site
		# restrictions.
		if ( !$wgAllowJavaUploads ) {
			# zipEntryCallback() flips this flag when it sees a .class entry
			$this->mJavaDetected = false;
			$zipStatus = ZipDirectoryReader::read( $this->mTempPath,
				[ $this, 'zipEntryCallback' ] );
			if ( !$zipStatus->isOK() ) {
				$errors = $zipStatus->getErrorsArray();
				$error = reset( $errors );
				# 'zip-wrong-format' is deliberately not treated as a failure;
				# any other ZIP error is reported
				if ( $error[0] !== 'zip-wrong-format' ) {
					return $error;
				}
			}
			if ( $this->mJavaDetected ) {
				return [ 'uploadjava' ];
			}
		}

		# Scan the uploaded file for viruses
		$virus = $this->detectVirus( $this->mTempPath );
		if ( $virus ) {
			return [ 'uploadvirus', $virus ];
		}

		return true;
	}
600
601	/**
602	 * Callback for ZipDirectoryReader to detect Java class files.
603	 *
604	 * @param array $entry
605	 */
606	public function zipEntryCallback( $entry ) {
607		$names = [ $entry['name'] ];
608
609		// If there is a null character, cut off the name at it, because JDK's
610		// ZIP_GetEntry() uses strcmp() if the name hashes match. If a file name
611		// were constructed which had ".class\0" followed by a string chosen to
612		// make the hash collide with the truncated name, that file could be
613		// returned in response to a request for the .class file.
614		$nullPos = strpos( $entry['name'], "\000" );
615		if ( $nullPos !== false ) {
616			$names[] = substr( $entry['name'], 0, $nullPos );
617		}
618
619		// If there is a trailing slash in the file name, we have to strip it,
620		// because that's what ZIP_GetEntry() does.
621		if ( preg_grep( '!\.class/?$!', $names ) ) {
622			$this->mJavaDetected = true;
623		}
624	}
625
	/**
	 * Alias for verifyTitlePermissions. The function was originally
	 * 'verifyPermissions', but that suggests it's checking the user, when it's
	 * really checking the title + user combination.
	 *
	 * @param Authority $performer to verify the permissions against
	 * @return array|bool An array as returned by getPermissionErrors or true
	 *   in case the user has proper permissions.
	 */
	public function verifyPermissions( Authority $performer ) {
		// Pure delegation; see verifyTitlePermissions() for the actual checks
		return $this->verifyTitlePermissions( $performer );
	}
638
639	/**
640	 * Check whether the user can edit, upload and create the image. This
641	 * checks only against the current title; if it returns errors, it may
642	 * very well be that another title will not give errors. Therefore
643	 * isAllowed() should be called as well for generic is-user-blocked or
644	 * can-user-upload checking.
645	 *
646	 * @param Authority $performer to verify the permissions against
647	 * @return array|bool An array as returned by getPermissionErrors or true
648	 *   in case the user has proper permissions.
649	 */
650	public function verifyTitlePermissions( Authority $performer ) {
651		/**
652		 * If the image is protected, non-sysop users won't be able
653		 * to modify it by uploading a new revision.
654		 */
655		$nt = $this->getTitle();
656		if ( $nt === null ) {
657			return true;
658		}
659
660		$status = PermissionStatus::newEmpty();
661		$performer->authorizeWrite( 'edit', $nt, $status );
662		$performer->authorizeWrite( 'upload', $nt, $status );
663		if ( !$nt->exists() ) {
664			$performer->authorizeWrite( 'create', $nt, $status );
665		}
666		if ( !$status->isGood() ) {
667			return $status->toLegacyErrorArray();
668		}
669
670		$overwriteError = $this->checkOverwrite( $performer );
671		if ( $overwriteError !== true ) {
672			return [ $overwriteError ];
673		}
674
675		return true;
676	}
677
678	/**
679	 * Check for non fatal problems with the file.
680	 *
681	 * This should not assume that mTempPath is set.
682	 *
683	 * @param User|null $user Accepted since 1.35
684	 *
685	 * @return mixed[] Array of warnings
686	 */
687	public function checkWarnings( $user = null ) {
688		if ( $user === null ) {
689			// TODO check uses and hard deprecate
690			$user = RequestContext::getMain()->getUser();
691		}
692
693		$warnings = [];
694
695		$localFile = $this->getLocalFile();
696		$localFile->load( File::READ_LATEST );
697		$filename = $localFile->getName();
698		$hash = $this->getTempFileSha1Base36();
699
700		$badFileName = $this->checkBadFileName( $filename, $this->mDesiredDestName );
701		if ( $badFileName !== null ) {
702			$warnings['badfilename'] = $badFileName;
703		}
704
705		$unwantedFileExtensionDetails = $this->checkUnwantedFileExtensions( (string)$this->mFinalExtension );
706		if ( $unwantedFileExtensionDetails !== null ) {
707			$warnings['filetype-unwanted-type'] = $unwantedFileExtensionDetails;
708		}
709
710		$fileSizeWarnings = $this->checkFileSize( $this->mFileSize );
711		if ( $fileSizeWarnings ) {
712			$warnings = array_merge( $warnings, $fileSizeWarnings );
713		}
714
715		$localFileExistsWarnings = $this->checkLocalFileExists( $localFile, $hash );
716		if ( $localFileExistsWarnings ) {
717			$warnings = array_merge( $warnings, $localFileExistsWarnings );
718		}
719
720		if ( $this->checkLocalFileWasDeleted( $localFile ) ) {
721			$warnings['was-deleted'] = $filename;
722		}
723
724		// If a file with the same name exists locally then the local file has already been tested
725		// for duplication of content
726		$ignoreLocalDupes = isset( $warnings['exists'] );
727		$dupes = $this->checkAgainstExistingDupes( $hash, $ignoreLocalDupes );
728		if ( $dupes ) {
729			$warnings['duplicate'] = $dupes;
730		}
731
732		$archivedDupes = $this->checkAgainstArchiveDupes( $hash, $user );
733		if ( $archivedDupes !== null ) {
734			$warnings['duplicate-archive'] = $archivedDupes;
735		}
736
737		return $warnings;
738	}
739
740	/**
741	 * Convert the warnings array returned by checkWarnings() to something that
742	 * can be serialized. File objects will be converted to an associative array
743	 * with the following keys:
744	 *
745	 *   - fileName: The name of the file
746	 *   - timestamp: The upload timestamp
747	 *
748	 * @param mixed[] $warnings
749	 * @return mixed[]
750	 */
751	public static function makeWarningsSerializable( $warnings ) {
752		array_walk_recursive( $warnings, static function ( &$param, $key ) {
753			if ( $param instanceof File ) {
754				$param = [
755					'fileName' => $param->getName(),
756					'timestamp' => $param->getTimestamp()
757				];
758			} elseif ( is_object( $param ) ) {
759				throw new InvalidArgumentException(
760					'UploadBase::makeWarningsSerializable: ' .
761					'Unexpected object of class ' . get_class( $param ) );
762			}
763		} );
764		return $warnings;
765	}
766
767	/**
768	 * Check whether the resulting filename is different from the desired one,
769	 * but ignore things like ucfirst() and spaces/underscore things
770	 *
771	 * @param string $filename
772	 * @param string $desiredFileName
773	 *
774	 * @return string|null String that was determined to be bad or null if the filename is okay
775	 */
776	private function checkBadFileName( $filename, $desiredFileName ) {
777		$comparableName = str_replace( ' ', '_', $desiredFileName );
778		$comparableName = Title::capitalize( $comparableName, NS_FILE );
779
780		if ( $desiredFileName != $filename && $comparableName != $filename ) {
781			return $filename;
782		}
783
784		return null;
785	}
786
787	/**
788	 * @param string $fileExtension The file extension to check
789	 *
790	 * @return array|null array with the following keys:
791	 *                    0 => string The final extension being used
792	 *                    1 => string[] The extensions that are allowed
793	 *                    2 => int The number of extensions that are allowed.
794	 */
795	private function checkUnwantedFileExtensions( $fileExtension ) {
796		global $wgCheckFileExtensions, $wgFileExtensions, $wgLang;
797
798		if ( $wgCheckFileExtensions ) {
799			$extensions = array_unique( $wgFileExtensions );
800			if ( !$this->checkFileExtension( $fileExtension, $extensions ) ) {
801				return [
802					$fileExtension,
803					$wgLang->commaList( $extensions ),
804					count( $extensions )
805				];
806			}
807		}
808
809		return null;
810	}
811
812	/**
813	 * @param int $fileSize
814	 *
815	 * @return array warnings
816	 */
817	private function checkFileSize( $fileSize ) {
818		global $wgUploadSizeWarning;
819
820		$warnings = [];
821
822		if ( $wgUploadSizeWarning && ( $fileSize > $wgUploadSizeWarning ) ) {
823			$warnings['large-file'] = [
824				Message::sizeParam( $wgUploadSizeWarning ),
825				Message::sizeParam( $fileSize ),
826			];
827		}
828
829		if ( $fileSize == 0 ) {
830			$warnings['empty-file'] = true;
831		}
832
833		return $warnings;
834	}
835
836	/**
837	 * @param LocalFile $localFile
838	 * @param string $hash sha1 hash of the file to check
839	 *
840	 * @return array warnings
841	 */
842	private function checkLocalFileExists( LocalFile $localFile, $hash ) {
843		$warnings = [];
844
845		$exists = self::getExistsWarning( $localFile );
846		if ( $exists !== false ) {
847			$warnings['exists'] = $exists;
848
849			// check if file is an exact duplicate of current file version
850			if ( $hash === $localFile->getSha1() ) {
851				$warnings['no-change'] = $localFile;
852			}
853
854			// check if file is an exact duplicate of older versions of this file
855			$history = $localFile->getHistory();
856			foreach ( $history as $oldFile ) {
857				if ( $hash === $oldFile->getSha1() ) {
858					$warnings['duplicate-version'][] = $oldFile;
859				}
860			}
861		}
862
863		return $warnings;
864	}
865
	/**
	 * Tell whether the file was deleted before and does not exist now.
	 *
	 * @param LocalFile $localFile
	 *
	 * @return bool
	 */
	private function checkLocalFileWasDeleted( LocalFile $localFile ) {
		return $localFile->wasDeleted() && !$localFile->exists();
	}
869
870	/**
871	 * @param string $hash sha1 hash of the file to check
872	 * @param bool $ignoreLocalDupes True to ignore local duplicates
873	 *
874	 * @return File[] Duplicate files, if found.
875	 */
876	private function checkAgainstExistingDupes( $hash, $ignoreLocalDupes ) {
877		$dupes = MediaWikiServices::getInstance()->getRepoGroup()->findBySha1( $hash );
878		$title = $this->getTitle();
879		foreach ( $dupes as $key => $dupe ) {
880			if (
881				( $dupe instanceof LocalFile ) &&
882				$ignoreLocalDupes &&
883				$title->equals( $dupe->getTitle() )
884			) {
885				unset( $dupes[$key] );
886			}
887		}
888
889		return $dupes;
890	}
891
892	/**
893	 * @param string $hash sha1 hash of the file to check
894	 * @param User $user
895	 *
896	 * @return string|null Name of the dupe or empty string if discovered (depending on visibility)
897	 *                     null if the check discovered no dupes.
898	 */
899	private function checkAgainstArchiveDupes( $hash, User $user ) {
900		$archivedFile = new ArchivedFile( null, 0, '', $hash );
901		if ( $archivedFile->getID() > 0 ) {
902			if ( $archivedFile->userCan( File::DELETED_FILE, $user ) ) {
903				return $archivedFile->getName();
904			} else {
905				return '';
906			}
907		}
908
909		return null;
910	}
911
	/**
	 * Really perform the upload. Stores the file in the local repo, watches
	 * if necessary and runs the UploadComplete hook.
	 *
	 * Before anything is stored, the UploadVerifyUpload hook gets a chance to
	 * veto the upload; in that case a fatal Status is returned and nothing is
	 * stored.
	 *
	 * @param string $comment
	 * @param string $pageText
	 * @param bool $watch Whether the file page should be added to user's watchlist.
	 *   (This doesn't check $user's permissions.)
	 * @param User $user
	 * @param string[] $tags Change tags to add to the log entry and page revision.
	 *   (This doesn't check $user's permissions.)
	 * @param string|null $watchlistExpiry Optional watchlist expiry timestamp in any format
	 *   acceptable to wfTimestamp().
	 * @return Status Indicating whether the upload succeeded.
	 *
	 * @since 1.35 Accepts $watchlistExpiry parameter.
	 */
	public function performUpload(
		$comment, $pageText, $watch, $user, $tags = [], ?string $watchlistExpiry = null
	) {
		$this->getLocalFile()->load( File::READ_LATEST );
		$props = $this->mFileProps;

		// Hooks veto the upload by setting $error to a message key or to an
		// array of message key plus parameters
		$error = null;
		$this->getHookRunner()->onUploadVerifyUpload( $this, $user, $props, $comment, $pageText, $error );
		if ( $error ) {
			if ( !is_array( $error ) ) {
				$error = [ $error ];
			}
			return Status::newFatal( ...$error );
		}

		$status = $this->getLocalFile()->upload(
			$this->mTempPath,
			$comment,
			$pageText,
			File::DELETE_SOURCE,
			$props,
			false,
			$user,
			$tags
		);

		// Watching, the UploadComplete hook and post-processing only happen
		// when the upload status is good
		if ( $status->isGood() ) {
			if ( $watch ) {
				WatchAction::doWatch(
					$this->getLocalFile()->getTitle(),
					$user,
					User::IGNORE_USER_RIGHTS,
					$watchlistExpiry
				);
			}
			$this->getHookRunner()->onUploadComplete( $this );

			$this->postProcessUpload();
		}

		return $status;
	}
971
	/**
	 * Perform extra steps after a successful upload.
	 *
	 * Called by performUpload() after the UploadComplete hook has run. The
	 * base implementation does nothing.
	 *
	 * @stable to override
	 * @since  1.25
	 */
	public function postProcessUpload() {
	}
980
981	/**
982	 * Returns the title of the file to be uploaded. Sets mTitleError in case
983	 * the name was illegal.
984	 *
985	 * @return Title|null The title of the file or null in case the name was illegal
986	 */
987	public function getTitle() {
988		if ( $this->mTitle !== false ) {
989			return $this->mTitle;
990		}
991		if ( !is_string( $this->mDesiredDestName ) ) {
992			$this->mTitleError = self::ILLEGAL_FILENAME;
993			$this->mTitle = null;
994
995			return $this->mTitle;
996		}
		/* Assume that if a user specified File:Something.jpg, this is an error
		 * and that the namespace prefix needs to be stripped off.
		 */
1000		$title = Title::newFromText( $this->mDesiredDestName );
1001		if ( $title && $title->getNamespace() === NS_FILE ) {
1002			$this->mFilteredName = $title->getDBkey();
1003		} else {
1004			$this->mFilteredName = $this->mDesiredDestName;
1005		}
1006
1007		# oi_archive_name is max 255 bytes, which include a timestamp and an
1008		# exclamation mark, so restrict file name to 240 bytes.
1009		if ( strlen( $this->mFilteredName ) > 240 ) {
1010			$this->mTitleError = self::FILENAME_TOO_LONG;
1011			$this->mTitle = null;
1012
1013			return $this->mTitle;
1014		}
1015
1016		/**
1017		 * Chop off any directories in the given filename. Then
1018		 * filter out illegal characters, and try to make a legible name
1019		 * out of it. We'll strip some silently that Title would die on.
1020		 */
1021		$this->mFilteredName = wfStripIllegalFilenameChars( $this->mFilteredName );
1022		/* Normalize to title form before we do any further processing */
1023		$nt = Title::makeTitleSafe( NS_FILE, $this->mFilteredName );
1024		if ( $nt === null ) {
1025			$this->mTitleError = self::ILLEGAL_FILENAME;
1026			$this->mTitle = null;
1027
1028			return $this->mTitle;
1029		}
1030		$this->mFilteredName = $nt->getDBkey();
1031
1032		/**
1033		 * We'll want to blacklist against *any* 'extension', and use
1034		 * only the final one for the whitelist.
1035		 */
1036		list( $partname, $ext ) = $this->splitExtensions( $this->mFilteredName );
1037
1038		if ( $ext !== [] ) {
1039			$this->mFinalExtension = trim( end( $ext ) );
1040		} else {
1041			$this->mFinalExtension = '';
1042
1043			// No extension, try guessing one from the temporary file
1044			// FIXME: Sometimes we mTempPath isn't set yet here, possibly due to an unrealistic
1045			// or incomplete test case in UploadBaseTest (T272328)
1046			if ( $this->mTempPath !== null ) {
1047				$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
1048				$mime = $magic->guessMimeType( $this->mTempPath );
1049				if ( $mime !== 'unknown/unknown' ) {
1050					# Get a space separated list of extensions
1051					$mimeExt = $magic->getExtensionFromMimeTypeOrNull( $mime );
1052					if ( $mimeExt !== null ) {
1053						# Set the extension to the canonical extension
1054						$this->mFinalExtension = $mimeExt;
1055
1056						# Fix up the other variables
1057						$this->mFilteredName .= ".{$this->mFinalExtension}";
1058						$nt = Title::makeTitleSafe( NS_FILE, $this->mFilteredName );
1059						$ext = [ $this->mFinalExtension ];
1060					}
1061				}
1062			}
1063		}
1064
1065		/* Don't allow users to override the blacklist (check file extension) */
1066		global $wgCheckFileExtensions, $wgStrictFileExtensions;
1067		global $wgFileExtensions, $wgFileBlacklist;
1068
1069		$blackListedExtensions = $this->checkFileExtensionList( $ext, $wgFileBlacklist );
1070
1071		if ( $this->mFinalExtension == '' ) {
1072			$this->mTitleError = self::FILETYPE_MISSING;
1073			$this->mTitle = null;
1074
1075			return $this->mTitle;
1076		} elseif ( $blackListedExtensions ||
1077			( $wgCheckFileExtensions && $wgStrictFileExtensions &&
1078				!$this->checkFileExtension( $this->mFinalExtension, $wgFileExtensions ) )
1079		) {
1080			$this->mBlackListedExtensions = $blackListedExtensions;
1081			$this->mTitleError = self::FILETYPE_BADTYPE;
1082			$this->mTitle = null;
1083
1084			return $this->mTitle;
1085		}
1086
1087		// Windows may be broken with special characters, see T3780
1088		if ( !preg_match( '/^[\x0-\x7f]*$/', $nt->getText() )
1089			&& !MediaWikiServices::getInstance()->getRepoGroup()
1090				->getLocalRepo()->backendSupportsUnicodePaths()
1091		) {
1092			$this->mTitleError = self::WINDOWS_NONASCII_FILENAME;
1093			$this->mTitle = null;
1094
1095			return $this->mTitle;
1096		}
1097
1098		# If there was more than one "extension", reassemble the base
1099		# filename to prevent bogus complaints about length
1100		if ( count( $ext ) > 1 ) {
1101			$iterations = count( $ext ) - 1;
1102			for ( $i = 0; $i < $iterations; $i++ ) {
1103				$partname .= '.' . $ext[$i];
1104			}
1105		}
1106
1107		if ( strlen( $partname ) < 1 ) {
1108			$this->mTitleError = self::MIN_LENGTH_PARTNAME;
1109			$this->mTitle = null;
1110
1111			return $this->mTitle;
1112		}
1113
1114		$this->mTitle = $nt;
1115
1116		return $this->mTitle;
1117	}
1118
1119	/**
1120	 * Return the local file and initializes if necessary.
1121	 *
1122	 * @stable to override
1123	 * @return LocalFile|null
1124	 */
1125	public function getLocalFile() {
1126		if ( $this->mLocalFile === null ) {
1127			$nt = $this->getTitle();
1128			$this->mLocalFile = $nt === null
1129				? null
1130				: MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()->newFile( $nt );
1131		}
1132
1133		return $this->mLocalFile;
1134	}
1135
1136	/**
1137	 * @return UploadStashFile|null
1138	 */
1139	public function getStashFile() {
1140		return $this->mStashFile;
1141	}
1142
1143	/**
1144	 * Like stashFile(), but respects extensions' wishes to prevent the stashing. verifyUpload() must
1145	 * be called before calling this method (unless $isPartial is true).
1146	 *
1147	 * Upload stash exceptions are also caught and converted to an error status.
1148	 *
1149	 * @since 1.28
1150	 * @stable to override
1151	 * @param User $user
1152	 * @param bool $isPartial Pass `true` if this is a part of a chunked upload (not a complete file).
1153	 * @return Status If successful, value is an UploadStashFile instance
1154	 */
1155	public function tryStashFile( User $user, $isPartial = false ) {
1156		if ( !$isPartial ) {
1157			$error = $this->runUploadStashFileHook( $user );
1158			if ( $error ) {
1159				return Status::newFatal( ...$error );
1160			}
1161		}
1162		try {
1163			$file = $this->doStashFile( $user );
1164			return Status::newGood( $file );
1165		} catch ( UploadStashException $e ) {
1166			return Status::newFatal( 'uploadstash-exception', get_class( $e ), $e->getMessage() );
1167		}
1168	}
1169
1170	/**
1171	 * @param User $user
1172	 * @return array|null Error message and parameters, null if there's no error
1173	 */
1174	protected function runUploadStashFileHook( User $user ) {
1175		$props = $this->mFileProps;
1176		$error = null;
1177		$this->getHookRunner()->onUploadStashFile( $this, $user, $props, $error );
1178		if ( $error && !is_array( $error ) ) {
1179			$error = [ $error ];
1180		}
1181		return $error;
1182	}
1183
1184	/**
1185	 * If the user does not supply all necessary information in the first upload
1186	 * form submission (either by accident or by design) then we may want to
1187	 * stash the file temporarily, get more information, and publish the file
1188	 * later.
1189	 *
1190	 * This method will stash a file in a temporary directory for later
1191	 * processing, and save the necessary descriptive info into the database.
1192	 * This method returns the file object, which also has a 'fileKey' property
1193	 * which can be passed through a form or API request to find this stashed
1194	 * file again.
1195	 *
1196	 * @deprecated since 1.28 Use tryStashFile() instead
1197	 * @param User|null $user
1198	 * @return UploadStashFile Stashed file
1199	 * @throws UploadStashBadPathException
1200	 * @throws UploadStashFileException
1201	 * @throws UploadStashNotLoggedInException
1202	 */
1203	public function stashFile( User $user = null ) {
1204		wfDeprecated( __METHOD__, '1.28' );
1205
1206		return $this->doStashFile( $user );
1207	}
1208
1209	/**
1210	 * Implementation for stashFile() and tryStashFile().
1211	 *
1212	 * @stable to override
1213	 * @param User|null $user
1214	 * @return UploadStashFile Stashed file
1215	 */
1216	protected function doStashFile( User $user = null ) {
1217		$stash = MediaWikiServices::getInstance()->getRepoGroup()
1218			->getLocalRepo()->getUploadStash( $user );
1219		$file = $stash->stashFile( $this->mTempPath, $this->getSourceType() );
1220		$this->mStashFile = $file;
1221
1222		return $file;
1223	}
1224
1225	/**
1226	 * If we've modified the upload file we need to manually remove it
1227	 * on exit to clean up.
1228	 */
1229	public function cleanupTempFile() {
1230		if ( $this->mRemoveTempFile && $this->tempFileObj ) {
1231			// Delete when all relevant TempFSFile handles go out of scope
1232			wfDebug( __METHOD__ . ": Marked temporary file '{$this->mTempPath}' for removal" );
1233			$this->tempFileObj->autocollect();
1234		}
1235	}
1236
	/**
	 * Get the local file system path of the file being uploaded (or a local copy).
	 *
	 * @return string|null
	 */
	public function getTempPath() {
		return $this->mTempPath;
	}
1240
1241	/**
1242	 * Split a file into a base name and all dot-delimited 'extensions'
1243	 * on the end. Some web server configurations will fall back to
1244	 * earlier pseudo-'extensions' to determine type and execute
1245	 * scripts, so the blacklist needs to check them all.
1246	 *
1247	 * @param string $filename
1248	 * @return array [ string, string[] ]
1249	 */
1250	public static function splitExtensions( $filename ) {
1251		$bits = explode( '.', $filename );
1252		$basename = array_shift( $bits );
1253
1254		return [ $basename, $bits ];
1255	}
1256
1257	/**
1258	 * Perform case-insensitive match against a list of file extensions.
1259	 * Returns true if the extension is in the list.
1260	 *
1261	 * @param string $ext
1262	 * @param array $list
1263	 * @return bool
1264	 */
1265	public static function checkFileExtension( $ext, $list ) {
1266		return in_array( strtolower( $ext ), $list );
1267	}
1268
1269	/**
1270	 * Perform case-insensitive match against a list of file extensions.
1271	 * Returns an array of matching extensions.
1272	 *
1273	 * @param string[] $ext
1274	 * @param string[] $list
1275	 * @return string[]
1276	 */
1277	public static function checkFileExtensionList( $ext, $list ) {
1278		return array_intersect( array_map( 'strtolower', $ext ), $list );
1279	}
1280
1281	/**
1282	 * Checks if the MIME type of the uploaded file matches the file extension.
1283	 *
1284	 * @param string $mime The MIME type of the uploaded file
1285	 * @param string $extension The filename extension that the file is to be served with
1286	 * @return bool
1287	 */
1288	public static function verifyExtension( $mime, $extension ) {
1289		$magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
1290
1291		if ( !$mime || $mime == 'unknown' || $mime == 'unknown/unknown' ) {
1292			if ( !$magic->isRecognizableExtension( $extension ) ) {
1293				wfDebug( __METHOD__ . ": passing file with unknown detected mime type; " .
1294					"unrecognized extension '$extension', can't verify" );
1295
1296				return true;
1297			} else {
1298				wfDebug( __METHOD__ . ": rejecting file with unknown detected mime type; " .
1299					"recognized extension '$extension', so probably invalid file" );
1300
1301				return false;
1302			}
1303		}
1304
1305		$match = $magic->isMatchingExtension( $extension, $mime );
1306
1307		if ( $match === null ) {
1308			if ( $magic->getMimeTypesFromExtension( $extension ) !== [] ) {
1309				wfDebug( __METHOD__ . ": No extension known for $mime, but we know a mime for $extension" );
1310
1311				return false;
1312			} else {
1313				wfDebug( __METHOD__ . ": no file extension known for mime type $mime, passing file" );
1314
1315				return true;
1316			}
1317		} elseif ( $match === true ) {
1318			wfDebug( __METHOD__ . ": mime type $mime matches extension $extension, passing file" );
1319
1320			/** @todo If it's a bitmap, make sure PHP or ImageMagick resp. can handle it! */
1321			return true;
1322		} else {
1323			wfDebug( __METHOD__
1324				. ": mime type $mime mismatches file extension $extension, rejecting file" );
1325
1326			return false;
1327		}
1328	}
1329
1330	/**
1331	 * Heuristic for detecting files that *could* contain JavaScript instructions or
1332	 * things that may look like HTML to a browser and are thus
1333	 * potentially harmful. The present implementation will produce false
1334	 * positives in some situations.
1335	 *
1336	 * @param string $file Pathname to the temporary upload file
1337	 * @param string $mime The MIME type of the file
1338	 * @param string|null $extension The extension of the file
1339	 * @return bool True if the file contains something looking like embedded scripts
1340	 */
1341	public static function detectScript( $file, $mime, $extension ) {
1342		# ugly hack: for text files, always look at the entire file.
1343		# For binary field, just check the first K.
1344
1345		$isText = strpos( $mime, 'text/' ) === 0;
1346		if ( $isText ) {
1347			$chunk = file_get_contents( $file );
1348		} else {
1349			$fp = fopen( $file, 'rb' );
1350			if ( !$fp ) {
1351				return false;
1352			}
1353			$chunk = fread( $fp, 1024 );
1354			fclose( $fp );
1355		}
1356
1357		$chunk = strtolower( $chunk );
1358
1359		if ( !$chunk ) {
1360			return false;
1361		}
1362
1363		# decode from UTF-16 if needed (could be used for obfuscation).
1364		if ( substr( $chunk, 0, 2 ) == "\xfe\xff" ) {
1365			$enc = 'UTF-16BE';
1366		} elseif ( substr( $chunk, 0, 2 ) == "\xff\xfe" ) {
1367			$enc = 'UTF-16LE';
1368		} else {
1369			$enc = null;
1370		}
1371
1372		if ( $enc !== null ) {
1373			$chunk = iconv( $enc, "ASCII//IGNORE", $chunk );
1374		}
1375
1376		$chunk = trim( $chunk );
1377
1378		/** @todo FIXME: Convert from UTF-16 if necessary! */
1379		wfDebug( __METHOD__ . ": checking for embedded scripts and HTML stuff" );
1380
1381		# check for HTML doctype
1382		if ( preg_match( "/<!DOCTYPE *X?HTML/i", $chunk ) ) {
1383			return true;
1384		}
1385
1386		// Some browsers will interpret obscure xml encodings as UTF-8, while
1387		// PHP/expat will interpret the given encoding in the xml declaration (T49304)
1388		if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) {
1389			if ( self::checkXMLEncodingMissmatch( $file ) ) {
1390				return true;
1391			}
1392		}
1393
1394		// Quick check for HTML heuristics in old IE and Safari.
1395		//
1396		// The exact heuristics IE uses are checked separately via verifyMimeType(), so we
1397		// don't need them all here as it can cause many false positives.
1398		//
1399		// Check for `<script` and such still to forbid script tags and embedded HTML in SVG:
1400		$tags = [
1401			'<body',
1402			'<head',
1403			'<html', # also in safari
1404			'<script', # also in safari
1405		];
1406
1407		foreach ( $tags as $tag ) {
1408			if ( strpos( $chunk, $tag ) !== false ) {
1409				wfDebug( __METHOD__ . ": found something that may make it be mistaken for html: $tag" );
1410
1411				return true;
1412			}
1413		}
1414
1415		/*
1416		 * look for JavaScript
1417		 */
1418
1419		# resolve entity-refs to look at attributes. may be harsh on big files... cache result?
1420		$chunk = Sanitizer::decodeCharReferences( $chunk );
1421
1422		# look for script-types
1423		if ( preg_match( '!type\s*=\s*[\'"]?\s*(?:\w*/)?(?:ecma|java)!sim', $chunk ) ) {
1424			wfDebug( __METHOD__ . ": found script types" );
1425
1426			return true;
1427		}
1428
1429		# look for html-style script-urls
1430		if ( preg_match( '!(?:href|src|data)\s*=\s*[\'"]?\s*(?:ecma|java)script:!sim', $chunk ) ) {
1431			wfDebug( __METHOD__ . ": found html-style script urls" );
1432
1433			return true;
1434		}
1435
1436		# look for css-style script-urls
1437		if ( preg_match( '!url\s*\(\s*[\'"]?\s*(?:ecma|java)script:!sim', $chunk ) ) {
1438			wfDebug( __METHOD__ . ": found css-style script urls" );
1439
1440			return true;
1441		}
1442
1443		wfDebug( __METHOD__ . ": no scripts found" );
1444
1445		return false;
1446	}
1447
1448	/**
1449	 * Check a whitelist of xml encodings that are known not to be interpreted differently
1450	 * by the server's xml parser (expat) and some common browsers.
1451	 *
1452	 * @param string $file Pathname to the temporary upload file
1453	 * @return bool True if the file contains an encoding that could be misinterpreted
1454	 */
1455	public static function checkXMLEncodingMissmatch( $file ) {
1456		global $wgSVGMetadataCutoff;
1457		$contents = file_get_contents( $file, false, null, 0, $wgSVGMetadataCutoff );
1458		$encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
1459
1460		if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) {
1461			if ( preg_match( $encodingRegex, $matches[1], $encMatch )
1462				&& !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
1463			) {
1464				wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'" );
1465
1466				return true;
1467			}
1468		} elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
1469			// Start of XML declaration without an end in the first $wgSVGMetadataCutoff
1470			// bytes. There shouldn't be a legitimate reason for this to happen.
1471			wfDebug( __METHOD__ . ": Unmatched XML declaration start" );
1472
1473			return true;
1474		} elseif ( substr( $contents, 0, 4 ) == "\x4C\x6F\xA7\x94" ) {
1475			// EBCDIC encoded XML
1476			wfDebug( __METHOD__ . ": EBCDIC Encoded XML" );
1477
1478			return true;
1479		}
1480
1481		// It's possible the file is encoded with multi-byte encoding, so re-encode attempt to
1482		// detect the encoding in case is specifies an encoding not whitelisted in self::$safeXmlEncodings
1483		$attemptEncodings = [ 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' ];
1484		foreach ( $attemptEncodings as $encoding ) {
1485			Wikimedia\suppressWarnings();
1486			$str = iconv( $encoding, 'UTF-8', $contents );
1487			Wikimedia\restoreWarnings();
1488			if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) {
1489				if ( preg_match( $encodingRegex, $matches[1], $encMatch )
1490					&& !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
1491				) {
1492					wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'" );
1493
1494					return true;
1495				}
1496			} elseif ( $str != '' && preg_match( "!<\?xml\b!si", $str ) ) {
1497				// Start of XML declaration without an end in the first $wgSVGMetadataCutoff
1498				// bytes. There shouldn't be a legitimate reason for this to happen.
1499				wfDebug( __METHOD__ . ": Unmatched XML declaration start" );
1500
1501				return true;
1502			}
1503		}
1504
1505		return false;
1506	}
1507
1508	/**
1509	 * @param string $filename
1510	 * @param bool $partial
1511	 * @return bool|array
1512	 */
1513	protected function detectScriptInSvg( $filename, $partial ) {
1514		$this->mSVGNSError = false;
1515		$check = new XmlTypeCheck(
1516			$filename,
1517			[ $this, 'checkSvgScriptCallback' ],
1518			true,
1519			[
1520				'processing_instruction_handler' => [ __CLASS__, 'checkSvgPICallback' ],
1521				'external_dtd_handler' => [ __CLASS__, 'checkSvgExternalDTD' ],
1522			]
1523		);
1524		if ( $check->wellFormed !== true ) {
1525			// Invalid xml (T60553)
1526			// But only when non-partial (T67724)
1527			return $partial ? false : [ 'uploadinvalidxml' ];
1528		} elseif ( $check->filterMatch ) {
1529			if ( $this->mSVGNSError ) {
1530				return [ 'uploadscriptednamespace', $this->mSVGNSError ];
1531			}
1532
1533			return $check->filterMatchType;
1534		}
1535
1536		return false;
1537	}
1538
1539	/**
1540	 * Callback to filter SVG Processing Instructions.
1541	 * @param string $target Processing instruction name
1542	 * @param string $data Processing instruction attribute and value
1543	 * @return bool|array
1544	 */
1545	public static function checkSvgPICallback( $target, $data ) {
1546		// Don't allow external stylesheets (T59550)
1547		if ( preg_match( '/xml-stylesheet/i', $target ) ) {
1548			return [ 'upload-scripted-pi-callback' ];
1549		}
1550
1551		return false;
1552	}
1553
1554	/**
1555	 * Verify that DTD urls referenced are only the standard dtds
1556	 *
1557	 * Browsers seem to ignore external dtds. However just to be on the
1558	 * safe side, only allow dtds from the svg standard.
1559	 *
1560	 * @param string $type PUBLIC or SYSTEM
1561	 * @param string $publicId The well-known public identifier for the dtd
1562	 * @param string $systemId The url for the external dtd
1563	 * @return bool|array
1564	 */
1565	public static function checkSvgExternalDTD( $type, $publicId, $systemId ) {
1566		// This doesn't include the XHTML+MathML+SVG doctype since we don't
1567		// allow XHTML anyways.
1568		$allowedDTDs = [
1569			'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd',
1570			'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd',
1571			'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd',
1572			'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd',
1573			// https://phabricator.wikimedia.org/T168856
1574			'http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd',
1575		];
1576		if ( $type !== 'PUBLIC'
1577			|| !in_array( $systemId, $allowedDTDs )
1578			|| strpos( $publicId, "-//W3C//" ) !== 0
1579		) {
1580			return [ 'upload-scripted-dtd' ];
1581		}
1582		return false;
1583	}
1584
1585	/**
1586	 * @todo Replace this with a whitelist filter!
1587	 * @param string $element
1588	 * @param array $attribs
1589	 * @param string|null $data
1590	 * @return bool|array
1591	 */
1592	public function checkSvgScriptCallback( $element, $attribs, $data = null ) {
1593		list( $namespace, $strippedElement ) = $this->splitXmlNamespace( $element );
1594
1595		// We specifically don't include:
1596		// http://www.w3.org/1999/xhtml (T62771)
1597		static $validNamespaces = [
1598			'',
1599			'adobe:ns:meta/',
1600			'http://creativecommons.org/ns#',
1601			'http://inkscape.sourceforge.net/dtd/sodipodi-0.dtd',
1602			'http://ns.adobe.com/adobeillustrator/10.0/',
1603			'http://ns.adobe.com/adobesvgviewerextensions/3.0/',
1604			'http://ns.adobe.com/extensibility/1.0/',
1605			'http://ns.adobe.com/flows/1.0/',
1606			'http://ns.adobe.com/illustrator/1.0/',
1607			'http://ns.adobe.com/imagereplacement/1.0/',
1608			'http://ns.adobe.com/pdf/1.3/',
1609			'http://ns.adobe.com/photoshop/1.0/',
1610			'http://ns.adobe.com/saveforweb/1.0/',
1611			'http://ns.adobe.com/variables/1.0/',
1612			'http://ns.adobe.com/xap/1.0/',
1613			'http://ns.adobe.com/xap/1.0/g/',
1614			'http://ns.adobe.com/xap/1.0/g/img/',
1615			'http://ns.adobe.com/xap/1.0/mm/',
1616			'http://ns.adobe.com/xap/1.0/rights/',
1617			'http://ns.adobe.com/xap/1.0/stype/dimensions#',
1618			'http://ns.adobe.com/xap/1.0/stype/font#',
1619			'http://ns.adobe.com/xap/1.0/stype/manifestitem#',
1620			'http://ns.adobe.com/xap/1.0/stype/resourceevent#',
1621			'http://ns.adobe.com/xap/1.0/stype/resourceref#',
1622			'http://ns.adobe.com/xap/1.0/t/pg/',
1623			'http://purl.org/dc/elements/1.1/',
1624			'http://purl.org/dc/elements/1.1',
1625			'http://schemas.microsoft.com/visio/2003/svgextensions/',
1626			'http://sodipodi.sourceforge.net/dtd/sodipodi-0.dtd',
1627			'http://taptrix.com/inkpad/svg_extensions',
1628			'http://web.resource.org/cc/',
1629			'http://www.freesoftware.fsf.org/bkchem/cdml',
1630			'http://www.inkscape.org/namespaces/inkscape',
1631			'http://www.opengis.net/gml',
1632			'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
1633			'http://www.w3.org/2000/svg',
1634			'http://www.w3.org/tr/rec-rdf-syntax/',
1635			'http://www.w3.org/2000/01/rdf-schema#',
1636		];
1637
1638		// Inkscape mangles namespace definitions created by Adobe Illustrator.
1639		// This is nasty but harmless. (T144827)
1640		$isBuggyInkscape = preg_match( '/^&(#38;)*ns_[a-z_]+;$/', $namespace );
1641
1642		if ( !( $isBuggyInkscape || in_array( $namespace, $validNamespaces ) ) ) {
1643			wfDebug( __METHOD__ . ": Non-svg namespace '$namespace' in uploaded file." );
1644			/** @todo Return a status object to a closure in XmlTypeCheck, for MW1.21+ */
1645			$this->mSVGNSError = $namespace;
1646
1647			return true;
1648		}
1649
1650		/*
1651		 * check for elements that can contain javascript
1652		 */
1653		if ( $strippedElement == 'script' ) {
1654			wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file." );
1655
1656			return [ 'uploaded-script-svg', $strippedElement ];
1657		}
1658
1659		# e.g., <svg xmlns="http://www.w3.org/2000/svg">
1660		#  <handler xmlns:ev="http://www.w3.org/2001/xml-events" ev:event="load">alert(1)</handler> </svg>
1661		if ( $strippedElement == 'handler' ) {
1662			wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );
1663
1664			return [ 'uploaded-script-svg', $strippedElement ];
1665		}
1666
1667		# SVG reported in Feb '12 that used xml:stylesheet to generate javascript block
1668		if ( $strippedElement == 'stylesheet' ) {
1669			wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );
1670
1671			return [ 'uploaded-script-svg', $strippedElement ];
1672		}
1673
1674		# Block iframes, in case they pass the namespace check
1675		if ( $strippedElement == 'iframe' ) {
1676			wfDebug( __METHOD__ . ": iframe in uploaded file." );
1677
1678			return [ 'uploaded-script-svg', $strippedElement ];
1679		}
1680
1681		# Check <style> css
1682		if ( $strippedElement == 'style'
1683			&& self::checkCssFragment( Sanitizer::normalizeCss( $data ) )
1684		) {
1685			wfDebug( __METHOD__ . ": hostile css in style element." );
1686			return [ 'uploaded-hostile-svg' ];
1687		}
1688
1689		foreach ( $attribs as $attrib => $value ) {
1690			$stripped = $this->stripXmlNamespace( $attrib );
1691			$value = strtolower( $value );
1692
1693			if ( substr( $stripped, 0, 2 ) == 'on' ) {
1694				wfDebug( __METHOD__
1695					. ": Found event-handler attribute '$attrib'='$value' in uploaded file." );
1696
1697				return [ 'uploaded-event-handler-on-svg', $attrib, $value ];
1698			}
1699
1700			# Do not allow relative links, or unsafe url schemas.
1701			# For <a> tags, only data:, http: and https: and same-document
1702			# fragment links are allowed. For all other tags, only data:
1703			# and fragment are allowed.
1704			if ( $stripped == 'href'
1705				&& $value !== ''
1706				&& strpos( $value, 'data:' ) !== 0
1707				&& strpos( $value, '#' ) !== 0
1708			) {
1709				if ( !( $strippedElement === 'a'
1710					&& preg_match( '!^https?://!i', $value ) )
1711				) {
1712					wfDebug( __METHOD__ . ": Found href attribute <$strippedElement "
1713						. "'$attrib'='$value' in uploaded file." );
1714
1715					return [ 'uploaded-href-attribute-svg', $strippedElement, $attrib, $value ];
1716				}
1717			}
1718
1719			# only allow data: targets that should be safe. This prevents vectors like,
1720			# image/svg, text/xml, application/xml, and text/html, which can contain scripts
1721			if ( $stripped == 'href' && strncasecmp( 'data:', $value, 5 ) === 0 ) {
1722				// rfc2397 parameters. This is only slightly slower than (;[\w;]+)*.
1723				// phpcs:ignore Generic.Files.LineLength
1724				$parameters = '(?>;[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+=(?>[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+|"(?>[\0-\x0c\x0e-\x21\x23-\x5b\x5d-\x7f]+|\\\\[\0-\x7f])*"))*(?:;base64)?';
1725
1726				if ( !preg_match( "!^data:\s*image/(gif|jpeg|jpg|png)$parameters,!i", $value ) ) {
1727					wfDebug( __METHOD__ . ": Found href to unwhitelisted data: uri "
1728						. "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );
1729					return [ 'uploaded-href-unsafe-target-svg', $strippedElement, $attrib, $value ];
1730				}
1731			}
1732
1733			# Change href with animate from (http://html5sec.org/#137).
1734			if ( $stripped === 'attributename'
1735				&& $strippedElement === 'animate'
1736				&& $this->stripXmlNamespace( $value ) == 'href'
1737			) {
1738				wfDebug( __METHOD__ . ": Found animate that might be changing href using from "
1739					. "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );
1740
1741				return [ 'uploaded-animate-svg', $strippedElement, $attrib, $value ];
1742			}
1743
1744			# use set/animate to add event-handler attribute to parent
1745			if ( ( $strippedElement == 'set' || $strippedElement == 'animate' )
1746				&& $stripped == 'attributename'
1747				&& substr( $value, 0, 2 ) == 'on'
1748			) {
1749				wfDebug( __METHOD__ . ": Found svg setting event-handler attribute with "
1750					. "\"<$strippedElement $stripped='$value'...\" in uploaded file." );
1751
1752				return [ 'uploaded-setting-event-handler-svg', $strippedElement, $stripped, $value ];
1753			}
1754
1755			# use set to add href attribute to parent element
1756			if ( $strippedElement == 'set'
1757				&& $stripped == 'attributename'
1758				&& strpos( $value, 'href' ) !== false
1759			) {
1760				wfDebug( __METHOD__ . ": Found svg setting href attribute '$value' in uploaded file." );
1761
1762				return [ 'uploaded-setting-href-svg' ];
1763			}
1764
1765			# use set to add a remote / data / script target to an element
1766			if ( $strippedElement == 'set'
1767				&& $stripped == 'to'
1768				&& preg_match( '!(http|https|data|script):!sim', $value )
1769			) {
1770				wfDebug( __METHOD__ . ": Found svg setting attribute to '$value' in uploaded file." );
1771
1772				return [ 'uploaded-wrong-setting-svg', $value ];
1773			}
1774
1775			# use handler attribute with remote / data / script
1776			if ( $stripped == 'handler' && preg_match( '!(http|https|data|script):!sim', $value ) ) {
1777				wfDebug( __METHOD__ . ": Found svg setting handler with remote/data/script "
1778					. "'$attrib'='$value' in uploaded file." );
1779
1780				return [ 'uploaded-setting-handler-svg', $attrib, $value ];
1781			}
1782
1783			# use CSS styles to bring in remote code
1784			if ( $stripped == 'style'
1785				&& self::checkCssFragment( Sanitizer::normalizeCss( $value ) )
1786			) {
1787				wfDebug( __METHOD__ . ": Found svg setting a style with "
1788					. "remote url '$attrib'='$value' in uploaded file." );
1789				return [ 'uploaded-remote-url-svg', $attrib, $value ];
1790			}
1791
1792			# Several attributes can include css, css character escaping isn't allowed
1793			$cssAttrs = [ 'font', 'clip-path', 'fill', 'filter', 'marker',
1794				'marker-end', 'marker-mid', 'marker-start', 'mask', 'stroke' ];
1795			if ( in_array( $stripped, $cssAttrs )
1796				&& self::checkCssFragment( $value )
1797			) {
1798				wfDebug( __METHOD__ . ": Found svg setting a style with "
1799					. "remote url '$attrib'='$value' in uploaded file." );
1800				return [ 'uploaded-remote-url-svg', $attrib, $value ];
1801			}
1802
1803			# image filters can pull in url, which could be svg that executes scripts
1804			# Only allow url( "#foo" ). Do not allow url( http://example.com )
1805			if ( $strippedElement == 'image'
1806				&& $stripped == 'filter'
1807				&& preg_match( '!url\s*\(\s*["\']?[^#]!sim', $value )
1808			) {
1809				wfDebug( __METHOD__ . ": Found image filter with url: "
1810					. "\"<$strippedElement $stripped='$value'...\" in uploaded file." );
1811
1812				return [ 'uploaded-image-filter-svg', $strippedElement, $stripped, $value ];
1813			}
1814		}
1815
1816		return false; // No scripts detected
1817	}
1818
1819	/**
1820	 * Check a block of CSS or CSS fragment for anything that looks like
1821	 * it is bringing in remote code.
1822	 * @param string $value a string of CSS
1823	 * @return bool true if the CSS contains an illegal string, false if otherwise
1824	 */
1825	private static function checkCssFragment( $value ) {
1826		# Forbid external stylesheets, for both reliability and to protect viewer's privacy
1827		if ( stripos( $value, '@import' ) !== false ) {
1828			return true;
1829		}
1830
1831		# We allow @font-face to embed fonts with data: urls, so we snip the string
1832		# 'url' out so this case won't match when we check for urls below
1833		$pattern = '!(@font-face\s*{[^}]*src:)url(\("data:;base64,)!im';
1834		$value = preg_replace( $pattern, '$1$2', $value );
1835
1836		# Check for remote and executable CSS. Unlike in Sanitizer::checkCss, the CSS
1837		# properties filter and accelerator don't seem to be useful for xss in SVG files.
1838		# Expression and -o-link don't seem to work either, but filtering them here in case.
1839		# Additionally, we catch remote urls like url("http:..., url('http:..., url(http:...,
1840		# but not local ones such as url("#..., url('#..., url(#....
1841		if ( preg_match( '!expression
1842				| -o-link\s*:
1843				| -o-link-source\s*:
1844				| -o-replace\s*:!imx', $value ) ) {
1845			return true;
1846		}
1847
1848		if ( preg_match_all(
1849				"!(\s*(url|image|image-set)\s*\(\s*[\"']?\s*[^#]+.*?\))!sim",
1850				$value,
1851				$matches
1852			) !== 0
1853		) {
1854			# TODO: redo this in one regex. Until then, url("#whatever") matches the first
1855			foreach ( $matches[1] as $match ) {
1856				if ( !preg_match( "!\s*(url|image|image-set)\s*\(\s*(#|'#|\"#)!im", $match ) ) {
1857					return true;
1858				}
1859			}
1860		}
1861
1862		if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
1863			return true;
1864		}
1865
1866		return false;
1867	}
1868
1869	/**
1870	 * Divide the element name passed by the xml parser to the callback into URI and prifix.
1871	 * @param string $element
1872	 * @return array Containing the namespace URI and prefix
1873	 */
1874	private static function splitXmlNamespace( $element ) {
1875		// 'http://www.w3.org/2000/svg:script' -> [ 'http://www.w3.org/2000/svg', 'script' ]
1876		$parts = explode( ':', strtolower( $element ) );
1877		$name = array_pop( $parts );
1878		$ns = implode( ':', $parts );
1879
1880		return [ $ns, $name ];
1881	}
1882
1883	/**
1884	 * @param string $name
1885	 * @return string
1886	 */
1887	private function stripXmlNamespace( $name ) {
1888		// 'http://www.w3.org/2000/svg:script' -> 'script'
1889		$parts = explode( ':', strtolower( $name ) );
1890
1891		return array_pop( $parts );
1892	}
1893
1894	/**
1895	 * Generic wrapper function for a virus scanner program.
1896	 * This relies on the $wgAntivirus and $wgAntivirusSetup variables.
1897	 * $wgAntivirusRequired may be used to deny upload if the scan fails.
1898	 *
1899	 * @param string $file Pathname to the temporary upload file
1900	 * @return bool|null|string False if not virus is found, null if the scan fails or is disabled,
1901	 *   or a string containing feedback from the virus scanner if a virus was found.
1902	 *   If textual feedback is missing but a virus was found, this function returns true.
1903	 */
1904	public static function detectVirus( $file ) {
1905		global $wgAntivirus, $wgAntivirusSetup, $wgAntivirusRequired, $wgOut;
1906
1907		if ( !$wgAntivirus ) {
1908			wfDebug( __METHOD__ . ": virus scanner disabled" );
1909
1910			return null;
1911		}
1912
1913		if ( !$wgAntivirusSetup[$wgAntivirus] ) {
1914			wfDebug( __METHOD__ . ": unknown virus scanner: $wgAntivirus" );
1915			$wgOut->wrapWikiMsg( "<div class=\"error\">\n$1\n</div>",
1916				[ 'virus-badscanner', $wgAntivirus ] );
1917
1918			return wfMessage( 'virus-unknownscanner' )->text() . " $wgAntivirus";
1919		}
1920
1921		# look up scanner configuration
1922		$command = $wgAntivirusSetup[$wgAntivirus]['command'];
1923		$exitCodeMap = $wgAntivirusSetup[$wgAntivirus]['codemap'];
1924		$msgPattern = $wgAntivirusSetup[$wgAntivirus]['messagepattern'] ?? null;
1925
1926		if ( strpos( $command, "%f" ) === false ) {
1927			# simple pattern: append file to scan
1928			$command .= " " . Shell::escape( $file );
1929		} else {
1930			# complex pattern: replace "%f" with file to scan
1931			$command = str_replace( "%f", Shell::escape( $file ), $command );
1932		}
1933
1934		wfDebug( __METHOD__ . ": running virus scan: $command " );
1935
1936		# execute virus scanner
1937		$exitCode = false;
1938
1939		# NOTE: there's a 50 line workaround to make stderr redirection work on windows, too.
1940		#      that does not seem to be worth the pain.
1941		#      Ask me (Duesentrieb) about it if it's ever needed.
1942		$output = wfShellExecWithStderr( $command, $exitCode );
1943
1944		# map exit code to AV_xxx constants.
1945		$mappedCode = $exitCode;
1946		if ( $exitCodeMap ) {
1947			if ( isset( $exitCodeMap[$exitCode] ) ) {
1948				$mappedCode = $exitCodeMap[$exitCode];
1949			} elseif ( isset( $exitCodeMap["*"] ) ) {
1950				$mappedCode = $exitCodeMap["*"];
1951			}
1952		}
1953
1954		/* NB: AV_NO_VIRUS is 0 but AV_SCAN_FAILED is false,
1955		 * so we need the strict equalities === and thus can't use a switch here
1956		 */
1957		if ( $mappedCode === AV_SCAN_FAILED ) {
1958			# scan failed (code was mapped to false by $exitCodeMap)
1959			wfDebug( __METHOD__ . ": failed to scan $file (code $exitCode)." );
1960
1961			$output = $wgAntivirusRequired
1962				? wfMessage( 'virus-scanfailed', [ $exitCode ] )->text()
1963				: null;
1964		} elseif ( $mappedCode === AV_SCAN_ABORTED ) {
1965			# scan failed because filetype is unknown (probably imune)
1966			wfDebug( __METHOD__ . ": unsupported file type $file (code $exitCode)." );
1967			$output = null;
1968		} elseif ( $mappedCode === AV_NO_VIRUS ) {
1969			# no virus found
1970			wfDebug( __METHOD__ . ": file passed virus scan." );
1971			$output = false;
1972		} else {
1973			$output = trim( $output );
1974
1975			if ( !$output ) {
1976				$output = true; # if there's no output, return true
1977			} elseif ( $msgPattern ) {
1978				$groups = [];
1979				if ( preg_match( $msgPattern, $output, $groups ) && $groups[1] ) {
1980					$output = $groups[1];
1981				}
1982			}
1983
1984			wfDebug( __METHOD__ . ": FOUND VIRUS! scanner feedback: $output" );
1985		}
1986
1987		return $output;
1988	}
1989
1990	/**
1991	 * Check if there's an overwrite conflict and, if so, if restrictions
1992	 * forbid this user from performing the upload.
1993	 *
1994	 * @param Authority $performer
1995	 *
1996	 * @return bool|array
1997	 */
1998	private function checkOverwrite( Authority $performer ) {
1999		// First check whether the local file can be overwritten
2000		$file = $this->getLocalFile();
2001		$file->load( File::READ_LATEST );
2002		if ( $file->exists() ) {
2003			if ( !self::userCanReUpload( $performer, $file ) ) {
2004				return [ 'fileexists-forbidden', $file->getName() ];
2005			} else {
2006				return true;
2007			}
2008		}
2009
2010		$services = MediaWikiServices::getInstance();
2011
2012		/* Check shared conflicts: if the local file does not exist, but
2013		 * RepoGroup::findFile finds a file, it exists in a shared repository.
2014		 */
2015		$file = $services->getRepoGroup()->findFile( $this->getTitle(), [ 'latest' => true ] );
2016		if ( $file && !$performer->isAllowed( 'reupload-shared' )
2017		) {
2018			return [ 'fileexists-shared-forbidden', $file->getName() ];
2019		}
2020
2021		return true;
2022	}
2023
2024	/**
2025	 * Check if a user is the last uploader
2026	 *
2027	 * @param Authority $performer
2028	 * @param File $img
2029	 * @return bool
2030	 */
2031	public static function userCanReUpload( Authority $performer, File $img ) {
2032		if ( $performer->isAllowed( 'reupload' ) ) {
2033			return true; // non-conditional
2034		} elseif ( !$performer->isAllowed( 'reupload-own' ) ) {
2035			return false;
2036		}
2037
2038		if ( !( $img instanceof LocalFile ) ) {
2039			return false;
2040		}
2041
2042		$img->load();
2043
2044		return $performer->getUser()->getId() == $img->getUser( 'id' );
2045	}
2046
2047	/**
2048	 * Helper function that does various existence checks for a file.
2049	 * The following checks are performed:
2050	 * - The file exists
2051	 * - Article with the same name as the file exists
2052	 * - File exists with normalized extension
2053	 * - The file looks like a thumbnail and the original exists
2054	 *
2055	 * @param File $file The File object to check
2056	 * @return array|bool False if the file does not exist, else an array
2057	 */
2058	public static function getExistsWarning( $file ) {
2059		if ( $file->exists() ) {
2060			return [ 'warning' => 'exists', 'file' => $file ];
2061		}
2062
2063		if ( $file->getTitle()->getArticleID() ) {
2064			return [ 'warning' => 'page-exists', 'file' => $file ];
2065		}
2066
2067		if ( strpos( $file->getName(), '.' ) == false ) {
2068			$partname = $file->getName();
2069			$extension = '';
2070		} else {
2071			$n = strrpos( $file->getName(), '.' );
2072			$extension = substr( $file->getName(), $n + 1 );
2073			$partname = substr( $file->getName(), 0, $n );
2074		}
2075		$normalizedExtension = File::normalizeExtension( $extension );
2076		$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
2077
2078		if ( $normalizedExtension != $extension ) {
2079			// We're not using the normalized form of the extension.
2080			// Normal form is lowercase, using most common of alternate
2081			// extensions (eg 'jpg' rather than 'JPEG').
2082
2083			// Check for another file using the normalized form...
2084			$nt_lc = Title::makeTitle( NS_FILE, "{$partname}.{$normalizedExtension}" );
2085			$file_lc = $localRepo->newFile( $nt_lc );
2086
2087			if ( $file_lc->exists() ) {
2088				return [
2089					'warning' => 'exists-normalized',
2090					'file' => $file,
2091					'normalizedFile' => $file_lc
2092				];
2093			}
2094		}
2095
2096		// Check for files with the same name but a different extension
2097		$similarFiles = $localRepo->findFilesByPrefix( "{$partname}.", 1 );
2098		if ( count( $similarFiles ) ) {
2099			return [
2100				'warning' => 'exists-normalized',
2101				'file' => $file,
2102				'normalizedFile' => $similarFiles[0],
2103			];
2104		}
2105
2106		if ( self::isThumbName( $file->getName() ) ) {
2107			# Check for filenames like 50px- or 180px-, these are mostly thumbnails
2108			$nt_thb = Title::newFromText(
2109				substr( $partname, strpos( $partname, '-' ) + 1 ) . '.' . $extension,
2110				NS_FILE
2111			);
2112			$file_thb = $localRepo->newFile( $nt_thb );
2113			if ( $file_thb->exists() ) {
2114				return [
2115					'warning' => 'thumb',
2116					'file' => $file,
2117					'thumbFile' => $file_thb
2118				];
2119			} else {
2120				// File does not exist, but we just don't like the name
2121				return [
2122					'warning' => 'thumb-name',
2123					'file' => $file,
2124					'thumbFile' => $file_thb
2125				];
2126			}
2127		}
2128
2129		foreach ( self::getFilenamePrefixBlacklist() as $prefix ) {
2130			if ( substr( $partname, 0, strlen( $prefix ) ) == $prefix ) {
2131				return [
2132					'warning' => 'bad-prefix',
2133					'file' => $file,
2134					'prefix' => $prefix
2135				];
2136			}
2137		}
2138
2139		return false;
2140	}
2141
2142	/**
2143	 * Helper function that checks whether the filename looks like a thumbnail
2144	 * @param string $filename
2145	 * @return bool
2146	 */
2147	public static function isThumbName( $filename ) {
2148		$n = strrpos( $filename, '.' );
2149		$partname = $n ? substr( $filename, 0, $n ) : $filename;
2150
2151		return (
2152				substr( $partname, 3, 3 ) == 'px-' ||
2153				substr( $partname, 2, 3 ) == 'px-'
2154			) &&
2155			preg_match( "/[0-9]{2}/", substr( $partname, 0, 2 ) );
2156	}
2157
2158	/**
2159	 * Get a list of blacklisted filename prefixes from [[MediaWiki:Filename-prefix-blacklist]]
2160	 *
2161	 * @return string[] List of prefixes
2162	 */
2163	public static function getFilenamePrefixBlacklist() {
2164		$list = [];
2165		$message = wfMessage( 'filename-prefix-blacklist' )->inContentLanguage();
2166		if ( !$message->isDisabled() ) {
2167			$lines = explode( "\n", $message->plain() );
2168			foreach ( $lines as $line ) {
2169				// Remove comment lines
2170				$comment = substr( trim( $line ), 0, 1 );
2171				if ( $comment == '#' || $comment == '' ) {
2172					continue;
2173				}
2174				// Remove additional comments after a prefix
2175				$comment = strpos( $line, '#' );
2176				if ( $comment > 0 ) {
2177					$line = substr( $line, 0, $comment - 1 );
2178				}
2179				$list[] = trim( $line );
2180			}
2181		}
2182
2183		return $list;
2184	}
2185
2186	/**
2187	 * Gets image info about the file just uploaded.
2188	 *
2189	 * Also has the effect of setting metadata to be an 'indexed tag name' in
2190	 * returned API result if 'metadata' was requested. Oddly, we have to pass
2191	 * the "result" object down just so it can do that with the appropriate
2192	 * format, presumably.
2193	 *
2194	 * @param ApiResult $result
2195	 * @return array Image info
2196	 */
2197	public function getImageInfo( $result ) {
2198		$localFile = $this->getLocalFile();
2199		$stashFile = $this->getStashFile();
2200		// Calling a different API module depending on whether the file was stashed is less than optimal.
2201		// In fact, calling API modules here at all is less than optimal. Maybe it should be refactored.
2202		if ( $stashFile ) {
2203			$imParam = ApiQueryStashImageInfo::getPropertyNames();
2204			$info = ApiQueryStashImageInfo::getInfo( $stashFile, array_flip( $imParam ), $result );
2205		} else {
2206			$imParam = ApiQueryImageInfo::getPropertyNames();
2207			$info = ApiQueryImageInfo::getInfo( $localFile, array_flip( $imParam ), $result );
2208		}
2209
2210		return $info;
2211	}
2212
2213	/**
2214	 * @param array $error
2215	 * @return Status
2216	 */
2217	public function convertVerifyErrorToStatus( $error ) {
2218		$code = $error['status'];
2219		unset( $code['status'] );
2220
2221		return Status::newFatal( $this->getVerificationErrorCode( $code ), $error );
2222	}
2223
2224	/**
2225	 * Get MediaWiki's maximum uploaded file size for given type of upload, based on
2226	 * $wgMaxUploadSize.
2227	 *
2228	 * @param null|string $forType
2229	 * @return int
2230	 */
2231	public static function getMaxUploadSize( $forType = null ) {
2232		global $wgMaxUploadSize;
2233
2234		if ( is_array( $wgMaxUploadSize ) ) {
2235			if ( $forType !== null && isset( $wgMaxUploadSize[$forType] ) ) {
2236				return $wgMaxUploadSize[$forType];
2237			} else {
2238				return $wgMaxUploadSize['*'];
2239			}
2240		} else {
2241			return intval( $wgMaxUploadSize );
2242		}
2243	}
2244
2245	/**
2246	 * Get the PHP maximum uploaded file size, based on ini settings. If there is no limit or the
2247	 * limit can't be guessed, returns a very large number (PHP_INT_MAX).
2248	 *
2249	 * @since 1.27
2250	 * @return int
2251	 */
2252	public static function getMaxPhpUploadSize() {
2253		$phpMaxFileSize = wfShorthandToInteger(
2254			ini_get( 'upload_max_filesize' ),
2255			PHP_INT_MAX
2256		);
2257		$phpMaxPostSize = wfShorthandToInteger(
2258			ini_get( 'post_max_size' ),
2259			PHP_INT_MAX
2260		) ?: PHP_INT_MAX;
2261		return min( $phpMaxFileSize, $phpMaxPostSize );
2262	}
2263
2264	/**
2265	 * Get the current status of a chunked upload (used for polling)
2266	 *
2267	 * The value will be read from cache.
2268	 *
2269	 * @param User $user
2270	 * @param string $statusKey
2271	 * @return Status[]|bool
2272	 */
2273	public static function getSessionStatus( User $user, $statusKey ) {
2274		$store = self::getUploadSessionStore();
2275		$key = self::getUploadSessionKey( $store, $user, $statusKey );
2276
2277		return $store->get( $key );
2278	}
2279
2280	/**
2281	 * Set the current status of a chunked upload (used for polling)
2282	 *
2283	 * The value will be set in cache for 1 day
2284	 *
2285	 * Avoid triggering this method on HTTP GET/HEAD requests
2286	 *
2287	 * @param User $user
2288	 * @param string $statusKey
2289	 * @param array|bool $value
2290	 * @return void
2291	 */
2292	public static function setSessionStatus( User $user, $statusKey, $value ) {
2293		$store = self::getUploadSessionStore();
2294		$key = self::getUploadSessionKey( $store, $user, $statusKey );
2295
2296		if ( $value === false ) {
2297			$store->delete( $key );
2298		} else {
2299			$store->set( $key, $value, $store::TTL_DAY );
2300		}
2301	}
2302
2303	/**
2304	 * @param BagOStuff $store
2305	 * @param UserIdentity $user
2306	 * @param string $statusKey
2307	 * @return string
2308	 */
2309	private static function getUploadSessionKey( BagOStuff $store, UserIdentity $user, $statusKey ) {
2310		return $store->makeKey(
2311			'uploadstatus',
2312			$user->getId() ?: md5( $user->getName() ),
2313			$statusKey
2314		);
2315	}
2316
2317	/**
2318	 * @return BagOStuff
2319	 */
2320	private static function getUploadSessionStore() {
2321		return ObjectCache::getInstance( 'db-replicated' );
2322	}
2323}
2324