1<?php
2/**
 * Backend for uploading files from an HTTP resource.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Upload
22 */
23
24use MediaWiki\MediaWikiServices;
25use MediaWiki\Permissions\Authority;
26
27/**
 * Implements uploading from an HTTP resource.
29 *
30 * @ingroup Upload
31 * @author Bryan Tong Minh
32 * @author Michael Dale
33 */
class UploadFromUrl extends UploadBase {
	/** @var string URL the file is downloaded from, as passed to initialize() */
	protected $mUrl;

	/** @var string|false Path of the temporary file that reallyFetchFile() writes the download to */
	protected $mTempPath;

	/**
	 * @var resource|null|false Write handle on the temporary file while a download is running.
	 *  Set to false by saveTempFileChunk() after a failed write, and to null once
	 *  reallyFetchFile() has closed it.
	 */
	protected $mTmpHandle;

	/** @var bool[] Cache of isAllowedUrl() results, keyed by URL */
	protected static $allowedUrls = [];

	/**
	 * Checks if the user is allowed to use the upload-by-URL feature. If the
	 * user is not allowed, return the name of the user right as a string. If
	 * the user is allowed, have the parent do further permissions checking.
	 *
	 * @param Authority $performer
	 *
	 * @return bool|string
	 */
	public static function isAllowed( Authority $performer ) {
		if ( !$performer->isAllowed( 'upload_by_url' ) ) {
			return 'upload_by_url';
		}

		return parent::isAllowed( $performer );
	}

	/**
	 * Checks if the upload from URL feature is enabled, i.e. $wgAllowCopyUploads
	 * is set and the parent class reports uploads as enabled.
	 *
	 * @return bool
	 */
	public static function isEnabled() {
		global $wgAllowCopyUploads;

		return $wgAllowCopyUploads && parent::isEnabled();
	}

	/**
	 * Checks whether the URL is for an allowed host.
	 *
	 * The domains in the allowlist ($wgCopyUploadsDomains) can include wildcard
	 * characters (*) in place of any of the domain levels, e.g. '*.flickr.com'
	 * or 'upload.*.gov.uk'. A wildcard stands for exactly one level, so the host
	 * must have the same number of dot-separated levels as the allowlist entry.
	 * An empty allowlist allows every host. Host names are compared
	 * case-insensitively.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedHost( $url ) {
		global $wgCopyUploadsDomains;
		if ( !count( $wgCopyUploadsDomains ) ) {
			return true;
		}

		$parsedUrl = wfParseUrl( $url );
		if ( !$parsedUrl || !isset( $parsedUrl['host'] ) || $parsedUrl['host'] === '' ) {
			// Unparsable URL, or no host to compare against the allowlist
			return false;
		}

		// The host does not change between allowlist entries, so split it only once
		$hostPieces = explode( '.', strtolower( $parsedUrl['host'] ) );
		$hostPieceCount = count( $hostPieces );

		foreach ( $wgCopyUploadsDomains as $domain ) {
			$domainPieces = explode( '.', strtolower( $domain ) );
			if ( count( $domainPieces ) !== $hostPieceCount ) {
				continue;
			}

			// See if all the pieces match or not (excluding wildcards)
			$matches = true;
			foreach ( $domainPieces as $index => $piece ) {
				if ( $piece !== '*' && $piece !== $hostPieces[$index] ) {
					$matches = false;
					break;
				}
			}

			if ( $matches ) {
				// We found a match, so quit comparing against the list
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks whether the URL is allowed.
	 *
	 * The onIsUploadAllowedFromUrl hook runner is called with the URL and an
	 * $allowed flag that starts out as true; whatever value the flag holds
	 * afterwards is cached per URL in self::$allowedUrls and returned.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedUrl( $url ) {
		if ( !isset( self::$allowedUrls[$url] ) ) {
			$allowed = true;
			Hooks::runner()->onIsUploadAllowedFromUrl( $url, $allowed );
			self::$allowedUrls[$url] = $allowed;
		}

		return self::$allowedUrls[$url];
	}

	/**
	 * Entry point for API upload
	 *
	 * Remembers the source URL and creates the temporary file the download
	 * will later be written to.
	 *
	 * @param string $name
	 * @param string $url
	 * @throws MWException
	 */
	public function initialize( $name, $url ) {
		$this->mUrl = $url;

		$tempPath = $this->makeTemporaryFile();
		# File size and removeTempFile will be filled in later
		$this->initializePathInfo( $name, $tempPath, 0, false );
	}

	/**
	 * Entry point for SpecialUpload
	 *
	 * Uses 'wpDestFile' as the destination name and falls back to the
	 * 'wpUploadFileURL' text when no destination name was submitted.
	 *
	 * @param WebRequest &$request
	 */
	public function initializeFromRequest( &$request ) {
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}

		// getVal() may yield null for a missing parameter; trim( null ) is deprecated in PHP 8.1
		$url = trim( $request->getVal( 'wpUploadFileURL' ) ?? '' );

		$this->initialize( $desiredDestName, $url );
	}

	/**
	 * Checks whether the request carries a non-empty 'wpUploadFileURL' value
	 * and the main request context's user has the 'upload_by_url' right.
	 *
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		$user = RequestContext::getMain()->getUser();

		$url = $request->getVal( 'wpUploadFileURL' );

		return !empty( $url )
			&& MediaWikiServices::getInstance()
				->getPermissionManager()
				->userHasRight( $user, 'upload_by_url' );
	}

	/**
	 * @return string Always 'url'
	 */
	public function getSourceType() {
		return 'url';
	}

	/**
	 * Download the file
	 *
	 * The URL is first checked with MWHttpRequest::isValidURI(), isAllowedHost()
	 * and isAllowedUrl(); the first failing check produces a fatal Status and
	 * nothing is downloaded.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest.
	 *   This could be used to override the timeout on the http request.
	 * @return Status
	 */
	public function fetchFile( $httpOptions = [] ) {
		if ( !MWHttpRequest::isValidURI( $this->mUrl ) ) {
			return Status::newFatal( 'http-invalid-url', $this->mUrl );
		}

		if ( !self::isAllowedHost( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-domain' );
		}
		if ( !self::isAllowedUrl( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-url' );
		}

		return $this->reallyFetchFile( $httpOptions );
	}

	/**
	 * Create a new temporary file in the URL subdirectory of wfTempDir().
	 *
	 * The temporary file object is bound to this upload object via bind().
	 *
	 * @return string Path to the file
	 */
	protected function makeTemporaryFile() {
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'URL', 'urlupload_' );
		$tmpFile->bind( $this );

		return $tmpFile->getPath();
	}

	/**
	 * Callback: save a chunk of the result of an HTTP request to the temporary file
	 *
	 * On a short or failed write the temporary file handle is closed and
	 * $mTmpHandle is set to false, which reallyFetchFile() reports as a write
	 * error. Chunks arriving after that are not written.
	 *
	 * @param mixed $req
	 * @param string $buffer
	 * @return int Number of bytes handled
	 */
	public function saveTempFileChunk( $req, $buffer ) {
		$length = strlen( $buffer );
		wfDebugLog( 'fileupload', 'Received chunk of ' . $length . ' bytes' );

		if ( !$this->mTmpHandle ) {
			// No open handle (e.g. closed by an earlier short write): calling fwrite()
			// on it would throw on PHP 8, so report that nothing was written.
			return 0;
		}

		$written = fwrite( $this->mTmpHandle, $buffer );
		if ( $written === $length ) {
			$this->mFileSize += $written;

			return $written;
		}

		// Well... that's not good! fwrite() returns false on failure; count that as 0 bytes.
		$nbytes = $written === false ? 0 : $written;
		wfDebugLog(
			'fileupload',
			'Short write ' . $nbytes . '/' . $length .
				' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
		);
		fclose( $this->mTmpHandle );
		$this->mTmpHandle = false;

		return $nbytes;
	}

	/**
	 * Download the file, save it to the temporary file and update the file
	 * size and set $mRemoveTempFile to true.
	 *
	 * Redirects are followed manually ('followRedirects' defaults to false),
	 * up to $httpOptions['maxRedirects'] requests (default 5). The temporary
	 * file and the recorded file size are reset before each new request so a
	 * redirect response body never ends up in the result.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest
	 * @return Status Fatal 'tmp-create-error' if the temporary file cannot be opened,
	 *   fatal 'upload-too-many-redirects' if the request limit is exhausted, fatal
	 *   'tmp-write-error' if writing a chunk failed, otherwise the Status returned by
	 *   the last HTTP request.
	 */
	protected function reallyFetchFile( $httpOptions = [] ) {
		global $wgCopyUploadProxy, $wgCopyUploadTimeout;
		if ( $this->mTempPath === false ) {
			return Status::newFatal( 'tmp-create-error' );
		}

		// Note the temporary file should already be created by makeTemporaryFile()
		$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
		if ( !$this->mTmpHandle ) {
			return Status::newFatal( 'tmp-create-error' );
		}
		wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

		$this->mRemoveTempFile = true;
		$this->mFileSize = 0;

		// Caller-supplied options win over the default of not following redirects
		$options = $httpOptions + [ 'followRedirects' => false ];

		if ( $wgCopyUploadProxy !== false ) {
			$options['proxy'] = $wgCopyUploadProxy;
		}

		if ( $wgCopyUploadTimeout && !isset( $options['timeout'] ) ) {
			$options['timeout'] = $wgCopyUploadTimeout;
		}
		wfDebugLog(
			'fileupload',
			'Starting download from "' . $this->mUrl . '" ' .
				'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
		);

		// Manually follow any redirects up to the limit and reset the output file before each new request to prevent
		// capturing the redirect response as part of the file.
		// NOTE(review): redirect targets are not re-checked against isAllowedHost()/isAllowedUrl() — confirm
		// whether that is intended.
		$attemptsLeft = $options['maxRedirects'] ?? 5;
		$targetUrl = $this->mUrl;
		$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
		$req = null;
		$status = null;
		while ( $attemptsLeft > 0 ) {
			$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
			$req->setCallback( [ $this, 'saveTempFileChunk' ] );
			$status = $req->execute();
			if ( !$req->isRedirect() ) {
				break;
			}
			if ( !$this->mTmpHandle ) {
				// saveTempFileChunk() closed the handle after a failed write, so there is
				// nothing left to truncate; fall through to the write error below.
				break;
			}
			$targetUrl = $req->getFinalUrl();
			// Remove redirect response content from file, and from the byte count.
			ftruncate( $this->mTmpHandle, 0 );
			rewind( $this->mTmpHandle );
			$this->mFileSize = 0;
			$attemptsLeft--;
		}

		if ( $attemptsLeft <= 0 ) {
			// Do not leave the temporary file handle open on this error path
			if ( $this->mTmpHandle ) {
				fclose( $this->mTmpHandle );
				$this->mTmpHandle = null;
			}

			return Status::newFatal( 'upload-too-many-redirects' );
		}

		if ( !$this->mTmpHandle ) {
			// We encountered a write error during the download...
			return Status::newFatal( 'tmp-write-error' );
		}

		// File got written ok...
		fclose( $this->mTmpHandle );
		$this->mTmpHandle = null;

		wfDebugLog( 'fileupload', $status );
		if ( $status->isOK() ) {
			wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
		} else {
			wfDebugLog(
				'fileupload',
				'Download by URL completed with HTTP status ' . $req->getStatus()
			);
		}

		return $status;
	}
}
327