<?php
/**
 * Backend for uploading files from a HTTP resource.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Upload
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Permissions\Authority;

/**
 * Implements uploading from a HTTP resource.
 *
 * @ingroup Upload
 * @author Bryan Tong Minh
 * @author Michael Dale
 */
class UploadFromUrl extends UploadBase {
	/** @var string URL to download from, as passed to initialize() */
	protected $mUrl;

	/**
	 * @var string|false Path of the temporary file the download is written to.
	 *  NOTE(review): presumably populated by UploadBase::initializePathInfo();
	 *  that code is not visible in this file.
	 */
	protected $mTempPath;

	/**
	 * @var resource|false|null Write handle on the temporary file while a download
	 *  is running; false after a failed write, null once closed normally.
	 */
	protected $mTmpHandle;

	/** @var bool[] Per-URL cache of the IsUploadAllowedFromUrl hook result */
	protected static $allowedUrls = [];

	/**
	 * Checks if the user is allowed to use the upload-by-URL feature. If the
	 * user is not allowed, return the name of the user right as a string. If
	 * the user is allowed, have the parent do further permissions checking.
	 *
	 * @param Authority $performer
	 *
	 * @return bool|string
	 */
	public static function isAllowed( Authority $performer ) {
		if ( !$performer->isAllowed( 'upload_by_url' ) ) {
			return 'upload_by_url';
		}

		return parent::isAllowed( $performer );
	}

	/**
	 * Checks if the upload from URL feature is enabled
	 * @return bool
	 */
	public static function isEnabled() {
		global $wgAllowCopyUploads;

		return $wgAllowCopyUploads && parent::isEnabled();
	}

	/**
	 * Checks whether the URL is for an allowed host
	 * The domains in the allowlist can include wildcard characters (*) in place
	 * of any of the domain levels, e.g. '*.flickr.com' or 'upload.*.gov.uk'.
	 *
	 * A wildcard stands for exactly one domain level: an allowlist entry only
	 * matches hosts with the same number of dot-separated pieces. An empty
	 * allowlist permits every host. Host names are compared case-insensitively.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedHost( $url ) {
		global $wgCopyUploadsDomains;
		if ( !count( $wgCopyUploadsDomains ) ) {
			return true;
		}
		$parsedUrl = wfParseUrl( $url );
		if ( !$parsedUrl ) {
			return false;
		}

		// Host names are case-insensitive, so normalise both sides before comparing.
		// The upload host does not change per allowlist entry, so split it only once.
		$uploadDomainPieces = explode( '.', strtolower( $parsedUrl['host'] ?? '' ) );
		$uploadPieceCount = count( $uploadDomainPieces );

		foreach ( $wgCopyUploadsDomains as $domain ) {
			// See if the domain for the upload matches this allowed domain
			$domainPieces = explode( '.', strtolower( $domain ) );
			if ( count( $domainPieces ) !== $uploadPieceCount ) {
				continue;
			}

			// See if all the pieces match or not (excluding wildcards)
			$matches = true;
			foreach ( $domainPieces as $index => $piece ) {
				if ( $piece !== '*' && $piece !== $uploadDomainPieces[$index] ) {
					$matches = false;
					break;
				}
			}

			if ( $matches ) {
				// We found a match, so quit comparing against the list
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks whether the URL is allowed, as decided by the
	 * IsUploadAllowedFromUrl hook. URLs are allowed unless a hook handler
	 * says otherwise. The result is cached per URL for the rest of the request.
	 *
	 * @param string $url
	 * @return bool True if uploading from this URL is allowed
	 */
	public static function isAllowedUrl( $url ) {
		if ( !isset( self::$allowedUrls[$url] ) ) {
			$allowed = true;
			Hooks::runner()->onIsUploadAllowedFromUrl( $url, $allowed );
			self::$allowedUrls[$url] = $allowed;
		}

		return self::$allowedUrls[$url];
	}

	/**
	 * Entry point for API upload
	 *
	 * @param string $name
	 * @param string $url
	 * @throws MWException
	 */
	public function initialize( $name, $url ) {
		$this->mUrl = $url;

		$tempPath = $this->makeTemporaryFile();
		# File size and removeTempFile will be filled in later
		$this->initializePathInfo( $name, $tempPath, 0, false );
	}

	/**
	 * Entry point for SpecialUpload
	 *
	 * The destination name is taken from 'wpDestFile', falling back to the
	 * source URL when no destination name was given.
	 *
	 * @param WebRequest &$request
	 */
	public function initializeFromRequest( &$request ) {
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}
		$this->initialize(
			$desiredDestName,
			// getVal() yields null when the parameter is absent; trim( null ) is deprecated
			trim( $request->getVal( 'wpUploadFileURL' ) ?? '' )
		);
	}

	/**
	 * Checks that the request carries a source URL and that the current user
	 * of the main request context holds the 'upload_by_url' right.
	 *
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		$user = RequestContext::getMain()->getUser();

		$url = $request->getVal( 'wpUploadFileURL' );

		return !empty( $url )
			&& MediaWikiServices::getInstance()
				->getPermissionManager()
				->userHasRight( $user, 'upload_by_url' );
	}

	/**
	 * @return string
	 */
	public function getSourceType() {
		return 'url';
	}

	/**
	 * Download the file
	 *
	 * The URL is validated first: it must be a valid URI, its host must pass
	 * isAllowedHost() and the URL itself must pass isAllowedUrl().
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest.
	 *   This could be used to override the timeout on the http request.
	 * @return Status
	 */
	public function fetchFile( $httpOptions = [] ) {
		if ( !MWHttpRequest::isValidURI( $this->mUrl ) ) {
			return Status::newFatal( 'http-invalid-url', $this->mUrl );
		}

		if ( !self::isAllowedHost( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-domain' );
		}
		if ( !self::isAllowedUrl( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-url' );
		}
		return $this->reallyFetchFile( $httpOptions );
	}

	/**
	 * Create a new temporary file in the URL subdirectory of wfTempDir().
	 *
	 * The temporary file object is bound to this upload instance.
	 *
	 * @return string Path to the file
	 */
	protected function makeTemporaryFile() {
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'URL', 'urlupload_' );
		$tmpFile->bind( $this );

		return $tmpFile->getPath();
	}

	/**
	 * Callback: save a chunk of the result of a HTTP request to the temporary file
	 *
	 * On a short or failed write the temporary file handle is closed and set
	 * to false, which reallyFetchFile() later reports as a write error.
	 *
	 * @param mixed $req
	 * @param string $buffer
	 * @return int|false Number of bytes handled, as reported by fwrite()
	 */
	public function saveTempFileChunk( $req, $buffer ) {
		wfDebugLog( 'fileupload', 'Received chunk of ' . strlen( $buffer ) . ' bytes' );

		if ( !$this->mTmpHandle ) {
			// An earlier write already failed and closed the handle; writing to it
			// again would be an error, so report that nothing was handled.
			return 0;
		}

		$nbytes = fwrite( $this->mTmpHandle, $buffer );

		// Strict comparison: fwrite() returns false on failure, which must not be
		// mistaken for a successful write of an empty buffer.
		if ( $nbytes === strlen( $buffer ) ) {
			$this->mFileSize += $nbytes;
		} else {
			// Well... that's not good!
			wfDebugLog(
				'fileupload',
				'Short write ' . $nbytes . '/' . strlen( $buffer ) .
				' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
			);
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = false;
		}

		return $nbytes;
	}

	/**
	 * Download the file, save it to the temporary file and update the file
	 * size and set $mRemoveTempFile to true.
	 *
	 * Redirects are followed manually, up to the 'maxRedirects' option
	 * (default 5) requests in total. The temporary file and the recorded file
	 * size are reset before each new request so that redirect response bodies
	 * do not end up in the result.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest
	 * @return Status
	 */
	protected function reallyFetchFile( $httpOptions = [] ) {
		global $wgCopyUploadProxy, $wgCopyUploadTimeout;
		if ( $this->mTempPath === false ) {
			return Status::newFatal( 'tmp-create-error' );
		}

		// Note the temporary file should already be created by makeTemporaryFile()
		$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
		if ( !$this->mTmpHandle ) {
			return Status::newFatal( 'tmp-create-error' );
		}
		wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

		$this->mRemoveTempFile = true;
		$this->mFileSize = 0;

		// Caller-supplied options win over the default of not following redirects
		$options = $httpOptions + [ 'followRedirects' => false ];

		if ( $wgCopyUploadProxy !== false ) {
			$options['proxy'] = $wgCopyUploadProxy;
		}

		if ( $wgCopyUploadTimeout && !isset( $options['timeout'] ) ) {
			$options['timeout'] = $wgCopyUploadTimeout;
		}
		wfDebugLog(
			'fileupload',
			'Starting download from "' . $this->mUrl . '" ' .
				'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
		);

		// Manually follow any redirects up to the limit and reset the output file before each new request to prevent
		// capturing the redirect response as part of the file.
		// NOTE(review): redirect targets are not re-checked against isAllowedHost() / isAllowedUrl();
		// confirm whether that is intended when a domain allowlist is configured.
		$attemptsLeft = $options['maxRedirects'] ?? 5;
		$targetUrl = $this->mUrl;
		$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
		$req = null;
		$status = null;
		while ( $attemptsLeft > 0 ) {
			$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
			$req->setCallback( [ $this, 'saveTempFileChunk' ] );
			$status = $req->execute();
			if ( !$req->isRedirect() ) {
				break;
			}
			$targetUrl = $req->getFinalUrl();
			// Remove redirect response content from file. The handle may already have
			// been closed by saveTempFileChunk() after a failed write.
			if ( $this->mTmpHandle ) {
				ftruncate( $this->mTmpHandle, 0 );
				rewind( $this->mTmpHandle );
			}
			// The discarded redirect body must not count towards the file size
			$this->mFileSize = 0;
			$attemptsLeft--;
		}

		if ( $attemptsLeft <= 0 ) {
			// Redirect budget exhausted (or no request allowed at all): do not leak the handle
			if ( $this->mTmpHandle ) {
				fclose( $this->mTmpHandle );
			}
			$this->mTmpHandle = null;

			return Status::newFatal( 'upload-too-many-redirects' );
		}

		if ( !$this->mTmpHandle ) {
			// We encountered a write error during the download...
			return Status::newFatal( 'tmp-write-error' );
		}

		// File got written ok...
		fclose( $this->mTmpHandle );
		$this->mTmpHandle = null;

		wfDebugLog( 'fileupload', $status );
		if ( $status->isOK() ) {
			wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
		} else {
			wfDebugLog(
				'fileupload',
				'Download by URL completed with HTTP status ' . $req->getStatus()
			);
		}

		return $status;
	}
}