<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup FileRepo
 */

use MediaWiki\Logger\LoggerFactory;

/**
 * A foreign repository for a remote MediaWiki accessible through api.php requests.
 *
 * Example config:
 *
 * $wgForeignFileRepos[] = [
 *   'class'                  => ForeignAPIRepo::class,
 *   'name'                   => 'shared',
 *   'apibase'                => 'https://en.wikipedia.org/w/api.php',
 *   'fetchDescription'       => true, // Optional
 *   'descriptionCacheExpiry' => 3600,
 * ];
 *
 * @ingroup FileRepo
 */
class ForeignAPIRepo extends FileRepo {
	/* This version string is used in the user agent for requests and will help
	 * server maintainers identify ForeignAPI usage.
	 * Update the version every time you make breaking or significant changes. */
	private const VERSION = "2.1";

	/**
	 * List of iiprop values for the thumbnail fetch queries.
	 */
	private const IMAGE_INFO_PROPS = [
		'url',
		'timestamp',
	];

	/** @var callable Factory used to create file objects for this repo */
	protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];

	/** @var int Check back with Commons after this expiry */
	protected $apiThumbCacheExpiry = 86400; // 1 day (24*3600)

	/** @var int Redownload thumbnail files after this expiry */
	protected $fileCacheExpiry = 2592000; // 1 month (30*24*3600)

	/** @var array Map of (queried title => bool) existence results seen so far */
	protected $mFileExists = [];

	/** @var string|null URL of the remote api.php, or null if 'apibase' was not configured */
	private $mApiBase;

	/**
	 * @param array|null $info Repo configuration. Keys read here: 'apibase',
	 *   'apiThumbCacheExpiry' and 'fileCacheExpiry'; everything else is handled
	 *   by the parent constructor.
	 */
	public function __construct( $info ) {
		global $wgLocalFileRepo;
		parent::__construct( $info );

		// https://commons.wikimedia.org/w/api.php
		$this->mApiBase = $info['apibase'] ?? null;

		if ( isset( $info['apiThumbCacheExpiry'] ) ) {
			$this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
		}
		if ( isset( $info['fileCacheExpiry'] ) ) {
			$this->fileCacheExpiry = $info['fileCacheExpiry'];
		}
		if ( !$this->scriptDirUrl ) {
			// hack for description fetches
			// (cast so that a missing 'apibase' does not pass null to dirname())
			$this->scriptDirUrl = dirname( (string)$this->mApiBase );
		}
		// If we can cache thumbs we can guess sane defaults for these
		if ( $this->canCacheThumbs() && !$this->url ) {
			$this->url = $wgLocalFileRepo['url'];
		}
		if ( $this->canCacheThumbs() && !$this->thumbUrl ) {
			$this->thumbUrl = $this->url . '/thumb';
		}
	}

	/**
	 * Get the configured URL of the remote api.php.
	 *
	 * @return string|null Null when no 'apibase' was configured
	 */
	private function getApiUrl() {
		return $this->mApiBase;
	}

	/**
	 * Per docs in FileRepo, this needs to return false if we don't support versioned
	 * files. Well, we don't.
	 *
	 * @param Title $title
	 * @param string|bool $time
	 * @return File|false
	 */
	public function newFile( $title, $time = false ) {
		if ( $time ) {
			return false;
		}

		return parent::newFile( $title, $time );
	}

	/**
	 * Check the existence of several files with a single remote query.
	 *
	 * Results are remembered in $this->mFileExists, so titles that were already
	 * looked up are answered without contacting the remote API again.
	 *
	 * @param string[] $files Titles to check; keys are preserved in the result
	 * @return array Map of (key from $files => bool)
	 */
	public function fileExistsBatch( array $files ) {
		$results = [];
		foreach ( $files as $k => $f ) {
			if ( isset( $this->mFileExists[$f] ) ) {
				$results[$k] = $this->mFileExists[$f];
				unset( $files[$k] );
			} elseif ( self::isVirtualUrl( $f ) ) {
				# @todo FIXME: We need to be able to handle virtual
				# URLs better, at least when we know they refer to the
				# same repo.
				$results[$k] = false;
				unset( $files[$k] );
			} elseif ( FileBackend::isStoragePath( $f ) ) {
				$results[$k] = false;
				unset( $files[$k] );
				wfWarn( "Got mwstore:// path '$f'." );
			}
		}

		if ( !$files ) {
			// Everything was answered locally; no need for a remote request
			return $results;
		}

		$data = $this->fetchImageQuery( [
			'titles' => implode( '|', $files ),
			'prop' => 'imageinfo' ]
		);

		if ( isset( $data['query']['pages'] ) ) {
			# First, get results from the query. Note we only care whether the image exists,
			# not whether it has a description page.
			foreach ( $data['query']['pages'] as $p ) {
				if ( !isset( $p['title'] ) ) {
					continue;
				}
				# Entries without an 'imagerepository' field are treated as non-existent
				$this->mFileExists[$p['title']] = ( ( $p['imagerepository'] ?? '' ) !== '' );
			}
			# Second, copy the results to any redirects that were queried
			if ( isset( $data['query']['redirects'] ) ) {
				foreach ( $data['query']['redirects'] as $r ) {
					if ( isset( $this->mFileExists[$r['to']] ) ) {
						$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
					}
				}
			}
			# Third, copy the results to any non-normalized titles that were queried
			if ( isset( $data['query']['normalized'] ) ) {
				foreach ( $data['query']['normalized'] as $n ) {
					if ( isset( $this->mFileExists[$n['to']] ) ) {
						$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
					}
				}
			}
			# Finally, copy the results to the output. A title the response said
			# nothing about is reported as not existing.
			foreach ( $files as $key => $file ) {
				$results[$key] = $this->mFileExists[$file] ?? false;
			}
		}

		return $results;
	}

	/**
	 * File properties are not available for this repo; always returns an empty array.
	 *
	 * @param string $virtualUrl
	 * @return array
	 */
	public function getFileProps( $virtualUrl ) {
		return [];
	}

	/**
	 * Run an action=query request against the remote API and decode the JSON result.
	 *
	 * 'format', 'action' and 'redirects' are always forced; 'uselang' defaults to
	 * $wgLanguageCode when it is unset or null in $query.
	 *
	 * @param array $query
	 * @return array|null Decoded response, or null if nothing was fetched
	 */
	public function fetchImageQuery( $query ) {
		global $wgLanguageCode;

		$query = array_merge( $query,
			[
				'format' => 'json',
				'action' => 'query',
				'redirects' => 'true'
			] );

		if ( !isset( $query['uselang'] ) ) { // uselang is unset or null
			$query['uselang'] = $wgLanguageCode;
		}

		$data = $this->httpGetCached( 'Metadata', $query );

		if ( $data ) {
			return FormatJson::decode( $data, true );
		} else {
			return null;
		}
	}

	/**
	 * Extract the first imageinfo entry from a decoded query response.
	 *
	 * The page ID, when present, is added to the returned array as 'pageid'.
	 *
	 * @param array $data
	 * @return bool|array False if no page in the response carries imageinfo
	 */
	public function getImageInfo( $data ) {
		if ( $data && isset( $data['query']['pages'] ) ) {
			foreach ( $data['query']['pages'] as $info ) {
				if ( isset( $info['imageinfo'][0] ) ) {
					$return = $info['imageinfo'][0];
					if ( isset( $info['pageid'] ) ) {
						$return['pageid'] = $info['pageid'];
					}
					return $return;
				}
			}
		}

		return false;
	}

	/**
	 * Find remote files by their base-36 SHA-1 hash (list=allimages).
	 *
	 * @param string $hash
	 * @return ForeignAPIFile[]
	 */
	public function findBySha1( $hash ) {
		$results = $this->fetchImageQuery( [
			'aisha1base36' => $hash,
			'aiprop' => ForeignAPIFile::getProps(),
			'list' => 'allimages',
		] );
		$ret = [];
		if ( isset( $results['query']['allimages'] ) ) {
			foreach ( $results['query']['allimages'] as $img ) {
				// 1.14 was broken, doesn't return name attribute
				if ( !isset( $img['name'] ) ) {
					continue;
				}
				$ret[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
			}
		}

		return $ret;
	}

	/**
	 * Ask the remote API for the URL of a thumbnail.
	 *
	 * @param string $name
	 * @param int $width
	 * @param int $height
	 * @param array|null &$result Output parameter; set to the imageinfo array when
	 *   a thumbnail URL was found, left untouched otherwise
	 * @param string $otherParams
	 *
	 * @return string|false
	 */
	private function getThumbUrl(
		$name, $width = -1, $height = -1, &$result = null, $otherParams = ''
	) {
		$data = $this->fetchImageQuery( [
			'titles' => 'File:' . $name,
			'iiprop' => self::getIIProps(),
			'iiurlwidth' => $width,
			'iiurlheight' => $height,
			'iiurlparam' => $otherParams,
			'prop' => 'imageinfo' ] );
		$info = $this->getImageInfo( $data );

		if ( $data && $info && isset( $info['thumburl'] ) ) {
			wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] );
			$result = $info;

			return $info['thumburl'];
		} else {
			return false;
		}
	}

	/**
	 * Ask the remote API for the error it reports when rendering a thumbnail.
	 *
	 * @param string $name
	 * @param int $width
	 * @param int $height
	 * @param string $otherParams
	 * @param string|null $lang Language code for language of error
	 * @return bool|MediaTransformError False if the remote wiki reported no error
	 * @since 1.22
	 */
	public function getThumbError(
		$name, $width = -1, $height = -1, $otherParams = '', $lang = null
	) {
		$data = $this->fetchImageQuery( [
			'titles' => 'File:' . $name,
			'iiprop' => self::getIIProps(),
			'iiurlwidth' => $width,
			'iiurlheight' => $height,
			'iiurlparam' => $otherParams,
			'prop' => 'imageinfo',
			'uselang' => $lang,
		] );
		$info = $this->getImageInfo( $data );

		if ( $data && $info && isset( $info['thumberror'] ) ) {
			wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] );

			return new MediaTransformError(
				'thumbnail_error_remote',
				$width,
				$height,
				$this->getDisplayName(),
				$info['thumberror'] // already parsed message from foreign repo
			);
		} else {
			return false;
		}
	}

	/**
	 * Return the imageurl from cache if possible
	 *
	 * If the url has been requested today, get it from cache
	 * Otherwise retrieve remote thumb url, check for local file.
	 *
	 * @param string $name Is a dbkey form of a title
	 * @param int $width
	 * @param int $height
	 * @param string $params Other rendering parameters (page number, etc)
	 *   from handler's makeParamString.
	 * @return bool|string Local thumbnail URL; the remote URL if the thumbnail
	 *   cannot be stored locally; false on failure
	 */
	public function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
		// We can't check the local cache using FileRepo functions because
		// we override fileExistsBatch(). We have to use the FileBackend directly.
		$backend = $this->getBackend(); // convenience

		if ( !$this->canCacheThumbs() ) {
			$result = null; // can't pass "null" by reference, but it's ok as default value

			return $this->getThumbUrl( $name, $width, $height, $result, $params );
		}

		$key = $this->getLocalCacheKey( 'file-thumb-url', sha1( $name ) );
		$sizekey = "$width:$height:$params";

		/* Get the array of urls that we already know */
		$knownThumbUrls = $this->wanCache->get( $key );
		if ( !$knownThumbUrls ) {
			/* No knownThumbUrls for this file */
			$knownThumbUrls = [];
		} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
			wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
				"{$knownThumbUrls[$sizekey]}" );

			return $knownThumbUrls[$sizekey];
		}

		$metadata = null;
		$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );

		if ( !$foreignUrl ) {
			wfDebug( __METHOD__ . " Could not find thumburl" );

			return false;
		}

		// We need the same filename as the remote one :)
		$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
		if ( !$this->validateFilename( $fileName ) ) {
			wfDebug( __METHOD__ . " The deduced filename $fileName is not safe" );

			return false;
		}
		$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
		$localFilename = $localPath . "/" . $fileName;
		$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
			rawurlencode( $name ) . "/" . rawurlencode( $fileName );

		if ( $backend->fileExists( [ 'src' => $localFilename ] )
			&& isset( $metadata['timestamp'] )
		) {
			wfDebug( __METHOD__ . " Thumbnail was already downloaded before" );
			// The backend reports a TS_MW string (or false on failure). It must be
			// converted to a UNIX timestamp before being compared with strtotime()
			// and time(), otherwise the age check below can never succeed.
			$localTimestamp = $backend->getFileTimestamp( [ 'src' => $localFilename ] );
			if ( $localTimestamp !== false ) {
				$modified = (int)wfTimestamp( TS_UNIX, $localTimestamp );
				$remoteModified = strtotime( $metadata['timestamp'] );
				$current = time();
				$diff = abs( $modified - $current );
				if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
					/* Use our current and already downloaded thumbnail */
					$knownThumbUrls[$sizekey] = $localUrl;
					$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );

					return $localUrl;
				}
			}
			/* There is a new Commons file, or existing thumbnail older than a month */
		}

		$thumb = self::httpGet( $foreignUrl, 'default', [], $mtime );
		if ( !$thumb ) {
			wfDebug( __METHOD__ . " Could not download thumb" );

			return false;
		}

		# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
		$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
		$createParams = [ 'dst' => $localFilename, 'content' => $thumb ];
		if ( !$backend->quickCreate( $createParams )->isOK() ) {
			wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'" );

			// Storing locally failed; the remote URL is still usable
			return $foreignUrl;
		}
		$knownThumbUrls[$sizekey] = $localUrl;

		$ttl = $mtime
			? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
			: $this->apiThumbCacheExpiry;
		$this->wanCache->set( $key, $knownThumbUrls, $ttl );
		wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache" );

		return $localUrl;
	}

	/**
	 * @see FileRepo::getZoneUrl()
	 * @param string $zone
	 * @param string|null $ext Optional file extension
	 * @return string
	 */
	public function getZoneUrl( $zone, $ext = null ) {
		switch ( $zone ) {
			case 'public':
				return $this->url;
			case 'thumb':
				return $this->thumbUrl;
			default:
				return parent::getZoneUrl( $zone, $ext );
		}
	}

	/**
	 * Get the local directory corresponding to one of the basic zones
	 *
	 * Only the 'public' and 'thumb' zones are supported.
	 *
	 * @param string $zone
	 * @return bool|null|string False for any unsupported zone
	 */
	public function getZonePath( $zone ) {
		$supported = [ 'public', 'thumb' ];
		if ( in_array( $zone, $supported, true ) ) {
			return parent::getZonePath( $zone );
		}

		return false;
	}

	/**
	 * Are we locally caching the thumbnails?
	 * @return bool
	 */
	public function canCacheThumbs() {
		return ( $this->apiThumbCacheExpiry > 0 );
	}

	/**
	 * The user agent the ForeignAPIRepo will use.
	 * @return string
	 */
	public static function getUserAgent() {
		return Http::userAgent() . " ForeignAPIRepo/" . self::VERSION;
	}

	/**
	 * Get information about the repo - overrides/extends the parent
	 * class's information.
	 *
	 * Adds 'apiurl', plus 'articlepath', 'server' and 'favicon' for each of
	 * those the remote siteinfo response provides.
	 *
	 * @return array
	 * @since 1.22
	 */
	public function getInfo() {
		$info = parent::getInfo();
		$info['apiurl'] = $this->getApiUrl();

		$query = [
			'format' => 'json',
			'action' => 'query',
			'meta' => 'siteinfo',
			'siprop' => 'general',
		];

		$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );

		if ( $data ) {
			$siteInfo = FormatJson::decode( $data, true );
			// The response may be malformed or lack the "general" section;
			// copy only the properties that are actually there.
			$general = $siteInfo['query']['general'] ?? [];

			foreach ( [ 'articlepath', 'server', 'favicon' ] as $prop ) {
				if ( isset( $general[$prop] ) ) {
					$info[$prop] = $general[$prop];
				}
			}
		}

		return $info;
	}

	/**
	 * Like a HttpRequestFactory::get request, but with custom User-Agent.
	 * @see HttpRequestFactory::get
	 * @todo Can this use HttpRequestFactory::get() but just pass the 'userAgent' option?
	 * @param string $url
	 * @param string $timeout
	 * @param array $options
	 * @param int|bool &$mtime Resulting Last-Modified UNIX timestamp if received.
	 *   Only written when the request succeeds.
	 * @return bool|string Response body, or false on failure
	 */
	public static function httpGet(
		$url, $timeout = 'default', $options = [], &$mtime = false
	) {
		// $timeout always overrides $options['timeout']; null means 'default'
		$options['timeout'] = $timeout ?? 'default';
		$options['method'] = "GET";
		/* Http::get */
		$url = wfExpandUrl( $url, PROTO_HTTP );
		wfDebug( "ForeignAPIRepo: HTTP GET: $url" );

		$req = MWHttpRequest::factory( $url, $options, __METHOD__ );
		$req->setUserAgent( self::getUserAgent() );
		$status = $req->execute();

		if ( $status->isOK() ) {
			$lmod = $req->getResponseHeader( 'Last-Modified' );
			$mtime = $lmod ? wfTimestamp( TS_UNIX, $lmod ) : false;

			return $req->getContent();
		} else {
			$logger = LoggerFactory::getInstance( 'http' );
			$logger->warning(
				$status->getWikiText( false, false, 'en' ),
				[ 'caller' => 'ForeignAPIRepo::httpGet' ]
			);

			return false;
		}
	}

	/**
	 * Get the iiprop value (pipe-separated) used for thumbnail queries.
	 *
	 * @return string
	 * @since 1.23
	 */
	protected static function getIIProps() {
		return implode( '|', self::IMAGE_INFO_PROPS );
	}

	/**
	 * HTTP GET request to a mediawiki API (with caching)
	 * @param string $attribute Used in cache key creation, mostly
	 * @param array $query The query parameters for the API request
	 * @param int $cacheTTL Time to live for the memcached caching
	 * @return string|null Response body, or null if the request failed
	 */
	public function httpGetCached( $attribute, $query, $cacheTTL = 3600 ) {
		if ( $this->mApiBase ) {
			$url = wfAppendQuery( $this->mApiBase, $query );
		} else {
			$url = $this->makeUrl( $query, 'api' );
		}

		return $this->wanCache->getWithSetCallback(
			$this->getLocalCacheKey( $attribute, sha1( $url ) ),
			$cacheTTL,
			function ( $curValue, &$ttl ) use ( $url ) {
				$html = self::httpGet( $url, 'default', [], $mtime );
				if ( $html !== false ) {
					$ttl = $mtime ? $this->wanCache->adaptiveTTL( $mtime, $ttl ) : $ttl;
				} else {
					// httpGet() does not set $mtime on failure, so it is still unset here
					$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
					$html = null; // caches negatives
				}

				return $html;
			},
			[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
		);
	}

	/**
	 * Not supported by this repo.
	 *
	 * @param callable $callback
	 * @throws MWException Always
	 */
	public function enumFiles( $callback ) {
		throw new MWException( 'enumFiles is not supported by ' . static::class );
	}

	/**
	 * This repo is read-only.
	 *
	 * @throws MWException Always
	 */
	protected function assertWritableRepo() {
		throw new MWException( static::class . ': write operations are not supported.' );
	}
}