1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup FileRepo
20 */
21
22use MediaWiki\Logger\LoggerFactory;
23
24/**
25 * A foreign repository for a remote MediaWiki accessible through api.php requests.
26 *
27 * Example config:
28 *
29 * $wgForeignFileRepos[] = [
30 *   'class'                  => ForeignAPIRepo::class,
31 *   'name'                   => 'shared',
32 *   'apibase'                => 'https://en.wikipedia.org/w/api.php',
33 *   'fetchDescription'       => true, // Optional
34 *   'descriptionCacheExpiry' => 3600,
35 * ];
36 *
37 * @ingroup FileRepo
38 */
39class ForeignAPIRepo extends FileRepo {
40	/* This version string is used in the user agent for requests and will help
41	 * server maintainers in identify ForeignAPI usage.
42	 * Update the version every time you make breaking or significant changes. */
43	private const VERSION = "2.1";
44
45	/**
46	 * List of iiprop values for the thumbnail fetch queries.
47	 */
48	private const IMAGE_INFO_PROPS = [
49		'url',
50		'timestamp',
51	];
52
53	protected $fileFactory = [ ForeignAPIFile::class, 'newFromTitle' ];
54	/** @var int Check back with Commons after this expiry */
55	protected $apiThumbCacheExpiry = 86400; // 1 day (24*3600)
56
57	/** @var int Redownload thumbnail files after this expiry */
58	protected $fileCacheExpiry = 2592000; // 1 month (30*24*3600)
59
60	/** @var array */
61	protected $mFileExists = [];
62
63	/** @var string */
64	private $mApiBase;
65
66	/**
67	 * @param array|null $info
68	 */
69	public function __construct( $info ) {
70		global $wgLocalFileRepo;
71		parent::__construct( $info );
72
73		// https://commons.wikimedia.org/w/api.php
74		$this->mApiBase = $info['apibase'] ?? null;
75
76		if ( isset( $info['apiThumbCacheExpiry'] ) ) {
77			$this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
78		}
79		if ( isset( $info['fileCacheExpiry'] ) ) {
80			$this->fileCacheExpiry = $info['fileCacheExpiry'];
81		}
82		if ( !$this->scriptDirUrl ) {
83			// hack for description fetches
84			$this->scriptDirUrl = dirname( $this->mApiBase );
85		}
86		// If we can cache thumbs we can guess sane defaults for these
87		if ( $this->canCacheThumbs() && !$this->url ) {
88			$this->url = $wgLocalFileRepo['url'];
89		}
90		if ( $this->canCacheThumbs() && !$this->thumbUrl ) {
91			$this->thumbUrl = $this->url . '/thumb';
92		}
93	}
94
95	/**
96	 * @return string
97	 */
98	private function getApiUrl() {
99		return $this->mApiBase;
100	}
101
102	/**
103	 * Per docs in FileRepo, this needs to return false if we don't support versioned
104	 * files. Well, we don't.
105	 *
106	 * @param Title $title
107	 * @param string|bool $time
108	 * @return File|false
109	 */
110	public function newFile( $title, $time = false ) {
111		if ( $time ) {
112			return false;
113		}
114
115		return parent::newFile( $title, $time );
116	}
117
118	/**
119	 * @param string[] $files
120	 * @return array
121	 */
122	public function fileExistsBatch( array $files ) {
123		$results = [];
124		foreach ( $files as $k => $f ) {
125			if ( isset( $this->mFileExists[$f] ) ) {
126				$results[$k] = $this->mFileExists[$f];
127				unset( $files[$k] );
128			} elseif ( self::isVirtualUrl( $f ) ) {
129				# @todo FIXME: We need to be able to handle virtual
130				# URLs better, at least when we know they refer to the
131				# same repo.
132				$results[$k] = false;
133				unset( $files[$k] );
134			} elseif ( FileBackend::isStoragePath( $f ) ) {
135				$results[$k] = false;
136				unset( $files[$k] );
137				wfWarn( "Got mwstore:// path '$f'." );
138			}
139		}
140
141		$data = $this->fetchImageQuery( [
142			'titles' => implode( '|', $files ),
143			'prop' => 'imageinfo' ]
144		);
145
146		if ( isset( $data['query']['pages'] ) ) {
147			# First, get results from the query. Note we only care whether the image exists,
148			# not whether it has a description page.
149			foreach ( $data['query']['pages'] as $p ) {
150				$this->mFileExists[$p['title']] = ( $p['imagerepository'] !== '' );
151			}
152			# Second, copy the results to any redirects that were queried
153			if ( isset( $data['query']['redirects'] ) ) {
154				foreach ( $data['query']['redirects'] as $r ) {
155					$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
156				}
157			}
158			# Third, copy the results to any non-normalized titles that were queried
159			if ( isset( $data['query']['normalized'] ) ) {
160				foreach ( $data['query']['normalized'] as $n ) {
161					$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
162				}
163			}
164			# Finally, copy the results to the output
165			foreach ( $files as $key => $file ) {
166				$results[$key] = $this->mFileExists[$file];
167			}
168		}
169
170		return $results;
171	}
172
173	/**
174	 * @param string $virtualUrl
175	 * @return array
176	 */
177	public function getFileProps( $virtualUrl ) {
178		return [];
179	}
180
181	/**
182	 * @param array $query
183	 * @return array|null
184	 */
185	public function fetchImageQuery( $query ) {
186		global $wgLanguageCode;
187
188		$query = array_merge( $query,
189			[
190				'format' => 'json',
191				'action' => 'query',
192				'redirects' => 'true'
193			] );
194
195		if ( !isset( $query['uselang'] ) ) { // uselang is unset or null
196			$query['uselang'] = $wgLanguageCode;
197		}
198
199		$data = $this->httpGetCached( 'Metadata', $query );
200
201		if ( $data ) {
202			return FormatJson::decode( $data, true );
203		} else {
204			return null;
205		}
206	}
207
208	/**
209	 * @param array $data
210	 * @return bool|array
211	 */
212	public function getImageInfo( $data ) {
213		if ( $data && isset( $data['query']['pages'] ) ) {
214			foreach ( $data['query']['pages'] as $info ) {
215				if ( isset( $info['imageinfo'][0] ) ) {
216					$return = $info['imageinfo'][0];
217					if ( isset( $info['pageid'] ) ) {
218						$return['pageid'] = $info['pageid'];
219					}
220					return $return;
221				}
222			}
223		}
224
225		return false;
226	}
227
228	/**
229	 * @param string $hash
230	 * @return ForeignAPIFile[]
231	 */
232	public function findBySha1( $hash ) {
233		$results = $this->fetchImageQuery( [
234			'aisha1base36' => $hash,
235			'aiprop' => ForeignAPIFile::getProps(),
236			'list' => 'allimages',
237		] );
238		$ret = [];
239		if ( isset( $results['query']['allimages'] ) ) {
240			foreach ( $results['query']['allimages'] as $img ) {
241				// 1.14 was broken, doesn't return name attribute
242				if ( !isset( $img['name'] ) ) {
243					continue;
244				}
245				$ret[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
246			}
247		}
248
249		return $ret;
250	}
251
252	/**
253	 * @param string $name
254	 * @param int $width
255	 * @param int $height
256	 * @param array|null &$result Output-only parameter, guaranteed to become an array
257	 * @param string $otherParams
258	 *
259	 * @return string|false
260	 */
261	private function getThumbUrl(
262		$name, $width = -1, $height = -1, &$result = null, $otherParams = ''
263	) {
264		$data = $this->fetchImageQuery( [
265			'titles' => 'File:' . $name,
266			'iiprop' => self::getIIProps(),
267			'iiurlwidth' => $width,
268			'iiurlheight' => $height,
269			'iiurlparam' => $otherParams,
270			'prop' => 'imageinfo' ] );
271		$info = $this->getImageInfo( $data );
272
273		if ( $data && $info && isset( $info['thumburl'] ) ) {
274			wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] );
275			$result = $info;
276
277			return $info['thumburl'];
278		} else {
279			return false;
280		}
281	}
282
283	/**
284	 * @param string $name
285	 * @param int $width
286	 * @param int $height
287	 * @param string $otherParams
288	 * @param string|null $lang Language code for language of error
289	 * @return bool|MediaTransformError
290	 * @since 1.22
291	 */
292	public function getThumbError(
293		$name, $width = -1, $height = -1, $otherParams = '', $lang = null
294	) {
295		$data = $this->fetchImageQuery( [
296			'titles' => 'File:' . $name,
297			'iiprop' => self::getIIProps(),
298			'iiurlwidth' => $width,
299			'iiurlheight' => $height,
300			'iiurlparam' => $otherParams,
301			'prop' => 'imageinfo',
302			'uselang' => $lang,
303		] );
304		$info = $this->getImageInfo( $data );
305
306		if ( $data && $info && isset( $info['thumberror'] ) ) {
307			wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] );
308
309			return new MediaTransformError(
310				'thumbnail_error_remote',
311				$width,
312				$height,
313				$this->getDisplayName(),
314				$info['thumberror'] // already parsed message from foreign repo
315			);
316		} else {
317			return false;
318		}
319	}
320
321	/**
322	 * Return the imageurl from cache if possible
323	 *
324	 * If the url has been requested today, get it from cache
325	 * Otherwise retrieve remote thumb url, check for local file.
326	 *
327	 * @param string $name Is a dbkey form of a title
328	 * @param int $width
329	 * @param int $height
330	 * @param string $params Other rendering parameters (page number, etc)
331	 *   from handler's makeParamString.
332	 * @return bool|string
333	 */
334	public function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
335		// We can't check the local cache using FileRepo functions because
336		// we override fileExistsBatch(). We have to use the FileBackend directly.
337		$backend = $this->getBackend(); // convenience
338
339		if ( !$this->canCacheThumbs() ) {
340			$result = null; // can't pass "null" by reference, but it's ok as default value
341
342			return $this->getThumbUrl( $name, $width, $height, $result, $params );
343		}
344
345		$key = $this->getLocalCacheKey( 'file-thumb-url', sha1( $name ) );
346		$sizekey = "$width:$height:$params";
347
348		/* Get the array of urls that we already know */
349		$knownThumbUrls = $this->wanCache->get( $key );
350		if ( !$knownThumbUrls ) {
351			/* No knownThumbUrls for this file */
352			$knownThumbUrls = [];
353		} elseif ( isset( $knownThumbUrls[$sizekey] ) ) {
354			wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
355				"{$knownThumbUrls[$sizekey]}" );
356
357			return $knownThumbUrls[$sizekey];
358		}
359
360		$metadata = null;
361		$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );
362
363		if ( !$foreignUrl ) {
364			wfDebug( __METHOD__ . " Could not find thumburl" );
365
366			return false;
367		}
368
369		// We need the same filename as the remote one :)
370		$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
371		if ( !$this->validateFilename( $fileName ) ) {
372			wfDebug( __METHOD__ . " The deduced filename $fileName is not safe" );
373
374			return false;
375		}
376		$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
377		$localFilename = $localPath . "/" . $fileName;
378		$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
379			rawurlencode( $name ) . "/" . rawurlencode( $fileName );
380
381		if ( $backend->fileExists( [ 'src' => $localFilename ] )
382			&& isset( $metadata['timestamp'] )
383		) {
384			wfDebug( __METHOD__ . " Thumbnail was already downloaded before" );
385			$modified = $backend->getFileTimestamp( [ 'src' => $localFilename ] );
386			$remoteModified = strtotime( $metadata['timestamp'] );
387			$current = time();
388			$diff = abs( $modified - $current );
389			if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
390				/* Use our current and already downloaded thumbnail */
391				$knownThumbUrls[$sizekey] = $localUrl;
392				$this->wanCache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
393
394				return $localUrl;
395			}
396			/* There is a new Commons file, or existing thumbnail older than a month */
397		}
398
399		$thumb = self::httpGet( $foreignUrl, 'default', [], $mtime );
400		if ( !$thumb ) {
401			wfDebug( __METHOD__ . " Could not download thumb" );
402
403			return false;
404		}
405
406		# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
407		$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
408		$params = [ 'dst' => $localFilename, 'content' => $thumb ];
409		if ( !$backend->quickCreate( $params )->isOK() ) {
410			wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'" );
411
412			return $foreignUrl;
413		}
414		$knownThumbUrls[$sizekey] = $localUrl;
415
416		$ttl = $mtime
417			? $this->wanCache->adaptiveTTL( $mtime, $this->apiThumbCacheExpiry )
418			: $this->apiThumbCacheExpiry;
419		$this->wanCache->set( $key, $knownThumbUrls, $ttl );
420		wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache" );
421
422		return $localUrl;
423	}
424
425	/**
426	 * @see FileRepo::getZoneUrl()
427	 * @param string $zone
428	 * @param string|null $ext Optional file extension
429	 * @return string
430	 */
431	public function getZoneUrl( $zone, $ext = null ) {
432		switch ( $zone ) {
433			case 'public':
434				return $this->url;
435			case 'thumb':
436				return $this->thumbUrl;
437			default:
438				return parent::getZoneUrl( $zone, $ext );
439		}
440	}
441
442	/**
443	 * Get the local directory corresponding to one of the basic zones
444	 * @param string $zone
445	 * @return bool|null|string
446	 */
447	public function getZonePath( $zone ) {
448		$supported = [ 'public', 'thumb' ];
449		if ( in_array( $zone, $supported ) ) {
450			return parent::getZonePath( $zone );
451		}
452
453		return false;
454	}
455
456	/**
457	 * Are we locally caching the thumbnails?
458	 * @return bool
459	 */
460	public function canCacheThumbs() {
461		return ( $this->apiThumbCacheExpiry > 0 );
462	}
463
464	/**
465	 * The user agent the ForeignAPIRepo will use.
466	 * @return string
467	 */
468	public static function getUserAgent() {
469		return Http::userAgent() . " ForeignAPIRepo/" . self::VERSION;
470	}
471
472	/**
473	 * Get information about the repo - overrides/extends the parent
474	 * class's information.
475	 * @return array
476	 * @since 1.22
477	 */
478	public function getInfo() {
479		$info = parent::getInfo();
480		$info['apiurl'] = $this->getApiUrl();
481
482		$query = [
483			'format' => 'json',
484			'action' => 'query',
485			'meta' => 'siteinfo',
486			'siprop' => 'general',
487		];
488
489		$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
490
491		if ( $data ) {
492			$siteInfo = FormatJson::decode( $data, true );
493			$general = $siteInfo['query']['general'];
494
495			$info['articlepath'] = $general['articlepath'];
496			$info['server'] = $general['server'];
497
498			if ( isset( $general['favicon'] ) ) {
499				$info['favicon'] = $general['favicon'];
500			}
501		}
502
503		return $info;
504	}
505
506	/**
507	 * Like a HttpRequestFactory::get request, but with custom User-Agent.
508	 * @see HttpRequestFactory::get
509	 * @todo Can this use HttpRequestFactory::get() but just pass the 'userAgent' option?
510	 * @param string $url
511	 * @param string $timeout
512	 * @param array $options
513	 * @param int|bool &$mtime Resulting Last-Modified UNIX timestamp if received
514	 * @return bool|string
515	 */
516	public static function httpGet(
517		$url, $timeout = 'default', $options = [], &$mtime = false
518	) {
519		$options['timeout'] = $timeout;
520		/* Http::get */
521		$url = wfExpandUrl( $url, PROTO_HTTP );
522		wfDebug( "ForeignAPIRepo: HTTP GET: $url" );
523		$options['method'] = "GET";
524
525		if ( !isset( $options['timeout'] ) ) {
526			$options['timeout'] = 'default';
527		}
528
529		$req = MWHttpRequest::factory( $url, $options, __METHOD__ );
530		$req->setUserAgent( self::getUserAgent() );
531		$status = $req->execute();
532
533		if ( $status->isOK() ) {
534			$lmod = $req->getResponseHeader( 'Last-Modified' );
535			$mtime = $lmod ? wfTimestamp( TS_UNIX, $lmod ) : false;
536
537			return $req->getContent();
538		} else {
539			$logger = LoggerFactory::getInstance( 'http' );
540			$logger->warning(
541				$status->getWikiText( false, false, 'en' ),
542				[ 'caller' => 'ForeignAPIRepo::httpGet' ]
543			);
544
545			return false;
546		}
547	}
548
549	/**
550	 * @return string
551	 * @since 1.23
552	 */
553	protected static function getIIProps() {
554		return implode( '|', self::IMAGE_INFO_PROPS );
555	}
556
557	/**
558	 * HTTP GET request to a mediawiki API (with caching)
559	 * @param string $attribute Used in cache key creation, mostly
560	 * @param array $query The query parameters for the API request
561	 * @param int $cacheTTL Time to live for the memcached caching
562	 * @return string|null
563	 */
564	public function httpGetCached( $attribute, $query, $cacheTTL = 3600 ) {
565		if ( $this->mApiBase ) {
566			$url = wfAppendQuery( $this->mApiBase, $query );
567		} else {
568			$url = $this->makeUrl( $query, 'api' );
569		}
570
571		return $this->wanCache->getWithSetCallback(
572			$this->getLocalCacheKey( $attribute, sha1( $url ) ),
573			$cacheTTL,
574			function ( $curValue, &$ttl ) use ( $url ) {
575				$html = self::httpGet( $url, 'default', [], $mtime );
576				if ( $html !== false ) {
577					$ttl = $mtime ? $this->wanCache->adaptiveTTL( $mtime, $ttl ) : $ttl;
578				} else {
579					$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );
580					$html = null; // caches negatives
581				}
582
583				return $html;
584			},
585			[ 'pcGroup' => 'http-get:3', 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
586		);
587	}
588
589	/**
590	 * @param callable $callback
591	 * @throws MWException
592	 */
593	public function enumFiles( $callback ) {
594		throw new MWException( 'enumFiles is not supported by ' . static::class );
595	}
596
597	/**
598	 * @throws MWException
599	 */
600	protected function assertWritableRepo() {
601		throw new MWException( static::class . ': write operations are not supported.' );
602	}
603}
604