1<?php
2
3namespace MediaWiki\Rest\Handler;
4
5use ChangeTags;
6use MediaWiki\Permissions\PermissionManager;
7use MediaWiki\Rest\LocalizedHttpException;
8use MediaWiki\Rest\Response;
9use MediaWiki\Rest\SimpleHandler;
10use MediaWiki\Revision\RevisionRecord;
11use MediaWiki\Revision\RevisionStore;
12use MediaWiki\Storage\NameTableAccessException;
13use MediaWiki\Storage\NameTableStore;
14use MediaWiki\Storage\NameTableStoreFactory;
15use Title;
16use WANObjectCache;
17use Wikimedia\Message\MessageValue;
18use Wikimedia\Message\ParamType;
19use Wikimedia\Message\ScalarParam;
20use Wikimedia\ParamValidator\ParamValidator;
21use Wikimedia\Rdbms\ILoadBalancer;
22
23/**
 * Handler class for Core REST API endpoints that return counts of a page's revisions, by type
25 */
26class PageHistoryCountHandler extends SimpleHandler {
	/**
	 * The maximum number of counts to return per type of revision.
	 * The keys are also the non-deprecated values accepted for the "type" path parameter.
	 */
	private const COUNT_LIMITS = [
		'anonymous' => 10000,
		'bot' => 10000,
		'editors' => 25000,
		'edits' => 30000,
		'minor' => 1000,
		'reverted' => 30000
	];

	/** Deprecated "type" values that are still accepted, mapped to their current names */
	private const DEPRECATED_COUNT_TYPES = [
		'anonedits' => 'anonymous',
		'botedits' => 'bot',
		'revertededits' => 'reverted'
	];

	/** Value of the Cache-Control max-age directive set on the response built by run() */
	private const MAX_AGE_200 = 60;
44
	/** @var RevisionStore Used to load revisions and to count revisions and authors */
	private $revisionStore;

	/** @var NameTableStore Change tag definition store, used to look up revert tag ids */
	private $changeTagDefStore;

	/** @var PermissionManager Used to find the groups that have the 'bot' permission */
	private $permissionManager;

	/** @var ILoadBalancer Source of the replica database connections used for counting */
	private $loadBalancer;

	/** @var WANObjectCache Cache holding previously computed counts */
	private $cache;

	/**
	 * @var RevisionRecord|bool|null Memoized by getCurrentRevision(); null until that
	 *  method has been called
	 */
	private $revision;

	/** @var array|null Memoized by getLastModifiedTimes(); null until computed */
	private $lastModifiedTimes;

	/** @var Title|bool|null Memoized by getTitle(); null until a title has been resolved */
	private $titleObject;
68
69	/**
70	 * @param RevisionStore $revisionStore
71	 * @param NameTableStoreFactory $nameTableStoreFactory
72	 * @param PermissionManager $permissionManager
73	 * @param ILoadBalancer $loadBalancer
74	 * @param WANObjectCache $cache
75	 */
76	public function __construct(
77		RevisionStore $revisionStore,
78		NameTableStoreFactory $nameTableStoreFactory,
79		PermissionManager $permissionManager,
80		ILoadBalancer $loadBalancer,
81		WANObjectCache $cache
82	) {
83		$this->revisionStore = $revisionStore;
84		$this->changeTagDefStore = $nameTableStoreFactory->getChangeTagDef();
85		$this->permissionManager = $permissionManager;
86		$this->loadBalancer = $loadBalancer;
87		$this->cache = $cache;
88	}
89
90	private function normalizeType( $type ) {
91		return self::DEPRECATED_COUNT_TYPES[$type] ?? $type;
92	}
93
94	/**
95	 * Validates that the provided parameter combination is supported.
96	 *
97	 * @param string $type
98	 * @throws LocalizedHttpException
99	 */
100	private function validateParameterCombination( $type ) {
101		$params = $this->getValidatedParams();
102		if ( !$params ) {
103			return;
104		}
105
106		if ( $params['from'] || $params['to'] ) {
107			if ( $type === 'edits' || $type === 'editors' ) {
108				if ( !$params['from'] || !$params['to'] ) {
109					throw new LocalizedHttpException(
110						new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
111						400
112					);
113				}
114			} else {
115				throw new LocalizedHttpException(
116					new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
117					400
118				);
119			}
120		}
121	}
122
123	/**
124	 * @param Title $title the title of the page to load history for
125	 * @param string $type the validated count type
126	 * @return Response
127	 * @throws LocalizedHttpException
128	 */
129	public function run( $title, $type ) {
130		$normalizedType = $this->normalizeType( $type );
131		$this->validateParameterCombination( $normalizedType );
132		$titleObj = $this->getTitle();
133		if ( !$titleObj || !$titleObj->getArticleID() ) {
134			throw new LocalizedHttpException(
135				new MessageValue( 'rest-nonexistent-title',
136					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
137				),
138				404
139			);
140		}
141
142		if ( !$this->getAuthority()->authorizeRead( 'read', $titleObj ) ) {
143			throw new LocalizedHttpException(
144				new MessageValue( 'rest-permission-denied-title',
145					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
146				),
147				403
148			);
149		}
150
151		$count = $this->getCount( $normalizedType );
152		$countLimit = self::COUNT_LIMITS[$normalizedType];
153		$response = $this->getResponseFactory()->createJson( [
154				'count' => $count > $countLimit ? $countLimit : $count,
155				'limit' => $count > $countLimit
156		] );
157		$response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 );
158
159		// Inform clients who use a deprecated "type" value, so they can adjust
160		if ( isset( self::DEPRECATED_COUNT_TYPES[$type] ) ) {
161			$docs = '<https://www.mediawiki.org/wiki/API:REST/History_API' .
162				'#Get_page_history_counts>; rel="deprecation"';
163			$response->setHeader( 'Deprecation', 'version="v1"' );
164			$response->setHeader( 'Link', $docs );
165		}
166
167		return $response;
168	}
169
170	/**
171	 * @param string $type the validated count type
172	 * @return int the article count
173	 * @throws LocalizedHttpException
174	 */
175	private function getCount( $type ) {
176		$pageId = $this->getTitle()->getArticleID();
177		switch ( $type ) {
178			case 'anonymous':
179				return $this->getCachedCount( $type,
180					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
181						return $this->getAnonCount( $pageId, $fromRev );
182					}
183				);
184
185			case 'bot':
186				return $this->getCachedCount( $type,
187					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
188						return $this->getBotCount( $pageId, $fromRev );
189					}
190				);
191
192			case 'editors':
193				$from = $this->getValidatedParams()['from'] ?? null;
194				$to = $this->getValidatedParams()['to'] ?? null;
195				if ( $from || $to ) {
196					return $this->getEditorsCount(
197						$pageId,
198						$from ? $this->getRevisionOrThrow( $from ) : null,
199						$to ? $this->getRevisionOrThrow( $to ) : null
200					);
201				} else {
202					return $this->getCachedCount( $type,
203						function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
204							return $this->getEditorsCount( $pageId, $fromRev );
205						} );
206				}
207
208			case 'edits':
209				$from = $this->getValidatedParams()['from'] ?? null;
210				$to = $this->getValidatedParams()['to'] ?? null;
211				if ( $from || $to ) {
212					return $this->getEditsCount(
213						$pageId,
214						$from ? $this->getRevisionOrThrow( $from ) : null,
215						$to ? $this->getRevisionOrThrow( $to ) : null
216					);
217				} else {
218					return $this->getCachedCount( $type,
219						function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
220							return $this->getEditsCount( $pageId, $fromRev );
221						}
222					);
223				}
224
225			case 'reverted':
226				return $this->getCachedCount( $type,
227					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
228						return $this->getRevertedCount( $pageId, $fromRev );
229					}
230				);
231
232			case 'minor':
233				// The query for minor counts is inefficient for the database for pages with many revisions.
234				// If the specified title contains more revisions than allowed, we will return an error.
235				$editsCount = $this->getCachedCount( 'edits',
236					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
237						return $this->getEditsCount( $pageId, $fromRev );
238					}
239				);
240				if ( $editsCount > self::COUNT_LIMITS[$type] * 2 ) {
241					throw new LocalizedHttpException(
242						new MessageValue( 'rest-pagehistorycount-too-many-revisions' ),
243						500
244					);
245				}
246				return $this->getCachedCount( $type,
247					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
248						return $this->getMinorCount( $pageId, $fromRev );
249					}
250				);
251
252			// Sanity check
253			default:
254				throw new LocalizedHttpException(
255					new MessageValue( 'rest-pagehistorycount-type-unrecognized',
256						[ new ScalarParam( ParamType::PLAINTEXT, $type ) ]
257					),
258					500
259				);
260		}
261	}
262
263	/**
264	 * @return RevisionRecord|bool current revision or false if unable to retrieve revision
265	 */
266	private function getCurrentRevision() {
267		if ( $this->revision === null ) {
268			$title = $this->getTitle();
269			if ( $title && $title->getArticleID() ) {
270				$this->revision = $this->revisionStore->getKnownCurrentRevision( $title );
271			} else {
272				$this->revision = false;
273			}
274		}
275		return $this->revision;
276	}
277
278	/**
279	 * @return Title|bool Title or false if unable to retrieve title
280	 */
281	private function getTitle() {
282		if ( $this->titleObject === null ) {
283			$this->titleObject = Title::newFromText( $this->getValidatedParams()['title'] );
284		}
285		return $this->titleObject;
286	}
287
288	/**
289	 * Returns latest of 2 timestamps:
290	 * 1. Current revision
291	 * 2. OR entry from the DB logging table for the given page
292	 * @return int|null
293	 */
294	protected function getLastModified() {
295		$lastModifiedTimes = $this->getLastModifiedTimes();
296		if ( $lastModifiedTimes ) {
297			return max( array_values( $lastModifiedTimes ) );
298		}
299	}
300
301	/**
302	 * Returns array with 2 timestamps:
303	 * 1. Current revision
304	 * 2. OR entry from the DB logging table for the given page
305	 * @return array
306	 */
307	protected function getLastModifiedTimes() {
308		$currentRev = $this->getCurrentRevision();
309		if ( !$currentRev ) {
310			return null;
311		}
312		if ( $this->lastModifiedTimes === null ) {
313			$currentRevTime = (int)wfTimestampOrNull( TS_UNIX, $currentRev->getTimestamp() );
314			$loggingTableTime = $this->loggingTableTime( $currentRev->getPageId() );
315			$this->lastModifiedTimes = [
316				'currentRevTS' => $currentRevTime,
317				'dependencyModTS' => $loggingTableTime
318			];
319		}
320		return $this->lastModifiedTimes;
321	}
322
323	/**
324	 * Return timestamp of latest entry in logging table for given page id
325	 * @param int $pageId
326	 * @return int|null
327	 */
328	private function loggingTableTime( $pageId ) {
329		$res = $this->loadBalancer->getConnectionRef( DB_REPLICA )->selectField(
330			'logging',
331			'MAX(log_timestamp)',
332			[ 'log_page' => $pageId ],
333			__METHOD__
334		);
335		return $res ? (int)wfTimestamp( TS_UNIX, $res ) : null;
336	}
337
	/**
	 * ETags are deliberately not implemented in this handler.
	 * Generating an ETag for revision counts would have to account for things like visibility
	 * settings (e.g. the rev_deleted bit), which requires hitting the database anyway. The
	 * responses to these requests are so small that we wouldn't gain much efficiency.
	 * ETags are strong validators and, if provided, would take precedence over the
	 * last-modified time, a weak validator. We want to ensure only the last-modified time is
	 * used, since it is more efficient than ETags in this particular case.
	 * @return null
	 */
	protected function getEtag() {
		return null;
	}
351
352	/**
353	 * @param string $type
354	 * @param callable $fetchCount
355	 * @return int
356	 */
357	private function getCachedCount( $type,
358		callable $fetchCount
359	) {
360		$titleObj = $this->getTitle();
361		$pageId = $titleObj->getArticleID();
362		return $this->cache->getWithSetCallback(
363			$this->cache->makeKey( 'rest', 'pagehistorycount', $pageId, $type ),
364			WANObjectCache::TTL_WEEK,
365			function ( $oldValue ) use ( $fetchCount ) {
366				$currentRev = $this->getCurrentRevision();
367				if ( $oldValue ) {
368					// Last modified timestamp was NOT a dependency change (e.g. revdel)
369					$doIncrementalUpdate = (
370						$this->getLastModified() != $this->getLastModifiedTimes()['dependencyModTS']
371					);
372					if ( $doIncrementalUpdate ) {
373						$rev = $this->revisionStore->getRevisionById( $oldValue['revision'] );
374						if ( $rev ) {
375							$additionalCount = $fetchCount( $rev );
376							return [
377								'revision' => $currentRev->getId(),
378								'count' => $oldValue['count'] + $additionalCount,
379								'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
380							];
381						}
382					}
383				}
384				// Nothing was previously stored, or incremental update was done for too long,
385				// recalculate from scratch.
386				return [
387					'revision' => $currentRev->getId(),
388					'count' => $fetchCount(),
389					'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
390				];
391			},
392			[
393				'touchedCallback' => function (){
394					return $this->getLastModified();
395				},
396				'version' => 2,
397				'lockTSE' => WANObjectCache::TTL_MINUTE * 5
398			]
399		)['count'];
400	}
401
402	/**
403	 * @param int $pageId the id of the page to load history for
404	 * @param RevisionRecord|null $fromRev
405	 * @return int the count
406	 */
407	protected function getAnonCount( $pageId, RevisionRecord $fromRev = null ) {
408		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
409
410		$cond = [
411			'rev_page' => $pageId,
412			'actor_user IS NULL',
413			$dbr->bitAnd( 'rev_deleted',
414				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0"
415		];
416
417		if ( $fromRev ) {
418			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
419			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
420				"OR rev_timestamp > {$oldTs}";
421		}
422
423		$edits = $dbr->selectRowCount(
424			[
425				'revision_actor_temp',
426				'revision',
427				'actor'
428			],
429			'1',
430			$cond,
431			__METHOD__,
432			[ 'LIMIT' => self::COUNT_LIMITS['anonymous'] + 1 ], // extra to detect truncation
433			[
434				'revision' => [
435					'JOIN',
436					'revactor_rev = rev_id AND revactor_page = rev_page'
437				],
438				'actor' => [
439					'JOIN',
440					'revactor_actor = actor_id'
441				]
442			]
443		);
444		return $edits;
445	}
446
447	/**
448	 * @param int $pageId the id of the page to load history for
449	 * @param RevisionRecord|null $fromRev
450	 * @return int the count
451	 */
452	protected function getBotCount( $pageId, RevisionRecord $fromRev = null ) {
453		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
454
455		$cond = [
456			'rev_page=' . intval( $pageId ),
457			$dbr->bitAnd( 'rev_deleted',
458				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0",
459			'EXISTS(' .
460				$dbr->selectSQLText(
461					'user_groups',
462					'1',
463					[
464						'actor.actor_user = ug_user',
465						'ug_group' => $this->permissionManager->getGroupsWithPermission( 'bot' ),
466						'ug_expiry IS NULL OR ug_expiry >= ' . $dbr->addQuotes( $dbr->timestamp() )
467					],
468					__METHOD__
469				) .
470			')'
471		];
472		if ( $fromRev ) {
473			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
474			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
475				"OR rev_timestamp > {$oldTs}";
476		}
477
478		$edits = $dbr->selectRowCount(
479			[
480				'revision_actor_temp',
481				'revision',
482				'actor',
483			],
484			'1',
485			$cond,
486			__METHOD__,
487			[ 'LIMIT' => self::COUNT_LIMITS['bot'] + 1 ], // extra to detect truncation
488			[
489				'revision' => [
490					'JOIN',
491					'revactor_rev = rev_id AND revactor_page = rev_page'
492				],
493				'actor' => [
494					'JOIN',
495					'revactor_actor = actor_id'
496				],
497			]
498		);
499		return $edits;
500	}
501
502	/**
503	 * @param int $pageId the id of the page to load history for
504	 * @param RevisionRecord|null $fromRev
505	 * @param RevisionRecord|null $toRev
506	 * @return int the count
507	 */
508	protected function getEditorsCount( $pageId,
509		RevisionRecord $fromRev = null,
510		RevisionRecord $toRev = null
511	) {
512		list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
513		return $this->revisionStore->countAuthorsBetween( $pageId, $fromRev,
514			$toRev, $this->getAuthority(), self::COUNT_LIMITS['editors'] );
515	}
516
517	/**
518	 * @param int $pageId the id of the page to load history for
519	 * @param RevisionRecord|null $fromRev
520	 * @return int the count
521	 */
522	protected function getRevertedCount( $pageId, RevisionRecord $fromRev = null ) {
523		$tagIds = [];
524
525		foreach ( ChangeTags::REVERT_TAGS as $tagName ) {
526			try {
527				$tagIds[] = $this->changeTagDefStore->getId( $tagName );
528			} catch ( NameTableAccessException $e ) {
529				// If no revisions are tagged with a name, no tag id will be present
530			}
531		}
532		if ( !$tagIds ) {
533			return 0;
534		}
535
536		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
537
538		$cond = [
539			'rev_page' => $pageId,
540			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
541		];
542		if ( $fromRev ) {
543			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
544			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
545				"OR rev_timestamp > {$oldTs}";
546		}
547		$edits = $dbr->selectRowCount(
548			[
549				'revision',
550				'change_tag'
551			],
552			'1',
553			[ 'rev_page' => $pageId ],
554			__METHOD__,
555			[
556				'LIMIT' => self::COUNT_LIMITS['reverted'] + 1, // extra to detect truncation
557				'GROUP BY' => 'rev_id'
558			],
559			[
560				'change_tag' => [
561					'JOIN',
562					[
563						'ct_rev_id = rev_id',
564						'ct_tag_id' => $tagIds,
565					]
566				],
567			]
568		);
569		return $edits;
570	}
571
572	/**
573	 * @param int $pageId the id of the page to load history for
574	 * @param RevisionRecord|null $fromRev
575	 * @return int the count
576	 */
577	protected function getMinorCount( $pageId, RevisionRecord $fromRev = null ) {
578		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
579		$cond = [
580			'rev_page' => $pageId,
581			'rev_minor_edit != 0',
582			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
583		];
584		if ( $fromRev ) {
585			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
586			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
587				"OR rev_timestamp > {$oldTs}";
588		}
589		$edits = $dbr->selectRowCount( 'revision', '1',
590			$cond,
591			__METHOD__,
592			[ 'LIMIT' => self::COUNT_LIMITS['minor'] + 1 ] // extra to detect truncation
593		);
594
595		return $edits;
596	}
597
598	/**
599	 * @param int $pageId the id of the page to load history for
600	 * @param RevisionRecord|null $fromRev
601	 * @param RevisionRecord|null $toRev
602	 * @return int the count
603	 */
604	protected function getEditsCount(
605		$pageId,
606		RevisionRecord $fromRev = null,
607		RevisionRecord $toRev = null
608	) {
609		list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
610		return $this->revisionStore->countRevisionsBetween(
611			$pageId,
612			$fromRev,
613			$toRev,
614			self::COUNT_LIMITS['edits'] // Will be increased by 1 to detect truncation
615		);
616	}
617
618	/**
619	 * @param int $revId
620	 * @return RevisionRecord
621	 * @throws LocalizedHttpException
622	 */
623	private function getRevisionOrThrow( $revId ) {
624		$rev = $this->revisionStore->getRevisionById( $revId );
625		if ( !$rev ) {
626			throw new LocalizedHttpException(
627				new MessageValue( 'rest-nonexistent-revision', [ $revId ] ),
628				404
629			);
630		}
631		return $rev;
632	}
633
634	/**
635	 * Reorders revisions if they are present
636	 * @param RevisionRecord|null $fromRev
637	 * @param RevisionRecord|null $toRev
638	 * @return array
639	 * @phan-return array{0:RevisionRecord|null,1:RevisionRecord|null}
640	 */
641	private function orderRevisions(
642		RevisionRecord $fromRev = null,
643		RevisionRecord $toRev = null
644	) {
645		if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
646				( $fromRev->getTimestamp() === $toRev->getTimestamp()
647					&& $fromRev->getId() > $toRev->getId() ) )
648		) {
649			return [ $toRev, $fromRev ];
650		}
651		return [ $fromRev, $toRev ];
652	}
653
	/**
	 * This handler only reads counts, so it always reports that no write access is needed.
	 *
	 * @return bool always false
	 */
	public function needsWriteAccess() {
		return false;
	}
657
658	public function getParamSettings() {
659		return [
660			'title' => [
661				self::PARAM_SOURCE => 'path',
662				ParamValidator::PARAM_TYPE => 'string',
663				ParamValidator::PARAM_REQUIRED => true,
664			],
665			'type' => [
666				self::PARAM_SOURCE => 'path',
667				ParamValidator::PARAM_TYPE => array_merge(
668					array_keys( self::COUNT_LIMITS ),
669					array_keys( self::DEPRECATED_COUNT_TYPES )
670				),
671				ParamValidator::PARAM_REQUIRED => true,
672			],
673			'from' => [
674				self::PARAM_SOURCE => 'query',
675				ParamValidator::PARAM_TYPE => 'integer',
676				ParamValidator::PARAM_REQUIRED => false
677			],
678			'to' => [
679				self::PARAM_SOURCE => 'query',
680				ParamValidator::PARAM_TYPE => 'integer',
681				ParamValidator::PARAM_REQUIRED => false
682			]
683		];
684	}
685}
686