1<?php
2
3namespace MediaWiki\Rest\Handler;
4
5use MediaWiki\Permissions\PermissionManager;
6use MediaWiki\Rest\LocalizedHttpException;
7use MediaWiki\Rest\Response;
8use MediaWiki\Rest\SimpleHandler;
9use MediaWiki\Revision\RevisionRecord;
10use MediaWiki\Revision\RevisionStore;
11use MediaWiki\Storage\NameTableAccessException;
12use MediaWiki\Storage\NameTableStore;
13use MediaWiki\Storage\NameTableStoreFactory;
14use RequestContext;
15use Title;
16use User;
17use WANObjectCache;
18use Wikimedia\Message\MessageValue;
19use Wikimedia\Message\ParamType;
20use Wikimedia\Message\ScalarParam;
21use Wikimedia\ParamValidator\ParamValidator;
22use Wikimedia\Rdbms\ILoadBalancer;
23
/**
 * Handler class for the Core REST API endpoint that returns counts of a page's
 * history by type (edits, editors, anonymous, bot, minor and reverted edits)
 */
27class PageHistoryCountHandler extends SimpleHandler {
	/** The largest count reported for each count type; higher results are capped by run() */
29	private const COUNT_LIMITS = [
30		'anonymous' => 10000,
31		'bot' => 10000,
32		'editors' => 25000,
33		'edits' => 30000,
34		'minor' => 1000,
35		'reverted' => 30000
36	];
37
38	private const DEPRECATED_COUNT_TYPES = [
39		'anonedits' => 'anonymous',
40		'botedits' => 'bot',
41		'revertededits' => 'reverted'
42	];
43
44	private const MAX_AGE_200 = 60;
45
46	private const REVERTED_TAG_NAMES = [ 'mw-undo', 'mw-rollback' ];
47
48	/** @var RevisionStore */
49	private $revisionStore;
50
51	/** @var NameTableStore */
52	private $changeTagDefStore;
53
54	/** @var PermissionManager */
55	private $permissionManager;
56
57	/** @var ILoadBalancer */
58	private $loadBalancer;
59
60	/** @var WANObjectCache */
61	private $cache;
62
63	/** @var User */
64	private $user;
65
66	/** @var RevisionRecord|bool */
67	private $revision;
68
69	/** @var array */
70	private $lastModifiedTimes;
71
72	/** @var Title */
73	private $titleObject;
74
75	/**
76	 * @param RevisionStore $revisionStore
77	 * @param NameTableStoreFactory $nameTableStoreFactory
78	 * @param PermissionManager $permissionManager
79	 * @param ILoadBalancer $loadBalancer
80	 * @param WANObjectCache $cache
81	 */
82	public function __construct(
83		RevisionStore $revisionStore,
84		NameTableStoreFactory $nameTableStoreFactory,
85		PermissionManager $permissionManager,
86		ILoadBalancer $loadBalancer,
87		WANObjectCache $cache
88	) {
89		$this->revisionStore = $revisionStore;
90		$this->changeTagDefStore = $nameTableStoreFactory->getChangeTagDef();
91		$this->permissionManager = $permissionManager;
92		$this->loadBalancer = $loadBalancer;
93		$this->cache = $cache;
94
95		// @todo Inject this, when there is a good way to do that
96		$this->user = RequestContext::getMain()->getUser();
97	}
98
99	private function normalizeType( $type ) {
100		return self::DEPRECATED_COUNT_TYPES[$type] ?? $type;
101	}
102
103	/**
104	 * Validates that the provided parameter combination is supported.
105	 *
106	 * @param string $type
107	 * @throws LocalizedHttpException
108	 */
109	private function validateParameterCombination( $type ) {
110		$params = $this->getValidatedParams();
111		if ( !$params ) {
112			return;
113		}
114
115		if ( $params['from'] || $params['to'] ) {
116			if ( $type === 'edits' || $type === 'editors' ) {
117				if ( !$params['from'] || !$params['to'] ) {
118					throw new LocalizedHttpException(
119						new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
120						400
121					);
122				}
123			} else {
124				throw new LocalizedHttpException(
125					new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
126					400
127				);
128			}
129		}
130	}
131
132	/**
133	 * @param Title $title the title of the page to load history for
134	 * @param string $type the validated count type
135	 * @return Response
136	 * @throws LocalizedHttpException
137	 */
138	public function run( $title, $type ) {
139		$normalizedType = $this->normalizeType( $type );
140		$this->validateParameterCombination( $normalizedType );
141		$titleObj = $this->getTitle();
142		if ( !$titleObj || !$titleObj->getArticleID() ) {
143			throw new LocalizedHttpException(
144				new MessageValue( 'rest-nonexistent-title',
145					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
146				),
147				404
148			);
149		}
150
151		if ( !$this->permissionManager->userCan( 'read', $this->user, $titleObj ) ) {
152			throw new LocalizedHttpException(
153				new MessageValue( 'rest-permission-denied-title',
154					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
155				),
156				403
157			);
158		}
159
160		$count = $this->getCount( $normalizedType );
161		$countLimit = self::COUNT_LIMITS[$normalizedType];
162		$response = $this->getResponseFactory()->createJson( [
163				'count' => $count > $countLimit ? $countLimit : $count,
164				'limit' => $count > $countLimit
165		] );
166		$response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 );
167
168		// Inform clients who use a deprecated "type" value, so they can adjust
169		if ( isset( self::DEPRECATED_COUNT_TYPES[$type] ) ) {
170			$docs = '<https://www.mediawiki.org/wiki/API:REST/History_API' .
171				'#Get_page_history_counts>; rel="deprecation"';
172			$response->setHeader( 'Deprecation', 'version="v1"' );
173			$response->setHeader( 'Link', $docs );
174		}
175
176		return $response;
177	}
178
179	/**
180	 * @param string $type the validated count type
181	 * @return int the article count
182	 * @throws LocalizedHttpException
183	 */
184	private function getCount( $type ) {
185		$pageId = $this->getTitle()->getArticleID();
186		switch ( $type ) {
187			case 'anonymous':
188				return $this->getCachedCount( $type,
189					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
190						return $this->getAnonCount( $pageId, $fromRev );
191					}
192				);
193
194			case 'bot':
195				return $this->getCachedCount( $type,
196					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
197						return $this->getBotCount( $pageId, $fromRev );
198					}
199				);
200
201			case 'editors':
202				$from = $this->getValidatedParams()['from'] ?? null;
203				$to = $this->getValidatedParams()['to'] ?? null;
204				if ( $from || $to ) {
205					return $this->getEditorsCount(
206						$pageId,
207						$from ? $this->getRevisionOrThrow( $from ) : null,
208						$to ? $this->getRevisionOrThrow( $to ) : null
209					);
210				} else {
211					return $this->getCachedCount( $type,
212						function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
213							return $this->getEditorsCount( $pageId, $fromRev );
214						} );
215				}
216
217			case 'edits':
218				$from = $this->getValidatedParams()['from'] ?? null;
219				$to = $this->getValidatedParams()['to'] ?? null;
220				if ( $from || $to ) {
221					return $this->getEditsCount(
222						$pageId,
223						$from ? $this->getRevisionOrThrow( $from ) : null,
224						$to ? $this->getRevisionOrThrow( $to ) : null
225					);
226				} else {
227					return $this->getCachedCount( $type,
228						function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
229							return $this->getEditsCount( $pageId, $fromRev );
230						}
231					);
232				}
233
234			case 'reverted':
235				return $this->getCachedCount( $type,
236					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
237						return $this->getRevertedCount( $pageId, $fromRev );
238					}
239				);
240
241			case 'minor':
242				// The query for minor counts is inefficient for the database for pages with many revisions.
243				// If the specified title contains more revisions than allowed, we will return an error.
244				$editsCount = $this->getCachedCount( 'edits',
245					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
246						return $this->getEditsCount( $pageId, $fromRev );
247					}
248				);
249				if ( $editsCount > self::COUNT_LIMITS[$type] * 2 ) {
250					throw new LocalizedHttpException(
251						new MessageValue( 'rest-pagehistorycount-too-many-revisions' ),
252						500
253					);
254				}
255				return $this->getCachedCount( $type,
256					function ( RevisionRecord $fromRev = null ) use ( $pageId ) {
257						return $this->getMinorCount( $pageId, $fromRev );
258					}
259				);
260
261			// Sanity check
262			default:
263				throw new LocalizedHttpException(
264					new MessageValue( 'rest-pagehistorycount-type-unrecognized',
265						[ new ScalarParam( ParamType::PLAINTEXT, $type ) ]
266					),
267					500
268				);
269		}
270	}
271
272	/**
273	 * @return RevisionRecord|bool current revision or false if unable to retrieve revision
274	 */
275	private function getCurrentRevision() {
276		if ( $this->revision === null ) {
277			$title = $this->getTitle();
278			if ( $title && $title->getArticleID() ) {
279				$this->revision = $this->revisionStore->getKnownCurrentRevision( $title );
280			} else {
281				$this->revision = false;
282			}
283		}
284		return $this->revision;
285	}
286
287	/**
288	 * @return Title|bool Title or false if unable to retrieve title
289	 */
290	private function getTitle() {
291		if ( $this->titleObject === null ) {
292			$this->titleObject = Title::newFromText( $this->getValidatedParams()['title'] );
293		}
294		return $this->titleObject;
295	}
296
297	/**
298	 * Returns latest of 2 timestamps:
299	 * 1. Current revision
300	 * 2. OR entry from the DB logging table for the given page
301	 * @return int|null
302	 */
303	protected function getLastModified() {
304		$lastModifiedTimes = $this->getLastModifiedTimes();
305		if ( $lastModifiedTimes ) {
306			return max( array_values( $lastModifiedTimes ) );
307		}
308	}
309
310	/**
311	 * Returns array with 2 timestamps:
312	 * 1. Current revision
313	 * 2. OR entry from the DB logging table for the given page
314	 * @return array
315	 */
316	protected function getLastModifiedTimes() {
317		$currentRev = $this->getCurrentRevision();
318		if ( !$currentRev ) {
319			return null;
320		}
321		if ( $this->lastModifiedTimes === null ) {
322			$currentRevTime = (int)wfTimestampOrNull( TS_UNIX, $currentRev->getTimestamp() );
323			$loggingTableTime = $this->loggingTableTime( $currentRev->getPageId() );
324			$this->lastModifiedTimes = [
325				'currentRevTS' => $currentRevTime,
326				'dependencyModTS' => $loggingTableTime
327			];
328		}
329		return $this->lastModifiedTimes;
330	}
331
332	/**
333	 * Return timestamp of latest entry in logging table for given page id
334	 * @param int $pageId
335	 * @return int|null
336	 */
337	private function loggingTableTime( $pageId ) {
338		$res = $this->loadBalancer->getConnectionRef( DB_REPLICA )->selectField(
339			'logging',
340			'MAX(log_timestamp)',
341			[ 'log_page' => $pageId ],
342			__METHOD__
343		);
344		return $res ? (int)wfTimestamp( TS_UNIX, $res ) : null;
345	}
346
347	/**
348	 * Choosing to not implement etags in this handler.
349	 * Generating an etag when getting revision counts must account for things like visibility settings
350	 * (e.g. rev_deleted bit) which requires hitting the database anyway. The response for these
351	 * requests are so small that we wouldn't be gaining much efficiency.
352	 * Etags are strong validators and if provided would take precendence over
353	 * last modified time, a weak validator. We want to ensure only last modified time is used
354	 * since it is more efficient than using etags for this particular case.
355	 * @return null
356	 */
357	protected function getEtag() {
358		return null;
359	}
360
361	/**
362	 * @param string $type
363	 * @param callable $fetchCount
364	 * @return int
365	 */
366	private function getCachedCount( $type,
367		callable $fetchCount
368	) {
369		$titleObj = $this->getTitle();
370		$pageId = $titleObj->getArticleID();
371		return $this->cache->getWithSetCallback(
372			$this->cache->makeKey( 'rest', 'pagehistorycount', $pageId, $type ),
373			WANObjectCache::TTL_WEEK,
374			function ( $oldValue ) use ( $fetchCount ) {
375				$currentRev = $this->getCurrentRevision();
376				if ( $oldValue ) {
377					// Last modified timestamp was NOT a dependency change (e.g. revdel)
378					$doIncrementalUpdate = (
379						$this->getLastModified() != $this->getLastModifiedTimes()['dependencyModTS']
380					);
381					if ( $doIncrementalUpdate ) {
382						$rev = $this->revisionStore->getRevisionById( $oldValue['revision'] );
383						if ( $rev ) {
384							$additionalCount = $fetchCount( $rev );
385							return [
386								'revision' => $currentRev->getId(),
387								'count' => $oldValue['count'] + $additionalCount,
388								'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
389							];
390						}
391					}
392				}
393				// Nothing was previously stored, or incremental update was done for too long,
394				// recalculate from scratch.
395				return [
396					'revision' => $currentRev->getId(),
397					'count' => $fetchCount(),
398					'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
399				];
400			},
401			[
402				'touchedCallback' => function (){
403					return $this->getLastModified();
404				},
405				'version' => 2,
406				'lockTSE' => WANObjectCache::TTL_MINUTE * 5
407			]
408		)['count'];
409	}
410
411	/**
412	 * @param int $pageId the id of the page to load history for
413	 * @param RevisionRecord|null $fromRev
414	 * @return int the count
415	 */
416	protected function getAnonCount( $pageId, RevisionRecord $fromRev = null ) {
417		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
418
419		$cond = [
420			'rev_page' => $pageId,
421			'actor_user IS NULL',
422			$dbr->bitAnd( 'rev_deleted',
423				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0"
424		];
425
426		if ( $fromRev ) {
427			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
428			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
429				"OR rev_timestamp > {$oldTs}";
430		}
431
432		$edits = $dbr->selectRowCount(
433			[
434				'revision_actor_temp',
435				'revision',
436				'actor'
437			],
438			'1',
439			$cond,
440			__METHOD__,
441			[ 'LIMIT' => self::COUNT_LIMITS['anonymous'] + 1 ], // extra to detect truncation
442			[
443				'revision' => [
444					'JOIN',
445					'revactor_rev = rev_id AND revactor_page = rev_page'
446				],
447				'actor' => [
448					'JOIN',
449					'revactor_actor = actor_id'
450				]
451			]
452		);
453		return $edits;
454	}
455
456	/**
457	 * @param int $pageId the id of the page to load history for
458	 * @param RevisionRecord|null $fromRev
459	 * @return int the count
460	 */
461	protected function getBotCount( $pageId, RevisionRecord $fromRev = null ) {
462		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
463
464		$cond = [
465			'rev_page=' . intval( $pageId ),
466			$dbr->bitAnd( 'rev_deleted',
467				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0",
468			'EXISTS(' .
469				$dbr->selectSQLText(
470					'user_groups',
471					'1',
472					[
473						'actor.actor_user = ug_user',
474						'ug_group' => $this->permissionManager->getGroupsWithPermission( 'bot' ),
475						'ug_expiry IS NULL OR ug_expiry >= ' . $dbr->addQuotes( $dbr->timestamp() )
476					],
477					__METHOD__
478				) .
479			')'
480		];
481		if ( $fromRev ) {
482			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
483			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
484				"OR rev_timestamp > {$oldTs}";
485		}
486
487		$edits = $dbr->selectRowCount(
488			[
489				'revision_actor_temp',
490				'revision',
491				'actor',
492			],
493			'1',
494			$cond,
495			__METHOD__,
496			[ 'LIMIT' => self::COUNT_LIMITS['bot'] + 1 ], // extra to detect truncation
497			[
498				'revision' => [
499					'JOIN',
500					'revactor_rev = rev_id AND revactor_page = rev_page'
501				],
502				'actor' => [
503					'JOIN',
504					'revactor_actor = actor_id'
505				],
506			]
507		);
508		return $edits;
509	}
510
511	/**
512	 * @param int $pageId the id of the page to load history for
513	 * @param RevisionRecord|null $fromRev
514	 * @param RevisionRecord|null $toRev
515	 * @return int the count
516	 */
517	protected function getEditorsCount( $pageId,
518		RevisionRecord $fromRev = null,
519		RevisionRecord $toRev = null
520	) {
521		list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
522		return $this->revisionStore->countAuthorsBetween( $pageId, $fromRev,
523			$toRev, $this->user, self::COUNT_LIMITS['editors'] );
524	}
525
526	/**
527	 * @param int $pageId the id of the page to load history for
528	 * @param RevisionRecord|null $fromRev
529	 * @return int the count
530	 */
531	protected function getRevertedCount( $pageId, RevisionRecord $fromRev = null ) {
532		$tagIds = [];
533
534		foreach ( self::REVERTED_TAG_NAMES as $tagName ) {
535			try {
536				$tagIds[] = $this->changeTagDefStore->getId( $tagName );
537			} catch ( NameTableAccessException $e ) {
538				// If no revisions are tagged with a name, no tag id will be present
539			}
540		}
541		if ( !$tagIds ) {
542			return 0;
543		}
544
545		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
546
547		$cond = [
548			'rev_page' => $pageId,
549			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
550		];
551		if ( $fromRev ) {
552			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
553			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
554				"OR rev_timestamp > {$oldTs}";
555		}
556		$edits = $dbr->selectRowCount(
557			[
558				'revision',
559				'change_tag'
560			],
561			'1',
562			[ 'rev_page' => $pageId ],
563			__METHOD__,
564			[
565				'LIMIT' => self::COUNT_LIMITS['reverted'] + 1, // extra to detect truncation
566				'GROUP BY' => 'rev_id'
567			],
568			[
569				'change_tag' => [
570					'JOIN',
571					[
572						'ct_rev_id = rev_id',
573						'ct_tag_id' => $tagIds,
574					]
575				],
576			]
577		);
578		return $edits;
579	}
580
581	/**
582	 * @param int $pageId the id of the page to load history for
583	 * @param RevisionRecord|null $fromRev
584	 * @return int the count
585	 */
586	protected function getMinorCount( $pageId, RevisionRecord $fromRev = null ) {
587		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
588		$cond = [
589			'rev_page' => $pageId,
590			'rev_minor_edit != 0',
591			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
592		];
593		if ( $fromRev ) {
594			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
595			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
596				"OR rev_timestamp > {$oldTs}";
597		}
598		$edits = $dbr->selectRowCount( 'revision', '1',
599			$cond,
600			__METHOD__,
601			[ 'LIMIT' => self::COUNT_LIMITS['minor'] + 1 ] // extra to detect truncation
602		);
603
604		return $edits;
605	}
606
607	/**
608	 * @param int $pageId the id of the page to load history for
609	 * @param RevisionRecord|null $fromRev
610	 * @param RevisionRecord|null $toRev
611	 * @return int the count
612	 */
613	protected function getEditsCount(
614		$pageId,
615		RevisionRecord $fromRev = null,
616		RevisionRecord $toRev = null
617	) {
618		list( $fromRev, $toRev ) = $this->orderRevisions( $fromRev, $toRev );
619		return $this->revisionStore->countRevisionsBetween(
620			$pageId,
621			$fromRev,
622			$toRev,
623			self::COUNT_LIMITS['edits'] // Will be increased by 1 to detect truncation
624		);
625	}
626
627	/**
628	 * @param int $revId
629	 * @return RevisionRecord
630	 * @throws LocalizedHttpException
631	 */
632	private function getRevisionOrThrow( $revId ) {
633		$rev = $this->revisionStore->getRevisionById( $revId );
634		if ( !$rev ) {
635			throw new LocalizedHttpException(
636				new MessageValue( 'rest-nonexistent-revision', [ $revId ] ),
637				404
638			);
639		}
640		return $rev;
641	}
642
643	/**
644	 * Reorders revisions if they are present
645	 * @param RevisionRecord|null $fromRev
646	 * @param RevisionRecord|null $toRev
647	 * @return array
648	 * @phan-return array{0:RevisionRecord|null,1:RevisionRecord|null}
649	 */
650	private function orderRevisions(
651		RevisionRecord $fromRev = null,
652		RevisionRecord $toRev = null
653	) {
654		if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
655				( $fromRev->getTimestamp() === $toRev->getTimestamp()
656					&& $fromRev->getId() > $toRev->getId() ) )
657		) {
658			return [ $toRev, $fromRev ];
659		}
660		return [ $fromRev, $toRev ];
661	}
662
663	public function needsWriteAccess() {
664		return false;
665	}
666
667	public function getParamSettings() {
668		return [
669			'title' => [
670				self::PARAM_SOURCE => 'path',
671				ParamValidator::PARAM_TYPE => 'string',
672				ParamValidator::PARAM_REQUIRED => true,
673			],
674			'type' => [
675				self::PARAM_SOURCE => 'path',
676				ParamValidator::PARAM_TYPE => array_merge(
677					array_keys( self::COUNT_LIMITS ),
678					array_keys( self::DEPRECATED_COUNT_TYPES )
679				),
680				ParamValidator::PARAM_REQUIRED => true,
681			],
682			'from' => [
683				self::PARAM_SOURCE => 'query',
684				ParamValidator::PARAM_TYPE => 'integer',
685				ParamValidator::PARAM_REQUIRED => false
686			],
687			'to' => [
688				self::PARAM_SOURCE => 'query',
689				ParamValidator::PARAM_TYPE => 'integer',
690				ParamValidator::PARAM_REQUIRED => false
691			]
692		];
693	}
694}
695