<?php

namespace MediaWiki\Rest\Handler;

use ChangeTags;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Storage\NameTableAccessException;
use MediaWiki\Storage\NameTableStore;
use MediaWiki\Storage\NameTableStoreFactory;
use Title;
use WANObjectCache;
use Wikimedia\Message\MessageValue;
use Wikimedia\Message\ParamType;
use Wikimedia\Message\ScalarParam;
use Wikimedia\ParamValidator\ParamValidator;
use Wikimedia\Rdbms\ILoadBalancer;

/**
 * Handler class for Core REST API endpoints that perform operations on revisions.
 *
 * Given a page title and a count type, responds with the number of revisions
 * (or editors) of that type in the page history, capped at a per-type limit.
 */
class PageHistoryCountHandler extends SimpleHandler {
	/** The maximum number of counts to return per type of revision */
	private const COUNT_LIMITS = [
		'anonymous' => 10000,
		'bot' => 10000,
		'editors' => 25000,
		'edits' => 30000,
		'minor' => 1000,
		'reverted' => 30000
	];

	/** Maps deprecated "type" values to the count type they are an alias of */
	private const DEPRECATED_COUNT_TYPES = [
		'anonedits' => 'anonymous',
		'botedits' => 'bot',
		'revertededits' => 'reverted'
	];

	/** Value sent as Cache-Control max-age on successful responses */
	private const MAX_AGE_200 = 60;

	/** @var RevisionStore */
	private $revisionStore;

	/** @var NameTableStore */
	private $changeTagDefStore;

	/** @var PermissionManager */
	private $permissionManager;

	/** @var ILoadBalancer */
	private $loadBalancer;

	/** @var WANObjectCache */
	private $cache;

	/** @var RevisionRecord|bool|null null until first looked up, false if unavailable */
	private $revision;

	/** @var array|null null until first computed */
	private $lastModifiedTimes;

	/** @var Title|bool|null null until first looked up, false if the title is invalid */
	private $titleObject;

	/**
	 * @param RevisionStore $revisionStore
	 * @param NameTableStoreFactory $nameTableStoreFactory
	 * @param PermissionManager $permissionManager
	 * @param ILoadBalancer $loadBalancer
	 * @param WANObjectCache $cache
	 */
	public function __construct(
		RevisionStore $revisionStore,
		NameTableStoreFactory $nameTableStoreFactory,
		PermissionManager $permissionManager,
		ILoadBalancer $loadBalancer,
		WANObjectCache $cache
	) {
		$this->revisionStore = $revisionStore;
		$this->changeTagDefStore = $nameTableStoreFactory->getChangeTagDef();
		$this->permissionManager = $permissionManager;
		$this->loadBalancer = $loadBalancer;
		$this->cache = $cache;
	}

	/**
	 * Translates a deprecated count type to its current name.
	 *
	 * @param string $type
	 * @return string the current type name, or $type itself if it is not deprecated
	 */
	private function normalizeType( $type ) {
		return self::DEPRECATED_COUNT_TYPES[$type] ?? $type;
	}

	/**
	 * Validates that the provided parameter combination is supported.
	 *
	 * "from" and "to" are only accepted for the 'edits' and 'editors' types,
	 * and then only when both are given.
	 *
	 * @param string $type the normalized count type
	 * @throws LocalizedHttpException with status 400 on an unsupported combination
	 */
	private function validateParameterCombination( $type ) {
		$params = $this->getValidatedParams();
		if ( !$params ) {
			return;
		}

		if ( !$params['from'] && !$params['to'] ) {
			// No range requested, nothing to validate
			return;
		}

		$rangeSupported = ( $type === 'edits' || $type === 'editors' );
		if ( !$rangeSupported || !$params['from'] || !$params['to'] ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
				400
			);
		}
	}

	/**
	 * @param string $title the title of the page to load history for
	 *   (declared as a string path parameter in getParamSettings())
	 * @param string $type the validated count type
	 * @return Response
	 * @throws LocalizedHttpException
	 */
	public function run( $title, $type ) {
		$normalizedType = $this->normalizeType( $type );
		$this->validateParameterCombination( $normalizedType );
		$titleObj = $this->getTitle();
		if ( !$titleObj || !$titleObj->getArticleID() ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-nonexistent-title',
					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
				),
				404
			);
		}

		if ( !$this->getAuthority()->authorizeRead( 'read', $titleObj ) ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-permission-denied-title',
					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
				),
				403
			);
		}

		$count = $this->getCount( $normalizedType );
		$countLimit = self::COUNT_LIMITS[$normalizedType];
		// The count queries fetch up to limit + 1 rows, so exceeding the limit
		// means the real count was truncated.
		$truncated = $count > $countLimit;
		$response = $this->getResponseFactory()->createJson( [
			'count' => $truncated ? $countLimit : $count,
			'limit' => $truncated
		] );
		$response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 );

		// Inform clients who use a deprecated "type" value, so they can adjust
		if ( isset( self::DEPRECATED_COUNT_TYPES[$type] ) ) {
			$docs = '<https://www.mediawiki.org/wiki/API:REST/History_API' .
				'#Get_page_history_counts>; rel="deprecation"';
			$response->setHeader( 'Deprecation', 'version="v1"' );
			$response->setHeader( 'Link', $docs );
		}

		return $response;
	}

	/**
	 * Dispatches to the count implementation for the given type.
	 *
	 * Range queries (from/to) bypass the cache; all other counts go through
	 * getCachedCount().
	 *
	 * @param string $type the validated count type
	 * @return int the article count
	 * @throws LocalizedHttpException
	 */
	private function getCount( $type ) {
		$pageId = $this->getTitle()->getArticleID();
		switch ( $type ) {
			case 'anonymous':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getAnonCount( $pageId, $fromRev );
					}
				);

			case 'bot':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getBotCount( $pageId, $fromRev );
					}
				);

			case 'editors':
				$from = $this->getValidatedParams()['from'] ?? null;
				$to = $this->getValidatedParams()['to'] ?? null;
				if ( $from || $to ) {
					return $this->getEditorsCount(
						$pageId,
						$from ? $this->getRevisionOrThrow( $from ) : null,
						$to ? $this->getRevisionOrThrow( $to ) : null
					);
				} else {
					return $this->getCachedCount( $type,
						function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
							return $this->getEditorsCount( $pageId, $fromRev );
						}
					);
				}

			case 'edits':
				$from = $this->getValidatedParams()['from'] ?? null;
				$to = $this->getValidatedParams()['to'] ?? null;
				if ( $from || $to ) {
					return $this->getEditsCount(
						$pageId,
						$from ? $this->getRevisionOrThrow( $from ) : null,
						$to ? $this->getRevisionOrThrow( $to ) : null
					);
				} else {
					return $this->getCachedCount( $type,
						function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
							return $this->getEditsCount( $pageId, $fromRev );
						}
					);
				}

			case 'reverted':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getRevertedCount( $pageId, $fromRev );
					}
				);

			case 'minor':
				// The query for minor counts is inefficient for the database for pages with many revisions.
				// If the specified title contains more revisions than allowed, we will return an error.
				$editsCount = $this->getCachedCount( 'edits',
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getEditsCount( $pageId, $fromRev );
					}
				);
				if ( $editsCount > self::COUNT_LIMITS[$type] * 2 ) {
					throw new LocalizedHttpException(
						new MessageValue( 'rest-pagehistorycount-too-many-revisions' ),
						500
					);
				}
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getMinorCount( $pageId, $fromRev );
					}
				);

			// Sanity check
			default:
				throw new LocalizedHttpException(
					new MessageValue( 'rest-pagehistorycount-type-unrecognized',
						[ new ScalarParam( ParamType::PLAINTEXT, $type ) ]
					),
					500
				);
		}
	}

	/**
	 * @return RevisionRecord|bool current revision or false if unable to retrieve revision
	 */
	private function getCurrentRevision() {
		if ( $this->revision === null ) {
			$title = $this->getTitle();
			if ( $title && $title->getArticleID() ) {
				$this->revision = $this->revisionStore->getKnownCurrentRevision( $title );
			} else {
				$this->revision = false;
			}
		}
		return $this->revision;
	}

	/**
	 * @return Title|bool Title or false if unable to retrieve title
	 */
	private function getTitle() {
		if ( $this->titleObject === null ) {
			// Store false rather than null on failure so the lookup is not
			// repeated on every call and the documented return type holds.
			$this->titleObject =
				Title::newFromText( $this->getValidatedParams()['title'] ) ?: false;
		}
		return $this->titleObject;
	}

	/**
	 * Returns latest of 2 timestamps:
	 * 1. Current revision
	 * 2. OR entry from the DB logging table for the given page
	 * @return int|null null if the current revision is unavailable
	 */
	protected function getLastModified() {
		$lastModifiedTimes = $this->getLastModifiedTimes();
		if ( $lastModifiedTimes ) {
			return max( array_values( $lastModifiedTimes ) );
		}
		return null;
	}

	/**
	 * Returns array with 2 timestamps:
	 * 1. Current revision ('currentRevTS')
	 * 2. OR entry from the DB logging table for the given page ('dependencyModTS')
	 * @return array|null null if the current revision is unavailable
	 */
	protected function getLastModifiedTimes() {
		$currentRev = $this->getCurrentRevision();
		if ( !$currentRev ) {
			return null;
		}
		if ( $this->lastModifiedTimes === null ) {
			$currentRevTime = (int)wfTimestampOrNull( TS_UNIX, $currentRev->getTimestamp() );
			$loggingTableTime = $this->loggingTableTime( $currentRev->getPageId() );
			$this->lastModifiedTimes = [
				'currentRevTS' => $currentRevTime,
				'dependencyModTS' => $loggingTableTime
			];
		}
		return $this->lastModifiedTimes;
	}

	/**
	 * Return timestamp of latest entry in logging table for given page id
	 * @param int $pageId
	 * @return int|null null if the query yields no timestamp
	 */
	private function loggingTableTime( $pageId ) {
		$res = $this->loadBalancer->getConnectionRef( DB_REPLICA )->selectField(
			'logging',
			'MAX(log_timestamp)',
			[ 'log_page' => $pageId ],
			__METHOD__
		);
		return $res ? (int)wfTimestamp( TS_UNIX, $res ) : null;
	}

	/**
	 * Choosing to not implement etags in this handler.
	 * Generating an etag when getting revision counts must account for things like visibility settings
	 * (e.g. rev_deleted bit) which requires hitting the database anyway. The response for these
	 * requests are so small that we wouldn't be gaining much efficiency.
	 * Etags are strong validators and if provided would take precedence over
	 * last modified time, a weak validator. We want to ensure only last modified time is used
	 * since it is more efficient than using etags for this particular case.
	 * @return null
	 */
	protected function getEtag() {
		return null;
	}

	/**
	 * Fetches a count through the WAN cache, keyed by page id and count type.
	 *
	 * When a previous value exists and the latest modification is not the
	 * logging-table timestamp, only revisions newer than the previously stored
	 * revision are counted and added to the old count; otherwise the count is
	 * recomputed from scratch.
	 *
	 * @param string $type
	 * @param callable $fetchCount callback taking an optional RevisionRecord to count from
	 * @return int
	 */
	private function getCachedCount( $type,
		callable $fetchCount
	) {
		$titleObj = $this->getTitle();
		$pageId = $titleObj->getArticleID();
		return $this->cache->getWithSetCallback(
			$this->cache->makeKey( 'rest', 'pagehistorycount', $pageId, $type ),
			WANObjectCache::TTL_WEEK,
			function ( $oldValue ) use ( $fetchCount ) {
				$currentRev = $this->getCurrentRevision();
				if ( $oldValue ) {
					// Last modified timestamp was NOT a dependency change (e.g. revdel)
					$doIncrementalUpdate = (
						$this->getLastModified() != $this->getLastModifiedTimes()['dependencyModTS']
					);
					if ( $doIncrementalUpdate ) {
						$rev = $this->revisionStore->getRevisionById( $oldValue['revision'] );
						if ( $rev ) {
							$additionalCount = $fetchCount( $rev );
							return [
								'revision' => $currentRev->getId(),
								'count' => $oldValue['count'] + $additionalCount,
								'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
							];
						}
					}
				}
				// Nothing was previously stored, or incremental update was done for too long,
				// recalculate from scratch.
				return [
					'revision' => $currentRev->getId(),
					'count' => $fetchCount(),
					'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
				];
			},
			[
				'touchedCallback' => function () {
					return $this->getLastModified();
				},
				'version' => 2,
				'lockTSE' => WANObjectCache::TTL_MINUTE * 5
			]
		)['count'];
	}

	/**
	 * Builds the SQL condition matching revisions that come after $fromRev,
	 * ordered by timestamp with the revision id as tie-breaker.
	 *
	 * NOTE(review): the returned string contains a top-level OR; it is meant to
	 * be used as an unkeyed element of a condition array and relies on the
	 * database layer wrapping such elements in parentheses - confirm.
	 *
	 * @param \Wikimedia\Rdbms\IDatabase $dbr connection used for quoting,
	 *   as returned by ILoadBalancer::getConnectionRef()
	 * @param RevisionRecord $fromRev
	 * @return string
	 */
	private function buildNewerThanCondition( $dbr, RevisionRecord $fromRev ) {
		$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
		return "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
			"OR rev_timestamp > {$oldTs}";
	}

	/**
	 * Counts revisions whose actor has no user id (actor_user IS NULL).
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only newer revisions are counted
	 * @return int the count
	 */
	protected function getAnonCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$cond = [
			'rev_page' => $pageId,
			'actor_user IS NULL',
			$dbr->bitAnd( 'rev_deleted',
				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0"
		];

		if ( $fromRev ) {
			$cond[] = $this->buildNewerThanCondition( $dbr, $fromRev );
		}

		return $dbr->selectRowCount(
			[
				'revision_actor_temp',
				'revision',
				'actor'
			],
			'1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['anonymous'] + 1 ], // extra to detect truncation
			[
				'revision' => [
					'JOIN',
					'revactor_rev = rev_id AND revactor_page = rev_page'
				],
				'actor' => [
					'JOIN',
					'revactor_actor = actor_id'
				]
			]
		);
	}

	/**
	 * Counts revisions whose author currently belongs to a group that has the
	 * 'bot' permission (with an unexpired membership).
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only newer revisions are counted
	 * @return int the count
	 */
	protected function getBotCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$botMembershipSql = $dbr->selectSQLText(
			'user_groups',
			'1',
			[
				'actor.actor_user = ug_user',
				'ug_group' => $this->permissionManager->getGroupsWithPermission( 'bot' ),
				'ug_expiry IS NULL OR ug_expiry >= ' . $dbr->addQuotes( $dbr->timestamp() )
			],
			__METHOD__
		);

		$cond = [
			'rev_page=' . intval( $pageId ),
			$dbr->bitAnd( 'rev_deleted',
				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0",
			'EXISTS(' . $botMembershipSql . ')'
		];
		if ( $fromRev ) {
			$cond[] = $this->buildNewerThanCondition( $dbr, $fromRev );
		}

		return $dbr->selectRowCount(
			[
				'revision_actor_temp',
				'revision',
				'actor',
			],
			'1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['bot'] + 1 ], // extra to detect truncation
			[
				'revision' => [
					'JOIN',
					'revactor_rev = rev_id AND revactor_page = rev_page'
				],
				'actor' => [
					'JOIN',
					'revactor_actor = actor_id'
				],
			]
		);
	}

	/**
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return int the count
	 */
	protected function getEditorsCount( $pageId,
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		[ $fromRev, $toRev ] = $this->orderRevisions( $fromRev, $toRev );
		return $this->revisionStore->countAuthorsBetween( $pageId, $fromRev,
			$toRev, $this->getAuthority(), self::COUNT_LIMITS['editors'] );
	}

	/**
	 * Counts revisions carrying one of the ChangeTags::REVERT_TAGS tags.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only newer revisions are counted
	 * @return int the count
	 */
	protected function getRevertedCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$tagIds = [];

		foreach ( ChangeTags::REVERT_TAGS as $tagName ) {
			try {
				$tagIds[] = $this->changeTagDefStore->getId( $tagName );
			} catch ( NameTableAccessException $e ) {
				// If no revisions are tagged with a name, no tag id will be present
			}
		}
		if ( !$tagIds ) {
			return 0;
		}

		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$cond = [
			'rev_page' => $pageId,
			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
		];
		if ( $fromRev ) {
			$cond[] = $this->buildNewerThanCondition( $dbr, $fromRev );
		}

		// Pass the full $cond: filtering on rev_page alone would count deleted
		// revisions and, on incremental cache updates, re-count the whole history.
		return $dbr->selectRowCount(
			[
				'revision',
				'change_tag'
			],
			'1',
			$cond,
			__METHOD__,
			[
				'LIMIT' => self::COUNT_LIMITS['reverted'] + 1, // extra to detect truncation
				'GROUP BY' => 'rev_id'
			],
			[
				'change_tag' => [
					'JOIN',
					[
						'ct_rev_id = rev_id',
						'ct_tag_id' => $tagIds,
					]
				],
			]
		);
	}

	/**
	 * Counts revisions flagged as minor edits.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only newer revisions are counted
	 * @return int the count
	 */
	protected function getMinorCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
		$cond = [
			'rev_page' => $pageId,
			'rev_minor_edit != 0',
			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
		];
		if ( $fromRev ) {
			$cond[] = $this->buildNewerThanCondition( $dbr, $fromRev );
		}

		return $dbr->selectRowCount( 'revision', '1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['minor'] + 1 ] // extra to detect truncation
		);
	}

	/**
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return int the count
	 */
	protected function getEditsCount(
		$pageId,
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		[ $fromRev, $toRev ] = $this->orderRevisions( $fromRev, $toRev );
		return $this->revisionStore->countRevisionsBetween(
			$pageId,
			$fromRev,
			$toRev,
			self::COUNT_LIMITS['edits'] // Will be increased by 1 to detect truncation
		);
	}

	/**
	 * @param int $revId
	 * @return RevisionRecord
	 * @throws LocalizedHttpException with status 404 if the revision cannot be loaded
	 */
	private function getRevisionOrThrow( $revId ) {
		$rev = $this->revisionStore->getRevisionById( $revId );
		if ( !$rev ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-nonexistent-revision', [ $revId ] ),
				404
			);
		}
		return $rev;
	}

	/**
	 * Reorders revisions if they are present, so that the older one comes first.
	 * Ordering is by timestamp, with the revision id as tie-breaker.
	 *
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return array
	 * @phan-return array{0:RevisionRecord|null,1:RevisionRecord|null}
	 */
	private function orderRevisions(
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
				( $fromRev->getTimestamp() === $toRev->getTimestamp()
					&& $fromRev->getId() > $toRev->getId() ) )
		) {
			return [ $toRev, $fromRev ];
		}
		return [ $fromRev, $toRev ];
	}

	public function needsWriteAccess() {
		return false;
	}

	public function getParamSettings() {
		return [
			'title' => [
				self::PARAM_SOURCE => 'path',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'type' => [
				self::PARAM_SOURCE => 'path',
				// Both current and deprecated type names are accepted
				ParamValidator::PARAM_TYPE => array_merge(
					array_keys( self::COUNT_LIMITS ),
					array_keys( self::DEPRECATED_COUNT_TYPES )
				),
				ParamValidator::PARAM_REQUIRED => true,
			],
			'from' => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_REQUIRED => false
			],
			'to' => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_REQUIRED => false
			]
		];
	}
}