<?php

namespace MediaWiki\Rest\Handler;

use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Storage\NameTableAccessException;
use MediaWiki\Storage\NameTableStore;
use MediaWiki\Storage\NameTableStoreFactory;
use RequestContext;
use Title;
use User;
use WANObjectCache;
use Wikimedia\Message\MessageValue;
use Wikimedia\Message\ParamType;
use Wikimedia\Message\ScalarParam;
use Wikimedia\ParamValidator\ParamValidator;
use Wikimedia\Rdbms\ILoadBalancer;

/**
 * Handler class for Core REST API endpoints that perform operations on revisions
 *
 * Given a page title and a count type, responds with a JSON object holding the
 * (possibly capped) number of matching revisions and a flag telling whether the
 * cap was hit. Counts requested without a revision range are cached and, when
 * possible, updated incrementally from the revision they were last computed at.
 */
class PageHistoryCountHandler extends SimpleHandler {
	/** The maximum number of counts to return per type of revision */
	private const COUNT_LIMITS = [
		'anonymous' => 10000,
		'bot' => 10000,
		'editors' => 25000,
		'edits' => 30000,
		'minor' => 1000,
		'reverted' => 30000
	];

	/** Map of deprecated "type" path values to the type names that replace them */
	private const DEPRECATED_COUNT_TYPES = [
		'anonedits' => 'anonymous',
		'botedits' => 'bot',
		'revertededits' => 'reverted'
	];

	/** Value (in seconds) sent as Cache-Control max-age on successful responses */
	private const MAX_AGE_200 = 60;

	/** Change tag names that mark a revision as counted for the "reverted" type */
	private const REVERTED_TAG_NAMES = [ 'mw-undo', 'mw-rollback' ];

	/** @var RevisionStore */
	private $revisionStore;

	/** @var NameTableStore */
	private $changeTagDefStore;

	/** @var PermissionManager */
	private $permissionManager;

	/** @var ILoadBalancer */
	private $loadBalancer;

	/** @var WANObjectCache */
	private $cache;

	/** @var User */
	private $user;

	/** @var RevisionRecord|bool|null Memoized current revision; null until first looked up */
	private $revision;

	/** @var array|null Memoized result of getLastModifiedTimes() */
	private $lastModifiedTimes;

	/** @var Title|null Memoized result of getTitle() */
	private $titleObject;

	/**
	 * @param RevisionStore $revisionStore
	 * @param NameTableStoreFactory $nameTableStoreFactory
	 * @param PermissionManager $permissionManager
	 * @param ILoadBalancer $loadBalancer
	 * @param WANObjectCache $cache
	 */
	public function __construct(
		RevisionStore $revisionStore,
		NameTableStoreFactory $nameTableStoreFactory,
		PermissionManager $permissionManager,
		ILoadBalancer $loadBalancer,
		WANObjectCache $cache
	) {
		$this->revisionStore = $revisionStore;
		$this->changeTagDefStore = $nameTableStoreFactory->getChangeTagDef();
		$this->permissionManager = $permissionManager;
		$this->loadBalancer = $loadBalancer;
		$this->cache = $cache;

		// @todo Inject this, when there is a good way to do that
		$this->user = RequestContext::getMain()->getUser();
	}

	/**
	 * Maps a deprecated count type to its current name; other values pass through unchanged.
	 *
	 * @param string $type
	 * @return string
	 */
	private function normalizeType( $type ) {
		return self::DEPRECATED_COUNT_TYPES[$type] ?? $type;
	}

	/**
	 * Validates that the provided parameter combination is supported.
	 *
	 * "from" and "to" are only accepted for the "edits" and "editors" types, and
	 * then only when both are given together.
	 *
	 * @param string $type the normalized count type
	 * @throws LocalizedHttpException with status 400 on an unsupported combination
	 */
	private function validateParameterCombination( $type ) {
		$params = $this->getValidatedParams();
		if ( !$params ) {
			return;
		}

		// Use ?? so that an absent key is treated like an empty value, matching getCount()
		$from = $params['from'] ?? null;
		$to = $params['to'] ?? null;
		if ( !$from && !$to ) {
			// No range requested: every type is acceptable
			return;
		}

		$supportsRange = ( $type === 'edits' || $type === 'editors' );
		if ( !$supportsRange || !$from || !$to ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-pagehistorycount-parameters-invalid' ),
				400
			);
		}
	}

	/**
	 * @param Title $title the title of the page to load history for
	 * @param string $type the validated count type
	 * @return Response
	 * @throws LocalizedHttpException
	 */
	public function run( $title, $type ) {
		$normalizedType = $this->normalizeType( $type );
		$this->validateParameterCombination( $normalizedType );
		$titleObj = $this->getTitle();
		if ( !$titleObj || !$titleObj->getArticleID() ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-nonexistent-title',
					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
				),
				404
			);
		}

		if ( !$this->permissionManager->userCan( 'read', $this->user, $titleObj ) ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-permission-denied-title',
					[ new ScalarParam( ParamType::PLAINTEXT, $title ) ]
				),
				403
			);
		}

		$count = $this->getCount( $normalizedType );
		$countLimit = self::COUNT_LIMITS[$normalizedType];
		// The count queries ask for one more row than the limit, so a count above
		// the limit means the result was truncated; report the capped value plus a flag.
		$response = $this->getResponseFactory()->createJson( [
			'count' => $count > $countLimit ? $countLimit : $count,
			'limit' => $count > $countLimit
		] );
		$response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 );

		// Inform clients who use a deprecated "type" value, so they can adjust
		if ( isset( self::DEPRECATED_COUNT_TYPES[$type] ) ) {
			$docs = '<https://www.mediawiki.org/wiki/API:REST/History_API' .
				'#Get_page_history_counts>; rel="deprecation"';
			$response->setHeader( 'Deprecation', 'version="v1"' );
			$response->setHeader( 'Link', $docs );
		}

		return $response;
	}

	/**
	 * Dispatches to the counting method for the given type.
	 *
	 * Counts without a revision range go through getCachedCount(); "edits" and
	 * "editors" counts restricted by "from"/"to" are computed directly.
	 *
	 * @param string $type the validated count type
	 * @return int the article count
	 * @throws LocalizedHttpException
	 */
	private function getCount( $type ) {
		$pageId = $this->getTitle()->getArticleID();
		switch ( $type ) {
			case 'anonymous':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getAnonCount( $pageId, $fromRev );
					}
				);

			case 'bot':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getBotCount( $pageId, $fromRev );
					}
				);

			case 'editors':
				$from = $this->getValidatedParams()['from'] ?? null;
				$to = $this->getValidatedParams()['to'] ?? null;
				if ( $from || $to ) {
					return $this->getEditorsCount(
						$pageId,
						$from ? $this->getRevisionOrThrow( $from ) : null,
						$to ? $this->getRevisionOrThrow( $to ) : null
					);
				} else {
					return $this->getCachedCount( $type,
						function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
							return $this->getEditorsCount( $pageId, $fromRev );
						} );
				}

			case 'edits':
				$from = $this->getValidatedParams()['from'] ?? null;
				$to = $this->getValidatedParams()['to'] ?? null;
				if ( $from || $to ) {
					return $this->getEditsCount(
						$pageId,
						$from ? $this->getRevisionOrThrow( $from ) : null,
						$to ? $this->getRevisionOrThrow( $to ) : null
					);
				} else {
					return $this->getCachedCount( $type,
						function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
							return $this->getEditsCount( $pageId, $fromRev );
						}
					);
				}

			case 'reverted':
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getRevertedCount( $pageId, $fromRev );
					}
				);

			case 'minor':
				// The query for minor counts is inefficient for the database for pages with many revisions.
				// If the specified title contains more revisions than allowed, we will return an error.
				$editsCount = $this->getCachedCount( 'edits',
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getEditsCount( $pageId, $fromRev );
					}
				);
				if ( $editsCount > self::COUNT_LIMITS[$type] * 2 ) {
					throw new LocalizedHttpException(
						new MessageValue( 'rest-pagehistorycount-too-many-revisions' ),
						500
					);
				}
				return $this->getCachedCount( $type,
					function ( ?RevisionRecord $fromRev = null ) use ( $pageId ) {
						return $this->getMinorCount( $pageId, $fromRev );
					}
				);

			// Sanity check
			default:
				throw new LocalizedHttpException(
					new MessageValue( 'rest-pagehistorycount-type-unrecognized',
						[ new ScalarParam( ParamType::PLAINTEXT, $type ) ]
					),
					500
				);
		}
	}

	/**
	 * @return RevisionRecord|bool current revision or false if unable to retrieve revision
	 */
	private function getCurrentRevision() {
		if ( $this->revision === null ) {
			$title = $this->getTitle();
			if ( $title && $title->getArticleID() ) {
				$this->revision = $this->revisionStore->getKnownCurrentRevision( $title );
			} else {
				$this->revision = false;
			}
		}
		return $this->revision;
	}

	/**
	 * Builds the Title for the "title" path parameter, memoizing a non-null result.
	 *
	 * @return Title|null whatever Title::newFromText() produced; callers treat a
	 *  falsy value as "no such title"
	 */
	private function getTitle() {
		if ( $this->titleObject === null ) {
			$this->titleObject = Title::newFromText( $this->getValidatedParams()['title'] );
		}
		return $this->titleObject;
	}

	/**
	 * Returns latest of 2 timestamps:
	 * 1. Current revision
	 * 2. OR entry from the DB logging table for the given page
	 * @return int|null null when the current revision cannot be retrieved
	 */
	protected function getLastModified() {
		$lastModifiedTimes = $this->getLastModifiedTimes();
		if ( $lastModifiedTimes ) {
			return max( array_values( $lastModifiedTimes ) );
		}
		return null;
	}

	/**
	 * Returns array with 2 timestamps:
	 * 1. Current revision ('currentRevTS')
	 * 2. OR entry from the DB logging table for the given page ('dependencyModTS')
	 * @return array|null null when the current revision cannot be retrieved
	 */
	protected function getLastModifiedTimes() {
		$currentRev = $this->getCurrentRevision();
		if ( !$currentRev ) {
			return null;
		}
		if ( $this->lastModifiedTimes === null ) {
			$currentRevTime = (int)wfTimestampOrNull( TS_UNIX, $currentRev->getTimestamp() );
			$loggingTableTime = $this->loggingTableTime( $currentRev->getPageId() );
			$this->lastModifiedTimes = [
				'currentRevTS' => $currentRevTime,
				'dependencyModTS' => $loggingTableTime
			];
		}
		return $this->lastModifiedTimes;
	}

	/**
	 * Return timestamp of latest entry in logging table for given page id
	 * @param int $pageId
	 * @return int|null
	 */
	private function loggingTableTime( $pageId ) {
		$res = $this->loadBalancer->getConnectionRef( DB_REPLICA )->selectField(
			'logging',
			'MAX(log_timestamp)',
			[ 'log_page' => $pageId ],
			__METHOD__
		);
		return $res ? (int)wfTimestamp( TS_UNIX, $res ) : null;
	}

	/**
	 * Choosing to not implement etags in this handler.
	 * Generating an etag when getting revision counts must account for things like visibility settings
	 * (e.g. rev_deleted bit) which requires hitting the database anyway. The response for these
	 * requests are so small that we wouldn't be gaining much efficiency.
	 * Etags are strong validators and if provided would take precedence over
	 * last modified time, a weak validator. We want to ensure only last modified time is used
	 * since it is more efficient than using etags for this particular case.
	 * @return null
	 */
	protected function getEtag() {
		return null;
	}

	/**
	 * Fetches a count through the object cache, updating it incrementally when possible.
	 *
	 * The cached value records the revision id the count was computed at. On
	 * regeneration, if an old value exists and the latest modification is not the
	 * logging-table timestamp, only revisions newer than the recorded one are
	 * counted and added to the old count; otherwise the count is recomputed in full.
	 *
	 * @param string $type count type, used as part of the cache key
	 * @param callable $fetchCount called with no argument for a full count, or with
	 *  the RevisionRecord to count onwards from for an incremental update
	 * @return int
	 */
	private function getCachedCount( $type,
		callable $fetchCount
	) {
		$titleObj = $this->getTitle();
		$pageId = $titleObj->getArticleID();
		return $this->cache->getWithSetCallback(
			$this->cache->makeKey( 'rest', 'pagehistorycount', $pageId, $type ),
			WANObjectCache::TTL_WEEK,
			function ( $oldValue ) use ( $fetchCount ) {
				$currentRev = $this->getCurrentRevision();
				if ( $oldValue ) {
					// Last modified timestamp was NOT a dependency change (e.g. revdel)
					$doIncrementalUpdate = (
						$this->getLastModified() != $this->getLastModifiedTimes()['dependencyModTS']
					);
					if ( $doIncrementalUpdate ) {
						$rev = $this->revisionStore->getRevisionById( $oldValue['revision'] );
						if ( $rev ) {
							$additionalCount = $fetchCount( $rev );
							return [
								'revision' => $currentRev->getId(),
								'count' => $oldValue['count'] + $additionalCount,
								'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
							];
						}
					}
				}
				// Nothing was previously stored, or incremental update was done for too long,
				// recalculate from scratch.
				return [
					'revision' => $currentRev->getId(),
					'count' => $fetchCount(),
					'dependencyModTS' => $this->getLastModifiedTimes()['dependencyModTS']
				];
			},
			[
				'touchedCallback' => function () {
					return $this->getLastModified();
				},
				'version' => 2,
				'lockTSE' => WANObjectCache::TTL_MINUTE * 5
			]
		)['count'];
	}

	/**
	 * Counts revisions of the page whose actor has no user id (actor_user IS NULL)
	 * and whose text and user are not suppressed.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only count revisions newer than this one
	 * @return int the count
	 */
	protected function getAnonCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$cond = [
			'rev_page' => $pageId,
			'actor_user IS NULL',
			$dbr->bitAnd( 'rev_deleted',
				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0"
		];

		if ( $fromRev ) {
			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
			// Strictly after $fromRev: later timestamp, or same timestamp with a higher id
			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
				"OR rev_timestamp > {$oldTs}";
		}

		$edits = $dbr->selectRowCount(
			[
				'revision_actor_temp',
				'revision',
				'actor'
			],
			'1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['anonymous'] + 1 ], // extra to detect truncation
			[
				'revision' => [
					'JOIN',
					'revactor_rev = rev_id AND revactor_page = rev_page'
				],
				'actor' => [
					'JOIN',
					'revactor_actor = actor_id'
				]
			]
		);
		return $edits;
	}

	/**
	 * Counts revisions of the page made by users who currently hold a non-expired
	 * membership in a group with the 'bot' permission, excluding revisions whose
	 * text or user is suppressed.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only count revisions newer than this one
	 * @return int the count
	 */
	protected function getBotCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$cond = [
			'rev_page=' . intval( $pageId ),
			$dbr->bitAnd( 'rev_deleted',
				RevisionRecord::DELETED_TEXT | RevisionRecord::DELETED_USER ) . " = 0",
			'EXISTS(' .
				$dbr->selectSQLText(
					'user_groups',
					'1',
					[
						'actor.actor_user = ug_user',
						'ug_group' => $this->permissionManager->getGroupsWithPermission( 'bot' ),
						'ug_expiry IS NULL OR ug_expiry >= ' . $dbr->addQuotes( $dbr->timestamp() )
					],
					__METHOD__
				) .
			')'
		];
		if ( $fromRev ) {
			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
			// Strictly after $fromRev: later timestamp, or same timestamp with a higher id
			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
				"OR rev_timestamp > {$oldTs}";
		}

		$edits = $dbr->selectRowCount(
			[
				'revision_actor_temp',
				'revision',
				'actor',
			],
			'1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['bot'] + 1 ], // extra to detect truncation
			[
				'revision' => [
					'JOIN',
					'revactor_rev = rev_id AND revactor_page = rev_page'
				],
				'actor' => [
					'JOIN',
					'revactor_actor = actor_id'
				],
			]
		);
		return $edits;
	}

	/**
	 * Counts distinct authors via RevisionStore::countAuthorsBetween().
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return int the count
	 */
	protected function getEditorsCount( $pageId,
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		[ $fromRev, $toRev ] = $this->orderRevisions( $fromRev, $toRev );
		return $this->revisionStore->countAuthorsBetween( $pageId, $fromRev,
			$toRev, $this->user, self::COUNT_LIMITS['editors'] );
	}

	/**
	 * Counts revisions of the page that carry one of the REVERTED_TAG_NAMES change
	 * tags and whose text is not suppressed.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only count revisions newer than this one
	 * @return int the count
	 */
	protected function getRevertedCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$tagIds = [];

		foreach ( self::REVERTED_TAG_NAMES as $tagName ) {
			try {
				$tagIds[] = $this->changeTagDefStore->getId( $tagName );
			} catch ( NameTableAccessException $e ) {
				// If no revisions are tagged with a name, no tag id will be present
			}
		}
		if ( !$tagIds ) {
			return 0;
		}

		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );

		$cond = [
			'rev_page' => $pageId,
			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
		];
		if ( $fromRev ) {
			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
			// Strictly after $fromRev: later timestamp, or same timestamp with a higher id
			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
				"OR rev_timestamp > {$oldTs}";
		}
		$edits = $dbr->selectRowCount(
			[
				'revision',
				'change_tag'
			],
			'1',
			// Must be $cond, not just the page id: otherwise the visibility filter and
			// the $fromRev restriction are dropped, and every incremental cache update
			// would add the page's full reverted count on top of the stored one.
			$cond,
			__METHOD__,
			[
				'LIMIT' => self::COUNT_LIMITS['reverted'] + 1, // extra to detect truncation
				'GROUP BY' => 'rev_id'
			],
			[
				'change_tag' => [
					'JOIN',
					[
						'ct_rev_id = rev_id',
						'ct_tag_id' => $tagIds,
					]
				],
			]
		);
		return $edits;
	}

	/**
	 * Counts revisions of the page that are flagged as minor edits and whose text
	 * is not suppressed.
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev if given, only count revisions newer than this one
	 * @return int the count
	 */
	protected function getMinorCount( $pageId, ?RevisionRecord $fromRev = null ) {
		$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA );
		$cond = [
			'rev_page' => $pageId,
			'rev_minor_edit != 0',
			$dbr->bitAnd( 'rev_deleted', RevisionRecord::DELETED_TEXT ) . " = 0"
		];
		if ( $fromRev ) {
			$oldTs = $dbr->addQuotes( $dbr->timestamp( $fromRev->getTimestamp() ) );
			// Strictly after $fromRev: later timestamp, or same timestamp with a higher id
			$cond[] = "(rev_timestamp = {$oldTs} AND rev_id > {$fromRev->getId()}) " .
				"OR rev_timestamp > {$oldTs}";
		}
		$edits = $dbr->selectRowCount( 'revision', '1',
			$cond,
			__METHOD__,
			[ 'LIMIT' => self::COUNT_LIMITS['minor'] + 1 ] // extra to detect truncation
		);

		return $edits;
	}

	/**
	 * Counts revisions via RevisionStore::countRevisionsBetween().
	 *
	 * @param int $pageId the id of the page to load history for
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return int the count
	 */
	protected function getEditsCount(
		$pageId,
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		[ $fromRev, $toRev ] = $this->orderRevisions( $fromRev, $toRev );
		return $this->revisionStore->countRevisionsBetween(
			$pageId,
			$fromRev,
			$toRev,
			self::COUNT_LIMITS['edits'] // Will be increased by 1 to detect truncation
		);
	}

	/**
	 * Loads a revision by id, failing the request if it does not exist.
	 *
	 * @param int $revId
	 * @return RevisionRecord
	 * @throws LocalizedHttpException with status 404 when no revision is found
	 */
	private function getRevisionOrThrow( $revId ) {
		$rev = $this->revisionStore->getRevisionById( $revId );
		if ( !$rev ) {
			throw new LocalizedHttpException(
				new MessageValue( 'rest-nonexistent-revision', [ $revId ] ),
				404
			);
		}
		return $rev;
	}

	/**
	 * Reorders revisions if they are present
	 *
	 * When both are given and $fromRev is the later one (by timestamp, with the
	 * revision id as tie-breaker), the two are swapped.
	 *
	 * @param RevisionRecord|null $fromRev
	 * @param RevisionRecord|null $toRev
	 * @return array
	 * @phan-return array{0:RevisionRecord|null,1:RevisionRecord|null}
	 */
	private function orderRevisions(
		?RevisionRecord $fromRev = null,
		?RevisionRecord $toRev = null
	) {
		if ( $fromRev && $toRev && ( $fromRev->getTimestamp() > $toRev->getTimestamp() ||
				( $fromRev->getTimestamp() === $toRev->getTimestamp()
					&& $fromRev->getId() > $toRev->getId() ) )
		) {
			return [ $toRev, $fromRev ];
		}
		return [ $fromRev, $toRev ];
	}

	/**
	 * @return bool always false
	 */
	public function needsWriteAccess() {
		return false;
	}

	/**
	 * Declares the accepted parameters: "title" and "type" from the path, and the
	 * optional integer revision ids "from" and "to" from the query string.
	 *
	 * @return array
	 */
	public function getParamSettings() {
		return [
			'title' => [
				self::PARAM_SOURCE => 'path',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
			],
			'type' => [
				self::PARAM_SOURCE => 'path',
				// Accept both the current type names and the deprecated aliases
				ParamValidator::PARAM_TYPE => array_merge(
					array_keys( self::COUNT_LIMITS ),
					array_keys( self::DEPRECATED_COUNT_TYPES )
				),
				ParamValidator::PARAM_REQUIRED => true,
			],
			'from' => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_REQUIRED => false
			],
			'to' => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'integer',
				ParamValidator::PARAM_REQUIRED => false
			]
		];
	}
}