1<?php 2/** 3 * A handle for managing updates for derived page data on edit, import, purge, etc. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program; if not, write to the Free Software Foundation, Inc., 17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 * http://www.gnu.org/copyleft/gpl.html 19 * 20 * @file 21 */ 22 23namespace MediaWiki\Storage; 24 25use CategoryMembershipChangeJob; 26use Content; 27use ContentHandler; 28use DeferrableUpdate; 29use DeferredUpdates; 30use IDBAccessObject; 31use InvalidArgumentException; 32use JobQueueGroup; 33use Language; 34use LinksDeletionUpdate; 35use LinksUpdate; 36use LogicException; 37use MediaWiki\Content\IContentHandlerFactory; 38use MediaWiki\Edit\PreparedEdit; 39use MediaWiki\HookContainer\HookContainer; 40use MediaWiki\HookContainer\HookRunner; 41use MediaWiki\MediaWikiServices; 42use MediaWiki\Revision\MutableRevisionRecord; 43use MediaWiki\Revision\RenderedRevision; 44use MediaWiki\Revision\RevisionRecord; 45use MediaWiki\Revision\RevisionRenderer; 46use MediaWiki\Revision\RevisionSlots; 47use MediaWiki\Revision\RevisionStore; 48use MediaWiki\Revision\SlotRecord; 49use MediaWiki\Revision\SlotRoleRegistry; 50use MediaWiki\User\UserIdentity; 51use MessageCache; 52use MWTimestamp; 53use MWUnknownContentModelException; 54use ParserCache; 55use ParserOptions; 56use ParserOutput; 57use Psr\Log\LoggerAwareInterface; 58use Psr\Log\LoggerInterface; 
59use Psr\Log\NullLogger; 60use RefreshSecondaryDataUpdate; 61use ResourceLoaderWikiModule; 62use RevertedTagUpdateJob; 63use Revision; 64use SearchUpdate; 65use SiteStatsUpdate; 66use Title; 67use User; 68use Wikimedia\Assert\Assert; 69use Wikimedia\Rdbms\ILBFactory; 70use WikiPage; 71 72/** 73 * A handle for managing updates for derived page data on edit, import, purge, etc. 74 * 75 * @note Avoid direct usage of DerivedPageDataUpdater. 76 * 77 * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly 78 * providing access to post-PST content and ParserOutput to callbacks during revision creation, 79 * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on 80 * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and 81 * Content::getSecondaryDataUpdates(). 82 * 83 * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance, 84 * and re-used by callback code over the course of an update operation. It's a stepping stone 85 * on the way to a more complete refactoring of WikiPage. 86 * 87 * When using a DerivedPageDataUpdater, the following life cycle must be observed: 88 * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required 89 * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates 90 * require prepareContent or prepareUpdate to have been called first, to initialize the 91 * DerivedPageDataUpdater. 92 * 93 * @see docs/pageupdater.md for more information. 94 * 95 * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases 96 * of PreparedEdit. 
97 * 98 * @internal 99 * 100 * @since 1.32 101 * @ingroup Page 102 */ 103class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface { 104 105 /** 106 * @var UserIdentity|null 107 */ 108 private $user = null; 109 110 /** 111 * @var WikiPage 112 */ 113 private $wikiPage; 114 115 /** 116 * @var ParserCache 117 */ 118 private $parserCache; 119 120 /** 121 * @var RevisionStore 122 */ 123 private $revisionStore; 124 125 /** 126 * @var Language 127 */ 128 private $contLang; 129 130 /** 131 * @var JobQueueGroup 132 */ 133 private $jobQueueGroup; 134 135 /** 136 * @var MessageCache 137 */ 138 private $messageCache; 139 140 /** 141 * @var ILBFactory 142 */ 143 private $loadbalancerFactory; 144 145 /** 146 * @var HookRunner 147 */ 148 private $hookRunner; 149 150 /** 151 * @var LoggerInterface 152 */ 153 private $logger; 154 155 /** 156 * @var string see $wgArticleCountMethod 157 */ 158 private $articleCountMethod; 159 160 /** 161 * @var bool see $wgRCWatchCategoryMembership 162 */ 163 private $rcWatchCategoryMembership = false; 164 165 /** 166 * Stores (most of) the $options parameter of prepareUpdate(). 167 * @see prepareUpdate() 168 * 169 * @phpcs:ignore Generic.Files.LineLength 170 * @phan-var array{changed:bool,created:bool,moved:bool,restored:bool,oldrevision:null|RevisionRecord,triggeringUser:null|UserIdentity,oldredirect:bool|null|string,oldcountable:bool|null|string,causeAction:null|string,causeAgent:null|string,editResult:null|EditResult,approved:bool} 171 */ 172 private $options = [ 173 'changed' => true, 174 // newrev is true if prepareUpdate is handling the creation of a new revision, 175 // as opposed to a null edit or a forced update. 
176 'newrev' => false, 177 'created' => false, 178 'moved' => false, 179 'restored' => false, 180 'oldrevision' => null, 181 'oldcountable' => null, 182 'oldredirect' => null, 183 'triggeringUser' => null, 184 // causeAction/causeAgent default to 'unknown' but that's handled where it's read, 185 // to make the life of prepareUpdate() callers easier. 186 'causeAction' => null, 187 'causeAgent' => null, 188 'editResult' => null, 189 'approved' => false, 190 ]; 191 192 /** 193 * The state of the relevant row in page table before the edit. 194 * This is determined by the first call to grabCurrentRevision, prepareContent, 195 * or prepareUpdate (so it is only accessible in 'knows-current' or a later stage). 196 * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will 197 * attempt to emulate the state of the page table before the edit. 198 * 199 * Contains the following fields: 200 * - oldRevision (RevisionRecord|null): the revision that was current before the change 201 * associated with this update. Might not be set, use getParentRevision(). 202 * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change 203 * was about creating a new page); null if not known (that should not happen). 204 * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded, 205 * can be null; use wasRedirect() instead of direct access. 
206 * - oldCountable (bool|null): whether the page was countable before the change (or null 207 * if we don't have that information) 208 * 209 * @var array 210 */ 211 private $pageState = null; 212 213 /** 214 * @var RevisionSlotsUpdate|null 215 */ 216 private $slotsUpdate = null; 217 218 /** 219 * @var RevisionRecord|null 220 */ 221 private $parentRevision = null; 222 223 /** 224 * @var RevisionRecord|null 225 */ 226 private $revision = null; 227 228 /** 229 * @var RenderedRevision 230 */ 231 private $renderedRevision = null; 232 233 /** 234 * @var RevisionRenderer 235 */ 236 private $revisionRenderer; 237 238 /** @var SlotRoleRegistry */ 239 private $slotRoleRegistry; 240 241 /** 242 * A stage identifier for managing the life cycle of this instance. 243 * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'. 244 * 245 * @see docs/pageupdater.md for documentation of the life cycle. 246 * 247 * @var string 248 */ 249 private $stage = 'new'; 250 251 /** 252 * Transition table for managing the life cycle of DerivedPageDateUpdater instances. 253 * 254 * XXX: Overkill. This is a linear order, we could just count. Names are nice though, 255 * and constants are also overkill... 256 * 257 * @see docs/pageupdater.md for documentation of the life cycle. 
*
	 * @var array[]
	 */
	private const TRANSITIONS = [
		'new' => [
			'new' => true,
			'knows-current' => true,
			'has-content' => true,
			'has-revision' => true,
		],
		'knows-current' => [
			'knows-current' => true,
			'has-content' => true,
			'has-revision' => true,
		],
		'has-content' => [
			'has-content' => true,
			'has-revision' => true,
		],
		'has-revision' => [
			'has-revision' => true,
			'done' => true,
		],
	];

	/** @var IContentHandlerFactory */
	private $contentHandlerFactory;

	/** @var EditResultCache */
	private $editResultCache;

	/**
	 * Stores the injected collaborators. The logger starts out as a NullLogger and can be
	 * replaced via setLogger().
	 *
	 * @param WikiPage $wikiPage
	 * @param RevisionStore $revisionStore
	 * @param RevisionRenderer $revisionRenderer
	 * @param SlotRoleRegistry $slotRoleRegistry
	 * @param ParserCache $parserCache
	 * @param JobQueueGroup $jobQueueGroup
	 * @param MessageCache $messageCache
	 * @param Language $contLang
	 * @param ILBFactory $loadbalancerFactory
	 * @param IContentHandlerFactory $contentHandlerFactory
	 * @param HookContainer $hookContainer
	 * @param EditResultCache $editResultCache
	 */
	public function __construct(
		WikiPage $wikiPage,
		RevisionStore $revisionStore,
		RevisionRenderer $revisionRenderer,
		SlotRoleRegistry $slotRoleRegistry,
		ParserCache $parserCache,
		JobQueueGroup $jobQueueGroup,
		MessageCache $messageCache,
		Language $contLang,
		ILBFactory $loadbalancerFactory,
		IContentHandlerFactory $contentHandlerFactory,
		HookContainer $hookContainer,
		EditResultCache $editResultCache
	) {
		// Until setLogger() is called, log messages are discarded.
		$this->logger = new NullLogger();

		$this->wikiPage = $wikiPage;

		$this->revisionStore = $revisionStore;
		$this->revisionRenderer = $revisionRenderer;
		$this->slotRoleRegistry = $slotRoleRegistry;
		$this->contentHandlerFactory = $contentHandlerFactory;

		$this->parserCache = $parserCache;
		$this->messageCache = $messageCache;
		$this->editResultCache = $editResultCache;
		$this->jobQueueGroup = $jobQueueGroup;
		$this->contLang = $contLang;

		// Hooks are only ever run through the HookRunner wrapper.
		$this->hookRunner = new HookRunner( $hookContainer );

		// XXX only needed for waiting for replicas to catch up; there should be a narrower
		// interface for that.
		$this->loadbalancerFactory = $loadbalancerFactory;
	}

	/**
	 * Converts a UserIdentity into a User object via User::newFromIdentity().
	 *
	 * @param UserIdentity $user
	 *
	 * @return User
	 */
	private static function toLegacyUser( UserIdentity $user ) {
		$legacyUser = User::newFromIdentity( $user );

		return $legacyUser;
	}

	/**
	 * Replaces the logger used by this instance.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Transition function for managing the life cycle of this instance.
	 * The transition is validated first; the stage is only changed if it is allowed.
	 *
	 * @see docs/pageupdater.md for documentation of the life cycle.
	 *
	 * @param string $newStage the new stage
	 * @return string the previous stage
	 *
	 * @throws LogicException If a transition to the given stage is not possible in the current
	 * stage.
	 */
	private function doTransition( $newStage ) {
		$previousStage = $this->stage;

		// Throws before anything is modified if the transition is not allowed.
		$this->assertTransition( $newStage );
		$this->stage = $newStage;

		return $previousStage;
	}

	/**
	 * Asserts that a transition to the given stage is possible, without performing it.
	 * A transition is possible if TRANSITIONS has a truthy entry for the current stage
	 * and the requested stage.
	 *
	 * @see docs/pageupdater.md for documentation of the life cycle.
	 *
	 * @param string $newStage the new stage
	 *
	 * @throws LogicException If this instance is not in the expected stage
	 */
	private function assertTransition( $newStage ) {
		if ( !empty( self::TRANSITIONS[$this->stage][$newStage] ) ) {
			return;
		}

		throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
	}

	/**
	 * Checks whether this DerivedPageDataUpdater can be re-used for running updates targeting
	 * the given revision.
*
	 * Every argument is optional; a check is skipped when the argument (or the matching
	 * state of this instance) is not available.
	 *
	 * @param UserIdentity|null $user The user creating the revision in question
	 * @param RevisionRecord|null $revision New revision (after save, if already saved)
	 * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
	 * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
	 *
	 * @throws InvalidArgumentException If both $revision and a non-zero $parentId are given,
	 * but $parentId is not the parent ID of $revision.
	 *
	 * @return bool
	 */
	public function isReusableFor(
		?UserIdentity $user = null,
		?RevisionRecord $revision = null,
		?RevisionSlotsUpdate $slotsUpdate = null,
		$parentId = null
	) {
		if ( $revision
			&& $parentId
			&& $revision->getParentId() !== $parentId
		) {
			throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
		}

		// NOTE: For null revisions, $user may be different from $this->revision->getUser
		// and also from $revision->getUser.
		// But $user should always match $this->user.
		if ( $user && $this->user && $user->getName() !== $this->user->getName() ) {
			return false;
		}

		// A different, already saved revision is held by this instance.
		if ( $revision && $this->revision && $this->revision->getId()
			&& $this->revision->getId() !== $revision->getId()
		) {
			return false;
		}

		// The recorded pre-edit revision must be the parent of the given revision.
		if ( $this->pageState
			&& $revision
			&& $revision->getParentId() !== null
			&& $this->pageState['oldId'] !== $revision->getParentId()
		) {
			return false;
		}

		// Same check against the explicitly given parent ID; 0 is compared as well.
		if ( $this->pageState
			&& $parentId !== null
			&& $this->pageState['oldId'] !== $parentId
		) {
			return false;
		}

		// NOTE: this check is the primary reason for having the $this->slotsUpdate field!
		if ( $this->slotsUpdate
			&& $slotsUpdate
			&& !$this->slotsUpdate->hasSameUpdates( $slotsUpdate )
		) {
			return false;
		}

		if ( $revision
			&& $this->revision
			&& !$this->revision->getSlots()->hasSameContent( $revision->getSlots() )
		) {
			return false;
		}

		return true;
	}

	/**
	 * @param string $articleCountMethod "any" or "link".
* @see $wgArticleCountMethod
	 */
	public function setArticleCountMethod( $articleCountMethod ) {
		$this->articleCountMethod = $articleCountMethod;
	}

	/**
	 * Sets the flag corresponding to $wgRCWatchCategoryMembership.
	 *
	 * @param bool $rcWatchCategoryMembership
	 * @see $wgRCWatchCategoryMembership
	 */
	public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
		$this->rcWatchCategoryMembership = $rcWatchCategoryMembership;
	}

	/**
	 * Returns the title of the WikiPage this updater was constructed with.
	 *
	 * @return Title
	 */
	private function getTitle() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->wikiPage;

		return $page->getTitle();
	}

	/**
	 * Returns the WikiPage this updater was constructed with.
	 *
	 * @return WikiPage
	 */
	private function getWikiPage() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->wikiPage;

		return $page;
	}

	/**
	 * Determines whether the page being edited already existed, that is, whether the
	 * recorded pre-edit revision ID is greater than zero.
	 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
	 *
	 * @return bool
	 * @throws LogicException if the page state has not been initialized yet
	 */
	public function pageExisted() {
		$this->assertHasPageState( __METHOD__ );

		$oldId = $this->pageState['oldId'];

		return $oldId > 0;
	}

	/**
	 * Returns the parent revision of the new revision wrapped by this update.
	 * If the update is a null-edit, this will return the parent of the current (and new) revision.
	 * This will return null if the revision wrapped by this update created the page.
	 * Only defined after calling prepareContent() or prepareUpdate()!
	 *
	 * @return RevisionRecord|null the parent revision of the new revision, or null if
	 * the update created the page.
*/
	private function getParentRevision() {
		$this->assertPrepared( __METHOD__ );

		// Already resolved earlier: reuse it.
		if ( $this->parentRevision ) {
			return $this->parentRevision;
		}

		// If there was no current revision, there is no parent revision,
		// since the page didn't exist.
		if ( !$this->pageState['oldId'] ) {
			return null;
		}

		$parentId = $this->revision->getParentId();

		// Read from the latest data if the page itself was loaded that way.
		$queryFlags = 0;
		if ( $this->useMaster() ) {
			$queryFlags = RevisionStore::READ_LATEST;
		}

		if ( $parentId ) {
			$this->parentRevision = $this->revisionStore->getRevisionById( $parentId, $queryFlags );
		} else {
			$this->parentRevision = null;
		}

		return $this->parentRevision;
	}

	/**
	 * Returns the revision that was the page's current revision when grabCurrentRevision()
	 * was first called.
	 *
	 * During an edit, that revision will act as the logical parent of the new revision.
	 *
	 * Some updates are performed based on the difference between the database state at the
	 * moment this method is first called, and the state after the edit.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception
	 * to avoid confusion, since the page's current revision is then the new revision after
	 * the edit, which was presumably passed to prepareUpdate() as the $revision parameter.
	 * Use getParentRevision() instead to access the revision that is the parent of the
	 * new revision.
	 *
	 * @return RevisionRecord|null the page's current revision, or null if the page does not
	 * yet exist.
*/
	public function grabCurrentRevision() {
		// The page state is captured only once; later calls return the recorded revision.
		if ( $this->pageState ) {
			return $this->pageState['oldRevision'];
		}

		$this->assertTransition( 'knows-current' );

		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->getWikiPage();

		// Do not call WikiPage::clear(), since the caller may already have caused page data
		// to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
		$page->loadPageData( self::READ_LATEST );

		$currentRevision = $page->getRevisionRecord();
		$currentId = $currentRevision ? $currentRevision->getId() : 0;

		$this->pageState = [
			'oldRevision' => $currentRevision,
			'oldId' => $currentId,
			'oldIsRedirect' => $page->isRedirect(), // NOTE: uses page table
			'oldCountable' => $page->isCountable(), // NOTE: uses pagelinks table
		];

		$this->doTransition( 'knows-current' );

		return $currentRevision;
	}

	/**
	 * Whether prepareUpdate() or prepareContent() have been called on this instance.
	 *
	 * @return bool
	 */
	public function isContentPrepared() {
		$hasRevision = ( $this->revision !== null );

		return $hasRevision;
	}

	/**
	 * Whether prepareUpdate() has been called on this instance, determined by whether
	 * the target revision has an ID.
	 *
	 * @note will also return true in case of a null-edit!
	 *
	 * @return bool
	 */
	public function isUpdatePrepared() {
		if ( $this->revision === null ) {
			return false;
		}

		return $this->revision->getId() !== null;
	}

	/**
	 * Returns the ID of the WikiPage this updater was constructed with.
	 *
	 * @return int
	 */
	private function getPageId() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->wikiPage;

		return $page->getId();
	}

	/**
	 * Whether the content is deleted and thus not visible to the public.
*
	 * @return bool
	 */
	public function isContentDeleted() {
		// If the content has not been saved yet, it cannot have been deleted yet.
		if ( !$this->revision ) {
			return false;
		}

		return $this->revision->isDeleted( RevisionRecord::DELETED_TEXT );
	}

	/**
	 * Returns the slot, modified or inherited, after PST, with no audience checks applied.
	 *
	 * @param string $role slot role name
	 *
	 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
	 * parent revision.
	 * @return SlotRecord
	 */
	public function getRawSlot( $role ) {
		$slots = $this->getSlots();

		return $slots->getSlot( $role );
	}

	/**
	 * Returns the content of the given slot, with no audience checks.
	 *
	 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
	 * parent revision.
	 * @param string $role slot role name
	 * @return Content
	 */
	public function getRawContent( $role ) {
		$slot = $this->getRawSlot( $role );

		return $slot->getContent();
	}

	/**
	 * Returns the content model of the given slot
	 *
	 * @param string $role slot role name
	 * @return string
	 */
	private function getContentModel( $role ) {
		$slot = $this->getRawSlot( $role );

		return $slot->getModel();
	}

	/**
	 * Returns the ContentHandler for the content model of the given slot.
	 *
	 * @param string $role slot role name
	 * @return ContentHandler
	 * @throws MWUnknownContentModelException
	 */
	private function getContentHandler( $role ): ContentHandler {
		$model = $this->getContentModel( $role );

		return $this->contentHandlerFactory->getContentHandler( $model );
	}

	/**
	 * Whether the wrapped WikiPage reports having been loaded with READ_LATEST.
	 *
	 * @return bool
	 */
	private function useMaster() {
		// TODO: can we just set a flag to true in prepareContent()?
		$page = $this->wikiPage;

		return $page->wasLoadedFrom( self::READ_LATEST );
	}

	/**
	 * Whether the page counts as an article, based on the prepared content.
	 *
	 * @return bool
	 */
	public function isCountable() {
		// NOTE: Keep in sync with WikiPage::isCountable.

		// Not countable if this is not a content page, if the content is deleted, or if the
		// page is a redirect. The deleted check should be irrelevant: countability only
		// applies to the current revision, and the current revision is never suppressed.
		if ( !$this->getTitle()->isContentPage()
			|| $this->isContentDeleted()
			|| $this->isRedirect()
		) {
			return false;
		}

		// NOTE: it would be more appropriate to determine for each slot separately
		// whether it has links, and use that information with that slot's
		// isCountable() method. However, that would break parity with
		// WikiPage::isCountable, which uses the pagelinks table to determine
		// whether the current revision has links.
		$hasLinks = ( $this->articleCountMethod === 'link' )
			? (bool)count( $this->getCanonicalParserOutput()->getLinks() )
			: null;

		// The first countable slot whose role supports article counting decides.
		foreach ( $this->getSlots()->getSlotRoles() as $role ) {
			$roleHandler = $this->slotRoleRegistry->getRoleHandler( $role );
			if ( !$roleHandler->supportsArticleCount() ) {
				continue;
			}

			if ( $this->getRawContent( $role )->isCountable( $hasLinks ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Whether the prepared content makes the page a redirect.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		// NOTE: main slot determines redirect status
		// TODO: MCR: this should be controlled by a PageTypeHandler
		return $this->getRawContent( SlotRecord::MAIN )->isRedirect();
	}

	/**
	 * Whether the main slot content of the given revision is a redirect.
	 *
	 * @param RevisionRecord $rev
	 *
	 * @return bool
	 */
	private function revisionIsRedirect( RevisionRecord $rev ) {
		// NOTE: main slot determines redirect status
		return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW )->isRedirect();
	}

	/**
	 * Prepare updates based on an update which has not yet been saved.
*
	 * This may be used to create derived data that is needed when creating a new revision;
	 * particularly, this makes available the slots of the new revision via the getSlots()
	 * method, after applying PST and slot inheritance.
	 *
	 * The derived data prepared for revision creation may then later be re-used by doUpdates(),
	 * without the need to re-calculate.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note Calling this method more than once with the same $slotsUpdate
	 * has no effect. Calling this method multiple times with different content will cause
	 * an exception.
	 *
	 * @note Calling this method after prepareUpdate() has been called will cause an exception.
	 *
	 * @param UserIdentity $user The user to act as context for pre-save transformation (PST).
	 * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
	 *        by this edit, before PST.
	 * @param bool $useStash Whether to use stashed ParserOutput
	 *
	 * @throws LogicException If called again for a different user or with different slot
	 *         content, or if the transition to the 'has-content' stage is not possible.
	 */
	public function prepareContent(
		UserIdentity $user,
		RevisionSlotsUpdate $slotsUpdate,
		$useStash = true
	) {
		if ( $this->slotsUpdate ) {
			if ( !$this->user ) {
				throw new LogicException(
					'Unexpected state: $this->slotsUpdate was initialized, '
					. 'but $this->user was not.'
				);
			}

			if ( $this->user->getName() !== $user->getName() ) {
				throw new LogicException( 'Can\'t call prepareContent() again for different user! '
					. 'Expected ' . $this->user->getName() . ', got ' . $user->getName()
				);
			}

			if ( !$this->slotsUpdate->hasSameUpdates( $slotsUpdate ) ) {
				throw new LogicException(
					'Can\'t call prepareContent() again with different slot content!'
				);
			}

			return; // prepareContent() already done, nothing to do
		}

		$this->assertTransition( 'has-content' );

		$wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
		$title = $this->getTitle();

		$parentRevision = $this->grabCurrentRevision();

		// The edit may have already been prepared via api.php?action=stashedit
		$stashedEdit = false;

		// Converted once; reused for the stash lookup, the ParserOptions and every PST call.
		$legacyUser = self::toLegacyUser( $user );

		// TODO: MCR: allow output for all slots to be stashed.
		if ( $useStash && $slotsUpdate->isModifiedSlot( SlotRecord::MAIN ) ) {
			$editStash = MediaWikiServices::getInstance()->getPageEditStash();
			$stashedEdit = $editStash->checkCache(
				$title,
				$slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent(),
				$legacyUser
			);
		}

		$userPopts = ParserOptions::newFromUserAndLang( $legacyUser, $this->contLang );
		$this->hookRunner->onArticlePrepareTextForEdit( $wikiPage, $userPopts );

		$this->user = $user;
		$this->slotsUpdate = $slotsUpdate;

		if ( $parentRevision ) {
			$this->revision = MutableRevisionRecord::newFromParentRevision( $parentRevision );
		} else {
			$this->revision = new MutableRevisionRecord( $title );
		}

		// NOTE: user and timestamp must be set, so they can be used for
		// {{subst:REVISIONUSER}} and {{subst:REVISIONTIMESTAMP}} in PST!
		$this->revision->setTimestamp( MWTimestamp::now( TS_MW ) );
		$this->revision->setUser( $user );

		// Set up ParserOptions to operate on the new revision: for the page being edited,
		// report the revision under construction; for any other title, defer to the
		// callback that was registered before.
		$oldCallback = $userPopts->getCurrentRevisionRecordCallback();
		$userPopts->setCurrentRevisionRecordCallback(
			function ( Title $parserTitle, $parser = null ) use ( $title, $oldCallback ) {
				if ( $parserTitle->equals( $title ) ) {
					return $this->revision;
				}

				return $oldCallback( $parserTitle, $parser );
			}
		);

		$pstContentSlots = $this->revision->getSlots();

		foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
			$slot = $slotsUpdate->getModifiedSlot( $role );

			if ( $slot->isInherited() ) {
				// No PST for inherited slots! Note that "modified" slots may still be inherited
				// from an earlier version, e.g. for rollbacks.
				$pstSlot = $slot;
			} elseif ( $role === SlotRecord::MAIN && $stashedEdit ) {
				// TODO: MCR: allow PST content for all slots to be stashed.
				$pstSlot = SlotRecord::newUnsaved( $role, $stashedEdit->pstContent );
			} else {
				$content = $slot->getContent();
				$pstContent = $content->preSaveTransform( $title, $legacyUser, $userPopts );
				$pstSlot = SlotRecord::newUnsaved( $role, $pstContent );
			}

			$pstContentSlots->setSlot( $pstSlot );
		}

		foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
			$pstContentSlots->removeSlot( $role );
		}

		$this->options['created'] = ( $parentRevision === null );
		$this->options['changed'] = ( $parentRevision === null
			|| !$pstContentSlots->hasSameContent( $parentRevision->getSlots() ) );

		$this->doTransition( 'has-content' );

		if ( !$this->options['changed'] ) {
			// null-edit!
			// 'changed' can only be false here if $parentRevision is not null.

			// TODO: move this into MutableRevisionRecord
			// TODO: This needs to behave differently for a forced dummy edit!
			$this->revision->setId( $parentRevision->getId() );
			$this->revision->setTimestamp( $parentRevision->getTimestamp() );
			$this->revision->setPageId( $parentRevision->getPageId() );
			$this->revision->setParentId( $parentRevision->getParentId() );
			$this->revision->setUser( $parentRevision->getUser( RevisionRecord::RAW ) );
			$this->revision->setComment( $parentRevision->getComment( RevisionRecord::RAW ) );
			$this->revision->setMinorEdit( $parentRevision->isMinor() );
			$this->revision->setVisibility( $parentRevision->getVisibility() );

			// prepareUpdate() is redundant for null-edits
			$this->doTransition( 'has-revision' );
		} else {
			$this->parentRevision = $parentRevision;
		}

		$renderHints = [ 'use-master' => $this->useMaster(), 'audience' => RevisionRecord::RAW ];

		if ( $stashedEdit ) {
			/** @var ParserOutput $output */
			$output = $stashedEdit->output;
			// TODO: this should happen when stashing the ParserOutput, not now!
			$output->setCacheTime( $stashedEdit->timestamp );

			$renderHints['known-revision-output'] = $output;

			$this->logger->debug( __METHOD__ . ': using stashed edit output...' );
		}

		// NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
		// NOTE: the revision is either new or current, so we can bypass audience checks.
		$this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
			$this->revision,
			null,
			null,
			$renderHints
		);
	}

	/**
	 * Returns the update's target revision - that is, the revision that will be the current
	 * revision after the update.
	 *
	 * @note Callers must treat the returned RevisionRecord's content as immutable, even
	 * if it is a MutableRevisionRecord instance.
* Other aspects of a MutableRevisionRecord
	 * returned from here, such as the user or the comment, may be changed, but may not
	 * be reflected in ParserOutput until after prepareUpdate() has been called.
	 *
	 * @todo This is currently used by PageUpdater::makeNewRevision() to construct an unsaved
	 * MutableRevisionRecord instance. Introduce something like an UnsavedRevisionFactory service
	 * for that purpose instead!
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called before
	 * @return RevisionRecord
	 */
	public function getRevision() {
		$this->assertPrepared( __METHOD__ );
		return $this->revision;
	}

	/**
	 * Returns the RenderedRevision held by this updater.
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called before
	 * @return RenderedRevision
	 */
	public function getRenderedRevision() {
		$this->assertPrepared( __METHOD__ );

		return $this->renderedRevision;
	}

	/**
	 * Asserts that the pre-edit page state has been recorded.
	 *
	 * @param string $method Name of the calling method, used in the exception message
	 *
	 * @throws LogicException if the page state is not initialized
	 */
	private function assertHasPageState( $method ) {
		if ( !$this->pageState ) {
			throw new LogicException(
				'Must call grabCurrentRevision() or prepareContent() '
				. 'or prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Asserts that a target revision is available.
	 *
	 * @param string $method Name of the calling method, used in the exception message
	 *
	 * @throws LogicException if no revision has been set
	 */
	private function assertPrepared( $method ) {
		if ( !$this->revision ) {
			throw new LogicException(
				'Must call prepareContent() or prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Asserts that the target revision is available and has an ID.
	 *
	 * @param string $method Name of the calling method, used in the exception message
	 *
	 * @throws LogicException if no revision has been set, or the revision has no ID
	 */
	private function assertHasRevision( $method ) {
		// Check for a missing revision first: calling getId() on null would raise a fatal
		// Error instead of the LogicException this assertion is meant to throw.
		if ( !$this->revision || !$this->revision->getId() ) {
			throw new LogicException(
				'Must call prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Whether the edit creates the page.
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called before
	 * @return bool
	 */
	public function isCreation() {
		$this->assertPrepared( __METHOD__ );
		return $this->options['created'];
	}

	/**
	 * Whether the edit created, or should create, a new revision (that is, it's not a null-edit).
	 *
	 * @warning at present, "null-revisions" that do not change content but do have a revision
	 * record would return false after prepareContent(), but true after prepareUpdate()!
	 * This should probably be fixed.
*
	 * @return bool
	 */
	public function isChange() {
		$this->assertPrepared( __METHOD__ );

		$changed = $this->options['changed'];

		return $changed;
	}

	/**
	 * Whether the page was a redirect before the edit.
	 * If the recorded value is null, it is derived from the pre-edit revision on first
	 * use and stored in the page state.
	 *
	 * @return bool
	 */
	public function wasRedirect() {
		$this->assertHasPageState( __METHOD__ );

		if ( $this->pageState['oldIsRedirect'] !== null ) {
			return $this->pageState['oldIsRedirect'];
		}

		/** @var RevisionRecord $oldRevision */
		$oldRevision = $this->pageState['oldRevision'];

		// Without a pre-edit revision, the page cannot have been a redirect.
		$this->pageState['oldIsRedirect'] = $oldRevision
			? $this->revisionIsRedirect( $oldRevision )
			: false;

		return $this->pageState['oldIsRedirect'];
	}

	/**
	 * Returns the slots of the target revision, after PST.
	 *
	 * @note Callers must treat the returned RevisionSlots instance as immutable, even
	 * if it is a MutableRevisionSlots instance.
	 *
	 * @return RevisionSlots
	 */
	public function getSlots() {
		$this->assertPrepared( __METHOD__ );

		$targetRevision = $this->revision;

		return $targetRevision->getSlots();
	}

	/**
	 * Returns the RevisionSlotsUpdate for this updater. If none has been set, it is
	 * built from the slots of the target revision and of its parent, and remembered.
	 *
	 * @return RevisionSlotsUpdate
	 */
	private function getRevisionSlotsUpdate() {
		$this->assertPrepared( __METHOD__ );

		if ( $this->slotsUpdate ) {
			return $this->slotsUpdate;
		}

		$parent = $this->getParentRevision();
		$newSlots = $this->revision->getSlots();
		$parentSlots = $parent ? $parent->getSlots() : null;

		$this->slotsUpdate = RevisionSlotsUpdate::newFromRevisionSlots( $newSlots, $parentSlots );

		return $this->slotsUpdate;
	}

	/**
	 * Returns the role names of the slots touched by the new revision,
	 * including removed roles.
	 *
	 * @return string[]
	 */
	public function getTouchedSlotRoles() {
		$update = $this->getRevisionSlotsUpdate();

		return $update->getTouchedRoles();
	}

	/**
	 * Returns the role names of the slots modified by the new revision,
	 * not including removed roles.
*
	 * @return string[]
	 */
	public function getModifiedSlotRoles() {
		return $this->getRevisionSlotsUpdate()->getModifiedRoles();
	}

	/**
	 * Returns the role names of the slots removed by the new revision.
	 *
	 * @return string[]
	 */
	public function getRemovedSlotRoles() {
		return $this->getRevisionSlotsUpdate()->getRemovedRoles();
	}

	/**
	 * Prepare derived data updates targeting the given Revision.
	 *
	 * Calling this method requires the given revision to be present in the database.
	 * This may be right after a new revision has been created, or when re-generating
	 * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
	 * script.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note Calling this method more than once with the same revision has no effect.
	 * $options are only used for the first call. Calling this method multiple times with
	 * different revisions will cause an exception.
	 *
	 * @note If grabCurrentRevision() (or prepareContent()) has been called before
	 * calling this method, $revision->getParentRevision() has to refer to the revision that
	 * was the current revision at the time grabCurrentRevision() was called.
	 *
	 * @param RevisionRecord $revision
	 * @param array $options Array of options, following indexes are used:
	 * - changed: bool, whether the revision changed the content (default true)
	 * - created: bool, whether the revision created the page (default false)
	 * - moved: bool, whether the page was moved (default false)
	 * - restored: bool, whether the page was undeleted (default false)
	 * - oldrevision: RevisionRecord object for the pre-update revision (default null)
	 *     can also be a Revision object, which is deprecated since 1.35
	 * - triggeringUser: The user triggering the update (UserIdentity, defaults to the
	 *   user who created the revision)
	 * - oldredirect: bool, null, or string 'no-change' (default null):
	 *    - bool: whether the page was counted as a redirect before that
	 *      revision, only used if changed is true and created is false
	 *    - null or 'no-change': don't update the redirect status.
	 * - oldcountable: bool, null, or string 'no-change' (default null):
	 *    - bool: whether the page was counted as an article before that
	 *      revision, only used if changed is true and created is false
	 *    - null: if created is false, don't update the article count; if created
	 *      is true, do update the article count
	 *    - 'no-change': don't update the article count, ever
	 *   When set to null, pageState['oldCountable'] will be used instead if available.
	 * - causeAction: an arbitrary string identifying the reason for the update.
	 *   See DataUpdate::getCauseAction(). (default 'unknown')
	 * - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent().
	 *   (string, default 'unknown')
	 * - known-revision-output: a combined canonical ParserOutput for the revision, perhaps
	 *   from some cache. The caller is responsible for ensuring that the ParserOutput indeed
	 *   matched the $rev and $options. This mechanism is intended as a temporary stop-gap,
	 *   for the time until caches have been changed to store RenderedRevision states instead
	 *   of ParserOutput objects. (default: null) (since 1.33)
	 * - editResult: EditResult object created during the update. Required to perform reverted
	 *   tag update using RevertedTagUpdateJob. (default: null) (since 1.36)
	 * - approved: whether the edit is somehow "approved" and the RevertedTagUpdateJob should
	 *   be scheduled right away. Required only if EditResult::isRevert() is true. (boolean,
	 *   default: false) (since 1.36)
	 *
	 * @throws InvalidArgumentException if $revision has no ID.
	 * @throws LogicException if the updater is already bound to a different saved revision,
	 *   if $revision's content, old revision ID, or author is inconsistent with the state
	 *   this updater already holds.
	 */
	public function prepareUpdate( RevisionRecord $revision, array $options = [] ) {
		// B/C: accept a legacy Revision object and unwrap it into a RevisionRecord.
		if ( isset( $options['oldrevision'] ) && $options['oldrevision'] instanceof Revision ) {
			wfDeprecated(
				__METHOD__ . ' with the `oldrevision` option being a ' .
				'Revision object',
				'1.35'
			);
			$options['oldrevision'] = $options['oldrevision']->getRevisionRecord();
		}
		// Validate the types of the object-valued options up front.
		Assert::parameter(
			!isset( $options['oldrevision'] )
			|| $options['oldrevision'] instanceof RevisionRecord,
			'$options["oldrevision"]',
			'must be a RevisionRecord (or Revision)'
		);
		Assert::parameter(
			!isset( $options['triggeringUser'] )
			|| $options['triggeringUser'] instanceof UserIdentity,
			'$options["triggeringUser"]',
			'must be a UserIdentity'
		);
		Assert::parameter(
			!isset( $options['editResult'] )
			|| $options['editResult'] instanceof EditResult,
			'$options["editResult"]',
			'must be an EditResult'
		);

		if ( !$revision->getId() ) {
			throw new InvalidArgumentException(
				'Revision must have an ID set for it to be used with prepareUpdate()!'
			);
		}

		// Already bound to a saved revision: a repeated call with the same revision is a
		// no-op, any other revision is an error.
		if ( $this->revision && $this->revision->getId() ) {
			if ( $this->revision->getId() === $revision->getId() ) {
				return; // nothing to do!
			} else {
				throw new LogicException(
					'Trying to re-use DerivedPageDataUpdater with revision '
					. $revision->getId()
					. ', but it\'s already bound to revision '
					. $this->revision->getId()
				);
			}
		}

		// A revision without an ID is already held here; the saved revision must carry
		// the same content.
		if ( $this->revision
			&& !$this->revision->getSlots()->hasSameContent( $revision->getSlots() )
		) {
			throw new LogicException(
				'The Revision provided has mismatching content!'
			);
		}

		// Override fields defined in $this->options with values from $options.
		// Keys of $options that are not already present in $this->options are ignored.
		$this->options = array_intersect_key( $options, $this->options ) + $this->options;

		// Determine the ID of the revision that was current before this update.
		// Note that 'newrev' is only (re)computed in the first two branches.
		if ( $this->revision ) {
			$oldId = $this->pageState['oldId'] ?? 0;
			$this->options['newrev'] = ( $revision->getId() !== $oldId );
		} elseif ( isset( $this->options['oldrevision'] ) ) {
			/** @var RevisionRecord $oldRev */
			$oldRev = $this->options['oldrevision'];
			$oldId = $oldRev->getId();
			$this->options['newrev'] = ( $revision->getId() !== $oldId );
		} else {
			$oldId = $revision->getParentId();
		}

		if ( $oldId !== null ) {
			// XXX: what if $options['changed'] disagrees?
			// MovePage creates a dummy revision with changed = false!
			// We may want to explicitly distinguish between "no new revision" (null-edit)
			// and "new revision without new content" (dummy revision).

			if ( $oldId === $revision->getParentId() ) {
				// NOTE: this may still be a NullRevision!
				// New revision!
				$this->options['changed'] = true;
			} elseif ( $oldId === $revision->getId() ) {
				// Null-edit!
				$this->options['changed'] = false;
			} else {
				// This indicates that calling code has given us the wrong Revision object
				throw new LogicException(
					'The Revision mismatches old revision ID: '
					. 'Old ID is ' . $oldId
					. ', parent ID is ' . $revision->getParentId()
					. ', revision ID is ' . $revision->getId()
				);
			}
		}

		// If prepareContent() was used to generate the PST content (which is indicated by
		// $this->slotsUpdate being set), and this is not a null-edit, then the given
		// revision must have the acting user as the revision author. Otherwise, user
		// signatures generated by PST would mismatch the user in the revision record.
		if ( $this->user !== null && $this->options['changed'] && $this->slotsUpdate ) {
			$user = $revision->getUser();
			if ( !$this->user->equals( $user ) ) {
				throw new LogicException(
					'The Revision provided has a mismatching actor: expected '
					. $this->user->getName()
					. ', got '
					. $user->getName()
				);
			}
		}

		// If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
		// emulate the state of the page table before the edit, as good as we can.
		if ( !$this->pageState ) {
			// Only boolean option values are carried over; null and 'no-change' both
			// end up as null here.
			$this->pageState = [
				'oldIsRedirect' => isset( $this->options['oldredirect'] )
					&& is_bool( $this->options['oldredirect'] )
					? $this->options['oldredirect']
					: null,
				'oldCountable' => isset( $this->options['oldcountable'] )
					&& is_bool( $this->options['oldcountable'] )
					? $this->options['oldcountable']
					: null,
			];

			if ( $this->options['changed'] ) {
				// The edit created a new revision
				$this->pageState['oldId'] = $revision->getParentId();

				// NOTE: 'oldRevision' stays unset if no 'oldrevision' option was given.
				if ( isset( $this->options['oldrevision'] ) ) {
					$rev = $this->options['oldrevision'];
					$this->pageState['oldRevision'] = $rev;
				}
			} else {
				// This is a null-edit, so the old revision IS the new revision!
				$this->pageState['oldId'] = $revision->getId();
				$this->pageState['oldRevision'] = $revision;
			}
		}

		// "created" is forced here
		// An old revision ID of exactly 0 is taken to mean that the page did not exist before.
		$this->options['created'] = ( $this->options['created'] ||
			( $this->pageState['oldId'] === 0 ) );

		$this->revision = $revision;

		$this->doTransition( 'has-revision' );

		// NOTE: in case we have a User object, don't override with a UserIdentity.
		// We already checked that $revision->getUser() matches $this->user;
		if ( !$this->user ) {
			$this->user = $revision->getUser( RevisionRecord::RAW );
		}

		// Prune any output that depends on the revision ID.
		if ( $this->renderedRevision ) {
			$this->renderedRevision->updateRevision( $revision );
		} else {
			// NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
			// NOTE: the revision is either new or current, so we can bypass audience checks.
			$this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
				$this->revision,
				null,
				null,
				[
					'use-master' => $this->useMaster(),
					'audience' => RevisionRecord::RAW,
					'known-revision-output' => $options['known-revision-output'] ?? null
				]
			);

			// XXX: Since we presumably are dealing with the current revision,
			// we could try to get the ParserOutput from the parser cache.
		}

		// TODO: optionally get ParserOutput from the ParserCache here.
		// Move the logic used by RefreshLinksJob here!
	}

	/**
	 * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
* @return PreparedEdit
	 */
	public function getPreparedEdit() {
		$this->assertPrepared( __METHOD__ );

		$update = $this->getRevisionSlotsUpdate();
		$edit = new PreparedEdit();

		$edit->popts = $this->getCanonicalParserOptions();
		// The output is produced on demand through this callback.
		$edit->parserOutputCallback = [ $this, 'getCanonicalParserOutput' ];
		$edit->pstContent = $this->revision->getContent( SlotRecord::MAIN );

		if ( $update->isModifiedSlot( SlotRecord::MAIN ) ) {
			$edit->newContent = $update->getModifiedSlot( SlotRecord::MAIN )->getContent();
		} else {
			// XXX: can we just remove this?
			$edit->newContent = $this->revision->getContent( SlotRecord::MAIN );
		}

		// unused. // XXX: could get this from the parent revision
		$edit->oldContent = null;
		$edit->revid = $this->revision ? $this->revision->getId() : null;
		$edit->timestamp = $edit->output->getCacheTime();
		$edit->format = $edit->pstContent->getDefaultFormat();

		return $edit;
	}

	/**
	 * Returns the ParserOutput of a single slot of the rendered revision.
	 *
	 * @param string $role Slot role name
	 * @param bool $generateHtml Passed on as the 'generate-html' hint
	 * @return ParserOutput
	 */
	public function getSlotParserOutput( $role, $generateHtml = true ) {
		$hints = [ 'generate-html' => $generateHtml ];

		return $this->getRenderedRevision()->getSlotParserOutput( $role, $hints );
	}

	/**
	 * Returns the combined ParserOutput of the rendered revision.
	 *
	 * @return ParserOutput
	 */
	public function getCanonicalParserOutput() {
		$rendered = $this->getRenderedRevision();

		return $rendered->getRevisionParserOutput();
	}

	/**
	 * Returns the ParserOptions of the rendered revision.
	 *
	 * @return ParserOptions
	 */
	public function getCanonicalParserOptions() {
		$rendered = $this->getRenderedRevision();

		return $rendered->getOptions();
	}

	/**
	 * Collects the secondary data updates for the target revision: a LinksUpdate for the
	 * combined output, the updates of every slot, and deletion updates for removed slots.
	 *
	 * @param bool $recursive
	 *
	 * @return DeferrableUpdate[]
	 */
	public function getSecondaryDataUpdates( $recursive = false ) {
		if ( $this->isContentDeleted() ) {
			// Not expected to happen: the current content is always public, and
			// DataUpdates are only needed for current content.
			return [];
		}

		$page = $this->getWikiPage();
		$page->loadPageData( WikiPage::READ_LATEST );
		if ( !$page->exists() ) {
			// The page got deleted while the update was deferred.
			return [];
		}

		$output = $this->getCanonicalParserOutput();
		$title = $page->getTitle();

		// Start out with a LinksUpdate for the combined canonical output.
		$allUpdates = [ new LinksUpdate( $title, $output, $recursive ) ];

		// HACK: predicates used to drop redundant and incomplete links updates that the
		// legacy Content methods may return.
		$notLinksUpdate = static function ( $update ) {
			return !( $update instanceof LinksUpdate );
		};
		$notLinksDeletionUpdate = static function ( $update ) {
			return !( $update instanceof LinksDeletionUpdate );
		};

		// NOTE: Updates are run for every slot, not only the modified ones! Otherwise,
		// info for an inherited slot may end up being removed. This is also needed
		// to ensure that purges are effective.
		$renderedRevision = $this->getRenderedRevision();
		foreach ( $this->getSlots()->getSlotRoles() as $role ) {
			$content = $this->getRawSlot( $role )->getContent();

			$slotUpdates = $content->getContentHandler()->getSecondaryDataUpdates(
				$title,
				$content,
				$role,
				$renderedRevision
			);

			// TODO: remove B/C hack in 1.32!
			// NOTE: the combined output is assumed to contain all relevant meta-data for
			// all slots!
			$legacySlotUpdates = array_filter(
				$content->getSecondaryDataUpdates( $title, null, $recursive, $output ),
				$notLinksUpdate
			);

			$allUpdates = array_merge( $allUpdates, $slotUpdates, $legacySlotUpdates );
		}

		// XXX: if a slot was removed by an earlier edit, but deletion updates failed to run at
		// that time, we don't know for which slots to run deletion updates when purging a page.
		// We'd have to examine the entire history of the page to determine that. Perhaps there
		// could be a "try extra hard" mode for that case that would run a DB query to find all
		// roles/models ever used on the page. On the other hand, removing slots should be quite
		// rare, so perhaps this isn't worth the trouble.

		// TODO: consolidate with similar logic in WikiPage::getDeletionUpdates()
		$parent = $this->getParentRevision();
		foreach ( $this->getRemovedSlotRoles() as $role ) {
			// HACK: the content model of the removed slot should come from a SlotRoleHandler!
			// For now, look the slot up in the parent revision - a removed slot should
			// always exist there.
			$content = $parent->getSlot( $role, RevisionRecord::RAW )->getContent();

			$deletionUpdates = $content->getContentHandler()->getDeletionUpdates(
				$title,
				$role
			);

			// TODO: remove B/C hack in 1.32!
			$legacyDeletionUpdates = array_filter(
				$content->getDeletionUpdates( $page ),
				$notLinksDeletionUpdate
			);

			$allUpdates = array_merge( $allUpdates, $deletionUpdates, $legacyDeletionUpdates );
		}

		// TODO: hard deprecate SecondaryDataUpdates in favor of RevisionDataUpdates in 1.33!
		$this->hookRunner->onRevisionDataUpdates( $title, $renderedRevision, $allUpdates );

		return $allUpdates;
	}

	/**
	 * Do standard updates after page edit, purge, or import.
	 * Update links tables, site stats, search index, title cache, message cache, etc.
	 * Purges pages that depend on this page when appropriate.
	 * With a 10% chance, triggers pruning the recent changes table.
*
	 * @note prepareUpdate() must be called before calling this method!
	 *
	 * MCR migration note: this replaces WikiPage::doEditUpdates.
	 */
	public function doUpdates() {
		$this->assertTransition( 'done' );

		// TODO: move logic into a PageEventEmitter service

		$wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!

		$legacyUser = self::toLegacyUser( $this->user );

		$userParserOptions = ParserOptions::newFromUser( $legacyUser );
		// Decide whether to save the final canonical parser output based on the fact that
		// users are typically redirected to viewing pages right after they edit those pages.
		// Due to vary-revision-id, getting/saving that output here might require a reparse.
		if ( $userParserOptions->matchesForCacheKey( $this->getCanonicalParserOptions() ) ) {
			// Whether getting the final output requires a reparse or not, the user will
			// need canonical output anyway, since that is what their parser options use.
			// A reparse now at least has the benefit of various warm process caches.
			$this->doParserCacheUpdate();
		} else {
			// If the user does not have canonical parse options, then don't risk another parse
			// to make output they cannot use on the page refresh that typically occurs after
			// editing. Doing the parser output save post-send will still benefit *other* users.
			DeferredUpdates::addCallableUpdate( function () {
				$this->doParserCacheUpdate();
			} );
		}

		$this->doSecondaryDataUpdates( [
			// T52785 do not update any other pages on a null edit
			'recursive' => $this->options['changed'],
			// Defer the getCanonicalParserOutput() call made by getSecondaryDataUpdates()
			'defer' => DeferredUpdates::POSTSEND
		] );

		// TODO: MCR: check if *any* changed slot supports categories!
		if ( $this->rcWatchCategoryMembership
			&& $this->getContentHandler( SlotRecord::MAIN )->supportsCategories() === true
			&& ( $this->options['changed'] || $this->options['created'] )
			&& !$this->options['restored']
		) {
			// Note: jobs are pushed after deferred updates, so the job should be able to see
			// the recent change entry (also done via deferred updates) and carry over any
			// bot/deletion/IP flags, etc.
			$this->jobQueueGroup->lazyPush(
				CategoryMembershipChangeJob::newSpec(
					$this->getTitle(),
					$this->revision->getTimestamp()
				)
			);
		}

		// TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
		// @note: Extensions should *avoid* calling getCanonicalParserOutput() when using
		// this hook whenever possible in order to avoid unnecessary additional parses.
		$editInfo = $this->getPreparedEdit();
		$this->hookRunner->onArticleEditUpdates( $wikiPage, $editInfo, $this->options['changed'] );

		$id = $this->getPageId();
		$title = $this->getTitle();
		$shortTitle = $title->getDBkey();

		if ( !$title->exists() ) {
			wfDebug( __METHOD__ . ": Page doesn't exist any more, bailing out" );

			$this->doTransition( 'done' );
			return;
		}

		// Site statistics: work out the change in the number of countable articles,
		// edits and pages, and schedule a SiteStatsUpdate for it.
		DeferredUpdates::addCallableUpdate( function () {
			if (
				$this->options['oldcountable'] === 'no-change' ||
				( !$this->options['changed'] && !$this->options['moved'] )
			) {
				$good = 0;
			} elseif ( $this->options['created'] ) {
				$good = (int)$this->isCountable();
			} elseif ( $this->options['oldcountable'] !== null ) {
				$good = (int)$this->isCountable()
					- (int)$this->options['oldcountable'];
			} elseif ( isset( $this->pageState['oldCountable'] ) ) {
				$good = (int)$this->isCountable()
					- (int)$this->pageState['oldCountable'];
			} else {
				$good = 0;
			}
			$edits = $this->options['changed'] ? 1 : 0;
			$pages = $this->options['created'] ? 1 : 0;

			DeferredUpdates::addUpdate( SiteStatsUpdate::factory(
				[ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
			) );
		} );

		// TODO: make search infrastructure aware of slots!
		$mainSlot = $this->revision->getSlot( SlotRecord::MAIN );
		if ( !$mainSlot->isInherited() && !$this->isContentDeleted() ) {
			DeferredUpdates::addUpdate( new SearchUpdate( $id, $title, $mainSlot->getContent() ) );
		}

		// If this is another user's talk page, update newtalk.
		// Don't do this if $options['changed'] = false (null-edits) nor if
		// it's a minor edit and the user making the edit doesn't generate notifications for those.
		// TODO: the permission check should be performed by the callers, see T276181.
		// NOTE: the names are compared strictly. A loose comparison would treat distinct
		// numeric-looking names (e.g. "1e3" and "1000") as the same user.
		if ( $this->options['changed']
			&& $title->getNamespace() === NS_USER_TALK
			&& $shortTitle !== $legacyUser->getTitleKey()
			&& !( $this->revision->isMinor() && MediaWikiServices::getInstance()
					->getPermissionManager()
					->userHasRight( $legacyUser, 'nominornewtalk' ) )
		) {
			$recipient = User::newFromName( $shortTitle, false );
			if ( !$recipient ) {
				wfDebug( __METHOD__ . ": invalid username" );
			} else {
				// Allow extensions to prevent user notification
				// when a new message is added to their talk page
				// TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
				if ( $this->hookRunner->onArticleEditUpdateNewTalk( $wikiPage, $recipient ) ) {
					$revRecord = $this->revision;
					$talkPageNotificationManager = MediaWikiServices::getInstance()
						->getTalkPageNotificationManager();
					// Notify anonymous (IP) users as well as registered users.
					if ( User::isIP( $shortTitle ) || $recipient->isRegistered() ) {
						$talkPageNotificationManager->setUserHasNewMessages( $recipient, $revRecord );
					} else {
						wfDebug( __METHOD__ . ": don't need to notify a nonexistent user" );
					}
				}
			}
		}

		if ( $title->getNamespace() === NS_MEDIAWIKI
			&& $this->getRevisionSlotsUpdate()->isModifiedSlot( SlotRecord::MAIN )
		) {
			$mainContent = $this->isContentDeleted() ? null : $this->getRawContent( SlotRecord::MAIN );

			$this->messageCache->updateMessageOverride( $title, $mainContent );
		}

		// TODO: move onArticleCreate and onArticle into a PageEventEmitter service
		if ( $this->options['created'] ) {
			WikiPage::onArticleCreate( $title );
		} elseif ( $this->options['changed'] ) { // T52785
			WikiPage::onArticleEdit( $title, $this->revision, $this->getTouchedSlotRoles() );
		} elseif ( $this->options['restored'] ) {
			MediaWikiServices::getInstance()->getMainWANObjectCache()->touchCheckKey(
				"DerivedPageDataUpdater:restore:page:$id"
			);
		}

		$oldRevisionRecord = $this->getParentRevision();

		// TODO: In the wiring, register a listener for this on the new PageEventEmitter
		ResourceLoaderWikiModule::invalidateModuleCache(
			$title,
			$oldRevisionRecord,
			$this->revision,
			$this->loadbalancerFactory->getLocalDomainID()
		);

		// Schedule a deferred update for marking reverted edits if applicable.
		$this->maybeEnqueueRevertedTagUpdateJob();

		$this->doTransition( 'done' );
	}

	/**
	 * If the edit was a revert and it is considered "approved", enqueues the
	 * RevertedTagUpdateJob for it. If the edit is not yet approved, the EditResult is
	 * persisted in cache for later use.
	 *
	 * Does nothing if no EditResult was provided, or if the edit was not a revert.
	 */
	private function maybeEnqueueRevertedTagUpdateJob() {
		$editResult = $this->options['editResult'];
		if ( $editResult === null || !$editResult->isRevert() ) {
			return;
		}

		$revisionId = $this->revision->getId();
		if ( $this->options['approved'] ) {
			// Enqueue the job
			$this->jobQueueGroup->lazyPush(
				RevertedTagUpdateJob::newSpec( $revisionId, $editResult )
			);
		} else {
			// Cache EditResult for later use
			$this->editResultCache->set( $revisionId, $editResult );
		}
	}

	/**
	 * Do secondary data updates (e.g. updating link tables) or schedule them as deferred updates
	 *
	 * MCR note: this method is temporarily exposed via WikiPage::doSecondaryDataUpdates.
	 *
	 * @param array $options
	 *   - recursive: make the update recursive, i.e. also update pages which transclude the
	 *     current page or otherwise depend on it (default: false)
	 *   - defer: one of the DeferredUpdates constants, or false to run immediately after waiting
	 *     for replication of the changes from the SecondaryDataUpdates hooks (default: false)
	 * @throws LogicException if prepareUpdate() has not been called yet.
	 * @throws InvalidArgumentException if the 'defer' option has an unsupported value.
	 * @since 1.32
	 */
	public function doSecondaryDataUpdates( array $options = [] ) {
		$this->assertHasRevision( __METHOD__ );
		$options += [ 'recursive' => false, 'defer' => false ];
		$deferValues = [ false, DeferredUpdates::PRESEND, DeferredUpdates::POSTSEND ];
		if ( !in_array( $options['defer'], $deferValues, true ) ) {
			// Describe non-scalar values by their type: concatenating an array or an
			// object would raise a notice or an Error and mask the intended exception.
			$given = is_scalar( $options['defer'] )
				? $options['defer']
				: gettype( $options['defer'] );
			throw new InvalidArgumentException( 'Invalid value for defer: ' . $given );
		}

		// Fall back to the user of this updater if no triggering user was given.
		$triggeringUser = $this->options['triggeringUser'] ?? $this->user;
		if ( !$triggeringUser instanceof User ) {
			$triggeringUser = self::toLegacyUser( $triggeringUser );
		}
		$causeAction = $this->options['causeAction'] ?? 'unknown';
		$causeAgent = $this->options['causeAgent'] ?? 'unknown';

		// Bundle all of the data updates into a single deferred update wrapper so that
		// any failure will cause at most one refreshLinks job to be enqueued by
		// DeferredUpdates::doUpdates(). This is hard to do when there are many separate
		// updates that are not defined as being related.
		$update = new RefreshSecondaryDataUpdate(
			$this->loadbalancerFactory,
			$triggeringUser,
			$this->wikiPage,
			$this->revision,
			$this,
			[ 'recursive' => $options['recursive'] ]
		);
		$update->setCause( $causeAction, $causeAgent );

		if ( $options['defer'] === false ) {
			DeferredUpdates::attemptUpdate( $update, $this->loadbalancerFactory );
		} else {
			DeferredUpdates::addUpdate( $update, $options['defer'] );
		}
	}

	/**
	 * Saves the canonical ParserOutput of the target revision to the parser cache.
	 *
	 * @throws LogicException if prepareUpdate() has not been called yet.
	 */
	public function doParserCacheUpdate() {
		$this->assertHasRevision( __METHOD__ );

		$wikiPage = $this->getWikiPage(); // TODO: ParserCache should accept a RevisionRecord instead

		// NOTE: this may trigger the first parsing of the new content after an edit (when not
		// using pre-generated stashed output).
		// XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
		// to be performed post-send. The client could already follow a HTTP redirect to the
		// page view, but would then have to wait for a response until rendering is complete.
		$output = $this->getCanonicalParserOutput();

		// Save it to the parser cache. Use the revision timestamp in the case of a
		// freshly saved edit, as that matches page_touched and a mismatch would trigger an
		// unnecessary reparse.
		$timestamp = $this->options['newrev'] ? $this->revision->getTimestamp()
			: $output->getCacheTime();
		$this->parserCache->save(
			$output, $wikiPage, $this->getCanonicalParserOptions(),
			$timestamp, $this->revision->getId()
		);
	}

}