1<?php 2/** 3 * A handle for managing updates for derived page data on edit, import, purge, etc. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program; if not, write to the Free Software Foundation, Inc., 17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 * http://www.gnu.org/copyleft/gpl.html 19 * 20 * @file 21 */ 22 23namespace MediaWiki\Storage; 24 25use CategoryMembershipChangeJob; 26use Content; 27use ContentHandler; 28use DeferrableUpdate; 29use DeferredUpdates; 30use IDBAccessObject; 31use InvalidArgumentException; 32use JobQueueGroup; 33use Language; 34use LinksDeletionUpdate; 35use LinksUpdate; 36use LogicException; 37use MediaWiki\Content\IContentHandlerFactory; 38use MediaWiki\Edit\PreparedEdit; 39use MediaWiki\HookContainer\HookContainer; 40use MediaWiki\HookContainer\HookRunner; 41use MediaWiki\MediaWikiServices; 42use MediaWiki\Revision\MutableRevisionRecord; 43use MediaWiki\Revision\RenderedRevision; 44use MediaWiki\Revision\RevisionRecord; 45use MediaWiki\Revision\RevisionRenderer; 46use MediaWiki\Revision\RevisionSlots; 47use MediaWiki\Revision\RevisionStore; 48use MediaWiki\Revision\SlotRecord; 49use MediaWiki\Revision\SlotRoleRegistry; 50use MediaWiki\User\UserIdentity; 51use MessageCache; 52use MWTimestamp; 53use MWUnknownContentModelException; 54use ParserCache; 55use ParserOptions; 56use ParserOutput; 57use Psr\Log\LoggerAwareInterface; 58use Psr\Log\LoggerInterface; 
59use Psr\Log\NullLogger; 60use RecentChangesUpdateJob; 61use RefreshSecondaryDataUpdate; 62use ResourceLoaderWikiModule; 63use Revision; 64use SearchUpdate; 65use SiteStatsUpdate; 66use Title; 67use User; 68use Wikimedia\Assert\Assert; 69use Wikimedia\Rdbms\ILBFactory; 70use WikiPage; 71 72/** 73 * A handle for managing updates for derived page data on edit, import, purge, etc. 74 * 75 * @note Avoid direct usage of DerivedPageDataUpdater. 76 * 77 * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly 78 * providing access to post-PST content and ParserOutput to callbacks during revision creation, 79 * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on 80 * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and 81 * Content::getSecondaryDataUpdates(). 82 * 83 * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance, 84 * and re-used by callback code over the course of an update operation. It's a stepping stone 85 * on the way to a more complete refactoring of WikiPage. 86 * 87 * When using a DerivedPageDataUpdater, the following life cycle must be observed: 88 * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required 89 * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates 90 * require prepareContent or prepareUpdate to have been called first, to initialize the 91 * DerivedPageDataUpdater. 92 * 93 * @see docs/pageupdater.md for more information. 94 * 95 * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases 96 * of PreparedEdit. 
97 * 98 * @internal 99 * 100 * @since 1.32 101 * @ingroup Page 102 */ 103class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface { 104 105 /** 106 * @var UserIdentity|null 107 */ 108 private $user = null; 109 110 /** 111 * @var WikiPage 112 */ 113 private $wikiPage; 114 115 /** 116 * @var ParserCache 117 */ 118 private $parserCache; 119 120 /** 121 * @var RevisionStore 122 */ 123 private $revisionStore; 124 125 /** 126 * @var Language 127 */ 128 private $contLang; 129 130 /** 131 * @var JobQueueGroup 132 */ 133 private $jobQueueGroup; 134 135 /** 136 * @var MessageCache 137 */ 138 private $messageCache; 139 140 /** 141 * @var ILBFactory 142 */ 143 private $loadbalancerFactory; 144 145 /** 146 * @var HookRunner 147 */ 148 private $hookRunner; 149 150 /** 151 * @var LoggerInterface 152 */ 153 private $logger; 154 155 /** 156 * @var string see $wgArticleCountMethod 157 */ 158 private $articleCountMethod; 159 160 /** 161 * @var boolean see $wgRCWatchCategoryMembership 162 */ 163 private $rcWatchCategoryMembership = false; 164 165 /** 166 * Stores (most of) the $options parameter of prepareUpdate(). 167 * @see prepareUpdate() 168 */ 169 private $options = [ 170 'changed' => true, 171 // newrev is true if prepareUpdate is handling the creation of a new revision, 172 // as opposed to a null edit or a forced update. 173 'newrev' => false, 174 'created' => false, 175 'moved' => false, 176 'restored' => false, 177 'oldrevision' => null, 178 'oldcountable' => null, 179 'oldredirect' => null, 180 'triggeringUser' => null, 181 // causeAction/causeAgent default to 'unknown' but that's handled where it's read, 182 // to make the life of prepareUpdate() callers easier. 183 'causeAction' => null, 184 'causeAgent' => null, 185 ]; 186 187 /** 188 * The state of the relevant row in page table before the edit. 
189 * This is determined by the first call to grabCurrentRevision, prepareContent, 190 * or prepareUpdate (so it is only accessible in 'knows-current' or a later stage). 191 * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will 192 * attempt to emulate the state of the page table before the edit. 193 * 194 * Contains the following fields: 195 * - oldRevision (RevisionRecord|null): the revision that was current before the change 196 * associated with this update. Might not be set, use getParentRevision(). 197 * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change 198 * was about creating a new page); null if not known (that should not happen). 199 * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded, 200 * can be null; use wasRedirect() instead of direct access. 201 * - oldCountable (bool|null): whether the page was countable before the change (or null 202 * if we don't have that information) 203 * 204 * @var array 205 */ 206 private $pageState = null; 207 208 /** 209 * @var RevisionSlotsUpdate|null 210 */ 211 private $slotsUpdate = null; 212 213 /** 214 * @var RevisionRecord|null 215 */ 216 private $parentRevision = null; 217 218 /** 219 * @var RevisionRecord|null 220 */ 221 private $revision = null; 222 223 /** 224 * @var RenderedRevision 225 */ 226 private $renderedRevision = null; 227 228 /** 229 * @var RevisionRenderer 230 */ 231 private $revisionRenderer; 232 233 /** @var SlotRoleRegistry */ 234 private $slotRoleRegistry; 235 236 /** 237 * A stage identifier for managing the life cycle of this instance. 238 * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'. 239 * 240 * @see docs/pageupdater.md for documentation of the life cycle. 241 * 242 * @var string 243 */ 244 private $stage = 'new'; 245 246 /** 247 * Transition table for managing the life cycle of DerivedPageDateUpdater instances. 
*
	 * XXX: Overkill. This is a linear order, we could just count. Names are nice though,
	 * and constants are also overkill...
	 *
	 * @see docs/pageupdater.md for documentation of the life cycle.
	 *
	 * @var array[]
	 */
	private const TRANSITIONS = [
		'new' => [
			'new' => true,
			'knows-current' => true,
			'has-content' => true,
			'has-revision' => true,
		],
		'knows-current' => [
			'knows-current' => true,
			'has-content' => true,
			'has-revision' => true,
		],
		'has-content' => [
			'has-content' => true,
			'has-revision' => true,
		],
		'has-revision' => [
			'has-revision' => true,
			'done' => true,
		],
	];

	/**
	 * @var IContentHandlerFactory
	 */
	private $contentHandlerFactory;

	/**
	 * Stores the collaborating services, wraps the hook container in a HookRunner,
	 * and installs a NullLogger until setLogger() is called.
	 *
	 * @param WikiPage $wikiPage
	 * @param RevisionStore $revisionStore
	 * @param RevisionRenderer $revisionRenderer
	 * @param SlotRoleRegistry $slotRoleRegistry
	 * @param ParserCache $parserCache
	 * @param JobQueueGroup $jobQueueGroup
	 * @param MessageCache $messageCache
	 * @param Language $contLang
	 * @param ILBFactory $loadbalancerFactory
	 * @param IContentHandlerFactory $contentHandlerFactory
	 * @param HookContainer $hookContainer
	 */
	public function __construct(
		WikiPage $wikiPage,
		RevisionStore $revisionStore,
		RevisionRenderer $revisionRenderer,
		SlotRoleRegistry $slotRoleRegistry,
		ParserCache $parserCache,
		JobQueueGroup $jobQueueGroup,
		MessageCache $messageCache,
		Language $contLang,
		ILBFactory $loadbalancerFactory,
		IContentHandlerFactory $contentHandlerFactory,
		HookContainer $hookContainer
	) {
		// Plain dependencies, kept in parameter order.
		$this->wikiPage = $wikiPage;
		$this->revisionStore = $revisionStore;
		$this->revisionRenderer = $revisionRenderer;
		$this->slotRoleRegistry = $slotRoleRegistry;
		$this->parserCache = $parserCache;
		$this->jobQueueGroup = $jobQueueGroup;
		$this->messageCache = $messageCache;
		$this->contLang = $contLang;
		// XXX only needed for waiting for replicas to catch up; there should be a narrower
		// interface for that.
		$this->loadbalancerFactory = $loadbalancerFactory;
		$this->contentHandlerFactory = $contentHandlerFactory;

		// Derived collaborators.
		$this->hookRunner = new HookRunner( $hookContainer );
		$this->logger = new NullLogger();
	}

	/**
	 * Replaces the logger used by this updater.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Transition function for managing the life cycle of this instance.
	 *
	 * The transition is validated via assertTransition() before the stage is changed.
	 *
	 * @see docs/pageupdater.md for documentation of the life cycle.
	 *
	 * @param string $newStage the new stage
	 * @return string the previous stage
	 *
	 * @throws LogicException If a transition to the given stage is not possible in the current
	 * stage.
	 */
	private function doTransition( $newStage ) {
		$this->assertTransition( $newStage );

		$previousStage = $this->stage;
		$this->stage = $newStage;

		return $previousStage;
	}

	/**
	 * Asserts that a transition to the given stage is possible, without performing it.
	 *
	 * @see docs/pageupdater.md for documentation of the life cycle.
	 *
	 * @param string $newStage the new stage
	 *
	 * @throws LogicException If this instance is not in the expected stage
	 */
	private function assertTransition( $newStage ) {
		// A missing entry in the transition table means the move is not permitted.
		$isAllowed = self::TRANSITIONS[$this->stage][$newStage] ?? false;

		if ( !$isAllowed ) {
			throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
		}
	}

	/**
	 * Checks whether this DerivedPageDataUpdater can be re-used for running updates targeting
	 * the given revision.
*
	 * Every check is skipped when one of the two values it compares is not known,
	 * so a freshly constructed updater is reusable for anything.
	 *
	 * @param UserIdentity|null $user The user creating the revision in question
	 * @param RevisionRecord|null $revision New revision (after save, if already saved)
	 * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
	 * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
	 *
	 * @throws InvalidArgumentException If a non-zero $parentId does not match the parent
	 *  of $revision.
	 * @return bool
	 */
	public function isReusableFor(
		?UserIdentity $user = null,
		?RevisionRecord $revision = null,
		?RevisionSlotsUpdate $slotsUpdate = null,
		$parentId = null
	) {
		// Sanity check on the arguments themselves; unrelated to this instance's state.
		if ( $revision
			&& $parentId
			&& $revision->getParentId() !== $parentId
		) {
			throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
		}

		// NOTE: For null revisions, $user may be different from $this->revision->getUser
		// and also from $revision->getUser.
		// But $user should always match $this->user.
		if ( $user && $this->user && $user->getName() !== $this->user->getName() ) {
			return false;
		}

		// A revision with a known ID must be the same revision.
		if ( $revision && $this->revision && $this->revision->getId()
			&& $this->revision->getId() !== $revision->getId()
		) {
			return false;
		}

		// The recorded pre-edit revision must be the parent of the given revision...
		if ( $this->pageState
			&& $revision
			&& $revision->getParentId() !== null
			&& $this->pageState['oldId'] !== $revision->getParentId()
		) {
			return false;
		}

		// ...and must match an explicitly given parent ID.
		if ( $this->pageState
			&& $parentId !== null
			&& $this->pageState['oldId'] !== $parentId
		) {
			return false;
		}

		// NOTE: this check is the primary reason for having the $this->slotsUpdate field!
		if ( $this->slotsUpdate
			&& $slotsUpdate
			&& !$this->slotsUpdate->hasSameUpdates( $slotsUpdate )
		) {
			return false;
		}

		// Finally, the prepared slots must carry the same content as the given revision.
		if ( $revision
			&& $this->revision
			&& !$this->revision->getSlots()->hasSameContent( $revision->getSlots() )
		) {
			return false;
		}

		return true;
	}

	/**
	 * @param string $articleCountMethod "any" or "link".
* @see $wgArticleCountMethod
	 */
	public function setArticleCountMethod( $articleCountMethod ) {
		$this->articleCountMethod = $articleCountMethod;
	}

	/**
	 * @param bool $rcWatchCategoryMembership
	 * @see $wgRCWatchCategoryMembership
	 */
	public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
		$this->rcWatchCategoryMembership = $rcWatchCategoryMembership;
	}

	/**
	 * Returns the title of the wrapped WikiPage.
	 *
	 * @return Title
	 */
	private function getTitle() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->wikiPage;

		return $page->getTitle();
	}

	/**
	 * Returns the WikiPage this updater was constructed for.
	 *
	 * @return WikiPage
	 */
	private function getWikiPage() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		return $this->wikiPage;
	}

	/**
	 * Determines whether the page being edited already existed.
	 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
	 *
	 * @return bool
	 * @throws LogicException if called before grabCurrentRevision
	 */
	public function pageExisted() {
		$this->assertHasPageState( __METHOD__ );

		// The recorded old revision ID is positive only if there was a current revision.
		$previousId = $this->pageState['oldId'];

		return $previousId > 0;
	}

	/**
	 * Returns the parent revision of the new revision wrapped by this update.
	 * If the update is a null-edit, this will return the parent of the current (and new) revision.
	 * This will return null if the revision wrapped by this update created the page.
	 * Only defined after calling prepareContent() or prepareUpdate()!
	 *
	 * @return RevisionRecord|null the parent revision of the new revision, or null if
	 * the update created the page.
*/
	private function getParentRevision() {
		$this->assertPrepared( __METHOD__ );

		if ( $this->parentRevision ) {
			// Already resolved, either here or by prepareContent().
			return $this->parentRevision;
		}

		if ( !$this->pageState['oldId'] ) {
			// If there was no current revision, there is no parent revision,
			// since the page didn't exist.
			return null;
		}

		$parentId = $this->revision->getParentId();
		$queryFlags = $this->useMaster() ? RevisionStore::READ_LATEST : 0;

		if ( $parentId ) {
			$this->parentRevision = $this->revisionStore->getRevisionById( $parentId, $queryFlags );
		} else {
			$this->parentRevision = null;
		}

		return $this->parentRevision;
	}

	/**
	 * Returns the revision that was the page's current revision when grabCurrentRevision()
	 * was first called.
	 *
	 * During an edit, that revision will act as the logical parent of the new revision.
	 *
	 * Some updates are performed based on the difference between the database state at the
	 * moment this method is first called, and the state after the edit.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception
	 * to avoid confusion, since the page's current revision is then the new revision after
	 * the edit, which was presumably passed to prepareUpdate() as the $revision parameter.
	 * Use getParentRevision() instead to access the revision that is the parent of the
	 * new revision.
	 *
	 * @return RevisionRecord|null the page's current revision, or null if the page does not
	 * yet exist.
*/
	public function grabCurrentRevision() {
		if ( $this->pageState ) {
			// The pre-edit state was captured by an earlier call; keep reporting that.
			return $this->pageState['oldRevision'];
		}

		$this->assertTransition( 'knows-current' );

		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->getWikiPage();

		// Do not call WikiPage::clear(), since the caller may already have caused page data
		// to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
		$page->loadPageData( self::READ_LATEST );
		$currentRevision = $page->getRevisionRecord();

		$this->pageState = [
			'oldRevision' => $currentRevision,
			'oldId' => $currentRevision ? $currentRevision->getId() : 0,
			'oldIsRedirect' => $page->isRedirect(), // NOTE: uses page table
			'oldCountable' => $page->isCountable(), // NOTE: uses pagelinks table
		];

		$this->doTransition( 'knows-current' );

		return $this->pageState['oldRevision'];
	}

	/**
	 * Whether prepareUpdate() or prepareContent() have been called on this instance.
	 *
	 * @return bool
	 */
	public function isContentPrepared() {
		return $this->revision !== null;
	}

	/**
	 * Whether prepareUpdate() has been called on this instance.
	 *
	 * @note This also returns true after prepareContent() detected a null-edit, since the
	 * prepared revision is then given the ID of the existing revision.
	 *
	 * @return bool
	 */
	public function isUpdatePrepared() {
		if ( $this->revision === null ) {
			return false;
		}

		return $this->revision->getId() !== null;
	}

	/**
	 * Returns the ID of the wrapped WikiPage.
	 *
	 * @return int
	 */
	private function getPageId() {
		// NOTE: eventually, we won't get a WikiPage passed into the constructor any more
		$page = $this->wikiPage;

		return $page->getId();
	}

	/**
	 * Whether the content is deleted and thus not visible to the public.
*
	 * @return bool
	 */
	public function isContentDeleted() {
		if ( !$this->revision ) {
			// If the content has not been saved yet, it cannot have been deleted yet.
			return false;
		}

		return $this->revision->isDeleted( RevisionRecord::DELETED_TEXT );
	}

	/**
	 * Returns the slot, modified or inherited, after PST, with no audience checks applied.
	 *
	 * @param string $role slot role name
	 *
	 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
	 *        parent revision.
	 * @return SlotRecord
	 */
	public function getRawSlot( $role ) {
		$slots = $this->getSlots();

		return $slots->getSlot( $role );
	}

	/**
	 * Returns the content of the given slot, with no audience checks.
	 *
	 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
	 *        parent revision.
	 * @param string $role slot role name
	 * @return Content
	 */
	public function getRawContent( $role ) {
		$slot = $this->getRawSlot( $role );

		return $slot->getContent();
	}

	/**
	 * Returns the content model of the given slot
	 *
	 * @param string $role slot role name
	 * @return string
	 */
	private function getContentModel( $role ) {
		$slot = $this->getRawSlot( $role );

		return $slot->getModel();
	}

	/**
	 * Returns the handler registered for the content model of the given slot.
	 *
	 * @param string $role slot role name
	 * @return ContentHandler
	 * @throws MWUnknownContentModelException
	 */
	private function getContentHandler( $role ): ContentHandler {
		$model = $this->getContentModel( $role );

		return $this->contentHandlerFactory->getContentHandler( $model );
	}

	/**
	 * Whether the wrapped WikiPage reports having been loaded with READ_LATEST.
	 *
	 * @return bool
	 */
	private function useMaster() {
		// TODO: can we just set a flag to true in prepareContent()?
		return $this->wikiPage->wasLoadedFrom( self::READ_LATEST );
	}

	/**
	 * Whether the prepared revision makes the page count as an article.
	 *
	 * @return bool
	 */
	public function isCountable() {
		// NOTE: Keep in sync with WikiPage::isCountable.

		if ( !$this->getTitle()->isContentPage() ) {
			return false;
		}

		if ( $this->isContentDeleted() ) {
			// This should be irrelevant: countability only applies to the current revision,
			// and the current revision is never suppressed.
			return false;
		}

		if ( $this->isRedirect() ) {
			return false;
		}

		// Stays null unless the "link" counting method is configured.
		$hasLinks = null;

		if ( $this->articleCountMethod === 'link' ) {
			// NOTE: it would be more appropriate to determine for each slot separately
			// whether it has links, and use that information with that slot's
			// isCountable() method. However, that would break parity with
			// WikiPage::isCountable, which uses the pagelinks table to determine
			// whether the current revision has links.
			$links = $this->getCanonicalParserOutput()->getLinks();
			$hasLinks = count( $links ) > 0;
		}

		// One countable slot in a role that supports article counting is enough.
		foreach ( $this->getSlots()->getSlotRoles() as $role ) {
			$roleHandler = $this->slotRoleRegistry->getRoleHandler( $role );

			if ( $roleHandler->supportsArticleCount()
				&& $this->getRawContent( $role )->isCountable( $hasLinks )
			) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Whether the prepared revision is a redirect.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		// NOTE: main slot determines redirect status
		// TODO: MCR: this should be controlled by a PageTypeHandler
		return $this->getRawContent( SlotRecord::MAIN )->isRedirect();
	}

	/**
	 * Whether the given revision is a redirect.
	 *
	 * @param RevisionRecord $rev
	 *
	 * @return bool
	 */
	private function revisionIsRedirect( RevisionRecord $rev ) {
		// NOTE: main slot determines redirect status
		return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW )->isRedirect();
	}

	/**
	 * Prepare updates based on an update which has not yet been saved.
*
	 * This may be used to create derived data that is needed when creating a new revision;
	 * particularly, this makes available the slots of the new revision via the getSlots()
	 * method, after applying PST and slot inheritance.
	 *
	 * The derived data prepared for revision creation may then later be re-used by doUpdates(),
	 * without the need to re-calculate.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note Calling this method more than once with the same $slotsUpdate
	 * has no effect. Calling this method multiple times with different content will cause
	 * an exception.
	 *
	 * @note Calling this method after prepareUpdate() has been called will cause an exception.
	 *
	 * @param User $user The user to act as context for pre-save transformation (PST).
	 *        Type hint should be reduced to UserIdentity at some point.
	 * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
	 *        by this edit, before PST.
	 * @param bool $useStash Whether to use stashed ParserOutput
	 */
	public function prepareContent(
		User $user,
		RevisionSlotsUpdate $slotsUpdate,
		$useStash = true
	) {
		if ( $this->slotsUpdate ) {
			// Repeated call: only acceptable if it asks for exactly what was already prepared.
			if ( !$this->user ) {
				throw new LogicException(
					'Unexpected state: $this->slotsUpdate was initialized, '
					. 'but $this->user was not.'
				);
			}

			if ( $this->user->getName() !== $user->getName() ) {
				throw new LogicException( 'Can\'t call prepareContent() again for different user! '
					. 'Expected ' . $this->user->getName() . ', got ' . $user->getName()
				);
			}

			if ( !$this->slotsUpdate->hasSameUpdates( $slotsUpdate ) ) {
				throw new LogicException(
					'Can\'t call prepareContent() again with different slot content!'
				);
			}

			return; // prepareContent() already done, nothing to do
		}

		$this->assertTransition( 'has-content' );

		$wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
		$title = $this->getTitle();

		$parentRevision = $this->grabCurrentRevision();

		// The edit may have already been prepared via api.php?action=stashedit
		$stashedEdit = false;

		// TODO: MCR: allow output for all slots to be stashed.
		if ( $useStash && $slotsUpdate->isModifiedSlot( SlotRecord::MAIN ) ) {
			$editStash = MediaWikiServices::getInstance()->getPageEditStash();
			$stashedEdit = $editStash->checkCache(
				$title,
				$slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent(),
				User::newFromIdentity( $user )
			);
		}

		$parserOptions = ParserOptions::newFromUserAndLang( $user, $this->contLang );
		$this->hookRunner->onArticlePrepareTextForEdit( $wikiPage, $parserOptions );

		$this->user = $user;
		$this->slotsUpdate = $slotsUpdate;

		// Start from the parent's slots when there is a parent, from an empty revision otherwise.
		if ( $parentRevision ) {
			$this->revision = MutableRevisionRecord::newFromParentRevision( $parentRevision );
		} else {
			$this->revision = new MutableRevisionRecord( $title );
		}

		// NOTE: user and timestamp must be set, so they can be used for
		// {{subst:REVISIONUSER}} and {{subst:REVISIONTIMESTAMP}} in PST!
		$this->revision->setTimestamp( MWTimestamp::now( TS_MW ) );
		$this->revision->setUser( $user );

		// Set up ParserOptions to operate on the new revision
		$fallbackCallback = $parserOptions->getCurrentRevisionRecordCallback();
		$parserOptions->setCurrentRevisionRecordCallback(
			function ( Title $parserTitle, $parser = null ) use ( $title, $fallbackCallback ) {
				if ( !$parserTitle->equals( $title ) ) {
					// Any other page keeps being resolved by the previously installed callback.
					return call_user_func( $fallbackCallback, $parserTitle, $parser );
				}

				return $this->revision;
			}
		);

		$pstSlots = $this->revision->getSlots();

		foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
			$slot = $slotsUpdate->getModifiedSlot( $role );

			if ( $slot->isInherited() ) {
				// No PST for inherited slots! Note that "modified" slots may still be inherited
				// from an earlier version, e.g. for rollbacks.
				$pstSlot = $slot;
			} elseif ( $role === SlotRecord::MAIN && $stashedEdit ) {
				// TODO: MCR: allow PST content for all slots to be stashed.
				$pstSlot = SlotRecord::newUnsaved( $role, $stashedEdit->pstContent );
			} else {
				$rawContent = $slot->getContent();
				$transformed = $rawContent->preSaveTransform( $title, $this->user, $parserOptions );
				$pstSlot = SlotRecord::newUnsaved( $role, $transformed );
			}

			$pstSlots->setSlot( $pstSlot );
		}

		foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
			$pstSlots->removeSlot( $role );
		}

		// Without a parent this is a page creation, which always counts as a change.
		$isCreation = ( $parentRevision === null );
		$this->options['created'] = $isCreation;
		$this->options['changed'] = ( $isCreation
			|| !$pstSlots->hasSameContent( $parentRevision->getSlots() ) );

		$this->doTransition( 'has-content' );

		if ( $this->options['changed'] ) {
			$this->parentRevision = $parentRevision;
		} else {
			// null-edit!

			// TODO: move this into MutableRevisionRecord
			// TODO: This needs to behave differently for a forced dummy edit!
			$this->revision->setId( $parentRevision->getId() );
			$this->revision->setTimestamp( $parentRevision->getTimestamp() );
			$this->revision->setPageId( $parentRevision->getPageId() );
			$this->revision->setParentId( $parentRevision->getParentId() );
			$this->revision->setUser( $parentRevision->getUser( RevisionRecord::RAW ) );
			$this->revision->setComment( $parentRevision->getComment( RevisionRecord::RAW ) );
			$this->revision->setMinorEdit( $parentRevision->isMinor() );
			$this->revision->setVisibility( $parentRevision->getVisibility() );

			// prepareUpdate() is redundant for null-edits
			$this->doTransition( 'has-revision' );
		}

		$renderHints = [
			'use-master' => $this->useMaster(),
			'audience' => RevisionRecord::RAW,
		];

		if ( $stashedEdit ) {
			/** @var ParserOutput $output */
			$output = $stashedEdit->output;
			// TODO: this should happen when stashing the ParserOutput, not now!
			$output->setCacheTime( $stashedEdit->timestamp );

			$renderHints['known-revision-output'] = $output;

			$this->logger->debug( __METHOD__ . ': using stashed edit output...' );
		}

		// NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
		// NOTE: the revision is either new or current, so we can bypass audience checks.
		$this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
			$this->revision,
			null,
			null,
			$renderHints
		);
	}

	/**
	 * Returns the update's target revision - that is, the revision that will be the current
	 * revision after the update.
	 *
	 * @note Callers must treat the returned RevisionRecord's content as immutable, even
	 * if it is a MutableRevisionRecord instance.
* Other aspects of a MutableRevisionRecord
	 * returned from here, such as the user or the comment, may be changed, but may not
	 * be reflected in ParserOutput until after prepareUpdate() has been called.
	 *
	 * @todo This is currently used by PageUpdater::makeNewRevision() to construct an unsaved
	 * MutableRevisionRecord instance. Introduce something like an UnsavedRevisionFactory service
	 * for that purpose instead!
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called
	 * @return RevisionRecord
	 */
	public function getRevision() {
		$this->assertPrepared( __METHOD__ );
		return $this->revision;
	}

	/**
	 * Returns the rendered form of the target revision.
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called
	 * @return RenderedRevision
	 */
	public function getRenderedRevision() {
		$this->assertPrepared( __METHOD__ );

		return $this->renderedRevision;
	}

	/**
	 * Asserts that the pre-edit page state has been recorded.
	 *
	 * @param string $method Name of the calling method, used in the error message
	 * @throws LogicException if $this->pageState has not been initialized
	 */
	private function assertHasPageState( $method ) {
		if ( !$this->pageState ) {
			throw new LogicException(
				'Must call grabCurrentRevision() or prepareContent() '
				. 'or prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Asserts that a target revision has been set up.
	 *
	 * @param string $method Name of the calling method, used in the error message
	 * @throws LogicException if $this->revision has not been initialized
	 */
	private function assertPrepared( $method ) {
		if ( !$this->revision ) {
			throw new LogicException(
				'Must call prepareContent() or prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Asserts that the target revision is set up and has a revision ID.
	 *
	 * @param string $method Name of the calling method, used in the error message
	 * @throws LogicException if there is no target revision, or it has no ID
	 */
	private function assertHasRevision( $method ) {
		// Check for a missing revision first: calling getId() on null would raise a fatal
		// Error instead of the LogicException this assertion is meant to produce.
		if ( !$this->revision || !$this->revision->getId() ) {
			throw new LogicException(
				'Must call prepareUpdate() before calling ' . $method
			);
		}
	}

	/**
	 * Whether the edit creates the page.
	 *
	 * @throws LogicException if neither prepareContent() nor prepareUpdate() was called
	 * @return bool
	 */
	public function isCreation() {
		$this->assertPrepared( __METHOD__ );
		return $this->options['created'];
	}

	/**
	 * Whether the edit created, or should create, a new revision (that is, it's not a null-edit).
	 *
	 * @warning at present, "null-revisions" that do not change content but do have a revision
	 * record would return false after prepareContent(), but true after prepareUpdate()!
	 * This should probably be fixed.
*
	 * @return bool
	 */
	public function isChange() {
		$this->assertPrepared( __METHOD__ );

		return $this->options['changed'];
	}

	/**
	 * Whether the page was a redirect before the edit.
	 *
	 * The answer is computed from the old revision on first use if it was not recorded.
	 *
	 * @return bool
	 */
	public function wasRedirect() {
		$this->assertHasPageState( __METHOD__ );

		if ( $this->pageState['oldIsRedirect'] === null ) {
			/** @var RevisionRecord $oldRevision */
			$oldRevision = $this->pageState['oldRevision'];

			// Without an old revision there was no page, hence no redirect.
			$this->pageState['oldIsRedirect'] = $oldRevision
				? $this->revisionIsRedirect( $oldRevision )
				: false;
		}

		return $this->pageState['oldIsRedirect'];
	}

	/**
	 * Returns the slots of the target revision, after PST.
	 *
	 * @note Callers must treat the returned RevisionSlots instance as immutable, even
	 * if it is a MutableRevisionSlots instance.
	 *
	 * @return RevisionSlots
	 */
	public function getSlots() {
		$this->assertPrepared( __METHOD__ );

		return $this->revision->getSlots();
	}

	/**
	 * Returns the RevisionSlotsUpdate for this updater.
	 *
	 * If none was supplied, it is derived from the target revision's slots and those of
	 * the parent revision, then kept for later calls.
	 *
	 * @return RevisionSlotsUpdate
	 */
	private function getRevisionSlotsUpdate() {
		$this->assertPrepared( __METHOD__ );

		if ( $this->slotsUpdate ) {
			return $this->slotsUpdate;
		}

		$parent = $this->getParentRevision();
		$newSlots = $this->revision->getSlots();
		$parentSlots = $parent ? $parent->getSlots() : null;

		$this->slotsUpdate = RevisionSlotsUpdate::newFromRevisionSlots( $newSlots, $parentSlots );

		return $this->slotsUpdate;
	}

	/**
	 * Returns the role names of the slots touched by the new revision,
	 * including removed roles.
	 *
	 * @return string[]
	 */
	public function getTouchedSlotRoles() {
		$update = $this->getRevisionSlotsUpdate();

		return $update->getTouchedRoles();
	}

	/**
	 * Returns the role names of the slots modified by the new revision,
	 * not including removed roles.
*
	 * @return string[]
	 */
	public function getModifiedSlotRoles() {
		$slotsUpdate = $this->getRevisionSlotsUpdate();

		return $slotsUpdate->getModifiedRoles();
	}

	/**
	 * Returns the role names of the slots that the new revision removes.
	 *
	 * @return string[]
	 */
	public function getRemovedSlotRoles() {
		$slotsUpdate = $this->getRevisionSlotsUpdate();

		return $slotsUpdate->getRemovedRoles();
	}

	/**
	 * Prepare derived data updates targeting the given Revision.
	 *
	 * Calling this method requires the given revision to be present in the database.
	 * This may be right after a new revision has been created, or when re-generating
	 * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
	 * script.
	 *
	 * @see docs/pageupdater.md for more information on when this method can and should be called.
	 *
	 * @note Calling this method more than once with the same revision has no effect.
	 * $options are only used for the first call. Calling this method multiple times with
	 * different revisions will cause an exception.
	 *
	 * @note If grabCurrentRevision() (or prepareContent()) has been called before
	 * calling this method, $revision->getParentRevision() has to refer to the revision that
	 * was the current revision at the time grabCurrentRevision() was called.
*
	 * @param RevisionRecord $revision
	 * @param array $options Array of options, following indexes are used:
	 * - changed: bool, whether the revision changed the content (default true)
	 * - created: bool, whether the revision created the page (default false)
	 * - moved: bool, whether the page was moved (default false)
	 * - restored: bool, whether the page was undeleted (default false)
	 * - oldrevision: RevisionRecord object for the pre-update revision (default null)
	 *     can also be a Revision object, which is deprecated since 1.35
	 * - triggeringUser: The user triggering the update (UserIdentity, defaults to the
	 *   user who created the revision)
	 * - oldredirect: bool, null, or string 'no-change' (default null):
	 *    - bool: whether the page was counted as a redirect before that
	 *      revision, only used if changed is true and created is false
	 *    - null or 'no-change': don't update the redirect status.
	 * - oldcountable: bool, null, or string 'no-change' (default null):
	 *    - bool: whether the page was counted as an article before that
	 *      revision, only used if changed is true and created is false
	 *    - null: if created is false, don't update the article count; if created
	 *      is true, do update the article count
	 *    - 'no-change': don't update the article count, ever
	 *   When set to null, pageState['oldCountable'] will be used instead if available.
	 * - causeAction: an arbitrary string identifying the reason for the update.
	 *   See DataUpdate::getCauseAction(). (default 'unknown')
	 * - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent().
	 *   (string, default 'unknown')
	 * - known-revision-output: a combined canonical ParserOutput for the revision, perhaps
	 *   from some cache. The caller is responsible for ensuring that the ParserOutput indeed
	 *   matched the $rev and $options. This mechanism is intended as a temporary stop-gap,
	 *   for the time until caches have been changed to store RenderedRevision states instead
	 *   of ParserOutput objects. (default: null) (since 1.33)
	 *
	 * @throws InvalidArgumentException if $revision has no revision ID
	 * @throws LogicException if the updater is already bound to a different revision, or
	 *   the given revision mismatches the prepared content, old revision ID, or acting user
	 */
	public function prepareUpdate( RevisionRecord $revision, array $options = [] ) {
		// B/C: convert a legacy Revision object into a RevisionRecord (deprecated since 1.35)
		if ( isset( $options['oldrevision'] ) && $options['oldrevision'] instanceof Revision ) {
			wfDeprecated(
				__METHOD__ . ' with the `oldrevision` option being a ' .
				'Revision object',
				'1.35'
			);
			$options['oldrevision'] = $options['oldrevision']->getRevisionRecord();
		}
		Assert::parameter(
			!isset( $options['oldrevision'] )
			|| $options['oldrevision'] instanceof RevisionRecord,
			'$options["oldrevision"]',
			'must be a RevisionRecord (or Revision)'
		);
		Assert::parameter(
			!isset( $options['triggeringUser'] )
			|| $options['triggeringUser'] instanceof UserIdentity,
			'$options["triggeringUser"]',
			'must be a UserIdentity'
		);

		if ( !$revision->getId() ) {
			throw new InvalidArgumentException(
				'Revision must have an ID set for it to be used with prepareUpdate()!'
			);
		}

		// Already bound to a revision with an ID: a repeated call with the same revision
		// is a no-op, a call with a different revision is a programming error.
		if ( $this->revision && $this->revision->getId() ) {
			if ( $this->revision->getId() === $revision->getId() ) {
				return; // nothing to do!
			} else {
				throw new LogicException(
					'Trying to re-use DerivedPageDataUpdater with revision '
					. $revision->getId()
					. ', but it\'s already bound to revision '
					. $this->revision->getId()
				);
			}
		}

		// A revision without an ID is already set on this updater; the given revision
		// must carry the same content.
		if ( $this->revision
			&& !$this->revision->getSlots()->hasSameContent( $revision->getSlots() )
		) {
			throw new LogicException(
				'The Revision provided has mismatching content!'
			);
		}

		// Override fields defined in $this->options with values from $options.
		$this->options = array_intersect_key( $options, $this->options ) + $this->options;

		// Determine the ID of the revision that was current before this update.
		// NOTE: 'newrev' is only (re)computed in the first two branches.
		if ( $this->revision ) {
			$oldId = $this->pageState['oldId'] ?? 0;
			$this->options['newrev'] = ( $revision->getId() !== $oldId );
		} elseif ( isset( $this->options['oldrevision'] ) ) {
			/** @var RevisionRecord $oldRev */
			$oldRev = $this->options['oldrevision'];
			$oldId = $oldRev->getId();
			$this->options['newrev'] = ( $revision->getId() !== $oldId );
		} else {
			$oldId = $revision->getParentId();
		}

		if ( $oldId !== null ) {
			// XXX: what if $options['changed'] disagrees?
			// MovePage creates a dummy revision with changed = false!
			// We may want to explicitly distinguish between "no new revision" (null-edit)
			// and "new revision without new content" (dummy revision).

			if ( $oldId === $revision->getParentId() ) {
				// NOTE: this may still be a NullRevision!
				// New revision!
				$this->options['changed'] = true;
			} elseif ( $oldId === $revision->getId() ) {
				// Null-edit!
				$this->options['changed'] = false;
			} else {
				// This indicates that calling code has given us the wrong Revision object
				throw new LogicException(
					'The Revision mismatches old revision ID: '
					. 'Old ID is ' . $oldId
					. ', parent ID is ' . $revision->getParentId()
					. ', revision ID is ' . $revision->getId()
				);
			}
		}

		// If prepareContent() was used to generate the PST content (which is indicated by
		// $this->slotsUpdate being set), and this is not a null-edit, then the given
		// revision must have the acting user as the revision author. Otherwise, user
		// signatures generated by PST would mismatch the user in the revision record.
		if ( $this->user !== null && $this->options['changed'] && $this->slotsUpdate ) {
			$user = $revision->getUser();
			if ( !$this->user->equals( $user ) ) {
				throw new LogicException(
					'The Revision provided has a mismatching actor: expected '
					. $this->user->getName()
					. ', got '
					. $user->getName()
				);
			}
		}

		// If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
		// emulate the state of the page table before the edit, as good as we can.
		if ( !$this->pageState ) {
			// Only boolean option values are carried over; null and 'no-change' become null.
			$this->pageState = [
				'oldIsRedirect' => isset( $this->options['oldredirect'] )
					&& is_bool( $this->options['oldredirect'] )
					? $this->options['oldredirect']
					: null,
				'oldCountable' => isset( $this->options['oldcountable'] )
					&& is_bool( $this->options['oldcountable'] )
					? $this->options['oldcountable']
					: null,
			];

			if ( $this->options['changed'] ) {
				// The edit created a new revision
				$this->pageState['oldId'] = $revision->getParentId();

				// NOTE: 'oldRevision' stays unset when no 'oldrevision' option was given.
				if ( isset( $this->options['oldrevision'] ) ) {
					$rev = $this->options['oldrevision'];
					$this->pageState['oldRevision'] = $rev;
				}
			} else {
				// This is a null-edit, so the old revision IS the new revision!
				$this->pageState['oldId'] = $revision->getId();
				$this->pageState['oldRevision'] = $revision;
			}
		}

		// "created" is forced here
		$this->options['created'] = ( $this->options['created'] ||
			( $this->pageState['oldId'] === 0 ) );

		$this->revision = $revision;

		$this->doTransition( 'has-revision' );

		// NOTE: in case we have a User object, don't override with a UserIdentity.
		// We already checked that $revision->getUser() matches $this->user;
		if ( !$this->user ) {
			$this->user = $revision->getUser( RevisionRecord::RAW );
		}

		// Prune any output that depends on the revision ID.
		if ( $this->renderedRevision ) {
			$this->renderedRevision->updateRevision( $revision );
		} else {
			// NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
			// NOTE: the revision is either new or current, so we can bypass audience checks.
			$this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
				$this->revision,
				null,
				null,
				[
					'use-master' => $this->useMaster(),
					'audience' => RevisionRecord::RAW,
					'known-revision-output' => $options['known-revision-output'] ?? null
				]
			);

			// XXX: Since we presumably are dealing with the current revision,
			// we could try to get the ParserOutput from the parser cache.
		}

		// TODO: optionally get ParserOutput from the ParserCache here.
		// Move the logic used by RefreshLinksJob here!
	}

	/**
	 * Builds a legacy PreparedEdit object from the state of this updater.
	 *
	 * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
	 * @return PreparedEdit
	 */
	public function getPreparedEdit() {
		$this->assertPrepared( __METHOD__ );

		$slotsUpdate = $this->getRevisionSlotsUpdate();
		$preparedEdit = new PreparedEdit();

		$preparedEdit->popts = $this->getCanonicalParserOptions();
		$preparedEdit->parserOutputCallback = [ $this, 'getCanonicalParserOutput' ];
		$preparedEdit->pstContent = $this->revision->getContent( SlotRecord::MAIN );
		$preparedEdit->newContent =
			$slotsUpdate->isModifiedSlot( SlotRecord::MAIN )
			? $slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent()
			: $this->revision->getContent( SlotRecord::MAIN ); // XXX: can we just remove this?
		$preparedEdit->oldContent = null; // unused. // XXX: could get this from the parent revision
		$preparedEdit->revid = $this->revision ? $this->revision->getId() : null;
		// NOTE(review): `output` is never assigned in this method; presumably PreparedEdit
		// resolves it lazily through parserOutputCallback - confirm against PreparedEdit.
		$preparedEdit->timestamp = $preparedEdit->output->getCacheTime();
		$preparedEdit->format = $preparedEdit->pstContent->getDefaultFormat();

		return $preparedEdit;
	}

	/**
	 * Returns the ParserOutput of a single slot, as provided by the RenderedRevision.
	 *
	 * @param string $role
	 * @param bool $generateHtml Passed on as the 'generate-html' hint
	 * @return ParserOutput
	 */
	public function getSlotParserOutput( $role, $generateHtml = true ) {
		return $this->getRenderedRevision()->getSlotParserOutput(
			$role,
			[ 'generate-html' => $generateHtml ]
		);
	}

	/**
	 * Returns the ParserOutput for the whole revision, as provided by the RenderedRevision.
	 *
	 * @return ParserOutput
	 */
	public function getCanonicalParserOutput() {
		return $this->getRenderedRevision()->getRevisionParserOutput();
	}

	/**
	 * Returns the ParserOptions used by the RenderedRevision.
	 *
	 * @return ParserOptions
	 */
	public function getCanonicalParserOptions() {
		return $this->getRenderedRevision()->getOptions();
	}

	/**
	 * Collects the secondary data updates for the target revision: a LinksUpdate for the
	 * combined output, the updates defined by the content handler and (for B/C) the content
	 * object of every slot, and deletion updates for slots removed by the revision.
	 * The resulting list is passed through the RevisionDataUpdates hook before it is returned.
	 *
	 * @param bool $recursive Passed on to LinksUpdate and the legacy per-content updates
	 *
	 * @return DeferrableUpdate[]
	 */
	public function getSecondaryDataUpdates( $recursive = false ) {
		if ( $this->isContentDeleted() ) {
			// This shouldn't happen, since the current content is always public,
			// and DataUpdates are only needed for current content.
			return [];
		}

		$output = $this->getCanonicalParserOutput();

		// Construct a LinksUpdate for the combined canonical output.
		$linksUpdate = new LinksUpdate(
			$this->getTitle(),
			$output,
			$recursive
		);

		$allUpdates = [ $linksUpdate ];

		// NOTE: Run updates for all slots, not just the modified slots! Otherwise,
		// info for an inherited slot may end up being removed. This is also needed
		// to ensure that purges are effective.
		$renderedRevision = $this->getRenderedRevision();
		foreach ( $this->getSlots()->getSlotRoles() as $role ) {
			$slot = $this->getRawSlot( $role );
			$content = $slot->getContent();
			$handler = $content->getContentHandler();

			$updates = $handler->getSecondaryDataUpdates(
				$this->getTitle(),
				$content,
				$role,
				$renderedRevision
			);
			$allUpdates = array_merge( $allUpdates, $updates );

			// TODO: remove B/C hack in 1.32!
			// NOTE: we assume that the combined output contains all relevant meta-data for
			// all slots!
			$legacyUpdates = $content->getSecondaryDataUpdates(
				$this->getTitle(),
				null,
				$recursive,
				$output
			);

			// HACK: filter out redundant and incomplete LinksUpdates
			$legacyUpdates = array_filter( $legacyUpdates, function ( $update ) {
				return !( $update instanceof LinksUpdate );
			} );

			$allUpdates = array_merge( $allUpdates, $legacyUpdates );
		}

		// XXX: if a slot was removed by an earlier edit, but deletion updates failed to run at
		// that time, we don't know for which slots to run deletion updates when purging a page.
		// We'd have to examine the entire history of the page to determine that. Perhaps there
		// could be a "try extra hard" mode for that case that would run a DB query to find all
		// roles/models ever used on the page. On the other hand, removing slots should be quite
		// rare, so perhaps this isn't worth the trouble.

		// TODO: consolidate with similar logic in WikiPage::getDeletionUpdates()
		$wikiPage = $this->getWikiPage();
		$parentRevision = $this->getParentRevision();
		foreach ( $this->getRemovedSlotRoles() as $role ) {
			// HACK: we should get the content model of the removed slot from a SlotRoleHandler!
			// For now, find the slot in the parent revision - if the slot was removed, it should
			// always exist in the parent revision.
			$parentSlot = $parentRevision->getSlot( $role, RevisionRecord::RAW );
			$content = $parentSlot->getContent();
			$handler = $content->getContentHandler();

			$updates = $handler->getDeletionUpdates(
				$this->getTitle(),
				$role
			);
			$allUpdates = array_merge( $allUpdates, $updates );

			// TODO: remove B/C hack in 1.32!
			$legacyUpdates = $content->getDeletionUpdates( $wikiPage );

			// HACK: filter out redundant and incomplete LinksDeletionUpdate
			$legacyUpdates = array_filter( $legacyUpdates, function ( $update ) {
				return !( $update instanceof LinksDeletionUpdate );
			} );

			$allUpdates = array_merge( $allUpdates, $legacyUpdates );
		}

		// TODO: hard deprecate SecondaryDataUpdates in favor of RevisionDataUpdates in 1.33!
		$this->hookRunner->onRevisionDataUpdates(
			$this->getTitle(), $renderedRevision, $allUpdates );

		return $allUpdates;
	}

	/**
	 * Do standard updates after page edit, purge, or import.
	 * Update links tables, site stats, search index, title cache, message cache, etc.
	 * Purges pages that depend on this page when appropriate.
	 * With a 10% chance, triggers pruning the recent changes table.
	 *
	 * @note prepareUpdate() must be called before calling this method!
	 *
	 * MCR migration note: this replaces WikiPage::doEditUpdates.
	 */
	public function doUpdates() {
		$this->assertTransition( 'done' );

		// TODO: move logic into a PageEventEmitter service

		$wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!

		$legacyUser = User::newFromIdentity( $this->user );

		$userParserOptions = ParserOptions::newFromUser( $legacyUser );
		// Decide whether to save the final canonical parser output based on the fact that
		// users are typically redirected to viewing pages right after they edit those pages.
		// Due to vary-revision-id, getting/saving that output here might require a reparse.
		if ( $userParserOptions->matchesForCacheKey( $this->getCanonicalParserOptions() ) ) {
			// Whether getting the final output requires a reparse or not, the user will
			// need canonical output anyway, since that is what their parser options use.
			// A reparse now at least has the benefit of various warm process caches.
			$this->doParserCacheUpdate();
		} else {
			// If the user does not have canonical parse options, then don't risk another parse
			// to make output they cannot use on the page refresh that typically occurs after
			// editing. Doing the parser output save post-send will still benefit *other* users.
			DeferredUpdates::addCallableUpdate( function () {
				$this->doParserCacheUpdate();
			} );
		}

		$this->doSecondaryDataUpdates( [
			// T52785 do not update any other pages on a null edit
			'recursive' => $this->options['changed'],
			// Defer the getCanonicalParserOutput() call made by getSecondaryDataUpdates()
			'defer' => DeferredUpdates::POSTSEND
		] );

		// TODO: MCR: check if *any* changed slot supports categories!
		if ( $this->rcWatchCategoryMembership
			&& $this->getContentHandler( SlotRecord::MAIN )->supportsCategories() === true
			&& ( $this->options['changed'] || $this->options['created'] )
			&& !$this->options['restored']
		) {
			// Note: jobs are pushed after deferred updates, so the job should be able to see
			// the recent change entry (also done via deferred updates) and carry over any
			// bot/deletion/IP flags, etc.
			$this->jobQueueGroup->lazyPush(
				CategoryMembershipChangeJob::newSpec(
					$this->getTitle(),
					$this->revision->getTimestamp()
				)
			);
		}

		// TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
		// @note: Extensions should *avoid* calling getCanonicalParserOutput() when using
		// this hook whenever possible in order to avoid unnecessary additional parses.
		$editInfo = $this->getPreparedEdit();
		$this->hookRunner->onArticleEditUpdates( $wikiPage, $editInfo, $this->options['changed'] );

		// TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
		if ( $this->hookRunner->onArticleEditUpdatesDeleteFromRecentchanges( $wikiPage ) ) {
			// Flush old entries from the `recentchanges` table
			// (only on one in ten calls, chosen at random)
			if ( mt_rand( 0, 9 ) == 0 ) {
				$this->jobQueueGroup->lazyPush( RecentChangesUpdateJob::newPurgeJob() );
			}
		}

		$id = $this->getPageId();
		$title = $this->getTitle();
		$shortTitle = $title->getDBkey();

		if ( !$title->exists() ) {
			wfDebug( __METHOD__ . ": Page doesn't exist any more, bailing out" );

			$this->doTransition( 'done' );
			return;
		}

		// Site statistics: compute the deltas for edit, article and page counts in a
		// deferred callback.
		DeferredUpdates::addCallableUpdate( function () {
			if (
				$this->options['oldcountable'] === 'no-change' ||
				( !$this->options['changed'] && !$this->options['moved'] )
			) {
				$good = 0;
			} elseif ( $this->options['created'] ) {
				$good = (int)$this->isCountable();
			} elseif ( $this->options['oldcountable'] !== null ) {
				$good = (int)$this->isCountable()
					- (int)$this->options['oldcountable'];
			} elseif ( isset( $this->pageState['oldCountable'] ) ) {
				$good = (int)$this->isCountable()
					- (int)$this->pageState['oldCountable'];
			} else {
				$good = 0;
			}
			$edits = $this->options['changed'] ? 1 : 0;
			$pages = $this->options['created'] ? 1 : 0;

			DeferredUpdates::addUpdate( SiteStatsUpdate::factory(
				[ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
			) );
		} );

		// TODO: make search infrastructure aware of slots!
		$mainSlot = $this->revision->getSlot( SlotRecord::MAIN );
		if ( !$mainSlot->isInherited() && !$this->isContentDeleted() ) {
			DeferredUpdates::addUpdate( new SearchUpdate( $id, $title, $mainSlot->getContent() ) );
		}

		// If this is another user's talk page, update newtalk.
		// Don't do this if $options['changed'] = false (null-edits) nor if
		// it's a minor edit and the user making the edit doesn't generate notifications for those.
		if ( $this->options['changed']
			&& $title->getNamespace() == NS_USER_TALK
			&& $shortTitle != $legacyUser->getTitleKey()
			&& !( $this->revision->isMinor() && MediaWikiServices::getInstance()
				->getPermissionManager()
				->userHasRight( $legacyUser, 'nominornewtalk' ) )
		) {
			$recipient = User::newFromName( $shortTitle, false );
			if ( !$recipient ) {
				wfDebug( __METHOD__ . ": invalid username" );
			} else {
				// Allow extensions to prevent user notification
				// when a new message is added to their talk page
				// TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
				if ( $this->hookRunner->onArticleEditUpdateNewTalk( $wikiPage, $recipient ) ) {
					$revRecord = $this->revision;
					$talkPageNotificationManager = MediaWikiServices::getInstance()
						->getTalkPageNotificationManager();
					if ( User::isIP( $shortTitle ) ) {
						// An anonymous user
						$talkPageNotificationManager->setUserHasNewMessages( $recipient, $revRecord );
					} elseif ( $recipient->isLoggedIn() ) {
						$talkPageNotificationManager->setUserHasNewMessages( $recipient, $revRecord );
					} else {
						wfDebug( __METHOD__ . ": don't need to notify a nonexistent user" );
					}
				}
			}
		}

		// An edit to the main slot of a page in the MediaWiki namespace changes a message
		// override; pass null as the content if the content is deleted.
		if ( $title->getNamespace() == NS_MEDIAWIKI
			&& $this->getRevisionSlotsUpdate()->isModifiedSlot( SlotRecord::MAIN )
		) {
			$mainContent = $this->isContentDeleted() ? null : $this->getRawContent( SlotRecord::MAIN );

			$this->messageCache->updateMessageOverride( $title, $mainContent );
		}

		// TODO: move onArticleCreate and onArticle into a PageEventEmitter service
		if ( $this->options['created'] ) {
			WikiPage::onArticleCreate( $title );
		} elseif ( $this->options['changed'] ) { // T52785
			WikiPage::onArticleEdit( $title, $this->revision, $this->getTouchedSlotRoles() );
		} elseif ( $this->options['restored'] ) {
			MediaWikiServices::getInstance()->getMainWANObjectCache()->touchCheckKey(
				"DerivedPageDataUpdater:restore:page:$id"
			);
		}

		$oldRevisionRecord = $this->getParentRevision();

		// TODO: In the wiring, register a listener for this on the new PageEventEmitter
		ResourceLoaderWikiModule::invalidateModuleCache(
			$title,
			$oldRevisionRecord,
			$this->revision,
			$this->loadbalancerFactory->getLocalDomainID()
		);

		$this->doTransition( 'done' );
	}

	/**
	 * Do secondary data updates (e.g. updating link tables) or schedule them as deferred updates
	 *
	 * MCR note: this method is temporarily exposed via WikiPage::doSecondaryDataUpdates.
	 *
	 * @param array $options
	 *   - recursive: make the update recursive, i.e.
also update pages which transclude the
	 *     current page or otherwise depend on it (default: false)
	 *   - defer: one of the DeferredUpdates constants, or false to run immediately after waiting
	 *     for replication of the changes from the SecondaryDataUpdates hooks (default: false)
	 * @throws InvalidArgumentException if the defer option has an unsupported value
	 * @throws LogicException if no revision with an ID has been prepared
	 * @since 1.32
	 */
	public function doSecondaryDataUpdates( array $options = [] ) {
		$this->assertHasRevision( __METHOD__ );
		$options += [ 'recursive' => false, 'defer' => false ];
		$deferValues = [ false, DeferredUpdates::PRESEND, DeferredUpdates::POSTSEND ];
		$defer = $options['defer'];
		if ( !in_array( $defer, $deferValues, true ) ) {
			// Arrays, objects and null cannot be concatenated into the message reliably:
			// they would trigger a conversion warning, a fatal Error, or print nothing.
			// Report the type name for those; scalars are printed exactly as before.
			$shownValue = is_scalar( $defer ) ? (string)$defer : gettype( $defer );
			throw new InvalidArgumentException( 'Invalid value for defer: ' . $shownValue );
		}

		// The triggering user defaults to the user bound to this updater; the update
		// below is given a full User object.
		$triggeringUser = $this->options['triggeringUser'] ?? $this->user;
		if ( !$triggeringUser instanceof User ) {
			$triggeringUser = User::newFromIdentity( $triggeringUser );
		}
		$causeAction = $this->options['causeAction'] ?? 'unknown';
		$causeAgent = $this->options['causeAgent'] ?? 'unknown';

		// Bundle all of the data updates into a single deferred update wrapper so that
		// any failure will cause at most one refreshLinks job to be enqueued by
		// DeferredUpdates::doUpdates(). This is hard to do when there are many separate
		// updates that are not defined as being related.
		$update = new RefreshSecondaryDataUpdate(
			$this->loadbalancerFactory,
			$triggeringUser,
			$this->wikiPage,
			$this->revision,
			$this,
			[ 'recursive' => $options['recursive'] ]
		);
		$update->setCause( $causeAction, $causeAgent );

		if ( $defer === false ) {
			DeferredUpdates::attemptUpdate( $update, $this->loadbalancerFactory );
		} else {
			DeferredUpdates::addUpdate( $update, $defer );
		}
	}

	/**
	 * Saves the canonical ParserOutput of the target revision to the parser cache.
	 *
	 * @throws LogicException if no revision with an ID has been prepared
	 */
	public function doParserCacheUpdate() {
		$this->assertHasRevision( __METHOD__ );

		$wikiPage = $this->getWikiPage(); // TODO: ParserCache should accept a RevisionRecord instead

		// NOTE: this may trigger the first parsing of the new content after an edit (when not
		// using pre-generated stashed output).
		// XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
		// to be performed post-send. The client could already follow a HTTP redirect to the
		// page view, but would then have to wait for a response until rendering is complete.
		$output = $this->getCanonicalParserOutput();

		// Save it to the parser cache. Use the revision timestamp in the case of a
		// freshly saved edit, as that matches page_touched and a mismatch would trigger an
		// unnecessary reparse.
		$timestamp = $this->options['newrev'] ? $this->revision->getTimestamp()
			: $output->getCacheTime();
		$this->parserCache->save(
			$output, $wikiPage, $this->getCanonicalParserOptions(),
			$timestamp, $this->revision->getId()
		);
	}

}