1<?php 2 3use MediaWiki\Json\JsonUnserializable; 4use MediaWiki\Json\JsonUnserializableTrait; 5use MediaWiki\Json\JsonUnserializer; 6use MediaWiki\Logger\LoggerFactory; 7use Wikimedia\Reflection\GhostFieldAccessTrait; 8 9/** 10 * Output of the PHP parser. 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License as published by 14 * the Free Software Foundation; either version 2 of the License, or 15 * (at your option) any later version. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU General Public License for more details. 21 * 22 * You should have received a copy of the GNU General Public License along 23 * with this program; if not, write to the Free Software Foundation, Inc., 24 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 25 * http://www.gnu.org/copyleft/gpl.html 26 * 27 * @file 28 * @ingroup Parser 29 */ 30 31class ParserOutput extends CacheTime { 32 use GhostFieldAccessTrait; 33 use JsonUnserializableTrait; 34 35 /** 36 * Feature flags to indicate to extensions that MediaWiki core supports and 37 * uses getText() stateless transforms. 38 * 39 * @since 1.31 40 */ 41 public const SUPPORTS_STATELESS_TRANSFORMS = 1; 42 43 /** 44 * @since 1.31 45 */ 46 public const SUPPORTS_UNWRAP_TRANSFORM = 1; 47 48 /** 49 * @var string|null The output text 50 */ 51 public $mText = null; 52 53 /** 54 * @var array List of the full text of language links, 55 * in the order they appear. 56 */ 57 public $mLanguageLinks; 58 59 /** 60 * @var array Map of category names to sort keys 61 */ 62 public $mCategories; 63 64 /** 65 * @var array Page status indicators, usually displayed in top-right corner. 66 */ 67 public $mIndicators = []; 68 69 /** 70 * @var string Title text of the chosen language variant, as HTML. 
71 */ 72 public $mTitleText; 73 74 /** 75 * @var int[][] 2-D map of NS/DBK to ID for the links in the document. 76 * ID=zero for broken. 77 * @phan-var array<int,array<string,int>> 78 */ 79 public $mLinks = []; 80 81 /** 82 * @var array Keys are DBKs for the links to special pages in the document. 83 * @since 1.35 84 */ 85 public $mLinksSpecial = []; 86 87 /** 88 * @var array 2-D map of NS/DBK to ID for the template references. 89 * ID=zero for broken. 90 */ 91 public $mTemplates = []; 92 93 /** 94 * @var array 2-D map of NS/DBK to rev ID for the template references. 95 * ID=zero for broken. 96 */ 97 public $mTemplateIds = []; 98 99 /** 100 * @var array DB keys of the images used, in the array key only 101 */ 102 public $mImages = []; 103 104 /** 105 * @var array DB keys of the images used mapped to sha1 and MW timestamp. 106 */ 107 public $mFileSearchOptions = []; 108 109 /** 110 * @var array External link URLs, in the key only. 111 */ 112 public $mExternalLinks = []; 113 114 /** 115 * @var array 2-D map of prefix/DBK (in keys only) 116 * for the inline interwiki links in the document. 117 */ 118 public $mInterwikiLinks = []; 119 120 /** 121 * @var bool Show a new section link? 122 */ 123 public $mNewSection = false; 124 125 /** 126 * @var bool Hide the new section link? 127 */ 128 public $mHideNewSection = false; 129 130 /** 131 * @var bool No gallery on category page? (__NOGALLERY__). 132 */ 133 public $mNoGallery = false; 134 135 /** 136 * @var array Items to put in the <head> section 137 */ 138 public $mHeadItems = []; 139 140 /** 141 * @var array Modules to be loaded by ResourceLoader 142 */ 143 public $mModules = []; 144 145 /** 146 * @var array Modules of which only the CSSS will be loaded by ResourceLoader. 147 */ 148 public $mModuleStyles = []; 149 150 /** 151 * @var array JavaScript config variable for mw.config combined with this page. 152 */ 153 public $mJsConfigVars = []; 154 155 /** 156 * @var array Hook tags as per $wgParserOutputHooks. 
157 */ 158 public $mOutputHooks = []; 159 160 /** 161 * @var array Warning text to be returned to the user. 162 * Wikitext formatted, in the key only. 163 */ 164 public $mWarnings = []; 165 166 /** 167 * @var array Table of contents 168 */ 169 public $mSections = []; 170 171 /** 172 * @var array Name/value pairs to be cached in the DB. 173 */ 174 public $mProperties = []; 175 176 /** 177 * @var string HTML of the TOC. 178 */ 179 public $mTOCHTML = ''; 180 181 /** 182 * @var string Timestamp of the revision. 183 */ 184 public $mTimestamp; 185 186 /** 187 * @var bool Whether OOUI should be enabled. 188 */ 189 public $mEnableOOUI = false; 190 191 /** 192 * @var string 'index' or 'noindex'? Any other value will result in no change. 193 */ 194 private $mIndexPolicy = ''; 195 196 /** 197 * @var array extra data used by extensions. 198 */ 199 private $mExtensionData = []; 200 201 /** 202 * @var array Parser limit report data. 203 */ 204 private $mLimitReportData = []; 205 206 /** @var array Parser limit report data for JSON */ 207 private $mLimitReportJSData = []; 208 209 /** 210 * @var array Timestamps for getTimeSinceStart(). 211 */ 212 private $mParseStartTime = []; 213 214 /** 215 * @var bool Whether to emit X-Frame-Options: DENY. 216 */ 217 private $mPreventClickjacking = false; 218 219 /** 220 * @var array Extra script-src for CSP 221 */ 222 private $mExtraScriptSrcs = []; 223 224 /** 225 * @var array Extra default-src for CSP [Everything but script and style] 226 */ 227 private $mExtraDefaultSrcs = []; 228 229 /** 230 * @var array Extra style-src for CSP 231 */ 232 private $mExtraStyleSrcs = []; 233 234 /** 235 * @var array Generic flags. 
236 */ 237 private $mFlags = []; 238 239 /** @var string[] */ 240 private const SPECULATIVE_FIELDS = [ 241 'speculativePageIdUsed', 242 'mSpeculativeRevId', 243 'revisionTimestampUsed' 244 ]; 245 246 /** @var int|null Assumed rev ID for {{REVISIONID}} if no revision is set */ 247 private $mSpeculativeRevId; 248 /** @var int|null Assumed page ID for {{PAGEID}} if no revision is set */ 249 private $speculativePageIdUsed; 250 /** @var int|null Assumed rev timestamp for {{REVISIONTIMESTAMP}} if no revision is set */ 251 private $revisionTimestampUsed; 252 253 /** @var string|null SHA-1 base 36 hash of any self-transclusion */ 254 private $revisionUsedSha1Base36; 255 256 /** string CSS classes to use for the wrapping div, stored in the array keys. 257 * If no class is given, no wrapper is added. 258 */ 259 private $mWrapperDivClasses = []; 260 261 /** @var int Upper bound of expiry based on parse duration */ 262 private $mMaxAdaptiveExpiry = INF; 263 264 private const EDITSECTION_REGEX = 265 '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#s'; 266 267 // finalizeAdaptiveCacheExpiry() uses TTL = MAX( m * PARSE_TIME + b, MIN_AR_TTL) 268 // Current values imply that m=3933.333333 and b=-333.333333 269 // See https://www.nngroup.com/articles/website-response-times/ 270 private const PARSE_FAST_SEC = 0.100; // perceived "fast" page parse 271 private const PARSE_SLOW_SEC = 1.0; // perceived "slow" page parse 272 private const FAST_AR_TTL = 60; // adaptive TTL for "fast" pages 273 private const SLOW_AR_TTL = 3600; // adaptive TTL for "slow" pages 274 private const MIN_AR_TTL = 15; // min adaptive TTL (for sanity, pool counter, and edit stashing) 275 276 /** 277 * @param string|null $text HTML. Use null to indicate that this ParserOutput contains only 278 * meta-data, and the HTML output is undetermined, as opposed to empty. Passing null 279 * here causes hasText() to return false. 
280 * @param array $languageLinks 281 * @param array $categoryLinks 282 * @param bool $unused 283 * @param string $titletext 284 */ 285 public function __construct( $text = '', $languageLinks = [], $categoryLinks = [], 286 $unused = false, $titletext = '' 287 ) { 288 $this->mText = $text; 289 $this->mLanguageLinks = $languageLinks; 290 $this->mCategories = $categoryLinks; 291 $this->mTitleText = $titletext; 292 } 293 294 /** 295 * Returns true if text was passed to the constructor, or set using setText(). Returns false 296 * if null was passed to the $text parameter of the constructor to indicate that this 297 * ParserOutput only contains meta-data, and the HTML output is undetermined. 298 * 299 * @since 1.32 300 * 301 * @return bool Whether this ParserOutput contains rendered text. If this returns false, the 302 * ParserOutput contains meta-data only. 303 */ 304 public function hasText() { 305 return ( $this->mText !== null ); 306 } 307 308 /** 309 * Get the cacheable text with <mw:editsection> markers still in it. The 310 * return value is suitable for writing back via setText() but is not valid 311 * for display to the user. 312 * 313 * @return string 314 * @since 1.27 315 */ 316 public function getRawText() { 317 if ( $this->mText === null ) { 318 throw new LogicException( 'This ParserOutput contains no text!' ); 319 } 320 321 return $this->mText; 322 } 323 324 /** 325 * Get the output HTML 326 * 327 * @param array $options (since 1.31) Transformations to apply to the HTML 328 * - allowTOC: (bool) Show the TOC, assuming there were enough headings 329 * to generate one and `__NOTOC__` wasn't used. Default is true, 330 * but might be statefully overridden. 331 * - enableSectionEditLinks: (bool) Include section edit links, assuming 332 * section edit link tokens are present in the HTML. Default is true, 333 * but might be statefully overridden. 334 * - skin: (Skin) Skin object used for transforming section edit links. 
335 * - unwrap: (bool) Return text without a wrapper div. Default is false, 336 * meaning a wrapper div will be added if getWrapperDivClass() returns 337 * a non-empty string. 338 * - wrapperDivClass: (string) Wrap the output in a div and apply the given 339 * CSS class to that div. This overrides the output of getWrapperDivClass(). 340 * Setting this to an empty string has the same effect as 'unwrap' => true. 341 * - deduplicateStyles: (bool) When true, which is the default, `<style>` 342 * tags with the `data-mw-deduplicate` attribute set are deduplicated by 343 * value of the attribute: all but the first will be replaced by `<link 344 * rel="mw-deduplicated-inline-style" href="mw-data:..."/>` tags, where 345 * the scheme-specific-part of the href is the (percent-encoded) value 346 * of the `data-mw-deduplicate` attribute. 347 * @return string HTML 348 * @return-taint escaped 349 */ 350 public function getText( $options = [] ) { 351 $options += [ 352 'allowTOC' => true, 353 'enableSectionEditLinks' => true, 354 'skin' => null, 355 'unwrap' => false, 356 'deduplicateStyles' => true, 357 'wrapperDivClass' => $this->getWrapperDivClass(), 358 ]; 359 $text = $this->getRawText(); 360 361 Hooks::runner()->onParserOutputPostCacheTransform( $this, $text, $options ); 362 363 if ( $options['wrapperDivClass'] !== '' && !$options['unwrap'] ) { 364 $text = Html::rawElement( 'div', [ 'class' => $options['wrapperDivClass'] ], $text ); 365 } 366 367 if ( $options['enableSectionEditLinks'] ) { 368 // TODO: Passing the skin should be required 369 $skin = $options['skin'] ?: RequestContext::getMain()->getSkin(); 370 371 $text = preg_replace_callback( 372 self::EDITSECTION_REGEX, 373 function ( $m ) use ( $skin ) { 374 $editsectionPage = Title::newFromText( htmlspecialchars_decode( $m[1] ) ); 375 $editsectionSection = htmlspecialchars_decode( $m[2] ); 376 $editsectionContent = isset( $m[4] ) ? 
Sanitizer::decodeCharReferences( $m[3] ) : null; 377 378 if ( !is_object( $editsectionPage ) ) { 379 LoggerFactory::getInstance( 'Parser' ) 380 ->error( 381 'ParserOutput::getText(): bad title in editsection placeholder', 382 [ 383 'placeholder' => $m[0], 384 'editsectionPage' => $m[1], 385 'titletext' => $this->getTitleText(), 386 'phab' => 'T261347' 387 ] 388 ); 389 return ''; 390 } 391 392 return $skin->doEditSectionLink( 393 $editsectionPage, 394 $editsectionSection, 395 $editsectionContent, 396 $skin->getLanguage() 397 ); 398 }, 399 $text 400 ); 401 } else { 402 $text = preg_replace( self::EDITSECTION_REGEX, '', $text ); 403 } 404 405 if ( $options['allowTOC'] ) { 406 $text = str_replace( [ Parser::TOC_START, Parser::TOC_END ], '', $text ); 407 } else { 408 $text = preg_replace( 409 '#' . preg_quote( Parser::TOC_START, '#' ) . '.*?' . preg_quote( Parser::TOC_END, '#' ) . '#s', 410 '', 411 $text 412 ); 413 } 414 415 if ( $options['deduplicateStyles'] ) { 416 $seen = []; 417 $text = preg_replace_callback( 418 '#<style\s+([^>]*data-mw-deduplicate\s*=[^>]*)>.*?</style>#s', 419 static function ( $m ) use ( &$seen ) { 420 $attr = Sanitizer::decodeTagAttributes( $m[1] ); 421 if ( !isset( $attr['data-mw-deduplicate'] ) ) { 422 return $m[0]; 423 } 424 425 $key = $attr['data-mw-deduplicate']; 426 if ( !isset( $seen[$key] ) ) { 427 $seen[$key] = true; 428 return $m[0]; 429 } 430 431 // We were going to use an empty <style> here, but there 432 // was concern that would be too much overhead for browsers. 433 // So let's hope a <link> with a non-standard rel and href isn't 434 // going to be misinterpreted or mangled by any subsequent processing. 435 return Html::element( 'link', [ 436 'rel' => 'mw-deduplicated-inline-style', 437 'href' => "mw-data:" . wfUrlencode( $key ), 438 ] ); 439 }, 440 $text 441 ); 442 } 443 444 // Hydrate slot section header placeholders generated by RevisionRenderer. 
445 $text = preg_replace_callback( 446 '#<mw:slotheader>(.*?)</mw:slotheader>#', 447 static function ( $m ) { 448 $role = htmlspecialchars_decode( $m[1] ); 449 // TODO: map to message, using the interface language. Set lang="xyz" accordingly. 450 $headerText = $role; 451 return $headerText; 452 }, 453 $text 454 ); 455 return $text; 456 } 457 458 /** 459 * Adds a comment notice about cache state to the text of the page 460 * @param string $msg 461 * @internal used by ParserCache 462 */ 463 public function addCacheMessage( string $msg ) { 464 $this->mText .= "\n<!-- $msg\n -->\n"; 465 } 466 467 /** 468 * Add a CSS class to use for the wrapping div. If no class is given, no wrapper is added. 469 * 470 * @param string $class 471 */ 472 public function addWrapperDivClass( $class ) { 473 $this->mWrapperDivClasses[$class] = true; 474 } 475 476 /** 477 * Clears the CSS class to use for the wrapping div, effectively disabling the wrapper div 478 * until addWrapperDivClass() is called. 479 */ 480 public function clearWrapperDivClass() { 481 $this->mWrapperDivClasses = []; 482 } 483 484 /** 485 * Returns the class (or classes) to be used with the wrapper div for this otuput. 486 * If there is no wrapper class given, no wrapper div should be added. 487 * The wrapper div is added automatically by getText(). 
488 * 489 * @return string 490 */ 491 public function getWrapperDivClass() { 492 return implode( ' ', array_keys( $this->mWrapperDivClasses ) ); 493 } 494 495 /** 496 * @param int $id 497 * @since 1.28 498 */ 499 public function setSpeculativeRevIdUsed( $id ) { 500 $this->mSpeculativeRevId = $id; 501 } 502 503 /** 504 * @return int|null 505 * @since 1.28 506 */ 507 public function getSpeculativeRevIdUsed() { 508 return $this->mSpeculativeRevId; 509 } 510 511 /** 512 * @param int $id 513 * @since 1.34 514 */ 515 public function setSpeculativePageIdUsed( $id ) { 516 $this->speculativePageIdUsed = $id; 517 } 518 519 /** 520 * @return int|null 521 * @since 1.34 522 */ 523 public function getSpeculativePageIdUsed() { 524 return $this->speculativePageIdUsed; 525 } 526 527 /** 528 * @param string $timestamp TS_MW timestamp 529 * @since 1.34 530 */ 531 public function setRevisionTimestampUsed( $timestamp ) { 532 $this->revisionTimestampUsed = $timestamp; 533 } 534 535 /** 536 * @return string|null TS_MW timestamp or null if not used 537 * @since 1.34 538 */ 539 public function getRevisionTimestampUsed() { 540 return $this->revisionTimestampUsed; 541 } 542 543 /** 544 * @param string $hash Lowercase SHA-1 base 36 hash 545 * @since 1.34 546 */ 547 public function setRevisionUsedSha1Base36( $hash ) { 548 if ( $hash === null ) { 549 return; // e.g. 
RevisionRecord::getSha1() returned null 550 } 551 552 if ( 553 $this->revisionUsedSha1Base36 !== null && 554 $this->revisionUsedSha1Base36 !== $hash 555 ) { 556 $this->revisionUsedSha1Base36 = ''; // mismatched 557 } else { 558 $this->revisionUsedSha1Base36 = $hash; 559 } 560 } 561 562 /** 563 * @return string|null Lowercase SHA-1 base 36 hash, null if unused, or "" on inconsistency 564 * @since 1.34 565 */ 566 public function getRevisionUsedSha1Base36() { 567 return $this->revisionUsedSha1Base36; 568 } 569 570 public function &getLanguageLinks() { 571 return $this->mLanguageLinks; 572 } 573 574 public function getInterwikiLinks() { 575 return $this->mInterwikiLinks; 576 } 577 578 public function getCategoryLinks() { 579 return array_keys( $this->mCategories ); 580 } 581 582 public function &getCategories() { 583 return $this->mCategories; 584 } 585 586 /** 587 * @return array 588 * @since 1.25 589 */ 590 public function getIndicators() { 591 return $this->mIndicators; 592 } 593 594 public function getTitleText() { 595 return $this->mTitleText; 596 } 597 598 public function getSections() { 599 return $this->mSections; 600 } 601 602 public function &getLinks() { 603 return $this->mLinks; 604 } 605 606 /** 607 * @return array Keys are DBKs for the links to special pages in the document 608 * @since 1.35 609 */ 610 public function &getLinksSpecial() { 611 return $this->mLinksSpecial; 612 } 613 614 public function &getTemplates() { 615 return $this->mTemplates; 616 } 617 618 public function &getTemplateIds() { 619 return $this->mTemplateIds; 620 } 621 622 public function &getImages() { 623 return $this->mImages; 624 } 625 626 public function &getFileSearchOptions() { 627 return $this->mFileSearchOptions; 628 } 629 630 public function &getExternalLinks() { 631 return $this->mExternalLinks; 632 } 633 634 public function setNoGallery( $value ) { 635 $this->mNoGallery = (bool)$value; 636 } 637 638 public function getNoGallery() { 639 return $this->mNoGallery; 640 } 641 642 
/**
 * @return array The collected head items
 */
public function getHeadItems() {
	return $this->mHeadItems;
}

/**
 * @return array The collected modules
 */
public function getModules() {
	return $this->mModules;
}

/**
 * @return array The collected style modules
 */
public function getModuleStyles() {
	return $this->mModuleStyles;
}

/**
 * @return array The collected JavaScript config variables
 * @since 1.23
 */
public function getJsConfigVars() {
	return $this->mJsConfigVars;
}

/**
 * @return array The registered output hooks, cast to an array
 */
public function getOutputHooks() {
	return (array)$this->mOutputHooks;
}

/**
 * @return array The warning texts; they are stored as keys, so this returns the key list
 */
public function getWarnings() {
	$warningTexts = array_keys( $this->mWarnings );

	return $warningTexts;
}

/**
 * @return string The stored index policy
 */
public function getIndexPolicy() {
	return $this->mIndexPolicy;
}

/**
 * @return string The stored TOC HTML
 */
public function getTOCHTML() {
	return $this->mTOCHTML;
}

/**
 * @return string|null TS_MW timestamp of the revision content
 */
public function getTimestamp() {
	return $this->mTimestamp;
}

/**
 * @return array The stored limit report data
 */
public function getLimitReportData() {
	return $this->mLimitReportData;
}

/**
 * @return array The stored limit report data for JS
 */
public function getLimitReportJSData() {
	return $this->mLimitReportJSData;
}

/**
 * @return bool The flag stored by setEnableOOUI()
 */
public function getEnableOOUI() {
	return $this->mEnableOOUI;
}

/**
 * Get extra Content-Security-Policy 'default-src' directives
 * @since 1.35
 * @return array
 */
public function getExtraCSPDefaultSrcs() {
	return $this->mExtraDefaultSrcs;
}

/**
 * Get extra Content-Security-Policy 'script-src' directives
 * @since 1.35
 * @return array
 */
public function getExtraCSPScriptSrcs() {
	return $this->mExtraScriptSrcs;
}

/**
 * Get extra Content-Security-Policy 'style-src' directives
 * @since 1.35
 * @return array
 */
public function getExtraCSPStyleSrcs() {
	return $this->mExtraStyleSrcs;
}

/**
 * Update the text through wfSetVar().
 *
 * @param string|null $text
 * @return mixed Whatever wfSetVar() returns (presumably the previous value; confirm
 *   against the wfSetVar() documentation)
 */
public function setText( $text ) {
	return wfSetVar( $this->mText, $text );
}

/**
 * Update the language links through wfSetVar().
 *
 * @param array|null $ll
 * @return mixed Whatever wfSetVar() returns
 */
public function setLanguageLinks( $ll ) {
	return wfSetVar( $this->mLanguageLinks, $ll );
}

/**
 * Update the category map through wfSetVar().
 *
 * @param array|null $cl
 * @return mixed Whatever wfSetVar() returns
 */
public function setCategoryLinks( $cl ) {
	return wfSetVar( $this->mCategories, $cl );
}

/**
 * Update the title text through wfSetVar().
 *
 * @param string|null $t
 * @return mixed Whatever wfSetVar() returns
 */
public function setTitleText( $t ) {
	return wfSetVar( $this->mTitleText, $t );
}

/**
 * Update the sections through wfSetVar().
 *
 * @param array|null $toc
 * @return mixed Whatever wfSetVar() returns
 */
public function setSections( $toc ) {
	return wfSetVar( $this->mSections, $toc );
}

/**
 * Update the index policy through wfSetVar().
 *
 * @param string|null $policy
 * @return mixed Whatever wfSetVar() returns
 */
public function setIndexPolicy( $policy ) {
	return wfSetVar( $this->mIndexPolicy, $policy );
}

/**
 * Update the TOC HTML through wfSetVar().
 *
 * @param string|null $tochtml
 * @return mixed Whatever wfSetVar() returns
 */
public function setTOCHTML( $tochtml ) {
	return wfSetVar( $this->mTOCHTML, $tochtml );
}

/**
 * Update the revision timestamp through wfSetVar().
 *
 * @param string|null $timestamp
 * @return mixed Whatever wfSetVar() returns
 */
public function setTimestamp( $timestamp ) {
	return wfSetVar( $this->mTimestamp, $timestamp );
}

/**
 * Add a category, or replace the sort key of one that is already present.
 *
 * @param string $c Key under which the category is stored
 * @param string $sort Sort key stored for that category
 */
public function addCategory( $c, $sort ) {
	$this->mCategories[$c] = $sort;
}

/**
 * Set the content of a page status indicator, replacing any previous
 * content stored under the same ID.
 *
 * @param string $id
 * @param string $content
 * @since 1.25
 */
public function setIndicator( $id, $content ) {
	$this->mIndicators[$id] = $content;
}

/**
 * Enables OOUI, if true, in any OutputPage instance this ParserOutput
 * object is added to.
 *
 * @since 1.26
 * @param bool $enable If OOUI should be enabled or not
 */
public function setEnableOOUI( $enable = false ) {
	$this->mEnableOOUI = $enable;
}

/**
 * Append a language link to the list.
 *
 * @param string $t
 */
public function addLanguageLink( $t ) {
	$this->mLanguageLinks[] = $t;
}

/**
 * Record a warning. The text is used as the array key, so the same
 * warning is only kept once.
 *
 * @param string $s
 */
public function addWarning( $s ) {
	$this->mWarnings[$s] = 1;
}

/**
 * Append an output hook entry.
 *
 * @param string $hook
 * @param mixed $data Stored alongside the hook name
 */
public function addOutputHook( $hook, $data = false ) {
	$entry = [ $hook, $data ];
	$this->mOutputHooks[] = $entry;
}

/**
 * @param mixed $value Cast to bool before being stored
 */
public function setNewSection( $value ) {
	$this->mNewSection = (bool)$value;
}

/**
 * @param mixed $value Cast to bool before being stored
 */
public function hideNewSection( $value ) {
	$this->mHideNewSection = (bool)$value;
}

/**
 * @return bool The flag stored by hideNewSection()
 */
public function getHideNewSection() {
	return (bool)$this->mHideNewSection;
}

/**
 * @return bool The flag stored by setNewSection()
 */
public function getNewSection() {
	return (bool)$this->mNewSection;
}

/**
 * Checks whether a URL points to the given server.
 *
 * The match is case-insensitive and anchored at the start of the URL. The
 * server must be followed by a path, query or fragment delimiter, or by the
 * end of the URL. For a protocol-relative server ("//..."), URLs with an
 * explicit "http:" or "https:" prefix match as well.
 *
 * @param string $internal The server to check against
 * @param string $url The url to check
 * @return bool
 */
public static function isLinkInternal( $internal, $url ) {
	# If server is proto relative, check also for http/https links
	$schemePart = substr( $internal, 0, 2 ) === '//' ? '(?:https?:)?' : '';
	# check for query/path/anchor or end of link in each case
	$pattern = '/^' . $schemePart . preg_quote( $internal, '/' ) . '(?:[\?\/\#]|$)/i';

	return (bool)preg_match( $pattern, $url );
}

/**
 * Register an external link URL.
 *
 * The URL is normalised first. Unless $wgRegisterInternalExternals is set,
 * URLs pointing at $wgServer are not recorded.
 *
 * @param string $url
 */
public function addExternalLink( $url ) {
	# We don't register links pointing to our own server, unless... :-)
	global $wgServer, $wgRegisterInternalExternals;

	# Replace unnecessary URL escape codes with the referenced character
	# This prevents spammers from hiding links from the filters
	$url = Parser::normalizeLinkUrl( $url );

	if ( !$wgRegisterInternalExternals && self::isLinkInternal( $wgServer, $url ) ) {
		return;
	}

	$this->mExternalLinks[$url] = 1;
}

/**
 * Record a local or interwiki inline link for saving in future link tables.
 *
 * @param Title $title
 * @param int|null $id Optional known page_id so we can skip the lookup
 */
public function addLink( Title $title, $id = null ) {
	if ( $title->isExternal() ) {
		// Don't record interwikis in pagelinks
		$this->addInterwikiLink( $title );
		return;
	}

	$namespace = $title->getNamespace();
	$dbKey = $title->getDBkey();

	if ( $namespace === NS_MEDIA ) {
		// Normalize this pseudo-alias if it makes it down here...
		$namespace = NS_FILE;
	} else {
		if ( $namespace === NS_SPECIAL ) {
			// We don't want to record Special: links in the database, so put them in a separate place.
			// It might actually be wise to, but we'd need to do some normalization.
			$this->mLinksSpecial[$dbKey] = 1;
			return;
		}
		if ( $dbKey === '' ) {
			// Don't record self links - [[#Foo]]
			return;
		}
	}

	if ( !isset( $this->mLinks[$namespace] ) ) {
		$this->mLinks[$namespace] = [];
	}
	// Only look the page ID up when the caller did not supply one.
	$this->mLinks[$namespace][$dbKey] = $id ?? $title->getArticleID();
}

/**
 * Register a file dependency for this output
 * @param string $name Title dbKey
 * @param string|false|null $timestamp MW timestamp of file creation (or false if non-existing)
 * @param string|false|null $sha1 Base 36 SHA-1 of file (or false if non-existing)
 */
public function addImage( $name, $timestamp = null, $sha1 = null ) {
	$this->mImages[$name] = 1;

	// Search options are only recorded when both pieces of information were given.
	if ( $timestamp === null || $sha1 === null ) {
		return;
	}

	$this->mFileSearchOptions[$name] = [ 'time' => $timestamp, 'sha1' => $sha1 ];
}

/**
 * Register a template dependency for this output
 * @param Title $title
 * @param int $page_id
 * @param int $rev_id
 */
public function addTemplate( $title, $page_id, $rev_id ) {
	$namespace = $title->getNamespace();
	$dbKey = $title->getDBkey();

	if ( !isset( $this->mTemplates[$namespace] ) ) {
		$this->mTemplates[$namespace] = [];
	}
	if ( !isset( $this->mTemplateIds[$namespace] ) ) {
		$this->mTemplateIds[$namespace] = [];
	}

	$this->mTemplates[$namespace][$dbKey] = $page_id;
	// For versioning
	$this->mTemplateIds[$namespace][$dbKey] = $rev_id;
}

/**
 * @param Title $title Title object, must be an interwiki link
 * @throws MWException If given invalid input
 */
public function addInterwikiLink( $title ) {
	if ( !$title->isExternal() ) {
		throw new MWException( 'Non-interwiki link passed, internal parser error.' );
	}

	$interwiki = $title->getInterwiki();
	if ( !isset( $this->mInterwikiLinks[$interwiki] ) ) {
		$this->mInterwikiLinks[$interwiki] = [];
	}

	$dbKey = $title->getDBkey();
	$this->mInterwikiLinks[$interwiki][$dbKey] = 1;
}

/**
 * Add some text to the "<head>".
 * If $tag is set, the section with that tag will only be included once
 * in a given page.
 * @param string $section
 * @param string|bool $tag
 */
public function addHeadItem( $section, $tag = false ) {
	if ( $tag === false ) {
		// Untagged items are simply appended.
		$this->mHeadItems[] = $section;
	} else {
		// Tagged items replace any earlier item stored under the same tag.
		$this->mHeadItems[$tag] = $section;
	}
}

/**
 * @see OutputPage::addModules
 * @param string|array $modules
 */
public function addModules( $modules ) {
	$extra = (array)$modules;
	$this->mModules = array_merge( $this->mModules, $extra );
}

/**
 * @see OutputPage::addModuleStyles
 * @param string|array $modules
 */
public function addModuleStyles( $modules ) {
	$extra = (array)$modules;
	$this->mModuleStyles = array_merge( $this->mModuleStyles, $extra );
}

/**
 * Add one or more variables to be set in mw.config in JavaScript.
 *
 * @param string|array $keys Key or array of key/value pairs.
 * @param mixed|null $value [optional] Value of the configuration variable.
 *   Ignored when $keys is an array.
 * @since 1.23
 */
public function addJsConfigVars( $keys, $value = null ) {
	if ( !is_array( $keys ) ) {
		$this->mJsConfigVars[$keys] = $value;
		return;
	}

	foreach ( $keys as $name => $setting ) {
		$this->mJsConfigVars[$name] = $setting;
	}
}

/**
 * Copy items from the OutputPage object into this one
 *
 * Modules, style modules, JS config variables and head items are merged in.
 * Clickjacking prevention is switched on if either object requests it.
 *
 * @param OutputPage $out
 */
public function addOutputPageMetadata( OutputPage $out ) {
	$this->addModules( $out->getModules() );
	$this->addModuleStyles( $out->getModuleStyles() );
	$this->addJsConfigVars( $out->getJsConfigVars() );

	$this->mHeadItems = array_merge( $this->mHeadItems, $out->getHeadItemsArray() );
	$this->mPreventClickjacking = $this->mPreventClickjacking || $out->getPreventClickjacking();
}

/**
 * Add a tracking category, getting the title from a system message,
 * or print a debug message if the title is invalid.
991 * 992 * Any message used with this function should be registered so it will 993 * show up on Special:TrackingCategories. Core messages should be added 994 * to SpecialTrackingCategories::$coreTrackingCategories, and extensions 995 * should add to "TrackingCategories" in their extension.json. 996 * 997 * @todo Migrate some code to TrackingCategories 998 * 999 * @param string $msg Message key 1000 * @param Title $title title of the page which is being tracked 1001 * @return bool Whether the addition was successful 1002 * @since 1.25 1003 */ 1004 public function addTrackingCategory( $msg, $title ) { 1005 if ( $title->isSpecialPage() ) { 1006 wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!" ); 1007 return false; 1008 } 1009 1010 // Important to parse with correct title (T33469) 1011 $cat = wfMessage( $msg ) 1012 ->title( $title ) 1013 ->inContentLanguage() 1014 ->text(); 1015 1016 # Allow tracking categories to be disabled by setting them to "-" 1017 if ( $cat === '-' ) { 1018 return false; 1019 } 1020 1021 $containerCategory = Title::makeTitleSafe( NS_CATEGORY, $cat ); 1022 if ( $containerCategory ) { 1023 $this->addCategory( $containerCategory->getDBkey(), $this->getProperty( 'defaultsort' ) ?: '' ); 1024 return true; 1025 } else { 1026 wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!" ); 1027 return false; 1028 } 1029 } 1030 1031 /** 1032 * Override the title to be used for display 1033 * 1034 * @note this is assumed to have been validated 1035 * (check equal normalisation, etc.) 1036 * 1037 * @note this is expected to be safe HTML, 1038 * ready to be served to the client. 1039 * 1040 * @param string $text Desired title text 1041 */ 1042 public function setDisplayTitle( $text ) { 1043 $this->setTitleText( $text ); 1044 $this->setProperty( 'displaytitle', $text ); 1045 } 1046 1047 /** 1048 * Get the title to be used for display. 
1049 * 1050 * As per the contract of setDisplayTitle(), this is safe HTML, 1051 * ready to be served to the client. 1052 * 1053 * @return string HTML 1054 */ 1055 public function getDisplayTitle() { 1056 $t = $this->getTitleText(); 1057 if ( $t === '' ) { 1058 return false; 1059 } 1060 return $t; 1061 } 1062 1063 /** 1064 * Attach a flag to the output so that it can be checked later to handle special cases 1065 * 1066 * @param string $flag 1067 */ 1068 public function setFlag( $flag ) { 1069 $this->mFlags[$flag] = true; 1070 } 1071 1072 /** 1073 * @param string $flag 1074 * @return bool Whether the given flag was set to signify a special case 1075 */ 1076 public function getFlag( $flag ) { 1077 return isset( $this->mFlags[$flag] ); 1078 } 1079 1080 /** 1081 * @return string[] List of flags signifying special cases 1082 * @since 1.34 1083 */ 1084 public function getAllFlags() { 1085 return array_keys( $this->mFlags ); 1086 } 1087 1088 /** 1089 * Set a property to be stored in the page_props database table. 1090 * 1091 * page_props is a key value store indexed by the page ID. This allows 1092 * the parser to set a property on a page which can then be quickly 1093 * retrieved given the page ID or via a DB join when given the page 1094 * title. 1095 * 1096 * Since 1.23, page_props are also indexed by numeric value, to allow 1097 * for efficient "top k" queries of pages wrt a given property. 1098 * 1099 * setProperty() is thus used to propagate properties from the parsed 1100 * page to request contexts other than a page view of the currently parsed 1101 * article. 1102 * 1103 * Some applications examples: 1104 * 1105 * * To implement hidden categories, hiding pages from category listings 1106 * by storing a property. 1107 * 1108 * * Overriding the displayed article title (ParserOutput::setDisplayTitle()). 
1109 * 1110 * * To implement image tagging, for example displaying an icon on an 1111 * image thumbnail to indicate that it is listed for deletion on 1112 * Wikimedia Commons. 1113 * This is not actually implemented, yet but would be pretty cool. 1114 * 1115 * @note Do not use setProperty() to set a property which is only used 1116 * in a context where the ParserOutput object itself is already available, 1117 * for example a normal page view. There is no need to save such a property 1118 * in the database since the text is already parsed. You can just hook 1119 * OutputPageParserOutput and get your data out of the ParserOutput object. 1120 * 1121 * If you are writing an extension where you want to set a property in the 1122 * parser which is used by an OutputPageParserOutput hook, you have to 1123 * associate the extension data directly with the ParserOutput object. 1124 * Since MediaWiki 1.21, you can use setExtensionData() to do this: 1125 * 1126 * @par Example: 1127 * @code 1128 * $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' ); 1129 * @endcode 1130 * 1131 * And then later, in OutputPageParserOutput or similar: 1132 * 1133 * @par Example: 1134 * @code 1135 * $output->getExtensionData( 'my_ext_foo' ); 1136 * @endcode 1137 * 1138 * In MediaWiki 1.20 and older, you have to use a custom member variable 1139 * within the ParserOutput object: 1140 * 1141 * @par Example: 1142 * @code 1143 * $parser->getOutput()->my_ext_foo = '...'; 1144 * @endcode 1145 * 1146 * @note Only scalar values like numbers and strings are supported 1147 * as a value. Attempt to use an object or array will 1148 * not work properly with LinksUpdate. 1149 * 1150 * @param string $name 1151 * @param int|float|string|bool|null $value 1152 */ 1153 public function setProperty( $name, $value ) { 1154 $this->mProperties[$name] = $value; 1155 } 1156 1157 /** 1158 * @param string $name The property name to look up. 
*
	 * @return mixed|bool The value previously set using setProperty(). False if null or no value
	 * was set for the given property name.
	 *
	 * @note You need to use getProperties() to check for boolean and null properties.
	 */
	public function getProperty( $name ) {
		if ( isset( $this->mProperties[$name] ) ) {
			return $this->mProperties[$name];
		}
		return false;
	}

	/**
	 * Remove a property from the property map, if present.
	 *
	 * @param string $name The property name to remove.
	 */
	public function unsetProperty( $name ) {
		unset( $this->mProperties[$name] );
	}

	/**
	 * Get the full property map, initializing it to an empty array first
	 * if it is currently unset or null.
	 *
	 * @return array Map of property name to value
	 */
	public function getProperties() {
		$this->mProperties = $this->mProperties ?? [];
		return $this->mProperties;
	}

	/**
	 * Attaches arbitrary data to this ParserOutput. This can be used to store some information in
	 * the ParserOutput object for later use during page output. The data will be cached along with
	 * the ParserOutput object, but unlike data set using setProperty(), it is not recorded in the
	 * database.
	 *
	 * This method is provided to overcome the unsafe practice of attaching extra information to a
	 * ParserOutput by directly assigning member variables.
	 *
	 * To use setExtensionData() to pass extension information from a hook inside the parser to a
	 * hook in the page output, use this in the parser hook:
	 *
	 * @par Example:
	 * @code
	 *    $parser->getOutput()->setExtensionData( 'my_ext_foo', '...' );
	 * @endcode
	 *
	 * And then later, in OutputPageParserOutput or similar:
	 *
	 * @par Example:
	 * @code
	 *    $output->getExtensionData( 'my_ext_foo' );
	 * @endcode
	 *
	 * In MediaWiki 1.20 and older, you have to use a custom member variable
	 * within the ParserOutput object:
	 *
	 * @par Example:
	 * @code
	 *    $parser->getOutput()->my_ext_foo = '...';
	 * @endcode
	 *
	 * @note Only scalar values (e.g. numbers, strings), arrays, or MediaWiki\Json\JsonUnserializable
	 * instances are supported as a value.
* Attempting to set an instance of any other class as extension data
	 * will break ParserCache for the page.
	 *
	 * @param string $key The key for accessing the data. Extensions should take care to avoid
	 *   conflicts in naming keys. It is suggested to use the extension's name as a prefix.
	 *
	 * @param mixed|JsonUnserializable $value The value to set.
	 *   Setting a value to null is equivalent to removing the value.
	 * @since 1.21
	 */
	public function setExtensionData( $key, $value ) {
		if ( $value !== null ) {
			$this->mExtensionData[$key] = $value;
			return;
		}

		// null means "remove the entry"
		unset( $this->mExtensionData[$key] );
	}

	/**
	 * Gets extension data previously attached to this ParserOutput using setExtensionData().
	 * Typically, such data would be set while parsing the page, e.g. by a parser function.
	 *
	 * @since 1.21
	 *
	 * @param string $key The key to look up.
	 *
	 * @return mixed|null The value previously set for the given key using setExtensionData()
	 *         or null if no value was set for this key.
	 */
	public function getExtensionData( $key ) {
		return isset( $this->mExtensionData[$key] ) ? $this->mExtensionData[$key] : null;
	}

	/**
	 * Sample the current clock readings, in seconds.
	 *
	 * @param string|null $clock 'wall', 'cpu', or a falsy value to sample both
	 * @return float[] Readings keyed by clock name ('wall' and/or 'cpu');
	 *   empty if an unknown clock name was given
	 */
	private static function getTimes( $clock = null ) {
		$wantAll = !$clock;
		$times = [];

		if ( $wantAll || $clock === 'wall' ) {
			$times['wall'] = microtime( true );
		}

		if ( $wantAll || $clock === 'cpu' ) {
			$usage = getrusage( 0 /* RUSAGE_SELF */ );
			// user time plus system time, each as seconds + microseconds
			$userSeconds = $usage['ru_utime.tv_sec'] + $usage['ru_utime.tv_usec'] / 1e6;
			$systemSeconds = $usage['ru_stime.tv_sec'] + $usage['ru_stime.tv_usec'] / 1e6;
			$times['cpu'] = $userSeconds + $systemSeconds;
		}

		return $times;
	}

	/**
	 * Resets the parse start timestamps for future calls to getTimeSinceStart()
	 * @since 1.22
	 */
	public function resetParseStartTime() {
		$this->mParseStartTime = self::getTimes();
	}

	/**
	 * Returns the time since resetParseStartTime() was last called
	 *
	 * Clocks available are:
	 *  - wall: Wall clock time
	 *  - cpu: CPU time (requires getrusage)
	 *
	 * @since 1.22
	 * @param string $clock
	 * @return float|null Elapsed time, or null if no start time is recorded for $clock
	 */
	public function getTimeSinceStart( $clock ) {
		if ( isset( $this->mParseStartTime[$clock] ) ) {
			$now = self::getTimes( $clock );
			return $now[$clock] - $this->mParseStartTime[$clock];
		}

		return null;
	}

	/**
	 * Sets parser limit report data for a key
	 *
	 * The key is used as the prefix for various messages used for formatting:
	 *  - $key: The label for the field in the limit report
	 *  - $key-value-text: Message used to format the value in the "NewPP limit
	 *      report" HTML comment. If missing, uses $key-format.
	 *  - $key-value-html: Message used to format the value in the preview
	 *      limit report table. If missing, uses $key-format.
	 *  - $key-value: Message used to format the value. If missing, uses "$1".
	 *
	 * Note that all values are interpreted as wikitext, and so should be
	 * encoded with htmlspecialchars() as necessary, but should avoid complex
	 * HTML for sanity of display in the "NewPP limit report" comment.
*
	 * @since 1.22
	 * @param string $key Message key
	 * @param mixed $value Appropriate for Message::params()
	 */
	public function setLimitReportData( $key, $value ) {
		$this->mLimitReportData[$key] = $value;

		// A two-element list of numerics is exposed to JS as a value/limit pair;
		// anything else is passed through untouched.
		$isValueLimitPair = is_array( $value )
			&& array_keys( $value ) === [ 0, 1 ]
			&& is_numeric( $value[0] )
			&& is_numeric( $value[1] );
		$jsData = $isValueLimitPair
			? [ 'value' => $value[0], 'limit' => $value[1] ]
			: $value;

		// strpos() is used as a truthiness test here: keys without a dash, or with
		// the first dash at offset 0, are stored flat under the full key.
		if ( strpos( $key, '-' ) ) {
			[ $ns, $name ] = explode( '-', $key, 2 );
			$this->mLimitReportJSData[$ns][$name] = $jsData;
		} else {
			$this->mLimitReportJSData[$key] = $jsData;
		}
	}

	/**
	 * Check whether the cache TTL was lowered due to dynamic content
	 *
	 * When content is determined by more than hard state (e.g. page edits),
	 * such as template/file transclusions based on the current timestamp or
	 * extension tags that generate lists based on queries, this returns true.
*
	 * @return bool True if the cache expiry is below $wgParserCacheExpireTime
	 * @since 1.25
	 */
	public function hasDynamicContent() {
		global $wgParserCacheExpireTime;

		$expiry = $this->getCacheExpiry();
		return $expiry < $wgParserCacheExpireTime;
	}

	/**
	 * Get or set the prevent-clickjacking flag
	 *
	 * @since 1.24
	 * @param bool|null $flag New flag value, or null to leave it unchanged
	 * @return bool Old flag value
	 */
	public function preventClickjacking( $flag = null ) {
		// wfSetVar() takes the field by reference, so it must be passed directly.
		return wfSetVar( $this->mPreventClickjacking, $flag );
	}

	/**
	 * Lower the runtime adaptive TTL to at most this value
	 *
	 * The same value is also passed on to updateCacheExpiry().
	 *
	 * @param int $ttl
	 * @since 1.28
	 */
	public function updateRuntimeAdaptiveExpiry( $ttl ) {
		$this->mMaxAdaptiveExpiry = min( $ttl, $this->mMaxAdaptiveExpiry );
		$this->updateCacheExpiry( $ttl );
	}

	/**
	 * Add an extra value to Content-Security-Policy default-src directive
	 *
	 * Call this if you are including a resource (e.g. image) from a third party domain.
	 * This is used for all source types except style and script.
	 *
	 * @since 1.35
	 * @param string $src CSP source e.g. example.com
	 */
	public function addExtraCSPDefaultSrc( $src ) {
		$this->mExtraDefaultSrcs[] = $src;
	}

	/**
	 * Add an extra value to Content-Security-Policy style-src directive
	 *
	 * @since 1.35
	 * @param string $src CSP source e.g. example.com
	 */
	public function addExtraCSPStyleSrc( $src ) {
		$this->mExtraStyleSrcs[] = $src;
	}

	/**
	 * Add an extra value to Content-Security-Policy script-src directive
	 *
	 * Call this if you are loading third-party Javascript
	 *
	 * @since 1.35
	 * @param string $src CSP source e.g.
*   example.com
	 */
	public function addExtraCSPScriptSrc( $src ) {
		$this->mExtraScriptSrcs[] = $src;
	}

	/**
	 * Call this when parsing is done to lower the TTL based on low parse times
	 *
	 * The TTL is taken from the straight line through
	 * (PARSE_FAST_SEC, FAST_AR_TTL) and (PARSE_SLOW_SEC, SLOW_AR_TTL), evaluated at the
	 * wall-clock parse time, then clamped to at least MIN_AR_TTL and at most the
	 * maximum adaptive expiry. Nothing happens if no maximum adaptive expiry was set
	 * or if no wall-clock parse time is available.
	 *
	 * @since 1.28
	 */
	public function finalizeAdaptiveCacheExpiry() {
		if ( is_infinite( $this->mMaxAdaptiveExpiry ) ) {
			return; // not set
		}

		$runtime = $this->getTimeSinceStart( 'wall' );
		if ( !is_float( $runtime ) ) {
			return;
		}

		$slope = ( self::SLOW_AR_TTL - self::FAST_AR_TTL )
			/ ( self::PARSE_SLOW_SEC - self::PARSE_FAST_SEC );
		// SLOW_AR_TTL = PARSE_SLOW_SEC * $slope + $point
		$point = self::SLOW_AR_TTL - self::PARSE_SLOW_SEC * $slope;

		$interpolatedTTL = max( $slope * $runtime + $point, self::MIN_AR_TTL );
		$this->updateCacheExpiry( min( $interpolatedTTL, $this->mMaxAdaptiveExpiry ) );
	}

	/**
	 * Choose the fields to include when this object is serialized.
	 *
	 * @return string[] Field names, excluding mParseStartTime and any name containing a NUL byte
	 */
	public function __sleep() {
		$fieldNames = array_keys( get_object_vars( $this ) );

		return array_filter(
			$fieldNames,
			static function ( $field ) {
				// Unserializing unknown private fields in HHVM causes
				// member variables with nulls in their names (T229366)
				$hasNulByte = strpos( $field, "\0" ) !== false;

				return $field !== 'mParseStartTime' && !$hasNulByte;
			}
		);
	}

	/**
	 * Merges internal metadata such as flags, accessed options, and profiling info
	 * from $source into this ParserOutput. This should be used whenever the state of $source
	 * has any impact on the state of this ParserOutput.
*
	 * @param ParserOutput $source
	 */
	public function mergeInternalMetaDataFrom( ParserOutput $source ) {
		$this->mOutputHooks = self::mergeList( $this->mOutputHooks, $source->getOutputHooks() );
		// don't use getter
		$this->mWarnings = self::mergeMap( $this->mWarnings, $source->mWarnings );
		$this->mTimestamp = self::useMaxValue( $this->mTimestamp, $source->getTimestamp() );

		foreach ( self::SPECULATIVE_FIELDS as $field ) {
			$ownValue = $this->$field;
			$sourceValue = $source->$field;

			// Both sides set, but to different values: log it, then keep the larger one.
			if ( $ownValue && $sourceValue && $ownValue !== $sourceValue ) {
				wfLogWarning( __METHOD__ . ": inconsistent '$field' properties!" );
			}

			$this->$field = self::useMaxValue( $ownValue, $sourceValue );
		}

		// Keep the earliest start time per clock
		$this->mParseStartTime = self::useEachMinValue(
			$this->mParseStartTime,
			$source->mParseStartTime
		);

		$this->mFlags = self::mergeMap( $this->mFlags, $source->mFlags );
		$this->mParseUsedOptions = self::mergeMap(
			$this->mParseUsedOptions,
			$source->mParseUsedOptions
		);

		// TODO: maintain per-slot limit reports!
		// For now the source's reports are only adopted when this object has none.
		if ( empty( $this->mLimitReportData ) ) {
			$this->mLimitReportData = $source->mLimitReportData;
		}
		if ( empty( $this->mLimitReportJSData ) ) {
			$this->mLimitReportJSData = $source->mLimitReportJSData;
		}
	}

	/**
	 * Merges HTML metadata such as head items, JS config vars, and HTTP cache control info
	 * from $source into this ParserOutput. This should be used whenever the HTML in $source
	 * has been somehow merged into the HTML of this ParserOutput.
*
	 * @param ParserOutput $source
	 */
	public function mergeHtmlMetaDataFrom( ParserOutput $source ) {
		// HTML and HTTP
		$this->mHeadItems = self::mergeMixedList( $this->mHeadItems, $source->getHeadItems() );
		$this->mModules = self::mergeList( $this->mModules, $source->getModules() );
		$this->mModuleStyles = self::mergeList( $this->mModuleStyles, $source->getModuleStyles() );
		$this->mJsConfigVars = self::mergeMap( $this->mJsConfigVars, $source->getJsConfigVars() );
		$this->mMaxAdaptiveExpiry = min( $this->mMaxAdaptiveExpiry, $source->mMaxAdaptiveExpiry );

		// Content-Security-Policy sources
		$sourceStyleSrcs = $source->getExtraCSPStyleSrcs();
		$this->mExtraStyleSrcs = self::mergeList( $this->mExtraStyleSrcs, $sourceStyleSrcs );
		$sourceScriptSrcs = $source->getExtraCSPScriptSrcs();
		$this->mExtraScriptSrcs = self::mergeList( $this->mExtraScriptSrcs, $sourceScriptSrcs );
		$sourceDefaultSrcs = $source->getExtraCSPDefaultSrcs();
		$this->mExtraDefaultSrcs = self::mergeList( $this->mExtraDefaultSrcs, $sourceDefaultSrcs );

		// "noindex" always wins!
		$eitherIsNoindex = $this->mIndexPolicy === 'noindex'
			|| $source->mIndexPolicy === 'noindex';
		if ( $eitherIsNoindex ) {
			$this->mIndexPolicy = 'noindex';
		} elseif ( $this->mIndexPolicy !== 'index' ) {
			// No explicit 'index' on this side, so adopt whatever the source has.
			$this->mIndexPolicy = $source->mIndexPolicy;
		}

		// Skin control: each switch is on if it is on in either output
		$this->mNewSection = $this->mNewSection || $source->getNewSection();
		$this->mHideNewSection = $this->mHideNewSection || $source->getHideNewSection();
		$this->mNoGallery = $this->mNoGallery || $source->getNoGallery();
		$this->mEnableOOUI = $this->mEnableOOUI || $source->getEnableOOUI();
		$this->mPreventClickjacking = $this->mPreventClickjacking || $source->preventClickjacking();

		// TODO: we'll have to be smarter about this!
		$this->mSections = array_merge( $this->mSections, $source->getSections() );
		$this->mTOCHTML .= $source->mTOCHTML;

		// XXX: we don't want to concatenate title text, so first write wins.
		// We should use the first *modified* title text, but we don't have the original to check.
		if ( in_array( $this->mTitleText, [ null, '' ], true ) ) {
			$this->mTitleText = $source->mTitleText;
		}

		// class names are stored in array keys
		$this->mWrapperDivClasses = self::mergeMap(
			$this->mWrapperDivClasses,
			$source->mWrapperDivClasses
		);

		// NOTE: last write wins, same as within one ParserOutput
		$this->mIndicators = self::mergeMap( $this->mIndicators, $source->getIndicators() );

		// NOTE: include extension data in "tracking meta data" as well as "html meta data"!
		// TODO: add a $mergeStrategy parameter to setExtensionData to allow different
		// kinds of extension data to be merged in different ways.
		$this->mExtensionData = self::mergeMap( $this->mExtensionData, $source->mExtensionData );
	}

	/**
	 * Merges dependency tracking metadata such as backlinks, images used, and extension data
	 * from $source into this ParserOutput. This allows dependency tracking to be done for the
	 * combined output of multiple content slots.
*
	 * @param ParserOutput $source
	 */
	public function mergeTrackingMetaDataFrom( ParserOutput $source ) {
		$this->mLanguageLinks = self::mergeList( $this->mLanguageLinks, $source->getLanguageLinks() );
		$this->mCategories = self::mergeMap( $this->mCategories, $source->getCategories() );
		$this->mLinks = self::merge2D( $this->mLinks, $source->getLinks() );
		$this->mTemplates = self::merge2D( $this->mTemplates, $source->getTemplates() );
		$this->mTemplateIds = self::merge2D( $this->mTemplateIds, $source->getTemplateIds() );
		$this->mImages = self::mergeMap( $this->mImages, $source->getImages() );
		$this->mFileSearchOptions = self::mergeMap(
			$this->mFileSearchOptions,
			$source->getFileSearchOptions()
		);
		$this->mExternalLinks = self::mergeMap( $this->mExternalLinks, $source->getExternalLinks() );
		$this->mInterwikiLinks = self::merge2D(
			$this->mInterwikiLinks,
			$source->getInterwikiLinks()
		);

		// TODO: add a $mergeStrategy parameter to setProperty to allow different
		// kinds of properties to be merged in different ways.
		$this->mProperties = self::mergeMap( $this->mProperties, $source->getProperties() );

		// NOTE: include extension data in "tracking meta data" as well as "html meta data"!
		// TODO: add a $mergeStrategy parameter to setExtensionData to allow different
		// kinds of extension data to be merged in different ways.
		$this->mExtensionData = self::mergeMap(
			$this->mExtensionData,
			$source->mExtensionData
		);
	}

	/**
	 * Concatenate two lists and drop duplicate values, keeping the keys that
	 * array_merge() produced (string keys survive, so keyed items stay keyed).
	 *
	 * @param array $a
	 * @param array $b
	 * @return array
	 */
	private static function mergeMixedList( array $a, array $b ) {
		return array_unique( array_merge( $a, $b ), SORT_REGULAR );
	}

	/**
	 * Concatenate two lists, drop duplicate values and reindex the result from 0.
	 *
	 * @param array $a
	 * @param array $b
	 * @return array
	 */
	private static function mergeList( array $a, array $b ) {
		return array_values( array_unique( array_merge( $a, $b ), SORT_REGULAR ) );
	}

	/**
	 * Merge two maps by key; on conflict the value from $b wins.
	 *
	 * @param array $a
	 * @param array $b
	 * @return array
	 */
	private static function mergeMap( array $a, array $b ) {
		return array_replace( $a, $b );
	}

	/**
	 * Merge two 2-D maps. Rows present on both sides are combined with
	 * array_replace() (inner values from $b win); otherwise the non-empty row is kept.
	 *
	 * @param array $a
	 * @param array $b
	 * @return array
	 */
	private static function merge2D( array $a, array $b ) {
		$values = [];

		// The union visits every key exactly once: keys of $a first, then keys only in $b.
		foreach ( array_keys( $a + $b ) as $k ) {
			$rowA = $a[$k] ?? null;
			$rowB = $b[$k] ?? null;

			if ( empty( $rowA ) ) {
				// Fall back to $a's (empty) row when $b has no such key at all, instead of
				// reading an undefined offset and turning the row into null.
				$values[$k] = array_key_exists( $k, $b ) ? $rowB : $rowA;
			} elseif ( empty( $rowB ) ) {
				$values[$k] = $rowA;
			} elseif ( is_array( $rowA ) && is_array( $rowB ) ) {
				$values[$k] = array_replace( $rowA, $rowB );
			} else {
				$values[$k] = $rowB;
			}
		}

		return $values;
	}

	/**
	 * Merge two (possibly nested) maps, keeping the smaller value for each key.
	 * A key missing on one side is treated as null, so the other side's value is used.
	 *
	 * @param array $a
	 * @param array $b
	 * @return array
	 */
	private static function useEachMinValue( array $a, array $b ) {
		$values = [];

		foreach ( array_keys( $a + $b ) as $k ) {
			$left = $a[$k] ?? null;
			$right = $b[$k] ?? null;

			$values[$k] = ( is_array( $left ) && is_array( $right ) )
				? self::useEachMinValue( $left, $right )
				: self::useMinValue( $left, $right );
		}

		return $values;
	}

	/**
	 * @param mixed $a
	 * @param mixed $b
	 * @return mixed The smaller of the two values; a null argument yields the other one
	 */
	private static function useMinValue( $a, $b ) {
		if ( $a === null ) {
			return $b;
		}

		return $b === null ? $a : min( $a, $b );
	}

	/**
	 * @param mixed $a
	 * @param mixed $b
	 * @return mixed The larger of the two values; a null argument yields the other one
	 */
	private static function useMaxValue( $a, $b ) {
		if ( $a === null ) {
			return $b;
		}

		return $b === null ? $a : max( $a, $b );
	}

	/**
	 * Returns a JSON serializable structure representing this ParserOutput instance.
* @see newFromJsonArray()
	 *
	 * @return array
	 */
	protected function toJsonArray(): array {
		$ownFields = [
			'Text' => $this->mText,
			'LanguageLinks' => $this->mLanguageLinks,
			'Categories' => $this->mCategories,
			'Indicators' => $this->mIndicators,
			'TitleText' => $this->mTitleText,
			'Links' => $this->mLinks,
			'LinksSpecial' => $this->mLinksSpecial,
			'Templates' => $this->mTemplates,
			'TemplateIds' => $this->mTemplateIds,
			'Images' => $this->mImages,
			'FileSearchOptions' => $this->mFileSearchOptions,
			'ExternalLinks' => $this->mExternalLinks,
			'InterwikiLinks' => $this->mInterwikiLinks,
			'NewSection' => $this->mNewSection,
			'HideNewSection' => $this->mHideNewSection,
			'NoGallery' => $this->mNoGallery,
			'HeadItems' => $this->mHeadItems,
			'Modules' => $this->mModules,
			'ModuleStyles' => $this->mModuleStyles,
			'JsConfigVars' => $this->mJsConfigVars,
			'OutputHooks' => $this->mOutputHooks,
			'Warnings' => $this->mWarnings,
			'Sections' => $this->mSections,
			// non-UTF-8 strings are wrapped as base64 so they survive JSON encoding
			'Properties' => self::detectAndEncodeBinary( $this->mProperties ),
			'TOCHTML' => $this->mTOCHTML,
			'Timestamp' => $this->mTimestamp,
			'EnableOOUI' => $this->mEnableOOUI,
			'IndexPolicy' => $this->mIndexPolicy,
			// may contain arbitrary structures!
			'ExtensionData' => $this->mExtensionData,
			'LimitReportData' => $this->mLimitReportData,
			'LimitReportJSData' => $this->mLimitReportJSData,
			'ParseStartTime' => $this->mParseStartTime,
			'PreventClickjacking' => $this->mPreventClickjacking,
			'ExtraScriptSrcs' => $this->mExtraScriptSrcs,
			'ExtraDefaultSrcs' => $this->mExtraDefaultSrcs,
			'ExtraStyleSrcs' => $this->mExtraStyleSrcs,
			'Flags' => $this->mFlags,
			'SpeculativeRevId' => $this->mSpeculativeRevId,
			'SpeculativePageIdUsed' => $this->speculativePageIdUsed,
			'RevisionTimestampUsed' => $this->revisionTimestampUsed,
			'RevisionUsedSha1Base36' => $this->revisionUsedSha1Base36,
			'WrapperDivClasses' => $this->mWrapperDivClasses,
		];

		// Fill in missing fields from parents. Array addition does not override existing fields.
		$data = $ownFields + parent::toJsonArray();

		// TODO: make more fields optional!

		// NOTE: JSON can't encode infinity, so the key is simply left out in that case.
		if ( $this->mMaxAdaptiveExpiry !== INF ) {
			$data['MaxAdaptiveExpiry'] = $this->mMaxAdaptiveExpiry;
		}

		return $data;
	}

	/**
	 * Create a ParserOutput and populate it from its JSON array form.
	 *
	 * @param JsonUnserializer $unserializer
	 * @param array $json
	 * @return ParserOutput
	 */
	public static function newFromJsonArray( JsonUnserializer $unserializer, array $json ) {
		$instance = new self();
		$instance->initFromJson( $unserializer, $json );

		return $instance;
	}

	/**
	 * Initialize member fields from an array returned by jsonSerialize().
* @param JsonUnserializer $unserializer
	 * @param array $jsonData
	 */
	protected function initFromJson( JsonUnserializer $unserializer, array $jsonData ) {
		parent::initFromJson( $unserializer, $jsonData );

		// Fields that are copied from the JSON data verbatim: field name => JSON key
		$plainFields = [
			'mText' => 'Text',
			'mLanguageLinks' => 'LanguageLinks',
			'mCategories' => 'Categories',
			'mIndicators' => 'Indicators',
			'mTitleText' => 'TitleText',
			'mLinks' => 'Links',
			'mLinksSpecial' => 'LinksSpecial',
			'mTemplates' => 'Templates',
			'mTemplateIds' => 'TemplateIds',
			'mImages' => 'Images',
			'mFileSearchOptions' => 'FileSearchOptions',
			'mExternalLinks' => 'ExternalLinks',
			'mInterwikiLinks' => 'InterwikiLinks',
			'mNewSection' => 'NewSection',
			'mHideNewSection' => 'HideNewSection',
			'mNoGallery' => 'NoGallery',
			'mHeadItems' => 'HeadItems',
			'mModules' => 'Modules',
			'mModuleStyles' => 'ModuleStyles',
			'mJsConfigVars' => 'JsConfigVars',
			'mOutputHooks' => 'OutputHooks',
			'mWarnings' => 'Warnings',
			'mSections' => 'Sections',
			'mTOCHTML' => 'TOCHTML',
			'mTimestamp' => 'Timestamp',
			'mEnableOOUI' => 'EnableOOUI',
			'mIndexPolicy' => 'IndexPolicy',
			'mLimitReportData' => 'LimitReportData',
			'mLimitReportJSData' => 'LimitReportJSData',
			'mParseStartTime' => 'ParseStartTime',
			'mPreventClickjacking' => 'PreventClickjacking',
			'mExtraScriptSrcs' => 'ExtraScriptSrcs',
			'mExtraDefaultSrcs' => 'ExtraDefaultSrcs',
			'mExtraStyleSrcs' => 'ExtraStyleSrcs',
			'mFlags' => 'Flags',
			'mSpeculativeRevId' => 'SpeculativeRevId',
			'speculativePageIdUsed' => 'SpeculativePageIdUsed',
			'revisionTimestampUsed' => 'RevisionTimestampUsed',
			'revisionUsedSha1Base36' => 'RevisionUsedSha1Base36',
			'mWrapperDivClasses' => 'WrapperDivClasses',
		];
		foreach ( $plainFields as $field => $jsonKey ) {
			$this->$field = $jsonData[$jsonKey];
		}

		// Fields that need decoding or have a default when the key is absent
		$this->mProperties = self::detectAndDecodeBinary( $jsonData['Properties'] );
		$this->mExtensionData = $unserializer->unserializeArray( $jsonData['ExtensionData'] ?? [] );
		// An absent key stands for INF (toJsonArray() omits it because JSON can't encode infinity)
		$this->mMaxAdaptiveExpiry = $jsonData['MaxAdaptiveExpiry'] ?? INF;
	}

	/**
	 * Finds any non-utf8 strings in the given array and replaces them with
	 * an associative array that wraps a base64 encoded version of the data.
	 * Inverse of detectAndDecodeBinary().
	 *
	 * Only top-level string values are examined; everything else is returned as-is.
	 *
	 * @param array $properties
	 *
	 * @return array
	 */
	private static function detectAndEncodeBinary( array $properties ) {
		foreach ( $properties as $key => $value ) {
			$isBinaryString = is_string( $value )
				&& !mb_detect_encoding( $value, 'UTF-8', true );

			if ( $isBinaryString ) {
				$properties[$key] = [
					'_type_' => 'string',
					'_encoding_' => 'base64',
					'_data_' => base64_encode( $value ),
				];
			}
		}

		return $properties;
	}

	/**
	 * Finds any associative arrays that represent encoded binary strings, and
	 * replaces them with the decoded binary data.
*
	 * Only top-level values carrying an '_encoding_' key equal to 'base64' are
	 * decoded; everything else is returned as-is.
	 *
	 * @param array $properties
	 *
	 * @return array
	 */
	private static function detectAndDecodeBinary( array $properties ) {
		foreach ( $properties as $key => $value ) {
			$isBase64Wrapper = is_array( $value )
				&& isset( $value['_encoding_'] )
				&& $value['_encoding_'] === 'base64';

			if ( $isBase64Wrapper ) {
				$properties[$key] = base64_decode( $value['_data_'] );
			}
		}

		return $properties;
	}

	/**
	 * After unserialization, carry a non-empty value of the old mAccessedOptions
	 * field over into mParseUsedOptions.
	 */
	public function __wakeup() {
		// Backwards compatibility, pre 1.36
		$legacyAccessedOptions = $this->getGhostFieldValue( 'mAccessedOptions' );
		if ( !$legacyAccessedOptions ) {
			return;
		}

		$this->mParseUsedOptions = $legacyAccessedOptions;
	}
}