<?php
/**
 * XmlDumpWriter
 *
 * Copyright © 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\HookContainer\HookRunner;
use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Revision\SuppressedDataException;
use MediaWiki\Storage\SqlBlobStore;
use Wikimedia\Assert\Assert;
use Wikimedia\IPUtils;

/**
 * Builds the XML fragments that make up a MediaWiki XML export ("dump"):
 * the root element and site info, pages, revisions, log items and uploads.
 *
 * Every method returns its fragment as a string; this class does not write
 * to any output sink itself.
 *
 * @ingroup Dump
 */
class XmlDumpWriter {

	/** Output serialized revision content. */
	public const WRITE_CONTENT = 0;

	/** Only output stubs for revision content. */
	public const WRITE_STUB = 1;

	/**
	 * Only output stubs for revision content, indicating that the content has been
	 * deleted/suppressed.
	 */
	private const WRITE_STUB_DELETED = 2;

	/**
	 * @var string[] the schema versions supported for output
	 * @final
	 */
	public static $supportedSchemas = [
		XML_DUMP_SCHEMA_VERSION_10,
		XML_DUMP_SCHEMA_VERSION_11
	];

	/**
	 * @var string which schema version the generated XML should comply to.
	 * One of the values from self::$supportedSchemas, using the SCHEMA_VERSION_XX
	 * constants.
	 */
	private $schemaVersion;

	/**
	 * Title of the currently processed page
	 *
	 * @var Title|null
	 */
	private $currentTitle = null;

	/**
	 * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB.
	 */
	private $contentMode;

	/** @var HookRunner */
	private $hookRunner;

	/**
	 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
	 * @param string $schemaVersion which schema version the generated XML should comply to.
	 * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
	 * constants.
	 */
	public function __construct(
		$contentMode = self::WRITE_CONTENT,
		$schemaVersion = XML_DUMP_SCHEMA_VERSION_11
	) {
		// Strict comparison: the mode is later tested with ===, so a loosely
		// matching value (e.g. a string) must be rejected here rather than
		// silently falling through to the stub branch.
		Assert::parameter(
			in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ], true ),
			'$contentMode',
			'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
		);

		// Strict comparison: loose comparison of numeric strings would accept
		// e.g. '0.1' as equal to '0.10'.
		Assert::parameter(
			in_array( $schemaVersion, self::$supportedSchemas, true ),
			'$schemaVersion',
			'must be one of the following schema versions: '
				. implode( ',', self::$supportedSchemas )
		);

		$this->contentMode = $contentMode;
		$this->schemaVersion = $schemaVersion;
		$this->hookRunner = new HookRunner( MediaWikiServices::getInstance()->getHookContainer() );
	}

	/**
	 * Opens the XML output stream's root "<mediawiki>" element.
	 * This does not include an xml directive, so is safe to include
	 * as a subelement in a larger XML stream. Namespace and XML Schema
	 * references are included.
	 *
	 * Output will be encoded in UTF-8.
	 *
	 * @return string
	 */
	public function openStream() {
		$ver = $this->schemaVersion;
		return Xml::element( 'mediawiki', [
			'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			/*
			 * When a new version of the schema is created, it needs staging on mediawiki.org.
			 * This requires a change in the operations/mediawiki-config git repo.
			 *
			 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
			 * you copy in the new xsd file.
			 *
			 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
			 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
			 */
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
				"http://www.mediawiki.org/xml/export-$ver.xsd",
			'version' => $ver,
			'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
			// null content: only the opening tag is wanted here
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Builds the "<siteinfo>" element from the individual site property elements.
	 *
	 * @return string
	 */
	private function siteInfo() {
		$info = [
			$this->sitename(),
			$this->dbname(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() ];
		return "  <siteinfo>\n    " .
			implode( "\n    ", $info ) .
			"\n  </siteinfo>\n";
	}

	/**
	 * @return string The "<sitename>" element
	 */
	private function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', [], $wgSitename );
	}

	/**
	 * @return string The "<dbname>" element
	 */
	private function dbname() {
		global $wgDBname;
		return Xml::element( 'dbname', [], $wgDBname );
	}

	/**
	 * @return string The "<generator>" element
	 */
	private function generator() {
		return Xml::element( 'generator', [], 'MediaWiki ' . MW_VERSION );
	}

	/**
	 * @return string The "<base>" element, holding the canonical URL of the main page
	 */
	private function homelink() {
		return Xml::element( 'base', [], Title::newMainPage()->getCanonicalURL() );
	}

	/**
	 * @return string The "<case>" element
	 */
	private function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', [], $sensitivity );
	}

	/**
	 * Builds the "<namespaces>" element, with one "<namespace>" child per
	 * formatted namespace of the content language.
	 *
	 * @return string
	 */
	private function namespaces() {
		$spaces = "<namespaces>\n";
		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
		foreach (
			MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces()
			as $ns => $title
		) {
			$spaces .= '      ' .
				Xml::element( 'namespace',
					[
						'key' => $ns,
						'case' => $nsInfo->isCapitalized( $ns )
							? 'first-letter' : 'case-sensitive',
					], $title ) . "\n";
		}
		$spaces .= "    </namespaces>";
		return $spaces;
	}

	/**
	 * Closes the output stream with the closing root element.
	 * Call when finished dumping things.
	 *
	 * @return string
	 */
	public function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Opens a "<page>" section on the output stream, with data
	 * from the given database row.
	 *
	 * The title built from the row is remembered as the current title; it is
	 * used by writeRevision() and closePage().
	 *
	 * @param stdClass $row
	 * @return string
	 */
	public function openPage( $row ) {
		$out = "  <page>\n";
		$this->currentTitle = Title::newFromRow( $row );
		$canonicalTitle = self::canonicalTitle( $this->currentTitle );
		$out .= '    ' . Xml::elementClean( 'title', [], $canonicalTitle ) . "\n";
		$out .= '    ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
		$out .= '    ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $this->currentTitle );
			// A failure to resolve the target only drops the <redirect> element.
			$redirect = $this->invokeLenient(
				static function () use ( $page ) {
					return $page->getRedirectTarget();
				},
				'Failed to get redirect target of page ' . $page->getId()
			);
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= '    ';
				$out .= Xml::element( 'redirect', [ 'title' => self::canonicalTitle( $redirect ) ] );
				$out .= "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= '    ' . Xml::element( 'restrictions', [],
				strval( $row->page_restrictions ) ) . "\n";
		}

		$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );

		return $out;
	}

	/**
	 * Closes a "<page>" section on the output stream.
	 *
	 * @internal
	 * @return string
	 */
	public function closePage() {
		if ( $this->currentTitle !== null ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			// In rare cases, link cache has the same key for some pages which
			// might be read as part of the same batch. T220424 and T220316
			$linkCache->clearLink( $this->currentTitle );
		}
		return "  </page>\n";
	}

	/**
	 * @return RevisionStore
	 */
	private function getRevisionStore() {
		return MediaWikiServices::getInstance()->getRevisionStore();
	}

	/**
	 * @return SqlBlobStore
	 */
	private function getBlobStore() {
		return MediaWikiServices::getInstance()->getBlobStore();
	}

	/**
	 * Invokes the given callback, catching and logging any storage related
	 * exceptions.
	 *
	 * SuppressedDataException is swallowed without a warning. MWException,
	 * RuntimeException and InvalidArgumentException are reported through
	 * MWDebug::warning(). Any other exception propagates to the caller.
	 *
	 * @param callable $callback
	 * @param string $warning The warning to output in case of a storage related exception.
	 *
	 * @return mixed Returns the method's return value,
	 *    or null in case of a storage related exception.
	 * @throws Exception
	 */
	private function invokeLenient( $callback, $warning ) {
		try {
			return $callback();
		} catch ( SuppressedDataException $ex ) {
			// Must be caught before the generic branch below so that no
			// warning is emitted for it.
			return null;
		} catch ( MWException | RuntimeException | InvalidArgumentException $ex ) {
			MWDebug::warning( $warning . ': ' . $ex->getMessage() );
			return null;
		}
	}

	/**
	 * Dumps a "<revision>" section on the output stream, with
	 * data filled in from the given database row.
	 *
	 * @param stdClass $row
	 * @param null|stdClass[] $slotRows
	 *
	 * @return string
	 * @throws FatalError
	 * @throws MWException
	 */
	public function writeRevision( $row, $slotRows = null ) {
		$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
			$row,
			$slotRows,
			0,
			$this->currentTitle
		);

		$out = "    <revision>\n";
		$out .= "      " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";

		if ( $rev->getParentId() ) {
			$out .= "      " . Xml::element( 'parentid', null, strval( $rev->getParentId() ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $rev->getTimestamp() );

		if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
			$out .= "      " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
		} else {
			// empty values get written out as uid 0, see T224221
			$user = $rev->getUser();
			$out .= $this->writeContributor(
				$user ? $user->getId() : 0,
				$user ? $user->getName() : ''
			);
		}

		if ( $rev->isMinor() ) {
			$out .= "      <minor/>\n";
		}
		if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
			$out .= "      " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
		} else {
			if ( $rev->getComment()->text != '' ) {
				$out .= "      "
					. Xml::elementClean( 'comment', [], strval( $rev->getComment()->text ) )
					. "\n";
			}
		}

		// Deleted text overrides the configured mode: only a placeholder is written.
		$contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
			: $this->contentMode;

		$slots = $rev->getSlots()->getSlots();

		// use predictable order, put main slot first
		ksort( $slots );
		$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );

		foreach ( $slots as $role => $slot ) {
			if ( $role === SlotRecord::MAIN ) {
				// already written above
				continue;
			}
			$out .= $this->writeSlot( $slot, $contentMode );
		}

		if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			$out .= "      <sha1/>\n";
		} else {
			$sha1 = $this->invokeLenient(
				static function () use ( $rev ) {
					return $rev->getSha1();
				},
				'failed to determine sha1 for revision ' . $rev->getId()
			);
			$out .= "      " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
		}

		// Avoid PHP 7.1 warning from passing $this by reference
		$writer = $this;
		// Serialized main slot text handed to the hook; stays empty unless
		// full content is being written.
		$text = '';
		if ( $contentMode === self::WRITE_CONTENT ) {
			/** @var Content $content */
			$content = $this->invokeLenient(
				static function () use ( $rev ) {
					return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
				},
				'Failed to load main slot content of revision ' . $rev->getId()
			);

			$text = $content ? $content->serialize() : '';
		}
		$this->hookRunner->onXmlDumpWriterWriteRevision( $writer, $out, $row, $text, $rev );

		$out .= "    </revision>\n";

		return $out;
	}

	/**
	 * Writes the elements describing one slot of a revision: origin (schema
	 * 0.11 and later), model, format and the "<text>" element. Non-main slots
	 * are wrapped in a "<content>" element and only written for schema 0.11
	 * and later.
	 *
	 * @param SlotRecord $slot
	 * @param int $contentMode see the WRITE_XXX constants
	 *
	 * @return string
	 */
	private function writeSlot( SlotRecord $slot, $contentMode ) {
		$isMain = $slot->getRole() === SlotRecord::MAIN;
		$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;

		if ( !$isV11 && !$isMain ) {
			// ignore extra slots
			return '';
		}

		$out = '';
		$indent = '      ';

		if ( !$isMain ) {
			// non-main slots are wrapped into an additional element.
			$out .= '      ' . Xml::openElement( 'content' ) . "\n";
			$indent .= '  ';
			$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
		}

		if ( $isV11 ) {
			$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
		}

		$contentModel = $slot->getModel();
		$contentHandler = MediaWikiServices::getInstance()
			->getContentHandlerFactory()
			->getContentHandler( $contentModel );
		$contentFormat = $contentHandler->getDefaultFormat();

		// XXX: The content format is only relevant when actually outputting serialized content.
		// It should probably be an attribute on the text tag.
		$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
		$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";

		$textAttributes = [
			// falls back to '0' when the size is unavailable (or zero)
			'bytes' => $this->invokeLenient(
				static function () use ( $slot ) {
					return $slot->getSize();
				},
				'failed to determine size for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
			) ?: '0'
		];

		if ( $isV11 ) {
			$textAttributes['sha1'] = $this->invokeLenient(
				static function () use ( $slot ) {
					return $slot->getSha1();
				},
				'failed to determine sha1 for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
			) ?: '';
		}

		if ( $contentMode === self::WRITE_CONTENT ) {
			$content = $this->invokeLenient(
				static function () use ( $slot ) {
					return $slot->getContent();
				},
				'failed to load content for slot ' . $slot->getRole() . ' of revision '
				. $slot->getRevision()
			);

			if ( $content === null ) {
				// content could not be loaded: emit an empty <text> element
				$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
			} else {
				$out .= $this->writeText( $content, $textAttributes, $indent );
			}
		} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
			// write <text> placeholder tag
			$textAttributes['deleted'] = 'deleted';
			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		} else {
			// write <text> stub tag
			if ( $isV11 ) {
				$textAttributes['location'] = $slot->getAddress();
			}

			if ( $isMain ) {
				// Output the numerical text ID if possible, for backwards compatibility.
				// Note that this is currently the ONLY reason we have a BlobStore here at all.
				// When removing this line, check whether the BlobStore has become unused.
				try {
					// NOTE: this will only work for addresses of the form "tt:12345".
					// If we want to support other kinds of addresses in the future,
					// we will have to silently ignore failures here.
					// For now, this fails for "tt:0", which is present in the WMF production
					// database as of July 2019, due to data corruption.
					$textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
				} catch ( InvalidArgumentException $ex ) {
					MWDebug::warning( 'Bad content address for slot ' . $slot->getRole()
						. ' of revision ' . $slot->getRevision() . ': ' . $ex->getMessage() );
					$textId = 0;
				}

				if ( is_int( $textId ) ) {
					$textAttributes['id'] = $textId;
				}
			}

			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
		}

		if ( !$isMain ) {
			$out .= '      ' . Xml::closeElement( 'content' ) . "\n";
		}

		return $out;
	}

	/**
	 * Writes a "<text>" element holding the exported form of the given content.
	 *
	 * @param Content $content
	 * @param string[] $textAttributes
	 * @param string $indent
	 *
	 * @return string
	 */
	private function writeText( Content $content, $textAttributes, $indent ) {
		$out = '';

		$contentHandler = $content->getContentHandler();
		$contentFormat = $contentHandler->getDefaultFormat();

		if ( $content instanceof TextContent ) {
			// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
			// that use incompatible combinations of serialization format and content model.
			$data = $content->getNativeData();
		} else {
			$data = $content->serialize( $contentFormat );
		}

		$data = $contentHandler->exportTransform( $data, $contentFormat );
		// make sure to use the actual size
		$textAttributes['bytes'] = strlen( $data );
		$textAttributes['xml:space'] = 'preserve';
		$out .= $indent . Xml::elementClean( 'text', $textAttributes, strval( $data ) ) . "\n";

		return $out;
	}

	/**
	 * Dumps a "<logitem>" section on the output stream, with
	 * data filled in from the given database row.
	 *
	 * @param stdClass $row
	 * @return string
	 */
	public function writeLogItem( $row ) {
		$out = "  <logitem>\n";
		$out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, "    " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= "    " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= "    " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
		} else {
			$comment = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
			if ( $comment != '' ) {
				$out .= "    " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
			}
		}

		$out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= "    " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= "    " . Xml::elementClean( 'params',
				[ 'xml:space' => 'preserve' ],
				strval( $row->log_params ) ) . "\n";
		}

		$out .= "  </logitem>\n";

		return $out;
	}

	/**
	 * Writes a "<timestamp>" element in ISO 8601 format.
	 *
	 * @param string $timestamp
	 * @param string $indent Default to six spaces
	 * @return string
	 */
	public function writeTimestamp( $timestamp, $indent = "      " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Writes a "<contributor>" element. A contributor with no ID whose name
	 * is a valid IP address is written as "<ip>"; anyone else is written as
	 * "<username>" plus "<id>".
	 *
	 * @param int $id
	 * @param string $text
	 * @param string $indent Default to six spaces
	 * @return string
	 */
	public function writeContributor( $id, $text, $indent = "      " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IPUtils::isValid( $text ) ) {
			$out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * Writes "<upload>" elements for every version of the file that belongs
	 * to the given page row: first the history in reversed order, then the
	 * current version. Returns an empty string for rows outside the file
	 * namespace or when the file does not exist.
	 *
	 * Warning! This data is potentially inconsistent. :(
	 * @param stdClass $row
	 * @param bool $dumpContents
	 * @return string
	 */
	public function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
				->newFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Writes a single "<upload>" element for one version of a file.
	 *
	 * @param File $file
	 * @param bool $dumpContents Whether to embed the file contents, base64 encoded
	 * @return string
	 */
	private function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			/** @var OldLocalFile $file */
			'@phan-var OldLocalFile $file';
			$archiveName = "      " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			$be = $file->getRepo()->getBackend();
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			# @todo Too bad this loads the contents into memory (script might swap)
			$contents = '      <contents encoding="base64">' .
				chunk_split( base64_encode(
					$be->getFileContents( [ 'src' => $file->getPath() ] ) ) ) .
				"      </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
		} else {
			$comment = Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );
		}
		return "    <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			"      " . $comment . "\n" .
			"      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			"      " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			"      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			"      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			"      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			"    </upload>\n";
	}

	/**
	 * Return prefixed text form of title, but using the content language's
	 * canonical namespace. This skips any special-casing such as gendered
	 * user namespaces -- which while useful, are not yet listed in the
	 * XML "<siteinfo>" data so are unsafe in export.
	 *
	 * @param Title $title
	 * @return string
	 * @since 1.18
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->isExternal() ) {
			return $title->getPrefixedText();
		}

		$prefix = MediaWikiServices::getInstance()->getContentLanguage()->
			getFormattedNsText( $title->getNamespace() );

		// @todo Emit some kind of warning to the user if $title->getNamespace() !==
		// NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}