1<?php
2/**
3 * XmlDumpWriter
4 *
5 * Copyright © 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 */
25
26use MediaWiki\HookContainer\HookRunner;
27use MediaWiki\MediaWikiServices;
28use MediaWiki\Revision\RevisionRecord;
29use MediaWiki\Revision\RevisionStore;
30use MediaWiki\Revision\SlotRecord;
31use MediaWiki\Revision\SuppressedDataException;
32use MediaWiki\Storage\SqlBlobStore;
33use Wikimedia\Assert\Assert;
34use Wikimedia\IPUtils;
35
36/**
37 * @ingroup Dump
38 */
39class XmlDumpWriter {
40
41	/** Output serialized revision content. */
42	public const WRITE_CONTENT = 0;
43
	/** Only output stubs for revision content. */
45	public const WRITE_STUB = 1;
46
47	/**
	 * Only output stubs for revision content, indicating that the content has been
49	 * deleted/suppressed.
50	 */
51	private const WRITE_STUB_DELETED = 2;
52
53	/**
54	 * @var string[] the schema versions supported for output
55	 * @final
56	 */
57	public static $supportedSchemas = [
58		XML_DUMP_SCHEMA_VERSION_10,
59		XML_DUMP_SCHEMA_VERSION_11
60	];
61
62	/**
63	 * @var string which schema version the generated XML should comply to.
64	 * One of the values from self::$supportedSchemas, using the SCHEMA_VERSION_XX
65	 * constants.
66	 */
67	private $schemaVersion;
68
69	/**
70	 * Title of the currently processed page
71	 *
72	 * @var Title|null
73	 */
74	private $currentTitle = null;
75
76	/**
77	 * @var int Whether to output revision content or just stubs. WRITE_CONTENT or WRITE_STUB.
78	 */
79	private $contentMode;
80
81	/** @var HookRunner */
82	private $hookRunner;
83
84	/**
85	 * @param int $contentMode WRITE_CONTENT or WRITE_STUB.
86	 * @param string $schemaVersion which schema version the generated XML should comply to.
87	 * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
88	 * constants.
89	 */
90	public function __construct(
91		$contentMode = self::WRITE_CONTENT,
92		$schemaVersion = XML_DUMP_SCHEMA_VERSION_11
93	) {
94		Assert::parameter(
95			in_array( $contentMode, [ self::WRITE_CONTENT, self::WRITE_STUB ] ),
96			'$contentMode',
97			'must be one of the following constants: WRITE_CONTENT or WRITE_STUB.'
98		);
99
100		Assert::parameter(
101			in_array( $schemaVersion, self::$supportedSchemas ),
102			'$schemaVersion',
103			'must be one of the following schema versions: '
104				. implode( ',', self::$supportedSchemas )
105		);
106
107		$this->contentMode = $contentMode;
108		$this->schemaVersion = $schemaVersion;
109		$this->hookRunner = new HookRunner( MediaWikiServices::getInstance()->getHookContainer() );
110	}
111
112	/**
113	 * Opens the XML output stream's root "<mediawiki>" element.
114	 * This does not include an xml directive, so is safe to include
115	 * as a subelement in a larger XML stream. Namespace and XML Schema
116	 * references are included.
117	 *
118	 * Output will be encoded in UTF-8.
119	 *
120	 * @return string
121	 */
122	public function openStream() {
123		$ver = $this->schemaVersion;
124		return Xml::element( 'mediawiki', [
125			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
126			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
127			/*
128			 * When a new version of the schema is created, it needs staging on mediawiki.org.
129			 * This requires a change in the operations/mediawiki-config git repo.
130			 *
131			 * Create a changeset like https://gerrit.wikimedia.org/r/#/c/149643/ in which
132			 * you copy in the new xsd file.
133			 *
134			 * After it is reviewed, merged and deployed (sync-docroot), the index.html needs purging.
135			 * echo "https://www.mediawiki.org/xml/index.html" | mwscript purgeList.php --wiki=aawiki
136			 */
137			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
138				"http://www.mediawiki.org/xml/export-$ver.xsd",
139			'version' => $ver,
140			'xml:lang' => MediaWikiServices::getInstance()->getContentLanguage()->getHtmlCode() ],
141			null ) .
142			"\n" .
143			$this->siteInfo();
144	}
145
146	/**
147	 * @return string
148	 */
149	private function siteInfo() {
150		$info = [
151			$this->sitename(),
152			$this->dbname(),
153			$this->homelink(),
154			$this->generator(),
155			$this->caseSetting(),
156			$this->namespaces() ];
157		return "  <siteinfo>\n    " .
158			implode( "\n    ", $info ) .
159			"\n  </siteinfo>\n";
160	}
161
162	/**
163	 * @return string
164	 */
165	private function sitename() {
166		global $wgSitename;
167		return Xml::element( 'sitename', [], $wgSitename );
168	}
169
170	/**
171	 * @return string
172	 */
173	private function dbname() {
174		global $wgDBname;
175		return Xml::element( 'dbname', [], $wgDBname );
176	}
177
178	/**
179	 * @return string
180	 */
181	private function generator() {
182		return Xml::element( 'generator', [], 'MediaWiki ' . MW_VERSION );
183	}
184
185	/**
186	 * @return string
187	 */
188	private function homelink() {
189		return Xml::element( 'base', [], Title::newMainPage()->getCanonicalURL() );
190	}
191
192	/**
193	 * @return string
194	 */
195	private function caseSetting() {
196		global $wgCapitalLinks;
197		// "case-insensitive" option is reserved for future
198		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
199		return Xml::element( 'case', [], $sensitivity );
200	}
201
202	/**
203	 * @return string
204	 */
205	private function namespaces() {
206		$spaces = "<namespaces>\n";
207		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
208		foreach (
209			MediaWikiServices::getInstance()->getContentLanguage()->getFormattedNamespaces()
210			as $ns => $title
211		) {
212			$spaces .= '      ' .
213				Xml::element( 'namespace',
214					[
215						'key' => $ns,
216						'case' => $nsInfo->isCapitalized( $ns )
217							? 'first-letter' : 'case-sensitive',
218					], $title ) . "\n";
219		}
220		$spaces .= "    </namespaces>";
221		return $spaces;
222	}
223
224	/**
225	 * Closes the output stream with the closing root element.
226	 * Call when finished dumping things.
227	 *
228	 * @return string
229	 */
230	public function closeStream() {
231		return "</mediawiki>\n";
232	}
233
234	/**
235	 * Opens a "<page>" section on the output stream, with data
236	 * from the given database row.
237	 *
238	 * @param object $row
239	 * @return string
240	 */
241	public function openPage( $row ) {
242		$out = "  <page>\n";
243		$this->currentTitle = Title::newFromRow( $row );
244		$canonicalTitle = self::canonicalTitle( $this->currentTitle );
245		$out .= '    ' . Xml::elementClean( 'title', [], $canonicalTitle ) . "\n";
246		$out .= '    ' . Xml::element( 'ns', [], strval( $row->page_namespace ) ) . "\n";
247		$out .= '    ' . Xml::element( 'id', [], strval( $row->page_id ) ) . "\n";
248		if ( $row->page_is_redirect ) {
249			$page = WikiPage::factory( $this->currentTitle );
250			$redirect = $this->invokeLenient(
251				function () use ( $page ) {
252					return $page->getRedirectTarget();
253				},
254				'Failed to get redirect target of page ' . $page->getId()
255			);
256			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
257				$out .= '    ';
258				$out .= Xml::element( 'redirect', [ 'title' => self::canonicalTitle( $redirect ) ] );
259				$out .= "\n";
260			}
261		}
262
263		if ( $row->page_restrictions != '' ) {
264			$out .= '    ' . Xml::element( 'restrictions', [],
265				strval( $row->page_restrictions ) ) . "\n";
266		}
267
268		$this->hookRunner->onXmlDumpWriterOpenPage( $this, $out, $row, $this->currentTitle );
269
270		return $out;
271	}
272
273	/**
274	 * Closes a "<page>" section on the output stream.
275	 *
276	 * @internal
277	 * @return string
278	 */
279	public function closePage() {
280		if ( $this->currentTitle !== null ) {
281			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
282			// In rare cases, link cache has the same key for some pages which
283			// might be read as part of the same batch. T220424 and T220316
284			$linkCache->clearLink( $this->currentTitle );
285		}
286		return "  </page>\n";
287	}
288
289	/**
290	 * @return RevisionStore
291	 */
292	private function getRevisionStore() {
293		return MediaWikiServices::getInstance()->getRevisionStore();
294	}
295
296	/**
297	 * @return SqlBlobStore
298	 */
299	private function getBlobStore() {
300		return MediaWikiServices::getInstance()->getBlobStore();
301	}
302
303	/**
304	 * Invokes the given callback, catching and logging any storage related
305	 * exceptions.
306	 *
307	 * @param callable $callback
308	 * @param string $warning The warning to output in case of a storage related exception.
309	 *
310	 * @return mixed Returns the method's return value,
311	 *         or null in case of a storage related exception.
312	 * @throws Exception
313	 */
314	private function invokeLenient( $callback, $warning ) {
315		try {
316			return $callback();
317		} catch ( SuppressedDataException $ex ) {
318			return null;
319		} catch ( Exception $ex ) {
320			if ( $ex instanceof MWException || $ex instanceof RuntimeException ||
321				$ex instanceof InvalidArgumentException ) {
322				MWDebug::warning( $warning . ': ' . $ex->getMessage() );
323				return null;
324			} else {
325				throw $ex;
326			}
327		}
328	}
329
330	/**
331	 * Dumps a "<revision>" section on the output stream, with
332	 * data filled in from the given database row.
333	 *
334	 * @param object $row
335	 * @param null|object[] $slotRows
336	 *
337	 * @return string
338	 * @throws FatalError
339	 * @throws MWException
340	 */
341	public function writeRevision( $row, $slotRows = null ) {
342		$rev = $this->getRevisionStore()->newRevisionFromRowAndSlots(
343			$row,
344			$slotRows,
345			0,
346			$this->currentTitle
347		);
348
349		$out = "    <revision>\n";
350		$out .= "      " . Xml::element( 'id', null, strval( $rev->getId() ) ) . "\n";
351
352		if ( $rev->getParentId() ) {
353			$out .= "      " . Xml::element( 'parentid', null, strval( $rev->getParentId() ) ) . "\n";
354		}
355
356		$out .= $this->writeTimestamp( $rev->getTimestamp() );
357
358		if ( $rev->isDeleted( RevisionRecord::DELETED_USER ) ) {
359			$out .= "      " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
360		} else {
361			// empty values get written out as uid 0, see T224221
362			$user = $rev->getUser();
363			$out .= $this->writeContributor(
364				$user ? $user->getId() : 0,
365				$user ? $user->getName() : ''
366			);
367		}
368
369		if ( $rev->isMinor() ) {
370			$out .= "      <minor/>\n";
371		}
372		if ( $rev->isDeleted( RevisionRecord::DELETED_COMMENT ) ) {
373			$out .= "      " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
374		} else {
375			if ( $rev->getComment()->text != '' ) {
376				$out .= "      "
377					. Xml::elementClean( 'comment', [], strval( $rev->getComment()->text ) )
378					. "\n";
379			}
380		}
381
382		$contentMode = $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ? self::WRITE_STUB_DELETED
383			: $this->contentMode;
384
385		$slots = $rev->getSlots()->getSlots();
386
387		// use predictable order, put main slot first
388		ksort( $slots );
389		$out .= $this->writeSlot( $slots[SlotRecord::MAIN], $contentMode );
390
391		foreach ( $slots as $role => $slot ) {
392			if ( $role === SlotRecord::MAIN ) {
393				continue;
394			}
395			$out .= $this->writeSlot( $slot, $contentMode );
396		}
397
398		if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
399			$out .= "      <sha1/>\n";
400		} else {
401			$sha1 = $this->invokeLenient(
402				function () use ( $rev ) {
403					return $rev->getSha1();
404				},
405				'failed to determine sha1 for revision ' . $rev->getId()
406			);
407			$out .= "      " . Xml::element( 'sha1', null, strval( $sha1 ) ) . "\n";
408		}
409
410		// Avoid PHP 7.1 warning from passing $this by reference
411		$writer = $this;
412		$text = '';
413		if ( $contentMode === self::WRITE_CONTENT ) {
414			/** @var Content $content */
415			$content = $this->invokeLenient(
416				function () use ( $rev ) {
417					return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
418				},
419				'Failed to load main slot content of revision ' . $rev->getId()
420			);
421
422			$text = $content ? $content->serialize() : '';
423		}
424		$this->hookRunner->onXmlDumpWriterWriteRevision( $writer, $out, $row, $text, $rev );
425
426		$out .= "    </revision>\n";
427
428		return $out;
429	}
430
431	/**
432	 * @param SlotRecord $slot
433	 * @param int $contentMode see the WRITE_XXX constants
434	 *
435	 * @return string
436	 */
437	private function writeSlot( SlotRecord $slot, $contentMode ) {
438		$isMain = $slot->getRole() === SlotRecord::MAIN;
439		$isV11 = $this->schemaVersion >= XML_DUMP_SCHEMA_VERSION_11;
440
441		if ( !$isV11 && !$isMain ) {
442			// ignore extra slots
443			return '';
444		}
445
446		$out = '';
447		$indent = '      ';
448
449		if ( !$isMain ) {
450			// non-main slots are wrapped into an additional element.
451			$out .= '      ' . Xml::openElement( 'content' ) . "\n";
452			$indent .= '  ';
453			$out .= $indent . Xml::element( 'role', null, strval( $slot->getRole() ) ) . "\n";
454		}
455
456		if ( $isV11 ) {
457			$out .= $indent . Xml::element( 'origin', null, strval( $slot->getOrigin() ) ) . "\n";
458		}
459
460		$contentModel = $slot->getModel();
461		$contentHandler = MediaWikiServices::getInstance()
462			->getContentHandlerFactory()
463			->getContentHandler( $contentModel );
464		$contentFormat = $contentHandler->getDefaultFormat();
465
466		// XXX: The content format is only relevant when actually outputting serialized content.
467		// It should probably be an attribute on the text tag.
468		$out .= $indent . Xml::element( 'model', null, strval( $contentModel ) ) . "\n";
469		$out .= $indent . Xml::element( 'format', null, strval( $contentFormat ) ) . "\n";
470
471		$textAttributes = [
472			'bytes' => $this->invokeLenient(
473				function () use ( $slot ) {
474					return $slot->getSize();
475				},
476				'failed to determine size for slot ' . $slot->getRole() . ' of revision '
477				. $slot->getRevision()
478			) ?: '0'
479		];
480
481		if ( $isV11 ) {
482			$textAttributes['sha1'] = $this->invokeLenient(
483				function () use ( $slot ) {
484					return $slot->getSha1();
485				},
486				'failed to determine sha1 for slot ' . $slot->getRole() . ' of revision '
487				. $slot->getRevision()
488			) ?: '';
489		}
490
491		if ( $contentMode === self::WRITE_CONTENT ) {
492			$content = $this->invokeLenient(
493				function () use ( $slot ) {
494					return $slot->getContent();
495				},
496				'failed to load content for slot ' . $slot->getRole() . ' of revision '
497				. $slot->getRevision()
498			);
499
500			if ( $content === null ) {
501				$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
502			} else {
503				$out .= $this->writeText( $content, $textAttributes, $indent );
504			}
505		} elseif ( $contentMode === self::WRITE_STUB_DELETED ) {
506			// write <text> placeholder tag
507			$textAttributes['deleted'] = 'deleted';
508			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
509		} else {
510			// write <text> stub tag
511			if ( $isV11 ) {
512				$textAttributes['location'] = $slot->getAddress();
513			}
514
515			if ( $isMain ) {
516				// Output the numerical text ID if possible, for backwards compatibility.
517				// Note that this is currently the ONLY reason we have a BlobStore here at all.
518				// When removing this line, check whether the BlobStore has become unused.
519				try {
520					// NOTE: this will only work for addresses of the form "tt:12345".
521					// If we want to support other kinds of addresses in the future,
522					// we will have to silently ignore failures here.
523					// For now, this fails for "tt:0", which is present in the WMF production
524					// database of of Juli 2019, due to data corruption.
525					$textId = $this->getBlobStore()->getTextIdFromAddress( $slot->getAddress() );
526				} catch ( InvalidArgumentException $ex ) {
527					MWDebug::warning( 'Bad content address for slot ' . $slot->getRole()
528						. ' of revision ' . $slot->getRevision() . ': ' . $ex->getMessage() );
529					$textId = 0;
530				}
531
532				if ( is_int( $textId ) ) {
533					$textAttributes['id'] = $textId;
534				}
535			}
536
537			$out .= $indent . Xml::element( 'text', $textAttributes ) . "\n";
538		}
539
540		if ( !$isMain ) {
541			$out .= '      ' . Xml::closeElement( 'content' ) . "\n";
542		}
543
544		return $out;
545	}
546
547	/**
548	 * @param Content $content
549	 * @param string[] $textAttributes
550	 * @param string $indent
551	 *
552	 * @return string
553	 */
554	private function writeText( Content $content, $textAttributes, $indent ) {
555		$out = '';
556
557		$contentHandler = $content->getContentHandler();
558		$contentFormat = $contentHandler->getDefaultFormat();
559
560		if ( $content instanceof TextContent ) {
561			// HACK: For text based models, bypass the serialization step. This allows extensions (like Flow)
562			// that use incompatible combinations of serialization format and content model.
563			$data = $content->getNativeData();
564		} else {
565			$data = $content->serialize( $contentFormat );
566		}
567
568		$data = $contentHandler->exportTransform( $data, $contentFormat );
569		$textAttributes['bytes'] = $size = strlen( $data ); // make sure to use the actual size
570		$textAttributes['xml:space'] = 'preserve';
571		$out .= $indent . Xml::elementClean( 'text', $textAttributes, strval( $data ) ) . "\n";
572
573		return $out;
574	}
575
576	/**
577	 * Dumps a "<logitem>" section on the output stream, with
578	 * data filled in from the given database row.
579	 *
580	 * @param object $row
581	 * @return string
582	 */
583	public function writeLogItem( $row ) {
584		$out = "  <logitem>\n";
585		$out .= "    " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
586
587		$out .= $this->writeTimestamp( $row->log_timestamp, "    " );
588
589		if ( $row->log_deleted & LogPage::DELETED_USER ) {
590			$out .= "    " . Xml::element( 'contributor', [ 'deleted' => 'deleted' ] ) . "\n";
591		} else {
592			$out .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
593		}
594
595		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
596			$out .= "    " . Xml::element( 'comment', [ 'deleted' => 'deleted' ] ) . "\n";
597		} else {
598			$comment = CommentStore::getStore()->getComment( 'log_comment', $row )->text;
599			if ( $comment != '' ) {
600				$out .= "    " . Xml::elementClean( 'comment', null, strval( $comment ) ) . "\n";
601			}
602		}
603
604		$out .= "    " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
605		$out .= "    " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";
606
607		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
608			$out .= "    " . Xml::element( 'text', [ 'deleted' => 'deleted' ] ) . "\n";
609		} else {
610			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
611			$out .= "    " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
612			$out .= "    " . Xml::elementClean( 'params',
613				[ 'xml:space' => 'preserve' ],
614				strval( $row->log_params ) ) . "\n";
615		}
616
617		$out .= "  </logitem>\n";
618
619		return $out;
620	}
621
622	/**
623	 * @param string $timestamp
624	 * @param string $indent Default to six spaces
625	 * @return string
626	 */
627	public function writeTimestamp( $timestamp, $indent = "      " ) {
628		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
629		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
630	}
631
632	/**
633	 * @param int $id
634	 * @param string $text
635	 * @param string $indent Default to six spaces
636	 * @return string
637	 */
638	public function writeContributor( $id, $text, $indent = "      " ) {
639		$out = $indent . "<contributor>\n";
640		if ( $id || !IPUtils::isValid( $text ) ) {
641			$out .= $indent . "  " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
642			$out .= $indent . "  " . Xml::element( 'id', null, strval( $id ) ) . "\n";
643		} else {
644			$out .= $indent . "  " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
645		}
646		$out .= $indent . "</contributor>\n";
647		return $out;
648	}
649
650	/**
651	 * Warning! This data is potentially inconsistent. :(
652	 * @param object $row
653	 * @param bool $dumpContents
654	 * @return string
655	 */
656	public function writeUploads( $row, $dumpContents = false ) {
657		if ( $row->page_namespace == NS_FILE ) {
658			$img = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo()
659				->newFile( $row->page_title );
660			if ( $img && $img->exists() ) {
661				$out = '';
662				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
663					$out .= $this->writeUpload( $ver, $dumpContents );
664				}
665				$out .= $this->writeUpload( $img, $dumpContents );
666				return $out;
667			}
668		}
669		return '';
670	}
671
672	/**
673	 * @param File $file
674	 * @param bool $dumpContents
675	 * @return string
676	 */
677	private function writeUpload( $file, $dumpContents = false ) {
678		if ( $file->isOld() ) {
679			/** @var OldLocalFile $file */
680			'@phan-var OldLocalFile $file';
681			$archiveName = "      " .
682				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
683		} else {
684			$archiveName = '';
685		}
686		if ( $dumpContents ) {
687			$be = $file->getRepo()->getBackend();
688			# Dump file as base64
689			# Uses only XML-safe characters, so does not need escaping
690			# @todo Too bad this loads the contents into memory (script might swap)
691			$contents = '      <contents encoding="base64">' .
692				chunk_split( base64_encode(
693					$be->getFileContents( [ 'src' => $file->getPath() ] ) ) ) .
694				"      </contents>\n";
695		} else {
696			$contents = '';
697		}
698		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
699			$comment = Xml::element( 'comment', [ 'deleted' => 'deleted' ] );
700		} else {
701			$comment = Xml::elementClean( 'comment', null, strval( $file->getDescription() ) );
702		}
703		return "    <upload>\n" .
704			$this->writeTimestamp( $file->getTimestamp() ) .
705			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
706			"      " . $comment . "\n" .
707			"      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
708			$archiveName .
709			"      " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
710			"      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
711			"      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
712			"      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
713			$contents .
714			"    </upload>\n";
715	}
716
717	/**
718	 * Return prefixed text form of title, but using the content language's
719	 * canonical namespace. This skips any special-casing such as gendered
720	 * user namespaces -- which while useful, are not yet listed in the
721	 * XML "<siteinfo>" data so are unsafe in export.
722	 *
723	 * @param Title $title
724	 * @return string
725	 * @since 1.18
726	 */
727	public static function canonicalTitle( Title $title ) {
728		if ( $title->isExternal() ) {
729			return $title->getPrefixedText();
730		}
731
732		$prefix = MediaWikiServices::getInstance()->getContentLanguage()->
733			getFormattedNsText( $title->getNamespace() );
734
735		// @todo Emit some kind of warning to the user if $title->getNamespace() !==
736		// NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)
737
738		if ( $prefix !== '' ) {
739			$prefix .= ':';
740		}
741
742		return $prefix . $title->getText();
743	}
744}
745