1<?php
2
3namespace MediaWiki\Tests\Maintenance;
4
5use CommentStoreComment;
6use Content;
7use ContentHandler;
8use DOMDocument;
9use ExecutableFinder;
10use MediaWiki\MediaWikiServices;
11use MediaWiki\Revision\RevisionAccessException;
12use MediaWiki\Revision\RevisionRecord;
13use MediaWiki\Storage\SlotRecord;
14use MediaWikiLangTestCase;
15use MWException;
16use WikiExporter;
17use WikiPage;
18
/**
 * Base TestCase for dumps
 *
 * Collects helpers shared by the dump related tests: creating revisions,
 * inspecting slots, gunzipping dump files, validating dump files against an
 * XSD schema, and checking progress (ETA) output.
 */
abstract class DumpTestCase extends MediaWikiLangTestCase {

	/**
	 * exception to be rethrown once in sound PHPUnit surrounding
	 *
	 * As the current MediaWikiIntegrationTestCase::run is not robust enough to recover
	 * from thrown exceptions directly, we cannot throw from within
	 * self::addDBData, although it would be appropriate. Hence, we catch the
	 * exception and store it until we are in setUp and may finally rethrow
	 * the exception without crashing the test suite.
	 *
	 * @var \Exception|null
	 */
	protected $exceptionFromAddDBData = null;

	/** @var bool|null Whether the 'gzip' utility is available; null until first checked */
	protected static $hasGzip = null;

	/**
	 * Skip the test if 'gzip' is not in $PATH.
	 *
	 * The lookup result is cached in self::$hasGzip, so the search for the
	 * executable happens at most once per process.
	 *
	 * @return bool True if gzip is available. When it is not, the test is
	 *   marked as skipped instead.
	 */
	protected function checkHasGzip() {
		if ( self::$hasGzip === null ) {
			self::$hasGzip = ( ExecutableFinder::findInDefaultPaths( 'gzip' ) !== false );
		}

		if ( !self::$hasGzip ) {
			$this->markTestSkipped( "Skip test, requires the gzip utility in PATH" );
		}

		return self::$hasGzip;
	}

	/**
	 * Adds a single-slot (main) revision to a page, while returning the
	 * resulting revision's id, its text id, and the revision itself.
	 *
	 * @param WikiPage $page Page to add the revision to
	 * @param string $text Revisions text
	 * @param string $summary Revisions summary
	 * @param string $model The model ID (defaults to wikitext)
	 *
	 * @throws MWException If no revision could be created
	 * @return array [ revision id, text id of the main slot, RevisionRecord ]
	 */
	protected function addRevision(
		WikiPage $page,
		$text,
		$summary,
		$model = CONTENT_MODEL_WIKITEXT
	) {
		$contentHandler = ContentHandler::getForModelID( $model );
		$content = $contentHandler->unserializeContent( $text );

		// Use the role constant here as well, matching the getSlot() call below.
		$rev = $this->addMultiSlotRevision( $page, [ SlotRecord::MAIN => $content ], $summary );

		if ( !$rev ) {
			throw new MWException( "Could not create revision" );
		}

		$text_id = $this->getSlotTextId( $rev->getSlot( SlotRecord::MAIN ) );
		return [ $rev->getId(), $text_id, $rev ];
	}

	/**
	 * Returns the serialized content of a slot.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return string|null The serialized content, or null if accessing the
	 *   content raised a RevisionAccessException
	 */
	protected function getSlotText( SlotRecord $slot ) {
		try {
			return $slot->getContent()->serialize();
		} catch ( RevisionAccessException $ex ) {
			return null;
		}
	}

	/**
	 * Derives the text id from a slot's address.
	 *
	 * A leading "tt:" is stripped from the address and the remainder is cast
	 * to int. NOTE(review): an address that does not have the "tt:<number>"
	 * form is not rejected here; it goes through the int cast as-is.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return int
	 */
	protected function getSlotTextId( SlotRecord $slot ) {
		return (int)preg_replace( '/^tt:/', '', $slot->getAddress() );
	}

	/**
	 * Returns the default serialization format of the slot's content model.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return string
	 */
	protected function getSlotFormat( SlotRecord $slot ) {
		$contentHandler = ContentHandler::getForModelID( $slot->getModel() );
		return $contentHandler->getDefaultFormat();
	}

	/**
	 * Adds a revision with the given slots to a page, and returns the
	 * resulting revision.
	 *
	 * Slot roles that are not yet known to the slot role registry are
	 * defined on the fly, using the model of the content given for that role.
	 * The revision is saved as the user returned by getTestUser().
	 *
	 * @param WikiPage $page Page to add the revision to
	 * @param Content[] $slots A mapping of slot names to Content objects
	 * @param string $summary Revisions summary (surrounding whitespace is trimmed)
	 *
	 * @throws MWException
	 * @return RevisionRecord|null The updater's new revision. Presumably null
	 *   when no revision was created; addRevision() guards against a falsy result.
	 */
	protected function addMultiSlotRevision(
		WikiPage $page,
		array $slots,
		$summary
	) {
		$slotRoleRegistry = MediaWikiServices::getInstance()->getSlotRoleRegistry();

		$updater = $page->newPageUpdater( $this->getTestUser()->getUser() );

		foreach ( $slots as $role => $content ) {
			if ( !$slotRoleRegistry->isDefinedRole( $role ) ) {
				$slotRoleRegistry->defineRoleWithModel( $role, $content->getModel() );
			}

			$updater->setContent( $role, $content );
		}

		$updater->saveRevision( CommentStoreComment::newUnsavedComment( trim( $summary ) ) );
		return $updater->getNewRevision();
	}

	/**
	 * gunzips the given file and stores the result in the original file name
	 *
	 * Fails the test if the file cannot be read, if its contents cannot be
	 * decoded, or if the decoded contents cannot be written back completely.
	 *
	 * @param string $fname Filename to read the gzipped data from and stored
	 *   the gunzipped data into
	 */
	protected function gunzip( $fname ) {
		$gzipped_contents = file_get_contents( $fname );
		if ( $gzipped_contents === false ) {
			$this->fail( "Could not get contents of $fname" );
		}

		$contents = gzdecode( $gzipped_contents );
		if ( $contents === false ) {
			// Without this check, a decoding failure would be written back as an
			// empty file and the byte count comparison below would still pass (0 vs. 0).
			$this->fail( "Could not gunzip contents of $fname" );
		}

		// Strict comparison: file_put_contents() returns false on failure, which a
		// loose comparison would consider equal to a length of 0.
		$this->assertSame(
			strlen( $contents ),
			file_put_contents( $fname, $contents ),
			'# bytes written'
		);
	}

	/**
	 * Installs a libxml external entity loader that serves local copies of
	 * well known schema files, if libxml supports setting one.
	 */
	public static function setUpBeforeClass() : void {
		parent::setUpBeforeClass();

		if ( !function_exists( 'libxml_set_external_entity_loader' ) ) {
			return;
		}

		// The W3C is intentionally slow about returning schema files,
		// see <https://www.w3.org/Help/Webmaster#slowdtd>.
		// To work around that, we keep our own copies of the relevant schema files.
		libxml_set_external_entity_loader(
			static function ( $public, $system, $context ) {
				// if more schema files are needed, add them here.
				if ( $system === 'http://www.w3.org/2001/xml.xsd' ) {
					return __DIR__ . '/xml.xsd';
				}

				// Anything else is only resolved if it names an existing local file.
				// The null check avoids handing null to is_file().
				if ( $system !== null && is_file( $system ) ) {
					return $system;
				}

				return null;
			}
		);
	}

	/**
	 * Default set up function.
	 *
	 * Reports errors from addDBData to PHPUnit
	 */
	protected function setUp() : void {
		parent::setUp();

		// Check if any Exception is stored for rethrowing from addDBData
		// @see self::exceptionFromAddDBData
		if ( $this->exceptionFromAddDBData !== null ) {
			throw $this->exceptionFromAddDBData;
		}
	}

	/**
	 * Returns the path to the XML schema file for the given schema version.
	 *
	 * @param string|null $schemaVersion Schema version to use; any falsy value
	 *   selects $wgXmlDumpSchemaVersion
	 *
	 * @return string Path of the form "$IP/docs/export-<version>.xsd"
	 */
	protected function getXmlSchemaPath( $schemaVersion = null ) {
		global $IP, $wgXmlDumpSchemaVersion;

		$schemaVersion = $schemaVersion ?: $wgXmlDumpSchemaVersion;

		return "$IP/docs/export-$schemaVersion.xsd";
	}

	/**
	 * Checks for test output consisting only of lines containing ETA announcements
	 *
	 * The output must end in a line feed, and every line before that must
	 * start with a timestamp and contain the page, revision and ETA markers.
	 */
	protected function expectETAOutput() {
		// Newer PHPUnits require assertion about the output using PHPUnit's own
		// expectOutput[...] functions. However, the PHPUnit shipped predicates
		// do not allow to check /each/ line of the output using /readable/ REs.
		// So we ...

		// 1. ... add a dummy output checking to make PHPUnit not complain
		//    about unchecked test output
		$this->expectOutputRegex( '//' );

		// 2. Do the real output checking on our own.
		$lines = explode( "\n", $this->getActualOutput() );
		$this->assertGreaterThan( 1, count( $lines ), "Minimal lines of produced output" );
		// Output terminated by LF leaves an empty last element after explode().
		$this->assertSame( '', array_pop( $lines ), "Output ends in LF" );
		$timestamp_re = "[0-9]{4}-[01][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-6][0-9]";
		foreach ( $lines as $line ) {
			$this->assertRegExp(
				"/$timestamp_re: .* \(ID [0-9]+\) [0-9]* pages .*, [0-9]* revs .*, ETA/",
				$line
			);
		}
	}

	/**
	 * Creates a DumpAsserter for the given schema version.
	 *
	 * @param null|string $schemaVersion Schema version to use; any falsy value
	 *   selects WikiExporter::schemaVersion()
	 *
	 * @return DumpAsserter
	 */
	protected function getDumpAsserter( $schemaVersion = null ) {
		$schemaVersion = $schemaVersion ?: WikiExporter::schemaVersion();
		return new DumpAsserter( $schemaVersion );
	}

	/**
	 * Checks an XML file against an XSD schema.
	 *
	 * If libxml's error handling functions are unavailable, the test is
	 * marked as risky and no validation takes place. libxml's internal error
	 * setting is restored before returning, whether or not validation passed.
	 *
	 * @param string $fname Path of the XML file to check
	 * @param string $schemaFile Path of the XSD schema to validate against
	 */
	protected function assertDumpSchema( $fname, $schemaFile ) {
		if ( !function_exists( 'libxml_use_internal_errors' ) ) {
			// Would be nice to leave a warning somehow.
			// We don't want to skip all of the test case that calls this, though.
			$this->markAsRisky();
			return;
		}
		$xml = new DOMDocument();
		$this->assertTrue( $xml->load( $fname ),
			"Opening temporary file $fname via DOMDocument failed" );

		// Don't throw
		$oldLibXmlInternalErrors = libxml_use_internal_errors( true );

		try {
			// Drop errors buffered earlier, so that only errors of this
			// validation run can end up in the failure message.
			libxml_clear_errors();

			// NOTE: if this reports "Invalid Schema", the schema may be referencing an external
			// entity (typically, another schema) that needs to be mapped in the
			// libxml_set_external_entity_loader callback defined in setUpBeforeClass() above!
			// Or $schemaFile doesn't point to a schema file, or the schema is indeed just broken.
			if ( !$xml->schemaValidate( $schemaFile ) ) {
				$errorText = '';

				foreach ( libxml_get_errors() as $error ) {
					$errorText .= "\nline {$error->line}: {$error->message}";
				}

				$this->fail(
					"Failed asserting that $fname conforms to the schema in $schemaFile:\n$errorText"
				);
			}
		} finally {
			// fail() throws, so the clean up has to live in a finally block to
			// keep the libxml error setting from leaking into subsequent tests.
			libxml_clear_errors();
			libxml_use_internal_errors( $oldLibXmlInternalErrors );
		}
	}

}
308