<?php

namespace MediaWiki\Tests\Maintenance;

use CommentStoreComment;
use Content;
use ContentHandler;
use DOMDocument;
use ExecutableFinder;
use MediaWiki\MediaWikiServices;
use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Storage\SlotRecord;
use MediaWikiLangTestCase;
use MWException;
use WikiExporter;
use WikiPage;

/**
 * Base TestCase for dumps
 */
abstract class DumpTestCase extends MediaWikiLangTestCase {

	/**
	 * exception to be rethrown once in sound PHPUnit surrounding
	 *
	 * As the current MediaWikiIntegrationTestCase::run is not robust enough to recover
	 * from thrown exceptions directly, we cannot throw from within
	 * self::addDBData, although it would be appropriate. Hence, we catch the
	 * exception and store it until we are in setUp and may finally rethrow
	 * the exception without crashing the test suite.
	 *
	 * @var \Exception|null
	 */
	protected $exceptionFromAddDBData = null;

	/** @var bool|null Whether the 'gzip' utility is available; null until first checked */
	protected static $hasGzip = null;

	/**
	 * Skip the test if 'gzip' is not in $PATH.
	 *
	 * The lookup result is cached in self::$hasGzip, so the search is only
	 * performed once per process.
	 *
	 * @return bool
	 */
	protected function checkHasGzip() {
		if ( self::$hasGzip === null ) {
			self::$hasGzip = ( ExecutableFinder::findInDefaultPaths( 'gzip' ) !== false );
		}

		if ( !self::$hasGzip ) {
			$this->markTestSkipped( "Skip test, requires the gzip utility in PATH" );
		}

		return self::$hasGzip;
	}

	/**
	 * Adds a single-slot (main) revision to a page, returning the resulting
	 * revision's id, its text id, and the revision itself.
	 *
	 * @param WikiPage $page Page to add the revision to
	 * @param string $text Revisions text
	 * @param string $summary Revisions summary
	 * @param string $model The model ID (defaults to wikitext)
	 *
	 * @throws MWException If no revision was created
	 * @return array Triple of [ revision id, text id of the main slot, revision record ]
	 */
	protected function addRevision(
		WikiPage $page,
		$text,
		$summary,
		$model = CONTENT_MODEL_WIKITEXT
	) {
		$contentHandler = ContentHandler::getForModelID( $model );
		$content = $contentHandler->unserializeContent( $text );

		$rev = $this->addMultiSlotRevision( $page, [ SlotRecord::MAIN => $content ], $summary );

		if ( !$rev ) {
			throw new MWException( "Could not create revision" );
		}

		$text_id = $this->getSlotTextId( $rev->getSlot( SlotRecord::MAIN ) );
		return [ $rev->getId(), $text_id, $rev ];
	}

	/**
	 * Returns the serialized content of a slot.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return string|null The serialized content, or null if accessing the
	 *  content raised a RevisionAccessException
	 */
	protected function getSlotText( SlotRecord $slot ) {
		try {
			return $slot->getContent()->serialize();
		} catch ( RevisionAccessException $ex ) {
			return null;
		}
	}

	/**
	 * Derives a numeric text id from a slot's address by stripping a
	 * leading "tt:" prefix and casting the remainder to int.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return int
	 */
	protected function getSlotTextId( SlotRecord $slot ) {
		return (int)preg_replace( '/^tt:/', '', $slot->getAddress() );
	}

	/**
	 * Returns the default serialization format of the slot's content model.
	 *
	 * @param SlotRecord $slot
	 *
	 * @return string
	 */
	protected function getSlotFormat( SlotRecord $slot ) {
		$contentHandler = ContentHandler::getForModelID( $slot->getModel() );
		return $contentHandler->getDefaultFormat();
	}

	/**
	 * Adds a revision with the given slots to a page, returning the resulting revision.
	 *
	 * Slot roles that are not yet known to the slot role registry are defined
	 * on the fly, using the model of the content supplied for that role.
	 *
	 * @param WikiPage $page Page to add the revision to
	 * @param Content[] $slots A mapping of slot names to Content objects
	 * @param string $summary Revisions summary
	 *
	 * @throws MWException
	 * @return RevisionRecord|null Whatever the page updater reports as the new
	 *  revision; self::addRevision() treats a falsy value as failure
	 */
	protected function addMultiSlotRevision(
		WikiPage $page,
		array $slots,
		$summary
	) {
		$slotRoleRegistry = MediaWikiServices::getInstance()->getSlotRoleRegistry();

		$updater = $page->newPageUpdater( $this->getTestUser()->getUser() );

		foreach ( $slots as $role => $content ) {
			if ( !$slotRoleRegistry->isDefinedRole( $role ) ) {
				$slotRoleRegistry->defineRoleWithModel( $role, $content->getModel() );
			}

			$updater->setContent( $role, $content );
		}

		$updater->saveRevision( CommentStoreComment::newUnsavedComment( trim( $summary ) ) );
		return $updater->getNewRevision();
	}

	/**
	 * gunzips the given file and stores the result in the original file name
	 *
	 * Fails the test if the file cannot be read, cannot be decompressed, or
	 * if the number of bytes written differs from the decompressed size.
	 *
	 * @param string $fname Filename to read the gzipped data from and stored
	 *             the gunzipped data into
	 */
	protected function gunzip( $fname ) {
		$gzipped_contents = file_get_contents( $fname );
		if ( $gzipped_contents === false ) {
			$this->fail( "Could not get contents of $fname" );
		}

		$contents = gzdecode( $gzipped_contents );
		if ( $contents === false ) {
			// Do not overwrite the file with an empty payload on decode failure.
			$this->fail( "Could not gunzip contents of $fname" );
		}

		// assertSame, not assertEquals: file_put_contents() returns false on
		// failure, and false would loosely equal a length of 0.
		$this->assertSame(
			strlen( $contents ),
			file_put_contents( $fname, $contents ),
			'# bytes written'
		);
	}

	/**
	 * Installs a libxml external entity loader that serves local copies of
	 * schema files, if libxml supports setting one.
	 */
	public static function setUpBeforeClass() : void {
		parent::setUpBeforeClass();

		if ( !function_exists( 'libxml_set_external_entity_loader' ) ) {
			return;
		}

		// The W3C is intentionally slow about returning schema files,
		// see <https://www.w3.org/Help/Webmaster#slowdtd>.
		// To work around that, we keep our own copies of the relevant schema files.
		libxml_set_external_entity_loader(
			static function ( $public, $system, $context ) {
				switch ( $system ) {
					// if more schema files are needed, add them here.
					case 'http://www.w3.org/2001/xml.xsd':
						$file = __DIR__ . '/xml.xsd';
						break;
					default:
						// Anything else is only resolved if it is an existing local file.
						if ( is_file( $system ) ) {
							$file = $system;
						} else {
							return null;
						}
				}

				return $file;
			}
		);
	}

	/**
	 * Default set up function.
	 *
	 * Reports errors from addDBData to PHPUnit
	 */
	protected function setUp() : void {
		parent::setUp();

		// Check if any Exception is stored for rethrowing from addDBData
		// @see self::exceptionFromAddDBData
		if ( $this->exceptionFromAddDBData !== null ) {
			throw $this->exceptionFromAddDBData;
		}
	}

	/**
	 * Returns the path to the XML schema file for the given schema version.
	 *
	 * @param string|null $schemaVersion Schema version; any falsy value falls
	 *  back to $wgXmlDumpSchemaVersion
	 *
	 * @return string
	 */
	protected function getXmlSchemaPath( $schemaVersion = null ) {
		global $IP, $wgXmlDumpSchemaVersion;

		$schemaVersion = $schemaVersion ?: $wgXmlDumpSchemaVersion;

		return "$IP/docs/export-$schemaVersion.xsd";
	}

	/**
	 * Checks for test output consisting only of lines containing ETA announcements
	 */
	protected function expectETAOutput() {
		// Newer PHPUnits require assertion about the output using PHPUnit's own
		// expectOutput[...] functions. However, the PHPUnit shipped predicates
		// do not allow to check /each/ line of the output using /readable/ REs.
		// So we ...

		// 1. ... add a dummy output checking to make PHPUnit not complain
		//    about unchecked test output
		$this->expectOutputRegex( '//' );

		// 2. Do the real output checking on our own.
		$lines = explode( "\n", $this->getActualOutput() );
		$this->assertGreaterThan( 1, count( $lines ), "Minimal lines of produced output" );
		// The output must end in LF, so the last exploded element is empty.
		$this->assertSame( '', array_pop( $lines ), "Output ends in LF" );
		$timestamp_re = "[0-9]{4}-[01][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-6][0-9]";
		foreach ( $lines as $line ) {
			$this->assertRegExp(
				"/$timestamp_re: .* \(ID [0-9]+\) [0-9]* pages .*, [0-9]* revs .*, ETA/",
				$line
			);
		}
	}

	/**
	 * Creates a DumpAsserter for the given schema version.
	 *
	 * @param null|string $schemaVersion Schema version; any falsy value falls
	 *  back to WikiExporter::schemaVersion()
	 *
	 * @return DumpAsserter
	 */
	protected function getDumpAsserter( $schemaVersion = null ) {
		$schemaVersion = $schemaVersion ?: WikiExporter::schemaVersion();
		return new DumpAsserter( $schemaVersion );
	}

	/**
	 * Checks an XML file against an XSD schema.
	 *
	 * If libxml's error handling functions are unavailable, the test is
	 * marked as risky and no validation is performed.
	 *
	 * @param string $fname
	 * @param string $schemaFile
	 */
	protected function assertDumpSchema( $fname, $schemaFile ) {
		if ( !function_exists( 'libxml_use_internal_errors' ) ) {
			// Would be nice to leave a warning somehow.
			// We don't want to skip all of the test case that calls this, though.
			$this->markAsRisky();
			return;
		}
		$xml = new DOMDocument();
		$this->assertTrue( $xml->load( $fname ),
			"Opening temporary file $fname via DOMDocument failed" );

		// Don't throw
		$oldLibXmlInternalErrors = libxml_use_internal_errors( true );

		try {
			// NOTE: if this reports "Invalid Schema", the schema may be referencing an external
			// entity (typically, another schema) that needs to be mapped in the
			// libxml_set_external_entity_loader callback defined in setUpBeforeClass() above!
			// Or $schemaFile doesn't point to a schema file, or the schema is indeed just broken.
			if ( !$xml->schemaValidate( $schemaFile ) ) {
				$errorText = '';

				foreach ( libxml_get_errors() as $error ) {
					$errorText .= "\nline {$error->line}: {$error->message}";
				}

				libxml_clear_errors();

				$this->fail(
					"Failed asserting that $fname conforms to the schema in $schemaFile:\n$errorText"
				);
			}
		} finally {
			// fail() throws, so the previous libxml error mode has to be restored
			// here; otherwise a failed validation would leak the setting into
			// whatever runs next.
			libxml_use_internal_errors( $oldLibXmlInternalErrors );
		}
	}

}