<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Ext\Pre;

use DOMDocument;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Ext\Utils;
use Wikimedia\Parsoid\Utils\DOMCompat;

/**
 * The `<pre>` extension tag shadows the html pre tag, but has different
 * semantics.  It treats anything inside it as plaintext.
 */
class Pre extends ExtensionTagHandler implements ExtensionModule {

	/** @inheritDoc */
	public function getConfig(): array {
		return [
			'name' => '<pre>',
			'tags' => [
				[
					'name' => 'pre',
					'handler' => self::class,
				]
			]
		];
	}

	/**
	 * Build a document whose body holds a single `<pre>` element containing
	 * the tag's source as one text node.
	 *
	 * The text is processed in this order: `<nowiki>` wrappers are removed
	 * (keeping their content), one leading newline is dropped, and the result
	 * is passed through Utils::decodeWtEntities().
	 *
	 * @param ParsoidExtensionAPI $extApi Used to create the empty document and
	 *   to sanitize the tag's arguments onto the `<pre>` element.
	 * @param string $txt Source text found between the extension tags.
	 * @param array $extArgs Arguments of the extension tag, handed unchanged
	 *   to $extApi->sanitizeArgs().
	 * @return DOMDocument
	 * @inheritDoc
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $txt, array $extArgs
	): DOMDocument {
		$doc = $extApi->htmlToDom( '' ); // Empty doc
		$pre = $doc->createElement( 'pre' );

		$extApi->sanitizeArgs( $pre, $extArgs );
		DOMDataUtils::getDataParsoid( $pre )->stx = 'html';

		// Support nowikis in pre.  Do this before stripping newlines, see test,
		// "<pre> with <nowiki> inside (compatibility with 1.6 and earlier)"
		//
		// preg_replace() returns null when PCRE itself fails (for example when
		// the backtrack limit is hit on a very large input).  In that case keep
		// the text as it was rather than passing null on to the string-typed
		// calls below.
		$txt = preg_replace( '/<nowiki\s*>(.*?)<\/nowiki\s*>/s', '$1', $txt ) ?? $txt;

		// Strip leading newline to match legacy php parser.  This is probably because
		// it doesn't do xml serialization accounting for `newlineStrippingElements`
		// Of course, this leads to indistinguishability between n=0 and n=1
		// newlines, but that only seems to affect parserTests output.  Rendering
		// is the same, and the newline is preserved for rt in the `extSrc`.
		$txt = preg_replace( '/^\n/', '', $txt, 1 ) ?? $txt;

		// `extSrc` will take care of rt'ing these
		$txt = Utils::decodeWtEntities( $txt );

		$pre->appendChild( $doc->createTextNode( $txt ) );
		DOMCompat::getBody( $doc )->appendChild( $pre );

		return $doc;
	}

}