1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Ext\Pre;
5
6use DOMDocument;
7use Wikimedia\Parsoid\Ext\DOMDataUtils;
8use Wikimedia\Parsoid\Ext\ExtensionModule;
9use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
10use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
11use Wikimedia\Parsoid\Ext\Utils;
12use Wikimedia\Parsoid\Utils\DOMCompat;
13
/**
 * Extension module and tag handler for the `<pre>` extension tag.
 *
 * The `<pre>` extension tag shadows the html pre tag, but has different
 * semantics.  It treats anything inside it as plaintext.
 */
class Pre extends ExtensionTagHandler implements ExtensionModule {

	/**
	 * Describe this module: it is named `<pre>` and registers a single
	 * tag, `pre`, whose handler is this class.
	 *
	 * @inheritDoc
	 */
	public function getConfig(): array {
		return [
			'name' => '<pre>',
			'tags' => [
				[
					'name' => 'pre',
					'handler' => self::class,
				]
			]
		];
	}

	/**
	 * Build a document whose body holds one `<pre>` element wrapping the
	 * tag content as a single text node.
	 *
	 * The content is processed in this order: `<nowiki>` wrappers are
	 * unwrapped, one leading newline is dropped, and wikitext entities
	 * are decoded.
	 *
	 * @inheritDoc
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $txt, array $extArgs
	): DOMDocument {
		$doc = $extApi->htmlToDom( '' ); // Empty doc
		$pre = $doc->createElement( 'pre' );

		$extApi->sanitizeArgs( $pre, $extArgs );
		DOMDataUtils::getDataParsoid( $pre )->stx = 'html';

		// Support nowikis in pre.  Do this before stripping newlines, see test,
		// "<pre> with <nowiki> inside (compatibility with 1.6 and earlier)"
		//
		// preg_replace() returns null on a PCRE error (for example when the
		// lazy `.*?` exceeds the backtrack limit on a very large input).
		// Keep the unmodified text in that case instead of letting a null
		// flow into the string-only calls below.
		$txt = preg_replace( '/<nowiki\s*>(.*?)<\/nowiki\s*>/s', '$1', $txt ) ?? $txt;

		// Strip leading newline to match legacy php parser.  This is probably because
		// it doesn't do xml serialization accounting for `newlineStrippingElements`
		// Of course, this leads to indistinguishability between n=0 and n=1
		// newlines, but that only seems to affect parserTests output.  Rendering
		// is the same, and the newline is preserved for rt in the `extSrc`.
		//
		// Only a single newline at the very start of the text is removed.
		if ( $txt !== '' && $txt[0] === "\n" ) {
			$txt = substr( $txt, 1 );
		}

		// `extSrc` will take care of rt'ing these
		$txt = Utils::decodeWtEntities( $txt );

		$pre->appendChild( $doc->createTextNode( $txt ) );
		DOMCompat::getBody( $doc )->appendChild( $pre );

		return $doc;
	}

}
64