1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Html2Wt\DOMHandlers;
5
6use DOMElement;
7use DOMNode;
8use stdClass;
9use Wikimedia\Parsoid\Config\WikitextConstants;
10use Wikimedia\Parsoid\Html2Wt\SerializerState;
11use Wikimedia\Parsoid\Utils\DOMDataUtils;
12use Wikimedia\Parsoid\Utils\DOMUtils;
13use Wikimedia\Parsoid\Utils\WTUtils;
14
/**
 * Html2Wt DOM handler for <p> nodes.
 *
 * handle() serializes the paragraph's children. before() and after() return
 * [ 'min' => int, 'max' => int ] arrays that bound how many newlines separate
 * the paragraph from the neighbouring node it is being serialized next to.
 */
class PHandler extends DOMHandler {

	public function __construct() {
		// Passing true here is counterintuitive but seems right: otherwise the
		// generated wikitext would parse as an indent-pre.
		// escapeWikitext nowiki-ing deals with leading space for content
		// inside the p-tag, but forceSOL suppresses whitespace before the p-tag.
		// NOTE(review): the argument is presumably DOMHandler's forceSOL flag;
		// confirm against the parent constructor.
		parent::__construct( true );
	}

	/** @inheritDoc */
	public function handle(
		DOMElement $node, SerializerState $state, bool $wrapperUnmodified = false
	): ?DOMNode {
		// XXX: Handle single-line mode by switching to HTML handler!
		$state->serializeChildren( $node );
		return $node->nextSibling;
	}

	/** @inheritDoc */
	public function before( DOMElement $node, DOMNode $otherNode, SerializerState $state ): array {
		$otherNodeName = $otherNode->nodeName;
		$otherIsParent = $node->parentNode === $otherNode;
		$otherIsCellOrBody = in_array( $otherNodeName, [ 'td', 'th', 'body' ], true );

		// First node inside a table cell, the body, or a list item.
		if ( $otherIsParent && ( $otherIsCellOrBody || DOMUtils::isListItem( $otherNode ) ) ) {
			// Cells and body tolerate a single newline; list items tolerate none.
			return [ 'min' => 0, 'max' => $otherIsCellOrBody ? 1 : 0 ];
		}

		if ( ( $otherNode === DOMUtils::previousNonDeletedSibling( $node )
				// p-p transition
				&& self::isNonHtmlStxP( $otherNode ) )
			|| ( self::treatAsPPTransition( $otherNode )
				&& $otherNode === DOMUtils::previousNonSepSibling( $node )
				// A new wikitext line could start at this P-tag. We have to figure
				// out if 'node' needs a separation of 2 newlines from $otherNode.
				// Examine $otherNode and the nodes preceding it to see if a block
				// tag was emitted on the current line => we can make do with
				// 1 newline separator instead of 2 before the P-tag.
				&& !$this->currWikitextLineHasBlockNode( $state->currLine, $otherNode ) )
		) {
			return [ 'min' => 2, 'max' => 2 ];
		}

		if ( self::treatAsPPTransition( $otherNode )
			|| ( DOMUtils::isBlockNode( $otherNode )
				&& $otherNodeName !== 'blockquote'
				&& $otherIsParent )
			// new p-node added after sol-transparent wikitext should always
			// get serialized onto a new wikitext line.
			|| ( WTUtils::emitsSolTransparentSingleLineWT( $otherNode )
				&& WTUtils::isNewElt( $node ) )
		) {
			// At least one newline is required, except under a <figcaption>
			// where the minimum drops to zero.
			return [
				'min' => DOMUtils::hasAncestorOfName( $otherNode, 'figcaption' ) ? 0 : 1,
				'max' => 2,
			];
		}

		return [ 'min' => 0, 'max' => 2 ];
	}

	/** @inheritDoc */
	public function after( DOMElement $node, DOMNode $otherNode, SerializerState $state ): array {
		$endsWithBr = $node->lastChild && $node->lastChild->nodeName === 'br';

		if ( !$endsWithBr
			&& self::isPPTransition( $otherNode )
			// Walk backwards from 'node' (without inspecting node's own text)
			// to see if a block tag was already emitted on the current wikitext
			// line => we can make do with 1 newline separator instead of 2.
			&& !$this->currWikitextLineHasBlockNode( $state->currLine, $node, true )
			// Since we are going to emit newlines before $otherNode, we know it
			// is going to start a new wikitext line. Examine the following
			// siblings of $otherNode to see if we might emit a block tag there
			// => we can make do with 1 newline separator instead of 2.
			&& !$this->newWikitextLineMightHaveBlockNode( $otherNode )
		) {
			return [ 'min' => 2, 'max' => 2 ];
		}

		if ( DOMUtils::isBody( $otherNode ) ) {
			return [ 'min' => 0, 'max' => 2 ];
		}

		if ( self::treatAsPPTransition( $otherNode )
			|| ( DOMUtils::isBlockNode( $otherNode )
				&& $otherNode->nodeName !== 'blockquote'
				&& $node->parentNode === $otherNode )
		) {
			// At least one newline is required, except under a <figcaption>
			// where the minimum drops to zero.
			return [
				'min' => DOMUtils::hasAncestorOfName( $otherNode, 'figcaption' ) ? 0 : 1,
				'max' => 2,
			];
		}

		return [ 'min' => 0, 'max' => 2 ];
	}

	/**
	 * Check whether a block node with visible wikitext precedes $node on the
	 * wikitext line currently being emitted.
	 *
	 * IMPORTANT: Do not start walking from line.firstNode forward. Always
	 * walk backward from node. This is because in selser mode, it looks like
	 * line.firstNode doesn't always correspond to the wikitext line that is
	 * being processed since the previous emitted node might have been an
	 * unmodified DOM node that generated multiple wikitext lines.
	 *
	 * @param stdClass|null $line See SerializerState::$currLine
	 * @param DOMNode $node Node to start walking backwards from
	 * @param bool $skipNode If true, $node's own text content is not inspected
	 *   for line breaks; only the nodes preceding it are examined.
	 * @return bool True if such a block node is found before a line break or
	 *   the start of the current line is reached; false otherwise.
	 */
	private function currWikitextLineHasBlockNode(
		?stdClass $line, DOMNode $node, bool $skipNode = false
	): bool {
		$parentNode = $node->parentNode;

		// If this node could break this wikitext line and emit
		// non-ws content on a new line, the P-tag will be on that new line
		// with text content that needs P-wrapping.
		// PORT-FIXME: does regex whitespace semantics change matter?
		if ( !$skipNode && preg_match( '/\n[^\s]/', $node->textContent ) ) {
			return false;
		}

		$node = DOMUtils::previousNonDeletedSibling( $node );
		while ( !$node || !DOMUtils::atTheTop( $node ) ) {
			while ( $node ) {
				// If we hit a block node that will render on the same line, we are done!
				if ( WTUtils::isBlockNodeWithVisibleWT( $node ) ) {
					return true;
				}

				// If this node could break this wikitext line, we are done.
				// This is conservative because textContent could be looking at descendents
				// of 'node' that may not have been serialized yet. But this is safe.
				if ( preg_match( '/\n/', $node->textContent ) ) {
					return false;
				}

				$node = DOMUtils::previousNonDeletedSibling( $node );

				// Don't go past the current line in any case.
				if ( !empty( $line->firstNode ) && $node &&
					DOMUtils::isAncestorOf( $node, $line->firstNode )
				) {
					return false;
				}
			}

			// Ran out of ancestors without reaching the top (e.g. a detached
			// subtree). Without this guard $node would stay null and the
			// outer loop would never terminate.
			if ( !$parentNode ) {
				return false;
			}

			// Siblings at this level are exhausted: continue from the parent.
			$node = $parentNode;
			$parentNode = $node->parentNode;
		}

		return false;
	}

	/**
	 * Look at the siblings following $node and report whether the first
	 * decisive one is a block node with visible wikitext that would sit on
	 * the same wikitext line.
	 *
	 * @param DOMNode $node
	 * @return bool
	 */
	private function newWikitextLineMightHaveBlockNode( DOMNode $node ): bool {
		for (
			$sibling = DOMUtils::nextNonDeletedSibling( $node );
			$sibling;
			$sibling = DOMUtils::nextNonDeletedSibling( $sibling )
		) {
			if ( DOMUtils::isText( $sibling ) ) {
				// If this node will break this wikitext line, we are done!
				if ( preg_match( '/\n/', $sibling->nodeValue ) ) {
					return false;
				}
			} elseif ( DOMUtils::isElt( $sibling ) ) {
				// These tags will always serialize onto a new line
				if (
					isset( WikitextConstants::$HTMLTagsRequiringSOLContext[$sibling->nodeName] ) &&
					!WTUtils::isLiteralHTMLNode( $sibling )
				) {
					return false;
				}

				// Either we hit a block node that will render on the same
				// line (true), or we go conservative (false).
				return WTUtils::isBlockNodeWithVisibleWT( $sibling );
			}
			// Other node types are not decisive: keep scanning.
		}
		return false;
	}

	/**
	 * Test if $node is a <p> whose data-parsoid 'stx' is not 'html'.
	 *
	 * @param DOMNode $node
	 * @return bool
	 */
	private static function isNonHtmlStxP( DOMNode $node ): bool {
		return $node->nodeName === 'p'
			&& $node instanceof DOMElement // for static analyzers
			&& ( DOMDataUtils::getDataParsoid( $node )->stx ?? null ) !== 'html';
	}

	/**
	 * Node is being serialized before/after a P-tag.
	 * While computing newline constraints, this function tests
	 * if node should be treated as a P-wrapped node.
	 * @param DOMNode $node
	 * @return bool
	 */
	private static function treatAsPPTransition( DOMNode $node ): bool {
		// Treat text/p similar to p/p transition
		if ( DOMUtils::isText( $node ) ) {
			return true;
		}

		// Otherwise, it should not be:
		// * the body, a block node or a literal HTML node
		// * a template (encapsulation) wrapper
		// * a mw:Includes meta or a SOL-transparent link
		return !DOMUtils::isBody( $node )
			&& !DOMUtils::isBlockNode( $node )
			&& !WTUtils::isLiteralHTMLNode( $node )
			&& !WTUtils::isEncapsulationWrapper( $node )
			&& !WTUtils::isSolTransparentLink( $node )
			&& !DOMUtils::matchTypeOf( $node, '#^mw:Includes/#' );
	}

	/**
	 * Test if $node is a P-wrapped node or should be treated as one.
	 *
	 * @param DOMNode|null $node
	 * @return bool False for null; otherwise true if $node is a <p> whose
	 *   'stx' is not 'html', or if treatAsPPTransition() accepts it.
	 */
	public static function isPPTransition( ?DOMNode $node ): bool {
		if ( !$node ) {
			return false;
		}
		return self::isNonHtmlStxP( $node ) || self::treatAsPPTransition( $node );
	}

}
245