<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Html2Wt\DOMHandlers;

use DOMElement;
use DOMNode;
use stdClass;
use Wikimedia\Parsoid\Config\WikitextConstants;
use Wikimedia\Parsoid\Html2Wt\SerializerState;
use Wikimedia\Parsoid\Utils\DOMDataUtils;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\WTUtils;

/**
 * DOM handler for P-tags: serializes the children of the P-tag and computes
 * the newline constraints (min/max newline counts) that apply between the
 * P-tag and the node before / after it.
 */
class PHandler extends DOMHandler {

	public function __construct() {
		// Counterintuitive but seems right.
		// Otherwise the generated wikitext will parse as an indent-pre
		// escapeWikitext nowiking will deal with leading space for content
		// inside the p-tag, but forceSOL suppresses whitespace before the p-tag.
		parent::__construct( true );
	}

	/**
	 * Serialize the children of the P-tag and hand back the node's next sibling.
	 *
	 * @inheritDoc
	 */
	public function handle(
		DOMElement $node, SerializerState $state, bool $wrapperUnmodified = false
	): ?DOMNode {
		// XXX: Handle single-line mode by switching to HTML handler!
		$state->serializeChildren( $node );
		return $node->nextSibling;
	}

	/**
	 * Newline constraints between $otherNode and the P-tag $node that follows it
	 * (or is contained in it).
	 *
	 * @inheritDoc
	 */
	public function before( DOMElement $node, DOMNode $otherNode, SerializerState $state ): array {
		$otherNodeName = $otherNode->nodeName;
		$isCellOrBody = in_array( $otherNodeName, [ 'td', 'th', 'body' ], true );

		// $otherNode is the parent of the P-tag and is a list item,
		// a table cell or the body.
		if ( $node->parentNode === $otherNode
			&& ( $isCellOrBody || DOMUtils::isListItem( $otherNode ) )
		) {
			return [ 'min' => 0, 'max' => $isCellOrBody ? 1 : 0 ];
		}

		if ( ( $otherNode === DOMUtils::previousNonDeletedSibling( $node )
				// p-p transition
				&& $otherNodeName === 'p'
				&& $otherNode instanceof DOMElement // for static analyzers
				&& ( DOMDataUtils::getDataParsoid( $otherNode )->stx ?? null ) !== 'html' )
			|| ( self::treatAsPPTransition( $otherNode )
				&& $otherNode === DOMUtils::previousNonSepSibling( $node )
				// A new wikitext line could start at this P-tag. We have to figure out
				// if 'node' needs a separation of 2 newlines from that P-tag. Examine
				// previous siblings of 'node' to see if we emitted a block tag
				// there => we can make do with 1 newline separator instead of 2
				// before the P-tag.
				&& !$this->currWikitextLineHasBlockNode( $state->currLine, $otherNode ) )
		) {
			return [ 'min' => 2, 'max' => 2 ];
		}

		if ( self::treatAsPPTransition( $otherNode )
			|| ( DOMUtils::isBlockNode( $otherNode )
				&& $otherNode->nodeName !== 'blockquote'
				&& $node->parentNode === $otherNode )
			// new p-node added after sol-transparent wikitext should always
			// get serialized onto a new wikitext line.
			|| ( WTUtils::emitsSolTransparentSingleLineWT( $otherNode )
				&& WTUtils::isNewElt( $node ) )
		) {
			// No newline is required when $otherNode has a figcaption ancestor.
			$insideFigcaption = DOMUtils::hasAncestorOfName( $otherNode, 'figcaption' );
			return [ 'min' => $insideFigcaption ? 0 : 1, 'max' => 2 ];
		}

		return [ 'min' => 0, 'max' => 2 ];
	}

	/**
	 * Newline constraints between the P-tag $node and the $otherNode that
	 * follows it (or contains it).
	 *
	 * @inheritDoc
	 */
	public function after( DOMElement $node, DOMNode $otherNode, SerializerState $state ): array {
		$endsWithBr = $node->lastChild && $node->lastChild->nodeName === 'br';

		if ( !$endsWithBr
			&& self::isPPTransition( $otherNode )
			// Examine the nodes preceding 'node' ('node' itself is skipped) to see
			// if we emitted a block tag on the current wikitext line
			// => we can make do with 1 newline separator instead of 2.
			&& !$this->currWikitextLineHasBlockNode( $state->currLine, $node, true )
			// Since we are going to emit newlines before the other P-tag, we know it
			// is going to start a new wikitext line. We have to figure out if 'node'
			// needs a separation of 2 newlines from that P-tag. Examine following
			// siblings of the other P-tag to see if we might emit a block tag there
			// => we can make do with 1 newline separator instead of 2.
			&& !$this->newWikitextLineMightHaveBlockNode( $otherNode )
		) {
			return [ 'min' => 2, 'max' => 2 ];
		}

		if ( DOMUtils::isBody( $otherNode ) ) {
			return [ 'min' => 0, 'max' => 2 ];
		}

		if ( self::treatAsPPTransition( $otherNode )
			|| ( DOMUtils::isBlockNode( $otherNode )
				&& $otherNode->nodeName !== 'blockquote'
				&& $node->parentNode === $otherNode )
		) {
			// No newline is required when $otherNode has a figcaption ancestor.
			$insideFigcaption = DOMUtils::hasAncestorOfName( $otherNode, 'figcaption' );
			return [ 'min' => $insideFigcaption ? 0 : 1, 'max' => 2 ];
		}

		return [ 'min' => 0, 'max' => 2 ];
	}

	// IMPORTANT: Do not start walking from line.firstNode forward. Always
	// walk backward from node. This is because in selser mode, it looks like
	// line.firstNode doesn't always correspond to the wikitext line that is
	// being processed since the previous emitted node might have been an unmodified
	// DOM node that generated multiple wikitext lines.

	/**
	 * Walk backwards from $node (previous siblings first, then the parent and
	 * its previous siblings, and so on) looking for a block node with visible
	 * wikitext before anything that contains a newline is encountered.
	 *
	 * @param stdClass|null $line See SerializerState::$currLine
	 * @param DOMNode $node Node to start walking backwards from
	 * @param bool $skipNode If true, the text content of $node itself is not
	 *   inspected for a line break before the walk starts
	 * @return bool True only if such a block node is found; false in every
	 *   other case, including when the walk runs out of ancestors
	 */
	private function currWikitextLineHasBlockNode(
		?stdClass $line, DOMNode $node, bool $skipNode = false
	): bool {
		$parentNode = $node->parentNode;
		if ( !$skipNode ) {
			// If this node could break this wikitext line and emit
			// non-ws content on a new line, the P-tag will be on that new line
			// with text content that needs P-wrapping.
			// PORT-FIXME: does regex whitespace semantics change matter?
			if ( preg_match( '/\n[^\s]/', $node->textContent ) ) {
				return false;
			}
		}
		$node = DOMUtils::previousNonDeletedSibling( $node );
		while ( !$node || !DOMUtils::atTheTop( $node ) ) {
			while ( $node ) {
				// If we hit a block node that will render on the same line, we are done!
				if ( WTUtils::isBlockNodeWithVisibleWT( $node ) ) {
					return true;
				}

				// If this node could break this wikitext line, we are done.
				// This is conservative because textContent could be looking at descendents
				// of 'node' that may not have been serialized yet. But this is safe.
				if ( preg_match( '/\n/', $node->textContent ) ) {
					return false;
				}

				$node = DOMUtils::previousNonDeletedSibling( $node );

				// Don't go past the current line in any case.
				if ( !empty( $line->firstNode ) && $node &&
					DOMUtils::isAncestorOf( $node, $line->firstNode )
				) {
					return false;
				}
			}

			// Ran out of siblings at this level: continue from the parent.
			// If there is no parent left (detached subtree), stop here. Without
			// this guard $node would stay null and this loop would never end.
			if ( !$parentNode ) {
				return false;
			}
			$node = $parentNode;
			$parentNode = $node->parentNode;
		}

		return false;
	}

	/**
	 * Look at the siblings following $node and report whether the first
	 * element found among them is a block node with visible wikitext that
	 * does not require start-of-line context.
	 *
	 * @param DOMNode $node
	 * @return bool
	 */
	private function newWikitextLineMightHaveBlockNode( DOMNode $node ): bool {
		$node = DOMUtils::nextNonDeletedSibling( $node );
		while ( $node ) {
			if ( DOMUtils::isText( $node ) ) {
				// If this node will break this wikitext line, we are done!
				if ( preg_match( '/\n/', $node->nodeValue ) ) {
					return false;
				}
			} elseif ( DOMUtils::isElt( $node ) ) {
				// These tags will always serialize onto a new line
				if (
					isset( WikitextConstants::$HTMLTagsRequiringSOLContext[$node->nodeName] ) &&
					!WTUtils::isLiteralHTMLNode( $node )
				) {
					return false;
				}

				// We hit a block node that will render on the same line
				if ( WTUtils::isBlockNodeWithVisibleWT( $node ) ) {
					return true;
				}

				// Go conservative
				return false;
			}

			$node = DOMUtils::nextNonDeletedSibling( $node );
		}
		return false;
	}

	/**
	 * Node is being serialized before/after a P-tag.
	 * While computing newline constraints, this function tests
	 * if node should be treated as a P-wrapped node.
	 * @param DOMNode $node
	 * @return bool
	 */
	private static function treatAsPPTransition( DOMNode $node ): bool {
		// Treat text/p similar to p/p transition
		// If an element, it should not be a:
		// * block node or literal HTML node
		// * template wrapper
		// * mw:Includes meta or a SOL-transparent link
		return DOMUtils::isText( $node )
			|| ( !DOMUtils::isBody( $node )
				&& !DOMUtils::isBlockNode( $node )
				&& !WTUtils::isLiteralHTMLNode( $node )
				&& !WTUtils::isEncapsulationWrapper( $node )
				&& !WTUtils::isSolTransparentLink( $node )
				&& !DOMUtils::matchTypeOf( $node, '#^mw:Includes/#' ) );
	}

	/**
	 * Test if $node is a P-wrapped node or should be treated as one.
	 *
	 * @param DOMNode|null $node
	 * @return bool False for a null node
	 */
	public static function isPPTransition( ?DOMNode $node ): bool {
		if ( !$node ) {
			return false;
		}
		// Explicit grouping: (a P-tag without html syntax) OR (treated as one).
		return ( $node->nodeName === 'p'
				&& $node instanceof DOMElement // for static analyzers
				&& ( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'html' )
			|| self::treatAsPPTransition( $node );
	}

}