<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Ext\Cite;

use DOMDocument;
use DOMElement;
use DOMNode;
use stdClass;
use Wikimedia\Parsoid\Core\DomSourceRange;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Ext\PHPUtils;
use Wikimedia\Parsoid\Ext\WTUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;

/**
 * Extension tag handler for `<references>`.
 *
 * Besides the tag handler entry points (sourceToDom, domToWikitext,
 * lintHandler), this class exposes the DOM pass (processRefs) that converts
 * sealed `<ref>` fragments into link-backs, and
 * insertMissingReferencesIntoDOM, which emits reference lists for any
 * groups still present in the ReferencesData afterwards.
 */
class References extends ExtensionTagHandler {
	/**
	 * Check whether any descendant element of $node is a sealed `<ref>` fragment.
	 *
	 * @param DOMNode $node
	 * @return bool
	 */
	private static function hasRef( DOMNode $node ): bool {
		for ( $c = $node->firstChild; $c; $c = $c->nextSibling ) {
			if ( !DOMUtils::isElt( $c ) ) {
				continue;
			}
			if ( WTUtils::isSealedFragmentOfType( $c, 'ref' ) || self::hasRef( $c ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Build the `<ol class="mw-references references">` element for a
	 * references list, optionally wrapped in a responsive `<div>`.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param DOMDocument $doc Document used to create the new elements
	 * @param DOMNode|null $body If given, its children are migrated into the `<ol>`
	 * @param array $refsOpts Must contain the 'group' and 'responsive' keys
	 * @param callable|null $modifyDp Called with the data-parsoid object of the
	 *   returned element so that the caller can add to it
	 * @param bool $autoGenerated When true, typeof/about attributes and data-mw
	 *   are set on the returned element
	 * @return DOMElement The `<ol>`, or the wrapping `<div>` when responsive
	 */
	private static function createReferences(
		ParsoidExtensionAPI $extApi, DOMDocument $doc, ?DOMNode $body,
		array $refsOpts, ?callable $modifyDp, bool $autoGenerated = false
	): DOMElement {
		$ol = $doc->createElement( 'ol' );
		DOMCompat::getClassList( $ol )->add( 'mw-references' );
		DOMCompat::getClassList( $ol )->add( 'references' );

		if ( $body ) {
			DOMUtils::migrateChildren( $body, $ol );
		}

		// Support the `responsive` parameter: the site config provides the
		// default, an explicit attribute (anything but '0') overrides it.
		$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
		$responsiveWrap = !empty( $rrOpts['enabled'] );
		if ( $refsOpts['responsive'] !== null ) {
			$responsiveWrap = $refsOpts['responsive'] !== '0';
		}

		if ( $responsiveWrap ) {
			$div = $doc->createElement( 'div' );
			DOMCompat::getClassList( $div )->add( 'mw-references-wrap' );
			$div->appendChild( $ol );
			$frag = $div;
		} else {
			$frag = $ol;
		}

		if ( $autoGenerated ) {
			// FIXME: This is very much trying to copy ExtensionHandler::onDocument
			DOMUtils::addAttributes( $frag, [
				'typeof' => 'mw:Extension/references',
				'about' => $extApi->newAboutId()
			] );
			$dataMw = (object)[
				'name' => 'references',
				'attrs' => new stdClass,
			];
			// Don't emit empty keys
			if ( $refsOpts['group'] ) {
				$dataMw->attrs->group = $refsOpts['group'];
			}
			DOMDataUtils::setDataMw( $frag, $dataMw );
		}

		$dp = DOMDataUtils::getDataParsoid( $frag );
		if ( $refsOpts['group'] ) { // No group for the empty string either
			$dp->group = $refsOpts['group'];
			$ol->setAttribute( 'data-mw-group', $refsOpts['group'] );
		}
		if ( $modifyDp ) {
			$modifyDp( $dp );
		}

		return $frag;
	}

	/**
	 * Replace a sealed `<ref>` fragment wrapper with a `<sup>` link-back and
	 * register the ref in $refsData.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param DOMElement $node The sealed fragment wrapper; it is replaced in
	 *   its parent by the generated link-back
	 * @param ReferencesData $refsData
	 * @param ?string $referencesAboutId Non-null when the ref is nested in a
	 *   `<references>` tag
	 * @param ?string $referencesGroup Group of the enclosing `<references>`,
	 *   used when the ref has no group attribute of its own
	 */
	private static function extractRefFromNode(
		ParsoidExtensionAPI $extApi,
		DOMElement $node, ReferencesData $refsData,
		?string $referencesAboutId = null, ?string $referencesGroup = ''
	): void {
		$doc = $node->ownerDocument;
		$nestedInReferences = $referencesAboutId !== null;

		// This is data-parsoid from the dom fragment node that's gone through
		// dsr computation and template wrapping.
		$nodeDp = DOMDataUtils::getDataParsoid( $node );
		$typeOf = $node->getAttribute( 'typeof' );
		$isTplWrapper = DOMUtils::matchTypeOf( $node, '/^mw:Transclusion$/' );
		$nodeType = preg_replace( '#mw:DOMFragment/sealed/ref#', '', $typeOf, 1 );
		$contentId = $nodeDp->html;
		$tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null;

		// This is the <sup> that's the meat of the sealed fragment
		$c = $extApi->getContentDOM( $contentId );
		$cDp = DOMDataUtils::getDataParsoid( $c );
		$refDmw = DOMDataUtils::getDataMw( $c );
		if ( empty( $cDp->empty ) && self::hasRef( $c ) ) { // nested ref-in-ref
			self::processRefs( $extApi, $refsData, $c );
		}

		// Use the about attribute on the wrapper with priority, since it's
		// only added when the wrapper is a template sibling.
		$about = $node->hasAttribute( 'about' )
			? $node->getAttribute( 'about' )
			: $c->getAttribute( 'about' );

		// FIXME(SSS): Need to clarify semantics here.
		// If both the containing <references> elt as well as the nested <ref>
		// elt has a group attribute, what takes precedence?
		$group = $refDmw->attrs->group ?? $referencesGroup ?? '';

		// NOTE: This will have been trimmed in Utils::getExtArgInfo()'s call
		// to TokenUtils::kvToHash() and ExtensionHandler::normalizeExtOptions()
		$refName = $refDmw->attrs->name ?? '';

		// Add ref-index linkback
		$linkBack = $doc->createElement( 'sup' );

		$ref = $refsData->add(
			$extApi, $group, $refName, $about, $nestedInReferences, $linkBack
		);

		$errs = [];

		// Check for missing content
		$missingContent = ( !empty( $cDp->empty ) || trim( $refDmw->body->extsrc ) === '' );

		if ( $missingContent ) {
			// Check for missing name and content to generate error code
			if ( $refName === '' ) {
				if ( !empty( $cDp->selfClose ) ) {
					$errs[] = [ 'key' => 'cite_error_ref_no_key' ];
				} else {
					$errs[] = [ 'key' => 'cite_error_ref_no_input' ];
				}
			}

			if ( !empty( $cDp->selfClose ) ) {
				unset( $refDmw->body );
			} else {
				$refDmw->body = (object)[ 'html' => $refDmw->body->extsrc ];
			}
		} else {
			// If there are multiple <ref>s with the same name, but different content,
			// the content of the first <ref> shows up in the <references> section.
			// In order to ensure lossless RT-ing for later <ref>s, we have to record
			// HTML inline for all of them.
			$html = '';
			$contentDiffers = false;
			if ( $ref->hasMultiples ) {
				$html = $extApi->domToHtml( $c, true, true );
				$c = null; // $c is being released in the call above
				$contentDiffers = $html !== $ref->cachedHtml;
				if ( $contentDiffers ) {
					// TODO: Since this error is being placed on the ref, the
					// key should arguably be "cite_error_ref_duplicate_key"
					$errs[] = [ 'key' => 'cite_error_references_duplicate_key' ];
				}
			}
			if ( $contentDiffers ) {
				$refDmw->body = (object)[ 'html' => $html ];
			} else {
				$refDmw->body = (object)[ 'id' => 'mw-reference-text-' . $ref->target ];
			}
		}

		$lastLinkback = $ref->linkbacks[count( $ref->linkbacks ) - 1] ?? null;
		DOMUtils::addAttributes( $linkBack, [
				'about' => $about,
				'class' => 'mw-ref',
				'id' => $nestedInReferences ? null : ( $ref->name ? $lastLinkback : $ref->id ),
				'rel' => 'dc:references',
				'typeof' => $nodeType
			]
		);
		DOMUtils::addTypeOf( $linkBack, 'mw:Extension/ref' );
		if ( count( $errs ) > 0 ) {
			DOMUtils::addTypeOf( $linkBack, 'mw:Error' );
		}

		// Carry over only the src, dsr and pi fields of the wrapper's
		// data-parsoid onto the link-back.
		$dataParsoid = new stdClass;
		if ( isset( $nodeDp->src ) ) {
			$dataParsoid->src = $nodeDp->src;
		}
		if ( isset( $nodeDp->dsr ) ) {
			$dataParsoid->dsr = $nodeDp->dsr;
		}
		if ( isset( $nodeDp->pi ) ) {
			$dataParsoid->pi = $nodeDp->pi;
		}
		DOMDataUtils::setDataParsoid( $linkBack, $dataParsoid );

		// New errors are appended after any errors already recorded in data-mw.
		$dmw = $isTplWrapper ? $tplDmw : $refDmw;
		if ( count( $errs ) > 0 ) {
			if ( is_array( $dmw->errors ?? null ) ) {
				$errs = array_merge( $dmw->errors, $errs );
			}
			$dmw->errors = $errs;
		}
		DOMDataUtils::setDataMw( $linkBack, $dmw );

		// refLink is the link to the citation
		$refLink = $doc->createElement( 'a' );
		DOMUtils::addAttributes( $refLink, [
				'href' => $extApi->getPageUri() . '#' . $ref->target,
				'style' => 'counter-reset: mw-Ref ' . $ref->groupIndex . ';',
			]
		);
		if ( $ref->group ) {
			$refLink->setAttribute( 'data-mw-group', $ref->group );
		}

		// refLink-span which will contain a default rendering of the cite link
		// for browsers that don't support counters
		$refLinkSpan = $doc->createElement( 'span' );
		$refLinkSpan->setAttribute( 'class', 'mw-reflink-text' );
		$refLinkSpan->appendChild( $doc->createTextNode(
				'[' . ( $ref->group ? $ref->group . ' ' : '' ) . $ref->groupIndex . ']'
			)
		);
		$refLink->appendChild( $refLinkSpan );
		$linkBack->appendChild( $refLink );

		$node->parentNode->replaceChild( $linkBack, $node );

		// Keep the first content to compare multiple <ref>s with the same name.
		if ( $ref->contentId === null && !$missingContent ) {
			$ref->contentId = $contentId;
			$ref->dir = strtolower( $refDmw->attrs->dir ?? '' );
		}
	}

	/**
	 * Fill a references node with the rendered refs of its group and remove
	 * that group from $refsData.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param DOMElement $refsNode The references element, or its responsive
	 *   wrapper (class `mw-references-wrap`)
	 * @param ReferencesData $refsData
	 * @param bool $autoGenerated When true, data-mw is flagged as autoGenerated
	 */
	private static function insertReferencesIntoDOM(
		ParsoidExtensionAPI $extApi, DOMElement $refsNode,
		ReferencesData $refsData, bool $autoGenerated = false
	): void {
		$isTplWrapper = DOMUtils::matchTypeOf( $refsNode, '/^mw:Transclusion$/' );
		$dp = DOMDataUtils::getDataParsoid( $refsNode );
		$group = $dp->group ?? '';
		$refGroup = $refsData->getRefGroup( $group );

		// Iterate through the named ref list for refs without content and
		// back-patch typeof and data-mw error information into named ref
		// instances without content
		// FIXME: This doesn't update the refs found while processEmbeddedRefs
		if ( $refGroup ) {
			foreach ( $refGroup->indexByName as $ref ) {
				if ( $ref->contentId === null ) {
					foreach ( $ref->nodes as $linkBack ) {
						DOMUtils::addTypeOf( $linkBack, 'mw:Error' );
						$dmw = DOMDataUtils::getDataMw( $linkBack );
						// TODO: Since this error is being placed on the ref,
						// the key should arguably be "cite_error_ref_no_text"
						$errs = [ [ 'key' => 'cite_error_references_no_text' ] ];
						if ( is_array( $dmw->errors ?? null ) ) {
							$errs = array_merge( $dmw->errors, $errs );
						}
						$dmw->errors = $errs;
					}
				}
			}
		}

		// Serialize the link-backs found inside the references node; they
		// become the data-mw body below.
		$nestedRefsHTML = array_map(
			static function ( DOMElement $sup ) use ( $extApi ) {
				return $extApi->domToHtml( $sup, false, true ) . "\n";
			},
			DOMCompat::querySelectorAll(
				$refsNode, 'sup[typeof~=\'mw:Extension/ref\']'
			)
		);

		if ( !$isTplWrapper ) {
			$dataMw = DOMDataUtils::getDataMw( $refsNode );
			// Mark this auto-generated so that we can skip this during
			// html -> wt and so that clients can strip it if necessary.
			if ( $autoGenerated ) {
				$dataMw->autoGenerated = true;
			} elseif ( count( $nestedRefsHTML ) > 0 ) {
				$dataMw->body = (object)[ 'html' => "\n" . implode( $nestedRefsHTML ) ];
			} elseif ( empty( $dp->selfClose ) ) {
				$dataMw->body = PHPUtils::arrayToObject( [ 'html' => '' ] );
			} else {
				unset( $dataMw->body );
			}
			// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
			unset( $dp->selfClose );
		}

		// Deal with responsive wrapper
		if ( DOMCompat::getClassList( $refsNode )->contains( 'mw-references-wrap' ) ) {
			$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
			if ( $refGroup && count( $refGroup->refs ) > $rrOpts['threshold'] ) {
				DOMCompat::getClassList( $refsNode )->add( 'mw-references-columns' );
			}
			// From here on, operate on the wrapper's first child.
			$refsNode = $refsNode->firstChild;
		}

		// Remove all children from the references node
		//
		// Ex: When {{Reflist}} is reused from the cache, it comes with
		// a bunch of references as well. We have to remove all those cached
		// references before generating fresh references.
		while ( $refsNode->firstChild ) {
			$refsNode->removeChild( $refsNode->firstChild );
		}

		if ( $refGroup ) {
			foreach ( $refGroup->refs as $ref ) {
				$refGroup->renderLine( $extApi, $refsNode, $ref );
			}
		}

		// Remove the group from refsData
		$refsData->removeRefGroup( $group );
	}

	/**
	 * Process `<ref>`s left behind after the DOM is fully processed.
	 * We process them as if there was an implicit `<references />` tag at
	 * the end of the DOM.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param ReferencesData $refsData
	 * @param DOMNode $node Node to which the generated lists are appended
	 */
	public static function insertMissingReferencesIntoDOM(
		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
	): void {
		$doc = $node->ownerDocument;

		// The new references come out of "nowhere", so to make selser work
		// properly, add a zero-sized DSR pointing to the end of the document.
		// The closure does not depend on the group, so build it only once.
		$addEndOfDocDsr = static function ( $dp ) use ( $extApi ) {
			$content = $extApi->getPageConfig()->getRevisionContent()->getContent( 'main' );
			$contentLength = strlen( $content );
			$dp->dsr = new DomSourceRange( $contentLength, $contentLength, 0, 0 );
		};

		foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
			$frag = self::createReferences(
				$extApi,
				$doc,
				null,
				[
					// Force string cast here since in the foreach above, $groupName
					// is an array key. In that context, number-like strings are
					// silently converted to a numeric value!
					// Ex: In <ref group="2" />, the "2" becomes 2 in the foreach
					'group' => (string)$groupName,
					'responsive' => null,
				],
				$addEndOfDocDsr,
				true
			);

			// Add a \n before the <ol> so that when serialized to wikitext,
			// each <references /> tag appears on its own line.
			$node->appendChild( $doc->createTextNode( "\n" ) );
			$node->appendChild( $frag );

			self::insertReferencesIntoDOM( $extApi, $frag, $refsData, true );
		}
	}

	/**
	 * Run processRefs over an HTML string and return the re-serialized HTML.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param ReferencesData $refsData
	 * @param string $str HTML to parse and process
	 * @return string
	 */
	private static function processEmbeddedRefs(
		ParsoidExtensionAPI $extApi, ReferencesData $refsData, string $str
	): string {
		$domBody = DOMCompat::getBody( $extApi->htmlToDom( $str ) );
		self::processRefs( $extApi, $refsData, $domBody );
		return $extApi->domToHtml( $domBody, true, true );
	}

	/**
	 * Walk the subtree under $node, converting sealed `<ref>` fragments into
	 * link-backs and filling in `<references>` nodes as they are encountered.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param ReferencesData $refsData
	 * @param DOMElement $node
	 */
	public static function processRefs(
		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMElement $node
	): void {
		// Handler for <ref>s embedded in data attributes; it is independent of
		// the child being visited, so create it once.
		$processHiddenHtml = static function ( string $html ) use ( $extApi, $refsData ) {
			return self::processEmbeddedRefs( $extApi, $refsData, $html );
		};

		$child = $node->firstChild;
		while ( $child !== null ) {
			// Fetch the sibling up front: $child may get replaced in the DOM below.
			$nextChild = $child->nextSibling;
			if ( $child instanceof DOMElement ) {
				if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
					self::extractRefFromNode( $extApi, $child, $refsData );
				} elseif ( DOMUtils::matchTypeOf( $child, '#^mw:Extension/references$#' ) ) {
					// DOMElement::getAttribute() always returns a string
					// ('' when the attribute is absent).
					$referencesId = $child->getAttribute( 'about' );
					$referencesGroup = DOMDataUtils::getDataParsoid( $child )->group ?? null;
					self::processRefsInReferences(
						$extApi,
						$refsData,
						$child,
						$referencesId,
						$referencesGroup
					);
					self::insertReferencesIntoDOM( $extApi, $child, $refsData, false );
				} else {
					// Look for <ref>s embedded in data attributes
					$extApi->processHiddenHTMLInDataAttributes( $child, $processHiddenHtml );

					if ( $child->hasChildNodes() ) {
						self::processRefs( $extApi, $refsData, $child );
					}
				}
			}
			$child = $nextChild;
		}
	}

	/**
	 * This handles wikitext like this:
	 * ```
	 *   <references> <ref>foo</ref> </references>
	 *   <references> <ref>bar</ref> </references>
	 * ```
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param ReferencesData $refsData
	 * @param DOMElement $node
	 * @param string $referencesId
	 * @param string|null $referencesGroup
	 */
	private static function processRefsInReferences(
		ParsoidExtensionAPI $extApi, ReferencesData $refsData,
		DOMElement $node, string $referencesId, ?string $referencesGroup
	): void {
		$child = $node->firstChild;
		while ( $child !== null ) {
			// Fetch the sibling up front: $child may get replaced in the DOM below.
			$nextChild = $child->nextSibling;
			if ( $child instanceof DOMElement ) {
				if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
					self::extractRefFromNode(
						$extApi,
						$child,
						$refsData,
						$referencesId,
						$referencesGroup
					);
				} elseif ( $child->hasChildNodes() ) {
					self::processRefsInReferences(
						$extApi,
						$refsData,
						$child,
						$referencesId,
						$referencesGroup
					);
				}
			}
			$child = $nextChild;
		}
	}

	/** @inheritDoc */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $txt, array $extArgs
	): DOMDocument {
		$doc = $extApi->extTagToDOM(
			$extArgs,
			'',
			$txt,
			[
				'wrapperTag' => 'div',
				'parseOpts' => [ 'extTag' => 'references' ],
			]
		);

		// Make sure both option keys exist; createReferences reads them
		// unconditionally.
		$refsOpts = $extApi->extArgsToArray( $extArgs ) + [
			'group' => null,
			'responsive' => null,
		];

		$docBody = DOMCompat::getBody( $doc );

		$frag = self::createReferences(
			$extApi,
			$doc,
			$docBody,
			$refsOpts,
			static function ( $dp ) use ( $extApi ) {
				$dp->src = $extApi->getExtSource();
				// Setting redundant info on fragment.
				// $docBody->firstChild info feels cumbersome to use downstream.
				if ( $extApi->isSelfClosedExtTag() ) {
					$dp->selfClose = true;
				}
			}
		);
		$docBody->appendChild( $frag );
		return $doc;
	}

	/** @inheritDoc */
	public function domToWikitext(
		ParsoidExtensionAPI $extApi, DOMElement $node, bool $wrapperUnmodified
	) {
		$dataMw = DOMDataUtils::getDataMw( $node );
		if ( !empty( $dataMw->autoGenerated ) && $extApi->rtTestMode() ) {
			// Eliminate auto-inserted <references /> noise in rt-testing
			return '';
		}

		$startTagSrc = $extApi->extStartTagToWikitext( $node );
		if ( empty( $dataMw->body ) ) {
			return $startTagSrc; // We self-closed this already.
		}

		// `?? null` keeps a body without an `html` property on the error path
		// below instead of triggering an undefined-property warning.
		if ( !is_string( $dataMw->body->html ?? null ) ) {
			$extApi->log( 'error',
				'References body unavailable for: ' . DOMCompat::getOuterHTML( $node )
			);
			return ''; // Drop it!
		}

		$src = $extApi->htmlToWikitext(
			[ 'extName' => $dataMw->name ],
			$dataMw->body->html
		);
		return $startTagSrc . $src . '</' . $dataMw->name . '>';
	}

	/** @inheritDoc */
	public function lintHandler(
		ParsoidExtensionAPI $extApi, DOMElement $refs, callable $defaultHandler
	): ?DOMNode {
		// Nothing to do
		//
		// FIXME: Not entirely true for scenarios where the <ref> tags
		// are defined in the references section that is itself templated.
		//
		// {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
		//
		// In this example, the references tag has the right tplInfo and
		// when the <ref> tag is processed in the body of the article where
		// it is accessed, there is no relevant template or dsr info available.
		//
		// Ignoring for now.
		return $refs->nextSibling;
	}
}