1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Ext\Cite;
5
6use DOMDocumentFragment;
7use DOMElement;
8use DOMNode;
9use stdClass;
10use Wikimedia\Parsoid\Core\DomSourceRange;
11use Wikimedia\Parsoid\Ext\DOMDataUtils;
12use Wikimedia\Parsoid\Ext\DOMUtils;
13use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
14use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
15use Wikimedia\Parsoid\Ext\PHPUtils;
16use Wikimedia\Parsoid\Ext\WTUtils;
17use Wikimedia\Parsoid\Utils\DOMCompat;
18
19class References extends ExtensionTagHandler {
20	/**
21	 * @param DOMNode $node
22	 * @return bool
23	 */
24	private static function hasRef( DOMNode $node ): bool {
25		$c = $node->firstChild;
26		while ( $c ) {
27			if ( DOMUtils::isElt( $c ) ) {
28				if ( WTUtils::isSealedFragmentOfType( $c, 'ref' ) ) {
29					return true;
30				}
31				if ( self::hasRef( $c ) ) {
32					return true;
33				}
34			}
35			$c = $c->nextSibling;
36		}
37		return false;
38	}
39
40	/**
41	 * It should be sufficient to only include styles when we're rendering
42	 * a references tag.
43	 *
44	 * @return array
45	 */
46	private static function getModuleStyles(): array {
47		return [
48			'ext.cite.style',
49			'ext.cite.styles'
50		];
51	}
52
53	/**
54	 * @param ParsoidExtensionAPI $extApi
55	 * @param DOMDocumentFragment $domFragment
56	 * @param array $refsOpts
57	 * @param ?callable $modifyDp
58	 * @param bool $autoGenerated
59	 * @return DOMElement
60	 */
61	private static function createReferences(
62		ParsoidExtensionAPI $extApi, DOMDocumentFragment $domFragment,
63		array $refsOpts, ?callable $modifyDp, bool $autoGenerated = false
64	): DOMElement {
65		$doc = $domFragment->ownerDocument;
66
67		$ol = $doc->createElement( 'ol' );
68		DOMCompat::getClassList( $ol )->add( 'mw-references' );
69		DOMCompat::getClassList( $ol )->add( 'references' );
70
71		DOMUtils::migrateChildren( $domFragment, $ol );
72
73		// Support the `responsive` parameter
74		$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
75		$responsiveWrap = !empty( $rrOpts['enabled'] );
76		if ( $refsOpts['responsive'] !== null ) {
77			$responsiveWrap = $refsOpts['responsive'] !== '0';
78		}
79
80		if ( $responsiveWrap ) {
81			$div = $doc->createElement( 'div' );
82			DOMCompat::getClassList( $div )->add( 'mw-references-wrap' );
83			$div->appendChild( $ol );
84			$frag = $div;
85		} else {
86			$frag = $ol;
87		}
88
89		if ( $autoGenerated ) {
90			// FIXME: This is very much trying to copy ExtensionHandler::onDocument
91			DOMUtils::addAttributes( $frag, [
92				'typeof' => 'mw:Extension/references',
93				'about' => $extApi->newAboutId()
94			] );
95			$dataMw = (object)[
96				'name' => 'references',
97				'attrs' => new stdClass
98			];
99			// Dont emit empty keys
100			if ( $refsOpts['group'] ) {
101				$dataMw->attrs->group = $refsOpts['group'];
102			}
103			DOMDataUtils::setDataMw( $frag, $dataMw );
104		}
105
106		$dp = DOMDataUtils::getDataParsoid( $frag );
107		if ( $refsOpts['group'] ) {  // No group for the empty string either
108			$dp->group = $refsOpts['group'];
109			$ol->setAttribute( 'data-mw-group', $refsOpts['group'] );
110		}
111		if ( $modifyDp ) {
112			$modifyDp( $dp );
113		}
114
115		$extApi->addModuleStyles( self::getModuleStyles() );
116
117		return $frag;
118	}
119
	/**
	 * Turn one sealed `<ref>` fragment wrapper into its final inline form.
	 *
	 * Reads the ref's content `<sup>` through $extApi->getContentDOM(),
	 * processes refs nested inside it, collects errors, builds a `<sup>`
	 * linkback containing a link to the citation and replaces $node with that
	 * linkback in the DOM.  The content is kept for a ref that has none stored
	 * yet (its content id is saved on the ref); in every other case the
	 * content `<sup>` is removed and its content DOM is cleared.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param DOMElement $node The sealed fragment wrapper; replaced in its parent by the linkback
	 * @param ReferencesData $refsData Ref bookkeeping that is read and updated here
	 */
	private static function extractRefFromNode(
		ParsoidExtensionAPI $extApi, DOMElement $node, ReferencesData $refsData
	): void {
		$doc = $node->ownerDocument;
		// Errors collected here are attached to the linkback near the end
		$errs = [];

		// This is data-parsoid from the dom fragment node that's gone through
		// dsr computation and template wrapping.
		$nodeDp = DOMDataUtils::getDataParsoid( $node );
		$typeOf = $node->getAttribute( 'typeof' );
		$isTplWrapper = DOMUtils::hasTypeOf( $node, 'mw:Transclusion' );
		// Drop the sealed-fragment type; whatever remains of the typeof is
		// reused as the typeof of the linkback below
		$nodeType = preg_replace( '#mw:DOMFragment/sealed/ref#', '', $typeOf, 1 );
		$contentId = $nodeDp->html;
		$tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null;

		// This is the <sup> that's the meat of the sealed fragment
		$c = $extApi->getContentDOM( $contentId )->firstChild;
		DOMUtils::assertElt( $c );
		$cDp = DOMDataUtils::getDataParsoid( $c );
		$refDmw = DOMDataUtils::getDataMw( $c );
		if ( empty( $cDp->empty ) && self::hasRef( $c ) ) { // nested ref-in-ref
			$refsData->pushInEmbeddedContent();
			self::processRefs( $extApi, $refsData, $c );
			$refsData->popInEmbeddedContent();
		}

		// Use the about attribute on the wrapper with priority, since it's
		// only added when the wrapper is a template sibling.
		$about = $node->hasAttribute( 'about' )
			? $node->getAttribute( 'about' )
			: $c->getAttribute( 'about' );

		// FIXME(SSS): Need to clarify semantics here.
		// If both the containing <references> elt as well as the nested <ref>
		// elt has a group attribute, what takes precedence?
		$groupName = $refDmw->attrs->group ?? $refsData->referencesGroup;

		if (
			$refsData->inReferencesContent() &&
			$groupName !== $refsData->referencesGroup
		) {
			$errs[] = [ 'key' => 'cite_error_references_group_mismatch',
				'params' => [ $refDmw->attrs->group ] ];
		}

		// NOTE: This will have been trimmed in Utils::getExtArgInfo()'s call
		// to TokenUtils::kvToHash() and ExtensionHandler::normalizeExtOptions()
		$refName = $refDmw->attrs->name ?? '';
		$followName = $refDmw->attrs->follow ?? '';
		$refDir = strtolower( $refDmw->attrs->dir ?? '' );

		// Add ref-index linkback
		$linkBack = $doc->createElement( 'sup' );

		// Stays null unless a valid follow resolves to an existing ref below
		$ref = null;

		$hasRefName = strlen( $refName ) > 0;
		$hasFollow = strlen( $followName ) > 0;

		$validFollow = false;

		if ( $hasFollow ) {
			// Always wrap follows content so that there's no ambiguity
			// where to find it when roundtripping
			$span = $doc->createElement( 'span' );
			DOMUtils::addTypeOf( $span, 'mw:Cite/Follow' );
			$span->setAttribute( 'about', $about );
			$span->appendChild(
				$doc->createTextNode( ' ' )
			);
			DOMUtils::migrateChildren( $c, $span );
			$c->appendChild( $span );
		}

		if ( $hasRefName ) {
			if ( $hasFollow ) {
				// Presumably, "name" has higher precedence
				$errs[] = [ 'key' => 'cite_error_ref_too_many_keys' ];
			}
			if ( $refsData->inReferencesContent() ) {
				// A named ref inside <references> must match a name that is
				// already indexed for the group
				$group = $refsData->getRefGroup( $groupName );
				if ( !isset( $group->indexByName[$refName] ) ) {
					$errs[] = [ 'key' => 'cite_error_references_missing_key',
						'params' => [ $refDmw->attrs->name ] ];
				}
			}
		} else {
			if ( $hasFollow ) {
				// This is a follows ref, so check that a named ref has already
				// been defined
				$group = $refsData->getRefGroup( $groupName );
				if ( isset( $group->indexByName[$followName] ) ) {
					$validFollow = true;
					$ref = $group->indexByName[$followName];

					if ( $ref->contentId ) {
						// Append the follow content to the content of the
						// ref being followed
						$refContent = $extApi->getContentDOM( $ref->contentId )->firstChild;
						DOMUtils::migrateChildren( $c, $refContent );
					} else {
						// Otherwise, we have a follow that comes after a named
						// ref without content so use the follow fragment as
						// the content
						// This will be set below with `$ref->contentId = $contentId;`
					}
				} else {
					// FIXME: This key isn't exactly appropriate since this
					// is more general than just being in a <references>
					// section and it's the $followName we care about, but the
					// extension to the legacy parser doesn't have an
					// equivalent key and just outputs something wacky.
					$errs[] = [ 'key' => 'cite_error_references_missing_key',
						'params' => [ $refDmw->attrs->follow ] ];
				}
			} elseif ( $refsData->inReferencesContent() ) {
				$errs[] = [ 'key' => 'cite_error_references_no_key' ];
			}
		}

		// No existing ref was resolved through a valid follow, so obtain the
		// ref object from $refsData->add()
		if ( !$ref ) {
			$ref = $refsData->add(
				$extApi, $groupName, $refName, $about, $linkBack
			);
		}

		// Only 'rtl' and 'ltr' (compared case-insensitively) are accepted for dir
		if ( isset( $refDmw->attrs->dir ) && $refDir !== 'rtl' && $refDir !== 'ltr' ) {
			$errs[] = [ 'key' => 'cite_error_ref_invalid_dir',
				'params' => [ $refDmw->attrs->dir ] ];
		}

		// FIXME: At some point this error message can be changed to a warning, as Parsoid Cite now
		// supports numerals as a name without it being an actual error, but core Cite does not.
		// Follow refs do not duplicate the error which can be correlated with the original ref.
		if ( ctype_digit( $refName ) ) {
			$errs[] = [ 'key' => 'cite_error_ref_numeric_key' ];
		}

		// Check for missing content, added ?? '' to fix T259676 crasher
		// FIXME: See T260082 for a more complete description of cause and deeper fix
		$missingContent = ( !empty( $cDp->empty ) || trim( $refDmw->body->extsrc ?? '' ) === '' );

		if ( $missingContent ) {
			// Check for missing name and content to generate error code
			//
			// In references content, refs should be used for definition so missing content
			// is an error.  It's possible that no name is present (!hasRefName), which also
			// gets the error "cite_error_references_no_key" above, so protect against that.
			if ( $refsData->inReferencesContent() ) {
				$errs[] = [ 'key' => 'cite_error_empty_references_define',
					'params' => [ $refDmw->attrs->name ?? '' ] ];
			} elseif ( !$hasRefName ) {
				if ( !empty( $cDp->selfClose ) ) {
					$errs[] = [ 'key' => 'cite_error_ref_no_key' ];
				} else {
					$errs[] = [ 'key' => 'cite_error_ref_no_input' ];
				}
			}

			if ( !empty( $cDp->selfClose ) ) {
				unset( $refDmw->body );
			} else {
				// Empty the <sup> since we've serialized its children and
				// removing it below asserts everything has been migrated out
				DOMCompat::replaceChildren( $c );
				$refDmw->body = (object)[ 'html' => $refDmw->body->extsrc ?? '' ];
			}
		} else {
			// If there are multiple <ref>s with the same name, but different content,
			// the content of the first <ref> shows up in the <references> section.
			// in order to ensure lossless RT-ing for later <refs>, we have to record
			// HTML inline for all of them.
			$html = '';
			$contentDiffers = false;
			if ( $ref->hasMultiples && !$validFollow ) {
				// FIXME: Strip the mw:Cite/Follow wrappers
				// See the test, "Forward-referenced ref with magical follow edge case"
				$html = $extApi->domToHtml( $c, true, true );
				// Empty the <sup> since we've serialized its children and
				// removing it below asserts everything has been migrated out
				DOMCompat::replaceChildren( $c );
				$contentDiffers = $html !== $ref->cachedHtml;
				if ( $contentDiffers ) {
					// TODO: Since this error is being placed on the ref, the
					// key should arguably be "cite_error_ref_duplicate_key"
					$errs[] = [ 'key' => 'cite_error_references_duplicate_key',
						'params' => [ $refDmw->attrs->name ] ];
				}
			}
			// Differing content is stored inline as HTML; otherwise the body
			// only points at the reference text by id
			if ( $contentDiffers ) {
				$refDmw->body = (object)[ 'html' => $html ];
			} else {
				$refDmw->body = (object)[ 'id' => 'mw-reference-text-' . $ref->target ];
			}
		}

		$class = 'mw-ref reference';
		if ( $validFollow ) {
			$class .= ' mw-ref-follow';
		}

		// NOTE(review): `$ref->name ?` below is a truthiness test, so a name of
		// "0" would select `$ref->id` rather than the last linkback — confirm
		// that this is intended.
		$lastLinkback = $ref->linkbacks[count( $ref->linkbacks ) - 1] ?? null;
		DOMUtils::addAttributes( $linkBack, [
				'about' => $about,
				'class' => $class,
				'id' => ( $refsData->inEmbeddedContent() || $validFollow ) ?
					null : ( $ref->name ? $lastLinkback : $ref->id ),
				'rel' => 'dc:references',
				'typeof' => $nodeType
			]
		);
		DOMUtils::addTypeOf( $linkBack, 'mw:Extension/ref' );

		// Carry over only src, dsr and pi from the wrapper's data-parsoid
		$dataParsoid = new stdClass;
		if ( isset( $nodeDp->src ) ) {
			$dataParsoid->src = $nodeDp->src;
		}
		if ( isset( $nodeDp->dsr ) ) {
			$dataParsoid->dsr = $nodeDp->dsr;
		}
		if ( isset( $nodeDp->pi ) ) {
			$dataParsoid->pi = $nodeDp->pi;
		}
		DOMDataUtils::setDataParsoid( $linkBack, $dataParsoid );

		// A template wrapper keeps the template's data-mw instead of the ref's
		$dmw = $isTplWrapper ? $tplDmw : $refDmw;
		DOMDataUtils::setDataMw( $linkBack, $dmw );

		// FIXME(T214241): Should the errors be added to data-mw if
		// $isTplWrapper?  Here and other calls to addErrorsToNode.
		if ( count( $errs ) > 0 ) {
			self::addErrorsToNode( $linkBack, $errs );
		}

		// refLink is the link to the citation
		$refLink = $doc->createElement( 'a' );
		DOMUtils::addAttributes( $refLink, [
			'href' => $extApi->getPageUri() . '#' . $ref->target,
			'style' => 'counter-reset: mw-Ref ' . $ref->groupIndex . ';',
		] );
		// NOTE(review): truthiness test — a group named "0" would be skipped
		// here and in the link text below; confirm that this is intended.
		if ( $ref->group ) {
			$refLink->setAttribute( 'data-mw-group', $ref->group );
		}

		// refLink-span which will contain a default rendering of the cite link
		// for browsers that don't support counters
		$refLinkSpan = $doc->createElement( 'span' );
		$refLinkSpan->setAttribute( 'class', 'mw-reflink-text' );
		$refLinkSpan->appendChild( $doc->createTextNode(
			'[' . ( $ref->group ? $ref->group . ' ' : '' ) . $ref->groupIndex . ']'
		) );

		$refLink->appendChild( $refLinkSpan );
		$linkBack->appendChild( $refLink );

		// Swap the sealed fragment wrapper for the finished linkback
		$node->parentNode->replaceChild( $linkBack, $node );

		// Keep the first content to compare multiple <ref>s with the same name.
		if ( $ref->contentId === null && !$missingContent ) {
			$ref->contentId = $contentId;
			$ref->dir = $refDir;
		} else {
			DOMCompat::remove( $c );
			$extApi->clearContentDOM( $contentId );
		}
	}
389
390	/**
391	 * @param DOMElement $node
392	 * @param array $errs
393	 */
394	private static function addErrorsToNode( DOMElement $node, array $errs ) {
395		DOMUtils::addTypeOf( $node, 'mw:Error' );
396		$dmw = DOMDataUtils::getDataMw( $node );
397		$dmw->errors = is_array( $dmw->errors ?? null ) ?
398			array_merge( $dmw->errors, $errs ) : $errs;
399	}
400
401	/**
402	 * @param ParsoidExtensionAPI $extApi
403	 * @param DOMElement $refsNode
404	 * @param ReferencesData $refsData
405	 * @param bool $autoGenerated
406	 */
407	private static function insertReferencesIntoDOM(
408		ParsoidExtensionAPI $extApi, DOMElement $refsNode,
409		ReferencesData $refsData, bool $autoGenerated = false
410	): void {
411		$isTplWrapper = DOMUtils::hasTypeOf( $refsNode, 'mw:Transclusion' );
412		$dp = DOMDataUtils::getDataParsoid( $refsNode );
413		$group = $dp->group ?? '';
414		$refGroup = $refsData->getRefGroup( $group );
415
416		// Iterate through the ref list to back-patch typeof and data-mw error
417		// information into ref for errors only known at time of references
418		// insertion.  Refs in the top level dom will be processed immediately,
419		// whereas embedded refs will be gathered for batch processing, since
420		// we need to parse embedded content to find them.
421		if ( $refGroup ) {
422			$autoGeneratedWithGroup = ( $autoGenerated && $group !== '' );
423			foreach ( $refGroup->refs as $ref ) {
424				$errs = [];
425				// Mark all refs that are part of a group that is autogenerated
426				if ( $autoGeneratedWithGroup ) {
427					$errs[] = [ 'key' => 'cite_error_group_refs_without_references',
428						'params' => [ $group ] ];
429				}
430				// Mark all refs that are named without content
431				if ( ( $ref->name !== '' ) && $ref->contentId === null ) {
432					// TODO: Since this error is being placed on the ref,
433					// the key should arguably be "cite_error_ref_no_text"
434					$errs[] = [ 'key' => 'cite_error_references_no_text' ];
435				}
436				if ( count( $errs ) > 0 ) {
437					foreach ( $ref->nodes as $node ) {
438						self::addErrorsToNode( $node, $errs );
439					}
440					foreach ( $ref->embeddedNodes as $about ) {
441						$refsData->embeddedErrors[$about] = $errs;
442					}
443				}
444			}
445		}
446
447		// Note that `$sup`s here are probably all we really need to check for
448		// errors caught with `$refsData->inReferencesContent()` but it's
449		// probably easier to just know that state while they're being
450		// constructed.
451		$nestedRefsHTML = array_map(
452			function ( DOMElement $sup ) use ( $extApi ) {
453				return $extApi->domToHtml( $sup, false, true ) . "\n";
454			},
455			DOMCompat::querySelectorAll(
456				$refsNode, 'sup[typeof~=\'mw:Extension/ref\']'
457			)
458		);
459
460		if ( !$isTplWrapper ) {
461			$dataMw = DOMDataUtils::getDataMw( $refsNode );
462			// Mark this auto-generated so that we can skip this during
463			// html -> wt and so that clients can strip it if necessary.
464			if ( $autoGenerated ) {
465				$dataMw->autoGenerated = true;
466			} elseif ( count( $nestedRefsHTML ) > 0 ) {
467				$dataMw->body = (object)[ 'html' => "\n" . implode( $nestedRefsHTML ) ];
468			} elseif ( empty( $dp->selfClose ) ) {
469				$dataMw->body = PHPUtils::arrayToObject( [ 'html' => '' ] );
470			} else {
471				unset( $dataMw->body );
472			}
473			// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
474			unset( $dp->selfClose );
475		}
476
477		// Deal with responsive wrapper
478		if ( DOMCompat::getClassList( $refsNode )->contains( 'mw-references-wrap' ) ) {
479			$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
480			if ( $refGroup && count( $refGroup->refs ) > $rrOpts['threshold'] ) {
481				DOMCompat::getClassList( $refsNode )->add( 'mw-references-columns' );
482			}
483			$refsNode = $refsNode->firstChild;
484		}
485
486		// Remove all children from the references node
487		//
488		// Ex: When {{Reflist}} is reused from the cache, it comes with
489		// a bunch of references as well. We have to remove all those cached
490		// references before generating fresh references.
491		DOMCompat::replaceChildren( $refsNode );
492
493		if ( $refGroup ) {
494			foreach ( $refGroup->refs as $ref ) {
495				$refGroup->renderLine( $extApi, $refsNode, $ref );
496			}
497		}
498
499		// Remove the group from refsData
500		$refsData->removeRefGroup( $group );
501	}
502
503	/**
504	 * Process `<ref>`s left behind after the DOM is fully processed.
505	 * We process them as if there was an implicit `<references />` tag at
506	 * the end of the DOM.
507	 *
508	 * @param ParsoidExtensionAPI $extApi
509	 * @param ReferencesData $refsData
510	 * @param DOMNode $node
511	 */
512	public static function insertMissingReferencesIntoDOM(
513		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
514	): void {
515		$doc = $node->ownerDocument;
516		foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
517			$domFragment = $doc->createDocumentFragment();
518			$frag = self::createReferences(
519				$extApi,
520				$domFragment,
521				[
522					// Force string cast here since in the foreach above, $groupName
523					// is an array key. In that context, number-like strings are
524					// silently converted to a numeric value!
525					// Ex: In <ref group="2" />, the "2" becomes 2 in the foreach
526					'group' => (string)$groupName,
527					'responsive' => null,
528				],
529				function ( $dp ) use ( $extApi ) {
530					// The new references come out of "nowhere", so to make selser work
531					// properly, add a zero-sized DSR pointing to the end of the document.
532					$content = $extApi->getPageConfig()->getRevisionContent()->getContent( 'main' );
533					$contentLength = strlen( $content );
534					$dp->dsr = new DomSourceRange( $contentLength, $contentLength, 0, 0 );
535				},
536				true
537			);
538
539			// Add a \n before the <ol> so that when serialized to wikitext,
540			// each <references /> tag appears on its own line.
541			$node->appendChild( $doc->createTextNode( "\n" ) );
542			$node->appendChild( $frag );
543
544			self::insertReferencesIntoDOM( $extApi, $frag, $refsData, true );
545		}
546	}
547
548	/**
549	 * @param ParsoidExtensionAPI $extApi
550	 * @param ReferencesData $refsData
551	 * @param string $str
552	 * @return string
553	 */
554	private static function processEmbeddedRefs(
555		ParsoidExtensionAPI $extApi, ReferencesData $refsData, string $str
556	): string {
557		$domFragment = $extApi->htmlToDom( $str );
558		self::processRefs( $extApi, $refsData, $domFragment );
559		return $extApi->domToHtml( $domFragment, true, true );
560	}
561
562	/**
563	 * @param ParsoidExtensionAPI $extApi
564	 * @param ReferencesData $refsData
565	 * @param DOMNode $node
566	 */
567	public static function processRefs(
568		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
569	): void {
570		$child = $node->firstChild;
571		while ( $child !== null ) {
572			$nextChild = $child->nextSibling;
573			if ( $child instanceof DOMElement ) {
574				if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
575					self::extractRefFromNode( $extApi, $child, $refsData );
576				} elseif ( DOMUtils::hasTypeOf( $child, 'mw:Extension/references' ) ) {
577					if ( !$refsData->inReferencesContent() ) {
578						$refsData->referencesGroup =
579							DOMDataUtils::getDataParsoid( $child )->group ?? '';
580					}
581					$refsData->pushInEmbeddedContent( 'references' );
582					if ( $child->hasChildNodes() ) {
583						self::processRefs( $extApi, $refsData, $child );
584					}
585					$refsData->popInEmbeddedContent();
586					if ( !$refsData->inReferencesContent() ) {
587						$refsData->referencesGroup = '';
588						self::insertReferencesIntoDOM( $extApi, $child, $refsData, false );
589					}
590				} else {
591					$refsData->pushInEmbeddedContent();
592					// Look for <ref>s embedded in data attributes
593					$extApi->processHiddenHTMLInDataAttributes( $child,
594						function ( string $html ) use ( $extApi, $refsData ) {
595							return self::processEmbeddedRefs( $extApi, $refsData, $html );
596						}
597					);
598					$refsData->popInEmbeddedContent();
599					if ( $child->hasChildNodes() ) {
600						self::processRefs( $extApi, $refsData, $child );
601					}
602				}
603			}
604			$child = $nextChild;
605		}
606	}
607
608	/**
609	 * Traverse into all the embedded content and mark up the refs in there
610	 * that have errors that weren't known before the content was serialized.
611	 *
612	 * Some errors are only known at the time when we're inserting the
613	 * references lists, at which point, embedded content has already been
614	 * serialized and stored, so we no longer have live access to it.  We
615	 * therefore map about ids to errors for a ref at that time, and then do
616	 * one final walk of the dom to peak into all the embedded content and
617	 * mark up the errors where necessary.
618	 *
619	 * @param ParsoidExtensionAPI $extApi
620	 * @param ReferencesData $refsData
621	 * @param DOMNode $node
622	 */
623	public static function addEmbeddedErrors(
624		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
625	): void {
626		$processEmbeddedErrors = function ( string $html ) use ( $extApi, $refsData ) {
627			// Similar to processEmbeddedRefs
628			$domFragment = $extApi->htmlToDom( $html );
629			self::addEmbeddedErrors( $extApi, $refsData, $domFragment );
630			return $extApi->domToHtml( $domFragment, true, true );
631		};
632		$processBodyHtml = function ( DOMElement $n ) use ( $processEmbeddedErrors ) {
633			$dataMw = DOMDataUtils::getDataMw( $n );
634			if ( is_string( $dataMw->body->html ?? null ) ) {
635				$dataMw->body->html = $processEmbeddedErrors(
636					$dataMw->body->html
637				);
638			}
639		};
640		$child = $node->firstChild;
641		while ( $child !== null ) {
642			$nextChild = $child->nextSibling;
643			if ( $child instanceof DOMElement ) {
644				if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/ref' ) ) {
645					$processBodyHtml( $child );
646					$about = $child->getAttribute( 'about' ) ?? '';
647					$errs = $refsData->embeddedErrors[$about] ?? null;
648					if ( $errs ) {
649						self::addErrorsToNode( $child, $errs );
650					}
651				} elseif ( DOMUtils::hasTypeOf( $child, 'mw:Extension/references' ) ) {
652					$processBodyHtml( $child );
653				} else {
654					$extApi->processHiddenHTMLInDataAttributes(
655						$child, $processEmbeddedErrors
656					);
657				}
658				if ( $child->hasChildNodes() ) {
659					self::addEmbeddedErrors( $extApi, $refsData, $child );
660				}
661			}
662			$child = $nextChild;
663		}
664	}
665
666	/** @inheritDoc */
667	public function sourceToDom(
668		ParsoidExtensionAPI $extApi, string $txt, array $extArgs
669	): DOMDocumentFragment {
670		$domFragment = $extApi->extTagToDOM(
671			$extArgs,
672			'',
673			$txt,
674			[
675				'wrapperTag' => 'div',
676				'parseOpts' => [ 'extTag' => 'references' ],
677			]
678		);
679
680		$refsOpts = $extApi->extArgsToArray( $extArgs ) + [
681			'group' => null,
682			'responsive' => null,
683		];
684
685		// Detect invalid parameters on the references tag
686		$knownAttributes = [ 'group', 'responsive' ];
687		foreach ( $refsOpts as $key => $value ) {
688			if ( !in_array( strtolower( (string)$key ), $knownAttributes, true ) ) {
689				$extApi->pushError( 'cite_error_references_invalid_parameters' );
690				break;
691			}
692		}
693
694		$frag = self::createReferences(
695			$extApi,
696			$domFragment,
697			$refsOpts,
698			function ( $dp ) use ( $extApi ) {
699				$dp->src = $extApi->extTag->getSource();
700				// Setting redundant info on fragment.
701				// $docBody->firstChild info feels cumbersome to use downstream.
702				if ( $extApi->extTag->isSelfClosed() ) {
703					$dp->selfClose = true;
704				}
705			}
706		);
707		$domFragment->appendChild( $frag );
708		return $domFragment;
709	}
710
711	/** @inheritDoc */
712	public function domToWikitext(
713		ParsoidExtensionAPI $extApi, DOMElement $node, bool $wrapperUnmodified
714	) {
715		$dataMw = DOMDataUtils::getDataMw( $node );
716		if ( !empty( $dataMw->autoGenerated ) && ( $dataMw->attrs->group ?? '' ) !== '' ) {
717			// Eliminate auto-inserted <references /> noise in rt-testing
718			return '';
719		} else {
720			$startTagSrc = $extApi->extStartTagToWikitext( $node );
721			if ( empty( $dataMw->body ) ) {
722				return $startTagSrc; // We self-closed this already.
723			} else {
724				if ( is_string( $dataMw->body->html ) ) {
725					$src = $extApi->htmlToWikitext(
726						[ 'extName' => $dataMw->name ],
727						$dataMw->body->html
728					);
729					return $startTagSrc . $src . '</' . $dataMw->name . '>';
730				} else {
731					$extApi->log( 'error',
732						'References body unavailable for: ' . DOMCompat::getOuterHTML( $node )
733					);
734					return ''; // Drop it!
735				}
736			}
737		}
738	}
739
740	/** @inheritDoc */
741	public function lintHandler(
742		ParsoidExtensionAPI $extApi, DOMElement $refs, callable $defaultHandler
743	): ?DOMNode {
744		// Nothing to do
745		//
746		// FIXME: Not entirely true for scenarios where the <ref> tags
747		// are defined in the references section that is itself templated.
748		//
749		// {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
750		//
751		// In this example, the references tag has the right tplInfo and
752		// when the <ref> tag is processed in the body of the article where
753		// it is accessed, there is no relevant template or dsr info available.
754		//
755		// Ignoring for now.
756		return $refs->nextSibling;
757	}
758}
759