1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Ext\Cite;
5
6use DOMDocument;
7use DOMElement;
8use DOMNode;
9use stdClass;
10use Wikimedia\Parsoid\Core\DomSourceRange;
11use Wikimedia\Parsoid\Ext\DOMDataUtils;
12use Wikimedia\Parsoid\Ext\DOMUtils;
13use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
14use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
15use Wikimedia\Parsoid\Ext\PHPUtils;
16use Wikimedia\Parsoid\Ext\WTUtils;
17use Wikimedia\Parsoid\Utils\DOMCompat;
18
19class References extends ExtensionTagHandler {
20	/**
21	 * @param DOMNode $node
22	 * @return bool
23	 */
24	private static function hasRef( DOMNode $node ): bool {
25		$c = $node->firstChild;
26		while ( $c ) {
27			if ( DOMUtils::isElt( $c ) ) {
28				if ( WTUtils::isSealedFragmentOfType( $c, 'ref' ) ) {
29					return true;
30				}
31				if ( self::hasRef( $c ) ) {
32					return true;
33				}
34			}
35			$c = $c->nextSibling;
36		}
37		return false;
38	}
39
40	/**
41	 * @param ParsoidExtensionAPI $extApi
42	 * @param DOMDocument $doc
43	 * @param DOMNode|null $body
44	 * @param array $refsOpts
45	 * @param callable|null $modifyDp
46	 * @param bool $autoGenerated
47	 * @return DOMElement
48	 */
49	private static function createReferences(
50		ParsoidExtensionAPI $extApi, DOMDocument $doc, ?DOMNode $body,
51		array $refsOpts, ?callable $modifyDp, bool $autoGenerated = false
52	): DOMElement {
53		$ol = $doc->createElement( 'ol' );
54		DOMCompat::getClassList( $ol )->add( 'mw-references' );
55		DOMCompat::getClassList( $ol )->add( 'references' );
56
57		if ( $body ) {
58			DOMUtils::migrateChildren( $body, $ol );
59		}
60
61		// Support the `responsive` parameter
62		$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
63		$responsiveWrap = !empty( $rrOpts['enabled'] );
64		if ( $refsOpts['responsive'] !== null ) {
65			$responsiveWrap = $refsOpts['responsive'] !== '0';
66		}
67
68		if ( $responsiveWrap ) {
69			$div = $doc->createElement( 'div' );
70			DOMCompat::getClassList( $div )->add( 'mw-references-wrap' );
71			$div->appendChild( $ol );
72			$frag = $div;
73		} else {
74			$frag = $ol;
75		}
76
77		if ( $autoGenerated ) {
78			// FIXME: This is very much trying to copy ExtensionHandler::onDocument
79			DOMUtils::addAttributes( $frag, [
80				'typeof' => 'mw:Extension/references',
81				'about' => $extApi->newAboutId()
82			] );
83			$dataMw = (object)[
84				'name' => 'references',
85				'attrs' => new stdClass,
86			];
87			// Dont emit empty keys
88			if ( $refsOpts['group'] ) {
89				$dataMw->attrs->group = $refsOpts['group'];
90			}
91			DOMDataUtils::setDataMw( $frag, $dataMw );
92		}
93
94		$dp = DOMDataUtils::getDataParsoid( $frag );
95		if ( $refsOpts['group'] ) {  // No group for the empty string either
96			$dp->group = $refsOpts['group'];
97			$ol->setAttribute( 'data-mw-group', $refsOpts['group'] );
98		}
99		if ( $modifyDp ) {
100			$modifyDp( $dp );
101		}
102
103		return $frag;
104	}
105
106	/**
107	 * @param ParsoidExtensionAPI $extApi
108	 * @param DOMElement $node
109	 * @param ReferencesData $refsData
110	 * @param ?string $referencesAboutId
111	 * @param ?string $referencesGroup
112	 */
113	private static function extractRefFromNode(
114		ParsoidExtensionAPI $extApi,
115		DOMElement $node, ReferencesData $refsData,
116		?string $referencesAboutId = null, ?string $referencesGroup = ''
117	): void {
118		$doc = $node->ownerDocument;
119		$nestedInReferences = $referencesAboutId !== null;
120
121		// This is data-parsoid from the dom fragment node that's gone through
122		// dsr computation and template wrapping.
123		$nodeDp = DOMDataUtils::getDataParsoid( $node );
124		$typeOf = $node->getAttribute( 'typeof' );
125		$isTplWrapper = DOMUtils::matchTypeOf( $node, '/^mw:Transclusion$/' );
126		$nodeType = preg_replace( '#mw:DOMFragment/sealed/ref#', '', $typeOf, 1 );
127		$contentId = $nodeDp->html;
128		$tplDmw = $isTplWrapper ? DOMDataUtils::getDataMw( $node ) : null;
129
130		// This is the <sup> that's the meat of the sealed fragment
131		$c = $extApi->getContentDOM( $contentId );
132		$cDp = DOMDataUtils::getDataParsoid( $c );
133		$refDmw = DOMDataUtils::getDataMw( $c );
134		if ( empty( $cDp->empty ) && self::hasRef( $c ) ) { // nested ref-in-ref
135			self::processRefs( $extApi, $refsData, $c );
136		}
137
138		// Use the about attribute on the wrapper with priority, since it's
139		// only added when the wrapper is a template sibling.
140		$about = $node->hasAttribute( 'about' )
141			? $node->getAttribute( 'about' )
142			: $c->getAttribute( 'about' );
143
144		// FIXME(SSS): Need to clarify semantics here.
145		// If both the containing <references> elt as well as the nested <ref>
146		// elt has a group attribute, what takes precedence?
147		$group = $refDmw->attrs->group ?? $referencesGroup ?? '';
148
149		// NOTE: This will have been trimmed in Utils::getExtArgInfo()'s call
150		// to TokenUtils::kvToHash() and ExtensionHandler::normalizeExtOptions()
151		$refName = $refDmw->attrs->name ?? '';
152
153		// Add ref-index linkback
154		$linkBack = $doc->createElement( 'sup' );
155
156		$ref = $refsData->add(
157			$extApi, $group, $refName, $about, $nestedInReferences, $linkBack
158		);
159
160		$errs = [];
161
162		// Check for missing content
163		$missingContent = ( !empty( $cDp->empty ) || trim( $refDmw->body->extsrc ) === '' );
164
165		if ( $missingContent ) {
166			// Check for missing name and content to generate error code
167			if ( $refName === '' ) {
168				if ( !empty( $cDp->selfClose ) ) {
169					$errs[] = [ 'key' => 'cite_error_ref_no_key' ];
170				} else {
171					$errs[] = [ 'key' => 'cite_error_ref_no_input' ];
172				}
173			}
174
175			if ( !empty( $cDp->selfClose ) ) {
176				unset( $refDmw->body );
177			} else {
178				$refDmw->body = (object)[ 'html' => $refDmw->body->extsrc ];
179			}
180		} else {
181			// If there are multiple <ref>s with the same name, but different content,
182			// the content of the first <ref> shows up in the <references> section.
183			// in order to ensure lossless RT-ing for later <refs>, we have to record
184			// HTML inline for all of them.
185			$html = '';
186			$contentDiffers = false;
187			if ( $ref->hasMultiples ) {
188				$html = $extApi->domToHtml( $c, true, true );
189				$c = null; // $c is being release in the call above
190				$contentDiffers = $html !== $ref->cachedHtml;
191				if ( $contentDiffers ) {
192					// TODO: Since this error is being placed on the ref, the
193					// key should arguably be "cite_error_ref_duplicate_key"
194					$errs[] = [ 'key' => 'cite_error_references_duplicate_key' ];
195				}
196			}
197			if ( $contentDiffers ) {
198				$refDmw->body = (object)[ 'html' => $html ];
199			} else {
200				$refDmw->body = (object)[ 'id' => 'mw-reference-text-' . $ref->target ];
201			}
202		}
203
204		$lastLinkback = $ref->linkbacks[count( $ref->linkbacks ) - 1] ?? null;
205		DOMUtils::addAttributes( $linkBack, [
206				'about' => $about,
207				'class' => 'mw-ref',
208				'id' => $nestedInReferences ? null : ( $ref->name ? $lastLinkback : $ref->id ),
209				'rel' => 'dc:references',
210				'typeof' => $nodeType
211			]
212		);
213		DOMUtils::addTypeOf( $linkBack, 'mw:Extension/ref' );
214		if ( count( $errs ) > 0 ) {
215			DOMUtils::addTypeOf( $linkBack, 'mw:Error' );
216		}
217
218		$dataParsoid = new stdClass;
219		if ( isset( $nodeDp->src ) ) {
220			$dataParsoid->src = $nodeDp->src;
221		}
222		if ( isset( $nodeDp->dsr ) ) {
223			$dataParsoid->dsr = $nodeDp->dsr;
224		}
225		if ( isset( $nodeDp->pi ) ) {
226			$dataParsoid->pi = $nodeDp->pi;
227		}
228		DOMDataUtils::setDataParsoid( $linkBack, $dataParsoid );
229
230		$dmw = $isTplWrapper ? $tplDmw : $refDmw;
231		if ( count( $errs ) > 0 ) {
232			if ( is_array( $dmw->errors ?? null ) ) {
233				$errs = array_merge( $dmw->errors, $errs );
234			}
235			$dmw->errors = $errs;
236		}
237		DOMDataUtils::setDataMw( $linkBack, $dmw );
238
239		// refLink is the link to the citation
240		$refLink = $doc->createElement( 'a' );
241		DOMUtils::addAttributes( $refLink, [
242				'href' => $extApi->getPageUri() . '#' . $ref->target,
243				'style' => 'counter-reset: mw-Ref ' . $ref->groupIndex . ';',
244			]
245		);
246		if ( $ref->group ) {
247			$refLink->setAttribute( 'data-mw-group', $ref->group );
248		}
249
250		// refLink-span which will contain a default rendering of the cite link
251		// for browsers that don't support counters
252		$refLinkSpan = $doc->createElement( 'span' );
253		$refLinkSpan->setAttribute( 'class', 'mw-reflink-text' );
254		$refLinkSpan->appendChild( $doc->createTextNode(
255			'[' . ( $ref->group ? $ref->group . ' ' : '' ) . $ref->groupIndex . ']'
256			)
257		);
258		$refLink->appendChild( $refLinkSpan );
259		$linkBack->appendChild( $refLink );
260
261		$node->parentNode->replaceChild( $linkBack, $node );
262
263		// Keep the first content to compare multiple <ref>s with the same name.
264		if ( $ref->contentId === null && !$missingContent ) {
265			$ref->contentId = $contentId;
266			$ref->dir = strtolower( $refDmw->attrs->dir ?? '' );
267		}
268	}
269
270	/**
271	 * @param ParsoidExtensionAPI $extApi
272	 * @param DOMElement $refsNode
273	 * @param ReferencesData $refsData
274	 * @param bool $autoGenerated
275	 */
276	private static function insertReferencesIntoDOM(
277		ParsoidExtensionAPI $extApi, DOMElement $refsNode,
278		ReferencesData $refsData, bool $autoGenerated = false
279	): void {
280		$isTplWrapper = DOMUtils::matchTypeOf( $refsNode, '/^mw:Transclusion$/' );
281		$dp = DOMDataUtils::getDataParsoid( $refsNode );
282		$group = $dp->group ?? '';
283		$refGroup = $refsData->getRefGroup( $group );
284
285		// Iterate through the named ref list for refs without content and
286		// back-patch typeof and data-mw error information into named ref
287		// instances without content
288		// FIXME: This doesn't update the refs found while processEmbeddedRefs
289		if ( $refGroup ) {
290			foreach ( $refGroup->indexByName as $ref ) {
291				if ( $ref->contentId === null ) {
292					foreach ( $ref->nodes as $linkBack ) {
293						DOMUtils::addTypeOf( $linkBack, 'mw:Error' );
294						$dmw = DOMDataUtils::getDataMw( $linkBack );
295						// TODO: Since this error is being placed on the ref,
296						// the key should arguably be "cite_error_ref_no_text"
297						$errs = [ [ 'key' => 'cite_error_references_no_text' ] ];
298						if ( is_array( $dmw->errors ?? null ) ) {
299							$errs = array_merge( $dmw->errors, $errs );
300						}
301						$dmw->errors = $errs;
302					}
303				}
304			}
305		}
306
307		$nestedRefsHTML = array_map(
308			function ( DOMElement $sup ) use ( $extApi ) {
309				return $extApi->domToHtml( $sup, false, true ) . "\n";
310			},
311			DOMCompat::querySelectorAll(
312				$refsNode, 'sup[typeof~=\'mw:Extension/ref\']'
313			)
314		);
315
316		if ( !$isTplWrapper ) {
317			$dataMw = DOMDataUtils::getDataMw( $refsNode );
318			// Mark this auto-generated so that we can skip this during
319			// html -> wt and so that clients can strip it if necessary.
320			if ( $autoGenerated ) {
321				$dataMw->autoGenerated = true;
322			} elseif ( count( $nestedRefsHTML ) > 0 ) {
323				$dataMw->body = (object)[ 'html' => "\n" . implode( $nestedRefsHTML ) ];
324			} elseif ( empty( $dp->selfClose ) ) {
325				$dataMw->body = PHPUtils::arrayToObject( [ 'html' => '' ] );
326			} else {
327				unset( $dataMw->body );
328			}
329			// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
330			unset( $dp->selfClose );
331		}
332
333		// Deal with responsive wrapper
334		if ( DOMCompat::getClassList( $refsNode )->contains( 'mw-references-wrap' ) ) {
335			$rrOpts = $extApi->getSiteConfig()->responsiveReferences();
336			if ( $refGroup && count( $refGroup->refs ) > $rrOpts['threshold'] ) {
337				DOMCompat::getClassList( $refsNode )->add( 'mw-references-columns' );
338			}
339			$refsNode = $refsNode->firstChild;
340		}
341
342		// Remove all children from the references node
343		//
344		// Ex: When {{Reflist}} is reused from the cache, it comes with
345		// a bunch of references as well. We have to remove all those cached
346		// references before generating fresh references.
347		while ( $refsNode->firstChild ) {
348			$refsNode->removeChild( $refsNode->firstChild );
349		}
350
351		if ( $refGroup ) {
352			foreach ( $refGroup->refs as $ref ) {
353				$refGroup->renderLine( $extApi, $refsNode, $ref );
354			}
355		}
356
357		// Remove the group from refsData
358		$refsData->removeRefGroup( $group );
359	}
360
361	/**
362	 * Process `<ref>`s left behind after the DOM is fully processed.
363	 * We process them as if there was an implicit `<references />` tag at
364	 * the end of the DOM.
365	 *
366	 * @param ParsoidExtensionAPI $extApi
367	 * @param ReferencesData $refsData
368	 * @param DOMNode $node
369	 */
370	public static function insertMissingReferencesIntoDOM(
371		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMNode $node
372	): void {
373		$doc = $node->ownerDocument;
374
375		foreach ( $refsData->getRefGroups() as $groupName => $refsGroup ) {
376			$frag = self::createReferences(
377				$extApi,
378				$doc,
379				null,
380				[
381					// Force string cast here since in the foreach above, $groupName
382					// is an array key. In that context, number-like strings are
383					// silently converted to a numeric value!
384					// Ex: In <ref group="2" />, the "2" becomes 2 in the foreach
385					'group' => (string)$groupName,
386					'responsive' => null,
387				],
388				function ( $dp ) use ( $extApi ) {
389					// The new references come out of "nowhere", so to make selser work
390					// properly, add a zero-sized DSR pointing to the end of the document.
391					$content = $extApi->getPageConfig()->getRevisionContent()->getContent( 'main' );
392					$contentLength = strlen( $content );
393					$dp->dsr = new DomSourceRange( $contentLength, $contentLength, 0, 0 );
394				},
395				true
396			);
397
398			// Add a \n before the <ol> so that when serialized to wikitext,
399			// each <references /> tag appears on its own line.
400			$node->appendChild( $doc->createTextNode( "\n" ) );
401			$node->appendChild( $frag );
402
403			self::insertReferencesIntoDOM( $extApi, $frag, $refsData, true );
404		}
405	}
406
407	/**
408	 * @param ParsoidExtensionAPI $extApi
409	 * @param ReferencesData $refsData
410	 * @param string $str
411	 * @return string
412	 */
413	private static function processEmbeddedRefs(
414		ParsoidExtensionAPI $extApi, ReferencesData $refsData, string $str
415	): string {
416		$domBody = DOMCompat::getBody( $extApi->htmlToDom( $str ) );
417		self::processRefs( $extApi, $refsData, $domBody );
418		return $extApi->domToHtml( $domBody, true, true );
419	}
420
421	/**
422	 * @param ParsoidExtensionAPI $extApi
423	 * @param ReferencesData $refsData
424	 * @param DOMElement $node
425	 */
426	public static function processRefs(
427		ParsoidExtensionAPI $extApi, ReferencesData $refsData, DOMElement $node
428	): void {
429		$child = $node->firstChild;
430		while ( $child !== null ) {
431			$nextChild = $child->nextSibling;
432			if ( $child instanceof DOMElement ) {
433				if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
434					self::extractRefFromNode( $extApi, $child, $refsData );
435				} elseif ( DOMUtils::matchTypeOf( $child, '#^mw:Extension/references$#' ) ) {
436					$referencesId = $child->getAttribute( 'about' ) ?? '';
437					$referencesGroup = DOMDataUtils::getDataParsoid( $child )->group ?? null;
438					self::processRefsInReferences(
439						$extApi,
440						$refsData,
441						$child,
442						$referencesId,
443						$referencesGroup
444					);
445					self::insertReferencesIntoDOM( $extApi, $child, $refsData, false );
446				} else {
447					// Look for <ref>s embedded in data attributes
448					$extApi->processHiddenHTMLInDataAttributes( $child,
449						function ( string $html ) use ( $extApi, $refsData ) {
450							return self::processEmbeddedRefs( $extApi, $refsData, $html );
451						}
452					);
453
454					if ( $child->hasChildNodes() ) {
455						self::processRefs( $extApi, $refsData, $child );
456					}
457				}
458			}
459			$child = $nextChild;
460		}
461	}
462
463	/**
464	 * This handles wikitext like this:
465	 * ```
466	 *   <references> <ref>foo</ref> </references>
467	 *   <references> <ref>bar</ref> </references>
468	 * ```
469	 *
470	 * @param ParsoidExtensionAPI $extApi
471	 * @param ReferencesData $refsData
472	 * @param DOMElement $node
473	 * @param string $referencesId
474	 * @param string|null $referencesGroup
475	 */
476	private static function processRefsInReferences(
477		ParsoidExtensionAPI $extApi, ReferencesData $refsData,
478		DOMElement $node, string $referencesId, ?string $referencesGroup
479	): void {
480		$child = $node->firstChild;
481		while ( $child !== null ) {
482			$nextChild = $child->nextSibling;
483			if ( $child instanceof DOMElement ) {
484				if ( WTUtils::isSealedFragmentOfType( $child, 'ref' ) ) {
485					self::extractRefFromNode(
486						$extApi,
487						$child,
488						$refsData,
489						$referencesId,
490						$referencesGroup
491					);
492				} elseif ( $child->hasChildNodes() ) {
493					self::processRefsInReferences(
494						$extApi,
495						$refsData,
496						$child,
497						$referencesId,
498						$referencesGroup
499					);
500				}
501			}
502			$child = $nextChild;
503		}
504	}
505
506	/** @inheritDoc */
507	public function sourceToDom(
508		ParsoidExtensionAPI $extApi, string $txt, array $extArgs
509	): DOMDocument {
510		$doc = $extApi->extTagToDOM(
511			$extArgs,
512			'',
513			$txt,
514			[
515				'wrapperTag' => 'div',
516				'parseOpts' => [ 'extTag' => 'references' ],
517			]
518		);
519
520		$refsOpts = $extApi->extArgsToArray( $extArgs ) + [
521			'group' => null,
522			'responsive' => null,
523		];
524
525		$docBody = DOMCompat::getBody( $doc );
526
527		$frag = self::createReferences(
528			$extApi,
529			$doc,
530			$docBody,
531			$refsOpts,
532			function ( $dp ) use ( $extApi ) {
533				$dp->src = $extApi->getExtSource();
534				// Setting redundant info on fragment.
535				// $docBody->firstChild info feels cumbersome to use downstream.
536				if ( $extApi->isSelfClosedExtTag() ) {
537					$dp->selfClose = true;
538				}
539			}
540		);
541		DOMCompat::getBody( $doc )->appendChild( $frag );
542		return $doc;
543	}
544
545	/** @inheritDoc */
546	public function domToWikitext(
547		ParsoidExtensionAPI $extApi, DOMElement $node, bool $wrapperUnmodified
548	) {
549		$dataMw = DOMDataUtils::getDataMw( $node );
550		if ( !empty( $dataMw->autoGenerated ) && $extApi->rtTestMode() ) {
551			// Eliminate auto-inserted <references /> noise in rt-testing
552			return '';
553		} else {
554			$startTagSrc = $extApi->extStartTagToWikitext( $node );
555			if ( empty( $dataMw->body ) ) {
556				return $startTagSrc; // We self-closed this already.
557			} else {
558				if ( is_string( $dataMw->body->html ) ) {
559					$src = $extApi->htmlToWikitext(
560						[ 'extName' => $dataMw->name ],
561						$dataMw->body->html
562					);
563					return $startTagSrc . $src . '</' . $dataMw->name . '>';
564				} else {
565					$extApi->log( 'error',
566						'References body unavailable for: ' . DOMCompat::getOuterHTML( $node )
567					);
568					return ''; // Drop it!
569				}
570			}
571		}
572	}
573
574	/** @inheritDoc */
575	public function lintHandler(
576		ParsoidExtensionAPI $extApi, DOMElement $refs, callable $defaultHandler
577	): ?DOMNode {
578		// Nothing to do
579		//
580		// FIXME: Not entirely true for scenarios where the <ref> tags
581		// are defined in the references section that is itself templated.
582		//
583		// {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
584		//
585		// In this example, the references tag has the right tplInfo and
586		// when the <ref> tag is processed in the body of the article where
587		// it is accessed, there is no relevant template or dsr info available.
588		//
589		// Ignoring for now.
590		return $refs->nextSibling;
591	}
592}
593