1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Html2Wt;
5
6use Composer\Semver\Semver;
7use DOMDocumentFragment;
8use DOMElement;
9use DOMNode;
10use stdClass;
11use Wikimedia\Assert\Assert;
12use Wikimedia\Parsoid\Config\Env;
13use Wikimedia\Parsoid\Core\SelserData;
14use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
15use Wikimedia\Parsoid\Html2Wt\ConstrainedText\ConstrainedText;
16use Wikimedia\Parsoid\Utils\DOMDataUtils;
17use Wikimedia\Parsoid\Utils\DOMUtils;
18use Wikimedia\Parsoid\Utils\PHPUtils;
19use Wikimedia\Parsoid\Utils\Utils;
20use Wikimedia\Parsoid\Utils\WTUtils;
21
22/**
23 * State object for the wikitext serializers.
24 */
25class SerializerState {
26
27	/**
28	 * Regexp for checking if what we have consumed wikimarkup that has special meaning at the
29	 * beginning of the line, and is indeed at the beginning of the line (modulo comments and
30	 * other ignored elements).
31	 *
32	 * @return string
33	 */
34	private function solWikitextRegexp(): string {
35		static $solWikitextRegexp = null;
36		if ( $solWikitextRegexp === null ) {
37			$sol = PHPUtils::reStrip(
38				$this->env->getSiteConfig()->solTransparentWikitextNoWsRegexp(),
39				'@'
40			);
41			$solWikitextRegexp = '@' .
42				'^((?:' . $sol . '|' .
43				# SSS FIXME: What about onlyinclude and noinclude?
44				'<includeonly>.*?</includeonly>' .
45				')*)' .
46				'([\ \*#:;{\|!=].*)$' .
47				'@D';
48		}
49		return $solWikitextRegexp;
50	}
51
52	/**
53	 * Regexp for checking whether we are at the start of the line (modulo comments and
54	 * other ignored elements).
55	 *
56	 * @return string
57	 */
58	private function solRegexp(): string {
59		static $solRegexp = null;
60		if ( $solRegexp === null ) {
61			$sol = PHPUtils::reStrip(
62				$this->env->getSiteConfig()->solTransparentWikitextNoWsRegexp(),
63				'@'
64			);
65			$solRegexp = '@' .
66				'(^|\\n)' .
67				'(' .
68				# SSS FIXME: What about onlyinclude and noinclude?
69				'<includeonly>.*?</includeonly>' .
70				'|' . $sol .
71				')*$' .
72				'@D';
73		}
74		return $solRegexp;
75	}
76
	/**
	 * Separator information:
	 * - constraints (array<array|int>|null): min/max number of newlines
	 * - src (string|null): collected separator text from DOM text/comment nodes
	 * - lastSourceNode (?DOMNode): Seems to be bookkeeping to make sure we don't reuse
	 *     original separators when `emitChunk` is called
	 *     consecutively on the same node.  However, it also
	 *     differs from `$this->prevNode` in that it only gets
	 *     updated when a node calls `emitChunk` so that nodes
	 *     serializing `justChildren` don't mix up `buildSep`.
	 *
	 * All three fields are reset to null by resetSep().
	 * PORT-FIXME: could use a dedicated class
	 * @var stdClass
	 */
	public $sep;

	/**
	 * Is the serializer at the start of a new wikitext line?
	 * @var bool
	 */
	public $onSOL = true;

	/**
	 * True when wts kicks off, false after the first chunk has been emitted
	 * (emitChunk() clears it).
	 * SSS FIXME: Can this be done away with in some way?
	 * @var bool
	 */
	public $atStartOfOutput = true;

	/**
	 * Is the serializer currently handling link content (children of `<a>`)?
	 * @var bool
	 */
	public $inLink = false;

	/**
	 * Is the serializer currently handling caption content?
	 * @var bool
	 */
	public $inCaption = false;

	/**
	 * Is the serializer currently handling an indent-pre tag?
	 * @var bool
	 */
	public $inIndentPre = false;

	/**
	 * Is the serializer currently handling a html-pre tag?
	 * @var bool
	 */
	public $inHTMLPre = false;

	/**
	 * Is the serializer currently handling a tag that the PHP parser
	 * treats as a block tag?
	 * @var bool
	 */
	public $inPHPBlock = false;

	/**
	 * Is the serializer being invoked recursively to serialize a
	 * template-generated attribute (via `WSP.getAttributeValue`'s
	 * template handling).  If so, we should suppress some
	 * serialization escapes, like autolink protection, since
	 * these are not valid for attribute values.
	 * @var bool
	 */
	public $inAttribute = false;

	/**
	 * Is the serializer currently processing a subtree that has been
	 * modified compared to original content (ex: via VE / CX)?
	 *
	 * No default is declared here, so this is null until something assigns it.
	 *
	 * @var bool
	 */
	public $inModifiedContent;

	/**
	 * Did we introduce nowikis for indent-pre protection?
	 * If yes, we might run a post-pass to strip useless ones.
	 * @var bool
	 */
	public $hasIndentPreNowikis = false;

	/**
	 * Did we introduce nowikis to preserve quote semantics?
	 * If yes, we might run a post-pass to strip useless ones.
	 * @var bool
	 */
	public $hasQuoteNowikis = false;

	/**
	 * Did we introduce `<nowiki />`s?
	 * If yes, we do a postpass to remove unnecessary trailing ones.
	 * @var bool
	 */
	public $hasSelfClosingNowikis = false;

	/**
	 * Did we introduce nowikis around `=.*=` text?
	 * If yes, we do a postpass to remove unnecessary escapes.
	 * @var bool
	 */
	public $hasHeadingEscapes = false;

	/**
	 * Records the nesting level of wikitext tables
	 * @var int
	 */
	public $wikiTableNesting = 0;

	/**
	 * Stack of wikitext escaping handlers -- these handlers are responsible
	 * for smart escaping when the surrounding wikitext context is known.
	 * Pushed and popped by serializeChildren() around its child loop.
	 * @var callable[] See {@link serializeChildren()}
	 */
	public $wteHandlerStack = [];

	/**
	 * This object is used by the wikitext escaping algorithm -- represents
	 * a "single line" of output wikitext as represented by a block node in
	 * the DOM.
	 * - firstNode (?DOMNode): first DOM node processed on this line
	 * - text (string): output so far from all nodes on the current line
	 * - chunks (ConstrainedText[]): list of chunks comprising the current line
	 * @var stdClass
	 * XXX: replace with output buffering per line
	 * PORT-FIXME: could use a dedicated class
	 */
	public $currLine;

	/**
	 * Stack used to enforce single-line context
	 * @var SingleLineContext
	 */
	public $singleLineContext;

	/**
	 * Text to be emitted at the start of file, for redirects
	 * @var string|null
	 */
	public $redirectText = null;

	/** @var WikitextSerializer */
	public $serializer;

	/** @var ParsoidExtensionAPI */
	public $extApi;

	/** @var string The serialized output */
	public $out = '';

	/**
	 * Whether to use heuristics to determine if a list item, heading, table cell, etc.
	 * should have whitespace inserted after the "*#=|!" wikitext chars? This is normally
	 * true by default, but not so if HTML content version is older than 1.7.0.
	 * In practice, we are now at version 2.1, but Flow stores HTML, so till Flow migrates
	 * all its content over to a later version, we need a boolean flag.
	 * Assigned in initMode().
	 * @var bool
	 */
	public $useWhitespaceHeuristics;

	/**
	 * Are we in selective serialization mode? Assigned in initMode().
	 * @see SelectiveSerializer
	 * @var bool
	 */
	public $selserMode;

	/**
	 * Selser data; getOrigSrc() reads the original wikitext from its `oldText` field.
	 * Not assigned explicitly in this class -- NOTE(review): presumably supplied through
	 * the constructor's $options; confirm against the caller.
	 * @var SelserData
	 */
	private $selserData;

	/**
	 * If in selser mode, while processing a node, do we know if
	 * its previous node has not been modified in an edit?
	 * @var bool
	 */
	public $prevNodeUnmodified;

	/**
	 * If in selser mode, while processing a node, do we know if
	 * it has not been modified in an edit?
	 * @var bool
	 */
	public $currNodeUnmodified;

	/**
	 * Should we run the wikitext escaping code on the wikitext chunk
	 * that will be emitted? Defaults to false; emitChunk() escapes the chunk
	 * when this is true and then resets the flag to false.
	 * (Earlier note, unverified here: true unless we are in HTML <pre>.)
	 * @var bool
	 */
	public $escapeText = false;

	/**
	 * Used as fast patch for special protected characters in WikitextEscapeHandlers and
	 * comes from LanguageVariantHandler
	 * @var string|null
	 */
	public $protect;

	/** @var Separators */
	public $separators;

	/** @var Env */
	private $env;

	/**
	 * The node most recently passed to updateModificationFlags(); null until then.
	 * It may be any DOMNode, not only an element (emitSepForNode() checks isElt() on it).
	 * @var ?DOMNode
	 */
	private $prevNode;

	/**
	 * Log prefix to use in trace output
	 * @var string
	 */
	private $logPrefix = 'OUT:';

	/**
	 * Set by the constructor: true when the input content version satisfies '>=2.1.1'.
	 * NOTE(review): presumably marks HTML versions that carry DSR info for trimmed
	 * whitespace -- confirm against Separators.
	 * @var bool
	 */
	public $haveTrimmedWsDSR = false;
293
294	/**
295	 * @param WikitextSerializer $serializer
296	 * @param array $options
297	 */
298	public function __construct( WikitextSerializer $serializer, array $options = [] ) {
299		$this->env = $serializer->env;
300		$this->serializer = $serializer;
301		$this->extApi = new ParsoidExtensionAPI( $this->env, [ 'html2wt' => [ 'state' => $this ] ] );
302		foreach ( $options as $name => $option ) {
303			// PORT-FIXME validate
304			if ( !( $option instanceof Env ) ) {
305				$this->$name = Utils::clone( $option );
306			}
307		}
308		$this->resetCurrLine( null );
309		$this->singleLineContext = new SingleLineContext();
310		$this->resetSep();
311		$this->haveTrimmedWsDSR = Semver::satisfies( $this->env->getInputContentVersion(), '>=2.1.1' );
312		$this->separators = new Separators( $this->env, $this );
313	}
314
315	/**
316	 * @note Porting note: this replaces direct access
317	 * @return Env
318	 */
319	public function getEnv(): Env {
320		return $this->env;
321	}
322
323	/**
324	 * Initialize a few boolean flags based on serialization mode.
325	 * FIXME: Ideally, this should be private. Requires shuffing around
326	 * where SerializerState is constructed so that $selserMode is known
327	 * at the time of construction.
328	 * @private for use by WikitextSerializer only
329	 * @param bool $selserMode Are we running selective serialization?
330	 */
331	public function initMode( bool $selserMode ): void {
332		$this->useWhitespaceHeuristics =
333			Semver::satisfies( $this->env->getInputContentVersion(), '>=1.7.0' );
334		$this->selserMode = $selserMode;
335	}
336
337	/**
338	 * Appends the seperator source and updates the SOL state if necessary.
339	 * @param string $src
340	 * @param DOMNode $node
341	 */
342	public function appendSep( string $src, DOMNode $node ): void {
343		$this->sep->src = ( $this->sep->src ?: '' ) . $src;
344		$this->sepIntroducedSOL( $src, $node );
345	}
346
347	/**
348	 * Cycle the state after processing a node.
349	 * @param DOMNode $node
350	 */
351	public function updateSep( DOMNode $node ): void {
352		$this->sep->lastSourceNode = $node;
353	}
354
355	private function resetSep() {
356		$this->sep = PHPUtils::arrayToObject( [
357			'constraints' => null,
358			'src' => null,
359			'lastSourceNode' => null,
360		] );
361	}
362
363	/**
364	 * Reset the current line state.
365	 * @param ?DOMNode $node
366	 */
367	private function resetCurrLine( ?DOMNode $node ): void {
368		$this->currLine = (object)[
369			'text' => '',
370			'chunks' => [],
371			'firstNode' => $node
372		];
373	}
374
375	/**
376	 * Process and emit a line of ConstrainedText chunks, adjusting chunk boundaries as necessary.
377	 * (Start of line and end of line are always safe for ConstrainedText chunks, so we don't need
378	 * to buffer more than the last line.)
379	 */
380	private function flushLine(): void {
381		$this->out .= ConstrainedText::escapeLine( $this->currLine->chunks );
382		$this->currLine->chunks = [];
383	}
384
385	/**
386	 * Extracts a subset of the page source bound by the supplied indices.
387	 * @param int $start Start offset, in bytes
388	 * @param int $end End offset, in bytes
389	 * @return string|null
390	 */
391	public function getOrigSrc( int $start, int $end ): ?string {
392		Assert::invariant( $this->selserMode, 'SerializerState::$selserMode must be set' );
393		if (
394			$start <= $end &&
395			// FIXME: Having a $start greater than the source length is
396			// probably a canary for corruption.  Maybe we should be throwing
397			// here instead.  See T240053
398			$start <= strlen( $this->selserData->oldText )
399		) {
400			return substr( $this->selserData->oldText, $start, $end - $start );
401		} else {
402			return null;
403		}
404	}
405
406	/**
407	 * Like it says on the tin.
408	 * @param DOMNode $node
409	 */
410	public function updateModificationFlags( DOMNode $node ): void {
411		$this->prevNodeUnmodified = $this->currNodeUnmodified;
412		$this->currNodeUnmodified = false;
413		$this->prevNode = $node;
414	}
415
416	/**
417	 * Separators put us in SOL state.
418	 * @param string $sep
419	 * @param DOMNode $node
420	 */
421	private function sepIntroducedSOL( string $sep, DOMNode $node ): void {
422		// Don't get tripped by newlines in comments!  Be wary of nowikis added
423		// by makeSepIndentPreSafe on the last line.
424		$nonCommentSep = preg_replace( Utils::COMMENT_REGEXP, '', $sep );
425		if ( substr( $nonCommentSep, -1 ) === "\n" ) {
426			// Since we are stashing away newlines for emitting
427			// before the next element, we are in SOL state wrt
428			// the content of that next element.
429			//
430			// FIXME: The only serious caveat is if all these newlines
431			// will get stripped out in the context of any parent node
432			// that suppress newlines (ex: <li> nodes that are forcibly
433			// converted to non-html wikitext representation -- newlines
434			// will get suppressed in those context). We currently don't
435			// handle arbitrary HTML which cause these headaches. And,
436			// in any case, we might decide to emit such HTML as native
437			// HTML to avoid these problems. To be figured out later when
438			// it is a real issue.
439			$this->onSOL = true;
440		}
441
442		if ( preg_match( '/\n/', $nonCommentSep ) ) {
443			// process escapes in our full line
444			$this->flushLine();
445			$this->resetCurrLine( $node );
446		}
447	}
448
449	/**
450	 * Accumulates chunks on the current line.
451	 * @param ConstrainedText $chunk
452	 * @param string $logPrefix
453	 */
454	private function pushToCurrLine( ConstrainedText $chunk, string $logPrefix ) {
455		// Emitting text that has not been escaped
456		$this->currLine->text .= $chunk->text;
457
458		$this->currLine->chunks[] = $chunk;
459
460		$this->serializer->trace( '--->', $logPrefix, function () use ( $chunk ) {
461			return PHPUtils::jsonEncode( $chunk->text );
462		} );
463	}
464
465	/**
466	 * Pushes the seperator to the current line and resets the separator state.
467	 * @param string $sep
468	 * @param DOMNode $node
469	 * @param string $debugPrefix
470	 */
471	private function emitSep( string $sep, DOMNode $node, string $debugPrefix ): void {
472		$sep = ConstrainedText::cast( $sep, $node );
473
474		// Replace newlines if we're in a single-line context
475		if ( $this->singleLineContext->enforced() ) {
476			$sep->text = preg_replace( '/\n/', ' ', $sep->text );
477		}
478
479		$this->pushToCurrLine( $sep, $debugPrefix );
480		$this->sepIntroducedSOL( $sep->text, $node );
481
482		// Reset separator state
483		$this->resetSep();
484		$this->updateSep( $node );
485	}
486
	/**
	 * Determines if we can use the original separator for this node or if we
	 * need to build one based on its constraints, and then emits it.
	 *
	 * The original separator is only considered when this is not a repeat call for
	 * the node that last sourced a separator, the previous node is either the <body>
	 * parent of $node or an unmodified sibling, $node itself is unmodified, and
	 * neither node is next to a deleted block node. If no original separator is
	 * found, the separator comes from Separators::buildSep().
	 *
	 * @param DOMNode $node
	 */
	private function emitSepForNode( DOMNode $node ): void {
		/* When block nodes are deleted, the deletion affects whether unmodified
		 * newline separators between a pair of unmodified P tags can be reused.
		 *
		 * Example:
		 * ```
		 * Original WT  : "<div>x</div>foo\nbar"
		 * Original HTML: "<div>x</div><p>foo</p>\n<p>bar</p>"
		 * Edited HTML  : "<p>foo</p>\n<p>bar</p>"
		 * Annotated DOM: "<mw:DiffMarker is-block><p>foo</p>\n<p>bar</p>"
		 * Expected WT  : "foo\n\nbar"
		 * ```
		 *
		 * Note the additional newline between "foo" and "bar" even though originally,
		 * there was just a single newline.
		 *
		 * So, even though the two P tags and the separator between them is
		 * unmodified, it is insufficient to rely on just that. We have to look at
		 * what has happened on the two wikitext lines onto which the two P tags
		 * will get serialized.
		 *
		 * Now, if you check the code for `nextToDeletedBlockNodeInWT`, that code is
		 * not really looking at ALL the nodes before/after the nodes that could
		 * serialize onto the wikitext lines. It is looking at the immediately
		 * adjacent nodes, i.e. it is not necessary to look if a block-tag was
		 * deleted 2 or 5 siblings away. If we had to actually examine all of those
		 * nodes, this would get very complex, and it would be much simpler to just
		 * discard the original separators => potentially lots of dirty diffs.
		 *
		 * To understand why it is sufficient (for correctness) to examine just
		 * the immediately adjacent nodes, let us look at an additional example.
		 * ```
		 * Original WT  : "a<div>b</div>c<div>d</div>e\nf"
		 * Original HTML: "<p>a</p><div>b</div><p>c</p><div>d</div><p>e</p>\n<p>f</p>"
		 * ```
		 * Note how `<block>` tags and `<p>` tags interleave in the HTML. This would be
		 * the case always no matter how much inline content showed up between the
		 * block tags in wikitext. If the b-`<div>` was deleted, we don't care
		 * about it, since we still have the d-`<div>` before the P tag that preserves
		 * the correctness of the single `"\n"` separator. If the d-`<div>` was deleted,
		 * we conservatively ignore the original separator and let normal P-P constraints
		 * take care of it. At worst, we might generate a dirty diff in this scenario. */

		// True when we are asked again about the node that last sourced a separator;
		// in that case the original separator must not be reused.
		$again = ( $node === $this->sep->lastSourceNode );
		$origSepUsable = !$again &&
			(
				// first-content-node of <body> ($this->prevNode)
				(
					DOMUtils::isBody( $this->prevNode ) &&
					$node->parentNode === $this->prevNode
				)
				||
				// unmodified sibling node of $this->prevNode
				(
					$this->prevNode && $this->prevNodeUnmodified &&
					$node->parentNode === $this->prevNode->parentNode &&
					!WTSUtils::nextToDeletedBlockNodeInWT( $this->prevNode, true )
				)
			) &&
			$this->currNodeUnmodified && !WTSUtils::nextToDeletedBlockNodeInWT( $node, false );

		$origSep = null;
		if ( $origSepUsable ) {
			if ( DOMUtils::isElt( $this->prevNode ) && DOMUtils::isElt( $node ) ) {
				// Both nodes are elements: slice the separator out of the original
				// source, between the end of prevNode and the start of $node.
				'@phan-var DOMElement $node';/** @var DOMElement $node */
				$origSep = $this->getOrigSrc(
					// <body> won't have DSR in body_only scenarios
					( DOMUtils::isBody( $this->prevNode ) ?
						0 : DOMDataUtils::getDataParsoid( $this->prevNode )->dsr->end ),
					DOMDataUtils::getDataParsoid( $node )->dsr->start
				);
			} elseif ( $this->sep->src && WTSUtils::isValidSep( $this->sep->src ) ) {
				// We don't know where '$this->sep->src' comes from. So, reuse it
				// only if it is a valid separator string.
				$origSep = $this->sep->src;
			}
		}

		if ( $origSep !== null ) {
			$this->emitSep( $origSep, $node, 'ORIG-SEP:' );
		} else {
			// No reusable original separator: build one from the constraints.
			// A null/empty result is emitted as the empty string.
			$sep = $this->separators->buildSep( $node );
			$this->emitSep( $sep ?: '', $node, 'SEP:' );
		}
	}
577
578	/**
579	 * Recovers and emits any trimmed whitespace for $node
580	 * @param DOMNode $node
581	 * @param bool $leading
582	 *   if true, trimmed leading whitespace is emitted
583	 *   if false, trimmed railing whitespace is emitted
584	 * @return string|null
585	 */
586	public function recoverTrimmedWhitespace( DOMNode $node, bool $leading ): ?string {
587		$sep = $this->separators->recoverTrimmedWhitespace( $node, $leading );
588		$this->serializer->trace( '--->', "TRIMMED-SEP:", function () use ( $sep ) {
589			return PHPUtils::jsonEncode( $sep );
590		} );
591		return $sep;
592	}
593
	/**
	 * Pushes the chunk to the current line.
	 *
	 * Steps, in order: cast $res to ConstrainedText; replace newlines by spaces in
	 * an enforced single-line context; emit the separator for $node (skipped for
	 * `noSep` chunks); escape the text if $this->escapeText is set (and clear that
	 * flag), or else nowiki-protect a leading sol-sensitive character for certain
	 * unmodified paragraphs in selser mode; push the chunk; update the SOL and
	 * start-of-output flags.
	 *
	 * @param ConstrainedText|string $res
	 * @param DOMNode $node
	 */
	public function emitChunk( $res, DOMNode $node ): void {
		$res = ConstrainedText::cast( $res, $node );

		// Replace newlines if we're in a single-line context
		if ( $this->singleLineContext->enforced() ) {
			$res->text = preg_replace( '/\n/', ' ', $res->text );
		}

		// Emit separator first
		if ( $res->noSep ) {
			/* skip separators for internal tokens from SelSer */
			if ( $this->onSOL ) {
				// process escapes in our full line
				$this->flushLine();
				$this->resetCurrLine( $node );
			}
		} else {
			$this->emitSepForNode( $node );
		}

		// Escape 'res' if necessary
		if ( $this->escapeText ) {
			// Rebuild the chunk around the escaped text, carrying over
			// prefix, suffix and node from the original chunk.
			$res = new ConstrainedText( [
				'text' => $this->serializer->escapeWikiText( $this, $res->text, [
					'node' => $node,
					'isLastChild' => DOMUtils::nextNonDeletedSibling( $node ) === null,
				] ),
				'prefix' => $res->prefix,
				'suffix' => $res->suffix,
				'node' => $res->node,
			] );
			// The flag applies to a single chunk only.
			$this->escapeText = false;
		} else {
			// If 'res' is coming from selser and the current node is a paragraph tag,
			// check if 'res' might need some leading chars nowiki-escaped before being output.
			// Because of block-tag p-wrapping behavior, sol-sensitive characters that used to
			// be in non-sol positions, but yet wrapped in p-tags, could end up in sol-position
			// if those block tags get deleted during edits.
			//
			// Ex: a<div>foo</div>*b
			// -- wt2html --> <p>a</p><div>foo</div><p>*b</p>
			// --   EDIT  --> <p>a</p><p>*b</p>
			// -- html2wt --> a\n\n<nowiki>*</nowiki>b
			//
			// In this scenario, the <p>a</p> and <p>*b</p>
			// will be marked unmodified and will be processed below.
			if ( $this->selserMode
				&& $this->onSOL
				&& $this->currNodeUnmodified
				// 'node' came from original Parsoid HTML unmodified. So, if its content
				// needs nowiki-escaping, we know that the reason it didn't parse into
				// lists/headings/whatever is because it didn't occur at the start of the
				// line => it had a block-tag in the original wikitext. So if the previous
				// node was also unmodified (and since it also came from original Parsoid
				// HTML), we can safely infer that it couldn't have been an inline node or
				// a P-tag (if it were, the p-wrapping code would have swallowed that content
				// into 'node'). So, it would have to be some sort of block tag => $this->onSOL
				// couldn't have been true (because we could have serialized 'node' on the
				// same line as the block tag) => we can save some effort by eliminating
				// scenarios where '$this->prevNodeUnmodified' is true.
				 && !$this->prevNodeUnmodified
				&& $node->nodeName === 'p' && !WTUtils::isLiteralHTMLNode( $node )
			) {
				$pChild = DOMUtils::firstNonSepChild( $node );
				// If a text node, we have to make sure that the text doesn't
				// get reparsed as non-text in the wt2html pipeline.
				if ( $pChild && DOMUtils::isText( $pChild ) ) {
					// $match[1]: sol-transparent prefix, $match[2]: sol-sensitive text
					$match = $res->matches( $this->solWikitextRegexp() );
					if ( $match && isset( $match[2] ) ) {
						if ( preg_match( '/^([\*#:;]|{\||.*=$)/D', $match[2] )
							// ! and | chars are harmless outside tables
							|| ( preg_match( '/^[\|!]/', $match[2] ) && $this->wikiTableNesting > 0 )
							// indent-pres are suppressed inside <blockquote>
							|| ( preg_match( '/^ [^\s]/', $match[2] )
								&& !DOMUtils::hasNameOrHasAncestorOfName( $node, 'blockquote' ) )
						) {
							// Wrap only the first sol-sensitive character in <nowiki>
							$res = ConstrainedText::cast( ( $match[1] ?: '' )
								. '<nowiki>' . substr( $match[2], 0, 1 ) . '</nowiki>'
								. substr( $match[2], 1 ), $node );
						}
					}
				}
			}
		}

		// Output res
		$this->pushToCurrLine( $res, $this->logPrefix );

		// Update sol flag. Test for newlines followed by optional includeonly or comments
		if ( !$res->matches( $this->solRegexp() ) ) {
			$this->onSOL = false;
		}

		// We've emitted something, so we're no longer at start of output.
		$this->atStartOfOutput = false;
	}
695
696	/**
697	 * Serialize the children of a DOM node, sharing the global serializer state.
698	 * Typically called by a DOM-based handler to continue handling its children.
699	 * @param DOMElement|DOMDocumentFragment $node
700	 * @param ?callable $wtEscaper ( $state, $text, $opts )
701	 *   PORT-FIXME document better; should this be done via WikitextEscapeHandlers somehow?
702	 * @param ?DOMNode $firstChild
703	 */
704	public function serializeChildren(
705		DOMNode $node, ?callable $wtEscaper = null, ?DOMNode $firstChild = null
706	): void {
707		// SSS FIXME: Unsure if this is the right thing always
708		if ( $wtEscaper ) {
709			$this->wteHandlerStack[] = $wtEscaper;
710		}
711
712		$child = $firstChild ?: $node->firstChild;
713		while ( $child !== null ) {
714			// We always get the next child to process
715			$child = $this->serializer->serializeNode( $child );
716		}
717
718		if ( $wtEscaper ) {
719			array_pop( $this->wteHandlerStack );
720		}
721
722		// If we serialized children explicitly,
723		// we were obviously processing a modified node.
724		$this->currNodeUnmodified = false;
725	}
726
727	/**
728	 * Abstracts some steps taken in `serializeChildrenToString` and `serializeDOM`
729	 *
730	 * @param DOMElement|DOMDocumentFragment $node
731	 * @param ?callable $wtEscaper See {@link serializeChildren()}
732	 * @internal For use by WikitextSerializer only
733	 */
734	public function kickOffSerialize(
735		DOMNode $node, ?callable $wtEscaper = null
736	): void {
737		$this->updateSep( $node );
738		$this->currNodeUnmodified = false;
739		$this->updateModificationFlags( $node );
740		$this->resetCurrLine( $node->firstChild );
741		$this->serializeChildren( $node, $wtEscaper );
742		// Emit child-parent seps.
743		$this->emitSepForNode( $node );
744		// We've reached EOF, flush the remaining buffered text.
745		$this->flushLine();
746	}
747
748	/**
749	 * Serialize children to a string
750	 *
751	 * FIXME(arlorla): Shouldn't affect the separator state, but accidents have
752	 * have been known to happen. T109793 suggests using its own wts / state.
753	 *
754	 * @param DOMElement|DOMDocumentFragment $node
755	 * @param ?callable $wtEscaper See {@link serializeChildren()}
756	 * @param string $inState
757	 * @return string
758	 */
759	private function serializeChildrenToString(
760		DOMNode $node, ?callable $wtEscaper, string $inState
761	): string {
762		$states = [ 'inLink', 'inCaption', 'inIndentPre', 'inHTMLPre', 'inPHPBlock', 'inAttribute' ];
763		Assert::parameter( in_array( $inState, $states, true ), '$inState', 'Must be one of: '
764			. implode( ', ', $states ) );
765		// FIXME: Make sure that the separators emitted here conform to the
766		// syntactic constraints of syntactic context.
767		$oldSep = $this->sep;
768		$oldSOL = $this->onSOL;
769		$oldOut = $this->out;
770		$oldStart = $this->atStartOfOutput;
771		$oldCurrLine = $this->currLine;
772		$oldLogPrefix = $this->logPrefix;
773		// Modification flags
774		$oldPrevNodeUnmodified = $this->prevNodeUnmodified;
775		$oldCurrNodeUnmodified = $this->currNodeUnmodified;
776		$oldPrevNode = $this->prevNode;
777
778		$this->out = '';
779		$this->logPrefix = 'OUT(C):';
780		$this->resetSep();
781		$this->onSOL = false;
782		$this->atStartOfOutput = false;
783		$this->$inState = true;
784
785		$this->singleLineContext->disable();
786		$this->kickOffSerialize( $node, $wtEscaper );
787		$this->singleLineContext->pop();
788
789		// restore the state
790		$bits = $this->out;
791		$this->out = $oldOut;
792		$this->$inState = false;
793		$this->sep = $oldSep;
794		$this->onSOL = $oldSOL;
795		$this->atStartOfOutput = $oldStart;
796		$this->currLine = $oldCurrLine;
797		$this->logPrefix = $oldLogPrefix;
798		// Modification flags
799		$this->prevNodeUnmodified = $oldPrevNodeUnmodified;
800		$this->currNodeUnmodified = $oldCurrNodeUnmodified;
801		$this->prevNode = $oldPrevNode;
802		return $bits;
803	}
804
805	/**
806	 * Serialize children of a link to a string
807	 * @param DOMElement|DOMDocumentFragment $node
808	 * @param ?callable $wtEscaper See {@link serializeChildren()}
809	 * @return string
810	 */
811	public function serializeLinkChildrenToString(
812		DOMNode $node, ?callable $wtEscaper = null
813	): string {
814		return $this->serializeChildrenToString( $node, $wtEscaper, 'inLink' );
815	}
816
817	/**
818	 * Serialize children of a caption to a string
819	 * @param DOMElement|DOMDocumentFragment $node
820	 * @param ?callable $wtEscaper See {@link serializeChildren()}
821	 * @return string
822	 */
823	public function serializeCaptionChildrenToString(
824		DOMNode $node, ?callable $wtEscaper = null
825	): string {
826		return $this->serializeChildrenToString( $node, $wtEscaper, 'inCaption' );
827	}
828
829	/**
830	 * Serialize children of an indent-pre to a string
831	 * @param DOMElement|DOMDocumentFragment $node
832	 * @param ?callable $wtEscaper See {@link serializeChildren()}
833	 * @return string
834	 */
835	public function serializeIndentPreChildrenToString(
836		DOMNode $node, ?callable $wtEscaper = null
837	): string {
838		return $this->serializeChildrenToString( $node, $wtEscaper, 'inIndentPre' );
839	}
840
841}
842