1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Wt2Html\TT;
5
6use Wikimedia\Parsoid\Config\WikitextConstants;
7use Wikimedia\Parsoid\Tokens\EndTagTk;
8use Wikimedia\Parsoid\Tokens\EOFTk;
9use Wikimedia\Parsoid\Tokens\KV;
10use Wikimedia\Parsoid\Tokens\SourceRange;
11use Wikimedia\Parsoid\Tokens\TagTk;
12use Wikimedia\Parsoid\Tokens\Token;
13use Wikimedia\Parsoid\Utils\ContentUtils;
14use Wikimedia\Parsoid\Utils\DOMUtils;
15use Wikimedia\Parsoid\Utils\PHPUtils;
16use Wikimedia\Parsoid\Utils\PipelineUtils;
17use Wikimedia\Parsoid\Wt2Html\TokenTransformManager;
18
/**
 * Handler for language conversion markup, which looks like `-{ ... }-`.
 *
 * Every `language-variant` token is replaced by a `meta`, `span` or `div`
 * tag token with `typeof="mw:LanguageVariant"` and a JSON-encoded
 * `data-mw-variant` attribute describing the conversion rule.
 */
class LanguageVariantHandler extends TokenHandler {
	/**
	 * Forwards both arguments unchanged to the parent TokenHandler constructor.
	 *
	 * @inheritDoc
	 */
	public function __construct( TokenTransformManager $manager, array $options ) {
		parent::__construct( $manager, $options );
	}

	/**
	 * Convert one variant text to DOM and serialize the result.
	 *
	 * `$t` is not the text itself: once its leading `mw:lv` is removed it is
	 * the key into `$attribs` of the entry whose `v` holds the token list to
	 * process and whose `srcOffsets` describes where it came from.
	 *
	 * @param TokenTransformManager $manager Supplies the env and frame used
	 *   for the nested pipeline.
	 * @param array $options Handler options; `expandTemplates` and
	 *   `inTemplate` are forwarded to the nested pipeline.
	 * @param string $t Attribute reference (`mw:lv` followed by the key).
	 * @param array $attribs Attributes of the language-variant token.
	 * @return array `xmlstr`: the fragment serialized by ContentUtils::ppToXML
	 *   with `innerXML`; `isBlock`: the result of
	 *   DOMUtils::hasBlockElementDescendant for the fragment.
	 */
	private function convertOne( TokenTransformManager $manager, array $options, string $t,
		array $attribs ): array {
		// we're going to fetch the actual token list from attribs
		// (this ensures that it has gone through the earlier stages
		// of the pipeline already to be expanded)
		$t = preg_replace( '/^mw:lv/', '', $t, 1 );
		$srcOffsets = $attribs[$t]->srcOffsets;
		$domFragment = PipelineUtils::processContentInPipeline(
			$manager->env, $manager->getFrame(), array_merge( $attribs[$t]->v, [ new EOFTk() ] ),
			[
				'pipelineType' => 'tokens/x-mediawiki/expanded',
				'pipelineOpts' => [
					'inlineContext' => true,
					'expandTemplates' => $options['expandTemplates'],
					'inTemplate' => $options['inTemplate']
				],
				// `??` also covers the case where $srcOffsets itself is null
				'srcOffsets' => $srcOffsets->value ?? null,
				'sol' => true
			]
		);
		return [
			'xmlstr' => ContentUtils::ppToXML(
				$domFragment, [ 'innerXML' => true ]
			),
			'isBlock' => DOMUtils::hasBlockElementDescendant( $domFragment ),
		];
	}

	/**
	 * Compress a whitespace sequence.
	 *
	 * Each run of consecutive empty strings is replaced by a single integer
	 * holding the length of the run; every other entry is copied through
	 * unchanged, in order. For example `[ '', '', ' ', '' ]` becomes
	 * `[ 2, ' ', 1 ]`.
	 *
	 * @param ?array $a Whitespace strings, or null.
	 * @return ?array The compressed sequence, or null when `$a` is null.
	 */
	private function compressSpArray( ?array $a ): ?array {
		if ( $a === null ) {
			return null;
		}
		$result = [];
		$emptyRun = 0;
		foreach ( $a as $sp ) {
			if ( $sp === '' ) {
				$emptyRun++;
				continue;
			}
			// A non-empty entry ends the current run of empty strings.
			if ( $emptyRun > 0 ) {
				$result[] = $emptyRun;
				$emptyRun = 0;
			}
			$result[] = $sp;
		}
		if ( $emptyRun > 0 ) {
			$result[] = $emptyRun;
		}
		return $result;
	}

	/**
	 * Main handler.
	 * See {@link TokenTransformManager#addTransform}'s transformation parameter
	 *
	 * Reads `tsr`, `flags`, `flagSp`, `texts`, `variants`, `original` and
	 * `src` from the token's dataAttribs, converts every variant text through
	 * convertOne(), and builds the `data-mw-variant` description. The token's
	 * dataAttribs are only read, never modified.
	 *
	 * @param Token $token The `language-variant` token.
	 * @return array `[ 'tokens' => [...] ]`: a single `meta` TagTk, or a
	 *   `span`/`div` TagTk followed by its matching EndTagTk.
	 */
	private function onLanguageVariant( Token $token ): array {
		$manager = $this->manager;
		$options = $this->options;
		$attribs = $token->attribs;
		$dataAttribs = $token->dataAttribs;
		$tsr = $dataAttribs->tsr;
		$flags = $dataAttribs->flags;
		$flagSp = $dataAttribs->flagSp;
		$sawFlagA = false;

		// Work on a copy: popping the marker straight off $dataAttribs->texts
		// would alter the incoming token as a side effect.
		$rawTexts = $dataAttribs->texts;

		// remove trailing semicolon marker, if present
		$trailingSemi = false;
		if ( count( $rawTexts ) &&
			( $rawTexts[count( $rawTexts ) - 1]['semi'] ?? null )
		) {
			$trailingSemi = array_pop( $rawTexts )['sp'] ?? null;
		}

		// convert all variant texts to DOM
		// ($isBlock is captured by reference so the closure can record
		// whether any converted text reported block content)
		$isBlock = false;
		$texts = array_map( function ( array $t ) use ( $manager, $options, $attribs, &$isBlock ) {
			if ( isset( $t['twoway'] ) ) {
				$text = $this->convertOne( $manager, $options, $t['text'], $attribs );
				$isBlock = $isBlock || !empty( $text['isBlock'] );
				return [ 'lang' => $t['lang'], 'text' => $text['xmlstr'], 'twoway' => true, 'sp' => $t['sp'] ];
			} elseif ( isset( $t['lang'] ) ) {
				$from = $this->convertOne( $manager, $options, $t['from'], $attribs );
				$to = $this->convertOne( $manager, $options, $t['to'], $attribs );
				$isBlock = $isBlock || !empty( $from['isBlock'] ) || !empty( $to['isBlock'] );
				return [ 'lang' => $t['lang'], 'from' => $from['xmlstr'], 'to' => $to['xmlstr'],
					'sp' => $t['sp'] ];
			} else {
				$text = $this->convertOne( $manager, $options, $t['text'], $attribs );
				$isBlock = $isBlock || !empty( $text['isBlock'] );
				return [ 'text' => $text['xmlstr'], 'sp' => [] ];
			}
		}, $rawTexts );

		// collect two-way/one-way conversion rules
		$oneway = [];
		$twoway = [];
		$sawTwoway = false;
		$sawOneway = false;
		$textSp = null;
		$twowaySp = [];
		$onewaySp = [];
		foreach ( $texts as $t ) {
			if ( isset( $t['twoway'] ) ) {
				$twoway[] = [ 'l' => $t['lang'], 't' => $t['text'] ];
				array_push( $twowaySp, $t['sp'][0], $t['sp'][1], $t['sp'][2] );
				$sawTwoway = true;
			} elseif ( isset( $t['lang'] ) ) {
				$oneway[] = [ 'l' => $t['lang'], 'f' => $t['from'], 't' => $t['to'] ];
				array_push( $onewaySp, $t['sp'][0], $t['sp'][1], $t['sp'][2], $t['sp'][3] );
				$sawOneway = true;
			}
		}

		// To avoid too much data-mw bloat, only the top level keys in
		// data-mw-variant are "human readable".  Nested keys are single-letter:
		// `l` for `language`, `t` for `text` or `to`, `f` for `from`.
		if ( count( $flags ) === 0 && count( $dataAttribs->variants ) > 0 ) {
			// "Restrict possible variants to a limited set"
			$dataMWV = [
				'filter' => [ 'l' => $dataAttribs->variants, 't' => $texts[0]['text'] ],
				'show' => true
			];
		} else {
			// Translate each flag through LCFlagMap; a flag missing from the
			// map marks the whole rule as an error, a flag mapped to a falsy
			// value is dropped.
			$dataMWV = array_reduce( $flags, function ( array $dmwv, string $f ) use ( &$sawFlagA ) {
				if ( array_key_exists( $f, WikitextConstants::$LCFlagMap ) ) {
					if ( WikitextConstants::$LCFlagMap[$f] ) {
						$dmwv[WikitextConstants::$LCFlagMap[$f]] = true;
						if ( $f === 'A' ) {
							$sawFlagA = true;
						}
					}
				} else {
					$dmwv['error'] = true;
				}
				return $dmwv;
			}, [] );
			// (this test is done at the top of ConverterRule::getRuleConvertedStr)
			// (also partially in ConverterRule::parse)
			if ( count( $texts ) === 1 &&
				!isset( $texts[0]['lang'] ) && !isset( $dataMWV['name'] )
			) {
				if ( isset( $dataMWV['add'] ) || isset( $dataMWV['remove'] ) ) {
					// A single language-less text becomes one two-way rule
					// for the wildcard code `*`.
					$twoway = [ [ 'l' => '*', 't' => $texts[0]['text'] ] ];
					$sawTwoway = true;
				} else {
					$dataMWV['disabled'] = true;
					unset( $dataMWV['describe'] );
				}
			}
			if ( isset( $dataMWV['describe'] ) && !$sawFlagA ) {
				$dataMWV['show'] = true;
			}
			if ( isset( $dataMWV['disabled'] ) || isset( $dataMWV['name'] ) ) {
				// Replace the boolean marker with the text it applies to.
				if ( isset( $dataMWV['disabled'] ) ) {
					$dataMWV['disabled'] = [ 't' => $texts[0]['text'] ?? '' ];
				} else {
					$dataMWV['name'] = [ 't' => $texts[0]['text'] ?? '' ];
				}
				if ( isset( $dataMWV['title'] ) || isset( $dataMWV['add'] ) ) {
					unset( $dataMWV['show'] );
				} else {
					$dataMWV['show'] = true;
				}
			} elseif ( $sawTwoway ) {
				$dataMWV['twoway'] = $twoway;
				$textSp = $twowaySp;
				if ( $sawOneway ) {
					// two-way and one-way rules were mixed
					$dataMWV['error'] = true;
				}
			} else {
				$dataMWV['oneway'] = $oneway;
				$textSp = $onewaySp;
				if ( !$sawOneway ) {
					// neither kind of rule was present
					$dataMWV['error'] = true;
				}
			}
		}
		// Use meta/not meta instead of explicit 'show' flag.
		$isMeta = !isset( $dataMWV['show'] );
		unset( $dataMWV['show'] );

		// Trim some data from data-parsoid if it matches the defaults:
		// two entries per original flag, every one of them empty.
		if ( count( $flagSp ) === 2 * count( $dataAttribs->original ) ) {
			$nonEmptySp = array_filter( $flagSp, static function ( $s ) {
				return $s !== '';
			} );
			if ( count( $nonEmptySp ) === 0 ) {
				$flagSp = null;
			}
		}
		// Note: an empty $textSp is falsy, so it is treated like an unset one.
		if ( $trailingSemi !== false && $textSp ) {
			$textSp[] = $trailingSemi;
		}

		// Our markup is always the same, except for the contents of
		// the data-mw-variant attribute and whether it's a span, div, or a
		// meta, depending on (respectively) whether conversion output
		// contains only inline content, could contain block content,
		// or never contains any content.

		$das = [
			'fl' => $dataAttribs->original, // original "fl"ags
			'flSp' => $this->compressSpArray( $flagSp ), // spaces around flags
			'src' => $dataAttribs->src,
			'tSp' => $this->compressSpArray( $textSp ), // spaces around texts
			// for span/div the last two source characters go to the end tag below
			'tsr' => new SourceRange( $tsr->start, $isMeta ? $tsr->end : ( $tsr->end - 2 ) )
		];

		if ( $das['flSp'] === null ) {
			unset( $das['flSp'] );
		}

		if ( $das['tSp'] === null ) {
			unset( $das['tSp'] );
		}

		PHPUtils::sortArray( $dataMWV );
		$wrapperName = $isBlock ? 'div' : 'span';
		$tokens = [
			new TagTk( $isMeta ? 'meta' : $wrapperName, [
					new KV( 'typeof', 'mw:LanguageVariant' ),
					new KV( 'data-mw-variant', PHPUtils::jsonEncode( $dataMWV ) )
				], (object)$das
			)
		];
		if ( !$isMeta ) {
			$tokens[] = new EndTagTk( $wrapperName, [],
				(object)[
					'tsr' => new SourceRange( $tsr->end - 2, $tsr->end )
				]
			);
		}

		return [ 'tokens' => $tokens ];
	}

	/**
	 * Dispatch `language-variant` tokens to onLanguageVariant(); every other
	 * token is returned unchanged.
	 *
	 * @inheritDoc
	 */
	public function onTag( Token $token ) {
		return $token->getName() === 'language-variant' ? $this->onLanguageVariant( $token ) : $token;
	}
}
296