<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Wt2Html\TT;

use Wikimedia\Parsoid\Config\WikitextConstants;
use Wikimedia\Parsoid\Tokens\EndTagTk;
use Wikimedia\Parsoid\Tokens\EOFTk;
use Wikimedia\Parsoid\Tokens\KV;
use Wikimedia\Parsoid\Tokens\SourceRange;
use Wikimedia\Parsoid\Tokens\TagTk;
use Wikimedia\Parsoid\Tokens\Token;
use Wikimedia\Parsoid\Utils\ContentUtils;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\PHPUtils;
use Wikimedia\Parsoid\Utils\PipelineUtils;
use Wikimedia\Parsoid\Wt2Html\TokenTransformManager;

/**
 * Handler for language conversion markup, which looks like `-{ ... }-`.
 *
 * A `language-variant` token is rewritten into a `meta`, `span` or `div`
 * tag token (plus a matching end tag for `span`/`div`) carrying
 * `typeof="mw:LanguageVariant"` and a JSON `data-mw-variant` attribute.
 */
class LanguageVariantHandler extends TokenHandler {
	/** @inheritDoc */
	public function __construct( TokenTransformManager $manager, array $options ) {
		parent::__construct( $manager, $options );
	}

	/**
	 * Convert one variant text to DOM and serialize it.
	 *
	 * @param TokenTransformManager $manager Supplies the env and frame for the nested pipeline
	 * @param array $options Handler options; 'expandTemplates' and 'inTemplate' are
	 *   forwarded to the nested pipeline
	 * @param string $t Reference to the text; a leading `mw:lv` is stripped and the
	 *   remainder is used as the key into $attribs
	 * @param array $attribs Attributes of the language-variant token
	 * @return array with keys:
	 *   - 'xmlstr': inner XML of the resulting DOM fragment
	 *   - 'isBlock': whether the fragment has a block element descendant
	 */
	private function convertOne( TokenTransformManager $manager, array $options, string $t,
		array $attribs ): array {
		// we're going to fetch the actual token list from attribs
		// (this ensures that it has gone through the earlier stages
		// of the pipeline already to be expanded)
		$t = preg_replace( '/^mw:lv/', '', $t, 1 );
		$srcOffsets = $attribs[$t]->srcOffsets;
		$domFragment = PipelineUtils::processContentInPipeline(
			$manager->env, $manager->getFrame(), array_merge( $attribs[$t]->v, [ new EOFTk() ] ),
			[
				'pipelineType' => 'tokens/x-mediawiki/expanded',
				'pipelineOpts' => [
					'inlineContext' => true,
					'expandTemplates' => $options['expandTemplates'],
					'inTemplate' => $options['inTemplate']
				],
				'srcOffsets' => $srcOffsets->value ?? null,
				'sol' => true
			]
		);
		return [
			'xmlstr' => ContentUtils::ppToXML(
				$domFragment, [ 'innerXML' => true ]
			),
			'isBlock' => DOMUtils::hasBlockElementDescendant( $domFragment ),
		];
	}

	/**
	 * Compress a whitespace sequence.
	 *
	 * Every run of consecutive empty strings is replaced by a single integer
	 * holding the length of the run; all other entries are copied unchanged,
	 * in order. For example `[ '', '', ' ', '' ]` becomes `[ 2, ' ', 1 ]`.
	 *
	 * @param ?array $a
	 * @return ?array null if (and only if) $a is null
	 */
	private function compressSpArray( ?array $a ): ?array {
		if ( $a === null ) {
			return $a;
		}
		$result = [];
		$ctr = 0;
		foreach ( $a as $sp ) {
			if ( $sp === '' ) {
				$ctr++;
			} else {
				// Flush the pending run of empty strings before the real entry
				if ( $ctr > 0 ) {
					$result[] = $ctr;
					$ctr = 0;
				}
				$result[] = $sp;
			}
		}
		// Flush a trailing run of empty strings
		if ( $ctr > 0 ) {
			$result[] = $ctr;
		}
		return $result;
	}

	/**
	 * Main handler.
	 * See {@link TokenTransformManager#addTransform}'s transformation parameter
	 *
	 * Converts every variant text of the token to serialized DOM, builds the
	 * `data-mw-variant` structure from the token's flags and texts, and
	 * emits the replacement tag token(s).
	 *
	 * Note that a trailing-semicolon marker entry, if present, is popped from
	 * `$token->dataAttribs->texts` in place.
	 *
	 * @param Token $token
	 * @return array `[ 'tokens' => Token[] ]`: a single `meta` start tag, or a
	 *   `span`/`div` start tag followed by its end tag
	 */
	private function onLanguageVariant( Token $token ): array {
		$manager = $this->manager;
		$options = $this->options;
		$attribs = $token->attribs;
		$dataAttribs = $token->dataAttribs;
		$tsr = $dataAttribs->tsr;
		$flags = $dataAttribs->flags;
		$flagSp = $dataAttribs->flagSp;
		$sawFlagA = false;

		// remove trailing semicolon marker, if present
		$trailingSemi = false;
		if ( count( $dataAttribs->texts ) &&
			( $dataAttribs->texts[count( $dataAttribs->texts ) - 1]['semi'] ?? null )
		) {
			$trailingSemi = array_pop( $dataAttribs->texts )['sp'] ?? null;
		}
		// convert all variant texts to DOM
		$isBlock = false;
		$texts = array_map( function ( array $t ) use ( $manager, $options, $attribs, &$isBlock ) {
			if ( isset( $t['twoway'] ) ) {
				$text = $this->convertOne( $manager, $options, $t['text'], $attribs );
				$isBlock = $isBlock || !empty( $text['isBlock'] );
				return [ 'lang' => $t['lang'], 'text' => $text['xmlstr'], 'twoway' => true, 'sp' => $t['sp'] ];
			} elseif ( isset( $t['lang'] ) ) {
				$from = $this->convertOne( $manager, $options, $t['from'], $attribs );
				$to = $this->convertOne( $manager, $options, $t['to'], $attribs );
				$isBlock = $isBlock || !empty( $from['isBlock'] ) || !empty( $to['isBlock'] );
				return [ 'lang' => $t['lang'], 'from' => $from['xmlstr'], 'to' => $to['xmlstr'],
					'sp' => $t['sp'] ];
			} else {
				$text = $this->convertOne( $manager, $options, $t['text'], $attribs );
				$isBlock = $isBlock || !empty( $text['isBlock'] );
				return [ 'text' => $text['xmlstr'], 'sp' => [] ];
			}
		}, $dataAttribs->texts );
		// collect two-way/one-way conversion rules
		$oneway = [];
		$twoway = [];
		$sawTwoway = false;
		$sawOneway = false;
		$textSp = null;
		$twowaySp = [];
		$onewaySp = [];
		foreach ( $texts as $t ) {
			if ( isset( $t['twoway'] ) ) {
				$twoway[] = [ 'l' => $t['lang'], 't' => $t['text'] ];
				array_push( $twowaySp, $t['sp'][0], $t['sp'][1], $t['sp'][2] );
				$sawTwoway = true;
			} elseif ( isset( $t['lang'] ) ) {
				$oneway[] = [ 'l' => $t['lang'], 'f' => $t['from'], 't' => $t['to'] ];
				array_push( $onewaySp, $t['sp'][0], $t['sp'][1], $t['sp'][2], $t['sp'][3] );
				$sawOneway = true;
			}
		}

		// To avoid too much data-mw bloat, only the top level keys in
		// data-mw-variant are "human readable". Nested keys are single-letter:
		// `l` for `language`, `t` for `text` or `to`, `f` for `from`.
		if ( count( $flags ) === 0 && count( $dataAttribs->variants ) > 0 ) {
			// "Restrict possible variants to a limited set"
			$dataMWV = [
				'filter' => [ 'l' => $dataAttribs->variants, 't' => $texts[0]['text'] ],
				'show' => true
			];
		} else {
			// Map each flag to its data-mw-variant key; flags missing from
			// the flag map mark the result as an error, flags mapped to a
			// falsy value are dropped.
			$dataMWV = array_reduce( $flags, function ( array $dmwv, string $f ) use ( &$sawFlagA ) {
				if ( array_key_exists( $f, WikitextConstants::$LCFlagMap ) ) {
					if ( WikitextConstants::$LCFlagMap[$f] ) {
						$dmwv[WikitextConstants::$LCFlagMap[$f]] = true;
						if ( $f === 'A' ) {
							$sawFlagA = true;
						}
					}
				} else {
					$dmwv['error'] = true;
				}
				return $dmwv;
			}, [] );
			// (this test is done at the top of ConverterRule::getRuleConvertedStr)
			// (also partially in ConverterRule::parse)
			if ( count( $texts ) === 1 &&
				!isset( $texts[0]['lang'] ) && !isset( $dataMWV['name'] )
			) {
				if ( isset( $dataMWV['add'] ) || isset( $dataMWV['remove'] ) ) {
					// A lone language-less text becomes a two-way rule
					// for the wildcard variant `*`.
					$twoway = [ [ 'l' => '*', 't' => $texts[0]['text'] ] ];
					$sawTwoway = true;
				} else {
					$dataMWV['disabled'] = true;
					unset( $dataMWV['describe'] );
				}
			}
			if ( isset( $dataMWV['describe'] ) ) {
				if ( !$sawFlagA ) {
					$dataMWV['show'] = true;
				}
			}
			if ( isset( $dataMWV['disabled'] ) || isset( $dataMWV['name'] ) ) {
				if ( isset( $dataMWV['disabled'] ) ) {
					$dataMWV['disabled'] = [ 't' => $texts[0]['text'] ?? '' ];
				} else {
					$dataMWV['name'] = [ 't' => $texts[0]['text'] ?? '' ];
				}
				if ( isset( $dataMWV['title'] ) || isset( $dataMWV['add'] ) ) {
					unset( $dataMWV['show'] );
				} else {
					$dataMWV['show'] = true;
				}
			} elseif ( $sawTwoway ) {
				$dataMWV['twoway'] = $twoway;
				$textSp = $twowaySp;
				// Mixing one-way and two-way rules is flagged as an error
				if ( $sawOneway ) {
					$dataMWV['error'] = true;
				}
			} else {
				$dataMWV['oneway'] = $oneway;
				$textSp = $onewaySp;
				// No rules of either kind were seen
				if ( !$sawOneway ) {
					$dataMWV['error'] = true;
				}
			}
		}
		// Use meta/not meta instead of explicit 'show' flag.
		$isMeta = !isset( $dataMWV['show'] );
		unset( $dataMWV['show'] );
		// Trim some data from data-parsoid if it matches the defaults
		// (exactly two spacing entries per original flag, all of them empty)
		if ( count( $flagSp ) === 2 * count( $dataAttribs->original ) ) {
			$result = true;
			foreach ( $flagSp as $s ) {
				if ( $s !== '' ) {
					$result = false;
					break;
				}
			}
			if ( $result ) {
				$flagSp = null;
			}
		}
		// NOTE(review): an empty $textSp array is falsy, so the whitespace of
		// the trailing semicolon is not recorded in that case -- confirm
		// that this is intended.
		if ( $trailingSemi !== false && $textSp ) {
			$textSp[] = $trailingSemi;
		}

		// Our markup is always the same, except for the contents of
		// the data-mw-variant attribute and whether it's a span, div, or a
		// meta, depending on (respectively) whether conversion output
		// contains only inline content, could contain block content,
		// or never contains any content.

		$das = [
			'fl' => $dataAttribs->original, // original "fl"ags
			'flSp' => $this->compressSpArray( $flagSp ), // spaces around flags
			'src' => $dataAttribs->src,
			'tSp' => $this->compressSpArray( $textSp ), // spaces around texts
			// For span/div the last two source characters (presumably the
			// closing `}-`) are attributed to the end tag instead.
			'tsr' => new SourceRange( $tsr->start, $isMeta ? $tsr->end : ( $tsr->end - 2 ) )
		];

		if ( $das['flSp'] === null ) {
			unset( $das['flSp'] );
		}

		if ( $das['tSp'] === null ) {
			unset( $das['tSp'] );
		}

		PHPUtils::sortArray( $dataMWV );
		$tokens = [
			new TagTk( $isMeta ? 'meta' : ( $isBlock ? 'div' : 'span' ), [
					new KV( 'typeof', 'mw:LanguageVariant' ),
					new KV( 'data-mw-variant', PHPUtils::jsonEncode( $dataMWV ) )
				], (object)$das
			)
		];
		if ( !$isMeta ) {
			$tokens[] = new EndTagTk( $isBlock ? 'div' : 'span', [],
				(object)[
					'tsr' => new SourceRange( $tsr->end - 2, $tsr->end )
				]
			);
		}

		return [ 'tokens' => $tokens ];
	}

	/**
	 * @inheritDoc
	 */
	public function onTag( Token $token ) {
		return $token->getName() === 'language-variant' ? $this->onLanguageVariant( $token ) : $token;
	}
}