<?php

use MediaWiki\Languages\LanguageConverterFactory;
use MediaWiki\MediaWikiServices;
use Wikimedia\TestingAccessWrapper;

/**
 * Checks that LanguageConverterFactory hands out the expected converter class,
 * variants, fallbacks, flags and manual levels for each language code, and that
 * its "disabled" switches are reported back unchanged.
 *
 * @group large
 * @group Language
 * @coversDefaultClass MediaWiki\Languages\LanguageConverterFactory
 */
class LanguageConverterFactoryTest extends MediaWikiLangTestCase {
	/**
	 * Build a converter for every dataset of codeProvider() and verify it.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 * @dataProvider codeProvider
	 *
	 * @param string $code Language code to build the converter for
	 * @param string $type Expected converter class name
	 * @param array $variants Expected variant codes
	 * @param array $variantFallbacks Expected variant fallback map
	 * @param array $variantNames Passed through to verifyConverter(), which does not assert on it
	 * @param array $flags Expected flags in addition to the default ones
	 * @param array $manualLevel Expected manual level per variant
	 */
	public function testLanguageConverters(
		$code,
		$type,
		$variants,
		$variantFallbacks,
		$variantNames,
		$flags,
		$manualLevel
	) {
		$lang = $this->getLanguage( $code );
		$factory = $this->newFactory( false, false, false, $lang );
		$this->assertNothingDisabled( $factory );

		$this->verifyConverter(
			$factory->getLanguageConverter( $lang ),
			$lang,
			$code,
			$type,
			$variants,
			$variantFallbacks,
			$variantNames,
			$flags,
			$manualLevel
		);
	}

	/**
	 * With the first constructor argument set to true, English must get the
	 * EnConverter with the additional 'en-x-piglatin' variant.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 */
	public function testCreateFromCodeEnPigLatin() {
		$lang = $this->getLanguage( 'en' );
		$factory = $this->newFactory( true, false, false, $lang );
		$this->assertNothingDisabled( $factory );

		$this->verifyConverter(
			$factory->getLanguageConverter( $lang ),
			$lang,
			'en',
			'EnConverter',
			[ 'en', 'en-x-piglatin' ],
			[],
			[],
			[],
			[ 'en' => 'bidirectional', 'en-x-piglatin' => 'bidirectional' ]
		);
	}

	/**
	 * The factory must report the flags it was constructed with, and the
	 * 'en-x-piglatin' variant must be present exactly when Pig Latin is enabled.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 * @dataProvider booleanProvider
	 *
	 * @param bool $pigLatinDisabled Negated and passed as the first constructor argument
	 * @param bool $conversionDisabled Passed as the second constructor argument
	 * @param bool $titleDisabled Passed as the third constructor argument
	 */
	public function testDisabledBooleans( $pigLatinDisabled, $conversionDisabled, $titleDisabled ) {
		$lang = $this->getLanguage( 'en' );
		$factory = $this->newFactory( !$pigLatinDisabled, $conversionDisabled, $titleDisabled, $lang );
		$converter = $factory->getLanguageConverter( $lang );

		$this->assertSame( $conversionDisabled, $factory->isConversionDisabled() );
		$this->assertSame( $titleDisabled, $factory->isTitleConversionDisabled() );
		// Link conversion is expected to be off as soon as either of the other two is off.
		$this->assertSame( $conversionDisabled || $titleDisabled, $factory->isLinkConversionDisabled() );

		if ( $pigLatinDisabled ) {
			$this->assertNotContains( 'en-x-piglatin', $converter->getVariants() );
		} else {
			$this->assertContains( 'en-x-piglatin', $converter->getVariants() );
		}
	}

	/**
	 * All eight combinations of the three booleans used by testDisabledBooleans().
	 *
	 * Static because it needs no instance state; static providers work on every
	 * PHPUnit version, whereas non-static ones are rejected by newer releases.
	 *
	 * @return array[] Rows of [ $pigLatinDisabled, $conversionDisabled, $titleDisabled ]
	 */
	public static function booleanProvider() {
		return [
			[ false, false, false ],
			[ false, false, true ],
			[ false, true, false ],
			[ false, true, true ],
			[ true, false, false ],
			[ true, false, true ],
			[ true, true, false ],
			[ true, true, true ],
		];
	}

	/**
	 * Calling getLanguageConverter() without an argument must fall back to the
	 * language returned by the callback given to the factory constructor.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 */
	public function testDefaultContentLanguageFallback() {
		$lang = $this->getLanguage( 'en' );
		$factory = $this->newFactory( false, false, false, $lang );
		$this->assertNothingDisabled( $factory );

		$this->verifyConverter(
			$factory->getLanguageConverter(),
			$lang,
			'en',
			'TrivialLanguageConverter',
			[ 'en' ],
			[],
			[],
			[],
			[]
		);
	}

	/**
	 * Fetch the Language object for a code from the global service container.
	 *
	 * @param string $code
	 * @return Language
	 */
	private function getLanguage( $code ) {
		return MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $code );
	}

	/**
	 * Create a factory whose default-language callback always returns $lang.
	 *
	 * The three booleans are forwarded positionally to the
	 * LanguageConverterFactory constructor.
	 *
	 * @param bool $usePigLatin
	 * @param bool $conversionDisabled
	 * @param bool $titleDisabled
	 * @param Language $lang
	 * @return LanguageConverterFactory
	 */
	private function newFactory( $usePigLatin, $conversionDisabled, $titleDisabled, $lang ) {
		return new LanguageConverterFactory(
			$usePigLatin,
			$conversionDisabled,
			$titleDisabled,
			static function () use ( $lang ) {
				return $lang;
			}
		);
	}

	/**
	 * Assert that none of the factory's three "disabled" switches is set.
	 *
	 * @param LanguageConverterFactory $factory
	 */
	private function assertNothingDisabled( $factory ) {
		$this->assertFalse( $factory->isConversionDisabled() );
		$this->assertFalse( $factory->isTitleConversionDisabled() );
		$this->assertFalse( $factory->isLinkConversionDisabled() );
	}

	/**
	 * Compare a converter against the expected values.
	 *
	 * The class name is always checked. Everything else is only checked when
	 * the converter is a LanguageConverter instance; other converter types
	 * skip the detailed assertions.
	 *
	 * @param object $converter Converter returned by the factory
	 * @param Language $lang Language the converter must be bound to
	 * @param string $code Expected main language code
	 * @param string $type Expected converter class name
	 * @param array $variants Expected variant codes
	 * @param array $variantFallbacks Expected variant fallback map
	 * @param array $variantNames Accepted for signature parity with the data
	 *   provider; no assertion is made on it here
	 * @param array $flags Expected flags, merged over the default flag set
	 * @param array $manualLevel Expected manual level per variant
	 */
	private function verifyConverter(
		$converter,
		$lang,
		$code,
		$type,
		$variants,
		$variantFallbacks,
		$variantNames,
		$flags,
		$manualLevel
	) {
		$this->assertSame( $type, get_class( $converter ) );

		if ( !( $converter instanceof LanguageConverter ) ) {
			return;
		}

		// The wrapper exposes non-public members such as mLangObj.
		$testConverter = TestingAccessWrapper::newFromObject( $converter );
		$this->assertSame( $lang, $testConverter->mLangObj, "Language should be as provided" );

		$this->assertSame( $code, $testConverter->getMainCode(),
			"mMainLanguageCode should be as $code" );
		$this->assertEquals( $manualLevel, $testConverter->getManualLevel(), "Manual Level" );

		$this->assertEquals( $variants, $testConverter->getVariants(), "Variants" );
		$this->assertEquals( $variantFallbacks, $testConverter->getVariantsFallbacks(), "Variant Fallbacks" );

		$defaultFlags = [
			'A' => 'A',
			'T' => 'T',
			'R' => 'R',
			'D' => 'D',
			'-' => '-',
			'H' => 'H',
			'N' => 'N',
		];
		// Only a sub-map is required, so the converter may expose additional flags.
		$this->assertArraySubmapSame(
			array_merge( $defaultFlags, $flags ),
			$converter->getFlags(),
			"Flags"
		);
	}

	/**
	 * Datasets for testLanguageConverters(), keyed by language code.
	 *
	 * Static because it needs no instance state; static providers work on every
	 * PHPUnit version, whereas non-static ones are rejected by newer releases.
	 *
	 * @return Generator Yields
	 *   $code => [ $code, $type, $variants, $variantFallbacks, $variantNames, $flags, $manualLevel ]
	 */
	public static function codeProvider() {
		$trivialWithNothingElseCodes = [
			'aa', 'ab', 'abs', 'ace', 'ady', 'ady-cyrl', 'aeb', 'aeb-arab', 'aeb-latn',
			'af', 'ak', 'aln', 'als', 'am', 'an', 'ang', 'anp', 'ar', 'arc', 'arn',
			'arq', 'ary', 'arz', 'as', 'ase', 'ast', 'atj', 'av', 'avk', 'awa', 'ay',
			'az', 'azb', 'ba', 'ban-bali', 'bar', 'bat-smg', 'bbc', 'bbc-latn', 'bcc',
			'bcl', 'be', 'be-tarask', 'be-x-old', 'bg', 'bgn', 'bh', 'bho', 'bi', 'bjn',
			'bm', 'bn', 'bo', 'bpy', 'bqi', 'br', 'brh', 'bs', 'btm', 'bto', 'bug', 'bxr',
			'ca', 'cbk-zam', 'cdo', 'ce', 'ceb', 'ch', 'cho', 'chr', 'chy', 'ckb', 'co',
			'cps', 'cr', 'crh-latn', 'crh-cyrl', 'cs', 'csb', 'cu', 'cv', 'cy', 'da',
			'de', 'de-at', 'de-ch', 'de-formal', 'din', 'diq', 'dsb', 'dtp', 'dty',
			'dv', 'dz', 'ee', 'egl', 'el', 'eml', 'en', 'en-ca', 'en-gb', 'eo', 'es',
			'es-419', 'es-formal', 'et', 'eu', 'ext', 'fa', 'ff', 'fi', 'fit', 'fiu-vro',
			'fj', 'fo', 'fr', 'frc', 'frp', 'frr', 'fur', 'fy', 'ga', 'gag', 'gan-hans',
			'gan-hant', 'gcr', 'gd', 'gl', 'glk', 'gn', 'gom', 'gom-deva', 'gom-latn',
			'gor', 'got', 'grc', 'gsw', 'gu', 'gv', 'ha', 'hak', 'haw', 'he', 'hi',
			'hif', 'hif-latn', 'hil', 'ho', 'hr', 'hrx', 'hsb', 'ht', 'hu', 'hu-formal',
			'hy', 'hyw', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'ike-cans',
			'ike-latn', 'ilo', 'inh', 'io', 'is', 'it', 'ja', 'jam', 'jbo', 'jut',
			'jv', 'ka', 'kaa', 'kab', 'kbd', 'kbd-cyrl', 'kbp', 'kg', 'khw', 'ki',
			'kiu', 'kj', 'kjp', 'kk-arab', 'kk-cyrl', 'kk-latn', 'kk-cn', 'kk-kz',
			'kk-tr', 'kl', 'km', 'kn', 'ko', 'ko-kp', 'koi', 'kr', 'krc', 'kri', 'krj',
			'krl', 'ks', 'ks-arab', 'ks-deva', 'ksh', 'ku-latn', 'ku-arab', 'kum', 'kv',
			'kw', 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'liv',
			'lki', 'lmo', 'ln', 'lo', 'lrc', 'loz', 'lt', 'ltg', 'lus', 'luz', 'lv',
			'lzh', 'lzz', 'mai', 'map-bms', 'mdf', 'mg', 'mh', 'mhr', 'mi', 'min', 'mk',
			'ml', 'mn', 'mni', 'mnw', 'mo', 'mr', 'mrj', 'ms', 'mt', 'mus', 'mwl', 'my',
			'myv', 'mzn', 'na', 'nah', 'nan', 'nap', 'nb', 'nds', 'nds-nl', 'ne', 'new',
			'ng', 'niu', 'nl', 'nl-informal', 'nn', 'no', 'nov', 'nqo', 'nrm', 'nso',
			'nv', 'ny', 'nys', 'oc', 'olo', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap',
			'pcd', 'pdc', 'pdt', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'prg',
			'ps', 'pt', 'pt-br', 'qu', 'qug', 'rgn', 'rif', 'rm', 'rmy', 'rn', 'ro',
			'roa-rup', 'roa-tara', 'ru', 'rue', 'rup', 'ruq', 'ruq-cyrl', 'ruq-latn',
			'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'sdc', 'sdh', 'se',
			'sei', 'ses', 'sg', 'sgs', 'sh', 'shi-tfng', 'shi-latn', 'shn', 'shy-latn',
			'si', 'simple', 'sk', 'skr', 'skr-arab', 'sl', 'sli', 'sm', 'sma', 'sn',
			'so', 'sq', 'sr-ec', 'sr-el', 'srn', 'ss', 'st', 'sty', 'stq', 'su', 'sv',
			'sw', 'szl', 'szy', 'ta', 'tay', 'tcy', 'te', 'tet', 'tg-cyrl', 'tg-latn',
			'th', 'ti', 'tk', 'tl', 'tly-latn', 'tn', 'to', 'tpi', 'tr', 'tru', 'ts', 'tt',
			'tt-cyrl', 'tt-latn', 'tum', 'tw', 'ty', 'tyv', 'tzm', 'udm', 'ug', 'ug-arab',
			'ug-latn', 'uk', 'ur', 'uz-cyrl', 'uz-latn', 've', 'vec', 'vep', 'vi', 'vls',
			'vmf', 'vo', 'vot', 'vro', 'wa', 'war', 'wo', 'wuu', 'xal', 'xh', 'xmf', 'xsy',
			'yi', 'yo', 'yue', 'za', 'zea', 'zgh', 'zh-classical', 'zh-cn', 'zh-hans',
			'zh-hant', 'zh-hk', 'zh-min-nan', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
			'zh-yue', 'zu',
		];
		foreach ( $trivialWithNothingElseCodes as $code ) {
			# $code, $type, $variants, $variantFallbacks, $variantNames, $flags, $manualLevel
			yield $code => [ $code, 'TrivialLanguageConverter', [], [], [], [], [] ];
		}

		// Languages with a type other than TrivialLanguageConverter or with variants/flags/manual level
		yield 'ban' => [
			'ban', 'BanConverter',
			[ 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku' ],
			[
				'ban-bali' => 'ban',
				'ban-x-dharma' => 'ban',
				'ban-x-palmleaf' => 'ban',
				'ban-x-pku' => 'ban',
			], [], [], [
				'ban' => 'bidirectional',
				'ban-bali' => 'bidirectional',
				'ban-x-dharma' => 'bidirectional',
				'ban-x-palmleaf' => 'bidirectional',
				'ban-x-pku' => 'bidirectional',
			]
		];

		yield 'crh' => [
			'crh', 'CrhConverter',
			[ 'crh', 'crh-cyrl', 'crh-latn' ],
			[
				'crh' => 'crh-latn',
				'crh-cyrl' => 'crh-latn',
				'crh-latn' => 'crh-cyrl',
			], [], [], [
				'crh' => 'bidirectional',
				'crh-cyrl' => 'bidirectional',
				'crh-latn' => 'bidirectional'
			]
		];

		yield 'gan' => [
			'gan', 'GanConverter',
			[ 'gan', 'gan-hans', 'gan-hant' ],
			[
				'gan' => [ 'gan-hans', 'gan-hant' ],
				'gan-hans' => [ 'gan' ],
				'gan-hant' => [ 'gan' ],
			], [], [], [
				'gan' => 'disable',
				'gan-hans' => 'bidirectional',
				'gan-hant' => 'bidirectional'
			]
		];

		yield 'iu' => [
			'iu', 'IuConverter',
			[ 'iu', 'ike-cans', 'ike-latn' ],
			[
				'iu' => 'ike-cans',
				'ike-cans' => 'iu',
				'ike-latn' => 'iu',
			], [], [], [
				'iu' => 'bidirectional',
				'ike-cans' => 'bidirectional',
				'ike-latn' => 'bidirectional'
			]
		];

		yield 'kk' => [
			'kk', 'KkConverter',
			[ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ],
			[
				'kk' => 'kk-cyrl',
				'kk-cyrl' => 'kk',
				'kk-latn' => 'kk',
				'kk-arab' => 'kk',
				'kk-kz' => 'kk-cyrl',
				'kk-tr' => 'kk-latn',
				'kk-cn' => 'kk-arab'
			], [], [], [
				'kk' => 'bidirectional',
				'kk-cyrl' => 'bidirectional',
				'kk-latn' => 'bidirectional',
				'kk-arab' => 'bidirectional',
				'kk-kz' => 'bidirectional',
				'kk-tr' => 'bidirectional',
				'kk-cn' => 'bidirectional'
			]
		];

		yield 'ku' => [
			'ku', 'KuConverter',
			[ 'ku', 'ku-arab', 'ku-latn' ],
			[
				'ku' => 'ku-latn',
				'ku-arab' => 'ku-latn',
				'ku-latn' => 'ku-arab'
			], [], [], [
				'ku' => 'bidirectional',
				'ku-arab' => 'bidirectional',
				'ku-latn' => 'bidirectional'
			]
		];

		yield 'shi' => [
			'shi', 'ShiConverter',
			[ 'shi', 'shi-tfng', 'shi-latn' ],
			[ 'shi' => 'shi-tfng', 'shi-tfng' => 'shi', 'shi-latn' => 'shi' ],
			[], [],
			[
				'shi' => 'bidirectional',
				'shi-tfng' => 'bidirectional',
				'shi-latn' => 'bidirectional'
			]
		];

		yield 'sr' => [
			'sr', 'SrConverter',
			[ 'sr', 'sr-ec', 'sr-el' ], [
				'sr' => 'sr-ec',
				'sr-ec' => 'sr',
				'sr-el' => 'sr'
			], [], [
				'S' => 'S',
				'писмо' => 'S',
				'pismo' => 'S',
				'W' => 'W',
				'реч' => 'W',
				'reč' => 'W',
				'ријеч' => 'W',
				'riječ' => 'W'
			], [
				'sr' => 'bidirectional',
				'sr-ec' => 'bidirectional',
				'sr-el' => 'bidirectional'
			]
		];

		yield 'tg' => [
			'tg', 'TgConverter',
			[ 'tg', 'tg-latn' ],
			[], [], [], [
				'tg' => 'bidirectional',
				'tg-latn' => 'bidirectional'
			]
		];

		yield 'tly' => [
			'tly', 'TlyConverter',
			[ 'tly', 'tly-cyrl' ],
			[ 'tly-cyrl' => 'tly' ],
			[],
			[
				'tly' => 'tly',
				'tly-cyrl' => 'tly-cyrl'
			],
			[
				'tly' => 'bidirectional',
				'tly-cyrl' => 'bidirectional',
			]
		];

		yield 'uz' => [
			'uz', 'UzConverter',
			[ 'uz', 'uz-latn', 'uz-cyrl' ],
			[
				'uz' => 'uz-latn',
				'uz-cyrl' => 'uz',
				'uz-latn' => 'uz',
			], [], [
				'uz' => 'uz',
				'uz-latn' => 'uz-latn',
				'uz-cyrl' => 'uz-cyrl'
			], [
				'uz' => 'bidirectional',
				'uz-latn' => 'bidirectional',
				'uz-cyrl' => 'bidirectional',
			]
		];

		$zhVariants = [
			'zh',
			'zh-hans',
			'zh-hant',
			'zh-cn',
			'zh-hk',
			'zh-mo',
			'zh-my',
			'zh-sg',
			'zh-tw'
		];

		$zhVariantFallbacks = [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
		];

		$zhManualLevel = [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
			'zh-cn' => 'bidirectional',
			'zh-hk' => 'bidirectional',
			'zh-mo' => 'bidirectional',
			'zh-my' => 'bidirectional',
			'zh-sg' => 'bidirectional',
			'zh-tw' => 'bidirectional',
		];

		$zhFlags = [
			'A' => 'A',
			'T' => 'T',
			'R' => 'R',
			'D' => 'D',
			'-' => '-',
			'H' => 'H',
			'N' => 'N',
			'zh' => 'zh',
			'zh-hans' => 'zh-hans',
			'zh-hant' => 'zh-hant',
			'zh-cn' => 'zh-cn',
			'zh-hk' => 'zh-hk',
			'zh-mo' => 'zh-mo',
			'zh-my' => 'zh-my',
			'zh-sg' => 'zh-sg',
			'zh-tw' => 'zh-tw'
		];

		yield 'zh' => [ 'zh', 'ZhConverter', $zhVariants, $zhVariantFallbacks, [], $zhFlags, $zhManualLevel ];
	}
}