1<?php
2
3use MediaWiki\Languages\LanguageConverterFactory;
4use MediaWiki\MediaWikiServices;
5use Wikimedia\TestingAccessWrapper;
6
/**
 * Checks that LanguageConverterFactory returns the expected converter class and
 * configuration for each language code, and that it reports its "disabled"
 * settings exactly as they were passed to the constructor.
 *
 * @group large
 * @group Language
 * @coversDefaultClass MediaWiki\Languages\LanguageConverterFactory
 */
class LanguageConverterFactoryTest extends MediaWikiLangTestCase {
	/**
	 * With nothing disabled and Pig Latin off, every code from codeProvider()
	 * must produce a converter of the expected class and configuration.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 * @dataProvider codeProvider
	 *
	 * @param string $code Language code to request a converter for
	 * @param string $type Expected class name of the converter
	 * @param array $variants Expected variant codes
	 * @param array $variantFallbacks Expected variant fallbacks
	 * @param array $variantNames Passed through to verifyConverter(), which does not use it
	 * @param array $flags Expected flags in addition to the default ones
	 * @param array $manualLevel Expected manual level per variant
	 */
	public function testLanguageConverters(
		$code,
		$type,
		$variants,
		$variantFallbacks,
		$variantNames,
		$flags,
		$manualLevel
	) {
		$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $code );
		$factory = $this->newFactory( $lang, false, false, false );
		$this->assertNothingDisabled( $factory );

		$this->verifyConverter(
			$factory->getLanguageConverter( $lang ),
			$lang,
			$code,
			$type,
			$variants,
			$variantFallbacks,
			$variantNames,
			$flags,
			$manualLevel
		);
	}

	/**
	 * With the Pig Latin variant enabled, English must get an EnConverter that
	 * offers the 'en-x-piglatin' variant.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 */
	public function testCreateFromCodeEnPigLatin() {
		$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );
		$factory = $this->newFactory( $lang, true, false, false );
		$this->assertNothingDisabled( $factory );

		$this->verifyConverter(
			$factory->getLanguageConverter( $lang ),
			$lang,
			'en',
			'EnConverter',
			[ 'en', 'en-x-piglatin' ],
			[],
			[],
			[],
			[ 'en' => 'bidirectional', 'en-x-piglatin' => 'bidirectional' ]
		);
	}

	/**
	 * The factory must echo back the constructor flags, report link conversion
	 * as disabled when either conversion or title conversion is disabled, and
	 * expose 'en-x-piglatin' only when Pig Latin is enabled.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 * @dataProvider booleanProvider
	 *
	 * @param bool $pigLatinDisabled Whether the Pig Latin variant is switched off
	 * @param bool $conversionDisabled Whether conversion is disabled
	 * @param bool $titleDisabled Whether title conversion is disabled
	 */
	public function testDisabledBooleans( $pigLatinDisabled, $conversionDisabled, $titleDisabled ) {
		$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );
		// The factory takes "use Pig Latin", the provider supplies "Pig Latin disabled".
		$factory = $this->newFactory( $lang, !$pigLatinDisabled, $conversionDisabled, $titleDisabled );
		$converter = $factory->getLanguageConverter( $lang );

		$this->assertSame( $conversionDisabled, $factory->isConversionDisabled() );
		$this->assertSame( $titleDisabled, $factory->isTitleConversionDisabled() );
		$this->assertSame( $conversionDisabled || $titleDisabled, $factory->isLinkConversionDisabled() );

		if ( $pigLatinDisabled ) {
			$this->assertNotContains( 'en-x-piglatin', $converter->getVariants() );
		} else {
			$this->assertContains( 'en-x-piglatin', $converter->getVariants() );
		}
	}

	/**
	 * All eight combinations of the three booleans taken by testDisabledBooleans().
	 *
	 * Static because PHPUnit 10+ deprecates non-static data providers; older
	 * PHPUnit versions accept static providers as well.
	 *
	 * @return bool[][] Rows of [ $pigLatinDisabled, $conversionDisabled, $titleDisabled ]
	 */
	public static function booleanProvider() {
		return [
			[ false, false, false ],
			[ false, false, true ],
			[ false, true, false ],
			[ false, true, true ],
			[ true, false, false ],
			[ true, false, true ],
			[ true, true, false ],
			[ true, true, true ],
		];
	}

	/**
	 * Calling getLanguageConverter() without a language must fall back to the
	 * language returned by the callback given to the factory.
	 *
	 * @covers ::__construct
	 * @covers ::classFromCode
	 * @covers ::getLanguageConverter
	 */
	public function testDefaultContentLanguageFallback() {
		$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );
		$factory = $this->newFactory( $lang, false, false, false );
		$this->assertNothingDisabled( $factory );

		// No language argument on purpose: the factory has to use its default.
		$converter = $factory->getLanguageConverter();

		// NOTE(review): codeProvider() expects [] as variants for the same
		// 'en' / TrivialLanguageConverter case, so the [ 'en' ] below is
		// presumably never compared by verifyConverter() - confirm.
		$this->verifyConverter(
			$converter,
			$lang,
			'en',
			'TrivialLanguageConverter',
			[ 'en' ],
			[],
			[],
			[],
			[]
		);
	}

	/**
	 * Build a factory whose default-language callback always returns $lang.
	 *
	 * @param mixed $lang Language object the callback hands back
	 * @param bool $usePigLatin First constructor flag (Pig Latin variant on/off)
	 * @param bool $conversionDisabled Second constructor flag
	 * @param bool $titleConversionDisabled Third constructor flag
	 * @return LanguageConverterFactory
	 */
	private function newFactory( $lang, $usePigLatin, $conversionDisabled, $titleConversionDisabled ) {
		return new LanguageConverterFactory(
			$usePigLatin,
			$conversionDisabled,
			$titleConversionDisabled,
			static function () use ( $lang ) {
				return $lang;
			}
		);
	}

	/**
	 * Assert that the factory reports no kind of conversion as disabled.
	 *
	 * @param LanguageConverterFactory $factory
	 */
	private function assertNothingDisabled( $factory ) {
		$this->assertFalse( $factory->isConversionDisabled() );
		$this->assertFalse( $factory->isTitleConversionDisabled() );
		$this->assertFalse( $factory->isLinkConversionDisabled() );
	}

	/**
	 * Assert that $converter has the expected class and, when it is a
	 * LanguageConverter, the expected language, main code, manual level,
	 * variants, fallbacks and flags.
	 *
	 * @param object $converter Converter returned by the factory
	 * @param mixed $lang Language object the converter must have been built for
	 * @param string $code Expected main language code
	 * @param string $type Expected class name of $converter
	 * @param array $variants Expected variant codes
	 * @param array $variantFallbacks Expected variant fallbacks
	 * @param array $variantNames Unused; kept because the data sets supply it positionally
	 * @param array $flags Expected flags in addition to the default ones
	 * @param array $manualLevel Expected manual level per variant
	 */
	private function verifyConverter(
		$converter,
		$lang,
		$code,
		$type,
		$variants,
		$variantFallbacks,
		$variantNames,
		$flags,
		$manualLevel
	) {
		// Class names are plain strings, so compare them strictly.
		$this->assertSame( $type, get_class( $converter ) );

		// The detailed checks only apply to LanguageConverter instances.
		if ( !( $converter instanceof LanguageConverter ) ) {
			return;
		}

		// Wrapped so members that may not be public (e.g. mLangObj) can be read.
		$testConverter = TestingAccessWrapper::newFromObject( $converter );
		$this->assertSame( $lang, $testConverter->mLangObj, "Language should be as provided" );

		$this->assertEquals( $code, $testConverter->getMainCode(),
			"mMainLanguageCode should be as $code" );
		$this->assertEquals( $manualLevel, $testConverter->getManualLevel(), "Manual Level" );

		$this->assertEquals( $variants, $testConverter->getVariants(), "Variants" );
		$this->assertEquals( $variantFallbacks, $testConverter->getVariantsFallbacks(), "Variant Fallbacks" );

		// Flags expected on every converter; language-specific ones are merged on top.
		$defaultFlags = [
			'A' => 'A',
			'T' => 'T',
			'R' => 'R',
			'D' => 'D',
			'-' => '-',
			'H' => 'H',
			'N' => 'N',
		];
		$this->assertArraySubmapSame(
			array_merge( $defaultFlags, $flags ),
			$converter->getFlags(),
			"Flags"
		);
	}

	/**
	 * Data sets for testLanguageConverters(), keyed by language code.
	 *
	 * Static because PHPUnit 10+ deprecates non-static data providers; older
	 * PHPUnit versions accept static providers as well.
	 *
	 * @return Generator Yields
	 *  [ $code, $type, $variants, $variantFallbacks, $variantNames, $flags, $manualLevel ]
	 */
	public static function codeProvider() {
		$trivialWithNothingElseCodes = [
			'aa', 'ab', 'abs', 'ace', 'ady', 'ady-cyrl', 'aeb', 'aeb-arab', 'aeb-latn',
			'af', 'ak', 'aln', 'als', 'am', 'an', 'ang', 'anp', 'ar', 'arc', 'arn',
			'arq', 'ary', 'arz', 'as', 'ase', 'ast', 'atj', 'av', 'avk', 'awa', 'ay',
			'az', 'azb', 'ba', 'ban-bali', 'bar', 'bat-smg', 'bbc', 'bbc-latn', 'bcc',
			'bcl', 'be', 'be-tarask', 'be-x-old', 'bg', 'bgn', 'bh', 'bho', 'bi', 'bjn',
			'bm', 'bn', 'bo', 'bpy', 'bqi', 'br', 'brh', 'bs', 'btm', 'bto', 'bug', 'bxr',
			'ca', 'cbk-zam', 'cdo', 'ce', 'ceb', 'ch', 'cho', 'chr', 'chy', 'ckb', 'co',
			'cps', 'cr', 'crh-latn', 'crh-cyrl', 'cs', 'csb', 'cu', 'cv', 'cy', 'da',
			'de', 'de-at', 'de-ch', 'de-formal', 'din', 'diq', 'dsb', 'dtp', 'dty',
			'dv', 'dz', 'ee', 'egl', 'el', 'eml', 'en', 'en-ca', 'en-gb', 'eo', 'es',
			'es-419', 'es-formal', 'et', 'eu', 'ext', 'fa', 'ff', 'fi', 'fit', 'fiu-vro',
			'fj', 'fo', 'fr', 'frc', 'frp', 'frr', 'fur', 'fy', 'ga', 'gag', 'gan-hans',
			'gan-hant', 'gcr', 'gd', 'gl', 'glk', 'gn', 'gom', 'gom-deva', 'gom-latn',
			'gor', 'got', 'grc', 'gsw', 'gu', 'gv', 'ha', 'hak', 'haw', 'he', 'hi',
			'hif', 'hif-latn', 'hil', 'ho', 'hr', 'hrx', 'hsb', 'ht', 'hu', 'hu-formal',
			'hy', 'hyw', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'ike-cans',
			'ike-latn', 'ilo', 'inh', 'io', 'is', 'it', 'ja', 'jam', 'jbo', 'jut',
			'jv', 'ka', 'kaa', 'kab', 'kbd', 'kbd-cyrl', 'kbp', 'kg', 'khw', 'ki',
			'kiu', 'kj', 'kjp', 'kk-arab', 'kk-cyrl', 'kk-latn', 'kk-cn', 'kk-kz',
			'kk-tr', 'kl', 'km', 'kn', 'ko', 'ko-kp', 'koi', 'kr', 'krc', 'kri', 'krj',
			'krl', 'ks', 'ks-arab', 'ks-deva', 'ksh', 'ku-latn', 'ku-arab', 'kum', 'kv',
			'kw', 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'liv',
			'lki', 'lmo', 'ln', 'lo', 'lrc', 'loz', 'lt', 'ltg', 'lus', 'luz', 'lv',
			'lzh', 'lzz', 'mai', 'map-bms', 'mdf', 'mg', 'mh', 'mhr', 'mi', 'min', 'mk',
			'ml', 'mn', 'mni', 'mnw', 'mo', 'mr', 'mrj', 'ms', 'mt', 'mus', 'mwl', 'my',
			'myv', 'mzn', 'na', 'nah', 'nan', 'nap', 'nb', 'nds', 'nds-nl', 'ne', 'new',
			'ng', 'niu', 'nl', 'nl-informal', 'nn', 'no', 'nov', 'nqo', 'nrm', 'nso',
			'nv', 'ny', 'nys', 'oc', 'olo', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap',
			'pcd', 'pdc', 'pdt', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'prg',
			'ps', 'pt', 'pt-br', 'qu', 'qug', 'rgn', 'rif', 'rm', 'rmy', 'rn', 'ro',
			'roa-rup', 'roa-tara', 'ru', 'rue', 'rup', 'ruq', 'ruq-cyrl', 'ruq-latn',
			'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'sdc', 'sdh', 'se',
			'sei', 'ses', 'sg', 'sgs', 'sh', 'shi-tfng', 'shi-latn', 'shn', 'shy-latn',
			'si', 'simple', 'sk', 'skr', 'skr-arab', 'sl', 'sli', 'sm', 'sma', 'sn',
			'so', 'sq', 'sr-ec', 'sr-el', 'srn', 'ss', 'st', 'sty', 'stq', 'su', 'sv',
			'sw', 'szl', 'szy', 'ta', 'tay', 'tcy', 'te', 'tet', 'tg-cyrl', 'tg-latn',
			'th', 'ti', 'tk', 'tl', 'tly-latn', 'tn', 'to', 'tpi', 'tr', 'tru', 'ts', 'tt',
			'tt-cyrl', 'tt-latn', 'tum', 'tw', 'ty', 'tyv', 'tzm', 'udm', 'ug', 'ug-arab',
			'ug-latn', 'uk', 'ur', 'uz-cyrl', 'uz-latn', 've', 'vec', 'vep', 'vi', 'vls',
			'vmf', 'vo', 'vot', 'vro', 'wa', 'war', 'wo', 'wuu', 'xal', 'xh', 'xmf', 'xsy',
			'yi', 'yo', 'yue', 'za', 'zea', 'zgh', 'zh-classical', 'zh-cn', 'zh-hans',
			'zh-hant', 'zh-hk', 'zh-min-nan', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
			'zh-yue', 'zu',
		];
		foreach ( $trivialWithNothingElseCodes as $code ) {
			// $code, $type, $variants, $variantFallbacks, $variantNames, $flags, $manualLevel
			yield $code => [ $code, 'TrivialLanguageConverter', [], [], [], [], [] ];
		}

		// Languages with a type other than TrivialLanguageConverter or with variants/flags/manual level
		yield 'ban' => [
			'ban', 'BanConverter',
			[ 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku' ],
			[
				'ban-bali' => 'ban',
				'ban-x-dharma' => 'ban',
				'ban-x-palmleaf' => 'ban',
				'ban-x-pku' => 'ban',
			], [], [], [
				'ban' => 'bidirectional',
				'ban-bali' => 'bidirectional',
				'ban-x-dharma' => 'bidirectional',
				'ban-x-palmleaf' => 'bidirectional',
				'ban-x-pku' => 'bidirectional',
			]
		];

		yield 'crh' => [
			'crh', 'CrhConverter',
			[ 'crh', 'crh-cyrl', 'crh-latn' ],
			[
				'crh' => 'crh-latn',
				'crh-cyrl' => 'crh-latn',
				'crh-latn' => 'crh-cyrl',
			], [], [], [
				'crh' => 'bidirectional',
				'crh-cyrl' => 'bidirectional',
				'crh-latn' => 'bidirectional'
			]
		];

		yield 'gan' => [
			'gan', 'GanConverter',
			[ 'gan', 'gan-hans', 'gan-hant' ],
			[
				'gan' => [ 'gan-hans', 'gan-hant' ],
				'gan-hans' => [ 'gan' ],
				'gan-hant' => [ 'gan' ],
			], [], [], [
				'gan' => 'disable',
				'gan-hans' => 'bidirectional',
				'gan-hant' => 'bidirectional'
			]
		];

		yield 'iu' => [
			'iu', 'IuConverter',
			[ 'iu', 'ike-cans', 'ike-latn' ],
			[
				'iu' => 'ike-cans',
				'ike-cans' => 'iu',
				'ike-latn' => 'iu',
			], [], [], [
				'iu' => 'bidirectional',
				'ike-cans' => 'bidirectional',
				'ike-latn' => 'bidirectional'
			]
		];

		yield 'kk' => [
			'kk', 'KkConverter',
			[ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ],
			[
				'kk' => 'kk-cyrl',
				'kk-cyrl' => 'kk',
				'kk-latn' => 'kk',
				'kk-arab' => 'kk',
				'kk-kz' => 'kk-cyrl',
				'kk-tr' => 'kk-latn',
				'kk-cn' => 'kk-arab'
			], [], [], [
				'kk' => 'bidirectional',
				'kk-cyrl' => 'bidirectional',
				'kk-latn' => 'bidirectional',
				'kk-arab' => 'bidirectional',
				'kk-kz' => 'bidirectional',
				'kk-tr' => 'bidirectional',
				'kk-cn' => 'bidirectional'
			]
		];

		yield 'ku' => [
			'ku', 'KuConverter',
			[ 'ku', 'ku-arab', 'ku-latn' ],
			[
				'ku' => 'ku-latn',
				'ku-arab' => 'ku-latn',
				'ku-latn' => 'ku-arab'
			], [], [], [
				'ku' => 'bidirectional',
				'ku-arab' => 'bidirectional',
				'ku-latn' => 'bidirectional'
			]
		];

		yield 'shi' => [
			'shi', 'ShiConverter',
			[ 'shi', 'shi-tfng', 'shi-latn' ],
			[ 'shi' => 'shi-tfng', 'shi-tfng' => 'shi', 'shi-latn' => 'shi' ],
			[], [],
			[
				'shi' => 'bidirectional',
				'shi-tfng' => 'bidirectional',
				'shi-latn' => 'bidirectional'
			]
		];

		yield 'sr' => [
			'sr', 'SrConverter',
			[ 'sr', 'sr-ec', 'sr-el' ], [
				'sr' => 'sr-ec',
				'sr-ec' => 'sr',
				'sr-el' => 'sr'
			], [], [
				'S' => 'S',
				'писмо' => 'S',
				'pismo' => 'S',
				'W' => 'W',
				'реч' => 'W',
				'reč' => 'W',
				'ријеч' => 'W',
				'riječ' => 'W'
			], [
				'sr' => 'bidirectional',
				'sr-ec' => 'bidirectional',
				'sr-el' => 'bidirectional'
			]
		];

		yield 'tg' => [
			'tg', 'TgConverter',
			[ 'tg', 'tg-latn' ],
			[], [], [], [
				'tg' => 'bidirectional',
				'tg-latn' => 'bidirectional'
			]
		];

		yield 'tly' => [
			'tly', 'TlyConverter',
			[ 'tly', 'tly-cyrl' ],
			[ 'tly-cyrl' => 'tly' ],
			[],
			[
				'tly' => 'tly',
				'tly-cyrl' => 'tly-cyrl'
			],
			[
				'tly' => 'bidirectional',
				'tly-cyrl' => 'bidirectional',
			]
		];

		yield 'uz' => [
			'uz', 'UzConverter',
			[ 'uz', 'uz-latn', 'uz-cyrl' ],
			[
				'uz' => 'uz-latn',
				'uz-cyrl' => 'uz',
				'uz-latn' => 'uz',
			], [], [
				'uz' => 'uz',
				'uz-latn' => 'uz-latn',
				'uz-cyrl' => 'uz-cyrl'
			], [
				'uz' => 'bidirectional',
				'uz-latn' => 'bidirectional',
				'uz-cyrl' => 'bidirectional',
			]
		];

		$zhVariants = [
			'zh',
			'zh-hans',
			'zh-hant',
			'zh-cn',
			'zh-hk',
			'zh-mo',
			'zh-my',
			'zh-sg',
			'zh-tw'
		];

		$zhVariantFallbacks = [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
		];

		$zhManualLevel = [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
			'zh-cn' => 'bidirectional',
			'zh-hk' => 'bidirectional',
			'zh-mo' => 'bidirectional',
			'zh-my' => 'bidirectional',
			'zh-sg' => 'bidirectional',
			'zh-tw' => 'bidirectional',
		];

		$zhFlags = [
			'A' => 'A',
			'T' => 'T',
			'R' => 'R',
			'D' => 'D',
			'-' => '-',
			'H' => 'H',
			'N' => 'N',
			'zh' => 'zh',
			'zh-hans' => 'zh-hans',
			'zh-hant' => 'zh-hant',
			'zh-cn' => 'zh-cn',
			'zh-hk' => 'zh-hk',
			'zh-mo' => 'zh-mo',
			'zh-my' => 'zh-my',
			'zh-sg' => 'zh-sg',
			'zh-tw' => 'zh-tw'
		];
		yield 'zh' => [ 'zh', 'ZhConverter', $zhVariants, $zhVariantFallbacks, [], $zhFlags, $zhManualLevel ];
	}
}
472