1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <sal/config.h>
11 
12 #include <cppunit/TestFixture.h>
13 #include <cppunit/TestAssert.h>
14 #include <cppunit/extensions/HelperMacros.h>
15 #include <cppunit/plugin/TestPlugIn.h>
16 
17 #include <i18nlangtag/mslangid.hxx>
18 #include <i18nlangtag/languagetag.hxx>
19 
20 #include <rtl/ustring.hxx>
21 
22 #include <com/sun/star/lang/Locale.hpp>
23 #include <com/sun/star/i18n/ScriptType.hpp>
24 
25 using namespace com::sun::star;
26 
27 namespace {
28 
29 class TestLanguageTag : public CppUnit::TestFixture
30 {
31 public:
TestLanguageTag()32     TestLanguageTag() {}
33 
34     void testAllTags();
35     void testAllIsoLangEntries();
36 
37     CPPUNIT_TEST_SUITE(TestLanguageTag);
38     CPPUNIT_TEST(testAllTags);
39     CPPUNIT_TEST(testAllIsoLangEntries);
40     CPPUNIT_TEST_SUITE_END();
41 };
42 
testAllTags()43 void TestLanguageTag::testAllTags()
44 {
45     {
46         OUString const s_de_Latn_DE( "de-Latn-DE" );
47         LanguageTag de_DE( s_de_Latn_DE, true );
48         OUString aBcp47 = de_DE.getBcp47();
49         lang::Locale aLocale = de_DE.getLocale();
50         LanguageType nLanguageType = de_DE.getLanguageType();
51         CPPUNIT_ASSERT_EQUAL_MESSAGE("Default script should be stripped after canonicalize.", OUString("de-DE"), aBcp47 );
52         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
53         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
54         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
55         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, nLanguageType );
56         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_DE.getLanguage() );
57         CPPUNIT_ASSERT_EQUAL( OUString("DE"), de_DE.getCountry() );
58         CPPUNIT_ASSERT( de_DE.getScript().isEmpty() );
59         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_DE.getLanguageAndScript() );
60         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), de_DE.makeFallback().getBcp47() );
61     }
62 
63     {
64         OUString const s_klingon( "i-klingon" );
65         LanguageTag klingon( s_klingon, true );
66         lang::Locale aLocale = klingon.getLocale();
67         CPPUNIT_ASSERT_EQUAL( OUString("tlh"), klingon.getBcp47() );
68         CPPUNIT_ASSERT_EQUAL( OUString("tlh"), aLocale.Language );
69         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
70         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
71         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( klingon.getLanguageType()) );
72         CPPUNIT_ASSERT( klingon.isValidBcp47() );
73         CPPUNIT_ASSERT( klingon.isIsoLocale() );
74         CPPUNIT_ASSERT( klingon.isIsoODF() );
75         LanguageType nLang = klingon.getLanguageType();
76         LanguageTag klingon_id( nLang);
77         CPPUNIT_ASSERT_EQUAL( OUString("tlh"), klingon_id.getBcp47() );
78     }
79 
80     {
81         OUString s_sr_RS( "sr-RS" );
82         LanguageTag sr_RS( s_sr_RS, true );
83         lang::Locale aLocale = sr_RS.getLocale();
84         CPPUNIT_ASSERT_EQUAL( s_sr_RS, sr_RS.getBcp47() );
85         CPPUNIT_ASSERT_EQUAL( OUString("sr"), aLocale.Language );
86         CPPUNIT_ASSERT_EQUAL( OUString("RS"), aLocale.Country );
87         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
88         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, sr_RS.getLanguageType() );
89         CPPUNIT_ASSERT( sr_RS.isValidBcp47() );
90         CPPUNIT_ASSERT( sr_RS.isIsoLocale() );
91         CPPUNIT_ASSERT( sr_RS.isIsoODF() );
92     }
93 
94     {
95         OUString s_sr_Latn_RS( "sr-Latn-RS" );
96         LanguageTag sr_RS( s_sr_Latn_RS, true );
97         lang::Locale aLocale = sr_RS.getLocale();
98         CPPUNIT_ASSERT_EQUAL( s_sr_Latn_RS, sr_RS.getBcp47() );
99         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
100         CPPUNIT_ASSERT_EQUAL( OUString("RS"), aLocale.Country );
101         CPPUNIT_ASSERT_EQUAL( s_sr_Latn_RS, aLocale.Variant );
102         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_SERBIAN_LATIN_SERBIA, sr_RS.getLanguageType() );
103         CPPUNIT_ASSERT( sr_RS.isValidBcp47() );
104         CPPUNIT_ASSERT( !sr_RS.isIsoLocale() );
105         CPPUNIT_ASSERT( sr_RS.isIsoODF() );
106         CPPUNIT_ASSERT_EQUAL( OUString("sr"), sr_RS.getLanguage() );
107         CPPUNIT_ASSERT_EQUAL( OUString("RS"), sr_RS.getCountry() );
108         CPPUNIT_ASSERT_EQUAL( OUString("Latn"), sr_RS.getScript() );
109         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn"), sr_RS.getLanguageAndScript() );
110     }
111 
112     {
113         OUString s_sr_Latn_CS( "sr-Latn-CS" );
114         LanguageTag sr_Latn_CS( s_sr_Latn_CS, true );
115         lang::Locale aLocale = sr_Latn_CS.getLocale();
116         CPPUNIT_ASSERT_EQUAL( s_sr_Latn_CS, sr_Latn_CS.getBcp47() );
117         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
118         CPPUNIT_ASSERT_EQUAL( OUString("CS"), aLocale.Country );
119         CPPUNIT_ASSERT_EQUAL( s_sr_Latn_CS, aLocale.Variant );
120         CPPUNIT_ASSERT_EQUAL( LANGUAGE_SERBIAN_LATIN_SAM, sr_Latn_CS.getLanguageType() );
121         CPPUNIT_ASSERT( sr_Latn_CS.isValidBcp47() );
122         CPPUNIT_ASSERT( !sr_Latn_CS.isIsoLocale() );
123         CPPUNIT_ASSERT( sr_Latn_CS.isIsoODF() );
124         CPPUNIT_ASSERT_EQUAL( OUString("sr"), sr_Latn_CS.getLanguage() );
125         CPPUNIT_ASSERT_EQUAL( OUString("CS"), sr_Latn_CS.getCountry() );
126         CPPUNIT_ASSERT_EQUAL( OUString("Latn"), sr_Latn_CS.getScript() );
127         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn"), sr_Latn_CS.getLanguageAndScript() );
128         ::std::vector< OUString > sr_Latn_CS_Fallbacks( sr_Latn_CS.getFallbackStrings( true));
129         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(9), sr_Latn_CS_Fallbacks.size());
130         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-CS"), sr_Latn_CS_Fallbacks[0]);
131         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-YU"), sr_Latn_CS_Fallbacks[1]);
132         CPPUNIT_ASSERT_EQUAL( OUString("sh-CS"), sr_Latn_CS_Fallbacks[2]);
133         CPPUNIT_ASSERT_EQUAL( OUString("sh-YU"), sr_Latn_CS_Fallbacks[3]);
134         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn"), sr_Latn_CS_Fallbacks[4]);
135         CPPUNIT_ASSERT_EQUAL( OUString("sh"), sr_Latn_CS_Fallbacks[5]);
136         CPPUNIT_ASSERT_EQUAL( OUString("sr-CS"), sr_Latn_CS_Fallbacks[6]);
137         CPPUNIT_ASSERT_EQUAL( OUString("sr-YU"), sr_Latn_CS_Fallbacks[7]);
138         CPPUNIT_ASSERT_EQUAL( OUString("sr"), sr_Latn_CS_Fallbacks[8]);
139         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-CS"), sr_Latn_CS.makeFallback().getBcp47());
140     }
141 
142     // 'sh-RS' has an internal override to 'sr-Latn-RS'
143     {
144         OUString const s_sh_RS( "sh-RS" );
145         LanguageTag sh_RS( s_sh_RS, true );
146         lang::Locale aLocale = sh_RS.getLocale();
147         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-RS"), sh_RS.getBcp47() );
148         CPPUNIT_ASSERT_EQUAL( OUString(I18NLANGTAG_QLT) , aLocale.Language);
149         CPPUNIT_ASSERT_EQUAL( OUString("RS"), aLocale.Country );
150         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-RS"), aLocale.Variant );
151         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_SERBIAN_LATIN_SERBIA, sh_RS.getLanguageType() );
152         CPPUNIT_ASSERT( sh_RS.isValidBcp47() );
153         CPPUNIT_ASSERT( !sh_RS.isIsoLocale() );
154         CPPUNIT_ASSERT( sh_RS.isIsoODF() );
155         CPPUNIT_ASSERT_EQUAL( OUString("sr"), sh_RS.getLanguage() );
156         CPPUNIT_ASSERT_EQUAL( OUString("RS"), sh_RS.getCountry() );
157         CPPUNIT_ASSERT_EQUAL( OUString("Latn"), sh_RS.getScript() );
158         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn"), sh_RS.getLanguageAndScript() );
159         ::std::vector< OUString > sh_RS_Fallbacks( sh_RS.getFallbackStrings( true));
160         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(6), sh_RS_Fallbacks.size());
161         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-RS"), sh_RS_Fallbacks[0]);
162         CPPUNIT_ASSERT_EQUAL( OUString("sh-RS"), sh_RS_Fallbacks[1]);
163         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn"), sh_RS_Fallbacks[2]);
164         CPPUNIT_ASSERT_EQUAL( OUString("sh"), sh_RS_Fallbacks[3]);
165         CPPUNIT_ASSERT_EQUAL( OUString("sr-RS"), sh_RS_Fallbacks[4]);
166         CPPUNIT_ASSERT_EQUAL( OUString("sr"), sh_RS_Fallbacks[5]);
167         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-RS"), sh_RS.makeFallback().getBcp47());
168         CPPUNIT_ASSERT_EQUAL( OUString("sr-Latn-RS"), sh_RS.getBcp47());
169         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_SERBIAN_LATIN_SERBIA, sh_RS.getLanguageType() );
170     }
171 
172     // 'bs-Latn-BA' with 'Latn' suppress-script, we map that ourselves for a
173     // known LangID with an override and canonicalization should work the same
174     // without liblangtag.
175     {
176         OUString const s_bs_Latn_BA( "bs-Latn-BA" );
177         LanguageTag bs_Latn_BA( s_bs_Latn_BA, true );
178         lang::Locale aLocale = bs_Latn_BA.getLocale();
179         CPPUNIT_ASSERT_EQUAL( OUString("bs-BA"), bs_Latn_BA.getBcp47() );
180         CPPUNIT_ASSERT_EQUAL( OUString("bs"), aLocale.Language );
181         CPPUNIT_ASSERT_EQUAL( OUString("BA"), aLocale.Country );
182         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
183         CPPUNIT_ASSERT_EQUAL( LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, bs_Latn_BA.getLanguageType() );
184         CPPUNIT_ASSERT( bs_Latn_BA.isValidBcp47() );
185         CPPUNIT_ASSERT( bs_Latn_BA.isIsoLocale() );
186         CPPUNIT_ASSERT( bs_Latn_BA.isIsoODF() );
187         CPPUNIT_ASSERT_EQUAL( OUString("bs"), bs_Latn_BA.getLanguage() );
188         CPPUNIT_ASSERT_EQUAL( OUString("BA"), bs_Latn_BA.getCountry() );
189         CPPUNIT_ASSERT( bs_Latn_BA.getScript().isEmpty() );
190         CPPUNIT_ASSERT_EQUAL( OUString("bs"), bs_Latn_BA.getLanguageAndScript() );
191         ::std::vector< OUString > bs_Latn_BA_Fallbacks( bs_Latn_BA.getFallbackStrings( true));
192         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(2), bs_Latn_BA_Fallbacks.size());
193         CPPUNIT_ASSERT_EQUAL( OUString("bs-BA"), bs_Latn_BA_Fallbacks[0]);
194         CPPUNIT_ASSERT_EQUAL( OUString("bs"), bs_Latn_BA_Fallbacks[1]);
195         CPPUNIT_ASSERT_EQUAL( OUString("bs-BA"), bs_Latn_BA.makeFallback().getBcp47());
196         CPPUNIT_ASSERT_EQUAL( OUString("bs-BA"), bs_Latn_BA.getBcp47());
197         CPPUNIT_ASSERT_EQUAL( LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, bs_Latn_BA.getLanguageType() );
198     }
199 
200     {
201         OUString s_ca_ES_valencia( "ca-ES-valencia" );
202         LanguageTag ca_ES_valencia( s_ca_ES_valencia, true );
203         lang::Locale aLocale = ca_ES_valencia.getLocale();
204         CPPUNIT_ASSERT_EQUAL( s_ca_ES_valencia, ca_ES_valencia.getBcp47() );
205         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
206         CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
207         CPPUNIT_ASSERT_EQUAL( s_ca_ES_valencia, aLocale.Variant );
208         CPPUNIT_ASSERT_EQUAL( LANGUAGE_CATALAN_VALENCIAN, ca_ES_valencia.getLanguageType() );
209         CPPUNIT_ASSERT( ca_ES_valencia.isValidBcp47() );
210         CPPUNIT_ASSERT( !ca_ES_valencia.isIsoLocale() );
211         CPPUNIT_ASSERT( !ca_ES_valencia.isIsoODF() );
212         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_ES_valencia.getLanguage() );
213         CPPUNIT_ASSERT_EQUAL( OUString("ES"), ca_ES_valencia.getCountry() );
214         CPPUNIT_ASSERT( ca_ES_valencia.getScript().isEmpty() );
215         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_ES_valencia.getLanguageAndScript() );
216         ::std::vector< OUString > ca_ES_valencia_Fallbacks( ca_ES_valencia.getFallbackStrings( true));
217         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(5), ca_ES_valencia_Fallbacks.size());
218         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES-valencia"), ca_ES_valencia_Fallbacks[0]);
219         CPPUNIT_ASSERT_EQUAL( OUString("ca-XV"), ca_ES_valencia_Fallbacks[1]);
220         CPPUNIT_ASSERT_EQUAL( OUString("ca-valencia"), ca_ES_valencia_Fallbacks[2]);
221         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES"), ca_ES_valencia_Fallbacks[3]);
222         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_ES_valencia_Fallbacks[4]);
223         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES-valencia"), ca_ES_valencia.makeFallback().getBcp47());
224     }
225 
226     {
227         OUString s_ca_valencia( "ca-valencia" );
228         LanguageTag ca_valencia( s_ca_valencia, true );
229         lang::Locale aLocale = ca_valencia.getLocale();
230         CPPUNIT_ASSERT_EQUAL( s_ca_valencia, ca_valencia.getBcp47() );
231         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
232         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
233         CPPUNIT_ASSERT_EQUAL( s_ca_valencia, aLocale.Variant );
234         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( ca_valencia.getLanguageType()) );
235         CPPUNIT_ASSERT( ca_valencia.isValidBcp47() );
236         CPPUNIT_ASSERT( !ca_valencia.isIsoLocale() );
237         CPPUNIT_ASSERT( !ca_valencia.isIsoODF() );
238         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_valencia.getLanguage() );
239         CPPUNIT_ASSERT( ca_valencia.getCountry().isEmpty() );
240         CPPUNIT_ASSERT( ca_valencia.getScript().isEmpty() );
241         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_valencia.getLanguageAndScript() );
242         ::std::vector< OUString > ca_valencia_Fallbacks( ca_valencia.getFallbackStrings( true));
243         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(2), ca_valencia_Fallbacks.size());
244         CPPUNIT_ASSERT_EQUAL( OUString("ca-valencia"), ca_valencia_Fallbacks[0]);
245         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_valencia_Fallbacks[1]);
246         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES-valencia"), ca_valencia.makeFallback().getBcp47());
247     }
248 
249     // 'ca-XV' has an internal override to 'ca-ES-valencia'
250     {
251         OUString const s_ca_XV( "ca-XV" );
252         OUString s_ca_ES_valencia( "ca-ES-valencia" );
253         LanguageTag ca_XV( s_ca_XV, true );
254         lang::Locale aLocale = ca_XV.getLocale();
255         CPPUNIT_ASSERT_EQUAL( s_ca_ES_valencia, ca_XV.getBcp47() );
256         CPPUNIT_ASSERT_EQUAL( OUString(I18NLANGTAG_QLT) , aLocale.Language);
257         CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
258         CPPUNIT_ASSERT_EQUAL( s_ca_ES_valencia, aLocale.Variant );
259         CPPUNIT_ASSERT_EQUAL( LANGUAGE_CATALAN_VALENCIAN, ca_XV.getLanguageType() );
260         CPPUNIT_ASSERT( ca_XV.isValidBcp47() );
261         CPPUNIT_ASSERT( !ca_XV.isIsoLocale() );
262         CPPUNIT_ASSERT( !ca_XV.isIsoODF() );
263         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_XV.getLanguage() );
264         CPPUNIT_ASSERT_EQUAL( OUString("ES"), ca_XV.getCountry() );
265         CPPUNIT_ASSERT( ca_XV.getScript().isEmpty() );
266         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_XV.getLanguageAndScript() );
267         ::std::vector< OUString > ca_XV_Fallbacks( ca_XV.getFallbackStrings( true));
268         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(5), ca_XV_Fallbacks.size());
269         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES-valencia"), ca_XV_Fallbacks[0]);
270         CPPUNIT_ASSERT_EQUAL( OUString("ca-XV"), ca_XV_Fallbacks[1]);
271         CPPUNIT_ASSERT_EQUAL( OUString("ca-valencia"), ca_XV_Fallbacks[2]);
272         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES"), ca_XV_Fallbacks[3]);
273         CPPUNIT_ASSERT_EQUAL( OUString("ca"), ca_XV_Fallbacks[4]);
274         CPPUNIT_ASSERT_EQUAL( OUString("ca-ES-valencia"), ca_XV.makeFallback().getBcp47());
275     }
276 
277     {
278         OUString s_de_DE( "de-DE" );
279         LanguageTag de_DE( s_de_DE, true );
280         lang::Locale aLocale = de_DE.getLocale();
281         CPPUNIT_ASSERT_EQUAL( s_de_DE, de_DE.getBcp47() );
282         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
283         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
284         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
285         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, de_DE.getLanguageType() );
286         CPPUNIT_ASSERT( de_DE.isValidBcp47() );
287         CPPUNIT_ASSERT( de_DE.isIsoLocale() );
288         CPPUNIT_ASSERT( de_DE.isIsoODF() );
289         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_DE.getLanguage() );
290         CPPUNIT_ASSERT_EQUAL( OUString("DE"), de_DE.getCountry() );
291         CPPUNIT_ASSERT( de_DE.getScript().isEmpty() );
292         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_DE.getLanguageAndScript() );
293         ::std::vector< OUString > de_DE_Fallbacks( de_DE.getFallbackStrings( true));
294         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(2), de_DE_Fallbacks.size());
295         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), de_DE_Fallbacks[0]);
296         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_DE_Fallbacks[1]);
297         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), de_DE.makeFallback().getBcp47());
298     }
299 
300     {
301         OUString const s_de_DE( "de-DE" );
302         LanguageTag de_DE( lang::Locale( "de", "DE", "" ) );
303         lang::Locale aLocale = de_DE.getLocale();
304         CPPUNIT_ASSERT_EQUAL( s_de_DE, de_DE.getBcp47() );
305         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
306         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
307         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
308         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, de_DE.getLanguageType() );
309     }
310 
311     {
312         OUString const s_de_DE( "de-DE" );
313         LanguageTag de_DE( LANGUAGE_GERMAN );
314         lang::Locale aLocale = de_DE.getLocale();
315         CPPUNIT_ASSERT_EQUAL( s_de_DE, de_DE.getBcp47() );
316         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
317         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
318         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
319         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, de_DE.getLanguageType() );
320     }
321 
322     // Unmapped but known language-only.
323     {
324         OUString s_de( "de" );
325         LanguageTag de( s_de, true );
326         lang::Locale aLocale = de.getLocale();
327         CPPUNIT_ASSERT_EQUAL( s_de, de.getBcp47() );
328         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
329         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
330         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
331         LanguageType de_LangID = de.getLanguageType();
332         CPPUNIT_ASSERT( de_LangID != LANGUAGE_GERMAN );
333         CPPUNIT_ASSERT_EQUAL( MsLangId::getPrimaryLanguage( LANGUAGE_GERMAN) , de_LangID);
334         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), de.makeFallback().getBcp47());
335         // Check registered mapping.
336         LanguageTag de_l( de_LangID);
337         CPPUNIT_ASSERT_EQUAL( s_de, de_l.getBcp47() );
338     }
339 
340     // "bo" and "dz" share the same primary language ID, only one gets it
341     // assigned, "dz" language-only has a special mapping.
342     {
343         LanguageTag bo( "bo", true );
344         CPPUNIT_ASSERT_EQUAL( MsLangId::getPrimaryLanguage( LANGUAGE_TIBETAN), bo.getLanguageType() );
345         LanguageTag dz( "dz", true );
346         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_DZONGKHA_MAP_LONLY, dz.getLanguageType() );
347     }
348 
349     // "no", "nb" and "nn" share the same primary language ID, which even is
350     // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly
351     // except if there is a defined MS-LCID for LanguageScriptOnly (LSO).
352     {
353         LanguageTag no( "no", true );
354         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( no.getLanguageType()) );
355         LanguageTag nb( "nb", true );
356         CPPUNIT_ASSERT_EQUAL( LANGUAGE_NORWEGIAN_BOKMAL_LSO, nb.getLanguageType() );
357         LanguageTag nn( "nn", true );
358         CPPUNIT_ASSERT_EQUAL( LANGUAGE_NORWEGIAN_NYNORSK_LSO, nn.getLanguageType() );
359         LanguageTag no_NO( "no-NO", true );
360         CPPUNIT_ASSERT_EQUAL( LANGUAGE_NORWEGIAN, no_NO.getLanguageType() );
361     }
362 
363     // 'de-1901' derived from 'de-DE-1901' grandfathered to check that it is
364     // accepted as (DIGIT 3ALNUM) variant
365     {
366         OUString s_de_1901( "de-1901" );
367         LanguageTag de_1901( s_de_1901 );
368         lang::Locale aLocale = de_1901.getLocale();
369         CPPUNIT_ASSERT_EQUAL( s_de_1901, de_1901.getBcp47() );
370         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
371         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
372         CPPUNIT_ASSERT_EQUAL( s_de_1901, aLocale.Variant );
373         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( de_1901.getLanguageType()) );
374         CPPUNIT_ASSERT( de_1901.isValidBcp47() );
375         CPPUNIT_ASSERT( !de_1901.isIsoLocale() );
376         CPPUNIT_ASSERT( !de_1901.isIsoODF() );
377         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_1901.getLanguageAndScript() );
378         CPPUNIT_ASSERT_EQUAL( OUString("1901"), de_1901.getVariants() );
379         ::std::vector< OUString > de_1901_Fallbacks( de_1901.getFallbackStrings( true));
380         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(2), de_1901_Fallbacks.size());
381         CPPUNIT_ASSERT_EQUAL( OUString("de-1901"), de_1901_Fallbacks[0]);
382         CPPUNIT_ASSERT_EQUAL( OUString("de"), de_1901_Fallbacks[1]);
383     }
384 
385     // 'en-GB-oed' is known grandfathered for English, Oxford English
386     // Dictionary spelling.
387     // Deprecated as of 2015-04-17, prefer en-GB-oxendict instead.
388     // As of 2017-03-14 we also alias to en-GB-oxendict.
389     {
390         OUString s_en_GB_oxendict( "en-GB-oxendict" );
391         LanguageTag en_GB_oed( "en-GB-oed" );
392         lang::Locale aLocale = en_GB_oed.getLocale();
393         CPPUNIT_ASSERT_EQUAL( s_en_GB_oxendict, en_GB_oed.getBcp47() );
394         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
395         CPPUNIT_ASSERT_EQUAL( OUString("GB"), aLocale.Country );
396         CPPUNIT_ASSERT_EQUAL( s_en_GB_oxendict, aLocale.Variant );
397         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_ENGLISH_UK_OXENDICT, en_GB_oed.getLanguageType() );
398         CPPUNIT_ASSERT( en_GB_oed.isValidBcp47() );
399         CPPUNIT_ASSERT( !en_GB_oed.isIsoLocale() );
400         CPPUNIT_ASSERT( !en_GB_oed.isIsoODF() );
401         CPPUNIT_ASSERT_EQUAL( OUString("en"), en_GB_oed.getLanguageAndScript() );
402         CPPUNIT_ASSERT_EQUAL( OUString("oxendict"), en_GB_oed.getVariants() );
403         ::std::vector< OUString > en_GB_oed_Fallbacks( en_GB_oed.getFallbackStrings( true));
404         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(5), en_GB_oed_Fallbacks.size() );
405         CPPUNIT_ASSERT_EQUAL( OUString("en-GB-oxendict"), en_GB_oed_Fallbacks[0]);
406         CPPUNIT_ASSERT_EQUAL( OUString("en-GB-oed"), en_GB_oed_Fallbacks[1]);
407         CPPUNIT_ASSERT_EQUAL( OUString("en-oxendict"), en_GB_oed_Fallbacks[2]);
408         CPPUNIT_ASSERT_EQUAL( OUString("en-GB"), en_GB_oed_Fallbacks[3]);
409         CPPUNIT_ASSERT_EQUAL( OUString("en"), en_GB_oed_Fallbacks[4]);
410         // 'en-oed' is not a valid fallback!
411     }
412 
413     // 'en-GB-oxendict' as preferred over 'en-GB-oed'.
414     {
415         OUString s_en_GB_oxendict( "en-GB-oxendict" );
416         LanguageTag en_GB_oxendict( s_en_GB_oxendict );
417         lang::Locale aLocale = en_GB_oxendict.getLocale();
418         CPPUNIT_ASSERT_EQUAL( s_en_GB_oxendict, en_GB_oxendict.getBcp47() );
419         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
420         CPPUNIT_ASSERT_EQUAL( OUString("GB"), aLocale.Country );
421         CPPUNIT_ASSERT_EQUAL( s_en_GB_oxendict, aLocale.Variant );
422         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_ENGLISH_UK_OXENDICT, en_GB_oxendict.getLanguageType() );
423         CPPUNIT_ASSERT( en_GB_oxendict.isValidBcp47() );
424         CPPUNIT_ASSERT( !en_GB_oxendict.isIsoLocale() );
425         CPPUNIT_ASSERT( !en_GB_oxendict.isIsoODF() );
426         CPPUNIT_ASSERT_EQUAL( OUString("en"), en_GB_oxendict.getLanguageAndScript() );
427         CPPUNIT_ASSERT_EQUAL( OUString("oxendict"), en_GB_oxendict.getVariants() );
428         ::std::vector< OUString > en_GB_oxendict_Fallbacks( en_GB_oxendict.getFallbackStrings( true));
429         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(5), en_GB_oxendict_Fallbacks.size() );
430         CPPUNIT_ASSERT_EQUAL( OUString("en-GB-oxendict"), en_GB_oxendict_Fallbacks[0]);
431         CPPUNIT_ASSERT_EQUAL( OUString("en-GB-oed"), en_GB_oxendict_Fallbacks[1]);
432         CPPUNIT_ASSERT_EQUAL( OUString("en-oxendict"), en_GB_oxendict_Fallbacks[2]);
433         CPPUNIT_ASSERT_EQUAL( OUString("en-GB"), en_GB_oxendict_Fallbacks[3]);
434         CPPUNIT_ASSERT_EQUAL( OUString("en"), en_GB_oxendict_Fallbacks[4]);
435     }
436 
437     // 'es-ES-u-co-trad' is a valid (and known) Extension U tag
438     {
439         OUString s_es_ES_u_co_trad( "es-ES-u-co-trad" );
440         LanguageTag es_ES_u_co_trad( s_es_ES_u_co_trad );
441         lang::Locale aLocale = es_ES_u_co_trad.getLocale();
442         CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, es_ES_u_co_trad.getBcp47() );
443         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
444         CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
445         CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, aLocale.Variant );
446         CPPUNIT_ASSERT_EQUAL( LANGUAGE_SPANISH_DATED, es_ES_u_co_trad.getLanguageType() );
447         CPPUNIT_ASSERT( es_ES_u_co_trad.isValidBcp47() );
448         CPPUNIT_ASSERT( !es_ES_u_co_trad.isIsoLocale() );
449         CPPUNIT_ASSERT( !es_ES_u_co_trad.isIsoODF() );
450         CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_u_co_trad.getLanguageAndScript() );
451         CPPUNIT_ASSERT_EQUAL( OUString("u-co-trad"), es_ES_u_co_trad.getVariants() );
452         ::std::vector< OUString > es_ES_u_co_trad_Fallbacks( es_ES_u_co_trad.getFallbackStrings( true));
453         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(4), es_ES_u_co_trad_Fallbacks.size() );
454         CPPUNIT_ASSERT_EQUAL( OUString("es-ES-u-co-trad"), es_ES_u_co_trad_Fallbacks[0]);
455         CPPUNIT_ASSERT_EQUAL( OUString("es-u-co-trad"), es_ES_u_co_trad_Fallbacks[1]);
456         CPPUNIT_ASSERT_EQUAL( OUString("es-ES"), es_ES_u_co_trad_Fallbacks[2]);
457         CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_u_co_trad_Fallbacks[3]);
458         // Map to broken MS.
459         CPPUNIT_ASSERT_EQUAL( OUString("es-ES_tradnl"), es_ES_u_co_trad.getBcp47MS() );
460     }
461 
462     // 'es-ES_tradnl' (broken MS) maps to 'es-ES-u-co-trad'
463     {
464         OUString s_es_ES_u_co_trad( "es-ES-u-co-trad" );
465         OUString s_es_ES_tradnl( "es-ES_tradnl" );
466         LanguageTag es_ES_tradnl( s_es_ES_tradnl );
467         lang::Locale aLocale = es_ES_tradnl.getLocale();
468         CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, es_ES_tradnl.getBcp47() );
469         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
470         CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
471         CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, aLocale.Variant );
472         CPPUNIT_ASSERT_EQUAL( LANGUAGE_SPANISH_DATED, es_ES_tradnl.getLanguageType() );
473         CPPUNIT_ASSERT( es_ES_tradnl.isValidBcp47() );
474         CPPUNIT_ASSERT( !es_ES_tradnl.isIsoLocale() );
475         CPPUNIT_ASSERT( !es_ES_tradnl.isIsoODF() );
476         CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_tradnl.getLanguageAndScript() );
477         CPPUNIT_ASSERT_EQUAL( OUString("u-co-trad"), es_ES_tradnl.getVariants() );
478         ::std::vector< OUString > es_ES_tradnl_Fallbacks( es_ES_tradnl.getFallbackStrings( true));
479         CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(4), es_ES_tradnl_Fallbacks.size() );
480         CPPUNIT_ASSERT_EQUAL( OUString("es-ES-u-co-trad"), es_ES_tradnl_Fallbacks[0]);
481         CPPUNIT_ASSERT_EQUAL( OUString("es-u-co-trad"), es_ES_tradnl_Fallbacks[1]);
482         CPPUNIT_ASSERT_EQUAL( OUString("es-ES"), es_ES_tradnl_Fallbacks[2]);
483         CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_tradnl_Fallbacks[3]);
484         // Map back to broken MS.
485         CPPUNIT_ASSERT_EQUAL( s_es_ES_tradnl, es_ES_tradnl.getBcp47MS() );
486     }
487 
488     // 'zh-yue-HK' uses redundant 'zh-yue' and should be preferred 'yue-HK'
489 #if 0
490     /* XXX Disabled because liblangtag in lt_tag_canonicalize() after replacing
491      * 'zh-yue' with the preferred 'yue' does:
492      * "If the language tag starts with a primary language subtag that is also
493      * an extlang subtag, then the language tag is prepended with the extlang's
494      * 'Prefix'."
495      * Primary language 'yue' is also extlang 'yue' for which the prefix
496      * happens to be 'zh' ... so the result is 'zh-yue-HK' again. */
497     {
498         OUString s_zh_yue_HK( "zh-yue-HK" );
499         LanguageTag zh_yue_HK( s_zh_yue_HK );
500         lang::Locale aLocale = zh_yue_HK.getLocale();
501         CPPUNIT_ASSERT( zh_yue_HK.getBcp47() == "yue-HK" );
502         CPPUNIT_ASSERT( aLocale.Language == "yue" );
503         CPPUNIT_ASSERT( aLocale.Country == "HK" );
504         CPPUNIT_ASSERT( aLocale.Variant == "" );
505         CPPUNIT_ASSERT( zh_yue_HK.getLanguageType() == LANGUAGE_YUE_CHINESE_HONGKONG );
506         CPPUNIT_ASSERT( zh_yue_HK.isValidBcp47() == true );
507         CPPUNIT_ASSERT( zh_yue_HK.isIsoLocale() == true );
508         CPPUNIT_ASSERT( zh_yue_HK.isIsoODF() == true );
509         CPPUNIT_ASSERT( zh_yue_HK.getLanguageAndScript() == "yue" );
510         CPPUNIT_ASSERT( zh_yue_HK.getVariants() == "" );
511         ::std::vector< OUString > zh_yue_HK_Fallbacks( zh_yue_HK.getFallbackStrings( true));
512         CPPUNIT_ASSERT( zh_yue_HK_Fallbacks.size() == 2);
513         CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[0] == "yue-HK");
514         CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[1] == "yue");
515     }
516 #endif
517 
518     // 'qtz' is a local use known pseudolocale for key ID resource
519     {
520         OUString s_qtz( "qtz" );
521         LanguageTag qtz( s_qtz );
522         lang::Locale aLocale = qtz.getLocale();
523         CPPUNIT_ASSERT_EQUAL( s_qtz, qtz.getBcp47() );
524         CPPUNIT_ASSERT_EQUAL( OUString("qtz"), aLocale.Language );
525         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
526         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
527         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_KEYID, qtz.getLanguageType() );
528     }
529 
530     // 'qty' is a local use unknown locale
531     {
532         OUString s_qty( "qty" );
533         LanguageTag qty( s_qty );
534         lang::Locale aLocale = qty.getLocale();
535         CPPUNIT_ASSERT_EQUAL( s_qty, qty.getBcp47() );
536         CPPUNIT_ASSERT_EQUAL( OUString("qty"), aLocale.Language );
537         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
538         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
539         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( qty.getLanguageType()) );
540     }
541 
542     // 'qtx' is an unknown new mslangid
543     {
544         OUString const s_qtx( "qtx" );
545         LanguageTag qtx( s_qtx );
546         qtx.setScriptType( LanguageTag::ScriptType::RTL );
547         LanguageType n_qtx = qtx.getLanguageType();
548         CPPUNIT_ASSERT_EQUAL( css::i18n::ScriptType::COMPLEX, MsLangId::getScriptType(n_qtx) );
549         CPPUNIT_ASSERT( MsLangId::isRightToLeft(n_qtx) );
550         CPPUNIT_ASSERT( !MsLangId::isCJK(n_qtx) );
551     }
552 
553     // 'x-comment' is a privateuse known "locale"
554     {
555         OUString s_xcomment( "x-comment" );
556         LanguageTag xcomment( s_xcomment );
557         lang::Locale aLocale = xcomment.getLocale();
558         CPPUNIT_ASSERT_EQUAL( s_xcomment, xcomment.getBcp47() );
559         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
560         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
561         CPPUNIT_ASSERT_EQUAL( OUString("x-comment"), aLocale.Variant );
562         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_PRIV_COMMENT, xcomment.getLanguageType() );
563     }
564 
565     // 'x-foobar' is a privateuse unknown "locale"
566     {
567         OUString s_xfoobar( "x-foobar" );
568         LanguageTag xfoobar( s_xfoobar );
569         lang::Locale aLocale = xfoobar.getLocale();
570         CPPUNIT_ASSERT_EQUAL( s_xfoobar, xfoobar.getBcp47() );
571         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
572         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
573         CPPUNIT_ASSERT_EQUAL( OUString("x-foobar"), aLocale.Variant );
574         CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( xfoobar.getLanguageType()) );
575     }
576 
577     // '*' the dreaded jolly joker is a "privateuse" known "locale"
578     {
579         OUString s_joker( "*" );
580         LanguageTag joker( s_joker );
581         lang::Locale aLocale = joker.getLocale();
582         CPPUNIT_ASSERT_EQUAL( s_joker, joker.getBcp47() );
583         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
584         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
585         CPPUNIT_ASSERT_EQUAL( OUString("*"), aLocale.Variant );
586         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_PRIV_JOKER, joker.getLanguageType() );
587 
588         joker.reset( LANGUAGE_USER_PRIV_JOKER );
589         aLocale = joker.getLocale();
590         CPPUNIT_ASSERT_EQUAL( s_joker, joker.getBcp47() );
591         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
592         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
593         CPPUNIT_ASSERT_EQUAL( OUString("*"), aLocale.Variant );
594         CPPUNIT_ASSERT_EQUAL( LANGUAGE_USER_PRIV_JOKER, joker.getLanguageType() );
595     }
596 
597     // 'C' locale shall map to 'en-US'
598     {
599         LanguageTag aTag( "C" );
600         CPPUNIT_ASSERT_EQUAL( OUString("en"), aTag.getLanguage() );
601         CPPUNIT_ASSERT_EQUAL( OUString("US"), aTag.getCountry() );
602         lang::Locale aLocale = aTag.getLocale();
603         CPPUNIT_ASSERT_EQUAL( OUString("en-US"), aTag.getBcp47() );
604         CPPUNIT_ASSERT_EQUAL( OUString("en"), aLocale.Language );
605         CPPUNIT_ASSERT_EQUAL( OUString("US"), aLocale.Country );
606         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
607         CPPUNIT_ASSERT_EQUAL( LANGUAGE_ENGLISH_US, aTag.getLanguageType() );
608     }
609     {
610         LanguageTag aTag( lang::Locale("C","","") );
611         CPPUNIT_ASSERT_EQUAL( OUString("en"), aTag.getLanguage() );
612         CPPUNIT_ASSERT_EQUAL( OUString("US"), aTag.getCountry() );
613         lang::Locale aLocale = aTag.getLocale();
614         CPPUNIT_ASSERT_EQUAL( OUString("en-US"), aTag.getBcp47() );
615         CPPUNIT_ASSERT_EQUAL( OUString("en"), aLocale.Language );
616         CPPUNIT_ASSERT_EQUAL( OUString("US"), aLocale.Country );
617         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
618         CPPUNIT_ASSERT_EQUAL( LANGUAGE_ENGLISH_US, aTag.getLanguageType() );
619     }
620 
621     // test reset() methods
622     {
623         LanguageTag aTag( LANGUAGE_DONTKNOW );
624         lang::Locale aLocale;
625 
626         aTag.reset( LANGUAGE_GERMAN );
627         aLocale = aTag.getLocale();
628         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), aTag.getBcp47() );
629         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
630         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
631         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
632         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, aTag.getLanguageType() );
633 
634         aTag.reset( "en-US" );
635         aLocale = aTag.getLocale();
636         CPPUNIT_ASSERT_EQUAL( OUString("en-US"), aTag.getBcp47() );
637         CPPUNIT_ASSERT_EQUAL( OUString("en"), aLocale.Language );
638         CPPUNIT_ASSERT_EQUAL( OUString("US"), aLocale.Country );
639         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
640         CPPUNIT_ASSERT_EQUAL( LANGUAGE_ENGLISH_US, aTag.getLanguageType() );
641 
642         aTag.reset( lang::Locale( "de", "DE", "" ) );
643         aLocale = aTag.getLocale();
644         CPPUNIT_ASSERT_EQUAL( OUString("de-DE"), aTag.getBcp47() );
645         CPPUNIT_ASSERT_EQUAL( OUString("de"), aLocale.Language );
646         CPPUNIT_ASSERT_EQUAL( OUString("DE"), aLocale.Country );
647         CPPUNIT_ASSERT( aLocale.Variant.isEmpty() );
648         CPPUNIT_ASSERT_EQUAL( LANGUAGE_GERMAN, aTag.getLanguageType() );
649     }
650 
651     {
652         OUString s_uab( "unreg-and-bad" );
653         LanguageTag uab( s_uab, true );
654         lang::Locale aLocale = uab.getLocale();
655         CPPUNIT_ASSERT_EQUAL( s_uab, uab.getBcp47() );
656         CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
657         CPPUNIT_ASSERT( aLocale.Country.isEmpty() );
658         CPPUNIT_ASSERT_EQUAL( s_uab, aLocale.Variant );
659         CPPUNIT_ASSERT_EQUAL( LANGUAGE_DONTKNOW, uab.getLanguageType() );
660         CPPUNIT_ASSERT( !uab.isValidBcp47() );
661         CPPUNIT_ASSERT( !uab.isIsoLocale() );
662         CPPUNIT_ASSERT( !uab.isIsoODF() );
663     }
664 
665     // test static isValidBcp47() method
666     {
667         OUString aCanonicalized;
668         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "en-US", &aCanonicalized) && aCanonicalized == "en-US" );
669         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "x-foobar", &aCanonicalized) && aCanonicalized == "x-foobar" );
670         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "qaa", &aCanonicalized) && aCanonicalized == "qaa" );
671         CPPUNIT_ASSERT( !LanguageTag::isValidBcp47( "unreg-and-bad", &aCanonicalized) );
672         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "en-US", &aCanonicalized, true) && aCanonicalized == "en-US" );
673         CPPUNIT_ASSERT( !LanguageTag::isValidBcp47( "x-foobar", &aCanonicalized, true) && aCanonicalized == "x-foobar" );
674         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "qaa", &aCanonicalized, true) && aCanonicalized == "qaa" );
675         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "de-Latn-DE", &aCanonicalized) && aCanonicalized == "de-DE" );
676         /* TODO: at least some (those we know) grandfathered tags should be
677          * recognized by the replacement code. */
678         CPPUNIT_ASSERT( LanguageTag::isValidBcp47( "en-GB-oed", &aCanonicalized) );
679         // en-GB-oed has become deprecated in updated language-subtag-registry database
680         // (liblangtag 0.5.7)
681         CPPUNIT_ASSERT( ( aCanonicalized == "en-GB-oxendict" ) || ( aCanonicalized == "en-GB-oed" ) );
682     }
683 }
684 
checkMapping(const OUString & rStr1,const OUString & rStr2)685 bool checkMapping( const OUString& rStr1, const OUString& rStr2 )
686 {
687     if (rStr1 == "la-Latn"     ) return rStr2 == "la";
688     if (rStr1 == "tzm-Latn-DZ" ) return rStr2 == "kab-DZ";
689     if (rStr1 == "bs-Latn-BA"  ) return rStr2 == "bs-BA";
690     if (rStr1 == "bs-Latn"     ) return rStr2 == "bs";
691     if (rStr1 == "cz"          ) return rStr2 == "cs-CZ";
692     if (rStr1 == "iw-IL"       ) return rStr2 == "he-IL";
693     if (rStr1 == "in-ID"       ) return rStr2 == "id-ID";
694     if (rStr1 == "sr-YU"       ) return rStr2 == "sr-CS";
695     if (rStr1 == "sh-RS"       ) return rStr2 == "sr-Latn-RS";
696     if (rStr1 == "sh-YU"       ) return rStr2 == "sr-Latn-CS";
697     if (rStr1 == "sh-CS"       ) return rStr2 == "sr-Latn-CS";
698     if (rStr1 == "sh-ME"       ) return rStr2 == "sr-Latn-ME";
699     if (rStr1 == "sh-BA"       ) return rStr2 == "sr-Latn-BA";
700     if (rStr1 == "sh"          ) return rStr2 == "sr-Latn";
701     if (rStr1 == "lah-PK"      ) return rStr2 == "pnb-Arab-PK";
702     if (rStr1 == "pa-PK"       ) return rStr2 == "pnb-Arab-PK";
703     if (rStr1 == "ca-XV"       ) return rStr2 == "ca-ES-valencia";
704     if (rStr1 == "qcv-ES"      ) return rStr2 == "ca-ES-valencia";
705     if (rStr1 == "ns-ZA"       ) return rStr2 == "nso-ZA";
706     if (rStr1 == "ven-ZA"      ) return rStr2 == "ve-ZA";
707     if (rStr1 == "qu-EC"       ) return rStr2 == "quz-EC";
708     if (rStr1 == "qu-PE"       ) return rStr2 == "quz-PE";
709     if (rStr1 == "ff-NG"       ) return rStr2 == "fuv-NG";
710     if (rStr1 == "ji-IL"       ) return rStr2 == "yi-IL";
711     if (rStr1 == "iu-CA"       ) return rStr2 == "iu-Latn-CA";
712     if (rStr1 == "iu"          ) return rStr2 == "iu-Latn";
713     if (rStr1 == "gbz-AF"      ) return rStr2 == "prs-AF";
714     if (rStr1 == "ber-DZ"      ) return rStr2 == "kab-DZ";
715     if (rStr1 == "tmz-MA"      ) return rStr2 == "tzm-Tfng-MA";
716     if (rStr1 == "ber-MA"      ) return rStr2 == "tzm-Tfng-MA";
717     if (rStr1 == "mg-MG"       ) return rStr2 == "plt-MG";
718     if (rStr1 == "pli"         ) return rStr2 == "pi-Latn";
719     if (rStr1 == "ks"          ) return rStr2 == "ks-Arab";
720     if (rStr1 == "chr-US"      ) return rStr2 == "chr-Cher-US";
721     if (rStr1 == "sd-PK"       ) return rStr2 == "sd-Arab-PK";
722     if (rStr1 == "sr-Cyrl-RS"  ) return rStr2 == "sr-RS";
723     if (rStr1 == "sr-Cyrl-ME"  ) return rStr2 == "sr-ME";
724     if (rStr1 == "sr-Cyrl-BA"  ) return rStr2 == "sr-BA";
725     if (rStr1 == "sr-Cyrl-CS"  ) return rStr2 == "sr-CS";
726     if (rStr1 == "sr-Cyrl"     ) return rStr2 == "sr";
727     if (rStr1 == "yi-Hebr-US"  ) return rStr2 == "yi-US";
728     if (rStr1 == "yi-Hebr-IL"  ) return rStr2 == "yi-IL";
729     if (rStr1 == "ha-NG"       ) return rStr2 == "ha-Latn-NG";
730     if (rStr1 == "ha-GH"       ) return rStr2 == "ha-Latn-GH";
731     if (rStr1 == "ku-Arab-IQ"  ) return rStr2 == "ckb-IQ";
732     if (rStr1 == "ku-Arab"     ) return rStr2 == "ckb";
733     if (rStr1 == "kmr-TR"      ) return rStr2 == "kmr-Latn-TR";
734     if (rStr1 == "ku-TR"       ) return rStr2 == "kmr-Latn-TR";
735     if (rStr1 == "kmr-SY"      ) return rStr2 == "kmr-Latn-SY";
736     if (rStr1 == "ku-SY"       ) return rStr2 == "kmr-Latn-SY";
737     if (rStr1 == "ku-IQ"       ) return rStr2 == "ckb-IQ";
738     if (rStr1 == "ku-IR"       ) return rStr2 == "ckb-IR";
739     if (rStr1 == "eu"          ) return rStr2 == "eu-ES";
740     if (rStr1 == "crk-Latn-CN" ) return rStr2 == "crk-Latn-CA";
741     if (rStr1 == "crk-Cans-CN" ) return rStr2 == "crk-Cans-CA";
742     if (rStr1 == "en-GB-oed"   ) return rStr2 == "en-GB-oxendict";
743     if (rStr1 == "es-ES_tradnl") return rStr2 == "es-ES-u-co-trad";
744     if (rStr1 == "sd-IN"       ) return rStr2 == "sd-Deva-IN";
745     if (rStr1 == "cmn-CN"      ) return rStr2 == "zh-CN";
746     if (rStr1 == "cmn-TW"      ) return rStr2 == "zh-TW";
747     return rStr1 == rStr2;
748 }
749 
testAllIsoLangEntries()750 void TestLanguageTag::testAllIsoLangEntries()
751 {
752     const ::std::vector< MsLangId::LanguagetagMapping > aList( MsLangId::getDefinedLanguagetags());
753     for (auto const& elem : aList)
754     {
755         bool b=false;
756         if (elem.maBcp47 == "la-VA")
757             b=true;
758         (void)b;
759 
760         LanguageTag aTagString( elem.maBcp47, true);
761         LanguageTag aTagID( elem.mnLang);
762         if (!checkMapping( elem.maBcp47, aTagString.getBcp47()))
763         {
764             OString aMessage( OUStringToOString( elem.maBcp47, RTL_TEXTENCODING_ASCII_US));
765             aMessage += " -> " + OUStringToOString( aTagString.getBcp47(), RTL_TEXTENCODING_ASCII_US);
766             CPPUNIT_ASSERT_EQUAL_MESSAGE( aMessage.getStr(), aTagString.getBcp47(), elem.maBcp47 );
767         }
768         if (elem.maBcp47 != aTagID.getBcp47())
769         {
770             // There are multiple mappings, ID must be equal after conversions.
771             LanguageTag aTagBack( aTagID.getBcp47(), true);
772             if (aTagString.getLanguageType() != aTagBack.getLanguageType())
773             {
774                 OString aMessage( OUStringToOString( elem.maBcp47, RTL_TEXTENCODING_ASCII_US));
775                 aMessage += " " + OUStringToOString( aTagString.getBcp47(), RTL_TEXTENCODING_ASCII_US) + ": " +
776                     OUStringToOString( aTagString.getBcp47(), RTL_TEXTENCODING_ASCII_US) + " " +
777                     OString::number( static_cast<sal_uInt16>(aTagString.getLanguageType()), 16) +
778                     " -> " + OUStringToOString( aTagBack.getBcp47(), RTL_TEXTENCODING_ASCII_US) + " " +
779                     OString::number( static_cast<sal_uInt16>(aTagBack.getLanguageType()), 16);
780                 CPPUNIT_ASSERT_EQUAL_MESSAGE( aMessage.getStr(), aTagBack.getLanguageType(), aTagString.getLanguageType());
781             }
782         }
783 #if 0
784         // This does not hold, there are cases like 'ar'
785         // LANGUAGE_ARABIC_PRIMARY_ONLY that when mapped back results in
786         // 'ar-SA' as default locale.
787         if (elem.mnLang != aTagString.getLanguageType())
788         {
789             // There are multiple mappings, string must be equal after conversions.
790             LanguageTag aTagBack( aTagString.getLanguageType());
791             if (aTagID.getBcp47() != aTagBack.getBcp47())
792             {
793                 OString aMessage( OUStringToOString( elem.maBcp47, RTL_TEXTENCODING_ASCII_US));
794                 aMessage += " " + OUStringToOString( aTagID.getBcp47(), RTL_TEXTENCODING_ASCII_US) +
795                     " -> " + OUStringToOString( aTagBack.getBcp47(), RTL_TEXTENCODING_ASCII_US);
796                 CPPUNIT_ASSERT_MESSAGE( aMessage.getStr(), aTagID.getBcp47() == aTagBack.getBcp47());
797             }
798         }
799 #endif
800     }
801 
802     // Uncommenting this makes the test break and output SAL_WARN/INFO
803     //CPPUNIT_ASSERT( true == false );
804 }
805 
// Add the TestLanguageTag suite to CppUnit's registry of test suites.
CPPUNIT_TEST_SUITE_REGISTRATION( TestLanguageTag );
807 
808 }
809 
// Provide the CppUnit test plug-in entry point for this test library
// (see cppunit/plugin/TestPlugIn.h).
CPPUNIT_PLUGIN_IMPLEMENT();
811 
812 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
813