1 // Generated by make_intl_data.py. DO NOT EDIT.
2 // Version: CLDR-39
3 // URL: https://unicode.org/Public/cldr/39/core.zip
4
5 #include "mozilla/Assertions.h"
6 #include "mozilla/Span.h"
7 #include "mozilla/TextUtils.h"
8
9 #include <algorithm>
10 #include <cstdint>
11 #include <cstring>
12 #include <iterator>
13 #include <string>
14 #include <type_traits>
15
16 #include "builtin/intl/LanguageTag.h"
17 #include "util/Text.h"
18 #include "vm/JSContext.h"
19
20 using namespace js::intl::LanguageTagLimits;
21
22 template <size_t Length, size_t TagLength, size_t SubtagLength>
HasReplacement(const char (& subtags)[Length][TagLength],const js::intl::LanguageTagSubtag<SubtagLength> & subtag)23 static inline bool HasReplacement(
24 const char (&subtags)[Length][TagLength],
25 const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
26 MOZ_ASSERT(subtag.length() == TagLength - 1,
27 "subtag must have the same length as the list of subtags");
28
29 const char* ptr = subtag.span().data();
30 return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
31 [](const char* a, const char* b) {
32 return memcmp(a, b, TagLength - 1) < 0;
33 });
34 }
35
36 template <size_t Length, size_t TagLength, size_t SubtagLength>
SearchReplacement(const char (& subtags)[Length][TagLength],const char * (& aliases)[Length],const js::intl::LanguageTagSubtag<SubtagLength> & subtag)37 static inline const char* SearchReplacement(
38 const char (&subtags)[Length][TagLength],
39 const char* (&aliases)[Length],
40 const js::intl::LanguageTagSubtag<SubtagLength>& subtag) {
41 MOZ_ASSERT(subtag.length() == TagLength - 1,
42 "subtag must have the same length as the list of subtags");
43
44 const char* ptr = subtag.span().data();
45 auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
46 [](const char* a, const char* b) {
47 return memcmp(a, b, TagLength - 1) < 0;
48 });
49 if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) {
50 return aliases[std::distance(std::begin(subtags), p)];
51 }
52 return nullptr;
53 }
54
55 #ifdef DEBUG
IsAsciiLowercaseAlphanumeric(char c)56 static bool IsAsciiLowercaseAlphanumeric(char c) {
57 return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
58 }
59
IsAsciiLowercaseAlphanumericOrDash(char c)60 static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
61 return IsAsciiLowercaseAlphanumeric(c) || c == '-';
62 }
63
IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span)64 static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
65 // Tell the analysis the |std::all_of| function can't GC.
66 JS::AutoSuppressGCAnalysis nogc;
67
68 return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
69 }
70
IsCanonicallyCasedScriptTag(mozilla::Span<const char> span)71 static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) {
72 // Tell the analysis the |std::all_of| function can't GC.
73 JS::AutoSuppressGCAnalysis nogc;
74
75 return mozilla::IsAsciiUppercaseAlpha(span[0]) &&
76 std::all_of(span.begin() + 1, span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
77 }
78
IsCanonicallyCasedRegionTag(mozilla::Span<const char> span)79 static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
80 // Tell the analysis the |std::all_of| function can't GC.
81 JS::AutoSuppressGCAnalysis nogc;
82
83 return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) ||
84 std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
85 }
86
IsCanonicallyCasedVariantTag(mozilla::Span<const char> span)87 static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
88 // Tell the analysis the |std::all_of| function can't GC.
89 JS::AutoSuppressGCAnalysis nogc;
90
91 return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
92 }
93
IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key)94 static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
95 return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
96 }
97
IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type)98 static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
99 return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
100 }
101
IsCanonicallyCasedTransformKey(mozilla::Span<const char> key)102 static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
103 return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
104 }
105
IsCanonicallyCasedTransformType(mozilla::Span<const char> type)106 static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
107 return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
108 }
109 #endif
110
111 // Mappings from language subtags to preferred values.
112 // Derived from CLDR Supplemental Data, version 39.
113 // https://unicode.org/Public/cldr/39/core.zip
languageMapping(LanguageSubtag & language)114 bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
115 MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
116 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
117
118 if (language.length() == 2) {
119 static const char languages[8][3] = {
120 "bh", "in", "iw", "ji", "jw", "mo", "tl", "tw",
121 };
122 static const char* aliases[8] = {
123 "bho", "id", "he", "yi", "jv", "ro", "fil", "ak",
124 };
125
126 if (const char* replacement = SearchReplacement(languages, aliases, language)) {
127 language.set(mozilla::MakeStringSpan(replacement));
128 return true;
129 }
130 return false;
131 }
132
133 if (language.length() == 3) {
134 static const char languages[401][4] = {
135 "aam", "aar", "abk", "adp", "afr", "agp", "ais", "aju", "aka", "alb",
136 "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", "aue", "ava",
137 "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", "baq", "baz",
138 "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bih", "bis", "bjd", "bjq",
139 "bkb", "bod", "bos", "bre", "btb", "bul", "bur", "bxk", "bxr", "cat",
140 "ccq", "ces", "cha", "che", "chi", "chu", "chv", "cjr", "cka", "cld",
141 "cmk", "cmn", "cor", "cos", "coy", "cqu", "cre", "cwd", "cym", "cze",
142 "daf", "dan", "dap", "deu", "dgo", "dhd", "dik", "diq", "dit", "div",
143 "djl", "dkl", "drh", "drr", "dud", "duj", "dut", "dwl", "dzo", "ekk",
144 "ell", "elp", "emk", "eng", "epo", "esk", "est", "eus", "ewe", "fao",
145 "fas", "fat", "fij", "fin", "fra", "fre", "fry", "fuc", "ful", "gav",
146 "gaz", "gbc", "gbo", "geo", "ger", "gfx", "ggn", "ggo", "ggr", "gio",
147 "gla", "gle", "glg", "gli", "glv", "gno", "gre", "grn", "gti", "gug",
148 "guj", "guv", "gya", "hat", "hau", "hdn", "hea", "heb", "her", "him",
149 "hin", "hmo", "hrr", "hrv", "hun", "hye", "ibi", "ibo", "ice", "ido",
150 "iii", "ike", "iku", "ile", "ill", "ilw", "ina", "ind", "ipk", "isl",
151 "ita", "izi", "jar", "jav", "jeg", "jpn", "kal", "kan", "kas", "kat",
152 "kau", "kaz", "kdv", "kgc", "kgd", "kgh", "khk", "khm", "kik", "kin",
153 "kir", "kmr", "knc", "kng", "knn", "koj", "kom", "kon", "kor", "kpp",
154 "kpv", "krm", "ktr", "kua", "kur", "kvs", "kwq", "kxe", "kxl", "kzh",
155 "kzj", "kzt", "lao", "lat", "lav", "lbk", "leg", "lii", "lim", "lin",
156 "lit", "llo", "lmm", "ltz", "lub", "lug", "lvs", "mac", "mah", "mal",
157 "mao", "mar", "may", "meg", "mgx", "mhr", "mkd", "mlg", "mlt", "mnk",
158 "mnt", "mof", "mol", "mon", "mri", "msa", "mst", "mup", "mwd", "mwj",
159 "mya", "myd", "myt", "nad", "nau", "nav", "nbf", "nbl", "nbx", "ncp",
160 "nde", "ndo", "nep", "nld", "nln", "nlr", "nno", "nns", "nnx", "nob",
161 "noo", "nor", "npi", "nts", "nxu", "nya", "oci", "ojg", "oji", "ori",
162 "orm", "ory", "oss", "oun", "pan", "pbu", "pcr", "per", "pes", "pli",
163 "plt", "pmc", "pmu", "pnb", "pol", "por", "ppa", "ppr", "pry", "pus",
164 "puz", "que", "quz", "rmr", "rmy", "roh", "ron", "rum", "run", "rus",
165 "sag", "san", "sap", "sca", "scc", "scr", "sgl", "sin", "skk", "slk",
166 "slo", "slv", "sme", "smo", "sna", "snd", "som", "sot", "spa", "spy",
167 "sqi", "src", "srd", "srp", "ssw", "sul", "sum", "sun", "swa", "swe",
168 "swh", "tah", "tam", "tat", "tdu", "tel", "tgg", "tgk", "tgl", "tha",
169 "thc", "thw", "thx", "tib", "tid", "tie", "tir", "tkk", "tlw", "tmp",
170 "tne", "ton", "tsf", "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig",
171 "ukr", "umu", "unp", "uok", "urd", "uzb", "uzn", "ven", "vie", "vol",
172 "wel", "wgw", "wit", "wiw", "wln", "wol", "xba", "xho", "xia", "xkh",
173 "xpe", "xrq", "xsj", "xsl", "ybd", "ydd", "yen", "yid", "yiy", "yma",
174 "ymt", "yor", "yos", "yuu", "zai", "zha", "zho", "zir", "zsm", "zul",
175 "zyb",
176 };
177 static const char* aliases[401] = {
178 "aas", "aa", "ab", "dz", "af", "apf", "ami", "jrb", "ak", "sq",
179 "sq", "am", "ar", "ar", "an", "hy", "snz", "as", "ktz", "av",
180 "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", "eu", "nvo",
181 "bal", "bik", "be", "bn", "bcg", "fbl", "bho", "bi", "drl", "bzc",
182 "ebk", "bo", "bs", "br", "beb", "bg", "my", "luy", "bua", "ca",
183 "rki", "cs", "ch", "ce", "zh", "cu", "cv", "mom", "cmr", "syr",
184 "xch", "zh", "kw", "co", "pij", "quh", "cr", "cr", "cy", "cs",
185 "dnj", "da", "njz", "de", "doi", "mwr", "din", "zza", "dif", "dv",
186 "dze", "aqd", "mn", "kzk", "uth", "dwu", "nl", "dbt", "dz", "et",
187 "el", "amq", "man", "en", "eo", "ik", "et", "eu", "ee", "fo",
188 "fa", "ak", "fj", "fi", "fr", "fr", "fy", "ff", "ff", "dev",
189 "om", "wny", "grb", "ka", "de", "vaj", "gvr", "esg", "gtu", "aou",
190 "gd", "ga", "gl", "kzk", "gv", "gon", "el", "gn", "nyc", "gn",
191 "gu", "duz", "gba", "ht", "ha", "hai", "hmn", "he", "hz", "srx",
192 "hi", "ho", "jal", "hr", "hu", "hy", "opa", "ig", "is", "io",
193 "ii", "iu", "iu", "ie", "ilm", "gal", "ia", "id", "ik", "is",
194 "it", "eza", "jgk", "jv", "oyb", "ja", "kl", "kn", "ks", "ka",
195 "kr", "kk", "zkd", "tdf", "ncq", "kml", "mn", "km", "ki", "rw",
196 "ky", "ku", "kr", "kg", "kok", "kwv", "kv", "kg", "ko", "jkm",
197 "kv", "bmf", "dtp", "kj", "ku", "gdj", "yam", "tvd", "kru", "dgl",
198 "dtp", "dtp", "lo", "la", "lv", "bnc", "enl", "raq", "li", "ln",
199 "lt", "ngt", "rmx", "lb", "lu", "lg", "lv", "mk", "mh", "ml",
200 "mi", "mr", "ms", "cir", "jbk", "chm", "mk", "mg", "mt", "man",
201 "wnn", "xnt", "ro", "mn", "mi", "ms", "mry", "raj", "dmw", "vaj",
202 "my", "aog", "mry", "xny", "na", "nv", "nru", "nr", "ekc", "kdz",
203 "nd", "ng", "ne", "nl", "azd", "nrk", "nn", "nbr", "ngv", "nb",
204 "dtd", "no", "ne", "pij", "bpp", "ny", "oc", "oj", "oj", "or",
205 "om", "or", "os", "vaj", "pa", "ps", "adx", "fa", "fa", "pi",
206 "mg", "huw", "phr", "lah", "pl", "pt", "bfy", "lcq", "prt", "ps",
207 "pub", "qu", "qu", "emx", "rom", "rm", "ro", "ro", "rn", "ru",
208 "sg", "sa", "aqt", "hle", "sr", "hr", "isk", "si", "oyb", "sk",
209 "sk", "sl", "se", "sm", "sn", "sd", "so", "st", "es", "kln",
210 "sq", "sc", "sc", "sr", "ss", "sgd", "ulw", "su", "sw", "sv",
211 "sw", "ty", "ta", "tt", "dtp", "te", "bjp", "tg", "fil", "th",
212 "tpo", "ola", "oyb", "bo", "itd", "ras", "ti", "twm", "weo", "tyj",
213 "kak", "to", "taj", "tn", "ts", "tmh", "tk", "tr", "ak", "ug",
214 "uk", "del", "wro", "ema", "ur", "uz", "uz", "ve", "vi", "vo",
215 "cy", "wgb", "nol", "nwo", "wa", "wo", "cax", "xh", "acn", "waw",
216 "kpe", "dmw", "suj", "den", "rki", "yi", "ynq", "yi", "yrm", "lrr",
217 "mtm", "yo", "zom", "yug", "zap", "za", "zh", "scv", "ms", "zu",
218 "za",
219 };
220
221 if (const char* replacement = SearchReplacement(languages, aliases, language)) {
222 language.set(mozilla::MakeStringSpan(replacement));
223 return true;
224 }
225 return false;
226 }
227
228 return false;
229 }
230
231 // Language subtags with complex mappings.
232 // Derived from CLDR Supplemental Data, version 39.
233 // https://unicode.org/Public/cldr/39/core.zip
complexLanguageMapping(const LanguageSubtag & language)234 bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
235 MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
236 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
237
238 if (language.length() == 2) {
239 return language.equalTo("sh");
240 }
241
242 if (language.length() == 3) {
243 static const char languages[6][4] = {
244 "cnr", "drw", "hbs", "prs", "swc", "tnf",
245 };
246
247 return HasReplacement(languages, language);
248 }
249
250 return false;
251 }
252
253 // Mappings from script subtags to preferred values.
254 // Derived from CLDR Supplemental Data, version 39.
255 // https://unicode.org/Public/cldr/39/core.zip
scriptMapping(ScriptSubtag & script)256 bool js::intl::LanguageTag::scriptMapping(ScriptSubtag& script) {
257 MOZ_ASSERT(IsStructurallyValidScriptTag(script.span()));
258 MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.span()));
259
260 {
261 if (script.equalTo("Qaai")) {
262 script.set(mozilla::MakeStringSpan("Zinh"));
263 return true;
264 }
265 return false;
266 }
267 }
268
269 // Mappings from region subtags to preferred values.
270 // Derived from CLDR Supplemental Data, version 39.
271 // https://unicode.org/Public/cldr/39/core.zip
regionMapping(RegionSubtag & region)272 bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
273 MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
274 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
275
276 if (region.length() == 2) {
277 static const char regions[23][3] = {
278 "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI",
279 "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK",
280 "YD", "YU", "ZR",
281 };
282 static const char* aliases[23] = {
283 "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM",
284 "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM",
285 "YE", "RS", "CD",
286 };
287
288 if (const char* replacement = SearchReplacement(regions, aliases, region)) {
289 region.set(mozilla::MakeStringSpan(replacement));
290 return true;
291 }
292 return false;
293 }
294
295 {
296 static const char regions[300][4] = {
297 "004", "008", "010", "012", "016", "020", "024", "028", "031", "032",
298 "036", "040", "044", "048", "050", "051", "052", "056", "060", "062",
299 "064", "068", "070", "072", "074", "076", "084", "086", "090", "092",
300 "096", "100", "104", "108", "112", "116", "120", "124", "132", "136",
301 "140", "144", "148", "152", "156", "158", "162", "166", "170", "174",
302 "175", "178", "180", "184", "188", "191", "192", "196", "203", "204",
303 "208", "212", "214", "218", "222", "226", "230", "231", "232", "233",
304 "234", "238", "239", "242", "246", "248", "249", "250", "254", "258",
305 "260", "262", "266", "268", "270", "275", "276", "278", "280", "288",
306 "292", "296", "300", "304", "308", "312", "316", "320", "324", "328",
307 "332", "334", "336", "340", "344", "348", "352", "356", "360", "364",
308 "368", "372", "376", "380", "384", "388", "392", "398", "400", "404",
309 "408", "410", "414", "417", "418", "422", "426", "428", "430", "434",
310 "438", "440", "442", "446", "450", "454", "458", "462", "466", "470",
311 "474", "478", "480", "484", "492", "496", "498", "499", "500", "504",
312 "508", "512", "516", "520", "524", "528", "531", "533", "534", "535",
313 "540", "548", "554", "558", "562", "566", "570", "574", "578", "580",
314 "581", "583", "584", "585", "586", "591", "598", "600", "604", "608",
315 "612", "616", "620", "624", "626", "630", "634", "638", "642", "643",
316 "646", "652", "654", "659", "660", "662", "663", "666", "670", "674",
317 "678", "682", "686", "688", "690", "694", "702", "703", "704", "705",
318 "706", "710", "716", "720", "724", "728", "729", "732", "736", "740",
319 "744", "748", "752", "756", "760", "762", "764", "768", "772", "776",
320 "780", "784", "788", "792", "795", "796", "798", "800", "804", "807",
321 "818", "826", "830", "831", "832", "833", "834", "840", "850", "854",
322 "858", "860", "862", "876", "882", "886", "887", "891", "894", "958",
323 "959", "960", "962", "963", "964", "965", "966", "967", "968", "969",
324 "970", "971", "972", "973", "974", "975", "976", "977", "978", "979",
325 "980", "981", "982", "983", "984", "985", "986", "987", "988", "989",
326 "990", "991", "992", "993", "994", "995", "996", "997", "998", "999",
327 };
328 static const char* aliases[300] = {
329 "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR",
330 "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "034",
331 "BT", "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG",
332 "BN", "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY",
333 "CF", "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM",
334 "YT", "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ",
335 "DK", "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE",
336 "FO", "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF",
337 "TF", "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH",
338 "GI", "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY",
339 "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR",
340 "IQ", "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE",
341 "KP", "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY",
342 "LI", "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT",
343 "MQ", "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA",
344 "MZ", "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ",
345 "NC", "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP",
346 "UM", "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH",
347 "PN", "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU",
348 "RW", "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM",
349 "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI",
350 "SO", "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR",
351 "SJ", "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO",
352 "TT", "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK",
353 "EG", "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF",
354 "UY", "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA",
355 "QM", "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW",
356 "QX", "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG",
357 "XH", "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ",
358 "XR", "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ",
359 };
360
361 if (const char* replacement = SearchReplacement(regions, aliases, region)) {
362 region.set(mozilla::MakeStringSpan(replacement));
363 return true;
364 }
365 return false;
366 }
367 }
368
369 // Region subtags with complex mappings.
370 // Derived from CLDR Supplemental Data, version 39.
371 // https://unicode.org/Public/cldr/39/core.zip
complexRegionMapping(const RegionSubtag & region)372 bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
373 MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
374 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
375
376 if (region.length() == 2) {
377 return region.equalTo("AN") ||
378 region.equalTo("NT") ||
379 region.equalTo("PC") ||
380 region.equalTo("SU");
381 }
382
383 {
384 static const char regions[8][4] = {
385 "172", "200", "530", "532", "536", "582", "810", "890",
386 };
387
388 return HasReplacement(regions, region);
389 }
390 }
391
392 // Language subtags with complex mappings.
393 // Derived from CLDR Supplemental Data, version 39.
394 // https://unicode.org/Public/cldr/39/core.zip
performComplexLanguageMappings()395 void js::intl::LanguageTag::performComplexLanguageMappings() {
396 MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
397 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
398
399 if (language().equalTo("cnr")) {
400 setLanguage("sr");
401 if (region().missing()) {
402 setRegion("ME");
403 }
404 }
405 else if (language().equalTo("drw") ||
406 language().equalTo("prs") ||
407 language().equalTo("tnf")) {
408 setLanguage("fa");
409 if (region().missing()) {
410 setRegion("AF");
411 }
412 }
413 else if (language().equalTo("hbs") ||
414 language().equalTo("sh")) {
415 setLanguage("sr");
416 if (script().missing()) {
417 setScript("Latn");
418 }
419 }
420 else if (language().equalTo("swc")) {
421 setLanguage("sw");
422 if (region().missing()) {
423 setRegion("CD");
424 }
425 }
426 }
427
428 // Region subtags with complex mappings.
429 // Derived from CLDR Supplemental Data, version 39.
430 // https://unicode.org/Public/cldr/39/core.zip
performComplexRegionMappings()431 void js::intl::LanguageTag::performComplexRegionMappings() {
432 MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
433 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
434 MOZ_ASSERT(IsStructurallyValidRegionTag(region().span()));
435 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region().span()));
436
437 if (region().equalTo("172")) {
438 if (language().equalTo("hy") ||
439 (language().equalTo("und") && script().equalTo("Armn"))) {
440 setRegion("AM");
441 }
442 else if (language().equalTo("az") ||
443 language().equalTo("tkr") ||
444 language().equalTo("tly") ||
445 language().equalTo("ttt")) {
446 setRegion("AZ");
447 }
448 else if (language().equalTo("be")) {
449 setRegion("BY");
450 }
451 else if (language().equalTo("ab") ||
452 language().equalTo("ka") ||
453 (language().equalTo("ku") && script().equalTo("Yezi")) ||
454 language().equalTo("os") ||
455 (language().equalTo("und") && script().equalTo("Geor")) ||
456 (language().equalTo("und") && script().equalTo("Yezi")) ||
457 language().equalTo("xmf")) {
458 setRegion("GE");
459 }
460 else if (language().equalTo("ky")) {
461 setRegion("KG");
462 }
463 else if (language().equalTo("kk") ||
464 (language().equalTo("ug") && script().equalTo("Cyrl"))) {
465 setRegion("KZ");
466 }
467 else if (language().equalTo("gag")) {
468 setRegion("MD");
469 }
470 else if (language().equalTo("tg")) {
471 setRegion("TJ");
472 }
473 else if (language().equalTo("tk")) {
474 setRegion("TM");
475 }
476 else if (language().equalTo("crh") ||
477 language().equalTo("got") ||
478 language().equalTo("ji") ||
479 language().equalTo("rue") ||
480 language().equalTo("uk") ||
481 (language().equalTo("und") && script().equalTo("Goth"))) {
482 setRegion("UA");
483 }
484 else if (language().equalTo("kaa") ||
485 language().equalTo("sog") ||
486 (language().equalTo("und") && script().equalTo("Chrs")) ||
487 (language().equalTo("und") && script().equalTo("Sogd")) ||
488 (language().equalTo("und") && script().equalTo("Sogo")) ||
489 language().equalTo("uz") ||
490 language().equalTo("xco")) {
491 setRegion("UZ");
492 }
493 else {
494 setRegion("RU");
495 }
496 }
497 else if (region().equalTo("200")) {
498 if (language().equalTo("sk")) {
499 setRegion("SK");
500 }
501 else {
502 setRegion("CZ");
503 }
504 }
505 else if (region().equalTo("530") ||
506 region().equalTo("532") ||
507 region().equalTo("AN")) {
508 if (language().equalTo("vic")) {
509 setRegion("SX");
510 }
511 else {
512 setRegion("CW");
513 }
514 }
515 else if (region().equalTo("536") ||
516 region().equalTo("NT")) {
517 if (language().equalTo("akk") ||
518 language().equalTo("ckb") ||
519 (language().equalTo("ku") && script().equalTo("Arab")) ||
520 language().equalTo("syr") ||
521 (language().equalTo("und") && script().equalTo("Syrc")) ||
522 (language().equalTo("und") && script().equalTo("Xsux"))) {
523 setRegion("IQ");
524 }
525 else {
526 setRegion("SA");
527 }
528 }
529 else if (region().equalTo("582") ||
530 region().equalTo("PC")) {
531 if (language().equalTo("mh")) {
532 setRegion("MH");
533 }
534 else if (language().equalTo("pau")) {
535 setRegion("PW");
536 }
537 else {
538 setRegion("FM");
539 }
540 }
541 else if (region().equalTo("810") ||
542 region().equalTo("SU")) {
543 if (language().equalTo("hy") ||
544 (language().equalTo("und") && script().equalTo("Armn"))) {
545 setRegion("AM");
546 }
547 else if (language().equalTo("az") ||
548 language().equalTo("tkr") ||
549 language().equalTo("tly") ||
550 language().equalTo("ttt")) {
551 setRegion("AZ");
552 }
553 else if (language().equalTo("be")) {
554 setRegion("BY");
555 }
556 else if (language().equalTo("et") ||
557 language().equalTo("vro")) {
558 setRegion("EE");
559 }
560 else if (language().equalTo("ab") ||
561 language().equalTo("ka") ||
562 (language().equalTo("ku") && script().equalTo("Yezi")) ||
563 language().equalTo("os") ||
564 (language().equalTo("und") && script().equalTo("Geor")) ||
565 (language().equalTo("und") && script().equalTo("Yezi")) ||
566 language().equalTo("xmf")) {
567 setRegion("GE");
568 }
569 else if (language().equalTo("ky")) {
570 setRegion("KG");
571 }
572 else if (language().equalTo("kk") ||
573 (language().equalTo("ug") && script().equalTo("Cyrl"))) {
574 setRegion("KZ");
575 }
576 else if (language().equalTo("lt") ||
577 language().equalTo("sgs")) {
578 setRegion("LT");
579 }
580 else if (language().equalTo("ltg") ||
581 language().equalTo("lv")) {
582 setRegion("LV");
583 }
584 else if (language().equalTo("gag")) {
585 setRegion("MD");
586 }
587 else if (language().equalTo("tg")) {
588 setRegion("TJ");
589 }
590 else if (language().equalTo("tk")) {
591 setRegion("TM");
592 }
593 else if (language().equalTo("crh") ||
594 language().equalTo("got") ||
595 language().equalTo("ji") ||
596 language().equalTo("rue") ||
597 language().equalTo("uk") ||
598 (language().equalTo("und") && script().equalTo("Goth"))) {
599 setRegion("UA");
600 }
601 else if (language().equalTo("kaa") ||
602 language().equalTo("sog") ||
603 (language().equalTo("und") && script().equalTo("Chrs")) ||
604 (language().equalTo("und") && script().equalTo("Sogd")) ||
605 (language().equalTo("und") && script().equalTo("Sogo")) ||
606 language().equalTo("uz") ||
607 language().equalTo("xco")) {
608 setRegion("UZ");
609 }
610 else {
611 setRegion("RU");
612 }
613 }
614 else if (region().equalTo("890")) {
615 if (language().equalTo("bs")) {
616 setRegion("BA");
617 }
618 else if (language().equalTo("hr")) {
619 setRegion("HR");
620 }
621 else if (language().equalTo("mk")) {
622 setRegion("MK");
623 }
624 else if (language().equalTo("sl")) {
625 setRegion("SI");
626 }
627 else {
628 setRegion("RS");
629 }
630 }
631 }
632
// Identity overload: a plain C string is already a character pointer. It
// exists so that IsLessThan can treat raw pointers and js::UniqueChars alike.
static const char* ToCharPointer(const char* str) {
  const char* chars = str;
  return chars;
}
636
ToCharPointer(const js::UniqueChars & str)637 static const char* ToCharPointer(const js::UniqueChars& str) {
638 return str.get();
639 }
640
// Ordering predicate for sorted searches: returns true when the string behind
// |a| compares less than the string behind |b| according to strcmp.
//
// Both operands are converted with ToCharPointer, so each may be any type for
// which a ToCharPointer overload exists; the two types may differ.
template <typename T, typename U = T>
static bool IsLessThan(const T& a, const U& b) {
  const char* lhs = ToCharPointer(a);
  const char* rhs = ToCharPointer(b);
  return strcmp(lhs, rhs) < 0;
}
645
646 // Mappings from variant subtags to preferred values.
647 // Derived from CLDR Supplemental Data, version 39.
648 // https://unicode.org/Public/cldr/39/core.zip
performVariantMappings(JSContext * cx)649 bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) {
650 // The variant subtags need to be sorted for binary search.
651 MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
652 IsLessThan<decltype(variants_)::ElementType>));
653
654 auto removeVariantAt = [&](size_t index) {
655 variants_.erase(variants_.begin() + index);
656 };
657
658 auto insertVariantSortedIfNotPresent = [&](const char* variant) {
659 auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
660 IsLessThan<decltype(variants_)::ElementType,
661 decltype(variant)>);
662
663 // Don't insert the replacement when already present.
664 if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
665 return true;
666 }
667
668 // Insert the preferred variant in sort order.
669 auto preferred = DuplicateString(cx, variant);
670 if (!preferred) {
671 return false;
672 }
673 return !!variants_.insert(p, std::move(preferred));
674 };
675
676 for (size_t i = 0; i < variants_.length(); ) {
677 const char* variant = variants_[i].get();
678 MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant)));
679
680 if (strcmp(variant, "arevela") == 0 ||
681 strcmp(variant, "arevmda") == 0 ||
682 strcmp(variant, "bokmal") == 0 ||
683 strcmp(variant, "hakka") == 0 ||
684 strcmp(variant, "lojban") == 0 ||
685 strcmp(variant, "nynorsk") == 0 ||
686 strcmp(variant, "saaho") == 0 ||
687 strcmp(variant, "xiang") == 0) {
688 removeVariantAt(i);
689 }
690 else if (strcmp(variant, "aaland") == 0) {
691 removeVariantAt(i);
692 setRegion("AX");
693 }
694 else if (strcmp(variant, "heploc") == 0) {
695 removeVariantAt(i);
696 if (!insertVariantSortedIfNotPresent("alalc97")) {
697 return false;
698 }
699 }
700 else if (strcmp(variant, "polytoni") == 0) {
701 removeVariantAt(i);
702 if (!insertVariantSortedIfNotPresent("polyton")) {
703 return false;
704 }
705 }
706 else {
707 i++;
708 }
709 }
710 return true;
711 }
712
713 // Canonicalize legacy locale identifiers.
714 // Derived from CLDR Supplemental Data, version 39.
715 // https://unicode.org/Public/cldr/39/core.zip
updateLegacyMappings(JSContext * cx)716 bool js::intl::LanguageTag::updateLegacyMappings(JSContext* cx) {
717 // We're mapping legacy tags to non-legacy form here.
718 // Other tags remain unchanged.
719 //
720 // Legacy tags are either sign language tags ("sgn") or have one or multiple
721 // variant subtags. Therefore we can quickly exclude most tags by checking
722 // these two subtags.
723
724 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
725
726 if (!language().equalTo("sgn") && variants().length() == 0) {
727 return true;
728 }
729
730 for ([[maybe_unused]] const auto& variant : variants()) {
731 MOZ_ASSERT(IsStructurallyValidVariantTag(mozilla::MakeStringSpan(variant.get())));
732 MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeStringSpan(variant.get())));
733 }
734
735 // The variant subtags need to be sorted for binary search.
736 MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
737 IsLessThan<decltype(variants_)::ElementType>));
738
739 auto findVariant = [this](const char* variant) {
740 auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
741 IsLessThan<decltype(variants_)::ElementType,
742 decltype(variant)>);
743
744 if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
745 return p;
746 }
747 return static_cast<decltype(p)>(nullptr);
748 };
749
750 auto insertVariantSortedIfNotPresent = [&](const char* variant) {
751 auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
752 IsLessThan<decltype(variants_)::ElementType,
753 decltype(variant)>);
754
755 // Don't insert the replacement when already present.
756 if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
757 return true;
758 }
759
760 // Insert the preferred variant in sort order.
761 auto preferred = DuplicateString(cx, variant);
762 if (!preferred) {
763 return false;
764 }
765 return !!variants_.insert(p, std::move(preferred));
766 };
767
768 auto removeVariant = [&](auto* p) {
769 size_t index = std::distance(variants_.begin(), p);
770 variants_.erase(variants_.begin() + index);
771 };
772
773 auto removeVariants = [&](auto* p, auto* q) {
774 size_t pIndex = std::distance(variants_.begin(), p);
775 size_t qIndex = std::distance(variants_.begin(), q);
776 MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted");
777
778 variants_.erase(variants_.begin() + qIndex);
779 variants_.erase(variants_.begin() + pIndex);
780 };
781
782 if (variants().length() >= 2) {
783 if (auto* hepburn = findVariant("hepburn")) {
784 if (auto* heploc = findVariant("heploc")) {
785 removeVariants(hepburn, heploc);
786
787 if (!insertVariantSortedIfNotPresent("alalc97")) {
788 return false;
789 }
790 }
791 }
792 }
793
794 if (language().equalTo("sgn")) {
795 if (region().present() && signLanguageMapping(language_, region())) {
796 region_.set(mozilla::MakeStringSpan(""));
797 }
798 }
799 else if (language().equalTo("aa") ||
800 language().equalTo("aar")) {
801 if (auto* saaho = findVariant("saaho")) {
802 removeVariant(saaho);
803 setLanguage("ssy");
804 }
805 }
806 else if (language().equalTo("arm") ||
807 language().equalTo("hy") ||
808 language().equalTo("hye")) {
809 if (auto* arevmda = findVariant("arevmda")) {
810 removeVariant(arevmda);
811 setLanguage("hyw");
812 }
813 }
814 else if (language().equalTo("art")) {
815 if (auto* lojban = findVariant("lojban")) {
816 removeVariant(lojban);
817 setLanguage("jbo");
818 }
819 }
820 else if (language().equalTo("cel")) {
821 if (auto* gaulish = findVariant("gaulish")) {
822 removeVariant(gaulish);
823 setLanguage("xtg");
824 }
825 }
826 else if (language().equalTo("chi") ||
827 language().equalTo("cmn") ||
828 language().equalTo("zh") ||
829 language().equalTo("zho")) {
830 if (auto* guoyu = findVariant("guoyu")) {
831 if (auto* hakka = findVariant("hakka")) {
832 removeVariants(guoyu, hakka);
833 setLanguage("hak");
834 return true;
835 }
836 }
837 if (auto* guoyu = findVariant("guoyu")) {
838 if (auto* xiang = findVariant("xiang")) {
839 removeVariants(guoyu, xiang);
840 setLanguage("hsn");
841 return true;
842 }
843 }
844 if (auto* guoyu = findVariant("guoyu")) {
845 removeVariant(guoyu);
846 setLanguage("zh");
847 }
848 else if (auto* hakka = findVariant("hakka")) {
849 removeVariant(hakka);
850 setLanguage("hak");
851 }
852 else if (auto* xiang = findVariant("xiang")) {
853 removeVariant(xiang);
854 setLanguage("hsn");
855 }
856 }
857 else if (language().equalTo("no") ||
858 language().equalTo("nor")) {
859 if (auto* bokmal = findVariant("bokmal")) {
860 removeVariant(bokmal);
861 setLanguage("nb");
862 }
863 else if (auto* nynorsk = findVariant("nynorsk")) {
864 removeVariant(nynorsk);
865 setLanguage("nn");
866 }
867 }
868
869 return true;
870 }
871
872 // Mappings from legacy sign languages.
873 // Derived from CLDR Supplemental Data, version 39.
874 // https://unicode.org/Public/cldr/39/core.zip
signLanguageMapping(LanguageSubtag & language,const RegionSubtag & region)875 bool js::intl::LanguageTag::signLanguageMapping(LanguageSubtag& language,
876 const RegionSubtag& region) {
877 MOZ_ASSERT(language.equalTo("sgn"));
878 MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
879 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
880
881 if (region.length() == 2) {
882 static const char regions[22][3] = {
883 "BR", "CO", "DD", "DE", "DK", "ES", "FR", "FX", "GB", "GR",
884 "IE", "IT", "JP", "MX", "NI", "NL", "NO", "PT", "SE", "UK",
885 "US", "ZA",
886 };
887 static const char* aliases[22] = {
888 "bzs", "csn", "gsg", "gsg", "dsl", "ssp", "fsl", "fsl", "bfi", "gss",
889 "isg", "ise", "jsl", "mfs", "ncs", "dse", "nsi", "psr", "swl", "bfi",
890 "ase", "sfs",
891 };
892
893 if (const char* replacement = SearchReplacement(regions, aliases, region)) {
894 language.set(mozilla::MakeStringSpan(replacement));
895 return true;
896 }
897 return false;
898 }
899
900 {
901 static const char regions[22][4] = {
902 "076", "170", "208", "249", "250", "276", "278", "280", "300", "372",
903 "380", "392", "484", "528", "558", "578", "620", "710", "724", "752",
904 "826", "840",
905 };
906 static const char* aliases[22] = {
907 "bzs", "csn", "dsl", "fsl", "fsl", "gsg", "gsg", "gsg", "gss", "isg",
908 "ise", "jsl", "mfs", "dse", "ncs", "nsi", "psr", "sfs", "ssp", "swl",
909 "bfi", "ase",
910 };
911
912 if (const char* replacement = SearchReplacement(regions, aliases, region)) {
913 language.set(mozilla::MakeStringSpan(replacement));
914 return true;
915 }
916 return false;
917 }
918 }
919
920 template <size_t Length>
IsUnicodeKey(mozilla::Span<const char> key,const char (& str)[Length])921 static inline bool IsUnicodeKey(
922 mozilla::Span<const char> key, const char (&str)[Length]) {
923 static_assert(Length == UnicodeKeyLength + 1,
924 "Unicode extension key is two characters long");
925 return memcmp(key.data(), str, Length - 1) == 0;
926 }
927
928 template <size_t Length>
IsUnicodeType(mozilla::Span<const char> type,const char (& str)[Length])929 static inline bool IsUnicodeType(
930 mozilla::Span<const char> type, const char (&str)[Length]) {
931 static_assert(Length > UnicodeKeyLength + 1,
932 "Unicode extension type contains more than two characters");
933 return type.size() == (Length - 1) &&
934 memcmp(type.data(), str, Length - 1) == 0;
935 }
936
CompareUnicodeType(const char * a,mozilla::Span<const char> b)937 static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
938 MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'),
939 "unexpected null-character in string");
940
941 using UnsignedChar = unsigned char;
942 for (size_t i = 0; i < b.size(); i++) {
943 // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
944 // we've reached the end of |a|, the below if-statement will always be true.
945 // That ensures we don't read past the end of |a|.
946 if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
947 return r;
948 }
949 }
950
951 // Return zero if both strings are equal or a negative number if |b| is a
952 // prefix of |a|.
953 return -int32_t(UnsignedChar(a[b.size()]));
954 }
955
956 template <size_t Length>
SearchUnicodeReplacement(const char * (& types)[Length],const char * (& aliases)[Length],mozilla::Span<const char> type)957 static inline const char* SearchUnicodeReplacement(
958 const char* (&types)[Length], const char* (&aliases)[Length],
959 mozilla::Span<const char> type) {
960
961 auto p = std::lower_bound(std::begin(types), std::end(types), type,
962 [](const auto& a, const auto& b) {
963 return CompareUnicodeType(a, b) < 0;
964 });
965 if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
966 return aliases[std::distance(std::begin(types), p)];
967 }
968 return nullptr;
969 }
970
971 /**
972 * Mapping from deprecated BCP 47 Unicode extension types to their preferred
973 * values.
974 *
975 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
976 * Spec: https://www.unicode.org/reports/tr35/#t_Extension
977 */
replaceUnicodeExtensionType(mozilla::Span<const char> key,mozilla::Span<const char> type)978 const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
979 mozilla::Span<const char> key, mozilla::Span<const char> type) {
980 MOZ_ASSERT(key.size() == UnicodeKeyLength);
981 MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key));
982
983 MOZ_ASSERT(type.size() > UnicodeKeyLength);
984 MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type));
985
986 if (IsUnicodeKey(key, "ca")) {
987 if (IsUnicodeType(type, "ethiopic-amete-alem")) {
988 return "ethioaa";
989 }
990 if (IsUnicodeType(type, "islamicc")) {
991 return "islamic-civil";
992 }
993 }
994 else if (IsUnicodeKey(key, "kb") ||
995 IsUnicodeKey(key, "kc") ||
996 IsUnicodeKey(key, "kh") ||
997 IsUnicodeKey(key, "kk") ||
998 IsUnicodeKey(key, "kn")) {
999 if (IsUnicodeType(type, "yes")) {
1000 return "true";
1001 }
1002 }
1003 else if (IsUnicodeKey(key, "ks")) {
1004 if (IsUnicodeType(type, "primary")) {
1005 return "level1";
1006 }
1007 if (IsUnicodeType(type, "tertiary")) {
1008 return "level3";
1009 }
1010 }
1011 else if (IsUnicodeKey(key, "ms")) {
1012 if (IsUnicodeType(type, "imperial")) {
1013 return "uksystem";
1014 }
1015 }
1016 else if (IsUnicodeKey(key, "rg") ||
1017 IsUnicodeKey(key, "sd")) {
1018 static const char* types[117] = {
1019 "cn11", "cn12", "cn13", "cn14", "cn15", "cn21", "cn22", "cn23",
1020 "cn31", "cn32", "cn33", "cn34", "cn35", "cn36", "cn37", "cn41",
1021 "cn42", "cn43", "cn44", "cn45", "cn46", "cn50", "cn51", "cn52",
1022 "cn53", "cn54", "cn61", "cn62", "cn63", "cn64", "cn65", "cz10a",
1023 "cz10b", "cz10c", "cz10d", "cz10e", "cz10f", "cz611", "cz612", "cz613",
1024 "cz614", "cz615", "cz621", "cz622", "cz623", "cz624", "cz626", "cz627",
1025 "czjc", "czjm", "czka", "czkr", "czli", "czmo", "czol", "czpa",
1026 "czpl", "czpr", "czst", "czus", "czvy", "czzl", "fra", "frb",
1027 "frc", "frd", "fre", "frf", "frg", "frh", "fri", "frj",
1028 "frk", "frl", "frm", "frn", "fro", "frp", "frq", "frr",
1029 "frs", "frt", "fru", "frv", "laxn", "lud", "lug", "lul",
1030 "mrnkc", "no23", "nzn", "nzs", "omba", "omsh", "plds", "plkp",
1031 "pllb", "plld", "pllu", "plma", "plmz", "plop", "plpd", "plpk",
1032 "plpm", "plsk", "plsl", "plwn", "plwp", "plzp", "tteto", "ttrcm",
1033 "ttwto", "twkhq", "twtnq", "twtpq", "twtxq",
1034 };
1035 static const char* aliases[117] = {
1036 "cnbj", "cntj", "cnhe", "cnsx", "cnmn", "cnln", "cnjl", "cnhl",
1037 "cnsh", "cnjs", "cnzj", "cnah", "cnfj", "cnjx", "cnsd", "cnha",
1038 "cnhb", "cnhn", "cngd", "cngx", "cnhi", "cncq", "cnsc", "cngz",
1039 "cnyn", "cnxz", "cnsn", "cngs", "cnqh", "cnnx", "cnxj", "cz110",
1040 "cz111", "cz112", "cz113", "cz114", "cz115", "cz663", "cz632", "cz633",
1041 "cz634", "cz635", "cz641", "cz642", "cz643", "cz644", "cz646", "cz647",
1042 "cz31", "cz64", "cz41", "cz52", "cz51", "cz80", "cz71", "cz53",
1043 "cz32", "cz10", "cz20", "cz42", "cz63", "cz72", "frges", "frnaq",
1044 "frara", "frbfc", "frbre", "frcvl", "frges", "frcor", "frbfc", "fridf",
1045 "frocc", "frnaq", "frges", "frocc", "frhdf", "frnor", "frnor", "frpdl",
1046 "frhdf", "frnaq", "frpac", "frara", "laxs", "lucl", "luec", "luca",
1047 "mr13", "no50", "nzauk", "nzcan", "ombj", "omsj", "pl02", "pl04",
1048 "pl08", "pl10", "pl06", "pl12", "pl14", "pl16", "pl20", "pl18",
1049 "pl22", "pl26", "pl24", "pl28", "pl30", "pl32", "tttob", "ttmrc",
1050 "tttob", "twkhh", "twtnn", "twnwt", "twtxg",
1051 };
1052 return SearchUnicodeReplacement(types, aliases, type);
1053 }
1054 else if (IsUnicodeKey(key, "tz")) {
1055 static const char* types[28] = {
1056 "aqams", "cnckg", "cnhrb", "cnkhg", "cuba", "egypt",
1057 "eire", "est", "gmt0", "hongkong", "hst", "iceland",
1058 "iran", "israel", "jamaica", "japan", "libya", "mst",
1059 "navajo", "poland", "portugal", "prc", "roc", "rok",
1060 "turkey", "uct", "usnavajo", "zulu",
1061 };
1062 static const char* aliases[28] = {
1063 "nzakl", "cnsha", "cnsha", "cnurc", "cuhav", "egcai",
1064 "iedub", "utcw05", "gmt", "hkhkg", "utcw10", "isrey",
1065 "irthr", "jeruslm", "jmkin", "jptyo", "lytip", "utcw07",
1066 "usden", "plwaw", "ptlis", "cnsha", "twtpe", "krsel",
1067 "trist", "utc", "usden", "utc",
1068 };
1069 return SearchUnicodeReplacement(types, aliases, type);
1070 }
1071 return nullptr;
1072 }
1073
1074 template <size_t Length>
IsTransformKey(mozilla::Span<const char> key,const char (& str)[Length])1075 static inline bool IsTransformKey(
1076 mozilla::Span<const char> key, const char (&str)[Length]) {
1077 static_assert(Length == TransformKeyLength + 1,
1078 "Transform extension key is two characters long");
1079 return memcmp(key.data(), str, Length - 1) == 0;
1080 }
1081
1082 template <size_t Length>
IsTransformType(mozilla::Span<const char> type,const char (& str)[Length])1083 static inline bool IsTransformType(
1084 mozilla::Span<const char> type, const char (&str)[Length]) {
1085 static_assert(Length > TransformKeyLength + 1,
1086 "Transform extension type contains more than two characters");
1087 return type.size() == (Length - 1) &&
1088 memcmp(type.data(), str, Length - 1) == 0;
1089 }
1090
1091 /**
1092 * Mapping from deprecated BCP 47 Transform extension types to their preferred
1093 * values.
1094 *
1095 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
1096 * Spec: https://www.unicode.org/reports/tr35/#t_Extension
1097 */
replaceTransformExtensionType(mozilla::Span<const char> key,mozilla::Span<const char> type)1098 const char* js::intl::LanguageTag::replaceTransformExtensionType(
1099 mozilla::Span<const char> key, mozilla::Span<const char> type) {
1100 MOZ_ASSERT(key.size() == TransformKeyLength);
1101 MOZ_ASSERT(IsCanonicallyCasedTransformKey(key));
1102
1103 MOZ_ASSERT(type.size() > TransformKeyLength);
1104 MOZ_ASSERT(IsCanonicallyCasedTransformType(type));
1105
1106 if (IsTransformKey(key, "d0")) {
1107 if (IsTransformType(type, "name")) {
1108 return "charname";
1109 }
1110 }
1111 else if (IsTransformKey(key, "m0")) {
1112 if (IsTransformType(type, "names")) {
1113 return "prprname";
1114 }
1115 }
1116 return nullptr;
1117 }
1118