1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package language
6
7import (
8	"testing"
9
10	"golang.org/x/text/internal/tag"
11)
12
// b returns the bytes of s as a []byte. It exists to keep the calls to the
// byte-slice based lookup functions in the tests below short.
func b(s string) []byte {
	buf := []byte(s)
	return buf
}
16
17func TestLangID(t *testing.T) {
18	tests := []struct {
19		id, bcp47, iso3, norm string
20		err                   error
21	}{
22		{id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
23		{id: "  ", bcp47: "und", iso3: "und", err: ErrSyntax},
24		{id: "   ", bcp47: "und", iso3: "und", err: ErrSyntax},
25		{id: "    ", bcp47: "und", iso3: "und", err: ErrSyntax},
26		{id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))},
27		{id: "und", bcp47: "und", iso3: "und"},
28		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
29		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
30		{id: "es", bcp47: "es", iso3: "spa"},
31		{id: "spa", bcp47: "es", iso3: "spa"},
32		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
33		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
34		{id: "ar", bcp47: "ar", iso3: "ara"},
35		{id: "kw", bcp47: "kw", iso3: "cor"},
36		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
37		{id: "ar", bcp47: "ar", iso3: "ara"},
38		{id: "kur", bcp47: "ku", iso3: "kur"},
39		{id: "nl", bcp47: "nl", iso3: "nld"},
40		{id: "NL", bcp47: "nl", iso3: "nld"},
41		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
42		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
43		{id: "und", bcp47: "und", iso3: "und"},
44		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
45		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
46		{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
47		{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
48		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
49	}
50	for i, tt := range tests {
51		want, err := getLangID(b(tt.id))
52		if err != tt.err {
53			t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
54		}
55		if err != nil {
56			continue
57		}
58		if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
59			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
60		}
61		if len(tt.iso3) == 3 {
62			if id, _ := getLangISO3(b(tt.iso3)); want != id {
63				t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
64			}
65			if id, _ := getLangID(b(tt.iso3)); want != id {
66				t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
67			}
68		}
69		norm := want
70		if tt.norm != "" {
71			norm, _ = getLangID(b(tt.norm))
72		}
73		id, _ := normLang(want)
74		if id != norm {
75			t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
76		}
77		if id := want.String(); tt.bcp47 != id {
78			t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
79		}
80		if id := want.ISO3(); tt.iso3[:3] != id {
81			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
82		}
83	}
84}
85
86func TestGrandfathered(t *testing.T) {
87	for _, tt := range []struct{ in, out string }{
88		{"art-lojban", "jbo"},
89		{"i-ami", "ami"},
90		{"i-bnn", "bnn"},
91		{"i-hak", "hak"},
92		{"i-klingon", "tlh"},
93		{"i-lux", "lb"},
94		{"i-navajo", "nv"},
95		{"i-pwn", "pwn"},
96		{"i-tao", "tao"},
97		{"i-tay", "tay"},
98		{"i-tsu", "tsu"},
99		{"no-bok", "nb"},
100		{"no-nyn", "nn"},
101		{"sgn-BE-FR", "sfb"},
102		{"sgn-BE-NL", "vgt"},
103		{"sgn-CH-DE", "sgg"},
104		{"sgn-ch-de", "sgg"},
105		{"zh-guoyu", "cmn"},
106		{"zh-hakka", "hak"},
107		{"zh-min-nan", "nan"},
108		{"zh-xiang", "hsn"},
109
110		// Grandfathered tags with no modern replacement will be converted as follows:
111		{"cel-gaulish", "xtg-x-cel-gaulish"},
112		{"en-GB-oed", "en-GB-oxendict"},
113		{"en-gb-oed", "en-GB-oxendict"},
114		{"i-default", "en-x-i-default"},
115		{"i-enochian", "und-x-i-enochian"},
116		{"i-mingo", "see-x-i-mingo"},
117		{"zh-min", "nan-x-zh-min"},
118
119		{"root", "und"},
120		{"en_US_POSIX", "en-US-u-va-posix"},
121		{"en_us_posix", "en-US-u-va-posix"},
122		{"en-us-posix", "en-US-u-va-posix"},
123	} {
124		got := Make(tt.in)
125		want := MustParse(tt.out)
126		if got != want {
127			t.Errorf("%s: got %q; want %q", tt.in, got, want)
128		}
129	}
130}
131
132func TestRegionID(t *testing.T) {
133	tests := []struct {
134		in, out string
135	}{
136		{"_  ", ""},
137		{"_000", ""},
138		{"419", "419"},
139		{"AA", "AA"},
140		{"ATF", "TF"},
141		{"HV", "HV"},
142		{"CT", "CT"},
143		{"DY", "DY"},
144		{"IC", "IC"},
145		{"FQ", "FQ"},
146		{"JT", "JT"},
147		{"ZZ", "ZZ"},
148		{"EU", "EU"},
149		{"QO", "QO"},
150		{"FX", "FX"},
151	}
152	for i, tt := range tests {
153		if tt.in[0] == '_' {
154			id := tt.in[1:]
155			if _, err := getRegionID(b(id)); err == nil {
156				t.Errorf("%d:err(%s): found nil; want error", i, id)
157			}
158			continue
159		}
160		want, _ := getRegionID(b(tt.in))
161		if s := want.String(); s != tt.out {
162			t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out)
163		}
164		if len(tt.in) == 2 {
165			want, _ := getRegionISO2(b(tt.in))
166			if s := want.String(); s != tt.out {
167				t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out)
168			}
169		}
170	}
171}
172
173func TestRegionType(t *testing.T) {
174	for _, tt := range []struct {
175		r string
176		t byte
177	}{
178		{"NL", bcp47Region | ccTLD},
179		{"EU", bcp47Region | ccTLD}, // exceptionally reserved
180		{"AN", bcp47Region | ccTLD}, // transitionally reserved
181
182		{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
183		{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47
184
185		{"XA", iso3166UserAssigned | bcp47Region},
186		{"ZZ", iso3166UserAssigned | bcp47Region},
187		{"AA", iso3166UserAssigned | bcp47Region},
188		{"QO", iso3166UserAssigned | bcp47Region},
189		{"QM", iso3166UserAssigned | bcp47Region},
190		{"XK", iso3166UserAssigned | bcp47Region},
191
192		{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
193	} {
194		r := MustParseRegion(tt.r)
195		if tp := r.typ(); tp != tt.t {
196			t.Errorf("Type(%s): got %x; want %x", tt.r, tp, tt.t)
197		}
198	}
199}
200
201func TestRegionISO3(t *testing.T) {
202	tests := []struct {
203		from, iso3, to string
204	}{
205		{"  ", "ZZZ", "ZZ"},
206		{"000", "ZZZ", "ZZ"},
207		{"AA", "AAA", ""},
208		{"CT", "CTE", ""},
209		{"DY", "DHY", ""},
210		{"EU", "QUU", ""},
211		{"HV", "HVO", ""},
212		{"IC", "ZZZ", "ZZ"},
213		{"JT", "JTN", ""},
214		{"PZ", "PCZ", ""},
215		{"QU", "QUU", "EU"},
216		{"QO", "QOO", ""},
217		{"YD", "YMD", ""},
218		{"FQ", "ATF", "TF"},
219		{"TF", "ATF", ""},
220		{"FX", "FXX", ""},
221		{"ZZ", "ZZZ", ""},
222		{"419", "ZZZ", "ZZ"},
223	}
224	for _, tt := range tests {
225		r, _ := getRegionID(b(tt.from))
226		if s := r.ISO3(); s != tt.iso3 {
227			t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
228		}
229		if tt.iso3 == "" {
230			continue
231		}
232		want := tt.to
233		if tt.to == "" {
234			want = tt.from
235		}
236		r, _ = getRegionID(b(want))
237		if id, _ := getRegionISO3(b(tt.iso3)); id != r {
238			t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
239		}
240	}
241}
242
243func TestRegionM49(t *testing.T) {
244	fromTests := []struct {
245		m49 int
246		id  string
247	}{
248		{0, ""},
249		{-1, ""},
250		{1000, ""},
251		{10000, ""},
252
253		{001, "001"},
254		{104, "MM"},
255		{180, "CD"},
256		{230, "ET"},
257		{231, "ET"},
258		{249, "FX"},
259		{250, "FR"},
260		{276, "DE"},
261		{278, "DD"},
262		{280, "DE"},
263		{419, "419"},
264		{626, "TL"},
265		{736, "SD"},
266		{840, "US"},
267		{854, "BF"},
268		{891, "CS"},
269		{899, ""},
270		{958, "AA"},
271		{966, "QT"},
272		{967, "EU"},
273		{999, "ZZ"},
274	}
275	for _, tt := range fromTests {
276		id, err := getRegionM49(tt.m49)
277		if want, have := err != nil, tt.id == ""; want != have {
278			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
279			continue
280		}
281		r, _ := getRegionID(b(tt.id))
282		if r != id {
283			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
284		}
285	}
286
287	toTests := []struct {
288		m49 int
289		id  string
290	}{
291		{0, "000"},
292		{0, "IC"}, // Some codes don't have an ID
293
294		{001, "001"},
295		{104, "MM"},
296		{104, "BU"},
297		{180, "CD"},
298		{180, "ZR"},
299		{231, "ET"},
300		{250, "FR"},
301		{249, "FX"},
302		{276, "DE"},
303		{278, "DD"},
304		{419, "419"},
305		{626, "TL"},
306		{626, "TP"},
307		{729, "SD"},
308		{826, "GB"},
309		{840, "US"},
310		{854, "BF"},
311		{891, "YU"},
312		{891, "CS"},
313		{958, "AA"},
314		{966, "QT"},
315		{967, "EU"},
316		{967, "QU"},
317		{999, "ZZ"},
318		// For codes that don't have an M49 code use the replacement value,
319		// if available.
320		{854, "HV"}, // maps to Burkino Faso
321	}
322	for _, tt := range toTests {
323		r, _ := getRegionID(b(tt.id))
324		if r.M49() != tt.m49 {
325			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
326		}
327	}
328}
329
330func TestRegionDeprecation(t *testing.T) {
331	tests := []struct{ in, out string }{
332		{"BU", "MM"},
333		{"BUR", "MM"},
334		{"CT", "KI"},
335		{"DD", "DE"},
336		{"DDR", "DE"},
337		{"DY", "BJ"},
338		{"FX", "FR"},
339		{"HV", "BF"},
340		{"JT", "UM"},
341		{"MI", "UM"},
342		{"NH", "VU"},
343		{"NQ", "AQ"},
344		{"PU", "UM"},
345		{"PZ", "PA"},
346		{"QU", "EU"},
347		{"RH", "ZW"},
348		{"TP", "TL"},
349		{"UK", "GB"},
350		{"VD", "VN"},
351		{"WK", "UM"},
352		{"YD", "YE"},
353		{"NL", "NL"},
354	}
355	for _, tt := range tests {
356		rIn, _ := getRegionID([]byte(tt.in))
357		rOut, _ := getRegionISO2([]byte(tt.out))
358		r := normRegion(rIn)
359		if rOut == rIn && r != 0 {
360			t.Errorf("%s: was %q; want %q", tt.in, r, tt.in)
361		}
362		if rOut != rIn && r != rOut {
363			t.Errorf("%s: was %q; want %q", tt.in, r, tt.out)
364		}
365
366	}
367}
368
369func TestGetScriptID(t *testing.T) {
370	idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
371	tests := []struct {
372		in  string
373		out Script
374	}{
375		{"    ", 0},
376		{"      ", 0},
377		{"  ", 0},
378		{"", 0},
379		{"Aaaa", 0},
380		{"Bbbb", 1},
381		{"Dddd", 2},
382		{"dddd", 2},
383		{"dDDD", 2},
384		{"Eeee", 3},
385		{"Zzzz", 4},
386	}
387	for i, tt := range tests {
388		if id, err := getScriptID(idx, b(tt.in)); id != tt.out {
389			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
390		} else if id == 0 && err == nil {
391			t.Errorf("%d:%s: no error; expected one", i, tt.in)
392		}
393	}
394}
395
396func TestIsPrivateUse(t *testing.T) {
397	type test struct {
398		s       string
399		private bool
400	}
401	tests := []test{
402		{"en", false},
403		{"und", false},
404		{"pzn", false},
405		{"qaa", true},
406		{"qtz", true},
407		{"qua", false},
408	}
409	for i, tt := range tests {
410		x, _ := getLangID([]byte(tt.s))
411		if b := x.IsPrivateUse(); b != tt.private {
412			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
413		}
414	}
415	tests = []test{
416		{"001", false},
417		{"419", false},
418		{"899", false},
419		{"900", false},
420		{"957", false},
421		{"958", true},
422		{"AA", true},
423		{"AC", false},
424		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
425		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
426		{"QO", true},  // CLDR grouping, User-assigned in ISO.
427		{"QA", false},
428		{"QM", true},
429		{"QZ", true},
430		{"XA", true},
431		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
432		{"XZ", true},
433		{"ZW", false},
434		{"ZZ", true},
435	}
436	for i, tt := range tests {
437		x, _ := getRegionID([]byte(tt.s))
438		if b := x.IsPrivateUse(); b != tt.private {
439			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
440		}
441	}
442	tests = []test{
443		{"Latn", false},
444		{"Laaa", false}, // invalid
445		{"Qaaa", true},
446		{"Qabx", true},
447		{"Qaby", false},
448		{"Zyyy", false},
449		{"Zzzz", false},
450	}
451	for i, tt := range tests {
452		x, _ := getScriptID(script, []byte(tt.s))
453		if b := x.IsPrivateUse(); b != tt.private {
454			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
455		}
456	}
457}
458