1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package language
6
7import (
8	"bytes"
9	"flag"
10	"fmt"
11	"os"
12	"path"
13	"path/filepath"
14	"strings"
15	"testing"
16
17	"golang.org/x/text/internal/testtext"
18	"golang.org/x/text/internal/ucd"
19)
20
// verbose registers the -verbose command-line flag; per its usage string it
// asks for the internal tables of matchers to be printed.
// NOTE(review): nothing in the visible part of this file reads the flag —
// confirm where it is consumed.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
22
23func TestCompliance(t *testing.T) {
24	filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
25		if info.IsDir() {
26			return nil
27		}
28		r, err := os.Open(file)
29		if err != nil {
30			t.Fatal(err)
31		}
32		ucd.Parse(r, func(p *ucd.Parser) {
33			name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
34			if skip[name] {
35				return
36			}
37			t.Run(info.Name()+"/"+name, func(t *testing.T) {
38				supported := makeTagList(p.String(0))
39				desired := makeTagList(p.String(1))
40				gotCombined, index, conf := NewMatcher(supported).Match(desired...)
41
42				gotMatch := supported[index]
43				wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
44				if gotMatch != wantMatch {
45					t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
46				}
47				if tag := strings.TrimSpace(p.String(3)); tag != "" {
48					wantCombined := Raw.MustParse(tag)
49					if err == nil && gotCombined != wantCombined {
50						t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
51					}
52				}
53			})
54		})
55		return nil
56	})
57}
58
// skip holds the names of compliance records that TestCompliance does not
// run. A name is the record's first two fields joined by path.Join with all
// spaces removed. The trailing comments record the mismatch seen for each
// entry.
var skip = map[string]bool{
	// TODO: bugs
	// Honor the wildcard match. This may only be useful to select non-exact
	// stuff.
	"mul,af/nl": true, // match: got "af"; want "mul"

	// TODO: include other extensions.
	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,

	// Inconsistencies with Mark Davis' implementation where it is not clear
	// which is better.

	// Inconsistencies in combined. I think the Go approach is more appropriate.
	// We could use -u-rg- as alternative.
	"und,fr/fr-BE-fonipa":              true, // combined: got "fr"; want "fr-BE-fonipa"
	"und,fr-CA/fr-BE-fonipa":           true, // combined: got "fr-CA"; want "fr-BE-fonipa"
	"und,fr-fonupa/fr-BE-fonipa":       true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
	"und,no/nn-BE-fonipa":              true, // combined: got "no"; want "no-BE-fonipa"
	"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"

	// The initial number is a threshold. As we don't use scoring, we will not
	// implement this.
	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
	// match: got "und"; want "fr-Cyrl-CA-fonupa"
	// combined: got "und"; want "fr-Cyrl-BE-fonipa"

	// Other interesting cases to test:
	// - Should same language or same script have the preference if there is
	//   usually no understanding of the other script?
	// - More specific region in desired may replace enclosing supported.
}
91
92func makeTagList(s string) (tags []Tag) {
93	for _, s := range strings.Split(s, ",") {
94		tags = append(tags, mk(strings.TrimSpace(s)))
95	}
96	return tags
97}
98
99func TestMatchStrings(t *testing.T) {
100	testCases := []struct {
101		supported string
102		desired   string // strings separted by |
103		tag       string
104		index     int
105	}{{
106		supported: "en",
107		desired:   "",
108		tag:       "en",
109		index:     0,
110	}, {
111		supported: "en",
112		desired:   "nl",
113		tag:       "en",
114		index:     0,
115	}, {
116		supported: "en,nl",
117		desired:   "nl",
118		tag:       "nl",
119		index:     1,
120	}, {
121		supported: "en,nl",
122		desired:   "nl|en",
123		tag:       "nl",
124		index:     1,
125	}, {
126		supported: "en-GB,nl",
127		desired:   "en ; q=0.1,nl",
128		tag:       "nl",
129		index:     1,
130	}, {
131		supported: "en-GB,nl",
132		desired:   "en;q=0.005 | dk; q=0.1,nl ",
133		tag:       "en-GB",
134		index:     0,
135	}, {
136		// do not match faulty tags with und
137		supported: "en,und",
138		desired:   "|en",
139		tag:       "en",
140		index:     0,
141	}}
142	for _, tc := range testCases {
143		t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
144			m := NewMatcher(makeTagList(tc.supported))
145			tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
146			if tag.String() != tc.tag || index != tc.index {
147				t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
148			}
149		})
150	}
151}
152
153func TestRegionGroups(t *testing.T) {
154	testCases := []struct {
155		a, b     string
156		distance uint8
157	}{
158		{"zh-TW", "zh-HK", 5},
159		{"zh-MO", "zh-HK", 4},
160		{"es-ES", "es-AR", 5},
161		{"es-ES", "es", 4},
162		{"es-419", "es-MX", 4},
163		{"es-AR", "es-MX", 4},
164		{"es-ES", "es-MX", 5},
165		{"es-PT", "es-MX", 5},
166	}
167	for _, tc := range testCases {
168		a := MustParse(tc.a)
169		aScript, _ := a.Script()
170		b := MustParse(tc.b)
171		bScript, _ := b.Script()
172
173		if aScript != bScript {
174			t.Errorf("scripts differ: %q vs %q", aScript, bScript)
175			continue
176		}
177		d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
178		if d != tc.distance {
179			t.Errorf("got %q; want %q", d, tc.distance)
180		}
181	}
182}
183
184func TestIsParadigmLocale(t *testing.T) {
185	testCases := map[string]bool{
186		"en-US":  true,
187		"en-GB":  true,
188		"en-VI":  false,
189		"es-GB":  false,
190		"es-ES":  true,
191		"es-419": true,
192	}
193	for str, want := range testCases {
194		tt := Make(str)
195		tag := tt.tag()
196		got := isParadigmLocale(tag.LangID, tag.RegionID)
197		if got != want {
198			t.Errorf("isPL(%q) = %v; want %v", str, got, want)
199		}
200	}
201}
202
203// Implementation of String methods for various types for debugging purposes.
204
205func (m *matcher) String() string {
206	w := &bytes.Buffer{}
207	fmt.Fprintln(w, "Default:", m.default_)
208	for tag, h := range m.index {
209		fmt.Fprintf(w, "  %s: %v\n", tag, h)
210	}
211	return w.String()
212}
213
214func (h *matchHeader) String() string {
215	w := &bytes.Buffer{}
216	fmt.Fprint(w, "haveTag: ")
217	for _, h := range h.haveTags {
218		fmt.Fprintf(w, "%v, ", h)
219	}
220	return w.String()
221}
222
223func (t haveTag) String() string {
224	return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
225}
226
227func TestIssue43834(t *testing.T) {
228	matcher := NewMatcher([]Tag{English})
229
230	// ZZ is the largest region code and should not cause overflow.
231	desired, _, err := ParseAcceptLanguage("en-ZZ")
232	if err != nil {
233		t.Error(err)
234	}
235	_, i, _ := matcher.Match(desired...)
236	if i != 0 {
237		t.Errorf("got %v; want 0", i)
238	}
239}
240
241func TestBestMatchAlloc(t *testing.T) {
242	m := NewMatcher(makeTagList("en sr nl"))
243	// Go allocates when creating a list of tags from a single tag!
244	list := []Tag{English}
245	avg := testtext.AllocsPerRun(1, func() {
246		m.Match(list...)
247	})
248	if avg > 0 {
249		t.Errorf("got %f; want 0", avg)
250	}
251}
252
253var benchHave = []Tag{
254	mk("en"),
255	mk("en-GB"),
256	mk("za"),
257	mk("zh-Hant"),
258	mk("zh-Hans-CN"),
259	mk("zh"),
260	mk("zh-HK"),
261	mk("ar-MK"),
262	mk("en-CA"),
263	mk("fr-CA"),
264	mk("fr-US"),
265	mk("fr-CH"),
266	mk("fr"),
267	mk("lt"),
268	mk("lv"),
269	mk("iw"),
270	mk("iw-NL"),
271	mk("he"),
272	mk("he-IT"),
273	mk("tlh"),
274	mk("ja"),
275	mk("ja-Jpan"),
276	mk("ja-Jpan-JP"),
277	mk("de"),
278	mk("de-CH"),
279	mk("de-AT"),
280	mk("de-DE"),
281	mk("sr"),
282	mk("sr-Latn"),
283	mk("sr-Cyrl"),
284	mk("sr-ME"),
285}
286
287var benchWant = [][]Tag{
288	[]Tag{
289		mk("en"),
290	},
291	[]Tag{
292		mk("en-AU"),
293		mk("de-HK"),
294		mk("nl"),
295		mk("fy"),
296		mk("lv"),
297	},
298	[]Tag{
299		mk("en-AU"),
300		mk("de-HK"),
301		mk("nl"),
302		mk("fy"),
303	},
304	[]Tag{
305		mk("ja-Hant"),
306		mk("da-HK"),
307		mk("nl"),
308		mk("zh-TW"),
309	},
310	[]Tag{
311		mk("ja-Hant"),
312		mk("da-HK"),
313		mk("nl"),
314		mk("hr"),
315	},
316}
317
318func BenchmarkMatch(b *testing.B) {
319	m := newMatcher(benchHave, nil)
320	for i := 0; i < b.N; i++ {
321		for _, want := range benchWant {
322			m.getBest(want...)
323		}
324	}
325}
326
327func BenchmarkMatchExact(b *testing.B) {
328	want := mk("en")
329	m := newMatcher(benchHave, nil)
330	for i := 0; i < b.N; i++ {
331		m.getBest(want)
332	}
333}
334
335func BenchmarkMatchAltLanguagePresent(b *testing.B) {
336	want := mk("hr")
337	m := newMatcher(benchHave, nil)
338	for i := 0; i < b.N; i++ {
339		m.getBest(want)
340	}
341}
342
343func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
344	want := mk("nn")
345	m := newMatcher(benchHave, nil)
346	for i := 0; i < b.N; i++ {
347		m.getBest(want)
348	}
349}
350
351func BenchmarkMatchAltScriptPresent(b *testing.B) {
352	want := mk("zh-Hant-CN")
353	m := newMatcher(benchHave, nil)
354	for i := 0; i < b.N; i++ {
355		m.getBest(want)
356	}
357}
358
359func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
360	want := mk("fr-Cyrl")
361	m := newMatcher(benchHave, nil)
362	for i := 0; i < b.N; i++ {
363		m.getBest(want)
364	}
365}
366
367func BenchmarkMatchLimitedExact(b *testing.B) {
368	want := []Tag{mk("he-NL"), mk("iw-NL")}
369	m := newMatcher(benchHave, nil)
370	for i := 0; i < b.N; i++ {
371		m.getBest(want...)
372	}
373}
374