1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package language 6 7import ( 8 "bytes" 9 "flag" 10 "fmt" 11 "os" 12 "path" 13 "path/filepath" 14 "strings" 15 "testing" 16 17 "golang.org/x/text/internal/testtext" 18 "golang.org/x/text/internal/ucd" 19) 20 21var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers") 22 23func TestCompliance(t *testing.T) { 24 filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error { 25 if info.IsDir() { 26 return nil 27 } 28 r, err := os.Open(file) 29 if err != nil { 30 t.Fatal(err) 31 } 32 ucd.Parse(r, func(p *ucd.Parser) { 33 name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1) 34 if skip[name] { 35 return 36 } 37 t.Run(info.Name()+"/"+name, func(t *testing.T) { 38 supported := makeTagList(p.String(0)) 39 desired := makeTagList(p.String(1)) 40 gotCombined, index, conf := NewMatcher(supported).Match(desired...) 41 42 gotMatch := supported[index] 43 wantMatch := Raw.Make(p.String(2)) // wantMatch may be null 44 if gotMatch != wantMatch { 45 t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf) 46 } 47 if tag := strings.TrimSpace(p.String(3)); tag != "" { 48 wantCombined := Raw.MustParse(tag) 49 if err == nil && gotCombined != wantCombined { 50 t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf) 51 } 52 } 53 }) 54 }) 55 return nil 56 }) 57} 58 59var skip = map[string]bool{ 60 // TODO: bugs 61 // Honor the wildcard match. This may only be useful to select non-exact 62 // stuff. 63 "mul,af/nl": true, // match: got "af"; want "mul" 64 65 // TODO: include other extensions. 66 // combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab" 67 "und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true, 68 69 // Inconsistencies with Mark Davis' implementation where it is not clear 70 // which is better. 71 72 // Inconsistencies in combined. I think the Go approach is more appropriate. 73 // We could use -u-rg- as alternative. 74 "und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa" 75 "und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa" 76 "und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa" 77 "und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa" 78 "50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa" 79 80 // The initial number is a threshold. As we don't use scoring, we will not 81 // implement this. 82 "50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true, 83 // match: got "und"; want "fr-Cyrl-CA-fonupa" 84 // combined: got "und"; want "fr-Cyrl-BE-fonipa" 85 86 // Other interesting cases to test: 87 // - Should same language or same script have the preference if there is 88 // usually no understanding of the other script? 89 // - More specific region in desired may replace enclosing supported. 90} 91 92func makeTagList(s string) (tags []Tag) { 93 for _, s := range strings.Split(s, ",") { 94 tags = append(tags, mk(strings.TrimSpace(s))) 95 } 96 return tags 97} 98 99func TestMatchStrings(t *testing.T) { 100 testCases := []struct { 101 supported string 102 desired string // strings separted by | 103 tag string 104 index int 105 }{{ 106 supported: "en", 107 desired: "", 108 tag: "en", 109 index: 0, 110 }, { 111 supported: "en", 112 desired: "nl", 113 tag: "en", 114 index: 0, 115 }, { 116 supported: "en,nl", 117 desired: "nl", 118 tag: "nl", 119 index: 1, 120 }, { 121 supported: "en,nl", 122 desired: "nl|en", 123 tag: "nl", 124 index: 1, 125 }, { 126 supported: "en-GB,nl", 127 desired: "en ; q=0.1,nl", 128 tag: "nl", 129 index: 1, 130 }, { 131 supported: "en-GB,nl", 132 desired: "en;q=0.005 | dk; q=0.1,nl ", 133 tag: "en-GB", 134 index: 0, 135 }, { 136 // do not match faulty tags with und 137 supported: "en,und", 138 desired: "|en", 139 tag: "en", 140 index: 0, 141 }} 142 for _, tc := range testCases { 143 t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) { 144 m := NewMatcher(makeTagList(tc.supported)) 145 tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...) 146 if tag.String() != tc.tag || index != tc.index { 147 t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index) 148 } 149 }) 150 } 151} 152 153func TestRegionGroups(t *testing.T) { 154 testCases := []struct { 155 a, b string 156 distance uint8 157 }{ 158 {"zh-TW", "zh-HK", 5}, 159 {"zh-MO", "zh-HK", 4}, 160 {"es-ES", "es-AR", 5}, 161 {"es-ES", "es", 4}, 162 {"es-419", "es-MX", 4}, 163 {"es-AR", "es-MX", 4}, 164 {"es-ES", "es-MX", 5}, 165 {"es-PT", "es-MX", 5}, 166 } 167 for _, tc := range testCases { 168 a := MustParse(tc.a) 169 aScript, _ := a.Script() 170 b := MustParse(tc.b) 171 bScript, _ := b.Script() 172 173 if aScript != bScript { 174 t.Errorf("scripts differ: %q vs %q", aScript, bScript) 175 continue 176 } 177 d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang()) 178 if d != tc.distance { 179 t.Errorf("got %q; want %q", d, tc.distance) 180 } 181 } 182} 183 184func TestIsParadigmLocale(t *testing.T) { 185 testCases := map[string]bool{ 186 "en-US": true, 187 "en-GB": true, 188 "en-VI": false, 189 "es-GB": false, 190 "es-ES": true, 191 "es-419": true, 192 } 193 for str, want := range testCases { 194 tt := Make(str) 195 tag := tt.tag() 196 got := isParadigmLocale(tag.LangID, tag.RegionID) 197 if got != want { 198 t.Errorf("isPL(%q) = %v; want %v", str, got, want) 199 } 200 } 201} 202 203// Implementation of String methods for various types for debugging purposes. 204 205func (m *matcher) String() string { 206 w := &bytes.Buffer{} 207 fmt.Fprintln(w, "Default:", m.default_) 208 for tag, h := range m.index { 209 fmt.Fprintf(w, " %s: %v\n", tag, h) 210 } 211 return w.String() 212} 213 214func (h *matchHeader) String() string { 215 w := &bytes.Buffer{} 216 fmt.Fprint(w, "haveTag: ") 217 for _, h := range h.haveTags { 218 fmt.Fprintf(w, "%v, ", h) 219 } 220 return w.String() 221} 222 223func (t haveTag) String() string { 224 return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript) 225} 226 227func TestIssue43834(t *testing.T) { 228 matcher := NewMatcher([]Tag{English}) 229 230 // ZZ is the largest region code and should not cause overflow. 231 desired, _, err := ParseAcceptLanguage("en-ZZ") 232 if err != nil { 233 t.Error(err) 234 } 235 _, i, _ := matcher.Match(desired...) 236 if i != 0 { 237 t.Errorf("got %v; want 0", i) 238 } 239} 240 241func TestBestMatchAlloc(t *testing.T) { 242 m := NewMatcher(makeTagList("en sr nl")) 243 // Go allocates when creating a list of tags from a single tag! 244 list := []Tag{English} 245 avg := testtext.AllocsPerRun(1, func() { 246 m.Match(list...) 247 }) 248 if avg > 0 { 249 t.Errorf("got %f; want 0", avg) 250 } 251} 252 253var benchHave = []Tag{ 254 mk("en"), 255 mk("en-GB"), 256 mk("za"), 257 mk("zh-Hant"), 258 mk("zh-Hans-CN"), 259 mk("zh"), 260 mk("zh-HK"), 261 mk("ar-MK"), 262 mk("en-CA"), 263 mk("fr-CA"), 264 mk("fr-US"), 265 mk("fr-CH"), 266 mk("fr"), 267 mk("lt"), 268 mk("lv"), 269 mk("iw"), 270 mk("iw-NL"), 271 mk("he"), 272 mk("he-IT"), 273 mk("tlh"), 274 mk("ja"), 275 mk("ja-Jpan"), 276 mk("ja-Jpan-JP"), 277 mk("de"), 278 mk("de-CH"), 279 mk("de-AT"), 280 mk("de-DE"), 281 mk("sr"), 282 mk("sr-Latn"), 283 mk("sr-Cyrl"), 284 mk("sr-ME"), 285} 286 287var benchWant = [][]Tag{ 288 []Tag{ 289 mk("en"), 290 }, 291 []Tag{ 292 mk("en-AU"), 293 mk("de-HK"), 294 mk("nl"), 295 mk("fy"), 296 mk("lv"), 297 }, 298 []Tag{ 299 mk("en-AU"), 300 mk("de-HK"), 301 mk("nl"), 302 mk("fy"), 303 }, 304 []Tag{ 305 mk("ja-Hant"), 306 mk("da-HK"), 307 mk("nl"), 308 mk("zh-TW"), 309 }, 310 []Tag{ 311 mk("ja-Hant"), 312 mk("da-HK"), 313 mk("nl"), 314 mk("hr"), 315 }, 316} 317 318func BenchmarkMatch(b *testing.B) { 319 m := newMatcher(benchHave, nil) 320 for i := 0; i < b.N; i++ { 321 for _, want := range benchWant { 322 m.getBest(want...) 323 } 324 } 325} 326 327func BenchmarkMatchExact(b *testing.B) { 328 want := mk("en") 329 m := newMatcher(benchHave, nil) 330 for i := 0; i < b.N; i++ { 331 m.getBest(want) 332 } 333} 334 335func BenchmarkMatchAltLanguagePresent(b *testing.B) { 336 want := mk("hr") 337 m := newMatcher(benchHave, nil) 338 for i := 0; i < b.N; i++ { 339 m.getBest(want) 340 } 341} 342 343func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { 344 want := mk("nn") 345 m := newMatcher(benchHave, nil) 346 for i := 0; i < b.N; i++ { 347 m.getBest(want) 348 } 349} 350 351func BenchmarkMatchAltScriptPresent(b *testing.B) { 352 want := mk("zh-Hant-CN") 353 m := newMatcher(benchHave, nil) 354 for i := 0; i < b.N; i++ { 355 m.getBest(want) 356 } 357} 358 359func BenchmarkMatchAltScriptNotPresent(b *testing.B) { 360 want := mk("fr-Cyrl") 361 m := newMatcher(benchHave, nil) 362 for i := 0; i < b.N; i++ { 363 m.getBest(want) 364 } 365} 366 367func BenchmarkMatchLimitedExact(b *testing.B) { 368 want := []Tag{mk("he-NL"), mk("iw-NL")} 369 m := newMatcher(benchHave, nil) 370 for i := 0; i < b.N; i++ { 371 m.getBest(want...) 372 } 373} 374