1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package language 6 7import ( 8 "bytes" 9 "flag" 10 "fmt" 11 "os" 12 "path" 13 "path/filepath" 14 "strings" 15 "testing" 16 17 "golang.org/x/text/internal/testtext" 18 "golang.org/x/text/internal/ucd" 19) 20 21var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers") 22 23func TestCompliance(t *testing.T) { 24 filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error { 25 if info.IsDir() { 26 return nil 27 } 28 r, err := os.Open(file) 29 if err != nil { 30 t.Fatal(err) 31 } 32 ucd.Parse(r, func(p *ucd.Parser) { 33 name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1) 34 if skip[name] { 35 return 36 } 37 t.Run(info.Name()+"/"+name, func(t *testing.T) { 38 supported := makeTagList(p.String(0)) 39 desired := makeTagList(p.String(1)) 40 gotCombined, index, conf := NewMatcher(supported).Match(desired...) 41 42 gotMatch := supported[index] 43 wantMatch := Raw.Make(p.String(2)) // wantMatch may be null 44 if gotMatch != wantMatch { 45 t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf) 46 } 47 if tag := strings.TrimSpace(p.String(3)); tag != "" { 48 wantCombined := Raw.MustParse(tag) 49 if err == nil && gotCombined != wantCombined { 50 t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf) 51 } 52 } 53 }) 54 }) 55 return nil 56 }) 57} 58 59var skip = map[string]bool{ 60 // TODO: bugs 61 // Honor the wildcard match. This may only be useful to select non-exact 62 // stuff. 63 "mul,af/nl": true, // match: got "af"; want "mul" 64 65 // TODO: include other extensions. 66 // combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab" 67 "und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true, 68 69 // Inconsistencies with Mark Davis' implementation where it is not clear 70 // which is better. 71 72 // Inconsistencies in combined. I think the Go approach is more appropriate. 73 // We could use -u-rg- as alternative. 74 "und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa" 75 "und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa" 76 "und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa" 77 "und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa" 78 "50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa" 79 80 // The initial number is a threshold. As we don't use scoring, we will not 81 // implement this. 82 "50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true, 83 // match: got "und"; want "fr-Cyrl-CA-fonupa" 84 // combined: got "und"; want "fr-Cyrl-BE-fonipa" 85 86 // Other interesting cases to test: 87 // - Should same language or same script have the preference if there is 88 // usually no understanding of the other script? 89 // - More specific region in desired may replace enclosing supported. 90} 91 92func makeTagList(s string) (tags []Tag) { 93 for _, s := range strings.Split(s, ",") { 94 tags = append(tags, mk(strings.TrimSpace(s))) 95 } 96 return tags 97} 98 99func TestMatchStrings(t *testing.T) { 100 testCases := []struct { 101 supported string 102 desired string // strings separted by | 103 tag string 104 index int 105 }{{ 106 supported: "en", 107 desired: "", 108 tag: "en", 109 index: 0, 110 }, { 111 supported: "en", 112 desired: "nl", 113 tag: "en", 114 index: 0, 115 }, { 116 supported: "en,nl", 117 desired: "nl", 118 tag: "nl", 119 index: 1, 120 }, { 121 supported: "en,nl", 122 desired: "nl|en", 123 tag: "nl", 124 index: 1, 125 }, { 126 supported: "en-GB,nl", 127 desired: "en ; q=0.1,nl", 128 tag: "nl", 129 index: 1, 130 }, { 131 supported: "en-GB,nl", 132 desired: "en;q=0.005 | dk; q=0.1,nl ", 133 tag: "en-GB", 134 index: 0, 135 }, { 136 // do not match faulty tags with und 137 supported: "en,und", 138 desired: "|en", 139 tag: "en", 140 index: 0, 141 }} 142 for _, tc := range testCases { 143 t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) { 144 m := NewMatcher(makeTagList(tc.supported)) 145 tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...) 146 if tag.String() != tc.tag || index != tc.index { 147 t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index) 148 } 149 }) 150 } 151} 152 153func TestRegionGroups(t *testing.T) { 154 testCases := []struct { 155 a, b string 156 distance uint8 157 }{ 158 {"zh-TW", "zh-HK", 5}, 159 {"zh-MO", "zh-HK", 4}, 160 {"es-ES", "es-AR", 5}, 161 {"es-ES", "es", 4}, 162 {"es-419", "es-MX", 4}, 163 {"es-AR", "es-MX", 4}, 164 {"es-ES", "es-MX", 5}, 165 {"es-PT", "es-MX", 5}, 166 } 167 for _, tc := range testCases { 168 a := MustParse(tc.a) 169 aScript, _ := a.Script() 170 b := MustParse(tc.b) 171 bScript, _ := b.Script() 172 173 if aScript != bScript { 174 t.Errorf("scripts differ: %q vs %q", aScript, bScript) 175 continue 176 } 177 d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang()) 178 if d != tc.distance { 179 t.Errorf("got %q; want %q", d, tc.distance) 180 } 181 } 182} 183 184func TestIsParadigmLocale(t *testing.T) { 185 testCases := map[string]bool{ 186 "en-US": true, 187 "en-GB": true, 188 "en-VI": false, 189 "es-GB": false, 190 "es-ES": true, 191 "es-419": true, 192 } 193 for str, want := range testCases { 194 tt := Make(str) 195 tag := tt.tag() 196 got := isParadigmLocale(tag.LangID, tag.RegionID) 197 if got != want { 198 t.Errorf("isPL(%q) = %v; want %v", str, got, want) 199 } 200 } 201} 202 203// Implementation of String methods for various types for debugging purposes. 204 205func (m *matcher) String() string { 206 w := &bytes.Buffer{} 207 fmt.Fprintln(w, "Default:", m.default_) 208 for tag, h := range m.index { 209 fmt.Fprintf(w, " %s: %v\n", tag, h) 210 } 211 return w.String() 212} 213 214func (h *matchHeader) String() string { 215 w := &bytes.Buffer{} 216 fmt.Fprint(w, "haveTag: ") 217 for _, h := range h.haveTags { 218 fmt.Fprintf(w, "%v, ", h) 219 } 220 return w.String() 221} 222 223func (t haveTag) String() string { 224 return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript) 225} 226 227func TestBestMatchAlloc(t *testing.T) { 228 m := NewMatcher(makeTagList("en sr nl")) 229 // Go allocates when creating a list of tags from a single tag! 230 list := []Tag{English} 231 avg := testtext.AllocsPerRun(1, func() { 232 m.Match(list...) 233 }) 234 if avg > 0 { 235 t.Errorf("got %f; want 0", avg) 236 } 237} 238 239var benchHave = []Tag{ 240 mk("en"), 241 mk("en-GB"), 242 mk("za"), 243 mk("zh-Hant"), 244 mk("zh-Hans-CN"), 245 mk("zh"), 246 mk("zh-HK"), 247 mk("ar-MK"), 248 mk("en-CA"), 249 mk("fr-CA"), 250 mk("fr-US"), 251 mk("fr-CH"), 252 mk("fr"), 253 mk("lt"), 254 mk("lv"), 255 mk("iw"), 256 mk("iw-NL"), 257 mk("he"), 258 mk("he-IT"), 259 mk("tlh"), 260 mk("ja"), 261 mk("ja-Jpan"), 262 mk("ja-Jpan-JP"), 263 mk("de"), 264 mk("de-CH"), 265 mk("de-AT"), 266 mk("de-DE"), 267 mk("sr"), 268 mk("sr-Latn"), 269 mk("sr-Cyrl"), 270 mk("sr-ME"), 271} 272 273var benchWant = [][]Tag{ 274 []Tag{ 275 mk("en"), 276 }, 277 []Tag{ 278 mk("en-AU"), 279 mk("de-HK"), 280 mk("nl"), 281 mk("fy"), 282 mk("lv"), 283 }, 284 []Tag{ 285 mk("en-AU"), 286 mk("de-HK"), 287 mk("nl"), 288 mk("fy"), 289 }, 290 []Tag{ 291 mk("ja-Hant"), 292 mk("da-HK"), 293 mk("nl"), 294 mk("zh-TW"), 295 }, 296 []Tag{ 297 mk("ja-Hant"), 298 mk("da-HK"), 299 mk("nl"), 300 mk("hr"), 301 }, 302} 303 304func BenchmarkMatch(b *testing.B) { 305 m := newMatcher(benchHave, nil) 306 for i := 0; i < b.N; i++ { 307 for _, want := range benchWant { 308 m.getBest(want...) 309 } 310 } 311} 312 313func BenchmarkMatchExact(b *testing.B) { 314 want := mk("en") 315 m := newMatcher(benchHave, nil) 316 for i := 0; i < b.N; i++ { 317 m.getBest(want) 318 } 319} 320 321func BenchmarkMatchAltLanguagePresent(b *testing.B) { 322 want := mk("hr") 323 m := newMatcher(benchHave, nil) 324 for i := 0; i < b.N; i++ { 325 m.getBest(want) 326 } 327} 328 329func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { 330 want := mk("nn") 331 m := newMatcher(benchHave, nil) 332 for i := 0; i < b.N; i++ { 333 m.getBest(want) 334 } 335} 336 337func BenchmarkMatchAltScriptPresent(b *testing.B) { 338 want := mk("zh-Hant-CN") 339 m := newMatcher(benchHave, nil) 340 for i := 0; i < b.N; i++ { 341 m.getBest(want) 342 } 343} 344 345func BenchmarkMatchAltScriptNotPresent(b *testing.B) { 346 want := mk("fr-Cyrl") 347 m := newMatcher(benchHave, nil) 348 for i := 0; i < b.N; i++ { 349 m.getBest(want) 350 } 351} 352 353func BenchmarkMatchLimitedExact(b *testing.B) { 354 want := []Tag{mk("he-NL"), mk("iw-NL")} 355 m := newMatcher(benchHave, nil) 356 for i := 0; i < b.N; i++ { 357 m.getBest(want...) 358 } 359} 360