1// Copyright 2014 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build ignore 6// +build ignore 7 8// Generator for display name tables. 9 10package main 11 12import ( 13 "bytes" 14 "flag" 15 "fmt" 16 "log" 17 "reflect" 18 "sort" 19 "strings" 20 21 "golang.org/x/text/internal/gen" 22 "golang.org/x/text/language" 23 "golang.org/x/text/unicode/cldr" 24) 25 26var ( 27 test = flag.Bool("test", false, 28 "test existing tables; can be used to compare web data with package data.") 29 outputFile = flag.String("output", "tables.go", "output file") 30 31 stats = flag.Bool("stats", false, "prints statistics to stderr") 32 33 short = flag.Bool("short", false, `Use "short" alternatives, when available.`) 34 draft = flag.String("draft", 35 "contributed", 36 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 37 pkg = flag.String("package", 38 "display", 39 "the name of the package in which the generated file is to be included") 40 41 tags = newTagSet("tags", 42 []language.Tag{}, 43 "space-separated list of tags to include or empty for all") 44 dict = newTagSet("dict", 45 dictTags(), 46 "space-separated list or tags for which to include a Dictionary. "+ 47 `"" means the common list from go.text/language.`) 48) 49 50func dictTags() (tag []language.Tag) { 51 // TODO: replace with language.Common.Tags() once supported. 52 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " + 53 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " + 54 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " + 55 "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " + 56 "zh zh-Hans zh-Hant zu" 57 58 for _, s := range strings.Split(str, " ") { 59 tag = append(tag, language.MustParse(s)) 60 } 61 return tag 62} 63 64func main() { 65 gen.Init() 66 67 // Read the CLDR zip file. 
68 r := gen.OpenCLDRCoreZip() 69 defer r.Close() 70 71 d := &cldr.Decoder{} 72 d.SetDirFilter("main", "supplemental") 73 d.SetSectionFilter("localeDisplayNames") 74 data, err := d.DecodeZip(r) 75 if err != nil { 76 log.Fatalf("DecodeZip: %v", err) 77 } 78 79 w := gen.NewCodeWriter() 80 defer w.WriteGoFile(*outputFile, "display") 81 82 gen.WriteCLDRVersion(w) 83 84 b := builder{ 85 w: w, 86 data: data, 87 group: make(map[string]*group), 88 } 89 b.generate() 90} 91 92const tagForm = language.All 93 94// tagSet is used to parse command line flags of tags. It implements the 95// flag.Value interface. 96type tagSet map[language.Tag]bool 97 98func newTagSet(name string, tags []language.Tag, usage string) tagSet { 99 f := tagSet(make(map[language.Tag]bool)) 100 for _, t := range tags { 101 f[t] = true 102 } 103 flag.Var(f, name, usage) 104 return f 105} 106 107// String implements the String method of the flag.Value interface. 108func (f tagSet) String() string { 109 tags := []string{} 110 for t := range f { 111 tags = append(tags, t.String()) 112 } 113 sort.Strings(tags) 114 return strings.Join(tags, " ") 115} 116 117// Set implements Set from the flag.Value interface. 118func (f tagSet) Set(s string) error { 119 if s != "" { 120 for _, s := range strings.Split(s, " ") { 121 if s != "" { 122 tag, err := tagForm.Parse(s) 123 if err != nil { 124 return err 125 } 126 f[tag] = true 127 } 128 } 129 } 130 return nil 131} 132 133func (f tagSet) contains(t language.Tag) bool { 134 if len(f) == 0 { 135 return true 136 } 137 return f[t] 138} 139 140// builder is used to create all tables with display name information. 141type builder struct { 142 w *gen.CodeWriter 143 144 data *cldr.CLDR 145 146 fromLocs []string 147 148 // destination tags for the current locale. 
149 toTags []string 150 toTagIndex map[string]int 151 152 // list of supported tags 153 supported []language.Tag 154 155 // key-value pairs per group 156 group map[string]*group 157 158 // statistics 159 sizeIndex int // total size of all indexes of headers 160 sizeData int // total size of all data of headers 161 totalSize int 162} 163 164type group struct { 165 // Maps from a given language to the Namer data for this language. 166 lang map[language.Tag]keyValues 167 headers []header 168 169 toTags []string 170 threeStart int 171 fourPlusStart int 172} 173 174// set sets the typ to the name for locale loc. 175func (g *group) set(t language.Tag, typ, name string) { 176 kv := g.lang[t] 177 if kv == nil { 178 kv = make(keyValues) 179 g.lang[t] = kv 180 } 181 if kv[typ] == "" { 182 kv[typ] = name 183 } 184} 185 186type keyValues map[string]string 187 188type header struct { 189 tag language.Tag 190 data string 191 index []uint16 192} 193 194var versionInfo = `// Version is deprecated. Use CLDRVersion. 195const Version = %#v 196 197` 198 199var self = language.MustParse("mul") 200 201// generate builds and writes all tables. 202func (b *builder) generate() { 203 fmt.Fprintf(b.w, versionInfo, cldr.Version) 204 205 b.filter() 206 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 207 if ldn.Languages != nil { 208 for _, v := range ldn.Languages.Language { 209 lang := v.Type 210 if lang == "root" { 211 // We prefer the data from "und" 212 // TODO: allow both the data for root and und somehow. 213 continue 214 } 215 tag := tagForm.MustParse(lang) 216 if tags.contains(tag) { 217 g.set(loc, tag.String(), v.Data()) 218 } 219 } 220 } 221 }) 222 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 223 if ldn.Scripts != nil { 224 for _, v := range ldn.Scripts.Script { 225 code := language.MustParseScript(v.Type) 226 if code.IsPrivateUse() { // Qaaa..Qabx 227 // TODO: data currently appears to be very meager. 
228 // Reconsider if we have data for English. 229 if loc == language.English { 230 log.Fatal("Consider including data for private use scripts.") 231 } 232 continue 233 } 234 g.set(loc, code.String(), v.Data()) 235 } 236 } 237 }) 238 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 239 if ldn.Territories != nil { 240 for _, v := range ldn.Territories.Territory { 241 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data()) 242 } 243 } 244 }) 245 246 b.makeSupported() 247 248 b.writeParents() 249 250 b.writeGroup("lang") 251 b.writeGroup("script") 252 b.writeGroup("region") 253 254 b.w.WriteConst("numSupported", len(b.supported)) 255 buf := bytes.Buffer{} 256 for _, tag := range b.supported { 257 fmt.Fprint(&buf, tag.String(), "|") 258 } 259 b.w.WriteConst("supported", buf.String()) 260 261 b.writeDictionaries() 262 263 b.supported = []language.Tag{self} 264 265 // Compute the names of locales in their own language. Some of these names 266 // may be specified in their parent locales. We iterate the maximum depth 267 // of the parent three times to match successive parents of tags until a 268 // possible match is found. 269 for i := 0; i < 4; i++ { 270 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) { 271 parent := tag 272 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) { 273 parent, _ = language.Raw.Compose(b) 274 } 275 if ldn.Languages != nil { 276 for _, v := range ldn.Languages.Language { 277 key := tagForm.MustParse(v.Type) 278 saved := key 279 if key == parent { 280 g.set(self, tag.String(), v.Data()) 281 } 282 for k := 0; k < i; k++ { 283 key = key.Parent() 284 } 285 if key == tag { 286 g.set(self, saved.String(), v.Data()) // set does not overwrite a value. 
287 } 288 } 289 } 290 }) 291 } 292 293 b.writeGroup("self") 294} 295 296func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) { 297 b.sizeIndex = 0 298 b.sizeData = 0 299 b.toTags = nil 300 b.fromLocs = nil 301 b.toTagIndex = make(map[string]int) 302 303 g := b.group[name] 304 if g == nil { 305 g = &group{lang: make(map[language.Tag]keyValues)} 306 b.group[name] = g 307 } 308 for _, loc := range b.data.Locales() { 309 // We use RawLDML instead of LDML as we are managing our own inheritance 310 // in this implementation. 311 ldml := b.data.RawLDML(loc) 312 313 // We do not support the POSIX variant (it is not a supported BCP 47 314 // variant). This locale also doesn't happen to contain any data, so 315 // we'll skip it by checking for this. 316 tag, err := tagForm.Parse(loc) 317 if err != nil { 318 if ldml.LocaleDisplayNames != nil { 319 log.Fatalf("setData: %v", err) 320 } 321 continue 322 } 323 if ldml.LocaleDisplayNames != nil && tags.contains(tag) { 324 f(g, tag, ldml.LocaleDisplayNames) 325 } 326 } 327} 328 329func (b *builder) filter() { 330 filter := func(s *cldr.Slice) { 331 if *short { 332 s.SelectOnePerGroup("alt", []string{"short", ""}) 333 } else { 334 s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) 335 } 336 d, err := cldr.ParseDraft(*draft) 337 if err != nil { 338 log.Fatalf("filter: %v", err) 339 } 340 s.SelectDraft(d) 341 } 342 for _, loc := range b.data.Locales() { 343 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { 344 if ldn.Languages != nil { 345 s := cldr.MakeSlice(&ldn.Languages.Language) 346 if filter(&s); len(ldn.Languages.Language) == 0 { 347 ldn.Languages = nil 348 } 349 } 350 if ldn.Scripts != nil { 351 s := cldr.MakeSlice(&ldn.Scripts.Script) 352 if filter(&s); len(ldn.Scripts.Script) == 0 { 353 ldn.Scripts = nil 354 } 355 } 356 if ldn.Territories != nil { 357 s := cldr.MakeSlice(&ldn.Territories.Territory) 358 if filter(&s); len(ldn.Territories.Territory) == 0 { 359 
ldn.Territories = nil 360 } 361 } 362 } 363 } 364} 365 366// makeSupported creates a list of all supported locales. 367func (b *builder) makeSupported() { 368 // tags across groups 369 for _, g := range b.group { 370 for t, _ := range g.lang { 371 b.supported = append(b.supported, t) 372 } 373 } 374 b.supported = b.supported[:unique(tagsSorter(b.supported))] 375 376} 377 378type tagsSorter []language.Tag 379 380func (a tagsSorter) Len() int { return len(a) } 381func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 382func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() } 383 384func (b *builder) writeGroup(name string) { 385 g := b.group[name] 386 387 for _, kv := range g.lang { 388 for t, _ := range kv { 389 g.toTags = append(g.toTags, t) 390 } 391 } 392 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))] 393 394 // Allocate header per supported value. 395 g.headers = make([]header, len(b.supported)) 396 for i, sup := range b.supported { 397 kv, ok := g.lang[sup] 398 if !ok { 399 g.headers[i].tag = sup 400 continue 401 } 402 data := []byte{} 403 index := make([]uint16, len(g.toTags), len(g.toTags)+1) 404 for j, t := range g.toTags { 405 index[j] = uint16(len(data)) 406 data = append(data, kv[t]...) 407 } 408 index = append(index, uint16(len(data))) 409 410 // Trim the tail of the index. 411 // TODO: indexes can be reduced in size quite a bit more. 412 n := len(index) 413 for ; n >= 2 && index[n-2] == index[n-1]; n-- { 414 } 415 index = index[:n] 416 417 // Workaround for a bug in CLDR 26. 418 // See https://unicode.org/cldr/trac/ticket/8042. 
419 if cldr.Version == "26" && sup.String() == "hsb" { 420 data = bytes.Replace(data, []byte{'"'}, nil, 1) 421 } 422 g.headers[i] = header{sup, string(data), index} 423 } 424 g.writeTable(b.w, name) 425} 426 427type tagsBySize []string 428 429func (l tagsBySize) Len() int { return len(l) } 430func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 431func (l tagsBySize) Less(i, j int) bool { 432 a, b := l[i], l[j] 433 // Sort single-tag entries based on size first. Otherwise alphabetic. 434 if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) { 435 return len(a) < len(b) 436 } 437 return a < b 438} 439 440// parentIndices returns slice a of len(tags) where tags[a[i]] is the parent 441// of tags[i]. 442func parentIndices(tags []language.Tag) []int16 { 443 index := make(map[language.Tag]int16) 444 for i, t := range tags { 445 index[t] = int16(i) 446 } 447 448 // Construct default parents. 449 parents := make([]int16, len(tags)) 450 for i, t := range tags { 451 parents[i] = -1 452 for t = t.Parent(); t != language.Und; t = t.Parent() { 453 if j, ok := index[t]; ok { 454 parents[i] = j 455 break 456 } 457 } 458 } 459 return parents 460} 461 462func (b *builder) writeParents() { 463 parents := parentIndices(b.supported) 464 fmt.Fprintf(b.w, "var parents = ") 465 b.w.WriteArray(parents) 466} 467 468// writeKeys writes keys to a special index used by the display package. 469// tags are assumed to be sorted by length. 
470func writeKeys(w *gen.CodeWriter, name string, keys []string) { 471 w.Size += int(3 * reflect.TypeOf("").Size()) 472 w.WriteComment("Number of keys: %d", len(keys)) 473 fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name) 474 for i := 2; i <= 4; i++ { 475 sub := []string{} 476 for _, t := range keys { 477 if len(t) != i { 478 break 479 } 480 sub = append(sub, t) 481 } 482 s := strings.Join(sub, "") 483 w.WriteString(s) 484 fmt.Fprintf(w, ",\n") 485 keys = keys[len(sub):] 486 } 487 fmt.Fprintln(w, "\t}") 488 if len(keys) > 0 { 489 w.Size += int(reflect.TypeOf([]string{}).Size()) 490 fmt.Fprintf(w, "\t%sTagsLong = ", name) 491 w.WriteSlice(keys) 492 } 493 fmt.Fprintln(w, ")\n") 494} 495 496// identifier creates an identifier from the given tag. 497func identifier(t language.Tag) string { 498 return strings.Replace(t.String(), "-", "", -1) 499} 500 501func (h *header) writeEntry(w *gen.CodeWriter, name string) { 502 if len(dict) > 0 && dict.contains(h.tag) { 503 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 504 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name) 505 fmt.Fprintln(w, "\t},") 506 } else if len(h.data) == 0 { 507 fmt.Fprintln(w, "\t\t{}, //", h.tag) 508 } else { 509 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 510 w.WriteString(h.data) 511 fmt.Fprintln(w, ",") 512 w.WriteSlice(h.index) 513 fmt.Fprintln(w, ",\n\t},") 514 } 515} 516 517// write the data for the given header as single entries. The size for this data 518// was already accounted for in writeEntry. 519func (h *header) writeSingle(w *gen.CodeWriter, name string) { 520 if len(dict) > 0 && dict.contains(h.tag) { 521 tag := identifier(h.tag) 522 w.WriteConst(tag+name+"Str", h.data) 523 524 // Note that we create a slice instead of an array. If we use an array 525 // we need to refer to it as a[:] in other tables, which will cause the 526 // array to always be included by the linker. See Issue 7651. 
527 w.WriteVar(tag+name+"Idx", h.index) 528 } 529} 530 531// WriteTable writes an entry for a single Namer. 532func (g *group) writeTable(w *gen.CodeWriter, name string) { 533 start := w.Size 534 writeKeys(w, name, g.toTags) 535 w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size()) 536 537 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers)) 538 539 title := strings.Title(name) 540 for _, h := range g.headers { 541 h.writeEntry(w, title) 542 } 543 fmt.Fprintln(w, "}\n") 544 545 for _, h := range g.headers { 546 h.writeSingle(w, title) 547 } 548 n := w.Size - start 549 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000) 550} 551 552func (b *builder) writeDictionaries() { 553 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages") 554 fmt.Fprintln(b.w, "var (") 555 parents := parentIndices(b.supported) 556 557 for i, t := range b.supported { 558 if dict.contains(t) { 559 ident := identifier(t) 560 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t) 561 if p := parents[i]; p == -1 { 562 fmt.Fprintln(b.w, "\t\tnil,") 563 } else { 564 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p])) 565 } 566 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident) 567 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident) 568 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident) 569 fmt.Fprintln(b.w, "\t}") 570 } 571 } 572 fmt.Fprintln(b.w, ")") 573 574 var s string 575 var a []uint16 576 sz := reflect.TypeOf(s).Size() 577 sz += reflect.TypeOf(a).Size() 578 sz *= 3 579 sz += reflect.TypeOf(&a).Size() 580 n := int(sz) * len(dict) 581 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000) 582 583 b.w.Size += n 584} 585 586// unique sorts the given lists and removes duplicate entries by swapping them 587// past position k, where k is the number of unique values. It returns k. 
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	// kept is the number of distinct values compacted at the front so far.
	// The first element of a non-empty list is always kept.
	kept := 1
	for next := 1; next < n; next++ {
		// The list is sorted, so an element is new only if it is strictly
		// greater than the last value that was kept.
		if !a.Less(kept-1, next) {
			continue
		}
		// Move the new value directly behind the kept ones. The duplicate
		// that occupied that slot ends up at next, past the unique prefix.
		if next != kept {
			a.Swap(kept, next)
		}
		kept++
	}
	return kept
}