1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9import (
10	"flag"
11	"fmt"
12	"log"
13	"reflect"
14	"strings"
15	"unicode/utf8"
16
17	"golang.org/x/text/internal"
18	"golang.org/x/text/internal/gen"
19	"golang.org/x/text/internal/number"
20	"golang.org/x/text/internal/stringset"
21	"golang.org/x/text/language"
22	"golang.org/x/text/unicode/cldr"
23)
24
// Command-line flags of the generator.
var (
	// test is the -test switch; see its usage string for the intended purpose.
	test = flag.Bool(
		"test",
		false,
		"test existing tables; can be used to compare web data with package data.",
	)

	// outputFile is the name of the Go file the generated tables are written to.
	outputFile = flag.String("output", "tables.go", "output file")

	// outputTestFile is the -testoutput file name.
	outputTestFile = flag.String("testoutput", "data_test.go", "output file")

	// draft is the minimal CLDR draft level; it is parsed with cldr.ParseDraft
	// by the generator functions.
	draft = flag.String(
		"draft",
		"contributed",
		"Minimal draft requirements (approved, contributed, provisional, unconfirmed).",
	)
)
35
36func main() {
37	gen.Init()
38
39	const pkg = "number"
40
41	gen.Repackage("gen_common.go", "common.go", pkg)
42	// Read the CLDR zip file.
43	r := gen.OpenCLDRCoreZip()
44	defer r.Close()
45
46	d := &cldr.Decoder{}
47	d.SetDirFilter("supplemental", "main")
48	d.SetSectionFilter("numbers", "numberingSystem")
49	data, err := d.DecodeZip(r)
50	if err != nil {
51		log.Fatalf("DecodeZip: %v", err)
52	}
53
54	w := gen.NewCodeWriter()
55	defer w.WriteGoFile(*outputFile, pkg)
56
57	fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
58
59	gen.WriteCLDRVersion(w)
60
61	genNumSystem(w, data)
62	genSymbols(w, data)
63	genFormats(w, data)
64}
65
66var systemMap = map[string]system{"latn": 0}
67
68func getNumberSystem(str string) system {
69	ns, ok := systemMap[str]
70	if !ok {
71		log.Fatalf("No index for numbering system %q", str)
72	}
73	return ns
74}
75
76func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
77	numSysData := []systemData{
78		{digitSize: 1, zero: [4]byte{'0'}},
79	}
80
81	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
82		if len(ns.Digits) == 0 {
83			continue
84		}
85		switch ns.Id {
86		case "latn":
87			// hard-wired
88			continue
89		case "hanidec":
90			// non-consecutive digits: treat as "algorithmic"
91			continue
92		}
93
94		zero, sz := utf8.DecodeRuneInString(ns.Digits)
95		if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
96			log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
97		}
98
99		i := rune(0)
100		for _, r := range ns.Digits {
101			// Verify that we can do simple math on the UTF-8 byte sequence
102			// of zero to get the digit.
103			if zero+i != r {
104				// Runes not consecutive.
105				log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
106			}
107			i++
108		}
109		var x [utf8.UTFMax]byte
110		utf8.EncodeRune(x[:], zero)
111		id := system(len(numSysData))
112		systemMap[ns.Id] = id
113		numSysData = append(numSysData, systemData{
114			id:        id,
115			digitSize: byte(sz),
116			zero:      x,
117		})
118	}
119	w.WriteVar("numSysData", numSysData)
120
121	algoID := system(len(numSysData))
122	fmt.Fprintln(w, "const (")
123	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
124		id, ok := systemMap[ns.Id]
125		if !ok {
126			id = algoID
127			systemMap[ns.Id] = id
128			algoID++
129		}
130		fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
131	}
132	fmt.Fprintln(w, "numNumberSystems")
133	fmt.Fprintln(w, ")")
134
135	fmt.Fprintln(w, "var systemMap = map[string]system{")
136	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
137		fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
138		w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
139	}
140	fmt.Fprintln(w, "}")
141}
142
143func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
144	d, err := cldr.ParseDraft(*draft)
145	if err != nil {
146		log.Fatalf("invalid draft level: %v", err)
147	}
148
149	nNumberSystems := system(len(systemMap))
150
151	type symbols [NumSymbolTypes]string
152
153	type key struct {
154		tag    int // from language.CompactIndex
155		system system
156	}
157	symbolMap := map[key]*symbols{}
158
159	defaults := map[int]system{}
160
161	for _, lang := range data.Locales() {
162		ldml := data.RawLDML(lang)
163		if ldml.Numbers == nil {
164			continue
165		}
166		langIndex, ok := language.CompactIndex(language.MustParse(lang))
167		if !ok {
168			log.Fatalf("No compact index for language %s", lang)
169		}
170		if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
171			defaults[langIndex] = getNumberSystem(d[0].Data())
172		}
173
174		syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
175		syms.SelectDraft(d)
176
177		getFirst := func(name string, x interface{}) string {
178			v := reflect.ValueOf(x)
179			slice := cldr.MakeSlice(x)
180			slice.SelectAnyOf("alt", "", "alt")
181			if reflect.Indirect(v).Len() == 0 {
182				return ""
183			} else if reflect.Indirect(v).Len() > 1 {
184				log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
185			}
186			return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
187		}
188
189		for _, sym := range ldml.Numbers.Symbols {
190			if sym.NumberSystem == "" {
191				// This is just linking the default of root to "latn".
192				continue
193			}
194			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
195				SymDecimal:                getFirst("decimal", &sym.Decimal),
196				SymGroup:                  getFirst("group", &sym.Group),
197				SymList:                   getFirst("list", &sym.List),
198				SymPercentSign:            getFirst("percentSign", &sym.PercentSign),
199				SymPlusSign:               getFirst("plusSign", &sym.PlusSign),
200				SymMinusSign:              getFirst("minusSign", &sym.MinusSign),
201				SymExponential:            getFirst("exponential", &sym.Exponential),
202				SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
203				SymPerMille:               getFirst("perMille", &sym.PerMille),
204				SymInfinity:               getFirst("infinity", &sym.Infinity),
205				SymNan:                    getFirst("nan", &sym.Nan),
206				SymTimeSeparator:          getFirst("timeSeparator", &sym.TimeSeparator),
207			}
208		}
209	}
210
211	// Expand all values.
212	for k, syms := range symbolMap {
213		for t := SymDecimal; t < NumSymbolTypes; t++ {
214			p := k.tag
215			for syms[t] == "" {
216				p = int(internal.Parent[p])
217				if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
218					syms[t] = (*pSyms)[t]
219					break
220				}
221				if p == 0 /* und */ {
222					// Default to root, latn.
223					syms[t] = (*symbolMap[key{}])[t]
224				}
225			}
226		}
227	}
228
229	// Unique the symbol sets and write the string data.
230	m := map[symbols]int{}
231	sb := stringset.NewBuilder()
232
233	symIndex := [][NumSymbolTypes]byte{}
234
235	for ns := system(0); ns < nNumberSystems; ns++ {
236		for _, l := range data.Locales() {
237			langIndex, _ := language.CompactIndex(language.MustParse(l))
238			s := symbolMap[key{langIndex, ns}]
239			if s == nil {
240				continue
241			}
242			if _, ok := m[*s]; !ok {
243				m[*s] = len(symIndex)
244				sb.Add(s[:]...)
245				var x [NumSymbolTypes]byte
246				for i := SymDecimal; i < NumSymbolTypes; i++ {
247					x[i] = byte(sb.Index((*s)[i]))
248				}
249				symIndex = append(symIndex, x)
250			}
251		}
252	}
253	w.WriteVar("symIndex", symIndex)
254	w.WriteVar("symData", sb.Set())
255
256	// resolveSymbolIndex gets the index from the closest matching locale,
257	// including the locale itself.
258	resolveSymbolIndex := func(langIndex int, ns system) byte {
259		for {
260			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
261				return byte(m[*sym])
262			}
263			if langIndex == 0 {
264				return 0 // und, latn
265			}
266			langIndex = int(internal.Parent[langIndex])
267		}
268	}
269
270	// Create an index with the symbols for each locale for the latn numbering
271	// system. If this is not the default, or the only one, for a locale, we
272	// will overwrite the value later.
273	var langToDefaults [language.NumCompactTags]byte
274	for _, l := range data.Locales() {
275		langIndex, _ := language.CompactIndex(language.MustParse(l))
276		langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
277	}
278
279	// Delete redundant entries.
280	for _, l := range data.Locales() {
281		langIndex, _ := language.CompactIndex(language.MustParse(l))
282		def := defaults[langIndex]
283		syms := symbolMap[key{langIndex, def}]
284		if syms == nil {
285			continue
286		}
287		for ns := system(0); ns < nNumberSystems; ns++ {
288			if ns == def {
289				continue
290			}
291			if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
292				delete(symbolMap, key{langIndex, ns})
293			}
294		}
295	}
296
297	// Create a sorted list of alternatives per language. This will only need to
298	// be referenced if a user specified an alternative numbering system.
299	var langToAlt []altSymData
300	for _, l := range data.Locales() {
301		langIndex, _ := language.CompactIndex(language.MustParse(l))
302		start := len(langToAlt)
303		if start > 0x7F {
304			log.Fatal("Number of alternative assignments > 0x7F")
305		}
306		// Create the entry for the default value.
307		def := defaults[langIndex]
308		langToAlt = append(langToAlt, altSymData{
309			compactTag: uint16(langIndex),
310			system:     def,
311			symIndex:   resolveSymbolIndex(langIndex, def),
312		})
313
314		for ns := system(0); ns < nNumberSystems; ns++ {
315			if def == ns {
316				continue
317			}
318			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
319				langToAlt = append(langToAlt, altSymData{
320					compactTag: uint16(langIndex),
321					system:     ns,
322					symIndex:   resolveSymbolIndex(langIndex, ns),
323				})
324			}
325		}
326		if def == 0 && len(langToAlt) == start+1 {
327			// No additional data: erase the entry.
328			langToAlt = langToAlt[:start]
329		} else {
330			// Overwrite the entry in langToDefaults.
331			langToDefaults[langIndex] = 0x80 | byte(start)
332		}
333	}
334	w.WriteComment(`
335langToDefaults maps a compact language index to the default numbering system
336and default symbol set`)
337	w.WriteVar("langToDefaults", langToDefaults)
338
339	w.WriteComment(`
340langToAlt is a list of numbering system and symbol set pairs, sorted and
341marked by compact language index.`)
342	w.WriteVar("langToAlt", langToAlt)
343}
344
345// genFormats generates the lookup table for decimal, scientific and percent
346// patterns.
347//
348// CLDR allows for patterns to be different per language for different numbering
349// systems. In practice the patterns are set to be consistent for a language
350// independent of the numbering system. genFormats verifies that no language
351// deviates from this.
352func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
353	d, err := cldr.ParseDraft(*draft)
354	if err != nil {
355		log.Fatalf("invalid draft level: %v", err)
356	}
357
358	// Fill the first slot with a dummy so we can identify unspecified tags.
359	formats := []number.Pattern{{}}
360	patterns := map[string]int{}
361
362	// TODO: It would be possible to eliminate two of these slices by having
363	// another indirection and store a reference to the combination of patterns.
364	decimal := make([]byte, language.NumCompactTags)
365	scientific := make([]byte, language.NumCompactTags)
366	percent := make([]byte, language.NumCompactTags)
367
368	for _, lang := range data.Locales() {
369		ldml := data.RawLDML(lang)
370		if ldml.Numbers == nil {
371			continue
372		}
373		langIndex, ok := language.CompactIndex(language.MustParse(lang))
374		if !ok {
375			log.Fatalf("No compact index for language %s", lang)
376		}
377		type patternSlice []*struct {
378			cldr.Common
379			Numbers string `xml:"numbers,attr"`
380			Count   string `xml:"count,attr"`
381		}
382
383		add := func(name string, tags []byte, ps patternSlice) {
384			sl := cldr.MakeSlice(&ps)
385			sl.SelectDraft(d)
386			if len(ps) == 0 {
387				return
388			}
389			if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
390				log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
391			}
392			s := ps[0].Data()
393
394			index, ok := patterns[s]
395			if !ok {
396				nf, err := number.ParsePattern(s)
397				if err != nil {
398					log.Fatal(err)
399				}
400				index = len(formats)
401				patterns[s] = index
402				formats = append(formats, *nf)
403			}
404			tags[langIndex] = byte(index)
405		}
406
407		for _, df := range ldml.Numbers.DecimalFormats {
408			for _, l := range df.DecimalFormatLength {
409				if l.Type != "" {
410					continue
411				}
412				for _, f := range l.DecimalFormat {
413					add("decimal", decimal, f.Pattern)
414				}
415			}
416		}
417		for _, df := range ldml.Numbers.ScientificFormats {
418			for _, l := range df.ScientificFormatLength {
419				if l.Type != "" {
420					continue
421				}
422				for _, f := range l.ScientificFormat {
423					add("scientific", scientific, f.Pattern)
424				}
425			}
426		}
427		for _, df := range ldml.Numbers.PercentFormats {
428			for _, l := range df.PercentFormatLength {
429				if l.Type != "" {
430					continue
431				}
432				for _, f := range l.PercentFormat {
433					add("percent", percent, f.Pattern)
434				}
435			}
436		}
437	}
438
439	// Complete the parent tag array to reflect inheritance. An index of 0
440	// indicates an unspecified value.
441	for _, data := range [][]byte{decimal, scientific, percent} {
442		for i := range data {
443			p := uint16(i)
444			for ; data[p] == 0; p = internal.Parent[p] {
445			}
446			data[i] = data[p]
447		}
448	}
449	w.WriteVar("tagToDecimal", decimal)
450	w.WriteVar("tagToScientific", scientific)
451	w.WriteVar("tagToPercent", percent)
452
453	value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
454	// Break up the lines. This won't give ideal perfect formatting, but it is
455	// better than one huge line.
456	value = strings.Replace(value, ", ", ",\n", -1)
457	fmt.Fprintf(w, "var formats = %s\n", value)
458}
459