1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ignore
6// +build ignore
7
8package main
9
10import (
11	"encoding/xml"
12	"fmt"
13	"io"
14	"log"
15	"sort"
16	"strconv"
17	"strings"
18
19	"golang.org/x/text/encoding/internal/identifier"
20	"golang.org/x/text/internal/gen"
21)
22
// registry mirrors the parts of the IANA character-sets XML document that are
// decoded by this generator. The struct tags name the XML element or attribute
// each field is filled from.
//
// Every Record describes one charset: Name comes from its <name> element, MIB
// from the text of its <value> element (main converts it to an integer),
// Alias from its <alias> elements and MIME from its <preferred_alias>
// element. Xref holds the type and data attributes of <xref> elements and
// Desc the raw inner XML of <description>. main reads only ID, Name, MIB,
// Alias and MIME.
type registry struct {
	XMLName  xml.Name `xml:"registry"`
	Updated  string   `xml:"updated"`
	Registry []struct {
		ID     string `xml:"id,attr"`
		Record []struct {
			Name string `xml:"name"`
			Xref []struct {
				Type string `xml:"type,attr"`
				Data string `xml:"data,attr"`
			} `xml:"xref"`
			Desc struct {
				Data string `xml:",innerxml"`
			} `xml:"description,"`
			MIB   string   `xml:"value"`
			Alias []string `xml:"alias"`
			MIME  string   `xml:"preferred_alias"`
		} `xml:"record"`
	} `xml:"registry"`
}
43
44func main() {
45	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
46	reg := &registry{}
47	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
48		log.Fatalf("Error decoding charset registry: %v", err)
49	}
50	if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
51		log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
52	}
53
54	x := &indexInfo{}
55
56	for _, rec := range reg.Registry[0].Record {
57		mib := identifier.MIB(parseInt(rec.MIB))
58		x.addEntry(mib, rec.Name)
59		for _, a := range rec.Alias {
60			a = strings.Split(a, " ")[0] // strip comments.
61			x.addAlias(a, mib)
62			// MIB name aliases are prefixed with a "cs" (character set) in the
63			// registry to identify them as display names and to ensure that
64			// the name starts with a lowercase letter in case it is used as
65			// an identifier. We remove it to be left with a nice clean name.
66			if strings.HasPrefix(a, "cs") {
67				x.setName(2, a[2:])
68			}
69		}
70		if rec.MIME != "" {
71			x.addAlias(rec.MIME, mib)
72			x.setName(1, rec.MIME)
73		}
74	}
75
76	w := gen.NewCodeWriter()
77
78	fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`)
79
80	writeIndex(w, x)
81
82	w.WriteGoFile("tables.go", "ianaindex")
83}
84
// alias pairs one charset name with the MIB of the charset it refers to.
// writeIndex emits each alias as an entry of the generated ianaAliases map,
// keyed by name.
type alias struct {
	name string
	mib  identifier.MIB
}
89
// indexInfo accumulates the data from which the generated tables are written.
//
// codeToMIB and names are parallel slices: addEntry appends one element to
// each, and Swap exchanges elements of both so they stay in step while
// sorting. Each names element holds three names for its entry, which all
// start out as the registry name: slot 0 stays the IANA name, slot 1 is
// overwritten with the preferred (MIME) alias and slot 2 with a "cs"-stripped
// alias when the registry provides them. alias lists every (name, MIB) pair
// that goes into the generated alias map.
//
// indexInfo implements sort.Interface, ordering entries by MIB value.
type indexInfo struct {
	// compacted index from code to MIB
	codeToMIB []identifier.MIB
	alias     []alias
	names     [][3]string
}
96
97func (ii *indexInfo) Len() int {
98	return len(ii.codeToMIB)
99}
100
101func (ii *indexInfo) Less(a, b int) bool {
102	return ii.codeToMIB[a] < ii.codeToMIB[b]
103}
104
105func (ii *indexInfo) Swap(a, b int) {
106	ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
107	// Co-sort the names.
108	ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
109}
110
111func (ii *indexInfo) setName(i int, name string) {
112	ii.names[len(ii.names)-1][i] = name
113}
114
115func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
116	ii.names = append(ii.names, [3]string{name, name, name})
117	ii.addAlias(name, mib)
118	ii.codeToMIB = append(ii.codeToMIB, mib)
119}
120
121func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
122	// Don't add duplicates for the same mib. Adding duplicate aliases for
123	// different MIBs will cause the compiler to barf on an invalid map: great!.
124	for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
125		if ii.alias[i].name == name {
126			return
127		}
128	}
129	ii.alias = append(ii.alias, alias{name, mib})
130	lower := strings.ToLower(name)
131	if lower != name {
132		ii.addAlias(lower, mib)
133	}
134}
135
// maxMIMENameLen is the largest value writeIndex accepts for the index byte
// it puts in front of a combined MIME/IANA name. The constant evaluates to
// '0' - 1, i.e. 47.
// NOTE(review): presumably this keeps the index byte below any character a
// plain name can start with; confirm against the code that reads ianaNames.
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
137
138func writeIndex(w *gen.CodeWriter, x *indexInfo) {
139	sort.Stable(x)
140
141	// Write constants.
142	fmt.Fprintln(w, "const (")
143	for i, m := range x.codeToMIB {
144		if i == 0 {
145			fmt.Fprintf(w, "enc%d = iota\n", m)
146		} else {
147			fmt.Fprintf(w, "enc%d\n", m)
148		}
149	}
150	fmt.Fprintln(w, "numIANA")
151	fmt.Fprintln(w, ")")
152
153	w.WriteVar("ianaToMIB", x.codeToMIB)
154
155	var ianaNames, mibNames []string
156	for _, names := range x.names {
157		n := names[0]
158		if names[0] != names[1] {
159			// MIME names are mostly identical to IANA names. We share the
160			// tables by setting the first byte of the string to an index into
161			// the string itself (< maxMIMENameLen) to the IANA name. The MIME
162			// name immediately follows the index.
163			x := len(names[1]) + 1
164			if x > maxMIMENameLen {
165				log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
166			}
167			n = string(x) + names[1] + names[0]
168		}
169		ianaNames = append(ianaNames, n)
170		mibNames = append(mibNames, names[2])
171	}
172
173	w.WriteVar("ianaNames", ianaNames)
174	w.WriteVar("mibNames", mibNames)
175
176	w.WriteComment(`
177	TODO: Instead of using a map, we could use binary search strings doing
178	on-the fly lower-casing per character. This allows to always avoid
179	allocation and will be considerably more compact.`)
180	fmt.Fprintln(w, "var ianaAliases = map[string]int{")
181	for _, a := range x.alias {
182		fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
183	}
184	fmt.Fprintln(w, "}")
185}
186
// parseInt converts the base-10 string s to an int.
//
// A malformed or out-of-range value is fatal: the error is logged and the
// program exits.
func parseInt(s string) int {
	// A bit size of 0 makes ParseInt range-check against int itself, so the
	// conversion below cannot silently truncate on platforms where int is
	// 32 bits wide.
	x, err := strconv.ParseInt(s, 10, 0)
	if err != nil {
		log.Fatalf("Could not parse integer: %v", err)
	}
	return int(x)
}
194