1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9import (
10	"encoding/xml"
11	"fmt"
12	"io"
13	"log"
14	"sort"
15	"strconv"
16	"strings"
17
18	"golang.org/x/text/encoding/internal/identifier"
19	"golang.org/x/text/internal/gen"
20)
21
// registry describes the subset of the IANA character-sets XML document that
// this generator decodes. The document element is <registry>; it in turn
// contains nested <registry> elements, each holding a list of records.
type registry struct {
	// XMLName names the document element, <registry>.
	XMLName xml.Name `xml:"registry"`
	// Updated holds the text of the <updated> element.
	Updated string `xml:"updated"`
	// Registry holds one entry per nested <registry> element.
	Registry []registryGroup `xml:"registry"`
}

// registryGroup is one nested <registry> element of the document.
type registryGroup struct {
	// ID is the value of the element's id attribute.
	ID string `xml:"id,attr"`
	// Record holds one entry per <record> child element.
	Record []registryRecord `xml:"record"`
}

// registryRecord is a single <record> element describing one character set.
type registryRecord struct {
	// Name is the text of the <name> element.
	Name string `xml:"name"`
	// Xref holds one entry per <xref> child element.
	Xref []registryXref `xml:"xref"`
	// Desc captures the <description> element.
	Desc registryDesc `xml:"description,"`
	// MIB is the text of the <value> element; main parses it as an integer.
	MIB string `xml:"value"`
	// Alias holds the text of every <alias> element.
	Alias []string `xml:"alias"`
	// MIME is the text of the <preferred_alias> element, if any.
	MIME string `xml:"preferred_alias"`
}

// registryXref is an <xref> element; both values come from attributes.
type registryXref struct {
	Type string `xml:"type,attr"`
	Data string `xml:"data,attr"`
}

// registryDesc holds the raw inner XML of a <description> element.
type registryDesc struct {
	Data string `xml:",innerxml"`
}
42
43func main() {
44	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
45	reg := &registry{}
46	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
47		log.Fatalf("Error decoding charset registry: %v", err)
48	}
49	if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
50		log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
51	}
52
53	x := &indexInfo{}
54
55	for _, rec := range reg.Registry[0].Record {
56		mib := identifier.MIB(parseInt(rec.MIB))
57		x.addEntry(mib, rec.Name)
58		for _, a := range rec.Alias {
59			a = strings.Split(a, " ")[0] // strip comments.
60			x.addAlias(a, mib)
61			// MIB name aliases are prefixed with a "cs" (character set) in the
62			// registry to identify them as display names and to ensure that
63			// the name starts with a lowercase letter in case it is used as
64			// an identifier. We remove it to be left with a nice clean name.
65			if strings.HasPrefix(a, "cs") {
66				x.setName(2, a[2:])
67			}
68		}
69		if rec.MIME != "" {
70			x.addAlias(rec.MIME, mib)
71			x.setName(1, rec.MIME)
72		}
73	}
74
75	w := gen.NewCodeWriter()
76
77	fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`)
78
79	writeIndex(w, x)
80
81	w.WriteGoFile("tables.go", "ianaindex")
82}
83
84type alias struct {
85	name string
86	mib  identifier.MIB
87}
88
89type indexInfo struct {
90	// compacted index from code to MIB
91	codeToMIB []identifier.MIB
92	alias     []alias
93	names     [][3]string
94}
95
96func (ii *indexInfo) Len() int {
97	return len(ii.codeToMIB)
98}
99
100func (ii *indexInfo) Less(a, b int) bool {
101	return ii.codeToMIB[a] < ii.codeToMIB[b]
102}
103
104func (ii *indexInfo) Swap(a, b int) {
105	ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
106	// Co-sort the names.
107	ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
108}
109
110func (ii *indexInfo) setName(i int, name string) {
111	ii.names[len(ii.names)-1][i] = name
112}
113
114func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
115	ii.names = append(ii.names, [3]string{name, name, name})
116	ii.addAlias(name, mib)
117	ii.codeToMIB = append(ii.codeToMIB, mib)
118}
119
120func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
121	// Don't add duplicates for the same mib. Adding duplicate aliases for
122	// different MIBs will cause the compiler to barf on an invalid map: great!.
123	for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
124		if ii.alias[i].name == name {
125			return
126		}
127	}
128	ii.alias = append(ii.alias, alias{name, mib})
129	lower := strings.ToLower(name)
130	if lower != name {
131		ii.addAlias(lower, mib)
132	}
133}
134
// maxMIMENameLen is the upper bound writeIndex enforces on the index byte it
// places in front of a MIME name; '0' - 1 evaluates to 47. The official limit
// for MIME names is 40, so this leaves some buffer.
const maxMIMENameLen = '0' - 1
136
137func writeIndex(w *gen.CodeWriter, x *indexInfo) {
138	sort.Stable(x)
139
140	// Write constants.
141	fmt.Fprintln(w, "const (")
142	for i, m := range x.codeToMIB {
143		if i == 0 {
144			fmt.Fprintf(w, "enc%d = iota\n", m)
145		} else {
146			fmt.Fprintf(w, "enc%d\n", m)
147		}
148	}
149	fmt.Fprintln(w, "numIANA")
150	fmt.Fprintln(w, ")")
151
152	w.WriteVar("ianaToMIB", x.codeToMIB)
153
154	var ianaNames, mibNames []string
155	for _, names := range x.names {
156		n := names[0]
157		if names[0] != names[1] {
158			// MIME names are mostly identical to IANA names. We share the
159			// tables by setting the first byte of the string to an index into
160			// the string itself (< maxMIMENameLen) to the IANA name. The MIME
161			// name immediately follows the index.
162			x := len(names[1]) + 1
163			if x > maxMIMENameLen {
164				log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
165			}
166			n = string(x) + names[1] + names[0]
167		}
168		ianaNames = append(ianaNames, n)
169		mibNames = append(mibNames, names[2])
170	}
171
172	w.WriteVar("ianaNames", ianaNames)
173	w.WriteVar("mibNames", mibNames)
174
175	w.WriteComment(`
176	TODO: Instead of using a map, we could use binary search strings doing
177	on-the fly lower-casing per character. This allows to always avoid
178	allocation and will be considerably more compact.`)
179	fmt.Fprintln(w, "var ianaAliases = map[string]int{")
180	for _, a := range x.alias {
181		fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
182	}
183	fmt.Fprintln(w, "}")
184}
185
// parseInt parses s as a base-10 integer and returns it as an int. Input that
// is not a valid integer, or that does not fit in an int, terminates the
// program through log.Fatalf.
func parseInt(s string) int {
	// A bitSize of 0 makes ParseInt range-check against int itself, so the
	// conversion below cannot silently truncate on 32-bit platforms.
	x, err := strconv.ParseInt(s, 10, 0)
	if err != nil {
		log.Fatalf("Could not parse integer: %v", err)
	}
	return int(x)
}
193