1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build ignore 6// +build ignore 7 8package main 9 10import ( 11 "encoding/xml" 12 "fmt" 13 "io" 14 "log" 15 "sort" 16 "strconv" 17 "strings" 18 19 "golang.org/x/text/encoding/internal/identifier" 20 "golang.org/x/text/internal/gen" 21) 22 23type registry struct { 24 XMLName xml.Name `xml:"registry"` 25 Updated string `xml:"updated"` 26 Registry []struct { 27 ID string `xml:"id,attr"` 28 Record []struct { 29 Name string `xml:"name"` 30 Xref []struct { 31 Type string `xml:"type,attr"` 32 Data string `xml:"data,attr"` 33 } `xml:"xref"` 34 Desc struct { 35 Data string `xml:",innerxml"` 36 } `xml:"description,"` 37 MIB string `xml:"value"` 38 Alias []string `xml:"alias"` 39 MIME string `xml:"preferred_alias"` 40 } `xml:"record"` 41 } `xml:"registry"` 42} 43 44func main() { 45 r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") 46 reg := ®istry{} 47 if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { 48 log.Fatalf("Error decoding charset registry: %v", err) 49 } 50 if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { 51 log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) 52 } 53 54 x := &indexInfo{} 55 56 for _, rec := range reg.Registry[0].Record { 57 mib := identifier.MIB(parseInt(rec.MIB)) 58 x.addEntry(mib, rec.Name) 59 for _, a := range rec.Alias { 60 a = strings.Split(a, " ")[0] // strip comments. 61 x.addAlias(a, mib) 62 // MIB name aliases are prefixed with a "cs" (character set) in the 63 // registry to identify them as display names and to ensure that 64 // the name starts with a lowercase letter in case it is used as 65 // an identifier. We remove it to be left with a nice clean name. 66 if strings.HasPrefix(a, "cs") { 67 x.setName(2, a[2:]) 68 } 69 } 70 if rec.MIME != "" { 71 x.addAlias(rec.MIME, mib) 72 x.setName(1, rec.MIME) 73 } 74 } 75 76 w := gen.NewCodeWriter() 77 78 fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`) 79 80 writeIndex(w, x) 81 82 w.WriteGoFile("tables.go", "ianaindex") 83} 84 85type alias struct { 86 name string 87 mib identifier.MIB 88} 89 90type indexInfo struct { 91 // compacted index from code to MIB 92 codeToMIB []identifier.MIB 93 alias []alias 94 names [][3]string 95} 96 97func (ii *indexInfo) Len() int { 98 return len(ii.codeToMIB) 99} 100 101func (ii *indexInfo) Less(a, b int) bool { 102 return ii.codeToMIB[a] < ii.codeToMIB[b] 103} 104 105func (ii *indexInfo) Swap(a, b int) { 106 ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a] 107 // Co-sort the names. 108 ii.names[a], ii.names[b] = ii.names[b], ii.names[a] 109} 110 111func (ii *indexInfo) setName(i int, name string) { 112 ii.names[len(ii.names)-1][i] = name 113} 114 115func (ii *indexInfo) addEntry(mib identifier.MIB, name string) { 116 ii.names = append(ii.names, [3]string{name, name, name}) 117 ii.addAlias(name, mib) 118 ii.codeToMIB = append(ii.codeToMIB, mib) 119} 120 121func (ii *indexInfo) addAlias(name string, mib identifier.MIB) { 122 // Don't add duplicates for the same mib. Adding duplicate aliases for 123 // different MIBs will cause the compiler to barf on an invalid map: great!. 124 for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- { 125 if ii.alias[i].name == name { 126 return 127 } 128 } 129 ii.alias = append(ii.alias, alias{name, mib}) 130 lower := strings.ToLower(name) 131 if lower != name { 132 ii.addAlias(lower, mib) 133 } 134} 135 136const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer. 137 138func writeIndex(w *gen.CodeWriter, x *indexInfo) { 139 sort.Stable(x) 140 141 // Write constants. 142 fmt.Fprintln(w, "const (") 143 for i, m := range x.codeToMIB { 144 if i == 0 { 145 fmt.Fprintf(w, "enc%d = iota\n", m) 146 } else { 147 fmt.Fprintf(w, "enc%d\n", m) 148 } 149 } 150 fmt.Fprintln(w, "numIANA") 151 fmt.Fprintln(w, ")") 152 153 w.WriteVar("ianaToMIB", x.codeToMIB) 154 155 var ianaNames, mibNames []string 156 for _, names := range x.names { 157 n := names[0] 158 if names[0] != names[1] { 159 // MIME names are mostly identical to IANA names. We share the 160 // tables by setting the first byte of the string to an index into 161 // the string itself (< maxMIMENameLen) to the IANA name. The MIME 162 // name immediately follows the index. 163 x := len(names[1]) + 1 164 if x > maxMIMENameLen { 165 log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen) 166 } 167 n = string(x) + names[1] + names[0] 168 } 169 ianaNames = append(ianaNames, n) 170 mibNames = append(mibNames, names[2]) 171 } 172 173 w.WriteVar("ianaNames", ianaNames) 174 w.WriteVar("mibNames", mibNames) 175 176 w.WriteComment(` 177 TODO: Instead of using a map, we could use binary search strings doing 178 on-the fly lower-casing per character. This allows to always avoid 179 allocation and will be considerably more compact.`) 180 fmt.Fprintln(w, "var ianaAliases = map[string]int{") 181 for _, a := range x.alias { 182 fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib) 183 } 184 fmt.Fprintln(w, "}") 185} 186 187func parseInt(s string) int { 188 x, err := strconv.ParseInt(s, 10, 64) 189 if err != nil { 190 log.Fatalf("Could not parse integer: %v", err) 191 } 192 return int(x) 193} 194