1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go
6
7// Package ianaindex maps names to Encodings as specified by the IANA registry.
8// This includes both the MIME and IANA names.
9//
10// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
11// more details.
12package ianaindex
13
14import (
15	"errors"
16	"sort"
17	"strings"
18
19	"golang.org/x/text/encoding"
20	"golang.org/x/text/encoding/charmap"
21	"golang.org/x/text/encoding/internal/identifier"
22	"golang.org/x/text/encoding/japanese"
23	"golang.org/x/text/encoding/korean"
24	"golang.org/x/text/encoding/simplifiedchinese"
25	"golang.org/x/text/encoding/traditionalchinese"
26	"golang.org/x/text/encoding/unicode"
27)
28
29// TODO: remove the "Status... incomplete" in the package doc comment.
30// TODO: allow users to specify their own aliases?
31// TODO: allow users to specify their own indexes?
32// TODO: allow canonicalizing names
33
34// NOTE: only use these top-level variables if we can get the linker to drop
35// the indexes when they are not used. Make them a function or perhaps only
36// support MIME otherwise.
37
38var (
39	// MIME is an index to map MIME names.
40	MIME *Index = mime
41
42	// IANA is an index that supports all names and aliases using IANA names as
43	// the canonical identifier.
44	IANA *Index = iana
45
46	// MIB is an index that associates the MIB display name with an Encoding.
47	MIB *Index = mib
48
49	mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]}
50	iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]}
51	mib  = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]}
52)
53
54// Index maps names registered by IANA to Encodings.
55// Currently different Indexes only differ in the names they return for
56// encodings. In the future they may also differ in supported aliases.
57type Index struct {
58	names func(i int) string
59	toMIB []identifier.MIB // Sorted slice of supported MIBs
60	alias map[string]int
61	enc   []encoding.Encoding
62}
63
// Errors returned by the Index methods.
var (
	// errInvalidName is returned by Encoding when the name is not a known alias.
	errInvalidName = errors.New("ianaindex: invalid encoding name")
	// errUnknown is returned by Name when the Encoding carries no usable MIB.
	errUnknown = errors.New("ianaindex: unknown Encoding")
	// errUnsupported is returned by Name when the MIB is not in the index.
	errUnsupported = errors.New("ianaindex: unsupported Encoding")
)
69
70// Encoding returns an Encoding for IANA-registered names. Matching is
71// case-insensitive.
72//
73// If the provided name doesn't match a IANA-registered charset, an error is
74// returned. If the name matches a IANA-registered charset but isn't supported,
75// a nil encoding and a nil error are returned.
76func (x *Index) Encoding(name string) (encoding.Encoding, error) {
77	name = strings.TrimSpace(name)
78	// First try without lowercasing (possibly creating an allocation).
79	i, ok := x.alias[name]
80	if !ok {
81		i, ok = x.alias[strings.ToLower(name)]
82		if !ok {
83			return nil, errInvalidName
84		}
85	}
86	return x.enc[i], nil
87}
88
89// Name reports the canonical name of the given Encoding. It will return an
90// error if the e is not associated with a known encoding scheme.
91func (x *Index) Name(e encoding.Encoding) (string, error) {
92	id, ok := e.(identifier.Interface)
93	if !ok {
94		return "", errUnknown
95	}
96	mib, _ := id.ID()
97	if mib == 0 {
98		return "", errUnknown
99	}
100	v := findMIB(x.toMIB, mib)
101	if v == -1 {
102		return "", errUnsupported
103	}
104	return x.names(v), nil
105}
106
// TODO: the coverage of this index is rather spotty. Allowing users to set
// encodings would allow:
// - users to increase coverage
// - a partially loaded set of encodings in case the user doesn't need
//   them all.
// - writing an OS-specific wrapper for supported encodings and setting them.
// The exact definition of Set depends a bit on if and how we want to let users
// write their own Encoding implementations. Also, it is not possible yet to
// only partially load the encodings without doing some refactoring. Until this
// is solved, we might as well not support Set.
117// // Set sets the e to be used for the encoding scheme identified by name. Only
118// // canonical names may be used. An empty name assigns e to its internally
119// // associated encoding scheme.
120// func (x *Index) Set(name string, e encoding.Encoding) error {
121// 	panic("TODO: implement")
122// }
123
124func findMIB(x []identifier.MIB, mib identifier.MIB) int {
125	i := sort.Search(len(x), func(i int) bool { return x[i] >= mib })
126	if i < len(x) && x[i] == mib {
127		return i
128	}
129	return -1
130}
131
// maxMIMENameLen is the largest value the first byte of an ianaNames entry can
// have while still being interpreted as an offset rather than as the first
// character of a name. Officially 40, but we leave some buffer.
const maxMIMENameLen = '0' - 1
133
134func mimeName(x int) string {
135	n := ianaNames[x]
136	// See gen.go for a description of the encoding.
137	if n[0] <= maxMIMENameLen {
138		return n[1:n[0]]
139	}
140	return n
141}
142
143func ianaName(x int) string {
144	n := ianaNames[x]
145	// See gen.go for a description of the encoding.
146	if n[0] <= maxMIMENameLen {
147		return n[n[0]:]
148	}
149	return n
150}
151
152func mibName(x int) string {
153	return mibNames[x]
154}
155
156var encodings = [numIANA]encoding.Encoding{
157	enc3:    asciiEnc,
158	enc106:  unicode.UTF8,
159	enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
160	enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
161	enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
162	enc2028: charmap.CodePage037,
163	enc2011: charmap.CodePage437,
164	enc2009: charmap.CodePage850,
165	enc2010: charmap.CodePage852,
166	enc2046: charmap.CodePage855,
167	enc2089: charmap.CodePage858,
168	enc2048: charmap.CodePage860,
169	enc2013: charmap.CodePage862,
170	enc2050: charmap.CodePage863,
171	enc2052: charmap.CodePage865,
172	enc2086: charmap.CodePage866,
173	enc2102: charmap.CodePage1047,
174	enc2091: charmap.CodePage1140,
175	enc4:    charmap.ISO8859_1,
176	enc5:    charmap.ISO8859_2,
177	enc6:    charmap.ISO8859_3,
178	enc7:    charmap.ISO8859_4,
179	enc8:    charmap.ISO8859_5,
180	enc9:    charmap.ISO8859_6,
181	enc81:   charmap.ISO8859_6E,
182	enc82:   charmap.ISO8859_6I,
183	enc10:   charmap.ISO8859_7,
184	enc11:   charmap.ISO8859_8,
185	enc84:   charmap.ISO8859_8E,
186	enc85:   charmap.ISO8859_8I,
187	enc12:   charmap.ISO8859_9,
188	enc13:   charmap.ISO8859_10,
189	enc109:  charmap.ISO8859_13,
190	enc110:  charmap.ISO8859_14,
191	enc111:  charmap.ISO8859_15,
192	enc112:  charmap.ISO8859_16,
193	enc2084: charmap.KOI8R,
194	enc2088: charmap.KOI8U,
195	enc2027: charmap.Macintosh,
196	enc2109: charmap.Windows874,
197	enc2250: charmap.Windows1250,
198	enc2251: charmap.Windows1251,
199	enc2252: charmap.Windows1252,
200	enc2253: charmap.Windows1253,
201	enc2254: charmap.Windows1254,
202	enc2255: charmap.Windows1255,
203	enc2256: charmap.Windows1256,
204	enc2257: charmap.Windows1257,
205	enc2258: charmap.Windows1258,
206	enc18:   japanese.EUCJP,
207	enc39:   japanese.ISO2022JP,
208	enc17:   japanese.ShiftJIS,
209	enc38:   korean.EUCKR,
210	enc114:  simplifiedchinese.GB18030,
211	enc113:  simplifiedchinese.GBK,
212	enc2085: simplifiedchinese.HZGB2312,
213	enc2026: traditionalchinese.Big5,
214}
215