1// Copyright 2015 The TCell Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package tcell
16
17import (
18	"strings"
19	"sync"
20
21	"golang.org/x/text/encoding"
22
23	gencoding "github.com/gdamore/encoding"
24)
25
26var encodings map[string]encoding.Encoding
27var encodingLk sync.Mutex
28var encodingFallback EncodingFallback = EncodingFallbackFail
29
30// RegisterEncoding may be called by the application to register an encoding.
31// The presence of additional encodings will facilitate application usage with
32// terminal environments where the I/O subsystem does not support Unicode.
33//
34// Windows systems use Unicode natively, and do not need any of the encoding
35// subsystem when using Windows Console screens.
36//
37// Please see the Go documentation for golang.org/x/text/encoding -- most of
38// the common ones exist already as stock variables.  For example, ISO8859-15
39// can be registered using the following code:
40//
41//   import "golang.org/x/text/encoding/charmap"
42//
43//     ...
44//     RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
45//
46// Aliases can be registered as well, for example "8859-15" could be an alias
47// for "ISO8859-15".
48//
49// For POSIX systems, the tcell package will check the environment variables
50// LC_ALL, LC_CTYPE,  and LANG (in that order) to determine the character set.
51// These are expected to have the following pattern:
52//
//	 $language[.$codeset[@$variant]]
54//
55// We extract only the $codeset part, which will usually be something like
56// UTF-8 or ISO8859-15 or KOI8-R.  Note that if the locale is either "POSIX"
// or "C", then we assume US-ASCII (the POSIX 'portable character set')
// and assume all other characters are somehow invalid.
59//
60// Modern POSIX systems and terminal emulators may use UTF-8, and for those
61// systems, this API is also unnecessary.  For example, Darwin (MacOS X) and
// modern Linux running modern xterm generally will work out of the box
// without any of this.  Use of UTF-8 is recommended when possible, as it
// saves quite a lot of processing overhead.
65//
66// Note that some encodings are quite large (for example GB18030 which is a
// superset of Unicode) and so the application size can be expected to
68// increase quite a bit as each encoding is added.  The East Asian encodings
69// have been seen to add 100-200K per encoding to the application size.
70//
71func RegisterEncoding(charset string, enc encoding.Encoding) {
72	encodingLk.Lock()
73	charset = strings.ToLower(charset)
74	encodings[charset] = enc
75	encodingLk.Unlock()
76}
77
// EncodingFallback describes how the system behaves when the locale
// requires a character set that has not been registered.  UTF-8 and
// US-ASCII are always supported, and on Windows consoles UTF-16LE is
// supported automatically as well.  Any other character set has to be
// added through the RegisterEncoding API.  (The RegisterAll function in
// the encoding sub package adds a large group of nearly all of them.)
type EncodingFallback int

// Fallback policies accepted by SetEncodingFallback.  These constants are
// untyped; they take on the EncodingFallback type when used in a context
// that requires it.
const (
	// EncodingFallbackFail makes GetEncoding fail, returning nil, when
	// no encoding is registered for the requested character set.
	EncodingFallbackFail = iota

	// EncodingFallbackASCII makes GetEncoding fall back to a 7-bit
	// ASCII encoding when no other encoding can be found.
	EncodingFallbackASCII

	// EncodingFallbackUTF8 makes GetEncoding assume, upon failure, that
	// UTF8 can pass through unmodified.  This behavior is not
	// recommended unless you are sure your terminal can cope with real
	// UTF8 sequences.
	EncodingFallbackUTF8
)
101
102// SetEncodingFallback changes the behavior of GetEncoding when a suitable
103// encoding is not found.  The default is EncodingFallbackFail, which
104// causes GetEncoding to simply return nil.
105func SetEncodingFallback(fb EncodingFallback) {
106	encodingLk.Lock()
107	encodingFallback = fb
108	encodingLk.Unlock()
109}
110
111// GetEncoding is used by Screen implementors who want to locate an encoding
112// for the given character set name.  Note that this will return nil for
113// either the Unicode (UTF-8) or ASCII encodings, since we don't use
114// encodings for them but instead have our own native methods.
115func GetEncoding(charset string) encoding.Encoding {
116	charset = strings.ToLower(charset)
117	encodingLk.Lock()
118	defer encodingLk.Unlock()
119	if enc, ok := encodings[charset]; ok {
120		return enc
121	}
122	switch encodingFallback {
123	case EncodingFallbackASCII:
124		return gencoding.ASCII
125	case EncodingFallbackUTF8:
126		return encoding.Nop
127	}
128	return nil
129}
130
131func init() {
132	// We always support UTF-8 and ASCII.
133	encodings = make(map[string]encoding.Encoding)
134	encodings["utf-8"] = gencoding.UTF8
135	encodings["utf8"] = gencoding.UTF8
136	encodings["us-ascii"] = gencoding.ASCII
137	encodings["ascii"] = gencoding.ASCII
138	encodings["iso646"] = gencoding.ASCII
139}
140