1// Copyright 2015 The TCell Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use file except in compliance with the License. 5// You may obtain a copy of the license at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package tcell 16 17import ( 18 "strings" 19 "sync" 20 21 "golang.org/x/text/encoding" 22 23 gencoding "github.com/gdamore/encoding" 24) 25 26var encodings map[string]encoding.Encoding 27var encodingLk sync.Mutex 28var encodingFallback EncodingFallback = EncodingFallbackFail 29 30// RegisterEncoding may be called by the application to register an encoding. 31// The presence of additional encodings will facilitate application usage with 32// terminal environments where the I/O subsystem does not support Unicode. 33// 34// Windows systems use Unicode natively, and do not need any of the encoding 35// subsystem when using Windows Console screens. 36// 37// Please see the Go documentation for golang.org/x/text/encoding -- most of 38// the common ones exist already as stock variables. For example, ISO8859-15 39// can be registered using the following code: 40// 41// import "golang.org/x/text/encoding/charmap" 42// 43// ... 44// RegisterEncoding("ISO8859-15", charmap.ISO8859_15) 45// 46// Aliases can be registered as well, for example "8859-15" could be an alias 47// for "ISO8859-15". 48// 49// For POSIX systems, the tcell package will check the environment variables 50// LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set. 51// These are expected to have the following pattern: 52// 53// $language[.$codeset[@$variant] 54// 55// We extract only the $codeset part, which will usually be something like 56// UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX" 57// or "C", then we assume US-ASCII (the POSIX 'portable character set' 58// and assume all other characters are somehow invalid.) 59// 60// Modern POSIX systems and terminal emulators may use UTF-8, and for those 61// systems, this API is also unnecessary. For example, Darwin (MacOS X) and 62// modern Linux running modern xterm generally will out of the box without 63// any of this. Use of UTF-8 is recommended when possible, as it saves 64// quite a lot processing overhead. 65// 66// Note that some encodings are quite large (for example GB18030 which is a 67// superset of Unicode) and so the application size can be expected ot 68// increase quite a bit as each encoding is added. The East Asian encodings 69// have been seen to add 100-200K per encoding to the application size. 70// 71func RegisterEncoding(charset string, enc encoding.Encoding) { 72 encodingLk.Lock() 73 charset = strings.ToLower(charset) 74 encodings[charset] = enc 75 encodingLk.Unlock() 76} 77 78// EncodingFallback describes how the system behavees when the locale 79// requires a character set that we do not support. The system always 80// supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also 81// supported automatically. Other character sets must be added using the 82// RegisterEncoding API. (A large group of nearly all of them can be 83// added using the RegisterAll function in the encoding sub package.) 84type EncodingFallback int 85 86const ( 87 // EncodingFallbackFail behavior causes GetEncoding to fail 88 // when it cannot find an encoding. 89 EncodingFallbackFail = iota 90 91 // EncodingFallbackASCII behaviore causes GetEncoding to fall back 92 // to a 7-bit ASCII encoding, if no other encoding can be found. 93 EncodingFallbackASCII 94 95 // EncodingFallbackUTF8 behavior causes GetEncoding to assume 96 // UTF8 can pass unmodified upon failure. Note that this behavior 97 // is not recommended, unless you are sure your terminal can cope 98 // with real UTF8 sequences. 99 EncodingFallbackUTF8 100) 101 102// SetEncodingFallback changes the behavior of GetEncoding when a suitable 103// encoding is not found. The default is EncodingFallbackFail, which 104// causes GetEncoding to simply return nil. 105func SetEncodingFallback(fb EncodingFallback) { 106 encodingLk.Lock() 107 encodingFallback = fb 108 encodingLk.Unlock() 109} 110 111// GetEncoding is used by Screen implementors who want to locate an encoding 112// for the given character set name. Note that this will return nil for 113// either the Unicode (UTF-8) or ASCII encodings, since we don't use 114// encodings for them but instead have our own native methods. 115func GetEncoding(charset string) encoding.Encoding { 116 charset = strings.ToLower(charset) 117 encodingLk.Lock() 118 defer encodingLk.Unlock() 119 if enc, ok := encodings[charset]; ok { 120 return enc 121 } 122 switch encodingFallback { 123 case EncodingFallbackASCII: 124 return gencoding.ASCII 125 case EncodingFallbackUTF8: 126 return encoding.Nop 127 } 128 return nil 129} 130 131func init() { 132 // We always support UTF-8 and ASCII. 133 encodings = make(map[string]encoding.Encoding) 134 encodings["utf-8"] = gencoding.UTF8 135 encodings["utf8"] = gencoding.UTF8 136 encodings["us-ascii"] = gencoding.ASCII 137 encodings["ascii"] = gencoding.ASCII 138 encodings["iso646"] = gencoding.ASCII 139} 140