1// Package textfile allows reading files that contain text. It automatically 2// detects and converts several encodings and removes Byte Order Marks (BOM). 3package textfile 4 5import ( 6 "bytes" 7 "io/ioutil" 8 9 "golang.org/x/text/encoding/unicode" 10) 11 12// All supported BOMs (Byte Order Marks) 13var ( 14 bomUTF8 = []byte{0xef, 0xbb, 0xbf} 15 bomUTF16BigEndian = []byte{0xfe, 0xff} 16 bomUTF16LittleEndian = []byte{0xff, 0xfe} 17) 18 19// Decode removes a byte order mark and converts the bytes to UTF-8. 20func Decode(data []byte) ([]byte, error) { 21 if bytes.HasPrefix(data, bomUTF8) { 22 return data[len(bomUTF8):], nil 23 } 24 25 if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) { 26 // no encoding specified, let's assume UTF-8 27 return data, nil 28 } 29 30 // UseBom means automatic endianness selection 31 e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM) 32 return e.NewDecoder().Bytes(data) 33} 34 35// Read returns the contents of the file, converted to UTF-8, stripped of any BOM. 36func Read(filename string) ([]byte, error) { 37 data, err := ioutil.ReadFile(filename) 38 if err != nil { 39 return nil, err 40 } 41 42 return Decode(data) 43} 44