1// Package textfile allows reading files that contain text. It automatically
2// detects and converts several encodings and removes Byte Order Marks (BOM).
3package textfile
4
5import (
6	"bytes"
7	"io/ioutil"
8
9	"golang.org/x/text/encoding/unicode"
10)
11
// Byte order marks (BOMs) that are recognized at the very start of the input.
var (
	// bomUTF8 is the UTF-8 byte order mark.
	bomUTF8 = []byte{0xEF, 0xBB, 0xBF}

	// bomUTF16BigEndian is the UTF-16 byte order mark in big-endian byte order.
	bomUTF16BigEndian = []byte{0xFE, 0xFF}

	// bomUTF16LittleEndian is the UTF-16 byte order mark in little-endian byte order.
	bomUTF16LittleEndian = []byte{0xFF, 0xFE}
)
18
19// Decode removes a byte order mark and converts the bytes to UTF-8.
20func Decode(data []byte) ([]byte, error) {
21	if bytes.HasPrefix(data, bomUTF8) {
22		return data[len(bomUTF8):], nil
23	}
24
25	if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) {
26		// no encoding specified, let's assume UTF-8
27		return data, nil
28	}
29
30	// UseBom means automatic endianness selection
31	e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
32	return e.NewDecoder().Bytes(data)
33}
34
35// Read returns the contents of the file, converted to UTF-8, stripped of any BOM.
36func Read(filename string) ([]byte, error) {
37	data, err := ioutil.ReadFile(filename)
38	if err != nil {
39		return nil, err
40	}
41
42	return Decode(data)
43}
44