1package utfbom
2
3import (
4	"io"
5	"io/ioutil"
6	"reflect"
7	"testing"
8	"testing/iotest"
9	"time"
10)
11
12var testCases = []struct {
13	name       string
14	input      []byte
15	inputError error
16	encoding   Encoding
17	output     []byte
18}{
19	{"1", []byte{}, nil, Unknown, []byte{}},
20	{"2", []byte("hello"), nil, Unknown, []byte("hello")},
21	{"3", []byte("\xEF\xBB\xBF"), nil, UTF8, []byte{}},
22	{"4", []byte("\xEF\xBB\xBFhello"), nil, UTF8, []byte("hello")},
23	{"5", []byte("\xFE\xFF"), nil, UTF16BigEndian, []byte{}},
24	{"6", []byte("\xFF\xFE"), nil, UTF16LittleEndian, []byte{}},
25	{"7", []byte("\x00\x00\xFE\xFF"), nil, UTF32BigEndian, []byte{}},
26	{"8", []byte("\xFF\xFE\x00\x00"), nil, UTF32LittleEndian, []byte{}},
27	{"5", []byte("\xFE\xFF\x00\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F"), nil,
28		UTF16BigEndian, []byte{0x00, 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F}},
29	{"6", []byte("\xFF\xFE\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00"), nil,
30		UTF16LittleEndian, []byte{0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00}},
31	{"7", []byte("\x00\x00\xFE\xFF\x00\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F"), nil,
32		UTF32BigEndian,
33		[]byte{0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F}},
34	{"8", []byte("\xFF\xFE\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F\x00\x00\x00"), nil,
35		UTF32LittleEndian,
36		[]byte{0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00}},
37	{"9", []byte("\xEF"), nil, Unknown, []byte("\xEF")},
38	{"10", []byte("\xEF\xBB"), nil, Unknown, []byte("\xEF\xBB")},
39	{"11", []byte("\xEF\xBB\xBF"), io.ErrClosedPipe, UTF8, []byte{}},
40	{"12", []byte("\xFE\xFF"), io.ErrClosedPipe, Unknown, []byte("\xFE\xFF")},
41	{"13", []byte("\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFE")},
42	{"14", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFF\xFE")},
43	{"15", []byte("\x00\x00\xFE\xFF"), io.ErrClosedPipe, UTF32BigEndian, []byte{}},
44	{"16", []byte("\x00\x00\xFE"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00, 0xFE}},
45	{"17", []byte("\x00\x00"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00}},
46	{"18", []byte("\x00"), io.ErrClosedPipe, Unknown, []byte{0x00}},
47	{"19", []byte("\xFF\xFE\x00\x00"), io.ErrClosedPipe, UTF32LittleEndian, []byte{}},
48	{"20", []byte("\xFF\xFE\x00"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE, 0x00}},
49	{"21", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE}},
50	{"22", []byte("\xFF"), io.ErrClosedPipe, Unknown, []byte{0xFF}},
51	{"23", []byte("\x68\x65"), nil, Unknown, []byte{0x68, 0x65}},
52}
53
// sliceReader is an io.Reader over an in-memory byte slice whose terminal
// error is configurable: once input is exhausted it reports inputError, or
// io.EOF when inputError is nil.
type sliceReader struct {
	input      []byte
	inputError error
}

// Read copies as much of the remaining input into p as fits. A zero-length p
// yields (0, nil) without touching the reader. The terminal error is returned
// in the same call that delivers the final bytes, and again on every
// subsequent non-empty read.
func (r *sliceReader) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}

	if pending := r.getError(); pending != nil {
		return 0, pending
	}

	copied := copy(p, r.input)
	r.input = r.input[copied:]
	return copied, r.getError()
}

// getError returns nil while unread input remains; afterwards it returns
// inputError if one was configured and io.EOF otherwise.
func (r *sliceReader) getError() (err error) {
	switch {
	case len(r.input) != 0:
		return nil
	case r.inputError != nil:
		return r.inputError
	default:
		return io.EOF
	}
}
84
// readMakers lists the reader wrappers every table-driven test is repeated
// with: "full" hands the source reader over unchanged, while "byte" wraps it
// in iotest.OneByteReader.
var readMakers = []struct {
	name string
	fn   func(io.Reader) io.Reader
}{
	{
		name: "full",
		fn:   func(src io.Reader) io.Reader { return src },
	},
	{
		name: "byte",
		fn:   iotest.OneByteReader,
	},
}
92
93func TestSkip(t *testing.T) {
94	for _, tc := range testCases {
95		for _, readMaker := range readMakers {
96			r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
97
98			sr, enc := Skip(r)
99			if enc != tc.encoding {
100				t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, tc.encoding, enc)
101			}
102
103			output, err := ioutil.ReadAll(sr)
104			if !reflect.DeepEqual(output, tc.output) {
105				t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
106			}
107			if err != tc.inputError {
108				t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
109			}
110		}
111	}
112}
113
114func TestSkipSkip(t *testing.T) {
115	for _, tc := range testCases {
116		for _, readMaker := range readMakers {
117			r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
118
119			sr0, _ := Skip(r)
120			sr, enc := Skip(sr0)
121			if enc != Unknown {
122				t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, Unknown, enc)
123			}
124
125			output, err := ioutil.ReadAll(sr)
126			if !reflect.DeepEqual(output, tc.output) {
127				t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
128			}
129			if err != tc.inputError {
130				t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
131			}
132		}
133	}
134}
135
136func TestSkipOnly(t *testing.T) {
137	for _, tc := range testCases {
138		for _, readMaker := range readMakers {
139			r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
140
141			sr := SkipOnly(r)
142
143			output, err := ioutil.ReadAll(sr)
144			if !reflect.DeepEqual(output, tc.output) {
145				t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
146			}
147			if err != tc.inputError {
148				t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
149			}
150		}
151	}
152}
153
// zeroReader is a reader that never makes progress: every Read reports zero
// bytes and no error.
type zeroReader struct{}

// Read ignores its buffer and always returns (0, nil).
func (zeroReader) Read(_ []byte) (int, error) {
	return 0, nil
}
159
160type readerEncoding struct {
161	Rd  *Reader
162	Enc Encoding
163}
164
165func TestSkipZeroReader(t *testing.T) {
166	var z zeroReader
167
168	c := make(chan readerEncoding)
169	go func() {
170		r, enc := Skip(z)
171		c <- readerEncoding{r, enc}
172	}()
173
174	select {
175	case re := <-c:
176		if re.Enc != Unknown {
177			t.Error("Unknown encoding expected")
178		} else {
179			var b [1]byte
180			n, err := re.Rd.Read(b[:])
181			if n != 0 {
182				t.Error("unexpected bytes count:", n)
183			}
184			if err != io.ErrNoProgress {
185				t.Error("unexpected error:", err)
186			}
187		}
188	case <-time.After(time.Second):
189		t.Error("test timed out (endless loop in Skip?)")
190	}
191}
192
193func TestSkipOnlyZeroReader(t *testing.T) {
194	var z zeroReader
195
196	c := make(chan *Reader)
197	go func() {
198		r := SkipOnly(z)
199		c <- r
200	}()
201
202	select {
203	case r := <-c:
204		var b [1]byte
205		n, err := r.Read(b[:])
206		if n != 0 {
207			t.Error("unexpected bytes count:", n)
208		}
209		if err != io.ErrNoProgress {
210			t.Error("unexpected error:", err)
211		}
212	case <-time.After(time.Second):
213		t.Error("test timed out (endless loop in Skip?)")
214	}
215}
216
217func TestReader_ReadEmpty(t *testing.T) {
218	for _, tc := range testCases {
219		for _, readMaker := range readMakers {
220			r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
221
222			sr := SkipOnly(r)
223
224			n, err := sr.Read(nil)
225			if n != 0 {
226				t.Fatalf("test %v reader=%s: expected to read zero bytes, but got %v", tc.name, readMaker.name, n)
227			}
228			if err != nil {
229				t.Fatalf("test %v reader=%s: expected to get <nil> error, but got %+#v", tc.name, readMaker.name, err)
230			}
231		}
232	}
233}
234
235func TestEncoding_String(t *testing.T) {
236	for e := Unknown; e <= UTF32LittleEndian; e++ {
237		s := e.String()
238		if s == "" {
239			t.Errorf("no string for %#v", e)
240		}
241	}
242	s := Encoding(999).String()
243	if s != "Unknown" {
244		t.Errorf("wrong string '%s' for invalid encoding", s)
245	}
246}
247