1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package encoding_test
6
7import (
8	"io/ioutil"
9	"strings"
10	"testing"
11
12	"golang.org/x/text/encoding"
13	"golang.org/x/text/encoding/charmap"
14	"golang.org/x/text/transform"
15)
16
17func TestEncodeInvalidUTF8(t *testing.T) {
18	inputs := []string{
19		"hello.",
20		"wo\ufffdld.",
21		"ABC\xff\x80\x80", // Invalid UTF-8.
22		"\x80\x80\x80\x80\x80",
23		"\x80\x80D\x80\x80",          // Valid rune at "D".
24		"E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
25		"G",
26		"H\xe2\x82",     // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
27		"\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
28	}
29	// Each invalid source byte becomes '\x1a'.
30	want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
31
32	transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
33	gotBuf := make([]byte, 0, 1024)
34	src := make([]byte, 0, 1024)
35	for i, input := range inputs {
36		dst := make([]byte, 1024)
37		src = append(src, input...)
38		atEOF := i == len(inputs)-1
39		nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
40		gotBuf = append(gotBuf, dst[:nDst]...)
41		src = src[nSrc:]
42		if err != nil && err != transform.ErrShortSrc {
43			t.Fatalf("i=%d: %v", i, err)
44		}
45		if atEOF && err != nil {
46			t.Fatalf("i=%d: atEOF: %v", i, err)
47		}
48	}
49	if got := string(gotBuf); got != want {
50		t.Fatalf("\ngot  %+q\nwant %+q", got, want)
51	}
52}
53
54func TestReplacement(t *testing.T) {
55	for _, direction := range []string{"Decode", "Encode"} {
56		enc, want := (transform.Transformer)(nil), ""
57		if direction == "Decode" {
58			enc = encoding.Replacement.NewDecoder()
59			want = "\ufffd"
60		} else {
61			enc = encoding.Replacement.NewEncoder()
62			want = "AB\x00CD\ufffdYZ"
63		}
64		sr := strings.NewReader("AB\x00CD\x80YZ")
65		g, err := ioutil.ReadAll(transform.NewReader(sr, enc))
66		if err != nil {
67			t.Errorf("%s: ReadAll: %v", direction, err)
68			continue
69		}
70		if got := string(g); got != want {
71			t.Errorf("%s:\ngot  %q\nwant %q", direction, got, want)
72			continue
73		}
74	}
75}
76
77func TestUTF8Validator(t *testing.T) {
78	testCases := []struct {
79		desc    string
80		dstSize int
81		src     string
82		atEOF   bool
83		want    string
84		wantErr error
85	}{
86		{
87			"empty input",
88			100,
89			"",
90			false,
91			"",
92			nil,
93		},
94		{
95			"valid 1-byte 1-rune input",
96			100,
97			"a",
98			false,
99			"a",
100			nil,
101		},
102		{
103			"valid 3-byte 1-rune input",
104			100,
105			"\u1234",
106			false,
107			"\u1234",
108			nil,
109		},
110		{
111			"valid 5-byte 3-rune input",
112			100,
113			"a\u0100\u0101",
114			false,
115			"a\u0100\u0101",
116			nil,
117		},
118		{
119			"perfectly sized dst (non-ASCII)",
120			5,
121			"a\u0100\u0101",
122			false,
123			"a\u0100\u0101",
124			nil,
125		},
126		{
127			"short dst (non-ASCII)",
128			4,
129			"a\u0100\u0101",
130			false,
131			"a\u0100",
132			transform.ErrShortDst,
133		},
134		{
135			"perfectly sized dst (ASCII)",
136			5,
137			"abcde",
138			false,
139			"abcde",
140			nil,
141		},
142		{
143			"short dst (ASCII)",
144			4,
145			"abcde",
146			false,
147			"abcd",
148			transform.ErrShortDst,
149		},
150		{
151			"partial input (!EOF)",
152			100,
153			"a\u0100\xf1",
154			false,
155			"a\u0100",
156			transform.ErrShortSrc,
157		},
158		{
159			"invalid input (EOF)",
160			100,
161			"a\u0100\xf1",
162			true,
163			"a\u0100",
164			encoding.ErrInvalidUTF8,
165		},
166		{
167			"invalid input (!EOF)",
168			100,
169			"a\u0100\x80",
170			false,
171			"a\u0100",
172			encoding.ErrInvalidUTF8,
173		},
174		{
175			"invalid input (above U+10FFFF)",
176			100,
177			"a\u0100\xf7\xbf\xbf\xbf",
178			false,
179			"a\u0100",
180			encoding.ErrInvalidUTF8,
181		},
182		{
183			"invalid input (surrogate half)",
184			100,
185			"a\u0100\xed\xa0\x80",
186			false,
187			"a\u0100",
188			encoding.ErrInvalidUTF8,
189		},
190	}
191	for _, tc := range testCases {
192		dst := make([]byte, tc.dstSize)
193		nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
194		if nDst < 0 || len(dst) < nDst {
195			t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
196			continue
197		}
198		got := string(dst[:nDst])
199		if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
200			t.Errorf("%s:\ngot  %+q, %d, %v\nwant %+q, %d, %v",
201				tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
202			continue
203		}
204	}
205}
206
207func TestErrorHandler(t *testing.T) {
208	testCases := []struct {
209		desc      string
210		handler   func(*encoding.Encoder) *encoding.Encoder
211		sizeDst   int
212		src, want string
213		nSrc      int
214		err       error
215	}{
216		{
217			desc:    "one rune replacement",
218			handler: encoding.ReplaceUnsupported,
219			sizeDst: 100,
220			src:     "\uAC00",
221			want:    "\x1a",
222			nSrc:    3,
223		},
224		{
225			desc:    "mid-stream rune replacement",
226			handler: encoding.ReplaceUnsupported,
227			sizeDst: 100,
228			src:     "a\uAC00bcd\u00e9",
229			want:    "a\x1abcd\xe9",
230			nSrc:    9,
231		},
232		{
233			desc:    "at end rune replacement",
234			handler: encoding.ReplaceUnsupported,
235			sizeDst: 10,
236			src:     "\u00e9\uAC00",
237			want:    "\xe9\x1a",
238			nSrc:    5,
239		},
240		{
241			desc:    "short buffer replacement",
242			handler: encoding.ReplaceUnsupported,
243			sizeDst: 1,
244			src:     "\u00e9\uAC00",
245			want:    "\xe9",
246			nSrc:    2,
247			err:     transform.ErrShortDst,
248		},
249		{
250			desc:    "one rune html escape",
251			handler: encoding.HTMLEscapeUnsupported,
252			sizeDst: 100,
253			src:     "\uAC00",
254			want:    "&#44032;",
255			nSrc:    3,
256		},
257		{
258			desc:    "mid-stream html escape",
259			handler: encoding.HTMLEscapeUnsupported,
260			sizeDst: 100,
261			src:     "\u00e9\uAC00dcba",
262			want:    "\xe9&#44032;dcba",
263			nSrc:    9,
264		},
265		{
266			desc:    "short buffer html escape",
267			handler: encoding.HTMLEscapeUnsupported,
268			sizeDst: 9,
269			src:     "ab\uAC01",
270			want:    "ab",
271			nSrc:    2,
272			err:     transform.ErrShortDst,
273		},
274	}
275	for i, tc := range testCases {
276		tr := tc.handler(charmap.Windows1250.NewEncoder())
277		b := make([]byte, tc.sizeDst)
278		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
279		if err != tc.err {
280			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
281		}
282		if got := string(b[:nDst]); got != tc.want {
283			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
284		}
285		if nSrc != tc.nSrc {
286			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
287		}
288
289	}
290}
291