1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package enctest
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"io/ioutil"
12	"strings"
13	"testing"
14
15	"golang.org/x/text/encoding"
16	"golang.org/x/text/encoding/internal/identifier"
17	"golang.org/x/text/transform"
18)
19
// Transcoder is the method set the helpers in this file need from a
// converter. Both the encoder and the decoder returned by an
// encoding.Encoding are used through it.
type Transcoder interface {
	// Transformer supplies the Transform method used for buffer-level checks.
	transform.Transformer
	// Bytes converts a whole byte slice in a single call.
	Bytes([]byte) ([]byte, error)
	// String converts a whole string in a single call.
	String(string) (string, error)
}
26
27func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) {
28	for _, direction := range []string{"Decode", "Encode"} {
29		t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) {
30
31			var coder Transcoder
32			var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
33			if direction == "Decode" {
34				coder, want, src = e.NewDecoder(), utf8, encoded
35				wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix
36			} else {
37				coder, want, src = e.NewEncoder(), encoded, utf8
38				wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, ""
39			}
40
41			dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
42			nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
43			if err != nil {
44				t.Fatal(err)
45			}
46			if nDst != len(wPrefix)+len(want)+len(wSuffix) {
47				t.Fatalf("nDst got %d, want %d",
48					nDst, len(wPrefix)+len(want)+len(wSuffix))
49			}
50			if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
51				t.Fatalf("nSrc got %d, want %d",
52					nSrc, len(sPrefix)+len(src)+len(sSuffix))
53			}
54			if got := string(dst); got != wPrefix+want+wSuffix {
55				t.Fatalf("\ngot  %q\nwant %q", got, wPrefix+want+wSuffix)
56			}
57
58			for _, n := range []int{0, 1, 2, 10, 123, 4567} {
59				input := sPrefix + strings.Repeat(src, n) + sSuffix
60				g, err := coder.String(input)
61				if err != nil {
62					t.Fatalf("Bytes: n=%d: %v", n, err)
63				}
64				if len(g) == 0 && len(input) == 0 {
65					// If the input is empty then the output can be empty,
66					// regardless of whatever wPrefix is.
67					continue
68				}
69				got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix
70				if got1 != want1 {
71					t.Fatalf("ReadAll: n=%d\ngot  %q\nwant %q",
72						n, trim(got1), trim(want1))
73				}
74			}
75		})
76	}
77}
78
79func TestFile(t *testing.T, e encoding.Encoding) {
80	for _, dir := range []string{"Decode", "Encode"} {
81		t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) {
82			dst, src, transformer, err := load(dir, e)
83			if err != nil {
84				t.Fatalf("load: %v", err)
85			}
86			buf, err := transformer.Bytes(src)
87			if err != nil {
88				t.Fatalf("transform: %v", err)
89			}
90			if !bytes.Equal(buf, dst) {
91				t.Error("transformed bytes did not match golden file")
92			}
93		})
94	}
95}
96
97func Benchmark(b *testing.B, enc encoding.Encoding) {
98	for _, direction := range []string{"Decode", "Encode"} {
99		b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) {
100			_, src, transformer, err := load(direction, enc)
101			if err != nil {
102				b.Fatal(err)
103			}
104			b.SetBytes(int64(len(src)))
105			b.ResetTimer()
106			for i := 0; i < b.N; i++ {
107				r := transform.NewReader(bytes.NewReader(src), transformer)
108				io.Copy(ioutil.Discard, r)
109			}
110		})
111	}
112}
113
114// testdataFiles are files in testdata/*.txt.
115var testdataFiles = []struct {
116	mib           identifier.MIB
117	basename, ext string
118}{
119	{identifier.Windows1252, "candide", "windows-1252"},
120	{identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"},
121	{identifier.ISO2022JP, "rashomon", "iso-2022-jp"},
122	{identifier.ShiftJIS, "rashomon", "shift-jis"},
123	{identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
124	{identifier.GBK, "sunzi-bingfa-simplified", "gbk"},
125	{identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
126	{identifier.Big5, "sunzi-bingfa-traditional", "big5"},
127	{identifier.UTF16LE, "candide", "utf-16le"},
128	{identifier.UTF8, "candide", "utf-8"},
129	{identifier.UTF32BE, "candide", "utf-32be"},
130
131	// GB18030 is a superset of GBK and is nominally a Simplified Chinese
132	// encoding, but it can also represent the entire Basic Multilingual
133	// Plane, including codepoints like 'â' that aren't encodable by GBK.
134	// GB18030 on Simplified Chinese should perform similarly to GBK on
135	// Simplified Chinese. GB18030 on "candide" is more interesting.
136	{identifier.GB18030, "candide", "gb18030"},
137}
138
139func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
140	basename, ext, count := "", "", 0
141	for _, tf := range testdataFiles {
142		if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib {
143			basename, ext = tf.basename, tf.ext
144			count++
145		}
146	}
147	if count != 1 {
148		if count == 0 {
149			return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
150		}
151		return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
152	}
153	dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext)
154	srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename)
155	var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
156	if direction == "Decode" {
157		dstFile, srcFile = srcFile, dstFile
158		coder = enc.NewDecoder()
159	}
160	dst, err := ioutil.ReadFile(dstFile)
161	if err != nil {
162		if dst, err = ioutil.ReadFile("../" + dstFile); err != nil {
163			return nil, nil, nil, err
164		}
165	}
166	src, err := ioutil.ReadFile(srcFile)
167	if err != nil {
168		if src, err = ioutil.ReadFile("../" + srcFile); err != nil {
169			return nil, nil, nil, err
170		}
171	}
172	return dst, src, coder, nil
173}
174
// trim shortens s for use in failure messages. Strings under 120 bytes are
// returned unchanged; longer ones are reduced to their first 50 bytes, a
// "..." marker, and their last 50 bytes.
func trim(s string) string {
	const (
		limit = 120 // strings at least this long are shortened
		keep  = 50  // bytes retained at each end
	)
	if len(s) >= limit {
		head, tail := s[:keep], s[len(s)-keep:]
		return head + "..." + tail
	}
	return s
}
181