1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package enctest 6 7import ( 8 "bytes" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "strings" 13 "testing" 14 15 "golang.org/x/text/encoding" 16 "golang.org/x/text/encoding/internal/identifier" 17 "golang.org/x/text/transform" 18) 19 20// Encoder or Decoder 21type Transcoder interface { 22 transform.Transformer 23 Bytes([]byte) ([]byte, error) 24 String(string) (string, error) 25} 26 27func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) { 28 for _, direction := range []string{"Decode", "Encode"} { 29 t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) { 30 31 var coder Transcoder 32 var want, src, wPrefix, sPrefix, wSuffix, sSuffix string 33 if direction == "Decode" { 34 coder, want, src = e.NewDecoder(), utf8, encoded 35 wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix 36 } else { 37 coder, want, src = e.NewEncoder(), encoded, utf8 38 wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, "" 39 } 40 41 dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix)) 42 nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true) 43 if err != nil { 44 t.Fatal(err) 45 } 46 if nDst != len(wPrefix)+len(want)+len(wSuffix) { 47 t.Fatalf("nDst got %d, want %d", 48 nDst, len(wPrefix)+len(want)+len(wSuffix)) 49 } 50 if nSrc != len(sPrefix)+len(src)+len(sSuffix) { 51 t.Fatalf("nSrc got %d, want %d", 52 nSrc, len(sPrefix)+len(src)+len(sSuffix)) 53 } 54 if got := string(dst); got != wPrefix+want+wSuffix { 55 t.Fatalf("\ngot %q\nwant %q", got, wPrefix+want+wSuffix) 56 } 57 58 for _, n := range []int{0, 1, 2, 10, 123, 4567} { 59 input := sPrefix + strings.Repeat(src, n) + sSuffix 60 g, err := coder.String(input) 61 if err != nil { 62 t.Fatalf("Bytes: n=%d: %v", n, err) 63 } 64 if len(g) == 0 && len(input) == 0 { 65 // If the input is empty then the output can be empty, 66 // regardless of whatever wPrefix is. 67 continue 68 } 69 got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix 70 if got1 != want1 { 71 t.Fatalf("ReadAll: n=%d\ngot %q\nwant %q", 72 n, trim(got1), trim(want1)) 73 } 74 } 75 }) 76 } 77} 78 79func TestFile(t *testing.T, e encoding.Encoding) { 80 for _, dir := range []string{"Decode", "Encode"} { 81 t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) { 82 dst, src, transformer, err := load(dir, e) 83 if err != nil { 84 t.Fatalf("load: %v", err) 85 } 86 buf, err := transformer.Bytes(src) 87 if err != nil { 88 t.Fatalf("transform: %v", err) 89 } 90 if !bytes.Equal(buf, dst) { 91 t.Error("transformed bytes did not match golden file") 92 } 93 }) 94 } 95} 96 97func Benchmark(b *testing.B, enc encoding.Encoding) { 98 for _, direction := range []string{"Decode", "Encode"} { 99 b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) { 100 _, src, transformer, err := load(direction, enc) 101 if err != nil { 102 b.Fatal(err) 103 } 104 b.SetBytes(int64(len(src))) 105 b.ResetTimer() 106 for i := 0; i < b.N; i++ { 107 r := transform.NewReader(bytes.NewReader(src), transformer) 108 io.Copy(ioutil.Discard, r) 109 } 110 }) 111 } 112} 113 114// testdataFiles are files in testdata/*.txt. 115var testdataFiles = []struct { 116 mib identifier.MIB 117 basename, ext string 118}{ 119 {identifier.Windows1252, "candide", "windows-1252"}, 120 {identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"}, 121 {identifier.ISO2022JP, "rashomon", "iso-2022-jp"}, 122 {identifier.ShiftJIS, "rashomon", "shift-jis"}, 123 {identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"}, 124 {identifier.GBK, "sunzi-bingfa-simplified", "gbk"}, 125 {identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"}, 126 {identifier.Big5, "sunzi-bingfa-traditional", "big5"}, 127 {identifier.UTF16LE, "candide", "utf-16le"}, 128 {identifier.UTF8, "candide", "utf-8"}, 129 {identifier.UTF32BE, "candide", "utf-32be"}, 130 131 // GB18030 is a superset of GBK and is nominally a Simplified Chinese 132 // encoding, but it can also represent the entire Basic Multilingual 133 // Plane, including codepoints like 'â' that aren't encodable by GBK. 134 // GB18030 on Simplified Chinese should perform similarly to GBK on 135 // Simplified Chinese. GB18030 on "candide" is more interesting. 136 {identifier.GB18030, "candide", "gb18030"}, 137} 138 139func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) { 140 basename, ext, count := "", "", 0 141 for _, tf := range testdataFiles { 142 if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib { 143 basename, ext = tf.basename, tf.ext 144 count++ 145 } 146 } 147 if count != 1 { 148 if count == 0 { 149 return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc) 150 } 151 return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc) 152 } 153 dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext) 154 srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename) 155 var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder()) 156 if direction == "Decode" { 157 dstFile, srcFile = srcFile, dstFile 158 coder = enc.NewDecoder() 159 } 160 dst, err := ioutil.ReadFile(dstFile) 161 if err != nil { 162 if dst, err = ioutil.ReadFile("../" + dstFile); err != nil { 163 return nil, nil, nil, err 164 } 165 } 166 src, err := ioutil.ReadFile(srcFile) 167 if err != nil { 168 if src, err = ioutil.ReadFile("../" + srcFile); err != nil { 169 return nil, nil, nil, err 170 } 171 } 172 return dst, src, coder, nil 173} 174 175func trim(s string) string { 176 if len(s) < 120 { 177 return s 178 } 179 return s[:50] + "..." + s[len(s)-50:] 180} 181