1package utfbom 2 3import ( 4 "io" 5 "io/ioutil" 6 "reflect" 7 "testing" 8 "testing/iotest" 9 "time" 10) 11 12var testCases = []struct { 13 name string 14 input []byte 15 inputError error 16 encoding Encoding 17 output []byte 18}{ 19 {"1", []byte{}, nil, Unknown, []byte{}}, 20 {"2", []byte("hello"), nil, Unknown, []byte("hello")}, 21 {"3", []byte("\xEF\xBB\xBF"), nil, UTF8, []byte{}}, 22 {"4", []byte("\xEF\xBB\xBFhello"), nil, UTF8, []byte("hello")}, 23 {"5", []byte("\xFE\xFF"), nil, UTF16BigEndian, []byte{}}, 24 {"6", []byte("\xFF\xFE"), nil, UTF16LittleEndian, []byte{}}, 25 {"7", []byte("\x00\x00\xFE\xFF"), nil, UTF32BigEndian, []byte{}}, 26 {"8", []byte("\xFF\xFE\x00\x00"), nil, UTF32LittleEndian, []byte{}}, 27 {"5", []byte("\xFE\xFF\x00\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F"), nil, 28 UTF16BigEndian, []byte{0x00, 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F}}, 29 {"6", []byte("\xFF\xFE\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00"), nil, 30 UTF16LittleEndian, []byte{0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00}}, 31 {"7", []byte("\x00\x00\xFE\xFF\x00\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F"), nil, 32 UTF32BigEndian, 33 []byte{0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F}}, 34 {"8", []byte("\xFF\xFE\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F\x00\x00\x00"), nil, 35 UTF32LittleEndian, 36 []byte{0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00}}, 37 {"9", []byte("\xEF"), nil, Unknown, []byte("\xEF")}, 38 {"10", []byte("\xEF\xBB"), nil, Unknown, []byte("\xEF\xBB")}, 39 {"11", []byte("\xEF\xBB\xBF"), io.ErrClosedPipe, UTF8, []byte{}}, 40 {"12", []byte("\xFE\xFF"), io.ErrClosedPipe, Unknown, []byte("\xFE\xFF")}, 41 {"13", []byte("\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFE")}, 42 {"14", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFF\xFE")}, 43 {"15", []byte("\x00\x00\xFE\xFF"), io.ErrClosedPipe, UTF32BigEndian, []byte{}}, 44 {"16", []byte("\x00\x00\xFE"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00, 0xFE}}, 45 {"17", []byte("\x00\x00"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00}}, 46 {"18", []byte("\x00"), io.ErrClosedPipe, Unknown, []byte{0x00}}, 47 {"19", []byte("\xFF\xFE\x00\x00"), io.ErrClosedPipe, UTF32LittleEndian, []byte{}}, 48 {"20", []byte("\xFF\xFE\x00"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE, 0x00}}, 49 {"21", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE}}, 50 {"22", []byte("\xFF"), io.ErrClosedPipe, Unknown, []byte{0xFF}}, 51 {"23", []byte("\x68\x65"), nil, Unknown, []byte{0x68, 0x65}}, 52} 53 54type sliceReader struct { 55 input []byte 56 inputError error 57} 58 59func (r *sliceReader) Read(p []byte) (n int, err error) { 60 if len(p) == 0 { 61 return 62 } 63 64 if err = r.getError(); err != nil { 65 return 66 } 67 68 n = copy(p, r.input) 69 r.input = r.input[n:] 70 err = r.getError() 71 return 72} 73 74func (r *sliceReader) getError() (err error) { 75 if len(r.input) == 0 { 76 if r.inputError == nil { 77 err = io.EOF 78 } else { 79 err = r.inputError 80 } 81 } 82 return 83} 84 85var readMakers = []struct { 86 name string 87 fn func(io.Reader) io.Reader 88}{ 89 {"full", func(r io.Reader) io.Reader { return r }}, 90 {"byte", iotest.OneByteReader}, 91} 92 93func TestSkip(t *testing.T) { 94 for _, tc := range testCases { 95 for _, readMaker := range readMakers { 96 r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) 97 98 sr, enc := Skip(r) 99 if enc != tc.encoding { 100 t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, tc.encoding, enc) 101 } 102 103 output, err := ioutil.ReadAll(sr) 104 if !reflect.DeepEqual(output, tc.output) { 105 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) 106 } 107 if err != tc.inputError { 108 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) 109 } 110 } 111 } 112} 113 114func TestSkipSkip(t *testing.T) { 115 for _, tc := range testCases { 116 for _, readMaker := range readMakers { 117 r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) 118 119 sr0, _ := Skip(r) 120 sr, enc := Skip(sr0) 121 if enc != Unknown { 122 t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, Unknown, enc) 123 } 124 125 output, err := ioutil.ReadAll(sr) 126 if !reflect.DeepEqual(output, tc.output) { 127 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) 128 } 129 if err != tc.inputError { 130 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) 131 } 132 } 133 } 134} 135 136func TestSkipOnly(t *testing.T) { 137 for _, tc := range testCases { 138 for _, readMaker := range readMakers { 139 r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) 140 141 sr := SkipOnly(r) 142 143 output, err := ioutil.ReadAll(sr) 144 if !reflect.DeepEqual(output, tc.output) { 145 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) 146 } 147 if err != tc.inputError { 148 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) 149 } 150 } 151 } 152} 153 154type zeroReader struct{} 155 156func (zeroReader) Read(p []byte) (int, error) { 157 return 0, nil 158} 159 160type readerEncoding struct { 161 Rd *Reader 162 Enc Encoding 163} 164 165func TestSkipZeroReader(t *testing.T) { 166 var z zeroReader 167 168 c := make(chan readerEncoding) 169 go func() { 170 r, enc := Skip(z) 171 c <- readerEncoding{r, enc} 172 }() 173 174 select { 175 case re := <-c: 176 if re.Enc != Unknown { 177 t.Error("Unknown encoding expected") 178 } else { 179 var b [1]byte 180 n, err := re.Rd.Read(b[:]) 181 if n != 0 { 182 t.Error("unexpected bytes count:", n) 183 } 184 if err != io.ErrNoProgress { 185 t.Error("unexpected error:", err) 186 } 187 } 188 case <-time.After(time.Second): 189 t.Error("test timed out (endless loop in Skip?)") 190 } 191} 192 193func TestSkipOnlyZeroReader(t *testing.T) { 194 var z zeroReader 195 196 c := make(chan *Reader) 197 go func() { 198 r := SkipOnly(z) 199 c <- r 200 }() 201 202 select { 203 case r := <-c: 204 var b [1]byte 205 n, err := r.Read(b[:]) 206 if n != 0 { 207 t.Error("unexpected bytes count:", n) 208 } 209 if err != io.ErrNoProgress { 210 t.Error("unexpected error:", err) 211 } 212 case <-time.After(time.Second): 213 t.Error("test timed out (endless loop in Skip?)") 214 } 215} 216 217func TestReader_ReadEmpty(t *testing.T) { 218 for _, tc := range testCases { 219 for _, readMaker := range readMakers { 220 r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) 221 222 sr := SkipOnly(r) 223 224 n, err := sr.Read(nil) 225 if n != 0 { 226 t.Fatalf("test %v reader=%s: expected to read zero bytes, but got %v", tc.name, readMaker.name, n) 227 } 228 if err != nil { 229 t.Fatalf("test %v reader=%s: expected to get <nil> error, but got %+#v", tc.name, readMaker.name, err) 230 } 231 } 232 } 233} 234 235func TestEncoding_String(t *testing.T) { 236 for e := Unknown; e <= UTF32LittleEndian; e++ { 237 s := e.String() 238 if s == "" { 239 t.Errorf("no string for %#v", e) 240 } 241 } 242 s := Encoding(999).String() 243 if s != "Unknown" { 244 t.Errorf("wrong string '%s' for invalid encoding", s) 245 } 246} 247