1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package lzw
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"io/ioutil"
12	"math"
13	"runtime"
14	"strconv"
15	"strings"
16	"testing"
17)
18
19type lzwTest struct {
20	desc       string
21	raw        string
22	compressed string
23	err        error
24}
25
26var lzwTests = []lzwTest{
27	{
28		"empty;LSB;8",
29		"",
30		"\x01\x01",
31		nil,
32	},
33	{
34		"empty;MSB;8",
35		"",
36		"\x80\x80",
37		nil,
38	},
39	{
40		"tobe;LSB;7",
41		"TOBEORNOTTOBEORTOBEORNOT",
42		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
43		nil,
44	},
45	{
46		"tobe;LSB;8",
47		"TOBEORNOTTOBEORTOBEORNOT",
48		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
49		nil,
50	},
51	{
52		"tobe;MSB;7",
53		"TOBEORNOTTOBEORTOBEORNOT",
54		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
55		nil,
56	},
57	{
58		"tobe;MSB;8",
59		"TOBEORNOTTOBEORTOBEORNOT",
60		"\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
61		nil,
62	},
63	{
64		"tobe-truncated;LSB;8",
65		"TOBEORNOTTOBEORTOBEORNOT",
66		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
67		io.ErrUnexpectedEOF,
68	},
69	// This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
70	{
71		"gif;LSB;8",
72		"\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
73		"\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
74		nil,
75	},
76	// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
77	{
78		"pdf;MSB;8",
79		"-----A---B",
80		"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
81		nil,
82	},
83}
84
85func TestReader(t *testing.T) {
86	var b bytes.Buffer
87	for _, tt := range lzwTests {
88		d := strings.Split(tt.desc, ";")
89		var order Order
90		switch d[1] {
91		case "LSB":
92			order = LSB
93		case "MSB":
94			order = MSB
95		default:
96			t.Errorf("%s: bad order %q", tt.desc, d[1])
97		}
98		litWidth, _ := strconv.Atoi(d[2])
99		rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
100		defer rc.Close()
101		b.Reset()
102		n, err := io.Copy(&b, rc)
103		s := b.String()
104		if err != nil {
105			if err != tt.err {
106				t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
107			}
108			if err == io.ErrUnexpectedEOF {
109				// Even if the input is truncated, we should still return the
110				// partial decoded result.
111				if n == 0 || !strings.HasPrefix(tt.raw, s) {
112					t.Errorf("got %d bytes (%q), want a non-empty prefix of %q", n, s, tt.raw)
113				}
114			}
115			continue
116		}
117		if s != tt.raw {
118			t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
119		}
120	}
121}
122
123type devZero struct{}
124
125func (devZero) Read(p []byte) (int, error) {
126	for i := range p {
127		p[i] = 0
128	}
129	return len(p), nil
130}
131
132func TestHiCodeDoesNotOverflow(t *testing.T) {
133	r := NewReader(devZero{}, LSB, 8)
134	d := r.(*decoder)
135	buf := make([]byte, 1024)
136	oldHi := uint16(0)
137	for i := 0; i < 100; i++ {
138		if _, err := io.ReadFull(r, buf); err != nil {
139			t.Fatalf("i=%d: %v", i, err)
140		}
141		// The hi code should never decrease.
142		if d.hi < oldHi {
143			t.Fatalf("i=%d: hi=%d decreased from previous value %d", i, d.hi, oldHi)
144		}
145		oldHi = d.hi
146	}
147}
148
149// TestNoLongerSavingPriorExpansions tests the decoder state when codes other
150// than clear codes continue to be seen after decoder.hi and decoder.width
151// reach their maximum values (4095 and 12), i.e. after we no longer save prior
152// expansions. In particular, it tests seeing the highest possible code, 4095.
153func TestNoLongerSavingPriorExpansions(t *testing.T) {
154	// Iterations is used to calculate how many input bits are needed to get
155	// the decoder.hi and decoder.width values up to their maximum.
156	iterations := []struct {
157		width, n int
158	}{
159		// The final term is 257, not 256, as NewReader initializes d.hi to
160		// d.clear+1 and the clear code is 256.
161		{9, 512 - 257},
162		{10, 1024 - 512},
163		{11, 2048 - 1024},
164		{12, 4096 - 2048},
165	}
166	nCodes, nBits := 0, 0
167	for _, e := range iterations {
168		nCodes += e.n
169		nBits += e.n * e.width
170	}
171	if nCodes != 3839 {
172		t.Fatalf("nCodes: got %v, want %v", nCodes, 3839)
173	}
174	if nBits != 43255 {
175		t.Fatalf("nBits: got %v, want %v", nBits, 43255)
176	}
177
178	// Construct our input of 43255 zero bits (which gets d.hi and d.width up
179	// to 4095 and 12), followed by 0xfff (4095) as 12 bits, followed by 0x101
180	// (EOF) as 12 bits.
181	//
182	// 43255 = 5406*8 + 7, and codes are read in LSB order. The final bytes are
183	// therefore:
184	//
185	// xwwwwwww xxxxxxxx yyyyyxxx zyyyyyyy
186	// 10000000 11111111 00001111 00001000
187	//
188	// or split out:
189	//
190	// .0000000 ........ ........ ........   w = 0x000
191	// 1....... 11111111 .....111 ........   x = 0xfff
192	// ........ ........ 00001... .0001000   y = 0x101
193	//
194	// The 12 'w' bits (not all are shown) form the 3839'th code, with value
195	// 0x000. Just after decoder.read returns that code, d.hi == 4095 and
196	// d.last == 0.
197	//
198	// The 12 'x' bits form the 3840'th code, with value 0xfff or 4095. Just
199	// after decoder.read returns that code, d.hi == 4095 and d.last ==
200	// decoderInvalidCode.
201	//
202	// The 12 'y' bits form the 3841'st code, with value 0x101, the EOF code.
203	//
204	// The 'z' bit is unused.
205	in := make([]byte, 5406)
206	in = append(in, 0x80, 0xff, 0x0f, 0x08)
207
208	r := NewReader(bytes.NewReader(in), LSB, 8)
209	nDecoded, err := io.Copy(ioutil.Discard, r)
210	if err != nil {
211		t.Fatalf("Copy: %v", err)
212	}
213	// nDecoded should be 3841: 3839 literal codes and then 2 decoded bytes
214	// from 1 non-literal code. The EOF code contributes 0 decoded bytes.
215	if nDecoded != int64(nCodes+2) {
216		t.Fatalf("nDecoded: got %v, want %v", nDecoded, nCodes+2)
217	}
218}
219
220func BenchmarkDecoder(b *testing.B) {
221	buf, err := ioutil.ReadFile("../testdata/e.txt")
222	if err != nil {
223		b.Fatal(err)
224	}
225	if len(buf) == 0 {
226		b.Fatalf("test file has no data")
227	}
228
229	for e := 4; e <= 6; e++ {
230		n := int(math.Pow10(e))
231		b.Run(fmt.Sprint("1e", e), func(b *testing.B) {
232			b.StopTimer()
233			b.SetBytes(int64(n))
234			buf0 := buf
235			compressed := new(bytes.Buffer)
236			w := NewWriter(compressed, LSB, 8)
237			for i := 0; i < n; i += len(buf0) {
238				if len(buf0) > n-i {
239					buf0 = buf0[:n-i]
240				}
241				w.Write(buf0)
242			}
243			w.Close()
244			buf1 := compressed.Bytes()
245			buf0, compressed, w = nil, nil, nil
246			runtime.GC()
247			b.StartTimer()
248			for i := 0; i < b.N; i++ {
249				io.Copy(ioutil.Discard, NewReader(bytes.NewReader(buf1), LSB, 8))
250			}
251		})
252	}
253}
254