1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package meta
6
7import (
8	"bytes"
9	"io"
10
11	"github.com/dsnet/compress/internal/errors"
12	"github.com/dsnet/compress/internal/prefix"
13)
14
15// A Reader is an io.Reader that can read XFLATE's meta encoding.
16// The zero value of Reader is valid once Reset is called.
17type Reader struct {
18	InputOffset  int64 // Total number of bytes read from underlying io.Reader
19	OutputOffset int64 // Total number of bytes emitted from Read
20	NumBlocks    int64 // Number of blocks decoded
21
22	// FinalMode indicates which final bits (if any) were set.
23	// This will be valid after a call to Close or upon hitting io.EOF.
24	FinalMode FinalMode
25
26	rd *prefix.Reader
27	br prefix.Reader // Pre-allocated prefix.Reader to wrap input Reader
28	bw prefix.Writer // Temporary bit writer
29	bb bytes.Buffer  // Buffer for bw to write into
30
31	final FinalMode
32	buf   []byte
33	err   error
34}
35
36// NewReader creates a new Reader reading from the given reader.
37// If rd does not also implement compress.ByteReader or compress.BufferedReader,
38// then the decoder may read more data than necessary from rd.
39func NewReader(rd io.Reader) *Reader {
40	mr := new(Reader)
41	mr.Reset(rd)
42	return mr
43}
44
45// Reset discards the Reader's state and makes it equivalent to the result
46// of a call to NewReader, but reading from rd instead.
47//
48// This is used to reduce memory allocations.
49func (mr *Reader) Reset(rd io.Reader) {
50	*mr = Reader{
51		br: mr.br,
52		bw: mr.bw,
53		bb: mr.bb,
54	}
55	if br, ok := rd.(*prefix.Reader); ok {
56		// Use input Reader directly as a prefix.Reader.
57		mr.rd = br
58	} else {
59		// Use pre-allocated prefix.Reader to wrap input Reader.
60		mr.rd = &mr.br
61		mr.rd.Init(rd, false)
62	}
63	return
64}
65
66// Read reads the decoded meta data from the underlying io.Reader.
67// This returns io.EOF either when a meta block with final bits set is found or
68// when io.EOF is hit in the underlying reader.
69func (mr *Reader) Read(buf []byte) (int, error) {
70	if mr.err != nil {
71		return 0, mr.err
72	}
73
74	var rdCnt int
75	for len(buf) > 0 {
76		if len(mr.buf) > 0 {
77			cpCnt := copy(buf, mr.buf)
78			mr.buf = mr.buf[cpCnt:]
79			rdCnt += cpCnt
80			break
81		}
82
83		if mr.final != FinalNil {
84			mr.FinalMode = mr.final
85			mr.err = io.EOF
86			break
87		}
88
89		mr.err = mr.decodeBlock()
90		if mr.err != nil {
91			break
92		}
93	}
94
95	mr.OutputOffset += int64(rdCnt)
96	return rdCnt, mr.err
97}
98
99// Close ends the meta stream.
100// The FinalMode encountered becomes valid after calling Close.
101func (mr *Reader) Close() error {
102	if mr.err == errClosed {
103		return nil
104	}
105	if mr.err != nil && mr.err != io.EOF {
106		return mr.err
107	}
108
109	mr.FinalMode = mr.final
110	mr.err = errClosed
111	mr.rd = nil // Release reference to underlying Reader
112	return nil
113}
114
115// decodeBlock decodes a single meta block from the underlying Reader
116// into mr.buf and sets mr.final based on the block's final bits.
117// It also manages the statistic variables: InputOffset and NumBlocks.
118func (mr *Reader) decodeBlock() (err error) {
119	defer errors.Recover(&err)
120
121	// Update the number of bytes read from underlying Reader.
122	offset := mr.rd.Offset
123	defer func() {
124		if _, errFl := mr.rd.Flush(); errFl != nil {
125			err = errFl
126		}
127		mr.InputOffset += mr.rd.Offset - offset
128	}()
129
130	mr.bb.Reset()
131	mr.bw.Init(&mr.bb, false)
132
133	if err := mr.rd.PullBits(1); err != nil {
134		if err == io.ErrUnexpectedEOF {
135			return io.EOF // EOF is okay for first bit
136		}
137		return err
138	}
139	magic := mr.rd.ReadBits(32)
140	if uint32(magic)&magicMask != magicVals {
141		return errorf(errors.Corrupted, "invalid meta magic value")
142	}
143
144	// Decode header.
145	var fail bool
146	finalStream := (magic>>0)&1 > 0
147	pads := (magic >> 3) & 7       // 0..7
148	numHCLen := 4 + (magic>>13)&15 // 6..18, always even
149	fail = fail || numHCLen < 6
150	for i := uint(5); i < numHCLen-1; i++ {
151		fail = fail || mr.rd.ReadBits(3) != 0 // Empty HCLen code
152	}
153	fail = fail || mr.rd.ReadBits(3) != 2 // Final HCLen code
154	fail = fail || mr.rd.ReadBits(1) != 0 // First symbol always symZero
155	if fail {
156		return errorf(errors.Corrupted, "invalid meta header")
157	}
158
159	huffLen := 8 - (numHCLen-4)/2 // Based on XFLATE specification
160	huffRange := 1 << huffLen
161
162	// Read symbols.
163	var bit, ones uint
164	fifo := byte(0xff)
165	mr.bw.WriteBits(0, 1) // First symbol is symZero
166	for idx := 0; idx < maxSyms-1; {
167		cnt := 1
168		sym, ok := mr.rd.TryReadSymbol(&decHuff)
169		if !ok {
170			sym = mr.rd.ReadSymbol(&decHuff)
171		}
172		switch sym {
173		case symZero:
174			bit = 0
175			fifo = (fifo >> 1) | byte(0<<7)
176		case symOne:
177			bit = 1
178			fifo = (fifo >> 2) | byte(1<<6)
179		case symRepLast:
180			val, ok := mr.rd.TryReadBits(2)
181			if !ok {
182				val = mr.rd.ReadBits(2)
183			}
184			cnt = int(val + minRepLast)
185			fifo = (fifo >> 3) | byte(3<<5)
186			fifo = (fifo >> 2) | byte(val<<6)
187		case symRepZero:
188			bit = 0
189			val, ok := mr.rd.TryReadBits(7)
190			if !ok {
191				val = mr.rd.ReadBits(7)
192			}
193			cnt = int(val + minRepZero)
194			fifo = (fifo >> 3) | byte(7<<5)
195			fifo = (fifo >> 7) | byte(val<<1)
196		}
197
198		if fifo == 0x00 {
199			// The specification forbids a sequence of 8 zero bits to appear
200			// in the data section. This ensures that the magic value never
201			// appears in the meta encoding by accident.
202			return errorf(errors.Corrupted, "invalid sequence of meta symbols")
203		}
204		for i := 0; i < cnt; i++ {
205			if ok := mr.bw.TryWriteBits(bit, 1); !ok {
206				mr.bw.WriteBits(bit, 1)
207			}
208			ones += bit
209		}
210		idx += cnt
211	}
212	if mr.bw.BitsWritten() != maxSyms {
213		return errorf(errors.Corrupted, "excessive number of meta symbols")
214	}
215	mr.bw.WriteBits(0, numPads(maxSyms)) // Flush to byte boundary
216
217	// Decode data segment.
218	const idxEOB = maxSyms - 1
219	mr.bw.Flush()
220	syms := mr.bb.Bytes() // Exactly 33 bytes
221	if int(ones) != huffRange {
222		return errorf(errors.Corrupted, "degenerate meta prefix tree")
223	}
224	if syms[idxEOB/8]&(1<<(idxEOB%8)) == 0 {
225		return errorf(errors.Corrupted, "missing meta terminator symbol")
226	}
227
228	flags := syms[0]
229	finalMeta := (flags>>1)&1 > 0
230	invert := (flags>>2)&1 > 0
231	size := (flags >> 3) & 31 // 0..31
232
233	buf := syms[1 : 1+size] // Skip first header byte
234	if invert {
235		for i, b := range buf {
236			buf[i] = ^b
237		}
238	}
239
240	final := FinalMode(btoi(finalMeta) + btoi(finalStream))
241	if finalStream && !finalMeta {
242		return errorf(errors.Corrupted, "invalid combination of final bits")
243	}
244
245	// Decode footer.
246	fail = fail || mr.rd.ReadBits(pads) > 0                     // Pads must be zero
247	fail = fail || mr.rd.ReadBits(1) > 0                        // HDistTree must be empty
248	fail = fail || mr.rd.ReadBits(huffLen) != uint(huffRange-1) // EOB marker
249	fail = fail || mr.rd.BitsRead()%8 > 0                       // Bit reader not byte-aligned
250	if fail {
251		return errorf(errors.Corrupted, "invalid meta footer")
252	}
253
254	mr.buf, mr.final = buf, final
255	mr.NumBlocks++
256	return nil
257}
258