1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package meta 6 7import ( 8 "bytes" 9 "io" 10 11 "github.com/dsnet/compress/internal/errors" 12 "github.com/dsnet/compress/internal/prefix" 13) 14 15// A Reader is an io.Reader that can read XFLATE's meta encoding. 16// The zero value of Reader is valid once Reset is called. 17type Reader struct { 18 InputOffset int64 // Total number of bytes read from underlying io.Reader 19 OutputOffset int64 // Total number of bytes emitted from Read 20 NumBlocks int64 // Number of blocks decoded 21 22 // FinalMode indicates which final bits (if any) were set. 23 // This will be valid after a call to Close or upon hitting io.EOF. 24 FinalMode FinalMode 25 26 rd *prefix.Reader 27 br prefix.Reader // Pre-allocated prefix.Reader to wrap input Reader 28 bw prefix.Writer // Temporary bit writer 29 bb bytes.Buffer // Buffer for bw to write into 30 31 final FinalMode 32 buf []byte 33 err error 34} 35 36// NewReader creates a new Reader reading from the given reader. 37// If rd does not also implement compress.ByteReader or compress.BufferedReader, 38// then the decoder may read more data than necessary from rd. 39func NewReader(rd io.Reader) *Reader { 40 mr := new(Reader) 41 mr.Reset(rd) 42 return mr 43} 44 45// Reset discards the Reader's state and makes it equivalent to the result 46// of a call to NewReader, but reading from rd instead. 47// 48// This is used to reduce memory allocations. 49func (mr *Reader) Reset(rd io.Reader) { 50 *mr = Reader{ 51 br: mr.br, 52 bw: mr.bw, 53 bb: mr.bb, 54 } 55 if br, ok := rd.(*prefix.Reader); ok { 56 // Use input Reader directly as a prefix.Reader. 57 mr.rd = br 58 } else { 59 // Use pre-allocated prefix.Reader to wrap input Reader. 60 mr.rd = &mr.br 61 mr.rd.Init(rd, false) 62 } 63 return 64} 65 66// Read reads the decoded meta data from the underlying io.Reader. 67// This returns io.EOF either when a meta block with final bits set is found or 68// when io.EOF is hit in the underlying reader. 69func (mr *Reader) Read(buf []byte) (int, error) { 70 if mr.err != nil { 71 return 0, mr.err 72 } 73 74 var rdCnt int 75 for len(buf) > 0 { 76 if len(mr.buf) > 0 { 77 cpCnt := copy(buf, mr.buf) 78 mr.buf = mr.buf[cpCnt:] 79 rdCnt += cpCnt 80 break 81 } 82 83 if mr.final != FinalNil { 84 mr.FinalMode = mr.final 85 mr.err = io.EOF 86 break 87 } 88 89 mr.err = mr.decodeBlock() 90 if mr.err != nil { 91 break 92 } 93 } 94 95 mr.OutputOffset += int64(rdCnt) 96 return rdCnt, mr.err 97} 98 99// Close ends the meta stream. 100// The FinalMode encountered becomes valid after calling Close. 101func (mr *Reader) Close() error { 102 if mr.err == errClosed { 103 return nil 104 } 105 if mr.err != nil && mr.err != io.EOF { 106 return mr.err 107 } 108 109 mr.FinalMode = mr.final 110 mr.err = errClosed 111 mr.rd = nil // Release reference to underlying Reader 112 return nil 113} 114 115// decodeBlock decodes a single meta block from the underlying Reader 116// into mr.buf and sets mr.final based on the block's final bits. 117// It also manages the statistic variables: InputOffset and NumBlocks. 118func (mr *Reader) decodeBlock() (err error) { 119 defer errors.Recover(&err) 120 121 // Update the number of bytes read from underlying Reader. 122 offset := mr.rd.Offset 123 defer func() { 124 if _, errFl := mr.rd.Flush(); errFl != nil { 125 err = errFl 126 } 127 mr.InputOffset += mr.rd.Offset - offset 128 }() 129 130 mr.bb.Reset() 131 mr.bw.Init(&mr.bb, false) 132 133 if err := mr.rd.PullBits(1); err != nil { 134 if err == io.ErrUnexpectedEOF { 135 return io.EOF // EOF is okay for first bit 136 } 137 return err 138 } 139 magic := mr.rd.ReadBits(32) 140 if uint32(magic)&magicMask != magicVals { 141 return errorf(errors.Corrupted, "invalid meta magic value") 142 } 143 144 // Decode header. 145 var fail bool 146 finalStream := (magic>>0)&1 > 0 147 pads := (magic >> 3) & 7 // 0..7 148 numHCLen := 4 + (magic>>13)&15 // 6..18, always even 149 fail = fail || numHCLen < 6 150 for i := uint(5); i < numHCLen-1; i++ { 151 fail = fail || mr.rd.ReadBits(3) != 0 // Empty HCLen code 152 } 153 fail = fail || mr.rd.ReadBits(3) != 2 // Final HCLen code 154 fail = fail || mr.rd.ReadBits(1) != 0 // First symbol always symZero 155 if fail { 156 return errorf(errors.Corrupted, "invalid meta header") 157 } 158 159 huffLen := 8 - (numHCLen-4)/2 // Based on XFLATE specification 160 huffRange := 1 << huffLen 161 162 // Read symbols. 163 var bit, ones uint 164 fifo := byte(0xff) 165 mr.bw.WriteBits(0, 1) // First symbol is symZero 166 for idx := 0; idx < maxSyms-1; { 167 cnt := 1 168 sym, ok := mr.rd.TryReadSymbol(&decHuff) 169 if !ok { 170 sym = mr.rd.ReadSymbol(&decHuff) 171 } 172 switch sym { 173 case symZero: 174 bit = 0 175 fifo = (fifo >> 1) | byte(0<<7) 176 case symOne: 177 bit = 1 178 fifo = (fifo >> 2) | byte(1<<6) 179 case symRepLast: 180 val, ok := mr.rd.TryReadBits(2) 181 if !ok { 182 val = mr.rd.ReadBits(2) 183 } 184 cnt = int(val + minRepLast) 185 fifo = (fifo >> 3) | byte(3<<5) 186 fifo = (fifo >> 2) | byte(val<<6) 187 case symRepZero: 188 bit = 0 189 val, ok := mr.rd.TryReadBits(7) 190 if !ok { 191 val = mr.rd.ReadBits(7) 192 } 193 cnt = int(val + minRepZero) 194 fifo = (fifo >> 3) | byte(7<<5) 195 fifo = (fifo >> 7) | byte(val<<1) 196 } 197 198 if fifo == 0x00 { 199 // The specification forbids a sequence of 8 zero bits to appear 200 // in the data section. This ensures that the magic value never 201 // appears in the meta encoding by accident. 202 return errorf(errors.Corrupted, "invalid sequence of meta symbols") 203 } 204 for i := 0; i < cnt; i++ { 205 if ok := mr.bw.TryWriteBits(bit, 1); !ok { 206 mr.bw.WriteBits(bit, 1) 207 } 208 ones += bit 209 } 210 idx += cnt 211 } 212 if mr.bw.BitsWritten() != maxSyms { 213 return errorf(errors.Corrupted, "excessive number of meta symbols") 214 } 215 mr.bw.WriteBits(0, numPads(maxSyms)) // Flush to byte boundary 216 217 // Decode data segment. 218 const idxEOB = maxSyms - 1 219 mr.bw.Flush() 220 syms := mr.bb.Bytes() // Exactly 33 bytes 221 if int(ones) != huffRange { 222 return errorf(errors.Corrupted, "degenerate meta prefix tree") 223 } 224 if syms[idxEOB/8]&(1<<(idxEOB%8)) == 0 { 225 return errorf(errors.Corrupted, "missing meta terminator symbol") 226 } 227 228 flags := syms[0] 229 finalMeta := (flags>>1)&1 > 0 230 invert := (flags>>2)&1 > 0 231 size := (flags >> 3) & 31 // 0..31 232 233 buf := syms[1 : 1+size] // Skip first header byte 234 if invert { 235 for i, b := range buf { 236 buf[i] = ^b 237 } 238 } 239 240 final := FinalMode(btoi(finalMeta) + btoi(finalStream)) 241 if finalStream && !finalMeta { 242 return errorf(errors.Corrupted, "invalid combination of final bits") 243 } 244 245 // Decode footer. 246 fail = fail || mr.rd.ReadBits(pads) > 0 // Pads must be zero 247 fail = fail || mr.rd.ReadBits(1) > 0 // HDistTree must be empty 248 fail = fail || mr.rd.ReadBits(huffLen) != uint(huffRange-1) // EOB marker 249 fail = fail || mr.rd.BitsRead()%8 > 0 // Bit reader not byte-aligned 250 if fail { 251 return errorf(errors.Corrupted, "invalid meta footer") 252 } 253 254 mr.buf, mr.final = buf, final 255 mr.NumBlocks++ 256 return nil 257} 258