1// Copyright (c) 2010, Andrei Vieru. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// The lzma package implements reading and writing of LZMA format compressed data. 6// Reference implementation is LZMA SDK version 4.65 originaly developed by Igor 7// Pavlov, available online at: 8// 9// http://www.7-zip.org/sdk.html 10// 11// 12// 13// Usage examples. Write compressed data to a buffer: 14// 15// var b bytes.Buffer 16// w := lzma.NewWriter(&b) 17// w.Write([]byte("hello, world\n")) 18// w.Close() 19// 20// read that data back: 21// 22// r := lzma.NewReader(&b) 23// io.Copy(os.Stdout, r) 24// r.Close() 25// 26// 27// 28// If the data is bigger than you'd like to hold into memory, use pipes. Write 29// compressed data to an io.PipeWriter: 30// 31// pr, pw := io.Pipe() 32// go func() { 33// defer pw.Close() 34// w := lzma.NewWriter(pw) 35// defer w.Close() 36// // the bytes.Buffer would be an io.Reader used to read uncompressed data from 37// io.Copy(w, bytes.NewBuffer([]byte("hello, world\n"))) 38// }() 39// 40// and read it back: 41// 42// defer pr.Close() 43// r := lzma.NewReader(pr) 44// defer r.Close() 45// // the os.Stdout would be an io.Writer used to write uncompressed data to 46// io.Copy(os.Stdout, r) 47// 48// 49// 50package lzma 51 52import ( 53 "errors" 54 "io" 55) 56 57const ( 58 inBufSize = 1 << 16 59 outBufSize = 1 << 16 60 lzmaPropSize = 5 61 lzmaHeaderSize = lzmaPropSize + 8 62 lzmaMaxReqInputSize = 20 63 64 kNumRepDistances = 4 65 kNumStates = 12 66 kNumPosSlotBits = 6 67 kDicLogSizeMin = 0 68 kNumLenToPosStatesBits = 2 69 kNumLenToPosStates = 1 << kNumLenToPosStatesBits 70 kMatchMinLen = 2 71 kNumAlignBits = 4 72 kAlignTableSize = 1 << kNumAlignBits 73 kAlignMask = kAlignTableSize - 1 74 kStartPosModelIndex = 4 75 kEndPosModelIndex = 14 76 kNumPosModels = kEndPosModelIndex - kStartPosModelIndex 77 kNumFullDistances = 1 << (kEndPosModelIndex / 2) 78 kNumLitPosStatesBitsEncodingMax = 4 79 kNumLitContextBitsMax = 8 80 kNumPosStatesBitsMax = 4 81 kNumPosStatesMax = 1 << kNumPosStatesBitsMax 82 kNumLowLenBits = 3 83 kNumMidLenBits = 3 84 kNumHighLenBits = 8 85 kNumLowLenSymbols = 1 << kNumLowLenBits 86 kNumMidLenSymbols = 1 << kNumMidLenBits 87 kNumLenSymbols = kNumLowLenSymbols + kNumMidLenSymbols + (1 << kNumHighLenBits) 88 kMatchMaxLen = kMatchMinLen + kNumLenSymbols - 1 89) 90 91// A streamError reports the presence of corrupt input stream. 92var streamError = errors.New("error in lzma encoded data stream") 93 94// A headerError reports an error in the header of the lzma encoder file. 95var headerError = errors.New("error in lzma header") 96 97// A nReadError reports what its message reads 98var nReadError = errors.New("number of bytes returned by Reader.Read() didn't meet expectances") 99 100// A nWriteError reports what its message reads 101var nWriteError = errors.New("number of bytes returned by Writer.Write() didn't meet expectances") 102 103// TODO: implement this err 104// A dataIntegrityError reports an error encountered while cheching data integrity. 105// -- from lzma.txt: 106// You can use multiple checks to test data integrity after full decompression: 107// 1) Check Result and "status" variable. 108// 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. 109// 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. 110// You must use correct finish mode in that case. 111// 112//type dataIntegrityError struct { 113// msg string 114// // hz 115//} 116 117func stateUpdateChar(index uint32) uint32 { 118 if index < 4 { 119 return 0 120 } 121 if index < 10 { 122 return index - 3 123 } 124 return index - 6 125} 126 127func stateUpdateMatch(index uint32) uint32 { 128 if index < 7 { 129 return 7 130 } 131 return 10 132} 133 134func stateUpdateRep(index uint32) uint32 { 135 if index < 7 { 136 return 8 137 } 138 return 11 139} 140 141func stateUpdateShortRep(index uint32) uint32 { 142 if index < 7 { 143 return 9 144 } 145 return 11 146} 147 148func stateIsCharState(index uint32) bool { 149 if index < 7 { 150 return true 151 } 152 return false 153} 154 155func getLenToPosState(length uint32) uint32 { 156 length -= kMatchMinLen 157 if length < kNumLenToPosStates { 158 return length 159 } 160 return kNumLenToPosStates - 1 161} 162 163// LZMA compressed file format 164// --------------------------- 165// Offset Size Description 166// 0 1 Special LZMA properties (lc,lp, pb in encoded form) 167// 1 4 Dictionary size (little endian) 168// 5 8 Uncompressed size (little endian). Size -1 stands for unknown size 169 170// lzma properties 171type props struct { 172 litContextBits, // lc 173 litPosStateBits, // lp 174 posStateBits uint8 // pb 175 dictSize uint32 176} 177 178func (p *props) decodeProps(buf []byte) { 179 d := buf[0] 180 if d > (9 * 5 * 5) { 181 throw(headerError) 182 } 183 p.litContextBits = d % 9 184 d /= 9 185 p.posStateBits = d / 5 186 p.litPosStateBits = d % 5 187 if p.litContextBits > kNumLitContextBitsMax || p.litPosStateBits > 4 || p.posStateBits > kNumPosStatesBitsMax { 188 throw(headerError) 189 } 190 for i := 0; i < 4; i++ { 191 p.dictSize += uint32(buf[i+1]) << uint32(i*8) 192 } 193} 194 195type decoder struct { 196 // i/o 197 rd *rangeDecoder // r 198 outWin *lzOutWindow // w 199 200 // lzma header 201 prop *props 202 unpackSize int64 203 204 // hz 205 matchDecoders []uint16 206 repDecoders []uint16 207 repG0Decoders []uint16 208 repG1Decoders []uint16 209 repG2Decoders []uint16 210 rep0LongDecoders []uint16 211 posSlotCoders []*rangeBitTreeCoder 212 posDecoders []uint16 213 posAlignCoder *rangeBitTreeCoder 214 lenCoder *lenCoder 215 repLenCoder *lenCoder 216 litCoder *litCoder 217 dictSizeCheck uint32 218 posStateMask uint32 219} 220 221func (z *decoder) doDecode() { 222 var state uint32 = 0 223 var rep0 uint32 = 0 224 var rep1 uint32 = 0 225 var rep2 uint32 = 0 226 var rep3 uint32 = 0 227 var nowPos uint64 = 0 228 var prevByte byte = 0 229 230 for z.unpackSize < 0 || int64(nowPos) < z.unpackSize { 231 posState := uint32(nowPos) & z.posStateMask 232 if z.rd.decodeBit(z.matchDecoders, state<<kNumPosStatesBitsMax+posState) == 0 { 233 lsc := z.litCoder.getSubCoder(uint32(nowPos), prevByte) 234 if !stateIsCharState(state) { 235 prevByte = lsc.decodeWithMatchByte(z.rd, z.outWin.getByte(rep0)) 236 } else { 237 prevByte = lsc.decodeNormal(z.rd) 238 } 239 z.outWin.putByte(prevByte) 240 state = stateUpdateChar(state) 241 nowPos++ 242 } else { 243 var length uint32 244 if z.rd.decodeBit(z.repDecoders, state) == 1 { 245 length = 0 246 if z.rd.decodeBit(z.repG0Decoders, state) == 0 { 247 if z.rd.decodeBit(z.rep0LongDecoders, state<<kNumPosStatesBitsMax+posState) == 0 { 248 state = stateUpdateShortRep(state) 249 length = 1 250 } 251 } else { 252 var distance uint32 253 if z.rd.decodeBit(z.repG1Decoders, state) == 0 { 254 distance = rep1 255 } else { 256 if z.rd.decodeBit(z.repG2Decoders, state) == 0 { 257 distance = rep2 258 } else { 259 distance, rep3 = rep3, rep2 260 } 261 rep2 = rep1 262 } 263 rep1, rep0 = rep0, distance 264 } 265 if length == 0 { 266 length = z.repLenCoder.decode(z.rd, posState) + kMatchMinLen 267 state = stateUpdateRep(state) 268 } 269 } else { 270 rep3, rep2, rep1 = rep2, rep1, rep0 271 length = z.lenCoder.decode(z.rd, posState) + kMatchMinLen 272 state = stateUpdateMatch(state) 273 posSlot := z.posSlotCoders[getLenToPosState(length)].decode(z.rd) 274 if posSlot >= kStartPosModelIndex { 275 numDirectBits := posSlot>>1 - 1 276 rep0 = (2 | posSlot&1) << numDirectBits 277 if posSlot < kEndPosModelIndex { 278 rep0 += reverseDecodeIndex(z.rd, z.posDecoders, rep0-posSlot-1, numDirectBits) 279 } else { 280 rep0 += z.rd.decodeDirectBits(numDirectBits-kNumAlignBits) << kNumAlignBits 281 rep0 += z.posAlignCoder.reverseDecode(z.rd) 282 if int32(rep0) < 0 { 283 if rep0 == 0xFFFFFFFF { 284 break 285 } 286 throw(streamError) 287 } 288 } 289 } else { 290 rep0 = posSlot 291 } 292 } 293 if uint64(rep0) >= nowPos || rep0 >= z.dictSizeCheck { 294 throw(streamError) 295 } 296 z.outWin.copyBlock(rep0, length) 297 nowPos += uint64(length) 298 prevByte = z.outWin.getByte(0) 299 } 300 } 301 z.outWin.flush() 302 //if z.unpackSize != -1 { 303 // if z.outWin.unpacked != z.unpackSize { 304 // throw(&dataIntegrityError{}) 305 // } 306 //} 307} 308 309func (z *decoder) decoder(r io.Reader, w io.Writer) (err error) { 310 defer handlePanics(&err) 311 312 // read 13 bytes (lzma header) 313 header := make([]byte, lzmaHeaderSize) 314 n, err := r.Read(header) 315 if err != nil { 316 return 317 } 318 if n != lzmaHeaderSize { 319 return nReadError 320 } 321 z.prop = &props{} 322 z.prop.decodeProps(header) 323 324 z.unpackSize = 0 325 for i := 0; i < 8; i++ { 326 b := header[lzmaPropSize+i] 327 z.unpackSize = z.unpackSize | int64(b)<<uint64(8*i) 328 } 329 330 // do not move before r.Read(header) 331 z.rd = newRangeDecoder(r) 332 333 z.dictSizeCheck = maxUInt32(z.prop.dictSize, 1) 334 z.outWin = newLzOutWindow(w, maxUInt32(z.dictSizeCheck, 1<<12)) 335 336 z.litCoder = newLitCoder(uint32(z.prop.litPosStateBits), uint32(z.prop.litContextBits)) 337 z.lenCoder = newLenCoder(uint32(1 << z.prop.posStateBits)) 338 z.repLenCoder = newLenCoder(uint32(1 << z.prop.posStateBits)) 339 z.posStateMask = uint32(1<<z.prop.posStateBits - 1) 340 z.matchDecoders = initBitModels(kNumStates << kNumPosStatesBitsMax) 341 z.repDecoders = initBitModels(kNumStates) 342 z.repG0Decoders = initBitModels(kNumStates) 343 z.repG1Decoders = initBitModels(kNumStates) 344 z.repG2Decoders = initBitModels(kNumStates) 345 z.rep0LongDecoders = initBitModels(kNumStates << kNumPosStatesBitsMax) 346 z.posDecoders = initBitModels(kNumFullDistances - kEndPosModelIndex) 347 z.posSlotCoders = make([]*rangeBitTreeCoder, kNumLenToPosStates) 348 for i := 0; i < kNumLenToPosStates; i++ { 349 z.posSlotCoders[i] = newRangeBitTreeCoder(kNumPosSlotBits) 350 } 351 z.posAlignCoder = newRangeBitTreeCoder(kNumAlignBits) 352 353 z.doDecode() 354 return 355} 356 357// NewReader returns a new ReadCloser that can be used to read the uncompressed 358// version of r. It is the caller's responsibility to call Close on the ReadCloser 359// when finished reading. 360// 361func NewReader(r io.Reader) io.ReadCloser { 362 var z decoder 363 pr, pw := io.Pipe() 364 go func() { 365 err := z.decoder(r, pw) 366 pw.CloseWithError(err) 367 }() 368 return pr 369} 370