1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package lzma 6 7import ( 8 "errors" 9 "fmt" 10 "io" 11) 12 13const ( 14 // maximum size of compressed data in a chunk 15 maxCompressed = 1 << 16 16 // maximum size of uncompressed data in a chunk 17 maxUncompressed = 1 << 21 18) 19 20// chunkType represents the type of an LZMA2 chunk. Note that this 21// value is an internal representation and no actual encoding of a LZMA2 22// chunk header. 23type chunkType byte 24 25// Possible values for the chunk type. 26const ( 27 // end of stream 28 cEOS chunkType = iota 29 // uncompressed; reset dictionary 30 cUD 31 // uncompressed; no reset of dictionary 32 cU 33 // LZMA compressed; no reset 34 cL 35 // LZMA compressed; reset state 36 cLR 37 // LZMA compressed; reset state; new property value 38 cLRN 39 // LZMA compressed; reset state; new property value; reset dictionary 40 cLRND 41) 42 43// chunkTypeStrings provide a string representation for the chunk types. 44var chunkTypeStrings = [...]string{ 45 cEOS: "EOS", 46 cU: "U", 47 cUD: "UD", 48 cL: "L", 49 cLR: "LR", 50 cLRN: "LRN", 51 cLRND: "LRND", 52} 53 54// String returns a string representation of the chunk type. 55func (c chunkType) String() string { 56 if !(cEOS <= c && c <= cLRND) { 57 return "unknown" 58 } 59 return chunkTypeStrings[c] 60} 61 62// Actual encodings for the chunk types in the value. Note that the high 63// uncompressed size bits are stored in the header byte additionally. 64const ( 65 hEOS = 0 66 hUD = 1 67 hU = 2 68 hL = 1 << 7 69 hLR = 1<<7 | 1<<5 70 hLRN = 1<<7 | 1<<6 71 hLRND = 1<<7 | 1<<6 | 1<<5 72) 73 74// errHeaderByte indicates an unsupported value for the chunk header 75// byte. These bytes starts the variable-length chunk header. 76var errHeaderByte = errors.New("lzma: unsupported chunk header byte") 77 78// headerChunkType converts the header byte into a chunk type. It 79// ignores the uncompressed size bits in the chunk header byte. 80func headerChunkType(h byte) (c chunkType, err error) { 81 if h&hL == 0 { 82 // no compression 83 switch h { 84 case hEOS: 85 c = cEOS 86 case hUD: 87 c = cUD 88 case hU: 89 c = cU 90 default: 91 return 0, errHeaderByte 92 } 93 return 94 } 95 switch h & hLRND { 96 case hL: 97 c = cL 98 case hLR: 99 c = cLR 100 case hLRN: 101 c = cLRN 102 case hLRND: 103 c = cLRND 104 default: 105 return 0, errHeaderByte 106 } 107 return 108} 109 110// uncompressedHeaderLen provides the length of an uncompressed header 111const uncompressedHeaderLen = 3 112 113// headerLen returns the length of the LZMA2 header for a given chunk 114// type. 115func headerLen(c chunkType) int { 116 switch c { 117 case cEOS: 118 return 1 119 case cU, cUD: 120 return uncompressedHeaderLen 121 case cL, cLR: 122 return 5 123 case cLRN, cLRND: 124 return 6 125 } 126 panic(fmt.Errorf("unsupported chunk type %d", c)) 127} 128 129// chunkHeader represents the contents of a chunk header. 130type chunkHeader struct { 131 ctype chunkType 132 uncompressed uint32 133 compressed uint16 134 props Properties 135} 136 137// String returns a string representation of the chunk header. 138func (h *chunkHeader) String() string { 139 return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed, 140 h.compressed, &h.props) 141} 142 143// UnmarshalBinary reads the content of the chunk header from the data 144// slice. The slice must have the correct length. 145func (h *chunkHeader) UnmarshalBinary(data []byte) error { 146 if len(data) == 0 { 147 return errors.New("no data") 148 } 149 c, err := headerChunkType(data[0]) 150 if err != nil { 151 return err 152 } 153 154 n := headerLen(c) 155 if len(data) < n { 156 return errors.New("incomplete data") 157 } 158 if len(data) > n { 159 return errors.New("invalid data length") 160 } 161 162 *h = chunkHeader{ctype: c} 163 if c == cEOS { 164 return nil 165 } 166 167 h.uncompressed = uint32(uint16BE(data[1:3])) 168 if c <= cU { 169 return nil 170 } 171 h.uncompressed |= uint32(data[0]&^hLRND) << 16 172 173 h.compressed = uint16BE(data[3:5]) 174 if c <= cLR { 175 return nil 176 } 177 178 h.props, err = PropertiesForCode(data[5]) 179 return err 180} 181 182// MarshalBinary encodes the chunk header value. The function checks 183// whether the content of the chunk header is correct. 184func (h *chunkHeader) MarshalBinary() (data []byte, err error) { 185 if h.ctype > cLRND { 186 return nil, errors.New("invalid chunk type") 187 } 188 if err = h.props.verify(); err != nil { 189 return nil, err 190 } 191 192 data = make([]byte, headerLen(h.ctype)) 193 194 switch h.ctype { 195 case cEOS: 196 return data, nil 197 case cUD: 198 data[0] = hUD 199 case cU: 200 data[0] = hU 201 case cL: 202 data[0] = hL 203 case cLR: 204 data[0] = hLR 205 case cLRN: 206 data[0] = hLRN 207 case cLRND: 208 data[0] = hLRND 209 } 210 211 putUint16BE(data[1:3], uint16(h.uncompressed)) 212 if h.ctype <= cU { 213 return data, nil 214 } 215 data[0] |= byte(h.uncompressed>>16) &^ hLRND 216 217 putUint16BE(data[3:5], h.compressed) 218 if h.ctype <= cLR { 219 return data, nil 220 } 221 222 data[5] = h.props.Code() 223 return data, nil 224} 225 226// readChunkHeader reads the chunk header from the IO reader. 227func readChunkHeader(r io.Reader) (h *chunkHeader, err error) { 228 p := make([]byte, 1, 6) 229 if _, err = io.ReadFull(r, p); err != nil { 230 return 231 } 232 c, err := headerChunkType(p[0]) 233 if err != nil { 234 return 235 } 236 p = p[:headerLen(c)] 237 if _, err = io.ReadFull(r, p[1:]); err != nil { 238 return 239 } 240 h = new(chunkHeader) 241 if err = h.UnmarshalBinary(p); err != nil { 242 return nil, err 243 } 244 return h, nil 245} 246 247// uint16BE converts a big-endian uint16 representation to an uint16 248// value. 249func uint16BE(p []byte) uint16 { 250 return uint16(p[0])<<8 | uint16(p[1]) 251} 252 253// putUint16BE puts the big-endian uint16 presentation into the given 254// slice. 255func putUint16BE(p []byte, x uint16) { 256 p[0] = byte(x >> 8) 257 p[1] = byte(x) 258} 259 260// chunkState is used to manage the state of the chunks 261type chunkState byte 262 263// start and stop define the initial and terminating state of the chunk 264// state 265const ( 266 start chunkState = 'S' 267 stop = 'T' 268) 269 270// errors for the chunk state handling 271var ( 272 errChunkType = errors.New("lzma: unexpected chunk type") 273 errState = errors.New("lzma: wrong chunk state") 274) 275 276// next transitions state based on chunk type input 277func (c *chunkState) next(ctype chunkType) error { 278 switch *c { 279 // start state 280 case 'S': 281 switch ctype { 282 case cEOS: 283 *c = 'T' 284 case cUD: 285 *c = 'R' 286 case cLRND: 287 *c = 'L' 288 default: 289 return errChunkType 290 } 291 // normal LZMA mode 292 case 'L': 293 switch ctype { 294 case cEOS: 295 *c = 'T' 296 case cUD: 297 *c = 'R' 298 case cU: 299 *c = 'U' 300 case cL, cLR, cLRN, cLRND: 301 break 302 default: 303 return errChunkType 304 } 305 // reset required 306 case 'R': 307 switch ctype { 308 case cEOS: 309 *c = 'T' 310 case cUD, cU: 311 break 312 case cLRN, cLRND: 313 *c = 'L' 314 default: 315 return errChunkType 316 } 317 // uncompressed 318 case 'U': 319 switch ctype { 320 case cEOS: 321 *c = 'T' 322 case cUD: 323 *c = 'R' 324 case cU: 325 break 326 case cL, cLR, cLRN, cLRND: 327 *c = 'L' 328 default: 329 return errChunkType 330 } 331 // terminal state 332 case 'T': 333 return errChunkType 334 default: 335 return errState 336 } 337 return nil 338} 339 340// defaultChunkType returns the default chunk type for each chunk state. 341func (c chunkState) defaultChunkType() chunkType { 342 switch c { 343 case 'S': 344 return cLRND 345 case 'L', 'U': 346 return cL 347 case 'R': 348 return cLRN 349 default: 350 // no error 351 return cEOS 352 } 353} 354 355// maxDictCap defines the maximum dictionary capacity supported by the 356// LZMA2 dictionary capacity encoding. 357const maxDictCap = 1<<32 - 1 358 359// maxDictCapCode defines the maximum dictionary capacity code. 360const maxDictCapCode = 40 361 362// The function decodes the dictionary capacity byte, but doesn't change 363// for the correct range of the given byte. 364func decodeDictCap(c byte) int64 { 365 return (2 | int64(c)&1) << (11 + (c>>1)&0x1f) 366} 367 368// DecodeDictCap decodes the encoded dictionary capacity. The function 369// returns an error if the code is out of range. 370func DecodeDictCap(c byte) (n int64, err error) { 371 if c >= maxDictCapCode { 372 if c == maxDictCapCode { 373 return maxDictCap, nil 374 } 375 return 0, errors.New("lzma: invalid dictionary size code") 376 } 377 return decodeDictCap(c), nil 378} 379 380// EncodeDictCap encodes a dictionary capacity. The function returns the 381// code for the capacity that is greater or equal n. If n exceeds the 382// maximum support dictionary capacity, the maximum value is returned. 383func EncodeDictCap(n int64) byte { 384 a, b := byte(0), byte(40) 385 for a < b { 386 c := a + (b-a)>>1 387 m := decodeDictCap(c) 388 if n <= m { 389 if n == m { 390 return c 391 } 392 b = c 393 } else { 394 a = c + 1 395 } 396 } 397 return a 398} 399