1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package lzma 6 7import ( 8 "fmt" 9 "io" 10) 11 12// opLenMargin provides the upper limit of the number of bytes required 13// to encode a single operation. 14const opLenMargin = 10 15 16// compressFlags control the compression process. 17type compressFlags uint32 18 19// Values for compressFlags. 20const ( 21 // all data should be compressed, even if compression is not 22 // optimal. 23 all compressFlags = 1 << iota 24) 25 26// encoderFlags provide the flags for an encoder. 27type encoderFlags uint32 28 29// Flags for the encoder. 30const ( 31 // eosMarker requests an EOS marker to be written. 32 eosMarker encoderFlags = 1 << iota 33) 34 35// Encoder compresses data buffered in the encoder dictionary and writes 36// it into a byte writer. 37type encoder struct { 38 dict *encoderDict 39 state *state 40 re *rangeEncoder 41 start int64 42 // generate eos marker 43 marker bool 44 limit bool 45 margin int 46} 47 48// newEncoder creates a new encoder. If the byte writer must be 49// limited use LimitedByteWriter provided by this package. The flags 50// argument supports the eosMarker flag, controlling whether a 51// terminating end-of-stream marker must be written. 52func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict, 53 flags encoderFlags) (e *encoder, err error) { 54 55 re, err := newRangeEncoder(bw) 56 if err != nil { 57 return nil, err 58 } 59 e = &encoder{ 60 dict: dict, 61 state: state, 62 re: re, 63 marker: flags&eosMarker != 0, 64 start: dict.Pos(), 65 margin: opLenMargin, 66 } 67 if e.marker { 68 e.margin += 5 69 } 70 return e, nil 71} 72 73// Write writes the bytes from p into the dictionary. If not enough 74// space is available the data in the dictionary buffer will be 75// compressed to make additional space available. If the limit of the 76// underlying writer has been reached ErrLimit will be returned. 77func (e *encoder) Write(p []byte) (n int, err error) { 78 for { 79 k, err := e.dict.Write(p[n:]) 80 n += k 81 if err == ErrNoSpace { 82 if err = e.compress(0); err != nil { 83 return n, err 84 } 85 continue 86 } 87 return n, err 88 } 89} 90 91// Reopen reopens the encoder with a new byte writer. 92func (e *encoder) Reopen(bw io.ByteWriter) error { 93 var err error 94 if e.re, err = newRangeEncoder(bw); err != nil { 95 return err 96 } 97 e.start = e.dict.Pos() 98 e.limit = false 99 return nil 100} 101 102// writeLiteral writes a literal into the LZMA stream 103func (e *encoder) writeLiteral(l lit) error { 104 var err error 105 state, state2, _ := e.state.states(e.dict.Pos()) 106 if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil { 107 return err 108 } 109 litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos()) 110 match := e.dict.ByteAt(int(e.state.rep[0]) + 1) 111 err = e.state.litCodec.Encode(e.re, l.b, state, match, litState) 112 if err != nil { 113 return err 114 } 115 e.state.updateStateLiteral() 116 return nil 117} 118 119// iverson implements the Iverson operator as proposed by Donald Knuth in his 120// book Concrete Mathematics. 121func iverson(ok bool) uint32 { 122 if ok { 123 return 1 124 } 125 return 0 126} 127 128// writeMatch writes a repetition operation into the operation stream 129func (e *encoder) writeMatch(m match) error { 130 var err error 131 if !(minDistance <= m.distance && m.distance <= maxDistance) { 132 panic(fmt.Errorf("match distance %d out of range", m.distance)) 133 } 134 dist := uint32(m.distance - minDistance) 135 if !(minMatchLen <= m.n && m.n <= maxMatchLen) && 136 !(dist == e.state.rep[0] && m.n == 1) { 137 panic(fmt.Errorf( 138 "match length %d out of range; dist %d rep[0] %d", 139 m.n, dist, e.state.rep[0])) 140 } 141 state, state2, posState := e.state.states(e.dict.Pos()) 142 if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil { 143 return err 144 } 145 g := 0 146 for ; g < 4; g++ { 147 if e.state.rep[g] == dist { 148 break 149 } 150 } 151 b := iverson(g < 4) 152 if err = e.state.isRep[state].Encode(e.re, b); err != nil { 153 return err 154 } 155 n := uint32(m.n - minMatchLen) 156 if b == 0 { 157 // simple match 158 e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] = 159 e.state.rep[2], e.state.rep[1], e.state.rep[0], dist 160 e.state.updateStateMatch() 161 if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil { 162 return err 163 } 164 return e.state.distCodec.Encode(e.re, dist, n) 165 } 166 b = iverson(g != 0) 167 if err = e.state.isRepG0[state].Encode(e.re, b); err != nil { 168 return err 169 } 170 if b == 0 { 171 // g == 0 172 b = iverson(m.n != 1) 173 if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil { 174 return err 175 } 176 if b == 0 { 177 e.state.updateStateShortRep() 178 return nil 179 } 180 } else { 181 // g in {1,2,3} 182 b = iverson(g != 1) 183 if err = e.state.isRepG1[state].Encode(e.re, b); err != nil { 184 return err 185 } 186 if b == 1 { 187 // g in {2,3} 188 b = iverson(g != 2) 189 err = e.state.isRepG2[state].Encode(e.re, b) 190 if err != nil { 191 return err 192 } 193 if b == 1 { 194 e.state.rep[3] = e.state.rep[2] 195 } 196 e.state.rep[2] = e.state.rep[1] 197 } 198 e.state.rep[1] = e.state.rep[0] 199 e.state.rep[0] = dist 200 } 201 e.state.updateStateRep() 202 return e.state.repLenCodec.Encode(e.re, n, posState) 203} 204 205// writeOp writes a single operation to the range encoder. The function 206// checks whether there is enough space available to close the LZMA 207// stream. 208func (e *encoder) writeOp(op operation) error { 209 if e.re.Available() < int64(e.margin) { 210 return ErrLimit 211 } 212 switch x := op.(type) { 213 case lit: 214 return e.writeLiteral(x) 215 case match: 216 return e.writeMatch(x) 217 default: 218 panic("unexpected operation") 219 } 220} 221 222// compress compressed data from the dictionary buffer. If the flag all 223// is set, all data in the dictionary buffer will be compressed. The 224// function returns ErrLimit if the underlying writer has reached its 225// limit. 226func (e *encoder) compress(flags compressFlags) error { 227 n := 0 228 if flags&all == 0 { 229 n = maxMatchLen - 1 230 } 231 d := e.dict 232 m := d.m 233 for d.Buffered() > n { 234 op := m.NextOp(e.state.rep) 235 if err := e.writeOp(op); err != nil { 236 return err 237 } 238 d.Discard(op.Len()) 239 } 240 return nil 241} 242 243// eosMatch is a pseudo operation that indicates the end of the stream. 244var eosMatch = match{distance: maxDistance, n: minMatchLen} 245 246// Close terminates the LZMA stream. If requested the end-of-stream 247// marker will be written. If the byte writer limit has been or will be 248// reached during compression of the remaining data in the buffer the 249// LZMA stream will be closed and data will remain in the buffer. 250func (e *encoder) Close() error { 251 err := e.compress(all) 252 if err != nil && err != ErrLimit { 253 return err 254 } 255 if e.marker { 256 if err := e.writeMatch(eosMatch); err != nil { 257 return err 258 } 259 } 260 err = e.re.Close() 261 return err 262} 263 264// Compressed returns the number bytes of the input data that been 265// compressed. 266func (e *encoder) Compressed() int64 { 267 return e.dict.Pos() - e.start 268} 269