// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package snappy

import (
	"encoding/binary"
	"errors"
	"io"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Encode handles the Snappy block format, not the Snappy stream format.
func Encode(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	// Compress the input one maxBlockSize-sized piece at a time, appending
	// each piece's encoding to dst at offset d.
	for len(src) > 0 {
		p := src
		src = nil
		if len(p) > maxBlockSize {
			p, src = p[:maxBlockSize], p[maxBlockSize:]
		}
		if len(p) < minNonLiteralBlockSize {
			// Too short for encodeBlock to emit a copy op; emit verbatim.
			d += emitLiteral(dst[d:], p)
		} else {
			d += encodeBlock(dst[d:], p)
		}
	}
	return dst[:d]
}

// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 16 - 1

// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// could be encoded with a copy tag. This is the minimum with respect to the
// algorithm used by encodeBlock, not a minimum enforced by the file format.
//
// The encoded output must start with at least a 1 byte literal, as there are
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
// from an emitCopy call in encodeBlock's main loop, would require at least
// another inputMargin bytes, for the reason above: we want any emitLiteral
// calls inside encodeBlock's main loop to use the fast path if possible, which
// requires being able to overrun by inputMargin bytes. Thus,
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
//
// The C++ code doesn't use this exact threshold, but it could, as discussed at
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
// optimization. It should not affect the encoded form. This is tested by
// TestSameEncodingAsCppShortCopies.
const minNonLiteralBlockSize = 1 + 1 + inputMargin

// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if n > 0xffffffff {
		return -1
	}
	// Compressed data can be defined as:
	//   compressed := item* literal*
	//   item       := literal* copy
	//
	// The trailing literal sequence has a space blowup of at most 62/60
	// since a literal of length 60 needs one tag byte + one extra byte
	// for length information.
	//
	// Item blowup is trickier to measure. Suppose the "copy" op copies
	// 4 bytes of data. Because of a special check in the encoding code,
	// we produce a 4-byte copy only if the offset is < 65536. Therefore
	// the copy op takes 3 bytes to encode, and this type of item leads
	// to at most the 62/60 blowup for representing literals.
	//
	// Suppose the "copy" op copies 5 bytes of data. If the offset is big
	// enough, it will take 5 bytes to encode the copy op. Therefore the
	// worst case here is a one-byte literal followed by a five-byte copy.
	// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
	//
	// This last factor dominates the blowup, so the final estimate is:
	n = 32 + n + n/6
	if n > 0xffffffff {
		return -1
	}
	return int(n)
}

var errClosed = errors.New("snappy: Writer is closed")

// NewWriter returns a new Writer that compresses to w.
//
// The Writer returned does not buffer writes. There is no need to Flush or
// Close such a Writer.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		w:    w,
		obuf: make([]byte, obufLen),
	}
}

// NewBufferedWriter returns a new Writer that compresses to w, using the
// framing format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Writer returned buffers writes. Users must call Close to guarantee all
// data has been forwarded to the underlying io.Writer. They may also call
// Flush zero or more times before calling Close.
func NewBufferedWriter(w io.Writer) *Writer {
	return &Writer{
		w:    w,
		ibuf: make([]byte, 0, maxBlockSize),
		obuf: make([]byte, obufLen),
	}
}

// Writer is an io.Writer that can write Snappy-compressed bytes.
//
// Writer handles the Snappy stream format, not the Snappy block format.
type Writer struct {
	w   io.Writer
	err error // sticky: once set, all subsequent operations fail with it

	// ibuf is a buffer for the incoming (uncompressed) bytes.
	//
	// Its use is optional. For backwards compatibility, Writers created by the
	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
	// therefore do not need to be Flush'ed or Close'd.
	ibuf []byte

	// obuf is a buffer for the outgoing (compressed) bytes.
	obuf []byte

	// wroteStreamHeader is whether we have written the stream header.
	wroteStreamHeader bool
}

// Reset discards the writer's state and switches the Snappy writer to write to
// w. This permits reusing a Writer rather than allocating a new one.
func (w *Writer) Reset(writer io.Writer) {
	w.w = writer
	w.err = nil
	if w.ibuf != nil {
		// Keep the buffer's capacity so the reused Writer allocates nothing.
		w.ibuf = w.ibuf[:0]
	}
	w.wroteStreamHeader = false
}

// Write satisfies the io.Writer interface.
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
	if w.ibuf == nil {
		// Do not buffer incoming bytes. This does not perform or compress well
		// if the caller of Writer.Write writes many small slices. This
		// behavior is therefore deprecated, but still supported for backwards
		// compatibility with code that doesn't explicitly Flush or Close.
		return w.write(p)
	}

	// The remainder of this method is based on bufio.Writer.Write from the
	// standard library.

	// While p overflows ibuf's remaining capacity, either compress p directly
	// or fill-and-flush ibuf. Errors from w.write/w.Flush are latched into
	// w.err and reported after the loop.
	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
		var n int
		if len(w.ibuf) == 0 {
			// Large write, empty buffer.
			// Write directly from p to avoid copy.
			n, _ = w.write(p)
		} else {
			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
			w.ibuf = w.ibuf[:len(w.ibuf)+n]
			w.Flush()
		}
		nRet += n
		p = p[n:]
	}
	if w.err != nil {
		return nRet, w.err
	}
	// The remainder of p fits in ibuf; buffer it for a later Flush/Close.
	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
	w.ibuf = w.ibuf[:len(w.ibuf)+n]
	nRet += n
	return nRet, nil
}

// write compresses p and writes it to the underlying io.Writer, one
// maxBlockSize chunk at a time, framing each chunk with the per-chunk header
// (and, on the first call, the stream header). Any error is also latched into
// w.err. nRet counts consumed uncompressed bytes, per the io.Writer contract.
func (w *Writer) write(p []byte) (nRet int, errRet error) {
	if w.err != nil {
		return 0, w.err
	}
	for len(p) > 0 {
		// obuf begins with the stream header (the "magic chunk"). It is
		// emitted only once, for the first chunk of the stream; afterwards
		// obufStart skips past it.
		obufStart := len(magicChunk)
		if !w.wroteStreamHeader {
			w.wroteStreamHeader = true
			copy(w.obuf, magicChunk)
			obufStart = 0
		}

		var uncompressed []byte
		if len(p) > maxBlockSize {
			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
		} else {
			uncompressed, p = p, nil
		}
		checksum := crc(uncompressed)

		// Compress the buffer, discarding the result if the improvement
		// isn't at least 12.5%.
		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
		chunkType := uint8(chunkTypeCompressedData)
		// The 4-byte checksum counts toward the chunk length, hence "4 +".
		chunkLen := 4 + len(compressed)
		obufEnd := obufHeaderLen + len(compressed)
		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
			// Not compressible enough: frame the chunk as uncompressed data.
			// Its body is written separately, directly from p, below.
			chunkType = chunkTypeUncompressedData
			chunkLen = 4 + len(uncompressed)
			obufEnd = obufHeaderLen
		}

		// Fill in the per-chunk header that comes before the body:
		// 1 byte chunk type, 3 bytes little-endian chunk length, then the
		// 4 byte little-endian CRC of the uncompressed data.
		w.obuf[len(magicChunk)+0] = chunkType
		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)

		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
			w.err = err
			return nRet, err
		}
		if chunkType == chunkTypeUncompressedData {
			if _, err := w.w.Write(uncompressed); err != nil {
				w.err = err
				return nRet, err
			}
		}
		nRet += len(uncompressed)
	}
	return nRet, nil
}

// Flush flushes the Writer to its underlying io.Writer.
func (w *Writer) Flush() error {
	if w.err != nil {
		return w.err
	}
	if len(w.ibuf) == 0 {
		return nil
	}
	// write consumes all of ibuf; any error it hits is latched into w.err,
	// which is what we return.
	w.write(w.ibuf)
	w.ibuf = w.ibuf[:0]
	return w.err
}

// Close calls Flush and then closes the Writer.
func (w *Writer) Close() error {
	w.Flush()
	ret := w.err
	if w.err == nil {
		// Mark the Writer closed so later Writes fail, but report the
		// Flush outcome (nil) to this caller.
		w.err = errClosed
	}
	return ret
}