1// Copyright 2011 The Snappy-Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package snappy 6 7import ( 8 "encoding/binary" 9) 10 11// We limit how far copy back-references can go, the same as the C++ code. 12const maxOffset = 1 << 15 13 14// emitLiteral writes a literal chunk and returns the number of bytes written. 15func emitLiteral(dst, lit []byte) int { 16 i, n := 0, uint(len(lit)-1) 17 switch { 18 case n < 60: 19 dst[0] = uint8(n)<<2 | tagLiteral 20 i = 1 21 case n < 1<<8: 22 dst[0] = 60<<2 | tagLiteral 23 dst[1] = uint8(n) 24 i = 2 25 case n < 1<<16: 26 dst[0] = 61<<2 | tagLiteral 27 dst[1] = uint8(n) 28 dst[2] = uint8(n >> 8) 29 i = 3 30 case n < 1<<24: 31 dst[0] = 62<<2 | tagLiteral 32 dst[1] = uint8(n) 33 dst[2] = uint8(n >> 8) 34 dst[3] = uint8(n >> 16) 35 i = 4 36 case int64(n) < 1<<32: 37 dst[0] = 63<<2 | tagLiteral 38 dst[1] = uint8(n) 39 dst[2] = uint8(n >> 8) 40 dst[3] = uint8(n >> 16) 41 dst[4] = uint8(n >> 24) 42 i = 5 43 default: 44 panic("snappy: source buffer is too long") 45 } 46 if copy(dst[i:], lit) != len(lit) { 47 panic("snappy: destination buffer is too short") 48 } 49 return i + len(lit) 50} 51 52// emitCopy writes a copy chunk and returns the number of bytes written. 53func emitCopy(dst []byte, offset, length int) int { 54 i := 0 55 for length > 0 { 56 x := length - 4 57 if 0 <= x && x < 1<<3 && offset < 1<<11 { 58 dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1 59 dst[i+1] = uint8(offset) 60 i += 2 61 break 62 } 63 64 x = length 65 if x > 1<<6 { 66 x = 1 << 6 67 } 68 dst[i+0] = uint8(x-1)<<2 | tagCopy2 69 dst[i+1] = uint8(offset) 70 dst[i+2] = uint8(offset >> 8) 71 i += 3 72 length -= x 73 } 74 return i 75} 76 77// Encode returns the encoded form of src. The returned slice may be a sub- 78// slice of dst if dst was large enough to hold the entire encoded block. 79// Otherwise, a newly allocated slice will be returned. 80// It is valid to pass a nil dst. 81func Encode(dst, src []byte) ([]byte, error) { 82 if n := MaxEncodedLen(len(src)); len(dst) < n { 83 dst = make([]byte, n) 84 } 85 86 // The block starts with the varint-encoded length of the decompressed bytes. 87 d := binary.PutUvarint(dst, uint64(len(src))) 88 89 // Return early if src is short. 90 if len(src) <= 4 { 91 if len(src) != 0 { 92 d += emitLiteral(dst[d:], src) 93 } 94 return dst[:d], nil 95 } 96 97 // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. 98 const maxTableSize = 1 << 14 99 shift, tableSize := uint(32-8), 1<<8 100 for tableSize < maxTableSize && tableSize < len(src) { 101 shift-- 102 tableSize *= 2 103 } 104 var table [maxTableSize]int 105 106 // Iterate over the source bytes. 107 var ( 108 s int // The iterator position. 109 t int // The last position with the same hash as s. 110 lit int // The start position of any pending literal bytes. 111 ) 112 for s+3 < len(src) { 113 // Update the hash table. 114 b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3] 115 h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24 116 p := &table[(h*0x1e35a7bd)>>shift] 117 // We need to to store values in [-1, inf) in table. To save 118 // some initialization time, (re)use the table's zero value 119 // and shift the values against this zero: add 1 on writes, 120 // subtract 1 on reads. 121 t, *p = *p-1, s+1 122 // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. 123 if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] { 124 s++ 125 continue 126 } 127 // Otherwise, we have a match. First, emit any pending literal bytes. 128 if lit != s { 129 d += emitLiteral(dst[d:], src[lit:s]) 130 } 131 // Extend the match to be as long as possible. 132 s0 := s 133 s, t = s+4, t+4 134 for s < len(src) && src[s] == src[t] { 135 s++ 136 t++ 137 } 138 // Emit the copied bytes. 139 d += emitCopy(dst[d:], s-t, s-s0) 140 lit = s 141 } 142 143 // Emit any final pending literal bytes and return. 144 if lit != len(src) { 145 d += emitLiteral(dst[d:], src[lit:]) 146 } 147 return dst[:d], nil 148} 149 150// MaxEncodedLen returns the maximum length of a snappy block, given its 151// uncompressed length. 152func MaxEncodedLen(srcLen int) int { 153 // Compressed data can be defined as: 154 // compressed := item* literal* 155 // item := literal* copy 156 // 157 // The trailing literal sequence has a space blowup of at most 62/60 158 // since a literal of length 60 needs one tag byte + one extra byte 159 // for length information. 160 // 161 // Item blowup is trickier to measure. Suppose the "copy" op copies 162 // 4 bytes of data. Because of a special check in the encoding code, 163 // we produce a 4-byte copy only if the offset is < 65536. Therefore 164 // the copy op takes 3 bytes to encode, and this type of item leads 165 // to at most the 62/60 blowup for representing literals. 166 // 167 // Suppose the "copy" op copies 5 bytes of data. If the offset is big 168 // enough, it will take 5 bytes to encode the copy op. Therefore the 169 // worst case here is a one-byte literal followed by a five-byte copy. 170 // That is, 6 bytes of input turn into 7 bytes of "compressed" data. 171 // 172 // This last factor dominates the blowup, so the final estimate is: 173 return 32 + srcLen + srcLen/6 174} 175