1// Copyright 2016 The Snappy-Go Authors. All rights reserved. 2// Copyright (c) 2019 Klaus Post. All rights reserved. 3// Use of this source code is governed by a BSD-style 4// license that can be found in the LICENSE file. 5 6package s2 7 8import ( 9 "math/bits" 10) 11 12// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. 13// Preferably h should be a constant and should always be <32. 14func hash4(u uint64, h uint8) uint32 { 15 const prime4bytes = 2654435761 16 return (uint32(u) * prime4bytes) >> ((32 - h) & 31) 17} 18 19// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits. 20// Preferably h should be a constant and should always be <64. 21func hash5(u uint64, h uint8) uint32 { 22 const prime5bytes = 889523592379 23 return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) 24} 25 26// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. 27// Preferably h should be a constant and should always be <64. 28func hash7(u uint64, h uint8) uint32 { 29 const prime7bytes = 58295818150454627 30 return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) 31} 32 33// hash8 returns the hash of u to fit in a hash table with h bits. 34// Preferably h should be a constant and should always be <64. 35func hash8(u uint64, h uint8) uint32 { 36 const prime8bytes = 0xcf1bbcdcb7a56463 37 return uint32((u * prime8bytes) >> ((64 - h) & 63)) 38} 39 40// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It 41// assumes that the varint-encoded length of the decompressed bytes has already 42// been written. 43// 44// It also assumes that: 45// len(dst) >= MaxEncodedLen(len(src)) && 46// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize 47func encodeBlockBetterGo(dst, src []byte) (d int) { 48 // Initialize the hash tables. 49 const ( 50 // Long hash matches. 51 lTableBits = 16 52 maxLTableSize = 1 << lTableBits 53 54 // Short hash matches. 55 sTableBits = 14 56 maxSTableSize = 1 << sTableBits 57 ) 58 59 var lTable [maxLTableSize]uint32 60 var sTable [maxSTableSize]uint32 61 62 // sLimit is when to stop looking for offset/length copies. The inputMargin 63 // lets us use a fast path for emitLiteral in the main loop, while we are 64 // looking for copies. 65 sLimit := len(src) - inputMargin 66 if len(src) < minNonLiteralBlockSize { 67 return 0 68 } 69 70 // Bail if we can't compress to at least this. 71 dstLimit := len(src) - len(src)>>5 - 6 72 73 // nextEmit is where in src the next emitLiteral should start from. 74 nextEmit := 0 75 76 // The encoded form must start with a literal, as there are no previous 77 // bytes to copy, so we start looking for hash matches at s == 1. 78 s := 1 79 cv := load64(src, s) 80 81 // We initialize repeat to 0, so we never match on first attempt 82 repeat := 0 83 84 for { 85 candidateL := 0 86 nextS := 0 87 for { 88 // Next src position to check 89 nextS = s + (s-nextEmit)>>7 + 1 90 if nextS > sLimit { 91 goto emitRemainder 92 } 93 hashL := hash7(cv, lTableBits) 94 hashS := hash4(cv, sTableBits) 95 candidateL = int(lTable[hashL]) 96 candidateS := int(sTable[hashS]) 97 lTable[hashL] = uint32(s) 98 sTable[hashS] = uint32(s) 99 100 // Check repeat at offset checkRep. 101 const checkRep = 1 102 if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { 103 base := s + checkRep 104 // Extend back 105 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { 106 i-- 107 base-- 108 } 109 d += emitLiteral(dst[d:], src[nextEmit:base]) 110 111 // Extend forward 112 candidate := s - repeat + 4 + checkRep 113 s += 4 + checkRep 114 for s <= sLimit { 115 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { 116 s += bits.TrailingZeros64(diff) >> 3 117 break 118 } 119 s += 8 120 candidate += 8 121 } 122 if nextEmit > 0 { 123 // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. 124 d += emitRepeat(dst[d:], repeat, s-base) 125 } else { 126 // First match, cannot be repeat. 127 d += emitCopy(dst[d:], repeat, s-base) 128 } 129 nextEmit = s 130 if s >= sLimit { 131 goto emitRemainder 132 } 133 134 cv = load64(src, s) 135 continue 136 } 137 138 if uint32(cv) == load32(src, candidateL) { 139 break 140 } 141 142 // Check our short candidate 143 if uint32(cv) == load32(src, candidateS) { 144 // Try a long candidate at s+1 145 hashL = hash7(cv>>8, lTableBits) 146 candidateL = int(lTable[hashL]) 147 lTable[hashL] = uint32(s + 1) 148 if uint32(cv>>8) == load32(src, candidateL) { 149 s++ 150 break 151 } 152 // Use our short candidate. 153 candidateL = candidateS 154 break 155 } 156 157 cv = load64(src, nextS) 158 s = nextS 159 } 160 161 // Extend backwards 162 for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { 163 candidateL-- 164 s-- 165 } 166 167 // Bail if we exceed the maximum size. 168 if d+(s-nextEmit) > dstLimit { 169 return 0 170 } 171 172 base := s 173 offset := base - candidateL 174 175 // Extend the 4-byte match as long as possible. 176 s += 4 177 candidateL += 4 178 for s <= len(src)-8 { 179 if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { 180 s += bits.TrailingZeros64(diff) >> 3 181 break 182 } 183 s += 8 184 candidateL += 8 185 } 186 187 if offset > 65535 && s-base <= 5 && repeat != offset { 188 // Bail if the match is equal or worse to the encoding. 189 s = nextS + 1 190 if s >= sLimit { 191 goto emitRemainder 192 } 193 cv = load64(src, s) 194 continue 195 } 196 197 d += emitLiteral(dst[d:], src[nextEmit:base]) 198 if repeat == offset { 199 d += emitRepeat(dst[d:], offset, s-base) 200 } else { 201 d += emitCopy(dst[d:], offset, s-base) 202 repeat = offset 203 } 204 205 nextEmit = s 206 if s >= sLimit { 207 goto emitRemainder 208 } 209 210 if d > dstLimit { 211 // Do we have space for more, if not bail. 212 return 0 213 } 214 // Index match start+1 (long) and start+2 (short) 215 index0 := base + 1 216 // Index match end-2 (long) and end-1 (short) 217 index1 := s - 2 218 219 cv0 := load64(src, index0) 220 cv1 := load64(src, index1) 221 cv = load64(src, s) 222 lTable[hash7(cv0, lTableBits)] = uint32(index0) 223 lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1) 224 lTable[hash7(cv1, lTableBits)] = uint32(index1) 225 lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1) 226 sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) 227 sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2) 228 sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) 229 } 230 231emitRemainder: 232 if nextEmit < len(src) { 233 // Bail if we exceed the maximum size. 234 if d+len(src)-nextEmit > dstLimit { 235 return 0 236 } 237 d += emitLiteral(dst[d:], src[nextEmit:]) 238 } 239 return d 240} 241