1package brotli 2 3import "encoding/binary" 4 5/* Copyright 2016 Google Inc. All Rights Reserved. 6 7 Distributed under MIT license. 8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 9*/ 10 11func (*hashForgetfulChain) HashTypeLength() uint { 12 return 4 13} 14 15func (*hashForgetfulChain) StoreLookahead() uint { 16 return 4 17} 18 19/* HashBytes is the function that chooses the bucket to place the address in.*/ 20func (h *hashForgetfulChain) HashBytes(data []byte) uint { 21 var hash uint32 = binary.LittleEndian.Uint32(data) * kHashMul32 22 23 /* The higher bits contain more mixture from the multiplication, 24 so we take our results from there. */ 25 return uint(hash >> (32 - h.bucketBits)) 26} 27 28type slot struct { 29 delta uint16 30 next uint16 31} 32 33/* A (forgetful) hash table to the data seen by the compressor, to 34 help create backward references to previous data. 35 36 Hashes are stored in chains which are bucketed to groups. Group of chains 37 share a storage "bank". When more than "bank size" chain nodes are added, 38 oldest nodes are replaced; this way several chains may share a tail. */ 39type hashForgetfulChain struct { 40 hasherCommon 41 42 bucketBits uint 43 numBanks uint 44 bankBits uint 45 numLastDistancesToCheck int 46 47 addr []uint32 48 head []uint16 49 tiny_hash [65536]byte 50 banks [][]slot 51 free_slot_idx []uint16 52 max_hops uint 53} 54 55func (h *hashForgetfulChain) Initialize(params *encoderParams) { 56 var q uint 57 if params.quality > 6 { 58 q = 7 59 } else { 60 q = 8 61 } 62 h.max_hops = q << uint(params.quality-4) 63 64 bankSize := 1 << h.bankBits 65 bucketSize := 1 << h.bucketBits 66 67 h.addr = make([]uint32, bucketSize) 68 h.head = make([]uint16, bucketSize) 69 h.banks = make([][]slot, h.numBanks) 70 for i := range h.banks { 71 h.banks[i] = make([]slot, bankSize) 72 } 73 h.free_slot_idx = make([]uint16, h.numBanks) 74} 75 76func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) { 77 var partial_prepare_threshold uint = (1 << h.bucketBits) >> 6 78 /* Partial preparation is 100 times slower (per socket). */ 79 if one_shot && input_size <= partial_prepare_threshold { 80 var i uint 81 for i = 0; i < input_size; i++ { 82 var bucket uint = h.HashBytes(data[i:]) 83 84 /* See InitEmpty comment. */ 85 h.addr[bucket] = 0xCCCCCCCC 86 87 h.head[bucket] = 0xCCCC 88 } 89 } else { 90 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position 91 processed by hasher never reaches 3GB + 64M; this makes all new chains 92 to be terminated after the first node. */ 93 for i := range h.addr { 94 h.addr[i] = 0xCCCCCCCC 95 } 96 97 for i := range h.head { 98 h.head[i] = 0 99 } 100 } 101 102 h.tiny_hash = [65536]byte{} 103 for i := range h.free_slot_idx { 104 h.free_slot_idx[i] = 0 105 } 106} 107 108/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend 109 node to corresponding chain; also update tiny_hash for current position. */ 110func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) { 111 var key uint = h.HashBytes(data[ix&mask:]) 112 var bank uint = key & (h.numBanks - 1) 113 var idx uint 114 idx = uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1) 115 h.free_slot_idx[bank]++ 116 var delta uint = ix - uint(h.addr[key]) 117 h.tiny_hash[uint16(ix)] = byte(key) 118 if delta > 0xFFFF { 119 delta = 0xFFFF 120 } 121 h.banks[bank][idx].delta = uint16(delta) 122 h.banks[bank][idx].next = h.head[key] 123 h.addr[key] = uint32(ix) 124 h.head[key] = uint16(idx) 125} 126 127func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { 128 var i uint 129 for i = ix_start; i < ix_end; i++ { 130 h.Store(data, mask, i) 131 } 132} 133 134func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) { 135 if num_bytes >= h.HashTypeLength()-1 && position >= 3 { 136 /* Prepare the hashes for three last bytes of the last write. 137 These could not be calculated before, since they require knowledge 138 of both the previous and the current block. */ 139 h.Store(ringbuffer, ring_buffer_mask, position-3) 140 h.Store(ringbuffer, ring_buffer_mask, position-2) 141 h.Store(ringbuffer, ring_buffer_mask, position-1) 142 } 143} 144 145func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) { 146 prepareDistanceCache(distance_cache, h.numLastDistancesToCheck) 147} 148 149/* Find a longest backward match of &data[cur_ix] up to the length of 150 max_length and stores the position cur_ix in the hash table. 151 152 REQUIRES: PrepareDistanceCachehashForgetfulChain must be invoked for current distance cache 153 values; if this method is invoked repeatedly with the same distance 154 cache values, it is enough to invoke PrepareDistanceCachehashForgetfulChain once. 155 156 Does not look for matches longer than max_length. 157 Does not look for matches further away than max_backward. 158 Writes the best match into |out|. 159 |out|->score is updated only if a better match is found. */ 160func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) { 161 var cur_ix_masked uint = cur_ix & ring_buffer_mask 162 var min_score uint = out.score 163 var best_score uint = out.score 164 var best_len uint = out.len 165 var key uint = h.HashBytes(data[cur_ix_masked:]) 166 var tiny_hash byte = byte(key) 167 /* Don't accept a short copy from far away. */ 168 out.len = 0 169 170 out.len_code_delta = 0 171 172 /* Try last distance first. */ 173 for i := 0; i < h.numLastDistancesToCheck; i++ { 174 var backward uint = uint(distance_cache[i]) 175 var prev_ix uint = (cur_ix - backward) 176 177 /* For distance code 0 we want to consider 2-byte matches. */ 178 if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash { 179 continue 180 } 181 if prev_ix >= cur_ix || backward > max_backward { 182 continue 183 } 184 185 prev_ix &= ring_buffer_mask 186 { 187 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 188 if len >= 2 { 189 var score uint = backwardReferenceScoreUsingLastDistance(uint(len)) 190 if best_score < score { 191 if i != 0 { 192 score -= backwardReferencePenaltyUsingLastDistance(uint(i)) 193 } 194 if best_score < score { 195 best_score = score 196 best_len = uint(len) 197 out.len = best_len 198 out.distance = backward 199 out.score = best_score 200 } 201 } 202 } 203 } 204 } 205 { 206 var bank uint = key & (h.numBanks - 1) 207 var backward uint = 0 208 var hops uint = h.max_hops 209 var delta uint = cur_ix - uint(h.addr[key]) 210 var slot uint = uint(h.head[key]) 211 for { 212 tmp6 := hops 213 hops-- 214 if tmp6 == 0 { 215 break 216 } 217 var prev_ix uint 218 var last uint = slot 219 backward += delta 220 if backward > max_backward { 221 break 222 } 223 prev_ix = (cur_ix - backward) & ring_buffer_mask 224 slot = uint(h.banks[bank][last].next) 225 delta = uint(h.banks[bank][last].delta) 226 if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] { 227 continue 228 } 229 { 230 var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) 231 if len >= 4 { 232 /* Comparing for >= 3 does not change the semantics, but just saves 233 for a few unnecessary binary logarithms in backward reference 234 score, since we are not interested in such short matches. */ 235 var score uint = backwardReferenceScore(uint(len), backward) 236 if best_score < score { 237 best_score = score 238 best_len = uint(len) 239 out.len = best_len 240 out.distance = backward 241 out.score = best_score 242 } 243 } 244 } 245 } 246 247 h.Store(data, ring_buffer_mask, cur_ix) 248 } 249 250 if out.score == min_score { 251 searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false) 252 } 253} 254