1 /* NOLINT(build/header_guard) */
2 /* Copyright 2018 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
9 /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
10 /* JUMP = skip bytes for speedup */
11 
12 /* Rolling hash for long distance long string matches. Stores one position
13    per bucket, bucket key is computed over a long region. */
14 
/* HashRolling expands to a hasher-specific struct/function name via HASHER(). */
#define HashRolling HASHER()

/* Rolling-hash multiplier; 69069 is a classic 32-bit LCG multiplier, reused
   here for multiplicative mixing of the window state. */
static const uint32_t FN(kRollingHashMul32) = 69069;
/* Sentinel marking an empty bucket; stored positions are truncated to 32 bits
   and compared against this value. */
static const uint32_t FN(kInvalidPos) = 0xffffffff;
19 
/* This hasher uses a longer forward length, but returning a higher value here
   will hurt compression by the main hasher when combined with a composite
   hasher. The hasher tests for forward itself instead. */
/* Reported hash-type length: only 4 bytes are promised to the framework. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)24 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
25 
26 /* Computes a code from a single byte. A lookup table of 256 values could be
27    used, but simply adding 1 works about as good. */
FN(HashByte)28 static uint32_t FN(HashByte)(uint8_t byte) {
29   return (uint32_t)byte + 1u;
30 }
31 
FN(HashRollingFunctionInitial)32 static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
33                                                uint32_t factor) {
34   return (uint32_t)(factor * state + FN(HashByte)(add));
35 }
36 
FN(HashRollingFunction)37 static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
38                                         uint8_t rem, uint32_t factor,
39                                         uint32_t factor_remove) {
40   return (uint32_t)(factor * state +
41       FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
42 }
43 
typedef struct HashRolling {
  uint32_t state;    /* Rolling hash over the current CHUNKLEN-byte window. */
  uint32_t* table;   /* NUMBUCKETS slots (one position each); aliases common->extra. */
  size_t next_ix;    /* Next position whose hash still has to be rolled in. */

  uint32_t chunk_len;      /* NOTE(review): never written or read in this file;
                              possibly vestigial — confirm against other hashers. */
  uint32_t factor;         /* Per-step multiplier (kRollingHashMul32). */
  uint32_t factor_remove;  /* factor**(steps) with 32-bit wraparound; scales the
                              oldest byte so it can be subtracted out. */
} HashRolling;
53 
FN(Initialize)54 static void FN(Initialize)(
55     HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
56     const BrotliEncoderParams* params) {
57   size_t i;
58   self->state = 0;
59   self->next_ix = 0;
60 
61   self->factor = FN(kRollingHashMul32);
62 
63   /* Compute the factor of the oldest byte to remove: factor**steps modulo
64      0xffffffff (the multiplications rely on 32-bit overflow) */
65   self->factor_remove = 1;
66   for (i = 0; i < CHUNKLEN; i += JUMP) {
67     self->factor_remove *= self->factor;
68   }
69 
70   self->table = (uint32_t*)common->extra;
71   for (i = 0; i < NUMBUCKETS; i++) {
72     self->table[i] = FN(kInvalidPos);
73   }
74 
75   BROTLI_UNUSED(params);
76 }
77 
FN(Prepare)78 static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
79     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
80   size_t i;
81   /* Too small size, cannot use this hasher. */
82   if (input_size < CHUNKLEN) return;
83   self->state = 0;
84   for (i = 0; i < CHUNKLEN; i += JUMP) {
85     self->state = FN(HashRollingFunctionInitial)(
86         self->state, data[i], self->factor);
87   }
88   BROTLI_UNUSED(one_shot);
89 }
90 
FN(HashMemAllocInBytes)91 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
92     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
93     size_t input_size) {
94   return NUMBUCKETS * sizeof(uint32_t);
95   BROTLI_UNUSED(params);
96   BROTLI_UNUSED(one_shot);
97   BROTLI_UNUSED(input_size);
98 }
99 
FN(Store)100 static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
101     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
102   BROTLI_UNUSED(self);
103   BROTLI_UNUSED(data);
104   BROTLI_UNUSED(mask);
105   BROTLI_UNUSED(ix);
106 }
107 
FN(StoreRange)108 static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
109     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
110     const size_t ix_start, const size_t ix_end) {
111   BROTLI_UNUSED(self);
112   BROTLI_UNUSED(data);
113   BROTLI_UNUSED(mask);
114   BROTLI_UNUSED(ix_start);
115   BROTLI_UNUSED(ix_end);
116 }
117 
/* Re-synchronizes the hasher after a block boundary by rebuilding the rolling
   state from the current position (no incremental carry-over). */
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    HashRolling* BROTLI_RESTRICT self,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ring_buffer_mask) {
  /* In this case we must re-initialize the hasher from scratch from the
     current position. */
  size_t position_masked;
  size_t available = num_bytes;
  /* Round position up to the next JUMP boundary, since FindLongestMatch only
     processes JUMP-aligned positions (the bit-mask assumes JUMP is a power
     of two); shrink the available byte count accordingly. */
  if ((position & (JUMP - 1)) != 0) {
    size_t diff = JUMP - (position & (JUMP - 1));
    available = (diff > available) ? 0 : (available - diff);
    position += diff;
  }
  position_masked = position & ring_buffer_mask;
  /* wrapping around ringbuffer not handled. */
  if (available > ring_buffer_mask - position_masked) {
    available = ring_buffer_mask - position_masked;
  }

  /* Prepare() is a no-op when fewer than CHUNKLEN bytes remain, leaving the
     stale state in place; FindLongestMatch will simply not report matches
     until the state rolls past those positions. */
  FN(Prepare)(self, BROTLI_FALSE, available,
      ringbuffer + (position & ring_buffer_mask));
  self->next_ix = position;
  BROTLI_UNUSED(num_bytes);
}
142 
FN(PrepareDistanceCache)143 static BROTLI_INLINE void FN(PrepareDistanceCache)(
144     HashRolling* BROTLI_RESTRICT self,
145     int* BROTLI_RESTRICT distance_cache) {
146   BROTLI_UNUSED(self);
147   BROTLI_UNUSED(distance_cache);
148 }
149 
/* Rolls the hash forward from next_ix up to cur_ix, inserting each visited
   position into its bucket, and reports a match only when the bucket for
   cur_ix already held an earlier position with a long common prefix. */
static BROTLI_INLINE void FN(FindLongestMatch)(
    HashRolling* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  size_t pos;

  /* Only JUMP-aligned positions are hashed (bit-mask assumes JUMP is a
     power of two). */
  if ((cur_ix & (JUMP - 1)) != 0) return;

  /* Not enough lookahead */
  if (max_length < CHUNKLEN) return;

  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
    /* Bucket key derived from the state covering [pos, pos + CHUNKLEN). */
    uint32_t code = self->state & MASK;

    uint8_t rem = data[pos & ring_buffer_mask];  /* oldest byte, leaves window */
    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];  /* byte entering */
    size_t found_ix = FN(kInvalidPos);

    /* Advance the rolling state to the next window before the lookup result
       is consumed; `code` was captured from the pre-roll state. */
    self->state = FN(HashRollingFunction)(
        self->state, add, rem, self->factor, self->factor_remove);

    /* Only a NUMBUCKETS / (MASK + 1) fraction of codes is stored/probed
       (see template-parameter comment at the top of the file). */
    if (code < NUMBUCKETS) {
      found_ix = self->table[code];
      self->table[code] = (uint32_t)pos;
      /* Only the current position may report a match; earlier positions in
         the catch-up loop are merely inserted into the table. */
      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
        /* The cast to 32-bit makes backward distances up to 4GB work even
           if cur_ix is above 4GB, despite using 32-bit values in the table. */
        size_t backward = (uint32_t)(cur_ix - found_ix);
        if (backward <= max_backward) {
          const size_t found_ix_masked = found_ix & ring_buffer_mask;
          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
                                                      &data[cur_ix_masked],
                                                      max_length);
          /* Accept only if strictly better than what `out` already holds. */
          if (len >= 4 && len > out->len) {
            score_t score = BackwardReferenceScore(len, backward);
            if (score > out->score) {
              out->len = len;
              out->distance = backward;
              out->score = score;
              out->len_code_delta = 0;
            }
          }
        }
      }
    }
  }

  /* Resume rolling after cur_ix on the next call. */
  self->next_ix = cur_ix + JUMP;

  /* NOTE: this hasher does not search in the dictionary. It is used as
     backup-hasher, the main hasher already searches in it. */
  BROTLI_UNUSED(dictionary);
  BROTLI_UNUSED(distance_cache);
  BROTLI_UNUSED(dictionary_distance);
  BROTLI_UNUSED(max_distance);
}
211 
212 #undef HashRolling
213