1 /* deflate_medium.c -- The deflate_medium deflate strategy
2  *
3  * Copyright (C) 2013 Intel Corporation. All rights reserved.
4  * Authors:
5  *  Arjan van de Ven    <arjan@linux.intel.com>
6  *
7  * For conditions of distribution and use, see copyright notice in zlib.h
8  */
9 #ifndef NO_MEDIUM_STRATEGY
10 #include <stdint.h>
11 #include "zbuild.h"
12 #include "deflate.h"
13 #include "deflate_p.h"
14 #include "functable.h"
15 
/* Description of one candidate match (or literal run) handled by the medium
 * strategy. All positions are indices into s->window.
 */
struct match {
    uint16_t match_start;   /* window index of the earlier occurrence (taken from s->match_start) */
    uint16_t match_length;  /* number of bytes covered; values below MIN_MATCH are emitted as literals */
    uint16_t strstart;      /* window index where the matched string itself begins */
    uint16_t orgstart;      /* initialised to strstart when the match is found; bumped by
                             * fizzle_matches() and used by insert_match() to bound which
                             * positions are inserted into the hash table */
};
22 
/* Record `match` in the symbol buffer and consume it from the lookahead.
 *
 * A match of at least MIN_MATCH bytes is tallied as a single distance/length
 * pair; anything shorter is tallied byte by byte as literals.
 *
 * Returns the sum of the values returned by the zng_tr_tally_* calls; the
 * caller treats a non-zero result as "the current block must be flushed".
 * `match` is passed by value, so the caller's copy is left untouched.
 */
static int emit_match(deflate_state *s, struct match match) {
    int flush_needed = 0;

    if (match.match_length >= MIN_MATCH) {
        /* Real match: one distance/length symbol covers the whole span. */
        check_match(s, match.strstart, match.match_start, match.match_length);

        flush_needed += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);

        s->lookahead -= match.match_length;
        return flush_needed;
    }

    /* Too short to be worth a match symbol: emit every byte as a literal. */
    for (; match.match_length != 0; match.match_length--, match.strstart++) {
        flush_needed += zng_tr_tally_lit(s, s->window[match.strstart]);
        s->lookahead--;
    }
    return flush_needed;
}
44 
/* Add the positions covered by `match` to the hash table.
 *
 * The position at match.strstart itself is assumed to be in the table
 * already, so insertion starts one byte later. Positions before
 * match.orgstart are skipped. `match` is a by-value copy; the adjustments
 * made to it here are purely local.
 *
 * Nothing is inserted when the lookahead does not extend more than
 * match_length + MIN_MATCH bytes.
 */
static void insert_match(deflate_state *s, struct match match) {
    if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + MIN_MATCH)))
        return;

    /* Matches that are not long enough will be emitted as literals. */
    if (LIKELY(match.match_length < MIN_MATCH)) {
        match.strstart++;
        match.match_length--;

        if (UNLIKELY(match.match_length > 0) && match.strstart >= match.orgstart) {
            if (match.strstart + match.match_length - 1 >= match.orgstart)
                functable.insert_string(s, match.strstart, match.match_length);
            else
                functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
        }
        return;
    }

    /* Very long matches are not inserted position by position: this saves
     * time but degrades compression. Only the tail of the match is inserted.
     */
    if (match.match_length > 16 * s->max_insert_length || s->lookahead < MIN_MATCH) {
        match.strstart += match.match_length;
        if (match.strstart >= (MIN_MATCH - 2)) {
#if MIN_MATCH != 3
            functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
#else
            functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH);
#endif
        }
        /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
        return;
    }

    /* Regular case: insert the remainder of the match. */
    match.match_length--; /* string at strstart already in table */
    match.strstart++;

    if (LIKELY(match.strstart >= match.orgstart)) {
        if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart))
            functable.insert_string(s, match.strstart, match.match_length);
        else
            functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
    } else if (match.orgstart < match.strstart + match.match_length) {
        /* The match starts before orgstart: only insert from orgstart on. */
        functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
    }
}
99 
/* Try to grow `next` to the left at the expense of `current`.
 *
 * While the byte just before next's string equals the byte just before
 * next's match source, next is moved one byte left (strstart and
 * match_start decrease, match_length increases) and current loses one
 * byte. The result is written back to *current and *next only when
 * current has shrunk to at most one byte and next's length is not 2;
 * otherwise both structs are left unchanged.
 */
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
    struct match cur, nxt;
    unsigned char *cand, *ref;
    Pos lower_bound;
    int back;
    int shifted = 0;

    /* Step zero: sanity checks. */
    if (current->match_length <= 1)
        return;
    if (UNLIKELY(current->match_length > 1 + next->match_start))
        return;
    if (UNLIKELY(current->match_length > 1 + next->strstart))
        return;

    /* Quick exit: look (match_length - 1) bytes before both ends of the
     * next match. If those bytes differ, don't bother with anything else.
     * The checks above guarantee both offsets are non-negative.
     */
    back = current->match_length - 1;
    cand = s->window + next->match_start - back;
    ref  = s->window + next->strstart - back;
    if (LIKELY(*cand != *ref))
        return;

    /* Work on copies so that a rejected attempt leaves the inputs intact. */
    cur = *current;
    nxt = *next;

    /* Step one: move the "next" match to the left as far as possible. */
    lower_bound = next->strstart > MAX_DIST(s) ? next->strstart - MAX_DIST(s) : 0;

    cand = s->window + nxt.match_start - 1;
    ref = s->window + nxt.strstart - 1;

    for (;;) {
        if (*cand != *ref)
            break;
        if (UNLIKELY(cur.match_length < 1 || nxt.strstart <= lower_bound ||
                     nxt.match_length >= 256 || nxt.match_start <= 1))
            break;

        nxt.strstart--;
        nxt.match_start--;
        nxt.match_length++;
        cur.match_length--;
        cand--;
        ref--;
        shifted = 1;
    }

    if (!shifted)
        return;

    /* Only commit when the current match was reduced to a single byte or
     * nothing, and the next match is not of length 2.
     */
    if (cur.match_length > 1 || nxt.match_length == 2)
        return;

    nxt.orgstart++;
    *current = cur;
    *next = nxt;
}
162 
/* Compress the available input with the "medium" strategy.
 *
 * Each loop iteration settles on a current match (or a 1-byte literal),
 * inserts it into the hash table, looks up the match that would start right
 * after it, lets fizzle_matches() shift bytes from the current match to the
 * next one, and then emits the current match. The looked-up next match is
 * reused as the current match on the following iteration.
 *
 * Returns:
 *   need_more   - lookahead is below MIN_LOOKAHEAD after fill_window() and
 *                 flush is Z_NO_FLUSH
 *   finish_done - flush is Z_FINISH and the final block was flushed
 *   block_done  - otherwise, once the lookahead is exhausted
 * NOTE(review): FLUSH_BLOCK is a macro defined outside this file and may
 * contain its own return statements - confirm against its definition.
 */
ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
    /* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */
    ALIGNED_(16) struct match current_match;
                 struct match next_match;

    /* Start with both matches empty (match_length == 0 means "no match stored") */
    memset(&current_match, 0, sizeof(struct match));
    memset(&next_match, 0, sizeof(struct match));

    for (;;) {
        Pos hash_head = 0;    /* head of the hash chain */
        int bflush = 0;       /* set if current block must be flushed */

        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
         * for the next match, plus MIN_MATCH bytes to insert the
         * string following the next current_match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (UNLIKELY(s->lookahead == 0))
                break; /* flush the current block */
            /* Drop any match looked up on the previous iteration.
             * NOTE(review): presumably because fill_window() may move the
             * window contents and invalidate the stored indices - confirm.
             */
            next_match.match_length = 0;
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */

        /* If we already have a future match from a previous round, just use that */
        if (next_match.match_length > 0) {
            current_match = next_match;
            next_match.match_length = 0;
        } else {
            hash_head = 0;
            if (s->lookahead >= MIN_MATCH) {
                hash_head = functable.quick_insert_string(s, s->strstart);
            }

            current_match.strstart = s->strstart;
            current_match.orgstart = current_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */

            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST(s)) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                current_match.match_length = functable.longest_match(s, hash_head);
                current_match.match_start = s->match_start;
                /* Anything shorter than MIN_MATCH is treated as a single literal */
                if (UNLIKELY(current_match.match_length < MIN_MATCH))
                    current_match.match_length = 1;
                if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
            } else {
                /* Set up the match to be a 1 byte literal */
                current_match.match_start = 0;
                current_match.match_length = 1;
            }
        }

        /* Register the positions covered by the current match in the hash table */
        insert_match(s, current_match);

        /* now, look ahead one */
        /* Only done while more than MIN_LOOKAHEAD bytes remain and the end of
         * the current match is at least MIN_LOOKAHEAD before window_size.
         */
        if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
            /* Temporarily move strstart to the byte following the current
             * match; it is restored after the lookup below.
             */
            s->strstart = current_match.strstart + current_match.match_length;
            hash_head = functable.quick_insert_string(s, s->strstart);

            next_match.strstart = s->strstart;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST(s)) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                next_match.match_length = functable.longest_match(s, hash_head);
                next_match.match_start = s->match_start;
                if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                }
                /* Short results become a literal; real matches get a chance
                 * to take over bytes from the end of the current match.
                 */
                if (next_match.match_length < MIN_MATCH)
                    next_match.match_length = 1;
                else
                    fizzle_matches(s, &current_match, &next_match);
            } else {
                /* Set up the match to be a 1 byte literal */
                next_match.match_start = 0;
                next_match.match_length = 1;
            }

            /* Restore strstart to the start of the current match */
            s->strstart = current_match.strstart;
        } else {
            /* No look-ahead performed: force a fresh lookup next iteration */
            next_match.match_length = 0;
        }

        /* now emit the current match */
        bflush = emit_match(s, current_match);

        /* move the "cursor" forward */
        s->strstart += current_match.match_length;

        if (UNLIKELY(bflush))
            FLUSH_BLOCK(s, 0);
    }
    /* Lookahead exhausted: s->insert becomes min(strstart, MIN_MATCH-1) */
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    /* Flush whatever symbols are still pending in the current block */
    if (UNLIKELY(s->sym_next))
        FLUSH_BLOCK(s, 0);

    return block_done;
}
289 #endif
290