1 #include "stdinc.h"
2 #include "whack.h"
3 
4 typedef struct Huff	Huff;
5 int compressblocks = 1;
6 
7 enum
8 {
9 	MaxFastLen	= 9,
10 	BigLenCode	= 0x1f4,	/* minimum code for large lenth encoding */
11 	BigLenBits	= 9,
12 	BigLenBase	= 4,		/* starting items to encode for big lens */
13 
14 	MinOffBits	= 6,
15 	MaxOffBits	= MinOffBits + 8,
16 
17 	MaxLen		= 2051		/* max. length encodable in 24 bits */
18 };
19 
20 enum
21 {
22 	StatBytes,
23 	StatOutBytes,
24 	StatLits,
25 	StatMatches,
26 	StatLitBits,
27 	StatOffBits,
28 	StatLenBits,
29 
30 	MaxStat
31 };
32 
33 struct Huff
34 {
35 	short	bits;				/* length of the code */
36 	ulong	encode;				/* the code */
37 };
38 
39 static	Huff	lentab[MaxFastLen] =
40 {
41 	{2,	0x2},		/* 10 */
42 	{3,	0x6},		/* 110 */
43 	{5,	0x1c},		/* 11100 */
44 	{5,	0x1d},		/* 11101 */
45 	{6,	0x3c},		/* 111100 */
46 	{7,	0x7a},		/* 1111010 */
47 	{7,	0x7b},		/* 1111011 */
48 	{8,	0xf8},		/* 11111000 */
49 	{8,	0xf9},		/* 11111001 */
50 };
51 
52 static int	thwmaxcheck;
53 
54 void
whackinit(Whack * tw,int level)55 whackinit(Whack *tw, int level)
56 {
57 	thwmaxcheck = (1 << level);
58 	thwmaxcheck -= thwmaxcheck >> 2;
59 	if(thwmaxcheck < 2)
60 		thwmaxcheck = 2;
61 	else if(thwmaxcheck > 1024)
62 		thwmaxcheck = 1024;
63 	memset(tw, 0, sizeof *tw);
64 	tw->begin = 2 * WhackMaxOff;
65 }
66 
67 /*
68  * find a string in the dictionary
69  */
70 static int
whackmatch(Whack * b,uchar ** ss,uchar * esrc,ulong h,ulong now)71 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
72 {
73 	ushort then, off, last;
74 	int bestoff, bestlen, check;
75 	uchar *s, *t;
76 
77 	s = *ss;
78 	if(esrc < s + MinMatch)
79 		return -1;
80 	if(s + MaxLen < esrc)
81 		esrc = s + MaxLen;
82 
83 	bestoff = 0;
84 	bestlen = 0;
85 	check = thwmaxcheck;
86 	last = 0;
87 	for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
88 		off = now - then;
89 		if(off <= last || off > WhackMaxOff)
90 			break;
91 
92 		/*
93 		 * don't need to check for the end because
94 		 * 1) s too close check above
95 		 */
96 		t = s - off;
97 		if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
98 			if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
99 				t += 3;
100 				for(s += 3; s < esrc; s++){
101 					if(*s != *t)
102 						break;
103 					t++;
104 				}
105 				if(s - *ss > bestlen){
106 					bestlen = s - *ss;
107 					bestoff = off;
108 					if(bestlen > thwmaxcheck)
109 						break;
110 				}
111 			}
112 		}
113 		s = *ss;
114 		last = off;
115 	}
116 	*ss += bestlen;
117 	return bestoff;
118 }
119 
120 /*
121  * knuth vol. 3 multiplicative hashing
122  * each byte x chosen according to rules
123  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
124  * with reasonable spread between the bytes & their complements
125  *
126  * the 3 byte value appears to be as almost good as the 4 byte value,
127  * and might be faster on some machines
128  */
129 /*
130 #define hashit(c)	((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
131 */
132 #define hashit(c)	(((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
133 
134 /*
135  * lz77 compression with single lookup in a hash table for each block
136  */
137 int
whack(Whack * w,uchar * dst,uchar * src,int n,ulong stats[WhackStats])138 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
139 {
140 	uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
141 	ulong cont, code, wbits;
142 	ushort now;
143 	int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
144 
145 	if(!compressblocks || n < MinMatch)
146 		return -1;
147 
148 	wdst = dst;
149 	wdmax = dst + n;
150 
151 	now = w->begin;
152 	s = src;
153 	w->data = s;
154 
155 	cont = (s[0] << 16) | (s[1] << 8) | s[2];
156 
157 	esrc = s + n;
158 	half = s + (n >> 1);
159 	wnbits = 0;
160 	wbits = 0;
161 	lits = 0;
162 	matches = 0;
163 	offbits = 0;
164 	lenbits = 0;
165 	lithist = ~0;
166 	while(s < esrc){
167 		h = hashit(cont);
168 
169 		sss = s;
170 		toff = whackmatch(w, &sss, esrc, h, now);
171 		ss = sss;
172 
173 		len = ss - s;
174 		for(; wnbits >= 8; wnbits -= 8){
175 			if(wdst >= wdmax){
176 				w->begin = now;
177 				return -1;
178 			}
179 			*wdst++ = wbits >> (wnbits - 8);
180 		}
181 		if(len < MinMatch){
182 			toff = *s;
183 			lithist = (lithist << 1) | toff < 32 | toff > 127;
184 			if(lithist & 0x1e){
185 				wbits = (wbits << 9) | toff;
186 				wnbits += 9;
187 			}else if(lithist & 1){
188 				toff = (toff + 64) & 0xff;
189 				if(toff < 96){
190 					wbits = (wbits << 10) | toff;
191 					wnbits += 10;
192 				}else{
193 					wbits = (wbits << 11) | toff;
194 					wnbits += 11;
195 				}
196 			}else{
197 				wbits = (wbits << 8) | toff;
198 				wnbits += 8;
199 			}
200 			lits++;
201 
202 			/*
203 			 * speed hack
204 			 * check for compression progress, bail if none achieved
205 			 */
206 			if(s > half){
207 				if(4 * (s - src) < 5 * lits){
208 					w->begin = now;
209 					return -1;
210 				}
211 				half = esrc;
212 			}
213 
214 			if(s + MinMatch <= esrc){
215 				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
216 				w->hash[h] = now;
217 				if(s + MinMatch < esrc)
218 					cont = (cont << 8) | s[MinMatch];
219 			}
220 			now++;
221 			s++;
222 			continue;
223 		}
224 
225 		matches++;
226 
227 		/*
228 		 * length of match
229 		 */
230 		if(len > MaxLen){
231 			len = MaxLen;
232 			ss = s + len;
233 		}
234 		len -= MinMatch;
235 		if(len < MaxFastLen){
236 			bits = lentab[len].bits;
237 			wbits = (wbits << bits) | lentab[len].encode;
238 			wnbits += bits;
239 			lenbits += bits;
240 		}else{
241 			code = BigLenCode;
242 			bits = BigLenBits;
243 			use = BigLenBase;
244 			len -= MaxFastLen;
245 			while(len >= use){
246 				len -= use;
247 				code = (code + use) << 1;
248 				use <<= (bits & 1) ^ 1;
249 				bits++;
250 			}
251 
252 			wbits = (wbits << bits) | (code + len);
253 			wnbits += bits;
254 			lenbits += bits;
255 
256 			for(; wnbits >= 8; wnbits -= 8){
257 				if(wdst >= wdmax){
258 					w->begin = now;
259 					return -1;
260 				}
261 				*wdst++ = wbits >> (wnbits - 8);
262 			}
263 		}
264 
265 		/*
266 		 * offset in history
267 		 */
268 		toff--;
269 		for(bits = MinOffBits; toff >= (1 << bits); bits++)
270 			;
271 		if(bits < MaxOffBits-1){
272 			wbits = (wbits << 3) | (bits - MinOffBits);
273 			if(bits != MinOffBits)
274 				bits--;
275 			wnbits += bits + 3;
276 			offbits += bits + 3;
277 		}else{
278 			wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
279 			bits--;
280 			wnbits += bits + 4;
281 			offbits += bits + 4;
282 		}
283 		wbits = (wbits << bits) | toff & ((1 << bits) - 1);
284 
285 		for(; s != ss; s++){
286 			if(s + MinMatch <= esrc){
287 				h = hashit(cont);
288 				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
289 				w->hash[h] = now;
290 				if(s + MinMatch < esrc)
291 					cont = (cont << 8) | s[MinMatch];
292 			}
293 			now++;
294 		}
295 	}
296 
297 	w->begin = now;
298 
299 	stats[StatBytes] += esrc - src;
300 	stats[StatLits] += lits;
301 	stats[StatMatches] += matches;
302 	stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
303 	stats[StatOffBits] += offbits;
304 	stats[StatLenBits] += lenbits;
305 
306 	if(wnbits & 7){
307 		wbits <<= 8 - (wnbits & 7);
308 		wnbits += 8 - (wnbits & 7);
309 	}
310 	for(; wnbits >= 8; wnbits -= 8){
311 		if(wdst >= wdmax)
312 			return -1;
313 		*wdst++ = wbits >> (wnbits - 8);
314 	}
315 
316 	stats[StatOutBytes] += wdst - dst;
317 
318 	return wdst - dst;
319 }
320 
321 int
whackblock(uchar * dst,uchar * src,int ssize)322 whackblock(uchar *dst, uchar *src, int ssize)
323 {
324 	Whack w;
325 	ulong stats[MaxStat];
326 	int r;
327 
328 	whackinit(&w, 6);
329 	r = whack(&w, dst, src, ssize, stats);
330 	return r;
331 }
332