xref: /freebsd/sys/dev/cxgbe/cudbg/fastlz.c (revision e0c4386e)
1 /*
2    FastLZ - lightning-fast lossless compression library
3 
4    Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
5    Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
6    Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
7 
8    Permission is hereby granted, free of charge, to any person obtaining a copy
9    of this software and associated documentation files (the "Software"), to deal
10    in the Software without restriction, including without limitation the rights
11    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12    copies of the Software, and to permit persons to whom the Software is
13    furnished to do so, subject to the following conditions:
14 
15    The above copyright notice and this permission notice shall be included in
16    all copies or substantial portions of the Software.
17 
18    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24    THE SOFTWARE.
25    */
26 #include <sys/cdefs.h>
27 #include "osdep.h"
28 #include "fastlz.h"
29 
30 #if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
31 
32 /*
33  * Always check for bound when decompressing.
34  * Generally it is best to leave it defined.
35  */
36 #define FASTLZ_SAFE
37 
38 #if defined(WIN32) || defined(__NT__) || defined(_WIN32) || defined(__WIN32__)
39 #if defined(_MSC_VER) || defined(__GNUC__)
40 /* #include <windows.h> */
41 #pragma warning(disable : 4242)
42 #pragma warning(disable : 4244)
43 #endif
44 #endif
45 
46 /*
47  * Give hints to the compiler for branch prediction optimization.
48  */
49 #if defined(__GNUC__) && (__GNUC__ > 2)
50 #define FASTLZ_EXPECT_CONDITIONAL(c)	(__builtin_expect((c), 1))
51 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(__builtin_expect((c), 0))
52 #else
53 #define FASTLZ_EXPECT_CONDITIONAL(c)	(c)
54 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(c)
55 #endif
56 
57 /*
58  * Use inlined functions for supported systems.
59  */
60 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) ||\
61 	defined(__WATCOMC__) || defined(__SUNPRO_C)
62 #define FASTLZ_INLINE inline
63 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
64 #define FASTLZ_INLINE __inline
65 #else
66 #define FASTLZ_INLINE
67 #endif
68 
69 /*
70  * Prevent accessing more than 8-bit at once, except on x86 architectures.
71  */
72 #if !defined(FASTLZ_STRICT_ALIGN)
73 #define FASTLZ_STRICT_ALIGN
74 #if defined(__i386__) || defined(__386)  /* GNU C, Sun Studio */
75 #undef FASTLZ_STRICT_ALIGN
76 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
77 #undef FASTLZ_STRICT_ALIGN
78 #elif defined(_M_IX86) /* Intel, MSVC */
79 #undef FASTLZ_STRICT_ALIGN
80 #elif defined(__386)
81 #undef FASTLZ_STRICT_ALIGN
82 #elif defined(_X86_) /* MinGW */
83 #undef FASTLZ_STRICT_ALIGN
84 #elif defined(__I86__) /* Digital Mars */
85 #undef FASTLZ_STRICT_ALIGN
86 #endif
87 #endif
88 
89 /*
90  * FIXME: use preprocessor magic to set this on different platforms!
91  */
92 
93 #define MAX_COPY       32
94 #define MAX_LEN       264  /* 256 + 8 */
95 #define MAX_DISTANCE 8192
96 
97 #if !defined(FASTLZ_STRICT_ALIGN)
98 #define FASTLZ_READU16(p) (*((const unsigned short *)(p)))
99 #else
100 #define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
101 #endif
102 
103 #define HASH_LOG  13
104 #define HASH_SIZE (1 << HASH_LOG)
105 #define HASH_MASK  (HASH_SIZE - 1)
106 #define HASH_FUNCTION(v, p) {\
107 				v = FASTLZ_READU16(p);\
108 				v ^= FASTLZ_READU16(p + 1)^\
109 				     (v>>(16 - HASH_LOG));\
110 				v &= HASH_MASK;\
111 			    }
112 
113 #undef FASTLZ_LEVEL
114 #define FASTLZ_LEVEL 1
115 
116 #undef FASTLZ_COMPRESSOR
117 #undef FASTLZ_DECOMPRESSOR
118 #define FASTLZ_COMPRESSOR fastlz1_compress
119 #define FASTLZ_DECOMPRESSOR fastlz1_decompress
120 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
121 					   void *output);
122 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
123 					     void *output, int maxout);
124 #include "fastlz.c"
125 
126 #undef FASTLZ_LEVEL
127 #define FASTLZ_LEVEL 2
128 
129 #undef MAX_DISTANCE
130 #define MAX_DISTANCE 8191
131 #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
132 
133 #undef FASTLZ_COMPRESSOR
134 #undef FASTLZ_DECOMPRESSOR
135 #define FASTLZ_COMPRESSOR fastlz2_compress
136 #define FASTLZ_DECOMPRESSOR fastlz2_decompress
137 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
138 					   void *output);
139 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
140 					     void *output, int maxout);
141 #include "fastlz.c"
142 
143 int fastlz_compress(const void *input, int length, void *output)
144 {
145 	/* for short block, choose fastlz1 */
146 	if (length < 65536)
147 		return fastlz1_compress(input, length, output);
148 
149 	/* else... */
150 	return fastlz2_compress(input, length, output);
151 }
152 
153 int fastlz_decompress(const void *input, int length, void *output, int maxout)
154 {
155 	/* magic identifier for compression level */
156 	int level = ((*(const unsigned char *)input) >> 5) + 1;
157 
158 	if (level == 1)
159 		return fastlz1_decompress(input, length, output, maxout);
160 	if (level == 2)
161 		return fastlz2_decompress(input, length, output, maxout);
162 
163 	/* unknown level, trigger error */
164 	return 0;
165 }
166 
167 int fastlz_compress_level(int level, const void *input, int length,
168 			  void *output)
169 {
170 	if (level == 1)
171 		return fastlz1_compress(input, length, output);
172 	if (level == 2)
173 		return fastlz2_compress(input, length, output);
174 
175 	return 0;
176 }
177 
178 #else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
179 
180 
181 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
182 					   void *output)
183 {
184 	const unsigned char *ip = (const unsigned char *) input;
185 	const unsigned char *ip_bound = ip + length - 2;
186 	const unsigned char *ip_limit = ip + length - 12;
187 	unsigned char *op = (unsigned char *) output;
188 	static const unsigned char *g_htab[HASH_SIZE];
189 
190 	const unsigned char **htab = g_htab;
191 	const unsigned char **hslot;
192 	unsigned int hval;
193 
194 	unsigned int copy;
195 
196 	/* sanity check */
197 	if (FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) {
198 		if (length) {
199 			/* create literal copy only */
200 			*op++ = length - 1;
201 			ip_bound++;
202 			while (ip <= ip_bound)
203 				*op++ = *ip++;
204 			return length + 1;
205 		} else
206 			return 0;
207 	}
208 
209 	/* initializes hash table */
210 	for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
211 		*hslot = ip;
212 
213 	/* we start with literal copy */
214 	copy = 2;
215 	*op++ = MAX_COPY - 1;
216 	*op++ = *ip++;
217 	*op++ = *ip++;
218 
219 	/* main loop */
220 	while (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
221 		const unsigned char *ref;
222 		unsigned int distance;
223 
224 		/* minimum match length */
225 		unsigned int len = 3;
226 
227 		/* comparison starting-point */
228 		const unsigned char *anchor = ip;
229 
230 		/* check for a run */
231 #if FASTLZ_LEVEL == 2
232 		if (ip[0] == ip[-1] &&
233 		    FASTLZ_READU16(ip - 1) == FASTLZ_READU16(ip + 1)) {
234 			distance = 1;
235 			ip += 3;
236 			ref = anchor - 1 + 3;
237 			goto match;
238 		}
239 #endif
240 
241 		/* find potential match */
242 		HASH_FUNCTION(hval, ip);
243 		hslot = htab + hval;
244 		ref = htab[hval];
245 
246 		/* calculate distance to the match */
247 		distance = anchor - ref;
248 
249 		/* update hash table */
250 		*hslot = anchor;
251 
252 		if (!ref)
253 			goto literal;
254 		/* is this a match? check the first 3 bytes */
255 		if (distance == 0 ||
256 #if FASTLZ_LEVEL == 1
257 				(distance >= MAX_DISTANCE) ||
258 #else
259 				(distance >= MAX_FARDISTANCE) ||
260 #endif
261 				*ref++ != *ip++ || *ref++ != *ip++ ||
262 				*ref++ != *ip++)
263 			goto literal;
264 
265 #if FASTLZ_LEVEL == 2
266 		/* far, needs at least 5-byte match */
267 		if (distance >= MAX_DISTANCE) {
268 			if (*ip++ != *ref++ || *ip++ != *ref++)
269 				goto literal;
270 			len += 2;
271 		}
272 
273 match:
274 #endif
275 
276 		/* last matched byte */
277 		ip = anchor + len;
278 
279 		/* distance is biased */
280 		distance--;
281 
282 		if (!distance) {
283 			/* zero distance means a run */
284 			unsigned char x = ip[-1];
285 			while (ip < ip_bound)
286 				if (*ref++ != x)
287 					break;
288 				else
289 					ip++;
290 		} else
291 			for (;;) {
292 				/* safe because the outer check
293 				 * against ip limit */
294 				if (*ref++ != *ip++)
295 					break;
296 				if (*ref++ != *ip++)
297 					break;
298 				if (*ref++ != *ip++)
299 					break;
300 				if (*ref++ != *ip++)
301 					break;
302 				if (*ref++ != *ip++)
303 					break;
304 				if (*ref++ != *ip++)
305 					break;
306 				if (*ref++ != *ip++)
307 					break;
308 				if (*ref++ != *ip++)
309 					break;
310 				while (ip < ip_bound)
311 					if (*ref++ != *ip++)
312 						break;
313 				break;
314 			}
315 
316 		/* if we have copied something, adjust the copy count */
317 		if (copy)
318 			/* copy is biased, '0' means 1 byte copy */
319 			*(op - copy - 1) = copy - 1;
320 		else
321 			/* back, to overwrite the copy count */
322 			op--;
323 
324 		/* reset literal counter */
325 		copy = 0;
326 
327 		/* length is biased, '1' means a match of 3 bytes */
328 		ip -= 3;
329 		len = ip - anchor;
330 
331 		/* encode the match */
332 #if FASTLZ_LEVEL == 2
333 		if (distance < MAX_DISTANCE) {
334 			if (len < 7) {
335 				*op++ = (len << 5) + (distance >> 8);
336 				*op++ = (distance & 255);
337 			} else {
338 				*op++ = (7 << 5) + (distance >> 8);
339 				for (len -= 7; len >= 255; len -= 255)
340 					*op++ = 255;
341 				*op++ = len;
342 				*op++ = (distance & 255);
343 			}
344 		} else {
345 			/* far away, but not yet in the another galaxy... */
346 			if (len < 7) {
347 				distance -= MAX_DISTANCE;
348 				*op++ = (len << 5) + 31;
349 				*op++ = 255;
350 				*op++ = distance >> 8;
351 				*op++ = distance & 255;
352 			} else {
353 				distance -= MAX_DISTANCE;
354 				*op++ = (7 << 5) + 31;
355 				for (len -= 7; len >= 255; len -= 255)
356 					*op++ = 255;
357 				*op++ = len;
358 				*op++ = 255;
359 				*op++ = distance >> 8;
360 				*op++ = distance & 255;
361 			}
362 		}
363 #else
364 
365 		if (FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN - 2))
366 			while (len > MAX_LEN - 2) {
367 				*op++ = (7 << 5) + (distance >> 8);
368 				*op++ = MAX_LEN - 2 - 7 - 2;
369 				*op++ = (distance & 255);
370 				len -= MAX_LEN - 2;
371 			}
372 
373 		if (len < 7) {
374 			*op++ = (len << 5) + (distance >> 8);
375 			*op++ = (distance & 255);
376 		} else {
377 			*op++ = (7 << 5) + (distance >> 8);
378 			*op++ = len - 7;
379 			*op++ = (distance & 255);
380 		}
381 #endif
382 
383 		/* update the hash at match boundary */
384 		HASH_FUNCTION(hval, ip);
385 		htab[hval] = ip++;
386 		HASH_FUNCTION(hval, ip);
387 		htab[hval] = ip++;
388 
389 		/* assuming literal copy */
390 		*op++ = MAX_COPY - 1;
391 
392 		continue;
393 
394 literal:
395 		*op++ = *anchor++;
396 		ip = anchor;
397 		copy++;
398 		if (FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
399 			copy = 0;
400 			*op++ = MAX_COPY - 1;
401 		}
402 	}
403 
404 	/* left-over as literal copy */
405 	ip_bound++;
406 	while (ip <= ip_bound) {
407 		*op++ = *ip++;
408 		copy++;
409 		if (copy == MAX_COPY) {
410 			copy = 0;
411 			*op++ = MAX_COPY - 1;
412 		}
413 	}
414 
415 	/* if we have copied something, adjust the copy length */
416 	if (copy)
417 		*(op - copy - 1) = copy - 1;
418 	else
419 		op--;
420 
421 #if FASTLZ_LEVEL == 2
422 	/* marker for fastlz2 */
423 	*(unsigned char *)output |= (1 << 5);
424 #endif
425 
426 	return op - (unsigned char *)output;
427 }
428 
429 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
430 					     void *output, int maxout)
431 {
432 	const unsigned char *ip = (const unsigned char *) input;
433 	const unsigned char *ip_limit  = ip + length;
434 	unsigned char *op = (unsigned char *) output;
435 	unsigned char *op_limit = op + maxout;
436 	unsigned int ctrl = (*ip++) & 31;
437 	int loop = 1;
438 
439 	do {
440 		const unsigned char *ref = op;
441 		unsigned int len = ctrl >> 5;
442 		unsigned int ofs = (ctrl & 31) << 8;
443 
444 		if (ctrl >= 32) {
445 #if FASTLZ_LEVEL == 2
446 			unsigned char code;
447 #endif
448 			len--;
449 			ref -= ofs;
450 			if (len == 7 - 1)
451 #if FASTLZ_LEVEL == 1
452 				len += *ip++;
453 			ref -= *ip++;
454 #else
455 			do {
456 				code = *ip++;
457 				len += code;
458 			} while (code == 255);
459 			code = *ip++;
460 			ref -= code;
461 
462 			/* match from 16-bit distance */
463 			if (FASTLZ_UNEXPECT_CONDITIONAL(code == 255))
464 				if (FASTLZ_EXPECT_CONDITIONAL(ofs ==
465 							      (31 << 8))) {
466 					ofs = (*ip++) << 8;
467 					ofs += *ip++;
468 					ref = op - ofs - MAX_DISTANCE;
469 				}
470 #endif
471 
472 #ifdef FASTLZ_SAFE
473 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 >
474 							op_limit))
475 				return 0;
476 
477 			if (FASTLZ_UNEXPECT_CONDITIONAL(ref - 1 <
478 							(unsigned char *)output)
479 						       )
480 				return 0;
481 #endif
482 
483 			if (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
484 				ctrl = *ip++;
485 			else
486 				loop = 0;
487 
488 			if (ref == op) {
489 				/* optimize copy for a run */
490 				unsigned char b = ref[-1];
491 				*op++ = b;
492 				*op++ = b;
493 				*op++ = b;
494 				for (; len; --len)
495 					*op++ = b;
496 			} else {
497 #if !defined(FASTLZ_STRICT_ALIGN)
498 				const unsigned short *p;
499 				unsigned short *q;
500 #endif
501 				/* copy from reference */
502 				ref--;
503 				*op++ = *ref++;
504 				*op++ = *ref++;
505 				*op++ = *ref++;
506 
507 #if !defined(FASTLZ_STRICT_ALIGN)
508 				/* copy a byte, so that now it's word aligned */
509 				if (len & 1) {
510 					*op++ = *ref++;
511 					len--;
512 				}
513 
514 				/* copy 16-bit at once */
515 				q = (unsigned short *) op;
516 				op += len;
517 				p = (const unsigned short *) ref;
518 				for (len >>= 1; len > 4; len -= 4) {
519 					*q++ = *p++;
520 					*q++ = *p++;
521 					*q++ = *p++;
522 					*q++ = *p++;
523 				}
524 				for (; len; --len)
525 					*q++ = *p++;
526 #else
527 				for (; len; --len)
528 					*op++ = *ref++;
529 #endif
530 			}
531 		} else {
532 			ctrl++;
533 #ifdef FASTLZ_SAFE
534 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
535 				return 0;
536 			if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
537 				return 0;
538 #endif
539 
540 			*op++ = *ip++;
541 			for (--ctrl; ctrl; ctrl--)
542 				*op++ = *ip++;
543 
544 			loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
545 			if (loop)
546 				ctrl = *ip++;
547 		}
548 	} while (FASTLZ_EXPECT_CONDITIONAL(loop));
549 
550 	return op - (unsigned char *)output;
551 }
552 
553 #endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
554