xref: /freebsd/sys/contrib/openzfs/module/zfs/lz4_zfs.c (revision 9768746b)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 /*
36  * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
37  */
38 
39 #include <sys/zfs_context.h>
40 #include <sys/zio_compress.h>
41 
42 static int real_LZ4_compress(const char *source, char *dest, int isize,
43     int osize);
44 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
45     int isize, int osize);
46 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
47     int isize, int osize);
48 
49 /* See lz4.c */
50 int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
51     int isize, int maxOutputSize);
52 
53 static void *lz4_alloc(int flags);
54 static void lz4_free(void *ctx);
55 
56 size_t
57 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
58     size_t d_len, int n)
59 {
60 	(void) n;
61 	uint32_t bufsiz;
62 	char *dest = d_start;
63 
64 	ASSERT(d_len >= sizeof (bufsiz));
65 
66 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
67 	    d_len - sizeof (bufsiz));
68 
69 	/* Signal an error if the compression routine returned zero. */
70 	if (bufsiz == 0)
71 		return (s_len);
72 
73 	/*
74 	 * The exact compressed size is needed by the decompression routine,
75 	 * so it is stored at the start of the buffer. Note that this may be
76 	 * less than the compressed block size, which is rounded up to a
77 	 * multiple of 1<<ashift.
78 	 */
79 	*(uint32_t *)dest = BE_32(bufsiz);
80 
81 	return (bufsiz + sizeof (bufsiz));
82 }
83 
84 int
85 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
86     size_t d_len, int n)
87 {
88 	(void) n;
89 	const char *src = s_start;
90 	uint32_t bufsiz = BE_IN32(src);
91 
92 	/* invalid compressed buffer size encoded at start */
93 	if (bufsiz + sizeof (bufsiz) > s_len)
94 		return (1);
95 
96 	/*
97 	 * Returns 0 on success (decompression function returned non-negative)
98 	 * and non-zero on failure (decompression function returned negative).
99 	 */
100 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
101 	    d_start, bufsiz, d_len) < 0);
102 }
103 
104 /*
105  * LZ4 API Description:
106  *
107  * Simple Functions:
108  * real_LZ4_compress() :
109  * 	isize  : is the input size. Max supported value is ~1.9GB
110  * 	return : the number of bytes written in buffer dest
111  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
112  * 	note : destination buffer must be already allocated.
113  * 		destination buffer must be sized to handle worst cases
114  * 		situations (input data not compressible) worst case size
115  * 		evaluation is provided by function LZ4_compressBound().
116  *
117  * real_LZ4_uncompress() :
118  * 	osize  : is the output size, therefore the original size
119  * 	return : the number of bytes read in the source buffer.
120  * 		If the source stream is malformed, the function will stop
121  * 		decoding and return a negative result, indicating the byte
122  * 		position of the faulty instruction. This function never
123  * 		writes beyond dest + osize, and is therefore protected
124  * 		against malicious data packets.
125  * 	note : destination buffer must be already allocated
126  *	note : real_LZ4_uncompress() is not used in ZFS so its code
127  *	       is not present here.
128  *
129  * Advanced Functions
130  *
131  * LZ4_compressBound() :
132  * 	Provides the maximum size that LZ4 may output in a "worst case"
133  * 	scenario (input data not compressible) primarily useful for memory
134  * 	allocation of output buffer.
135  *
136  * 	isize  : is the input size. Max supported value is ~1.9GB
137  * 	return : maximum output size in a "worst case" scenario
138  * 	note : this function is limited by "int" range (2^31-1)
139  *
140  * LZ4_uncompress_unknownOutputSize() :
141  * 	isize  : is the input size, therefore the compressed size
142  * 	maxOutputSize : is the size of the destination buffer (which must be
143  * 		already allocated)
144  * 	return : the number of bytes decoded in the destination buffer
145  * 		(necessarily <= maxOutputSize). If the source stream is
146  * 		malformed, the function will stop decoding and return a
147  * 		negative result, indicating the byte position of the faulty
148  * 		instruction. This function never writes beyond dest +
149  * 		maxOutputSize, and is therefore protected against malicious
150  * 		data packets.
151  * 	note   : Destination buffer must be already allocated.
152  *		This version is slightly slower than real_LZ4_uncompress()
153  *
154  * LZ4_compressCtx() :
155  * 	This function explicitly handles the CTX memory structure.
156  *
157  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
158  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
159  * 	NULL isn't valid.
160  *
161  * LZ4_compress64kCtx() :
162  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
163  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
164  *
165  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
166  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
167  * 	NULL isn't valid.
168  */
169 
170 /*
171  * Tuning parameters
172  */
173 
174 /*
175  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
176  *	 Lowering this value reduces memory usage. Reduced memory usage
177  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
178  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
179  *	(examples : 12 -> 16KB ; 17 -> 512KB)
180  */
181 #define	COMPRESSIONLEVEL 12
182 
183 /*
184  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
185  *	algorithm skip faster data segments considered "incompressible".
186  *	This may decrease compression ratio dramatically, but will be
187  *	faster on incompressible data. Increasing this value will make
188  *	the algorithm search more before declaring a segment "incompressible".
189  *	This could improve compression a bit, but will be slower on
190  *	incompressible data. The default value (6) is recommended.
191  */
192 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
193 
194 /*
195  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
196  * performance for big endian cpu, but the resulting compressed stream
197  * will be incompatible with little-endian CPU. You can set this option
198  * to 1 in situations where data will stay within closed environment.
199  * This option is useless on Little_Endian CPU (such as x86).
200  */
201 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
202 
203 /*
204  * CPU Feature Detection
205  */
206 
207 /* 32 or 64 bits ? */
208 #if defined(_LP64)
209 #define	LZ4_ARCH64 1
210 #else
211 #define	LZ4_ARCH64 0
212 #endif
213 
214 /*
215  * Little Endian or Big Endian?
216  * Note: overwrite the below #define if you know your architecture endianness.
217  */
218 #if defined(_ZFS_BIG_ENDIAN)
219 #define	LZ4_BIG_ENDIAN 1
220 #else
221 /*
222  * Little Endian assumed. PDP Endian and other very rare endian format
223  * are unsupported.
224  */
225 #undef LZ4_BIG_ENDIAN
226 #endif
227 
228 /*
229  * Unaligned memory access is automatically enabled for "common" CPU,
230  * such as x86. For others CPU, the compiler will be more cautious, and
231  * insert extra code to ensure aligned access is respected. If you know
232  * your target CPU supports unaligned memory access, you may want to
233  * force this option manually to improve performance
234  */
235 #if defined(__ARM_FEATURE_UNALIGNED)
236 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
237 #endif
238 
239 /*
240  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
241  * kernel
242  * Linux : we can use GCC's __builtin_ctz family of builtins in the
243  * kernel
244  */
245 #undef	LZ4_FORCE_SW_BITCOUNT
246 #if defined(__sparc)
247 #define	LZ4_FORCE_SW_BITCOUNT
248 #endif
249 
250 /*
251  * Compiler Options
252  */
253 /* Disable restrict */
254 #define	restrict
255 
256 /*
257  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
258  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
259  */
260 #ifdef GCC_VERSION
261 #undef GCC_VERSION
262 #endif
263 
264 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
265 
266 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
267 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
268 #else
269 #define	expect(expr, value)    (expr)
270 #endif
271 
272 #ifndef likely
273 #define	likely(expr)	expect((expr) != 0, 1)
274 #endif
275 
276 #ifndef unlikely
277 #define	unlikely(expr)	expect((expr) != 0, 0)
278 #endif
279 
280 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
281 	(((x) & 0xffu) << 8)))
282 
283 /* Basic types */
284 #define	BYTE	uint8_t
285 #define	U16	uint16_t
286 #define	U32	uint32_t
287 #define	S32	int32_t
288 #define	U64	uint64_t
289 
290 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
291 #pragma pack(1)
292 #endif
293 
294 typedef struct _U16_S {
295 	U16 v;
296 } U16_S;
297 typedef struct _U32_S {
298 	U32 v;
299 } U32_S;
300 typedef struct _U64_S {
301 	U64 v;
302 } U64_S;
303 
304 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
305 #pragma pack()
306 #endif
307 
308 #define	A64(x) (((U64_S *)(x))->v)
309 #define	A32(x) (((U32_S *)(x))->v)
310 #define	A16(x) (((U16_S *)(x))->v)
311 
312 /*
313  * Constants
314  */
315 #define	MINMATCH 4
316 
317 #define	HASH_LOG COMPRESSIONLEVEL
318 #define	HASHTABLESIZE (1 << HASH_LOG)
319 #define	HASH_MASK (HASHTABLESIZE - 1)
320 
321 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
322 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
323 
324 #define	COPYLENGTH 8
325 #define	LASTLITERALS 5
326 #define	MFLIMIT (COPYLENGTH + MINMATCH)
327 #define	MINLENGTH (MFLIMIT + 1)
328 
329 #define	MAXD_LOG 16
330 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
331 
332 #define	ML_BITS 4
333 #define	ML_MASK ((1U<<ML_BITS)-1)
334 #define	RUN_BITS (8-ML_BITS)
335 #define	RUN_MASK ((1U<<RUN_BITS)-1)
336 
337 
338 /*
339  * Architecture-specific macros
340  */
341 #if LZ4_ARCH64
342 #define	STEPSIZE 8
343 #define	UARCH U64
344 #define	AARCH A64
345 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
346 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
347 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
348 #define	HTYPE U32
349 #define	INITBASE(base)		const BYTE* const base = ip
350 #else /* !LZ4_ARCH64 */
351 #define	STEPSIZE 4
352 #define	UARCH U32
353 #define	AARCH A32
354 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
355 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
356 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
357 #define	HTYPE const BYTE *
358 #define	INITBASE(base)		const int base = 0
359 #endif /* !LZ4_ARCH64 */
360 
361 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
362 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
363 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
364 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
365 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
366 #else
367 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
368 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
369 #endif
370 
371 
372 /* Local structures */
373 struct refTables {
374 	HTYPE hashTable[HASHTABLESIZE];
375 };
376 
377 
378 /* Macros */
379 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
380 	HASH_LOG))
381 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
382 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
383 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
384 	d = e; }
385 
386 
387 /* Private functions */
388 #if LZ4_ARCH64
389 
390 static inline int
391 LZ4_NbCommonBytes(register U64 val)
392 {
393 #if defined(LZ4_BIG_ENDIAN)
394 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
395 	!defined(LZ4_FORCE_SW_BITCOUNT)
396 	return (__builtin_clzll(val) >> 3);
397 #else
398 	int r;
399 	if (!(val >> 32)) {
400 		r = 4;
401 	} else {
402 		r = 0;
403 		val >>= 32;
404 	}
405 	if (!(val >> 16)) {
406 		r += 2;
407 		val >>= 8;
408 	} else {
409 		val >>= 24;
410 	}
411 	r += (!val);
412 	return (r);
413 #endif
414 #else
415 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
416 	!defined(LZ4_FORCE_SW_BITCOUNT)
417 	return (__builtin_ctzll(val) >> 3);
418 #else
419 	static const int DeBruijnBytePos[64] =
420 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
421 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
422 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
423 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
424 	};
425 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
426 	    58];
427 #endif
428 #endif
429 }
430 
431 #else
432 
433 static inline int
434 LZ4_NbCommonBytes(register U32 val)
435 {
436 #if defined(LZ4_BIG_ENDIAN)
437 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
438 	!defined(LZ4_FORCE_SW_BITCOUNT)
439 	return (__builtin_clz(val) >> 3);
440 #else
441 	int r;
442 	if (!(val >> 16)) {
443 		r = 2;
444 		val >>= 8;
445 	} else {
446 		r = 0;
447 		val >>= 24;
448 	}
449 	r += (!val);
450 	return (r);
451 #endif
452 #else
453 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
454 	!defined(LZ4_FORCE_SW_BITCOUNT)
455 	return (__builtin_ctz(val) >> 3);
456 #else
457 	static const int DeBruijnBytePos[32] = {
458 		0, 0, 3, 0, 3, 1, 3, 0,
459 		3, 2, 2, 1, 3, 2, 0, 1,
460 		3, 3, 1, 2, 2, 2, 2, 0,
461 		3, 1, 2, 0, 1, 0, 1, 1
462 	};
463 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
464 	    27];
465 #endif
466 #endif
467 }
468 
469 #endif
470 
471 /* Compression functions */
472 
473 static int
474 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
475     int osize)
476 {
477 	struct refTables *srt = (struct refTables *)ctx;
478 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
479 
480 	const BYTE *ip = (BYTE *) source;
481 	INITBASE(base);
482 	const BYTE *anchor = ip;
483 	const BYTE *const iend = ip + isize;
484 	const BYTE *const oend = (BYTE *) dest + osize;
485 	const BYTE *const mflimit = iend - MFLIMIT;
486 #define	matchlimit (iend - LASTLITERALS)
487 
488 	BYTE *op = (BYTE *) dest;
489 
490 	int len, length;
491 	const int skipStrength = SKIPSTRENGTH;
492 	U32 forwardH;
493 
494 
495 	/* Init */
496 	if (isize < MINLENGTH)
497 		goto _last_literals;
498 
499 	/* First Byte */
500 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
501 	ip++;
502 	forwardH = LZ4_HASH_VALUE(ip);
503 
504 	/* Main Loop */
505 	for (;;) {
506 		int findMatchAttempts = (1U << skipStrength) + 3;
507 		const BYTE *forwardIp = ip;
508 		const BYTE *ref;
509 		BYTE *token;
510 
511 		/* Find a match */
512 		do {
513 			U32 h = forwardH;
514 			int step = findMatchAttempts++ >> skipStrength;
515 			ip = forwardIp;
516 			forwardIp = ip + step;
517 
518 			if (unlikely(forwardIp > mflimit)) {
519 				goto _last_literals;
520 			}
521 
522 			forwardH = LZ4_HASH_VALUE(forwardIp);
523 			ref = base + HashTable[h];
524 			HashTable[h] = ip - base;
525 
526 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
527 
528 		/* Catch up */
529 		while ((ip > anchor) && (ref > (BYTE *) source) &&
530 		    unlikely(ip[-1] == ref[-1])) {
531 			ip--;
532 			ref--;
533 		}
534 
535 		/* Encode Literal length */
536 		length = ip - anchor;
537 		token = op++;
538 
539 		/* Check output limit */
540 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
541 		    (length >> 8) > oend))
542 			return (0);
543 
544 		if (length >= (int)RUN_MASK) {
545 			*token = (RUN_MASK << ML_BITS);
546 			len = length - RUN_MASK;
547 			for (; len > 254; len -= 255)
548 				*op++ = 255;
549 			*op++ = (BYTE)len;
550 		} else
551 			*token = (length << ML_BITS);
552 
553 		/* Copy Literals */
554 		LZ4_BLINDCOPY(anchor, op, length);
555 
556 		_next_match:
557 		/* Encode Offset */
558 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
559 
560 		/* Start Counting */
561 		ip += MINMATCH;
562 		ref += MINMATCH;	/* MinMatch verified */
563 		anchor = ip;
564 		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
565 			UARCH diff = AARCH(ref) ^ AARCH(ip);
566 			if (!diff) {
567 				ip += STEPSIZE;
568 				ref += STEPSIZE;
569 				continue;
570 			}
571 			ip += LZ4_NbCommonBytes(diff);
572 			goto _endCount;
573 		}
574 #if LZ4_ARCH64
575 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
576 			ip += 4;
577 			ref += 4;
578 		}
579 #endif
580 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
581 			ip += 2;
582 			ref += 2;
583 		}
584 		if ((ip < matchlimit) && (*ref == *ip))
585 			ip++;
586 		_endCount:
587 
588 		/* Encode MatchLength */
589 		len = (ip - anchor);
590 		/* Check output limit */
591 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
592 			return (0);
593 		if (len >= (int)ML_MASK) {
594 			*token += ML_MASK;
595 			len -= ML_MASK;
596 			for (; len > 509; len -= 510) {
597 				*op++ = 255;
598 				*op++ = 255;
599 			}
600 			if (len > 254) {
601 				len -= 255;
602 				*op++ = 255;
603 			}
604 			*op++ = (BYTE)len;
605 		} else
606 			*token += len;
607 
608 		/* Test end of chunk */
609 		if (ip > mflimit) {
610 			anchor = ip;
611 			break;
612 		}
613 		/* Fill table */
614 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
615 
616 		/* Test next position */
617 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
618 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
619 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
620 			token = op++;
621 			*token = 0;
622 			goto _next_match;
623 		}
624 		/* Prepare next loop */
625 		anchor = ip++;
626 		forwardH = LZ4_HASH_VALUE(ip);
627 	}
628 
629 	_last_literals:
630 	/* Encode Last Literals */
631 	{
632 		int lastRun = iend - anchor;
633 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
634 		    oend)
635 			return (0);
636 		if (lastRun >= (int)RUN_MASK) {
637 			*op++ = (RUN_MASK << ML_BITS);
638 			lastRun -= RUN_MASK;
639 			for (; lastRun > 254; lastRun -= 255) {
640 				*op++ = 255;
641 			}
642 			*op++ = (BYTE)lastRun;
643 		} else
644 			*op++ = (lastRun << ML_BITS);
645 		(void) memcpy(op, anchor, iend - anchor);
646 		op += iend - anchor;
647 	}
648 
649 	/* End */
650 	return (int)(((char *)op) - dest);
651 }
652 
653 
654 
655 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
656 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
657 #define	HASHLOG64K (HASH_LOG + 1)
658 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
659 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
660 	HASHLOG64K))
661 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
662 
663 static int
664 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
665     int osize)
666 {
667 	struct refTables *srt = (struct refTables *)ctx;
668 	U16 *HashTable = (U16 *) (srt->hashTable);
669 
670 	const BYTE *ip = (BYTE *) source;
671 	const BYTE *anchor = ip;
672 	const BYTE *const base = ip;
673 	const BYTE *const iend = ip + isize;
674 	const BYTE *const oend = (BYTE *) dest + osize;
675 	const BYTE *const mflimit = iend - MFLIMIT;
676 #define	matchlimit (iend - LASTLITERALS)
677 
678 	BYTE *op = (BYTE *) dest;
679 
680 	int len, length;
681 	const int skipStrength = SKIPSTRENGTH;
682 	U32 forwardH;
683 
684 	/* Init */
685 	if (isize < MINLENGTH)
686 		goto _last_literals;
687 
688 	/* First Byte */
689 	ip++;
690 	forwardH = LZ4_HASH64K_VALUE(ip);
691 
692 	/* Main Loop */
693 	for (;;) {
694 		int findMatchAttempts = (1U << skipStrength) + 3;
695 		const BYTE *forwardIp = ip;
696 		const BYTE *ref;
697 		BYTE *token;
698 
699 		/* Find a match */
700 		do {
701 			U32 h = forwardH;
702 			int step = findMatchAttempts++ >> skipStrength;
703 			ip = forwardIp;
704 			forwardIp = ip + step;
705 
706 			if (forwardIp > mflimit) {
707 				goto _last_literals;
708 			}
709 
710 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
711 			ref = base + HashTable[h];
712 			HashTable[h] = ip - base;
713 
714 		} while (A32(ref) != A32(ip));
715 
716 		/* Catch up */
717 		while ((ip > anchor) && (ref > (BYTE *) source) &&
718 		    (ip[-1] == ref[-1])) {
719 			ip--;
720 			ref--;
721 		}
722 
723 		/* Encode Literal length */
724 		length = ip - anchor;
725 		token = op++;
726 
727 		/* Check output limit */
728 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
729 		    (length >> 8) > oend))
730 			return (0);
731 
732 		if (length >= (int)RUN_MASK) {
733 			*token = (RUN_MASK << ML_BITS);
734 			len = length - RUN_MASK;
735 			for (; len > 254; len -= 255)
736 				*op++ = 255;
737 			*op++ = (BYTE)len;
738 		} else
739 			*token = (length << ML_BITS);
740 
741 		/* Copy Literals */
742 		LZ4_BLINDCOPY(anchor, op, length);
743 
744 		_next_match:
745 		/* Encode Offset */
746 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
747 
748 		/* Start Counting */
749 		ip += MINMATCH;
750 		ref += MINMATCH;	/* MinMatch verified */
751 		anchor = ip;
752 		while (ip < matchlimit - (STEPSIZE - 1)) {
753 			UARCH diff = AARCH(ref) ^ AARCH(ip);
754 			if (!diff) {
755 				ip += STEPSIZE;
756 				ref += STEPSIZE;
757 				continue;
758 			}
759 			ip += LZ4_NbCommonBytes(diff);
760 			goto _endCount;
761 		}
762 #if LZ4_ARCH64
763 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
764 			ip += 4;
765 			ref += 4;
766 		}
767 #endif
768 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
769 			ip += 2;
770 			ref += 2;
771 		}
772 		if ((ip < matchlimit) && (*ref == *ip))
773 			ip++;
774 		_endCount:
775 
776 		/* Encode MatchLength */
777 		len = (ip - anchor);
778 		/* Check output limit */
779 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
780 			return (0);
781 		if (len >= (int)ML_MASK) {
782 			*token += ML_MASK;
783 			len -= ML_MASK;
784 			for (; len > 509; len -= 510) {
785 				*op++ = 255;
786 				*op++ = 255;
787 			}
788 			if (len > 254) {
789 				len -= 255;
790 				*op++ = 255;
791 			}
792 			*op++ = (BYTE)len;
793 		} else
794 			*token += len;
795 
796 		/* Test end of chunk */
797 		if (ip > mflimit) {
798 			anchor = ip;
799 			break;
800 		}
801 		/* Fill table */
802 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
803 
804 		/* Test next position */
805 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
806 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
807 		if (A32(ref) == A32(ip)) {
808 			token = op++;
809 			*token = 0;
810 			goto _next_match;
811 		}
812 		/* Prepare next loop */
813 		anchor = ip++;
814 		forwardH = LZ4_HASH64K_VALUE(ip);
815 	}
816 
817 	_last_literals:
818 	/* Encode Last Literals */
819 	{
820 		int lastRun = iend - anchor;
821 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
822 		    oend)
823 			return (0);
824 		if (lastRun >= (int)RUN_MASK) {
825 			*op++ = (RUN_MASK << ML_BITS);
826 			lastRun -= RUN_MASK;
827 			for (; lastRun > 254; lastRun -= 255)
828 				*op++ = 255;
829 			*op++ = (BYTE)lastRun;
830 		} else
831 			*op++ = (lastRun << ML_BITS);
832 		(void) memcpy(op, anchor, iend - anchor);
833 		op += iend - anchor;
834 	}
835 
836 	/* End */
837 	return (int)(((char *)op) - dest);
838 }
839 
840 static int
841 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
842 {
843 	void *ctx;
844 	int result;
845 
846 	ctx = lz4_alloc(KM_SLEEP);
847 
848 	/*
849 	 * out of kernel memory, gently fall through - this will disable
850 	 * compression in zio_compress_data
851 	 */
852 	if (ctx == NULL)
853 		return (0);
854 
855 	memset(ctx, 0, sizeof (struct refTables));
856 
857 	if (isize < LZ4_64KLIMIT)
858 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
859 	else
860 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
861 
862 	lz4_free(ctx);
863 	return (result);
864 }
865 
866 #ifdef __FreeBSD__
867 /*
868  * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
869  * Should struct refTables get resized this may need to be revisited, hence
870  * compiler-time asserts.
871  */
872 _Static_assert(sizeof(struct refTables) <= 16384,
873     "refTables too big for malloc");
874 _Static_assert((sizeof(struct refTables) % 4096) == 0,
875     "refTables not a multiple of page size");
876 #else
877 #define ZFS_LZ4_USE_CACHE
878 #endif
879 
880 #ifdef ZFS_LZ4_USE_CACHE
881 static kmem_cache_t *lz4_cache;
882 #endif
883 
884 #ifdef ZFS_LZ4_USE_CACHE
885 void
886 lz4_init(void)
887 {
888 	lz4_cache = kmem_cache_create("lz4_cache",
889 	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
890 }
891 
892 void
893 lz4_fini(void)
894 {
895 	if (lz4_cache) {
896 		kmem_cache_destroy(lz4_cache);
897 		lz4_cache = NULL;
898 	}
899 }
900 
901 static void *
902 lz4_alloc(int flags)
903 {
904 	ASSERT(lz4_cache != NULL);
905 	return (kmem_cache_alloc(lz4_cache, flags));
906 }
907 
908 static void
909 lz4_free(void *ctx)
910 {
911 	kmem_cache_free(lz4_cache, ctx);
912 }
913 #else
914 void
915 lz4_init(void)
916 {
917 }
918 
919 void
920 lz4_fini(void)
921 {
922 }
923 
924 static void *
925 lz4_alloc(int flags)
926 {
927 	return (kmem_alloc(sizeof (struct refTables), flags));
928 }
929 
930 static void
931 lz4_free(void *ctx)
932 {
933 	kmem_free(ctx, sizeof (struct refTables));
934 }
935 #endif
936