1 /*
2 LZ4 Encoder - Part of LZ4 compression algorithm
3 Copyright (C) 2011-2013, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 You can contact the author at :
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 - LZ4 source repository : http://code.google.com/p/lz4/
32 */
33
34 /* lz4_encoder.h must be included into lz4.c
35 The objective of this file is to create a single LZ4 compression function source
36 which will be instanciated multiple times with minor variations
37 depending on a set of #define.
38 */
39
40 void*
41 LZ4_create(void);
42 int
43 LZ4_free(void* ctx);
44
45 int
46 LZ4_compress_heap_limitedOutput(
47 void* ctx,
48 char* source,
49 char* dest,
50 int inputSize,
51 int maxOutputSize);
52
53 int
54 LZ4_compress64k_heap_limitedOutput(
55 void* ctx,
56 char* source,
57 char* dest,
58 int inputSize,
59 int maxOutputSize);
60
61
62 //****************************
63 // Local definitions
64 //****************************
65
66 #ifdef COMPRESS_64K
67 # define HASHLOG (MEMORY_USAGE-1)
68 # define CURRENT_H_TYPE U16
69 # define CURRENTBASE(base) BYTE* base = ip
70 #else
71 # define HASHLOG (MEMORY_USAGE-2)
72 # define CURRENT_H_TYPE HTYPE
73 # define CURRENTBASE(base) INITBASE(base)
74 #endif
75
76 #define HASHTABLE_NBCELLS (1U<<HASHLOG)
77 #define LZ4_HASH(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG))
78 #define LZ4_HASHVALUE(p) LZ4_HASH(A32(p))
79
80
81
82 //****************************
83 // Function code
84 //****************************
85
86 int
LZ4_compress_heap_limitedOutput(void * ctx,char * source,char * dest,int inputSize,int maxOutputSize)87 LZ4_compress_heap_limitedOutput(
88 void* ctx,
89 char* source,
90 char* dest,
91 int inputSize,
92 int maxOutputSize)
93 {
94 CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
95
96 BYTE* ip = (BYTE*) source;
97 CURRENTBASE(base);
98 BYTE* anchor = ip;
99 BYTE* iend = ip + inputSize;
100 BYTE* mflimit = iend - MFLIMIT;
101 #define matchlimit (iend - LASTLITERALS)
102
103 BYTE* op = (BYTE*) dest;
104 BYTE* oend = op + maxOutputSize;
105
106 int length;
107 int skipStrength = SKIPSTRENGTH;
108 U32 forwardH;
109
110
111 // Init
112 if (inputSize<MINLENGTH) goto _last_literals;
113
114 memset((void*)HashTable, 0, HASHTABLESIZE);
115
116 // First Byte
117 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
118 ip++;
119 forwardH = LZ4_HASHVALUE(ip);
120
121 // Main Loop
122 for ( ; ; )
123 {
124 int findMatchAttempts = (1U << skipStrength) + 3;
125 BYTE* forwardIp = ip;
126 BYTE* ref;
127 BYTE* token;
128
129 // Find a match
130 do {
131 U32 h = forwardH;
132 int step = findMatchAttempts++ >> skipStrength;
133 ip = forwardIp;
134 forwardIp = ip + step;
135
136 if unlikely(forwardIp > mflimit) {
137 goto _last_literals;
138 }
139
140 forwardH = LZ4_HASHVALUE(forwardIp);
141 ref = base + HashTable[h];
142 HashTable[h] = (CURRENT_H_TYPE)(ip - base);
143
144 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
145
146 // Catch up
147 while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) {
148 ip--;
149 ref--;
150 }
151
152 // Encode Literal length
153 length = (int)(ip - anchor);
154 token = op++;
155
156 if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)
157 return 0; // Check output limit
158
159 if (length>=(int)RUN_MASK)
160 {
161 int len = length-RUN_MASK;
162 *token=(RUN_MASK<<ML_BITS);
163 for(; len >= 255 ; len-=255)
164 *op++ = 255;
165 *op++ = (BYTE)len;
166 }
167 else *token = (BYTE)(length<<ML_BITS);
168
169 // Copy Literals
170 LZ4_BLINDCOPY(anchor, op, length);
171
172 _next_match:
173 // Encode Offset
174 LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
175
176 // Start Counting
177 ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified
178 anchor = ip;
179 while likely(ip<matchlimit-(STEPSIZE-1))
180 {
181 UARCH diff = AARCH(ref) ^ AARCH(ip);
182 if (!diff) {
183 ip+=STEPSIZE;
184 ref+=STEPSIZE;
185 continue;
186 }
187 ip += LZ4_NbCommonBytes(diff);
188 goto _endCount;
189 }
190 if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) {
191 ip+=4;
192 ref+=4;
193 }
194 if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) {
195 ip+=2;
196 ref+=2;
197 }
198 if ((ip<matchlimit) && (*ref == *ip))
199 ip++;
200 _endCount:
201
202 // Encode MatchLength
203 length = (int)(ip - anchor);
204
205 if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)
206 return 0; // Check output limit
207
208 if (length>=(int)ML_MASK)
209 {
210 *token += ML_MASK;
211 length -= ML_MASK;
212 for (; length > 509 ; length-=510) {
213 *op++ = 255;
214 *op++ = 255;
215 }
216 if (length >= 255) {
217 length-=255;
218 *op++ = 255;
219 }
220 *op++ = (BYTE)length;
221 }
222 else *token += (BYTE)length;
223
224 // Test end of chunk
225 if (ip > mflimit) {
226 anchor = ip;
227 break;
228 }
229
230 // Fill table
231 HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
232
233 // Test next position
234 ref = base + HashTable[LZ4_HASHVALUE(ip)];
235 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
236 if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) {
237 token = op++;
238 *token=0;
239 goto _next_match;
240 }
241
242 // Prepare next loop
243 anchor = ip++;
244 forwardH = LZ4_HASHVALUE(ip);
245 }
246
247 _last_literals:
248 // Encode Last Literals
249 {
250 int lastRun = (int)(iend - anchor);
251
252 if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)
253 return 0; // Check output limit
254
255 if (lastRun>=(int)RUN_MASK) {
256 *op++=(RUN_MASK<<ML_BITS);
257 lastRun-=RUN_MASK;
258 for(; lastRun >= 255 ; lastRun-=255)
259 *op++ = 255;
260 *op++ = (BYTE) lastRun;
261 }
262 else *op++ = (BYTE)(lastRun<<ML_BITS);
263 memcpy(op, anchor, iend - anchor);
264 op += iend-anchor;
265 }
266
267 // End
268 return (int) (((char*)op)-dest);
269 }
270
271 int
LZ4_compress64k_heap_limitedOutput(void * ctx,char * source,char * dest,int inputSize,int maxOutputSize)272 LZ4_compress64k_heap_limitedOutput(
273 void* ctx,
274 char* source,
275 char* dest,
276 int inputSize,
277 int maxOutputSize)
278 {
279 CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
280
281 BYTE* ip = (BYTE*) source;
282 CURRENTBASE(base);
283 BYTE* anchor = ip;
284 BYTE* iend = ip + inputSize;
285 BYTE* mflimit = iend - MFLIMIT;
286 #define matchlimit (iend - LASTLITERALS)
287
288 BYTE* op = (BYTE*) dest;
289 BYTE* oend = op + maxOutputSize;
290
291 int length;
292 int skipStrength = SKIPSTRENGTH;
293 U32 forwardH;
294
295
296 // Init
297 if (inputSize<MINLENGTH) goto _last_literals;
298
299 memset((void*)HashTable, 0, HASHTABLESIZE);
300
301 // First Byte
302 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
303 ip++;
304 forwardH = LZ4_HASHVALUE(ip);
305
306 // Main Loop
307 for ( ; ; )
308 {
309 int findMatchAttempts = (1U << skipStrength) + 3;
310 BYTE* forwardIp = ip;
311 BYTE* ref;
312 BYTE* token;
313
314 // Find a match
315 do {
316 U32 h = forwardH;
317 int step = findMatchAttempts++ >> skipStrength;
318 ip = forwardIp;
319 forwardIp = ip + step;
320
321 if unlikely(forwardIp > mflimit) {
322 goto _last_literals;
323 }
324
325 forwardH = LZ4_HASHVALUE(forwardIp);
326 ref = base + HashTable[h];
327 HashTable[h] = (CURRENT_H_TYPE)(ip - base);
328
329 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
330
331 // Catch up
332 while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) {
333 ip--;
334 ref--;
335 }
336
337 // Encode Literal length
338 length = (int)(ip - anchor);
339 token = op++;
340
341 if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)
342 return 0; // Check output limit
343
344 if (length>=(int)RUN_MASK)
345 {
346 int len = length-RUN_MASK;
347 *token=(RUN_MASK<<ML_BITS);
348 for(; len >= 255 ; len-=255)
349 *op++ = 255;
350 *op++ = (BYTE)len;
351 }
352 else *token = (BYTE)(length<<ML_BITS);
353
354 // Copy Literals
355 LZ4_BLINDCOPY(anchor, op, length);
356
357 _next_match:
358 // Encode Offset
359 LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
360
361 // Start Counting
362 ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified
363 anchor = ip;
364 while likely(ip<matchlimit-(STEPSIZE-1))
365 {
366 UARCH diff = AARCH(ref) ^ AARCH(ip);
367 if (!diff) {
368 ip+=STEPSIZE;
369 ref+=STEPSIZE;
370 continue;
371 }
372 ip += LZ4_NbCommonBytes(diff);
373 goto _endCount;
374 }
375 if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) {
376 ip+=4;
377 ref+=4;
378 }
379 if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) {
380 ip+=2;
381 ref+=2;
382 }
383 if ((ip<matchlimit) && (*ref == *ip))
384 ip++;
385 _endCount:
386
387 // Encode MatchLength
388 length = (int)(ip - anchor);
389
390 if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend)
391 return 0; // Check output limit
392
393 if (length>=(int)ML_MASK)
394 {
395 *token += ML_MASK;
396 length -= ML_MASK;
397 for (; length > 509 ; length-=510) {
398 *op++ = 255;
399 *op++ = 255;
400 }
401 if (length >= 255) {
402 length-=255;
403 *op++ = 255;
404 }
405 *op++ = (BYTE)length;
406 }
407 else *token += (BYTE)length;
408
409 // Test end of chunk
410 if (ip > mflimit) {
411 anchor = ip;
412 break;
413 }
414
415 // Fill table
416 HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
417
418 // Test next position
419 ref = base + HashTable[LZ4_HASHVALUE(ip)];
420 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
421 if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) {
422 token = op++;
423 *token=0;
424 goto _next_match;
425 }
426
427 // Prepare next loop
428 anchor = ip++;
429 forwardH = LZ4_HASHVALUE(ip);
430 }
431
432 _last_literals:
433 // Encode Last Literals
434 {
435 int lastRun = (int)(iend - anchor);
436
437 if (((char*)op - dest) + lastRun + 1 +
438 ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)
439 return 0; // Check output limit
440
441 if (lastRun>=(int)RUN_MASK) {
442 *op++=(RUN_MASK<<ML_BITS);
443 lastRun-=RUN_MASK;
444 for(; lastRun >= 255 ; lastRun-=255)
445 *op++ = 255;
446 *op++ = (BYTE) lastRun;
447 }
448 else *op++ = (BYTE)(lastRun<<ML_BITS);
449 memcpy(op, anchor, iend - anchor);
450 op += iend-anchor;
451 }
452
453 // End
454 return (int) (((char*)op)-dest);
455 }
456
457 //****************************
458 // Clean defines
459 //****************************
460
461 // Locally Generated
462 #undef HASHLOG
463 #undef HASHTABLE_NBCELLS
464 #undef LZ4_HASH
465 #undef LZ4_HASHVALUE
466 #undef CURRENT_H_TYPE
467 #undef CURRENTBASE
468