1 /* This file is part of the KDE project
2 Copyright (C) 2005-2006 Ariya Hidayat <ariya@kde.org>
3 Copyright (C) 2015 Friedrich W. H. Kossebau <kossebau@kde.org>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public License
16 along with this library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19 */
20
21 #include "KoLZF.h"
22
23 #include <QByteArray>
24 #include <QDebug>
25
26
27 namespace KoLZF {
28
29 #define HASH_LOG 12
30 #define HASH_SIZE (1<< HASH_LOG)
31 #define HASH_MASK (HASH_SIZE-1)
32
33 #ifdef __GNUC__
34 #pragma GCC diagnostic ignored "-Wcast-align"
35 #endif
36 #define UPDATE_HASH(v,p) { v = *((quint16*)p); v ^= *((quint16*)(p+1))^(v>>(16-HASH_LOG)); }
37
38 #define MAX_COPY 32
39 #define MAX_LEN 264 /* 256 + 8 */
40 #define MAX_DISTANCE 8192
41
42 // Lossless compression using LZF algorithm, this is faster on modern CPU than
43 // the original implementation in http://liblzf.plan9.de/
compress(const void * input,int length,void * output,int maxout)44 int compress(const void* input, int length, void* output, int maxout)
45 {
46 if (input == 0 || length < 1 || output == 0 || maxout < 2) {
47 return 0;
48 }
49
50 const quint8* ip = (const quint8*) input;
51 const quint8* ip_limit = ip + length - MAX_COPY - 4;
52 quint8* op = (quint8*) output;
53 const quint8* last_op = (quint8*) output + maxout - 1;
54
55 const quint8* htab[HASH_SIZE];
56 const quint8** hslot;
57 quint32 hval;
58
59 quint8* ref;
60 qint32 copy;
61 qint32 len;
62 qint32 distance;
63 quint8* anchor;
64
65 /* initializes hash table */
66 for (hslot = htab; hslot < htab + HASH_SIZE; ++hslot) {
67 *hslot = ip;
68 }
69
70 /* we start with literal copy */
71 copy = 0;
72 *op++ = MAX_COPY - 1;
73
74 /* main loop */
75 while (ip < ip_limit) {
76 /* find potential match */
77 UPDATE_HASH(hval, ip);
78 hslot = htab + (hval & HASH_MASK);
79 ref = (quint8*) * hslot;
80
81 /* update hash table */
82 *hslot = ip;
83
84 /* find itself? then it's no match */
85 if (ip == ref)
86 goto literal;
87
88 /* is this a match? check the first 2 bytes */
89 if (*((quint16*)ref) != *((quint16*)ip))
90 goto literal;
91
92 /* now check the 3rd byte */
93 if (ref[2] != ip[2])
94 goto literal;
95
96 /* calculate distance to the match */
97 distance = ip - ref;
98
99 /* skip if too far away */
100 if (distance >= MAX_DISTANCE)
101 goto literal;
102
103 /* here we have 3-byte matches */
104 anchor = (quint8*)ip;
105 len = 3;
106 ref += 3;
107 ip += 3;
108
109 /* now we have to check how long the match is */
110 if (ip < ip_limit - MAX_LEN) {
111 while (len < MAX_LEN - 8) {
112 /* unroll 8 times */
113 if (*ref++ != *ip++) break;
114 if (*ref++ != *ip++) break;
115 if (*ref++ != *ip++) break;
116 if (*ref++ != *ip++) break;
117 if (*ref++ != *ip++) break;
118 if (*ref++ != *ip++) break;
119 if (*ref++ != *ip++) break;
120 if (*ref++ != *ip++) break;
121 len += 8;
122 }
123 --ip;
124 }
125 len = ip - anchor;
126
127 /* just before the last non-matching byte */
128 ip = anchor + len;
129
130 /* if we have copied something, adjust the copy count */
131 if (copy) {
132 /* copy is biased, '0' means 1 byte copy */
133 anchor = anchor - copy - 1;
134 *(op - copy - 1) = copy - 1;
135 copy = 0;
136 } else {
137 /* back, to overwrite the copy count */
138 --op;
139 }
140
141 /* length is biased, '1' means a match of 3 bytes */
142 len -= 2;
143
144 /* distance is also biased */
145 --distance;
146
147 /* encode the match */
148 if (len < 7) {
149 if (op + 2 > last_op) {
150 return 0;
151 }
152 *op++ = (len << 5) + (distance >> 8);
153 } else {
154 if (op + 3 > last_op) {
155 return 0;
156 }
157 *op++ = (7 << 5) + (distance >> 8);
158 *op++ = len - 7;
159 }
160 *op++ = (distance & 255);
161
162 /* assuming next will be literal copy */
163 *op++ = MAX_COPY - 1;
164
165 /* update the hash at match boundary */
166 --ip;
167 UPDATE_HASH(hval, ip);
168 htab[hval & HASH_MASK] = ip;
169 ++ip;
170
171 continue;
172
173 literal:
174 if (op + 1 > last_op) {
175 return 0;
176 }
177 *op++ = *ip++;
178 ++copy;
179 if (copy >= MAX_COPY) {
180 // start next literal copy item
181 copy = 0;
182 *op++ = MAX_COPY - 1;
183 }
184 }
185
186 /* left-over as literal copy */
187 ip_limit = (const quint8*)input + length;
188
189 // TODO: smart calculation to see here if enough output is left
190
191 while (ip < ip_limit) {
192 if (op == last_op) {
193 return 0;
194 }
195 *op++ = *ip++;
196 ++copy;
197 if (copy == MAX_COPY) {
198 // start next literal copy item
199 copy = 0;
200 if (ip < ip_limit) {
201 if (op == last_op) {
202 return 0;
203 }
204 *op++ = MAX_COPY - 1;
205 } else {
206 // do not write possibly out of bounds
207 // just pretend we moved one more, for the final treatment
208 ++op;
209 }
210 }
211 }
212
213 /* if we have copied something, adjust the copy length */
214 if (copy) {
215 *(op - copy - 1) = copy - 1;
216 } else {
217 --op;
218 }
219
220 return op - (quint8*)output;
221 }
222
decompress(const void * input,int length,void * output,int maxout)223 int decompress(const void* input, int length, void* output, int maxout)
224 {
225 if (input == 0 || length < 1) {
226 return 0;
227 }
228 if (output == 0 || maxout < 1) {
229 return 0;
230 }
231
232 const quint8* ip = (const quint8*) input;
233 const quint8* ip_limit = ip + length - 1;
234 quint8* op = (quint8*) output;
235 quint8* op_limit = op + maxout;
236 quint8* ref;
237
238 while (ip < ip_limit) {
239 quint32 ctrl = (*ip) + 1;
240 quint32 ofs = ((*ip) & 31) << 8;
241 quint32 len = (*ip++) >> 5;
242
243 if (ctrl < 33) {
244 /* literal copy */
245 if (op + ctrl > op_limit)
246 return 0;
247
248 /* crazy unrolling */
249 if (ctrl) {
250 *op++ = *ip++;
251 --ctrl;
252
253 if (ctrl) {
254 *op++ = *ip++;
255 --ctrl;
256
257 if (ctrl) {
258 *op++ = *ip++;
259 --ctrl;
260
261 for (;ctrl; --ctrl)
262 *op++ = *ip++;
263 }
264 }
265 }
266 } else {
267 /* back reference */
268 --len;
269 ref = op - ofs;
270 --ref;
271
272 if (len == 7 - 1)
273 len += *ip++;
274
275 ref -= *ip++;
276
277 if (op + len + 3 > op_limit)
278 return 0;
279
280 if (ref < (quint8 *)output)
281 return 0;
282
283 *op++ = *ref++;
284 *op++ = *ref++;
285 *op++ = *ref++;
286 if (len)
287 for (; len; --len)
288 *op++ = *ref++;
289 }
290 }
291
292 return op - (quint8*)output;
293 }
294
295
compress(const QByteArray & input)296 QByteArray compress(const QByteArray& input)
297 {
298 const void* const in_data = (const void*) input.constData();
299 unsigned int in_len = (unsigned int)input.size();
300
301 QByteArray output;
302 output.resize(in_len + 4 + 1);
303
304 // we use 4 bytes to store uncompressed length
305 // and 1 extra byte as flag (0=uncompressed, 1=compressed)
306 output[0] = in_len & 255;
307 output[1] = (in_len >> 8) & 255;
308 output[2] = (in_len >> 16) & 255;
309 output[3] = (in_len >> 24) & 255;
310 output[4] = 1;
311
312 unsigned int out_len = in_len - 1;
313 unsigned char* out_data = (unsigned char*) output.data() + 5;
314
315 unsigned int len = compress(in_data, in_len, out_data, out_len);
316
317 if ((len > out_len) || (len == 0)) {
318 // output buffer is too small, likely because the data can't
319 // be compressed. so here just copy without compression
320 output.replace(5, output.size()-5, input);
321
322 // flag must indicate "uncompressed block"
323 output[4] = 0;
324 } else {
325 output.resize(len + 4 + 1);
326 }
327
328 // minimize memory
329 output.squeeze();
330
331 return output;
332 }
333
334 // will not squeeze output
decompress(const QByteArray & input,QByteArray & output)335 void decompress(const QByteArray& input, QByteArray& output)
336 {
337 Q_ASSERT(input.size() >= 5);
338
339 // read out first how big is the uncompressed size
340 unsigned int unpack_size = 0;
341 unpack_size |= ((quint8)input[0]);
342 unpack_size |= ((quint8)input[1]) << 8;
343 unpack_size |= ((quint8)input[2]) << 16;
344 unpack_size |= ((quint8)input[3]) << 24;
345
346 // prepare the output
347 output.resize(unpack_size);
348
349 // compression flag
350 quint8 flag = (quint8)input[4];
351
352 // prepare for lzf
353 const void* const in_data = (const void*)(input.constData() + 5);
354 unsigned int in_len = (unsigned int)input.size() - 5;
355 unsigned char* out_data = (unsigned char*) output.data();
356 unsigned int out_len = (unsigned int)unpack_size;
357
358 if (flag == 0) {
359 memcpy(output.data(), in_data, in_len);
360 } else {
361 unsigned int len = decompress(in_data, in_len, out_data, out_len);
362 Q_ASSERT(len == out_len);
363 Q_UNUSED(len)
364 }
365 }
366
367 }
368