1 /* This file is part of the KDE project
2    Copyright (C) 2005-2006 Ariya Hidayat <ariya@kde.org>
3    Copyright (C) 2015 Friedrich W. H. Kossebau <kossebau@kde.org>
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public License
16    along with this library; see the file COPYING.LIB.  If not, write to
17    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18    Boston, MA 02110-1301, USA.
19 */
20 
21 #include "KoLZF.h"
22 
23 #include <QByteArray>
24 #include <QDebug>
25 
26 
27 namespace KoLZF {
28 
29 #define HASH_LOG  12
30 #define HASH_SIZE (1<< HASH_LOG)
31 #define HASH_MASK  (HASH_SIZE-1)
32 
33 #ifdef __GNUC__
34 #pragma GCC diagnostic ignored "-Wcast-align"
35 #endif
36 #define UPDATE_HASH(v,p) { v = *((quint16*)p); v ^= *((quint16*)(p+1))^(v>>(16-HASH_LOG)); }
37 
38 #define MAX_COPY       32
39 #define MAX_LEN       264  /* 256 + 8 */
40 #define MAX_DISTANCE 8192
41 
42 // Lossless compression using LZF algorithm, this is faster on modern CPU than
43 // the original implementation in http://liblzf.plan9.de/
compress(const void * input,int length,void * output,int maxout)44 int compress(const void* input, int length, void* output, int maxout)
45 {
46     if (input == 0 || length < 1 || output == 0 || maxout < 2) {
47         return 0;
48     }
49 
50     const quint8* ip = (const quint8*) input;
51     const quint8* ip_limit = ip + length - MAX_COPY - 4;
52     quint8* op = (quint8*) output;
53     const quint8* last_op = (quint8*) output + maxout - 1;
54 
55     const quint8* htab[HASH_SIZE];
56     const quint8** hslot;
57     quint32 hval;
58 
59     quint8* ref;
60     qint32 copy;
61     qint32 len;
62     qint32 distance;
63     quint8* anchor;
64 
65     /* initializes hash table */
66     for (hslot = htab; hslot < htab + HASH_SIZE; ++hslot) {
67         *hslot = ip;
68     }
69 
70     /* we start with literal copy */
71     copy = 0;
72     *op++ = MAX_COPY - 1;
73 
74     /* main loop */
75     while (ip < ip_limit) {
76         /* find potential match */
77         UPDATE_HASH(hval, ip);
78         hslot = htab + (hval & HASH_MASK);
79         ref = (quint8*) * hslot;
80 
81         /* update hash table */
82         *hslot = ip;
83 
84         /* find itself? then it's no match */
85         if (ip == ref)
86             goto literal;
87 
88         /* is this a match? check the first 2 bytes */
89         if (*((quint16*)ref) != *((quint16*)ip))
90             goto literal;
91 
92         /* now check the 3rd byte */
93         if (ref[2] != ip[2])
94             goto literal;
95 
96         /* calculate distance to the match */
97         distance = ip - ref;
98 
99         /* skip if too far away */
100         if (distance >= MAX_DISTANCE)
101             goto literal;
102 
103         /* here we have 3-byte matches */
104         anchor = (quint8*)ip;
105         len = 3;
106         ref += 3;
107         ip += 3;
108 
109         /* now we have to check how long the match is */
110         if (ip < ip_limit - MAX_LEN) {
111             while (len < MAX_LEN - 8) {
112                 /* unroll 8 times */
113                 if (*ref++ != *ip++) break;
114                 if (*ref++ != *ip++) break;
115                 if (*ref++ != *ip++) break;
116                 if (*ref++ != *ip++) break;
117                 if (*ref++ != *ip++) break;
118                 if (*ref++ != *ip++) break;
119                 if (*ref++ != *ip++) break;
120                 if (*ref++ != *ip++) break;
121                 len += 8;
122             }
123             --ip;
124         }
125         len = ip - anchor;
126 
127         /* just before the last non-matching byte */
128         ip = anchor + len;
129 
130         /* if we have copied something, adjust the copy count */
131         if (copy) {
132             /* copy is biased, '0' means 1 byte copy */
133             anchor = anchor - copy - 1;
134             *(op - copy - 1) = copy - 1;
135             copy = 0;
136         } else {
137             /* back, to overwrite the copy count */
138             --op;
139         }
140 
141         /* length is biased, '1' means a match of 3 bytes */
142         len -= 2;
143 
144         /* distance is also biased */
145         --distance;
146 
147         /* encode the match */
148         if (len < 7) {
149             if (op + 2 > last_op) {
150                 return 0;
151             }
152             *op++ = (len << 5) + (distance >> 8);
153         } else {
154             if (op + 3 > last_op) {
155                 return 0;
156             }
157             *op++ = (7 << 5) + (distance >> 8);
158             *op++ = len - 7;
159         }
160         *op++ = (distance & 255);
161 
162         /* assuming next will be literal copy */
163         *op++ = MAX_COPY - 1;
164 
165         /* update the hash at match boundary */
166         --ip;
167         UPDATE_HASH(hval, ip);
168         htab[hval & HASH_MASK] = ip;
169         ++ip;
170 
171         continue;
172 
173     literal:
174         if (op + 1 > last_op) {
175             return 0;
176         }
177         *op++ = *ip++;
178         ++copy;
179         if (copy >= MAX_COPY) {
180             // start next literal copy item
181             copy = 0;
182             *op++ = MAX_COPY - 1;
183         }
184     }
185 
186     /* left-over as literal copy */
187     ip_limit = (const quint8*)input + length;
188 
189     // TODO: smart calculation to see here if enough output is left
190 
191     while (ip < ip_limit) {
192         if (op == last_op) {
193             return 0;
194         }
195         *op++ = *ip++;
196         ++copy;
197         if (copy == MAX_COPY) {
198             // start next literal copy item
199             copy = 0;
200             if (ip < ip_limit) {
201                 if (op == last_op) {
202                     return 0;
203                 }
204                 *op++ = MAX_COPY - 1;
205             } else {
206                 // do not write possibly out of bounds
207                 // just pretend we moved one more, for the final treatment
208                 ++op;
209             }
210         }
211     }
212 
213     /* if we have copied something, adjust the copy length */
214     if (copy) {
215         *(op - copy - 1) = copy - 1;
216     } else {
217         --op;
218     }
219 
220     return op - (quint8*)output;
221 }
222 
decompress(const void * input,int length,void * output,int maxout)223 int decompress(const void* input, int length, void* output, int maxout)
224 {
225     if (input == 0 || length < 1) {
226         return 0;
227     }
228     if (output == 0 || maxout < 1) {
229         return 0;
230     }
231 
232     const quint8* ip = (const quint8*) input;
233     const quint8* ip_limit  = ip + length - 1;
234     quint8* op = (quint8*) output;
235     quint8* op_limit = op + maxout;
236     quint8* ref;
237 
238     while (ip < ip_limit) {
239         quint32 ctrl = (*ip) + 1;
240         quint32 ofs = ((*ip) & 31) << 8;
241         quint32 len = (*ip++) >> 5;
242 
243         if (ctrl < 33) {
244             /* literal copy */
245             if (op + ctrl > op_limit)
246                 return 0;
247 
248             /* crazy unrolling */
249             if (ctrl) {
250                 *op++ = *ip++;
251                 --ctrl;
252 
253                 if (ctrl) {
254                     *op++ = *ip++;
255                     --ctrl;
256 
257                     if (ctrl) {
258                         *op++ = *ip++;
259                         --ctrl;
260 
261                         for (;ctrl; --ctrl)
262                             *op++ = *ip++;
263                     }
264                 }
265             }
266         } else {
267             /* back reference */
268             --len;
269             ref = op - ofs;
270             --ref;
271 
272             if (len == 7 - 1)
273                 len += *ip++;
274 
275             ref -= *ip++;
276 
277             if (op + len + 3 > op_limit)
278                 return 0;
279 
280             if (ref < (quint8 *)output)
281                 return 0;
282 
283             *op++ = *ref++;
284             *op++ = *ref++;
285             *op++ = *ref++;
286             if (len)
287                 for (; len; --len)
288                     *op++ = *ref++;
289         }
290     }
291 
292     return op - (quint8*)output;
293 }
294 
295 
compress(const QByteArray & input)296 QByteArray compress(const QByteArray& input)
297 {
298     const void* const in_data = (const void*) input.constData();
299     unsigned int in_len = (unsigned int)input.size();
300 
301     QByteArray output;
302     output.resize(in_len + 4 + 1);
303 
304     // we use 4 bytes to store uncompressed length
305     // and 1 extra byte as flag (0=uncompressed, 1=compressed)
306     output[0] = in_len & 255;
307     output[1] = (in_len >> 8) & 255;
308     output[2] = (in_len >> 16) & 255;
309     output[3] = (in_len >> 24) & 255;
310     output[4] = 1;
311 
312     unsigned int out_len = in_len - 1;
313     unsigned char* out_data = (unsigned char*) output.data() + 5;
314 
315     unsigned int len = compress(in_data, in_len, out_data, out_len);
316 
317     if ((len > out_len) || (len == 0)) {
318         // output buffer is too small, likely because the data can't
319         // be compressed. so here just copy without compression
320         output.replace(5, output.size()-5, input);
321 
322         // flag must indicate "uncompressed block"
323         output[4] = 0;
324     } else {
325         output.resize(len + 4 + 1);
326     }
327 
328     // minimize memory
329     output.squeeze();
330 
331     return output;
332 }
333 
334 // will not squeeze output
decompress(const QByteArray & input,QByteArray & output)335 void decompress(const QByteArray& input, QByteArray& output)
336 {
337     Q_ASSERT(input.size() >= 5);
338 
339     // read out first how big is the uncompressed size
340     unsigned int unpack_size = 0;
341     unpack_size |= ((quint8)input[0]);
342     unpack_size |= ((quint8)input[1]) << 8;
343     unpack_size |= ((quint8)input[2]) << 16;
344     unpack_size |= ((quint8)input[3]) << 24;
345 
346     // prepare the output
347     output.resize(unpack_size);
348 
349     // compression flag
350     quint8 flag = (quint8)input[4];
351 
352     // prepare for lzf
353     const void* const in_data = (const void*)(input.constData() + 5);
354     unsigned int in_len = (unsigned int)input.size() - 5;
355     unsigned char* out_data = (unsigned char*) output.data();
356     unsigned int out_len = (unsigned int)unpack_size;
357 
358     if (flag == 0) {
359         memcpy(output.data(), in_data, in_len);
360     } else {
361         unsigned int len = decompress(in_data, in_len, out_data, out_len);
362         Q_ASSERT(len == out_len);
363         Q_UNUSED(len)
364     }
365 }
366 
367 }
368