1 /*
2 File lz_nonslide.c, part of lzxcomp library
3 Copyright (C) 2002 Matthew T. Russotto
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; version 2.1 only
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 /*
20 * Document here
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <assert.h>
25 #if defined(_WIN32)
26 #define WIN32_LEAN_AND_MEAN
27 #include <string.h>
28 #include <windows.h>
29 #else
30 #include <string.h>
31 #include <strings.h>
32 #ifdef DEBUG_PERF
33 #include <sys/time.h>
34 #include <sys/resource.h>
35 #endif
36 #endif
37 #include "lz_nonslide.h"
38
39 #define MAX_MATCH 253
40 #define MIN_MATCH 2
41
lz_init(lz_info * lzi,int wsize,int max_dist,int max_match,int min_match,int frame_size,get_chars_t get_chars,output_match_t output_match,output_literal_t output_literal,void * user_data)42 void lz_init(lz_info *lzi, int wsize, int max_dist,
43 int max_match, int min_match,
44 int frame_size,
45 get_chars_t get_chars,
46 output_match_t output_match,
47 output_literal_t output_literal, void *user_data)
48 {
49 /* the reason for the separate max_dist value is LZX can't reach the
50 first three characters in its nominal window. But using a smaller
51 window results in inefficiency when dealing with reset intervals
52 which are the length of the nominal window */
53
54 lzi->wsize = wsize;
55 if (max_match > wsize)
56 lzi->max_match = wsize;
57 else
58 lzi->max_match = max_match;
59
60 lzi->min_match = min_match;
61 if (lzi->min_match < 3) lzi->min_match = 3;
62
63 lzi->max_dist = max_dist;
64 lzi->block_buf_size = wsize + lzi->max_dist;
65 lzi->block_buf = malloc(lzi->block_buf_size);
66 lzi->block_bufe = lzi->block_buf + lzi->block_buf_size;
67 assert(lzi->block_buf != NULL);
68
69 lzi->cur_loc = 0;
70 lzi->block_loc = 0;
71 lzi->chars_in_buf = 0;
72 lzi->eofcount = 0;
73 lzi->get_chars = get_chars;
74 lzi->output_match = output_match;
75 lzi->output_literal = output_literal;
76 lzi->user_data = user_data;
77 lzi->frame_size = frame_size;
78 lzi->lentab = calloc(sizeof(int), lzi->block_buf_size);
79 lzi->prevtab = calloc(sizeof(u_char *), lzi->block_buf_size);
80 lzi->analysis_valid = 0;
81 }
82
lz_release(lz_info * lzi)83 void lz_release(lz_info *lzi)
84 {
85 free(lzi->block_buf);
86 free(lzi->lentab);
87 free(lzi->prevtab);
88 }
89
lz_reset(lz_info * lzi)90 void lz_reset(lz_info *lzi)
91 {
92 int residual = lzi->chars_in_buf - lzi->block_loc;
93 memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual);
94 lzi->chars_in_buf = residual;
95 lzi->block_loc = 0;
96 lzi->analysis_valid = 0;
97 }
98
99 #ifdef LZNONSLIDE_MAIN
100 typedef struct lz_user_data
101 {
102 FILE *infile;
103 FILE *outfile;
104 int R0, R1, R2;
105 } lz_user_data;
106
tmp_get_chars(lz_info * lzi,int n,u_char * buf)107 int tmp_get_chars(lz_info *lzi, int n, u_char *buf)
108 {
109 lz_user_data *lzud = (lz_user_data *)lzi->user_data;
110 return fread(buf, 1, n, lzud->infile);
111 }
112
tmp_output_match(lz_info * lzi,int match_pos,int match_len)113 int tmp_output_match(lz_info *lzi, int match_pos, int match_len)
114 {
115 lz_user_data *lzud = (lz_user_data *)lzi->user_data;
116 int mod_match_loc;
117
118 mod_match_loc = match_pos;
119
120 fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc);
121 return 0;
122 }
123
tmp_output_literal(lz_info * lzi,u_char ch)124 void tmp_output_literal(lz_info *lzi, u_char ch)
125 {
126 lz_user_data *lzud = (lz_user_data *)lzi->user_data;
127 fprintf(lzud->outfile, "'%c'", ch);
128 }
129
main(int argc,char * argv[])130 int main(int argc, char *argv[])
131 {
132 int wsize = atoi(argv[1]);
133 lz_info lzi;
134 lz_user_data lzu = {stdin, stdout, 1, 1, 1};
135
136 lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu);
137 lz_compress(&lzi);
138 return 0;
139 }
140 #endif
141
lz_left_to_process(lz_info * lzi)142 int lz_left_to_process(lz_info *lzi)
143 {
144 return lzi->chars_in_buf - lzi->block_loc;
145 }
146
147 static void
fill_blockbuf(lz_info * lzi,int maxchars)148 fill_blockbuf(lz_info *lzi, int maxchars)
149 {
150 int toread;
151 u_char *readhere;
152 int nread;
153
154 if (lzi->eofcount) return;
155 maxchars -= lz_left_to_process(lzi);
156 toread = lzi->block_buf_size - lzi->chars_in_buf;
157 if (toread > maxchars) toread = maxchars;
158 readhere = lzi->block_buf + lzi->chars_in_buf;
159 nread = lzi->get_chars(lzi, toread, readhere);
160 lzi->chars_in_buf += nread;
161 if (nread != toread)
162 lzi->eofcount++;
163 }
164
lz_analyze_block(lz_info * lzi)165 static void lz_analyze_block(lz_info *lzi)
166 {
167 int *lentab, *lenp;
168 u_char **prevtab, **prevp;
169 u_char *bbp, *bbe;
170 u_char *chartab[256];
171 u_char *cursor;
172 int prevlen;
173 int ch;
174 int maxlen;
175 long wasinc;
176 int max_dist = lzi->max_dist;
177 #ifdef DEBUG_ANALYZE_BLOCK
178 static short n = 0;
179 #endif
180 #ifdef DEBUG_PERF
181 struct rusage innerloop;
182 struct timeval innertime, tmptime;
183 struct rusage outerloop;
184 struct timeval outertime;
185 struct rusage initialloop;
186 struct timeval initialtime;
187 struct rusage totalloop;
188 struct timeval totaltime;
189 #endif
190
191 #ifdef DEBUG_ANALYZE_BLOCK
192 fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc);
193 #endif
194 memset(chartab, 0, sizeof(chartab));
195 prevtab = prevp = lzi->prevtab;
196 lentab = lenp = lzi->lentab;
197 memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf);
198 memset(lentab, 0, sizeof(*prevtab) * lzi->chars_in_buf);
199 #ifdef DEBUG_PERF
200 memset(&innertime, 0, sizeof(innertime));
201 memset(&outertime, 0, sizeof(outertime));
202 getrusage(RUSAGE_SELF, &initialloop);
203 totalloop = initialloop;
204 #endif
205 bbp = lzi->block_buf;
206 bbe = bbp + lzi->chars_in_buf;
207 while (bbp < bbe) {
208 if (chartab[ch = *bbp]) {
209 *prevp = chartab[ch];
210 *lenp = 1;
211 }
212 chartab[ch] = bbp;
213 bbp++;
214 prevp++;
215 lenp++;
216 }
217 #ifdef DEBUG_PERF
218 initialtime = initialloop.ru_utime;
219 getrusage(RUSAGE_SELF, &initialloop);
220 timersub(&initialloop.ru_utime, &initialtime, &initialtime);
221 #endif
222 wasinc = 1;
223 for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) {
224 #ifdef DEBUG_PERF
225 getrusage(RUSAGE_SELF, &outerloop);
226 #endif
227 bbp = bbe - maxlen - 1;
228 lenp = lentab + lzi->chars_in_buf - maxlen - 1;
229 prevp = prevtab + lzi->chars_in_buf - maxlen - 1;
230 wasinc = 0;
231 while (bbp > lzi->block_buf) {
232 if (*lenp == maxlen) {
233 #ifdef DEBUG_PERF
234 getrusage(RUSAGE_SELF, &innerloop);
235 #endif
236 ch = bbp[maxlen];
237 cursor = *prevp;
238 while(cursor && ((bbp - cursor) <= max_dist)) {
239 prevlen = *(cursor - lzi->block_buf + lentab);
240 if (cursor[maxlen] == ch) {
241 *prevp = cursor;
242 (*lenp)++;
243 wasinc++;
244 break;
245 }
246 if (prevlen != maxlen) break;
247 cursor = *(cursor - lzi->block_buf + prevtab);
248 }
249 #ifdef DEBUG_PERF
250 tmptime = innerloop.ru_utime;
251 getrusage(RUSAGE_SELF, &innerloop);
252 timersub(&innerloop.ru_utime, &tmptime, &tmptime);
253 timeradd(&tmptime, &innertime, &innertime);
254 #endif
255 }
256 bbp--;
257 prevp--;
258 lenp--;
259 }
260 #ifdef DEBUG_PERF
261 tmptime = outerloop.ru_utime;
262 getrusage(RUSAGE_SELF, &outerloop);
263 timersub(&outerloop.ru_utime, &tmptime, &tmptime);
264 timeradd(&tmptime, &outertime, &outertime);
265 #endif
266 // fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc);
267 }
268 #ifdef DEBUG_PERF
269 totaltime = totalloop.ru_utime;
270 getrusage(RUSAGE_SELF, &totalloop);
271 timersub(&totalloop.ru_utime, &totaltime, &totaltime);
272 fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6);
273 fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6);
274 fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6);
275 fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6);
276 #endif
277 lzi->analysis_valid = 1;
278 #ifdef DEBUG_ANALYZE_BLOCK
279 fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc);
280 #endif
281 }
282
lz_stop_compressing(lz_info * lzi)283 void lz_stop_compressing(lz_info *lzi)
284 {
285 lzi->stop = 1;
286 /* fprintf(stderr, "Stopping...\n");*/
287 }
288
lz_compress(lz_info * lzi,int nchars)289 int lz_compress(lz_info *lzi, int nchars)
290 {
291
292 u_char *bbp, *bbe;
293 int *lentab, *lenp;
294 u_char **prevtab, **prevp;
295 int len;
296 int holdback;
297 short trimmed;
298
299 lzi->stop = 0;
300 while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) {
301 #if 1
302 if (!lzi->analysis_valid ||
303 (!lzi->eofcount &&
304 ((lzi->chars_in_buf- lzi->block_loc) < nchars))) {
305 int residual = lzi->chars_in_buf - lzi->block_loc;
306 int bytes_to_move = lzi->max_dist + residual;
307 if (bytes_to_move > lzi->chars_in_buf)
308 bytes_to_move = lzi->chars_in_buf;
309 #ifdef DEBUG_ANALYZE_BLOCK
310 fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc);
311 #endif
312 memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move,
313 bytes_to_move);
314
315 lzi->block_loc = bytes_to_move - residual;
316 lzi->chars_in_buf = bytes_to_move;
317 #ifdef DEBUG_ANALYZE_BLOCK
318 fprintf(stderr, "New chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
319 #endif
320 fill_blockbuf(lzi, nchars);
321 #ifdef DEBUG_ANALYZE_BLOCK
322 fprintf(stderr, "Really new chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
323 #endif
324 lz_analyze_block(lzi);
325 }
326 #else
327 if (!lzi->analysis_valid ||
328 (lzi->block_loc - lzi->chars_in_buf) == 0) {
329 lzi->block_loc = 0;
330 lzi->chars_in_buf = 0;
331 fill_blockbuf(lzi, nchars);
332 lz_analyze_block(lzi);
333 }
334 #endif
335 prevtab = prevp = lzi->prevtab + lzi->block_loc;
336 lentab = lenp = lzi->lentab + lzi->block_loc;
337 bbp = lzi->block_buf + lzi->block_loc;
338 holdback = lzi->max_match;
339 if (lzi->eofcount) holdback = 0;
340 if (lzi->chars_in_buf < (nchars + lzi->block_loc))
341 bbe = lzi->block_buf + lzi->chars_in_buf - holdback;
342 else
343 bbe = bbp + nchars;
344 while ((bbp < bbe) && (!lzi->stop)) {
345 trimmed = 0;
346 len = *lenp;
347 if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) {
348 #ifdef DEBUG_TRIMMING
349 fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size));
350 #endif
351 trimmed = 1;
352 len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size);
353 }
354 if (len > nchars) {
355 #ifdef DEBUG_TRIMMING
356 fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars);
357 #endif
358 trimmed = 1;
359 len = nchars;
360 }
361 if (len >= lzi->min_match) {
362 #ifdef LAZY
363 if ((bbp < bbe -1) && !trimmed &&
364 ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) {
365 len = 1;
366 /* this is the lazy eval case */
367 }
368 else
369 #endif
370 if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc,
371 len) < 0) {
372 // fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len);
373 len = 1; /* match rejected */
374 }
375 }
376 else
377 len = 1;
378
379 if (len < lzi->min_match) {
380 assert(len == 1);
381 lzi->output_literal(lzi, *bbp);
382 }
383 // fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, lzi->block_loc);
384 bbp += len;
385 prevp += len;
386 lenp += len;
387 lzi->cur_loc += len;
388 lzi->block_loc += len;
389 assert(nchars >= len);
390 nchars -= len;
391
392 }
393 }
394 return 0;
395 }
396