1 /*
2     File lz_nonslide.c, part of lzxcomp library
3     Copyright (C) 2002 Matthew T. Russotto
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU Lesser General Public License as published by
7     the Free Software Foundation; version 2.1 only
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU Lesser General Public License for more details.
13 
14     You should have received a copy of the GNU Lesser General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 */
18 
19 /*
20  * Document here
21  */
22 #include <sys/types.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <assert.h>
26 #include <string.h>
27 #ifdef DEBUG_PERF
28 #include <sys/time.h>
29 #include <sys/resource.h>
30 #endif
31 #include "lz_nonslide.h"
32 
33 #define MAX_MATCH 253
34 #define MIN_MATCH 2
35 
lz_init(lz_info * lzi,int wsize,int max_dist,int max_match,int min_match,int frame_size,get_chars_t get_chars,output_match_t output_match,output_literal_t output_literal,void * user_data)36 void lz_init(lz_info *lzi, int wsize, int max_dist,
37 	     int max_match, int min_match,
38 	     int frame_size,
39 	     get_chars_t get_chars,
40 	     output_match_t output_match,
41 	     output_literal_t output_literal, void *user_data)
42 {
43   /* the reason for the separate max_dist value is LZX can't reach the
44      first three characters in its nominal window.  But using a smaller
45      window results in inefficiency when dealing with reset intervals
46      which are the length of the nominal window */
47 
48   lzi->wsize = wsize;
49   if (max_match > wsize)
50     lzi->max_match = wsize;
51   else
52     lzi->max_match = max_match;
53 
54   lzi->min_match = min_match;
55   if (lzi->min_match < 3) lzi->min_match = 3;
56 
57   lzi->max_dist = max_dist;
58   lzi->block_buf_size = wsize + lzi->max_dist;
59   lzi->block_buf = malloc(lzi->block_buf_size);
60   lzi->block_bufe = lzi->block_buf + lzi->block_buf_size;
61   assert(lzi->block_buf != NULL);
62 
63   lzi->cur_loc = 0;
64   lzi->block_loc = 0;
65   lzi->chars_in_buf = 0;
66   lzi->eofcount = 0;
67   lzi->get_chars = get_chars;
68   lzi->output_match = output_match;
69   lzi->output_literal = output_literal;
70   lzi->user_data = user_data;
71   lzi->frame_size = frame_size;
72   lzi->lentab = calloc(sizeof(int), lzi->block_buf_size);
73   lzi->prevtab = calloc(sizeof(u_char *), lzi->block_buf_size);
74   lzi->analysis_valid = 0;
75 }
76 
lz_release(lz_info * lzi)77 void lz_release(lz_info *lzi)
78 {
79   free(lzi->block_buf);
80   free(lzi->lentab);
81   free(lzi->prevtab);
82 }
83 
lz_reset(lz_info * lzi)84 void lz_reset(lz_info *lzi)
85 {
86   int residual = lzi->chars_in_buf - lzi->block_loc;
87   memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual);
88   lzi->chars_in_buf = residual;
89   lzi->block_loc = 0;
90   lzi->analysis_valid = 0;
91 }
92 
93 #ifdef LZNONSLIDE_MAIN
94 typedef struct lz_user_data
95 {
96   FILE *infile;
97   FILE *outfile;
98   int R0, R1, R2;
99 } lz_user_data;
100 
tmp_get_chars(lz_info * lzi,int n,u_char * buf)101 int tmp_get_chars(lz_info *lzi, int n, u_char *buf)
102 {
103   lz_user_data *lzud = (lz_user_data *)lzi->user_data;
104   return fread(buf, 1, n, lzud->infile);
105 }
106 
tmp_output_match(lz_info * lzi,int match_pos,int match_len)107 int tmp_output_match(lz_info *lzi, int match_pos, int match_len)
108 {
109   lz_user_data *lzud = (lz_user_data *)lzi->user_data;
110   int mod_match_loc;
111 
112   mod_match_loc = match_pos;
113 
114   fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc);
115   return 0;
116 }
117 
tmp_output_literal(lz_info * lzi,u_char ch)118 void tmp_output_literal(lz_info *lzi, u_char ch)
119 {
120   lz_user_data *lzud = (lz_user_data *)lzi->user_data;
121   fprintf(lzud->outfile, "'%c'", ch);
122 }
123 
main(int argc,char * argv[])124 int main(int argc, char *argv[])
125 {
126   int wsize = atoi(argv[1]);
127   lz_info lzi;
128   lz_user_data lzu = {stdin, stdout, 1, 1, 1};
129 
130   lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu);
131   lz_compress(&lzi);
132   return 0;
133 }
134 #endif
135 
lz_left_to_process(lz_info * lzi)136 __inline__ int lz_left_to_process(lz_info *lzi)
137 {
138   return lzi->chars_in_buf - lzi->block_loc;
139 }
140 
141 static void
fill_blockbuf(lz_info * lzi,int maxchars)142 fill_blockbuf(lz_info *lzi, int maxchars)
143 {
144   int toread;
145   u_char *readhere;
146   int nread;
147 
148   if (lzi->eofcount) return;
149   maxchars -= lz_left_to_process(lzi);
150   toread = lzi->block_buf_size - lzi->chars_in_buf;
151   if (toread > maxchars) toread = maxchars;
152   readhere = lzi->block_buf + lzi->chars_in_buf;
153   nread = lzi->get_chars(lzi, toread, readhere);
154   lzi->chars_in_buf += nread;
155   if (nread != toread)
156     lzi->eofcount++;
157 }
158 
lz_analyze_block(lz_info * lzi)159 static void lz_analyze_block(lz_info *lzi)
160 {
161   int *lentab, *lenp;
162   u_char **prevtab, **prevp;
163   u_char *bbp, *bbe;
164   u_char *chartab[256];
165   u_char *cursor;
166   int prevlen;
167   int ch;
168   int maxlen;
169   long wasinc;
170   int max_dist = lzi->max_dist;
171 #ifdef DEBUG_ANALYZE_BLOCK
172   static short n = 0;
173 #endif
174 #ifdef DEBUG_PERF
175   struct rusage innerloop;
176   struct timeval innertime, tmptime;
177   struct rusage outerloop;
178   struct timeval outertime;
179   struct rusage initialloop;
180   struct timeval initialtime;
181   struct rusage totalloop;
182   struct timeval totaltime;
183 #endif
184 
185 #ifdef DEBUG_ANALYZE_BLOCK
186   fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc);
187 #endif
188   memset(chartab, 0, sizeof(chartab));
189   prevtab = prevp = lzi->prevtab;
190   lentab = lenp = lzi->lentab;
191   memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf);
192   memset(lentab, 0, sizeof(*prevtab) * lzi->chars_in_buf);
193 #ifdef DEBUG_PERF
194   memset(&innertime, 0, sizeof(innertime));
195   memset(&outertime, 0, sizeof(outertime));
196   getrusage(RUSAGE_SELF, &initialloop);
197   totalloop = initialloop;
198 #endif
199   bbp = lzi->block_buf;
200   bbe = bbp + lzi->chars_in_buf;
201   while (bbp < bbe) {
202     if (chartab[ch = *bbp]) {
203       *prevp = chartab[ch];
204       *lenp = 1;
205     }
206     chartab[ch] = bbp;
207     bbp++;
208     prevp++;
209     lenp++;
210   }
211 #ifdef DEBUG_PERF
212   initialtime = initialloop.ru_utime;
213   getrusage(RUSAGE_SELF, &initialloop);
214   timersub(&initialloop.ru_utime, &initialtime, &initialtime);
215 #endif
216   wasinc = 1;
217   for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) {
218 #ifdef DEBUG_PERF
219     getrusage(RUSAGE_SELF, &outerloop);
220 #endif
221     bbp = bbe - maxlen - 1;
222     lenp = lentab + lzi->chars_in_buf - maxlen - 1;
223     prevp = prevtab + lzi->chars_in_buf - maxlen - 1;
224     wasinc = 0;
225     while (bbp > lzi->block_buf) {
226       if (*lenp == maxlen) {
227 #ifdef DEBUG_PERF
228 	getrusage(RUSAGE_SELF, &innerloop);
229 #endif
230 	ch = bbp[maxlen];
231 	cursor = *prevp;
232 	while(cursor && ((bbp - cursor) <= max_dist)) {
233 	  prevlen = *(cursor - lzi->block_buf + lentab);
234 	  if (cursor[maxlen] == ch) {
235 	    *prevp = cursor;
236 	    (*lenp)++;
237 	    wasinc++;
238 	    break;
239 	  }
240 	  if (prevlen != maxlen) break;
241 	  cursor = *(cursor - lzi->block_buf + prevtab);
242 	}
243 #ifdef DEBUG_PERF
244 	tmptime = innerloop.ru_utime;
245 	getrusage(RUSAGE_SELF, &innerloop);
246 	timersub(&innerloop.ru_utime, &tmptime, &tmptime);
247 	timeradd(&tmptime, &innertime, &innertime);
248 #endif
249       }
250       bbp--;
251       prevp--;
252       lenp--;
253     }
254 #ifdef DEBUG_PERF
255     tmptime = outerloop.ru_utime;
256     getrusage(RUSAGE_SELF, &outerloop);
257     timersub(&outerloop.ru_utime, &tmptime, &tmptime);
258     timeradd(&tmptime, &outertime, &outertime);
259 #endif
260     //    fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc);
261   }
262 #ifdef DEBUG_PERF
263   totaltime = totalloop.ru_utime;
264   getrusage(RUSAGE_SELF, &totalloop);
265   timersub(&totalloop.ru_utime, &totaltime, &totaltime);
266   fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6);
267   fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6);
268   fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6);
269   fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6);
270 #endif
271   lzi->analysis_valid = 1;
272 #ifdef DEBUG_ANALYZE_BLOCK
273   fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc);
274 #endif
275 }
276 
lz_stop_compressing(lz_info * lzi)277 void lz_stop_compressing(lz_info *lzi)
278 {
279   lzi->stop = 1;
280   /*  fprintf(stderr, "Stopping...\n");*/
281 }
282 
lz_compress(lz_info * lzi,int nchars)283 int lz_compress(lz_info *lzi, int nchars)
284 {
285 
286   u_char *bbp, *bbe;
287   int *lentab, *lenp;
288   u_char **prevtab, **prevp;
289   int len;
290   int holdback;
291   short trimmed;
292 
293   lzi->stop = 0;
294   while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) {
295 #if 1
296     if (!lzi->analysis_valid ||
297 	(!lzi->eofcount &&
298 	 ((lzi->chars_in_buf- lzi->block_loc) < nchars))) {
299       int residual = lzi->chars_in_buf - lzi->block_loc;
300       int bytes_to_move = lzi->max_dist + residual;
301       if (bytes_to_move > lzi->chars_in_buf)
302 	bytes_to_move = lzi->chars_in_buf;
303 #ifdef DEBUG_ANALYZE_BLOCK
304       fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc);
305 #endif
306       memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move,
307 	      bytes_to_move);
308 
309       lzi->block_loc = bytes_to_move - residual;
310       lzi->chars_in_buf = bytes_to_move;
311 #ifdef DEBUG_ANALYZE_BLOCK
312       fprintf(stderr, "New chars_in_buf %06x,  new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
313 #endif
314       fill_blockbuf(lzi, nchars);
315 #ifdef DEBUG_ANALYZE_BLOCK
316       fprintf(stderr, "Really new chars_in_buf %06x,  new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
317 #endif
318       lz_analyze_block(lzi);
319     }
320 #else
321     if (!lzi->analysis_valid ||
322 	(lzi->block_loc - lzi->chars_in_buf) == 0) {
323       lzi->block_loc = 0;
324       lzi->chars_in_buf = 0;
325       fill_blockbuf(lzi, nchars);
326       lz_analyze_block(lzi);
327     }
328 #endif
329     prevtab = prevp = lzi->prevtab + lzi->block_loc;
330     lentab = lenp = lzi->lentab + lzi->block_loc;
331     bbp = lzi->block_buf + lzi->block_loc;
332     holdback = lzi->max_match;
333     if (lzi->eofcount) holdback = 0;
334     if (lzi->chars_in_buf < (nchars + lzi->block_loc))
335       bbe = lzi->block_buf + lzi->chars_in_buf - holdback;
336     else
337       bbe = bbp + nchars;
338     while ((bbp < bbe) && (!lzi->stop)) {
339       trimmed = 0;
340       len = *lenp;
341       if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) {
342 #ifdef DEBUG_TRIMMING
343 	fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size));
344 #endif
345 	trimmed = 1;
346 	len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size);
347       }
348       if (len > nchars) {
349 #ifdef DEBUG_TRIMMING
350 	fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars);
351 #endif
352 	trimmed = 1;
353 	len = nchars;
354       }
355       if (len >= lzi->min_match) {
356 #ifdef LAZY
357 	if ((bbp < bbe -1) && !trimmed &&
358 	    ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) {
359 	  len = 1;
360 	  /* this is the lazy eval case */
361 	}
362 	else
363 #endif
364 	  if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc,
365 				len) < 0) {
366 	    //	    fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len);
367 	    len = 1; /* match rejected */
368 	  }
369       }
370       else
371 	len = 1;
372 
373       if (len < lzi->min_match) {
374 	assert(len == 1);
375 	lzi->output_literal(lzi, *bbp);
376       }
377       //      fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, lzi->block_loc);
378       bbp += len;
379       prevp += len;
380       lenp += len;
381       lzi->cur_loc += len;
382       lzi->block_loc += len;
383       assert(nchars >= len);
384       nchars -= len;
385 
386     }
387   }
388   return 0;
389 }
390