1 /* 2 File lz_nonslide.c, part of lzxcomp library 3 Copyright (C) 2002 Matthew T. Russotto 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as published by 7 the Free Software Foundation; version 2.1 only 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * Document here 21 */ 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <assert.h> 25 #if defined(_WIN32) 26 #define WIN32_LEAN_AND_MEAN 27 #include <string.h> 28 #include <windows.h> 29 #else 30 #include <string.h> 31 #include <strings.h> 32 #ifdef DEBUG_PERF 33 #include <sys/time.h> 34 #include <sys/resource.h> 35 #endif 36 #endif 37 #include "lz_nonslide.h" 38 39 #define MAX_MATCH 253 40 #define MIN_MATCH 2 41 42 void lz_init(lz_info *lzi, int wsize, int max_dist, 43 int max_match, int min_match, 44 int frame_size, 45 get_chars_t get_chars, 46 output_match_t output_match, 47 output_literal_t output_literal, void *user_data) 48 { 49 /* the reason for the separate max_dist value is LZX can't reach the 50 first three characters in its nominal window. But using a smaller 51 window results in inefficiency when dealing with reset intervals 52 which are the length of the nominal window */ 53 54 lzi->wsize = wsize; 55 if (max_match > wsize) 56 lzi->max_match = wsize; 57 else 58 lzi->max_match = max_match; 59 60 lzi->min_match = min_match; 61 if (lzi->min_match < 3) lzi->min_match = 3; 62 63 lzi->max_dist = max_dist; 64 lzi->block_buf_size = wsize + lzi->max_dist; 65 lzi->block_buf = malloc(lzi->block_buf_size); 66 lzi->block_bufe = lzi->block_buf + lzi->block_buf_size; 67 assert(lzi->block_buf != NULL); 68 69 lzi->cur_loc = 0; 70 lzi->block_loc = 0; 71 lzi->chars_in_buf = 0; 72 lzi->eofcount = 0; 73 lzi->get_chars = get_chars; 74 lzi->output_match = output_match; 75 lzi->output_literal = output_literal; 76 lzi->user_data = user_data; 77 lzi->frame_size = frame_size; 78 lzi->lentab = calloc(sizeof(int), lzi->block_buf_size); 79 lzi->prevtab = calloc(sizeof(u_char *), lzi->block_buf_size); 80 lzi->analysis_valid = 0; 81 } 82 83 void lz_release(lz_info *lzi) 84 { 85 free(lzi->block_buf); 86 free(lzi->lentab); 87 free(lzi->prevtab); 88 } 89 90 void lz_reset(lz_info *lzi) 91 { 92 int residual = lzi->chars_in_buf - lzi->block_loc; 93 memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual); 94 lzi->chars_in_buf = residual; 95 lzi->block_loc = 0; 96 lzi->analysis_valid = 0; 97 } 98 99 #ifdef LZNONSLIDE_MAIN 100 typedef struct lz_user_data 101 { 102 FILE *infile; 103 FILE *outfile; 104 int R0, R1, R2; 105 } lz_user_data; 106 107 int tmp_get_chars(lz_info *lzi, int n, u_char *buf) 108 { 109 lz_user_data *lzud = (lz_user_data *)lzi->user_data; 110 return fread(buf, 1, n, lzud->infile); 111 } 112 113 int tmp_output_match(lz_info *lzi, int match_pos, int match_len) 114 { 115 lz_user_data *lzud = (lz_user_data *)lzi->user_data; 116 int mod_match_loc; 117 118 mod_match_loc = match_pos; 119 120 fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc); 121 return 0; 122 } 123 124 void tmp_output_literal(lz_info *lzi, u_char ch) 125 { 126 lz_user_data *lzud = (lz_user_data *)lzi->user_data; 127 fprintf(lzud->outfile, "'%c'", ch); 128 } 129 130 int main(int argc, char *argv[]) 131 { 132 int wsize = atoi(argv[1]); 133 lz_info lzi; 134 lz_user_data lzu = {stdin, stdout, 1, 1, 1}; 135 136 lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu); 137 lz_compress(&lzi); 138 return 0; 139 } 140 #endif 141 142 int lz_left_to_process(lz_info *lzi) 143 { 144 return lzi->chars_in_buf - lzi->block_loc; 145 } 146 147 static void 148 fill_blockbuf(lz_info *lzi, int maxchars) 149 { 150 int toread; 151 u_char *readhere; 152 int nread; 153 154 if (lzi->eofcount) return; 155 maxchars -= lz_left_to_process(lzi); 156 toread = lzi->block_buf_size - lzi->chars_in_buf; 157 if (toread > maxchars) toread = maxchars; 158 readhere = lzi->block_buf + lzi->chars_in_buf; 159 nread = lzi->get_chars(lzi, toread, readhere); 160 lzi->chars_in_buf += nread; 161 if (nread != toread) 162 lzi->eofcount++; 163 } 164 165 static void lz_analyze_block(lz_info *lzi) 166 { 167 int *lentab, *lenp; 168 u_char **prevtab, **prevp; 169 u_char *bbp, *bbe; 170 u_char *chartab[256]; 171 u_char *cursor; 172 int prevlen; 173 int ch; 174 int maxlen; 175 long wasinc; 176 int max_dist = lzi->max_dist; 177 #ifdef DEBUG_ANALYZE_BLOCK 178 static short n = 0; 179 #endif 180 #ifdef DEBUG_PERF 181 struct rusage innerloop; 182 struct timeval innertime, tmptime; 183 struct rusage outerloop; 184 struct timeval outertime; 185 struct rusage initialloop; 186 struct timeval initialtime; 187 struct rusage totalloop; 188 struct timeval totaltime; 189 #endif 190 191 #ifdef DEBUG_ANALYZE_BLOCK 192 fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc); 193 #endif 194 memset(chartab, 0, sizeof(chartab)); 195 prevtab = prevp = lzi->prevtab; 196 lentab = lenp = lzi->lentab; 197 memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf); 198 memset(lentab, 0, sizeof(*prevtab) * lzi->chars_in_buf); 199 #ifdef DEBUG_PERF 200 memset(&innertime, 0, sizeof(innertime)); 201 memset(&outertime, 0, sizeof(outertime)); 202 getrusage(RUSAGE_SELF, &initialloop); 203 totalloop = initialloop; 204 #endif 205 bbp = lzi->block_buf; 206 bbe = bbp + lzi->chars_in_buf; 207 while (bbp < bbe) { 208 if (chartab[ch = *bbp]) { 209 *prevp = chartab[ch]; 210 *lenp = 1; 211 } 212 chartab[ch] = bbp; 213 bbp++; 214 prevp++; 215 lenp++; 216 } 217 #ifdef DEBUG_PERF 218 initialtime = initialloop.ru_utime; 219 getrusage(RUSAGE_SELF, &initialloop); 220 timersub(&initialloop.ru_utime, &initialtime, &initialtime); 221 #endif 222 wasinc = 1; 223 for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) { 224 #ifdef DEBUG_PERF 225 getrusage(RUSAGE_SELF, &outerloop); 226 #endif 227 bbp = bbe - maxlen - 1; 228 lenp = lentab + lzi->chars_in_buf - maxlen - 1; 229 prevp = prevtab + lzi->chars_in_buf - maxlen - 1; 230 wasinc = 0; 231 while (bbp > lzi->block_buf) { 232 if (*lenp == maxlen) { 233 #ifdef DEBUG_PERF 234 getrusage(RUSAGE_SELF, &innerloop); 235 #endif 236 ch = bbp[maxlen]; 237 cursor = *prevp; 238 while(cursor && ((bbp - cursor) <= max_dist)) { 239 prevlen = *(cursor - lzi->block_buf + lentab); 240 if (cursor[maxlen] == ch) { 241 *prevp = cursor; 242 (*lenp)++; 243 wasinc++; 244 break; 245 } 246 if (prevlen != maxlen) break; 247 cursor = *(cursor - lzi->block_buf + prevtab); 248 } 249 #ifdef DEBUG_PERF 250 tmptime = innerloop.ru_utime; 251 getrusage(RUSAGE_SELF, &innerloop); 252 timersub(&innerloop.ru_utime, &tmptime, &tmptime); 253 timeradd(&tmptime, &innertime, &innertime); 254 #endif 255 } 256 bbp--; 257 prevp--; 258 lenp--; 259 } 260 #ifdef DEBUG_PERF 261 tmptime = outerloop.ru_utime; 262 getrusage(RUSAGE_SELF, &outerloop); 263 timersub(&outerloop.ru_utime, &tmptime, &tmptime); 264 timeradd(&tmptime, &outertime, &outertime); 265 #endif 266 // fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc); 267 } 268 #ifdef DEBUG_PERF 269 totaltime = totalloop.ru_utime; 270 getrusage(RUSAGE_SELF, &totalloop); 271 timersub(&totalloop.ru_utime, &totaltime, &totaltime); 272 fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6); 273 fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6); 274 fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6); 275 fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6); 276 #endif 277 lzi->analysis_valid = 1; 278 #ifdef DEBUG_ANALYZE_BLOCK 279 fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc); 280 #endif 281 } 282 283 void lz_stop_compressing(lz_info *lzi) 284 { 285 lzi->stop = 1; 286 /* fprintf(stderr, "Stopping...\n");*/ 287 } 288 289 int lz_compress(lz_info *lzi, int nchars) 290 { 291 292 u_char *bbp, *bbe; 293 int *lentab, *lenp; 294 u_char **prevtab, **prevp; 295 int len; 296 int holdback; 297 short trimmed; 298 299 lzi->stop = 0; 300 while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) { 301 #if 1 302 if (!lzi->analysis_valid || 303 (!lzi->eofcount && 304 ((lzi->chars_in_buf- lzi->block_loc) < nchars))) { 305 int residual = lzi->chars_in_buf - lzi->block_loc; 306 int bytes_to_move = lzi->max_dist + residual; 307 if (bytes_to_move > lzi->chars_in_buf) 308 bytes_to_move = lzi->chars_in_buf; 309 #ifdef DEBUG_ANALYZE_BLOCK 310 fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc); 311 #endif 312 memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move, 313 bytes_to_move); 314 315 lzi->block_loc = bytes_to_move - residual; 316 lzi->chars_in_buf = bytes_to_move; 317 #ifdef DEBUG_ANALYZE_BLOCK 318 fprintf(stderr, "New chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); 319 #endif 320 fill_blockbuf(lzi, nchars); 321 #ifdef DEBUG_ANALYZE_BLOCK 322 fprintf(stderr, "Really new chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); 323 #endif 324 lz_analyze_block(lzi); 325 } 326 #else 327 if (!lzi->analysis_valid || 328 (lzi->block_loc - lzi->chars_in_buf) == 0) { 329 lzi->block_loc = 0; 330 lzi->chars_in_buf = 0; 331 fill_blockbuf(lzi, nchars); 332 lz_analyze_block(lzi); 333 } 334 #endif 335 prevtab = prevp = lzi->prevtab + lzi->block_loc; 336 lentab = lenp = lzi->lentab + lzi->block_loc; 337 bbp = lzi->block_buf + lzi->block_loc; 338 holdback = lzi->max_match; 339 if (lzi->eofcount) holdback = 0; 340 if (lzi->chars_in_buf < (nchars + lzi->block_loc)) 341 bbe = lzi->block_buf + lzi->chars_in_buf - holdback; 342 else 343 bbe = bbp + nchars; 344 while ((bbp < bbe) && (!lzi->stop)) { 345 trimmed = 0; 346 len = *lenp; 347 if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) { 348 #ifdef DEBUG_TRIMMING 349 fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size)); 350 #endif 351 trimmed = 1; 352 len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size); 353 } 354 if (len > nchars) { 355 #ifdef DEBUG_TRIMMING 356 fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars); 357 #endif 358 trimmed = 1; 359 len = nchars; 360 } 361 if (len >= lzi->min_match) { 362 #ifdef LAZY 363 if ((bbp < bbe -1) && !trimmed && 364 ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) { 365 len = 1; 366 /* this is the lazy eval case */ 367 } 368 else 369 #endif 370 if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc, 371 len) < 0) { 372 // fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len); 373 len = 1; /* match rejected */ 374 } 375 } 376 else 377 len = 1; 378 379 if (len < lzi->min_match) { 380 assert(len == 1); 381 lzi->output_literal(lzi, *bbp); 382 } 383 // fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, lzi->block_loc); 384 bbp += len; 385 prevp += len; 386 lenp += len; 387 lzi->cur_loc += len; 388 lzi->block_loc += len; 389 assert(nchars >= len); 390 nchars -= len; 391 392 } 393 } 394 return 0; 395 } 396