1title Lempel-Ziv Compressor 2; $Source: /usr/home/dhesi/zoo/RCS/lzc.asm,v $ 3; $Id: lzc.asm,v 1.4 91/07/07 09:36:18 dhesi Exp $ 4 5;Derived from Tom Pfau's public domain assembly code. 6;The contents of this file are hereby released to the public domain. 7; -- Rahul Dhesi 1988/08/24 8 9UNBUF_IO equ 1 ;use unbuffered I/O 10 11public _lzc 12 13 include asmconst.ai 14 include macros.ai 15 16check_gap equ 4000 ;Check ratio every so many bits 17scale_limit equ 32000 ;scale down if bitsout > scale_limit 18rat_thresh equ 0ffffh ;don't reset if rat > rat_thresh 19 20;Hash table entry 21hash_rec struc 22first dw ? ; First entry with this value 23next dw ? ; Next entry along chain 24char db ? ; Suffix char 25hash_rec ends 26 27extrn docrc:near ;Procedure for block CRC in lzd.asm 28 29ifdef UNBUF_IO 30extrn _read:near 31extrn _blockwrite:near 32else 33extrn _zooread:near 34extrn _zoowrite:near 35endif 36 37;Declare Segments 38_text segment byte public 'code' 39_text ends 40 41dgroup group _data 42 assume ds:dgroup,es:dgroup 43_data segment word public 'data' 44extrn _out_buf_adr:word ;address of C output buffer 45extrn _in_buf_adr:word ;address of C input buffer 46 47extrn memflag:byte ;got memory? flag 48 49save_sp dw ? 50 51bytesin dw ? ;count of bytes read 52bitsout dw ? ;count of bits written 53ratio dw ? ;recent ratio 54ratflag db ? ;flag to remind us to check ratio 55bit_interval dw ? ;interval at which to test ratio 56 57input_handle dw ? 58output_handle dw ? 59hash_seg dw ? 60prefix_code dw ? 61free_code dw ? 62max_code dw ? 63nbits dw ? 64k db ? 65bit_offset dw ? 66input_offset dw 0 67input_size dw 0 68_data ends 69 70memory segment para public 'memory' 71hash label hash_rec 72memory ends 73 74add_code macro 75 local ac1,ac2,ac3 76 mov bx,free_code ;Get code to use 77 push ds ;point to hash table 78 mov ds,hash_seg 79 or di,di ;First use of this prefix? 80 jz ac1 ;zero means yes 81 mov [si].next,bx ;point last use to new entry 82 jmp short ac2 83ac1: mov [si].first,bx ;Point first use to new entry 84ac2: cmp bx,maxmax ;Have we reached code limit? 85 je ac3 ;equal means yes, just return 86 87 ;call index ;get address of new entry 88 call_index ;macro for speed 89 90 mov [si].first,-1 ;initialize pointers 91 mov [si].next,-1 92 mov [si].char,al ;save suffix char 93 inc es:free_code ;adjust next code 94ac3: pop ds ;restore seg reg 95 endm 96 97read_char macro ;Macro for speed 98 local m$1,m$2,m$3,m$4 99 inc [bytesin] ;Maintain input byte count for ratio 100 mov di,input_offset ;Anything left in buffer? 101 cmp di,input_size 102 jb m$1 ;less means yes 103 104 mov cx,inbufsiz 105 call doread ;read block 106 107 cmp ax,-1 108 jnz m$3 109 jmp IO_err ;input error 110m$3: 111 mov dx,_in_buf_adr ;for docrc 112 call docrc 113 or ax,ax ;Anything left? 114 jz m$2 ;zero means no, finished 115 mov input_size,ax ;Save bytes read 116 mov input_offset,0 ;Point to beginning of buffer 117 mov di,0 118m$1: 119 mov si,_in_buf_adr 120 add si,di 121 lodsb ;Read it in 122 inc input_offset ;Adjust pointer 123 clc ;Success 124 jmp short m$4 125m$2: stc ;Nothing left 126m$4: 127 endm 128 129;Start writing code 130_text segment 131 assume cs:_text,ds:dgroup,es:dgroup,ss:nothing 132 133_lzc proc near 134 push bp ;Standard C entry code 135 mov bp,sp 136 push di 137 push si 138 139 push ds ;Save ds to be sure 140 mov bx,ds 141 mov es,bx 142 143;Get two parameters, both integers, that are input file handle and 144;output file handle 145 mov ax,[bp+4] 146 mov [input_handle],ax 147 mov ax,[bp+6] 148 mov [output_handle],ax 149 150 call compress ;Compress file 151 ;Status received back in AX 152 pop ds 153 pop si ;Standard C return code 154 pop di 155 mov sp,bp 156 pop bp 157 ret 158 159_lzc endp 160 161compress proc near 162 mov [save_sp],sp ;Save SP in case of error return 163 164;Initialize variables for serial re-entrancy 165 mov [bit_offset],0 166 mov [input_offset],0 167 mov [input_size],0 168 169 test memflag,0ffH ;Memory allocated? 170 jnz gotmem ;If allocated, continue 171 malloc <((maxmax * 5) / 16 + 20)> ;allocate it 172 jnc here1 173 jmp MALLOC_err1 174here1: 175 mov hash_seg,ax ;Save segment address of mem block 176 mov memflag,0ffh ;Set flag to remind us later 177gotmem: 178 179l1: call init_table ;Initialize the table and some vars 180 mov ax,clear ;Write a clear code 181 call write_code 182 ;call read_char ;Read first char 183 read_char ;macro for speed 184l4: 185 186;new code to check compression ratio 187 test [ratflag],0FFH ;Time to check ratio? 188 jz rd$1 ;Skip checking if zero 189 call check_ratio 190rd$1: 191 xor ah,ah ;Turn char into code 192l4a: mov prefix_code,ax ;Set prefix code 193 ;call read_char ;Read next char 194 read_char ;macro for speed 195 jc l17 ;Carry means eof 196 mov k,al ;Save char in k 197 mov bx,prefix_code ;Get prefix code 198 199 call lookup_code ;See if this pair in table 200 201 jnc l4a ;nc means yes, new code in ax 202 ;call add_code ;Add pair to table 203 add_code ;Macro for speed 204 push bx ;Save new code 205 mov ax,prefix_code ;Write old prefix code 206 call write_code 207 pop bx 208 mov al,k ;Get last char 209 cmp bx,max_code ;Exceed code size? 210 211 jnb l4$ 212 jmp l4 213l4$: 214 cmp nbits,maxbits ;Currently less than maxbits? 215 jb l14 ;yes 216 mov ax,clear ;Write a clear code 217 call write_code 218 call init_table ;Reinit table 219 mov al,k ;get last char 220 jmp l4 ;Start over 221l14: inc nbits ;Increase number of bits 222 shl max_code,1 ;Double max code size 223 jmp l4 ;Get next char 224l17: mov ax,prefix_code ;Write last code 225 call write_code 226 mov ax,eof ;Write eof code 227 call write_code 228 mov ax,bit_offset ;Make sure buffer is flushed to file 229 cmp ax,0 230 je OK_ret 231 mov dx,ax ;dx <- ax 232 shr ax,1 233 shr ax,1 234 shr ax,1 ;ax <- ax div 8 235 and dx,07 ;dx <- ax mod 8 236 ;If extra bits, make sure they get 237 je l17a ;written 238 inc ax 239l17a: call flush 240OK_ret: 241 xor ax,ax ;Normal return -- compressed 242 ret 243IO_err: 244 mov ax,2 ;I/O error return 245 mov sp,[save_sp] ;Restore stack pointer 246 ret 247 248MALLOC_err1: ;hash table alloc error 249 mov ax,1 ;Malloc error return 250 mov sp,[save_sp] ;Restore stack pointer 251 ret 252compress endp 253 254init_table proc near 255 mov [bytesin],0 ;Input byte count 256 mov [bitsout],0 ;Output bit count 257 mov [ratio],0 258 mov [ratflag],0 259 mov [bit_interval],check_gap 260 261 mov nbits,9 ;Set code size to 9 262 mov max_code,512 ;Set max code to 512 263 push es ;Save seg reg 264 mov es,hash_seg ;Address hash table 265 mov ax,-1 ;Unused flag 266 mov cx,640 ;Clear first 256 entries 267 mov di,offset hash ;Point to first entry 268rep stosw ;Clear it out 269 pop es ;Restore seg reg 270 mov free_code,first_free ;Set next code to use 271 ret ;done 272init_table endp 273 274write_code proc near 275 push ax ;Save code 276 mov cx,nbits 277 add [bitsout],cx ;Maintain output bit count for ratio 278 sub [bit_interval],cx 279 jg rd$2 ;OK if not reached interval 280 mov [ratflag],1 ;else set flag -- check ratio soon 281rd$2: 282 mov ax,bit_offset ;Get bit offset 283 mov cx,nbits ;Adjust bit offset by code size 284 add bit_offset,cx 285 286 mov dx,ax ;dx <- ax 287 shr ax,1 288 shr ax,1 289 shr ax,1 ;ax <- ax div 8 290 and dx,07 ;dx <- ax mod 8 291 292 ;Now ax contains byte offset and 293 ;dx contains bit offset within that byte (I think) 294 295 cmp ax,outbufsiz-4 ;Approaching end of buffer? 296 jb wc1 ;less means no 297 call flush ;Write the buffer 298 299 push dx ;dx contains offset within byte 300 add dx,nbits ;adjust by code size 301 mov bit_offset,dx ;new bit offset 302 pop dx ;restore dx 303 304;ax is an index into output buffer. Next, ax <- address of buffer[ax] 305 add ax,[_out_buf_adr] 306 mov si,ax ;put in si 307 mov al,byte ptr [si] ;move byte to first position 308 309;put value of al into first byte of output buffer 310 push bx 311 mov bx,[_out_buf_adr] 312 mov [bx],al 313 pop bx 314 xor ax,ax ;Byte offset of zero 315wc1: add ax,[_out_buf_adr] ;Point into buffer 316 mov di,ax ;Destination 317 pop ax ;Restore code 318 mov cx,dx ;offset within byte 319 xor dx,dx ;dx will catch bits rotated out 320 jcxz wc3 ;If offset in byte is zero, skip shift 321wc2: shl ax,1 ;Rotate code 322 rcl dx,1 323 loop wc2 324 or al,byte ptr [di] ;Grab bits currently in buffer 325wc3: stosw ;Save data 326 mov al,dl ;Grab extra bits 327 stosb ;and save 328 ret 329write_code endp 330 331flush proc near 332 push ax ;Save all registers 333 push bx ;AX contains number of bytes to write 334 push cx 335 push dx 336 337 push si ;may not be necessary to save si & di 338 push di 339 340 push ax ;save byte count 341 342 push ax ;byte count 343 push _out_buf_adr ;buffer address 344 push output_handle ;zoofile 345ifdef UNBUF_IO 346 call _blockwrite 347else 348 call _zoowrite 349endif 350 add sp,6 351 352 pop cx ;recover byte count 353 354 cmp ax,cx 355 356 jz here2 357 jmp IO_err ;I/O error 358 359here2: 360 pop di 361 pop si 362 363 pop dx 364 pop cx 365 pop bx 366 pop ax 367 ret 368flush endp 369 370lookup_code proc near 371 push ds ;Save seg reg 372 mov ds,hash_seg ;point to hash table 373 374 ;call index ;convert code to address 375 call_index ;macro for speed 376 377 mov di,0 ;flag 378 mov bx,[si].first 379 cmp bx,-1 ;Has this code been used? 380 je gc4 ;equal means no 381 inc di ;set flag 382gc2: 383 ;call index ;convert code to address 384 call_index ;macro for speed 385 386 cmp [si].char,al ;is char the same? 387 jne gc3 ;ne means no 388 clc ;success 389 mov ax,bx ;put found code in ax 390 pop ds ;restore seg reg 391 ret ;done 392gc3: 393 mov bx,[si].next 394 cmp bx,-1 ;More left with this prefix? 395 je gc4 ;equal means no 396 jmp gc2 ;try again 397gc4: stc ;not found 398 pop ds ;restore seg reg 399 ret ;done 400lookup_code endp 401 402comment # 403index proc near 404 mov si,bx ;si = bx * 5 (5 byte hash entries) 405 shl si,1 ;si = bx * 2 * 2 + bx 406 shl si,1 407 add si,bx 408 ret 409index endp 410# end comment 411 412check_ratio proc near 413 push ax 414 415; mov dl,'*' ;'*' printed means checking ratio 416; call sendout 417 418 ;Getting ready to check ratios. If bitsout is over scale_limit, 419 ;then we scale down both bitsout and bytesin by a factor 420 ;of 4. This will avoid overflow. 421 mov cx,[bitsout] 422 cmp cx,scale_limit 423 jb scale_ok 424 425 mov cl,2 426 shr [bytesin],cl 427 shr [bitsout],cl 428 429scale_ok: 430 ;Note MASM bug: "mov ah,high [bytesin]" and 431 ;"mov al,low [bytesin]" don't work. 432 mov ah,byte ptr [bytesin] 433 mov dl,byte ptr [bytesin+1] 434 435 sub al,al 436 sub dh,dh ;dx:ax = 8 * bitsin = 256 * bytesin 437 mov cx,[bitsout] ;cx <- bitsout 438 or cx,cx ;Division by zero? 439 jnz candivide ;No -- go ahead divide 440 mov ax,0FFFFH ;yes -- assume max poss value 441 jmp short divided 442candivide: 443 ;Calculate cx as (bytesin * 256) / bitsout = bitsin * 8 / bitsout 444 div cx ;ax <- rat <- dx:ax / cx 445 shl ax,1 446 shl ax,1 ;rat <- 4 * bytes_in / bytes_out 447divided: 448 ;Enter here with ax = rat = bitsin / bitsout. 449 450; call print_data ;print info for debugging 451 452 ;If rat > rat_thresh then ratio is good; do not reset table 453; cmp ax,rat_thresh 454; ja ratdone 455 456 ;Compare rat against ratio 457 mov bx,ax ;save rat in bx 458 cmp ax,[ratio] ;cmp rat,ratio 459 jb ratless ;trouble if ratio is now smaller 460 mov ax,[ratio] 461 call mul7 ;ax <- 7 * ratio 462 add ax,bx ;ax = 7 * ratio + rat 463 shr ax,1 464 shr ax,1 465 shr ax,1 ;ax = (7 * ratio + rat) / 8 466 mov [ratio],ax ;ratio = (7 * ratio + rat) / 8 467 jmp short ratdone 468ratless: ;ratio is going down, so... 469 mov [bytesin],0 470 mov [bitsout],0 471 472; mov dl,'#' ;'#' printed means table reset 473; call sendout 474 475 mov ax,clear ;Write a clear code 476 call write_code 477 call init_table ;Reinit table 478ratdone: 479 mov [ratflag],0 480 mov [bit_interval],check_gap 481 pop ax 482 ret 483check_ratio endp 484 485comment # 486sendout proc near ;char in dl send to console 487 push ax 488 mov ah,02 489 int 21H 490 pop ax 491 ret 492sendout endp 493# end comment 494 495mul7 proc near ;multiply ax by 7 496 push dx 497 mov dx,7 498 mul dx ;dx:ax <- 7 * ax 499 pop dx 500 ret 501mul7 endp 502 503comment # 504mul3 proc near ;multiply ax by 3 505 push dx 506 mov dx,3 507 mul dx ;dx:ax <- 3 * ax 508 pop dx 509 ret 510mul3 endp 511# end comment 512 513comment # 514mul1_125 proc near ;multiply ax by 1.125 515 push bx 516 mov bx,ax 517 shr bx,1 518 shr bx,1 519 shr bx,1 ;bx = n / 8 520 add ax,bx ;ax <- n + n / 8 521 pop bx 522 ret 523mul1_125 endp 524# end comment 525 526comment # 527print_data proc near 528 ;Debugging -- print bytesin, bitsout, rat, and ratio 529 push ax 530 push bx 531 push cx 532 push dx 533 534 push ax ;print rat 535 call _prtint 536 add sp,2 537 538 push [ratio] ;print ratio 539 call _prtint 540 add sp,2 541 542 push [bytesin] 543 call _prtint 544 add sp,2 545 546 push [bitsout] 547 call _prtint 548 add sp,2 549 550 pop dx 551 pop cx 552 pop bx 553 pop ax 554 ret 555print_data endp 556# end comment 557 558;doread reads cx characters and stores them in input buffer 559;return value from zooread is returned in ax 560doread proc near ;reads block 561 push bx 562 push cx 563 push dx 564 push si 565 push di 566 567 push cx ;byte count 568 push _in_buf_adr ;buffer address 569 push input_handle ;zoofile 570ifdef UNBUF_IO 571 call _read 572else 573 call _zooread 574endif 575 add sp,6 576 577 pop di 578 pop si 579 pop dx 580 pop cx 581 pop bx 582 ret 583doread endp 584 585_text ends 586 587end 588 589