1;=========================================================================== 2; Copyright (c) 1990-1999 Info-ZIP. All rights reserved. 3; 4; See the accompanying file LICENSE, version 1999-Oct-05 or later 5; (the contents of which are also included in zip.h) for terms of use. 6; If, for some reason, both of these files are missing, the Info-ZIP license 7; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html 8;=========================================================================== 9; This is a 680x0 assembly language translation of the Info-ZIP source file 10; deflate.c, by Paul Kienitz. No rights reserved. The function longest_match 11; is based in part on match.a by Carsten Steger, which in turn is partly based 12; on match.s for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however, 13; this material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko. 14; This code is not commented very much; see deflate.c for comments that explain 15; what the functions are doing. 16; 17; The symbols that can be used to select different versions are as follows: 18; 19; CPU020 if defined, use 68020 instructions always. 20; 21; CPUTEST if defined, check at runtime for CPU type. Another symbol 22; specifying the platform-specific test must be used with this. 23; If neither of these is defined, use 68000 instructions only. 24; Runtime test is nonportable; it is different for each OS. 25; 26; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also 27; tells it that registers d0/a0/d1/a1 are not preserved by 28; function calls. At present, if AMIGA is not defined, it 29; causes functions to preserve all registers. ALL OF THIS CODE 30; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED 31; BY ANY FUNCTIONS THAT IT CALLS. 32; 33; DYN_ALLOC should be defined here if it is defined for C source; tells us 34; that big arrays are allocated instead of static. 35; 36; WSIZE must be defined as the same number used for WSIZE in the C 37; source, and must be a power of two <= 32768. As elsewhere, 38; the default value is 32768. 39; 40; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed. 41; 42; SMALL_MEM define this if it is defined in the C source; otherwise it uses 43; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported. 44; The FULL_SEARCH option in deflate.c is also not supported. 45; 46; DEBUG activates some tracing output, as in the C source. 47; 48; QUADLONG this selects a different version of the innermost longest_match 49; loop code for 68020 operations, comparing bytes four at a time 50; instead of two at a time. It seems to be a tiny bit faster on 51; average, but it's slower often enough that one can't generalize. 52; 53; This code currently assumes that function results are returned in D0 for 54; all platforms. It assumes that args to functions are pushed onto the stack, 55; last arg first. It also currently assumes that all C symbols have an 56; underscore prepended when referenced from assembly. 57; 58; 1999/09/23: for Human68k: Modified by Shimazaki Ryo. 59 60 IFNDEF CPU020 61 IFNDEF CPUTEST 62CPU000 equ 1 63 ENDC 64 ENDC 65 66; Use these macros for accessing variables of type int: 67 IFDEF INT16 68MOVINT MACRO _1,_2 69 move.w _1,_2 70 ENDM 71CLRINT MACRO _1 72 clr.w _1 73 ENDM 74INTSIZE equ 2 75 ELSE ; !INT16 76MOVINT MACRO _1,_2 77 move.l _1,_2 78 ENDM 79CLRINT MACRO _1 80 clr.l _1 81 ENDM 82INTSIZE equ 4 83 ENDC 84 85 IFDEF DYN_ALLOC 86BASEPTR MACRO _1,_2 87 move.l _1,_2 88 ENDM 89 ELSE 90BASEPTR MACRO _1,_2 91 lea _1,_2 92 ENDM 93 ENDC 94 95; constants we use, many of them adjustable: 96 97MAX_MATCH equ 258 98MIN_MATCH equ 3 99TOO_FAR equ 4096 100 IFNDEF WSIZE 101WSIZE equ 32768 102 ENDC 103WMASK equ WSIZE-1 104MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1 105MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1 106; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits 107;HASH_BITS equ 15 108; ELSE 109 IFDEF SMALL_MEM 110HASH_BITS equ 13 111 ELSE 112HASH_BITS equ 14 ; default -- MEDIUM_MEM 113 ENDC 114; ENDC ; BIG_MEM 115HASH_SIZE equ 1<<HASH_BITS 116HASH_MASK equ HASH_SIZE-1 117H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH 118B_SLOW equ 1 119B_FAST equ 2 120ZE_MEM equ 4 121EOF equ -1 122 123; struct config is defined by these offsets: 124Good_length equ 0 125Max_lazy equ 2 126Nice_length equ 4 127Max_chain equ 6 128Sizeof_config equ 8 129 130 131; external functions we call: 132 xref _ct_tally ; int ct_tally(int, int) 133 xref _flush_block ; unsigned long F(char *, unsigned long, int) 134 xref _ziperr ; void ziperr(int, char *) 135 xref _error ; void error(char *) 136 xref _calloc ; stdlib function: void *calloc(size_t, size_t) 137 xref _free ; stdlib function: void free(void *) 138 IFDEF DEBUG 139 xref _fputc ; stdio function: int fputc(int, FILE *) 140 xref _stderr ; pointer to FILE, which we pass to fputc 141 ENDC 142 143; our entry points: 144 xdef _lm_init ; void lm_init(int level, unsigned short *flags) 145 xdef _lm_free ; void lm_free(void) 146 xdef _deflate ; void deflate(void) ...the big one 147 xdef _fill_window ; this line is just for debugging 148 149 150; ============================================================================ 151; Here is where we have our global variables. 152 153;;; section deflatevars,data 154 155; external global variables we reference: 156 xref _verbose ; signed int 157 xref _level ; signed int 158 xref _read_buf ; int (*read_buf)(char *, unsigned int) 159 160; global variables we make available: 161 162 xdef _window 163 xdef _prev 164 xdef _head 165 xdef _window_size 166 xdef _block_start 167 xdef _strstart 168 169 bss 170 quad 171 172 IFDEF DYN_ALLOC 173_prev: ds.l 1 ; pointer to calloc()'d unsigned short array 174_head: ds.l 1 ; pointer to calloc()'d unsigned short array 175_window: ds.l 1 ; pointer to calloc()'d unsigned char array 176 ELSE ; !DYN_ALLOC 177_prev: ds.w WSIZE ; array of unsigned short 178_head: ds.w HASH_SIZE ; array of unsigned short 179_window: ds.b 2*WSIZE ; array of unsigned char 180 ENDC ; ?DYN_ALLOC 181 182 text 183 quad 184_window_size: ds.l 1 ; unsigned long 185_block_start: ds.l 1 ; unsigned long 186_strstart: ds.w INTSIZE/2 ; unsigned int 187 188; Now here are our private variables: 189 190 IFDEF CPUTEST 191is020: ds.w 1 ; bool: CPU type is '020 or higher 192 ENDC 193ins_h: ds.w 1 ; unsigned short 194sliding: ds.w 1 ; bool: the file is read a piece at a time 195eofile: ds.w 1 ; bool: we have read in the end of the file 196max_lazy_match: ds.w 1 ; unsigned short 197lookahead: ds.w 1 ; unsigned short 198 199; These are NOT DECLARED AS STATIC in deflate.c, but currently could be: 200max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c) 201prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c) 202good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c) 203nice_match: ds.w 1 ; unsigned short (signed int in deflate.c) 204match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c) 205 206; This array of struct config is a constant and could be in the code section: 207config_table: dc.w 0,0,0,0 ; level 0: store uncompressed 208 dc.w 4,4,8,4 ; level 1: fastest, loosest compression 209 dc.w 4,5,16,8 ; level 2 210 dc.w 4,6,32,32 ; level 3: highest to use deflate_fast 211 dc.w 4,4,16,16 ; level 4: lowest to use lazy matches 212 dc.w 8,16,32,32 ; level 5 213 dc.w 8,16,128,128 ; level 6: the default level 214 dc.w 8,32,128,256 ; level 7 215 dc.w 32,128,258,1024 ; level 8 216 dc.w 32,258,258,4096 ; level 9: maximum compression, slow 217 218 219;;CAL_SH MACRO ; macro for calling zcalloc() 220;; IFD INT16 221;; move.w #2,-(sp) 222;; move.w #\1,-(sp) 223;; jsr _zcalloc 224;; addq #4,sp 225;; ELSE 226;; pea 2 227;; pea \1 228;; jsr _zcalloc 229;; addq #8,sp 230;; ENDC 231;; ENDM 232 233CAL_SH MACRO _1 ; Okay, we're back to using regular calloc()... 234 movem.l d2/a2,-(sp) 235 pea 2 236 pea _1 237 jsr _calloc 238 addq #8,sp 239 movem.l (sp)+,d2/a2 240 ENDM 241 242; ============================================================================ 243; And here we begin our functions. match_init is for internal use only: 244 245;; section deflate,code 246 247match_init: 248 IFDEF CPUTEST ; now check for platform type 249 IFDEF AMIGA ; Amiga specific test for '020 CPU: 250 xref _SysBase 251 NOLIST 252 INCLUDE 'exec/execbase.i' 253 LIST 254 255 clr.w is020 ; default value is 68000 256 move.l _SysBase,a0 257 btst #AFB_68020,AttnFlags+1(a0) 258 beq.s cheap 259 move.w #1,is020 260cheap: 261 ELSE ; !AMIGA 262 263 FAIL Write an '020-detector for your system here! 264; On the Macintosh, I believe GetEnvironment() provides the information. 265 266 ENDC ; AMIGA 267 ENDC ; CPUTEST 268 rts ; match_init consists only of rts if CPUTEST unset 269 270 271; ============================================================================ 272; Here is longest_match(), the function that the rest of this was built up 273; from, the hottest hot spot in the program and therefore the most heavily 274; optimized. It has two different versions, one for '020 and higher CPUs, and 275; one for 68000/68010. It can test at runtime which version to use if you 276; create a test function in match_init for your platform. Currently such a 277; test is implemented for the Amiga. It can also be assembled to use '000 or 278; '020 code only. 279 280Cur_Match reg d0 ; unsigned int, kept valid as long 281Best_Len reg d1 ; unsigned int, kept valid as long 282Scan_Start reg d3 ; pair of bytes 283Scan_End reg d4 ; pair of bytes 284Limit reg d5 ; unsigned int 285Chain_Length reg d6 ; unsigned int 286Scan_Test reg d7 ; counter, pair of bytes sometimes 287Scan reg a0 ; pointer to unsigned char 288Match reg a1 ; pointer to unsigned char 289Prev_Address reg a2 ; pointer to unsigned short 290Scan_Ini reg a3 ; pointer to unsigned char 291Match_Ini reg a5 ; pointer to unsigned char 292; Note: "pair of bytes" means the two low order bytes of the register in 293; 68020 code, but means the lowest and third lowest bytes on the 68000. 294SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1 295; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model 296 297 298longest_match: 299 movem.l SAVEREGS,-(sp) 300 301; setup steps common to byte and word versions: 302 IFDEF INT16 303 and.l #$0000FFFF,Cur_Match ; upper half must be zero! 304; we use an and.l down here for the sake of ATSIGN/REGARGS. 305 moveq #0,Limit ; so adding to Scan_Ini works 306 ENDC 307 move.w (max_chain_len,pc),Chain_Length 308 move.w (prev_length,pc),Best_Len 309 MOVINT (_strstart,pc),Limit 310 BASEPTR _prev,Prev_Address 311 BASEPTR _window,Match_Ini 312 move.l Match_Ini,Scan_Ini 313 addq #MIN_MATCH,Match_Ini ; optimizes inner loop 314 add.l Limit,Scan_Ini 315 sub.w #MAX_DIST,Limit 316 bhi.s limit_ok 317 moveq #0,Limit 318limit_ok: 319 cmp.w (good_match,pc),Best_Len 320 blo.s length_ok 321 lsr.w #2,Chain_Length 322length_ok: 323 subq.w #1,Chain_Length 324 325 IFDEF CPUTEST 326 tst.w is020 ; can we use '020 stuff today? 327 bne WORD_match 328 ENDC 329 330 IFNDEF CPU020 331 332; for 68000 or 68010, use byte operations: 333 moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd 334 moveq #0,Scan_End ; likewise 335 moveq #0,Scan_Test ; likewise 336 move.b (Scan_Ini),Scan_Start 337 swap Scan_Start ; swap is faster than 8 bit shift 338 move.b 1(Scan_Ini),Scan_Start 339 move.b -1(Scan_Ini,Best_Len.w),Scan_End 340 swap Scan_End 341 move.b 0(Scan_Ini,Best_Len.w),Scan_End 342 bra.s bdo_scan 343 344blong_loop: 345 move.b -1(Scan_Ini,Best_Len.w),Scan_End 346 swap Scan_End 347 move.b 0(Scan_Ini,Best_Len.w),Scan_End 348 349bshort_loop: 350 add.w Cur_Match,Cur_Match ; assert value before doubling < 32K 351 IFNE 32768-WSIZE 352 and.w #(WMASK*2),Cur_Match 353 ENDC 354 move.w (Prev_Address,Cur_Match.l),Cur_Match 355 cmp.w Limit,Cur_Match 356 dbls Chain_Length,bdo_scan 357 bra return 358 359bdo_scan: 360 move.l Match_Ini,Match 361 add.l Cur_Match,Match 362 move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test 363 swap Scan_Test 364 move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test 365 cmp.l Scan_Test,Scan_End 366 bne.s bshort_loop 367 move.b -MIN_MATCH(Match),Scan_Test 368 swap Scan_Test 369 move.b -MIN_MATCH+1(Match),Scan_Test 370 cmp.l Scan_Test,Scan_Start 371 bne.s bshort_loop 372 move.w #(MAX_MATCH-3),Scan_Test 373 lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop 374 375bscan_loop: 376 cmp.b (Match)+,(Scan)+ 377 dbne Scan_Test,bscan_loop 378 subq #1,Scan 379 380 sub.l Scan_Ini,Scan ; assert difference is 16 bits 381 cmp.w Best_Len,Scan 382 bls.s bshort_loop 383 MOVINT Scan,Best_Len 384 move.w Cur_Match,match_start 385 cmp.w (nice_match,pc),Best_Len 386 blo.s blong_loop 387 IFDEF CPUTEST 388 bra return 389 ENDC 390 391 ENDC ; !CPU020 392 393 IFNDEF CPU000 394;;; MACHINE MC68020 395 396; for 68020 or higher, use word operations even on odd addresses: 397WORD_match: 398 move.w (Scan_Ini),Scan_Start 399 move.w -1(Scan_Ini,Best_Len.w),Scan_End 400 bra.s wdo_scan 401 402wlong_loop: 403 move.w -1(Scan_Ini,Best_Len.w),Scan_End 404 405wshort_loop: 406 and.w #WMASK,Cur_Match 407 move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode 408 cmp.w Limit,Cur_Match 409 dbls Chain_Length,wdo_scan 410 bra.s return 411 412wdo_scan: 413 move.l Match_Ini,Match 414 add.l Cur_Match,Match 415 cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End 416 bne.s wshort_loop 417 cmp.w -MIN_MATCH(Match),Scan_Start 418 bne.s wshort_loop 419 IFDEF QUADLONG 420; By some measurements, this version of the code is a little tiny bit faster. 421; But on some files it's slower. It probably pays off only when there are 422; long match strings, and costs in the most common case of three-byte matches. 423 moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15 424 lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop 425 426wscan_loop: 427 cmp.l (Match)+,(Scan)+ ; test four bytes at a time 428 bne.s odd 429 cmp.l (Match)+,(Scan)+ 430 bne.s odd 431 cmp.l (Match)+,(Scan)+ 432 bne.s odd 433 cmp.l (Match)+,(Scan)+ 434 dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop 435odd: 436 subq #4,Scan 437 subq #4,Match 438 cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword 439 bne.s even 440 cmp.b (Match)+,(Scan)+ 441 bne.s even 442 cmp.b (Match)+,(Scan)+ 443 beq.s steven 444even: subq #1,Scan 445 ELSE ; !QUADLONG 446 moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127 447 lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop 448 449wscan_loop: 450 cmp.w (Match)+,(Scan)+ 451 dbne Scan_Test,wscan_loop 452 subq #2,Scan 453 move.b -2(Match),Scan_Test 454 cmp.b (Scan),Scan_Test 455 bne.s steven 456 addq #1,Scan 457 ENDC ; ?QUADLONG 458steven: 459 sub.l Scan_Ini,Scan ; assert: difference is 16 bits 460 cmp.w Best_Len,Scan 461 bls.s wshort_loop 462 MOVINT Scan,Best_Len 463 move.w Cur_Match,match_start 464 cmp.w (nice_match,pc),Best_Len 465 blo.s wlong_loop 466 467;;; MACHINE MC68000 468 ENDC ; !CPU000 469 470return: 471 MOVINT Best_Len,d0 ; return value (upper half should be clear) 472 movem.l (sp)+,SAVEREGS 473 rts 474 475 476; ============================================================================= 477; This is the deflate() function itself, our main entry point. It calls 478; longest_match, above, and some outside functions. It is a hot spot, but not 479; as hot as longest_match. It uses no special '020 code. 480 481; ================== Several macros used in deflate() and later functions: 482 483; Arg 1 is D-reg that new ins_h value is to be left in, 484; arg 2 is the byte value to be hashed into it, which must not be the same reg 485UP_HASH MACRO _1,_2 486 move.w (ins_h,pc),_1 487 asl.w #H_SHIFT,_1 488 eor.b _2,_1 489 and.w #HASH_MASK,_1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK 490 move.w _1,ins_h ; ins_h = that 491 ENDM 492 493; Arg 1 is scratch A, arg 2 is scratch D 494IN_STR MACRO _1,_2 495 move.l Strst,_2 496 addq.w #MIN_MATCH-1,_2 497 move.b (Window,_2.l),_2 ; window[strstart + MIN_MATCH - 1] 498 UP_HASH Head,_2 499 add.l Head,Head ; assert upper word is zero before add 500 BASEPTR _head,_1 501 add.l Head,_1 502 move.w (_1),Head ; hash_head = head[ins_h] 503 move.w Strst,(_1) ; head[ins_h] = strstart 504 move.l Strst,_2 505 IFNE WSIZE-32768 506 and.w #WMASK,_2 507 ENDC 508 add.w _2,_2 ; masks implicitly when WSIZE == 32768 509 move.w Head,(Prev,_2.l) ; prev[str_start & WMASK] = hash_head 510 ENDM 511 512; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1 513FLUSH_B MACRO _1 514 local nenu,nun 515 movem.l d2/a2,-(sp) 516 IF _1==0 517 CLRINT -(sp) 518 ELSEIF (INTSIZE==4).and.(_1<$8000) 519 pea (_1).w 520 ELSE 521 MOVINT #_1,-(sp) 522 ENDC 523 move.l (_block_start,pc),d0 524 blt.s nenu 525 move.l Window,a0 526 add.l d0,a0 527 bra.s nun 528nenu: sub.l a0,a0 ; if block_start < 0, push NULL 529nun: sub.l Strst,d0 530 neg.l d0 531 move.l d0,-(sp) 532 move.l a0,-(sp) 533 jsr _flush_block 534 lea 8+INTSIZE(sp),sp 535 movem.l (sp)+,d2/a2 536 ENDM 537 538; This expands to nothing unless DEBUG is defined. 539; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int 540TRACE_C MACRO _1 541 local qui 542 IFDEF DEBUG 543 cmp.w #1,_verbose+INTSIZE-2 ; test lower word only 544 ble.s qui 545 moveq #0,d0 546 move.b ea,d0 547 movem.l d2/a2,-(sp) 548 move.l _stderr,-(sp) 549 MOVINT d0,-(sp) 550 jsr _fputc 551 addq.l #4+INTSIZE,sp 552 movem.l (sp)+,d2/a2 553qui: 554 ENDC ; DEBUG 555 ENDM 556 557; ================== Here are the register vars we use, and deflate() itself: 558 559Window reg a2 ; cached address of window[] 560Prev reg a3 ; cached address of prev[] 561Strst reg d7 ; strstart cached as a longword 562Look reg d6 ; lookahead cached as short 563Head reg d5 ; local variable hash_head, short 564PrevL reg d4 ; prev_length cached as short 565MatchL reg d3 ; local variable match_length, unsigned short 566Avail reg d2 ; local variable available_match, bool 567PrevM reg a5 ; local variable prev_match, int in an A-reg 568 569DEFREGS reg d3-d7/a3/a5 570 571 572_deflate: ; first, setup steps common to deflate and deflate_fast: 573 movem.l DEFREGS,-(sp) 574 IFDEF INT16 575 moveq #0,Strst ; make sure strstart is valid as a long 576 ENDC 577 moveq #0,Head ; ditto for hash_head 578 MOVINT (_strstart,pc),Strst 579 move.w (lookahead,pc),Look 580 move.w (prev_length,pc),PrevL 581 BASEPTR _window,Window 582 BASEPTR _prev,Prev 583 MOVINT _level,d0 584 cmp.w #3,d0 585 ble deflate_fast 586 moveq #MIN_MATCH-1,MatchL 587 moveq #0,Avail 588 589look_loop: 590 tst.w Look 591 beq last_tally 592 IN_STR a0,d0 593 move.w MatchL,PrevL 594 move.w (match_start,pc),PrevM 595 move.w #MIN_MATCH-1,MatchL 596 597 tst.w Head 598 beq.s no_new_match 599 cmp.w (max_lazy_match,pc),PrevL 600 bhs.s no_new_match 601 move.w Strst,d0 602 sub.w Head,d0 603 cmp.w #MAX_DIST,d0 604 bhi.s no_new_match 605 move.w PrevL,prev_length ; longest_match reads these variables 606 MOVINT Strst,_strstart 607 MOVINT Head,d0 ; parm for longest_match 608 bsr longest_match ; sets match_start 609 cmp.w Look,d0 ; does length exceed valid data? 610 bls.s stml 611 move.w Look,d0 612stml: move.w d0,MatchL ; valid length of match 613 cmp.w #MIN_MATCH,MatchL ; is the match only three bytes? 614 bne.s no_new_match 615 move.w (match_start,pc),d0 616 sub.w Strst,d0 617 cmp.w #-TOO_FAR,d0 618 bge.s no_new_match 619 moveq #MIN_MATCH-1,MatchL ; mark the current match as no good 620 621no_new_match: 622 cmp.w #MIN_MATCH,PrevL 623 blo literal 624 cmp.w MatchL,PrevL 625 blo literal 626 ; CHECK_MATCH Strst-1,PrevM,PrevL 627 MOVINT Strst,_strstart ; ct_tally reads this variable 628 move.l PrevL,d0 629 subq.w #MIN_MATCH,d0 630 movem.l d2/a2,-(sp) 631 MOVINT d0,-(sp) 632 move.l Strst,d0 633 sub.w PrevM,d0 634 subq.w #1,d0 635 MOVINT d0,-(sp) 636 jsr _ct_tally ; sets d0 true if we have to flush 637 addq #2*INTSIZE,sp 638 movem.l (sp)+,d2/a2 639 subq.w #3,PrevL ; convert for dbra (prev_length - 2) 640 sub.w PrevL,Look 641 subq.w #2,Look 642insertmatch: 643 addq.w #1,Strst 644 IN_STR a0,d1 ; don't clobber d0 645 dbra PrevL,insertmatch 646 moveq #0,Avail 647 moveq #0,PrevL ; not needed? 648 moveq #MIN_MATCH-1,MatchL 649 addq.w #1,Strst 650 tst.w d0 651 beq refill 652 FLUSH_B 0 653 move.l Strst,_block_start 654 bra.s refill 655 656literal: 657 tst.w Avail 658 bne.s yeslit 659 moveq #1,Avail 660 bra.s skipliteral 661yeslit: TRACE_C <-1(Window,Strst.l)> 662 MOVINT Strst,_strstart ; ct_tally reads this variable 663 moveq #0,d0 664 move.b -1(Window,Strst.l),d0 665 movem.l d2/a2,-(sp) 666 MOVINT d0,-(sp) 667 CLRINT -(sp) 668 jsr _ct_tally 669 addq #2*INTSIZE,sp 670 movem.l (sp)+,d2/a2 671 tst.w d0 672 beq.s skipliteral 673 FLUSH_B 0 674 move.l Strst,_block_start 675skipliteral: 676 addq.w #1,Strst 677 subq.w #1,Look 678 679refill: 680 cmp.w #MIN_LOOKAHEAD,Look 681 bhs look_loop 682 bsr fill_window 683 bra look_loop 684 685last_tally: 686 tst.w Avail 687 beq last_flush 688 MOVINT Strst,_strstart ; ct_tally reads this variable 689 moveq #0,d0 690 move.b -1(Window,Strst.l),d0 691 movem.l d2/a2,-(sp) 692 MOVINT d0,-(sp) 693 CLRINT -(sp) 694 jsr _ct_tally 695 addq #2*INTSIZE,sp 696 movem.l (sp)+,d2/a2 697last_flush: 698 FLUSH_B 1 699 bra deflate_exit 700 701; ================== This is another version used for low compression levels: 702 703deflate_fast: 704 moveq #0,MatchL 705 moveq #MIN_MATCH-1,PrevL 706flook_loop: 707 tst.w Look 708 beq flast_flush 709 710 IN_STR a0,d0 711 tst.w Head 712 beq.s fno_new_match 713 move.w Strst,d0 714 sub.w Head,d0 715 cmp.w #MAX_DIST,d0 716 bhi.s fno_new_match 717 move.w PrevL,prev_length ; longest_match reads these variables 718 MOVINT Strst,_strstart 719 MOVINT Head,d0 ; parm for longest_match 720 bsr longest_match ; sets match_start 721 cmp.w Look,d0 ; does length exceed valid data? 722 bls.s fstml 723 move.w Look,d0 724fstml: move.w d0,MatchL ; valid length of match 725 726fno_new_match: 727 cmp.w #MIN_MATCH,MatchL 728 blo fliteral 729 ; CHECK_MATCH Strst,match_start,MatchL 730 MOVINT Strst,_strstart ; ct_tally reads this variable 731 move.l MatchL,d0 732 subq.w #MIN_MATCH,d0 733 movem.l d2/a2,-(sp) 734 MOVINT d0,-(sp) 735 move.l Strst,d0 736 sub.w (match_start,pc),d0 737 MOVINT d0,-(sp) 738 jsr _ct_tally ; sets d0 true if we have to flush 739 addq #2*INTSIZE,sp 740 movem.l (sp)+,d2/a2 741 sub.w MatchL,Look 742 cmp.w (max_lazy_match,pc),MatchL 743 bhi ftoolong 744 subq.w #2,MatchL 745finsertmatch: 746 addq.w #1,Strst 747 IN_STR a0,d1 ; preserve d0 748 dbra MatchL,finsertmatch 749 moveq #0,MatchL ; not needed? 750 addq.w #1,Strst 751 bra.s flushfill 752 753ftoolong: 754 add.w MatchL,Strst 755 moveq #0,MatchL 756 moveq #0,d1 ; preserve d0 757 move.b (Window,Strst.l),d1 758 move.w d1,ins_h 759; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH... 760 move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast 761 UP_HASH d1,Avail ; preserve d0 762 IFNE MIN_MATCH-3 763 FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg? 764 ENDC 765 bra.s flushfill 766 767fliteral: 768 TRACE_C <(Window,Strst.l)> 769 MOVINT Strst,_strstart ; ct_tally reads this variable 770 moveq #0,d0 771 move.b (Window,Strst.l),d0 772 movem.l d2/a2,-(sp) 773 MOVINT d0,-(sp) 774 CLRINT -(sp) 775 jsr _ct_tally ; d0 set if we need to flush 776 addq #2*INTSIZE,sp 777 movem.l (sp)+,d2/a2 778 addq.w #1,Strst 779 subq.w #1,Look 780 781flushfill: 782 tst.w d0 783 beq.s frefill 784 FLUSH_B 0 785 move.l Strst,_block_start 786frefill: 787 cmp.w #MIN_LOOKAHEAD,Look 788 bhs flook_loop 789 bsr fill_window 790 bra flook_loop 791 792flast_flush: 793 FLUSH_B 1 ; sets our return value 794 795deflate_exit: 796 MOVINT Strst,_strstart ; save back cached values 797 move.w PrevL,prev_length 798 move.w Look,lookahead 799 movem.l (sp)+,DEFREGS 800 rts 801 802 803; ========================================================================= 804; void fill_window(void) calls the input function to refill the sliding 805; window that we use to find substring matches in. 806 807More reg Head ; local variable in fill_window 808WindTop reg Prev ; local variable used for sliding 809SlidIx reg PrevL ; local variable used for sliding 810 811FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst 812; all registers available to be clobbered by the sliding operation: 813; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7. 814SPAREGS reg d0-d3/a0-a1/a5-a6 815SPCOUNT equ 8 ; number of registers in SPAREGS 816 817 818_fill_window: ; C-callable entry point 819 movem.l Look/Strst,-(sp) 820 IFDEF INT16 821 moveq #0,Strst ; Strst must be valid as a long 822 ENDC 823 MOVINT (_strstart,pc),Strst 824 move.w (lookahead,pc),Look 825 BASEPTR _window,Window 826 bsr.s fill_window 827 MOVINT Strst,_strstart 828 move.w Look,lookahead 829 movem.l (sp)+,Look/Strst 830 rts 831 832; strstart, lookahead, and window must be cached in Strst, Look, and Window: 833fill_window: ; asm-callable entry point 834 movem.l FWREGS,-(sp) 835 move.w (eofile,pc),d0 ; we put this up here for speed 836 bne fwdone 837 and.l #$FFFF,Look ; make sure Look is valid as long 838fw_refill: 839 move.l (_window_size,pc),More ; <= 64K 840 sub.l Look,More 841 sub.l Strst,More ; Strst is already valid as long 842 cmp.w #EOF,More 843 bne.s notboundary 844 subq.w #1,More 845 bra checkend 846 847notboundary: 848 move.w (sliding,pc),d0 849 beq checkend 850 cmp.w #WSIZE+MAX_DIST,Strst 851 blo checkend 852 IF (32768-WSIZE)>0 853 lea WSIZE(Window),WindTop ; WindTop is aligned when Window is 854 ELSE 855 move.l Window,WindTop 856 add.l #WSIZE,WindTop 857 ENDC 858 move.l Window,d0 859 and.w #3,d0 860 beq.s isaligned 861 subq.w #1,d0 862align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary 863 dbra d0,align 864isaligned: 865; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop: 866 move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx 867slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time! 868 movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter. 869 lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest 870 dbra SlidIx,slide 871 BASEPTR _window,Window ; restore cached value 872 sub.w #WSIZE,match_start 873 sub.w #WSIZE,Strst 874 sub.l #WSIZE,_block_start 875 add.w #WSIZE,More 876 BASEPTR _head,a0 877 move.w #HASH_SIZE-1,d0 878fixhead: 879 move.w (a0),d1 880 sub.w #WSIZE,d1 881 bpl.s headok 882 moveq #0,d1 883headok: move.w d1,(a0)+ 884 dbra d0,fixhead 885 BASEPTR _prev,a0 886 move.w #WSIZE-1,d0 887fixprev: 888 move.w (a0),d1 889 sub.w #WSIZE,d1 890 bpl.s prevok 891 moveq #0,d1 892prevok: move.w d1,(a0)+ 893 dbra d0,fixprev 894 TRACE_C #'.' 895 896 move _verbose+INTSIZE-2,d0 897 beq checkend 898 movem.l d2/a2,-(sp) 899 xref _print_period 900 jsr _print_period 901 movem.l (sp)+,d2/a2 902 903checkend: ; assert eofile is false 904 movem.l d2/a2,-(sp) 905 MOVINT More,-(sp) ; assert More's upper word is zero 906 move.l Strst,d0 907 add.w Look,d0 908 add.l Window,d0 909 move.l d0,-(sp) 910 move.l _read_buf,a0 911 jsr (a0) ; refill the upper part of the window 912 addq #4+INTSIZE,sp 913 movem.l (sp)+,d2/a2 914 tst.w d0 915 beq.s iseof 916 cmp.w #EOF,d0 917 beq.s iseof 918 add.w d0,Look 919 cmp.w #MIN_LOOKAHEAD,Look 920 blo fw_refill ; eofile is still false 921 922 bra.s fwdone 923iseof: move.w #1,eofile 924fwdone: movem.l (sp)+,FWREGS 925 rts 926 927 928; ========================================================================= 929; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version. 930 931;;; xdef _lm_free ; the entry point 932 933_lm_free: 934 IFDEF DYN_ALLOC 935 move.l _window,d0 936 beq.s lf_no_window 937 movem.l d2/a2,-(sp) 938 move.l d0,-(sp) 939 jsr _free 940 addq #4,sp 941 movem.l (sp)+,d2/a2 942 clr.l _window 943lf_no_window: 944 move.l _prev,d0 945 beq.s lf_no_prev 946 movem.l d2/a2,-(sp) 947 move.l d0,-(sp) 948 jsr _free 949 move.l _head,(sp) ; reuse the same stack arg slot 950 jsr _free 951 addq #4,sp 952 movem.l (sp)+,d2/a2 953 clr.l _prev 954 clr.l _head 955lf_no_prev: 956 ENDC 957 rts 958 959; ============================================================================ 960; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays 961; if any, and initializes all variables so that deflate() is ready to go. 962 963;;; xdef _lm_init ; the entry point 964 965Level reg d2 966;Window reg a2 ; as in deflate() 967 968_lm_init: 969 MOVINT 4(sp),d0 970 move.l 4+INTSIZE(sp),a0 971 move.w d0,Level 972 cmp.w #1,Level 973 blt.s levelerr 974 bgt.s try9 975 bset.b #B_FAST,1(a0) 976try9: cmp.w #9,Level 977 bgt.s levelerr 978 blt.s levelok 979 bset.b #B_SLOW,1(a0) 980 bra.s levelok 981levelerr: 982 pea (level_message,pc) 983 jsr _error ; never returns 984levelok: 985 clr.w sliding 986 move.l (_window_size,pc),d0 987 bne.s gotawindowsize 988 move.w #1,sliding 989 move.l #2*WSIZE,_window_size 990gotawindowsize: 991 992 BASEPTR _window,Window 993 IFDEF DYN_ALLOC 994 move.l Window,d0 ; fake tst.l 995 bne.s gotsomewind 996 CAL_SH WSIZE 997 move.l d0,Window 998 move.l d0,_window 999 bne.s gotsomewind 1000 pea (window_message,pc) 1001 bra error 1002gotsomewind: 1003 tst.l _prev 1004 bne.s gotsomehead 1005 CAL_SH WSIZE 1006 move.l d0,_prev 1007 beq.s nohead 1008 CAL_SH HASH_SIZE 1009 move.l d0,_head 1010 bne.s gotfreshhead ; newly calloc'd memory is zeroed 1011nohead: pea (hash_message,pc) 1012error: MOVINT #ZE_MEM,-(sp) 1013 jsr _ziperr ; never returns 1014gotsomehead: 1015 ENDC ; DYN_ALLOC 1016 1017 move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop 1018 BASEPTR _head,a0 1019wipeh: clr.l (a0)+ 1020 dbra d0,wipeh 1021gotfreshhead: 1022 move.l Level,d0 1023 IFEQ Sizeof_config-8 1024 asl.l #3,d0 1025 ELSE 1026 mulu #Sizeof_config,d0 1027 ENDC 1028 lea (config_table,pc),a0 1029 add.l d0,a0 1030 move.w Max_lazy(a0),max_lazy_match 1031 move.w Good_length(a0),good_match 1032 move.w Nice_length(a0),nice_match 1033 move.w Max_chain(a0),max_chain_len 1034 CLRINT _strstart 1035 clr.l _block_start 1036 bsr match_init 1037 1038 clr.w eofile 1039 movem.l d2/a2,-(sp) 1040 MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short 1041 move.l Window,-(sp) ; even when int size is long, as if deflate.c 1042 move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined. 1043 jsr (a0) 1044 addq #4+INTSIZE,sp 1045 movem.l (sp)+,d2/a2 1046 move.w d0,lookahead 1047 beq.s noread 1048 cmp.w #EOF,d0 1049 bne.s irefill 1050noread: move.w #1,eofile 1051 clr.w lookahead 1052 bra.s init_done 1053 1054irefill: 1055 move.w (lookahead,pc),d0 1056 cmp.w #MIN_LOOKAHEAD,d0 1057 bhs.s hashify 1058 bsr _fill_window ; use the C-callable version 1059hashify: 1060 clr.w ins_h 1061 moveq #MIN_MATCH-2,d0 1062hash1: move.b (Window)+,d1 1063 UP_HASH Level,d1 1064 dbra d0,hash1 1065 1066init_done: 1067 rts 1068 1069; strings for error messages: 1070 IFDEF DYN_ALLOC 1071hash_message dc.b 'hash table allocation',0 1072window_message dc.b 'window allocation',0 1073 ENDC 1074level_message dc.b 'bad pack level',0 1075 1076 end 1077