1;===========================================================================
2; Copyright (c) 1990-1999 Info-ZIP.  All rights reserved.
3;
4; See the accompanying file LICENSE, version 1999-Oct-05 or later
5; (the contents of which are also included in zip.h) for terms of use.
6; If, for some reason, both of these files are missing, the Info-ZIP license
7; also may be found at:  ftp://ftp.cdrom.com/pub/infozip/license.html
8;===========================================================================
9; This is a 680x0 assembly language translation of the Info-ZIP source file
10; deflate.c, by Paul Kienitz.  No rights reserved.  The function longest_match
11; is based in part on match.a by Carsten Steger, which in turn is partly based
12; on match.s for 386 by Jean-loup Gailly and Kai Uwe Rommel.  Mostly, however,
13; this material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
14; This code is not commented very much; see deflate.c for comments that explain
15; what the functions are doing.
16;
17; The symbols that can be used to select different versions are as follows:
18;
19;   CPU020     if defined, use 68020 instructions always.
20;
21;   CPUTEST    if defined, check at runtime for CPU type.  Another symbol
22;               specifying the platform-specific test must be used with this.
23;               If neither of these is defined, use 68000 instructions only.
24;               Runtime test is nonportable; it is different for each OS.
25;
26;   AMIGA      use Amiga-specific test for 68020, if CPUTEST defined.  Also
27;               tells it that registers d0/a0/d1/a1 are not preserved by
28;               function calls.  At present, if AMIGA is not defined, it
29;               causes functions to preserve all registers.  ALL OF THIS CODE
30;               CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
31;               BY ANY FUNCTIONS THAT IT CALLS.
32;
33;   DYN_ALLOC  should be defined here if it is defined for C source; tells us
34;               that big arrays are allocated instead of static.
35;
36;   WSIZE      must be defined as the same number used for WSIZE in the C
37;               source, and must be a power of two <= 32768.  As elsewhere,
38;               the default value is 32768.
39;
40;   INT16      define this if ints are 16 bits; otherwise 32 bit ints assumed.
41;
42;   SMALL_MEM  define this if it is defined in the C source; otherwise it uses
43;               the MEDIUM_MEM model.  BIG_MEM and MMAP are *not* supported.
44;               The FULL_SEARCH option in deflate.c is also not supported.
45;
46;   DEBUG      activates some tracing output, as in the C source.
47;
48;   QUADLONG   this selects a different version of the innermost longest_match
49;               loop code for 68020 operations, comparing bytes four at a time
50;               instead of two at a time.  It seems to be a tiny bit faster on
51;               average, but it's slower often enough that one can't generalize.
52;
53; This code currently assumes that function results are returned in D0 for
54; all platforms.  It assumes that args to functions are pushed onto the stack,
55; last arg first.  It also currently assumes that all C symbols have an
56; underscore prepended when referenced from assembly.
57;
58; 1999/09/23: for Human68k: Modified by Shimazaki Ryo.
59
60    IFNDEF  CPU020
61     IFNDEF CPUTEST
62CPU000  equ     1
63     ENDC
64    ENDC
65
66; Use these macros for accessing variables of type int:
67    IFDEF   INT16
68MOVINT  MACRO   _1,_2
69        move.w  _1,_2
70        ENDM
71CLRINT  MACRO   _1
72        clr.w   _1
73        ENDM
74INTSIZE equ     2
75    ELSE    ; !INT16
76MOVINT  MACRO   _1,_2
77        move.l  _1,_2
78        ENDM
79CLRINT  MACRO   _1
80        clr.l   _1
81        ENDM
82INTSIZE equ     4
83    ENDC
84
85    IFDEF   DYN_ALLOC
86BASEPTR MACRO   _1,_2
87        move.l  _1,_2
88        ENDM
89    ELSE
90BASEPTR MACRO   _1,_2
91        lea     _1,_2
92        ENDM
93    ENDC
94
95; constants we use, many of them adjustable:
96
97MAX_MATCH       equ     258
98MIN_MATCH       equ     3
99TOO_FAR         equ     4096
100    IFNDEF  WSIZE
101WSIZE           equ     32768
102    ENDC
103WMASK           equ     WSIZE-1
104MAX_DIST        equ     WSIZE-MAX_MATCH-MIN_MATCH-1
105MIN_LOOKAHEAD   equ     MAX_MATCH+MIN_MATCH+1
106;    IFD     BIG_MEM      ; NOT supported -- type Pos needs to be 32 bits
107;HASH_BITS      equ     15
108;    ELSE
109    IFDEF  SMALL_MEM
110HASH_BITS       equ     13
111    ELSE
112HASH_BITS       equ     14      ; default -- MEDIUM_MEM
113    ENDC
114;    ENDC    ; BIG_MEM
115HASH_SIZE       equ     1<<HASH_BITS
116HASH_MASK       equ     HASH_SIZE-1
117H_SHIFT         equ     (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
118B_SLOW          equ     1
119B_FAST          equ     2
120ZE_MEM          equ     4
121EOF             equ     -1
122
123; struct config is defined by these offsets:
124Good_length     equ     0
125Max_lazy        equ     2
126Nice_length     equ     4
127Max_chain       equ     6
128Sizeof_config   equ     8
129
130
131; external functions we call:
132        xref    _ct_tally       ; int ct_tally(int, int)
133        xref    _flush_block    ; unsigned long F(char *, unsigned long, int)
134        xref    _ziperr         ; void ziperr(int, char *)
135        xref    _error          ; void error(char *)
136        xref    _calloc         ; stdlib function: void *calloc(size_t, size_t)
137        xref    _free           ; stdlib function: void free(void *)
138    IFDEF   DEBUG
139        xref    _fputc          ; stdio function: int fputc(int, FILE *)
140        xref    _stderr         ; pointer to FILE, which we pass to fputc
141    ENDC
142
143; our entry points:
144        xdef    _lm_init        ; void lm_init(int level, unsigned short *flags)
145        xdef    _lm_free        ; void lm_free(void)
146        xdef    _deflate        ; void deflate(void)  ...the big one
147        xdef    _fill_window    ; this line is just for debugging
148
149
150; ============================================================================
151; Here is where we have our global variables.
152
153;;;     section deflatevars,data
154
155; external global variables we reference:
156        xref    _verbose        ; signed int
157        xref    _level          ; signed int
158        xref    _read_buf       ; int (*read_buf)(char *, unsigned int)
159
160; global variables we make available:
161
162        xdef    _window
163        xdef    _prev
164        xdef    _head
165        xdef    _window_size
166        xdef    _block_start
167        xdef    _strstart
168
169        bss
170        quad
171
172    IFDEF   DYN_ALLOC
173_prev:          ds.l    1       ; pointer to calloc()'d unsigned short array
174_head:          ds.l    1       ; pointer to calloc()'d unsigned short array
175_window:        ds.l    1       ; pointer to calloc()'d unsigned char array
176    ELSE    ; !DYN_ALLOC
177_prev:          ds.w    WSIZE           ; array of unsigned short
178_head:          ds.w    HASH_SIZE       ; array of unsigned short
179_window:        ds.b    2*WSIZE         ; array of unsigned char
180    ENDC    ; ?DYN_ALLOC
181
182        text
183        quad
184_window_size:   ds.l    1               ; unsigned long
185_block_start:   ds.l    1               ; unsigned long
186_strstart:      ds.w    INTSIZE/2       ; unsigned int
187
188; Now here are our private variables:
189
190    IFDEF   CPUTEST
191is020:          ds.w    1       ; bool: CPU type is '020 or higher
192    ENDC
193ins_h:          ds.w    1       ; unsigned short
194sliding:        ds.w    1       ; bool: the file is read a piece at a time
195eofile:         ds.w    1       ; bool: we have read in the end of the file
196max_lazy_match: ds.w    1       ; unsigned short
197lookahead:      ds.w    1       ; unsigned short
198
199; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
200max_chain_len:  ds.w    1       ; unsigned short (unsigned int in deflate.c)
201prev_length:    ds.w    1       ; unsigned short (unsigned int in deflate.c)
202good_match:     ds.w    1       ; unsigned short (unsigned int in deflate.c)
203nice_match:     ds.w    1       ; unsigned short (signed int in deflate.c)
204match_start:    ds.w    1       ; unsigned short (unsigned int in deflate.c)
205
206; This array of struct config is a constant and could be in the code section:
207config_table:   dc.w    0,0,0,0         ; level 0: store uncompressed
208                dc.w    4,4,8,4         ; level 1: fastest, loosest compression
209                dc.w    4,5,16,8        ; level 2
210                dc.w    4,6,32,32       ; level 3: highest to use deflate_fast
211                dc.w    4,4,16,16       ; level 4: lowest to use lazy matches
212                dc.w    8,16,32,32      ; level 5
213                dc.w    8,16,128,128    ; level 6: the default level
214                dc.w    8,32,128,256    ; level 7
215                dc.w    32,128,258,1024 ; level 8
216                dc.w    32,258,258,4096 ; level 9: maximum compression, slow
217
218
219;;CAL_SH  MACRO                   ; macro for calling zcalloc()
220;;     IFD    INT16
221;;        move.w  #2,-(sp)
222;;        move.w  #\1,-(sp)
223;;        jsr     _zcalloc
224;;        addq    #4,sp
225;;     ELSE
226;;        pea     2
227;;        pea     \1
228;;        jsr     _zcalloc
229;;        addq    #8,sp
230;;     ENDC
231;;        ENDM
232
233CAL_SH  MACRO   _1              ; Okay, we're back to using regular calloc()...
234        movem.l d2/a2,-(sp)
235        pea     2
236        pea     _1
237        jsr     _calloc
238        addq    #8,sp
239        movem.l (sp)+,d2/a2
240        ENDM
241
242; ============================================================================
243; And here we begin our functions.  match_init is for internal use only:
244
245;;      section deflate,code
246
247match_init:
248    IFDEF   CPUTEST             ; now check for platform type
249     IFDEF  AMIGA               ; Amiga specific test for '020 CPU:
250        xref    _SysBase
251        NOLIST
252        INCLUDE       'exec/execbase.i'
253        LIST
254
255        clr.w   is020                   ; default value is 68000
256        move.l  _SysBase,a0
257        btst    #AFB_68020,AttnFlags+1(a0)
258        beq.s   cheap
259        move.w  #1,is020
260cheap:
261     ELSE   ; !AMIGA
262
263        FAIL    Write an '020-detector for your system here!
264; On the Macintosh, I believe GetEnvironment() provides the information.
265
266     ENDC   ; AMIGA
267    ENDC    ; CPUTEST
268        rts     ; match_init consists only of rts if CPUTEST unset
269
270
271; ============================================================================
272; Here is longest_match(), the function that the rest of this was built up
273; from, the hottest hot spot in the program and therefore the most heavily
274; optimized.  It has two different versions, one for '020 and higher CPUs, and
275; one for 68000/68010.  It can test at runtime which version to use if you
276; create a test function in match_init for your platform.  Currently such a
277; test is implemented for the Amiga.  It can also be assembled to use '000 or
278; '020 code only.
279
280Cur_Match       reg     d0              ; unsigned int, kept valid as long
281Best_Len        reg     d1              ; unsigned int, kept valid as long
282Scan_Start      reg     d3              ; pair of bytes
283Scan_End        reg     d4              ; pair of bytes
284Limit           reg     d5              ; unsigned int
285Chain_Length    reg     d6              ; unsigned int
286Scan_Test       reg     d7              ; counter, pair of bytes sometimes
287Scan            reg     a0              ; pointer to unsigned char
288Match           reg     a1              ; pointer to unsigned char
289Prev_Address    reg     a2              ; pointer to unsigned short
290Scan_Ini        reg     a3              ; pointer to unsigned char
291Match_Ini       reg     a5              ; pointer to unsigned char
292; Note: "pair of bytes" means the two low order bytes of the register in
293; 68020 code, but means the lowest and third lowest bytes on the 68000.
294SAVEREGS        reg     d3-d7/a2/a3/a5      ; don't protect d0/d1/a0/a1
295; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
296
297
298longest_match:
299        movem.l SAVEREGS,-(sp)
300
301; setup steps common to byte and word versions:
302    IFDEF   INT16
303        and.l   #$0000FFFF,Cur_Match    ; upper half must be zero!
304; we use an and.l down here for the sake of ATSIGN/REGARGS.
305        moveq   #0,Limit                ; so adding to Scan_Ini works
306    ENDC
307        move.w  (max_chain_len,pc),Chain_Length
308        move.w  (prev_length,pc),Best_Len
309        MOVINT  (_strstart,pc),Limit
310        BASEPTR _prev,Prev_Address
311        BASEPTR _window,Match_Ini
312        move.l  Match_Ini,Scan_Ini
313        addq    #MIN_MATCH,Match_Ini    ; optimizes inner loop
314        add.l   Limit,Scan_Ini
315        sub.w   #MAX_DIST,Limit
316        bhi.s   limit_ok
317        moveq   #0,Limit
318limit_ok:
319        cmp.w   (good_match,pc),Best_Len
320        blo.s   length_ok
321        lsr.w   #2,Chain_Length
322length_ok:
323        subq.w  #1,Chain_Length
324
325    IFDEF   CPUTEST
326        tst.w   is020                   ; can we use '020 stuff today?
327        bne     WORD_match
328    ENDC
329
330    IFNDEF  CPU020
331
332; for 68000 or 68010, use byte operations:
333        moveq   #0,Scan_Start           ; clear 2nd & 4th bytes, use 1st & 3rd
334        moveq   #0,Scan_End             ; likewise
335        moveq   #0,Scan_Test            ; likewise
336        move.b  (Scan_Ini),Scan_Start
337        swap    Scan_Start              ; swap is faster than 8 bit shift
338        move.b  1(Scan_Ini),Scan_Start
339        move.b  -1(Scan_Ini,Best_Len.w),Scan_End
340        swap    Scan_End
341        move.b  0(Scan_Ini,Best_Len.w),Scan_End
342        bra.s   bdo_scan
343
344blong_loop:
345        move.b  -1(Scan_Ini,Best_Len.w),Scan_End
346        swap    Scan_End
347        move.b  0(Scan_Ini,Best_Len.w),Scan_End
348
349bshort_loop:
350        add.w   Cur_Match,Cur_Match     ; assert value before doubling < 32K
351     IFNE   32768-WSIZE
352        and.w   #(WMASK*2),Cur_Match
353     ENDC
354        move.w  (Prev_Address,Cur_Match.l),Cur_Match
355        cmp.w   Limit,Cur_Match
356        dbls    Chain_Length,bdo_scan
357        bra     return
358
359bdo_scan:
360        move.l  Match_Ini,Match
361        add.l   Cur_Match,Match
362        move.b  -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
363        swap    Scan_Test
364        move.b  -MIN_MATCH(Match,Best_Len.w),Scan_Test
365        cmp.l   Scan_Test,Scan_End
366        bne.s   bshort_loop
367        move.b  -MIN_MATCH(Match),Scan_Test
368        swap    Scan_Test
369        move.b  -MIN_MATCH+1(Match),Scan_Test
370        cmp.l   Scan_Test,Scan_Start
371        bne.s   bshort_loop
372        move.w  #(MAX_MATCH-3),Scan_Test
373        lea     MIN_MATCH(Scan_Ini),Scan        ; offset optimizes inner loop
374
375bscan_loop:
376        cmp.b   (Match)+,(Scan)+
377        dbne    Scan_Test,bscan_loop
378        subq    #1,Scan
379
380        sub.l   Scan_Ini,Scan           ; assert difference is 16 bits
381        cmp.w   Best_Len,Scan
382        bls.s   bshort_loop
383        MOVINT  Scan,Best_Len
384        move.w  Cur_Match,match_start
385        cmp.w   (nice_match,pc),Best_Len
386        blo.s   blong_loop
387     IFDEF  CPUTEST
388        bra     return
389     ENDC
390
391    ENDC    ; !CPU020
392
393    IFNDEF  CPU000
394;;;     MACHINE MC68020
395
396; for 68020 or higher, use word operations even on odd addresses:
397WORD_match:
398        move.w  (Scan_Ini),Scan_Start
399        move.w  -1(Scan_Ini,Best_Len.w),Scan_End
400        bra.s   wdo_scan
401
402wlong_loop:
403        move.w  -1(Scan_Ini,Best_Len.w),Scan_End
404
405wshort_loop:
406        and.w   #WMASK,Cur_Match
407        move.w  (Prev_Address,Cur_Match.w*2),Cur_Match  ; '020 addressing mode
408        cmp.w   Limit,Cur_Match
409        dbls    Chain_Length,wdo_scan
410        bra.s   return
411
412wdo_scan:
413        move.l  Match_Ini,Match
414        add.l   Cur_Match,Match
415        cmp.w   -MIN_MATCH-1(Match,Best_Len.w),Scan_End
416        bne.s   wshort_loop
417        cmp.w   -MIN_MATCH(Match),Scan_Start
418        bne.s   wshort_loop
419     IFDEF  QUADLONG
420; By some measurements, this version of the code is a little tiny bit faster.
421; But on some files it's slower.  It probably pays off only when there are
422; long match strings, and costs in the most common case of three-byte matches.
423        moveq   #((MAX_MATCH-MIN_MATCH)/16),Scan_Test     ; value = 15
424        lea     MIN_MATCH(Scan_Ini),Scan        ; offset optimizes inner loop
425
426wscan_loop:
427        cmp.l   (Match)+,(Scan)+                ; test four bytes at a time
428        bne.s   odd
429        cmp.l   (Match)+,(Scan)+
430        bne.s   odd
431        cmp.l   (Match)+,(Scan)+
432        bne.s   odd
433        cmp.l   (Match)+,(Scan)+
434        dbne    Scan_Test,wscan_loop            ; '020 can cache a bigger loop
435odd:
436        subq    #4,Scan
437        subq    #4,Match
438        cmp.b   (Match)+,(Scan)+        ; find good bytes in bad longword
439        bne.s   even
440        cmp.b   (Match)+,(Scan)+
441        bne.s   even
442        cmp.b   (Match)+,(Scan)+
443        beq.s   steven
444even:   subq    #1,Scan
445     ELSE   ; !QUADLONG
446        moveq   #((MAX_MATCH-MIN_MATCH)/2),Scan_Test    ; value = 127
447        lea     MIN_MATCH(Scan_Ini),Scan        ; offset optimizes inner loop
448
449wscan_loop:
450        cmp.w   (Match)+,(Scan)+
451        dbne    Scan_Test,wscan_loop
452        subq    #2,Scan
453        move.b  -2(Match),Scan_Test
454        cmp.b   (Scan),Scan_Test
455        bne.s   steven
456        addq    #1,Scan
457     ENDC   ; ?QUADLONG
458steven:
459        sub.l   Scan_Ini,Scan           ; assert: difference is 16 bits
460        cmp.w   Best_Len,Scan
461        bls.s   wshort_loop
462        MOVINT  Scan,Best_Len
463        move.w  Cur_Match,match_start
464        cmp.w   (nice_match,pc),Best_Len
465        blo.s   wlong_loop
466
467;;;     MACHINE MC68000
468    ENDC    ; !CPU000
469
470return:
471        MOVINT  Best_Len,d0         ; return value (upper half should be clear)
472        movem.l (sp)+,SAVEREGS
473        rts
474
475
476; =============================================================================
477; This is the deflate() function itself, our main entry point.  It calls
478; longest_match, above, and some outside functions.  It is a hot spot, but not
479; as hot as longest_match.  It uses no special '020 code.
480
481; ================== Several macros used in deflate() and later functions:
482
483; Arg 1 is D-reg that new ins_h value is to be left in,
484; arg 2 is the byte value to be hashed into it, which must not be the same reg
485UP_HASH MACRO   _1,_2
486        move.w  (ins_h,pc),_1
487        asl.w   #H_SHIFT,_1
488        eor.b   _2,_1
489        and.w   #HASH_MASK,_1           ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
490        move.w  _1,ins_h                ; ins_h = that
491        ENDM
492
493; Arg 1 is scratch A, arg 2 is scratch D
494IN_STR  MACRO   _1,_2
495        move.l  Strst,_2
496        addq.w  #MIN_MATCH-1,_2
497        move.b  (Window,_2.l),_2        ; window[strstart + MIN_MATCH - 1]
498        UP_HASH Head,_2
499        add.l   Head,Head               ; assert upper word is zero before add
500        BASEPTR _head,_1
501        add.l   Head,_1
502        move.w  (_1),Head               ; hash_head = head[ins_h]
503        move.w  Strst,(_1)              ; head[ins_h] = strstart
504        move.l  Strst,_2
505    IFNE    WSIZE-32768
506        and.w   #WMASK,_2
507    ENDC
508        add.w   _2,_2                   ; masks implicitly when WSIZE == 32768
509        move.w  Head,(Prev,_2.l)        ; prev[str_start & WMASK] = hash_head
510        ENDM
511
512; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
513FLUSH_B MACRO   _1
514        local   nenu,nun
515        movem.l d2/a2,-(sp)
516    IF      _1==0
517        CLRINT  -(sp)
518    ELSEIF (INTSIZE==4).and.(_1<$8000)
519        pea     (_1).w
520    ELSE
521        MOVINT  #_1,-(sp)
522    ENDC
523        move.l  (_block_start,pc),d0
524        blt.s   nenu
525        move.l  Window,a0
526        add.l   d0,a0
527        bra.s   nun
528nenu:   sub.l   a0,a0           ; if block_start < 0, push NULL
529nun:    sub.l   Strst,d0
530        neg.l   d0
531        move.l  d0,-(sp)
532        move.l  a0,-(sp)
533        jsr     _flush_block
534        lea     8+INTSIZE(sp),sp
535        movem.l (sp)+,d2/a2
536        ENDM
537
538; This expands to nothing unless DEBUG is defined.
539; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
540TRACE_C MACRO  _1
541        local  qui
542    IFDEF  DEBUG
543        cmp.w  #1,_verbose+INTSIZE-2    ; test lower word only
544        ble.s   qui
545        moveq   #0,d0
546        move.b  ea,d0
547        movem.l d2/a2,-(sp)
548        move.l  _stderr,-(sp)
549        MOVINT  d0,-(sp)
550        jsr     _fputc
551        addq.l  #4+INTSIZE,sp
552        movem.l (sp)+,d2/a2
553qui:
554    ENDC    ; DEBUG
555        ENDM
556
557; ================== Here are the register vars we use, and deflate() itself:
558
559Window  reg     a2              ; cached address of window[]
560Prev    reg     a3              ; cached address of prev[]
561Strst   reg     d7              ; strstart cached as a longword
562Look    reg     d6              ; lookahead cached as short
563Head    reg     d5              ; local variable hash_head, short
564PrevL   reg     d4              ; prev_length cached as short
565MatchL  reg     d3              ; local variable match_length, unsigned short
566Avail   reg     d2              ; local variable available_match, bool
567PrevM   reg     a5              ; local variable prev_match, int in an A-reg
568
569DEFREGS reg     d3-d7/a3/a5
570
571
572_deflate:           ; first, setup steps common to deflate and deflate_fast:
573        movem.l DEFREGS,-(sp)
574    IFDEF   INT16
575        moveq   #0,Strst                ; make sure strstart is valid as a long
576    ENDC
577        moveq   #0,Head                 ; ditto for hash_head
578        MOVINT  (_strstart,pc),Strst
579        move.w  (lookahead,pc),Look
580        move.w  (prev_length,pc),PrevL
581        BASEPTR _window,Window
582        BASEPTR _prev,Prev
583        MOVINT  _level,d0
584        cmp.w   #3,d0
585        ble     deflate_fast
586        moveq   #MIN_MATCH-1,MatchL
587        moveq   #0,Avail
588
589look_loop:
590        tst.w   Look
591        beq     last_tally
592        IN_STR  a0,d0
593        move.w  MatchL,PrevL
594        move.w  (match_start,pc),PrevM
595        move.w  #MIN_MATCH-1,MatchL
596
597        tst.w   Head
598        beq.s   no_new_match
599        cmp.w   (max_lazy_match,pc),PrevL
600        bhs.s   no_new_match
601        move.w  Strst,d0
602        sub.w   Head,d0
603        cmp.w   #MAX_DIST,d0
604        bhi.s   no_new_match
605        move.w  PrevL,prev_length       ; longest_match reads these variables
606        MOVINT  Strst,_strstart
607        MOVINT  Head,d0                 ; parm for longest_match
608        bsr     longest_match           ; sets match_start
609        cmp.w   Look,d0                 ; does length exceed valid data?
610        bls.s   stml
611        move.w  Look,d0
612stml:   move.w  d0,MatchL               ; valid length of match
613        cmp.w   #MIN_MATCH,MatchL       ; is the match only three bytes?
614        bne.s   no_new_match
615        move.w  (match_start,pc),d0
616        sub.w   Strst,d0
617        cmp.w   #-TOO_FAR,d0
618        bge.s   no_new_match
619        moveq   #MIN_MATCH-1,MatchL     ; mark the current match as no good
620
621no_new_match:
622        cmp.w   #MIN_MATCH,PrevL
623        blo     literal
624        cmp.w   MatchL,PrevL
625        blo     literal
626        ; CHECK_MATCH   Strst-1,PrevM,PrevL
627        MOVINT  Strst,_strstart         ; ct_tally reads this variable
628        move.l  PrevL,d0
629        subq.w  #MIN_MATCH,d0
630        movem.l d2/a2,-(sp)
631        MOVINT  d0,-(sp)
632        move.l  Strst,d0
633        sub.w   PrevM,d0
634        subq.w  #1,d0
635        MOVINT  d0,-(sp)
636        jsr     _ct_tally               ; sets d0 true if we have to flush
637        addq    #2*INTSIZE,sp
638        movem.l (sp)+,d2/a2
639        subq.w  #3,PrevL                ; convert for dbra (prev_length - 2)
640        sub.w   PrevL,Look
641        subq.w  #2,Look
642insertmatch:
643        addq.w  #1,Strst
644        IN_STR  a0,d1                   ; don't clobber d0
645        dbra    PrevL,insertmatch
646        moveq   #0,Avail
647        moveq   #0,PrevL                ; not needed?
648        moveq   #MIN_MATCH-1,MatchL
649        addq.w  #1,Strst
650        tst.w   d0
651        beq     refill
652        FLUSH_B 0
653        move.l  Strst,_block_start
654        bra.s   refill
655
656literal:
657        tst.w   Avail
658        bne.s   yeslit
659        moveq   #1,Avail
660        bra.s   skipliteral
661yeslit: TRACE_C <-1(Window,Strst.l)>
662        MOVINT  Strst,_strstart         ; ct_tally reads this variable
663        moveq   #0,d0
664        move.b  -1(Window,Strst.l),d0
665        movem.l d2/a2,-(sp)
666        MOVINT  d0,-(sp)
667        CLRINT  -(sp)
668        jsr     _ct_tally
669        addq    #2*INTSIZE,sp
670        movem.l (sp)+,d2/a2
671        tst.w   d0
672        beq.s   skipliteral
673        FLUSH_B 0
674        move.l  Strst,_block_start
675skipliteral:
676        addq.w  #1,Strst
677        subq.w  #1,Look
678
679refill:
680        cmp.w   #MIN_LOOKAHEAD,Look
681        bhs     look_loop
682        bsr     fill_window
683        bra     look_loop
684
685last_tally:
686        tst.w   Avail
687        beq     last_flush
688        MOVINT  Strst,_strstart         ; ct_tally reads this variable
689        moveq   #0,d0
690        move.b  -1(Window,Strst.l),d0
691        movem.l d2/a2,-(sp)
692        MOVINT  d0,-(sp)
693        CLRINT  -(sp)
694        jsr     _ct_tally
695        addq    #2*INTSIZE,sp
696        movem.l (sp)+,d2/a2
697last_flush:
698        FLUSH_B 1
699        bra     deflate_exit
700
701; ================== This is another version used for low compression levels:
702
703deflate_fast:
704        moveq   #0,MatchL
705        moveq   #MIN_MATCH-1,PrevL
706flook_loop:
707        tst.w   Look
708        beq     flast_flush
709
710        IN_STR  a0,d0
711        tst.w   Head
712        beq.s   fno_new_match
713        move.w  Strst,d0
714        sub.w   Head,d0
715        cmp.w   #MAX_DIST,d0
716        bhi.s   fno_new_match
717        move.w  PrevL,prev_length       ; longest_match reads these variables
718        MOVINT  Strst,_strstart
719        MOVINT  Head,d0                 ; parm for longest_match
720        bsr     longest_match           ; sets match_start
721        cmp.w   Look,d0                 ; does length exceed valid data?
722        bls.s   fstml
723        move.w  Look,d0
724fstml:  move.w  d0,MatchL               ; valid length of match
725
726fno_new_match:
727        cmp.w   #MIN_MATCH,MatchL
728        blo     fliteral
729        ; CHECK_MATCH   Strst,match_start,MatchL
730        MOVINT  Strst,_strstart         ; ct_tally reads this variable
731        move.l  MatchL,d0
732        subq.w  #MIN_MATCH,d0
733        movem.l d2/a2,-(sp)
734        MOVINT  d0,-(sp)
735        move.l  Strst,d0
736        sub.w   (match_start,pc),d0
737        MOVINT  d0,-(sp)
738        jsr     _ct_tally               ; sets d0 true if we have to flush
739        addq    #2*INTSIZE,sp
740        movem.l (sp)+,d2/a2
741        sub.w   MatchL,Look
742        cmp.w   (max_lazy_match,pc),MatchL
743        bhi     ftoolong
744        subq.w  #2,MatchL
745finsertmatch:
746        addq.w  #1,Strst
747        IN_STR  a0,d1                   ; preserve d0
748        dbra    MatchL,finsertmatch
749        moveq   #0,MatchL               ; not needed?
750        addq.w  #1,Strst
751        bra.s   flushfill
752
753ftoolong:
754        add.w   MatchL,Strst
755        moveq   #0,MatchL
756        moveq   #0,d1                   ; preserve d0
757        move.b  (Window,Strst.l),d1
758        move.w  d1,ins_h
759; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
760        move.b  1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
761        UP_HASH d1,Avail                ; preserve d0
762    IFNE    MIN_MATCH-3
763        FAIL  needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
764    ENDC
765        bra.s   flushfill
766
767fliteral:
768        TRACE_C <(Window,Strst.l)>
769        MOVINT  Strst,_strstart         ; ct_tally reads this variable
770        moveq   #0,d0
771        move.b  (Window,Strst.l),d0
772        movem.l d2/a2,-(sp)
773        MOVINT  d0,-(sp)
774        CLRINT  -(sp)
775        jsr     _ct_tally               ; d0 set if we need to flush
776        addq    #2*INTSIZE,sp
777        movem.l (sp)+,d2/a2
778        addq.w  #1,Strst
779        subq.w  #1,Look
780
781flushfill:
782        tst.w   d0
783        beq.s   frefill
784        FLUSH_B 0
785        move.l  Strst,_block_start
786frefill:
787        cmp.w   #MIN_LOOKAHEAD,Look
788        bhs     flook_loop
789        bsr     fill_window
790        bra     flook_loop
791
792flast_flush:
793        FLUSH_B 1                       ; sets our return value
794
795deflate_exit:
796        MOVINT  Strst,_strstart         ; save back cached values
797        move.w  PrevL,prev_length
798        move.w  Look,lookahead
799        movem.l (sp)+,DEFREGS
800        rts
801
802
803; =========================================================================
804; void fill_window(void) calls the input function to refill the sliding
805; window that we use to find substring matches in.
806
807More    reg     Head                    ; local variable in fill_window
808WindTop reg     Prev                    ; local variable used for sliding
809SlidIx  reg     PrevL                   ; local variable used for sliding
810
811FWREGS  reg     d2-d5/a2-a6             ; does NOT include Look and Strst
812; all registers available to be clobbered by the sliding operation:
813; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
814SPAREGS reg     d0-d3/a0-a1/a5-a6
815SPCOUNT equ     8                       ; number of registers in SPAREGS
816
817
818_fill_window:                           ; C-callable entry point
819        movem.l Look/Strst,-(sp)
820    IFDEF   INT16
821        moveq   #0,Strst                ; Strst must be valid as a long
822    ENDC
823        MOVINT  (_strstart,pc),Strst
824        move.w  (lookahead,pc),Look
825        BASEPTR _window,Window
826        bsr.s   fill_window
827        MOVINT  Strst,_strstart
828        move.w  Look,lookahead
829        movem.l (sp)+,Look/Strst
830        rts
831
832; strstart, lookahead, and window must be cached in Strst, Look, and Window:
833fill_window:                            ; asm-callable entry point
834        movem.l FWREGS,-(sp)
835        move.w  (eofile,pc),d0          ; we put this up here for speed
836        bne     fwdone
837        and.l   #$FFFF,Look             ; make sure Look is valid as long
838fw_refill:
839        move.l  (_window_size,pc),More  ; <= 64K
840        sub.l   Look,More
841        sub.l   Strst,More              ; Strst is already valid as long
842        cmp.w   #EOF,More
843        bne.s   notboundary
844        subq.w  #1,More
845        bra     checkend
846
847notboundary:
848        move.w  (sliding,pc),d0
849        beq     checkend
850        cmp.w   #WSIZE+MAX_DIST,Strst
851        blo     checkend
852    IF      (32768-WSIZE)>0
853        lea     WSIZE(Window),WindTop   ; WindTop is aligned when Window is
854    ELSE
855        move.l  Window,WindTop
856        add.l   #WSIZE,WindTop
857    ENDC
858        move.l  Window,d0
859        and.w   #3,d0
860        beq.s   isaligned
861        subq.w  #1,d0
862align:  move.b  (WindTop)+,(Window)+    ; copy up to a longword boundary
863        dbra    d0,align
864isaligned:
865; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
866        move.w  #(WSIZE-1)/(4*SPCOUNT),SlidIx
867slide:  movem.l (WindTop)+,SPAREGS      ; copy, 32 bytes at a time!
868        movem.l SPAREGS,(Window)        ; a slight overshoot doesn't matter.
869        lea     4*SPCOUNT(Window),Window        ; can't use (aN)+ as movem.l dest
870        dbra    SlidIx,slide
871        BASEPTR _window,Window                  ; restore cached value
872        sub.w   #WSIZE,match_start
873        sub.w   #WSIZE,Strst
874        sub.l   #WSIZE,_block_start
875        add.w   #WSIZE,More
876        BASEPTR _head,a0
877        move.w  #HASH_SIZE-1,d0
878fixhead:
879        move.w  (a0),d1
880        sub.w   #WSIZE,d1
881        bpl.s   headok
882        moveq   #0,d1
883headok: move.w  d1,(a0)+
884        dbra    d0,fixhead
885        BASEPTR _prev,a0
886        move.w  #WSIZE-1,d0
887fixprev:
888        move.w  (a0),d1
889        sub.w   #WSIZE,d1
890        bpl.s   prevok
891        moveq   #0,d1
892prevok: move.w  d1,(a0)+
893        dbra    d0,fixprev
894        TRACE_C #'.'
895
896        move    _verbose+INTSIZE-2,d0
897        beq     checkend
898        movem.l d2/a2,-(sp)
899        xref    _print_period
900        jsr     _print_period
901        movem.l (sp)+,d2/a2
902
903checkend:                               ; assert eofile is false
904        movem.l d2/a2,-(sp)
905        MOVINT  More,-(sp)              ; assert More's upper word is zero
906        move.l  Strst,d0
907        add.w   Look,d0
908        add.l   Window,d0
909        move.l  d0,-(sp)
910        move.l  _read_buf,a0
911        jsr     (a0)                    ; refill the upper part of the window
912        addq    #4+INTSIZE,sp
913        movem.l (sp)+,d2/a2
914        tst.w   d0
915        beq.s   iseof
916        cmp.w   #EOF,d0
917        beq.s   iseof
918        add.w   d0,Look
919        cmp.w   #MIN_LOOKAHEAD,Look
920        blo     fw_refill               ; eofile is still false
921
922        bra.s   fwdone
923iseof:  move.w  #1,eofile
924fwdone: movem.l (sp)+,FWREGS
925        rts
926
927
928; =========================================================================
929; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
930
931;;;     xdef    _lm_free                ; the entry point
932
933_lm_free:
934    IFDEF   DYN_ALLOC
935        move.l  _window,d0
936        beq.s   lf_no_window
937        movem.l d2/a2,-(sp)
938        move.l  d0,-(sp)
939        jsr     _free
940        addq    #4,sp
941        movem.l (sp)+,d2/a2
942        clr.l   _window
943lf_no_window:
944        move.l  _prev,d0
945        beq.s   lf_no_prev
946        movem.l d2/a2,-(sp)
947        move.l  d0,-(sp)
948        jsr     _free
949        move.l  _head,(sp)              ; reuse the same stack arg slot
950        jsr     _free
951        addq    #4,sp
952        movem.l (sp)+,d2/a2
953        clr.l   _prev
954        clr.l   _head
955lf_no_prev:
956    ENDC
957        rts
958
959; ============================================================================
960; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
961; if any, and initializes all variables so that deflate() is ready to go.
962
963;;;     xdef    _lm_init                ; the entry point
964
965Level   reg     d2
966;Window reg     a2      ; as in deflate()
967
968_lm_init:
969        MOVINT  4(sp),d0
970        move.l  4+INTSIZE(sp),a0
971        move.w  d0,Level
972        cmp.w   #1,Level
973        blt.s   levelerr
974        bgt.s   try9
975        bset.b  #B_FAST,1(a0)
976try9:   cmp.w   #9,Level
977        bgt.s   levelerr
978        blt.s   levelok
979        bset.b  #B_SLOW,1(a0)
980        bra.s   levelok
981levelerr:
982        pea     (level_message,pc)
983        jsr     _error                  ; never returns
984levelok:
985        clr.w   sliding
986        move.l  (_window_size,pc),d0
987        bne.s   gotawindowsize
988        move.w  #1,sliding
989        move.l  #2*WSIZE,_window_size
990gotawindowsize:
991
992        BASEPTR _window,Window
993    IFDEF   DYN_ALLOC
994        move.l  Window,d0               ; fake tst.l
995        bne.s   gotsomewind
996        CAL_SH  WSIZE
997        move.l  d0,Window
998        move.l  d0,_window
999        bne.s   gotsomewind
1000        pea     (window_message,pc)
1001        bra     error
1002gotsomewind:
1003        tst.l   _prev
1004        bne.s   gotsomehead
1005        CAL_SH  WSIZE
1006        move.l  d0,_prev
1007        beq.s   nohead
1008        CAL_SH  HASH_SIZE
1009        move.l  d0,_head
1010        bne.s   gotfreshhead            ; newly calloc'd memory is zeroed
1011nohead: pea     (hash_message,pc)
1012error:  MOVINT  #ZE_MEM,-(sp)
1013        jsr     _ziperr                 ; never returns
1014gotsomehead:
1015    ENDC    ; DYN_ALLOC
1016
1017        move.w  #(HASH_SIZE/2)-1,d0     ; two shortwords per loop
1018        BASEPTR _head,a0
1019wipeh:  clr.l   (a0)+
1020        dbra    d0,wipeh
1021gotfreshhead:
1022        move.l  Level,d0
1023    IFEQ    Sizeof_config-8
1024        asl.l   #3,d0
1025    ELSE
1026        mulu    #Sizeof_config,d0
1027    ENDC
1028        lea     (config_table,pc),a0
1029        add.l   d0,a0
1030        move.w  Max_lazy(a0),max_lazy_match
1031        move.w  Good_length(a0),good_match
1032        move.w  Nice_length(a0),nice_match
1033        move.w  Max_chain(a0),max_chain_len
1034        CLRINT  _strstart
1035        clr.l   _block_start
1036        bsr     match_init
1037
1038        clr.w   eofile
1039        movem.l d2/a2,-(sp)
1040        MOVINT  #WSIZE,-(sp)    ; We read only 32K because lookahead is short
1041        move.l  Window,-(sp)    ; even when int size is long, as if deflate.c
1042        move.l  _read_buf,a0    ; were compiled with MAXSEG_64K defined.
1043        jsr     (a0)
1044        addq    #4+INTSIZE,sp
1045        movem.l (sp)+,d2/a2
1046        move.w  d0,lookahead
1047        beq.s   noread
1048        cmp.w   #EOF,d0
1049        bne.s   irefill
1050noread: move.w  #1,eofile
1051        clr.w   lookahead
1052        bra.s   init_done
1053
1054irefill:
1055        move.w  (lookahead,pc),d0
1056        cmp.w   #MIN_LOOKAHEAD,d0
1057        bhs.s   hashify
1058        bsr     _fill_window            ; use the C-callable version
1059hashify:
1060        clr.w   ins_h
1061        moveq   #MIN_MATCH-2,d0
1062hash1:  move.b  (Window)+,d1
1063        UP_HASH Level,d1
1064        dbra    d0,hash1
1065
1066init_done:
1067        rts
1068
1069; strings for error messages:
1070    IFDEF   DYN_ALLOC
1071hash_message    dc.b    'hash table allocation',0
1072window_message  dc.b    'window allocation',0
1073    ENDC
1074level_message   dc.b    'bad pack level',0
1075
1076        end
1077