1*5ba6b03cSchristos;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2*5ba6b03cSchristos; *
3*5ba6b03cSchristos; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4*5ba6b03cSchristos; *
5*5ba6b03cSchristos; * Copyright (C) 1995-2003 Mark Adler
6*5ba6b03cSchristos; * For conditions of distribution and use, see copyright notice in zlib.h
7*5ba6b03cSchristos; *
8*5ba6b03cSchristos; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9*5ba6b03cSchristos; * Please use the copyright conditions above.
10*5ba6b03cSchristos; *
11*5ba6b03cSchristos; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12*5ba6b03cSchristos; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
13*5ba6b03cSchristos; * the moment.  I have successfully compiled and tested this code with gcc2.96,
14*5ba6b03cSchristos; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
15*5ba6b03cSchristos; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16*5ba6b03cSchristos; * enabled.  I will attempt to merge the MMX code into this version.  Newer
17*5ba6b03cSchristos; * versions of this and inffast.S can be found at
18*5ba6b03cSchristos; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19*5ba6b03cSchristos; *
20*5ba6b03cSchristos; * 2005 : modification by Gilles Vollant
21*5ba6b03cSchristos; */
22*5ba6b03cSchristos; For Visual C++ 4.x and higher and ML 6.x and higher
23*5ba6b03cSchristos;   ml.exe is in directory \MASM611C of Win95 DDK
24*5ba6b03cSchristos;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25*5ba6b03cSchristos;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26*5ba6b03cSchristos;
27*5ba6b03cSchristos;
28*5ba6b03cSchristos;   compile with command line option
29*5ba6b03cSchristos;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
30*5ba6b03cSchristos
31*5ba6b03cSchristos;   if you define NO_GUNZIP (see inflate.h), compile with
32*5ba6b03cSchristos;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33*5ba6b03cSchristos
34*5ba6b03cSchristos
35*5ba6b03cSchristos; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36*5ba6b03cSchristos; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with addition of dmax and head
37*5ba6b03cSchristos;        in inflate_state in inflate.h)
38*5ba6b03cSchristoszlib1222sup      equ    8	; added to every *_state offset below except mode_state
39*5ba6b03cSchristos
40*5ba6b03cSchristos
41*5ba6b03cSchristosIFDEF GUNZIP	; GUNZIP defined on the assembler command line
42*5ba6b03cSchristos  INFLATE_MODE_TYPE    equ 11	; value loaded into edx on the end-of-block path (L_test_for_end_of_block)
43*5ba6b03cSchristos  INFLATE_MODE_BAD     equ 26	; value loaded into edx on the invalid code / distance-too-far paths
44*5ba6b03cSchristosELSE
45*5ba6b03cSchristos  IFNDEF NO_GUNZIP	; neither symbol defined: same values as the GUNZIP case
46*5ba6b03cSchristos    INFLATE_MODE_TYPE    equ 11
47*5ba6b03cSchristos    INFLATE_MODE_BAD     equ 26
48*5ba6b03cSchristos  ELSE	; only NO_GUNZIP defined (/DNO_GUNZIP): smaller numbering
49*5ba6b03cSchristos    INFLATE_MODE_TYPE    equ 3	; NOTE(review): presumably must match the mode enum in inflate.h for this build - confirm
50*5ba6b03cSchristos    INFLATE_MODE_BAD     equ 17
51*5ba6b03cSchristos  ENDIF
52*5ba6b03cSchristosENDIF
53*5ba6b03cSchristos
54*5ba6b03cSchristos
55*5ba6b03cSchristos; 75 "inffast.S"
56*5ba6b03cSchristos;FILE "inffast.S"
57*5ba6b03cSchristos
58*5ba6b03cSchristos;;;GLOBAL _inflate_fast
59*5ba6b03cSchristos
60*5ba6b03cSchristos;;;SECTION .text
61*5ba6b03cSchristos
62*5ba6b03cSchristos
63*5ba6b03cSchristos
64*5ba6b03cSchristos	.586p
65*5ba6b03cSchristos	.mmx
66*5ba6b03cSchristos
67*5ba6b03cSchristos	name	inflate_fast_x86
68*5ba6b03cSchristos	.MODEL	FLAT
69*5ba6b03cSchristos
70*5ba6b03cSchristos_DATA			segment
71*5ba6b03cSchristosinflate_fast_use_mmx:	; dispatch flag tested at L_check_mmx: below 2 = run the CPUID probe, 2 = MMX loop, above 2 = integer loop
72*5ba6b03cSchristos	dd	1	; initial 1 forces the probe on first use; the probe then stores 2 (use MMX) or 3 (do not)
73*5ba6b03cSchristos
74*5ba6b03cSchristos
75*5ba6b03cSchristos_TEXT			segment
76*5ba6b03cSchristos
77*5ba6b03cSchristos
78*5ba6b03cSchristos
79*5ba6b03cSchristosALIGN 4
80*5ba6b03cSchristos	db	'Fast decoding Code from Chris Anderson'	; unlabeled credit string embedded in the object code
81*5ba6b03cSchristos	db	0	; NUL terminator
82*5ba6b03cSchristos
83*5ba6b03cSchristosALIGN 4
84*5ba6b03cSchristosinvalid_literal_length_code_msg:	; address loaded into ecx at L_invalid_literal_length_code
85*5ba6b03cSchristos	db	'invalid literal/length code'
86*5ba6b03cSchristos	db	0	; NUL terminator
87*5ba6b03cSchristos
88*5ba6b03cSchristosALIGN 4
89*5ba6b03cSchristosinvalid_distance_code_msg:	; address loaded into ecx at L_invalid_distance_code
90*5ba6b03cSchristos	db	'invalid distance code'
91*5ba6b03cSchristos	db	0	; NUL terminator
92*5ba6b03cSchristos
93*5ba6b03cSchristosALIGN 4
94*5ba6b03cSchristosinvalid_distance_too_far_msg:	; address loaded into ecx at L_invalid_distance_too_far
95*5ba6b03cSchristos	db	'invalid distance too far back'
96*5ba6b03cSchristos	db	0	; NUL terminator
97*5ba6b03cSchristos
98*5ba6b03cSchristos
99*5ba6b03cSchristosALIGN 4
100*5ba6b03cSchristosinflate_fast_mask:	; low-bit mask table: dword entry n is (1 shl n) - 1, for n = 0..32; the MMX path indexes it as [inflate_fast_mask+eax*4] with eax = extra-bit count
101*5ba6b03cSchristosdd	0
102*5ba6b03cSchristosdd	1
103*5ba6b03cSchristosdd	3
104*5ba6b03cSchristosdd	7
105*5ba6b03cSchristosdd	15
106*5ba6b03cSchristosdd	31
107*5ba6b03cSchristosdd	63
108*5ba6b03cSchristosdd	127
109*5ba6b03cSchristosdd	255
110*5ba6b03cSchristosdd	511
111*5ba6b03cSchristosdd	1023
112*5ba6b03cSchristosdd	2047
113*5ba6b03cSchristosdd	4095
114*5ba6b03cSchristosdd	8191
115*5ba6b03cSchristosdd	16383
116*5ba6b03cSchristosdd	32767
117*5ba6b03cSchristosdd	65535
118*5ba6b03cSchristosdd	131071
119*5ba6b03cSchristosdd	262143
120*5ba6b03cSchristosdd	524287
121*5ba6b03cSchristosdd	1048575
122*5ba6b03cSchristosdd	2097151
123*5ba6b03cSchristosdd	4194303
124*5ba6b03cSchristosdd	8388607
125*5ba6b03cSchristosdd	16777215
126*5ba6b03cSchristosdd	33554431
127*5ba6b03cSchristosdd	67108863
128*5ba6b03cSchristosdd	134217727
129*5ba6b03cSchristosdd	268435455
130*5ba6b03cSchristosdd	536870911
131*5ba6b03cSchristosdd	1073741823
132*5ba6b03cSchristosdd	2147483647
133*5ba6b03cSchristosdd	4294967295
134*5ba6b03cSchristos
135*5ba6b03cSchristos
136*5ba6b03cSchristosmode_state	 equ	0	;/* state->mode; these are byte offsets from the state pointer that _inflate_fast keeps in edi */
137*5ba6b03cSchristoswsize_state	 equ	(32+zlib1222sup)	;/* state->wsize; NOTE(review): no extra +4 here, unlike the fields below - verify against inflate.h */
138*5ba6b03cSchristoswrite_state	 equ	(36+4+zlib1222sup)	;/* state->write */
139*5ba6b03cSchristoswindow_state	 equ	(40+4+zlib1222sup)	;/* state->window */
140*5ba6b03cSchristoshold_state	 equ	(44+4+zlib1222sup)	;/* state->hold: loaded into ebp as the bit accumulator */
141*5ba6b03cSchristosbits_state	 equ	(48+4+zlib1222sup)	;/* state->bits: loaded into ebx as the count of valid bits in hold */
142*5ba6b03cSchristoslencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode: literal/length table pointer, cached at [esp+8] */
143*5ba6b03cSchristosdistcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode: distance table pointer, cached at [esp+12] */
144*5ba6b03cSchristoslenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits: (1 shl lenbits) - 1 is cached at [esp+0] */
145*5ba6b03cSchristosdistbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits: (1 shl distbits) - 1 is cached at [esp+4] */
146*5ba6b03cSchristos
147*5ba6b03cSchristos
148*5ba6b03cSchristos;;SECTION .text
149*5ba6b03cSchristos; 205 "inffast.S"
150*5ba6b03cSchristos;GLOBAL	inflate_fast_use_mmx
151*5ba6b03cSchristos
152*5ba6b03cSchristos;SECTION .data
153*5ba6b03cSchristos
154*5ba6b03cSchristos
155*5ba6b03cSchristos; GLOBAL inflate_fast_use_mmx:object
156*5ba6b03cSchristos;.size inflate_fast_use_mmx, 4
157*5ba6b03cSchristos; 226 "inffast.S"
158*5ba6b03cSchristos;SECTION .text
159*5ba6b03cSchristos
160*5ba6b03cSchristosALIGN 4
161*5ba6b03cSchristos_inflate_fast proc near
162*5ba6b03cSchristos.FPO (16, 4, 0, 0, 1, 0)
163*5ba6b03cSchristos	push  edi
164*5ba6b03cSchristos	push  esi
165*5ba6b03cSchristos	push  ebp
166*5ba6b03cSchristos	push  ebx
167*5ba6b03cSchristos	pushfd
168*5ba6b03cSchristos	sub  esp,64
169*5ba6b03cSchristos	cld
170*5ba6b03cSchristos
171*5ba6b03cSchristos
172*5ba6b03cSchristos
173*5ba6b03cSchristos
174*5ba6b03cSchristos	mov  esi, [esp+88]
175*5ba6b03cSchristos	mov  edi, [esi+28]
176*5ba6b03cSchristos
177*5ba6b03cSchristos
178*5ba6b03cSchristos
179*5ba6b03cSchristos
180*5ba6b03cSchristos
181*5ba6b03cSchristos
182*5ba6b03cSchristos
183*5ba6b03cSchristos	mov  edx, [esi+4]
184*5ba6b03cSchristos	mov  eax, [esi+0]
185*5ba6b03cSchristos
186*5ba6b03cSchristos	add  edx,eax
187*5ba6b03cSchristos	sub  edx,11
188*5ba6b03cSchristos
189*5ba6b03cSchristos	mov  [esp+44],eax
190*5ba6b03cSchristos	mov  [esp+20],edx
191*5ba6b03cSchristos
192*5ba6b03cSchristos	mov  ebp, [esp+92]
193*5ba6b03cSchristos	mov  ecx, [esi+16]
194*5ba6b03cSchristos	mov  ebx, [esi+12]
195*5ba6b03cSchristos
196*5ba6b03cSchristos	sub  ebp,ecx
197*5ba6b03cSchristos	neg  ebp
198*5ba6b03cSchristos	add  ebp,ebx
199*5ba6b03cSchristos
200*5ba6b03cSchristos	sub  ecx,257
201*5ba6b03cSchristos	add  ecx,ebx
202*5ba6b03cSchristos
203*5ba6b03cSchristos	mov  [esp+60],ebx
204*5ba6b03cSchristos	mov  [esp+40],ebp
205*5ba6b03cSchristos	mov  [esp+16],ecx
206*5ba6b03cSchristos; 285 "inffast.S"
207*5ba6b03cSchristos	mov  eax, [edi+lencode_state]
208*5ba6b03cSchristos	mov  ecx, [edi+distcode_state]
209*5ba6b03cSchristos
210*5ba6b03cSchristos	mov  [esp+8],eax
211*5ba6b03cSchristos	mov  [esp+12],ecx
212*5ba6b03cSchristos
213*5ba6b03cSchristos	mov  eax,1
214*5ba6b03cSchristos	mov  ecx, [edi+lenbits_state]
215*5ba6b03cSchristos	shl  eax,cl
216*5ba6b03cSchristos	dec  eax
217*5ba6b03cSchristos	mov  [esp+0],eax
218*5ba6b03cSchristos
219*5ba6b03cSchristos	mov  eax,1
220*5ba6b03cSchristos	mov  ecx, [edi+distbits_state]
221*5ba6b03cSchristos	shl  eax,cl
222*5ba6b03cSchristos	dec  eax
223*5ba6b03cSchristos	mov  [esp+4],eax
224*5ba6b03cSchristos
225*5ba6b03cSchristos	mov  eax, [edi+wsize_state]
226*5ba6b03cSchristos	mov  ecx, [edi+write_state]
227*5ba6b03cSchristos	mov  edx, [edi+window_state]
228*5ba6b03cSchristos
229*5ba6b03cSchristos	mov  [esp+52],eax
230*5ba6b03cSchristos	mov  [esp+48],ecx
231*5ba6b03cSchristos	mov  [esp+56],edx
232*5ba6b03cSchristos
233*5ba6b03cSchristos	mov  ebp, [edi+hold_state]
234*5ba6b03cSchristos	mov  ebx, [edi+bits_state]
235*5ba6b03cSchristos; 321 "inffast.S"
236*5ba6b03cSchristos	mov  esi, [esp+44]
237*5ba6b03cSchristos	mov  ecx, [esp+20]
238*5ba6b03cSchristos	cmp  ecx,esi
239*5ba6b03cSchristos	ja   L_align_long
240*5ba6b03cSchristos
241*5ba6b03cSchristos	add  ecx,11
242*5ba6b03cSchristos	sub  ecx,esi
243*5ba6b03cSchristos	mov  eax,12
244*5ba6b03cSchristos	sub  eax,ecx
245*5ba6b03cSchristos	lea  edi, [esp+28]
246*5ba6b03cSchristos	rep movsb
247*5ba6b03cSchristos	mov  ecx,eax
248*5ba6b03cSchristos	xor  eax,eax
249*5ba6b03cSchristos	rep stosb
250*5ba6b03cSchristos	lea  esi, [esp+28]
251*5ba6b03cSchristos	mov  [esp+20],esi
252*5ba6b03cSchristos	jmp  L_is_aligned
253*5ba6b03cSchristos
254*5ba6b03cSchristos
255*5ba6b03cSchristosL_align_long:
256*5ba6b03cSchristos	test  esi,3
257*5ba6b03cSchristos	jz   L_is_aligned
258*5ba6b03cSchristos	xor  eax,eax
259*5ba6b03cSchristos	mov  al, [esi]
260*5ba6b03cSchristos	inc  esi
261*5ba6b03cSchristos	mov  ecx,ebx
262*5ba6b03cSchristos	add  ebx,8
263*5ba6b03cSchristos	shl  eax,cl
264*5ba6b03cSchristos	or  ebp,eax
265*5ba6b03cSchristos	jmp L_align_long
266*5ba6b03cSchristos
267*5ba6b03cSchristosL_is_aligned:
268*5ba6b03cSchristos	mov  edi, [esp+60]
269*5ba6b03cSchristos; 366 "inffast.S"
270*5ba6b03cSchristosL_check_mmx:
271*5ba6b03cSchristos	cmp  dword ptr [inflate_fast_use_mmx],2
272*5ba6b03cSchristos	je   L_init_mmx
273*5ba6b03cSchristos	ja   L_do_loop
274*5ba6b03cSchristos
275*5ba6b03cSchristos	push  eax
276*5ba6b03cSchristos	push  ebx
277*5ba6b03cSchristos	push  ecx
278*5ba6b03cSchristos	push  edx
279*5ba6b03cSchristos	pushfd
280*5ba6b03cSchristos	mov  eax, [esp]
281*5ba6b03cSchristos	xor  dword ptr [esp],0200000h
282*5ba6b03cSchristos
283*5ba6b03cSchristos
284*5ba6b03cSchristos
285*5ba6b03cSchristos
286*5ba6b03cSchristos	popfd
287*5ba6b03cSchristos	pushfd
288*5ba6b03cSchristos	pop  edx
289*5ba6b03cSchristos	xor  edx,eax
290*5ba6b03cSchristos	jz   L_dont_use_mmx
291*5ba6b03cSchristos	xor  eax,eax
292*5ba6b03cSchristos	cpuid
293*5ba6b03cSchristos	cmp  ebx,0756e6547h
294*5ba6b03cSchristos	jne  L_dont_use_mmx
295*5ba6b03cSchristos	cmp  ecx,06c65746eh
296*5ba6b03cSchristos	jne  L_dont_use_mmx
297*5ba6b03cSchristos	cmp  edx,049656e69h
298*5ba6b03cSchristos	jne  L_dont_use_mmx
299*5ba6b03cSchristos	mov  eax,1
300*5ba6b03cSchristos	cpuid
301*5ba6b03cSchristos	shr  eax,8
302*5ba6b03cSchristos	and  eax,15
303*5ba6b03cSchristos	cmp  eax,6
304*5ba6b03cSchristos	jne  L_dont_use_mmx
305*5ba6b03cSchristos	test  edx,0800000h
306*5ba6b03cSchristos	jnz  L_use_mmx
307*5ba6b03cSchristos	jmp  L_dont_use_mmx
308*5ba6b03cSchristosL_use_mmx:
309*5ba6b03cSchristos	mov  dword ptr [inflate_fast_use_mmx],2
310*5ba6b03cSchristos	jmp  L_check_mmx_pop
311*5ba6b03cSchristosL_dont_use_mmx:
312*5ba6b03cSchristos	mov  dword ptr [inflate_fast_use_mmx],3
313*5ba6b03cSchristosL_check_mmx_pop:
314*5ba6b03cSchristos	pop  edx
315*5ba6b03cSchristos	pop  ecx
316*5ba6b03cSchristos	pop  ebx
317*5ba6b03cSchristos	pop  eax
318*5ba6b03cSchristos	jmp  L_check_mmx
319*5ba6b03cSchristos; 426 "inffast.S"
320*5ba6b03cSchristosALIGN 4
321*5ba6b03cSchristosL_do_loop:
322*5ba6b03cSchristos; 437 "inffast.S"
323*5ba6b03cSchristos	cmp  bl,15
324*5ba6b03cSchristos	ja   L_get_length_code
325*5ba6b03cSchristos
326*5ba6b03cSchristos	xor  eax,eax
327*5ba6b03cSchristos	lodsw
328*5ba6b03cSchristos	mov  cl,bl
329*5ba6b03cSchristos	add  bl,16
330*5ba6b03cSchristos	shl  eax,cl
331*5ba6b03cSchristos	or  ebp,eax
332*5ba6b03cSchristos
333*5ba6b03cSchristosL_get_length_code:
334*5ba6b03cSchristos	mov  edx, [esp+0]
335*5ba6b03cSchristos	mov  ecx, [esp+8]
336*5ba6b03cSchristos	and  edx,ebp
337*5ba6b03cSchristos	mov  eax, [ecx+edx*4]
338*5ba6b03cSchristos
339*5ba6b03cSchristosL_dolen:
340*5ba6b03cSchristos
341*5ba6b03cSchristos
342*5ba6b03cSchristos
343*5ba6b03cSchristos
344*5ba6b03cSchristos
345*5ba6b03cSchristos
346*5ba6b03cSchristos	mov  cl,ah
347*5ba6b03cSchristos	sub  bl,ah
348*5ba6b03cSchristos	shr  ebp,cl
349*5ba6b03cSchristos
350*5ba6b03cSchristos
351*5ba6b03cSchristos
352*5ba6b03cSchristos
353*5ba6b03cSchristos
354*5ba6b03cSchristos
355*5ba6b03cSchristos	test  al,al
356*5ba6b03cSchristos	jnz   L_test_for_length_base
357*5ba6b03cSchristos
358*5ba6b03cSchristos	shr  eax,16
359*5ba6b03cSchristos	stosb
360*5ba6b03cSchristos
361*5ba6b03cSchristosL_while_test:
362*5ba6b03cSchristos
363*5ba6b03cSchristos
364*5ba6b03cSchristos	cmp  [esp+16],edi
365*5ba6b03cSchristos	jbe  L_break_loop
366*5ba6b03cSchristos
367*5ba6b03cSchristos	cmp  [esp+20],esi
368*5ba6b03cSchristos	ja   L_do_loop
369*5ba6b03cSchristos	jmp  L_break_loop
370*5ba6b03cSchristos
371*5ba6b03cSchristosL_test_for_length_base:
372*5ba6b03cSchristos; 502 "inffast.S"
373*5ba6b03cSchristos	mov  edx,eax
374*5ba6b03cSchristos	shr  edx,16
375*5ba6b03cSchristos	mov  cl,al
376*5ba6b03cSchristos
377*5ba6b03cSchristos	test  al,16
378*5ba6b03cSchristos	jz   L_test_for_second_level_length
379*5ba6b03cSchristos	and  cl,15
380*5ba6b03cSchristos	jz   L_save_len
381*5ba6b03cSchristos	cmp  bl,cl
382*5ba6b03cSchristos	jae  L_add_bits_to_len
383*5ba6b03cSchristos
384*5ba6b03cSchristos	mov  ch,cl
385*5ba6b03cSchristos	xor  eax,eax
386*5ba6b03cSchristos	lodsw
387*5ba6b03cSchristos	mov  cl,bl
388*5ba6b03cSchristos	add  bl,16
389*5ba6b03cSchristos	shl  eax,cl
390*5ba6b03cSchristos	or  ebp,eax
391*5ba6b03cSchristos	mov  cl,ch
392*5ba6b03cSchristos
393*5ba6b03cSchristosL_add_bits_to_len:
394*5ba6b03cSchristos	mov  eax,1
395*5ba6b03cSchristos	shl  eax,cl
396*5ba6b03cSchristos	dec  eax
397*5ba6b03cSchristos	sub  bl,cl
398*5ba6b03cSchristos	and  eax,ebp
399*5ba6b03cSchristos	shr  ebp,cl
400*5ba6b03cSchristos	add  edx,eax
401*5ba6b03cSchristos
402*5ba6b03cSchristosL_save_len:
403*5ba6b03cSchristos	mov  [esp+24],edx
404*5ba6b03cSchristos
405*5ba6b03cSchristos
406*5ba6b03cSchristosL_decode_distance:
407*5ba6b03cSchristos; 549 "inffast.S"
408*5ba6b03cSchristos	cmp  bl,15
409*5ba6b03cSchristos	ja   L_get_distance_code
410*5ba6b03cSchristos
411*5ba6b03cSchristos	xor  eax,eax
412*5ba6b03cSchristos	lodsw
413*5ba6b03cSchristos	mov  cl,bl
414*5ba6b03cSchristos	add  bl,16
415*5ba6b03cSchristos	shl  eax,cl
416*5ba6b03cSchristos	or  ebp,eax
417*5ba6b03cSchristos
418*5ba6b03cSchristosL_get_distance_code:
419*5ba6b03cSchristos	mov  edx, [esp+4]
420*5ba6b03cSchristos	mov  ecx, [esp+12]
421*5ba6b03cSchristos	and  edx,ebp
422*5ba6b03cSchristos	mov  eax, [ecx+edx*4]
423*5ba6b03cSchristos
424*5ba6b03cSchristos
425*5ba6b03cSchristosL_dodist:
426*5ba6b03cSchristos	mov  edx,eax
427*5ba6b03cSchristos	shr  edx,16
428*5ba6b03cSchristos	mov  cl,ah
429*5ba6b03cSchristos	sub  bl,ah
430*5ba6b03cSchristos	shr  ebp,cl
431*5ba6b03cSchristos; 584 "inffast.S"
432*5ba6b03cSchristos	mov  cl,al
433*5ba6b03cSchristos
434*5ba6b03cSchristos	test  al,16
435*5ba6b03cSchristos	jz  L_test_for_second_level_dist
436*5ba6b03cSchristos	and  cl,15
437*5ba6b03cSchristos	jz  L_check_dist_one
438*5ba6b03cSchristos	cmp  bl,cl
439*5ba6b03cSchristos	jae  L_add_bits_to_dist
440*5ba6b03cSchristos
441*5ba6b03cSchristos	mov  ch,cl
442*5ba6b03cSchristos	xor  eax,eax
443*5ba6b03cSchristos	lodsw
444*5ba6b03cSchristos	mov  cl,bl
445*5ba6b03cSchristos	add  bl,16
446*5ba6b03cSchristos	shl  eax,cl
447*5ba6b03cSchristos	or  ebp,eax
448*5ba6b03cSchristos	mov  cl,ch
449*5ba6b03cSchristos
450*5ba6b03cSchristosL_add_bits_to_dist:
451*5ba6b03cSchristos	mov  eax,1
452*5ba6b03cSchristos	shl  eax,cl
453*5ba6b03cSchristos	dec  eax
454*5ba6b03cSchristos	sub  bl,cl
455*5ba6b03cSchristos	and  eax,ebp
456*5ba6b03cSchristos	shr  ebp,cl
457*5ba6b03cSchristos	add  edx,eax
458*5ba6b03cSchristos	jmp  L_check_window
459*5ba6b03cSchristos
460*5ba6b03cSchristosL_check_window:
461*5ba6b03cSchristos; 625 "inffast.S"
462*5ba6b03cSchristos	mov  [esp+44],esi
463*5ba6b03cSchristos	mov  eax,edi
464*5ba6b03cSchristos	sub  eax, [esp+40]
465*5ba6b03cSchristos
466*5ba6b03cSchristos	cmp  eax,edx
467*5ba6b03cSchristos	jb   L_clip_window
468*5ba6b03cSchristos
469*5ba6b03cSchristos	mov  ecx, [esp+24]
470*5ba6b03cSchristos	mov  esi,edi
471*5ba6b03cSchristos	sub  esi,edx
472*5ba6b03cSchristos
473*5ba6b03cSchristos	sub  ecx,3
474*5ba6b03cSchristos	mov  al, [esi]
475*5ba6b03cSchristos	mov  [edi],al
476*5ba6b03cSchristos	mov  al, [esi+1]
477*5ba6b03cSchristos	mov  dl, [esi+2]
478*5ba6b03cSchristos	add  esi,3
479*5ba6b03cSchristos	mov  [edi+1],al
480*5ba6b03cSchristos	mov  [edi+2],dl
481*5ba6b03cSchristos	add  edi,3
482*5ba6b03cSchristos	rep movsb
483*5ba6b03cSchristos
484*5ba6b03cSchristos	mov  esi, [esp+44]
485*5ba6b03cSchristos	jmp  L_while_test
486*5ba6b03cSchristos
487*5ba6b03cSchristosALIGN 4
488*5ba6b03cSchristosL_check_dist_one:
489*5ba6b03cSchristos	cmp  edx,1
490*5ba6b03cSchristos	jne  L_check_window
491*5ba6b03cSchristos	cmp  [esp+40],edi
492*5ba6b03cSchristos	je  L_check_window
493*5ba6b03cSchristos
494*5ba6b03cSchristos	dec  edi
495*5ba6b03cSchristos	mov  ecx, [esp+24]
496*5ba6b03cSchristos	mov  al, [edi]
497*5ba6b03cSchristos	sub  ecx,3
498*5ba6b03cSchristos
499*5ba6b03cSchristos	mov  [edi+1],al
500*5ba6b03cSchristos	mov  [edi+2],al
501*5ba6b03cSchristos	mov  [edi+3],al
502*5ba6b03cSchristos	add  edi,4
503*5ba6b03cSchristos	rep stosb
504*5ba6b03cSchristos
505*5ba6b03cSchristos	jmp  L_while_test
506*5ba6b03cSchristos
507*5ba6b03cSchristosALIGN 4
508*5ba6b03cSchristosL_test_for_second_level_length:
509*5ba6b03cSchristos
510*5ba6b03cSchristos
511*5ba6b03cSchristos
512*5ba6b03cSchristos
513*5ba6b03cSchristos	test  al,64
514*5ba6b03cSchristos	jnz   L_test_for_end_of_block
515*5ba6b03cSchristos
516*5ba6b03cSchristos	mov  eax,1
517*5ba6b03cSchristos	shl  eax,cl
518*5ba6b03cSchristos	dec  eax
519*5ba6b03cSchristos	and  eax,ebp
520*5ba6b03cSchristos	add  eax,edx
521*5ba6b03cSchristos	mov  edx, [esp+8]
522*5ba6b03cSchristos	mov  eax, [edx+eax*4]
523*5ba6b03cSchristos	jmp  L_dolen
524*5ba6b03cSchristos
525*5ba6b03cSchristosALIGN 4
526*5ba6b03cSchristosL_test_for_second_level_dist:
527*5ba6b03cSchristos
528*5ba6b03cSchristos
529*5ba6b03cSchristos
530*5ba6b03cSchristos
531*5ba6b03cSchristos	test  al,64
532*5ba6b03cSchristos	jnz   L_invalid_distance_code
533*5ba6b03cSchristos
534*5ba6b03cSchristos	mov  eax,1
535*5ba6b03cSchristos	shl  eax,cl
536*5ba6b03cSchristos	dec  eax
537*5ba6b03cSchristos	and  eax,ebp
538*5ba6b03cSchristos	add  eax,edx
539*5ba6b03cSchristos	mov  edx, [esp+12]
540*5ba6b03cSchristos	mov  eax, [edx+eax*4]
541*5ba6b03cSchristos	jmp  L_dodist
542*5ba6b03cSchristos
543*5ba6b03cSchristosALIGN 4
544*5ba6b03cSchristosL_clip_window:
545*5ba6b03cSchristos; 721 "inffast.S"
546*5ba6b03cSchristos	mov  ecx,eax
547*5ba6b03cSchristos	mov  eax, [esp+52]
548*5ba6b03cSchristos	neg  ecx
549*5ba6b03cSchristos	mov  esi, [esp+56]
550*5ba6b03cSchristos
551*5ba6b03cSchristos	cmp  eax,edx
552*5ba6b03cSchristos	jb   L_invalid_distance_too_far
553*5ba6b03cSchristos
554*5ba6b03cSchristos	add  ecx,edx
555*5ba6b03cSchristos	cmp  dword ptr [esp+48],0
556*5ba6b03cSchristos	jne  L_wrap_around_window
557*5ba6b03cSchristos
558*5ba6b03cSchristos	sub  eax,ecx
559*5ba6b03cSchristos	add  esi,eax
560*5ba6b03cSchristos; 749 "inffast.S"
561*5ba6b03cSchristos	mov  eax, [esp+24]
562*5ba6b03cSchristos	cmp  eax,ecx
563*5ba6b03cSchristos	jbe  L_do_copy1
564*5ba6b03cSchristos
565*5ba6b03cSchristos	sub  eax,ecx
566*5ba6b03cSchristos	rep movsb
567*5ba6b03cSchristos	mov  esi,edi
568*5ba6b03cSchristos	sub  esi,edx
569*5ba6b03cSchristos	jmp  L_do_copy1
570*5ba6b03cSchristos
571*5ba6b03cSchristos	cmp  eax,ecx
572*5ba6b03cSchristos	jbe  L_do_copy1
573*5ba6b03cSchristos
574*5ba6b03cSchristos	sub  eax,ecx
575*5ba6b03cSchristos	rep movsb
576*5ba6b03cSchristos	mov  esi,edi
577*5ba6b03cSchristos	sub  esi,edx
578*5ba6b03cSchristos	jmp  L_do_copy1
579*5ba6b03cSchristos
580*5ba6b03cSchristosL_wrap_around_window:
581*5ba6b03cSchristos; 793 "inffast.S"
582*5ba6b03cSchristos	mov  eax, [esp+48]
583*5ba6b03cSchristos	cmp  ecx,eax
584*5ba6b03cSchristos	jbe  L_contiguous_in_window
585*5ba6b03cSchristos
586*5ba6b03cSchristos	add  esi, [esp+52]
587*5ba6b03cSchristos	add  esi,eax
588*5ba6b03cSchristos	sub  esi,ecx
589*5ba6b03cSchristos	sub  ecx,eax
590*5ba6b03cSchristos
591*5ba6b03cSchristos
592*5ba6b03cSchristos	mov  eax, [esp+24]
593*5ba6b03cSchristos	cmp  eax,ecx
594*5ba6b03cSchristos	jbe  L_do_copy1
595*5ba6b03cSchristos
596*5ba6b03cSchristos	sub  eax,ecx
597*5ba6b03cSchristos	rep movsb
598*5ba6b03cSchristos	mov  esi, [esp+56]
599*5ba6b03cSchristos	mov  ecx, [esp+48]
600*5ba6b03cSchristos	cmp  eax,ecx
601*5ba6b03cSchristos	jbe  L_do_copy1
602*5ba6b03cSchristos
603*5ba6b03cSchristos	sub  eax,ecx
604*5ba6b03cSchristos	rep movsb
605*5ba6b03cSchristos	mov  esi,edi
606*5ba6b03cSchristos	sub  esi,edx
607*5ba6b03cSchristos	jmp  L_do_copy1
608*5ba6b03cSchristos
609*5ba6b03cSchristosL_contiguous_in_window:
610*5ba6b03cSchristos; 836 "inffast.S"
611*5ba6b03cSchristos	add  esi,eax
612*5ba6b03cSchristos	sub  esi,ecx
613*5ba6b03cSchristos
614*5ba6b03cSchristos
615*5ba6b03cSchristos	mov  eax, [esp+24]
616*5ba6b03cSchristos	cmp  eax,ecx
617*5ba6b03cSchristos	jbe  L_do_copy1
618*5ba6b03cSchristos
619*5ba6b03cSchristos	sub  eax,ecx
620*5ba6b03cSchristos	rep movsb
621*5ba6b03cSchristos	mov  esi,edi
622*5ba6b03cSchristos	sub  esi,edx
623*5ba6b03cSchristos
624*5ba6b03cSchristosL_do_copy1:
625*5ba6b03cSchristos; 862 "inffast.S"
626*5ba6b03cSchristos	mov  ecx,eax
627*5ba6b03cSchristos	rep movsb
628*5ba6b03cSchristos
629*5ba6b03cSchristos	mov  esi, [esp+44]
630*5ba6b03cSchristos	jmp  L_while_test
631*5ba6b03cSchristos; 878 "inffast.S"
632*5ba6b03cSchristosALIGN 4
633*5ba6b03cSchristosL_init_mmx:
634*5ba6b03cSchristos	emms
635*5ba6b03cSchristos
636*5ba6b03cSchristos
637*5ba6b03cSchristos
638*5ba6b03cSchristos
639*5ba6b03cSchristos
640*5ba6b03cSchristos	movd mm0,ebp
641*5ba6b03cSchristos	mov  ebp,ebx
642*5ba6b03cSchristos; 896 "inffast.S"
643*5ba6b03cSchristos	movd mm4,dword ptr [esp+0]
644*5ba6b03cSchristos	movq mm3,mm4
645*5ba6b03cSchristos	movd mm5,dword ptr [esp+4]
646*5ba6b03cSchristos	movq mm2,mm5
647*5ba6b03cSchristos	pxor mm1,mm1
648*5ba6b03cSchristos	mov  ebx, [esp+8]
649*5ba6b03cSchristos	jmp  L_do_loop_mmx
650*5ba6b03cSchristos
651*5ba6b03cSchristosALIGN 4
652*5ba6b03cSchristosL_do_loop_mmx:
653*5ba6b03cSchristos	psrlq mm0,mm1
654*5ba6b03cSchristos
655*5ba6b03cSchristos	cmp  ebp,32
656*5ba6b03cSchristos	ja  L_get_length_code_mmx
657*5ba6b03cSchristos
658*5ba6b03cSchristos	movd mm6,ebp
659*5ba6b03cSchristos	movd mm7,dword ptr [esi]
660*5ba6b03cSchristos	add  esi,4
661*5ba6b03cSchristos	psllq mm7,mm6
662*5ba6b03cSchristos	add  ebp,32
663*5ba6b03cSchristos	por mm0,mm7
664*5ba6b03cSchristos
665*5ba6b03cSchristosL_get_length_code_mmx:
666*5ba6b03cSchristos	pand mm4,mm0
667*5ba6b03cSchristos	movd eax,mm4
668*5ba6b03cSchristos	movq mm4,mm3
669*5ba6b03cSchristos	mov  eax, [ebx+eax*4]
670*5ba6b03cSchristos
671*5ba6b03cSchristosL_dolen_mmx:
672*5ba6b03cSchristos	movzx  ecx,ah
673*5ba6b03cSchristos	movd mm1,ecx
674*5ba6b03cSchristos	sub  ebp,ecx
675*5ba6b03cSchristos
676*5ba6b03cSchristos	test  al,al
677*5ba6b03cSchristos	jnz L_test_for_length_base_mmx
678*5ba6b03cSchristos
679*5ba6b03cSchristos	shr  eax,16
680*5ba6b03cSchristos	stosb
681*5ba6b03cSchristos
682*5ba6b03cSchristosL_while_test_mmx:
683*5ba6b03cSchristos
684*5ba6b03cSchristos
685*5ba6b03cSchristos	cmp  [esp+16],edi
686*5ba6b03cSchristos	jbe L_break_loop
687*5ba6b03cSchristos
688*5ba6b03cSchristos	cmp  [esp+20],esi
689*5ba6b03cSchristos	ja L_do_loop_mmx
690*5ba6b03cSchristos	jmp L_break_loop
691*5ba6b03cSchristos
692*5ba6b03cSchristosL_test_for_length_base_mmx:
693*5ba6b03cSchristos
694*5ba6b03cSchristos	mov  edx,eax
695*5ba6b03cSchristos	shr  edx,16
696*5ba6b03cSchristos
697*5ba6b03cSchristos	test  al,16
698*5ba6b03cSchristos	jz  L_test_for_second_level_length_mmx
699*5ba6b03cSchristos	and  eax,15
700*5ba6b03cSchristos	jz L_decode_distance_mmx
701*5ba6b03cSchristos
702*5ba6b03cSchristos	psrlq mm0,mm1
703*5ba6b03cSchristos	movd mm1,eax
704*5ba6b03cSchristos	movd ecx,mm0
705*5ba6b03cSchristos	sub  ebp,eax
706*5ba6b03cSchristos	and  ecx, [inflate_fast_mask+eax*4]
707*5ba6b03cSchristos	add  edx,ecx
708*5ba6b03cSchristos
709*5ba6b03cSchristosL_decode_distance_mmx:
710*5ba6b03cSchristos	psrlq mm0,mm1
711*5ba6b03cSchristos
712*5ba6b03cSchristos	cmp  ebp,32
713*5ba6b03cSchristos	ja L_get_dist_code_mmx
714*5ba6b03cSchristos
715*5ba6b03cSchristos	movd mm6,ebp
716*5ba6b03cSchristos	movd mm7,dword ptr [esi]
717*5ba6b03cSchristos	add  esi,4
718*5ba6b03cSchristos	psllq mm7,mm6
719*5ba6b03cSchristos	add  ebp,32
720*5ba6b03cSchristos	por mm0,mm7
721*5ba6b03cSchristos
722*5ba6b03cSchristosL_get_dist_code_mmx:
723*5ba6b03cSchristos	mov  ebx, [esp+12]
724*5ba6b03cSchristos	pand mm5,mm0
725*5ba6b03cSchristos	movd eax,mm5
726*5ba6b03cSchristos	movq mm5,mm2
727*5ba6b03cSchristos	mov  eax, [ebx+eax*4]
728*5ba6b03cSchristos
729*5ba6b03cSchristosL_dodist_mmx:
730*5ba6b03cSchristos
731*5ba6b03cSchristos	movzx  ecx,ah
732*5ba6b03cSchristos	mov  ebx,eax
733*5ba6b03cSchristos	shr  ebx,16
734*5ba6b03cSchristos	sub  ebp,ecx
735*5ba6b03cSchristos	movd mm1,ecx
736*5ba6b03cSchristos
737*5ba6b03cSchristos	test  al,16
738*5ba6b03cSchristos	jz L_test_for_second_level_dist_mmx
739*5ba6b03cSchristos	and  eax,15
740*5ba6b03cSchristos	jz L_check_dist_one_mmx
741*5ba6b03cSchristos
742*5ba6b03cSchristosL_add_bits_to_dist_mmx:
743*5ba6b03cSchristos	psrlq mm0,mm1
744*5ba6b03cSchristos	movd mm1,eax
745*5ba6b03cSchristos	movd ecx,mm0
746*5ba6b03cSchristos	sub  ebp,eax
747*5ba6b03cSchristos	and  ecx, [inflate_fast_mask+eax*4]
748*5ba6b03cSchristos	add  ebx,ecx
749*5ba6b03cSchristos
750*5ba6b03cSchristosL_check_window_mmx:
751*5ba6b03cSchristos	mov  [esp+44],esi
752*5ba6b03cSchristos	mov  eax,edi
753*5ba6b03cSchristos	sub  eax, [esp+40]
754*5ba6b03cSchristos
755*5ba6b03cSchristos	cmp  eax,ebx
756*5ba6b03cSchristos	jb L_clip_window_mmx
757*5ba6b03cSchristos
758*5ba6b03cSchristos	mov  ecx,edx
759*5ba6b03cSchristos	mov  esi,edi
760*5ba6b03cSchristos	sub  esi,ebx
761*5ba6b03cSchristos
762*5ba6b03cSchristos	sub  ecx,3
763*5ba6b03cSchristos	mov  al, [esi]
764*5ba6b03cSchristos	mov  [edi],al
765*5ba6b03cSchristos	mov  al, [esi+1]
766*5ba6b03cSchristos	mov  dl, [esi+2]
767*5ba6b03cSchristos	add  esi,3
768*5ba6b03cSchristos	mov  [edi+1],al
769*5ba6b03cSchristos	mov  [edi+2],dl
770*5ba6b03cSchristos	add  edi,3
771*5ba6b03cSchristos	rep movsb
772*5ba6b03cSchristos
773*5ba6b03cSchristos	mov  esi, [esp+44]
774*5ba6b03cSchristos	mov  ebx, [esp+8]
775*5ba6b03cSchristos	jmp  L_while_test_mmx
776*5ba6b03cSchristos
777*5ba6b03cSchristosALIGN 4
778*5ba6b03cSchristosL_check_dist_one_mmx:
779*5ba6b03cSchristos	cmp  ebx,1
780*5ba6b03cSchristos	jne  L_check_window_mmx
781*5ba6b03cSchristos	cmp  [esp+40],edi
782*5ba6b03cSchristos	je   L_check_window_mmx
783*5ba6b03cSchristos
784*5ba6b03cSchristos	dec  edi
785*5ba6b03cSchristos	mov  ecx,edx
786*5ba6b03cSchristos	mov  al, [edi]
787*5ba6b03cSchristos	sub  ecx,3
788*5ba6b03cSchristos
789*5ba6b03cSchristos	mov  [edi+1],al
790*5ba6b03cSchristos	mov  [edi+2],al
791*5ba6b03cSchristos	mov  [edi+3],al
792*5ba6b03cSchristos	add  edi,4
793*5ba6b03cSchristos	rep stosb
794*5ba6b03cSchristos
795*5ba6b03cSchristos	mov  ebx, [esp+8]
796*5ba6b03cSchristos	jmp  L_while_test_mmx
797*5ba6b03cSchristos
798*5ba6b03cSchristosALIGN 4
799*5ba6b03cSchristosL_test_for_second_level_length_mmx:
800*5ba6b03cSchristos	test  al,64
801*5ba6b03cSchristos	jnz L_test_for_end_of_block
802*5ba6b03cSchristos
803*5ba6b03cSchristos	and  eax,15
804*5ba6b03cSchristos	psrlq mm0,mm1
805*5ba6b03cSchristos	movd ecx,mm0
806*5ba6b03cSchristos	and  ecx, [inflate_fast_mask+eax*4]
807*5ba6b03cSchristos	add  ecx,edx
808*5ba6b03cSchristos	mov  eax, [ebx+ecx*4]
809*5ba6b03cSchristos	jmp L_dolen_mmx
810*5ba6b03cSchristos
811*5ba6b03cSchristosALIGN 4
812*5ba6b03cSchristosL_test_for_second_level_dist_mmx:
813*5ba6b03cSchristos	test  al,64
814*5ba6b03cSchristos	jnz L_invalid_distance_code
815*5ba6b03cSchristos
816*5ba6b03cSchristos	and  eax,15
817*5ba6b03cSchristos	psrlq mm0,mm1
818*5ba6b03cSchristos	movd ecx,mm0
819*5ba6b03cSchristos	and  ecx, [inflate_fast_mask+eax*4]
820*5ba6b03cSchristos	mov  eax, [esp+12]
821*5ba6b03cSchristos	add  ecx,ebx
822*5ba6b03cSchristos	mov  eax, [eax+ecx*4]
823*5ba6b03cSchristos	jmp  L_dodist_mmx
824*5ba6b03cSchristos
825*5ba6b03cSchristosALIGN 4
826*5ba6b03cSchristosL_clip_window_mmx:
827*5ba6b03cSchristos
828*5ba6b03cSchristos	mov  ecx,eax
829*5ba6b03cSchristos	mov  eax, [esp+52]
830*5ba6b03cSchristos	neg  ecx
831*5ba6b03cSchristos	mov  esi, [esp+56]
832*5ba6b03cSchristos
833*5ba6b03cSchristos	cmp  eax,ebx
834*5ba6b03cSchristos	jb  L_invalid_distance_too_far
835*5ba6b03cSchristos
836*5ba6b03cSchristos	add  ecx,ebx
837*5ba6b03cSchristos	cmp  dword ptr [esp+48],0
838*5ba6b03cSchristos	jne  L_wrap_around_window_mmx
839*5ba6b03cSchristos
840*5ba6b03cSchristos	sub  eax,ecx
841*5ba6b03cSchristos	add  esi,eax
842*5ba6b03cSchristos
843*5ba6b03cSchristos	cmp  edx,ecx
844*5ba6b03cSchristos	jbe  L_do_copy1_mmx
845*5ba6b03cSchristos
846*5ba6b03cSchristos	sub  edx,ecx
847*5ba6b03cSchristos	rep movsb
848*5ba6b03cSchristos	mov  esi,edi
849*5ba6b03cSchristos	sub  esi,ebx
850*5ba6b03cSchristos	jmp  L_do_copy1_mmx
851*5ba6b03cSchristos
852*5ba6b03cSchristos	cmp  edx,ecx
853*5ba6b03cSchristos	jbe  L_do_copy1_mmx
854*5ba6b03cSchristos
855*5ba6b03cSchristos	sub  edx,ecx
856*5ba6b03cSchristos	rep movsb
857*5ba6b03cSchristos	mov  esi,edi
858*5ba6b03cSchristos	sub  esi,ebx
859*5ba6b03cSchristos	jmp  L_do_copy1_mmx
860*5ba6b03cSchristos
861*5ba6b03cSchristosL_wrap_around_window_mmx:
862*5ba6b03cSchristos
863*5ba6b03cSchristos	mov  eax, [esp+48]
864*5ba6b03cSchristos	cmp  ecx,eax
865*5ba6b03cSchristos	jbe  L_contiguous_in_window_mmx
866*5ba6b03cSchristos
867*5ba6b03cSchristos	add  esi, [esp+52]
868*5ba6b03cSchristos	add  esi,eax
869*5ba6b03cSchristos	sub  esi,ecx
870*5ba6b03cSchristos	sub  ecx,eax
871*5ba6b03cSchristos
872*5ba6b03cSchristos
873*5ba6b03cSchristos	cmp  edx,ecx
874*5ba6b03cSchristos	jbe  L_do_copy1_mmx
875*5ba6b03cSchristos
876*5ba6b03cSchristos	sub  edx,ecx
877*5ba6b03cSchristos	rep movsb
878*5ba6b03cSchristos	mov  esi, [esp+56]
879*5ba6b03cSchristos	mov  ecx, [esp+48]
880*5ba6b03cSchristos	cmp  edx,ecx
881*5ba6b03cSchristos	jbe  L_do_copy1_mmx
882*5ba6b03cSchristos
883*5ba6b03cSchristos	sub  edx,ecx
884*5ba6b03cSchristos	rep movsb
885*5ba6b03cSchristos	mov  esi,edi
886*5ba6b03cSchristos	sub  esi,ebx
887*5ba6b03cSchristos	jmp  L_do_copy1_mmx
888*5ba6b03cSchristos
889*5ba6b03cSchristosL_contiguous_in_window_mmx:
890*5ba6b03cSchristos
891*5ba6b03cSchristos	add  esi,eax
892*5ba6b03cSchristos	sub  esi,ecx
893*5ba6b03cSchristos
894*5ba6b03cSchristos
895*5ba6b03cSchristos	cmp  edx,ecx
896*5ba6b03cSchristos	jbe  L_do_copy1_mmx
897*5ba6b03cSchristos
898*5ba6b03cSchristos	sub  edx,ecx
899*5ba6b03cSchristos	rep movsb
900*5ba6b03cSchristos	mov  esi,edi
901*5ba6b03cSchristos	sub  esi,ebx
902*5ba6b03cSchristos
903*5ba6b03cSchristosL_do_copy1_mmx:	; final copy of the edx bytes still outstanding, then back to the loop test
904*5ba6b03cSchristos
905*5ba6b03cSchristos
906*5ba6b03cSchristos	mov  ecx,edx	; rep count = remaining length
907*5ba6b03cSchristos	rep movsb	; copy ecx bytes from [esi] to [edi]
908*5ba6b03cSchristos; esi and ebx were used as copy source / back-offset above; reload them from the frame.
909*5ba6b03cSchristos	mov  esi, [esp+44]	; presumably the saved input pointer -- confirm against the frame layout
910*5ba6b03cSchristos	mov  ebx, [esp+8]	; NOTE(review): meaning of slot [esp+8] is not visible in this part of the file
911*5ba6b03cSchristos	jmp  L_while_test_mmx
912*5ba6b03cSchristos; 1174 "inffast.S"
913*5ba6b03cSchristosL_invalid_distance_code:	; error exit: report invalid_distance_code_msg and mode INFLATE_MODE_BAD
914*5ba6b03cSchristos
915*5ba6b03cSchristos
916*5ba6b03cSchristos
917*5ba6b03cSchristos; L_update_stream_state takes ecx = message (0 for none) and edx = new mode.
918*5ba6b03cSchristos
919*5ba6b03cSchristos	mov  ecx, invalid_distance_code_msg	; symbol defined outside this part of the file
920*5ba6b03cSchristos	mov  edx,INFLATE_MODE_BAD
921*5ba6b03cSchristos	jmp  L_update_stream_state
922*5ba6b03cSchristos
923*5ba6b03cSchristosL_test_for_end_of_block:	; decide between end-of-block and invalid literal/length code using al
924*5ba6b03cSchristos
925*5ba6b03cSchristos
926*5ba6b03cSchristos
927*5ba6b03cSchristos; Bit value 32 of al set: leave with mode INFLATE_MODE_TYPE and no message; clear: error path.
928*5ba6b03cSchristos
929*5ba6b03cSchristos	test  al,32
930*5ba6b03cSchristos	jz  L_invalid_literal_length_code
931*5ba6b03cSchristos
932*5ba6b03cSchristos	mov  ecx,0	; ecx = 0 makes L_update_stream_state skip the message store
933*5ba6b03cSchristos	mov  edx,INFLATE_MODE_TYPE
934*5ba6b03cSchristos	jmp  L_update_stream_state
935*5ba6b03cSchristos
936*5ba6b03cSchristosL_invalid_literal_length_code:	; error exit: report invalid_literal_length_code_msg and mode INFLATE_MODE_BAD
937*5ba6b03cSchristos
938*5ba6b03cSchristos
939*5ba6b03cSchristos
940*5ba6b03cSchristos; L_update_stream_state takes ecx = message (0 for none) and edx = new mode.
941*5ba6b03cSchristos
942*5ba6b03cSchristos	mov  ecx, invalid_literal_length_code_msg	; symbol defined outside this part of the file
943*5ba6b03cSchristos	mov  edx,INFLATE_MODE_BAD
944*5ba6b03cSchristos	jmp  L_update_stream_state
945*5ba6b03cSchristos
946*5ba6b03cSchristosL_invalid_distance_too_far:	; error exit: report invalid_distance_too_far_msg and mode INFLATE_MODE_BAD
947*5ba6b03cSchristos
948*5ba6b03cSchristos; esi is reloaded from the frame first because the shared exit code stores esi to [eax+0].
949*5ba6b03cSchristos
950*5ba6b03cSchristos	mov  esi, [esp+44]	; presumably the saved input pointer -- confirm against the frame layout
951*5ba6b03cSchristos	mov  ecx, invalid_distance_too_far_msg	; symbol defined outside this part of the file
952*5ba6b03cSchristos	mov  edx,INFLATE_MODE_BAD
953*5ba6b03cSchristos	jmp  L_update_stream_state	; target is the label immediately below
954*5ba6b03cSchristos
955*5ba6b03cSchristosL_update_stream_state:	; in: ecx = message value or 0, edx = new mode value
956*5ba6b03cSchristos; [esp+88] sits just above the return address (64 bytes of locals + five 4-byte saved values, see L_done), i.e. the first stack argument.
957*5ba6b03cSchristos	mov  eax, [esp+88]	; eax = first argument (presumably the z_stream pointer -- confirm)
958*5ba6b03cSchristos	test  ecx,ecx
959*5ba6b03cSchristos	jz  L_skip_msg	; no message to record
960*5ba6b03cSchristos	mov  [eax+24],ecx	; store message at offset 24 (presumably the msg field -- confirm)
961*5ba6b03cSchristosL_skip_msg:
962*5ba6b03cSchristos	mov  eax, [eax+28]	; eax = pointer held at offset 28 (presumably the inflate state -- confirm)
963*5ba6b03cSchristos	mov  [eax+mode_state],edx	; write the new mode; mode_state is an offset defined outside this part of the file
964*5ba6b03cSchristos	jmp  L_break_loop
965*5ba6b03cSchristos
966*5ba6b03cSchristosALIGN 4
967*5ba6b03cSchristosL_break_loop:	; common exit from the decode loop
968*5ba6b03cSchristos; 1243 "inffast.S"
969*5ba6b03cSchristos	cmp  dword ptr [inflate_fast_use_mmx],2
970*5ba6b03cSchristos	jne  L_update_next_in	; value != 2: ebx is used as-is below
971*5ba6b03cSchristos
972*5ba6b03cSchristos; Value == 2: the bit count consumed at L_update_next_in is taken from ebp.
973*5ba6b03cSchristos
974*5ba6b03cSchristos	mov  ebx,ebp
975*5ba6b03cSchristos
976*5ba6b03cSchristosL_update_next_in:	; in: ebx = bit count, esi = input pointer, edi = output pointer
977*5ba6b03cSchristos; 1266 "inffast.S"
978*5ba6b03cSchristos	mov  eax, [esp+88]	; eax = first stack argument (presumably the z_stream pointer -- confirm)
979*5ba6b03cSchristos	mov  ecx,ebx
980*5ba6b03cSchristos	mov  edx, [eax+28]	; edx = pointer at offset 28 (presumably the inflate state -- confirm)
981*5ba6b03cSchristos	shr  ecx,3	; ecx = whole bytes contained in the bit count
982*5ba6b03cSchristos	sub  esi,ecx	; step the input pointer back by that many bytes
983*5ba6b03cSchristos	shl  ecx,3
984*5ba6b03cSchristos	sub  ebx,ecx	; ebx = leftover bits (0..7)
985*5ba6b03cSchristos	mov  [eax+12],edi	; store output pointer at offset 12 (presumably next_out -- confirm)
986*5ba6b03cSchristos	mov  [edx+bits_state],ebx	; store leftover bit count
987*5ba6b03cSchristos	mov  ecx,ebx	; keep the bit count in cl for the mask shift below
988*5ba6b03cSchristos; If slot [esp+20] holds the address of the on-stack area at [esp+28], rebase esi and [esp+20] onto [eax+0]/[eax+4].
989*5ba6b03cSchristos	lea  ebx, [esp+28]
990*5ba6b03cSchristos	cmp  [esp+20],ebx
991*5ba6b03cSchristos	jne  L_buf_not_used
992*5ba6b03cSchristos; NOTE(review): presumably input was staged in a local buffer earlier in the function -- confirm.
993*5ba6b03cSchristos	sub  esi,ebx	; esi = offset of the input pointer within the stack area
994*5ba6b03cSchristos	mov  ebx, [eax+0]
995*5ba6b03cSchristos	mov  [esp+20],ebx
996*5ba6b03cSchristos	add  esi,ebx	; esi = [eax+0] + that offset
997*5ba6b03cSchristos	mov  ebx, [eax+4]
998*5ba6b03cSchristos	sub  ebx,11
999*5ba6b03cSchristos	add  [esp+20],ebx	; [esp+20] = [eax+0] + [eax+4] - 11
1000*5ba6b03cSchristos
1001*5ba6b03cSchristosL_buf_not_used:
1002*5ba6b03cSchristos	mov  [eax+0],esi	; store input pointer at offset 0 (presumably next_in -- confirm)
1003*5ba6b03cSchristos; ebx = (1 << cl) - 1: mask covering the leftover bits, applied to ebp at L_update_hold.
1004*5ba6b03cSchristos	mov  ebx,1
1005*5ba6b03cSchristos	shl  ebx,cl
1006*5ba6b03cSchristos	dec  ebx
1007*5ba6b03cSchristos
1008*5ba6b03cSchristos
1009*5ba6b03cSchristos
1010*5ba6b03cSchristos
1011*5ba6b03cSchristos
1012*5ba6b03cSchristos	cmp  dword ptr [inflate_fast_use_mmx],2
1013*5ba6b03cSchristos	jne  L_update_hold	; value != 2: ebp is used as-is
1014*5ba6b03cSchristos
1015*5ba6b03cSchristos; Value == 2: take the value from MMX registers instead, then leave MMX state.
1016*5ba6b03cSchristos
1017*5ba6b03cSchristos	psrlq mm0,mm1	; mm0 >>= mm1 (logical, 64-bit)
1018*5ba6b03cSchristos	movd ebp,mm0	; ebp = low 32 bits of mm0
1019*5ba6b03cSchristos
1020*5ba6b03cSchristos	emms	; empty the MMX state before returning to the caller
1021*5ba6b03cSchristos
1022*5ba6b03cSchristosL_update_hold:	; in: ebx = bit mask built above, edx = pointer loaded from [eax+28]
1023*5ba6b03cSchristos
1024*5ba6b03cSchristos
1025*5ba6b03cSchristos
1026*5ba6b03cSchristos	and  ebp,ebx	; keep only the bits covered by the mask
1027*5ba6b03cSchristos	mov  [edx+hold_state],ebp	; hold_state is an offset defined outside this part of the file
1028*5ba6b03cSchristos
1029*5ba6b03cSchristos
1030*5ba6b03cSchristos
1031*5ba6b03cSchristos
1032*5ba6b03cSchristos	mov  ebx, [esp+20]	; ebx = frame slot [esp+20]; both paths below store 11 + ([esp+20] - esi), modulo 2^32, to [eax+4]
1033*5ba6b03cSchristos	cmp  ebx,esi
1034*5ba6b03cSchristos	jbe  L_last_is_smaller	; unsigned: [esp+20] <= esi
1035*5ba6b03cSchristos
1036*5ba6b03cSchristos	sub  ebx,esi
1037*5ba6b03cSchristos	add  ebx,11	; ebx = ([esp+20] - esi) + 11
1038*5ba6b03cSchristos	mov  [eax+4],ebx	; offset 4 is presumably avail_in -- confirm
1039*5ba6b03cSchristos	jmp  L_fixup_out
1040*5ba6b03cSchristosL_last_is_smaller:
1041*5ba6b03cSchristos	sub  esi,ebx
1042*5ba6b03cSchristos	neg  esi
1043*5ba6b03cSchristos	add  esi,11	; esi = 11 - (esi - [esp+20])
1044*5ba6b03cSchristos	mov  [eax+4],esi
1045*5ba6b03cSchristos
1046*5ba6b03cSchristos
1047*5ba6b03cSchristos
1048*5ba6b03cSchristos
1049*5ba6b03cSchristosL_fixup_out:	; both paths below store 257 + ([esp+16] - edi), modulo 2^32, to [eax+16]
1050*5ba6b03cSchristos
1051*5ba6b03cSchristos	mov  ebx, [esp+16]	; ebx = frame slot [esp+16]
1052*5ba6b03cSchristos	cmp  ebx,edi
1053*5ba6b03cSchristos	jbe  L_end_is_smaller	; unsigned: [esp+16] <= edi
1054*5ba6b03cSchristos
1055*5ba6b03cSchristos	sub  ebx,edi
1056*5ba6b03cSchristos	add  ebx,257	; ebx = ([esp+16] - edi) + 257
1057*5ba6b03cSchristos	mov  [eax+16],ebx	; offset 16 is presumably avail_out -- confirm
1058*5ba6b03cSchristos	jmp  L_done
1059*5ba6b03cSchristosL_end_is_smaller:
1060*5ba6b03cSchristos	sub  edi,ebx
1061*5ba6b03cSchristos	neg  edi
1062*5ba6b03cSchristos	add  edi,257	; edi = 257 - (edi - [esp+16])
1063*5ba6b03cSchristos	mov  [eax+16],edi
1064*5ba6b03cSchristos
1065*5ba6b03cSchristos
1066*5ba6b03cSchristos
1067*5ba6b03cSchristos
1068*5ba6b03cSchristos
1069*5ba6b03cSchristosL_done:	; epilogue: drop locals, restore saved flags and registers, return
1070*5ba6b03cSchristos	add  esp,64	; release the 64-byte local area
1071*5ba6b03cSchristos	popfd	; restore saved EFLAGS
1072*5ba6b03cSchristos	pop  ebx
1073*5ba6b03cSchristos	pop  ebp
1074*5ba6b03cSchristos	pop  esi
1075*5ba6b03cSchristos	pop  edi
1076*5ba6b03cSchristos	ret	; plain near return: no stack arguments are removed here
1077*5ba6b03cSchristos_inflate_fast endp
1078*5ba6b03cSchristos
1079*5ba6b03cSchristos_TEXT	ends
1080*5ba6b03cSchristosend
1081