1*a1d25298Schristos;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2*a1d25298Schristos; *
3*a1d25298Schristos; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4*a1d25298Schristos; *
5*a1d25298Schristos; * Copyright (C) 1995-2003 Mark Adler
6*a1d25298Schristos; * For conditions of distribution and use, see copyright notice in zlib.h
7*a1d25298Schristos; *
8*a1d25298Schristos; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9*a1d25298Schristos; * Please use the copyright conditions above.
10*a1d25298Schristos; *
11*a1d25298Schristos; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12*a1d25298Schristos; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
13*a1d25298Schristos; * the moment.  I have successfully compiled and tested this code with gcc2.96,
14*a1d25298Schristos; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
15*a1d25298Schristos; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16*a1d25298Schristos; * enabled.  I will attempt to merge the MMX code into this version.  Newer
17*a1d25298Schristos; * versions of this and inffast.S can be found at
18*a1d25298Schristos; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19*a1d25298Schristos; *
20*a1d25298Schristos; * 2005 : modification by Gilles Vollant
21*a1d25298Schristos; */
22*a1d25298Schristos; For Visual C++ 4.x and higher and ML 6.x and higher
23*a1d25298Schristos;   ml.exe is in directory \MASM611C of Win95 DDK
24*a1d25298Schristos;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25*a1d25298Schristos;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26*a1d25298Schristos;
27*a1d25298Schristos;
28*a1d25298Schristos;   compile with command line option
29*a1d25298Schristos;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
30*a1d25298Schristos
31*a1d25298Schristos;   if you define NO_GZIP (see inflate.h), compile with
32*a1d25298Schristos;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33*a1d25298Schristos
34*a1d25298Schristos
35*a1d25298Schristos; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36*a1d25298Schristos; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
37*a1d25298Schristos;        in inflate_state in inflate.h)
38*a1d25298Schristoszlib1222sup      equ    8	; bytes added to every *_state offset (except mode_state) defined further down in this file
39*a1d25298Schristos
40*a1d25298Schristos
41*a1d25298SchristosIFDEF GUNZIP	; GUNZIP given on the assembler command line
42*a1d25298Schristos  INFLATE_MODE_TYPE    equ 11	; loaded into edx at L_test_for_end_of_block
43*a1d25298Schristos  INFLATE_MODE_BAD     equ 26	; loaded into edx on the invalid-code and distance-too-far paths
44*a1d25298SchristosELSE
45*a1d25298Schristos  IFNDEF NO_GUNZIP	; neither GUNZIP nor NO_GUNZIP given: same values as the GUNZIP case
46*a1d25298Schristos    INFLATE_MODE_TYPE    equ 11
47*a1d25298Schristos    INFLATE_MODE_BAD     equ 26
48*a1d25298Schristos  ELSE	; NO_GUNZIP given (the /DNO_GUNZIP command line from the header comment)
49*a1d25298Schristos    INFLATE_MODE_TYPE    equ 3	; NOTE(review): presumably the inflate_mode numbering of a build without the gzip states - verify against inflate.h
50*a1d25298Schristos    INFLATE_MODE_BAD     equ 17	; NOTE(review): same assumption as above - verify against inflate.h
51*a1d25298Schristos  ENDIF
52*a1d25298SchristosENDIF
53*a1d25298Schristos
54*a1d25298Schristos
55*a1d25298Schristos; 75 "inffast.S"
56*a1d25298Schristos;FILE "inffast.S"
57*a1d25298Schristos
58*a1d25298Schristos;;;GLOBAL _inflate_fast
59*a1d25298Schristos
60*a1d25298Schristos;;;SECTION .text
61*a1d25298Schristos
62*a1d25298Schristos
63*a1d25298Schristos
64*a1d25298Schristos	.586p	; enable the Pentium instruction set (cpuid is used at L_check_mmx)
65*a1d25298Schristos	.mmx	; enable MMX mnemonics (movd, movq, psrlq, psllq, pand, por, pxor, emms are used in the L_*_mmx code)
66*a1d25298Schristos
67*a1d25298Schristos	name	inflate_fast_x86	; module name given to the assembler
68*a1d25298Schristos	.MODEL	FLAT	; flat memory model; the code below uses 32-bit registers and addresses throughout
69*a1d25298Schristos
70*a1d25298Schristos_DATA			segment	; data segment holding the one variable this file writes at run time
71*a1d25298Schristosinflate_fast_use_mmx:	; decoder selector read at L_check_mmx: below 2 = run the CPUID probe, 2 = take L_init_mmx, above 2 = take L_do_loop
72*a1d25298Schristos	dd	1	; initial value is below 2, so the first pass through L_check_mmx runs the probe, which stores 2 (L_use_mmx) or 3 (L_dont_use_mmx)
73*a1d25298Schristos
74*a1d25298Schristos
75*a1d25298Schristos_TEXT			segment	; code segment; the constant strings and the mask table below are placed here ahead of the procedure
76*a1d25298Schristos
77*a1d25298Schristos
78*a1d25298Schristos
79*a1d25298SchristosALIGN 4
80*a1d25298Schristos	db	'Fast decoding Code from Chris Anderson'	; unlabeled credit string; no visible code refers to it
81*a1d25298Schristos	db	0	; NUL terminator
82*a1d25298Schristos
83*a1d25298SchristosALIGN 4
84*a1d25298Schristosinvalid_literal_length_code_msg:	; loaded into ecx at L_invalid_literal_length_code
85*a1d25298Schristos	db	'invalid literal/length code'
86*a1d25298Schristos	db	0	; NUL terminator
87*a1d25298Schristos
88*a1d25298SchristosALIGN 4
89*a1d25298Schristosinvalid_distance_code_msg:	; loaded into ecx at L_invalid_distance_code
90*a1d25298Schristos	db	'invalid distance code'
91*a1d25298Schristos	db	0	; NUL terminator
92*a1d25298Schristos
93*a1d25298SchristosALIGN 4
94*a1d25298Schristosinvalid_distance_too_far_msg:	; loaded into ecx at L_invalid_distance_too_far
95*a1d25298Schristos	db	'invalid distance too far back'
96*a1d25298Schristos	db	0	; NUL terminator
97*a1d25298Schristos
98*a1d25298Schristos
99*a1d25298SchristosALIGN 4	; dword-align the table
100*a1d25298Schristosinflate_fast_mask:	; 33 dwords: entry n = (1 shl n) - 1, n = 0..32; the MMX code reads [inflate_fast_mask+eax*4] to keep the low eax bits
101*a1d25298Schristosdd	0	; n = 0: no bits kept
102*a1d25298Schristosdd	1
103*a1d25298Schristosdd	3
104*a1d25298Schristosdd	7
105*a1d25298Schristosdd	15
106*a1d25298Schristosdd	31
107*a1d25298Schristosdd	63
108*a1d25298Schristosdd	127
109*a1d25298Schristosdd	255	; n = 8
110*a1d25298Schristosdd	511
111*a1d25298Schristosdd	1023
112*a1d25298Schristosdd	2047
113*a1d25298Schristosdd	4095
114*a1d25298Schristosdd	8191
115*a1d25298Schristosdd	16383
116*a1d25298Schristosdd	32767	; n = 15: largest index the visible code can form, since eax is masked with 15 first
117*a1d25298Schristosdd	65535	; n = 16
118*a1d25298Schristosdd	131071
119*a1d25298Schristosdd	262143
120*a1d25298Schristosdd	524287
121*a1d25298Schristosdd	1048575
122*a1d25298Schristosdd	2097151
123*a1d25298Schristosdd	4194303
124*a1d25298Schristosdd	8388607
125*a1d25298Schristosdd	16777215	; n = 24
126*a1d25298Schristosdd	33554431
127*a1d25298Schristosdd	67108863
128*a1d25298Schristosdd	134217727
129*a1d25298Schristosdd	268435455
130*a1d25298Schristosdd	536870911
131*a1d25298Schristosdd	1073741823
132*a1d25298Schristosdd	2147483647
133*a1d25298Schristosdd	4294967295	; n = 32: all 32 bits set
134*a1d25298Schristos
135*a1d25298Schristos
136*a1d25298Schristosmode_state	 equ	0	;/* state->mode: byte offset 0, not shifted by zlib1222sup */
137*a1d25298Schristoswsize_state	 equ	(32+zlib1222sup)	;/* state->wsize: 40 when zlib1222sup is 8; read via [edi+wsize_state] */
138*a1d25298Schristoswrite_state	 equ	(36+4+zlib1222sup)	;/* state->write: 48 when zlib1222sup is 8. NOTE(review): the extra +4 from here on presumably skips a field added after wsize - confirm against inflate.h */
139*a1d25298Schristoswindow_state	 equ	(40+4+zlib1222sup)	;/* state->window: 52 when zlib1222sup is 8 */
140*a1d25298Schristoshold_state	 equ	(44+4+zlib1222sup)	;/* state->hold: 56 when zlib1222sup is 8; loaded into ebp in the prologue */
141*a1d25298Schristosbits_state	 equ	(48+4+zlib1222sup)	;/* state->bits: 60 when zlib1222sup is 8; loaded into ebx in the prologue */
142*a1d25298Schristoslencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode: 76 when zlib1222sup is 8; cached at [esp+8] */
143*a1d25298Schristosdistcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode: 80 when zlib1222sup is 8; cached at [esp+12] */
144*a1d25298Schristoslenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits: 84 when zlib1222sup is 8; turned into the mask (1 shl lenbits)-1 at [esp+0] */
145*a1d25298Schristosdistbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits: 88 when zlib1222sup is 8; turned into the mask (1 shl distbits)-1 at [esp+4] */
146*a1d25298Schristos
147*a1d25298Schristos
148*a1d25298Schristos;;SECTION .text
149*a1d25298Schristos; 205 "inffast.S"
150*a1d25298Schristos;GLOBAL	inflate_fast_use_mmx
151*a1d25298Schristos
152*a1d25298Schristos;SECTION .data
153*a1d25298Schristos
154*a1d25298Schristos
155*a1d25298Schristos; GLOBAL inflate_fast_use_mmx:object
156*a1d25298Schristos;.size inflate_fast_use_mmx, 4
157*a1d25298Schristos; 226 "inffast.S"
158*a1d25298Schristos;SECTION .text
159*a1d25298Schristos
160*a1d25298SchristosALIGN 4
161*a1d25298Schristos_inflate_fast proc near
162*a1d25298Schristos.FPO (16, 4, 0, 0, 1, 0)
163*a1d25298Schristos	push  edi
164*a1d25298Schristos	push  esi
165*a1d25298Schristos	push  ebp
166*a1d25298Schristos	push  ebx
167*a1d25298Schristos	pushfd
168*a1d25298Schristos	sub  esp,64
169*a1d25298Schristos	cld
170*a1d25298Schristos
171*a1d25298Schristos
172*a1d25298Schristos
173*a1d25298Schristos
174*a1d25298Schristos	mov  esi, [esp+88]
175*a1d25298Schristos	mov  edi, [esi+28]
176*a1d25298Schristos
177*a1d25298Schristos
178*a1d25298Schristos
179*a1d25298Schristos
180*a1d25298Schristos
181*a1d25298Schristos
182*a1d25298Schristos
183*a1d25298Schristos	mov  edx, [esi+4]
184*a1d25298Schristos	mov  eax, [esi+0]
185*a1d25298Schristos
186*a1d25298Schristos	add  edx,eax
187*a1d25298Schristos	sub  edx,11
188*a1d25298Schristos
189*a1d25298Schristos	mov  [esp+44],eax
190*a1d25298Schristos	mov  [esp+20],edx
191*a1d25298Schristos
192*a1d25298Schristos	mov  ebp, [esp+92]
193*a1d25298Schristos	mov  ecx, [esi+16]
194*a1d25298Schristos	mov  ebx, [esi+12]
195*a1d25298Schristos
196*a1d25298Schristos	sub  ebp,ecx
197*a1d25298Schristos	neg  ebp
198*a1d25298Schristos	add  ebp,ebx
199*a1d25298Schristos
200*a1d25298Schristos	sub  ecx,257
201*a1d25298Schristos	add  ecx,ebx
202*a1d25298Schristos
203*a1d25298Schristos	mov  [esp+60],ebx
204*a1d25298Schristos	mov  [esp+40],ebp
205*a1d25298Schristos	mov  [esp+16],ecx
206*a1d25298Schristos; 285 "inffast.S"
207*a1d25298Schristos	mov  eax, [edi+lencode_state]
208*a1d25298Schristos	mov  ecx, [edi+distcode_state]
209*a1d25298Schristos
210*a1d25298Schristos	mov  [esp+8],eax
211*a1d25298Schristos	mov  [esp+12],ecx
212*a1d25298Schristos
213*a1d25298Schristos	mov  eax,1
214*a1d25298Schristos	mov  ecx, [edi+lenbits_state]
215*a1d25298Schristos	shl  eax,cl
216*a1d25298Schristos	dec  eax
217*a1d25298Schristos	mov  [esp+0],eax
218*a1d25298Schristos
219*a1d25298Schristos	mov  eax,1
220*a1d25298Schristos	mov  ecx, [edi+distbits_state]
221*a1d25298Schristos	shl  eax,cl
222*a1d25298Schristos	dec  eax
223*a1d25298Schristos	mov  [esp+4],eax
224*a1d25298Schristos
225*a1d25298Schristos	mov  eax, [edi+wsize_state]
226*a1d25298Schristos	mov  ecx, [edi+write_state]
227*a1d25298Schristos	mov  edx, [edi+window_state]
228*a1d25298Schristos
229*a1d25298Schristos	mov  [esp+52],eax
230*a1d25298Schristos	mov  [esp+48],ecx
231*a1d25298Schristos	mov  [esp+56],edx
232*a1d25298Schristos
233*a1d25298Schristos	mov  ebp, [edi+hold_state]
234*a1d25298Schristos	mov  ebx, [edi+bits_state]
235*a1d25298Schristos; 321 "inffast.S"
236*a1d25298Schristos	mov  esi, [esp+44]
237*a1d25298Schristos	mov  ecx, [esp+20]
238*a1d25298Schristos	cmp  ecx,esi
239*a1d25298Schristos	ja   L_align_long
240*a1d25298Schristos
241*a1d25298Schristos	add  ecx,11
242*a1d25298Schristos	sub  ecx,esi
243*a1d25298Schristos	mov  eax,12
244*a1d25298Schristos	sub  eax,ecx
245*a1d25298Schristos	lea  edi, [esp+28]
246*a1d25298Schristos	rep movsb
247*a1d25298Schristos	mov  ecx,eax
248*a1d25298Schristos	xor  eax,eax
249*a1d25298Schristos	rep stosb
250*a1d25298Schristos	lea  esi, [esp+28]
251*a1d25298Schristos	mov  [esp+20],esi
252*a1d25298Schristos	jmp  L_is_aligned
253*a1d25298Schristos
254*a1d25298Schristos
255*a1d25298SchristosL_align_long:
256*a1d25298Schristos	test  esi,3
257*a1d25298Schristos	jz   L_is_aligned
258*a1d25298Schristos	xor  eax,eax
259*a1d25298Schristos	mov  al, [esi]
260*a1d25298Schristos	inc  esi
261*a1d25298Schristos	mov  ecx,ebx
262*a1d25298Schristos	add  ebx,8
263*a1d25298Schristos	shl  eax,cl
264*a1d25298Schristos	or  ebp,eax
265*a1d25298Schristos	jmp L_align_long
266*a1d25298Schristos
267*a1d25298SchristosL_is_aligned:
268*a1d25298Schristos	mov  edi, [esp+60]
269*a1d25298Schristos; 366 "inffast.S"
270*a1d25298SchristosL_check_mmx:
271*a1d25298Schristos	cmp  dword ptr [inflate_fast_use_mmx],2
272*a1d25298Schristos	je   L_init_mmx
273*a1d25298Schristos	ja   L_do_loop
274*a1d25298Schristos
275*a1d25298Schristos	push  eax
276*a1d25298Schristos	push  ebx
277*a1d25298Schristos	push  ecx
278*a1d25298Schristos	push  edx
279*a1d25298Schristos	pushfd
280*a1d25298Schristos	mov  eax, [esp]
281*a1d25298Schristos	xor  dword ptr [esp],0200000h
282*a1d25298Schristos
283*a1d25298Schristos
284*a1d25298Schristos
285*a1d25298Schristos
286*a1d25298Schristos	popfd
287*a1d25298Schristos	pushfd
288*a1d25298Schristos	pop  edx
289*a1d25298Schristos	xor  edx,eax
290*a1d25298Schristos	jz   L_dont_use_mmx
291*a1d25298Schristos	xor  eax,eax
292*a1d25298Schristos	cpuid
293*a1d25298Schristos	cmp  ebx,0756e6547h
294*a1d25298Schristos	jne  L_dont_use_mmx
295*a1d25298Schristos	cmp  ecx,06c65746eh
296*a1d25298Schristos	jne  L_dont_use_mmx
297*a1d25298Schristos	cmp  edx,049656e69h
298*a1d25298Schristos	jne  L_dont_use_mmx
299*a1d25298Schristos	mov  eax,1
300*a1d25298Schristos	cpuid
301*a1d25298Schristos	shr  eax,8
302*a1d25298Schristos	and  eax,15
303*a1d25298Schristos	cmp  eax,6
304*a1d25298Schristos	jne  L_dont_use_mmx
305*a1d25298Schristos	test  edx,0800000h
306*a1d25298Schristos	jnz  L_use_mmx
307*a1d25298Schristos	jmp  L_dont_use_mmx
308*a1d25298SchristosL_use_mmx:
309*a1d25298Schristos	mov  dword ptr [inflate_fast_use_mmx],2
310*a1d25298Schristos	jmp  L_check_mmx_pop
311*a1d25298SchristosL_dont_use_mmx:
312*a1d25298Schristos	mov  dword ptr [inflate_fast_use_mmx],3
313*a1d25298SchristosL_check_mmx_pop:
314*a1d25298Schristos	pop  edx
315*a1d25298Schristos	pop  ecx
316*a1d25298Schristos	pop  ebx
317*a1d25298Schristos	pop  eax
318*a1d25298Schristos	jmp  L_check_mmx
319*a1d25298Schristos; 426 "inffast.S"
320*a1d25298SchristosALIGN 4
321*a1d25298SchristosL_do_loop:
322*a1d25298Schristos; 437 "inffast.S"
323*a1d25298Schristos	cmp  bl,15
324*a1d25298Schristos	ja   L_get_length_code
325*a1d25298Schristos
326*a1d25298Schristos	xor  eax,eax
327*a1d25298Schristos	lodsw
328*a1d25298Schristos	mov  cl,bl
329*a1d25298Schristos	add  bl,16
330*a1d25298Schristos	shl  eax,cl
331*a1d25298Schristos	or  ebp,eax
332*a1d25298Schristos
333*a1d25298SchristosL_get_length_code:
334*a1d25298Schristos	mov  edx, [esp+0]
335*a1d25298Schristos	mov  ecx, [esp+8]
336*a1d25298Schristos	and  edx,ebp
337*a1d25298Schristos	mov  eax, [ecx+edx*4]
338*a1d25298Schristos
339*a1d25298SchristosL_dolen:
340*a1d25298Schristos
341*a1d25298Schristos
342*a1d25298Schristos
343*a1d25298Schristos
344*a1d25298Schristos
345*a1d25298Schristos
346*a1d25298Schristos	mov  cl,ah
347*a1d25298Schristos	sub  bl,ah
348*a1d25298Schristos	shr  ebp,cl
349*a1d25298Schristos
350*a1d25298Schristos
351*a1d25298Schristos
352*a1d25298Schristos
353*a1d25298Schristos
354*a1d25298Schristos
355*a1d25298Schristos	test  al,al
356*a1d25298Schristos	jnz   L_test_for_length_base
357*a1d25298Schristos
358*a1d25298Schristos	shr  eax,16
359*a1d25298Schristos	stosb
360*a1d25298Schristos
361*a1d25298SchristosL_while_test:
362*a1d25298Schristos
363*a1d25298Schristos
364*a1d25298Schristos	cmp  [esp+16],edi
365*a1d25298Schristos	jbe  L_break_loop
366*a1d25298Schristos
367*a1d25298Schristos	cmp  [esp+20],esi
368*a1d25298Schristos	ja   L_do_loop
369*a1d25298Schristos	jmp  L_break_loop
370*a1d25298Schristos
371*a1d25298SchristosL_test_for_length_base:
372*a1d25298Schristos; 502 "inffast.S"
373*a1d25298Schristos	mov  edx,eax
374*a1d25298Schristos	shr  edx,16
375*a1d25298Schristos	mov  cl,al
376*a1d25298Schristos
377*a1d25298Schristos	test  al,16
378*a1d25298Schristos	jz   L_test_for_second_level_length
379*a1d25298Schristos	and  cl,15
380*a1d25298Schristos	jz   L_save_len
381*a1d25298Schristos	cmp  bl,cl
382*a1d25298Schristos	jae  L_add_bits_to_len
383*a1d25298Schristos
384*a1d25298Schristos	mov  ch,cl
385*a1d25298Schristos	xor  eax,eax
386*a1d25298Schristos	lodsw
387*a1d25298Schristos	mov  cl,bl
388*a1d25298Schristos	add  bl,16
389*a1d25298Schristos	shl  eax,cl
390*a1d25298Schristos	or  ebp,eax
391*a1d25298Schristos	mov  cl,ch
392*a1d25298Schristos
393*a1d25298SchristosL_add_bits_to_len:
394*a1d25298Schristos	mov  eax,1
395*a1d25298Schristos	shl  eax,cl
396*a1d25298Schristos	dec  eax
397*a1d25298Schristos	sub  bl,cl
398*a1d25298Schristos	and  eax,ebp
399*a1d25298Schristos	shr  ebp,cl
400*a1d25298Schristos	add  edx,eax
401*a1d25298Schristos
402*a1d25298SchristosL_save_len:
403*a1d25298Schristos	mov  [esp+24],edx
404*a1d25298Schristos
405*a1d25298Schristos
406*a1d25298SchristosL_decode_distance:
407*a1d25298Schristos; 549 "inffast.S"
408*a1d25298Schristos	cmp  bl,15
409*a1d25298Schristos	ja   L_get_distance_code
410*a1d25298Schristos
411*a1d25298Schristos	xor  eax,eax
412*a1d25298Schristos	lodsw
413*a1d25298Schristos	mov  cl,bl
414*a1d25298Schristos	add  bl,16
415*a1d25298Schristos	shl  eax,cl
416*a1d25298Schristos	or  ebp,eax
417*a1d25298Schristos
418*a1d25298SchristosL_get_distance_code:
419*a1d25298Schristos	mov  edx, [esp+4]
420*a1d25298Schristos	mov  ecx, [esp+12]
421*a1d25298Schristos	and  edx,ebp
422*a1d25298Schristos	mov  eax, [ecx+edx*4]
423*a1d25298Schristos
424*a1d25298Schristos
425*a1d25298SchristosL_dodist:
426*a1d25298Schristos	mov  edx,eax
427*a1d25298Schristos	shr  edx,16
428*a1d25298Schristos	mov  cl,ah
429*a1d25298Schristos	sub  bl,ah
430*a1d25298Schristos	shr  ebp,cl
431*a1d25298Schristos; 584 "inffast.S"
432*a1d25298Schristos	mov  cl,al
433*a1d25298Schristos
434*a1d25298Schristos	test  al,16
435*a1d25298Schristos	jz  L_test_for_second_level_dist
436*a1d25298Schristos	and  cl,15
437*a1d25298Schristos	jz  L_check_dist_one
438*a1d25298Schristos	cmp  bl,cl
439*a1d25298Schristos	jae  L_add_bits_to_dist
440*a1d25298Schristos
441*a1d25298Schristos	mov  ch,cl
442*a1d25298Schristos	xor  eax,eax
443*a1d25298Schristos	lodsw
444*a1d25298Schristos	mov  cl,bl
445*a1d25298Schristos	add  bl,16
446*a1d25298Schristos	shl  eax,cl
447*a1d25298Schristos	or  ebp,eax
448*a1d25298Schristos	mov  cl,ch
449*a1d25298Schristos
450*a1d25298SchristosL_add_bits_to_dist:
451*a1d25298Schristos	mov  eax,1
452*a1d25298Schristos	shl  eax,cl
453*a1d25298Schristos	dec  eax
454*a1d25298Schristos	sub  bl,cl
455*a1d25298Schristos	and  eax,ebp
456*a1d25298Schristos	shr  ebp,cl
457*a1d25298Schristos	add  edx,eax
458*a1d25298Schristos	jmp  L_check_window
459*a1d25298Schristos
460*a1d25298SchristosL_check_window:
461*a1d25298Schristos; 625 "inffast.S"
462*a1d25298Schristos	mov  [esp+44],esi
463*a1d25298Schristos	mov  eax,edi
464*a1d25298Schristos	sub  eax, [esp+40]
465*a1d25298Schristos
466*a1d25298Schristos	cmp  eax,edx
467*a1d25298Schristos	jb   L_clip_window
468*a1d25298Schristos
469*a1d25298Schristos	mov  ecx, [esp+24]
470*a1d25298Schristos	mov  esi,edi
471*a1d25298Schristos	sub  esi,edx
472*a1d25298Schristos
473*a1d25298Schristos	sub  ecx,3
474*a1d25298Schristos	mov  al, [esi]
475*a1d25298Schristos	mov  [edi],al
476*a1d25298Schristos	mov  al, [esi+1]
477*a1d25298Schristos	mov  dl, [esi+2]
478*a1d25298Schristos	add  esi,3
479*a1d25298Schristos	mov  [edi+1],al
480*a1d25298Schristos	mov  [edi+2],dl
481*a1d25298Schristos	add  edi,3
482*a1d25298Schristos	rep movsb
483*a1d25298Schristos
484*a1d25298Schristos	mov  esi, [esp+44]
485*a1d25298Schristos	jmp  L_while_test
486*a1d25298Schristos
487*a1d25298SchristosALIGN 4
488*a1d25298SchristosL_check_dist_one:
489*a1d25298Schristos	cmp  edx,1
490*a1d25298Schristos	jne  L_check_window
491*a1d25298Schristos	cmp  [esp+40],edi
492*a1d25298Schristos	je  L_check_window
493*a1d25298Schristos
494*a1d25298Schristos	dec  edi
495*a1d25298Schristos	mov  ecx, [esp+24]
496*a1d25298Schristos	mov  al, [edi]
497*a1d25298Schristos	sub  ecx,3
498*a1d25298Schristos
499*a1d25298Schristos	mov  [edi+1],al
500*a1d25298Schristos	mov  [edi+2],al
501*a1d25298Schristos	mov  [edi+3],al
502*a1d25298Schristos	add  edi,4
503*a1d25298Schristos	rep stosb
504*a1d25298Schristos
505*a1d25298Schristos	jmp  L_while_test
506*a1d25298Schristos
507*a1d25298SchristosALIGN 4
508*a1d25298SchristosL_test_for_second_level_length:
509*a1d25298Schristos
510*a1d25298Schristos
511*a1d25298Schristos
512*a1d25298Schristos
513*a1d25298Schristos	test  al,64
514*a1d25298Schristos	jnz   L_test_for_end_of_block
515*a1d25298Schristos
516*a1d25298Schristos	mov  eax,1
517*a1d25298Schristos	shl  eax,cl
518*a1d25298Schristos	dec  eax
519*a1d25298Schristos	and  eax,ebp
520*a1d25298Schristos	add  eax,edx
521*a1d25298Schristos	mov  edx, [esp+8]
522*a1d25298Schristos	mov  eax, [edx+eax*4]
523*a1d25298Schristos	jmp  L_dolen
524*a1d25298Schristos
525*a1d25298SchristosALIGN 4
526*a1d25298SchristosL_test_for_second_level_dist:
527*a1d25298Schristos
528*a1d25298Schristos
529*a1d25298Schristos
530*a1d25298Schristos
531*a1d25298Schristos	test  al,64
532*a1d25298Schristos	jnz   L_invalid_distance_code
533*a1d25298Schristos
534*a1d25298Schristos	mov  eax,1
535*a1d25298Schristos	shl  eax,cl
536*a1d25298Schristos	dec  eax
537*a1d25298Schristos	and  eax,ebp
538*a1d25298Schristos	add  eax,edx
539*a1d25298Schristos	mov  edx, [esp+12]
540*a1d25298Schristos	mov  eax, [edx+eax*4]
541*a1d25298Schristos	jmp  L_dodist
542*a1d25298Schristos
543*a1d25298SchristosALIGN 4
544*a1d25298SchristosL_clip_window:
545*a1d25298Schristos; 721 "inffast.S"
546*a1d25298Schristos	mov  ecx,eax
547*a1d25298Schristos	mov  eax, [esp+52]
548*a1d25298Schristos	neg  ecx
549*a1d25298Schristos	mov  esi, [esp+56]
550*a1d25298Schristos
551*a1d25298Schristos	cmp  eax,edx
552*a1d25298Schristos	jb   L_invalid_distance_too_far
553*a1d25298Schristos
554*a1d25298Schristos	add  ecx,edx
555*a1d25298Schristos	cmp  dword ptr [esp+48],0
556*a1d25298Schristos	jne  L_wrap_around_window
557*a1d25298Schristos
558*a1d25298Schristos	sub  eax,ecx
559*a1d25298Schristos	add  esi,eax
560*a1d25298Schristos; 749 "inffast.S"
561*a1d25298Schristos	mov  eax, [esp+24]
562*a1d25298Schristos	cmp  eax,ecx
563*a1d25298Schristos	jbe  L_do_copy1
564*a1d25298Schristos
565*a1d25298Schristos	sub  eax,ecx
566*a1d25298Schristos	rep movsb
567*a1d25298Schristos	mov  esi,edi
568*a1d25298Schristos	sub  esi,edx
569*a1d25298Schristos	jmp  L_do_copy1
570*a1d25298Schristos
571*a1d25298Schristos	cmp  eax,ecx
572*a1d25298Schristos	jbe  L_do_copy1
573*a1d25298Schristos
574*a1d25298Schristos	sub  eax,ecx
575*a1d25298Schristos	rep movsb
576*a1d25298Schristos	mov  esi,edi
577*a1d25298Schristos	sub  esi,edx
578*a1d25298Schristos	jmp  L_do_copy1
579*a1d25298Schristos
580*a1d25298SchristosL_wrap_around_window:
581*a1d25298Schristos; 793 "inffast.S"
582*a1d25298Schristos	mov  eax, [esp+48]
583*a1d25298Schristos	cmp  ecx,eax
584*a1d25298Schristos	jbe  L_contiguous_in_window
585*a1d25298Schristos
586*a1d25298Schristos	add  esi, [esp+52]
587*a1d25298Schristos	add  esi,eax
588*a1d25298Schristos	sub  esi,ecx
589*a1d25298Schristos	sub  ecx,eax
590*a1d25298Schristos
591*a1d25298Schristos
592*a1d25298Schristos	mov  eax, [esp+24]
593*a1d25298Schristos	cmp  eax,ecx
594*a1d25298Schristos	jbe  L_do_copy1
595*a1d25298Schristos
596*a1d25298Schristos	sub  eax,ecx
597*a1d25298Schristos	rep movsb
598*a1d25298Schristos	mov  esi, [esp+56]
599*a1d25298Schristos	mov  ecx, [esp+48]
600*a1d25298Schristos	cmp  eax,ecx
601*a1d25298Schristos	jbe  L_do_copy1
602*a1d25298Schristos
603*a1d25298Schristos	sub  eax,ecx
604*a1d25298Schristos	rep movsb
605*a1d25298Schristos	mov  esi,edi
606*a1d25298Schristos	sub  esi,edx
607*a1d25298Schristos	jmp  L_do_copy1
608*a1d25298Schristos
609*a1d25298SchristosL_contiguous_in_window:
610*a1d25298Schristos; 836 "inffast.S"
611*a1d25298Schristos	add  esi,eax
612*a1d25298Schristos	sub  esi,ecx
613*a1d25298Schristos
614*a1d25298Schristos
615*a1d25298Schristos	mov  eax, [esp+24]
616*a1d25298Schristos	cmp  eax,ecx
617*a1d25298Schristos	jbe  L_do_copy1
618*a1d25298Schristos
619*a1d25298Schristos	sub  eax,ecx
620*a1d25298Schristos	rep movsb
621*a1d25298Schristos	mov  esi,edi
622*a1d25298Schristos	sub  esi,edx
623*a1d25298Schristos
624*a1d25298SchristosL_do_copy1:
625*a1d25298Schristos; 862 "inffast.S"
626*a1d25298Schristos	mov  ecx,eax
627*a1d25298Schristos	rep movsb
628*a1d25298Schristos
629*a1d25298Schristos	mov  esi, [esp+44]
630*a1d25298Schristos	jmp  L_while_test
631*a1d25298Schristos; 878 "inffast.S"
632*a1d25298SchristosALIGN 4
633*a1d25298SchristosL_init_mmx:
634*a1d25298Schristos	emms
635*a1d25298Schristos
636*a1d25298Schristos
637*a1d25298Schristos
638*a1d25298Schristos
639*a1d25298Schristos
640*a1d25298Schristos	movd mm0,ebp
641*a1d25298Schristos	mov  ebp,ebx
642*a1d25298Schristos; 896 "inffast.S"
643*a1d25298Schristos	movd mm4,dword ptr [esp+0]
644*a1d25298Schristos	movq mm3,mm4
645*a1d25298Schristos	movd mm5,dword ptr [esp+4]
646*a1d25298Schristos	movq mm2,mm5
647*a1d25298Schristos	pxor mm1,mm1
648*a1d25298Schristos	mov  ebx, [esp+8]
649*a1d25298Schristos	jmp  L_do_loop_mmx
650*a1d25298Schristos
651*a1d25298SchristosALIGN 4
652*a1d25298SchristosL_do_loop_mmx:
653*a1d25298Schristos	psrlq mm0,mm1
654*a1d25298Schristos
655*a1d25298Schristos	cmp  ebp,32
656*a1d25298Schristos	ja  L_get_length_code_mmx
657*a1d25298Schristos
658*a1d25298Schristos	movd mm6,ebp
659*a1d25298Schristos	movd mm7,dword ptr [esi]
660*a1d25298Schristos	add  esi,4
661*a1d25298Schristos	psllq mm7,mm6
662*a1d25298Schristos	add  ebp,32
663*a1d25298Schristos	por mm0,mm7
664*a1d25298Schristos
665*a1d25298SchristosL_get_length_code_mmx:
666*a1d25298Schristos	pand mm4,mm0
667*a1d25298Schristos	movd eax,mm4
668*a1d25298Schristos	movq mm4,mm3
669*a1d25298Schristos	mov  eax, [ebx+eax*4]
670*a1d25298Schristos
671*a1d25298SchristosL_dolen_mmx:
672*a1d25298Schristos	movzx  ecx,ah
673*a1d25298Schristos	movd mm1,ecx
674*a1d25298Schristos	sub  ebp,ecx
675*a1d25298Schristos
676*a1d25298Schristos	test  al,al
677*a1d25298Schristos	jnz L_test_for_length_base_mmx
678*a1d25298Schristos
679*a1d25298Schristos	shr  eax,16
680*a1d25298Schristos	stosb
681*a1d25298Schristos
682*a1d25298SchristosL_while_test_mmx:
683*a1d25298Schristos
684*a1d25298Schristos
685*a1d25298Schristos	cmp  [esp+16],edi
686*a1d25298Schristos	jbe L_break_loop
687*a1d25298Schristos
688*a1d25298Schristos	cmp  [esp+20],esi
689*a1d25298Schristos	ja L_do_loop_mmx
690*a1d25298Schristos	jmp L_break_loop
691*a1d25298Schristos
692*a1d25298SchristosL_test_for_length_base_mmx:
693*a1d25298Schristos
694*a1d25298Schristos	mov  edx,eax
695*a1d25298Schristos	shr  edx,16
696*a1d25298Schristos
697*a1d25298Schristos	test  al,16
698*a1d25298Schristos	jz  L_test_for_second_level_length_mmx
699*a1d25298Schristos	and  eax,15
700*a1d25298Schristos	jz L_decode_distance_mmx
701*a1d25298Schristos
702*a1d25298Schristos	psrlq mm0,mm1
703*a1d25298Schristos	movd mm1,eax
704*a1d25298Schristos	movd ecx,mm0
705*a1d25298Schristos	sub  ebp,eax
706*a1d25298Schristos	and  ecx, [inflate_fast_mask+eax*4]
707*a1d25298Schristos	add  edx,ecx
708*a1d25298Schristos
709*a1d25298SchristosL_decode_distance_mmx:
710*a1d25298Schristos	psrlq mm0,mm1
711*a1d25298Schristos
712*a1d25298Schristos	cmp  ebp,32
713*a1d25298Schristos	ja L_get_dist_code_mmx
714*a1d25298Schristos
715*a1d25298Schristos	movd mm6,ebp
716*a1d25298Schristos	movd mm7,dword ptr [esi]
717*a1d25298Schristos	add  esi,4
718*a1d25298Schristos	psllq mm7,mm6
719*a1d25298Schristos	add  ebp,32
720*a1d25298Schristos	por mm0,mm7
721*a1d25298Schristos
722*a1d25298SchristosL_get_dist_code_mmx:
723*a1d25298Schristos	mov  ebx, [esp+12]
724*a1d25298Schristos	pand mm5,mm0
725*a1d25298Schristos	movd eax,mm5
726*a1d25298Schristos	movq mm5,mm2
727*a1d25298Schristos	mov  eax, [ebx+eax*4]
728*a1d25298Schristos
729*a1d25298SchristosL_dodist_mmx:
730*a1d25298Schristos
731*a1d25298Schristos	movzx  ecx,ah
732*a1d25298Schristos	mov  ebx,eax
733*a1d25298Schristos	shr  ebx,16
734*a1d25298Schristos	sub  ebp,ecx
735*a1d25298Schristos	movd mm1,ecx
736*a1d25298Schristos
737*a1d25298Schristos	test  al,16
738*a1d25298Schristos	jz L_test_for_second_level_dist_mmx
739*a1d25298Schristos	and  eax,15
740*a1d25298Schristos	jz L_check_dist_one_mmx
741*a1d25298Schristos
742*a1d25298SchristosL_add_bits_to_dist_mmx:
743*a1d25298Schristos	psrlq mm0,mm1
744*a1d25298Schristos	movd mm1,eax
745*a1d25298Schristos	movd ecx,mm0
746*a1d25298Schristos	sub  ebp,eax
747*a1d25298Schristos	and  ecx, [inflate_fast_mask+eax*4]
748*a1d25298Schristos	add  ebx,ecx
749*a1d25298Schristos
750*a1d25298SchristosL_check_window_mmx:
751*a1d25298Schristos	mov  [esp+44],esi
752*a1d25298Schristos	mov  eax,edi
753*a1d25298Schristos	sub  eax, [esp+40]
754*a1d25298Schristos
755*a1d25298Schristos	cmp  eax,ebx
756*a1d25298Schristos	jb L_clip_window_mmx
757*a1d25298Schristos
758*a1d25298Schristos	mov  ecx,edx
759*a1d25298Schristos	mov  esi,edi
760*a1d25298Schristos	sub  esi,ebx
761*a1d25298Schristos
762*a1d25298Schristos	sub  ecx,3
763*a1d25298Schristos	mov  al, [esi]
764*a1d25298Schristos	mov  [edi],al
765*a1d25298Schristos	mov  al, [esi+1]
766*a1d25298Schristos	mov  dl, [esi+2]
767*a1d25298Schristos	add  esi,3
768*a1d25298Schristos	mov  [edi+1],al
769*a1d25298Schristos	mov  [edi+2],dl
770*a1d25298Schristos	add  edi,3
771*a1d25298Schristos	rep movsb
772*a1d25298Schristos
773*a1d25298Schristos	mov  esi, [esp+44]
774*a1d25298Schristos	mov  ebx, [esp+8]
775*a1d25298Schristos	jmp  L_while_test_mmx
776*a1d25298Schristos
777*a1d25298SchristosALIGN 4
778*a1d25298SchristosL_check_dist_one_mmx:
779*a1d25298Schristos	cmp  ebx,1
780*a1d25298Schristos	jne  L_check_window_mmx
781*a1d25298Schristos	cmp  [esp+40],edi
782*a1d25298Schristos	je   L_check_window_mmx
783*a1d25298Schristos
784*a1d25298Schristos	dec  edi
785*a1d25298Schristos	mov  ecx,edx
786*a1d25298Schristos	mov  al, [edi]
787*a1d25298Schristos	sub  ecx,3
788*a1d25298Schristos
789*a1d25298Schristos	mov  [edi+1],al
790*a1d25298Schristos	mov  [edi+2],al
791*a1d25298Schristos	mov  [edi+3],al
792*a1d25298Schristos	add  edi,4
793*a1d25298Schristos	rep stosb
794*a1d25298Schristos
795*a1d25298Schristos	mov  ebx, [esp+8]
796*a1d25298Schristos	jmp  L_while_test_mmx
797*a1d25298Schristos
798*a1d25298SchristosALIGN 4
799*a1d25298SchristosL_test_for_second_level_length_mmx:
800*a1d25298Schristos	test  al,64
801*a1d25298Schristos	jnz L_test_for_end_of_block
802*a1d25298Schristos
803*a1d25298Schristos	and  eax,15
804*a1d25298Schristos	psrlq mm0,mm1
805*a1d25298Schristos	movd ecx,mm0
806*a1d25298Schristos	and  ecx, [inflate_fast_mask+eax*4]
807*a1d25298Schristos	add  ecx,edx
808*a1d25298Schristos	mov  eax, [ebx+ecx*4]
809*a1d25298Schristos	jmp L_dolen_mmx
810*a1d25298Schristos
811*a1d25298SchristosALIGN 4
812*a1d25298SchristosL_test_for_second_level_dist_mmx:
813*a1d25298Schristos	test  al,64
814*a1d25298Schristos	jnz L_invalid_distance_code
815*a1d25298Schristos
816*a1d25298Schristos	and  eax,15
817*a1d25298Schristos	psrlq mm0,mm1
818*a1d25298Schristos	movd ecx,mm0
819*a1d25298Schristos	and  ecx, [inflate_fast_mask+eax*4]
820*a1d25298Schristos	mov  eax, [esp+12]
821*a1d25298Schristos	add  ecx,ebx
822*a1d25298Schristos	mov  eax, [eax+ecx*4]
823*a1d25298Schristos	jmp  L_dodist_mmx
824*a1d25298Schristos
825*a1d25298SchristosALIGN 4
826*a1d25298SchristosL_clip_window_mmx:
827*a1d25298Schristos
828*a1d25298Schristos	mov  ecx,eax
829*a1d25298Schristos	mov  eax, [esp+52]
830*a1d25298Schristos	neg  ecx
831*a1d25298Schristos	mov  esi, [esp+56]
832*a1d25298Schristos
833*a1d25298Schristos	cmp  eax,ebx
834*a1d25298Schristos	jb  L_invalid_distance_too_far
835*a1d25298Schristos
836*a1d25298Schristos	add  ecx,ebx
837*a1d25298Schristos	cmp  dword ptr [esp+48],0
838*a1d25298Schristos	jne  L_wrap_around_window_mmx
839*a1d25298Schristos
840*a1d25298Schristos	sub  eax,ecx
841*a1d25298Schristos	add  esi,eax
842*a1d25298Schristos
843*a1d25298Schristos	cmp  edx,ecx
844*a1d25298Schristos	jbe  L_do_copy1_mmx
845*a1d25298Schristos
846*a1d25298Schristos	sub  edx,ecx
847*a1d25298Schristos	rep movsb
848*a1d25298Schristos	mov  esi,edi
849*a1d25298Schristos	sub  esi,ebx
850*a1d25298Schristos	jmp  L_do_copy1_mmx
851*a1d25298Schristos
852*a1d25298Schristos	cmp  edx,ecx
853*a1d25298Schristos	jbe  L_do_copy1_mmx
854*a1d25298Schristos
855*a1d25298Schristos	sub  edx,ecx
856*a1d25298Schristos	rep movsb
857*a1d25298Schristos	mov  esi,edi
858*a1d25298Schristos	sub  esi,ebx
859*a1d25298Schristos	jmp  L_do_copy1_mmx
860*a1d25298Schristos
861*a1d25298SchristosL_wrap_around_window_mmx:
862*a1d25298Schristos
863*a1d25298Schristos	mov  eax, [esp+48]
864*a1d25298Schristos	cmp  ecx,eax
865*a1d25298Schristos	jbe  L_contiguous_in_window_mmx
866*a1d25298Schristos
867*a1d25298Schristos	add  esi, [esp+52]
868*a1d25298Schristos	add  esi,eax
869*a1d25298Schristos	sub  esi,ecx
870*a1d25298Schristos	sub  ecx,eax
871*a1d25298Schristos
872*a1d25298Schristos
873*a1d25298Schristos	cmp  edx,ecx
874*a1d25298Schristos	jbe  L_do_copy1_mmx
875*a1d25298Schristos
876*a1d25298Schristos	sub  edx,ecx
877*a1d25298Schristos	rep movsb
878*a1d25298Schristos	mov  esi, [esp+56]
879*a1d25298Schristos	mov  ecx, [esp+48]
880*a1d25298Schristos	cmp  edx,ecx
881*a1d25298Schristos	jbe  L_do_copy1_mmx
882*a1d25298Schristos
883*a1d25298Schristos	sub  edx,ecx
884*a1d25298Schristos	rep movsb
885*a1d25298Schristos	mov  esi,edi
886*a1d25298Schristos	sub  esi,ebx
887*a1d25298Schristos	jmp  L_do_copy1_mmx
888*a1d25298Schristos
889*a1d25298SchristosL_contiguous_in_window_mmx:
890*a1d25298Schristos
891*a1d25298Schristos	add  esi,eax
892*a1d25298Schristos	sub  esi,ecx
893*a1d25298Schristos
894*a1d25298Schristos
895*a1d25298Schristos	cmp  edx,ecx
896*a1d25298Schristos	jbe  L_do_copy1_mmx
897*a1d25298Schristos
898*a1d25298Schristos	sub  edx,ecx
899*a1d25298Schristos	rep movsb
900*a1d25298Schristos	mov  esi,edi
901*a1d25298Schristos	sub  esi,ebx
902*a1d25298Schristos
903*a1d25298SchristosL_do_copy1_mmx:
904*a1d25298Schristos; Copies the final edx bytes, reloads esi and ebx from the stack frame, and jumps to L_while_test_mmx.
905*a1d25298Schristos
906*a1d25298Schristos	mov  ecx,edx			; rep count = edx
907*a1d25298Schristos	rep movsb			; copy edx bytes, [esi] -> [edi]
908*a1d25298Schristos
909*a1d25298Schristos	mov  esi, [esp+44]		; esi was used as the copy source; reload it from [esp+44] (presumably the saved input pointer -- TODO confirm)
910*a1d25298Schristos	mov  ebx, [esp+8]		; reload ebx from [esp+8] (meaning of this slot is defined outside this excerpt)
911*a1d25298Schristos	jmp  L_while_test_mmx
912*a1d25298Schristos; 1174 "inffast.S"
913*a1d25298SchristosL_invalid_distance_code:
914*a1d25298Schristos; Error exit: passes invalid_distance_code_msg in ecx and INFLATE_MODE_BAD in edx
915*a1d25298Schristos; to L_update_stream_state, which stores them and leaves the loop.
916*a1d25298Schristos
917*a1d25298Schristos
918*a1d25298Schristos
919*a1d25298Schristos	mov  ecx, invalid_distance_code_msg	; message operand (assumed to resolve to the string's address -- TODO confirm)
920*a1d25298Schristos	mov  edx,INFLATE_MODE_BAD
921*a1d25298Schristos	jmp  L_update_stream_state
922*a1d25298Schristos
923*a1d25298SchristosL_test_for_end_of_block:
924*a1d25298Schristos; Tests bit 5 (value 32) of al. Clear: jump to L_invalid_literal_length_code.
925*a1d25298Schristos; Set: exit through L_update_stream_state with mode INFLATE_MODE_TYPE and no message.
926*a1d25298Schristos
927*a1d25298Schristos
928*a1d25298Schristos
929*a1d25298Schristos	test  al,32
930*a1d25298Schristos	jz  L_invalid_literal_length_code	; bit clear
931*a1d25298Schristos
932*a1d25298Schristos	mov  ecx,0			; ecx = 0 makes L_update_stream_state skip the message store
933*a1d25298Schristos	mov  edx,INFLATE_MODE_TYPE
934*a1d25298Schristos	jmp  L_update_stream_state
935*a1d25298Schristos
936*a1d25298SchristosL_invalid_literal_length_code:
937*a1d25298Schristos; Error exit: passes invalid_literal_length_code_msg in ecx and INFLATE_MODE_BAD in edx
938*a1d25298Schristos; to L_update_stream_state.
939*a1d25298Schristos
940*a1d25298Schristos
941*a1d25298Schristos
942*a1d25298Schristos	mov  ecx, invalid_literal_length_code_msg	; message operand (assumed to resolve to the string's address -- TODO confirm)
943*a1d25298Schristos	mov  edx,INFLATE_MODE_BAD
944*a1d25298Schristos	jmp  L_update_stream_state
945*a1d25298Schristos
946*a1d25298SchristosL_invalid_distance_too_far:
947*a1d25298Schristos; Error exit: reloads esi from [esp+44], then passes invalid_distance_too_far_msg in ecx
948*a1d25298Schristos; and INFLATE_MODE_BAD in edx to L_update_stream_state.
949*a1d25298Schristos
950*a1d25298Schristos	mov  esi, [esp+44]		; reload esi (presumably the saved input pointer -- TODO confirm)
951*a1d25298Schristos	mov  ecx, invalid_distance_too_far_msg	; message operand (assumed to resolve to the string's address -- TODO confirm)
952*a1d25298Schristos	mov  edx,INFLATE_MODE_BAD
953*a1d25298Schristos	jmp  L_update_stream_state
954*a1d25298Schristos
955*a1d25298SchristosL_update_stream_state:
956*a1d25298Schristos; In: ecx = message operand or 0 (no message), edx = new mode value. Ends by jumping to L_break_loop.
957*a1d25298Schristos	mov  eax, [esp+88]		; eax = [esp+88] (presumably the stream pointer argument -- TODO confirm)
958*a1d25298Schristos	test  ecx,ecx
959*a1d25298Schristos	jz  L_skip_msg			; ecx == 0: leave [eax+24] untouched
960*a1d25298Schristos	mov  [eax+24],ecx		; store the message at [eax+24] (presumably the stream's msg field -- TODO confirm)
961*a1d25298SchristosL_skip_msg:
962*a1d25298Schristos	mov  eax, [eax+28]		; eax = [eax+28], the base used with mode_state (presumably the state pointer -- TODO confirm)
963*a1d25298Schristos	mov  [eax+mode_state],edx	; write the new mode
964*a1d25298Schristos	jmp  L_break_loop
965*a1d25298Schristos
966*a1d25298SchristosALIGN 4
967*a1d25298SchristosL_break_loop:
968*a1d25298Schristos; 1243 "inffast.S"
969*a1d25298Schristos	cmp  dword ptr [inflate_fast_use_mmx],2
970*a1d25298Schristos	jne  L_update_next_in		; flag != 2: ebx is used as it stands
971*a1d25298Schristos; flag == 2: ebx is taken from ebp before falling into L_update_next_in
972*a1d25298Schristos; (presumably the MMX path keeps the bit count in ebp -- TODO confirm)
973*a1d25298Schristos
974*a1d25298Schristos	mov  ebx,ebp
975*a1d25298Schristos
976*a1d25298SchristosL_update_next_in:
977*a1d25298Schristos; 1266 "inffast.S"
978*a1d25298Schristos	mov  eax, [esp+88]		; eax = [esp+88] (presumably the stream pointer -- TODO confirm)
979*a1d25298Schristos	mov  ecx,ebx
980*a1d25298Schristos	mov  edx, [eax+28]		; edx = [eax+28], base for bits_state / hold_state stores
981*a1d25298Schristos	shr  ecx,3			; ecx = ebx / 8 (ebx presumably a bit count, so ecx = whole bytes -- TODO confirm)
982*a1d25298Schristos	sub  esi,ecx			; move esi back by that many bytes
983*a1d25298Schristos	shl  ecx,3			; ecx = 8 * (ebx / 8)
984*a1d25298Schristos	sub  ebx,ecx			; ebx = ebx mod 8
985*a1d25298Schristos	mov  [eax+12],edi		; store edi at [eax+12] (presumably next_out -- TODO confirm)
986*a1d25298Schristos	mov  [edx+bits_state],ebx	; store the remainder as bits_state
987*a1d25298Schristos	mov  ecx,ebx			; keep it in ecx: cl is the shift count for the mask built after L_buf_not_used
988*a1d25298Schristos
989*a1d25298Schristos	lea  ebx, [esp+28]		; ebx = address of the stack slot at esp+28
990*a1d25298Schristos	cmp  [esp+20],ebx		; does [esp+20] hold that address?
991*a1d25298Schristos	jne  L_buf_not_used		; no: esi and [esp+20] are used unchanged
992*a1d25298Schristos; yes: rebase esi and [esp+20] from the stack slot onto the pointer/length stored at [eax+0] / [eax+4]
993*a1d25298Schristos	sub  esi,ebx			; esi = offset from the stack slot
994*a1d25298Schristos	mov  ebx, [eax+0]
995*a1d25298Schristos	mov  [esp+20],ebx
996*a1d25298Schristos	add  esi,ebx			; esi = [eax+0] + offset
997*a1d25298Schristos	mov  ebx, [eax+4]
998*a1d25298Schristos	sub  ebx,11
999*a1d25298Schristos	add  [esp+20],ebx		; [esp+20] = [eax+0] + [eax+4] - 11
1000*a1d25298Schristos
1001*a1d25298SchristosL_buf_not_used:
1002*a1d25298Schristos	mov  [eax+0],esi		; store esi at [eax+0] (presumably next_in -- TODO confirm)
1003*a1d25298Schristos
1004*a1d25298Schristos	mov  ebx,1
1005*a1d25298Schristos	shl  ebx,cl
1006*a1d25298Schristos	dec  ebx			; ebx = (1 << cl) - 1, the mask applied to ebp at L_update_hold
1007*a1d25298Schristos
1008*a1d25298Schristos
1009*a1d25298Schristos
1010*a1d25298Schristos
1011*a1d25298Schristos
1012*a1d25298Schristos	cmp  dword ptr [inflate_fast_use_mmx],2
1013*a1d25298Schristos	jne  L_update_hold		; flag != 2: ebp is masked as it stands
1014*a1d25298Schristos; flag == 2: ebp is first refreshed from mm0 shifted right by mm1
1015*a1d25298Schristos
1016*a1d25298Schristos
1017*a1d25298Schristos	psrlq mm0,mm1			; mm0 >>= mm1 (64-bit logical shift)
1018*a1d25298Schristos	movd ebp,mm0			; ebp = low 32 bits of mm0
1019*a1d25298Schristos
1020*a1d25298Schristos	emms				; clear MMX state; no MMX instruction follows in this routine
1021*a1d25298Schristos
1022*a1d25298SchristosL_update_hold:
1023*a1d25298Schristos; Stores the masked ebp as hold_state, then writes [esp+20] - esi + 11 to [eax+4].
1024*a1d25298Schristos
1025*a1d25298Schristos
1026*a1d25298Schristos	and  ebp,ebx			; keep only the low bits selected by the mask in ebx
1027*a1d25298Schristos	mov  [edx+hold_state],ebp
1028*a1d25298Schristos
1029*a1d25298Schristos
1030*a1d25298Schristos
1031*a1d25298Schristos
1032*a1d25298Schristos	mov  ebx, [esp+20]
1033*a1d25298Schristos	cmp  ebx,esi
1034*a1d25298Schristos	jbe  L_last_is_smaller		; [esp+20] <= esi (unsigned)
1035*a1d25298Schristos
1036*a1d25298Schristos	sub  ebx,esi
1037*a1d25298Schristos	add  ebx,11
1038*a1d25298Schristos	mov  [eax+4],ebx		; [eax+4] = [esp+20] - esi + 11 (presumably avail_in -- TODO confirm)
1039*a1d25298Schristos	jmp  L_fixup_out
1040*a1d25298SchristosL_last_is_smaller:
1041*a1d25298Schristos	sub  esi,ebx
1042*a1d25298Schristos	neg  esi
1043*a1d25298Schristos	add  esi,11
1044*a1d25298Schristos	mov  [eax+4],esi		; [eax+4] = 11 - (esi - [esp+20]); same 32-bit result as the other path; falls through to L_fixup_out
1045*a1d25298Schristos
1046*a1d25298Schristos
1047*a1d25298Schristos
1048*a1d25298Schristos
1049*a1d25298SchristosL_fixup_out:
1050*a1d25298Schristos; Writes [esp+16] - edi + 257 to [eax+16].
1051*a1d25298Schristos	mov  ebx, [esp+16]
1052*a1d25298Schristos	cmp  ebx,edi
1053*a1d25298Schristos	jbe  L_end_is_smaller		; [esp+16] <= edi (unsigned)
1054*a1d25298Schristos
1055*a1d25298Schristos	sub  ebx,edi
1056*a1d25298Schristos	add  ebx,257
1057*a1d25298Schristos	mov  [eax+16],ebx		; [eax+16] = [esp+16] - edi + 257 (presumably avail_out -- TODO confirm)
1058*a1d25298Schristos	jmp  L_done
1059*a1d25298SchristosL_end_is_smaller:
1060*a1d25298Schristos	sub  edi,ebx
1061*a1d25298Schristos	neg  edi
1062*a1d25298Schristos	add  edi,257
1063*a1d25298Schristos	mov  [eax+16],edi		; [eax+16] = 257 - (edi - [esp+16]); same 32-bit result as the other path
1064*a1d25298Schristos
1065*a1d25298Schristos
1066*a1d25298Schristos
1067*a1d25298Schristos
1068*a1d25298Schristos
1069*a1d25298SchristosL_done:
1070*a1d25298Schristos	add  esp,64			; release the 64 bytes of stack below the saved registers
1071*a1d25298Schristos	popfd				; restore EFLAGS
1072*a1d25298Schristos	pop  ebx			; restore ebx, ebp, esi, edi (presumably mirroring the pushes at entry -- TODO confirm)
1073*a1d25298Schristos	pop  ebp
1074*a1d25298Schristos	pop  esi
1075*a1d25298Schristos	pop  edi
1076*a1d25298Schristos	ret
1077*a1d25298Schristos_inflate_fast endp
1078*a1d25298Schristos
1079*a1d25298Schristos_TEXT	ends
1080*a1d25298Schristosend
1081