/* OpenCP Module Player
 * copyright (c) 2010 Stian Skjelstad <stian@nixia.no>
 *
 * ASM emulated routines for FPU mixer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
20 
#include <stdint.h>

#include "asm_emu/x86.h"
22 dwmixfa_state_t dwmixfa_state;
23 
/*#define ASM_DEBUG 1*/
#ifdef ASM_DEBUG
#include <stdarg.h>
#include <stdio.h>

/*
 * Trace helper, only compiled when ASM_DEBUG is defined.
 * Writes "[dwmixfa.c]: " followed by the printf-style formatted message to
 * stderr.
 */
static void debug_printf(const char *format, ...)
{
	va_list args;

	fprintf(stderr, "[dwmixfa.c]: ");
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
#else
/* Tracing disabled: each call becomes a no-op expression and its arguments
 * are not evaluated. Standard C99 variadic macro form (the former
 * "args..." spelling is a GNU extension). */
#define debug_printf(...) ((void)0)
#endif
41 
42 
/* Number of per-voice slots; prepare_mixer() clears exactly this many. */
#define MAXVOICES MIXF_MAXCHAN
/* AND-mask that clears MIXF_PLAYING; the mix routines apply it to looptype
 * when a non-looping sample reaches its end. */
#define FLAG_DISABLED (~MIXF_PLAYING)
/* Compiled out: the former stand-alone globals. The routines in this file use
 * the like-named members of dwmixfa_state instead; the list is kept as a
 * reference for what each member means. */
#if 0
float   *tempbuf;               /* pointer to 32 bit mix buffer (nsamples * 4) */
void    *outbuf;                /* pointer to mixed buffer (nsamples * 2) */
uint32_t nsamples;              /* # of samples to mix */
uint32_t nvoices;               /* # of voices to mix */
uint32_t freqw[MAXVOICES];      /* frequency (whole part) */
uint32_t freqf[MAXVOICES];      /* frequency (fractional part) */
float   *smpposw[MAXVOICES];    /* sample position (whole part (pointer!)) */
uint32_t smpposf[MAXVOICES];    /* sample position (fractional part) */
float   *loopend[MAXVOICES];    /* pointer to loop end */
uint32_t looplen[MAXVOICES];    /* loop length in samples */
float    volleft[MAXVOICES];    /* float: left volume (1.0=normal) */
float    volright[MAXVOICES];   /* float: right volume (1.0=normal) */
float    rampleft[MAXVOICES];   /* float: left volramp (dvol/sample) */
float    rampright[MAXVOICES];  /* float: right volramp (dvol/sample) */
uint32_t voiceflags[MAXVOICES]; /* voice status flags */
float    ffreq[MAXVOICES];      /* filter frequency (0<=x<=1) */
float    freso[MAXVOICES];      /* filter resonance (0<=x<1) */
float    fadeleft=0.0;          /* 0 */
float    fl1[MAXVOICES];        /* filter lp buffer */
float    fb1[MAXVOICES];        /* filter bp buffer */
float    faderight=0.0;         /* 0 */
int      isstereo;              /* flag for stereo output */
int      outfmt;                /* output format */
float    voll=0.0;
float    volr=0.0;
float    ct0[256];              /* interpolation tab for s[-1] */
float    ct1[256];              /* interpolation tab for s[0] */
float    ct2[256];              /* interpolation tab for s[1] */
float    ct3[256];              /* interpolation tab for s[2] */
struct mixfpostprocregstruct *postprocs;
                                /* pointer to postproc list */
uint32_t samprate;              /* sampling rate */



static float volrl;
static float volrr;
#if 0
static float eins=1.0;
#endif
#endif
/* -1.0 ("eins" is German for "one"); loaded onto the FPU stack by the
 * interpolating mixers. */
static const float minuseins=-1.0;
/* Upper / lower sample bounds; not referenced in the part of the file shown
 * here (presumably used by the clip_* routines - confirm). */
static const float clampmax=32767.0;
static const float clampmin=-32767.0;
/* Factor the fade accumulators are multiplied with for every sample written
 * by clearbufm()/clearbufs(). */
static const float cremoveconst=0.992;
/* Threshold mixer() compares |fadeleft| and |faderight| against before
 * deciding whether to reset them to zero. It is a float because the original
 * assembler compared the raw 32-bit patterns; see the TODO in mixer(). */
static const float minampl=0.0001;
/* Compiled out: former file-local scratch variables of the assembler version.
 * The code below uses the like-named members of dwmixfa_state instead. */
#if 0
static uint32_t magic1;  /* 32bit in assembler used */
static uint16_t clipval; /* 16bit in assembler used */
static uint32_t mixlooplen; /* 32bit in assembler used, decimal. length of loop in samples */
static uint32_t __attribute__ ((used)) looptype; /* 32bit in assembler used, local version of voiceflags[N] */
static float __attribute__ ((used)) ffrq;
static float __attribute__ ((used)) frez;
static float __attribute__ ((used)) __fl1;
static float __attribute__ ((used)) __fb1;

#endif
103 
/*
 * Signature of an output clipper: converts `count` floats from `input` into
 * the output buffer `output`. mixer() passes nsamples (doubled for stereo)
 * as `count`.
 */
typedef void(*clippercall)(float *input, void *output, uint_fast32_t count);

static void clip_16s(float *input, void *output, uint_fast32_t count);
static void clip_16u(float *input, void *output, uint_fast32_t count);
static void clip_8s(float *input, void *output, uint_fast32_t count);
static void clip_8u(float *input, void *output, uint_fast32_t count);

/* Indexed by dwmixfa_state.outfmt in mixer(). */
static const clippercall clippers[4] = {clip_8s, clip_8u, clip_16s, clip_16u};
112 
/*
 * Per-voice mix routine signature. Parameter names carry the x86 register the
 * original assembler used for the value.
 *
 *   edi_destptr            destination float mix buffer (accumulated into)
 *   eax_sample_pos         in/out: current sample position, whole part
 *   edx_sample_pos_fract   in/out: current sample position, fractional part
 *   ebx_sample_pitch       whole-sample step per output sample
 *   esi_sample_pitch_fract fractional step per output sample
 *   ebp_loopend            position at which loop/end handling kicks in
 *
 * additional data come from globals (members of dwmixfa_state):
 *	mixlooplen = length of sample loop  R
 *	volr                                R
 *	voll                                R
 *	fadeleft                            R
 *	faderight                           R
 *	looptype = sample flags             RW
 */
typedef void(*mixercall)(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mix_0   (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_n  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_n  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_i  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_i  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixm_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
static void mixs_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);

/*
 * Dispatch table used by mixer(): the index is a 4-bit value built from
 * isstereo OR-ed with voice flag bits and masked with 15. Even slots hold the
 * mixm_* routines, odd slots the mixs_* routines; slots 6, 7, 14 and 15 fall
 * back to mix_0, which only advances the sample position.
 */
static const mixercall mixers[16] = {
	mixm_n,   mixs_n,   mixm_i,  mixs_i,
	mixm_i2,  mixs_i2,  mix_0,   mix_0,
	mixm_nf,  mixs_nf,  mixm_if, mixs_if,
	mixm_i2f, mixs_i2f, mix_0,   mix_0
};
142 
/*
 * Memory-write hook handed to init_assembler_state(). Intentionally empty:
 * the routines in this file perform their memory accesses directly through C
 * pointers, so there is nothing to do here.
 */
static void writecallback(uint_fast16_t selector, uint_fast32_t addr, int size, uint_fast32_t data)
{
}
146 
/*
 * Memory-read hook handed to init_assembler_state(). Always returns 0,
 * whatever selector, address and size are requested.
 */
static uint_fast32_t readcallback(uint_fast16_t selector, uint_fast32_t addr, int size)
{
	return 0;
}
151 
prepare_mixer(void)152 void prepare_mixer (void)
153 {
154 	struct assembler_state_t state;
155 
156 	init_assembler_state(&state, writecallback, readcallback);
157 	asm_xorl(&state, state.eax, &state.eax);
158 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fadeleft);
159 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.faderight);
160 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrl);
161 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrr);
162 	asm_xorl(&state, state.ecx, &state.ecx);
163 prepare_mixer_fillloop:
164 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volleft[state.ecx]);
165 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volright[state.ecx]);
166 	asm_incl(&state, &state.ecx);
167 	asm_cmpl(&state, MAXVOICES, state.ecx);
168 	asm_jne(&state, prepare_mixer_fillloop);
169 }
170 
clearbufm(float ** edi_buffer,uint32_t * count)171 static inline void clearbufm(float **edi_buffer, uint32_t *count)
172 {
173 	struct assembler_state_t state;
174 
175 	debug_printf("clearbufm {\n");
176 
177 	init_assembler_state(&state, writecallback, readcallback);
178 	asm_movl(&state, 0x12345678/**edi_buffer*/, &state.edi);
179 	asm_movl(&state, *count, &state.ecx);
180 
181 	asm_flds(&state, cremoveconst);
182 	asm_flds(&state, dwmixfa_state.fadeleft);
183 clearbufm_clloop:
184 		asm_fsts(&state, *edi_buffer+0);
185 		asm_fmul(&state, 1, 0);
186 		asm_leal(&state, state.edi+4, &state.edi); *edi_buffer+=1;
187 		asm_decl(&state, &state.ecx);
188 	asm_jnz(&state, clearbufm_clloop);
189 
190 	asm_fstps(&state, &dwmixfa_state.fadeleft);
191 	asm_fstp_st(&state, 0);
192 
193 	asm_movl(&state, state.ecx, count);
194 	debug_printf("}\n");
195 }
196 
clearbufs(float ** edi_buffer,uint32_t * count)197 static inline void clearbufs(float **edi_buffer, uint32_t *count)
198 {
199 	struct assembler_state_t state;
200 
201 	debug_printf("clearbufs {\n");
202 
203 	init_assembler_state(&state, writecallback, readcallback);
204 	asm_movl(&state, 0x12345678/**edi_buffer*/, &state.edi);
205 	asm_movl(&state, *count, &state.ecx);
206 
207 	asm_flds(&state, cremoveconst);
208 	asm_flds(&state, dwmixfa_state.faderight);
209 	asm_flds(&state, dwmixfa_state.fadeleft);
210 clearbufs_clloop:
211 		asm_fsts(&state, *edi_buffer+0);
212 		asm_fmul(&state, 2, 0);
213 		asm_fxch_st(&state, 1);
214 		asm_fsts(&state, *edi_buffer+1);
215 		asm_fmul(&state, 2, 0);
216 		asm_fxch_st(&state, 1);
217 		asm_leal(&state, state.edi+8, &state.edi); *edi_buffer+=2;
218 		asm_decl(&state, &state.ecx);
219 	asm_jnz(&state, clearbufs_clloop);
220 	asm_fstps(&state, &dwmixfa_state.fadeleft);
221 	asm_fstps(&state, &dwmixfa_state.faderight);
222 	asm_fstp_st(&state, 0);
223 
224 	asm_movl(&state, state.ecx, count);
225 
226 	debug_printf("}\n");
227 }
228 
229 
mixer(void)230 void mixer (void)
231 {
232 	void *fadeleft_ptr = &dwmixfa_state.fadeleft;
233 	void *faderight_ptr = &dwmixfa_state.faderight;
234 	void *volr_ptr = &dwmixfa_state.volr;
235 	void *voll_ptr = &dwmixfa_state.voll;
236 	void *__fl1_ptr = &dwmixfa_state.__fl1;
237 	void *__fb1_ptr = &dwmixfa_state.__fb1;
238 
239 	struct assembler_state_t state;
240 	float *edi_mirror;
241 	void *edi_mirror2;
242 	float *esi_mirror2;
243 	float *eax_mirror;
244 	float *ebp_mirror;
245 	mixercall ecx_mirror;
246 	clippercall eax_mirror2;
247 	struct mixfpostprocregstruct *esi_mirror;
248 
249 	init_assembler_state(&state, writecallback, readcallback);
250 
251 	debug_printf("mixer {\n");
252 
253 	asm_pushl(&state, state.ebp);
254 	asm_finit(&state);
255 	asm_xorl(&state, state.ebx, &state.ebx);
256 	asm_movl(&state, *(uint32_t *)fadeleft_ptr, &state.eax);
257 	asm_andl(&state, 0x7fffffff, &state.eax);
258 	asm_cmpl(&state, state.eax, minampl); /* TODO, comparing of floats, typecasted to uint32_t */
259 	asm_ja(&state, mixer_nocutfl);
260 	asm_movl(&state, state.ebx, (uint32_t *)fadeleft_ptr); /* mixing of float and integer numbers.... "great" */
261 mixer_nocutfl:
262 	asm_movl(&state, *(uint32_t *)faderight_ptr, &state.eax);
263 	asm_andl(&state, 0x7fffffff, &state.eax);
264 	asm_cmpl(&state, state.eax, minampl); /* TODO, comparing of floats, typecasted to uint32_t */
265 	asm_ja(&state, mixer_nocutfr);
266 	asm_movl(&state, state.ebx, (uint32_t *)faderight_ptr); /* mixing of float and integer numbers.... "great" */
267 mixer_nocutfr:
268 	asm_movl(&state, 0x12345678/*tempbuf*/, &state.edi); edi_mirror = dwmixfa_state.tempbuf;
269 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
270 	asm_orl(&state, state.ecx, &state.ecx);
271 	asm_jz(&state, mixer_endall);
272 	asm_movl(&state, dwmixfa_state.isstereo, &state.eax);
273 	asm_orl(&state, state.eax, &state.eax);
274 	asm_jnz(&state, mixer_clearst);
275 		clearbufm(&edi_mirror, &state.ecx);
276 	asm_jmp(&state, mixer_clearend);
277 mixer_clearst:
278 		clearbufs(&edi_mirror, &state.ecx);
279 mixer_clearend:
280 	asm_movl(&state, dwmixfa_state.nvoices, &state.ecx);
281 	asm_decl(&state, &state.ecx);
282 
283 mixer_MixNext:
284 	debug_printf("Doing channel: %d\n", state.ecx);
285 	asm_movl(&state, dwmixfa_state.voiceflags[state.ecx], &state.eax);
286 	asm_testl(&state, MIXF_PLAYING, state.eax);
287 	asm_jz(&state, mixer_SkipVoice);
288 
289 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
290 
291 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.volleft[state.ecx], &state.eax);
292 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.volright[state.ecx], &state.ebx);
293 	asm_movl(&state, state.eax, (uint32_t *)voll_ptr);
294 	asm_movl(&state, state.ebx, (uint32_t *)volr_ptr);
295 
296 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.rampleft[state.ecx], &state.eax);
297 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.rampright[state.ecx], &state.ebx);
298 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrl);
299 	asm_movl(&state, state.ebx, (uint32_t *)&dwmixfa_state.volrr);
300 
301 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.ffreq[state.ecx], &state.eax);
302 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.ffrq);
303 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.freso[state.ecx], &state.eax);
304 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.frez);
305 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.fl1[state.ecx], &state.eax);
306 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.__fl1);
307 	asm_movl(&state, *(uint32_t *)&dwmixfa_state.fb1[state.ecx], &state.eax);
308 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.__fb1);
309 
310 	asm_movl(&state, dwmixfa_state.looplen[state.ecx], &state.eax);
311 	asm_movl(&state, state.eax, &dwmixfa_state.mixlooplen);
312 
313 	asm_movl(&state, dwmixfa_state.freqw[state.ecx], &state.ebx);
314 	asm_movl(&state, dwmixfa_state.freqf[state.ecx], &state.esi);
315 
316 	asm_movl(&state, 0x12345678, &state.eax); eax_mirror = dwmixfa_state.smpposw[state.ecx];
317 
318 	asm_movl(&state, dwmixfa_state.smpposf[state.ecx], &state.edx);
319 
320 	asm_movl(&state, 0x12345678, &state.ebp); ebp_mirror = dwmixfa_state.loopend[state.ecx];
321 
322 	asm_pushl(&state, state.ecx);
323 	asm_movl(&state, 0x12345678, &state.edi); edi_mirror = dwmixfa_state.tempbuf;
324 	asm_movl(&state, dwmixfa_state.isstereo, &state.ecx);
325 	asm_orl(&state, dwmixfa_state.voiceflags[state.ecx], &state.ecx);
326 	asm_andl(&state, 15, &state.ecx);
327 	/*asm_movl(&state, 0x12345678, &state.ecx);*/ ecx_mirror = mixers[state.ecx];
328 		ecx_mirror(edi_mirror, &eax_mirror, &state.edx, state.ebx, state.esi, ebp_mirror);
329 	asm_popl(&state, &state.ecx);
330 /*
331 	asm_movl(&state, eax, smposw[state.ecx]);*/dwmixfa_state.smpposw[state.ecx] = eax_mirror;
332 	asm_movl(&state, state.edx, &dwmixfa_state.smpposf[state.ecx]);
333 
334 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
335 	asm_movl(&state, state.eax, &dwmixfa_state.voiceflags[state.ecx]);
336 
337 	/* update volumes */
338 	asm_movl(&state, *(uint32_t *)voll_ptr, &state.eax);
339 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volleft[state.ecx]);
340 	asm_movl(&state, *(uint32_t *)volr_ptr, &state.eax);
341 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volright[state.ecx]);
342 
343 	asm_movl(&state, *(uint32_t *)__fl1_ptr, &state.eax);
344 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fl1[state.ecx]);
345 	asm_movl(&state, *(uint32_t *)__fb1_ptr, &state.eax);
346 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fb1[state.ecx]);
347 
348 mixer_SkipVoice:
349 		asm_decl(&state, &state.ecx);
350 	asm_jns(&state, mixer_MixNext);
351 
352 	asm_movl(&state, 0x12345678 /*postprocs*/, &state.esi); esi_mirror = dwmixfa_state.postprocs;
353 mixer_PostprocLoop:
354 /*
355 	asm_orl(&state, state.esi, state.esi);*/ write_zf(state.eflags, !esi_mirror);
356 	asm_jz(&state, mixer_PostprocEnd);
357 	asm_movl(&state, dwmixfa_state.nsamples, &state.edx);
358 	asm_movl(&state, dwmixfa_state.isstereo, &state.ecx);
359 	asm_movl(&state, dwmixfa_state.samprate, &state.ebx);
360 	asm_movl(&state, 0x12345678, &state.eax); eax_mirror = dwmixfa_state.tempbuf;
361 	/* call *state.esi*/ esi_mirror->Process(eax_mirror, state.edx, state.ebx, state.ecx);
362 	asm_movl(&state, state.esi+12, &state.esi); esi_mirror = esi_mirror->next;
363 
364 	asm_jmp(&state, mixer_PostprocLoop);
365 
366 mixer_PostprocEnd:
367 
368 	asm_movl(&state, dwmixfa_state.outfmt, &state.eax);
369 /*
370 	{
371 		int i;
372 		for (i=0;i<nsamples;i++)
373 		{
374 			fprintf(stderr, "%f\n", tempbuf[i]);
375 			if (i==8)
376 				break;
377 		}
378 	}
379 */
380 	/*asm_movl(&state, clippers[state.eax], &state.eax);*/ eax_mirror2 = clippers[state.eax];
381 
382 	asm_movl(&state, 0x12345678/*outbuf*/, &state.edi); edi_mirror2 = dwmixfa_state.outbuf;
383 	asm_movl(&state, 0x12345678/*tempbuf*/, &state.esi); esi_mirror2 = dwmixfa_state.tempbuf;
384 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
385 
386 	asm_movl(&state, dwmixfa_state.isstereo, &state.edx);
387 	asm_orl(&state, state.edx, &state.edx);
388 	asm_jz(&state, mixer_clipmono);
389 	asm_addl(&state, state.ecx, &state.ecx);
390 mixer_clipmono:
391 	/* call *state.eax*/ eax_mirror2(esi_mirror2, edi_mirror2, state.ecx);
392 
393 mixer_endall:
394 	asm_popl(&state, &state.ebp);
395 
396 	debug_printf("}\n");
397 
398 }
399 
mix_0(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)400 static void mix_0   (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
401 {
402 	struct assembler_state_t state;
403 	float *ebp_mirror;
404 
405 	debug_printf("mix_0 {\n");
406 
407 	init_assembler_state(&state, writecallback, readcallback);
408 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
409 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
410 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
411 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
412 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
413 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
414 
415 
416 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
417 	asm_shrl(&state, 2, &state.ebp);
418 	asm_pushl(&state, state.ebp);
419 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
420 	asm_shrl(&state, 2, &state.ebp);
421 mix_0_next:
422 		asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
423 		asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
424 mix_0_looped:
425 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
426 	if (ebp_loopend == ebp_mirror)
427 	{
428 		write_cf(state.eflags, 0);
429 		write_zf(state.eflags, 1);
430 	} else if (ebp_mirror < ebp_loopend)
431 	{
432 		write_cf(state.eflags, 1);
433 		write_zf(state.eflags, 0);
434 	} else {
435 		write_cf(state.eflags, 0);
436 		write_zf(state.eflags, 0);
437 	}
438 		asm_jae(&state, mix_0_LoopHandler);
439 		asm_decl(&state, &state.ecx);
440 	asm_jnz(&state, mix_0_next);
441 mix_0_ende:
442 	asm_shll(&state, 2, &state.ebp);
443 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
444 	asm_popl(&state, &state.ecx);
445 
446 
447 	asm_movl(&state, state.edx, edx_sample_pos_fract);
448 	debug_printf("}\n");
449 	return;
450 
451 mix_0_LoopHandler:
452 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
453 	asm_testl(&state, MIXF_LOOPED, state.eax);
454 	asm_jnz(&state, mix_0_loopme);
455 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
456 	asm_andl(&state, FLAG_DISABLED, &state.eax);
457 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
458 	asm_jmp(&state, mix_0_ende);
459 mix_0_loopme:
460 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
461 	asm_jmp(&state, mix_0_looped);
462 }
463 
mixm_n(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)464 static void mixm_n  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
465 {
466 	struct assembler_state_t state;
467 	float *ebp_mirror;
468 
469 	debug_printf("mixm_n {\n");
470 
471 	init_assembler_state(&state, writecallback, readcallback);
472 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
473 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
474 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
475 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
476 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
477 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
478 
479 
480 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
481 	asm_flds(&state, dwmixfa_state.voll);
482 	asm_shrl(&state, 2, &state.ebp);
483 	asm_pushl(&state, state.ebp);
484 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
485 	asm_shrl(&state, 2, &state.ebp);
486 mixm_n_next:
487 	asm_flds(&state, *ebp_mirror);
488 	asm_fld(&state, 1);
489 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
490 	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
491 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
492 	asm_fmulp_stst(&state, 0, 1);
493 	asm_fxch_st(&state, 1);
494 	asm_fadds(&state, dwmixfa_state.volrl);
495 	asm_fxch_st(&state, 1);
496 	asm_fadds(&state, edi_destptr[-1]);
497 /*mixm_n_looped:*/
498 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
499 	if (ebp_loopend == ebp_mirror)
500 	{
501 		write_cf(state.eflags, 0);
502 		write_zf(state.eflags, 1);
503 	} else if (ebp_mirror < ebp_loopend)
504 	{
505 		write_cf(state.eflags, 1);
506 		write_zf(state.eflags, 0);
507 	} else {
508 		write_cf(state.eflags, 0);
509 		write_zf(state.eflags, 0);
510 	}
511 		asm_jae(&state, mixm_n_LoopHandler);
512 		asm_fstps(&state, edi_destptr-1);
513 		asm_decl(&state, &state.ecx);
514 	asm_jnz(&state, mixm_n_next);
515 mixm_n_ende:
516 	asm_fstps(&state, &dwmixfa_state.voll);
517 	asm_shll(&state, 2, &state.ebp);
518 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
519 	asm_popl(&state, &state.ecx);
520 
521 	asm_movl(&state, state.edx, edx_sample_pos_fract);
522 	debug_printf("mixer }\n");
523 	return;
524 
525 mixm_n_LoopHandler:
526 	asm_fstps(&state, edi_destptr-1);
527 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
528 	asm_testl(&state, MIXF_LOOPED, state.eax);
529 	asm_jnz(&state, mixm_n_loopme);
530 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
531 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
532 	asm_flds(&state, *ebp_mirror);
533 mixm_n_fill: /*  sample ends -> fill rest of buffer with last sample value */
534 		asm_fld(&state, 1);
535 		asm_fmul(&state, 1, 0);
536 		asm_fadds(&state, edi_destptr[-1]);
537 		asm_fstps(&state, edi_destptr-1);
538 		asm_fxch_st(&state, 1);
539 		asm_fadds(&state, dwmixfa_state.volrl);
540 		asm_fxch_st(&state, 1);
541 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
542 		asm_decl(&state, &state.ecx);
543 	asm_jnz(&state, mixm_n_fill);
544 	asm_fmul(&state, 1, 0);
545 	asm_fadds(&state, dwmixfa_state.fadeleft);
546 	asm_fstps(&state, &dwmixfa_state.fadeleft);
547 
548 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
549 	asm_andl(&state, FLAG_DISABLED, &state.eax);
550 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
551 	asm_jmp(&state, mixm_n_ende);
552 
553 mixm_n_loopme: /* sample loops -> jump to loop start */
554 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
555 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
556 	if (ebp_loopend == ebp_mirror)
557 	{
558 		write_cf(state.eflags, 0);
559 		write_zf(state.eflags, 1);
560 	} else if (ebp_mirror < ebp_loopend)
561 	{
562 		write_cf(state.eflags, 1);
563 		write_zf(state.eflags, 0);
564 	} else {
565 		write_cf(state.eflags, 0);
566 		write_zf(state.eflags, 0);
567 	}
568 		asm_jae(&state, mixm_n_loopme);
569 	asm_decl(&state, &state.ecx);
570 	asm_jz(&state, mixm_n_ende);
571 	asm_jmp(&state, mixm_n_next);
572 }
573 
mixs_n(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)574 static void mixs_n  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
575 {
576 	struct assembler_state_t state;
577 	float *ebp_mirror;
578 
579 	debug_printf("mixs_n {\n");
580 
581 	init_assembler_state(&state, writecallback, readcallback);
582 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
583 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
584 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
585 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
586 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
587 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
588 
589 
590 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
591 	asm_flds(&state, dwmixfa_state.voll);
592 	asm_flds(&state, dwmixfa_state.volr);
593 	asm_shrl(&state, 2, &state.ebp);
594 	asm_pushl(&state, state.ebp);
595 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
596 	asm_shrl(&state, 2, &state.ebp);
597 mixs_n_next:
598 	asm_flds(&state, *ebp_mirror);
599 	asm_addl(&state, state.esi, &state.edx);if (read_cf(state.eflags)) ebp_mirror++;
600 	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
601 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
602 	asm_fld(&state, 1);
603 	asm_fld(&state, 3);
604 	asm_fmul(&state, 2, 0);
605 	asm_fxch_st(&state, 4);
606 	asm_fadds(&state, dwmixfa_state.volrl);
607 	asm_fxch_st(&state, 2);
608 	asm_fmulp_st(&state, 1);
609 	asm_fxch_st(&state, 2);
610 	asm_fadds(&state, dwmixfa_state.volrr);
611 	asm_fxch_st(&state, 3);
612 	asm_fadds(&state, edi_destptr[-2]);
613 	asm_fxch_st(&state, 2);
614 	asm_fadds(&state, edi_destptr[-1]);
615 
616 /*mixs_n_looped:*/
617 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
618 	if (ebp_loopend == ebp_mirror)
619 	{
620 		write_cf(state.eflags, 0);
621 		write_zf(state.eflags, 1);
622 	} else if (ebp_mirror < ebp_loopend)
623 	{
624 		write_cf(state.eflags, 1);
625 		write_zf(state.eflags, 0);
626 	} else {
627 		write_cf(state.eflags, 0);
628 		write_zf(state.eflags, 0);
629 	}
630 		asm_jae(&state, mixs_n_LoopHandler);
631 		asm_fstps(&state, edi_destptr-1);
632 		asm_fxch_st(&state, 1);
633 		asm_fstps(&state, edi_destptr-2);
634 		asm_fxch_st(&state, 1);
635 		asm_decl(&state, &state.ecx);
636 	asm_jnz(&state, mixs_n_next);
637 mixs_n_ende:
638 	asm_fstps(&state, &dwmixfa_state.volr);
639 	asm_fstps(&state, &dwmixfa_state.voll);
640 	asm_shll(&state, 2, &state.ebp);
641 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
642 	asm_popl(&state, &state.ecx);
643 
644 	asm_movl(&state, state.edx, edx_sample_pos_fract);
645 	debug_printf("mixer }\n");
646 	return;
647 
648 mixs_n_LoopHandler:
649 	asm_fstps(&state, edi_destptr-1);
650 	asm_fxch_st(&state, 1);
651 	asm_fstps(&state, edi_destptr-2);
652 	asm_fxch_st(&state, 1);
653 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
654 	asm_testl(&state, MIXF_LOOPED, state.eax);
655 	asm_jnz(&state, mixs_n_loopme);
656 	asm_fxch_st(&state, 1);
657 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
658 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
659 	asm_flds(&state, *ebp_mirror);
660 
661 	asm_fxch_st(&state, 2);
662 mixs_n_fill: /*  sample ends -> fill rest of buffer with last sample value */
663 		asm_fld(&state, 1);
664 		asm_fmul(&state, 3, 0);
665 		asm_fxch_st(&state, 1);
666 		asm_fld(&state, 0);
667 		asm_fmul(&state, 4, 0);
668 		asm_fxch_st(&state, 2);
669 		asm_fadds(&state, edi_destptr[-2]);
670 		asm_fstps(&state, edi_destptr-2);
671 		asm_fxch_st(&state, 1);
672 		asm_fadds(&state, edi_destptr[-1]);
673 		asm_fstps(&state, edi_destptr-1);
674 		asm_fadds(&state, dwmixfa_state.volrr);
675 		asm_fxch_st(&state, 1);
676 		asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
677 		asm_decl(&state, &state.ecx);
678 		asm_fadds(&state, dwmixfa_state.volrl);
679 		asm_fxch_st(&state, 1);
680 	asm_jnz(&state, mixs_n_fill);
681 	asm_fxch_st(&state, 2);
682 	asm_fld(&state, 0);
683 	asm_fmul(&state, 2, 0);
684 	asm_fxch_st(&state, 1);
685 	asm_fmul(&state, 3, 0);
686 	asm_fxch_st(&state, 1);
687 	asm_fadds(&state, dwmixfa_state.fadeleft);
688 	asm_fxch_st(&state, 1);
689 	asm_fadds(&state, dwmixfa_state.faderight);
690 	asm_fxch_st(&state, 1);
691 	asm_fstps(&state, &dwmixfa_state.fadeleft);
692 	asm_fstps(&state, &dwmixfa_state.faderight);
693 	asm_fxch_st(&state, 1);
694 
695 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
696 	asm_andl(&state, FLAG_DISABLED, &state.eax);
697 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
698 	asm_jmp(&state, mixs_n_ende);
699 
700 mixs_n_loopme: /* sample loops -> jump to loop start */
701 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
702 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
703 	if (ebp_loopend == ebp_mirror)
704 	{
705 		write_cf(state.eflags, 0);
706 		write_zf(state.eflags, 1);
707 	} else if (ebp_mirror < ebp_loopend)
708 	{
709 		write_cf(state.eflags, 1);
710 		write_zf(state.eflags, 0);
711 	} else {
712 		write_cf(state.eflags, 0);
713 		write_zf(state.eflags, 0);
714 	}
715 		asm_jae(&state, mixs_n_loopme);
716 	asm_decl(&state, &state.ecx);
717 	asm_jz(&state, mixs_n_ende);
718 	asm_jmp(&state, mixs_n_next);
719 }
720 
/*
 * mixm_i - emulated-assembly mixing routine producing one output float per
 * sample (the name suggests "mono, interpolated" - presumably linear
 * interpolation between ebp_mirror[0] and ebp_mirror[1]).
 *
 * Parameters:
 *   edi_destptr             destination buffer; results are ADDED to the
 *                           values already stored there
 *   eax_sample_pos          in/out: pointer to the current source sample
 *   edx_sample_pos_fract    in/out: fractional part of the source position
 *   ebx_sample_pitch        whole-sample step applied per output sample
 *   esi_sample_pitch_fract  fractional step applied per output sample
 *   ebp_loopend             source position that triggers the loop handler
 *                           once reached or passed
 *
 * Reads dwmixfa_state.nsamples, voll, volrl, looptype, mixlooplen and
 * fadeleft; writes dwmixfa_state.magic1, voll, fadeleft and looptype.
 *
 * Registers that would hold pointers are loaded with the placeholder
 * 0x12345678; the real pointers live in edi_destptr and ebp_mirror, which are
 * updated next to the emulated instruction that would have changed them.
 */
mixm_i(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)721 static void mixm_i  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
722 {
723 	struct assembler_state_t state;
	/* C-side mirror of the source pointer the emulated ebp register stands for */
724 	float *ebp_mirror;
725 
726 	debug_printf("mixm_i {\n");
727 
728 	init_assembler_state(&state, writecallback, readcallback);
	/* pointer-valued registers only receive a dummy value (see header comment) */
729 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
730 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
731 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
732 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
733 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
734 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
735 
736 
	/* ecx counts the output samples still to be produced */
737 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	/* two loads: with x87 push semantics st0 = voll, st1 = minuseins */
	/* minuseins is defined outside this chunk - presumably -1.0f ("minus one") */
738 	asm_flds(&state, minuseins);
739 	asm_flds(&state, dwmixfa_state.voll);
740 	asm_shrl(&state, 2, &state.ebp);
	/* emulated stack now holds the loop end that the later cmpl (%esp) refers to */
741 	asm_pushl(&state, state.ebp);
742 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/*
	 * magic1 = bit pattern (fract >> 9) | 0x3f800000.  0x3f800000 is the
	 * IEEE-754 single-precision pattern of 1.0f, so the upper 23 bits of the
	 * fraction land in the mantissa of a float in [1.0, 2.0).
	 */
743 	asm_movl(&state, state.edx, &state.eax);
744 	asm_shrl(&state, 9, &state.eax);
745 	asm_shrl(&state, 2, &state.ebp);
746 	asm_orl(&state, 0x3f800000, &state.eax);
747 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
748 mixm_i_next:
	/* load current sample twice, then minuseins (st3) + magic1 as the blend factor */
749 	asm_flds(&state, ebp_mirror[0]);
750 	asm_fld(&state, 0);
751 	asm_fld(&state, 3);
752 	asm_fadds(&state, dwmixfa_state.magic1);
753 	asm_fxch_st(&state, 1);
	/* combines with the following sample ebp_mirror[1] */
754 	asm_fsubrs(&state, ebp_mirror[1]);
	/* advance fractional position; a carry moves the source pointer by one */
755 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	/* destination advances one float (4 bytes) */
756 	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	/* whole-sample part of the step */
757 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
758 	asm_fmulp_st(&state, 1);
	/* precompute the magic1 bit pattern for the new fractional position */
759 	asm_movl(&state, state.edx, &state.eax);
760 	asm_shrl(&state, 9, &state.eax);
761 	asm_faddp_stst(&state, 0, 1);
	/* duplicate the volume (st1) and multiply the sample value by it */
762 	asm_fld(&state, 1);
763 	asm_fmulp_stst(&state, 0, 1);
764 	asm_fxch_st(&state, 1);
	/* volume is stepped by volrl for every output sample */
765 	asm_fadds(&state, dwmixfa_state.volrl);
766 	asm_fxch_st(&state, 1);
	/* add to what is already in the destination slot just passed */
767 	asm_fadds(&state, edi_destptr[-1]);
768 	asm_orl(&state, 0x3f800000, &state.eax);
769 /*mixm_i_looped:*/
	/*
	 * Hand-written stand-in for the compare against the pushed loop end:
	 * CF is set only when ebp_mirror is below ebp_loopend, ZF only when equal.
	 */
770 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
771 	if (ebp_loopend == ebp_mirror)
772 	{
773 		write_cf(state.eflags, 0);
774 		write_zf(state.eflags, 1);
775 	} else if (ebp_mirror < ebp_loopend)
776 	{
777 		write_cf(state.eflags, 1);
778 		write_zf(state.eflags, 0);
779 	} else {
780 		write_cf(state.eflags, 0);
781 		write_zf(state.eflags, 0);
782 	}
783 		asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
		/* as on x86, jae is taken when CF is clear: position reached/passed loop end */
784 		asm_jae(&state, mixm_i_LoopHandler);
		/* store the mixed sample */
785 		asm_fstps(&state, edi_destptr-1);
786 		asm_decl(&state, &state.ecx);
787 	asm_jnz(&state, mixm_i_next);
788 mixm_i_ende:
	/* write the stepped volume back and drop the remaining FPU stack entry */
789 	asm_fstps(&state, &dwmixfa_state.voll);
790 	asm_fstp_st(&state, 0);
791 	asm_shll(&state, 2, &state.ebp);
	/* hand the final source position back to the caller */
792 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	/* balances the pushl done during setup */
793 	asm_popl(&state, &state.ecx);
794 
795 	asm_movl(&state, state.edx, edx_sample_pos_fract);
796 	debug_printf("}\n");
797 	return;
798 
799 mixm_i_LoopHandler:
	/* store the pending output sample first */
800 	asm_fstps(&state, edi_destptr-1);
801 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
802 	asm_testl(&state, MIXF_LOOPED, state.eax);
803 	asm_jnz(&state, mixm_i_loopme);
	/* not looping: undo the last position step and load the sample found there */
804 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
805 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
806 	asm_flds(&state, *ebp_mirror);
807 mixm_i_fill: /*  sample ends -> fill rest of buffer with last sample value */
		/* one pass per remaining output sample (ecx); volume keeps stepping by volrl */
808 		asm_fld(&state, 1);
809 		asm_fmul(&state, 1, 0);
810 		asm_fadds(&state, edi_destptr[-1]);
811 		asm_fstps(&state, edi_destptr-1);
812 		asm_fxch_st(&state, 1);
813 		asm_fadds(&state, dwmixfa_state.volrl);
814 		asm_fxch_st(&state, 1);
815 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
816 		asm_decl(&state, &state.ecx);
817 	asm_jnz(&state, mixm_i_fill);
	/* fold the held sample (combined with the volume) into fadeleft */
818 	asm_fmul(&state, 1, 0);
819 	asm_fadds(&state, dwmixfa_state.fadeleft);
820 	asm_fstps(&state, &dwmixfa_state.fadeleft);
821 
	/* FLAG_DISABLED is ~MIXF_PLAYING: clear the playing bit of looptype */
822 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
823 	asm_andl(&state, FLAG_DISABLED, &state.eax);
824 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
825 	asm_jmp(&state, mixm_i_ende);
826 
827 mixm_i_loopme: /* sample loops -> jump to loop start */
	/* step back by mixlooplen, repeating until the position is below the loop end */
828 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
829 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
830 	if (ebp_loopend == ebp_mirror)
831 	{
832 		write_cf(state.eflags, 0);
833 		write_zf(state.eflags, 1);
834 	} else if (ebp_mirror < ebp_loopend)
835 	{
836 		write_cf(state.eflags, 1);
837 		write_zf(state.eflags, 0);
838 	} else {
839 		write_cf(state.eflags, 0);
840 		write_zf(state.eflags, 0);
841 	}
842 		asm_jae(&state, mixm_i_loopme);
	/* count the sample that triggered the handler, then finish or keep mixing */
843 	asm_decl(&state, &state.ecx);
844 	asm_jz(&state, mixm_i_ende);
845 	asm_jmp(&state, mixm_i_next);
846 }
847 
/*
 * mixs_i - emulated-assembly mixing routine producing two output floats per
 * sample (the name suggests "stereo, interpolated" - presumably linear
 * interpolation between ebp_mirror[0] and ebp_mirror[1]).
 *
 * Parameters:
 *   edi_destptr             destination buffer, two floats per output sample;
 *                           results are ADDED to the values already there
 *   eax_sample_pos          in/out: pointer to the current source sample
 *   edx_sample_pos_fract    in/out: fractional part of the source position
 *   ebx_sample_pitch        whole-sample step applied per output sample
 *   esi_sample_pitch_fract  fractional step applied per output sample
 *   ebp_loopend             source position that triggers the loop handler
 *                           once reached or passed
 *
 * Reads dwmixfa_state.nsamples, voll, volr, volrl, volrr, looptype,
 * mixlooplen, fadeleft and faderight; writes dwmixfa_state.magic1, voll,
 * volr, fadeleft, faderight and looptype.
 *
 * Registers that would hold pointers are loaded with the placeholder
 * 0x12345678; the real pointers live in edi_destptr and ebp_mirror.
 */
mixs_i(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)848 static void mixs_i  (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
849 {
850 	struct assembler_state_t state;
	/* C-side mirror of the source pointer the emulated ebp register stands for */
851 	float *ebp_mirror;
852 
853 	debug_printf("mixs_i {\n");
854 
855 	init_assembler_state(&state, writecallback, readcallback);
	/* pointer-valued registers only receive a dummy value (see header comment) */
856 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
857 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
858 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
859 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
860 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
861 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
862 
863 
	/* ecx counts the output samples still to be produced */
864 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	/* three loads: with x87 push semantics st0 = volr, st1 = voll, st2 = minuseins */
	/* minuseins is defined outside this chunk - presumably -1.0f ("minus one") */
865 	asm_flds(&state, minuseins);
866 	asm_flds(&state, dwmixfa_state.voll);
867 	asm_flds(&state, dwmixfa_state.volr);
868 	asm_shrl(&state, 2, &state.ebp);
869 
	/* emulated stack now holds the loop end that the later cmpl (%esp) refers to */
870 	asm_pushl(&state, state.ebp);
871 
872 
873 
874 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/*
	 * magic1 = bit pattern (fract >> 9) | 0x3f800000.  0x3f800000 is the
	 * IEEE-754 single-precision pattern of 1.0f, so the upper 23 bits of the
	 * fraction land in the mantissa of a float in [1.0, 2.0).
	 */
875 	asm_movl(&state, state.edx, &state.eax);
876 	asm_shrl(&state, 9, &state.eax);
877 	asm_shrl(&state, 2, &state.ebp);
878 	asm_orl(&state, 0x3f800000, &state.eax);
879 	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
880 
881 
882 mixs_i_next:
	/* load current sample twice, then minuseins (st4) + magic1 as the blend factor */
883 	asm_flds(&state, ebp_mirror[0]);
884 	asm_fld(&state, 0);
885 	asm_fld(&state, 4);
886 	asm_fadds(&state, dwmixfa_state.magic1);
887 	asm_fxch_st(&state, 1);
	/* combines with the following sample ebp_mirror[1] */
888 	asm_fsubrs(&state, ebp_mirror[1]);
	/* advance fractional position; a carry moves the source pointer by one */
889 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	/* destination advances two floats (8 bytes) per output sample */
890 	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	/* whole-sample part of the step */
891 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
892 	asm_fmulp_st(&state, 1);
	/* precompute the magic1 bit pattern for the new fractional position */
893 	asm_movl(&state, state.edx, &state.eax);
894 	asm_shrl(&state, 9, &state.eax);
895 	asm_faddp_stst(&state, 0, 1);
	/* duplicate both volumes and scale the sample value for each output channel */
896 	asm_fld(&state, 1);
897 	asm_fld(&state, 3);
898 	asm_fmul(&state, 2, 0);
899 	asm_fxch_st(&state, 4);
	/* volumes are stepped by volrl / volrr for every output sample */
900 	asm_fadds(&state, dwmixfa_state.volrl);
901 	asm_fxch_st(&state, 2);
902 	asm_fmulp_stst(&state, 0, 1);
903 	asm_fxch_st(&state, 2);
904 	asm_fadds(&state, dwmixfa_state.volrr);
905 	asm_fxch_st(&state, 3);
	/* add to what is already in the two destination slots just passed */
906 	asm_fadds(&state, edi_destptr[-2]);
907 	asm_fxch_st(&state, 2);
908 	asm_fadds(&state, edi_destptr[-1]);
909 	asm_orl(&state, 0x3f800000, &state.eax);
910 /*mixs_i_looped:*/
	/*
	 * Hand-written stand-in for the compare against the pushed loop end:
	 * CF is set only when ebp_mirror is below ebp_loopend, ZF only when equal.
	 */
911 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
912 	if (ebp_loopend == ebp_mirror)
913 	{
914 		write_cf(state.eflags, 0);
915 		write_zf(state.eflags, 1);
916 	} else if (ebp_mirror < ebp_loopend)
917 	{
918 		write_cf(state.eflags, 1);
919 		write_zf(state.eflags, 0);
920 	} else {
921 		write_cf(state.eflags, 0);
922 		write_zf(state.eflags, 0);
923 	}
924 		asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
		/* as on x86, jae is taken when CF is clear: position reached/passed loop end */
925 		asm_jae(&state, mixs_i_LoopHandler);
926 
		/* store both mixed output values */
927 		asm_fstps(&state, edi_destptr-1);
928 		asm_fxch_st(&state, 1);
929 		asm_fstps(&state, edi_destptr-2);
930 		asm_fxch_st(&state, 1);
931 		asm_decl(&state, &state.ecx);
932 	asm_jnz(&state, mixs_i_next);
933 mixs_i_ende:
	/* write the stepped volumes back and drop the remaining FPU stack entry */
934 	asm_fstps(&state, &dwmixfa_state.volr);
935 	asm_fstps(&state, &dwmixfa_state.voll);
936 	asm_fstp_st(&state, 0);
937 	asm_shll(&state, 2, &state.ebp);
	/* hand the final source position back to the caller */
938 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
939 
	/* balances the pushl done during setup */
940 	asm_popl(&state, &state.ecx);
941 
942 	asm_movl(&state, state.edx, edx_sample_pos_fract);
943 	debug_printf("}\n");
944 	return;
945 
946 mixs_i_LoopHandler:
	/* store the pending pair of output values first */
947 	asm_fstps(&state, edi_destptr-1);
948 	asm_fxch_st(&state, 1);
949 	asm_fstps(&state, edi_destptr-2);
950 	asm_fxch_st(&state, 1);
951 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
952 	asm_testl(&state, MIXF_LOOPED, state.eax);
953 	asm_jnz(&state, mixs_i_loopme);
	/* not looping: discard st2 (the minuseins entry), its slot is reused below */
954 	asm_fxch_st(&state, 2);
955 	asm_fstp_st(&state, 0);
	/* undo the last position step and load the sample found there */
956 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
957 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
958 	asm_flds(&state, *ebp_mirror);
959 	asm_fxch_st(&state, 2);
960 mixs_i_fill:
961 	/*  sample ends -> fill rest of buffer with last sample value */
		/* one pass per remaining output sample (ecx); both volumes keep stepping */
962 		asm_fld(&state, 1);
963 		asm_fmul(&state, 3, 0);
964 		asm_fxch_st(&state, 1);
965 		asm_fld(&state, 0);
966 		asm_fmul(&state, 4, 0);
967 		asm_fxch_st(&state, 2);
968 		asm_fadds(&state, edi_destptr[-2]);
969 		asm_fstps(&state, edi_destptr-2);
970 		asm_fxch_st(&state, 1);
971 		asm_fadds(&state, edi_destptr[-1]);
972 		asm_fstps(&state, edi_destptr-1);
973 		asm_fadds(&state, dwmixfa_state.volrr);
974 		asm_fxch_st(&state, 1);
975 		asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
976 		asm_decl(&state, &state.ecx);
977 		asm_fadds(&state, dwmixfa_state.volrl);
978 		asm_fxch_st(&state, 1);
979 	asm_jnz(&state, mixs_i_fill);
980 
	/* fold the held sample (combined with each volume) into fadeleft / faderight */
981 	asm_fld(&state, 2);
982 	asm_fld(&state, 0);
983 	asm_fmul(&state, 3, 0);
984 	asm_fxch_st(&state, 1);
985 	asm_fmul(&state, 2, 0);
986 	asm_fxch_st(&state, 1);
987 	asm_fadds(&state, dwmixfa_state.fadeleft);
988 	asm_fxch_st(&state, 1);
989 	asm_fadds(&state, dwmixfa_state.faderight);
990 	asm_fxch_st(&state, 1);
991 	asm_fstps(&state, &dwmixfa_state.fadeleft);
992 	asm_fstps(&state, &dwmixfa_state.faderight);
	/* FLAG_DISABLED is ~MIXF_PLAYING: clear the playing bit of looptype */
993 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
994 	asm_andl(&state, FLAG_DISABLED, &state.eax);
995 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
996 	asm_jmp(&state, mixs_i_ende);
997 
998 mixs_i_loopme: /* sample loops -> jump to loop start */
	/* step back by mixlooplen, repeating until the position is below the loop end */
999 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1000 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1001 	if (ebp_loopend == ebp_mirror)
1002 	{
1003 		write_cf(state.eflags, 0);
1004 		write_zf(state.eflags, 1);
1005 	} else if (ebp_mirror < ebp_loopend)
1006 	{
1007 		write_cf(state.eflags, 1);
1008 		write_zf(state.eflags, 0);
1009 	} else {
1010 		write_cf(state.eflags, 0);
1011 		write_zf(state.eflags, 0);
1012 	}
1013 		asm_jae(&state, mixs_i_loopme);
	/* count the sample that triggered the handler, then finish or keep mixing */
1014 	asm_decl(&state, &state.ecx);
1015 	asm_jz(&state, mixs_i_ende);
1016 	asm_jmp(&state, mixs_i_next);
1017 }
1018 
mixm_i2(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1019 static void mixm_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1020 {
1021 	struct assembler_state_t state;
1022 	float *ebp_mirror;
1023 
1024 	debug_printf("mixm_i2 {\n");
1025 
1026 	init_assembler_state(&state, writecallback, readcallback);
1027 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1028 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1029 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1030 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
1031 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1032 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1033 
1034 
1035 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1036 	asm_flds(&state, dwmixfa_state.voll);
1037 	asm_shrl(&state, 2, &state.ebp);
1038 	asm_pushl(&state, state.ebp);
1039 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1040 	asm_shrl(&state, 2, &state.ebp);
1041 	asm_movl(&state, state.edx, &state.eax);
1042 	asm_shrl(&state, 24, &state.eax);
1043 mixm_i2_next:
1044 	asm_flds(&state, ebp_mirror[0]);
1045 	asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1046 	asm_flds(&state, ebp_mirror[1]);
1047 	asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1048 	asm_flds(&state, ebp_mirror[2]);
1049 	asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1050 	asm_flds(&state, ebp_mirror[3]);
1051 	asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1052 	asm_fxch_st(&state, 2);
1053 	asm_faddp_stst(&state, 0, 3);
1054 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1055 	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1056 	asm_faddp_stst(&state, 0, 2);
1057 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1058 	asm_movl(&state, state.edx, &state.eax);
1059 	asm_faddp_stst(&state, 0, 1);
1060 	asm_shrl(&state, 24, &state.eax);
1061 	asm_fld(&state, 1);
1062 	asm_fmulp_stst(&state, 0, 1);
1063 	asm_fxch_st(&state, 1);
1064 	asm_fadds(&state, dwmixfa_state.volrl);
1065 	asm_fxch_st(&state, 1);
1066 	asm_fadds(&state, edi_destptr[-1]);
1067 /*mixm_i2_looped:*/
1068 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1069 	if (ebp_loopend == ebp_mirror)
1070 	{
1071 		write_cf(state.eflags, 0);
1072 		write_zf(state.eflags, 1);
1073 	} else if (ebp_mirror < ebp_loopend)
1074 	{
1075 		write_cf(state.eflags, 1);
1076 		write_zf(state.eflags, 0);
1077 	} else {
1078 		write_cf(state.eflags, 0);
1079 		write_zf(state.eflags, 0);
1080 	}
1081 		asm_jae(&state, mixm_i2_LoopHandler);
1082 		asm_fstps(&state, edi_destptr-1);
1083 		asm_decl(&state, &state.ecx);
1084 	asm_jnz(&state, mixm_i2_next);
1085 mixm_i2_ende:
1086 	asm_fstps(&state, &dwmixfa_state.voll);
1087 	asm_shll(&state, 2, &state.ebp);
1088 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1089 	asm_popl(&state, &state.ecx);
1090 
1091 	asm_movl(&state, state.edx, edx_sample_pos_fract);
1092 	debug_printf("}\n");
1093 	return;
1094 
1095 mixm_i2_LoopHandler:
1096 	asm_fstps(&state, edi_destptr-1);
1097 	asm_pushl(&state, state.eax);
1098 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1099 	asm_testl(&state, MIXF_LOOPED, state.eax);
1100 	asm_jnz(&state, mixm_i2_loopme);
1101 	asm_popl(&state, &state.eax);
1102 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1103 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1104 	asm_flds(&state, *ebp_mirror);
1105 mixm_i2_fill: /*  sample ends -> fill rest of buffer with last sample value */
1106 		asm_fld(&state, 1);
1107 		asm_fmul(&state, 1, 0);
1108 		asm_fadds(&state, edi_destptr[-1]);
1109 		asm_fstps(&state, edi_destptr-1);
1110 		asm_fxch_st(&state, 1);
1111 		asm_fadds(&state, dwmixfa_state.volrl);
1112 		asm_fxch_st(&state, 1);
1113 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1114 		asm_decl(&state, &state.ecx);
1115 	asm_jnz(&state, mixm_i2_fill);
1116 	asm_fmul(&state, 1, 0);
1117 	asm_fadds(&state, dwmixfa_state.fadeleft);
1118 	asm_fstps(&state, &dwmixfa_state.fadeleft);
1119 
1120 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1121 	asm_andl(&state, FLAG_DISABLED, &state.eax);
1122 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1123 	asm_jmp(&state, mixm_i2_ende);
1124 
1125 mixm_i2_loopme: /* sample loops -> jump to loop start */
1126 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1127 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1128 	if (ebp_loopend == ebp_mirror)
1129 	{
1130 		write_cf(state.eflags, 0);
1131 		write_zf(state.eflags, 1);
1132 	} else if (ebp_mirror < ebp_loopend)
1133 	{
1134 		write_cf(state.eflags, 1);
1135 		write_zf(state.eflags, 0);
1136 	} else {
1137 		write_cf(state.eflags, 0);
1138 		write_zf(state.eflags, 0);
1139 	}
1140 		asm_jae(&state, mixm_i2_loopme);
1141 	asm_decl(&state, &state.ecx);
1142 	asm_jz(&state, mixm_i2_ende);
1143 	asm_jmp(&state, mixm_i2_next);
1144 }
1145 
mixs_i2(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1146 static void mixs_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1147 {
1148 	struct assembler_state_t state;
1149 	float *ebp_mirror;
1150 
1151 	debug_printf("mixs_i2 {\n");
1152 
1153 	init_assembler_state(&state, writecallback, readcallback);
1154 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1155 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1156 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1157 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
1158 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1159 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1160 
1161 
1162 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1163 	asm_flds(&state, dwmixfa_state.voll);
1164 	asm_flds(&state, dwmixfa_state.volr);
1165 
1166 	asm_shrl(&state, 2, &state.ebp);
1167 
1168 	asm_pushl(&state, state.ebp);
1169 
1170 
1171 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1172 	asm_shrl(&state, 2, &state.ebp);
1173 	asm_movl(&state, state.edx, &state.eax);
1174 	asm_shrl(&state, 24, &state.eax);
1175 
1176 mixs_i2_next:
1177 	asm_flds(&state, ebp_mirror[0]);
1178 	asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1179 	asm_flds(&state, ebp_mirror[1]);
1180 	asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1181 	asm_flds(&state, ebp_mirror[2]);
1182 	asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1183 	asm_flds(&state, ebp_mirror[3]);
1184 	asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1185 	asm_fxch_st(&state, 2);
1186 	asm_faddp_stst(&state, 0, 3);
1187 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1188 	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1189 	asm_faddp_stst(&state, 0, 2);
1190 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1191 	asm_movl(&state, state.edx, &state.eax);
1192 	asm_faddp_stst(&state, 0, 1);
1193 	asm_shrl(&state, 24, &state.eax);
1194 	asm_fld(&state, 1);
1195 	asm_fld(&state, 3);
1196 	asm_fmul(&state, 2, 0);
1197 	asm_fxch_st(&state, 4);
1198 	asm_fadds(&state, dwmixfa_state.volrl);
1199 	asm_fxch_st(&state, 2);
1200 	asm_fmulp_stst(&state, 0, 1);
1201 	asm_fxch_st(&state, 2);
1202 	asm_fadds(&state, dwmixfa_state.volrr);
1203 	asm_fxch_st(&state, 3);
1204 	asm_fadds(&state, edi_destptr[-2]);
1205 	asm_fxch_st(&state, 2);
1206 	asm_fadds(&state, edi_destptr[-1]);
1207 /*mixs_i2_looped:*/
1208 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1209 	if (ebp_loopend == ebp_mirror)
1210 	{
1211 		write_cf(state.eflags, 0);
1212 		write_zf(state.eflags, 1);
1213 	} else if (ebp_mirror < ebp_loopend)
1214 	{
1215 		write_cf(state.eflags, 1);
1216 		write_zf(state.eflags, 0);
1217 	} else {
1218 		write_cf(state.eflags, 0);
1219 		write_zf(state.eflags, 0);
1220 	}
1221 		asm_jae(&state, mixs_i2_LoopHandler);
1222 
1223 		asm_fstps(&state, edi_destptr-1);
1224 		asm_fxch_st(&state, 1);
1225 		asm_fstps(&state, edi_destptr-2);
1226 		asm_fxch_st(&state, 1);
1227 		asm_decl(&state, &state.ecx);
1228 	asm_jnz(&state, mixs_i2_next);
1229 mixs_i2_ende:
1230 	asm_fstps(&state, &dwmixfa_state.volr);
1231 	asm_fstps(&state, &dwmixfa_state.voll);
1232 	asm_shll(&state, 2, &state.ebp);
1233 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1234 	asm_popl(&state, &state.ecx);
1235 
1236 	asm_movl(&state, state.edx, edx_sample_pos_fract);
1237 	debug_printf("}\n");
1238 	return;
1239 
1240 mixs_i2_LoopHandler:
1241 	asm_fstps(&state, edi_destptr-1);
1242 	asm_fxch_st(&state, 1);
1243 	asm_fstps(&state, edi_destptr-2);
1244 	asm_fxch_st(&state, 1);
1245 	asm_pushl(&state, state.eax);
1246 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1247 	asm_testl(&state, MIXF_LOOPED, state.eax);
1248 	asm_jnz(&state, mixs_i2_loopme);
1249 	asm_popl(&state, &state.eax);
1250 	asm_fxch_st(&state, 1);
1251 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1252 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1253 	asm_flds(&state, *ebp_mirror);
1254 	asm_fxch_st(&state, 2);
1255 mixs_i2_fill: /*  sample ends -> fill rest of buffer with last sample value */
1256 
1257 		asm_fld(&state, 1);
1258 		asm_fmul(&state, 3, 0);
1259 		asm_fxch_st(&state, 1);
1260 		asm_fld(&state, 0);
1261 		asm_fmul(&state, 4, 0);
1262 		asm_fxch_st(&state, 2);
1263 		asm_fadds(&state, edi_destptr[-2]);
1264 		asm_fstps(&state, edi_destptr-2);
1265 		asm_fxch_st(&state, 1);
1266 		asm_fadds(&state, edi_destptr[-1]);
1267 		asm_fstps(&state, edi_destptr-1);
1268 		asm_fadds(&state, dwmixfa_state.volrr);
1269 		asm_fxch_st(&state, 1);
1270 		asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1271 		asm_decl(&state, &state.ecx);
1272 		asm_fadds(&state, dwmixfa_state.volrl);
1273 		asm_fxch_st(&state, 1);
1274 	asm_jnz(&state, mixs_i2_fill);
1275 
1276 	asm_fxch_st(&state, 2);
1277 	asm_fld(&state, 0);
1278 	asm_fmul(&state, 2, 0);
1279 	asm_fxch_st(&state, 1);
1280 	asm_fmul(&state, 3, 0);
1281 	asm_fxch_st(&state, 1);
1282 	asm_fadds(&state, dwmixfa_state.fadeleft);
1283 	asm_fxch_st(&state, 1);
1284 	asm_fadds(&state, dwmixfa_state.faderight);
1285 	asm_fxch_st(&state, 1);
1286 	asm_fstps(&state, &dwmixfa_state.fadeleft);
1287 	asm_fstps(&state, &dwmixfa_state.faderight);
1288 	asm_fxch_st(&state, 1);
1289 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1290 	asm_andl(&state, FLAG_DISABLED, &state.eax);
1291 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1292 	asm_jmp(&state, mixs_i2_ende);
1293 
1294 mixs_i2_loopme: /* sample loops -> jump to loop start */
1295 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1296 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1297 	if (ebp_loopend == ebp_mirror)
1298 	{
1299 		write_cf(state.eflags, 0);
1300 		write_zf(state.eflags, 1);
1301 	} else if (ebp_mirror < ebp_loopend)
1302 	{
1303 		write_cf(state.eflags, 1);
1304 		write_zf(state.eflags, 0);
1305 	} else {
1306 		write_cf(state.eflags, 0);
1307 		write_zf(state.eflags, 0);
1308 	}
1309 		asm_jae(&state, mixs_i2_loopme);
1310 	asm_decl(&state, &state.ecx);
1311 	asm_jz(&state, mixs_i2_ende);
1312 	asm_jmp(&state, mixs_i2_next);
1313 }
1314 
/*
 * mixm_nf - emulated-assembly mixing routine producing one output float per
 * sample, reading a single source sample each step and passing it through a
 * two-state recursive filter (the name suggests "mono, no interpolation,
 * filtered").
 *
 * Per output sample, with in = ebp_mirror[0]:
 *   __fb1 = (in - __fl1) * ffrq + __fb1 * frez
 *   __fl1 = __fb1 * ffrq + __fl1
 * and the new __fl1 is combined with the current volume and ADDED to the
 * destination.
 *
 * Parameters:
 *   edi_destptr             destination buffer (accumulated into)
 *   eax_sample_pos          in/out: pointer to the current source sample
 *   edx_sample_pos_fract    in/out: fractional part of the source position
 *   ebx_sample_pitch        whole-sample step applied per output sample
 *   esi_sample_pitch_fract  fractional step applied per output sample
 *   ebp_loopend             source position that triggers the loop handler
 *                           once reached or passed
 *
 * Reads dwmixfa_state.nsamples, voll, volrl, ffrq, frez, __fl1, __fb1,
 * looptype, mixlooplen and fadeleft; writes dwmixfa_state.__fb1, __fl1, voll,
 * fadeleft and looptype.
 *
 * Registers that would hold pointers are loaded with the placeholder
 * 0x12345678; the real pointers live in edi_destptr and ebp_mirror.
 */
mixm_nf(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1315 static void mixm_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1316 {
1317 	struct assembler_state_t state;
	/* C-side mirror of the source pointer the emulated ebp register stands for */
1318 	float *ebp_mirror;
1319 
1320 	debug_printf("mixm_nf {\n");
1321 
1322 	init_assembler_state(&state, writecallback, readcallback);
	/* pointer-valued registers only receive a dummy value (see header comment) */
1323 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1324 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1325 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1326 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
1327 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1328 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1329 
1330 
	/* ecx counts the output samples still to be produced */
1331 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	/* FPU stack: st0 = voll */
1332 	asm_flds(&state, dwmixfa_state.voll);
1333 	asm_shrl(&state, 2, &state.ebp);
	/* emulated stack now holds the loop end that the later cmpl (%esp) refers to */
1334 	asm_pushl(&state, state.ebp);
1335 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1336 	asm_shrl(&state, 2, &state.ebp);
1337 mixm_nf_next:
	/* filter step (see header comment): update __fb1, then __fl1 */
1338 	asm_flds(&state, ebp_mirror[0]);
1339 	asm_fsubs(&state, dwmixfa_state.__fl1);
1340 	asm_fmuls(&state, dwmixfa_state.ffrq);
1341 	asm_flds(&state, dwmixfa_state.__fb1);
1342 	asm_fmuls(&state, dwmixfa_state.frez);
1343 	asm_faddp_stst(&state, 0, 1);
1344 	asm_fsts(&state, &dwmixfa_state.__fb1);
1345 	asm_fmuls(&state, dwmixfa_state.ffrq);
1346 	asm_fadds(&state, dwmixfa_state.__fl1);
1347 	asm_fsts(&state, &dwmixfa_state.__fl1);
1348 
	/* duplicate the volume (st1) for the multiply below */
1349 	asm_fld(&state, 1);
	/* advance fractional position; a carry moves the source pointer by one */
1350 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	/* destination advances one float (4 bytes) */
1351 	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	/* whole-sample part of the step */
1352 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1353 	asm_fmulp_stst(&state, 0, 1);
1354 	asm_fxch_st(&state, 1);
	/* volume is stepped by volrl for every output sample */
1355 	asm_fadds(&state, dwmixfa_state.volrl);
1356 	asm_fxch_st(&state, 1);
	/* add to what is already in the destination slot just passed */
1357 	asm_fadds(&state, edi_destptr[-1]);
1358 /*mixm_nf_looped:*/
	/*
	 * Hand-written stand-in for the compare against the pushed loop end:
	 * CF is set only when ebp_mirror is below ebp_loopend, ZF only when equal.
	 */
1359 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1360 	if (ebp_loopend == ebp_mirror)
1361 	{
1362 		write_cf(state.eflags, 0);
1363 		write_zf(state.eflags, 1);
1364 	} else if (ebp_mirror < ebp_loopend)
1365 	{
1366 		write_cf(state.eflags, 1);
1367 		write_zf(state.eflags, 0);
1368 	} else {
1369 		write_cf(state.eflags, 0);
1370 		write_zf(state.eflags, 0);
1371 	}
		/* as on x86, jae is taken when CF is clear: position reached/passed loop end */
1372 		asm_jae(&state, mixm_nf_LoopHandler);
		/* store the mixed sample */
1373 		asm_fstps(&state, edi_destptr-1);
1374 		asm_decl(&state, &state.ecx);
1375 	asm_jnz(&state, mixm_nf_next);
1376 mixm_nf_ende:
	/* write the stepped volume back */
1377 	asm_fstps(&state, &dwmixfa_state.voll);
1378 	asm_shll(&state, 2, &state.ebp);
	/* hand the final source position back to the caller */
1379 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	/* balances the pushl done during setup */
1380 	asm_popl(&state, &state.ecx);
1381 
1382 	asm_movl(&state, state.edx, edx_sample_pos_fract);
1383 	debug_printf("}\n");
1384 	return;
1385 
1386 mixm_nf_LoopHandler:
	/* store the pending output sample first */
1387 	asm_fstps(&state, edi_destptr-1);
1388 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1389 	asm_testl(&state, MIXF_LOOPED, state.eax);
1390 	asm_jnz(&state, mixm_nf_loopme);
	/* not looping: undo the last position step and load the sample found there */
1391 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1392 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1393 	asm_flds(&state, *ebp_mirror);
1394 mixm_nf_fill: /*  sample ends -> fill rest of buffer with last sample value */
		/* one pass per remaining output sample (ecx); the filter is not applied here */
1395 		asm_fld(&state, 1);
1396 		asm_fmul(&state, 1, 0);
1397 		asm_fadds(&state, edi_destptr[-1]);
1398 		asm_fstps(&state, edi_destptr-1);
1399 		asm_fxch_st(&state, 1);
1400 		asm_fadds(&state, dwmixfa_state.volrl);
1401 		asm_fxch_st(&state, 1);
1402 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1403 		asm_decl(&state, &state.ecx);
1404 	asm_jnz(&state, mixm_nf_fill);
	/* fold the held sample (combined with the volume) into fadeleft */
1405 	asm_fmul(&state, 1, 0);
1406 	asm_fadds(&state, dwmixfa_state.fadeleft);
1407 	asm_fstps(&state, &dwmixfa_state.fadeleft);
1408 
	/* FLAG_DISABLED is ~MIXF_PLAYING: clear the playing bit of looptype */
1409 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1410 	asm_andl(&state, FLAG_DISABLED, &state.eax);
1411 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1412 	asm_jmp(&state, mixm_nf_ende);
1413 
1414 mixm_nf_loopme: /* sample loops -> jump to loop start */
	/* step back by mixlooplen, repeating until the position is below the loop end */
1415 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1416 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1417 	if (ebp_loopend == ebp_mirror)
1418 	{
1419 		write_cf(state.eflags, 0);
1420 		write_zf(state.eflags, 1);
1421 	} else if (ebp_mirror < ebp_loopend)
1422 	{
1423 		write_cf(state.eflags, 1);
1424 		write_zf(state.eflags, 0);
1425 	} else {
1426 		write_cf(state.eflags, 0);
1427 		write_zf(state.eflags, 0);
1428 	}
1429 		asm_jae(&state, mixm_nf_loopme);
	/* count the sample that triggered the handler, then finish or keep mixing */
1430 	asm_decl(&state, &state.ecx);
1431 	asm_jz(&state, mixm_nf_ende);
1432 	asm_jmp(&state, mixm_nf_next);
1433 }
1434 
mixs_nf(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1435 static void mixs_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1436 {
1437 	struct assembler_state_t state;
1438 	float *ebp_mirror;
1439 
1440 	debug_printf("mixs_nf {\n");
1441 
1442 	init_assembler_state(&state, writecallback, readcallback);
1443 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1444 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1445 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1446 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
1447 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1448 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1449 
1450 
1451 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1452 	asm_flds(&state, dwmixfa_state.voll);
1453 	asm_flds(&state, dwmixfa_state.volr);
1454 	asm_shrl(&state, 2, &state.ebp);
1455 	asm_pushl(&state, state.ebp);
1456 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1457 	asm_shrl(&state, 2, &state.ebp);
1458 mixs_nf_next:
1459 	asm_flds(&state, ebp_mirror[0]);
1460 	asm_fsubs(&state, dwmixfa_state.__fl1);
1461 	asm_fmuls(&state, dwmixfa_state.ffrq);
1462 	asm_flds(&state, dwmixfa_state.__fb1);
1463 	asm_fmuls(&state, dwmixfa_state.frez);
1464 	asm_faddp_stst(&state, 0, 1);
1465 	asm_fsts(&state, &dwmixfa_state.__fb1);
1466 	asm_fmuls(&state, dwmixfa_state.ffrq);
1467 	asm_fadds(&state, dwmixfa_state.__fl1);
1468 	asm_fsts(&state, &dwmixfa_state.__fl1);
1469 
1470 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1471 	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1472 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1473 	asm_fld(&state, 1);
1474 	asm_fld(&state, 3);
1475 	asm_fmul(&state, 2, 0);
1476 	asm_fxch_st(&state, 4);
1477 	asm_fadds(&state, dwmixfa_state.volrl);
1478 	asm_fxch_st(&state, 2);
1479 	asm_fmulp_stst(&state, 0, 1);
1480 	asm_fxch_st(&state, 2);
1481 	asm_fadds(&state, dwmixfa_state.volrr);
1482 	asm_fxch_st(&state, 3);
1483 	asm_fadds(&state, edi_destptr[-2]);
1484 	asm_fxch_st(&state, 2);
1485 	asm_fadds(&state, edi_destptr[-1]);
1486 /*mixs_nf_looped:*/
1487 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1488 	if (ebp_loopend == ebp_mirror)
1489 	{
1490 		write_cf(state.eflags, 0);
1491 		write_zf(state.eflags, 1);
1492 	} else if (ebp_mirror < ebp_loopend)
1493 	{
1494 		write_cf(state.eflags, 1);
1495 		write_zf(state.eflags, 0);
1496 	} else {
1497 		write_cf(state.eflags, 0);
1498 		write_zf(state.eflags, 0);
1499 	}
1500 		asm_jae(&state, mixs_nf_LoopHandler);
1501 		asm_fstps(&state, edi_destptr-1);
1502 		asm_fxch_st(&state, 1);
1503 		asm_fstps(&state, edi_destptr-2);
1504 		asm_fxch_st(&state, 1);
1505 		asm_decl(&state, &state.ecx);
1506 	asm_jnz(&state, mixs_nf_next);
1507 mixs_nf_ende:
1508 	asm_fstps(&state, &dwmixfa_state.volr);
1509 	asm_fstps(&state, &dwmixfa_state.voll);
1510 	asm_shll(&state, 2, &state.ebp);
1511 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1512 	asm_popl(&state, &state.ecx);
1513 
1514 	asm_movl(&state, state.edx, edx_sample_pos_fract);
1515 	debug_printf("}\n");
1516 	return;
1517 
1518 mixs_nf_LoopHandler:
1519 	asm_fstps(&state, edi_destptr-1);
1520 	asm_fxch_stst(&state, 0, 1);
1521 	asm_fstps(&state, edi_destptr-2);
1522 	asm_fxch_stst(&state, 0, 1);
1523 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1524 	asm_testl(&state, MIXF_LOOPED, state.eax);
1525 	asm_jnz(&state, mixs_nf_loopme);
1526 	asm_fxch_stst(&state, 0, 1);
1527 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1528 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1529 	asm_flds(&state, *ebp_mirror);
1530 	asm_fxch_stst(&state, 0, 2);
1531 mixs_nf_fill:
1532 	/*  sample ends -> fill rest of buffer with last sample value */
1533 		asm_fld(&state, 1);
1534 		asm_fmul(&state, 3, 0);
1535 		asm_fxch_st(&state, 1);
1536 		asm_fld(&state, 0);
1537 		asm_fmul(&state, 4, 0);
1538 		asm_fxch_st(&state, 2);
1539 		asm_fadds(&state, edi_destptr[-2]);
1540 		asm_fstps(&state, edi_destptr-2);
1541 		asm_fxch_st(&state, 1);
1542 		asm_fadds(&state, edi_destptr[-1]);
1543 		asm_fstps(&state, edi_destptr-1);
1544 		asm_fadds(&state, dwmixfa_state.volrr);
1545 		asm_fxch_st(&state, 1);
1546 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr+=2;
1547 		asm_decl(&state, &state.ecx);
1548 		asm_fadds(&state, dwmixfa_state.volrl);
1549 		asm_fxch_st(&state, 1);
1550 	asm_jnz(&state, mixs_nf_fill);
1551 
1552 	asm_fxch_st(&state, 2);
1553 	asm_fld(&state, 0);
1554 	asm_fmul(&state, 2, 0);
1555 	asm_fxch_st(&state, 1);
1556 	asm_fmul(&state, 3, 0);
1557 	asm_fxch_st(&state, 1);
1558 	asm_fadds(&state, dwmixfa_state.fadeleft);
1559 	asm_fxch_st(&state, 1);
1560 	asm_fadds(&state, dwmixfa_state.faderight);
1561 	asm_fxch_st(&state, 1);
1562 	asm_fstps(&state, &dwmixfa_state.fadeleft);
1563 	asm_fstps(&state, &dwmixfa_state.faderight);
1564 	asm_fxch_st(&state, 1);
1565 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1566 	asm_andl(&state, FLAG_DISABLED, &state.eax);
1567 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1568 	asm_jmp(&state, mixs_nf_ende);
1569 
1570 mixs_nf_loopme: /* sample loops -> jump to loop start */
1571 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1572 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1573 	if (ebp_loopend == ebp_mirror)
1574 	{
1575 		write_cf(state.eflags, 0);
1576 		write_zf(state.eflags, 1);
1577 	} else if (ebp_mirror < ebp_loopend)
1578 	{
1579 		write_cf(state.eflags, 1);
1580 		write_zf(state.eflags, 0);
1581 	} else {
1582 		write_cf(state.eflags, 0);
1583 		write_zf(state.eflags, 0);
1584 	}
1585 		asm_jae(&state, mixs_nf_loopme);
1586 	asm_decl(&state, &state.ecx);
1587 	asm_jz(&state, mixs_nf_ende);
1588 	asm_jmp(&state, mixs_nf_next);
1589 }
1590 
/*
 * mixm_if - mix one voice into a single-channel float buffer using the
 * two-sample interpolation + filter path, written as a sequence of calls into
 * the x86/FPU emulator.
 *
 *   edi_destptr             destination buffer; one float per output sample
 *                           is read, summed with the voice and written back
 *   eax_sample_pos          in/out: address of the current sample pointer
 *   edx_sample_pos_fract    in/out: fractional part of the sample position
 *   ebx_sample_pitch        whole part of the per-output-sample step
 *   esi_sample_pitch_fract  fractional part of the per-output-sample step
 *   ebp_loopend             the loop handler runs once the sample pointer is
 *                           no longer below this address
 *
 * Reads dwmixfa_state.nsamples, voll, volrl, ffrq, frez, looptype and
 * mixlooplen; updates dwmixfa_state.voll, __fl1, __fb1, magic1, fadeleft and
 * (when a non-looped sample ends) looptype.
 *
 * The emulated registers only hold placeholder addresses (0x12345678).  The
 * real pointers are the C variables edi_destptr and ebp_mirror, which are
 * adjusted by hand on the same line as the emulated instruction they shadow.
 *
 * NOTE(review): the FPU stack remarks below assume that each asm_* helper
 * behaves like the x86 instruction it is named after and that minuseins is
 * -1.0f; neither is visible in this part of the file - confirm.
 */
static void mixm_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror; /* real sample pointer shadowing the emulated %ebp */

	debug_printf("mixm_if {\n");

	/* Load the "registers"; pointer arguments are replaced by dummy values. */
	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	/* ecx = number of samples to produce; FPU stack: st0=voll, st1=minuseins */
	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	asm_flds(&state, minuseins);
	asm_flds(&state, dwmixfa_state.voll);
	/* Save the (scaled) loop end on the emulated stack, then switch %ebp to
	 * the sample position. */
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp);
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/* magic1 = (fraction >> 9) | 0x3f800000: the top 23 fraction bits placed
	 * under the bit pattern of 1.0f, stored into the float magic1. */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_shrl(&state, 2, &state.ebp);
	asm_orl(&state, 0x3f800000, &state.eax);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
mixm_if_next:
	/* Interpolate between ebp_mirror[0] and ebp_mirror[1]:
	 * presumably s0 + (magic1 + minuseins) * (s1 - s0). */
	asm_flds(&state, ebp_mirror[0]);
	asm_fld(&state, 0);
	asm_fld(&state, 3);
	asm_fadds(&state, dwmixfa_state.magic1);
	asm_fxch_st(&state, 1);
	asm_fsubrs(&state, ebp_mirror[1]);
	/* Advance position: fraction first, then whole part plus carry.  The
	 * mirror pointer follows the carry and the whole step by hand. */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	asm_fmulp_st(&state, 1);
	/* Start building the next magic1 from the updated fraction. */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_faddp_stst(&state, 0, 1);

	/* Filter step on the interpolated value x:
	 *   __fb1 = (x - __fl1) * ffrq + __fb1 * frez
	 *   __fl1 = __fb1 * ffrq + __fl1
	 * The new __fl1 stays in st0 as the value that gets mixed. */
	asm_fsubs(&state, dwmixfa_state.__fl1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_flds(&state, dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.frez);
	asm_faddp_stst(&state, 0, 1);
	asm_fsts(&state, &dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_fadds(&state, dwmixfa_state.__fl1);
	asm_fsts(&state, &dwmixfa_state.__fl1);

	/* Scale by the running volume, ramp the volume by volrl, and add the
	 * value already in the destination slot (edi_destptr was advanced, so
	 * the current slot is [-1]). */
	asm_fld(&state, 1);
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
	asm_orl(&state, 0x3f800000, &state.eax);
/*mixm_if_looped:*/
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	/* The compare is done on the real pointers instead of the emulated
	 * registers: ZF when equal, CF when the position is below the loop end. */
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
		/* magic1 is stored between the flag setup and the branch. */
		asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
		asm_jae(&state, mixm_if_LoopHandler);
		asm_fstps(&state, edi_destptr-1);
		asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_if_next);
mixm_if_ende:
	/* Write back the ramped volume, drop the remaining FPU entry, and hand
	 * the updated position (pointer and fraction) back to the caller. */
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_fstp_st(&state, 0);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx);

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mixm_if_LoopHandler:
	/* Position reached ebp_loopend: store the sample just computed, then
	 * either wrap (MIXF_LOOPED set in looptype) or fade the voice out. */
	asm_fstps(&state, edi_destptr-1);
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixm_if_loopme);
	/* Not looped: undo the last position step and load the sample found
	 * there as the value used for the rest of the buffer. */
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
mixm_if_fill: /*  sample ends -> fill rest of buffer with last sample value */
		/* Each pass adds (last sample * volume) to the current slot and
		 * ramps the volume by volrl. */
		asm_fld(&state, 1);
		asm_fmul(&state, 1, 0);
		asm_fadds(&state, edi_destptr[-1]);
		asm_fstps(&state, edi_destptr-1);
		asm_fxch_st(&state, 1);
		asm_fadds(&state, dwmixfa_state.volrl);
		asm_fxch_st(&state, 1);
		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
		asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_if_fill);
	/* Accumulate (last sample * final volume) into fadeleft. */
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.fadeleft);

	/* Clear MIXF_PLAYING (FLAG_DISABLED is ~MIXF_PLAYING) in looptype. */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixm_if_ende);

mixm_if_loopme: /* sample loops -> jump to loop start */
	/* Step back by mixlooplen until the position is below ebp_loopend. */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
		asm_jae(&state, mixm_if_loopme);
	/* Count the sample stored in the loop handler, then continue or finish. */
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixm_if_ende);
	asm_jmp(&state, mixm_if_next);
}
1728 
/*
 * mixs_if - mix one voice into a two-channel (interleaved) float buffer using
 * the two-sample interpolation + filter path, written as a sequence of calls
 * into the x86/FPU emulator.
 *
 *   edi_destptr             destination buffer; two floats per output sample
 *                           ([-2] is scaled by voll, [-1] by volr)
 *   eax_sample_pos          in/out: address of the current sample pointer
 *   edx_sample_pos_fract    in/out: fractional part of the sample position
 *   ebx_sample_pitch        whole part of the per-output-sample step
 *   esi_sample_pitch_fract  fractional part of the per-output-sample step
 *   ebp_loopend             the loop handler runs once the sample pointer is
 *                           no longer below this address
 *
 * Reads dwmixfa_state.nsamples, voll, volr, volrl, volrr, ffrq, frez,
 * looptype and mixlooplen; updates dwmixfa_state.voll, volr, __fl1, __fb1,
 * magic1, fadeleft, faderight and (when a non-looped sample ends) looptype.
 *
 * The emulated registers only hold placeholder addresses (0x12345678).  The
 * real pointers are the C variables edi_destptr and ebp_mirror, which are
 * adjusted by hand on the same line as the emulated instruction they shadow.
 *
 * NOTE(review): the FPU stack remarks below assume that each asm_* helper
 * behaves like the x86 instruction it is named after and that minuseins is
 * -1.0f; neither is visible in this part of the file - confirm.
 */
static void mixs_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror; /* real sample pointer shadowing the emulated %ebp */

	debug_printf("mixs_if {\n");

	/* Load the "registers"; pointer arguments are replaced by dummy values. */
	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	/* ecx = number of samples; FPU stack: st0=volr, st1=voll, st2=minuseins */
	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	asm_flds(&state, minuseins);
	asm_flds(&state, dwmixfa_state.voll);
	asm_flds(&state, dwmixfa_state.volr);
	/* Save the (scaled) loop end on the emulated stack, then switch %ebp to
	 * the sample position. */
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp);
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/* magic1 = (fraction >> 9) | 0x3f800000: the top 23 fraction bits placed
	 * under the bit pattern of 1.0f, stored into the float magic1. */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_shrl(&state, 2, &state.ebp);
	asm_orl(&state, 0x3f800000, &state.eax);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
mixs_if_next:
	/* Interpolate between ebp_mirror[0] and ebp_mirror[1]:
	 * presumably s0 + (magic1 + minuseins) * (s1 - s0). */
	asm_flds(&state, ebp_mirror[0]);
	asm_fld(&state, 0);
	asm_fld(&state, 4);
	asm_fadds(&state, dwmixfa_state.magic1);
	asm_fxch_st(&state, 1);
	asm_fsubrs(&state, ebp_mirror[1]);
	/* Advance position: fraction first, then whole part plus carry.  The
	 * destination moves by two floats (8 bytes) per output sample. */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	asm_fmulp_st(&state, 1);
	/* Start building the next magic1 from the updated fraction. */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_faddp_stst(&state, 0, 1);

	/* Filter step on the interpolated value x:
	 *   __fb1 = (x - __fl1) * ffrq + __fb1 * frez
	 *   __fl1 = __fb1 * ffrq + __fl1
	 * The new __fl1 stays in st0 as the value that gets mixed. */
	asm_fsubs(&state, dwmixfa_state.__fl1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_flds(&state, dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.frez);
	asm_faddp_stst(&state, 0, 1);
	asm_fsts(&state, &dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_fadds(&state, dwmixfa_state.__fl1);
	asm_fsts(&state, &dwmixfa_state.__fl1);

	/* Scale the filtered value by voll and volr, ramp the two volumes by
	 * volrl / volrr, and add what is already in the two destination slots.
	 * The register exchanges interleave these steps; after the two stores
	 * further down the stack is back to st0=volr, st1=voll, st2=minuseins. */
	asm_fld(&state, 1);
	asm_fld(&state, 3);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 4);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 2);
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, dwmixfa_state.volrr);
	asm_fxch_st(&state, 3);
	asm_fadds(&state, edi_destptr[-2]);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, edi_destptr[-1]);
	asm_orl(&state, 0x3f800000, &state.eax);
/*mixs_if_looped:*/
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	/* The compare is done on the real pointers instead of the emulated
	 * registers: ZF when equal, CF when the position is below the loop end. */
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
		/* magic1 is stored between the flag setup and the branch. */
		asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
		asm_jae(&state, mixs_if_LoopHandler);
		asm_fstps(&state, edi_destptr-1);
		asm_fxch_st(&state, 1);
		asm_fstps(&state, edi_destptr-2);
		asm_fxch_st(&state, 1);
		asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixs_if_next);
mixs_if_ende:
	/* Write back both ramped volumes, drop the remaining FPU entry, and hand
	 * the updated position (pointer and fraction) back to the caller. */
	asm_fstps(&state, &dwmixfa_state.volr);
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_fstp_st(&state, 0);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx);

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mixs_if_LoopHandler:
	/* Position reached ebp_loopend: store the pair just computed, then
	 * either wrap (MIXF_LOOPED set in looptype) or fade the voice out. */
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixs_if_loopme);
	/* Not looped: discard the minuseins entry, undo the last position step
	 * and load the sample found there; it ends up in st2 below the two
	 * volumes and is popped again by the final fstp at mixs_if_ende. */
	asm_fxch_st(&state, 2);
	asm_fstp_st(&state, 0);
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
	asm_fxch_st(&state, 2);
mixs_if_fill:
	/*  sample ends -> fill rest of buffer with last sample value */
		/* Each pass adds (last sample * voll) to [-2] and
		 * (last sample * volr) to [-1], then ramps both volumes. */
		asm_fld(&state, 1);
		asm_fmul(&state, 3, 0);
		asm_fxch_st(&state, 1);
		asm_fld(&state, 0);
		asm_fmul(&state, 4, 0);
		asm_fxch_st(&state, 2);
		asm_fadds(&state, edi_destptr[-2]);
		asm_fstps(&state, edi_destptr-2);
		asm_fxch_st(&state, 1);
		asm_fadds(&state, edi_destptr[-1]);
		asm_fstps(&state, edi_destptr-1);
		asm_fadds(&state, dwmixfa_state.volrr);
		asm_fxch_st(&state, 1);
		asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
		asm_decl(&state, &state.ecx);
		asm_fadds(&state, dwmixfa_state.volrl);
		asm_fxch_st(&state, 1);
	asm_jnz(&state, mixs_if_fill);
	/*asm_fmul(&state, 1, 0);*/
	/* Accumulate (last sample * voll) into fadeleft and
	 * (last sample * volr) into faderight, using two copies of the sample. */
	asm_fld(&state, 2);
	asm_fld(&state, 0);
	asm_fmul(&state, 3, 0);
	asm_fxch_st(&state, 1);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.faderight);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, &dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.faderight);

	/* Clear MIXF_PLAYING (FLAG_DISABLED is ~MIXF_PLAYING) in looptype. */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixs_if_ende);

mixs_if_loopme: /* sample loops -> jump to loop start */
	/* Step back by mixlooplen until the position is below ebp_loopend. */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
		asm_jae(&state, mixs_if_loopme);
	/* Count the sample stored in the loop handler, then continue or finish. */
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixs_if_ende);
	asm_jmp(&state, mixs_if_next);
}
1903 
mixm_i2f(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1904 static void mixm_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1905 {
1906 	struct assembler_state_t state;
1907 	float *ebp_mirror;
1908 
1909 	debug_printf("mixm_i2f {\n");
1910 
1911 	init_assembler_state(&state, writecallback, readcallback);
1912 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1913 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1914 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1915 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
1916 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1917 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1918 
1919 
1920 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1921 	asm_flds(&state, dwmixfa_state.voll);
1922 	asm_shrl(&state, 2, &state.ebp);
1923 	asm_pushl(&state, state.ebp);
1924 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1925 	asm_shrl(&state, 2, &state.ebp);
1926 	asm_movl(&state, state.edx, &state.eax);
1927 	asm_shrl(&state, 24, &state.eax);
1928 mixm_i2f_next:
1929 	asm_flds(&state, ebp_mirror[0]);
1930 	asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1931 	asm_flds(&state, ebp_mirror[1]);
1932 	asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1933 	asm_flds(&state, ebp_mirror[2]);
1934 	asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1935 	asm_flds(&state, ebp_mirror[3]);
1936 	asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1937 	asm_fxch_st(&state, 2);
1938 	asm_faddp_stst(&state, 0, 3);
1939 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1940 	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1941 	asm_faddp_stst(&state, 0, 2);
1942 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1943 	asm_movl(&state, state.edx, &state.eax);
1944 	asm_faddp_stst(&state, 0, 1);
1945 
1946 	asm_fsubs(&state, dwmixfa_state.__fl1);
1947 	asm_fmuls(&state, dwmixfa_state.ffrq);
1948 	asm_flds(&state, dwmixfa_state.__fb1);
1949 	asm_fmuls(&state, dwmixfa_state.frez);
1950 	asm_faddp_stst(&state, 0, 1);
1951 	asm_fsts(&state, &dwmixfa_state.__fb1);
1952 	asm_fmuls(&state, dwmixfa_state.ffrq);
1953 	asm_fadds(&state, dwmixfa_state.__fl1);
1954 	asm_fsts(&state, &dwmixfa_state.__fl1);
1955 
1956 	asm_shrl(&state, 24, &state.eax);
1957 	asm_fld(&state, 1);
1958 	asm_fmulp_stst(&state, 0, 1);
1959 	asm_fxch_st(&state, 1);
1960 	asm_fadds(&state, dwmixfa_state.volrl);
1961 	asm_fxch_st(&state, 1);
1962 	asm_fadds(&state, edi_destptr[-1]);
1963 /*mixm_i2f_looped:*/
1964 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1965 	if (ebp_loopend == ebp_mirror)
1966 	{
1967 		write_cf(state.eflags, 0);
1968 		write_zf(state.eflags, 1);
1969 	} else if (ebp_mirror < ebp_loopend)
1970 	{
1971 		write_cf(state.eflags, 1);
1972 		write_zf(state.eflags, 0);
1973 	} else {
1974 		write_cf(state.eflags, 0);
1975 		write_zf(state.eflags, 0);
1976 	}
1977 		asm_jae(&state, mixm_i2f_LoopHandler);
1978 		asm_fstps(&state, edi_destptr-1);
1979 		asm_decl(&state, &state.ecx);
1980 	asm_jnz(&state, mixm_i2f_next);
1981 mixm_i2f_ende:
1982 	asm_fstps(&state, &dwmixfa_state.voll);
1983 	asm_shll(&state, 2, &state.ebp);
1984 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1985 	asm_popl(&state, &state.ecx);
1986 
1987 	asm_movl(&state, state.edx, edx_sample_pos_fract);
1988 	debug_printf("}\n");
1989 	return;
1990 
1991 mixm_i2f_LoopHandler:
1992 	asm_fstps(&state, edi_destptr-1);
1993 	asm_pushl(&state, state.eax);
1994 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1995 	asm_testl(&state, MIXF_LOOPED, state.eax);
1996 	asm_jnz(&state, mixm_i2f_loopme);
1997 	asm_popl(&state, &state.eax);
1998 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1999 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
2000 	asm_flds(&state, *ebp_mirror);
2001 mixm_i2f_fill: /*  sample ends -> fill rest of buffer with last sample value */
2002 		asm_fld(&state, 1);
2003 		asm_fmul(&state, 1, 0);
2004 		asm_fadds(&state, edi_destptr[-1]);
2005 		asm_fstps(&state, edi_destptr-1);
2006 		asm_fxch_st(&state, 1);
2007 		asm_fadds(&state, dwmixfa_state.volrl);
2008 		asm_fxch_st(&state, 1);
2009 		asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
2010 		asm_decl(&state, &state.ecx);
2011 	asm_jnz(&state, mixm_i2f_fill);
2012 	asm_fmul(&state, 1, 0);
2013 	asm_fadds(&state, dwmixfa_state.fadeleft);
2014 	asm_fstps(&state, &dwmixfa_state.fadeleft);
2015 
2016 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
2017 	asm_andl(&state, FLAG_DISABLED, &state.eax);
2018 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
2019 	asm_jmp(&state, mixm_i2f_ende);
2020 
2021 mixm_i2f_loopme: /* sample loops -> jump to loop start */
2022 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
2023 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2024 	if (ebp_loopend == ebp_mirror)
2025 	{
2026 		write_cf(state.eflags, 0);
2027 		write_zf(state.eflags, 1);
2028 	} else if (ebp_mirror < ebp_loopend)
2029 	{
2030 		write_cf(state.eflags, 1);
2031 		write_zf(state.eflags, 0);
2032 	} else {
2033 		write_cf(state.eflags, 0);
2034 		write_zf(state.eflags, 0);
2035 	}
2036 		asm_jae(&state, mixm_i2f_loopme);
2037 	asm_decl(&state, &state.ecx);
2038 	asm_jz(&state, mixm_i2f_ende);
2039 	asm_jmp(&state, mixm_i2f_next);
2040 }
2041 
mixs_i2f(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)2042 static void mixs_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
2043 {
2044 	struct assembler_state_t state;
2045 	float *ebp_mirror;
2046 
2047 	debug_printf("mixs_i2f {\n");
2048 
2049 	init_assembler_state(&state, writecallback, readcallback);
2050 	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
2051 	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
2052 	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
2053 	asm_movl(&state, ebx_sample_pitch, &state.ebx);
2054 	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
2055 	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
2056 
2057 
2058 	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
2059 	asm_flds(&state, dwmixfa_state.voll);
2060 	asm_flds(&state, dwmixfa_state.volr);
2061 	asm_shrl(&state, 2, &state.ebp);
2062 
2063 	asm_pushl(&state, state.ebp);
2064 
2065 	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
2066 	asm_shrl(&state, 2, &state.ebp);
2067 	asm_movl(&state, state.edx, &state.eax);
2068 	asm_shrl(&state, 24, &state.eax);
2069 
2070 mixs_i2f_next:
2071 	asm_flds(&state, ebp_mirror[0]);
2072 	asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
2073 	asm_flds(&state, ebp_mirror[1]);
2074 	asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
2075 	asm_flds(&state, ebp_mirror[2]);
2076 	asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
2077 	asm_flds(&state, ebp_mirror[3]);
2078 	asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
2079 	asm_fxch_st(&state, 2);
2080 	asm_faddp_stst(&state, 0, 3);
2081 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
2082 	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
2083 	asm_faddp_stst(&state, 0, 2);
2084 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
2085 	asm_movl(&state, state.edx, &state.eax);
2086 	asm_faddp_stst(&state, 0, 1);
2087 
2088 	asm_fsubs(&state, dwmixfa_state.__fl1);
2089 	asm_fmuls(&state, dwmixfa_state.ffrq);
2090 	asm_flds(&state, dwmixfa_state.__fb1);
2091 	asm_fmuls(&state, dwmixfa_state.frez);
2092 	asm_faddp_stst(&state, 0, 1);
2093 	asm_fsts(&state, &dwmixfa_state.__fb1);
2094 	asm_fmuls(&state, dwmixfa_state.ffrq);
2095 	asm_fadds(&state, dwmixfa_state.__fl1);
2096 	asm_fsts(&state, &dwmixfa_state.__fl1);
2097 
2098 	asm_shrl(&state, 24, &state.eax);
2099 	asm_fld(&state, 1);
2100 	asm_fld(&state, 3);
2101 	asm_fmul(&state, 2, 0);
2102 	asm_fxch_st(&state, 4);
2103 	asm_fadds(&state, dwmixfa_state.volrl);
2104 	asm_fxch_st(&state, 2);
2105 	asm_fmulp_stst(&state, 0, 1);
2106 	asm_fxch_st(&state, 2);
2107 	asm_fadds(&state, dwmixfa_state.volrr);
2108 	asm_fxch_st(&state, 3);
2109 	asm_fadds(&state, edi_destptr[-2]);
2110 	asm_fxch_st(&state, 2);
2111 	asm_fadds(&state, edi_destptr[-1]);
2112 /*mixs_i2f_looped:*/
2113 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2114 	if (ebp_loopend == ebp_mirror)
2115 	{
2116 		write_cf(state.eflags, 0);
2117 		write_zf(state.eflags, 1);
2118 	} else if (ebp_mirror < ebp_loopend)
2119 	{
2120 		write_cf(state.eflags, 1);
2121 		write_zf(state.eflags, 0);
2122 	} else {
2123 		write_cf(state.eflags, 0);
2124 		write_zf(state.eflags, 0);
2125 	}
2126 		asm_jae(&state, mixs_i2f_LoopHandler);
2127 		asm_fstps(&state, edi_destptr-1);
2128 		asm_fxch_st(&state, 1);
2129 		asm_fstps(&state, edi_destptr-2);
2130 		asm_fxch_st(&state, 1);
2131 		asm_decl(&state, &state.ecx);
2132 	asm_jnz(&state, mixs_i2f_next);
2133 mixs_i2f_ende:
2134 	asm_fstps(&state, &dwmixfa_state.volr);
2135 	asm_fstps(&state, &dwmixfa_state.voll);
2136 	asm_shll(&state, 2, &state.ebp);
2137 	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
2138 	asm_popl(&state, &state.ecx);
2139 
2140 	asm_movl(&state, state.edx, edx_sample_pos_fract);
2141 	debug_printf("}\n");
2142 	return;
2143 
2144 mixs_i2f_LoopHandler:
2145 	asm_fstps(&state, edi_destptr-1);
2146 	asm_fxch_st(&state, 1);
2147 	asm_fstps(&state, edi_destptr-2);
2148 	asm_fxch_st(&state, 1);
2149 	asm_pushl(&state, state.eax);
2150 	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
2151 	asm_testl(&state, MIXF_LOOPED, state.eax);
2152 	asm_jnz(&state, mixs_i2f_loopme);
2153 	asm_popl(&state, &state.eax);
2154 	asm_fxch_st(&state, 1);
2155 	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
2156 	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
2157 	asm_flds(&state, *ebp_mirror);
2158 	asm_fxch_st(&state, 2);
2159 mixs_i2f_fill:
2160 	/*  sample ends -> fill rest of buffer with last sample value */
2161 		asm_fld(&state, 1);
2162 		asm_fmul(&state, 3, 0);
2163 		asm_fxch_st(&state, 1);
2164 		asm_fld(&state, 0);
2165 		asm_fmul(&state, 4, 0);
2166 		asm_fxch_st(&state, 2);
2167 		asm_fadds(&state, edi_destptr[-2]);
2168 		asm_fstps(&state, edi_destptr-2);
2169 		asm_fxch_st(&state, 1);
2170 		asm_fadds(&state, edi_destptr[-1]);
2171 		asm_fstps(&state, edi_destptr-1);
2172 		asm_fadds(&state, dwmixfa_state.volrr);
2173 		asm_fxch_st(&state, 1);
2174 		asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
2175 		asm_decl(&state, &state.ecx);
2176 		asm_fadds(&state, dwmixfa_state.volrl);
2177 		asm_fxch_st(&state, 1);
2178 	asm_jnz(&state, mixs_i2f_fill);
2179 
2180 	asm_fxch_st(&state, 2);
2181 	asm_fld(&state, 0);
2182 	asm_fmul(&state, 2, 0);
2183 	asm_fxch_st(&state, 1);
2184 	asm_fmul(&state, 3, 0);
2185 	asm_fxch_st(&state, 1);
2186 	asm_fadds(&state, dwmixfa_state.fadeleft);
2187 	asm_fxch_st(&state, 1);
2188 	asm_fadds(&state, dwmixfa_state.faderight);
2189 	asm_fxch_st(&state, 1);
2190 	asm_fstps(&state, &dwmixfa_state.fadeleft);
2191 	asm_fstps(&state, &dwmixfa_state.faderight);
2192 	asm_fxch_st(&state, 1);
2193 	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
2194 	asm_andl(&state, FLAG_DISABLED, &state.eax);
2195 	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
2196 	asm_jmp(&state, mixs_i2f_ende);
2197 
2198 mixs_i2f_loopme: /* sample loops -> jump to loop start */
2199 	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
2200 	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2201 	if (ebp_loopend == ebp_mirror)
2202 	{
2203 		write_cf(state.eflags, 0);
2204 		write_zf(state.eflags, 1);
2205 	} else if (ebp_mirror < ebp_loopend)
2206 	{
2207 		write_cf(state.eflags, 1);
2208 		write_zf(state.eflags, 0);
2209 	} else {
2210 		write_cf(state.eflags, 0);
2211 		write_zf(state.eflags, 0);
2212 	}
2213 		asm_jae(&state, mixs_i2f_loopme);
2214 	asm_decl(&state, &state.ecx);
2215 	asm_jz(&state, mixs_i2f_ende);
2216 	asm_jmp(&state, mixs_i2f_next);
2217 }
2218 
/*
 * clip_16s - convert `count` floats from `input` into clamped 16-bit values
 * written to `output`, written as a sequence of calls into the x86/FPU
 * emulator.
 *
 * A sample above clampmax is written as 32767 and a sample below clampmin
 * as -32768; anything else is stored through asm_fistps.
 *
 * NOTE(review): the loop body runs before the counter is tested, so `count`
 * is presumably expected to be non-zero - confirm against the callers.
 * NOTE(review): the branch remarks assume asm_fcom_st/asm_fnstsw/asm_sahf
 * and asm_ja/asm_jb behave like the x86 instructions they are named after.
 */
static void clip_16s(float *input, void *output, uint_fast32_t count)
{
	struct assembler_state_t state;
	float *esi_mirror;    /* real read pointer shadowing the emulated %esi */
	uint16_t *edi_mirror; /* real write pointer shadowing the emulated %edi */

	debug_printf("clip_16s {\n");

	/* The emulated pointer registers only get placeholder values. */
	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
	asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
	asm_movl(&state, count, &state.ecx);

	/* FPU stack: st0=clampmax, st1=clampmin; bx/dx hold the clamped results. */
	asm_flds(&state, clampmin);
	asm_flds(&state, clampmax);
	asm_movw(&state, 32767, &state.bx);
	asm_movw(&state, -32768, &state.dx);

clip_16s_lp:
	/* With the sample loaded, st1 is clampmax and st2 is clampmin. */
	asm_flds(&state, *esi_mirror);
	asm_fcom_st(&state, 1);
	asm_fnstsw(&state, &state.ax);
	asm_sahf(&state);
	asm_ja(&state, clip_16s_max);
	asm_fcom_st(&state, 2);
	asm_fstsw(&state, &state.ax);
	asm_sahf(&state);
	asm_jb(&state, clip_16s_min);
	/* In range: convert and store directly into the output slot. */
	asm_fistps(&state, edi_mirror);
clip_16s_next:
	/* Advance one float on the input side and one 16-bit slot on output. */
	asm_addl(&state, 4, &state.esi); esi_mirror++;
	asm_addl(&state, 2, &state.edi); edi_mirror++;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, clip_16s_lp);
	asm_jmp(&state, clip_16s_ende);
clip_16s_max:
	/* Too large: discard the sample and write 32767. */
	asm_fstp_st(&state, 0);
	asm_movw(&state, state.bx, edi_mirror);
	asm_jmp(&state, clip_16s_next);

clip_16s_min:
	/* Too small: discard the sample and write -32768. */
	asm_fstp_st(&state, 0);
	asm_movw(&state, state.dx, edi_mirror);
	asm_jmp(&state, clip_16s_next);

clip_16s_ende:
	/* Drop the two clamp limits from the FPU stack. */
	asm_fstp_st(&state, 0);
	asm_fstp_st(&state, 0);
	debug_printf("}\n");
}
2269 
clip_16u(float * input,void * output,uint_fast32_t count)2270 static void clip_16u(float *input, void *output, uint_fast32_t count)
2271 {
2272 	struct assembler_state_t state;
2273 	float *esi_mirror;
2274 	uint16_t *edi_mirror;
2275 
2276 	debug_printf("clip_16u {\n");
2277 
2278 	init_assembler_state(&state, writecallback, readcallback);
2279 	asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2280 	asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2281 	asm_movl(&state, count, &state.ecx);
2282 
2283 	asm_flds(&state, clampmin);
2284 	asm_flds(&state, clampmax);
2285 	asm_movw(&state, 32767, &state.bx);
2286 	asm_movw(&state, -32768, &state.dx);
2287 
2288 clip_16u_lp:
2289 	asm_flds(&state, *esi_mirror);
2290 	asm_fcom_st(&state, 1);
2291 	asm_fnstsw(&state, &state.ax);
2292 	asm_sahf(&state);
2293 	asm_ja(&state, clip_16u_max);
2294 	asm_fcom_st(&state, 2);
2295 	asm_fstsw(&state, &state.ax);
2296 	asm_sahf(&state);
2297 	asm_jb(&state, clip_16u_min);
2298 	asm_fistps(&state, &dwmixfa_state.clipval);
2299 	asm_movw(&state, dwmixfa_state.clipval, &state.ax);
2300 clip_16u_next:
2301 	asm_xorw(&state, 0x8000, &state.ax);
2302 	asm_movw(&state, state.ax, edi_mirror);
2303 	asm_addl(&state, 4, &state.esi); esi_mirror++;
2304 	asm_addl(&state, 2, &state.edi); edi_mirror++;
2305 	asm_decl(&state, &state.ecx);
2306 	asm_jnz(&state, clip_16u_lp);
2307 	asm_jmp(&state, clip_16u_ende);
2308 clip_16u_max:
2309 	asm_fstp_st(&state, 0);
2310 	asm_movw(&state, state.bx, &state.ax);
2311 	asm_jmp(&state, clip_16u_next);
2312 
2313 clip_16u_min:
2314 	asm_fstp_st(&state, 0);
2315 	asm_movw(&state, state.bx, &state.ax);
2316 	asm_jmp(&state, clip_16u_next);
2317 
2318 clip_16u_ende:
2319 	asm_fstp_st(&state, 0);
2320 	asm_fstp_st(&state, 0);
2321 	debug_printf("}\n");
2322 }
2323 
/*
 * clip_8s - convert `count` floats from `input` into clamped 8-bit values
 * written to `output`, written as a sequence of calls into the x86/FPU
 * emulator.
 *
 * Each sample is first reduced to a 16-bit value (32767 above clampmax,
 * -32768 below clampmin, otherwise converted through dwmixfa_state.clipval);
 * the byte written is the high byte (%ah) of that value.
 *
 * NOTE(review): the loop body runs before the counter is tested, so `count`
 * is presumably expected to be non-zero - confirm against the callers.
 * NOTE(review): the branch remarks assume asm_fcom_st/asm_fnstsw/asm_sahf
 * and asm_ja/asm_jb behave like the x86 instructions they are named after.
 */
static void clip_8s(float *input, void *output, uint_fast32_t count)
{
	struct assembler_state_t state;
	float *esi_mirror;   /* real read pointer shadowing the emulated %esi */
	uint8_t *edi_mirror; /* real write pointer shadowing the emulated %edi */

	debug_printf("clip_8s {\n");

	/* The emulated pointer registers only get placeholder values. */
	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
	asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
	asm_movl(&state, count, &state.ecx);

	/* FPU stack: st0=clampmax, st1=clampmin; bx/dx hold the clamped results. */
	asm_flds(&state, clampmin);
	asm_flds(&state, clampmax);
	asm_movw(&state, 32767, &state.bx);
	asm_movw(&state, -32768, &state.dx);

clip_8s_lp:
	/* With the sample loaded, st1 is clampmax and st2 is clampmin. */
	asm_flds(&state, *esi_mirror);
	asm_fcom_st(&state, 1);
	asm_fnstsw(&state, &state.ax);
	asm_sahf(&state);
	asm_ja(&state, clip_8s_max);
	asm_fcom_st(&state, 2);
	asm_fstsw(&state, &state.ax);
	asm_sahf(&state);
	asm_jb(&state, clip_8s_min);
	/* In range: convert via clipval so the high byte can be taken from ax. */
	asm_fistps(&state, &dwmixfa_state.clipval);
	asm_movw(&state, dwmixfa_state.clipval, &state.ax);
clip_8s_next:
	/* Store the high byte; input advances 4 bytes, output 1 byte. */
	asm_movb(&state, state.ah, edi_mirror);
	asm_addl(&state, 4, &state.esi); esi_mirror++;
	asm_addl(&state, 1, &state.edi); edi_mirror++;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, clip_8s_lp);
	asm_jmp(&state, clip_8s_ende);
clip_8s_max:
	/* Too large: discard the sample and use 32767. */
	asm_fstp_st(&state, 0);
	asm_movw(&state, state.bx, &state.ax);
	asm_jmp(&state, clip_8s_next);

clip_8s_min:
	/* Too small: discard the sample and use -32768. */
	asm_fstp_st(&state, 0);
	asm_movw(&state, state.dx, &state.ax);
	asm_jmp(&state, clip_8s_next);

clip_8s_ende:
	/* Drop the two clamp limits from the FPU stack. */
	asm_fstp_st(&state, 0);
	asm_fstp_st(&state, 0);
	debug_printf("}\n");
}
2376 
clip_8u(float * input,void * output,uint_fast32_t count)2377 static void clip_8u(float *input, void *output, uint_fast32_t count)
2378 {
2379 	struct assembler_state_t state;
2380 	float *esi_mirror;
2381 	uint8_t *edi_mirror;
2382 
2383 	debug_printf("clip_8u {\n");
2384 
2385 	init_assembler_state(&state, writecallback, readcallback);
2386 	asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2387 	asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2388 	asm_movl(&state, count, &state.ecx);
2389 
2390 	asm_flds(&state, clampmin);
2391 	asm_flds(&state, clampmax);
2392 	asm_movw(&state, 32767, &state.bx);
2393 	asm_movw(&state, -32768, &state.dx);
2394 
2395 clip_8u_lp:
2396 	asm_flds(&state, *esi_mirror);
2397 	asm_fcom_st(&state, 1);
2398 	asm_fnstsw(&state, &state.ax);
2399 	asm_sahf(&state);
2400 	asm_ja(&state, clip_8u_max);
2401 	asm_fcom_st(&state, 2);
2402 	asm_fstsw(&state, &state.ax);
2403 	asm_sahf(&state);
2404 	asm_jb(&state, clip_8u_min);
2405 	asm_fistps(&state, &dwmixfa_state.clipval);
2406 	asm_movw(&state, dwmixfa_state.clipval, &state.ax);
2407 clip_8u_next:
2408 	asm_xorw(&state, 0x8000, &state.ax);
2409 	asm_movb(&state, state.ah, edi_mirror);
2410 	asm_addl(&state, 4, &state.esi); esi_mirror++;
2411 	asm_addl(&state, 1, &state.edi); edi_mirror++;
2412 	asm_decl(&state, &state.ecx);
2413 	asm_jnz(&state, clip_8u_lp);
2414 	asm_jmp(&state, clip_8u_ende);
2415 clip_8u_max:
2416 	asm_fstp_st(&state, 0);
2417 	asm_movw(&state, state.bx, &state.ax);
2418 	asm_jmp(&state, clip_8u_next);
2419 
2420 clip_8u_min:
2421 	asm_fstp_st(&state, 0);
2422 	asm_movw(&state, state.dx, &state.ax);
2423 	asm_jmp(&state, clip_8u_next);
2424 
2425 clip_8u_ende:
2426 	asm_fstp_st(&state, 0);
2427 	asm_fstp_st(&state, 0);
2428 	debug_printf("}\n");
2429 }
2430 
getchanvol(int n,int len)2431 void getchanvol (int n, int len)
2432 {
2433 	struct assembler_state_t state;
2434 
2435 	float *ebp_mirror;
2436 	float *edi_mirror;
2437 
2438 	debug_printf("getchanvol {\n");
2439 
2440 	init_assembler_state(&state, writecallback, readcallback);
2441 
2442 	state.ecx = len; /* assembler entry config */
2443 
2444 	asm_pushl(&state, state.ebp);
2445 	asm_fldz(&state);
2446 	asm_movl(&state, state.ecx, &dwmixfa_state.nsamples);
2447 
2448 	asm_movl(&state, dwmixfa_state.voiceflags[state.eax], &state.ebx);
2449 	asm_testl(&state, MIXF_PLAYING, state.ebx);
2450 	asm_jz(&state, getchanvol_SkipVoice);
2451 	asm_movl(&state, dwmixfa_state.looplen[state.eax], &state.ebx);
2452 	asm_movl(&state, state.ebx, &dwmixfa_state.mixlooplen);
2453 	asm_movl(&state, dwmixfa_state.freqw[state.eax], &state.ebx);
2454 	asm_movl(&state, dwmixfa_state.freqf[state.eax], &state.esi);
2455 	asm_movl(&state, dwmixfa_state.smpposf[state.eax], &state.edx);
2456 	asm_movl(&state, /*loopend[state.eax]*/0x12345678, &state.edi); edi_mirror = dwmixfa_state.loopend[state.eax];
2457 	asm_shrl(&state, 2, &state.edi); /* this is fucked up logic :-p */
2458 	asm_movl(&state, /*smpposw[state.eax]*/0x87654321, &state.ebp); ebp_mirror = dwmixfa_state.smpposw[state.eax];
2459 	asm_shrl(&state, 2, &state.ebp); /* this is fucked up logic :-p */
2460 /*getchanvol_next:*/
2461 	asm_flds(&state, *ebp_mirror); /* (,%ebp,4)*/
2462 	asm_testl(&state, 0x80000000, *(uint32_t *)ebp_mirror); /* sign og *ebp_mirror */
2463 	asm_jnz(&state, getchanvol_neg);
2464 	asm_faddp_stst(&state, 0, 1);
2465 	asm_jmp(&state, getchanvol_goon);
2466 getchanvol_neg:
2467 	asm_fsubp_stst(&state, 0, 1);
2468 getchanvol_goon:
2469 	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
2470 	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
2471 getchanvol_looped:
2472 /*
2473 	asm_cmpl(&state, state.edi, state.ebp);*/
2474 	if (ebp_mirror == edi_mirror)
2475 	{
2476 		write_cf(state.eflags, 0);
2477 		write_zf(state.eflags, 1);
2478 	} else if (ebp_mirror>edi_mirror) /* pos > loopend */
2479 	{
2480 		write_cf(state.eflags, 1);
2481 		write_zf(state.eflags, 0);
2482 	} else {
2483 		write_cf(state.eflags, 0);
2484 		write_zf(state.eflags, 0);
2485 	}
2486 	asm_decl(&state, &state.ecx);
2487 	asm_jnz(&state, getchanvol_LoopHandler);
2488 	asm_jmp(&state, getchanvol_SkipVoice);
2489 getchanvol_LoopHandler:
2490 	asm_testl(&state, MIXF_LOOPED, dwmixfa_state.voiceflags[state.eax]);
2491 	asm_jz(&state, getchanvol_SkipVoice);
2492 	asm_subl(&state, dwmixfa_state.looplen[state.eax], &state.ebp); ebp_mirror -= dwmixfa_state.looplen[state.eax];
2493 	asm_jmp(&state, getchanvol_looped);
2494 getchanvol_SkipVoice:
2495 	asm_fidivl(&state, dwmixfa_state.nsamples);
2496 	asm_fldx(&state, read_fpu_st(&state, 0));
2497 	asm_fmuls(&state, dwmixfa_state.volleft[state.eax]);
2498 	asm_fstps(&state, &dwmixfa_state.voll);
2499 	asm_fmuls(&state, dwmixfa_state.volright[state.eax]);
2500 	asm_fstps(&state, &dwmixfa_state.volr);
2501 
2502 	asm_popl(&state, &state.ebp);
2503 	debug_printf("}\n");
2504 }
2505