1 /* OpenCP Module Player
2 * copyright (c) 2010 Stian Skjelstad <stian@nixia.no>
3 *
4 * ASM emulated routines for FPU mixer
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 #include "asm_emu/x86.h"
/* The single mixer state object; the routines below read and write its fields directly. */
dwmixfa_state_t dwmixfa_state;
23
24 /*#define ASM_DEBUG 1*/
25 #ifdef ASM_DEBUG
26 #include <stdarg.h>
27 #include <stdio.h>
/*
 * printf-style trace helper, only compiled when ASM_DEBUG is defined.
 * Writes the fixed tag "[dwmixfa.c]: " followed by the formatted message
 * to stderr.
 */
static void debug_printf(const char *format, ...)
{
	va_list ap;

	/* The tag contains no conversion specifiers, so it can be written verbatim. */
	fputs("[dwmixfa.c]: ", stderr);

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}
38 #else
/*
 * Tracing disabled: debug_printf() expands to a no-op expression and its
 * arguments are never evaluated.  Uses the standard C99 variadic macro form
 * rather than the GNU-only named "args..." parameter.
 */
#define debug_printf(...) ((void)0)
40 #endif
41
42
/* Number of per-voice slots; prepare_mixer() uses it as the bound when clearing the per-voice volumes. */
#define MAXVOICES MIXF_MAXCHAN
/* AND-mask that clears the MIXF_PLAYING bit; applied to looptype when a non-looping sample reaches its end. */
#define FLAG_DISABLED (~MIXF_PLAYING)
/*
 * Disabled: stand-alone global versions of the mixer variables.  The code
 * below accesses fields with these same names through dwmixfa_state instead.
 * Kept as a reference for what each field means.
 */
#if 0
float *tempbuf; /* pointer to 32 bit mix buffer (nsamples * 4) */
void *outbuf; /* pointer to mixed buffer (nsamples * 2) */
uint32_t nsamples; /* # of samples to mix */
uint32_t nvoices; /* # of voices to mix */
uint32_t freqw[MAXVOICES]; /* frequency (whole part) */
uint32_t freqf[MAXVOICES]; /* frequency (fractional part) */
float *smpposw[MAXVOICES]; /* sample position (whole part (pointer!)) */
uint32_t smpposf[MAXVOICES]; /* sample position (fractional part) */
float *loopend[MAXVOICES]; /* pointer to loop end */
uint32_t looplen[MAXVOICES]; /* loop length in samples */
float volleft[MAXVOICES]; /* float: left volume (1.0=normal) */
float volright[MAXVOICES]; /* float: right volume (1.0=normal) */
float rampleft[MAXVOICES]; /* float: left volume ramp (dvol/sample) */
float rampright[MAXVOICES]; /* float: right volume ramp (dvol/sample) */
uint32_t voiceflags[MAXVOICES]; /* voice status flags */
float ffreq[MAXVOICES]; /* filter frequency (0<=x<=1) */
float freso[MAXVOICES]; /* filter resonance (0<=x<1) */
float fadeleft=0.0; /* starts at 0 */
float fl1[MAXVOICES]; /* filter lp buffer */
float fb1[MAXVOICES]; /* filter bp buffer */
float faderight=0.0; /* starts at 0 */
int isstereo; /* flag for stereo output */
int outfmt; /* output format */
float voll=0.0;
float volr=0.0;
float ct0[256]; /* interpolation table for s[-1] */
float ct1[256]; /* interpolation table for s[0] */
float ct2[256]; /* interpolation table for s[1] */
float ct3[256]; /* interpolation table for s[2] */
struct mixfpostprocregstruct *postprocs;
/* pointer to postproc list */
uint32_t samprate; /* sampling rate */



static float volrl;
static float volrr;
#if 0
static float eins=1.0;
#endif
#endif
/* -1.0 ("eins" is German for "one"); loaded onto the FPU stack at the start of mixm_i(). */
static const float minuseins=-1.0;
/* Largest / smallest sample values; presumably the limits used by the clip_* routines (not visible in this part of the file - confirm). */
static const float clampmax=32767.0;
static const float clampmin=-32767.0;
/* Factor loaded by clearbufm()/clearbufs() and multiplied into the fade value once per written sample. */
static const float cremoveconst=0.992;
/* Threshold that mixer() compares the magnitude of fadeleft/faderight against. */
static const float minampl=0.0001; /* NOTE (stian): unclear why this is a float, given that mixer() feeds it to an integer compare */
/*
 * Disabled: stand-alone versions of the mixer scratch variables.  The code
 * below accesses fields with these same names through dwmixfa_state instead.
 */
#if 0
static uint32_t magic1; /* 32bit in assembler used */
static uint16_t clipval; /* 16bit in assembler used */
static uint32_t mixlooplen; /* 32bit in assembler used, decimal. length of loop in samples */
static uint32_t __attribute__ ((used)) looptype; /* 32bit in assembler used, local version of voiceflags[N] */
static float __attribute__ ((used)) ffrq;
static float __attribute__ ((used)) frez;
static float __attribute__ ((used)) __fl1;
static float __attribute__ ((used)) __fb1;

#endif
103
/*
 * Output conversion routines.  mixer() picks one from clippers[] using
 * dwmixfa_state.outfmt as the index and calls it with the float mix buffer,
 * the output buffer and the number of values to convert.
 */
typedef void clipperfunc(float *input, void *output, uint_fast32_t count);
typedef clipperfunc *clippercall;

static clipperfunc clip_8s, clip_8u;
static clipperfunc clip_16s, clip_16u;

static const clippercall clippers[4] = {
	[0] = clip_8s,
	[1] = clip_8u,
	[2] = clip_16s,
	[3] = clip_16u
};
112
/*
 * Per-voice mixing routines.
 *
 * The parameters are named after the x86 register that carried them in the
 * assembler version.  Besides the arguments, the routines use these
 * dwmixfa_state fields:
 *   nsamples             R   number of samples to produce
 *   mixlooplen           R   length of the sample loop
 *   voll, volr           RW  current volume, written back on exit
 *   volrl, volrr         R   per-sample volume ramp
 *   fadeleft, faderight  RW  updated when a non-looping sample ends
 *   looptype             RW  sample flags (MIXF_PLAYING is cleared at sample end)
 * (mix_0 only advances the sample position and touches none of the volume
 * or fade fields.)
 *
 * mixers[] is indexed by mixer() with a 4-bit selector; odd slots hold the
 * stereo (mixs_*) variant of the routine in the preceding even slot.
 */
typedef void mixerfunc(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend);
typedef mixerfunc *mixercall;

static mixerfunc mix_0;
static mixerfunc mixm_n,   mixs_n;
static mixerfunc mixm_i,   mixs_i;
static mixerfunc mixm_i2,  mixs_i2;
static mixerfunc mixm_nf,  mixs_nf;
static mixerfunc mixm_if,  mixs_if;
static mixerfunc mixm_i2f, mixs_i2f;

static const mixercall mixers[16] = {
	[ 0] = mixm_n,   [ 1] = mixs_n,
	[ 2] = mixm_i,   [ 3] = mixs_i,
	[ 4] = mixm_i2,  [ 5] = mixs_i2,
	[ 6] = mix_0,    [ 7] = mix_0,
	[ 8] = mixm_nf,  [ 9] = mixs_nf,
	[10] = mixm_if,  [11] = mixs_if,
	[12] = mixm_i2f, [13] = mixs_i2f,
	[14] = mix_0,    [15] = mix_0
};
142
/*
 * Memory-write hook handed to init_assembler_state().  Deliberately a no-op:
 * every argument is ignored.
 */
static void writecallback(uint_fast16_t selector, uint_fast32_t addr, int size, uint_fast32_t data)
{
	(void)selector;
	(void)addr;
	(void)size;
	(void)data;
}
146
/*
 * Memory-read hook handed to init_assembler_state().  Ignores its arguments
 * and always reports 0.
 */
static uint_fast32_t readcallback(uint_fast16_t selector, uint_fast32_t addr, int size)
{
	const uint_fast32_t nothing = 0;

	(void)selector;
	(void)addr;
	(void)size;

	return nothing;
}
151
/*
 * Reset the mixer's running values: stores an all-zero 32-bit word into
 * fadeleft, faderight, volrl, volrr and into volleft[i] / volright[i] for
 * every i in 0 .. MAXVOICES-1.  The floats are written through uint32_t
 * pointers, i.e. as raw bit patterns (all-zero bits is 0.0 for IEEE 754
 * floats).
 */
void prepare_mixer (void)
{
	struct assembler_state_t state;

	init_assembler_state(&state, writecallback, readcallback);
	/* eax = 0 (xor with itself) */
	asm_xorl(&state, state.eax, &state.eax);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fadeleft);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.faderight);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrl);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrr);
	/* ecx = 0: voice index for the loop below */
	asm_xorl(&state, state.ecx, &state.ecx);
prepare_mixer_fillloop:
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volleft[state.ecx]);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volright[state.ecx]);
	asm_incl(&state, &state.ecx);
	/* repeat until ecx == MAXVOICES */
	asm_cmpl(&state, MAXVOICES, state.ecx);
	asm_jne(&state, prepare_mixer_fillloop);
}
170
/*
 * Initialise a mono mix buffer from the current fade value.
 *
 * Writes *count floats starting at *edi_buffer.  The value written starts at
 * dwmixfa_state.fadeleft and is multiplied on the FPU stack after every
 * store (presumably st(0) *= st(1), with st(1) holding cremoveconst - this
 * assumes asm_fmul follows x87 semantics).  The final value is stored back
 * into dwmixfa_state.fadeleft.
 *
 * edi_buffer: in/out - advanced by one float per written sample.
 * count:      in/out - number of samples; receives the final ecx (0).
 *
 * The loop body runs before the counter is tested, so *count must be
 * non-zero on entry (mixer() checks nsamples before calling).
 */
static inline void clearbufm(float **edi_buffer, uint32_t *count)
{
	struct assembler_state_t state;

	debug_printf("clearbufm {\n");

	init_assembler_state(&state, writecallback, readcallback);
	/* 0x12345678 is a placeholder; the real destination pointer is tracked in *edi_buffer */
	asm_movl(&state, 0x12345678/**edi_buffer*/, &state.edi);
	asm_movl(&state, *count, &state.ecx);

	/* FPU stack after these two loads: st(0) = fadeleft, st(1) = cremoveconst */
	asm_flds(&state, cremoveconst);
	asm_flds(&state, dwmixfa_state.fadeleft);
clearbufm_clloop:
	/* store the current fade value, then scale it for the next sample */
	asm_fsts(&state, *edi_buffer+0);
	asm_fmul(&state, 1, 0);
	asm_leal(&state, state.edi+4, &state.edi); *edi_buffer+=1;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, clearbufm_clloop);

	/* write the fade value back and pop both entries off the FPU stack */
	asm_fstps(&state, &dwmixfa_state.fadeleft);
	asm_fstp_st(&state, 0);

	asm_movl(&state, state.ecx, count);
	debug_printf("}\n");
}
196
/*
 * Initialise a stereo mix buffer from the current fade values.
 *
 * Writes *count pairs of floats starting at *edi_buffer: the first float of
 * each pair comes from the fadeleft value, the second from the faderight
 * value.  After each store the stored value is multiplied on the FPU stack
 * (presumably by cremoveconst held in st(2) - this assumes asm_fmul/asm_fxch
 * follow x87 semantics).  The final values are stored back into
 * dwmixfa_state.fadeleft and dwmixfa_state.faderight.
 *
 * edi_buffer: in/out - advanced by two floats per written sample pair.
 * count:      in/out - number of sample pairs; receives the final ecx (0).
 *
 * The loop body runs before the counter is tested, so *count must be
 * non-zero on entry (mixer() checks nsamples before calling).
 */
static inline void clearbufs(float **edi_buffer, uint32_t *count)
{
	struct assembler_state_t state;

	debug_printf("clearbufs {\n");

	init_assembler_state(&state, writecallback, readcallback);
	/* 0x12345678 is a placeholder; the real destination pointer is tracked in *edi_buffer */
	asm_movl(&state, 0x12345678/**edi_buffer*/, &state.edi);
	asm_movl(&state, *count, &state.ecx);

	/* FPU stack after these loads: st(0) = fadeleft, st(1) = faderight, st(2) = cremoveconst */
	asm_flds(&state, cremoveconst);
	asm_flds(&state, dwmixfa_state.faderight);
	asm_flds(&state, dwmixfa_state.fadeleft);
clearbufs_clloop:
	/* left: store, scale, then swap so the right value is on top */
	asm_fsts(&state, *edi_buffer+0);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 1);
	/* right: store, scale, then swap back */
	asm_fsts(&state, *edi_buffer+1);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+8, &state.edi); *edi_buffer+=2;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, clearbufs_clloop);
	/* write both fade values back and pop the remaining constant */
	asm_fstps(&state, &dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.faderight);
	asm_fstp_st(&state, 0);

	asm_movl(&state, state.ecx, count);

	debug_printf("}\n");
}
228
229
mixer(void)230 void mixer (void)
231 {
232 void *fadeleft_ptr = &dwmixfa_state.fadeleft;
233 void *faderight_ptr = &dwmixfa_state.faderight;
234 void *volr_ptr = &dwmixfa_state.volr;
235 void *voll_ptr = &dwmixfa_state.voll;
236 void *__fl1_ptr = &dwmixfa_state.__fl1;
237 void *__fb1_ptr = &dwmixfa_state.__fb1;
238
239 struct assembler_state_t state;
240 float *edi_mirror;
241 void *edi_mirror2;
242 float *esi_mirror2;
243 float *eax_mirror;
244 float *ebp_mirror;
245 mixercall ecx_mirror;
246 clippercall eax_mirror2;
247 struct mixfpostprocregstruct *esi_mirror;
248
249 init_assembler_state(&state, writecallback, readcallback);
250
251 debug_printf("mixer {\n");
252
253 asm_pushl(&state, state.ebp);
254 asm_finit(&state);
255 asm_xorl(&state, state.ebx, &state.ebx);
256 asm_movl(&state, *(uint32_t *)fadeleft_ptr, &state.eax);
257 asm_andl(&state, 0x7fffffff, &state.eax);
258 asm_cmpl(&state, state.eax, minampl); /* TODO, comparing of floats, typecasted to uint32_t */
259 asm_ja(&state, mixer_nocutfl);
260 asm_movl(&state, state.ebx, (uint32_t *)fadeleft_ptr); /* mixing of float and integer numbers.... "great" */
261 mixer_nocutfl:
262 asm_movl(&state, *(uint32_t *)faderight_ptr, &state.eax);
263 asm_andl(&state, 0x7fffffff, &state.eax);
264 asm_cmpl(&state, state.eax, minampl); /* TODO, comparing of floats, typecasted to uint32_t */
265 asm_ja(&state, mixer_nocutfr);
266 asm_movl(&state, state.ebx, (uint32_t *)faderight_ptr); /* mixing of float and integer numbers.... "great" */
267 mixer_nocutfr:
268 asm_movl(&state, 0x12345678/*tempbuf*/, &state.edi); edi_mirror = dwmixfa_state.tempbuf;
269 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
270 asm_orl(&state, state.ecx, &state.ecx);
271 asm_jz(&state, mixer_endall);
272 asm_movl(&state, dwmixfa_state.isstereo, &state.eax);
273 asm_orl(&state, state.eax, &state.eax);
274 asm_jnz(&state, mixer_clearst);
275 clearbufm(&edi_mirror, &state.ecx);
276 asm_jmp(&state, mixer_clearend);
277 mixer_clearst:
278 clearbufs(&edi_mirror, &state.ecx);
279 mixer_clearend:
280 asm_movl(&state, dwmixfa_state.nvoices, &state.ecx);
281 asm_decl(&state, &state.ecx);
282
283 mixer_MixNext:
284 debug_printf("Doing channel: %d\n", state.ecx);
285 asm_movl(&state, dwmixfa_state.voiceflags[state.ecx], &state.eax);
286 asm_testl(&state, MIXF_PLAYING, state.eax);
287 asm_jz(&state, mixer_SkipVoice);
288
289 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
290
291 asm_movl(&state, *(uint32_t *)&dwmixfa_state.volleft[state.ecx], &state.eax);
292 asm_movl(&state, *(uint32_t *)&dwmixfa_state.volright[state.ecx], &state.ebx);
293 asm_movl(&state, state.eax, (uint32_t *)voll_ptr);
294 asm_movl(&state, state.ebx, (uint32_t *)volr_ptr);
295
296 asm_movl(&state, *(uint32_t *)&dwmixfa_state.rampleft[state.ecx], &state.eax);
297 asm_movl(&state, *(uint32_t *)&dwmixfa_state.rampright[state.ecx], &state.ebx);
298 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volrl);
299 asm_movl(&state, state.ebx, (uint32_t *)&dwmixfa_state.volrr);
300
301 asm_movl(&state, *(uint32_t *)&dwmixfa_state.ffreq[state.ecx], &state.eax);
302 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.ffrq);
303 asm_movl(&state, *(uint32_t *)&dwmixfa_state.freso[state.ecx], &state.eax);
304 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.frez);
305 asm_movl(&state, *(uint32_t *)&dwmixfa_state.fl1[state.ecx], &state.eax);
306 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.__fl1);
307 asm_movl(&state, *(uint32_t *)&dwmixfa_state.fb1[state.ecx], &state.eax);
308 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.__fb1);
309
310 asm_movl(&state, dwmixfa_state.looplen[state.ecx], &state.eax);
311 asm_movl(&state, state.eax, &dwmixfa_state.mixlooplen);
312
313 asm_movl(&state, dwmixfa_state.freqw[state.ecx], &state.ebx);
314 asm_movl(&state, dwmixfa_state.freqf[state.ecx], &state.esi);
315
316 asm_movl(&state, 0x12345678, &state.eax); eax_mirror = dwmixfa_state.smpposw[state.ecx];
317
318 asm_movl(&state, dwmixfa_state.smpposf[state.ecx], &state.edx);
319
320 asm_movl(&state, 0x12345678, &state.ebp); ebp_mirror = dwmixfa_state.loopend[state.ecx];
321
322 asm_pushl(&state, state.ecx);
323 asm_movl(&state, 0x12345678, &state.edi); edi_mirror = dwmixfa_state.tempbuf;
324 asm_movl(&state, dwmixfa_state.isstereo, &state.ecx);
325 asm_orl(&state, dwmixfa_state.voiceflags[state.ecx], &state.ecx);
326 asm_andl(&state, 15, &state.ecx);
327 /*asm_movl(&state, 0x12345678, &state.ecx);*/ ecx_mirror = mixers[state.ecx];
328 ecx_mirror(edi_mirror, &eax_mirror, &state.edx, state.ebx, state.esi, ebp_mirror);
329 asm_popl(&state, &state.ecx);
330 /*
331 asm_movl(&state, eax, smposw[state.ecx]);*/dwmixfa_state.smpposw[state.ecx] = eax_mirror;
332 asm_movl(&state, state.edx, &dwmixfa_state.smpposf[state.ecx]);
333
334 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
335 asm_movl(&state, state.eax, &dwmixfa_state.voiceflags[state.ecx]);
336
337 /* update volumes */
338 asm_movl(&state, *(uint32_t *)voll_ptr, &state.eax);
339 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volleft[state.ecx]);
340 asm_movl(&state, *(uint32_t *)volr_ptr, &state.eax);
341 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.volright[state.ecx]);
342
343 asm_movl(&state, *(uint32_t *)__fl1_ptr, &state.eax);
344 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fl1[state.ecx]);
345 asm_movl(&state, *(uint32_t *)__fb1_ptr, &state.eax);
346 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.fb1[state.ecx]);
347
348 mixer_SkipVoice:
349 asm_decl(&state, &state.ecx);
350 asm_jns(&state, mixer_MixNext);
351
352 asm_movl(&state, 0x12345678 /*postprocs*/, &state.esi); esi_mirror = dwmixfa_state.postprocs;
353 mixer_PostprocLoop:
354 /*
355 asm_orl(&state, state.esi, state.esi);*/ write_zf(state.eflags, !esi_mirror);
356 asm_jz(&state, mixer_PostprocEnd);
357 asm_movl(&state, dwmixfa_state.nsamples, &state.edx);
358 asm_movl(&state, dwmixfa_state.isstereo, &state.ecx);
359 asm_movl(&state, dwmixfa_state.samprate, &state.ebx);
360 asm_movl(&state, 0x12345678, &state.eax); eax_mirror = dwmixfa_state.tempbuf;
361 /* call *state.esi*/ esi_mirror->Process(eax_mirror, state.edx, state.ebx, state.ecx);
362 asm_movl(&state, state.esi+12, &state.esi); esi_mirror = esi_mirror->next;
363
364 asm_jmp(&state, mixer_PostprocLoop);
365
366 mixer_PostprocEnd:
367
368 asm_movl(&state, dwmixfa_state.outfmt, &state.eax);
369 /*
370 {
371 int i;
372 for (i=0;i<nsamples;i++)
373 {
374 fprintf(stderr, "%f\n", tempbuf[i]);
375 if (i==8)
376 break;
377 }
378 }
379 */
380 /*asm_movl(&state, clippers[state.eax], &state.eax);*/ eax_mirror2 = clippers[state.eax];
381
382 asm_movl(&state, 0x12345678/*outbuf*/, &state.edi); edi_mirror2 = dwmixfa_state.outbuf;
383 asm_movl(&state, 0x12345678/*tempbuf*/, &state.esi); esi_mirror2 = dwmixfa_state.tempbuf;
384 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
385
386 asm_movl(&state, dwmixfa_state.isstereo, &state.edx);
387 asm_orl(&state, state.edx, &state.edx);
388 asm_jz(&state, mixer_clipmono);
389 asm_addl(&state, state.ecx, &state.ecx);
390 mixer_clipmono:
391 /* call *state.eax*/ eax_mirror2(esi_mirror2, edi_mirror2, state.ecx);
392
393 mixer_endall:
394 asm_popl(&state, &state.ebp);
395
396 debug_printf("}\n");
397
398 }
399
/*
 * "Silent" mixer routine: advances the sample position for
 * dwmixfa_state.nsamples output samples without writing anything to the
 * destination buffer.
 *
 * edi_destptr            unused apart from the register emulation.
 * eax_sample_pos         in/out: pointer to the current sample position.
 * edx_sample_pos_fract   in/out: fractional part of the position.
 * ebx_sample_pitch       whole-sample step per output sample.
 * esi_sample_pitch_fract fractional step per output sample.
 * ebp_loopend            position at (or beyond) which the end/loop handling triggers.
 *
 * When the position reaches ebp_loopend: if MIXF_LOOPED is set in
 * dwmixfa_state.looptype the position is moved back by
 * dwmixfa_state.mixlooplen (repeatedly, until it is below ebp_loopend);
 * otherwise FLAG_DISABLED is applied to looptype and the routine returns.
 *
 * Pointer-valued registers are loaded with the placeholder 0x12345678; the
 * real sample pointer is tracked in ebp_mirror.
 */
static void mix_0 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mix_0 {\n");

	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp);
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	asm_shrl(&state, 2, &state.ebp);
mix_0_next:
	/* position += pitch: the carry out of the fractional add moves the pointer one extra sample */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
mix_0_looped:
	/* Hand-written flags for "cmp loopend, position", done on the real pointers:
	 * asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	/* position >= loopend */
	asm_jae(&state, mix_0_LoopHandler);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mix_0_next);
mix_0_ende:
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx);


	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mix_0_LoopHandler:
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mix_0_loopme);
	/* not looped: mark the voice as no longer playing and stop */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mix_0_ende);
mix_0_loopme:
	/* looped: step back by one loop length and re-check against loopend */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	asm_jmp(&state, mix_0_looped);
}
463
/*
 * Mono mixer routine that reads one source sample per output sample
 * (no ebp_mirror[1] / table lookups, unlike mixm_i).
 *
 * For each of dwmixfa_state.nsamples output samples the source sample at the
 * current position is multiplied with the running volume and added to the
 * float at edi_destptr; the running volume then has dwmixfa_state.volrl
 * added.  The running volume starts as dwmixfa_state.voll and is stored back
 * there on exit.  (The FPU-stack notes below assume the asm_f* helpers
 * follow x87 semantics.)
 *
 * edi_destptr            destination buffer, one float per output sample.
 * eax_sample_pos         in/out: pointer to the current sample position.
 * edx_sample_pos_fract   in/out: fractional part of the position.
 * ebx_sample_pitch       whole-sample step per output sample.
 * esi_sample_pitch_fract fractional step per output sample.
 * ebp_loopend            position at (or beyond) which the end/loop handling triggers.
 *
 * When the position reaches ebp_loopend:
 *  - MIXF_LOOPED set in dwmixfa_state.looptype: the position is moved back by
 *    dwmixfa_state.mixlooplen until it is below ebp_loopend, then mixing
 *    continues.
 *  - otherwise: the last step is undone, the sample found there is used to
 *    fill the rest of the buffer, its product with the running volume is
 *    added to dwmixfa_state.fadeleft, and FLAG_DISABLED is applied to
 *    looptype.
 *
 * Pointer-valued registers are loaded with the placeholder 0x12345678; the
 * real pointers are tracked in ebp_mirror and edi_destptr.
 */
static void mixm_n (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mixm_n {\n");

	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	/* FPU stack: st(0) = running volume */
	asm_flds(&state, dwmixfa_state.voll);
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp);
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	asm_shrl(&state, 2, &state.ebp);
mixm_n_next:
	/* load the sample and a copy of the volume */
	asm_flds(&state, *ebp_mirror);
	asm_fld(&state, 1);
	/* position += pitch (carry of the fractional add moves the pointer one extra sample); dest++ */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	/* sample * volume; then ramp the volume; then add the value already in the buffer */
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
	/*mixm_n_looped:*/
	/* Hand-written flags for "cmp loopend, position", done on the real pointers:
	 * asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	/* position >= loopend */
	asm_jae(&state, mixm_n_LoopHandler);
	/* store the mixed sample */
	asm_fstps(&state, edi_destptr-1);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_n_next);
mixm_n_ende:
	/* write the running volume back */
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx);

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("mixer }\n");
	return;

mixm_n_LoopHandler:
	/* the sample computed before the jump still has to be stored */
	asm_fstps(&state, edi_destptr-1);
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixm_n_loopme);
	/* not looped: undo the last position step and load the sample found there */
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
mixm_n_fill: /* sample ends -> fill rest of buffer with last sample value */
	asm_fld(&state, 1);
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, edi_destptr[-1]);
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_n_fill);
	/* add last sample * volume to the fade value */
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.fadeleft);

	/* mark the voice as no longer playing */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixm_n_ende);

mixm_n_loopme: /* sample loops -> jump to loop start */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* Hand-written flags for "cmp loopend, position", done on the real pointers:
	 * asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	/* still at or past loopend: step back another loop length */
	asm_jae(&state, mixm_n_loopme);
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixm_n_ende);
	asm_jmp(&state, mixm_n_next);
}
573
/*
 * Stereo mixer routine that reads one source sample per output sample
 * (no ebp_mirror[1] / table lookups).
 *
 * For each of dwmixfa_state.nsamples output samples the source sample at the
 * current position is multiplied with the running left and right volumes and
 * added to edi_destptr[0] (left) and edi_destptr[1] (right); the destination
 * advances by two floats per sample.  The running volumes start as
 * dwmixfa_state.voll / volr, have volrl / volrr added per sample, and are
 * stored back on exit.  (The FPU-stack notes below assume the asm_f* helpers
 * follow x87 semantics.)
 *
 * edi_destptr            destination buffer, two floats per output sample.
 * eax_sample_pos         in/out: pointer to the current sample position.
 * edx_sample_pos_fract   in/out: fractional part of the position.
 * ebx_sample_pitch       whole-sample step per output sample.
 * esi_sample_pitch_fract fractional step per output sample.
 * ebp_loopend            position at (or beyond) which the end/loop handling triggers.
 *
 * When the position reaches ebp_loopend:
 *  - MIXF_LOOPED set in dwmixfa_state.looptype: the position is moved back by
 *    dwmixfa_state.mixlooplen until it is below ebp_loopend, then mixing
 *    continues.
 *  - otherwise: the last step is undone, the sample found there is used to
 *    fill the rest of the buffer, dwmixfa_state.fadeleft / faderight are
 *    updated from it, and FLAG_DISABLED is applied to looptype.
 *
 * Pointer-valued registers are loaded with the placeholder 0x12345678; the
 * real pointers are tracked in ebp_mirror and edi_destptr.
 */
static void mixs_n (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mixs_n {\n");

	init_assembler_state(&state, writecallback, readcallback);
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
	/* FPU stack: st(0) = right volume, st(1) = left volume */
	asm_flds(&state, dwmixfa_state.voll);
	asm_flds(&state, dwmixfa_state.volr);
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp);
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	asm_shrl(&state, 2, &state.ebp);
mixs_n_next:
	asm_flds(&state, *ebp_mirror);
	/* position += pitch (carry of the fractional add moves the pointer one extra sample); dest += 2 */
	asm_addl(&state, state.esi, &state.edx);if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	/* compute sample*left and sample*right, ramping both volumes in between */
	asm_fld(&state, 1);
	asm_fld(&state, 3);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 4);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 2);
	asm_fmulp_st(&state, 1);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, dwmixfa_state.volrr);
	asm_fxch_st(&state, 3);
	/* add the values already in the buffer: [-2] is left, [-1] is right */
	asm_fadds(&state, edi_destptr[-2]);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, edi_destptr[-1]);

	/*mixs_n_looped:*/
	/* Hand-written flags for "cmp loopend, position", done on the real pointers:
	 * asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	/* position >= loopend */
	asm_jae(&state, mixs_n_LoopHandler);
	/* store right, then left; the swaps restore st(0) = right volume, st(1) = left volume */
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixs_n_next);
mixs_n_ende:
	/* write the running volumes back */
	asm_fstps(&state, &dwmixfa_state.volr);
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx);

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("mixer }\n");
	return;

mixs_n_LoopHandler:
	/* the sample pair computed before the jump still has to be stored */
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixs_n_loopme);
	asm_fxch_st(&state, 1);
	/* not looped: undo the last position step and load the sample found there */
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);

	asm_fxch_st(&state, 2);
mixs_n_fill: /* sample ends -> fill rest of buffer with last sample value */
	asm_fld(&state, 1);
	asm_fmul(&state, 3, 0);
	asm_fxch_st(&state, 1);
	asm_fld(&state, 0);
	asm_fmul(&state, 4, 0);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, edi_destptr[-2]);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
	asm_fstps(&state, edi_destptr-1);
	asm_fadds(&state, dwmixfa_state.volrr);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	asm_decl(&state, &state.ecx);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_jnz(&state, mixs_n_fill);
	/* fold the last sample, scaled by the volumes, into fadeleft / faderight */
	asm_fxch_st(&state, 2);
	asm_fld(&state, 0);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 1);
	asm_fmul(&state, 3, 0);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.faderight);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, &dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.faderight);
	asm_fxch_st(&state, 1);

	/* mark the voice as no longer playing */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixs_n_ende);

mixs_n_loopme: /* sample loops -> jump to loop start */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* Hand-written flags for "cmp loopend, position", done on the real pointers:
	 * asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	/* still at or past loopend: step back another loop length */
	asm_jae(&state, mixs_n_loopme);
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixs_n_ende);
	asm_jmp(&state, mixs_n_next);
}
720
/*
 * mixm_i - emulated x86/x87 inner loop that mixes one voice into a
 * single-channel float buffer, interpolating linearly between
 * ebp_mirror[0] and ebp_mirror[1].
 *
 * Parameters are named after the registers of the emulated routine:
 *   edi_destptr             destination buffer; one float is accumulated
 *                           (added to the existing value) per output sample
 *   eax_sample_pos          in/out: current sample pointer (whole part)
 *   edx_sample_pos_fract    in/out: fractional part of the position
 *   ebx_sample_pitch        whole part of the per-sample step
 *   esi_sample_pitch_fract  fractional part of the per-sample step
 *   ebp_loopend             position at/after which the loop handler runs
 *
 * Reads dwmixfa_state.nsamples, voll, volrl, looptype and mixlooplen;
 * writes voll and magic1, and when the sample ends also fadeleft and
 * looptype.
 *
 * The emulated edi/eax/ebp registers only carry placeholder values; the real
 * pointers are tracked in edi_destptr and ebp_mirror, updated right next to
 * the emulated instruction they shadow.  FPU stack notes below assume the
 * asm_* helpers behave like the x86 instructions they are named after.
 */
static void mixm_i (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mixm_i {\n");

	init_assembler_state(&state, writecallback, readcallback);
	/* Pointer arguments cannot live in 32 bit emulated registers, so dummy values are loaded instead */
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx); /* ecx = remaining output samples */
	asm_flds(&state, minuseins); /* presumably -1.0f (defined elsewhere); stays at the bottom of the FPU stack */
	asm_flds(&state, dwmixfa_state.voll); /* running volume, ramped by volrl every sample */
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp); /* emulated copy of loop end; popped again at mixm_i_ende */
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/* Build magic1: top 23 fraction bits become the mantissa of a float with
	 * the bit pattern of 1.0f (0x3f800000), i.e. a value in [1.0, 2.0) */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_shrl(&state, 2, &state.ebp);
	asm_orl(&state, 0x3f800000, &state.eax);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
mixm_i_next:
	/* interpolated = s[0] + (s[1] - s[0]) * (minuseins + magic1) */
	asm_flds(&state, ebp_mirror[0]);
	asm_fld(&state, 0);
	asm_fld(&state, 3);
	asm_fadds(&state, dwmixfa_state.magic1);
	asm_fxch_st(&state, 1);
	asm_fsubrs(&state, ebp_mirror[1]);
	/* advance position: fraction first, the carry moves the whole part one extra step */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	asm_fmulp_st(&state, 1);
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_faddp_stst(&state, 0, 1);
	/* scale by the current volume, then ramp the volume and accumulate into the buffer */
	asm_fld(&state, 1);
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
	asm_orl(&state, 0x3f800000, &state.eax);
/*mixm_i_looped:*/
	/* Hand-written stand-in for the commented-out cmpl below: the emulated ebp
	 * is only a placeholder, so CF/ZF are derived from the real pointers
	 * (CF set while below the loop end, ZF set when exactly on it). */
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1); /* magic1 for the next sample */
	asm_jae(&state, mixm_i_LoopHandler); /* position reached or passed the loop end */
	asm_fstps(&state, edi_destptr-1);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_i_next);
mixm_i_ende:
	/* write back the ramped volume and drop minuseins from the FPU stack */
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_fstp_st(&state, 0);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx); /* balances the pushl of the loop end above */

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mixm_i_LoopHandler:
	asm_fstps(&state, edi_destptr-1); /* store the sample that was pending when the branch was taken */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixm_i_loopme);
	/* not looped: undo the last position step and fetch the sample found there */
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
mixm_i_fill: /* sample ends -> fill rest of buffer with last sample value */
	asm_fld(&state, 1);
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, edi_destptr[-1]);
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_i_fill);
	/* add last sample * volume to fadeleft */
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.fadeleft);

	/* clear the MIXF_PLAYING bit (FLAG_DISABLED is ~MIXF_PLAYING) */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixm_i_ende);

mixm_i_loopme: /* sample loops -> jump to loop start */
	/* step back one loop length at a time until the position is below the loop end again */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_jae(&state, mixm_i_loopme);
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixm_i_ende);
	asm_jmp(&state, mixm_i_next);
}
847
/*
 * mixs_i - emulated x86/x87 inner loop that mixes one voice into a
 * two-channel (interleaved) float buffer, interpolating linearly between
 * ebp_mirror[0] and ebp_mirror[1].
 *
 * Parameters are named after the registers of the emulated routine:
 *   edi_destptr             destination buffer; two floats are accumulated
 *                           per output sample (first scaled by voll, second
 *                           by volr)
 *   eax_sample_pos          in/out: current sample pointer (whole part)
 *   edx_sample_pos_fract    in/out: fractional part of the position
 *   ebx_sample_pitch        whole part of the per-sample step
 *   esi_sample_pitch_fract  fractional part of the per-sample step
 *   ebp_loopend             position at/after which the loop handler runs
 *
 * Reads dwmixfa_state.nsamples, voll, volr, volrl, volrr, looptype and
 * mixlooplen; writes voll, volr and magic1, and when the sample ends also
 * fadeleft, faderight and looptype.
 *
 * The emulated edi/eax/ebp registers only carry placeholder values; the real
 * pointers are tracked in edi_destptr and ebp_mirror.  FPU stack notes assume
 * the asm_* helpers behave like the x86 instructions they are named after.
 */
static void mixs_i (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mixs_i {\n");

	init_assembler_state(&state, writecallback, readcallback);
	/* Pointer arguments cannot live in 32 bit emulated registers, so dummy values are loaded instead */
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx); /* ecx = remaining output samples */
	/* FPU stack from here on (top first): volr, voll, minuseins */
	asm_flds(&state, minuseins); /* presumably -1.0f (defined elsewhere) */
	asm_flds(&state, dwmixfa_state.voll);
	asm_flds(&state, dwmixfa_state.volr);
	asm_shrl(&state, 2, &state.ebp);

	asm_pushl(&state, state.ebp); /* emulated copy of loop end; popped again at mixs_i_ende */



	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	/* Build magic1: top 23 fraction bits become the mantissa of a float with
	 * the bit pattern of 1.0f (0x3f800000), i.e. a value in [1.0, 2.0) */
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_shrl(&state, 2, &state.ebp);
	asm_orl(&state, 0x3f800000, &state.eax);
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);


mixs_i_next:
	/* interpolated = s[0] + (s[1] - s[0]) * (minuseins + magic1) */
	asm_flds(&state, ebp_mirror[0]);
	asm_fld(&state, 0);
	asm_fld(&state, 4);
	asm_fadds(&state, dwmixfa_state.magic1);
	asm_fxch_st(&state, 1);
	asm_fsubrs(&state, ebp_mirror[1]);
	/* advance position: fraction first, the carry moves the whole part one extra step */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	asm_fmulp_st(&state, 1);
	asm_movl(&state, state.edx, &state.eax);
	asm_shrl(&state, 9, &state.eax);
	asm_faddp_stst(&state, 0, 1);
	/* scale the sample by both volumes, ramp both volumes (volrl/volrr),
	 * and add the existing buffer contents to each channel */
	asm_fld(&state, 1);
	asm_fld(&state, 3);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 4);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 2);
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, dwmixfa_state.volrr);
	asm_fxch_st(&state, 3);
	asm_fadds(&state, edi_destptr[-2]);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, edi_destptr[-1]);
	asm_orl(&state, 0x3f800000, &state.eax);
/*mixs_i_looped:*/
	/* Hand-written stand-in for the commented-out cmpl below: the emulated ebp
	 * is only a placeholder, so CF/ZF are derived from the real pointers
	 * (CF set while below the loop end, ZF set when exactly on it). */
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1); /* magic1 for the next sample */
	asm_jae(&state, mixs_i_LoopHandler); /* position reached or passed the loop end */

	/* store both channels; the exchanges put volr/voll back in their usual slots */
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixs_i_next);
mixs_i_ende:
	/* write back the ramped volumes and drop the remaining FPU stack entry */
	asm_fstps(&state, &dwmixfa_state.volr);
	asm_fstps(&state, &dwmixfa_state.voll);
	asm_fstp_st(&state, 0);
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;

	asm_popl(&state, &state.ecx); /* balances the pushl of the loop end above */

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mixs_i_LoopHandler:
	/* store the two samples that were pending when the branch was taken */
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixs_i_loopme);
	/* not looped: discard minuseins, undo the last position step and load the
	 * sample found there; mixs_i_ende later pops it with its final fstp */
	asm_fxch_st(&state, 2);
	asm_fstp_st(&state, 0);
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
	asm_fxch_st(&state, 2);
mixs_i_fill:
	/* sample ends -> fill rest of buffer with last sample value */
	asm_fld(&state, 1);
	asm_fmul(&state, 3, 0);
	asm_fxch_st(&state, 1);
	asm_fld(&state, 0);
	asm_fmul(&state, 4, 0);
	asm_fxch_st(&state, 2);
	asm_fadds(&state, edi_destptr[-2]);
	asm_fstps(&state, edi_destptr-2);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
	asm_fstps(&state, edi_destptr-1);
	asm_fadds(&state, dwmixfa_state.volrr);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
	asm_decl(&state, &state.ecx);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_jnz(&state, mixs_i_fill);

	/* add last sample * volume to fadeleft / faderight */
	asm_fld(&state, 2);
	asm_fld(&state, 0);
	asm_fmul(&state, 3, 0);
	asm_fxch_st(&state, 1);
	asm_fmul(&state, 2, 0);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.faderight);
	asm_fxch_st(&state, 1);
	asm_fstps(&state, &dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.faderight);
	/* clear the MIXF_PLAYING bit (FLAG_DISABLED is ~MIXF_PLAYING) */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixs_i_ende);

mixs_i_loopme: /* sample loops -> jump to loop start */
	/* step back one loop length at a time until the position is below the loop end again */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_jae(&state, mixs_i_loopme);
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixs_i_ende);
	asm_jmp(&state, mixs_i_next);
}
1018
mixm_i2(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1019 static void mixm_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1020 {
1021 struct assembler_state_t state;
1022 float *ebp_mirror;
1023
1024 debug_printf("mixm_i2 {\n");
1025
1026 init_assembler_state(&state, writecallback, readcallback);
1027 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1028 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1029 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1030 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1031 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1032 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1033
1034
1035 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1036 asm_flds(&state, dwmixfa_state.voll);
1037 asm_shrl(&state, 2, &state.ebp);
1038 asm_pushl(&state, state.ebp);
1039 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1040 asm_shrl(&state, 2, &state.ebp);
1041 asm_movl(&state, state.edx, &state.eax);
1042 asm_shrl(&state, 24, &state.eax);
1043 mixm_i2_next:
1044 asm_flds(&state, ebp_mirror[0]);
1045 asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1046 asm_flds(&state, ebp_mirror[1]);
1047 asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1048 asm_flds(&state, ebp_mirror[2]);
1049 asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1050 asm_flds(&state, ebp_mirror[3]);
1051 asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1052 asm_fxch_st(&state, 2);
1053 asm_faddp_stst(&state, 0, 3);
1054 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1055 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1056 asm_faddp_stst(&state, 0, 2);
1057 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1058 asm_movl(&state, state.edx, &state.eax);
1059 asm_faddp_stst(&state, 0, 1);
1060 asm_shrl(&state, 24, &state.eax);
1061 asm_fld(&state, 1);
1062 asm_fmulp_stst(&state, 0, 1);
1063 asm_fxch_st(&state, 1);
1064 asm_fadds(&state, dwmixfa_state.volrl);
1065 asm_fxch_st(&state, 1);
1066 asm_fadds(&state, edi_destptr[-1]);
1067 /*mixm_i2_looped:*/
1068 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1069 if (ebp_loopend == ebp_mirror)
1070 {
1071 write_cf(state.eflags, 0);
1072 write_zf(state.eflags, 1);
1073 } else if (ebp_mirror < ebp_loopend)
1074 {
1075 write_cf(state.eflags, 1);
1076 write_zf(state.eflags, 0);
1077 } else {
1078 write_cf(state.eflags, 0);
1079 write_zf(state.eflags, 0);
1080 }
1081 asm_jae(&state, mixm_i2_LoopHandler);
1082 asm_fstps(&state, edi_destptr-1);
1083 asm_decl(&state, &state.ecx);
1084 asm_jnz(&state, mixm_i2_next);
1085 mixm_i2_ende:
1086 asm_fstps(&state, &dwmixfa_state.voll);
1087 asm_shll(&state, 2, &state.ebp);
1088 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1089 asm_popl(&state, &state.ecx);
1090
1091 asm_movl(&state, state.edx, edx_sample_pos_fract);
1092 debug_printf("}\n");
1093 return;
1094
1095 mixm_i2_LoopHandler:
1096 asm_fstps(&state, edi_destptr-1);
1097 asm_pushl(&state, state.eax);
1098 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1099 asm_testl(&state, MIXF_LOOPED, state.eax);
1100 asm_jnz(&state, mixm_i2_loopme);
1101 asm_popl(&state, &state.eax);
1102 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1103 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1104 asm_flds(&state, *ebp_mirror);
1105 mixm_i2_fill: /* sample ends -> fill rest of buffer with last sample value */
1106 asm_fld(&state, 1);
1107 asm_fmul(&state, 1, 0);
1108 asm_fadds(&state, edi_destptr[-1]);
1109 asm_fstps(&state, edi_destptr-1);
1110 asm_fxch_st(&state, 1);
1111 asm_fadds(&state, dwmixfa_state.volrl);
1112 asm_fxch_st(&state, 1);
1113 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1114 asm_decl(&state, &state.ecx);
1115 asm_jnz(&state, mixm_i2_fill);
1116 asm_fmul(&state, 1, 0);
1117 asm_fadds(&state, dwmixfa_state.fadeleft);
1118 asm_fstps(&state, &dwmixfa_state.fadeleft);
1119
1120 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1121 asm_andl(&state, FLAG_DISABLED, &state.eax);
1122 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1123 asm_jmp(&state, mixm_i2_ende);
1124
1125 mixm_i2_loopme: /* sample loops -> jump to loop start */
1126 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1127 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1128 if (ebp_loopend == ebp_mirror)
1129 {
1130 write_cf(state.eflags, 0);
1131 write_zf(state.eflags, 1);
1132 } else if (ebp_mirror < ebp_loopend)
1133 {
1134 write_cf(state.eflags, 1);
1135 write_zf(state.eflags, 0);
1136 } else {
1137 write_cf(state.eflags, 0);
1138 write_zf(state.eflags, 0);
1139 }
1140 asm_jae(&state, mixm_i2_loopme);
1141 asm_decl(&state, &state.ecx);
1142 asm_jz(&state, mixm_i2_ende);
1143 asm_jmp(&state, mixm_i2_next);
1144 }
1145
mixs_i2(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1146 static void mixs_i2 (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1147 {
1148 struct assembler_state_t state;
1149 float *ebp_mirror;
1150
1151 debug_printf("mixs_i2 {\n");
1152
1153 init_assembler_state(&state, writecallback, readcallback);
1154 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1155 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1156 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1157 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1158 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1159 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1160
1161
1162 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1163 asm_flds(&state, dwmixfa_state.voll);
1164 asm_flds(&state, dwmixfa_state.volr);
1165
1166 asm_shrl(&state, 2, &state.ebp);
1167
1168 asm_pushl(&state, state.ebp);
1169
1170
1171 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1172 asm_shrl(&state, 2, &state.ebp);
1173 asm_movl(&state, state.edx, &state.eax);
1174 asm_shrl(&state, 24, &state.eax);
1175
1176 mixs_i2_next:
1177 asm_flds(&state, ebp_mirror[0]);
1178 asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1179 asm_flds(&state, ebp_mirror[1]);
1180 asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1181 asm_flds(&state, ebp_mirror[2]);
1182 asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1183 asm_flds(&state, ebp_mirror[3]);
1184 asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1185 asm_fxch_st(&state, 2);
1186 asm_faddp_stst(&state, 0, 3);
1187 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1188 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1189 asm_faddp_stst(&state, 0, 2);
1190 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1191 asm_movl(&state, state.edx, &state.eax);
1192 asm_faddp_stst(&state, 0, 1);
1193 asm_shrl(&state, 24, &state.eax);
1194 asm_fld(&state, 1);
1195 asm_fld(&state, 3);
1196 asm_fmul(&state, 2, 0);
1197 asm_fxch_st(&state, 4);
1198 asm_fadds(&state, dwmixfa_state.volrl);
1199 asm_fxch_st(&state, 2);
1200 asm_fmulp_stst(&state, 0, 1);
1201 asm_fxch_st(&state, 2);
1202 asm_fadds(&state, dwmixfa_state.volrr);
1203 asm_fxch_st(&state, 3);
1204 asm_fadds(&state, edi_destptr[-2]);
1205 asm_fxch_st(&state, 2);
1206 asm_fadds(&state, edi_destptr[-1]);
1207 /*mixs_i2_looped:*/
1208 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1209 if (ebp_loopend == ebp_mirror)
1210 {
1211 write_cf(state.eflags, 0);
1212 write_zf(state.eflags, 1);
1213 } else if (ebp_mirror < ebp_loopend)
1214 {
1215 write_cf(state.eflags, 1);
1216 write_zf(state.eflags, 0);
1217 } else {
1218 write_cf(state.eflags, 0);
1219 write_zf(state.eflags, 0);
1220 }
1221 asm_jae(&state, mixs_i2_LoopHandler);
1222
1223 asm_fstps(&state, edi_destptr-1);
1224 asm_fxch_st(&state, 1);
1225 asm_fstps(&state, edi_destptr-2);
1226 asm_fxch_st(&state, 1);
1227 asm_decl(&state, &state.ecx);
1228 asm_jnz(&state, mixs_i2_next);
1229 mixs_i2_ende:
1230 asm_fstps(&state, &dwmixfa_state.volr);
1231 asm_fstps(&state, &dwmixfa_state.voll);
1232 asm_shll(&state, 2, &state.ebp);
1233 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1234 asm_popl(&state, &state.ecx);
1235
1236 asm_movl(&state, state.edx, edx_sample_pos_fract);
1237 debug_printf("}\n");
1238 return;
1239
1240 mixs_i2_LoopHandler:
1241 asm_fstps(&state, edi_destptr-1);
1242 asm_fxch_st(&state, 1);
1243 asm_fstps(&state, edi_destptr-2);
1244 asm_fxch_st(&state, 1);
1245 asm_pushl(&state, state.eax);
1246 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1247 asm_testl(&state, MIXF_LOOPED, state.eax);
1248 asm_jnz(&state, mixs_i2_loopme);
1249 asm_popl(&state, &state.eax);
1250 asm_fxch_st(&state, 1);
1251 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1252 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1253 asm_flds(&state, *ebp_mirror);
1254 asm_fxch_st(&state, 2);
1255 mixs_i2_fill: /* sample ends -> fill rest of buffer with last sample value */
1256
1257 asm_fld(&state, 1);
1258 asm_fmul(&state, 3, 0);
1259 asm_fxch_st(&state, 1);
1260 asm_fld(&state, 0);
1261 asm_fmul(&state, 4, 0);
1262 asm_fxch_st(&state, 2);
1263 asm_fadds(&state, edi_destptr[-2]);
1264 asm_fstps(&state, edi_destptr-2);
1265 asm_fxch_st(&state, 1);
1266 asm_fadds(&state, edi_destptr[-1]);
1267 asm_fstps(&state, edi_destptr-1);
1268 asm_fadds(&state, dwmixfa_state.volrr);
1269 asm_fxch_st(&state, 1);
1270 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1271 asm_decl(&state, &state.ecx);
1272 asm_fadds(&state, dwmixfa_state.volrl);
1273 asm_fxch_st(&state, 1);
1274 asm_jnz(&state, mixs_i2_fill);
1275
1276 asm_fxch_st(&state, 2);
1277 asm_fld(&state, 0);
1278 asm_fmul(&state, 2, 0);
1279 asm_fxch_st(&state, 1);
1280 asm_fmul(&state, 3, 0);
1281 asm_fxch_st(&state, 1);
1282 asm_fadds(&state, dwmixfa_state.fadeleft);
1283 asm_fxch_st(&state, 1);
1284 asm_fadds(&state, dwmixfa_state.faderight);
1285 asm_fxch_st(&state, 1);
1286 asm_fstps(&state, &dwmixfa_state.fadeleft);
1287 asm_fstps(&state, &dwmixfa_state.faderight);
1288 asm_fxch_st(&state, 1);
1289 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1290 asm_andl(&state, FLAG_DISABLED, &state.eax);
1291 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1292 asm_jmp(&state, mixs_i2_ende);
1293
1294 mixs_i2_loopme: /* sample loops -> jump to loop start */
1295 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1296 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1297 if (ebp_loopend == ebp_mirror)
1298 {
1299 write_cf(state.eflags, 0);
1300 write_zf(state.eflags, 1);
1301 } else if (ebp_mirror < ebp_loopend)
1302 {
1303 write_cf(state.eflags, 1);
1304 write_zf(state.eflags, 0);
1305 } else {
1306 write_cf(state.eflags, 0);
1307 write_zf(state.eflags, 0);
1308 }
1309 asm_jae(&state, mixs_i2_loopme);
1310 asm_decl(&state, &state.ecx);
1311 asm_jz(&state, mixs_i2_ende);
1312 asm_jmp(&state, mixs_i2_next);
1313 }
1314
/*
 * mixm_nf - emulated x86/x87 inner loop that mixes one voice into a
 * single-channel float buffer without interpolation (only ebp_mirror[0] is
 * read), passing each sample through a two-state recursive filter:
 *   __fb1 = (s - __fl1) * ffrq + __fb1 * frez
 *   __fl1 = __fb1 * ffrq + __fl1
 * The new __fl1 is the value that gets scaled by the volume and mixed.
 *
 * Parameters are named after the registers of the emulated routine:
 *   edi_destptr             destination buffer; one float is accumulated
 *                           (added to the existing value) per output sample
 *   eax_sample_pos          in/out: current sample pointer (whole part)
 *   edx_sample_pos_fract    in/out: fractional part of the position
 *   ebx_sample_pitch        whole part of the per-sample step
 *   esi_sample_pitch_fract  fractional part of the per-sample step
 *   ebp_loopend             position at/after which the loop handler runs
 *
 * Reads dwmixfa_state.nsamples, voll, volrl, ffrq, frez, looptype and
 * mixlooplen; writes voll, __fb1 and __fl1, and when the sample ends also
 * fadeleft and looptype.
 *
 * The emulated edi/eax/ebp registers only carry placeholder values; the real
 * pointers are tracked in edi_destptr and ebp_mirror.  FPU stack notes assume
 * the asm_* helpers behave like the x86 instructions they are named after.
 */
static void mixm_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
{
	struct assembler_state_t state;
	float *ebp_mirror;

	debug_printf("mixm_nf {\n");

	init_assembler_state(&state, writecallback, readcallback);
	/* Pointer arguments cannot live in 32 bit emulated registers, so dummy values are loaded instead */
	asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
	asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
	asm_movl(&state, *edx_sample_pos_fract, &state.edx);
	asm_movl(&state, ebx_sample_pitch, &state.ebx);
	asm_movl(&state, esi_sample_pitch_fract, &state.esi);
	asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);


	asm_movl(&state, dwmixfa_state.nsamples, &state.ecx); /* ecx = remaining output samples */
	asm_flds(&state, dwmixfa_state.voll); /* running volume, ramped by volrl every sample */
	asm_shrl(&state, 2, &state.ebp);
	asm_pushl(&state, state.ebp); /* emulated copy of loop end; popped again at mixm_nf_ende */
	asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
	asm_shrl(&state, 2, &state.ebp);
mixm_nf_next:
	/* filter step: update __fb1, then __fl1; fsts stores without popping,
	 * so the new __fl1 stays on the FPU stack as the sample to mix */
	asm_flds(&state, ebp_mirror[0]);
	asm_fsubs(&state, dwmixfa_state.__fl1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_flds(&state, dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.frez);
	asm_faddp_stst(&state, 0, 1);
	asm_fsts(&state, &dwmixfa_state.__fb1);
	asm_fmuls(&state, dwmixfa_state.ffrq);
	asm_fadds(&state, dwmixfa_state.__fl1);
	asm_fsts(&state, &dwmixfa_state.__fl1);

	asm_fld(&state, 1);
	/* advance position: fraction first, the carry moves the whole part one extra step */
	asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
	/* scale by the current volume, then ramp the volume and accumulate into the buffer */
	asm_fmulp_stst(&state, 0, 1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, edi_destptr[-1]);
/*mixm_nf_looped:*/
	/* Hand-written stand-in for the commented-out cmpl below: the emulated ebp
	 * is only a placeholder, so CF/ZF are derived from the real pointers
	 * (CF set while below the loop end, ZF set when exactly on it). */
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_jae(&state, mixm_nf_LoopHandler); /* position reached or passed the loop end */
	asm_fstps(&state, edi_destptr-1);
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_nf_next);
mixm_nf_ende:
	asm_fstps(&state, &dwmixfa_state.voll); /* write back the ramped volume */
	asm_shll(&state, 2, &state.ebp);
	asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
	asm_popl(&state, &state.ecx); /* balances the pushl of the loop end above */

	asm_movl(&state, state.edx, edx_sample_pos_fract);
	debug_printf("}\n");
	return;

mixm_nf_LoopHandler:
	asm_fstps(&state, edi_destptr-1); /* store the sample that was pending when the branch was taken */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax);
	asm_testl(&state, MIXF_LOOPED, state.eax);
	asm_jnz(&state, mixm_nf_loopme);
	/* not looped: undo the last position step and fetch the (unfiltered) sample found there */
	asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
	asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
	asm_flds(&state, *ebp_mirror);
mixm_nf_fill: /* sample ends -> fill rest of buffer with last sample value */
	asm_fld(&state, 1);
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, edi_destptr[-1]);
	asm_fstps(&state, edi_destptr-1);
	asm_fxch_st(&state, 1);
	asm_fadds(&state, dwmixfa_state.volrl);
	asm_fxch_st(&state, 1);
	asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
	asm_decl(&state, &state.ecx);
	asm_jnz(&state, mixm_nf_fill);
	/* add last sample * volume to fadeleft */
	asm_fmul(&state, 1, 0);
	asm_fadds(&state, dwmixfa_state.fadeleft);
	asm_fstps(&state, &dwmixfa_state.fadeleft);

	/* clear the MIXF_PLAYING bit (FLAG_DISABLED is ~MIXF_PLAYING) */
	asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
	asm_andl(&state, FLAG_DISABLED, &state.eax);
	asm_movl(&state, state.eax, &dwmixfa_state.looptype);
	asm_jmp(&state, mixm_nf_ende);

mixm_nf_loopme: /* sample loops -> jump to loop start */
	/* step back one loop length at a time until the position is below the loop end again */
	asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
	/* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
	if (ebp_loopend == ebp_mirror)
	{
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 1);
	} else if (ebp_mirror < ebp_loopend)
	{
		write_cf(state.eflags, 1);
		write_zf(state.eflags, 0);
	} else {
		write_cf(state.eflags, 0);
		write_zf(state.eflags, 0);
	}
	asm_jae(&state, mixm_nf_loopme);
	asm_decl(&state, &state.ecx);
	asm_jz(&state, mixm_nf_ende);
	asm_jmp(&state, mixm_nf_next);
}
1434
mixs_nf(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1435 static void mixs_nf (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1436 {
1437 struct assembler_state_t state;
1438 float *ebp_mirror;
1439
1440 debug_printf("mixs_nf {\n");
1441
1442 init_assembler_state(&state, writecallback, readcallback);
1443 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1444 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1445 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1446 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1447 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1448 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1449
1450
1451 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1452 asm_flds(&state, dwmixfa_state.voll);
1453 asm_flds(&state, dwmixfa_state.volr);
1454 asm_shrl(&state, 2, &state.ebp);
1455 asm_pushl(&state, state.ebp);
1456 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1457 asm_shrl(&state, 2, &state.ebp);
1458 mixs_nf_next:
1459 asm_flds(&state, ebp_mirror[0]);
1460 asm_fsubs(&state, dwmixfa_state.__fl1);
1461 asm_fmuls(&state, dwmixfa_state.ffrq);
1462 asm_flds(&state, dwmixfa_state.__fb1);
1463 asm_fmuls(&state, dwmixfa_state.frez);
1464 asm_faddp_stst(&state, 0, 1);
1465 asm_fsts(&state, &dwmixfa_state.__fb1);
1466 asm_fmuls(&state, dwmixfa_state.ffrq);
1467 asm_fadds(&state, dwmixfa_state.__fl1);
1468 asm_fsts(&state, &dwmixfa_state.__fl1);
1469
1470 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1471 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1472 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1473 asm_fld(&state, 1);
1474 asm_fld(&state, 3);
1475 asm_fmul(&state, 2, 0);
1476 asm_fxch_st(&state, 4);
1477 asm_fadds(&state, dwmixfa_state.volrl);
1478 asm_fxch_st(&state, 2);
1479 asm_fmulp_stst(&state, 0, 1);
1480 asm_fxch_st(&state, 2);
1481 asm_fadds(&state, dwmixfa_state.volrr);
1482 asm_fxch_st(&state, 3);
1483 asm_fadds(&state, edi_destptr[-2]);
1484 asm_fxch_st(&state, 2);
1485 asm_fadds(&state, edi_destptr[-1]);
1486 /*mixs_nf_looped:*/
1487 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1488 if (ebp_loopend == ebp_mirror)
1489 {
1490 write_cf(state.eflags, 0);
1491 write_zf(state.eflags, 1);
1492 } else if (ebp_mirror < ebp_loopend)
1493 {
1494 write_cf(state.eflags, 1);
1495 write_zf(state.eflags, 0);
1496 } else {
1497 write_cf(state.eflags, 0);
1498 write_zf(state.eflags, 0);
1499 }
1500 asm_jae(&state, mixs_nf_LoopHandler);
1501 asm_fstps(&state, edi_destptr-1);
1502 asm_fxch_st(&state, 1);
1503 asm_fstps(&state, edi_destptr-2);
1504 asm_fxch_st(&state, 1);
1505 asm_decl(&state, &state.ecx);
1506 asm_jnz(&state, mixs_nf_next);
1507 mixs_nf_ende:
1508 asm_fstps(&state, &dwmixfa_state.volr);
1509 asm_fstps(&state, &dwmixfa_state.voll);
1510 asm_shll(&state, 2, &state.ebp);
1511 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1512 asm_popl(&state, &state.ecx);
1513
1514 asm_movl(&state, state.edx, edx_sample_pos_fract);
1515 debug_printf("}\n");
1516 return;
1517
1518 mixs_nf_LoopHandler:
1519 asm_fstps(&state, edi_destptr-1);
1520 asm_fxch_stst(&state, 0, 1);
1521 asm_fstps(&state, edi_destptr-2);
1522 asm_fxch_stst(&state, 0, 1);
1523 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1524 asm_testl(&state, MIXF_LOOPED, state.eax);
1525 asm_jnz(&state, mixs_nf_loopme);
1526 asm_fxch_stst(&state, 0, 1);
1527 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1528 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1529 asm_flds(&state, *ebp_mirror);
1530 asm_fxch_stst(&state, 0, 2);
1531 mixs_nf_fill:
1532 /* sample ends -> fill rest of buffer with last sample value */
1533 asm_fld(&state, 1);
1534 asm_fmul(&state, 3, 0);
1535 asm_fxch_st(&state, 1);
1536 asm_fld(&state, 0);
1537 asm_fmul(&state, 4, 0);
1538 asm_fxch_st(&state, 2);
1539 asm_fadds(&state, edi_destptr[-2]);
1540 asm_fstps(&state, edi_destptr-2);
1541 asm_fxch_st(&state, 1);
1542 asm_fadds(&state, edi_destptr[-1]);
1543 asm_fstps(&state, edi_destptr-1);
1544 asm_fadds(&state, dwmixfa_state.volrr);
1545 asm_fxch_st(&state, 1);
1546 asm_leal(&state, state.edi+4, &state.edi); edi_destptr+=2;
1547 asm_decl(&state, &state.ecx);
1548 asm_fadds(&state, dwmixfa_state.volrl);
1549 asm_fxch_st(&state, 1);
1550 asm_jnz(&state, mixs_nf_fill);
1551
1552 asm_fxch_st(&state, 2);
1553 asm_fld(&state, 0);
1554 asm_fmul(&state, 2, 0);
1555 asm_fxch_st(&state, 1);
1556 asm_fmul(&state, 3, 0);
1557 asm_fxch_st(&state, 1);
1558 asm_fadds(&state, dwmixfa_state.fadeleft);
1559 asm_fxch_st(&state, 1);
1560 asm_fadds(&state, dwmixfa_state.faderight);
1561 asm_fxch_st(&state, 1);
1562 asm_fstps(&state, &dwmixfa_state.fadeleft);
1563 asm_fstps(&state, &dwmixfa_state.faderight);
1564 asm_fxch_st(&state, 1);
1565 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1566 asm_andl(&state, FLAG_DISABLED, &state.eax);
1567 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1568 asm_jmp(&state, mixs_nf_ende);
1569
1570 mixs_nf_loopme: /* sample loops -> jump to loop start */
1571 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1572 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1573 if (ebp_loopend == ebp_mirror)
1574 {
1575 write_cf(state.eflags, 0);
1576 write_zf(state.eflags, 1);
1577 } else if (ebp_mirror < ebp_loopend)
1578 {
1579 write_cf(state.eflags, 1);
1580 write_zf(state.eflags, 0);
1581 } else {
1582 write_cf(state.eflags, 0);
1583 write_zf(state.eflags, 0);
1584 }
1585 asm_jae(&state, mixs_nf_loopme);
1586 asm_decl(&state, &state.ecx);
1587 asm_jz(&state, mixs_nf_ende);
1588 asm_jmp(&state, mixs_nf_next);
1589 }
1590
mixm_if(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1591 static void mixm_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1592 {
1593 struct assembler_state_t state;
1594 float *ebp_mirror;
1595
1596 debug_printf("mixm_if {\n");
1597
1598 init_assembler_state(&state, writecallback, readcallback);
1599 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1600 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1601 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1602 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1603 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1604 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1605
1606
1607 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1608 asm_flds(&state, minuseins);
1609 asm_flds(&state, dwmixfa_state.voll);
1610 asm_shrl(&state, 2, &state.ebp);
1611 asm_pushl(&state, state.ebp);
1612 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1613 asm_movl(&state, state.edx, &state.eax);
1614 asm_shrl(&state, 9, &state.eax);
1615 asm_shrl(&state, 2, &state.ebp);
1616 asm_orl(&state, 0x3f800000, &state.eax);
1617 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
1618 mixm_if_next:
1619 asm_flds(&state, ebp_mirror[0]);
1620 asm_fld(&state, 0);
1621 asm_fld(&state, 3);
1622 asm_fadds(&state, dwmixfa_state.magic1);
1623 asm_fxch_st(&state, 1);
1624 asm_fsubrs(&state, ebp_mirror[1]);
1625 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1626 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1627 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1628 asm_fmulp_st(&state, 1);
1629 asm_movl(&state, state.edx, &state.eax);
1630 asm_shrl(&state, 9, &state.eax);
1631 asm_faddp_stst(&state, 0, 1);
1632
1633 asm_fsubs(&state, dwmixfa_state.__fl1);
1634 asm_fmuls(&state, dwmixfa_state.ffrq);
1635 asm_flds(&state, dwmixfa_state.__fb1);
1636 asm_fmuls(&state, dwmixfa_state.frez);
1637 asm_faddp_stst(&state, 0, 1);
1638 asm_fsts(&state, &dwmixfa_state.__fb1);
1639 asm_fmuls(&state, dwmixfa_state.ffrq);
1640 asm_fadds(&state, dwmixfa_state.__fl1);
1641 asm_fsts(&state, &dwmixfa_state.__fl1);
1642
1643 asm_fld(&state, 1);
1644 asm_fmulp_stst(&state, 0, 1);
1645 asm_fxch_st(&state, 1);
1646 asm_fadds(&state, dwmixfa_state.volrl);
1647 asm_fxch_st(&state, 1);
1648 asm_fadds(&state, edi_destptr[-1]);
1649 asm_orl(&state, 0x3f800000, &state.eax);
1650 /*mixm_if_looped:*/
1651 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1652 if (ebp_loopend == ebp_mirror)
1653 {
1654 write_cf(state.eflags, 0);
1655 write_zf(state.eflags, 1);
1656 } else if (ebp_mirror < ebp_loopend)
1657 {
1658 write_cf(state.eflags, 1);
1659 write_zf(state.eflags, 0);
1660 } else {
1661 write_cf(state.eflags, 0);
1662 write_zf(state.eflags, 0);
1663 }
1664 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
1665 asm_jae(&state, mixm_if_LoopHandler);
1666 asm_fstps(&state, edi_destptr-1);
1667 asm_decl(&state, &state.ecx);
1668 asm_jnz(&state, mixm_if_next);
1669 mixm_if_ende:
1670 asm_fstps(&state, &dwmixfa_state.voll);
1671 asm_fstp_st(&state, 0);
1672 asm_shll(&state, 2, &state.ebp);
1673 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1674 asm_popl(&state, &state.ecx);
1675
1676 asm_movl(&state, state.edx, edx_sample_pos_fract);
1677 debug_printf("}\n");
1678 return;
1679
1680 mixm_if_LoopHandler:
1681 asm_fstps(&state, edi_destptr-1);
1682 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1683 asm_testl(&state, MIXF_LOOPED, state.eax);
1684 asm_jnz(&state, mixm_if_loopme);
1685 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1686 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1687 asm_flds(&state, *ebp_mirror);
1688 mixm_if_fill: /* sample ends -> fill rest of buffer with last sample value */
1689 asm_fld(&state, 1);
1690 asm_fmul(&state, 1, 0);
1691 asm_fadds(&state, edi_destptr[-1]);
1692 asm_fstps(&state, edi_destptr-1);
1693 asm_fxch_st(&state, 1);
1694 asm_fadds(&state, dwmixfa_state.volrl);
1695 asm_fxch_st(&state, 1);
1696 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1697 asm_decl(&state, &state.ecx);
1698 asm_jnz(&state, mixm_if_fill);
1699 asm_fmul(&state, 1, 0);
1700 asm_fadds(&state, dwmixfa_state.fadeleft);
1701 asm_fstps(&state, &dwmixfa_state.fadeleft);
1702
1703 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1704 asm_andl(&state, FLAG_DISABLED, &state.eax);
1705 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1706 asm_jmp(&state, mixm_if_ende);
1707
1708 mixm_if_loopme: /* sample loops -> jump to loop start */
1709 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1710 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1711 if (ebp_loopend == ebp_mirror)
1712 {
1713 write_cf(state.eflags, 0);
1714 write_zf(state.eflags, 1);
1715 } else if (ebp_mirror < ebp_loopend)
1716 {
1717 write_cf(state.eflags, 1);
1718 write_zf(state.eflags, 0);
1719 } else {
1720 write_cf(state.eflags, 0);
1721 write_zf(state.eflags, 0);
1722 }
1723 asm_jae(&state, mixm_if_loopme);
1724 asm_decl(&state, &state.ecx);
1725 asm_jz(&state, mixm_if_ende);
1726 asm_jmp(&state, mixm_if_next);
1727 }
1728
mixs_if(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1729 static void mixs_if (float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1730 {
1731 struct assembler_state_t state;
1732 float *ebp_mirror;
1733
1734 debug_printf("mixs_if {\n");
1735
1736 init_assembler_state(&state, writecallback, readcallback);
1737 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1738 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1739 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1740 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1741 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1742 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1743
1744
1745 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1746 asm_flds(&state, minuseins);
1747 asm_flds(&state, dwmixfa_state.voll);
1748 asm_flds(&state, dwmixfa_state.volr);
1749 asm_shrl(&state, 2, &state.ebp);
1750 asm_pushl(&state, state.ebp);
1751 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1752 asm_movl(&state, state.edx, &state.eax);
1753 asm_shrl(&state, 9, &state.eax);
1754 asm_shrl(&state, 2, &state.ebp);
1755 asm_orl(&state, 0x3f800000, &state.eax);
1756 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
1757 mixs_if_next:
1758 asm_flds(&state, ebp_mirror[0]);
1759 asm_fld(&state, 0);
1760 asm_fld(&state, 4);
1761 asm_fadds(&state, dwmixfa_state.magic1);
1762 asm_fxch_st(&state, 1);
1763 asm_fsubrs(&state, ebp_mirror[1]);
1764 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1765 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1766 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1767 asm_fmulp_st(&state, 1);
1768 asm_movl(&state, state.edx, &state.eax);
1769 asm_shrl(&state, 9, &state.eax);
1770 asm_faddp_stst(&state, 0, 1);
1771
1772 asm_fsubs(&state, dwmixfa_state.__fl1);
1773 asm_fmuls(&state, dwmixfa_state.ffrq);
1774 asm_flds(&state, dwmixfa_state.__fb1);
1775 asm_fmuls(&state, dwmixfa_state.frez);
1776 asm_faddp_stst(&state, 0, 1);
1777 asm_fsts(&state, &dwmixfa_state.__fb1);
1778 asm_fmuls(&state, dwmixfa_state.ffrq);
1779 asm_fadds(&state, dwmixfa_state.__fl1);
1780 asm_fsts(&state, &dwmixfa_state.__fl1);
1781
1782 asm_fld(&state, 1);
1783 asm_fld(&state, 3);
1784 asm_fmul(&state, 2, 0);
1785 asm_fxch_st(&state, 4);
1786 asm_fadds(&state, dwmixfa_state.volrl);
1787 asm_fxch_st(&state, 2);
1788 asm_fmulp_stst(&state, 0, 1);
1789 asm_fxch_st(&state, 2);
1790 asm_fadds(&state, dwmixfa_state.volrr);
1791 asm_fxch_st(&state, 3);
1792 asm_fadds(&state, edi_destptr[-2]);
1793 asm_fxch_st(&state, 2);
1794 asm_fadds(&state, edi_destptr[-1]);
1795 asm_orl(&state, 0x3f800000, &state.eax);
1796 /*mixs_if_looped:*/
1797 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1798 if (ebp_loopend == ebp_mirror)
1799 {
1800 write_cf(state.eflags, 0);
1801 write_zf(state.eflags, 1);
1802 } else if (ebp_mirror < ebp_loopend)
1803 {
1804 write_cf(state.eflags, 1);
1805 write_zf(state.eflags, 0);
1806 } else {
1807 write_cf(state.eflags, 0);
1808 write_zf(state.eflags, 0);
1809 }
1810 asm_movl(&state, state.eax, (uint32_t *)&dwmixfa_state.magic1);
1811 asm_jae(&state, mixs_if_LoopHandler);
1812 asm_fstps(&state, edi_destptr-1);
1813 asm_fxch_st(&state, 1);
1814 asm_fstps(&state, edi_destptr-2);
1815 asm_fxch_st(&state, 1);
1816 asm_decl(&state, &state.ecx);
1817 asm_jnz(&state, mixs_if_next);
1818 mixs_if_ende:
1819 asm_fstps(&state, &dwmixfa_state.volr);
1820 asm_fstps(&state, &dwmixfa_state.voll);
1821 asm_fstp_st(&state, 0);
1822 asm_shll(&state, 2, &state.ebp);
1823 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1824 asm_popl(&state, &state.ecx);
1825
1826 asm_movl(&state, state.edx, edx_sample_pos_fract);
1827 debug_printf("}\n");
1828 return;
1829
1830 mixs_if_LoopHandler:
1831 asm_fstps(&state, edi_destptr-1);
1832 asm_fxch_st(&state, 1);
1833 asm_fstps(&state, edi_destptr-2);
1834 asm_fxch_st(&state, 1);
1835 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1836 asm_testl(&state, MIXF_LOOPED, state.eax);
1837 asm_jnz(&state, mixs_if_loopme);
1838 asm_fxch_st(&state, 2);
1839 asm_fstp_st(&state, 0);
1840 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1841 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
1842 asm_flds(&state, *ebp_mirror);
1843 asm_fxch_st(&state, 2);
1844 mixs_if_fill:
1845 /* sample ends -> fill rest of buffer with last sample value */
1846 asm_fld(&state, 1);
1847 asm_fmul(&state, 3, 0);
1848 asm_fxch_st(&state, 1);
1849 asm_fld(&state, 0);
1850 asm_fmul(&state, 4, 0);
1851 asm_fxch_st(&state, 2);
1852 asm_fadds(&state, edi_destptr[-2]);
1853 asm_fstps(&state, edi_destptr-2);
1854 asm_fxch_st(&state, 1);
1855 asm_fadds(&state, edi_destptr[-1]);
1856 asm_fstps(&state, edi_destptr-1);
1857 asm_fadds(&state, dwmixfa_state.volrr);
1858 asm_fxch_st(&state, 1);
1859 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
1860 asm_decl(&state, &state.ecx);
1861 asm_fadds(&state, dwmixfa_state.volrl);
1862 asm_fxch_st(&state, 1);
1863 asm_jnz(&state, mixs_if_fill);
1864 /*asm_fmul(&state, 1, 0);*/
1865 asm_fld(&state, 2);
1866 asm_fld(&state, 0);
1867 asm_fmul(&state, 3, 0);
1868 asm_fxch_st(&state, 1);
1869 asm_fmul(&state, 2, 0);
1870 asm_fxch_st(&state, 1);
1871 asm_fadds(&state, dwmixfa_state.fadeleft);
1872 asm_fxch_st(&state, 1);
1873 asm_fadds(&state, dwmixfa_state.faderight);
1874 asm_fxch_st(&state, 1);
1875 asm_fstps(&state, &dwmixfa_state.fadeleft);
1876 asm_fstps(&state, &dwmixfa_state.faderight);
1877
1878 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
1879 asm_andl(&state, FLAG_DISABLED, &state.eax);
1880 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
1881 asm_jmp(&state, mixs_if_ende);
1882
1883 mixs_if_loopme: /* sample loops -> jump to loop start */
1884 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
1885 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1886 if (ebp_loopend == ebp_mirror)
1887 {
1888 write_cf(state.eflags, 0);
1889 write_zf(state.eflags, 1);
1890 } else if (ebp_mirror < ebp_loopend)
1891 {
1892 write_cf(state.eflags, 1);
1893 write_zf(state.eflags, 0);
1894 } else {
1895 write_cf(state.eflags, 0);
1896 write_zf(state.eflags, 0);
1897 }
1898 asm_jae(&state, mixs_if_loopme);
1899 asm_decl(&state, &state.ecx);
1900 asm_jz(&state, mixs_if_ende);
1901 asm_jmp(&state, mixs_if_next);
1902 }
1903
mixm_i2f(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)1904 static void mixm_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
1905 {
1906 struct assembler_state_t state;
1907 float *ebp_mirror;
1908
1909 debug_printf("mixm_i2f {\n");
1910
1911 init_assembler_state(&state, writecallback, readcallback);
1912 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
1913 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
1914 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
1915 asm_movl(&state, ebx_sample_pitch, &state.ebx);
1916 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
1917 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
1918
1919
1920 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
1921 asm_flds(&state, dwmixfa_state.voll);
1922 asm_shrl(&state, 2, &state.ebp);
1923 asm_pushl(&state, state.ebp);
1924 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
1925 asm_shrl(&state, 2, &state.ebp);
1926 asm_movl(&state, state.edx, &state.eax);
1927 asm_shrl(&state, 24, &state.eax);
1928 mixm_i2f_next:
1929 asm_flds(&state, ebp_mirror[0]);
1930 asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
1931 asm_flds(&state, ebp_mirror[1]);
1932 asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
1933 asm_flds(&state, ebp_mirror[2]);
1934 asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
1935 asm_flds(&state, ebp_mirror[3]);
1936 asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
1937 asm_fxch_st(&state, 2);
1938 asm_faddp_stst(&state, 0, 3);
1939 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
1940 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
1941 asm_faddp_stst(&state, 0, 2);
1942 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
1943 asm_movl(&state, state.edx, &state.eax);
1944 asm_faddp_stst(&state, 0, 1);
1945
1946 asm_fsubs(&state, dwmixfa_state.__fl1);
1947 asm_fmuls(&state, dwmixfa_state.ffrq);
1948 asm_flds(&state, dwmixfa_state.__fb1);
1949 asm_fmuls(&state, dwmixfa_state.frez);
1950 asm_faddp_stst(&state, 0, 1);
1951 asm_fsts(&state, &dwmixfa_state.__fb1);
1952 asm_fmuls(&state, dwmixfa_state.ffrq);
1953 asm_fadds(&state, dwmixfa_state.__fl1);
1954 asm_fsts(&state, &dwmixfa_state.__fl1);
1955
1956 asm_shrl(&state, 24, &state.eax);
1957 asm_fld(&state, 1);
1958 asm_fmulp_stst(&state, 0, 1);
1959 asm_fxch_st(&state, 1);
1960 asm_fadds(&state, dwmixfa_state.volrl);
1961 asm_fxch_st(&state, 1);
1962 asm_fadds(&state, edi_destptr[-1]);
1963 /*mixm_i2f_looped:*/
1964 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
1965 if (ebp_loopend == ebp_mirror)
1966 {
1967 write_cf(state.eflags, 0);
1968 write_zf(state.eflags, 1);
1969 } else if (ebp_mirror < ebp_loopend)
1970 {
1971 write_cf(state.eflags, 1);
1972 write_zf(state.eflags, 0);
1973 } else {
1974 write_cf(state.eflags, 0);
1975 write_zf(state.eflags, 0);
1976 }
1977 asm_jae(&state, mixm_i2f_LoopHandler);
1978 asm_fstps(&state, edi_destptr-1);
1979 asm_decl(&state, &state.ecx);
1980 asm_jnz(&state, mixm_i2f_next);
1981 mixm_i2f_ende:
1982 asm_fstps(&state, &dwmixfa_state.voll);
1983 asm_shll(&state, 2, &state.ebp);
1984 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
1985 asm_popl(&state, &state.ecx);
1986
1987 asm_movl(&state, state.edx, edx_sample_pos_fract);
1988 debug_printf("}\n");
1989 return;
1990
1991 mixm_i2f_LoopHandler:
1992 asm_fstps(&state, edi_destptr-1);
1993 asm_pushl(&state, state.eax);
1994 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
1995 asm_testl(&state, MIXF_LOOPED, state.eax);
1996 asm_jnz(&state, mixm_i2f_loopme);
1997 asm_popl(&state, &state.eax);
1998 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
1999 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
2000 asm_flds(&state, *ebp_mirror);
2001 mixm_i2f_fill: /* sample ends -> fill rest of buffer with last sample value */
2002 asm_fld(&state, 1);
2003 asm_fmul(&state, 1, 0);
2004 asm_fadds(&state, edi_destptr[-1]);
2005 asm_fstps(&state, edi_destptr-1);
2006 asm_fxch_st(&state, 1);
2007 asm_fadds(&state, dwmixfa_state.volrl);
2008 asm_fxch_st(&state, 1);
2009 asm_leal(&state, state.edi+4, &state.edi); edi_destptr++;
2010 asm_decl(&state, &state.ecx);
2011 asm_jnz(&state, mixm_i2f_fill);
2012 asm_fmul(&state, 1, 0);
2013 asm_fadds(&state, dwmixfa_state.fadeleft);
2014 asm_fstps(&state, &dwmixfa_state.fadeleft);
2015
2016 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
2017 asm_andl(&state, FLAG_DISABLED, &state.eax);
2018 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
2019 asm_jmp(&state, mixm_i2f_ende);
2020
2021 mixm_i2f_loopme: /* sample loops -> jump to loop start */
2022 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
2023 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2024 if (ebp_loopend == ebp_mirror)
2025 {
2026 write_cf(state.eflags, 0);
2027 write_zf(state.eflags, 1);
2028 } else if (ebp_mirror < ebp_loopend)
2029 {
2030 write_cf(state.eflags, 1);
2031 write_zf(state.eflags, 0);
2032 } else {
2033 write_cf(state.eflags, 0);
2034 write_zf(state.eflags, 0);
2035 }
2036 asm_jae(&state, mixm_i2f_loopme);
2037 asm_decl(&state, &state.ecx);
2038 asm_jz(&state, mixm_i2f_ende);
2039 asm_jmp(&state, mixm_i2f_next);
2040 }
2041
mixs_i2f(float * edi_destptr,float ** eax_sample_pos,uint32_t * edx_sample_pos_fract,uint32_t ebx_sample_pitch,uint32_t esi_sample_pitch_fract,float * ebp_loopend)2042 static void mixs_i2f(float *edi_destptr, float **eax_sample_pos, uint32_t *edx_sample_pos_fract, uint32_t ebx_sample_pitch, uint32_t esi_sample_pitch_fract, float *ebp_loopend)
2043 {
2044 struct assembler_state_t state;
2045 float *ebp_mirror;
2046
2047 debug_printf("mixs_i2f {\n");
2048
2049 init_assembler_state(&state, writecallback, readcallback);
2050 asm_movl(&state, /*edi_destptr*/ 0x12345678, &state.edi);
2051 asm_movl(&state, /*eax_sample_pos*/0x12345678, &state.eax);
2052 asm_movl(&state, *edx_sample_pos_fract, &state.edx);
2053 asm_movl(&state, ebx_sample_pitch, &state.ebx);
2054 asm_movl(&state, esi_sample_pitch_fract, &state.esi);
2055 asm_movl(&state, /*ebp_loopend*/0x12345678, &state.ebp);
2056
2057
2058 asm_movl(&state, dwmixfa_state.nsamples, &state.ecx);
2059 asm_flds(&state, dwmixfa_state.voll);
2060 asm_flds(&state, dwmixfa_state.volr);
2061 asm_shrl(&state, 2, &state.ebp);
2062
2063 asm_pushl(&state, state.ebp);
2064
2065 asm_movl(&state, state.eax, &state.ebp); ebp_mirror = *eax_sample_pos;
2066 asm_shrl(&state, 2, &state.ebp);
2067 asm_movl(&state, state.edx, &state.eax);
2068 asm_shrl(&state, 24, &state.eax);
2069
2070 mixs_i2f_next:
2071 asm_flds(&state, ebp_mirror[0]);
2072 asm_fmuls(&state, dwmixfa_state.ct0[state.eax]);
2073 asm_flds(&state, ebp_mirror[1]);
2074 asm_fmuls(&state, dwmixfa_state.ct1[state.eax]);
2075 asm_flds(&state, ebp_mirror[2]);
2076 asm_fmuls(&state, dwmixfa_state.ct2[state.eax]);
2077 asm_flds(&state, ebp_mirror[3]);
2078 asm_fmuls(&state, dwmixfa_state.ct3[state.eax]);
2079 asm_fxch_st(&state, 2);
2080 asm_faddp_stst(&state, 0, 3);
2081 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
2082 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
2083 asm_faddp_stst(&state, 0, 2);
2084 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
2085 asm_movl(&state, state.edx, &state.eax);
2086 asm_faddp_stst(&state, 0, 1);
2087
2088 asm_fsubs(&state, dwmixfa_state.__fl1);
2089 asm_fmuls(&state, dwmixfa_state.ffrq);
2090 asm_flds(&state, dwmixfa_state.__fb1);
2091 asm_fmuls(&state, dwmixfa_state.frez);
2092 asm_faddp_stst(&state, 0, 1);
2093 asm_fsts(&state, &dwmixfa_state.__fb1);
2094 asm_fmuls(&state, dwmixfa_state.ffrq);
2095 asm_fadds(&state, dwmixfa_state.__fl1);
2096 asm_fsts(&state, &dwmixfa_state.__fl1);
2097
2098 asm_shrl(&state, 24, &state.eax);
2099 asm_fld(&state, 1);
2100 asm_fld(&state, 3);
2101 asm_fmul(&state, 2, 0);
2102 asm_fxch_st(&state, 4);
2103 asm_fadds(&state, dwmixfa_state.volrl);
2104 asm_fxch_st(&state, 2);
2105 asm_fmulp_stst(&state, 0, 1);
2106 asm_fxch_st(&state, 2);
2107 asm_fadds(&state, dwmixfa_state.volrr);
2108 asm_fxch_st(&state, 3);
2109 asm_fadds(&state, edi_destptr[-2]);
2110 asm_fxch_st(&state, 2);
2111 asm_fadds(&state, edi_destptr[-1]);
2112 /*mixs_i2f_looped:*/
2113 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2114 if (ebp_loopend == ebp_mirror)
2115 {
2116 write_cf(state.eflags, 0);
2117 write_zf(state.eflags, 1);
2118 } else if (ebp_mirror < ebp_loopend)
2119 {
2120 write_cf(state.eflags, 1);
2121 write_zf(state.eflags, 0);
2122 } else {
2123 write_cf(state.eflags, 0);
2124 write_zf(state.eflags, 0);
2125 }
2126 asm_jae(&state, mixs_i2f_LoopHandler);
2127 asm_fstps(&state, edi_destptr-1);
2128 asm_fxch_st(&state, 1);
2129 asm_fstps(&state, edi_destptr-2);
2130 asm_fxch_st(&state, 1);
2131 asm_decl(&state, &state.ecx);
2132 asm_jnz(&state, mixs_i2f_next);
2133 mixs_i2f_ende:
2134 asm_fstps(&state, &dwmixfa_state.volr);
2135 asm_fstps(&state, &dwmixfa_state.voll);
2136 asm_shll(&state, 2, &state.ebp);
2137 asm_movl(&state, state.ebp, &state.eax); *eax_sample_pos = ebp_mirror;
2138 asm_popl(&state, &state.ecx);
2139
2140 asm_movl(&state, state.edx, edx_sample_pos_fract);
2141 debug_printf("}\n");
2142 return;
2143
2144 mixs_i2f_LoopHandler:
2145 asm_fstps(&state, edi_destptr-1);
2146 asm_fxch_st(&state, 1);
2147 asm_fstps(&state, edi_destptr-2);
2148 asm_fxch_st(&state, 1);
2149 asm_pushl(&state, state.eax);
2150 asm_movl(&state, dwmixfa_state.looptype, &state.eax);
2151 asm_testl(&state, MIXF_LOOPED, state.eax);
2152 asm_jnz(&state, mixs_i2f_loopme);
2153 asm_popl(&state, &state.eax);
2154 asm_fxch_st(&state, 1);
2155 asm_subl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror--;
2156 asm_sbbl(&state, state.ebx, &state.ebp); ebp_mirror -= state.ebx;
2157 asm_flds(&state, *ebp_mirror);
2158 asm_fxch_st(&state, 2);
2159 mixs_i2f_fill:
2160 /* sample ends -> fill rest of buffer with last sample value */
2161 asm_fld(&state, 1);
2162 asm_fmul(&state, 3, 0);
2163 asm_fxch_st(&state, 1);
2164 asm_fld(&state, 0);
2165 asm_fmul(&state, 4, 0);
2166 asm_fxch_st(&state, 2);
2167 asm_fadds(&state, edi_destptr[-2]);
2168 asm_fstps(&state, edi_destptr-2);
2169 asm_fxch_st(&state, 1);
2170 asm_fadds(&state, edi_destptr[-1]);
2171 asm_fstps(&state, edi_destptr-1);
2172 asm_fadds(&state, dwmixfa_state.volrr);
2173 asm_fxch_st(&state, 1);
2174 asm_leal(&state, state.edi+8, &state.edi); edi_destptr+=2;
2175 asm_decl(&state, &state.ecx);
2176 asm_fadds(&state, dwmixfa_state.volrl);
2177 asm_fxch_st(&state, 1);
2178 asm_jnz(&state, mixs_i2f_fill);
2179
2180 asm_fxch_st(&state, 2);
2181 asm_fld(&state, 0);
2182 asm_fmul(&state, 2, 0);
2183 asm_fxch_st(&state, 1);
2184 asm_fmul(&state, 3, 0);
2185 asm_fxch_st(&state, 1);
2186 asm_fadds(&state, dwmixfa_state.fadeleft);
2187 asm_fxch_st(&state, 1);
2188 asm_fadds(&state, dwmixfa_state.faderight);
2189 asm_fxch_st(&state, 1);
2190 asm_fstps(&state, &dwmixfa_state.fadeleft);
2191 asm_fstps(&state, &dwmixfa_state.faderight);
2192 asm_fxch_st(&state, 1);
2193 asm_movl(&state, dwmixfa_state.looptype, &state.eax); /* NOT NEEDED */
2194 asm_andl(&state, FLAG_DISABLED, &state.eax);
2195 asm_movl(&state, state.eax, &dwmixfa_state.looptype);
2196 asm_jmp(&state, mixs_i2f_ende);
2197
2198 mixs_i2f_loopme: /* sample loops -> jump to loop start */
2199 asm_subl(&state, dwmixfa_state.mixlooplen, &state.ebp); ebp_mirror -= dwmixfa_state.mixlooplen;
2200 /* asm_cmpl(&state, (%esp) has ebp_loopend, &state.ebp);*/
2201 if (ebp_loopend == ebp_mirror)
2202 {
2203 write_cf(state.eflags, 0);
2204 write_zf(state.eflags, 1);
2205 } else if (ebp_mirror < ebp_loopend)
2206 {
2207 write_cf(state.eflags, 1);
2208 write_zf(state.eflags, 0);
2209 } else {
2210 write_cf(state.eflags, 0);
2211 write_zf(state.eflags, 0);
2212 }
2213 asm_jae(&state, mixs_i2f_loopme);
2214 asm_decl(&state, &state.ecx);
2215 asm_jz(&state, mixs_i2f_ende);
2216 asm_jmp(&state, mixs_i2f_next);
2217 }
2218
clip_16s(float * input,void * output,uint_fast32_t count)2219 static void clip_16s(float *input, void *output, uint_fast32_t count)
2220 {
2221 struct assembler_state_t state;
2222 float *esi_mirror;
2223 uint16_t *edi_mirror;
2224
2225 debug_printf("clip_16s {\n");
2226
2227 init_assembler_state(&state, writecallback, readcallback);
2228 asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2229 asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2230 asm_movl(&state, count, &state.ecx);
2231
2232 asm_flds(&state, clampmin);
2233 asm_flds(&state, clampmax);
2234 asm_movw(&state, 32767, &state.bx);
2235 asm_movw(&state, -32768, &state.dx);
2236
2237 clip_16s_lp:
2238 asm_flds(&state, *esi_mirror);
2239 asm_fcom_st(&state, 1);
2240 asm_fnstsw(&state, &state.ax);
2241 asm_sahf(&state);
2242 asm_ja(&state, clip_16s_max);
2243 asm_fcom_st(&state, 2);
2244 asm_fstsw(&state, &state.ax);
2245 asm_sahf(&state);
2246 asm_jb(&state, clip_16s_min);
2247 asm_fistps(&state, edi_mirror);
2248 clip_16s_next:
2249 asm_addl(&state, 4, &state.esi); esi_mirror++;
2250 asm_addl(&state, 2, &state.edi); edi_mirror++;
2251 asm_decl(&state, &state.ecx);
2252 asm_jnz(&state, clip_16s_lp);
2253 asm_jmp(&state, clip_16s_ende);
2254 clip_16s_max:
2255 asm_fstp_st(&state, 0);
2256 asm_movw(&state, state.bx, edi_mirror);
2257 asm_jmp(&state, clip_16s_next);
2258
2259 clip_16s_min:
2260 asm_fstp_st(&state, 0);
2261 asm_movw(&state, state.dx, edi_mirror);
2262 asm_jmp(&state, clip_16s_next);
2263
2264 clip_16s_ende:
2265 asm_fstp_st(&state, 0);
2266 asm_fstp_st(&state, 0);
2267 debug_printf("}\n");
2268 }
2269
clip_16u(float * input,void * output,uint_fast32_t count)2270 static void clip_16u(float *input, void *output, uint_fast32_t count)
2271 {
2272 struct assembler_state_t state;
2273 float *esi_mirror;
2274 uint16_t *edi_mirror;
2275
2276 debug_printf("clip_16u {\n");
2277
2278 init_assembler_state(&state, writecallback, readcallback);
2279 asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2280 asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2281 asm_movl(&state, count, &state.ecx);
2282
2283 asm_flds(&state, clampmin);
2284 asm_flds(&state, clampmax);
2285 asm_movw(&state, 32767, &state.bx);
2286 asm_movw(&state, -32768, &state.dx);
2287
2288 clip_16u_lp:
2289 asm_flds(&state, *esi_mirror);
2290 asm_fcom_st(&state, 1);
2291 asm_fnstsw(&state, &state.ax);
2292 asm_sahf(&state);
2293 asm_ja(&state, clip_16u_max);
2294 asm_fcom_st(&state, 2);
2295 asm_fstsw(&state, &state.ax);
2296 asm_sahf(&state);
2297 asm_jb(&state, clip_16u_min);
2298 asm_fistps(&state, &dwmixfa_state.clipval);
2299 asm_movw(&state, dwmixfa_state.clipval, &state.ax);
2300 clip_16u_next:
2301 asm_xorw(&state, 0x8000, &state.ax);
2302 asm_movw(&state, state.ax, edi_mirror);
2303 asm_addl(&state, 4, &state.esi); esi_mirror++;
2304 asm_addl(&state, 2, &state.edi); edi_mirror++;
2305 asm_decl(&state, &state.ecx);
2306 asm_jnz(&state, clip_16u_lp);
2307 asm_jmp(&state, clip_16u_ende);
2308 clip_16u_max:
2309 asm_fstp_st(&state, 0);
2310 asm_movw(&state, state.bx, &state.ax);
2311 asm_jmp(&state, clip_16u_next);
2312
2313 clip_16u_min:
2314 asm_fstp_st(&state, 0);
2315 asm_movw(&state, state.bx, &state.ax);
2316 asm_jmp(&state, clip_16u_next);
2317
2318 clip_16u_ende:
2319 asm_fstp_st(&state, 0);
2320 asm_fstp_st(&state, 0);
2321 debug_printf("}\n");
2322 }
2323
clip_8s(float * input,void * output,uint_fast32_t count)2324 static void clip_8s(float *input, void *output, uint_fast32_t count)
2325 {
2326 struct assembler_state_t state;
2327 float *esi_mirror;
2328 uint8_t *edi_mirror;
2329
2330 debug_printf("clip_8s {\n");
2331
2332 init_assembler_state(&state, writecallback, readcallback);
2333 asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2334 asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2335 asm_movl(&state, count, &state.ecx);
2336
2337 asm_flds(&state, clampmin);
2338 asm_flds(&state, clampmax);
2339 asm_movw(&state, 32767, &state.bx);
2340 asm_movw(&state, -32768, &state.dx);
2341
2342 clip_8s_lp:
2343 asm_flds(&state, *esi_mirror);
2344 asm_fcom_st(&state, 1);
2345 asm_fnstsw(&state, &state.ax);
2346 asm_sahf(&state);
2347 asm_ja(&state, clip_8s_max);
2348 asm_fcom_st(&state, 2);
2349 asm_fstsw(&state, &state.ax);
2350 asm_sahf(&state);
2351 asm_jb(&state, clip_8s_min);
2352 asm_fistps(&state, &dwmixfa_state.clipval);
2353 asm_movw(&state, dwmixfa_state.clipval, &state.ax);
2354 clip_8s_next:
2355 asm_movb(&state, state.ah, edi_mirror);
2356 asm_addl(&state, 4, &state.esi); esi_mirror++;
2357 asm_addl(&state, 1, &state.edi); edi_mirror++;
2358 asm_decl(&state, &state.ecx);
2359 asm_jnz(&state, clip_8s_lp);
2360 asm_jmp(&state, clip_8s_ende);
2361 clip_8s_max:
2362 asm_fstp_st(&state, 0);
2363 asm_movw(&state, state.bx, &state.ax);
2364 asm_jmp(&state, clip_8s_next);
2365
2366 clip_8s_min:
2367 asm_fstp_st(&state, 0);
2368 asm_movw(&state, state.dx, &state.ax);
2369 asm_jmp(&state, clip_8s_next);
2370
2371 clip_8s_ende:
2372 asm_fstp_st(&state, 0);
2373 asm_fstp_st(&state, 0);
2374 debug_printf("}\n");
2375 }
2376
clip_8u(float * input,void * output,uint_fast32_t count)2377 static void clip_8u(float *input, void *output, uint_fast32_t count)
2378 {
2379 struct assembler_state_t state;
2380 float *esi_mirror;
2381 uint8_t *edi_mirror;
2382
2383 debug_printf("clip_8u {\n");
2384
2385 init_assembler_state(&state, writecallback, readcallback);
2386 asm_movl(&state, /*input*/ 0x12345678, &state.esi); esi_mirror = input;
2387 asm_movl(&state, /*_output*/0x87654321, &state.edi); edi_mirror = output;
2388 asm_movl(&state, count, &state.ecx);
2389
2390 asm_flds(&state, clampmin);
2391 asm_flds(&state, clampmax);
2392 asm_movw(&state, 32767, &state.bx);
2393 asm_movw(&state, -32768, &state.dx);
2394
2395 clip_8u_lp:
2396 asm_flds(&state, *esi_mirror);
2397 asm_fcom_st(&state, 1);
2398 asm_fnstsw(&state, &state.ax);
2399 asm_sahf(&state);
2400 asm_ja(&state, clip_8u_max);
2401 asm_fcom_st(&state, 2);
2402 asm_fstsw(&state, &state.ax);
2403 asm_sahf(&state);
2404 asm_jb(&state, clip_8u_min);
2405 asm_fistps(&state, &dwmixfa_state.clipval);
2406 asm_movw(&state, dwmixfa_state.clipval, &state.ax);
2407 clip_8u_next:
2408 asm_xorw(&state, 0x8000, &state.ax);
2409 asm_movb(&state, state.ah, edi_mirror);
2410 asm_addl(&state, 4, &state.esi); esi_mirror++;
2411 asm_addl(&state, 1, &state.edi); edi_mirror++;
2412 asm_decl(&state, &state.ecx);
2413 asm_jnz(&state, clip_8u_lp);
2414 asm_jmp(&state, clip_8u_ende);
2415 clip_8u_max:
2416 asm_fstp_st(&state, 0);
2417 asm_movw(&state, state.bx, &state.ax);
2418 asm_jmp(&state, clip_8u_next);
2419
2420 clip_8u_min:
2421 asm_fstp_st(&state, 0);
2422 asm_movw(&state, state.dx, &state.ax);
2423 asm_jmp(&state, clip_8u_next);
2424
2425 clip_8u_ende:
2426 asm_fstp_st(&state, 0);
2427 asm_fstp_st(&state, 0);
2428 debug_printf("}\n");
2429 }
2430
getchanvol(int n,int len)2431 void getchanvol (int n, int len)
2432 {
2433 struct assembler_state_t state;
2434
2435 float *ebp_mirror;
2436 float *edi_mirror;
2437
2438 debug_printf("getchanvol {\n");
2439
2440 init_assembler_state(&state, writecallback, readcallback);
2441
2442 state.ecx = len; /* assembler entry config */
2443
2444 asm_pushl(&state, state.ebp);
2445 asm_fldz(&state);
2446 asm_movl(&state, state.ecx, &dwmixfa_state.nsamples);
2447
2448 asm_movl(&state, dwmixfa_state.voiceflags[state.eax], &state.ebx);
2449 asm_testl(&state, MIXF_PLAYING, state.ebx);
2450 asm_jz(&state, getchanvol_SkipVoice);
2451 asm_movl(&state, dwmixfa_state.looplen[state.eax], &state.ebx);
2452 asm_movl(&state, state.ebx, &dwmixfa_state.mixlooplen);
2453 asm_movl(&state, dwmixfa_state.freqw[state.eax], &state.ebx);
2454 asm_movl(&state, dwmixfa_state.freqf[state.eax], &state.esi);
2455 asm_movl(&state, dwmixfa_state.smpposf[state.eax], &state.edx);
2456 asm_movl(&state, /*loopend[state.eax]*/0x12345678, &state.edi); edi_mirror = dwmixfa_state.loopend[state.eax];
2457 asm_shrl(&state, 2, &state.edi); /* this is fucked up logic :-p */
2458 asm_movl(&state, /*smpposw[state.eax]*/0x87654321, &state.ebp); ebp_mirror = dwmixfa_state.smpposw[state.eax];
2459 asm_shrl(&state, 2, &state.ebp); /* this is fucked up logic :-p */
2460 /*getchanvol_next:*/
2461 asm_flds(&state, *ebp_mirror); /* (,%ebp,4)*/
2462 asm_testl(&state, 0x80000000, *(uint32_t *)ebp_mirror); /* sign og *ebp_mirror */
2463 asm_jnz(&state, getchanvol_neg);
2464 asm_faddp_stst(&state, 0, 1);
2465 asm_jmp(&state, getchanvol_goon);
2466 getchanvol_neg:
2467 asm_fsubp_stst(&state, 0, 1);
2468 getchanvol_goon:
2469 asm_addl(&state, state.esi, &state.edx); if (read_cf(state.eflags)) ebp_mirror++;
2470 asm_adcl(&state, state.ebx, &state.ebp); ebp_mirror += state.ebx;
2471 getchanvol_looped:
2472 /*
2473 asm_cmpl(&state, state.edi, state.ebp);*/
2474 if (ebp_mirror == edi_mirror)
2475 {
2476 write_cf(state.eflags, 0);
2477 write_zf(state.eflags, 1);
2478 } else if (ebp_mirror>edi_mirror) /* pos > loopend */
2479 {
2480 write_cf(state.eflags, 1);
2481 write_zf(state.eflags, 0);
2482 } else {
2483 write_cf(state.eflags, 0);
2484 write_zf(state.eflags, 0);
2485 }
2486 asm_decl(&state, &state.ecx);
2487 asm_jnz(&state, getchanvol_LoopHandler);
2488 asm_jmp(&state, getchanvol_SkipVoice);
2489 getchanvol_LoopHandler:
2490 asm_testl(&state, MIXF_LOOPED, dwmixfa_state.voiceflags[state.eax]);
2491 asm_jz(&state, getchanvol_SkipVoice);
2492 asm_subl(&state, dwmixfa_state.looplen[state.eax], &state.ebp); ebp_mirror -= dwmixfa_state.looplen[state.eax];
2493 asm_jmp(&state, getchanvol_looped);
2494 getchanvol_SkipVoice:
2495 asm_fidivl(&state, dwmixfa_state.nsamples);
2496 asm_fldx(&state, read_fpu_st(&state, 0));
2497 asm_fmuls(&state, dwmixfa_state.volleft[state.eax]);
2498 asm_fstps(&state, &dwmixfa_state.voll);
2499 asm_fmuls(&state, dwmixfa_state.volright[state.eax]);
2500 asm_fstps(&state, &dwmixfa_state.volr);
2501
2502 asm_popl(&state, &state.ebp);
2503 debug_printf("}\n");
2504 }
2505