1 /*
2  * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
3  * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files
7  * (the "Software"), to deal in the Software without restriction,
8  * including without limitation the rights to use, copy, modify, merge,
9  * publish, distribute, sublicense, and/or sell copies of the Software,
10  * and to permit persons to whom the Software is furnished to do so,
11  * subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be
14  * included in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 /*
26  * The text above constitutes the entire PortAudio license; however,
27  * the PortAudio community also makes the following non-binding requests:
28  *
29  * Any person wishing to distribute modifications to the Software is
30  * requested to send the modifications to the original developer so that
31  * they can be incorporated into the canonical version. It is also
32  * requested that these non-binding requests be included along with the
33  * license above.
34  */
35 
36 /** @file
37  @ingroup win_src
38 */
39 
40 #include "pa_x86_plain_converters.h"
41 
42 #include "pa_converters.h"
43 #include "pa_dither.h"
44 
45 /*
46     the main reason these versions are faster than the equivalent C versions
47     is that float -> int casting is expensive in C on x86 because the rounding
48     mode needs to be changed for every cast. these versions only set
49     the rounding mode once outside the loop.
50 
51     small additional speed gains are made by the way that clamping is
52     implemented.
53 
54 TODO:
55     o- inline dither code
56     o- implement Dither only (no-clip) versions
57     o- implement int8 and uint8 versions
    o- test thoroughly
59 
60     o- the packed 24 bit functions could benefit from unrolling and avoiding
61         byte and word sized register access.
62 */
63 
64 /* -------------------------------------------------------------------------- */
65 
66 /*
67 #define PA_CLIP_( val, min, max )\
68     { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
69 */
70 
/*
    the following notes explain how to determine whether a floating point
    value should be saturated (ie it is >1 or <-1) by loading it into an
    integer register and using only integer operations.

    a normalised ieee single precision floating point value has the form

        (+/-) 1.xxxxxxxxxxxxxxxxxxxxxxx * 2^(unbiased exponent)

    it is less than or equal to 1 and greater than or equal to -1 if either:

        the mantissa is 0 and the unbiased exponent is 0

        OR

        the unbiased exponent is < 0

    in terms of the stored (biased) exponent this translates to:

        the mantissa is 0 and the biased exponent is 7F

        OR

        the biased exponent is less than 7F


    therefore the value is greater than 1 or less than -1 if:

        the mantissa is not 0 and the biased exponent is 7F

        OR

        the biased exponent is greater than 7F


    in other words, if we mask out the sign bit, the value is
    greater than 1 or less than -1 if its integer representation is greater than:

    sign exponent mantissa
    0 01111111 0000 0000 0000 0000 0000 000

    0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
*/
114 
115 #if defined(_WIN64) || defined(_WIN32_WCE)
116 
/*
	- EM64T/AMD64 uses different asm
	- VC2005 doesn't allow inline assembly when targeting _WIN64 either!
 */
/* Stub used when the enclosing #if selects a target for which the inline
   x86 assembly converters in this file are compiled out: does nothing. */
void PaUtil_InitializeX86PlainConverters( void )
{
    /* intentionally empty */
}
124 
125 #else
126 
127 /* -------------------------------------------------------------------------- */
128 
/* x87 control word loaded for the duration of each conversion loop:
   round to nearest, 64 bit precision, all exceptions masked.
   Kept as a 16 bit object because it is used as the operand of fldcw. */
static const short fpuControlWord_ = 0x033F;

/* multipliers applied to the float sample before it is stored as an integer */
static const double int16Scaler_ = 0x7FFF;
static const double int24Scaler_ = 0x7FFFFF;
static const double int32Scaler_ = 0x7FFFFFFF;

/* one less than the plain scalers: a smaller scaler is used by the dithering
   converters to prevent overflow when the dither is added */
static const double ditheredInt16Scaler_ = 0x7FFE;
static const double ditheredInt24Scaler_ = 0x7FFFFE;
static const double ditheredInt32Scaler_ = 0x7FFFFFFE;

/* dither generator parameters */
#define PA_DITHER_BITS_   (15)
/* right shift applied to each 32 bit random seed before the two are summed */
#define PA_DITHER_SHIFT_  ((32 - PA_DITHER_BITS_) + 1)
/* Multiply the integer dither by PA_FLOAT_DITHER_SCALE_ to get a float.
   The original note gives the result as between -2.0 and +1.99999.
   NOTE(review): with the shifts used in this file the high-passed sum looks
   like it stays within +/-32766, ie roughly +/-1.0 after scaling -- confirm. */
#define PA_FLOAT_DITHER_SCALE_  (1.0F / ((1<<PA_DITHER_BITS_)-1))
/* the scale factor as a float object so that it can be an fmul memory operand */
static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;
142 
143 /* -------------------------------------------------------------------------- */
144 
/**
 Convert float32 samples to int32 using the x87 FPU, without clipping or
 dither.

 Each input sample is multiplied by int32Scaler_ (0x7FFFFFFF) and stored as a
 32 bit integer with fistp. The FPU control word is saved, replaced by
 fpuControlWord_ for the duration of the loop and restored before returning,
 so the rounding mode is set once per call instead of once per sample.

 Input samples are not range checked; see the note inside the loop.

 @param destinationBuffer address of the first int32 output sample
 @param destinationStride distance between consecutive output samples, in
        int32 elements (multiplied by 4 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing
 @param ditherGenerator unused
*/
static void Float32_To_Int32(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     int32Scaler_            // stack:  scaler

    Float32_To_Int32_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*scaler, scaler
        /*
            note: a product that does not fit in 32 bits is stored by fistp
            as the x87 "integer indefinite" value (0x80000000). we could
            store to a temporary qword here instead, which would cause
            wraparound distortion rather than integer indefinite. that would
            be more work, and given that not enabling clipping is only
            advisable when you know that your signal isn't going to clip it
            isn't worth it.
        */
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  scaler

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }
}
229 
230 /* -------------------------------------------------------------------------- */
231 
/**
 Convert float32 samples to int32 using the x87 FPU, clipping out of range
 input.

 Each sample is first examined as an integer: with the sign bit masked off,
 a bit pattern greater than 0x3F800000 (1.0f) means the magnitude exceeds 1.
 Such samples bypass the FPU and are stored directly as 0x7FFFFFFF (sign bit
 clear) or 0x80000000 (sign bit set). All other samples are multiplied by
 int32Scaler_ (0x7FFFFFFF) and stored with fistp.

 The FPU control word is saved, replaced by fpuControlWord_ for the duration
 of the loop and restored before returning.

 @param destinationBuffer address of the first int32 output sample
 @param destinationStride distance between consecutive output samples, in
        int32 elements (multiplied by 4 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing
 @param ditherGenerator unused
*/
static void Float32_To_Int32_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     int32Scaler_            // stack:  scaler

    Float32_To_Int32_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*scaler, scaler
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  scaler
        jmp     Float32_To_Int32_Clip_stored

    Float32_To_Int32_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF if positive, 0x80000000 if negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_Clip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_Clip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }
}
328 
329 /* -------------------------------------------------------------------------- */
330 
/**
 Convert float32 samples to int32 using the x87 FPU, adding high-passed
 dither and clipping out of range input.

 Each sample is first examined as an integer: with the sign bit masked off,
 a bit pattern greater than 0x3F800000 (1.0f) means the magnitude exceeds 1.
 Such samples bypass the FPU and the dither generator and are stored directly
 as 0x7FFFFFFF (sign bit clear) or 0x80000000 (sign bit set).

 All other samples are multiplied by ditheredInt32Scaler_ (0x7FFFFFFE, one
 less than the plain scaler to prevent overflow when the dither is added),
 have a dither value added and are stored with fistp. The dither is generated
 inline: both seeds are advanced with seed = seed * 196314165 + 907633515,
 each is shifted right by PA_DITHER_SHIFT_ and the two are summed to give
 "current"; the value added to the signal is (current - previous) scaled by
 const_float_dither_scale_.

 The generator state is copied from *ditherGenerator into locals on entry and
 written back on exit. The FPU control word is saved, replaced by
 fpuControlWord_ for the duration of the loop and restored before returning.

 @param destinationBuffer address of the first int32 output sample
 @param destinationStride distance between consecutive output samples, in
        int32 elements (multiplied by 4 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing and leaves the
        dither state untouched
 @param ditherGenerator dither state, read on entry and updated on exit;
        must not be NULL
*/
static void Float32_To_Int32_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     ditheredInt32Scaler_    // stack:  int scaler

    Float32_To_Int32_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither (eax is needed by mul, so the byte stride is spilled)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  int scaler
        jmp     Float32_To_Int32_DitherClip_stored

    Float32_To_Int32_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF if positive, 0x80000000 if negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_DitherClip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_DitherClip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }

    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
482 
483 /* -------------------------------------------------------------------------- */
484 
/**
 Convert float32 samples to packed 24 bit integers using the x87 FPU, without
 clipping or dither.

 Each input sample is multiplied by int24Scaler_ (0x7FFFFF) and converted to
 a 32 bit integer with fistp; the low three bytes of that integer are then
 written to the destination, least significant byte first. Input samples are
 not range checked.

 The FPU control word is saved, replaced by fpuControlWord_ for the duration
 of the loop and restored before returning.

 @param destinationBuffer address of the first 3 byte output sample
 @param destinationStride distance between consecutive output samples, in
        3 byte elements (multiplied by 3 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing
 @param ditherGenerator unused
*/
static void Float32_To_Int24(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     int24Scaler_            // stack:  scaler

    Float32_To_Int24_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*scaler, scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  scaler
        mov     edx, tempInt32

        // store the low three bytes, least significant first
        mov     byte ptr [edi], DL
        shr     edx, 8
        mov     word ptr [edi+1], DX

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }
}
578 
579 /* -------------------------------------------------------------------------- */
580 
/**
 Convert float32 samples to packed 24 bit integers using the x87 FPU,
 clipping out of range input.

 Each sample is first examined as an integer: with the sign bit masked off,
 a bit pattern greater than 0x3F800000 (1.0f) means the magnitude exceeds 1.
 Such samples bypass the FPU and are replaced by 0x7FFFFF (sign bit clear) or
 0x800000 (sign bit set). All other samples are multiplied by int24Scaler_
 (0x7FFFFF) and converted to a 32 bit integer with fistp. In both cases the
 low three bytes of the result are written to the destination, least
 significant byte first.

 The FPU control word is saved, replaced by fpuControlWord_ for the duration
 of the loop and restored before returning.

 @param destinationBuffer address of the first 3 byte output sample
 @param destinationStride distance between consecutive output samples, in
        3 byte elements (multiplied by 3 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing
 @param ditherGenerator unused
*/
static void Float32_To_Int24_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     int24Scaler_            // stack:  scaler

    Float32_To_Int24_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*scaler, scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  scaler
        mov     edx, tempInt32
        jmp     Float32_To_Int24_Clip_store

    Float32_To_Int24_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF if positive, 0x800000 if negative

    Float32_To_Int24_Clip_store:

        // store the low three bytes, least significant first
        mov     byte ptr [edi], DL
        shr     edx, 8
        mov     word ptr [edi+1], DX

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_Clip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }
}
692 
693 /* -------------------------------------------------------------------------- */
694 
/**
 Convert float32 samples to packed 24 bit integers using the x87 FPU, adding
 high-passed dither and clipping out of range input.

 Each sample is first examined as an integer: with the sign bit masked off,
 a bit pattern greater than 0x3F800000 (1.0f) means the magnitude exceeds 1.
 Such samples bypass the FPU and the dither generator and are replaced by
 0x7FFFFF (sign bit clear) or 0x800000 (sign bit set).

 All other samples are multiplied by ditheredInt24Scaler_ (0x7FFFFE, one less
 than the plain scaler to prevent overflow when the dither is added), have a
 dither value added and are converted to a 32 bit integer with fistp. The
 dither is generated inline: both seeds are advanced with
 seed = seed * 196314165 + 907633515, each is shifted right by
 PA_DITHER_SHIFT_ and the two are summed to give "current"; the value added
 to the signal is (current - previous) scaled by const_float_dither_scale_.

 In both cases the low three bytes of the result are written to the
 destination, least significant byte first.

 The generator state is copied from *ditherGenerator into locals on entry and
 written back on exit. The FPU control word is saved, replaced by
 fpuControlWord_ for the duration of the loop and restored before returning.

 @param destinationBuffer address of the first 3 byte output sample
 @param destinationStride distance between consecutive output samples, in
        3 byte elements (multiplied by 3 to obtain the byte stride)
 @param sourceBuffer address of the first float32 input sample
 @param sourceStride distance between consecutive input samples, in float32
        elements (multiplied by 4 to obtain the byte stride)
 @param count number of samples to convert; 0 converts nothing and leaves the
        dither state untouched
 @param ditherGenerator dither state, read on entry and updated on exit;
        must not be NULL
*/
static void Float32_To_Int24_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    signed long tempInt32;

    /* The loop below only compares the source pointer with the end pointer
       after a sample has been converted. With count == 0 the end pointer
       equals the start pointer, so the loop would convert a sample anyway and
       then keep going until the source pointer wrapped around. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     ditheredInt24Scaler_    // stack:  int scaler

    Float32_To_Int24_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither (eax is needed by mul, so the byte stride is spilled)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  int scaler
        mov     edx, tempInt32
        jmp     Float32_To_Int24_DitherClip_store

    Float32_To_Int24_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF if positive, 0x800000 if negative

    Float32_To_Int24_DitherClip_store:

        // store the low three bytes, least significant first
        mov     byte ptr [edi], DL
        shr     edx, 8
        mov     word ptr [edi+1], DX

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_DitherClip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord
    }

    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
862 
863 /* -------------------------------------------------------------------------- */
864 
/**
 Convert float samples to signed 16-bit integers without clipping.

 Every source sample is multiplied by int16Scaler_ and stored as a 16-bit
 integer with FISTP. The FPU control word is switched to fpuControlWord_ for
 the duration of the conversion and the caller's control word is restored
 before returning.

 @param destinationBuffer  address of the first 16-bit output sample
 @param destinationStride  distance between output samples, counted in
                           16-bit samples (scaled by 2 to get bytes)
 @param sourceBuffer       address of the first float input sample
 @param sourceStride       distance between input samples, counted in
                           floats (scaled by 4 to get bytes)
 @param count              number of samples to convert; zero is a no-op
 @param ditherGenerator    unused by this converter
*/
static void Float32_To_Int16(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    Reference C implementation:

    float *src = (float*)sourceBuffer;
    signed short *dest =  (signed short*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {

        short samp = (short) (*src * (32767.0f));
        *dest = samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The assembly loop below checks for the end pointer only after it has
       converted a sample. With count == 0 the end pointer equals the start
       pointer, the check never matches and the loop runs off both buffers,
       so bail out here (the reference loop above also does nothing). */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack:  (int)0x7FFF

    Float32_To_Int16_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack:  (int)0x7FFF

        add     edi, ebx                // increment destination ptr
        //lea     edi, [edi+ebx]

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty...
        fincstp                         // ...and step the stack top past it

        fwait
        fnclex                          // clear FPU exception flags raised while converting
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }
}
943 
944 /* -------------------------------------------------------------------------- */
945 
/**
 Convert float samples to signed 16-bit integers, clamping out-of-range input.

 A sample whose magnitude bits compare greater than those of 1.0f is not
 passed through the FPU; it is replaced by 0x7FFF when its sign bit is clear
 and by 0x8000 when its sign bit is set. All other samples are multiplied by
 int16Scaler_ and stored with FISTP. The FPU control word is switched to
 fpuControlWord_ for the duration of the conversion and the caller's control
 word is restored before returning.

 @param destinationBuffer  address of the first 16-bit output sample
 @param destinationStride  distance between output samples, counted in
                           16-bit samples (scaled by 2 to get bytes)
 @param sourceBuffer       address of the first float input sample
 @param sourceStride       distance between input samples, counted in
                           floats (scaled by 4 to get bytes)
 @param count              number of samples to convert; zero is a no-op
 @param ditherGenerator    unused by this converter
*/
static void Float32_To_Int16_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    Reference C implementation:

    float *src = (float*)sourceBuffer;
    signed short *dest =  (signed short*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {
        long samp = (signed long) (*src * (32767.0f));
        PA_CLIP_( samp, -0x8000, 0x7FFF );
        *dest = (signed short) samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The assembly loop below checks for the end pointer only after it has
       converted a sample. With count == 0 the end pointer equals the start
       pointer, the check never matches and the loop runs off both buffers,
       so bail out here (the reference loop above also does nothing). */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack:  (int)0x7FFF

    Float32_To_Int16_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0
                                        // (0x3F800000 is the bit pattern of 1.0f; with the
                                        // sign masked off an integer compare orders magnitudes)

        jg      Float32_To_Int16_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack:  (int)0x7FFF
        jmp     Float32_To_Int16_Clip_stored

    Float32_To_Int16_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        add     dx, 0x7FFF              // convert to maximum range integers:
                                        // 0 + 0x7FFF = 0x7FFF, 1 + 0x7FFF = 0x8000
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_Clip_stored:

        add     edi, ebx                // increment destination ptr
        //lea     edi, [edi+ebx]

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_Clip_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty...
        fincstp                         // ...and step the stack top past it

        fwait
        fnclex                          // clear FPU exception flags raised while converting
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }
}
1042 
1043 /* -------------------------------------------------------------------------- */
1044 
/**
 Convert float samples to signed 16-bit integers, adding high-passed dither
 to in-range samples and clamping out-of-range input.

 A sample whose magnitude bits compare greater than those of 1.0f is not
 passed through the FPU and gets no dither; it is replaced by 0x7FFF when its
 sign bit is clear and by 0x8000 when its sign bit is set. All other samples
 are multiplied by ditheredInt16Scaler_, a dither value is added, and the sum
 is stored with FISTP.

 The dither state (previous, randSeed1, randSeed2) is copied out of
 @a ditherGenerator on entry, advanced once per dithered sample, and written
 back on exit. The FPU control word is switched to fpuControlWord_ for the
 duration of the conversion and the caller's control word is restored before
 returning.

 @param destinationBuffer  address of the first 16-bit output sample
 @param destinationStride  distance between output samples, counted in
                           16-bit samples (scaled by 2 to get bytes)
 @param sourceBuffer       address of the first float input sample
 @param sourceStride       distance between input samples, counted in
                           floats (scaled by 4 to get bytes)
 @param count              number of samples to convert; zero is a no-op that
                           leaves the dither state untouched
 @param ditherGenerator    dither state to read and update; must not be NULL
*/
static void Float32_To_Int16_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    Reference C implementation:

    float *src = (float*)sourceBuffer;
    signed short *dest =  (signed short*)destinationBuffer;

    while( count-- )
    {

        float dither  = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        float dithered = (*src * (32766.0f)) + dither;
        signed long samp = (signed long) dithered;
        PA_CLIP_( samp, -0x8000, 0x7FFF );
        *dest = (signed short) samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The assembly loop below checks for the end pointer only after it has
       converted a sample. With count == 0 the end pointer equals the start
       pointer, the check never matches and the loop runs off both buffers,
       so bail out here (the reference loop above also does nothing). */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     ditheredInt16Scaler_    // stack:  int scaler

    Float32_To_Int16_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0
                                        // (0x3F800000 is the bit pattern of 1.0f; with the
                                        // sign masked off an integer compare orders magnitudes)

        jg      Float32_To_Int16_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither
    // (done inline here; the reference code above obtains the dither value
    //  by calling PaUtil_GenerateFloatTriangularDither)
        mov     sourceByteStride, eax   // save eax, mul below overwrites it
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only the low half (eax) is used
        //add     eax, 907633515
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax    // randSeed1 = randSeed1 * 196314165 + 907633515
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only the low half (eax) is used
        //add     eax, 907633515
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax    // randSeed2 = randSeed2 * 196314165 + 907633515
        shr     eax, PA_DITHER_SHIFT_
        //add     eax, edx                // eax -> current
        lea     eax, [eax+edx]            // current = randSeed1>>x + randSeed2>>x
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither        // loaded as a signed 32-bit integer
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   word ptr [edi]          // store scaled int into dest, stack:  int scaler
        jmp     Float32_To_Int16_DitherClip_stored

    Float32_To_Int16_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        add     dx, 0x7FFF              // convert to maximum range integers:
                                        // 0 + 0x7FFF = 0x7FFF, 1 + 0x7FFF = 0x8000
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_DitherClip_stored:

        add     edi, ebx                // increment destination ptr
        //lea     edi, [edi+ebx]

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_DitherClip_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty...
        fincstp                         // ...and step the stack top past it

        fwait
        fnclex                          // clear FPU exception flags raised while converting
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }

    /* hand the advanced dither state back to the generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
1198 
1199 /* -------------------------------------------------------------------------- */
1200 
PaUtil_InitializeX86PlainConverters(void)1201 void PaUtil_InitializeX86PlainConverters( void )
1202 {
1203     paConverters.Float32_To_Int32 = Float32_To_Int32;
1204     paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
1205     paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
1206 
1207     paConverters.Float32_To_Int24 = Float32_To_Int24;
1208     paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
1209     paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
1210 
1211     paConverters.Float32_To_Int16 = Float32_To_Int16;
1212     paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
1213     paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
1214 }
1215 
1216 #endif
1217 
1218 /* -------------------------------------------------------------------------- */
1219