1 /*
2 * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
3 * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files
7 * (the "Software"), to deal in the Software without restriction,
8 * including without limitation the rights to use, copy, modify, merge,
9 * publish, distribute, sublicense, and/or sell copies of the Software,
10 * and to permit persons to whom the Software is furnished to do so,
11 * subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * The text above constitutes the entire PortAudio license; however,
27 * the PortAudio community also makes the following non-binding requests:
28 *
29 * Any person wishing to distribute modifications to the Software is
30 * requested to send the modifications to the original developer so that
31 * they can be incorporated into the canonical version. It is also
32 * requested that these non-binding requests be included along with the
33 * license above.
34 */
35
36 /** @file
37 @ingroup win_src
38 */
39
40 #include "pa_x86_plain_converters.h"
41
42 #include "pa_converters.h"
43 #include "pa_dither.h"
44
45 /*
46 the main reason these versions are faster than the equivalent C versions
47 is that float -> int casting is expensive in C on x86 because the rounding
48 mode needs to be changed for every cast. these versions only set
49 the rounding mode once outside the loop.
50
51 small additional speed gains are made by the way that clamping is
52 implemented.
53
54 TODO:
55 o- inline dither code
56 o- implement Dither only (no-clip) versions
57 o- implement int8 and uint8 versions
58 o- test thoroughly
59
60 o- the packed 24 bit functions could benefit from unrolling and avoiding
61 byte and word sized register access.
62 */
63
64 /* -------------------------------------------------------------------------- */
65
66 /*
67 #define PA_CLIP_( val, min, max )\
68 { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
69 */
70
71 /*
72 the following notes were used to determine whether a floating point
73 value should be saturated (ie >1 or <-1) by loading it into an integer
74 register. these should be rewritten so that they make sense.
75
76 an ieee floating point value
77
78 1.xxxxxxxxxxxxxxxxxxxx?
79
80
81 is less than or equal to 1 and greater than or equal to -1 if either:
82
83 if the mantissa is 0 and the unbiased exponent is 0
84
85 OR
86
87 if the unbiased exponent < 0
88
89 this translates to:
90
91 if the mantissa is 0 and the biased exponent is 7F
92
93 or
94
95 if the biased exponent is less than 7F
96
97
98 therefore the value is greater than 1 or less than -1 if
99
100 the mantissa is not 0 and the biased exponent is 7F
101
102 or
103
104 if the biased exponent is greater than 7F
105
106
107 in other words, if we mask out the sign bit, the value is
108 greater than 1 or less than -1 if its integer representation is greater than:
109
110 0 01111111 0000 0000 0000 0000 0000 000
111
112 0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
113 */
114
115 #if defined(_WIN64) || defined(_WIN32_WCE)
116
117 /*
118 -EMT64/AMD64 uses different asm
119 -VC2005 doesn't allow _WIN64 with inline assembly either!
120 */
/*
 Stub used when the enclosing conditional selects _WIN64 or _WIN32_WCE,
 where the inline-assembly converters in this file are not built.
 It performs no work.
*/
void PaUtil_InitializeX86PlainConverters( void )
{
    /* intentionally empty */
    return;
}
124
125 #else
126
127 /* -------------------------------------------------------------------------- */
128
/* x87 control word loaded for the duration of each conversion loop:
   round to nearest, 64 bit precision, all exceptions masked */
static const short fpuControlWord_ = 0x033F;

/* Full-scale multipliers, one pair per destination sample width.
   The "dithered" scalers are one count smaller than the plain ones. */
static const double int32Scaler_         = 2147483647.0;    /* 0x7FFFFFFF */
static const double ditheredInt32Scaler_ = 2147483646.0;    /* 0x7FFFFFFE */
static const double int24Scaler_         = 8388607.0;       /* 0x7FFFFF */
static const double ditheredInt24Scaler_ = 8388606.0;       /* 0x7FFFFE */
static const double int16Scaler_         = 32767.0;         /* 0x7FFF */
static const double ditheredInt16Scaler_ = 32766.0;         /* 0x7FFE */

#define PA_DITHER_BITS_ (15)
/* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
#define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1))
/* memory copy of the scale so the assembly can use it as an fmul operand */
static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;
/* right-shift applied to each random seed when forming the dither value */
#define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1)
142
143 /* -------------------------------------------------------------------------- */
144
/*
 Convert float32 samples to int32, with no clipping and no dither.

 destinationBuffer   - first int32 output sample
 destinationStride   - distance between output samples, in int32 elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 Each sample is multiplied by int32Scaler_ and stored with fistp, using the
 rounding mode selected by fpuControlWord_. The caller's FPU control word is
 saved on entry and restored on exit.
*/
static void Float32_To_Int32(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int32Scaler_            // stack: (int)0x7FFFFFFF

    Float32_To_Int32_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
        /*
            note: storing to a temporary qword here instead would give
            wraparound distortion rather than the FPU's integer indefinite
            value for out-of-range input. that would be more work, and
            running without clipping is only advisable when the signal is
            known not to clip, so it isn't worth it.
        */
        fistp   dword ptr [edi]         // pop st(0) into dest, stack: (int)0x7FFFFFFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
229
230 /* -------------------------------------------------------------------------- */
231
/*
 Convert float32 samples to int32 with clipping, no dither.

 destinationBuffer   - first int32 output sample
 destinationStride   - distance between output samples, in int32 elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 A sample is treated as out of range when its bit pattern, with the sign bit
 masked off, is greater than 0x3F800000 (the pattern of 1.0f). Such samples
 are written as 0x7FFFFFFF (sign bit clear) or 0x80000000 (sign bit set)
 without touching the FPU. All other samples are multiplied by int32Scaler_
 and stored with fistp. The caller's FPU control word is saved and restored.
*/
static void Float32_To_Int32_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int32Scaler_            // stack: (int)0x7FFFFFFF

    Float32_To_Int32_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
        fistp   dword ptr [edi]         // pop st(0) into dest, stack: (int)0x7FFFFFFF
        jmp     Float32_To_Int32_Clip_stored

    Float32_To_Int32_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_Clip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_Clip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
328
329 /* -------------------------------------------------------------------------- */
330
/*
 Convert float32 samples to int32 with dither and clipping.

 destinationBuffer   - first int32 output sample
 destinationStride   - distance between output samples, in int32 elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - dither state; previous, randSeed1 and randSeed2 are
                       read on entry and written back on exit

 A sample is treated as out of range when its bit pattern, with the sign bit
 masked off, is greater than 0x3F800000 (the pattern of 1.0f). Such samples
 are written as 0x7FFFFFFF or 0x80000000 according to their sign, and no
 dither value is generated for them. All other samples are multiplied by
 ditheredInt32Scaler_, have a dither value added and are stored with fistp.
 The dither is generated inline rather than by calling
 PaUtil_GenerateFloatTriangularDither.
*/
static void Float32_To_Int32_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going.
       The dither state has not been modified at this point. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     ditheredInt32Scaler_    // stack: int scaler

    Float32_To_Int32_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*(int scaler), int scaler

        // generate dither (eax is needed for mul, so it is spilled first)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is kept
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is kept
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   dword ptr [edi]         // pop st(0) into dest, stack: int scaler
        jmp     Float32_To_Int32_DitherClip_stored

    Float32_To_Int32_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_DitherClip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_DitherClip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }

    /* publish the advanced dither state back to the caller's generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
482
483 /* -------------------------------------------------------------------------- */
484
/*
 Convert float32 samples to packed 24-bit integers, with no clipping and
 no dither.

 destinationBuffer   - first 3-byte output sample
 destinationStride   - distance between output samples, in 3-byte elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 Each sample is multiplied by int24Scaler_, converted to a 32-bit integer
 with fistp, and its low three bytes are written to the destination, lowest
 byte first. The caller's FPU control word is saved and restored.
*/
static void Float32_To_Int24(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int24Scaler_            // stack: (int)0x7FFFFF

    Float32_To_Int24_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack: (int)0x7FFFFF
        mov     edx, tempInt32

        mov     byte ptr [edi], DL      // byte 0
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
578
579 /* -------------------------------------------------------------------------- */
580
/*
 Convert float32 samples to packed 24-bit integers with clipping, no dither.

 destinationBuffer   - first 3-byte output sample
 destinationStride   - distance between output samples, in 3-byte elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 A sample is treated as out of range when its bit pattern, with the sign bit
 masked off, is greater than 0x3F800000 (the pattern of 1.0f). Such samples
 become 0x7FFFFF (sign bit clear) or 0x800000 (sign bit set). All other
 samples are multiplied by int24Scaler_ and converted with fistp. Either
 way the low three bytes of the result are written lowest byte first.
 The caller's FPU control word is saved and restored.
*/
static void Float32_To_Int24_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int24Scaler_            // stack: (int)0x7FFFFF

    Float32_To_Int24_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack: (int)0x7FFFFF
        mov     edx, tempInt32
        jmp     Float32_To_Int24_Clip_store

    Float32_To_Int24_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive, 0x800000 for negative

    Float32_To_Int24_Clip_store:

        mov     byte ptr [edi], DL      // byte 0
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_Clip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
692
693 /* -------------------------------------------------------------------------- */
694
/*
 Convert float32 samples to packed 24-bit integers with dither and clipping.

 destinationBuffer   - first 3-byte output sample
 destinationStride   - distance between output samples, in 3-byte elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - dither state; previous, randSeed1 and randSeed2 are
                       read on entry and written back on exit

 A sample is treated as out of range when its bit pattern, with the sign bit
 masked off, is greater than 0x3F800000 (the pattern of 1.0f). Such samples
 become 0x7FFFFF or 0x800000 according to their sign, and no dither value is
 generated for them. All other samples are multiplied by
 ditheredInt24Scaler_, have a dither value added and are converted with
 fistp. Either way the low three bytes of the result are written lowest
 byte first. The dither is generated inline rather than by calling
 PaUtil_GenerateFloatTriangularDither.
*/
static void Float32_To_Int24_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    signed long tempInt32;

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going.
       The dither state has not been modified at this point. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     ditheredInt24Scaler_    // stack: int scaler

    Float32_To_Int24_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*(int scaler), int scaler

        // generate dither (eax is needed for mul, so it is spilled first)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is kept
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is kept
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack: int scaler
        mov     edx, tempInt32
        jmp     Float32_To_Int24_DitherClip_store

    Float32_To_Int24_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive, 0x800000 for negative

    Float32_To_Int24_DitherClip_store:

        mov     byte ptr [edi], DL      // byte 0
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_DitherClip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }

    /* publish the advanced dither state back to the caller's generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
862
863 /* -------------------------------------------------------------------------- */
864
/*
 Convert float32 samples to int16, with no clipping and no dither.

 destinationBuffer   - first int16 output sample
 destinationStride   - distance between output samples, in int16 elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 Each sample is multiplied by int16Scaler_ and stored as a 16-bit integer
 with fistp, using the rounding mode selected by fpuControlWord_. The
 caller's FPU control word is saved on entry and restored on exit.
*/
static void Float32_To_Int16(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack: (int)0x7FFF

    Float32_To_Int16_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack: (int)0x7FFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
943
944 /* -------------------------------------------------------------------------- */
945
/*
 Convert float32 samples to int16 with clipping, no dither.

 destinationBuffer   - first int16 output sample
 destinationStride   - distance between output samples, in int16 elements
 sourceBuffer        - first float32 input sample
 sourceStride        - distance between input samples, in float32 elements
 count               - number of samples to convert; 0 does nothing
 ditherGenerator     - unused

 A sample is treated as out of range when its bit pattern, with the sign bit
 masked off, is greater than 0x3F800000 (the pattern of 1.0f). Such samples
 are written as 0x7FFF (sign bit clear) or 0x8000 (sign bit set) without
 touching the FPU. All other samples are multiplied by int16Scaler_ and
 stored with fistp. The caller's FPU control word is saved and restored.
*/
static void Float32_To_Int16_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below only compares against the end pointer after it has
       converted a sample, so it must never be entered with nothing to do:
       with count == 0 it would step past the end pointer and keep going. */
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU settings
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack: (int)0x7FFF

    Float32_To_Int16_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int16_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack: (int)0x7FFF
        jmp     Float32_To_Int16_Clip_stored

    Float32_To_Int16_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     dx, 0x7FFF              // 0x7FFF for positive, 0x8000 for negative
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_Clip_stored:

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_Clip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's FPU settings
    }
}
1042
1043 /* -------------------------------------------------------------------------- */
1044
/*
    Convert float32 samples to int16 with triangular dither and clipping,
    using IA32 inline assembly.

    destinationBuffer  - first int16 output sample
    destinationStride  - distance between output samples, in int16 elements
                         (multiplied by 2 below to get a byte stride)
    sourceBuffer       - first float32 input sample
    sourceStride       - distance between input samples, in float32 elements
                         (multiplied by 4 below to get a byte stride)
    count              - number of samples to convert; 0 is a no-op
    ditherGenerator    - dither state; previous/randSeed1/randSeed2 are read
                         on entry and written back on exit

    Reference C version that this routine is modelled on:

        float *src = (float*)sourceBuffer;
        signed short *dest = (signed short*)destinationBuffer;

        while( count-- )
        {
            float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
            // use smaller scaler to prevent overflow when we add the dither
            float dithered = (*src * (32766.0f)) + dither;
            signed long samp = (signed long) dithered;
            PA_CLIP_( samp, -0x8000, 0x7FFF );
            *dest = (signed short) samp;

            src += sourceStride;
            dest += destinationStride;
        }

    Differences from the reference that are visible in the assembly:
    - Clipping is decided on the raw float bit pattern before scaling: any
      input whose magnitude bits compare greater than those of 1.0f
      (0x3F800000) is stored directly as 0x7FFF or 0x8000 according to its
      sign bit. An input of exactly +/-1.0 takes the normal scaled path.
    - The dither generator is only advanced for samples that are not
      clamped.
*/
static void Float32_To_Int16_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;   // holds eax while the dither code uses it
    signed long highpassedDither;   // integer dither value handed to fild

    // dither state, copied to locals so the asm block can address it directly:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    // The loop below tests for the end pointer only AFTER converting a
    // sample. With count == 0 the end pointer equals the start pointer, so
    // one sample would be converted and the source pointer would then step
    // past the end pointer without ever matching it. Bail out early instead,
    // matching the reference while( count-- ) loop, which does nothing.
    if( count == 0 )
    {
        return;
    }

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_         // switch to the control word defined elsewhere in this file

        fld     ditheredInt16Scaler_    // stack: int scaler
                                        // (presumably 32766.0f as in the reference C - defined elsewhere)

    Float32_To_Int16_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // magnitude bits greater than those of 1.0f?

        jg      Float32_To_Int16_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*(int scaler), int scaler

        // generate dither inline (takes the place of the
        // PaUtil_GenerateFloatTriangularDither call in the reference C)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax (low 32 bits) is used
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax    // randSeed1 = randSeed1 * 196314165 + 907633515
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax (low 32 bits) is used
        lea     eax, [eax+907633515]    // randSeed2 = randSeed2 * 196314165 + 907633515
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // current = randSeed1>>x + randSeed2>>x
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   word ptr [edi]          // store scaled int into dest, stack: int scaler
        jmp     Float32_To_Int16_DitherClip_stored

    Float32_To_Int16_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     dx, 0x7FFF              // 0x7FFF for positive input, 0x8000 for negative input
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_DitherClip_stored:

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_DitherClip_loop

        // drop the scaler loaded before the loop from the FPU stack
        ffree   st(0)
        fincstp

        // clear any pending FPU exceptions, then restore the caller's control word
        fwait
        fnclex
        fldcw   savedFpuControlWord
    }

    // publish the advanced dither state back to the generator
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
1198
1199 /* -------------------------------------------------------------------------- */
1200
PaUtil_InitializeX86PlainConverters(void)1201 void PaUtil_InitializeX86PlainConverters( void )
1202 {
1203 paConverters.Float32_To_Int32 = Float32_To_Int32;
1204 paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
1205 paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
1206
1207 paConverters.Float32_To_Int24 = Float32_To_Int24;
1208 paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
1209 paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
1210
1211 paConverters.Float32_To_Int16 = Float32_To_Int16;
1212 paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
1213 paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
1214 }
1215
1216 #endif
1217
1218 /* -------------------------------------------------------------------------- */
1219