1 /* 2 synth_s32.c: The functions for synthesizing real (float) samples, at the end of decoding. 3 4 copyright 1995-2008 by the mpg123 project - free software under the terms of the LGPL 2.1 5 see COPYING and AUTHORS files in distribution or http://mpg123.org 6 initially written by Michael Hipp, heavily dissected and rearranged by Thomas Orgis 7 */ 8 9 #include "mpg123lib_intern.h" 10 #include "sample.h" 11 #include "debug.h" 12 13 #ifdef REAL_IS_FIXED 14 #error "Do not build this file with fixed point math!" 15 #else 16 /* 17 Part 4: All synth functions that produce signed 32 bit output. 18 What we need is just a special WRITE_SAMPLE. 19 */ 20 21 #define SAMPLE_T int32_t 22 #define WRITE_SAMPLE(samples,sum,clip) WRITE_S32_SAMPLE(samples,sum,clip) 23 24 /* Part 4a: All straight 1to1 decoding functions */ 25 #define BLOCK 0x40 /* One decoding block is 64 samples. */ 26 27 #define SYNTH_NAME synth_1to1_s32 28 #include "synth.h" 29 #undef SYNTH_NAME 30 31 /* Mono-related synths; they wrap over _some_ synth_1to1_s32 (could be generic, could be i386). */ 32 #define SYNTH_NAME fr->synths.plain[r_1to1][f_32] 33 #define MONO_NAME synth_1to1_s32_mono 34 #define MONO2STEREO_NAME synth_1to1_s32_m2s 35 #include "synth_mono.h" 36 #undef SYNTH_NAME 37 #undef MONO_NAME 38 #undef MONO2STEREO_NAME 39 40 #ifdef OPT_X86 41 #define NO_AUTOINCREMENT 42 #define SYNTH_NAME synth_1to1_s32_i386 43 #include "synth.h" 44 #undef SYNTH_NAME 45 /* i386 uses the normal mono functions. */ 46 #undef NO_AUTOINCREMENT 47 #endif 48 49 #ifdef OPT_X86_64 50 /* Assembler routines. */ 51 int synth_1to1_s32_x86_64_asm(real *window, real *b0, int32_t *samples, int bo1); 52 int synth_1to1_s32_s_x86_64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); 53 void dct64_real_x86_64(real *out0, real *out1, real *samples); 54 /* Hull for C mpg123 API */ 55 int synth_1to1_s32_x86_64(real *bandPtr,int channel, mpg123_handle *fr, int final) 56 { 57 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 58 59 real *b0, **buf; 60 int bo1; 61 int clip; 62 #ifndef NO_EQUALIZER 63 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 64 #endif 65 if(!channel) 66 { 67 fr->bo--; 68 fr->bo &= 0xf; 69 buf = fr->real_buffs[0]; 70 } 71 else 72 { 73 samples++; 74 buf = fr->real_buffs[1]; 75 } 76 77 if(fr->bo & 0x1) 78 { 79 b0 = buf[0]; 80 bo1 = fr->bo; 81 dct64_real_x86_64(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 82 } 83 else 84 { 85 b0 = buf[1]; 86 bo1 = fr->bo+1; 87 dct64_real_x86_64(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 88 } 89 90 clip = synth_1to1_s32_x86_64_asm(fr->decwin, b0, samples, bo1); 91 92 if(final) fr->buffer.fill += 256; 93 94 return clip; 95 } 96 97 98 int synth_1to1_s32_stereo_x86_64(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 99 { 100 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 101 102 real *b0l, *b0r, **bufl, **bufr; 103 int bo1; 104 int clip; 105 #ifndef NO_EQUALIZER 106 if(fr->have_eq_settings) 107 { 108 do_equalizer(bandPtr_l,0,fr->equalizer); 109 do_equalizer(bandPtr_r,1,fr->equalizer); 110 } 111 #endif 112 fr->bo--; 113 fr->bo &= 0xf; 114 bufl = fr->real_buffs[0]; 115 bufr = fr->real_buffs[1]; 116 117 if(fr->bo & 0x1) 118 { 119 b0l = bufl[0]; 120 b0r = bufr[0]; 121 bo1 = fr->bo; 122 dct64_real_x86_64(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 123 dct64_real_x86_64(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 124 } 125 else 126 { 127 b0l = bufl[1]; 128 b0r = bufr[1]; 129 bo1 = fr->bo+1; 130 dct64_real_x86_64(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 131 dct64_real_x86_64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 132 } 133 134 clip = synth_1to1_s32_s_x86_64_asm(fr->decwin, b0l, b0r, samples, bo1); 135 136 fr->buffer.fill += 256; 137 138 return clip; 139 } 140 #endif 141 142 #ifdef OPT_AVX 143 /* Assembler routines. */ 144 #ifndef OPT_x86_64 145 int synth_1to1_s32_x86_64_asm(real *window, real *b0, int32_t *samples, int bo1); 146 #endif 147 int synth_1to1_s32_s_avx_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); 148 void dct64_real_avx(real *out0, real *out1, real *samples); 149 /* Hull for C mpg123 API */ 150 int synth_1to1_s32_avx(real *bandPtr,int channel, mpg123_handle *fr, int final) 151 { 152 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 153 154 real *b0, **buf; 155 int bo1; 156 int clip; 157 #ifndef NO_EQUALIZER 158 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 159 #endif 160 if(!channel) 161 { 162 fr->bo--; 163 fr->bo &= 0xf; 164 buf = fr->real_buffs[0]; 165 } 166 else 167 { 168 samples++; 169 buf = fr->real_buffs[1]; 170 } 171 172 if(fr->bo & 0x1) 173 { 174 b0 = buf[0]; 175 bo1 = fr->bo; 176 dct64_real_avx(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 177 } 178 else 179 { 180 b0 = buf[1]; 181 bo1 = fr->bo+1; 182 dct64_real_avx(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 183 } 184 185 clip = synth_1to1_s32_x86_64_asm(fr->decwin, b0, samples, bo1); 186 187 if(final) fr->buffer.fill += 256; 188 189 return clip; 190 } 191 192 193 int synth_1to1_s32_stereo_avx(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 194 { 195 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 196 197 real *b0l, *b0r, **bufl, **bufr; 198 int bo1; 199 int clip; 200 #ifndef NO_EQUALIZER 201 if(fr->have_eq_settings) 202 { 203 do_equalizer(bandPtr_l,0,fr->equalizer); 204 do_equalizer(bandPtr_r,1,fr->equalizer); 205 } 206 #endif 207 fr->bo--; 208 fr->bo &= 0xf; 209 bufl = fr->real_buffs[0]; 210 bufr = fr->real_buffs[1]; 211 212 if(fr->bo & 0x1) 213 { 214 b0l = bufl[0]; 215 b0r = bufr[0]; 216 bo1 = fr->bo; 217 dct64_real_avx(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 218 dct64_real_avx(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 219 } 220 else 221 { 222 b0l = bufl[1]; 223 b0r = bufr[1]; 224 bo1 = fr->bo+1; 225 dct64_real_avx(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 226 dct64_real_avx(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 227 } 228 229 clip = synth_1to1_s32_s_avx_asm(fr->decwin, b0l, b0r, samples, bo1); 230 231 fr->buffer.fill += 256; 232 233 return clip; 234 } 235 #endif 236 237 #if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) 238 /* Assembler routines. */ 239 int synth_1to1_s32_sse_asm(real *window, real *b0, int32_t *samples, int bo1); 240 int synth_1to1_s32_s_sse_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); 241 void dct64_real_sse(real *out0, real *out1, real *samples); 242 /* Hull for C mpg123 API */ 243 int synth_1to1_s32_sse(real *bandPtr,int channel, mpg123_handle *fr, int final) 244 { 245 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 246 247 real *b0, **buf; 248 int bo1; 249 int clip; 250 #ifndef NO_EQUALIZER 251 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 252 #endif 253 if(!channel) 254 { 255 fr->bo--; 256 fr->bo &= 0xf; 257 buf = fr->real_buffs[0]; 258 } 259 else 260 { 261 samples++; 262 buf = fr->real_buffs[1]; 263 } 264 265 if(fr->bo & 0x1) 266 { 267 b0 = buf[0]; 268 bo1 = fr->bo; 269 dct64_real_sse(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 270 } 271 else 272 { 273 b0 = buf[1]; 274 bo1 = fr->bo+1; 275 dct64_real_sse(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 276 } 277 278 clip = synth_1to1_s32_sse_asm(fr->decwin, b0, samples, bo1); 279 280 if(final) fr->buffer.fill += 256; 281 282 return clip; 283 } 284 285 286 int synth_1to1_s32_stereo_sse(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 287 { 288 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 289 290 real *b0l, *b0r, **bufl, **bufr; 291 int bo1; 292 int clip; 293 #ifndef NO_EQUALIZER 294 if(fr->have_eq_settings) 295 { 296 do_equalizer(bandPtr_l,0,fr->equalizer); 297 do_equalizer(bandPtr_r,1,fr->equalizer); 298 } 299 #endif 300 fr->bo--; 301 fr->bo &= 0xf; 302 bufl = fr->real_buffs[0]; 303 bufr = fr->real_buffs[1]; 304 305 if(fr->bo & 0x1) 306 { 307 b0l = bufl[0]; 308 b0r = bufr[0]; 309 bo1 = fr->bo; 310 dct64_real_sse(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 311 dct64_real_sse(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 312 } 313 else 314 { 315 b0l = bufl[1]; 316 b0r = bufr[1]; 317 bo1 = fr->bo+1; 318 dct64_real_sse(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 319 dct64_real_sse(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 320 } 321 322 clip = synth_1to1_s32_s_sse_asm(fr->decwin, b0l, b0r, samples, bo1); 323 324 fr->buffer.fill += 256; 325 326 return clip; 327 } 328 #endif 329 330 #ifdef OPT_NEON 331 /* Assembler routines. */ 332 int synth_1to1_s32_neon_asm(real *window, real *b0, int32_t *samples, int bo1); 333 int synth_1to1_s32_s_neon_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); 334 void dct64_real_neon(real *out0, real *out1, real *samples); 335 /* Hull for C mpg123 API */ 336 int synth_1to1_s32_neon(real *bandPtr,int channel, mpg123_handle *fr, int final) 337 { 338 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 339 340 real *b0, **buf; 341 int bo1; 342 int clip; 343 #ifndef NO_EQUALIZER 344 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 345 #endif 346 if(!channel) 347 { 348 fr->bo--; 349 fr->bo &= 0xf; 350 buf = fr->real_buffs[0]; 351 } 352 else 353 { 354 samples++; 355 buf = fr->real_buffs[1]; 356 } 357 358 if(fr->bo & 0x1) 359 { 360 b0 = buf[0]; 361 bo1 = fr->bo; 362 dct64_real_neon(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 363 } 364 else 365 { 366 b0 = buf[1]; 367 bo1 = fr->bo+1; 368 dct64_real_neon(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 369 } 370 371 clip = synth_1to1_s32_neon_asm(fr->decwin, b0, samples, bo1); 372 373 if(final) fr->buffer.fill += 256; 374 375 return clip; 376 } 377 378 int synth_1to1_s32_stereo_neon(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 379 { 380 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 381 382 real *b0l, *b0r, **bufl, **bufr; 383 int bo1; 384 int clip; 385 #ifndef NO_EQUALIZER 386 if(fr->have_eq_settings) 387 { 388 do_equalizer(bandPtr_l,0,fr->equalizer); 389 do_equalizer(bandPtr_r,1,fr->equalizer); 390 } 391 #endif 392 fr->bo--; 393 fr->bo &= 0xf; 394 bufl = fr->real_buffs[0]; 395 bufr = fr->real_buffs[1]; 396 397 if(fr->bo & 0x1) 398 { 399 b0l = bufl[0]; 400 b0r = bufr[0]; 401 bo1 = fr->bo; 402 dct64_real_neon(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 403 dct64_real_neon(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 404 } 405 else 406 { 407 b0l = bufl[1]; 408 b0r = bufr[1]; 409 bo1 = fr->bo+1; 410 dct64_real_neon(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 411 dct64_real_neon(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 412 } 413 414 clip = synth_1to1_s32_s_neon_asm(fr->decwin, b0l, b0r, samples, bo1); 415 416 fr->buffer.fill += 256; 417 418 return clip; 419 } 420 #endif 421 422 #ifdef OPT_NEON64 423 /* Assembler routines. */ 424 int synth_1to1_s32_neon64_asm(real *window, real *b0, int32_t *samples, int bo1); 425 int synth_1to1_s32_s_neon64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); 426 void dct64_real_neon64(real *out0, real *out1, real *samples); 427 /* Hull for C mpg123 API */ 428 int synth_1to1_s32_neon64(real *bandPtr,int channel, mpg123_handle *fr, int final) 429 { 430 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 431 432 real *b0, **buf; 433 int bo1; 434 int clip; 435 #ifndef NO_EQUALIZER 436 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 437 #endif 438 if(!channel) 439 { 440 fr->bo--; 441 fr->bo &= 0xf; 442 buf = fr->real_buffs[0]; 443 } 444 else 445 { 446 samples++; 447 buf = fr->real_buffs[1]; 448 } 449 450 if(fr->bo & 0x1) 451 { 452 b0 = buf[0]; 453 bo1 = fr->bo; 454 dct64_real_neon64(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 455 } 456 else 457 { 458 b0 = buf[1]; 459 bo1 = fr->bo+1; 460 dct64_real_neon64(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 461 } 462 463 clip = synth_1to1_s32_neon64_asm(fr->decwin, b0, samples, bo1); 464 465 if(final) fr->buffer.fill += 256; 466 467 return clip; 468 } 469 470 int synth_1to1_s32st_neon64(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 471 { 472 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 473 474 real *b0l, *b0r, **bufl, **bufr; 475 int bo1; 476 int clip; 477 #ifndef NO_EQUALIZER 478 if(fr->have_eq_settings) 479 { 480 do_equalizer(bandPtr_l,0,fr->equalizer); 481 do_equalizer(bandPtr_r,1,fr->equalizer); 482 } 483 #endif 484 fr->bo--; 485 fr->bo &= 0xf; 486 bufl = fr->real_buffs[0]; 487 bufr = fr->real_buffs[1]; 488 489 if(fr->bo & 0x1) 490 { 491 b0l = bufl[0]; 492 b0r = bufr[0]; 493 bo1 = fr->bo; 494 dct64_real_neon64(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 495 dct64_real_neon64(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 496 } 497 else 498 { 499 b0l = bufl[1]; 500 b0r = bufr[1]; 501 bo1 = fr->bo+1; 502 dct64_real_neon64(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 503 dct64_real_neon64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 504 } 505 506 clip = synth_1to1_s32_s_neon64_asm(fr->decwin, b0l, b0r, samples, bo1); 507 508 fr->buffer.fill += 256; 509 510 return clip; 511 } 512 #endif 513 514 #undef BLOCK 515 516 #ifndef NO_DOWNSAMPLE 517 518 /* 519 Part 4b: 2to1 synth. Only generic and i386. 520 */ 521 #define BLOCK 0x20 /* One decoding block is 32 samples. */ 522 523 #define SYNTH_NAME synth_2to1_s32 524 #include "synth.h" 525 #undef SYNTH_NAME 526 527 /* Mono-related synths; they wrap over _some_ synth_2to1_s32 (could be generic, could be i386). */ 528 #define SYNTH_NAME fr->synths.plain[r_2to1][f_32] 529 #define MONO_NAME synth_2to1_s32_mono 530 #define MONO2STEREO_NAME synth_2to1_s32_m2s 531 #include "synth_mono.h" 532 #undef SYNTH_NAME 533 #undef MONO_NAME 534 #undef MONO2STEREO_NAME 535 536 #ifdef OPT_X86 537 #define NO_AUTOINCREMENT 538 #define SYNTH_NAME synth_2to1_s32_i386 539 #include "synth.h" 540 #undef SYNTH_NAME 541 /* i386 uses the normal mono functions. */ 542 #undef NO_AUTOINCREMENT 543 #endif 544 545 #undef BLOCK 546 547 /* 548 Part 4c: 4to1 synth. Only generic and i386. 549 */ 550 #define BLOCK 0x10 /* One decoding block is 16 samples. */ 551 552 #define SYNTH_NAME synth_4to1_s32 553 #include "synth.h" 554 #undef SYNTH_NAME 555 556 /* Mono-related synths; they wrap over _some_ synth_4to1_s32 (could be generic, could be i386). */ 557 #define SYNTH_NAME fr->synths.plain[r_4to1][f_32] 558 #define MONO_NAME synth_4to1_s32_mono 559 #define MONO2STEREO_NAME synth_4to1_s32_m2s 560 #include "synth_mono.h" 561 #undef SYNTH_NAME 562 #undef MONO_NAME 563 #undef MONO2STEREO_NAME 564 565 #ifdef OPT_X86 566 #define NO_AUTOINCREMENT 567 #define SYNTH_NAME synth_4to1_s32_i386 568 #include "synth.h" 569 #undef SYNTH_NAME 570 /* i386 uses the normal mono functions. */ 571 #undef NO_AUTOINCREMENT 572 #endif 573 574 #undef BLOCK 575 576 #endif /* NO_DOWNSAMPLE */ 577 578 #ifndef NO_NTOM 579 /* 580 Part 4d: ntom synth. 581 Same procedure as above... Just no extra play anymore, straight synth that may use an optimized dct64. 582 */ 583 584 /* These are all in one header, there's no flexibility to gain. */ 585 #define SYNTH_NAME synth_ntom_s32 586 #define MONO_NAME synth_ntom_s32_mono 587 #define MONO2STEREO_NAME synth_ntom_s32_m2s 588 #include "synth_ntom.h" 589 #undef SYNTH_NAME 590 #undef MONO_NAME 591 #undef MONO2STEREO_NAME 592 593 #endif 594 595 #undef SAMPLE_T 596 #undef WRITE_SAMPLE 597 598 #endif /* non-fixed type */ 599