;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32
; Scale factors, replicated 8 times so that a full 32-byte register can be loaded.
flt2pm31: times 8 dd 4.6566129e-10      ; 2^-31
flt2p31 : times 8 dd 2147483648.0       ; 2^31
flt2p15 : times 8 dd 32768.0            ; 2^15

; pshufb mask: gathers the even-indexed 16-bit words into the low 8 bytes and
; the odd-indexed 16-bit words into the high 8 bytes of a 16-byte register.
word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text


;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u: selects mova/movu for the sample loads and stores; the "a"
;       variant additionally checks pointer alignment on entry
;   %4  log2 of the destination sample size in bytes
;   %5  log2 of the source sample size in bytes
;   %6  conversion macro, invoked once per loop iteration with m0..m5
;   %7  init macro, invoked once before the loop with m0..m5
;
; Emits pack_2ch_<from>_to_<to>_<a/u>(dst, src, len). dst and src point to
; arrays of channel pointers: dst[0] receives the interleaved output, src[0]
; and src[1] are the two input channels.
;
; The "a" variant jumps to the label pack_2ch_<from>_to_<to>_u_int<SUFFIX>
; when any pointer is not mmsize-aligned. That label is only emitted by the
; "u" instantiation, so both variants have to be instantiated for a given
; format pair and instruction set.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov     src2q, [srcq+gprsize]       ; src2 = src[1]
    mov     srcq , [srcq]               ; src  = src[0]
    mov     dstq , [dstq]               ; dst  = dst[0]
%ifidn %3, a
    test    dstq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Point past the end of each buffer and index with a negative counter
    ; that runs up towards zero.
    lea     srcq , [srcq  + (1<<%5)*lenq]
    lea     src2q, [src2q + (1<<%5)*lenq]
    lea     dstq , [dstq  + (2<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; Output samples are at least as wide as input samples:
    ; interleave the two channels first, then convert.
    mov%3     m0, [         srcq +(1<<%5)*lenq]
    mova      m1, m0
    mov%3     m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd m0, m2
    punpckhwd m1, m2
%else
    punpckldq m0, m2
    punpckhdq m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; Output samples are narrower than input samples:
    ; convert both channels first, then interleave the 16-bit results.
    mov%3     m0, [         srcq +(1<<%5)*lenq]
    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3     m2, [         src2q+(1<<%5)*lenq]
    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova      m2, m0
    punpcklwd m0, m1
    punpckhwd m2, m1
    SWAP 1,2
%endif
    mov%3 [           dstq+(2<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
    add     lenq, 4*mmsize/(2<<%4)
%else
    add     lenq, 2*mmsize/(2<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Parameters have the same meaning as for PACK_2CH.
;
; Emits unpack_2ch_<from>_to_<to>_<a/u>(dst, src, len). src[0] is the
; interleaved input, dst[0] and dst[1] receive the two separated channels.
;
; As with PACK_2CH, the "a" variant falls back to the label emitted by the
; "u" variant when a pointer is not mmsize-aligned.
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov     dst2q, [dstq+gprsize]       ; dst2 = dst[1]
    mov     srcq , [srcq]               ; src  = src[0]
    mov     dstq , [dstq]               ; dst  = dst[0]
%ifidn %3, a
    test    dstq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    lea     srcq , [srcq  + (2<<%5)*lenq]
    lea     dstq , [dstq  + (1<<%4)*lenq]
    lea     dst2q, [dst2q + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
    ; m6 is only read by the pshufb path below (16-bit input, SSSE3), so the
    ; shuffle mask is only loaded for those variants.
%if %5 == 1
%ifidn SUFFIX, _ssse3
    mova      m6, [word_unpack_shuf]
%endif
%endif
.next:
    mov%3     m0, [           srcq +(2<<%5)*lenq]
    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
%ifidn SUFFIX, _ssse3
    ; Split even/odd words inside each register, then merge the halves.
    pshufb    m0, m6
    mova      m1, m0
    pshufb    m2, m6
    punpcklqdq m0,m2
    punpckhqdq m1,m2
%else
    ; Three rounds of word interleaving separate even and odd words.
    mova      m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2

    mova      m2, m0
    punpcklwd m0,m1
    punpckhwd m2,m1

    mova      m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2
%endif
%else
    ; 32-bit input: pick even dwords into m0 and odd dwords into m1.
    mova      m1, m0
    shufps    m0, m2, 10001000b
    shufps    m1, m2, 11011101b
%endif
%if %4 < %5
    ; A narrowing conversion consumes four input registers per iteration.
    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova      m3, m2
    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps    m2, m4, 10001000b
    shufps    m3, m4, 11011101b
    SWAP 1,2
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    mov%3 [          dst2q+(1<<%4)*lenq], m2
    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
    add     lenq, 2*mmsize/(1<<%4)
%else
    mov%3 [          dst2q+(1<<%4)*lenq], m1
    add     lenq, mmsize/(1<<%4)
%endif
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Parameters have the same meaning as for PACK_2CH.
;
; Emits <from>_to_<to>_<a/u>(dst, src, len): converts the single channel
; src[0] into dst[0]. The "a" variant falls back to the label emitted by the
; "u" variant when a pointer is not mmsize-aligned.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov     srcq, [srcq]                ; src = src[0]
    mov     dstq, [dstq]                ; dst = dst[0]
%ifidn %3, a
    test    dstq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX:
%endif
    lea     srcq, [srcq + (1<<%5)*lenq]
    lea     dstq, [dstq + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3     m0, [           srcq +(1<<%5)*lenq]
    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
    add     lenq, 4*mmsize/(1<<%4)
%else
    add     lenq, 2*mmsize/(1<<%4)
%endif
        jl .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
;   %1, %2  destination / source format names (symbol name only)
;   %3      a or u (mova/movu selection and entry alignment checks)
;   %4, %5  not referenced by the macro body
;   %6      vector register count passed to cglobal
;   %7      conversion macro, applied to register pairs inside the loop
;   %8      init macro, invoked once before the loop (constant goes to m7)
;
; Emits pack_6ch_<from>_to_<to>_<a/u>(dst, src, len): interleaves the six
; channels src[0..5] into dst[0]. Samples are handled as 4-byte elements and
; each iteration consumes mmsize bytes from every channel.
;
; The MMX path (no SSE) only transposes and stores; %7 is not applied there.
;------------------------------------------------------------------------------
%macro PACK_6CH 8
cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     src1q, [srcq+1*gprsize]
    mov     src2q, [srcq+2*gprsize]
    mov     src3q, [srcq+3*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src5q, [srcq+5*gprsize]
    mov     srcq, [srcq]
    mov     dstq, [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src4q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn src1..src5 into offsets relative to src so that only src has to
    ; be advanced inside the loop.
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
    sub     src4q, srcq
    sub     src5q, srcq
    %8 x,x,x,x,m7,x
.loop:
    mov%3     m0, [srcq      ]
    mov%3     m1, [srcq+src1q]
    mov%3     m2, [srcq+src2q]
    mov%3     m3, [srcq+src3q]
    mov%3     m4, [srcq+src4q]
    mov%3     m5, [srcq+src5q]
%if cpuflag(sse)
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

%if cpuflag(avx)
    blendps   m6, m4, m0, 1100b
%else
    movaps    m6, m4
    shufps    m4, m0, q3210
    SWAP 4,6
%endif
    movlhps   m0, m2
    movhlps   m4, m2
%if cpuflag(avx)
    blendps   m2, m5, m1, 1100b
%else
    movaps    m2, m5
    shufps    m5, m1, q3210
    SWAP 2,5
%endif
    movlhps   m1, m3
    movhlps   m5, m3

    %7 m0,m6,x,x,m7,m3
    %7 m4,m1,x,x,m7,m3
    %7 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq   [dstq   ], m0
    movq   [dstq+ 8], m2
    movq   [dstq+16], m4
    movq   [dstq+24], m1
    movq   [dstq+32], m3
    movq   [dstq+40], m5
%endif
    add     srcq, mmsize
    add     dstq, mmsize*6
    sub     lend, mmsize/4
        jg .loop
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; UNPACK_6CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
; Parameters have the same meaning as for PACK_6CH.
;
; Emits unpack_6ch_<from>_to_<to>_<a/u>(dst, src, len): de-interleaves
; src[0] into the six channels dst[0..5]. Each iteration reads 96 bytes of
; interleaved input and writes one register to every channel.
;------------------------------------------------------------------------------
%macro UNPACK_6CH 8
cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     dst1q, [dstq+1*gprsize]
    mov     dst2q, [dstq+2*gprsize]
    mov     dst3q, [dstq+3*gprsize]
    mov     dst4q, [dstq+4*gprsize]
    mov     dst5q, [dstq+5*gprsize]
    mov     dstq, [dstq]
    mov     srcq, [srcq]
%ifidn %3, a
    test    dstq, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst1q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst3q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst4q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst5q, mmsize-1
        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn dst1..dst5 into offsets relative to dst.
    sub     dst1q, dstq
    sub     dst2q, dstq
    sub     dst3q, dstq
    sub     dst4q, dstq
    sub     dst5q, dstq
    %8 x,x,x,x,m7,x
.loop:
    mov%3     m0, [srcq   ]
    mov%3     m1, [srcq+16]
    mov%3     m2, [srcq+32]
    mov%3     m3, [srcq+48]
    mov%3     m4, [srcq+64]
    mov%3     m5, [srcq+80]

    SBUTTERFLYPS 0, 3, 6
    SBUTTERFLYPS 1, 4, 6
    SBUTTERFLYPS 2, 5, 6
    SBUTTERFLYPS 0, 4, 6
    SBUTTERFLYPS 3, 2, 6
    SBUTTERFLYPS 1, 5, 6
    SWAP 1, 4
    SWAP 2, 3

    %7 m0,m1,x,x,m7,m6
    %7 m2,m3,x,x,m7,m6
    %7 m4,m5,x,x,m7,m6

    mov %+ %3 %+ ps [dstq      ], m0
    mov %+ %3 %+ ps [dstq+dst1q], m1
    mov %+ %3 %+ ps [dstq+dst2q], m2
    mov %+ %3 %+ ps [dstq+dst3q], m3
    mov %+ %3 %+ ps [dstq+dst4q], m4
    mov %+ %3 %+ ps [dstq+dst5q], m5

    add     srcq, mmsize*6
    add     dstq, mmsize
    sub     lend, mmsize/4
        jg .loop
    REP_RET
%endmacro

; General-purpose register count requested by PACK_8CH: 10 on x86-64,
; 6 or 7 on x86-32 depending on HAVE_ALIGNED_STACK.
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)

;------------------------------------------------------------------------------
; PACK_8CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
; Parameters have the same meaning as for PACK_6CH.
;
; Emits pack_8ch_<from>_to_<to>_<a/u>(dst, src, len): interleaves the eight
; channels src[0..7] into dst[0].
;
; x86-64 keeps every pointer in a register and holds the conversion constant
; in m9. x86-32 requests 48 bytes of stack: [rsp] and [rsp+16] are passed to
; TRANSPOSE8x4D as scratch, [rsp+32..] holds the pointers that do not fit in
; registers (they all share r0 with dst), and m9 is defined as a memory
; operand instead of a register.
;------------------------------------------------------------------------------
%macro PACK_8CH 8
cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
    mov     dstq, [dstq]
%if ARCH_X86_32
    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
    %define lend dword r2m
    %define src1q r0q
    %define src1m dword [rsp+32]
%if HAVE_ALIGNED_STACK == 0
    DEFINE_ARGS dst, src, src2, src3, src5, src6
    %define src4q r0q
    %define src4m dword [rsp+36]
%endif
    %define src7q r0q
    %define src7m dword [rsp+40]
    mov     dstm, dstq
%endif
    mov     src7q, [srcq+7*gprsize]
    mov     src6q, [srcq+6*gprsize]
%if ARCH_X86_32
    mov     src7m, src7q
%endif
    mov     src5q, [srcq+5*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src3q, [srcq+3*gprsize]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4m, src4q
%endif
    mov     src2q, [srcq+2*gprsize]
    mov     src1q, [srcq+1*gprsize]
    mov     srcq, [srcq]
%ifidn %3, a
%if ARCH_X86_32
    test    dstmp, mmsize-1
%else
    test    dstq, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    test    src4m, mmsize-1
%else
    test    src4q, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src6q, mmsize-1
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32
    test    src7m, mmsize-1
%else
    test    src7q, mmsize-1
%endif
        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; Turn src1..src7 into offsets relative to src.
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
%if ARCH_X86_64 || HAVE_ALIGNED_STACK
    sub     src4q, srcq
%else
    sub     src4m, srcq
%endif
    sub     src5q, srcq
    sub     src6q, srcq
%if ARCH_X86_64
    sub     src7q, srcq
%else
    mov     src1m, src1q
    sub     src7m, srcq
%endif

%if ARCH_X86_64
    %8 x,x,x,x,m9,x
%elifidn %1, int32
    %define m9 [flt2p31]
%else
    %define m9 [flt2pm31]
%endif

.loop:
    mov%3     m0, [srcq      ]
    mov%3     m1, [srcq+src1q]
    mov%3     m2, [srcq+src2q]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4q, src4m
%endif
    mov%3     m3, [srcq+src3q]
    mov%3     m4, [srcq+src4q]
    mov%3     m5, [srcq+src5q]
%if ARCH_X86_32
    mov     src7q, src7m
%endif
    mov%3     m6, [srcq+src6q]
    mov%3     m7, [srcq+src7q]

%if ARCH_X86_64
    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8

    %7 m0,m1,x,x,m9,m8
    %7 m2,m3,x,x,m9,m8
    %7 m4,m5,x,x,m9,m8
    %7 m6,m7,x,x,m9,m8

    mov%3 [dstq], m0
%else
    mov     dstq, dstm

    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1

    %7 m0,m1,x,x,m9,m2
    mova      m2, [rsp]
    mov%3 [dstq], m0
    %7 m2,m3,x,x,m9,m0
    %7 m4,m5,x,x,m9,m0
    %7 m6,m7,x,x,m9,m0

%endif

    mov%3 [dstq+16],  m1
    mov%3 [dstq+32],  m2
    mov%3 [dstq+48],  m3
    mov%3 [dstq+64],  m4
    mov%3 [dstq+80],  m5
    mov%3 [dstq+96],  m6
    mov%3 [dstq+112], m7

    add     srcq, mmsize
    add     dstq, mmsize*8
%if ARCH_X86_32
    mov     dstm, dstq
    mov     src1q, src1m
%endif
    sub     lend, mmsize/4
        jg .loop
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; Per-iteration conversion kernels (*_N) and their one-time setup (*_INIT).
; All of them accept six arguments so that they can be used interchangeably as
; the "conv" / "init" parameters of the macros above.
;------------------------------------------------------------------------------

; Widen the 16-bit words of m0 and m1 to 32 bits in m0..m3. Every word is
; interleaved above a zero word, so it ends up in the upper half of its dword.
; The macro arguments are ignored; clobbers m4.
%macro INT16_TO_INT32_N 6
    pxor      m2, m2
    pxor      m3, m3
    punpcklwd m2, m1
    punpckhwd m3, m1
    SWAP 4,0
    pxor      m0, m0
    pxor      m1, m1
    punpcklwd m0, m4
    punpckhwd m1, m4
%endmacro

; Narrow the 32-bit values in m0..m3 to 16 bits: arithmetic shift right by 16,
; then pack with signed saturation. The results are left in m0 and m1.
; The macro arguments are ignored.
%macro INT32_TO_INT16_N 6
    psrad     m0, 16
    psrad     m1, 16
    psrad     m2, 16
    psrad     m3, 16
    packssdw  m0, m1
    packssdw  m2, m3
    SWAP 1,2
%endmacro

; %5 = 2^-31 scale factor.
%macro INT32_TO_FLOAT_INIT 6
    mova      %5, [flt2pm31]
%endmacro
; Convert the int32 values in %1 and %2 to float and scale them by %5.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps  %1, %1
    cvtdq2ps  %2, %2
    mulps     %1, %1, %5
    mulps     %2, %2, %5
%endmacro

; %5 = 2^31 scale factor.
%macro FLOAT_TO_INT32_INIT 6
    mova      %5, [flt2p31]
%endmacro
; Scale the floats in %1 and %2 by %5 and convert them to int32; %6 is scratch.
; cmpps with predicate 5 (not-less-than) produces an all-ones mask (-1) in the
; lanes where the scaled value is >= %5; adding that mask to the cvtps2dq
; result decrements those lanes by one.
%macro FLOAT_TO_INT32_N 6
    mulps     %1, %5
    mulps     %2, %5
    cvtps2dq  %6, %1
    cmpps     %1, %1, %5, 5
    paddd     %1, %6
    cvtps2dq  %6, %2
    cmpps     %2, %2, %5, 5
    paddd     %2, %6
%endmacro

; m5 = 2^-31. INT16_TO_INT32_N leaves each sample in the upper half of its
; dword, so the int32 scale factor is reused here.
%macro INT16_TO_FLOAT_INIT 6
    mova      m5, [flt2pm31]
%endmacro
; Convert the int16 words in m0/m1 to floats in m0..m3, scaled by m5.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps  m0, m0
    cvtdq2ps  m1, m1
    cvtdq2ps  m2, m2
    cvtdq2ps  m3, m3
    mulps     m0, m0, m5
    mulps     m1, m1, m5
    mulps     m2, m2, m5
    mulps     m3, m3, m5
%endmacro

; m5 = 2^15 scale factor.
%macro FLOAT_TO_INT16_INIT 6
    mova      m5, [flt2p15]
%endmacro
; Scale the floats in m0..m3 by m5, convert them to int32 and pack them with
; signed saturation into 16-bit words in m0 and m1.
%macro FLOAT_TO_INT16_N 6
    mulps     m0, m5
    mulps     m1, m5
    mulps     m2, m5
    mulps     m3, m5
    cvtps2dq  m0, m0
    cvtps2dq  m1, m1
    packssdw  m0, m1
    cvtps2dq  m1, m2
    cvtps2dq  m3, m3
    packssdw  m1, m3
%endmacro

; Empty kernel for format pairs that need no conversion or no setup.
%macro NOP_N 0-6
%endmacro

;------------------------------------------------------------------------------
; Instantiations. Each "a" variant is paired with the matching "u" variant,
; which provides the fallback label the "a" variant jumps to.
;------------------------------------------------------------------------------
INIT_MMX mmx
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N

INIT_XMM sse
PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

INIT_XMM sse2
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_YMM avx
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
%endif

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
%endif