1;******************************************************************************
2;* Copyright (c) 2012 Michael Niedermayer
3;*
4;* This file is part of FFmpeg.
5;*
6;* FFmpeg is free software; you can redistribute it and/or
7;* modify it under the terms of the GNU Lesser General Public
8;* License as published by the Free Software Foundation; either
9;* version 2.1 of the License, or (at your option) any later version.
10;*
11;* FFmpeg is distributed in the hope that it will be useful,
12;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14;* Lesser General Public License for more details.
15;*
16;* You should have received a copy of the GNU Lesser General Public
17;* License along with FFmpeg; if not, write to the Free Software
18;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
; Read-only constants.  Each float constant is repeated 8 times (32 bytes) so
; that a single vector load yields the value broadcast to every lane, for
; vector sizes up to 32 bytes.
; NOTE(review): the "32" argument is presumably the section alignment requested
; from the x86inc SECTION_RODATA macro - confirm against x86inc.asm.
23SECTION_RODATA 32
24flt2pm31: times 8 dd 4.6566129e-10    ; ~2^-31, multiplier used after int -> float conversion
25flt2p31 : times 8 dd 2147483648.0     ; 2^31, multiplier used before float -> int32 conversion
26flt2p15 : times 8 dd 32768.0          ; 2^15, multiplier used before float -> int16 conversion
27
; pshufb control loaded by UNPACK_2CH: within one 16-byte vector it moves the
; even-numbered words (bytes 0,1 4,5 8,9 12,13) to the low half and the
; odd-numbered words (bytes 2,3 6,7 10,11 14,15) to the high half.
28word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
29
30SECTION .text
31
32
33;to, from, a/u, log2_outsize, log2_insize, conv_macro, init_macro
;
; PACK_2CH: emits pack_2ch_<from>_to_<to>_<a|u>, which interleaves two planar
; channels into one packed buffer and converts the sample format on the way.
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; pasted into mov%3 to pick aligned or unaligned accesses
;   %4  log2 of the destination sample size in bytes
;   %5  log2 of the source sample size in bytes
;   %6  conversion macro, invoked on m0..m5 inside the loop
;   %7  init macro, invoked once on m0..m5 before the loop
; dst and src are arrays of plane pointers (dst[0], src[0], src[1] are read);
; len is counted in samples per channel.  The loop tests its condition at the
; bottom, so one iteration always runs.
34%macro PACK_2CH 5-7
35cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    ; src[1] must be fetched before srcq is overwritten with src[0].
36    mov src2q   , [srcq+gprsize]
37    mov srcq    , [srcq]
38    mov dstq    , [dstq]
39%ifidn %3, a
    ; Aligned variant: when any pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label, which sits after the same pointer
    ; loads.  The label is only emitted by the u instantiation with the same
    ; SUFFIX, so every a instantiation needs a matching u one.
40    test dstq, mmsize-1
41        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
42    test srcq, mmsize-1
43        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
44    test src2q, mmsize-1
45        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
46%else
47pack_2ch_%2_to_%1_u_int %+ SUFFIX:
48%endif
    ; Move every pointer to the end of its buffer and negate len, so that lenq
    ; runs from -len up to 0 and doubles as index and loop counter.
49    lea     srcq , [srcq  + (1<<%5)*lenq]
50    lea     src2q, [src2q + (1<<%5)*lenq]
51    lea     dstq , [dstq  + (2<<%4)*lenq]
52    neg     lenq
53    %7 m0,m1,m2,m3,m4,m5
54.next:
55%if %4 >= %5
    ; Output samples are at least as wide as input samples: interleave the two
    ; channels first (as words when %5 == 1, as dwords otherwise), then convert.
56    mov%3     m0, [         srcq +(1<<%5)*lenq]
57    mova      m1, m0
58    mov%3     m2, [         src2q+(1<<%5)*lenq]
59%if %5 == 1
60    punpcklwd m0, m2
61    punpckhwd m1, m2
62%else
63    punpckldq m0, m2
64    punpckhdq m1, m2
65%endif
66    %6 m0,m1,m2,m3,m4,m5
67%else
    ; Output samples are narrower than input samples: load two vectors per
    ; channel, convert, then interleave the results as words.  This relies on
    ; the conversion macro leaving channel 0 in m0 and channel 1 in m1, which
    ; INT32_TO_INT16_N and FLOAT_TO_INT16_N both do.
68    mov%3     m0, [         srcq +(1<<%5)*lenq]
69    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
70    mov%3     m2, [         src2q+(1<<%5)*lenq]
71    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
72    %6 m0,m1,m2,m3,m4,m5
73    mova      m2, m0
74    punpcklwd m0, m1
75    punpckhwd m2, m1
    ; Rename so that the high interleave is the one stored from m1 below.
76    SWAP 1,2
77%endif
78    mov%3 [           dstq+(2<<%4)*lenq], m0
79    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
80%if %4 > %5
    ; A widening conversion produced four output vectors instead of two.
81    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
82    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
    ; Advance by bytes stored / bytes per output frame (2 channels << %4).
83    add lenq, 4*mmsize/(2<<%4)
84%else
85    add lenq, 2*mmsize/(2<<%4)
86%endif
    ; Continue while the negative index has not reached zero.
87        jl .next
88    REP_RET
89%endmacro
90
; UNPACK_2CH: emits unpack_2ch_<from>_to_<to>_<a|u>, which splits one packed
; 2-channel buffer into two planar buffers and converts the sample format.
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; pasted into mov%3 to pick aligned or unaligned accesses
;   %4  log2 of the destination sample size in bytes
;   %5  log2 of the source sample size in bytes
;   %6  conversion macro, invoked on m0..m5 inside the loop
;   %7  init macro, invoked once on m0..m5 before the loop
; dst and src are arrays of plane pointers (dst[0], dst[1], src[0] are read);
; len is counted in samples per channel.  The loop tests its condition at the
; bottom, so one iteration always runs.
91%macro UNPACK_2CH 5-7
92cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    ; dst[1] must be fetched before dstq is overwritten with dst[0].
93    mov dst2q   , [dstq+gprsize]
94    mov srcq    , [srcq]
95    mov dstq    , [dstq]
96%ifidn %3, a
    ; Aligned variant: when any pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label.  That label is only emitted by the
    ; u instantiation with the same SUFFIX, which therefore must exist.
97    test dstq, mmsize-1
98        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
99    test srcq, mmsize-1
100        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
101    test dst2q, mmsize-1
102        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
103%else
104unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
105%endif
    ; Move every pointer to the end of its buffer and negate len, so that lenq
    ; runs from -len up to 0 and doubles as index and loop counter.
106    lea     srcq , [srcq  + (2<<%5)*lenq]
107    lea     dstq , [dstq  + (1<<%4)*lenq]
108    lea     dst2q, [dst2q + (1<<%4)*lenq]
109    neg     lenq
110    %7 m0,m1,m2,m3,m4,m5
    ; Loaded in every build, although only the SSSE3 16-bit path below reads m6.
111    mova      m6, [word_unpack_shuf]
112.next:
113    mov%3     m0, [           srcq +(2<<%5)*lenq]
114    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
115%if %5 == 1
116%ifidn SUFFIX, _ssse3
    ; 16-bit input, SSSE3: pshufb gathers each vector's even words in its low
    ; half and odd words in its high half; the qword unpacks then join the
    ; halves, leaving channel 0 in m0 and channel 1 in m1.
117    pshufb    m0, m6
118    mova      m1, m0
119    pshufb    m2, m6
120    punpcklqdq m0,m2
121    punpckhqdq m1,m2
122%else
    ; 16-bit input without pshufb: three rounds of word interleaving separate
    ; the two channels, leaving channel 0 in m0 and channel 1 in m1.
123    mova      m1, m0
124    punpcklwd m0,m2
125    punpckhwd m1,m2
126
127    mova      m2, m0
128    punpcklwd m0,m1
129    punpckhwd m2,m1
130
131    mova      m1, m0
132    punpcklwd m0,m2
133    punpckhwd m1,m2
134%endif
135%else
    ; 32-bit input: shufps picks dwords 0,2 of both vectors into m0 (channel 0)
    ; and dwords 1,3 of both vectors into m1 (channel 1).
136    mova      m1, m0
137    shufps    m0, m2, 10001000b
138    shufps    m1, m2, 11011101b
139%endif
140%if %4 < %5
    ; Narrowing conversion: fetch and split two more input vectors so that each
    ; channel has two vectors.  After the rename m0,m1 hold channel 0 and m2,m3
    ; hold channel 1.
141    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
142    mova      m3, m2
143    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
144    shufps    m2, m4, 10001000b
145    shufps    m3, m4, 11011101b
146    SWAP 1,2
147%endif
148    %6 m0,m1,m2,m3,m4,m5
149    mov%3 [           dstq+(1<<%4)*lenq], m0
150%if %4 > %5
    ; Widening conversion: m0,m1 go to the first plane and m2,m3 to the second.
151    mov%3 [          dst2q+(1<<%4)*lenq], m2
152    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
153    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
    ; Advance by bytes stored per plane / bytes per output sample.
154    add lenq, 2*mmsize/(1<<%4)
155%else
156    mov%3 [          dst2q+(1<<%4)*lenq], m1
157    add lenq, mmsize/(1<<%4)
158%endif
    ; Continue while the negative index has not reached zero.
159        jl .next
160    REP_RET
161%endmacro
162
; CONV: emits <from>_to_<to>_<a|u>, a single-plane sample format conversion.
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; pasted into mov%3 to pick aligned or unaligned accesses
;   %4  log2 of the destination sample size in bytes
;   %5  log2 of the source sample size in bytes
;   %6  conversion macro, invoked on m0..m5 inside the loop
;   %7  init macro, invoked once on m0..m5 before the loop
; dst and src are arrays of plane pointers of which only entry 0 is read; len
; is counted in samples.  The loop tests its condition at the bottom, so one
; iteration always runs.
163%macro CONV 5-7
164cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
165    mov srcq    , [srcq]
166    mov dstq    , [dstq]
167%ifidn %3, a
    ; Aligned variant: when either pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label.  That label is only emitted by the
    ; u instantiation with the same SUFFIX, which therefore must exist.
168    test dstq, mmsize-1
169        jne %2_to_%1_u_int %+ SUFFIX
170    test srcq, mmsize-1
171        jne %2_to_%1_u_int %+ SUFFIX
172%else
173%2_to_%1_u_int %+ SUFFIX:
174%endif
    ; Move both pointers to the end of their buffers and negate len, so that
    ; lenq runs from -len up to 0 and doubles as index and loop counter.
175    lea     srcq , [srcq  + (1<<%5)*lenq]
176    lea     dstq , [dstq  + (1<<%4)*lenq]
177    neg     lenq
178    %7 m0,m1,m2,m3,m4,m5
179.next:
180    mov%3     m0, [           srcq +(1<<%5)*lenq]
181    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
182%if %4 < %5
    ; Narrowing conversion: four input vectors are needed for two of output.
183    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
184    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
185%endif
186    %6 m0,m1,m2,m3,m4,m5
187    mov%3 [           dstq+(1<<%4)*lenq], m0
188    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
189%if %4 > %5
    ; Widening conversion: two input vectors became four output vectors.
190    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
191    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
    ; Advance by bytes stored / bytes per output sample.
192    add lenq, 4*mmsize/(1<<%4)
193%else
194    add lenq, 2*mmsize/(1<<%4)
195%endif
    ; Continue while the negative index has not reached zero.
196        jl .next
197%if mmsize == 8
    ; 8-byte (MMX) registers were used: clear the MMX state before returning.
198    emms
199    RET
200%else
201    REP_RET
202%endif
203%endmacro
204
; PACK_6CH: emits pack_6ch_<from>_to_<to>_<a|u>, which interleaves six planar
; channels of 4-byte samples into one packed buffer.
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; selects aligned or unaligned vector accesses
;   %4, %5  not referenced by this macro
;   %6  vector register count handed to cglobal
;   %7  conversion macro, invoked per register pair (SSE/AVX path only)
;   %8  init macro, invoked once before the loop with m7 as its fifth argument
; dst and src are arrays of plane pointers (dst[0] and src[0..5] are read); the
; third function argument is the sample count per channel.  Each iteration
; handles mmsize/4 samples per channel, and the loop tests its condition at the
; bottom, so one iteration always runs.
205%macro PACK_6CH 8
206cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
    ; Only two arguments are auto-loaded.  The sample count is argument 2: copy
    ; it out of r2 before r2 is reused as src1 (x86-64), or read it straight
    ; from its stack slot (x86-32).
207%if ARCH_X86_64
208    mov     lend, r2d
209%else
210    %define lend dword r2m
211%endif
    ; Fetch src[1..5] before srcq itself is replaced by src[0].
212    mov    src1q, [srcq+1*gprsize]
213    mov    src2q, [srcq+2*gprsize]
214    mov    src3q, [srcq+3*gprsize]
215    mov    src4q, [srcq+4*gprsize]
216    mov    src5q, [srcq+5*gprsize]
217    mov     srcq, [srcq]
218    mov     dstq, [dstq]
219%ifidn %3, a
    ; Aligned variant: when any pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label.  That label is only emitted by the
    ; u instantiation with the same SUFFIX, which therefore must exist.
220    test dstq, mmsize-1
221        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
222    test srcq, mmsize-1
223        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
224    test src1q, mmsize-1
225        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
226    test src2q, mmsize-1
227        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
228    test src3q, mmsize-1
229        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
230    test src4q, mmsize-1
231        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
232    test src5q, mmsize-1
233        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
234%else
235pack_6ch_%2_to_%1_u_int %+ SUFFIX:
236%endif
    ; Turn src1..src5 into offsets relative to srcq, so that advancing srcq
    ; alone moves all six read positions.
237    sub    src1q, srcq
238    sub    src2q, srcq
239    sub    src3q, srcq
240    sub    src4q, srcq
241    sub    src5q, srcq
242    %8 x,x,x,x,m7,x
243.loop:
    ; One vector from each of the six planes.
244    mov%3     m0, [srcq      ]
245    mov%3     m1, [srcq+src1q]
246    mov%3     m2, [srcq+src2q]
247    mov%3     m3, [srcq+src3q]
248    mov%3     m4, [srcq+src4q]
249    mov%3     m5, [srcq+src5q]
250%if cpuflag(sse)
    ; Pair up neighbouring channels, using m6 as scratch.
    ; NOTE(review): SBUTTERFLYPS comes from x86util; presumably it leaves the
    ; low-half interleave in the first register and the high-half interleave in
    ; the second - confirm there.
251    SBUTTERFLYPS 0, 1, 6
252    SBUTTERFLYPS 2, 3, 6
253    SBUTTERFLYPS 4, 5, 6
254
    ; Regroup the channel pairs into six vectors in output order.  The AVX
    ; build uses 3-operand blendps (low two lanes from the first source, high
    ; two from the second); the SSE build forms the same value with
    ; movaps + shufps and then renames the registers.
255%if cpuflag(avx)
256    blendps   m6, m4, m0, 1100b
257%else
258    movaps    m6, m4
259    shufps    m4, m0, q3210
260    SWAP 4,6
261%endif
262    movlhps   m0, m2
263    movhlps   m4, m2
264%if cpuflag(avx)
265    blendps   m2, m5, m1, 1100b
266%else
267    movaps    m2, m5
268    shufps    m5, m1, q3210
269    SWAP 2,5
270%endif
271    movlhps   m1, m3
272    movhlps   m5, m3
273
    ; Convert two registers per call; m7 is the constant set up by %8 and m3,
    ; which is no longer needed, is passed as scratch.
274    %7 m0,m6,x,x,m7,m3
275    %7 m4,m1,x,x,m7,m3
276    %7 m2,m5,x,x,m7,m3
277
    ; mov %+ %3 %+ ps assembles to movaps or movups.  The fixed 16-byte steps
    ; match the 16-byte vectors of the SSE/AVX (INIT_XMM) builds.
278    mov %+ %3 %+ ps [dstq   ], m0
279    mov %+ %3 %+ ps [dstq+16], m6
280    mov %+ %3 %+ ps [dstq+32], m4
281    mov %+ %3 %+ ps [dstq+48], m1
282    mov %+ %3 %+ ps [dstq+64], m2
283    mov %+ %3 %+ ps [dstq+80], m5
284%else ; mmx
    ; MMX path: two samples per channel per iteration.  The conversion macro %7
    ; is not invoked here, and the stores are movq regardless of %3.
285    SBUTTERFLY dq, 0, 1, 6
286    SBUTTERFLY dq, 2, 3, 6
287    SBUTTERFLY dq, 4, 5, 6
288
289    movq   [dstq   ], m0
290    movq   [dstq+ 8], m2
291    movq   [dstq+16], m4
292    movq   [dstq+24], m1
293    movq   [dstq+32], m3
294    movq   [dstq+40], m5
295%endif
    ; One vector consumed per plane, six vectors produced.
296    add      srcq, mmsize
297    add      dstq, mmsize*6
298    sub      lend, mmsize/4
299    jg .loop
300%if mmsize == 8
    ; 8-byte (MMX) registers were used: clear the MMX state before returning.
301    emms
302    RET
303%else
304    REP_RET
305%endif
306%endmacro
307
; UNPACK_6CH: emits unpack_6ch_<from>_to_<to>_<a|u>, which splits one packed
; 6-channel buffer of 4-byte samples into six planar buffers.
;   %1  destination format name (only used to build the symbol name)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; selects aligned or unaligned vector accesses
;   %4, %5  not referenced by this macro
;   %6  vector register count handed to cglobal
;   %7  conversion macro, invoked per register pair
;   %8  init macro, invoked once before the loop with m7 as its fifth argument
; dst and src are arrays of plane pointers (dst[0..5] and src[0] are read); the
; third function argument is the sample count per channel.  Each iteration
; handles mmsize/4 samples per channel, and the loop tests its condition at the
; bottom, so one iteration always runs.
308%macro UNPACK_6CH 8
309cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
    ; Only two arguments are auto-loaded.  The sample count is argument 2: copy
    ; it out of r2 before r2 is reused as dst1 (x86-64), or read it straight
    ; from its stack slot (x86-32).
310%if ARCH_X86_64
311    mov     lend, r2d
312%else
313    %define lend dword r2m
314%endif
    ; Fetch dst[1..5] before dstq itself is replaced by dst[0].
315    mov    dst1q, [dstq+1*gprsize]
316    mov    dst2q, [dstq+2*gprsize]
317    mov    dst3q, [dstq+3*gprsize]
318    mov    dst4q, [dstq+4*gprsize]
319    mov    dst5q, [dstq+5*gprsize]
320    mov     dstq, [dstq]
321    mov     srcq, [srcq]
322%ifidn %3, a
    ; Aligned variant: when any pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label.  That label is only emitted by the
    ; u instantiation with the same SUFFIX, which therefore must exist.
323    test dstq, mmsize-1
324        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
325    test srcq, mmsize-1
326        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
327    test dst1q, mmsize-1
328        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
329    test dst2q, mmsize-1
330        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
331    test dst3q, mmsize-1
332        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
333    test dst4q, mmsize-1
334        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
335    test dst5q, mmsize-1
336        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
337%else
338unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
339%endif
    ; Turn dst1..dst5 into offsets relative to dstq, so that advancing dstq
    ; alone moves all six write positions.
340    sub    dst1q, dstq
341    sub    dst2q, dstq
342    sub    dst3q, dstq
343    sub    dst4q, dstq
344    sub    dst5q, dstq
345    %8 x,x,x,x,m7,x
346.loop:
    ; Six consecutive vectors of packed input.  The fixed 16-byte steps match
    ; the 16-byte vectors of the INIT_XMM builds this macro is instantiated for.
347    mov%3     m0, [srcq   ]
348    mov%3     m1, [srcq+16]
349    mov%3     m2, [srcq+32]
350    mov%3     m3, [srcq+48]
351    mov%3     m4, [srcq+64]
352    mov%3     m5, [srcq+80]
353
    ; Two rounds of butterflies (m6 is scratch) followed by register renames
    ; gather the samples of channel N into mN.
    ; NOTE(review): the exact lane movement depends on SBUTTERFLYPS from
    ; x86util - confirm there.
354    SBUTTERFLYPS 0, 3, 6
355    SBUTTERFLYPS 1, 4, 6
356    SBUTTERFLYPS 2, 5, 6
357    SBUTTERFLYPS 0, 4, 6
358    SBUTTERFLYPS 3, 2, 6
359    SBUTTERFLYPS 1, 5, 6
360    SWAP 1, 4
361    SWAP 2, 3
362
    ; Convert two registers per call; m7 is the constant set up by %8 and m6
    ; is passed as scratch.
363    %7 m0,m1,x,x,m7,m6
364    %7 m2,m3,x,x,m7,m6
365    %7 m4,m5,x,x,m7,m6
366
    ; mov %+ %3 %+ ps assembles to movaps or movups; one vector per plane.
367    mov %+ %3 %+ ps [dstq      ], m0
368    mov %+ %3 %+ ps [dstq+dst1q], m1
369    mov %+ %3 %+ ps [dstq+dst2q], m2
370    mov %+ %3 %+ ps [dstq+dst3q], m3
371    mov %+ %3 %+ ps [dstq+dst4q], m4
372    mov %+ %3 %+ ps [dstq+dst5q], m5
373
    ; Six vectors consumed, one vector produced per plane.
374    add      srcq, mmsize*6
375    add      dstq, mmsize
376    sub      lend, mmsize/4
377    jg .loop
378    REP_RET
379%endmacro
380
; General purpose register count requested by PACK_8CH: 10 on x86-64, and
; 6 + HAVE_ALIGNED_STACK on x86-32.
381%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
382
; PACK_8CH: emits pack_8ch_<from>_to_<to>_<a|u>, which interleaves eight planar
; channels of 4-byte samples into one packed buffer.
;   %1  destination format name (symbol name; also picks the x86-32 constant)
;   %2  source format name      (only used to build the symbol name)
;   %3  a or u; pasted into mov%3 to pick aligned or unaligned accesses
;   %4, %5  not referenced by this macro
;   %6  vector register count handed to cglobal
;   %7  conversion macro, invoked per register pair
;   %8  init macro, invoked once before the loop (x86-64 only) to fill m9
; dst and src are arrays of plane pointers (dst[0] and src[0..7] are read); the
; third function argument is the sample count per channel.  Each iteration
; handles mmsize/4 samples per channel, and the loop tests its condition at the
; bottom, so one iteration always runs.
;
; x86-32 stack layout (48 bytes requested via ARCH_X86_32*48):
;   [rsp], [rsp+16]  memory operands handed to TRANSPOSE8x4D
;   [rsp+32]         src1 offset   (src1m)
;   [rsp+36]         src4 offset   (src4m, only when HAVE_ALIGNED_STACK == 0)
;   [rsp+40]         src7 offset   (src7m)
; On x86-32 src1q, src7q (and src4q without an aligned stack) are all defined
; as r0q, the register that also holds dst, so those values are kept in memory
; and moved into r0 only while needed; dst itself lives in its argument slot.
383%macro PACK_8CH 8
384cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
385    mov     dstq, [dstq]
386%if ARCH_X86_32
387    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
388    %define lend dword r2m
389    %define src1q r0q
390    %define src1m dword [rsp+32]
391%if HAVE_ALIGNED_STACK == 0
392    DEFINE_ARGS dst, src, src2, src3, src5, src6
393    %define src4q r0q
394    %define src4m dword [rsp+36]
395%endif
396    %define src7q r0q
397    %define src7m dword [rsp+40]
    ; r0 is about to be reused, so park the loaded dst pointer in dst's slot.
398    mov     dstm, dstq
399%endif
    ; Fetch src[7..1] before srcq itself is replaced by src[0].  On x86-32 the
    ; values that share r0 are written to their stack slots before r0 is
    ; loaded with the next one.
400    mov    src7q, [srcq+7*gprsize]
401    mov    src6q, [srcq+6*gprsize]
402%if ARCH_X86_32
403    mov    src7m, src7q
404%endif
405    mov    src5q, [srcq+5*gprsize]
406    mov    src4q, [srcq+4*gprsize]
407    mov    src3q, [srcq+3*gprsize]
408%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
409    mov    src4m, src4q
410%endif
411    mov    src2q, [srcq+2*gprsize]
412    mov    src1q, [srcq+1*gprsize]
413    mov     srcq, [srcq]
414%ifidn %3, a
    ; Aligned variant: when any pointer is not mmsize-aligned, jump into the
    ; unaligned variant at its _u_int label.  That label is only emitted by the
    ; u instantiation with the same SUFFIX, which therefore must exist.
    ; Pointers that currently live in memory are tested there.
415%if ARCH_X86_32
416    test dstmp, mmsize-1
417%else
418    test dstq, mmsize-1
419%endif
420        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
421    test srcq, mmsize-1
422        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
423    test src1q, mmsize-1
424        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
425    test src2q, mmsize-1
426        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
427    test src3q, mmsize-1
428        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
429%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
430    test src4m, mmsize-1
431%else
432    test src4q, mmsize-1
433%endif
434        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
435    test src5q, mmsize-1
436        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
437    test src6q, mmsize-1
438        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
439%if ARCH_X86_32
440    test src7m, mmsize-1
441%else
442    test src7q, mmsize-1
443%endif
444        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
445%else
446pack_8ch_%2_to_%1_u_int %+ SUFFIX:
447%endif
    ; Turn src1..src7 into offsets relative to srcq (in place, whether they
    ; live in a register or in a stack slot), so that advancing srcq alone
    ; moves all eight read positions.
448    sub    src1q, srcq
449    sub    src2q, srcq
450    sub    src3q, srcq
451%if ARCH_X86_64 || HAVE_ALIGNED_STACK
452    sub    src4q, srcq
453%else
454    sub    src4m, srcq
455%endif
456    sub    src5q, srcq
457    sub    src6q, srcq
458%if ARCH_X86_64
459    sub    src7q, srcq
460%else
    ; Save the src1 offset; r0 is overwritten inside the loop.
461    mov src1m, src1q
462    sub src7m, srcq
463%endif
464
    ; Conversion constant: x86-64 lets the init macro load it into m9.  On
    ; x86-32, m9 is instead defined as a memory operand on the constant table.
465%if ARCH_X86_64
466    %8 x,x,x,x,m9,x
467%elifidn %1, int32
468    %define m9 [flt2p31]
469%else
470    %define m9 [flt2pm31]
471%endif
472
473.loop:
    ; One vector from each of the eight planes.  On x86-32, r0 holds the src1
    ; offset on entry and is reloaded with the src4/src7 offsets as needed.
474    mov%3     m0, [srcq      ]
475    mov%3     m1, [srcq+src1q]
476    mov%3     m2, [srcq+src2q]
477%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
478    mov    src4q, src4m
479%endif
480    mov%3     m3, [srcq+src3q]
481    mov%3     m4, [srcq+src4q]
482    mov%3     m5, [srcq+src5q]
483%if ARCH_X86_32
484    mov    src7q, src7m
485%endif
486    mov%3     m6, [srcq+src6q]
487    mov%3     m7, [srcq+src7q]
488
489%if ARCH_X86_64
    ; NOTE(review): TRANSPOSE8x4D comes from x86util; m8 is passed as its
    ; ninth argument, presumably as scratch - confirm there.
490    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
491
    ; Convert two registers per call; m9 is the constant, m8 the scratch.
492    %7 m0,m1,x,x,m9,m8
493    %7 m2,m3,x,x,m9,m8
494    %7 m4,m5,x,x,m9,m8
495    %7 m6,m7,x,x,m9,m8
496
497    mov%3 [dstq], m0
498%else
    ; r0 becomes the destination pointer again from here on.
499    mov     dstq, dstm
500
    ; Transpose with two stack slots as extra operands.
    ; NOTE(review): see x86util for the meaning of the trailing arguments.
501    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1
502
    ; m2 is used as scratch for the first pair and then loaded from [rsp];
    ; m0 is stored right away so that it can be the scratch for the rest.
503    %7 m0,m1,x,x,m9,m2
504    mova     m2, [rsp]
505    mov%3   [dstq], m0
506    %7 m2,m3,x,x,m9,m0
507    %7 m4,m5,x,x,m9,m0
508    %7 m6,m7,x,x,m9,m0
509
510%endif
511
    ; Remaining seven vectors; the first one was stored in the branch above.
    ; The fixed 16-byte steps match the 16-byte vectors of the INIT_XMM builds.
512    mov%3 [dstq+16],  m1
513    mov%3 [dstq+32],  m2
514    mov%3 [dstq+48],  m3
515    mov%3 [dstq+64],  m4
516    mov%3 [dstq+80],  m5
517    mov%3 [dstq+96],  m6
518    mov%3 [dstq+112], m7
519
    ; One vector consumed per plane, eight vectors produced.
520    add      srcq, mmsize
521    add      dstq, mmsize*8
522%if ARCH_X86_32
    ; Save dst and put the src1 offset back into r0 for the next iteration.
523    mov      dstm, dstq
524    mov      src1q, src1m
525%endif
526    sub      lend, mmsize/4
527    jg .loop
528    REP_RET
529%endmacro
530
; INT16_TO_INT32_N: widen the 16-bit samples in m0 and m1 to 32 bits.
; The six macro arguments are accepted but not used; the registers are fixed.
;   in:  m0, m1  two vectors of int16 samples
;   out: m0, m1  samples from the low/high half of the input m0
;        m2, m3  samples from the low/high half of the input m1
; Interleaving a zeroed register with the source places every sample in the
; upper 16 bits of its dword, i.e. sample << 16.
; m4 is used as a temporary.  SWAP renames registers at assembly time, so after
; this macro the names m0 and m4 refer to exchanged physical registers.
531%macro INT16_TO_INT32_N 6
532    pxor      m2, m2
533    pxor      m3, m3
534    punpcklwd m2, m1
535    punpckhwd m3, m1
    ; Keep the input m0 reachable as m4 while m0 and m1 are rebuilt.
536    SWAP 4,0
537    pxor      m0, m0
538    pxor      m1, m1
539    punpcklwd m0, m4
540    punpckhwd m1, m4
541%endmacro
542
; INT32_TO_INT16_N: narrow the 32-bit samples in m0..m3 to 16 bits.
; The six macro arguments are accepted but not used; the registers are fixed.
;   in:  m0..m3  four vectors of int32 samples
;   out: m0      samples of the input m0 and m1
;        m1      samples of the input m2 and m3
; Each sample is shifted right arithmetically by 16 (keeping its upper half)
; and the pairs are packed with signed saturation.
543%macro INT32_TO_INT16_N 6
544    psrad     m0, 16
545    psrad     m1, 16
546    psrad     m2, 16
547    psrad     m3, 16
548    packssdw  m0, m1
549    packssdw  m2, m3
    ; Rename so that the second packed vector is addressed as m1.
550    SWAP 1,2
551%endmacro
552
; INT32_TO_FLOAT_INIT: load the flt2pm31 scale constant into the register given
; as the fifth argument.  The other five arguments are not used.
553%macro INT32_TO_FLOAT_INIT 6
554    mova      %5, [flt2pm31]
555%endmacro
; INT32_TO_FLOAT_N: convert the int32 samples in %1 and %2 to float, in place,
; and multiply them by %5 (the constant loaded by INT32_TO_FLOAT_INIT).
; Arguments %3, %4 and %6 are not used.
; NOTE(review): the 3-operand mulps form presumably relies on x86inc emulating
; it for non-AVX builds - confirm in x86inc.asm.
556%macro INT32_TO_FLOAT_N 6
557    cvtdq2ps  %1, %1
558    cvtdq2ps  %2, %2
559    mulps %1, %1, %5
560    mulps %2, %2, %5
561%endmacro
562
; FLOAT_TO_INT32_INIT: load the flt2p31 scale constant into the register given
; as the fifth argument.  The other five arguments are not used.
563%macro FLOAT_TO_INT32_INIT 6
564    mova      %5, [flt2p31]
565%endmacro
; FLOAT_TO_INT32_N: convert the float samples in %1 and %2 to int32, in place.
;   %1, %2  registers to convert
;   %5      scale constant (flt2p31 when set up by FLOAT_TO_INT32_INIT)
;   %6      scratch register
; Arguments %3 and %4 are not used.
; After scaling, cvtps2dq writes the integer result to %6.  cmpps with
; predicate 5 (not-less-than) then turns %1 into an all-ones mask in every lane
; whose scaled value is >= %5, and paddd adds that mask, i.e. -1, to the
; converted value.  Per the Intel SDM, cvtps2dq returns 0x80000000 for values
; too large for int32, so this yields 0x7FFFFFFF for positive overflow.
566%macro FLOAT_TO_INT32_N 6
567    mulps %1, %5
568    mulps %2, %5
569    cvtps2dq  %6, %1
570    cmpps %1, %1, %5, 5
571    paddd %1, %6
572    cvtps2dq  %6, %2
573    cmpps %2, %2, %5, 5
574    paddd %2, %6
575%endmacro
576
; INT16_TO_FLOAT_INIT: load the flt2pm31 scale constant into m5.
; The six macro arguments are accepted but not used; the register is fixed.
577%macro INT16_TO_FLOAT_INIT 6
578    mova      m5, [flt2pm31]
579%endmacro
; INT16_TO_FLOAT_N: convert the int16 samples in m0 and m1 to float.
;   in:  m0, m1  two vectors of int16 samples; m5 = scale constant
;   out: m0..m3  four vectors of float samples
; The samples are first widened by INT16_TO_INT32_N (which leaves each sample
; in the upper half of a dword), then converted to float and multiplied by m5.
; The macro arguments are only forwarded to INT16_TO_INT32_N.
580%macro INT16_TO_FLOAT_N 6
581    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
582    cvtdq2ps  m0, m0
583    cvtdq2ps  m1, m1
584    cvtdq2ps  m2, m2
585    cvtdq2ps  m3, m3
586    mulps m0, m0, m5
587    mulps m1, m1, m5
588    mulps m2, m2, m5
589    mulps m3, m3, m5
590%endmacro
591
; FLOAT_TO_INT16_INIT: load the flt2p15 scale constant into m5.
; The six macro arguments are accepted but not used; the register is fixed.
592%macro FLOAT_TO_INT16_INIT 6
593    mova      m5, [flt2p15]
594%endmacro
; FLOAT_TO_INT16_N: convert the float samples in m0..m3 to int16.
; The six macro arguments are accepted but not used; the registers are fixed.
;   in:  m0..m3  four vectors of float samples; m5 = scale constant
;   out: m0      samples of the input m0 and m1
;        m1      samples of the input m2 and m3
; Samples are multiplied by m5, converted to int32 and packed to words with
; signed saturation.
595%macro FLOAT_TO_INT16_N 6
596    mulps m0, m5
597    mulps m1, m5
598    mulps m2, m5
599    mulps m3, m5
600    cvtps2dq  m0, m0
601    cvtps2dq  m1, m1
602    packssdw  m0, m1
    ; m1 is free again, so the second output vector is built directly in it.
603    cvtps2dq  m1, m2
604    cvtps2dq  m3, m3
605    packssdw  m1, m3
606%endmacro
607
; NOP_N: empty macro taking up to six arguments.  It is passed wherever a
; conversion or init macro is required but nothing has to be done.
608%macro NOP_N 0-6
609%endmacro
610
; MMX instantiations.
; CONV arguments: to, from, a/u, log2 of the output sample size, log2 of the
; input sample size, conversion macro, init macro.
; Every a variant jumps to a label inside the matching u variant, so the two
; are always instantiated as a pair.
611INIT_MMX mmx
612CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
613CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
614CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
615CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
616
; PACK_6CH takes the vector register count as its sixth argument, followed by
; the conversion and init macros.
617PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N
618PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N
619
; SSE instantiations: 6-channel float interleave and de-interleave without
; format conversion.  The sixth argument is the vector register count handed
; to cglobal.  Every a variant jumps to a label inside the matching u variant,
; so the two are always instantiated as a pair.
620INIT_XMM sse
621PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
622PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
623
624UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
625UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
626
; SSE2 instantiations.
; CONV / PACK_2CH / UNPACK_2CH arguments: to, from, a/u, log2 of the output
; sample size, log2 of the input sample size, conversion macro, init macro.
; PACK_6CH / UNPACK_6CH / PACK_8CH take the vector register count as their
; sixth argument, followed by the conversion and init macros.
; Every a variant jumps to a label inside the matching u variant, so the two
; are always instantiated as a pair.
627INIT_XMM sse2
; Integer <-> integer, single plane.
628CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
629CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
630CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
631CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
632
; Integer <-> integer, two planes to packed.
633PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
634PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
635PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
636PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
637PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
638PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
639PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
640PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
641
; Integer <-> integer, packed to two planes.
642UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
643UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
644UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
645UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
646UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
647UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
648UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
649UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
650
; Float <-> integer, single plane.
651CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
652CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
653CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
654CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
655CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
656CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
657CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
658CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
659
; Float <-> integer, two planes to packed.
660PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
661PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
662PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
663PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
664PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
665PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
666PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
667PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
668
; Float <-> integer, packed to two planes.
669UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
670UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
671UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
672UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
673UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
674UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
675UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
676UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
677
; Six channels with float <-> int32 conversion.
678PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
679PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
680PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
681PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
682
683UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
684UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
685UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
686UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
687
; Eight channels, without and with float <-> int32 conversion.
688PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
689PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
690
691PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
692PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
693PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
694PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
695
; SSSE3 instantiations: only the UNPACK_2CH variants with 16-bit input, which
; are the ones whose body takes the pshufb path when SUFFIX is _ssse3.
; Arguments: to, from, a/u, log2 of the output sample size, log2 of the input
; sample size, conversion macro, init macro.
696INIT_XMM ssse3
697UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
698UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
699UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
700UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
701UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
702UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
703
; AVX instantiations, only assembled when HAVE_AVX_EXTERNAL is non-zero.
; PACK_6CH / UNPACK_6CH / PACK_8CH take the vector register count as their
; sixth argument, followed by the conversion and init macros.
; Every a variant jumps to a label inside the matching u variant, so the two
; are always instantiated as a pair.
704%if HAVE_AVX_EXTERNAL
; 16-byte vector versions of the 6- and 8-channel routines.
705INIT_XMM avx
706PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
707PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
708
709UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
710UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
711
712PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
713PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
714PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
715PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
716
717UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
718UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
719UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
720UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
721
722PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
723PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
724
725PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
726PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
727PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
728PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
729
; Single-plane int32 -> float with INIT_YMM, i.e. the wider AVX vector size.
730INIT_YMM avx
731CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
732CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
733%endif
734
; AVX2 instantiations, only assembled when HAVE_AVX2_EXTERNAL is non-zero:
; single-plane float -> int32 with INIT_YMM.  FLOAT_TO_INT32_N uses the integer
; instruction paddd on its operands.
; NOTE(review): presumably this is why the routine is built for avx2 rather
; than avx (integer operations on the wider registers) - confirm.
735%if HAVE_AVX2_EXTERNAL
736INIT_YMM avx2
737CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
738CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
739%endif
740