;*****************************************************************************
;* x86-optimized AC-3 downmixing
;* Copyright (c) 2012 Justin Ruggles
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

;******************************************************************************
;* This is based on the channel mixing asm in libavresample, but it is
;* simplified for only float coefficients and only 3 to 6 channels.
;******************************************************************************

; x86util.asm supplies every macro used below that is not defined in this
; file (cglobal, INIT_XMM/INIT_YMM, RET, mova, VBROADCASTSS, FMULADD_PS,
; CAT_XDEFINE, ...), either directly or through its own includes.
%include "libavutil/x86/x86util.asm"

SECTION .text

;-----------------------------------------------------------------------------
; functions to downmix 3 to 6 input channels to mono or stereo
; void ff_ac3_downmix_*(float **samples, float **matrix, int len);
;-----------------------------------------------------------------------------

%macro AC3_DOWNMIX 2 ; %1 = in channels, %2 = out channels
; Emits one function, ac3_downmix_N_to_M (N = in channels, M = out channels),
; for whichever instruction set the preceding INIT_XMM/INIT_YMM selected.
;
; What the generated code does, as written below:
;   - arg 0 (samples) is an array of N channel pointers;
;   - arg 1 (matrix) is an array of M row pointers, each row holding N floats;
;   - arg 2 (len) is scaled by 4 to obtain a byte count;
;   - for every sample position, out[o] = sum over i of in[i] * matrix[o][i];
;   - output 0 overwrites samples[0]; for stereo, output 1 overwrites
;     samples[1].
;
; NOTE(review): the loop advances mmsize bytes per iteration, uses mova for
; all sample accesses and only tests the counter after the body, so it
; presumably expects len to be a positive multiple of mmsize/4 and suitably
; aligned buffers -- confirm against the callers.
;
; Vector register roles:
;   m0  accumulator for output channel 0
;   m1  accumulator for output channel 1 (stereo), or FMULADD_PS scratch (mono)
;   m2  samples of the input channel currently being accumulated
;   m3  FMULADD_PS scratch (stereo only)
;   m(needed_mmregs) and up: splatted matrix coefficients
; FMULADD_PS comes from x86util.asm; it is used here as
; "dst = src1 * src2 + src3, with a scratch register as last argument".

; define some names to make the code clearer
%assign  in_channels %1
%assign out_channels %2
%assign stereo out_channels - 1

; determine how many matrix elements must go on the stack vs. mmregs
; Coefficients are numbered row-major (row 0 first, then row 1).  The first
; matrix_elements_mm of them live in vector registers, the rest are spilled
; to the stack, one mmsize-wide slot each.
%assign matrix_elements in_channels * out_channels
%if stereo
    %assign needed_mmregs 4
%else
    %assign needed_mmregs 3
%endif
%assign matrix_elements_mm num_mmregs - needed_mmregs
%if matrix_elements < matrix_elements_mm
    %assign matrix_elements_mm matrix_elements
%endif
%assign total_mmregs needed_mmregs+matrix_elements_mm
%if matrix_elements_mm < matrix_elements
    %assign matrix_elements_stack matrix_elements - matrix_elements_mm
%else
    %assign matrix_elements_stack 0
%endif

; 3 arguments, one GPR per input channel plus one for len.
; NOTE(review): the stack size is passed negated; see the cglobal
; documentation in x86inc.asm for what a negative size selects.
cglobal ac3_downmix_%1_to_%2, 3,in_channels+1,total_mmregs,0-matrix_elements_stack*mmsize, src0, src1, len, src2, src3, src4, src5

; load matrix pointers
; r1 and r3 are the registers later named src1q and src2q (going by the
; order of the cglobal name list), so every coefficient has to be fetched
; before the channel pointers are loaded further down.
%define matrix0q r1q
%define matrix1q r3q
%if stereo
    mov      matrix1q, [matrix0q+gprsize]
%endif
    mov      matrix0q, [matrix0q]

; define matrix coeff names
; mx_R_C       = operand (register or stack slot) holding coefficient [R][C]
; mx_stack_R_C = 1 if that operand is a stack slot, 0 if it is a register
%assign %%i 0
%assign %%j needed_mmregs
%rep in_channels
    %if %%i >= matrix_elements_mm
        CAT_XDEFINE mx_stack_0_, %%i, 1
        CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_0_, %%i, 0
        CAT_XDEFINE mx_0_, %%i, m %+ %%j
        %assign %%j %%j+1
    %endif
    %assign %%i %%i+1
%endrep
%if stereo
%assign %%i 0
%rep in_channels
    ; row 1 continues the numbering after the in_channels entries of row 0
    %if in_channels + %%i >= matrix_elements_mm
        CAT_XDEFINE mx_stack_1_, %%i, 1
        CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_1_, %%i, 0
        CAT_XDEFINE mx_1_, %%i, m %+ %%j
        %assign %%j %%j+1
    %endif
    %assign %%i %%i+1
%endrep
%endif

; load/splat matrix coeffs
; Stack-resident coefficients are broadcast into m0 first and then stored,
; m0 being free until the main loop starts.
%assign %%i 0
%rep in_channels
    %if mx_stack_0_ %+ %%i
        VBROADCASTSS m0, [matrix0q+4*%%i]
        mova  mx_0_ %+ %%i, m0
    %else
        VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i]
    %endif
    %if stereo
    %if mx_stack_1_ %+ %%i
        VBROADCASTSS m0, [matrix1q+4*%%i]
        mova  mx_1_ %+ %%i, m0
    %else
        VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i]
    %endif
    %endif
    %assign %%i %%i+1
%endrep

    ; lenq = len * 4, the byte length of one channel of floats
    lea          lenq, [4*r2d]
    ; load channel pointers to registers
    ; Each pointer is advanced to the end of its buffer so the loop can index
    ; with a negative offset that counts up towards zero.  src0q is the base
    ; of the pointer array and is therefore replaced last.
%assign %%i 1
%rep (in_channels - 1)
    mov         src %+ %%i %+ q, [src0q+%%i*gprsize]
    add         src %+ %%i %+ q, lenq
    %assign %%i %%i+1
%endrep
    mov         src0q, [src0q]
    add         src0q, lenq
    neg          lenq
.loop:
    ; Channel 0 starts the accumulators.  A separate load is needed when the
    ; samples are used twice (stereo) or when the coefficient is on the stack
    ; and thus already occupies the memory operand of the multiply.
    %if stereo || mx_stack_0_0
    mova           m0, [src0q+lenq]
    %endif
    %if stereo
    mulps          m1, m0, mx_1_0
    %endif
    %if stereo || mx_stack_0_0
    mulps          m0, m0, mx_0_0
    %else
    mulps          m0, mx_0_0, [src0q+lenq]
    %endif
%assign %%i 1
%rep (in_channels - 1)
    %define src_ptr src %+ %%i %+ q
    ; avoid extra load for mono if matrix is in a mm register
    %if stereo || mx_stack_0_ %+ %%i
    mova           m2, [src_ptr+lenq]
    %endif
    %if stereo
    FMULADD_PS     m1, m2, mx_1_ %+ %%i, m1, m3
    %endif
    %if stereo || mx_stack_0_ %+ %%i
    ; m2 is not needed after this step, so it doubles as the scratch register
    FMULADD_PS     m0, m2, mx_0_ %+ %%i, m0, m2
    %else
    FMULADD_PS     m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
    %endif
    %assign %%i %%i+1
%endrep
    ; store the mixed output over the first one or two input channels
    mova [src0q+lenq], m0
    %if stereo
    mova [src1q+lenq], m1
    %endif

    ; lenq is a negative byte offset; keep going while it is still below zero
    add          lenq, mmsize
    jl .loop
    RET
%endmacro

; Instantiates AC3_DOWNMIX for 3, 4, 5 and 6 input channels, each with a mono
; and a stereo output variant, once per instruction set: SSE on XMM
; registers, AVX on YMM registers and, only when HAVE_FMA3_EXTERNAL is set,
; FMA3 on YMM registers.
%macro AC3_DOWNMIX_FUNCS 0
%assign %%i 3
%rep 4
    INIT_XMM sse
    AC3_DOWNMIX %%i, 1
    AC3_DOWNMIX %%i, 2
    INIT_YMM avx
    AC3_DOWNMIX %%i, 1
    AC3_DOWNMIX %%i, 2
    %if HAVE_FMA3_EXTERNAL
    INIT_YMM fma3
    AC3_DOWNMIX %%i, 1
    AC3_DOWNMIX %%i, 2
    %endif
    %assign %%i %%i+1
%endrep
%endmacro

; emit all of the functions into this object file
AC3_DOWNMIX_FUNCS
