/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-plugins
 * Created on: 9 Dec 2019
 *
 * lsp-plugins is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * lsp-plugins is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef DSP_ARCH_X86_AVX_FFT_H_
#define DSP_ARCH_X86_AVX_FFT_H_

#ifndef DSP_ARCH_X86_AVX_IMPL
    #error "This header should not be included directly"
#endif /* DSP_ARCH_X86_AVX_IMPL */

#include <dsp/arch/x86/avx/fft/const.h>
#include <dsp/arch/x86/avx/fft/butterfly.h>
#include <dsp/arch/x86/avx/fft/normalize.h>

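/*
 * The scramble routines below are generated by including the scramble.h template
 * several times with different parameters:
 *   - FFT_SCRAMBLE_*_NAME define the names of the generated functions;
 *   - FFT_TYPE sets the integer type used by the generated code
 *     (uint8_t for the small-rank variants, uint16_t for the larger ones);
 *   - FFT_FMA(a, b) expands to 'a' for the plain AVX variants and to 'b'
 *     for the FMA3 variants.
 */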
#define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct8
#define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse8
#define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct8
#define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse8
#define FFT_TYPE                        uint8_t
#define FFT_FMA(a, b)                   a
#include <dsp/arch/x86/avx/fft/scramble.h>

#define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct16
#define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse16
#define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct16
#define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse16
#define FFT_TYPE                        uint16_t
#define FFT_FMA(a, b)                   a
#include <dsp/arch/x86/avx/fft/scramble.h>

#define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct8_fma3
#define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse8_fma3
#define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct8_fma3
#define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse8_fma3
#define FFT_TYPE                        uint8_t
#define FFT_FMA(a, b)                   b
#include <dsp/arch/x86/avx/fft/scramble.h>

#define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct16_fma3
#define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse16_fma3
#define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct16_fma3
#define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse16_fma3
#define FFT_TYPE                        uint16_t
#define FFT_FMA(a, b)                   b
#include <dsp/arch/x86/avx/fft/scramble.h>

namespace avx
{
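    // Scalar fallback for rank <= 2: computes the direct FFT of 1, 2 or 4
    // complex samples without the AVX butterfly routines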
    static inline void small_direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        if (rank == 2)
        {
            float s0_re     = src_re[0] + src_re[1];
            float s1_re     = src_re[0] - src_re[1];
            float s2_re     = src_re[2] + src_re[3];
            float s3_re     = src_re[2] - src_re[3];

            float s0_im     = src_im[0] + src_im[1];
            float s1_im     = src_im[0] - src_im[1];
            float s2_im     = src_im[2] + src_im[3];
            float s3_im     = src_im[2] - src_im[3];

            dst_re[0]       = s0_re + s2_re;
            dst_re[1]       = s1_re + s3_im;
            dst_re[2]       = s0_re - s2_re;
            dst_re[3]       = s1_re - s3_im;

            dst_im[0]       = s0_im + s2_im;
            dst_im[1]       = s1_im - s3_re;
            dst_im[2]       = s0_im - s2_im;
            dst_im[3]       = s1_im + s3_re;
        }
        else if (rank == 1)
        {
            // s0' = s0 + s1
            // s1' = s0 - s1
            float s1_re     = src_re[1];
            float s1_im     = src_im[1];
            dst_re[1]       = src_re[0] - s1_re;
            dst_im[1]       = src_im[0] - s1_im;
            dst_re[0]       = src_re[0] + s1_re;
            dst_im[0]       = src_im[0] + s1_im;
        }
        else
        {
            dst_re[0]       = src_re[0];
            dst_im[0]       = src_im[0];
        }
    }

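    // Scalar fallback for rank <= 2: computes the reverse (inverse) FFT of 1, 2
    // or 4 complex samples, applying the same 1/N normalization that the
    // large-rank path performs via dsp::normalize_fft2()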
    static inline void small_reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        if (rank == 2)
        {
            float s0_re     = src_re[0] + src_re[1];
            float s1_re     = src_re[0] - src_re[1];
            float s2_re     = src_re[2] + src_re[3];
            float s3_re     = src_re[2] - src_re[3];

            float s0_im     = src_im[0] + src_im[1];
            float s1_im     = src_im[0] - src_im[1];
            float s2_im     = src_im[2] + src_im[3];
            float s3_im     = src_im[2] - src_im[3];

            // The reverse transform uses conjugated twiddle factors and
            // scales the result by 1/N = 0.25
            dst_re[0]       = (s0_re + s2_re) * 0.25f;
            dst_re[1]       = (s1_re - s3_im) * 0.25f;
            dst_re[2]       = (s0_re - s2_re) * 0.25f;
            dst_re[3]       = (s1_re + s3_im) * 0.25f;

            dst_im[0]       = (s0_im + s2_im) * 0.25f;
            dst_im[1]       = (s1_im + s3_re) * 0.25f;
            dst_im[2]       = (s0_im - s2_im) * 0.25f;
            dst_im[3]       = (s1_im - s3_re) * 0.25f;
        }
        else if (rank == 1)
        {
            // s0' = (s0 + s1) / 2
            // s1' = (s0 - s1) / 2
            float s1_re     = src_re[1];
            float s1_im     = src_im[1];
            dst_re[1]       = (src_re[0] - s1_re) * 0.5f;
            dst_im[1]       = (src_im[0] - s1_im) * 0.5f;
            dst_re[0]       = (src_re[0] + s1_re) * 0.5f;
            dst_im[0]       = (src_im[0] + s1_im) * 0.5f;
        }
        else
        {
            dst_re[0]       = src_re[0];
            dst_im[0]       = src_im[0];
        }
    }

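    // Direct (forward) FFT of 2^rank complex samples stored as split real/imaginary
    // arrays. Small ranks are handled by the scalar fallback; otherwise the data is
    // scrambled and the remaining stages are processed by the AVX butterfly routines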
    void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        // Check bounds
        if (rank <= 2)
        {
            small_direct_fft(dst_re, dst_im, src_re, src_im, rank);
            return;
        }

        if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4))
        {
            // In-place processing (also used when rank < 4 is too small for the
            // copying scramble): move the data first, then scramble it in place
            dsp::move(dst_re, src_re, 1 << rank);
            dsp::move(dst_im, src_im, 1 << rank);
            if (rank <= 8)
                scramble_self_direct8(dst_re, dst_im, rank);
            else
                scramble_self_direct16(dst_re, dst_im, rank);
        }
        else
        {
            // Out-of-place processing: scramble while copying from source to destination
            if (rank <= 12)
                scramble_copy_direct8(dst_re, dst_im, src_re, src_im, rank-4);
            else
                scramble_copy_direct16(dst_re, dst_im, src_re, src_im, rank-4);
        }

        // Perform the remaining butterfly passes (stages 3 .. rank-1)
        for (size_t i=3; i < rank; ++i)
            butterfly_direct8p(dst_re, dst_im, i, 1 << (rank - i - 1));
    }

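    // Same as direct_fft(), but uses the FMA3 variants of the scramble and
    // butterfly routines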
    void direct_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        // Check bounds
        if (rank <= 2)
        {
            small_direct_fft(dst_re, dst_im, src_re, src_im, rank);
            return;
        }

        if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4))
        {
            dsp::move(dst_re, src_re, 1 << rank);
            dsp::move(dst_im, src_im, 1 << rank);
            if (rank <= 8)
                scramble_self_direct8_fma3(dst_re, dst_im, rank);
            else
                scramble_self_direct16_fma3(dst_re, dst_im, rank);
        }
        else
        {
            if (rank <= 12)
                scramble_copy_direct8_fma3(dst_re, dst_im, src_re, src_im, rank-4);
            else
                scramble_copy_direct16_fma3(dst_re, dst_im, src_re, src_im, rank-4);
        }

        for (size_t i=3; i < rank; ++i)
            butterfly_direct8p_fma3(dst_re, dst_im, i, 1 << (rank - i - 1));
    }

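    // Reverse (inverse) FFT of 2^rank complex samples. Mirrors direct_fft(), but
    // uses the reverse scramble/butterfly routines and normalizes the result
    // with dsp::normalize_fft2()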
    void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        // Check bounds
        if (rank <= 2)
        {
            small_reverse_fft(dst_re, dst_im, src_re, src_im, rank);
            return;
        }

        if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4))
        {
            dsp::move(dst_re, src_re, 1 << rank);
            dsp::move(dst_im, src_im, 1 << rank);
            if (rank <= 8)
                scramble_self_reverse8(dst_re, dst_im, rank);
            else
                scramble_self_reverse16(dst_re, dst_im, rank);
        }
        else
        {
            if (rank <= 12)
                scramble_copy_reverse8(dst_re, dst_im, src_re, src_im, rank-4);
            else
                scramble_copy_reverse16(dst_re, dst_im, src_re, src_im, rank-4);
        }

        for (size_t i=3; i < rank; ++i)
            butterfly_reverse8p(dst_re, dst_im, i, 1 << (rank - i - 1));

        dsp::normalize_fft2(dst_re, dst_im, rank);
    }

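    // Same as reverse_fft(), but uses the FMA3 variants of the scramble and
    // butterfly routines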
    void reverse_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        // Check bounds
        if (rank <= 2)
        {
            small_reverse_fft(dst_re, dst_im, src_re, src_im, rank);
            return;
        }

        if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4))
        {
            dsp::move(dst_re, src_re, 1 << rank);
            dsp::move(dst_im, src_im, 1 << rank);
            if (rank <= 8)
                scramble_self_reverse8_fma3(dst_re, dst_im, rank);
            else
                scramble_self_reverse16_fma3(dst_re, dst_im, rank);
        }
        else
        {
            if (rank <= 12)
                scramble_copy_reverse8_fma3(dst_re, dst_im, src_re, src_im, rank-4);
            else
                scramble_copy_reverse16_fma3(dst_re, dst_im, src_re, src_im, rank-4);
        }

        for (size_t i=3; i < rank; ++i)
            butterfly_reverse8p_fma3(dst_re, dst_im, i, 1 << (rank - i - 1));

        dsp::normalize_fft2(dst_re, dst_im, rank);
    }
}

#endif /* DSP_ARCH_X86_AVX_FFT_H_ */