1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 05 окт. 2015 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef DSP_ARCH_X86_SSE_FFT_H_
23 #define DSP_ARCH_X86_SSE_FFT_H_
24 
25 #ifndef DSP_ARCH_X86_SSE_IMPL
26     #error "This header should not be included directly"
27 #endif /* DSP_ARCH_X86_SSE_IMPL */
28 
29 #include <dsp/arch/x86/sse/fft/const.h>
30 #include <dsp/arch/x86/sse/fft/butterfly.h>
31 #include <dsp/arch/x86/sse/fft/p_butterfly.h>
32 #include <dsp/arch/x86/sse/fft/normalize.h>
33 
34 // Use 8-bit-reverse algorithm
35 #define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct8
36 #define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse8
37 #define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct8
38 #define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse8
39 #define FFT_TYPE                        uint8_t
40 #include <dsp/arch/x86/sse/fft/scramble.h>
41 
42 #define FFT_SCRAMBLE_SELF_DIRECT_NAME   packed_scramble_self_direct8
43 #define FFT_SCRAMBLE_SELF_REVERSE_NAME  packed_scramble_self_reverse8
44 #define FFT_SCRAMBLE_COPY_DIRECT_NAME   packed_scramble_copy_direct8
45 #define FFT_SCRAMBLE_COPY_REVERSE_NAME  packed_scramble_copy_reverse8
46 #define FFT_TYPE                        uint8_t
47 #include <dsp/arch/x86/sse/fft/p_scramble.h>
48 
49 // Use 16-bit-reverse algorithm
50 #define FFT_SCRAMBLE_SELF_DIRECT_NAME   scramble_self_direct16
51 #define FFT_SCRAMBLE_SELF_REVERSE_NAME  scramble_self_reverse16
52 #define FFT_SCRAMBLE_COPY_DIRECT_NAME   scramble_copy_direct16
53 #define FFT_SCRAMBLE_COPY_REVERSE_NAME  scramble_copy_reverse16
54 #define FFT_TYPE                        uint16_t
55 #include <dsp/arch/x86/sse/fft/scramble.h>
56 
57 #define FFT_SCRAMBLE_SELF_DIRECT_NAME   packed_scramble_self_direct16
58 #define FFT_SCRAMBLE_SELF_REVERSE_NAME  packed_scramble_self_reverse16
59 #define FFT_SCRAMBLE_COPY_DIRECT_NAME   packed_scramble_copy_direct16
60 #define FFT_SCRAMBLE_COPY_REVERSE_NAME  packed_scramble_copy_reverse16
61 #define FFT_TYPE                        uint16_t
62 #include <dsp/arch/x86/sse/fft/p_scramble.h>
63 
64 // Make set of scramble-switch implementations
65 #define FFT_SCRAMBLE_SELF_DIRECT_NAME       scramble_self_direct
66 #define FFT_SCRAMBLE_COPY_DIRECT_NAME       scramble_copy_direct
67 #define FFT_SCRAMBLE_SELF_REVERSE_NAME      scramble_self_reverse
68 #define FFT_SCRAMBLE_COPY_REVERSE_NAME      scramble_copy_reverse
69 #define FFT_SCRAMBLE_DIRECT_NAME            scramble_direct
70 #define FFT_SCRAMBLE_REVERSE_NAME           scramble_reverse
71 #include <dsp/arch/x86/sse/fft/switch.h>
72 
73 #define FFT_SCRAMBLE_SELF_DIRECT_NAME       packed_scramble_self_direct
74 #define FFT_SCRAMBLE_COPY_DIRECT_NAME       packed_scramble_copy_direct
75 #define FFT_SCRAMBLE_SELF_REVERSE_NAME      packed_scramble_self_reverse
76 #define FFT_SCRAMBLE_COPY_REVERSE_NAME      packed_scramble_copy_reverse
77 #define FFT_SCRAMBLE_DIRECT_NAME            packed_scramble_direct
78 #define FFT_SCRAMBLE_REVERSE_NAME           packed_scramble_reverse
79 #define FFT_REPACK                          packed_fft_repack
80 #define FFT_REPACK_NORMALIZE                packed_fft_repack_normalize
81 #include <dsp/arch/x86/sse/fft/p_switch.h>
82 
83 namespace sse
84 {
85     // Make set of scramble implementations
direct_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)86     void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
87     {
88         // Check bounds
89         if (rank <= 2)
90         {
91             if (rank == 2)
92             {
93                 float s0_re     = src_re[0] + src_re[1];
94                 float s1_re     = src_re[0] - src_re[1];
95                 float s2_re     = src_re[2] + src_re[3];
96                 float s3_re     = src_re[2] - src_re[3];
97 
98                 float s0_im     = src_im[0] + src_im[1];
99                 float s1_im     = src_im[0] - src_im[1];
100                 float s2_im     = src_im[2] + src_im[3];
101                 float s3_im     = src_im[2] - src_im[3];
102 
103                 dst_re[0]       = s0_re + s2_re;
104                 dst_re[1]       = s1_re + s3_im;
105                 dst_re[2]       = s0_re - s2_re;
106                 dst_re[3]       = s1_re - s3_im;
107 
108                 dst_im[0]       = s0_im + s2_im;
109                 dst_im[1]       = s1_im - s3_re;
110                 dst_im[2]       = s0_im - s2_im;
111                 dst_im[3]       = s1_im + s3_re;
112             }
113             else if (rank == 1)
114             {
115                 // s0' = s0 + s1
116                 // s1' = s0 - s1
117                 float s1_re     = src_re[1];
118                 float s1_im     = src_im[1];
119                 dst_re[1]       = src_re[0] - s1_re;
120                 dst_im[1]       = src_im[0] - s1_im;
121                 dst_re[0]       = src_re[0] + s1_re;
122                 dst_im[0]       = src_im[0] + s1_im;
123             }
124             else
125             {
126                 dst_re[0]       = src_re[0];
127                 dst_im[0]       = src_im[0];
128             }
129             return;
130         }
131 
132         scramble_direct(dst_re, dst_im, src_re, src_im, rank);
133 
134         for (size_t i=2; i < rank; ++i)
135             butterfly_direct(dst_re, dst_im, i, 1 << (rank - i - 1));
136     }
137 
packed_direct_fft(float * dst,const float * src,size_t rank)138     void packed_direct_fft(float *dst, const float *src, size_t rank)
139     {
140         // Check bounds
141         if (rank <= 2)
142         {
143             if (rank == 2)
144             {
145                 float s0_re     = dst[0] + dst[2];
146                 float s1_re     = dst[0] - dst[2];
147                 float s0_im     = dst[1] + dst[3];
148                 float s1_im     = dst[1] - dst[3];
149 
150                 float s2_re     = dst[4] + dst[6];
151                 float s3_re     = dst[4] - dst[6];
152                 float s2_im     = dst[5] + dst[7];
153                 float s3_im     = dst[5] - dst[7];
154 
155                 dst[0]          = s0_re + s2_re;
156                 dst[1]          = s0_im + s2_im;
157                 dst[2]          = s1_re + s3_im;
158                 dst[3]          = s1_im - s3_re;
159 
160                 dst[4]          = s0_re - s2_re;
161                 dst[5]          = s0_im - s2_im;
162                 dst[6]          = s1_re - s3_im;
163                 dst[7]          = s1_im + s3_re;
164             }
165             else if (rank == 1)
166             {
167                 // s0' = s0 + s1
168                 // s1' = s0 - s1
169                 float s1_re     = src[2];
170                 float s1_im     = src[3];
171                 dst[2]          = src[0] - s1_re;
172                 dst[3]          = src[1] - s1_im;
173                 dst[0]          = src[0] + s1_re;
174                 dst[1]          = src[1] + s1_im;
175             }
176             else
177             {
178                 dst[0]          = src[0];
179                 dst[1]          = src[1];
180             }
181             return;
182         }
183 
184         // Iterate butterflies
185         packed_scramble_direct(dst, src, rank);
186 
187         for (size_t i=2; i < rank; ++i)
188             packed_butterfly_direct(dst, i, 1 << (rank - i - 1));
189 
190         packed_fft_repack(dst, rank);
191     }
192 
reverse_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)193     void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
194     {
195         // Check bounds
196         if (rank <= 2)
197         {
198             if (rank == 2)
199             {
200                 float s0_re     = src_re[0] + src_re[1];
201                 float s1_re     = src_re[0] - src_re[1];
202                 float s2_re     = src_re[2] + src_re[3];
203                 float s3_re     = src_re[2] - src_re[3];
204 
205                 float s0_im     = src_im[0] + src_im[1];
206                 float s1_im     = src_im[0] - src_im[1];
207                 float s2_im     = src_im[2] + src_im[3];
208                 float s3_im     = src_im[2] - src_im[3];
209 
210                 dst_re[0]       = (s0_re + s2_re)*0.25f;
211                 dst_re[1]       = (s1_re - s3_im)*0.25f;
212                 dst_re[2]       = (s0_re - s2_re)*0.25f;
213                 dst_re[3]       = (s1_re + s3_im)*0.25f;
214 
215                 dst_im[0]       = (s0_im + s2_im)*0.25f;
216                 dst_im[1]       = (s1_im + s3_re)*0.25f;
217                 dst_im[2]       = (s0_im - s2_im)*0.25f;
218                 dst_im[3]       = (s1_im - s3_re)*0.25f;
219             }
220             else if (rank == 1)
221             {
222                 // s0' = s0 + s1
223                 // s1' = s0 - s1
224                 float s1_re     = src_re[1];
225                 float s1_im     = src_im[1];
226                 dst_re[1]       = (src_re[0] - s1_re) * 0.5f;
227                 dst_im[1]       = (src_im[0] - s1_im) * 0.5f;
228                 dst_re[0]       = (src_re[0] + s1_re) * 0.5f;
229                 dst_im[0]       = (src_im[0] + s1_im) * 0.5f;
230             }
231             else
232             {
233                 dst_re[0]       = src_re[0];
234                 dst_im[0]       = src_im[0];
235             }
236             return;
237         }
238 
239         // Iterate butterflies
240         scramble_reverse(dst_re, dst_im, src_re, src_im, rank);
241 
242         for (size_t i=2; i < rank; ++i)
243             butterfly_reverse(dst_re, dst_im, i, 1 << (rank - i - 1));
244 
245         dsp::normalize_fft2(dst_re, dst_im, rank);
246     }
247 
packed_reverse_fft(float * dst,const float * src,size_t rank)248     void packed_reverse_fft(float *dst, const float *src, size_t rank)
249     {
250         // Check bounds
251         if (rank <= 2)
252         {
253             if (rank == 2)
254             {
255                 float s0_re     = src[0] + src[2];
256                 float s1_re     = src[0] - src[2];
257                 float s2_re     = src[4] + src[6];
258                 float s3_re     = src[4] - src[6];
259 
260                 float s0_im     = src[1] + src[3];
261                 float s1_im     = src[1] - src[3];
262                 float s2_im     = src[5] + src[7];
263                 float s3_im     = src[5] - src[7];
264 
265                 dst[0]          = (s0_re + s2_re)*0.25f;
266                 dst[1]          = (s0_im + s2_im)*0.25f;
267                 dst[2]          = (s1_re - s3_im)*0.25f;
268                 dst[3]          = (s1_im + s3_re)*0.25f;
269 
270                 dst[4]          = (s0_re - s2_re)*0.25f;
271                 dst[5]          = (s0_im - s2_im)*0.25f;
272                 dst[6]          = (s1_re + s3_im)*0.25f;
273                 dst[7]          = (s1_im - s3_re)*0.25f;
274             }
275             else if (rank == 1)
276             {
277                 // s0' = s0 + s1
278                 // s1' = s0 - s1
279                 float s1_re     = src[2];
280                 float s1_im     = src[3];
281                 dst[2]          = src[0] - s1_re;
282                 dst[3]          = src[1] - s1_im;
283                 dst[0]          = src[0] + s1_re;
284                 dst[1]          = src[1] + s1_im;
285             }
286             else
287             {
288                 dst[0]          = src[0];
289                 dst[1]          = src[1];
290             }
291             return;
292         }
293 
294         // Iterate butterflies
295         packed_scramble_reverse(dst, src, rank);
296 
297         for (size_t i=2; i < rank; ++i)
298             packed_butterfly_reverse(dst, i, 1 << (rank - i - 1));
299 
300         packed_fft_repack_normalize(dst, rank);
301     }
302 }
303 
304 #endif /* DSP_ARCH_X86_SSE_FFT_H_ */
305