1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
 * Created on: 18 Dec 2019
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_
23 #define DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_
24 
25 #ifndef DSP_ARCH_X86_SSE_IMPL
26     #error "This header should not be included directly"
27 #endif /* DSP_ARCH_X86_SSE_IMPL */
28 
namespace sse
{
    /**
     * Normalize an FFT spectrum of the given rank by multiplying every sample
     * by the standard scale factor 1/2^rank. Non-destructive form: reads the
     * source spectrum and writes the scaled values to separate buffers.
     *
     * @param dre   destination buffer for the real part (1 << rank floats)
     * @param dim   destination buffer for the imaginary part (1 << rank floats)
     * @param re    source buffer with the real part
     * @param im    source buffer with the imaginary part
     * @param rank  FFT rank; number of complex samples is 1 << rank
     *
     * NOTE(review): the loop processes 8 floats of each buffer per iteration
     * and has no scalar tail, so it assumes (1 << rank) is a multiple of 8,
     * i.e. rank >= 3 — confirm against callers.
     */
    void normalize_fft3(float *dre, float *dim, const float *re, const float *im, size_t rank)
    {
        IF_ARCH_X86(
            float k = 1.0f/(1 << rank);         // normalization coefficient 1/N
            size_t count = 1 << rank, off = 0;  // samples left, common byte offset into all buffers
        );
        // The "+Yz" constraint places k into %xmm0; shufps broadcasts it to all
        // four lanes, and the copy in %xmm1 lets the two mulps pairs avoid a
        // dependency on a single register.
        ARCH_X86_ASM(
            // x8 blocks
            __ASM_EMIT  ("shufps        $0x00, %%xmm0, %%xmm0")                 // xmm0   = k
            // __ASM_EMIT32/__ASM_EMIT64 select the 32-/64-bit variant; presumably
            // __ASM_ARG_RW makes count a memory operand on x86_32, which requires
            // the explicit 'l' size suffix — confirm against the macro definitions.
            __ASM_EMIT32("subl          $8, %[count]")
            __ASM_EMIT64("sub           $8, %[count]")
            __ASM_EMIT  ("movaps        %%xmm0, %%xmm1")
            __ASM_EMIT  ("jb            2f")                                    // less than 8 samples -> nothing to do
            __ASM_EMIT  ("1:")
            // Load 8 real + 8 imaginary floats (unaligned), scale by k, store
            __ASM_EMIT  ("movups        0x00(%[s_re], %[off]), %%xmm4")
            __ASM_EMIT  ("movups        0x10(%[s_re], %[off]), %%xmm5")
            __ASM_EMIT  ("movups        0x00(%[s_im], %[off]), %%xmm6")
            __ASM_EMIT  ("movups        0x10(%[s_im], %[off]), %%xmm7")
            __ASM_EMIT  ("mulps         %%xmm0, %%xmm4")
            __ASM_EMIT  ("mulps         %%xmm1, %%xmm5")
            __ASM_EMIT  ("mulps         %%xmm0, %%xmm6")
            __ASM_EMIT  ("mulps         %%xmm1, %%xmm7")
            __ASM_EMIT  ("movups        %%xmm4, 0x00(%[d_re], %[off])")
            __ASM_EMIT  ("movups        %%xmm5, 0x10(%[d_re], %[off])")
            __ASM_EMIT  ("movups        %%xmm6, 0x00(%[d_im], %[off])")
            __ASM_EMIT  ("movups        %%xmm7, 0x10(%[d_im], %[off])")
            __ASM_EMIT  ("add           $0x20, %[off]")                         // advance 32 bytes = 8 floats
            __ASM_EMIT32("subl          $8, %[count]")
            __ASM_EMIT64("sub           $8, %[count]")
            __ASM_EMIT  ("jae           1b")
            __ASM_EMIT  ("2:")
            : [off] "+r" (off), [count] __ASM_ARG_RW(count),
              [k] "+Yz" (k)
            : [s_re] "r" (re), [s_im] "r" (im),
              [d_re] "r" (dre), [d_im] "r" (dim)
            : "cc", "memory",
              "%xmm1",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }

    /**
     * Normalize an FFT spectrum of the given rank in place by multiplying
     * every sample by the standard scale factor 1/2^rank.
     *
     * @param re    buffer with the real part, updated in place (1 << rank floats)
     * @param im    buffer with the imaginary part, updated in place (1 << rank floats)
     * @param rank  FFT rank; number of complex samples is 1 << rank
     *
     * NOTE(review): same x8-only loop as normalize_fft3 — assumes rank >= 3.
     * Unlike normalize_fft3, count uses a plain "+r" constraint here, so a
     * single sub without a size suffix is sufficient on both architectures.
     */
    void normalize_fft2(float *re, float *im, size_t rank)
    {
        IF_ARCH_X86(
            float k = 1.0f/(1 << rank);         // normalization coefficient 1/N
            size_t count = 1 << rank, off = 0;  // samples left, common byte offset into both buffers
        );
        // Same scheme as normalize_fft3: k arrives in %xmm0 ("+Yz"), is
        // broadcast, duplicated into %xmm1, and each iteration scales
        // 8 real + 8 imaginary floats in place.
        ARCH_X86_ASM(
            // x8 blocks
            __ASM_EMIT  ("shufps        $0x00, %%xmm0, %%xmm0")                 // xmm0   = k
            __ASM_EMIT  ("sub           $8, %[count]")
            __ASM_EMIT  ("movaps        %%xmm0, %%xmm1")
            __ASM_EMIT  ("jb            2f")                                    // less than 8 samples -> nothing to do
            __ASM_EMIT  ("1:")
            __ASM_EMIT  ("movups        0x00(%[d_re], %[off]), %%xmm4")
            __ASM_EMIT  ("movups        0x10(%[d_re], %[off]), %%xmm5")
            __ASM_EMIT  ("movups        0x00(%[d_im], %[off]), %%xmm6")
            __ASM_EMIT  ("movups        0x10(%[d_im], %[off]), %%xmm7")
            __ASM_EMIT  ("mulps         %%xmm0, %%xmm4")
            __ASM_EMIT  ("mulps         %%xmm1, %%xmm5")
            __ASM_EMIT  ("mulps         %%xmm0, %%xmm6")
            __ASM_EMIT  ("mulps         %%xmm1, %%xmm7")
            __ASM_EMIT  ("movups        %%xmm4, 0x00(%[d_re], %[off])")
            __ASM_EMIT  ("movups        %%xmm5, 0x10(%[d_re], %[off])")
            __ASM_EMIT  ("movups        %%xmm6, 0x00(%[d_im], %[off])")
            __ASM_EMIT  ("movups        %%xmm7, 0x10(%[d_im], %[off])")
            __ASM_EMIT  ("add           $0x20, %[off]")                         // advance 32 bytes = 8 floats
            __ASM_EMIT  ("sub           $8, %[count]")
            __ASM_EMIT  ("jae           1b")
            __ASM_EMIT  ("2:")
            : [off] "+r" (off), [count] "+r" (count),
              [k] "+Yz" (k)
            : [d_re] "r" (re), [d_im] "r" (im)
            : "cc", "memory",
              "%xmm1",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }
}
110 
111 #endif /* DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_ */
112