1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 18 дек. 2019 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_ 23 #define DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_ 24 25 #ifndef DSP_ARCH_X86_SSE_IMPL 26 #error "This header should not be included directly" 27 #endif /* DSP_ARCH_X86_SSE_IMPL */ 28 29 namespace sse 30 { normalize_fft3(float * dre,float * dim,const float * re,const float * im,size_t rank)31 void normalize_fft3(float *dre, float *dim, const float *re, const float *im, size_t rank) 32 { 33 IF_ARCH_X86( 34 float k = 1.0f/(1 << rank); 35 size_t count = 1 << rank, off = 0; 36 ); 37 ARCH_X86_ASM( 38 // x8 blocks 39 __ASM_EMIT ("shufps $0x00, %%xmm0, %%xmm0") // xmm0 = k 40 __ASM_EMIT32("subl $8, %[count]") 41 __ASM_EMIT64("sub $8, %[count]") 42 __ASM_EMIT ("movaps %%xmm0, %%xmm1") 43 __ASM_EMIT ("jb 2f") 44 __ASM_EMIT ("1:") 45 __ASM_EMIT ("movups 0x00(%[s_re], %[off]), %%xmm4") 46 __ASM_EMIT ("movups 0x10(%[s_re], %[off]), %%xmm5") 47 __ASM_EMIT ("movups 0x00(%[s_im], %[off]), %%xmm6") 48 __ASM_EMIT ("movups 0x10(%[s_im], %[off]), %%xmm7") 49 __ASM_EMIT ("mulps %%xmm0, %%xmm4") 50 __ASM_EMIT ("mulps %%xmm1, %%xmm5") 51 __ASM_EMIT ("mulps %%xmm0, %%xmm6") 52 __ASM_EMIT ("mulps %%xmm1, %%xmm7") 53 __ASM_EMIT ("movups %%xmm4, 0x00(%[d_re], %[off])") 54 __ASM_EMIT ("movups %%xmm5, 0x10(%[d_re], %[off])") 55 __ASM_EMIT ("movups %%xmm6, 0x00(%[d_im], %[off])") 56 __ASM_EMIT ("movups %%xmm7, 0x10(%[d_im], %[off])") 57 __ASM_EMIT ("add $0x20, %[off]") 58 __ASM_EMIT32("subl $8, %[count]") 59 __ASM_EMIT64("sub $8, %[count]") 60 __ASM_EMIT ("jae 1b") 61 __ASM_EMIT ("2:") 62 : [off] "+r" (off), [count] __ASM_ARG_RW(count), 63 [k] "+Yz" (k) 64 : [s_re] "r" (re), [s_im] "r" (im), 65 [d_re] "r" (dre), [d_im] "r" (dim) 66 : "cc", "memory", 67 "%xmm1", 68 "%xmm4", "%xmm5", "%xmm6", "%xmm7" 69 ); 70 } 71 normalize_fft2(float * re,float * im,size_t rank)72 void normalize_fft2(float *re, float *im, size_t rank) 73 { 74 IF_ARCH_X86( 75 float k = 1.0f/(1 << rank); 76 size_t count = 1 << rank, off = 0; 77 ); 78 ARCH_X86_ASM( 79 // x8 blocks 80 __ASM_EMIT ("shufps $0x00, %%xmm0, %%xmm0") // xmm0 = k 81 __ASM_EMIT ("sub $8, %[count]") 82 __ASM_EMIT ("movaps %%xmm0, %%xmm1") 83 __ASM_EMIT ("jb 2f") 84 __ASM_EMIT ("1:") 85 __ASM_EMIT ("movups 0x00(%[d_re], %[off]), %%xmm4") 86 __ASM_EMIT ("movups 0x10(%[d_re], %[off]), %%xmm5") 87 __ASM_EMIT ("movups 0x00(%[d_im], %[off]), %%xmm6") 88 __ASM_EMIT ("movups 0x10(%[d_im], %[off]), %%xmm7") 89 __ASM_EMIT ("mulps %%xmm0, %%xmm4") 90 __ASM_EMIT ("mulps %%xmm1, %%xmm5") 91 __ASM_EMIT ("mulps %%xmm0, %%xmm6") 92 __ASM_EMIT ("mulps %%xmm1, %%xmm7") 93 __ASM_EMIT ("movups %%xmm4, 0x00(%[d_re], %[off])") 94 __ASM_EMIT ("movups %%xmm5, 0x10(%[d_re], %[off])") 95 __ASM_EMIT ("movups %%xmm6, 0x00(%[d_im], %[off])") 96 __ASM_EMIT ("movups %%xmm7, 0x10(%[d_im], %[off])") 97 __ASM_EMIT ("add $0x20, %[off]") 98 __ASM_EMIT ("sub $8, %[count]") 99 __ASM_EMIT ("jae 1b") 100 __ASM_EMIT ("2:") 101 : [off] "+r" (off), [count] "+r" (count), 102 [k] "+Yz" (k) 103 : [d_re] "r" (re), [d_im] "r" (im) 104 : "cc", "memory", 105 "%xmm1", 106 "%xmm4", "%xmm5", "%xmm6", "%xmm7" 107 ); 108 } 109 } 110 111 #endif /* DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_ */ 112