/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-plugins
 * Created on: 2 Jan 2020
 *
 * lsp-plugins is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * lsp-plugins is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef DSP_ARCH_X86_SSE_FILTERS_TRANSFER_H_
#define DSP_ARCH_X86_SSE_FILTERS_TRANSFER_H_

#ifndef DSP_ARCH_X86_SSE_IMPL
    #error "This header should not be included directly"
#endif /* DSP_ARCH_X86_SSE_IMPL */

namespace sse
{
    /*
     * F_UNPACK: read the cascade coefficients from *c (8 packed floats:
     * t0 t1 t2 t3 at offset 0x00, b0 b1 b2 b3 at offset 0x10) and broadcast
     * each of t0,t1,t2,b0,b1,b2 across a full 4-lane vector, storing the six
     * broadcast vectors into the 16-byte-aligned scratch buffer fp at offsets
     * 0x00..0x50.  The t3/b3 lanes are discarded — only t0..t2/b0..b2 take
     * part in the transfer computation below.
     */
    #define F_UNPACK \
        __ASM_EMIT("movups      0x00(%[c]), %%xmm0")        /* x0 = t0 t1 t2 t3 */ \
        __ASM_EMIT("movups      0x10(%[c]), %%xmm4")        /* x4 = b0 b1 b2 b3 */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm2")            /* x2 = t0 t1 t2 t3 */ \
        __ASM_EMIT("movaps      %%xmm4, %%xmm6")            /* x6 = b0 b1 b2 b3 */ \
        __ASM_EMIT("unpcklps    %%xmm0, %%xmm0")            /* x0 = t0 t0 t1 t1 */ \
        __ASM_EMIT("unpcklps    %%xmm4, %%xmm4")            /* x4 = b0 b0 b1 b1 */ \
        __ASM_EMIT("unpckhps    %%xmm2, %%xmm2")            /* x2 = t2 t2 t3 t3 */ \
        __ASM_EMIT("unpckhps    %%xmm6, %%xmm6")            /* x6 = b2 b2 b3 b3 */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm1")            /* x1 = t0 t0 t1 t1 */ \
        __ASM_EMIT("movaps      %%xmm4, %%xmm5")            /* x5 = b0 b0 b1 b1 */ \
        __ASM_EMIT("unpcklps    %%xmm0, %%xmm0")            /* x0 = t0 t0 t0 t0 */ \
        __ASM_EMIT("unpcklps    %%xmm4, %%xmm4")            /* x4 = b0 b0 b0 b0 */ \
        __ASM_EMIT("unpckhps    %%xmm1, %%xmm1")            /* x1 = t1 t1 t1 t1 */ \
        __ASM_EMIT("unpckhps    %%xmm5, %%xmm5")            /* x5 = b1 b1 b1 b1 */ \
        __ASM_EMIT("unpcklps    %%xmm2, %%xmm2")            /* x2 = t2 t2 t2 t2 */ \
        __ASM_EMIT("unpcklps    %%xmm6, %%xmm6")            /* x6 = b2 b2 b2 b2 */ \
        __ASM_EMIT("movaps      %%xmm0, 0x00 + %[fp]")      /* fp[0] = t0 broadcast */ \
        __ASM_EMIT("movaps      %%xmm1, 0x10 + %[fp]")      /* fp[1] = t1 broadcast */ \
        __ASM_EMIT("movaps      %%xmm2, 0x20 + %[fp]")      /* fp[2] = t2 broadcast */ \
        __ASM_EMIT("movaps      %%xmm4, 0x30 + %[fp]")      /* fp[3] = b0 broadcast */ \
        __ASM_EMIT("movaps      %%xmm5, 0x40 + %[fp]")      /* fp[4] = b1 broadcast */ \
        __ASM_EMIT("movaps      %%xmm6, 0x50 + %[fp]")      /* fp[5] = b2 broadcast */

    /*
     * F_LOAD: reload the six broadcast coefficient vectors from fp into
     * xmm0..xmm2 (t0,t1,t2) and xmm4..xmm6 (b0,b1,b2).  Needed after each
     * HF_CORE/HF_APPLY pass, which clobbers all of these registers.
     */
    #define F_LOAD \
        __ASM_EMIT("movaps      0x00 + %[fp], %%xmm0")      /* x0 = t0 */ \
        __ASM_EMIT("movaps      0x10 + %[fp], %%xmm1")      /* x1 = t1 */ \
        __ASM_EMIT("movaps      0x20 + %[fp], %%xmm2")      /* x2 = t2 */ \
        __ASM_EMIT("movaps      0x30 + %[fp], %%xmm4")      /* x4 = b0 */ \
        __ASM_EMIT("movaps      0x40 + %[fp], %%xmm5")      /* x5 = b1 */ \
        __ASM_EMIT("movaps      0x50 + %[fp], %%xmm6")      /* x6 = b2 */

    /*
     * HF_CORE: per-lane evaluation of the cascade's complex transfer function
     * at the 4 normalized frequencies held in xmm3:
     *      T(jf) = (t0 - t2*f^2) + j*(t1*f)
     *      B(jf) = (b0 - b2*f^2) + j*(b1*f)
     *      H(jf) = T(jf) / B(jf)
     * The complex division is done as (T * conj(B)) / |B|^2.
     * In:  x0=t0 x1=t1 x2=t2 x3=f x4=b0 x5=b1 x6=b2
     * Out: x0=a_re (Re H), x1=a_im (Im H); x2..x7 clobbered.
     * NOTE: no guard against |B|^2 == 0 — divps yields Inf/NaN in that lane.
     */
    #define HF_CORE \
        /* Compute H[f] */ \
        __ASM_EMIT("movaps      %%xmm3, %%xmm7")            /* x7 = f */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm1")            /* x1 = t_im = t1 * f */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm7")            /* x7 = f2 = f * f */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm5")            /* x5 = b_im = b1 * f */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm2")            /* x2 = t2 * f2 */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm6")            /* x6 = b2 * f2 */ \
        __ASM_EMIT("subps       %%xmm2, %%xmm0")            /* x0 = t_re = t0 - t2*f2 */ \
        __ASM_EMIT("subps       %%xmm6, %%xmm4")            /* x4 = b_re = b0 - b2*f2 */ \
        __ASM_EMIT("movaps      %%xmm5, %%xmm3")            /* x3 = b_im */ \
        __ASM_EMIT("movaps      %%xmm4, %%xmm2")            /* x2 = b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm3")            /* x3 = b_im * b_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm2")            /* x2 = b_re * b_re */ \
        __ASM_EMIT("addps       %%xmm2, %%xmm3")            /* x3 = W = b_re * b_re + b_im * b_im */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm6")            /* x6 = t_re */ \
        __ASM_EMIT("movaps      %%xmm1, %%xmm7")            /* x7 = t_im */ \
        __ASM_EMIT("mulps       %%xmm4, %%xmm0")            /* x0 = t_re * b_re */ \
        __ASM_EMIT("mulps       %%xmm5, %%xmm7")            /* x7 = t_im * b_im */ \
        __ASM_EMIT("mulps       %%xmm4, %%xmm1")            /* x1 = t_im * b_re */ \
        __ASM_EMIT("mulps       %%xmm5, %%xmm6")            /* x6 = t_re * b_im */ \
        __ASM_EMIT("addps       %%xmm7, %%xmm0")            /* x0 = t_re * b_re + t_im * b_im */ \
        __ASM_EMIT("subps       %%xmm6, %%xmm1")            /* x1 = t_im * b_re - t_re * b_im */ \
        __ASM_EMIT("divps       %%xmm3, %%xmm0")            /* x0 = a_re = (t_re * b_re + t_im * b_im) / W */ \
        __ASM_EMIT("divps       %%xmm3, %%xmm1")            /* x1 = a_im = (t_im * b_re - t_re * b_im) / W */

    /*
     * HF_APPLY: complex multiply of H (x0=a_re, x1=a_im) by the previously
     * accumulated transfer value (x2=b_re, x3=b_im):
     * Out: x0 = a_re*b_re - a_im*b_im, x1 = a_re*b_im + a_im*b_re;
     * x4, x5 clobbered.
     */
    #define HF_APPLY \
        /* Compute dst = H[f] * dst */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm4")            /* x4 = a_re */ \
        __ASM_EMIT("movaps      %%xmm1, %%xmm5")            /* x5 = a_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm0")            /* x0 = a_re * b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm4")            /* x4 = a_re * b_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm1")            /* x1 = a_im * b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm5")            /* x5 = a_im * b_im */ \
        __ASM_EMIT("addps       %%xmm4, %%xmm1")            /* x1 = a_re * b_im + a_im * b_re */ \
        __ASM_EMIT("subps       %%xmm5, %%xmm0")            /* x0 = a_re * b_re - a_im * b_im */

    /*
     * Compute the cascade's transfer function at each frequency freq[i] and
     * store the result into two separate (split) arrays:
     *      re[i] = Re H(j*freq[i]),  im[i] = Im H(j*freq[i])
     *
     * @param re    destination array for the real part, count elements
     * @param im    destination array for the imaginary part, count elements
     * @param c     filter cascade coefficients (t[4], b[4] packed floats)
     * @param freq  array of normalized frequencies, count elements
     * @param count number of frequencies to process
     *
     * Main loop handles 4 frequencies per iteration; tail blocks handle the
     * remaining 2 and 1.  The movlps/movss tail loads leave garbage in the
     * upper lanes, which is harmless: only the low lanes are stored back.
     */
    void filter_transfer_calc_ri(float *re, float *im, const f_cascade_t *c, const float *freq, size_t count)
    {
        // Scratch: 6 broadcast coefficient vectors (see F_UNPACK), 16-byte aligned
        IF_ARCH_X86( float fp[6*4] __lsp_aligned16; );

        ARCH_X86_ASM(
            // Unpack filter params
            F_UNPACK
            // x4 blocks
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jb          2f")
            __ASM_EMIT("1:")
            __ASM_EMIT("movups      0x00(%[f]), %%xmm3")    // x3 = f
            HF_CORE
            __ASM_EMIT("movups      %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movups      %%xmm1, 0x00(%[im])")
            F_LOAD                                          // restore coefficients clobbered by HF_CORE
            __ASM_EMIT("add         $0x10, %[f]")
            __ASM_EMIT("add         $0x10, %[re]")
            __ASM_EMIT("add         $0x10, %[im]")
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jae         1b")
            __ASM_EMIT("2:")
            // x2 block
            __ASM_EMIT("add         $2, %[count]")
            __ASM_EMIT("jl          4f")
            __ASM_EMIT("movlps      0x00(%[f]), %%xmm3")    // x3 = f (low 2 lanes only)
            HF_CORE
            __ASM_EMIT("movlps      %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movlps      %%xmm1, 0x00(%[im])")
            F_LOAD
            __ASM_EMIT("sub         $2, %[count]")
            __ASM_EMIT("add         $0x08, %[f]")
            __ASM_EMIT("add         $0x08, %[re]")
            __ASM_EMIT("add         $0x08, %[im]")
            __ASM_EMIT("4:")
            // x1 block
            __ASM_EMIT("add         $1, %[count]")
            __ASM_EMIT("jl          6f")
            __ASM_EMIT("movss       0x00(%[f]), %%xmm3")    // x3 = f (low lane only)
            HF_CORE
            __ASM_EMIT("movss       %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movss       %%xmm1, 0x00(%[im])")
            __ASM_EMIT("6:")

            : [re] "+r" (re), [im] "+r" (im), [f] "+r" (freq), [count] "+r" (count)
            : [c] "r" (c),
              [fp] "o" (fp)
            : "cc", "memory",
              "%xmm0", "%xmm1", "%xmm2", "%xmm3",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }

    /*
     * Multiply split-complex arrays by the cascade's transfer function
     * in-place:
     *      (re[i] + j*im[i]) *= H(j*freq[i])
     * Used to accumulate the combined response of several cascades.
     *
     * @param re    real part, updated in-place, count elements
     * @param im    imaginary part, updated in-place, count elements
     * @param c     filter cascade coefficients
     * @param freq  array of normalized frequencies, count elements
     * @param count number of frequencies to process
     */
    void filter_transfer_apply_ri(float *re, float *im, const f_cascade_t *c, const float *freq, size_t count)
    {
        // Scratch: 6 broadcast coefficient vectors (see F_UNPACK), 16-byte aligned
        IF_ARCH_X86( float fp[6*4] __lsp_aligned16; );

        ARCH_X86_ASM(
            // Unpack filter params
            F_UNPACK
            // x4 blocks
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jb          2f")
            __ASM_EMIT("1:")
            __ASM_EMIT("movups      0x00(%[f]), %%xmm3")    // x3 = f
            HF_CORE
            __ASM_EMIT("movups      0x00(%[re]), %%xmm2")   // x2 = b_re (accumulated value)
            __ASM_EMIT("movups      0x00(%[im]), %%xmm3")   // x3 = b_im (accumulated value)
            HF_APPLY
            __ASM_EMIT("movups      %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movups      %%xmm1, 0x00(%[im])")
            F_LOAD                                          // restore coefficients for next iteration
            __ASM_EMIT("add         $0x10, %[f]")
            __ASM_EMIT("add         $0x10, %[re]")
            __ASM_EMIT("add         $0x10, %[im]")
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jae         1b")
            __ASM_EMIT("2:")
            // x2 block
            __ASM_EMIT("add         $2, %[count]")
            __ASM_EMIT("jl          4f")
            __ASM_EMIT("movlps      0x00(%[f]), %%xmm3")    // x3 = f (low 2 lanes only)
            HF_CORE
            __ASM_EMIT("movlps      0x00(%[re]), %%xmm2")   // x2 = b_re
            __ASM_EMIT("movlps      0x00(%[im]), %%xmm3")   // x3 = b_im
            HF_APPLY
            __ASM_EMIT("movlps      %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movlps      %%xmm1, 0x00(%[im])")
            F_LOAD
            __ASM_EMIT("sub         $2, %[count]")
            __ASM_EMIT("add         $0x08, %[f]")
            __ASM_EMIT("add         $0x08, %[re]")
            __ASM_EMIT("add         $0x08, %[im]")
            __ASM_EMIT("4:")
            // x1 block
            __ASM_EMIT("add         $1, %[count]")
            __ASM_EMIT("jl          6f")
            __ASM_EMIT("movss       0x00(%[f]), %%xmm3")    // x3 = f (low lane only)
            HF_CORE
            __ASM_EMIT("movss       0x00(%[re]), %%xmm2")   // x2 = b_re
            __ASM_EMIT("movss       0x00(%[im]), %%xmm3")   // x3 = b_im
            HF_APPLY
            __ASM_EMIT("movss       %%xmm0, 0x00(%[re])")
            __ASM_EMIT("movss       %%xmm1, 0x00(%[im])")
            __ASM_EMIT("6:")

            : [re] "+r" (re), [im] "+r" (im), [f] "+r" (freq), [count] "+r" (count)
            : [c] "r" (c),
              [fp] "o" (fp)
            : "cc", "memory",
              "%xmm0", "%xmm1", "%xmm2", "%xmm3",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }

    #undef HF_CORE
    #undef HF_APPLY

    /*
     * PHF_CORE: identical to HF_CORE above (kept as a separate macro so the
     * packed-complex routines can pair it with PHF_APPLY).
     * In:  x0=t0 x1=t1 x2=t2 x3=f x4=b0 x5=b1 x6=b2
     * Out: x0=a_re, x1=a_im; x2..x7 clobbered.
     */
    #define PHF_CORE \
        /* Compute H[f] */ \
        __ASM_EMIT("movaps      %%xmm3, %%xmm7")            /* x7 = f */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm1")            /* x1 = t_im = t1 * f */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm7")            /* x7 = f2 = f * f */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm5")            /* x5 = b_im = b1 * f */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm2")            /* x2 = t2 * f2 */ \
        __ASM_EMIT("mulps       %%xmm7, %%xmm6")            /* x6 = b2 * f2 */ \
        __ASM_EMIT("subps       %%xmm2, %%xmm0")            /* x0 = t_re = t0 - t2*f2 */ \
        __ASM_EMIT("subps       %%xmm6, %%xmm4")            /* x4 = b_re = b0 - b2*f2 */ \
        __ASM_EMIT("movaps      %%xmm5, %%xmm3")            /* x3 = b_im */ \
        __ASM_EMIT("movaps      %%xmm4, %%xmm2")            /* x2 = b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm3")            /* x3 = b_im * b_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm2")            /* x2 = b_re * b_re */ \
        __ASM_EMIT("addps       %%xmm2, %%xmm3")            /* x3 = W = b_re * b_re + b_im * b_im */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm6")            /* x6 = t_re */ \
        __ASM_EMIT("movaps      %%xmm1, %%xmm7")            /* x7 = t_im */ \
        __ASM_EMIT("mulps       %%xmm4, %%xmm0")            /* x0 = t_re * b_re */ \
        __ASM_EMIT("mulps       %%xmm5, %%xmm7")            /* x7 = t_im * b_im */ \
        __ASM_EMIT("mulps       %%xmm4, %%xmm1")            /* x1 = t_im * b_re */ \
        __ASM_EMIT("mulps       %%xmm5, %%xmm6")            /* x6 = t_re * b_im */ \
        __ASM_EMIT("addps       %%xmm7, %%xmm0")            /* x0 = t_re * b_re + t_im * b_im */ \
        __ASM_EMIT("subps       %%xmm6, %%xmm1")            /* x1 = t_im * b_re - t_re * b_im */ \
        __ASM_EMIT("divps       %%xmm3, %%xmm0")            /* x0 = a_re = (t_re * b_re + t_im * b_im) / W */ \
        __ASM_EMIT("divps       %%xmm3, %%xmm1")            /* x1 = a_im = (t_im * b_re - t_re * b_im) / W */

    /*
     * PHF_APPLY: HF_APPLY followed by re/im interleaving into packed-complex
     * order, ready for two consecutive 16-byte stores:
     * Out: x0 = r0 i0 r1 i1, x2 = r2 i2 r3 i3; x1, x4, x5 clobbered.
     */
    #define PHF_APPLY \
        /* Compute dst = H[f] * dst */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm4")            /* x4 = a_re */ \
        __ASM_EMIT("movaps      %%xmm1, %%xmm5")            /* x5 = a_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm0")            /* x0 = a_re * b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm4")            /* x4 = a_re * b_im */ \
        __ASM_EMIT("mulps       %%xmm2, %%xmm1")            /* x1 = a_im * b_re */ \
        __ASM_EMIT("mulps       %%xmm3, %%xmm5")            /* x5 = a_im * b_im */ \
        __ASM_EMIT("addps       %%xmm4, %%xmm1")            /* x1 = a_re * b_im + a_im * b_re */ \
        __ASM_EMIT("subps       %%xmm5, %%xmm0")            /* x0 = a_re * b_re - a_im * b_im */ \
        __ASM_EMIT("movaps      %%xmm0, %%xmm2")            /* x2 = re */ \
        __ASM_EMIT("unpcklps    %%xmm1, %%xmm0")            /* x0 = r0 i0 r1 i1 */ \
        __ASM_EMIT("unpckhps    %%xmm1, %%xmm2")            /* x2 = r2 i2 r3 i3 */

    /*
     * Compute the cascade's transfer function at each frequency freq[i] and
     * store the result as a packed-complex array:
     *      dst[2*i] = Re H(j*freq[i]),  dst[2*i+1] = Im H(j*freq[i])
     *
     * @param dst   destination packed-complex array, 2*count elements
     * @param c     filter cascade coefficients
     * @param freq  array of normalized frequencies, count elements
     * @param count number of frequencies to process
     */
    void filter_transfer_calc_pc(float *dst, const f_cascade_t *c, const float *freq, size_t count)
    {
        // Scratch: 6 broadcast coefficient vectors (see F_UNPACK), 16-byte aligned
        IF_ARCH_X86( float fp[6*4] __lsp_aligned16; );

        ARCH_X86_ASM(
            // Unpack filter params
            F_UNPACK
            // x4 block
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jb          2f")
            __ASM_EMIT("1:")
            __ASM_EMIT("movups      0x00(%[f]), %%xmm3")    // x3 = f
            PHF_CORE
            __ASM_EMIT("movaps      %%xmm0, %%xmm2")        // x2 = re
            __ASM_EMIT("unpcklps    %%xmm1, %%xmm0")        // x0 = r0 i0 r1 i1
            __ASM_EMIT("unpckhps    %%xmm1, %%xmm2")        // x2 = r2 i2 r3 i3
            __ASM_EMIT("movups      %%xmm0, 0x00(%[dst])")
            __ASM_EMIT("movups      %%xmm2, 0x10(%[dst])")
            // Load filter params and repeat loop
            F_LOAD
            __ASM_EMIT("add         $0x10, %[f]")
            __ASM_EMIT("add         $0x20, %[dst]")         // packed complex: 2 floats per frequency
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jae         1b")
            __ASM_EMIT("2:")
            // x2 block
            __ASM_EMIT("add         $2, %[count]")
            __ASM_EMIT("jl          4f")
            __ASM_EMIT("movlps      0x00(%[f]), %%xmm3")    // x3 = f (low 2 lanes only)
            PHF_CORE
            __ASM_EMIT("unpcklps    %%xmm1, %%xmm0")        // x0 = r0 i0 r1 i1
            __ASM_EMIT("movups      %%xmm0, 0x00(%[dst])")
            F_LOAD
            __ASM_EMIT("sub         $2, %[count]")
            __ASM_EMIT("add         $0x08, %[f]")
            __ASM_EMIT("add         $0x10, %[dst]")
            __ASM_EMIT("4:")
            // x1 block
            __ASM_EMIT("add         $1, %[count]")
            __ASM_EMIT("jl          6f")
            __ASM_EMIT("movss       0x00(%[f]), %%xmm3")    // x3 = f (low lane only)
            PHF_CORE
            __ASM_EMIT("unpcklps    %%xmm1, %%xmm0")        // x0 = r0 i0 r1 i1
            __ASM_EMIT("movlps      %%xmm0, 0x00(%[dst])")
            __ASM_EMIT("6:")

            : [dst] "+r" (dst), [f] "+r" (freq), [count] "+r" (count)
            : [c] "r" (c),
              [fp] "o" (fp)
            : "cc", "memory",
              "%xmm0", "%xmm1", "%xmm2", "%xmm3",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }

    /*
     * Multiply a packed-complex array by the cascade's transfer function
     * in-place:
     *      (dst[2*i] + j*dst[2*i+1]) *= H(j*freq[i])
     *
     * @param dst   packed-complex array, updated in-place, 2*count elements
     * @param c     filter cascade coefficients
     * @param freq  array of normalized frequencies, count elements
     * @param count number of frequencies to process
     */
    void filter_transfer_apply_pc(float *dst, const f_cascade_t *c, const float *freq, size_t count)
    {
        // Scratch: 6 broadcast coefficient vectors (see F_UNPACK), 16-byte aligned
        IF_ARCH_X86( float fp[6*4] __lsp_aligned16; );

        ARCH_X86_ASM(
            // Unpack filter params
            F_UNPACK
            // x4 block
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jb          2f")
            __ASM_EMIT("1:")
            __ASM_EMIT("movups      0x00(%[f]), %%xmm3")    // x3 = f
            PHF_CORE
            // De-interleave the accumulated packed-complex values into split re/im
            __ASM_EMIT("movups      0x00(%[dst]), %%xmm2")  // x2 = br0 bi0 br1 bi1
            __ASM_EMIT("movups      0x10(%[dst]), %%xmm4")  // x4 = br2 bi2 br3 bi3
            __ASM_EMIT("movaps      %%xmm2, %%xmm3")        // x3 = br0 bi0 br1 bi1
            __ASM_EMIT("shufps      $0x88, %%xmm4, %%xmm2") // x2 = br0 br1 br2 br3
            __ASM_EMIT("shufps      $0xdd, %%xmm4, %%xmm3") // x3 = bi0 bi1 bi2 bi3
            PHF_APPLY
            __ASM_EMIT("movups      %%xmm0, 0x00(%[dst])")
            __ASM_EMIT("movups      %%xmm2, 0x10(%[dst])")
            // Load filter params and repeat loop
            F_LOAD
            __ASM_EMIT("add         $0x10, %[f]")
            __ASM_EMIT("add         $0x20, %[dst]")         // packed complex: 2 floats per frequency
            __ASM_EMIT("sub         $4, %[count]")
            __ASM_EMIT("jae         1b")
            __ASM_EMIT("2:")
            // x2 block
            __ASM_EMIT("add         $2, %[count]")
            __ASM_EMIT("jl          4f")
            __ASM_EMIT("movlps      0x00(%[f]), %%xmm3")    // x3 = f (low 2 lanes only)
            PHF_CORE
            __ASM_EMIT("movups      0x00(%[dst]), %%xmm2")  // x2 = br0 bi0 br1 bi1
            __ASM_EMIT("movaps      %%xmm2, %%xmm3")        // x3 = br0 bi0 br1 bi1
            __ASM_EMIT("shufps      $0x88, %%xmm2, %%xmm2") // x2 = br0 br1 br0 br1 (elements 0,2,0,2)
            __ASM_EMIT("shufps      $0xdd, %%xmm3, %%xmm3") // x3 = bi0 bi1 bi0 bi1 (elements 1,3,1,3)
            PHF_APPLY
            __ASM_EMIT("movups      %%xmm0, 0x00(%[dst])")
            F_LOAD
            __ASM_EMIT("sub         $2, %[count]")
            __ASM_EMIT("add         $0x08, %[f]")
            __ASM_EMIT("add         $0x10, %[dst]")
            __ASM_EMIT("4:")
            // x1 block
            __ASM_EMIT("add         $1, %[count]")
            __ASM_EMIT("jl          6f")
            __ASM_EMIT("movss       0x00(%[f]), %%xmm3")    // x3 = f (low lane only)
            PHF_CORE
            __ASM_EMIT("movss       0x00(%[dst]), %%xmm2")  // x2 = br0
            __ASM_EMIT("movss       0x04(%[dst]), %%xmm3")  // x3 = bi0
            PHF_APPLY
            __ASM_EMIT("movlps      %%xmm0, 0x00(%[dst])")  // store r0 i0
            __ASM_EMIT("6:")

            : [dst] "+r" (dst), [f] "+r" (freq), [count] "+r" (count)
            : [c] "r" (c),
              [fp] "o" (fp)
            : "cc", "memory",
              "%xmm0", "%xmm1", "%xmm2", "%xmm3",
              "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
    }

    #undef PHF_CORE
    #undef PHF_APPLY

    #undef F_UNPACK
    #undef F_LOAD
}

#endif /* DSP_ARCH_X86_SSE_FILTERS_TRANSFER_H_ */