/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-plugins
 * Created on: 19 Dec 2018
 *
 * lsp-plugins is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * lsp-plugins is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_
#define DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_

/* This header only provides assembly-fragment macros; it refuses to compile
 * unless the including translation unit has defined DSP_ARCH_X86_AVX2_IMPL. */
#ifndef DSP_ARCH_X86_AVX2_IMPL
    #error "This header should not be included directly"
#endif /* DSP_ARCH_X86_AVX2_IMPL */

namespace avx2
{
    /*
     * MAT4_TRANSPOSE: emit an 8-instruction sequence that transposes a 4x4
     * matrix of 32-bit elements stored as four row registers.
     *
     * All six arguments are string literals that are pasted verbatim into the
     * instruction text, so the caller supplies the complete operand spelling
     * (register prefix included). Operands are written sources-first,
     * destination-last.
     *
     *   x0..x3  in:  rows a, b, c, d
     *           out: x0 = a0 b0 c0 d0, x1 = a1 b1 c1 d1,
     *                x2 = a2 b2 c2 d2, x3 = a3 b3 c3 d3
     *   x4, x5  scratch; on exit they still hold the intermediate values
     *           a2 b2 a3 b3 and c2 d2 c3 d3 respectively
     *
     * The instruction order matters: x0 and x2 are overwritten in place by
     * the low-half interleaves only after the high halves were saved to
     * x4/x5, and x0 is consumed for x1 before it receives its final value.
     *
     * NOTE(review): __ASM_EMIT is defined elsewhere; presumably it appends one
     * line to an inline-assembly template - confirm against its definition.
     * NOTE(review): if 256-bit registers are passed, these instructions operate
     * on each 128-bit half independently - confirm intended usage at call sites.
     */
    #define MAT4_TRANSPOSE(x0, x1, x2, x3, x4, x5) \
        /* x0 = a0 a1 a2 a3 */ \
        /* x1 = b0 b1 b2 b3 */ \
        /* x2 = c0 c1 c2 c3 */ \
        /* x3 = d0 d1 d2 d3 */ \
        __ASM_EMIT("vpunpckhdq " x1 ", " x0 ", " x4) /* x4 = a2 b2 a3 b3 */ \
        __ASM_EMIT("vpunpckhdq " x3 ", " x2 ", " x5) /* x5 = c2 d2 c3 d3 */ \
        __ASM_EMIT("vpunpckldq " x1 ", " x0 ", " x0) /* x0 = a0 b0 a1 b1 */ \
        __ASM_EMIT("vpunpckldq " x3 ", " x2 ", " x2) /* x2 = c0 d0 c1 d1 */ \
        __ASM_EMIT("vpunpckhqdq " x2 ", " x0 ", " x1) /* x1 = a1 b1 c1 d1 */ \
        __ASM_EMIT("vpunpckhqdq " x5 ", " x4 ", " x3) /* x3 = a3 b3 c3 d3 */ \
        __ASM_EMIT("vpunpcklqdq " x2 ", " x0 ", " x0) /* x0 = a0 b0 c0 d0 */ \
        __ASM_EMIT("vpunpcklqdq " x5 ", " x4 ", " x2) /* x2 = a2 b2 c2 d2 */

    /*
     * MAT4X2_INTERLEAVE: emit an 8-instruction sequence that regroups eight
     * 128-bit blocks c0..c7, held pairwise in four 256-bit registers, from a
     * strided layout into consecutive order.
     *
     * Unlike MAT4_TRANSPOSE, the arguments here are string literals holding
     * only the register number suffix: the macro itself prepends "%%ymm" or
     * "%%xmm", so the same argument addresses both the full register and its
     * low 128-bit half.
     *
     *   y0..y3  in:  y0 = c0 c4, y1 = c1 c5, y2 = c2 c6, y3 = c3 c7
     *                (low half listed first)
     *           out: y0 = c0 c1, y1 = c2 c3, y2 = c4 c5, y3 = c6 c7
     *   y4, y5  scratch; on exit they hold copies of the final y2 and y3
     *
     * The instruction order matters: y1 and y3 must be read (for y4/y5 and for
     * the low-half inserts) before y1 is overwritten, and y2/y3 receive their
     * final values only in the two trailing moves.
     *
     * NOTE(review): the doubled percent sign suggests the text is meant for an
     * extended inline-assembly template, where "%%" is the escape for a
     * literal "%" - confirm at the call site.
     */
    #define MAT4X2_INTERLEAVE(y0, y1, y2, y3, y4, y5) \
        /* y0 = c0 c4 */ \
        /* y1 = c1 c5 */ \
        /* y2 = c2 c6 */ \
        /* y3 = c3 c7 */ \
        __ASM_EMIT("vextractf128 $1, %%ymm" y0 ", %%xmm" y4 ) /* y4 = c4 0 */ \
        __ASM_EMIT("vextractf128 $1, %%ymm" y2 ", %%xmm" y5 ) /* y5 = c6 0 */ \
        __ASM_EMIT("vinsertf128 $0, %%xmm" y4 ", %%ymm" y1 ", %%ymm" y4) /* y4 = c4 c5 */ \
        __ASM_EMIT("vinsertf128 $0, %%xmm" y5 ", %%ymm" y3 ", %%ymm" y5) /* y5 = c6 c7 */ \
        __ASM_EMIT("vinsertf128 $1, %%xmm" y1 ", %%ymm" y0 ", %%ymm" y0) /* y0 = c0 c1 */ \
        __ASM_EMIT("vinsertf128 $1, %%xmm" y3 ", %%ymm" y2 ", %%ymm" y1) /* y1 = c2 c3 */ \
        __ASM_EMIT("vmovaps %%ymm" y4 ", %%ymm" y2 ) /* y2 = c4 c5 */ \
        __ASM_EMIT("vmovaps %%ymm" y5 ", %%ymm" y3 ) /* y3 = c6 c7 */

}


#endif /* DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_ */