/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-plugins
 * Created on: 19 Dec 2018
 *
 * lsp-plugins is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * lsp-plugins is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_
#define DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_

#ifndef DSP_ARCH_X86_AVX2_IMPL
    #error "This header should not be included directly"
#endif /* DSP_ARCH_X86_AVX2_IMPL */

namespace avx2
{
    #define MAT4_TRANSPOSE(x0, x1, x2, x3, x4, x5) \
        /* x0 = a0 a1 a2 a3 */ \
        /* x1 = b0 b1 b2 b3 */ \
        /* x2 = c0 c1 c2 c3 */ \
        /* x3 = d0 d1 d2 d3 */ \
        __ASM_EMIT("vpunpckhdq      " x1 ", " x0 ", " x4)   /* x4 = a2 b2 a3 b3 */ \
        __ASM_EMIT("vpunpckhdq      " x3 ", " x2 ", " x5)   /* x5 = c2 d2 c3 d3 */ \
        __ASM_EMIT("vpunpckldq      " x1 ", " x0 ", " x0)   /* x0 = a0 b0 a1 b1 */ \
        __ASM_EMIT("vpunpckldq      " x3 ", " x2 ", " x2)   /* x2 = c0 d0 c1 d1 */ \
        __ASM_EMIT("vpunpckhqdq     " x2 ", " x0 ", " x1)   /* x1 = a1 b1 c1 d1 */ \
        __ASM_EMIT("vpunpckhqdq     " x5 ", " x4 ", " x3)   /* x3 = a3 b3 c3 d3 */ \
        __ASM_EMIT("vpunpcklqdq     " x2 ", " x0 ", " x0)   /* x0 = a0 b0 c0 d0 */ \
        __ASM_EMIT("vpunpcklqdq     " x5 ", " x4 ", " x2)   /* x2 = a2 b2 c2 d2 */
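
    /*
     * Scalar reference of MAT4_TRANSPOSE (illustrative sketch only, not used by
     * the SIMD code): it shows the same 4x4 single-precision transpose that the
     * unpack sequence above performs on four XMM rows. The function name is an
     * assumption for demonstration purposes.
     */
    inline void mat4_transpose_ref(float *m)
    {
        for (int i = 0; i < 4; ++i)
            for (int j = i + 1; j < 4; ++j)
            {
                float tmp       = m[i*4 + j];   /* swap element (i, j) with (j, i) */
                m[i*4 + j]      = m[j*4 + i];
                m[j*4 + i]      = tmp;
            }
    }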

    #define MAT4X2_INTERLEAVE(y0, y1, y2, y3, y4, y5) \
        /* y0 = c0 c4 */ \
        /* y1 = c1 c5 */ \
        /* y2 = c2 c6 */ \
        /* y3 = c3 c7 */ \
        __ASM_EMIT("vextractf128    $1, %%ymm" y0 ", %%xmm" y4 )                /* y4 = c4 0 */ \
        __ASM_EMIT("vextractf128    $1, %%ymm" y2 ", %%xmm" y5 )                /* y5 = c6 0 */ \
        __ASM_EMIT("vinsertf128     $0, %%xmm" y4 ", %%ymm" y1 ", %%ymm" y4)    /* y4 = c4 c5 */ \
        __ASM_EMIT("vinsertf128     $0, %%xmm" y5 ", %%ymm" y3 ", %%ymm" y5)    /* y5 = c6 c7 */ \
        __ASM_EMIT("vinsertf128     $1, %%xmm" y1 ", %%ymm" y0 ", %%ymm" y0)    /* y0 = c0 c1 */ \
        __ASM_EMIT("vinsertf128     $1, %%xmm" y3 ", %%ymm" y2 ", %%ymm" y1)    /* y1 = c2 c3 */ \
        __ASM_EMIT("vmovaps         %%ymm" y4 ", %%ymm" y2 )                    /* y2 = c4 c5 */ \
        __ASM_EMIT("vmovaps         %%ymm" y5 ", %%ymm" y3 )                    /* y3 = c6 c7 */
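
    /*
     * Scalar reference of MAT4X2_INTERLEAVE (illustrative sketch only): the four
     * YMM inputs hold eight 128-bit columns laid out as lane pairs (c0,c4),
     * (c1,c5), (c2,c6), (c3,c7); the macro reorders them into sequential pairs
     * (c0,c1), (c2,c3), (c4,c5), (c6,c7). Below, each column is 4 floats and the
     * buffer layout mirrors the register contents; the function name is an
     * assumption for demonstration purposes.
     */
    inline void mat4x2_interleave_ref(float *dst, const float *src)
    {
        /* src holds columns in order c0 c4 c1 c5 c2 c6 c3 c7 (4 floats each), */
        /* dst receives them in order c0 c1 c2 c3 c4 c5 c6 c7 */
        for (int i = 0; i < 4; ++i)
            for (int j = 0; j < 4; ++j)
            {
                dst[i*4 + j]        = src[(i*2 + 0)*4 + j];     /* low lane:  c{i}   */
                dst[(i + 4)*4 + j]  = src[(i*2 + 1)*4 + j];     /* high lane: c{i+4} */
            }
    }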

}


#endif /* DSP_ARCH_X86_AVX2_GRAPHICS_TRANSPOSE_H_ */