1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 27 авг. 2016 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef DSP_ARCH_X86_GRAPHICS_H_
23 #define DSP_ARCH_X86_GRAPHICS_H_
24 
25 namespace x86
26 {
27 #ifdef ARCH_I386
28     // Limited number of registers
rgba32_to_bgra32(void * dst,const void * src,size_t count)29     void rgba32_to_bgra32(void *dst, const void *src, size_t count)
30     {
31         uint32_t t1, t2;
32 
33         ARCH_X86_ASM
34         (
35             // Check count
36             __ASM_EMIT("test    %[count], %[count]")
37             __ASM_EMIT("jz      2f")
38 
39             // Loop multiple of 4
40             __ASM_EMIT("1:")
41             __ASM_EMIT("mov     (%[src]), %[t1]")       // t1 = *src = RGBA
42             __ASM_EMIT("mov     %[t1], %[t2]")          // t1 = *src = RGBA
43             __ASM_EMIT("and     $0x00ff00ff, %[t1]")    // t1 = R0B0
44             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
45             __ASM_EMIT("ror     $16, %[t1]")            // t1 = B0R0
46             __ASM_EMIT("add     $0x4, %[src]")          // src ++
47             __ASM_EMIT("or      %[t2], %[t1]")          // t1 = BGRA
48             __ASM_EMIT("mov     %[t1], (%[dst])")       // *dst = BGRA
49             __ASM_EMIT("add     $0x4, %[dst]")          // dst ++
50             __ASM_EMIT("dec     %[count]")              // count --
51             __ASM_EMIT("jnz     1b")                    // count > 0 ?
52 
53             // Loop not multiple of 4
54             __ASM_EMIT("2:")
55 
56             : [count] "+r" (count),
57               [dst] "+r"(dst),
58               [src] "+r"(src),
59               [t1] "=&r" (t1),
60               [t2] "=&r" (t2)
61             :
62             : "cc", "memory"
63         );
64     }
65 #endif /* ARCH_I386 */
66 
67 #ifdef ARCH_X86_64
rgba32_to_bgra32(void * dst,const void * src,size_t count)68     void rgba32_to_bgra32(void *dst, const void *src, size_t count)
69     {
70         uint32_t t1, t2, t3;
71 
72         ARCH_X86_ASM
73         (
74             // Check count
75             __ASM_EMIT("sub     $4, %[count]")
76             __ASM_EMIT("jb      2f")
77 
78             // Loop multiple of 4
79             __ASM_EMIT("1:")
80             __ASM_EMIT("mov     0x00(%[src]), %[t1]")   // t1 = src[0] = RGBA
81             __ASM_EMIT("mov     %[t1], %[t2]")          // t1 = src[0] = RGBA
82             __ASM_EMIT("and     $0x00ff00ff, %[t1]")    // t1 = R0B0
83             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
84             __ASM_EMIT("ror     $16, %[t1]")            // t1 = B0R0
85             __ASM_EMIT("mov     0x04(%[src]), %[t3]")   // t3 = src[1] = RGBA
86             __ASM_EMIT("or      %[t2], %[t1]")          // t1 = BGRA
87             __ASM_EMIT("mov     %[t1], 0x00(%[dst])")   // dst[0] = BGRA
88 
89             __ASM_EMIT("mov     %[t3], %[t2]")          // t2 = RGBA
90             __ASM_EMIT("and     $0x00ff00ff, %[t3]")    // t3 = R0B0
91             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
92             __ASM_EMIT("ror     $16, %[t3]")            // t3 = B0R0
93             __ASM_EMIT("mov     0x08(%[src]), %[t1]")   // t1 = src[2] = RGBA
94             __ASM_EMIT("or      %[t2], %[t3]")          // t3 = BGRA
95             __ASM_EMIT("mov     %[t3], 0x04(%[dst])")   // dst[1] = BGRA
96 
97             __ASM_EMIT("mov     %[t1], %[t2]")          // t2 = RGBA
98             __ASM_EMIT("and     $0x00ff00ff, %[t1]")    // t1 = R0B0
99             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
100             __ASM_EMIT("ror     $16, %[t1]")            // t1 = B0R0
101             __ASM_EMIT("mov     0x0c(%[src]), %[t3]")   // t3 = src[3] = RGBA
102             __ASM_EMIT("or      %[t2], %[t1]")          // t1 = BGRA
103             __ASM_EMIT("mov     %[t1], 0x08(%[dst])")   // dst[2] = BGRA
104 
105             __ASM_EMIT("mov     %[t3], %[t2]")          // t2 = RGBA
106             __ASM_EMIT("and     $0x00ff00ff, %[t3]")    // t3 = R0B0
107             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
108             __ASM_EMIT("ror     $16, %[t3]")            // t3 = B0R0
109             __ASM_EMIT("add     $0x10, %[src]")         // src += 16
110             __ASM_EMIT("or      %[t2], %[t3]")          // t3 = BGRA
111             __ASM_EMIT("mov     %[t3], 0x0c(%[dst])")   // dst[3] = BGRA
112 
113             __ASM_EMIT("add     $0x10, %[dst]")         // dst += 16
114             __ASM_EMIT("sub     $4, %[count]")          // count -= 4
115             __ASM_EMIT("jae     1b")
116 
117             // Loop not multiple of 4
118             __ASM_EMIT("2:")
119             __ASM_EMIT("add     $3, %[count]")
120             __ASM_EMIT("jl      4f")
121 
122             // Complete tail
123             __ASM_EMIT("3:")
124             __ASM_EMIT("mov     (%[src]), %[t1]")       // t1 = *src = RGBA
125             __ASM_EMIT("mov     %[t1], %[t2]")          // t2 = t1 RGBA
126             __ASM_EMIT("add     $4, %[src]")            // src++
127             __ASM_EMIT("and     $0x00ff00ff, %[t1]")    // t1 = R0B0
128             __ASM_EMIT("and     $0xff00ff00, %[t2]")    // t2 = 0G0A
129             __ASM_EMIT("ror     $16, %[t1]")            // t1 = B0R0
130             __ASM_EMIT("or      %[t2], %[t1]")          // t1 = BGRA
131             __ASM_EMIT("mov     %[t1], (%[dst])")       // *dst = BGRA
132             __ASM_EMIT("add     $4, %[dst]")            // dst++
133             __ASM_EMIT("dec     %[count]")              // count--
134             __ASM_EMIT("jge     3b")
135 
136             __ASM_EMIT("4:")
137 
138             : [count] "+r" (count),
139               [dst] "+r"(dst),
140               [src] "+r"(src),
141               [t1] "=&r" (t1),
142               [t2] "=&r" (t2),
143               [t3] "=&r" (t3)
144             :
145             : "cc", "memory"
146         );
147     }
148     #endif /* ARCH_X86_64 */
149 
150     // Limited number of registers
abgr32_to_bgra32(void * dst,const void * src,size_t count)151     void abgr32_to_bgra32(void *dst, const void *src, size_t count)
152     {
153         uint32_t tmp;
154 
155         ARCH_X86_ASM
156         (
157             // Check count
158             __ASM_EMIT("test    %[count], %[count]")
159             __ASM_EMIT("jz      2f")
160 
161             // Loop multiple of 4
162             __ASM_EMIT("1:")
163             __ASM_EMIT("lodsl") // eax = *src = RGBA
164             __ASM_EMIT("ror     $8, %%eax")             // eax = ARGB
165             __ASM_EMIT("stosl") // eax = *src = RGBA
166             __ASM_EMIT("loop    1b")                    // count > 0 ?
167 
168             // Loop not multiple of 4
169             __ASM_EMIT("2:")
170 
171             : [count] "+c" (count),
172               [dst] "+D"(dst),
173               [src] "+S"(src),
174               [tmp] "=a"(tmp)
175             :
176             : "cc", "memory"
177         );
178     }
179 }
180 
181 #endif /* DSP_ARCH_X86_GRAPHICS_H_ */
182