1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 27 авг. 2016 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef DSP_ARCH_X86_GRAPHICS_H_ 23 #define DSP_ARCH_X86_GRAPHICS_H_ 24 25 namespace x86 26 { 27 #ifdef ARCH_I386 28 // Limited number of registers rgba32_to_bgra32(void * dst,const void * src,size_t count)29 void rgba32_to_bgra32(void *dst, const void *src, size_t count) 30 { 31 uint32_t t1, t2; 32 33 ARCH_X86_ASM 34 ( 35 // Check count 36 __ASM_EMIT("test %[count], %[count]") 37 __ASM_EMIT("jz 2f") 38 39 // Loop multiple of 4 40 __ASM_EMIT("1:") 41 __ASM_EMIT("mov (%[src]), %[t1]") // t1 = *src = RGBA 42 __ASM_EMIT("mov %[t1], %[t2]") // t1 = *src = RGBA 43 __ASM_EMIT("and $0x00ff00ff, %[t1]") // t1 = R0B0 44 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 45 __ASM_EMIT("ror $16, %[t1]") // t1 = B0R0 46 __ASM_EMIT("add $0x4, %[src]") // src ++ 47 __ASM_EMIT("or %[t2], %[t1]") // t1 = BGRA 48 __ASM_EMIT("mov %[t1], (%[dst])") // *dst = BGRA 49 __ASM_EMIT("add $0x4, %[dst]") // dst ++ 50 __ASM_EMIT("dec %[count]") // count -- 51 __ASM_EMIT("jnz 1b") // count > 0 ? 52 53 // Loop not multiple of 4 54 __ASM_EMIT("2:") 55 56 : [count] "+r" (count), 57 [dst] "+r"(dst), 58 [src] "+r"(src), 59 [t1] "=&r" (t1), 60 [t2] "=&r" (t2) 61 : 62 : "cc", "memory" 63 ); 64 } 65 #endif /* ARCH_I386 */ 66 67 #ifdef ARCH_X86_64 rgba32_to_bgra32(void * dst,const void * src,size_t count)68 void rgba32_to_bgra32(void *dst, const void *src, size_t count) 69 { 70 uint32_t t1, t2, t3; 71 72 ARCH_X86_ASM 73 ( 74 // Check count 75 __ASM_EMIT("sub $4, %[count]") 76 __ASM_EMIT("jb 2f") 77 78 // Loop multiple of 4 79 __ASM_EMIT("1:") 80 __ASM_EMIT("mov 0x00(%[src]), %[t1]") // t1 = src[0] = RGBA 81 __ASM_EMIT("mov %[t1], %[t2]") // t1 = src[0] = RGBA 82 __ASM_EMIT("and $0x00ff00ff, %[t1]") // t1 = R0B0 83 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 84 __ASM_EMIT("ror $16, %[t1]") // t1 = B0R0 85 __ASM_EMIT("mov 0x04(%[src]), %[t3]") // t3 = src[1] = RGBA 86 __ASM_EMIT("or %[t2], %[t1]") // t1 = BGRA 87 __ASM_EMIT("mov %[t1], 0x00(%[dst])") // dst[0] = BGRA 88 89 __ASM_EMIT("mov %[t3], %[t2]") // t2 = RGBA 90 __ASM_EMIT("and $0x00ff00ff, %[t3]") // t3 = R0B0 91 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 92 __ASM_EMIT("ror $16, %[t3]") // t3 = B0R0 93 __ASM_EMIT("mov 0x08(%[src]), %[t1]") // t1 = src[2] = RGBA 94 __ASM_EMIT("or %[t2], %[t3]") // t3 = BGRA 95 __ASM_EMIT("mov %[t3], 0x04(%[dst])") // dst[1] = BGRA 96 97 __ASM_EMIT("mov %[t1], %[t2]") // t2 = RGBA 98 __ASM_EMIT("and $0x00ff00ff, %[t1]") // t1 = R0B0 99 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 100 __ASM_EMIT("ror $16, %[t1]") // t1 = B0R0 101 __ASM_EMIT("mov 0x0c(%[src]), %[t3]") // t3 = src[3] = RGBA 102 __ASM_EMIT("or %[t2], %[t1]") // t1 = BGRA 103 __ASM_EMIT("mov %[t1], 0x08(%[dst])") // dst[2] = BGRA 104 105 __ASM_EMIT("mov %[t3], %[t2]") // t2 = RGBA 106 __ASM_EMIT("and $0x00ff00ff, %[t3]") // t3 = R0B0 107 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 108 __ASM_EMIT("ror $16, %[t3]") // t3 = B0R0 109 __ASM_EMIT("add $0x10, %[src]") // src += 16 110 __ASM_EMIT("or %[t2], %[t3]") // t3 = BGRA 111 __ASM_EMIT("mov %[t3], 0x0c(%[dst])") // dst[3] = BGRA 112 113 __ASM_EMIT("add $0x10, %[dst]") // dst += 16 114 __ASM_EMIT("sub $4, %[count]") // count -= 4 115 __ASM_EMIT("jae 1b") 116 117 // Loop not multiple of 4 118 __ASM_EMIT("2:") 119 __ASM_EMIT("add $3, %[count]") 120 __ASM_EMIT("jl 4f") 121 122 // Complete tail 123 __ASM_EMIT("3:") 124 __ASM_EMIT("mov (%[src]), %[t1]") // t1 = *src = RGBA 125 __ASM_EMIT("mov %[t1], %[t2]") // t2 = t1 RGBA 126 __ASM_EMIT("add $4, %[src]") // src++ 127 __ASM_EMIT("and $0x00ff00ff, %[t1]") // t1 = R0B0 128 __ASM_EMIT("and $0xff00ff00, %[t2]") // t2 = 0G0A 129 __ASM_EMIT("ror $16, %[t1]") // t1 = B0R0 130 __ASM_EMIT("or %[t2], %[t1]") // t1 = BGRA 131 __ASM_EMIT("mov %[t1], (%[dst])") // *dst = BGRA 132 __ASM_EMIT("add $4, %[dst]") // dst++ 133 __ASM_EMIT("dec %[count]") // count-- 134 __ASM_EMIT("jge 3b") 135 136 __ASM_EMIT("4:") 137 138 : [count] "+r" (count), 139 [dst] "+r"(dst), 140 [src] "+r"(src), 141 [t1] "=&r" (t1), 142 [t2] "=&r" (t2), 143 [t3] "=&r" (t3) 144 : 145 : "cc", "memory" 146 ); 147 } 148 #endif /* ARCH_X86_64 */ 149 150 // Limited number of registers abgr32_to_bgra32(void * dst,const void * src,size_t count)151 void abgr32_to_bgra32(void *dst, const void *src, size_t count) 152 { 153 uint32_t tmp; 154 155 ARCH_X86_ASM 156 ( 157 // Check count 158 __ASM_EMIT("test %[count], %[count]") 159 __ASM_EMIT("jz 2f") 160 161 // Loop multiple of 4 162 __ASM_EMIT("1:") 163 __ASM_EMIT("lodsl") // eax = *src = RGBA 164 __ASM_EMIT("ror $8, %%eax") // eax = ARGB 165 __ASM_EMIT("stosl") // eax = *src = RGBA 166 __ASM_EMIT("loop 1b") // count > 0 ? 167 168 // Loop not multiple of 4 169 __ASM_EMIT("2:") 170 171 : [count] "+c" (count), 172 [dst] "+D"(dst), 173 [src] "+S"(src), 174 [tmp] "=a"(tmp) 175 : 176 : "cc", "memory" 177 ); 178 } 179 } 180 181 #endif /* DSP_ARCH_X86_GRAPHICS_H_ */ 182