1 /* -*- C++ -*-
2 *
3 * graphics_accelerated.cpp - Accelerated graphics function chooser
4 *
5 * Copyright (c) 2021 TellowKrinkle
6 *
7 * tellowkrinkle@gmail.com
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, see <http://www.gnu.org/licenses/>
21 * or write to the Free Software Foundation, Inc.,
22 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #include "graphics_accelerated.h"
26 #include "graphics_common.h"
27
28 #include "graphics_altivec.h"
29 #include "graphics_mmx.h"
30 #include "graphics_sse2.h"
31 #include "graphics_ssse3.h"
32
33 #include <stdio.h>
34
35 #ifdef USE_X86_GFX
36 # if defined(__SSSE3__)
37 # define _M_SSE 0x301
38 # elif defined(__SSE2__)
39 # define _M_SSE 0x200
40 # elif defined(__SSE__)
41 # define _M_SSE 0x100
42 # elif defined(__MMX__)
43 # define _M_SSE 0x001
44 # else
45 # define _M_SSE 0x000
46 #endif
47 # include <cpuid.h>
48 #elif defined(USE_PPC_GFX)
49 # if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
50 # ifdef __linux__
51 #include <asm/cputable.h>
52 # else
53 #include <machine/cpu.h>
54 # endif
55 #include <sys/auxv.h>
56 # elif defined(MACOSX) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
57 # if defined(__NetBSD__) || defined(__OpenBSD__)
58 #include <machine/cpu.h>
59 # endif
60 #include <sys/sysctl.h>
61 # endif
62 #endif
63
imageFilterMean_Basic(unsigned char * src1,unsigned char * src2,unsigned char * dst,int length)64 void imageFilterMean_Basic(unsigned char *src1, unsigned char *src2, unsigned char *dst, int length) {
65 for (int i = 0; i < length; i++) {
66 dst[i] = mean_pixel(src1[i], src2[i]);
67 }
68 }
69
imageFilterAddTo_Basic(unsigned char * src,unsigned char * dst,int length)70 void imageFilterAddTo_Basic(unsigned char *src, unsigned char *dst, int length) {
71 for (int i = 0; i < length; i++) {
72 addto_pixel(dst[i], src[i]);
73 }
74 }
75
imageFilterSubFrom_Basic(unsigned char * src,unsigned char * dst,int length)76 void imageFilterSubFrom_Basic(unsigned char *src, unsigned char *dst, int length) {
77 for (int i = 0; i < length; i++) {
78 subfrom_pixel(dst[i], src[i]);
79 }
80 }
81
/// Portable (non-SIMD) blend entry point.
///
/// The whole operation is delegated to the BASIC_BLEND() macro, which is
/// defined outside this file.
/// NOTE(review): the macro presumably reads the locals/parameters `n`,
/// `dst_buffer`, `src_buffer`, `alphap` and `alpha` by name, so none of them
/// may be renamed here — confirm against the macro definition.
void imageFilterBlend_Basic(Uint32 *dst_buffer, Uint32 *src_buffer,
                            Uint8 *alphap, int alpha, int length)
{
    // Counter starts one past `length`; presumably BASIC_BLEND() pre-decrements
    // it before each pixel — TODO confirm in the macro definition.
    int n = length + 1;
    BASIC_BLEND();
}
88
alphaMaskBlend_Basic(SDL_Surface * dst,SDL_Surface * s1,SDL_Surface * s2,SDL_Surface * mask_surface,const SDL_Rect & rect,Uint32 mask_value)89 bool alphaMaskBlend_Basic(SDL_Surface* dst, SDL_Surface *s1, SDL_Surface *s2, SDL_Surface *mask_surface, const SDL_Rect& rect, Uint32 mask_value) {
90 return false;
91 }
92
alphaMaskBlendConst_Basic(SDL_Surface * dst,SDL_Surface * s1,SDL_Surface * s2,const SDL_Rect & rect,Uint32 mask_value)93 void alphaMaskBlendConst_Basic(SDL_Surface* dst, SDL_Surface *s1, SDL_Surface *s2, const SDL_Rect& rect, Uint32 mask_value)
94 {
95 int end_x = rect.x + rect.w;
96 int end_y = rect.y + rect.h;
97 for (int y = rect.y; y < end_y; y++) {
98 Uint32* s1p = getPointerToRow<Uint32>(s1, y);
99 Uint32* s2p = getPointerToRow<Uint32>(s2, y);
100 Uint32* dstp = getPointerToRow<Uint32>(dst, y);
101 for (int x = rect.x; x < end_x; x++) {
102 dstp[x] = blendMaskOnePixel(s1p[x], s2p[x], 0, mask_value);
103 }
104 }
105 }
106
107 #ifdef USE_X86_GFX
/// CPU vendor as far as this file distinguishes it.
enum Manufacturer {
    MF_UNKNOWN = 0, ///< Vendor string not recognised (or CPUID leaf 0 unavailable)
    MF_INTEL   = 1, ///< Vendor string matched the Intel signature
    MF_AMD     = 2  ///< Vendor string matched the AMD signature
};
113
hasFastPSHUFB(Manufacturer mf,int eax,int ecx)114 static bool hasFastPSHUFB(Manufacturer mf, int eax, int ecx) {
115 if (!(ecx & bit_SSSE3)) { return false; }
116 if (mf != MF_INTEL) { return true; }
117 static const uint8_t SLOW_PSHUFB[] = { // From https://en.wikichip.org/wiki/intel/cpuid
118 0x0F, 0x16, // Merom
119 0x1C, 0x2C, // Bonnell
120 0x27, 0x35, 0x36, // Saltwell
121 0x37, 0x4A, 0x4D, 0x5A, 0x5D, // Silvermont
122 0x4C, // Airmont
123 };
124 uint8_t family = (eax >> 8) & 0xF;
125 if (family != 6) { return true; }
126 uint8_t model = (eax >> 4) & 0xF;
127 model |= (eax >> 12) & 0xF0;
128 for (int i = 0; i < sizeof(SLOW_PSHUFB); i++) {
129 if (SLOW_PSHUFB[i] == model) {
130 return false;
131 }
132 }
133 return true;
134 }
135 #endif
136
accelerated()137 AcceleratedGraphicsFunctions AcceleratedGraphicsFunctions::accelerated() {
138 AcceleratedGraphicsFunctions out;
139
140 #ifdef USE_X86_GFX
141 Manufacturer mf = MF_UNKNOWN;
142 unsigned int func, eax, ebx, ecx, edx;
143 if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) != 0) {
144 if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) {
145 mf = MF_AMD;
146 }
147 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) {
148 mf = MF_INTEL;
149 }
150 }
151 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
152 printf("System info: Intel CPU, with functions: ");
153 if (_M_SSE >= 0x001 || edx & bit_MMX) {
154 printf("MMX ");
155 out._imageFilterMean = imageFilterMean_MMX;
156 out._imageFilterAddTo = imageFilterAddTo_MMX;
157 out._imageFilterSubFrom = imageFilterSubFrom_MMX;
158 }
159 if (_M_SSE >= 0x100 || edx & bit_SSE) {
160 printf("SSE ");
161 }
162 if (_M_SSE >= 0x200 || edx & bit_SSE2) {
163 printf("SSE2 ");
164 out._imageFilterMean = imageFilterMean_SSE2;
165 out._imageFilterAddTo = imageFilterAddTo_SSE2;
166 out._imageFilterSubFrom = imageFilterSubFrom_SSE2;
167 out._imageFilterBlend = imageFilterBlend_SSE2;
168 out._alphaMaskBlend = alphaMaskBlend_SSE2;
169 out._alphaMaskBlendConst = alphaMaskBlendConst_SSE2;
170 }
171 if (_M_SSE >= 0x301 || hasFastPSHUFB(mf, eax, ecx)) {
172 printf("SSSE3 ");
173 out._imageFilterBlend = imageFilterBlend_SSSE3;
174 out._alphaMaskBlend = alphaMaskBlend_SSSE3;
175 out._alphaMaskBlendConst = alphaMaskBlendConst_SSSE3;
176 }
177 printf("\n");
178 }
179 #elif defined(USE_PPC_GFX)
180 bool has_altivec = false;
181 # if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
182 // Determine if this PPC CPU supports AltiVec
183 {
184 unsigned long hwcap = 0;
185 # ifdef __linux__
186 hwcap = getauxval(AT_HWCAP);
187 # else
188 elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
189 # endif
190 if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
191 has_altivec = true;
192 printf("System info: PowerPC CPU, supports altivec\n");
193 } else {
194 printf("System info: PowerPC CPU, DOES NOT support altivec\n");
195 }
196 }
197 # elif defined(MACOSX) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
198 // Determine if this PPC CPU supports AltiVec (Roto)
199 {
200 int altivec_present = 0;
201
202 size_t length = sizeof(altivec_present);
203 # if defined(MACOSX)
204 int error = sysctlbyname("hw.optional.altivec", &altivec_present, &length, NULL, 0);
205 # elif defined(__FreeBSD__)
206 int error = sysctlbyname("hw.altivec", &altivec_present, &length, NULL, 0);
207 # else
208 int mib[] = { CTL_MACHDEP, CPU_ALTIVEC };
209 int error = sysctl(mib, sizeof(mib)/sizeof(mib[0]), &altivec_present, &length, NULL, 0);
210 # endif
211 if (error) {
212 return;
213 }
214 if (altivec_present) {
215 has_altivec = true;
216 printf("System info: PowerPC CPU, supports altivec\n");
217 } else {
218 printf("System info: PowerPC CPU, DOES NOT support altivec\n");
219 }
220 }
221 # endif
222 if (has_altivec) {
223 out._imageFilterMean = imageFilterMean_Altivec;
224 out._imageFilterAddTo = imageFilterAddTo_Altivec;
225 out._imageFilterSubFrom = imageFilterSubFrom_Altivec;
226 }
227 #endif
228 return out;
229 }
230