1 /* -*- C++ -*-
2  *
3  *  graphics_accelerated.cpp - Accelerated graphics function chooser
4  *
5  *  Copyright (c) 2021 TellowKrinkle
6  *
7  *  tellowkrinkle@gmail.com
8  *
9  *  This program is free software; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License as published by
11  *  the Free Software Foundation; either version 2 of the License, or
12  *  (at your option) any later version.
13  *
14  *  This program is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *  GNU General Public License for more details.
18  *
19  *  You should have received a copy of the GNU General Public License
20  *  along with this program; if not, see <http://www.gnu.org/licenses/>
21  *  or write to the Free Software Foundation, Inc.,
22  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23  */
24 
25 #include "graphics_accelerated.h"
26 #include "graphics_common.h"
27 
28 #include "graphics_altivec.h"
29 #include "graphics_mmx.h"
30 #include "graphics_sse2.h"
31 #include "graphics_ssse3.h"
32 
33 #include <stdio.h>
34 
35 #ifdef USE_X86_GFX
36 # if defined(__SSSE3__)
37 #  define _M_SSE 0x301
38 # elif defined(__SSE2__)
39 #  define _M_SSE 0x200
40 # elif defined(__SSE__)
41 #  define _M_SSE 0x100
42 # elif defined(__MMX__)
43 #  define _M_SSE 0x001
44 # else
45 #  define _M_SSE 0x000
46 #endif
47 # include <cpuid.h>
48 #elif defined(USE_PPC_GFX)
49 # if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
50 #  ifdef __linux__
51 #include <asm/cputable.h>
52 #  else
53 #include <machine/cpu.h>
54 #  endif
55 #include <sys/auxv.h>
56 # elif defined(MACOSX) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
57 #  if defined(__NetBSD__) || defined(__OpenBSD__)
58 #include <machine/cpu.h>
59 #  endif
60 #include <sys/sysctl.h>
61 # endif
62 #endif
63 
imageFilterMean_Basic(unsigned char * src1,unsigned char * src2,unsigned char * dst,int length)64 void imageFilterMean_Basic(unsigned char *src1, unsigned char *src2, unsigned char *dst, int length) {
65     for (int i = 0; i < length; i++) {
66         dst[i] = mean_pixel(src1[i], src2[i]);
67     }
68 }
69 
imageFilterAddTo_Basic(unsigned char * src,unsigned char * dst,int length)70 void imageFilterAddTo_Basic(unsigned char *src, unsigned char *dst, int length) {
71     for (int i = 0; i < length; i++) {
72         addto_pixel(dst[i], src[i]);
73     }
74 }
75 
imageFilterSubFrom_Basic(unsigned char * src,unsigned char * dst,int length)76 void imageFilterSubFrom_Basic(unsigned char *src, unsigned char *dst, int length) {
77     for (int i = 0; i < length; i++) {
78         subfrom_pixel(dst[i], src[i]);
79     }
80 }
81 
// Portable (non-SIMD) blend entry point. All of the work is delegated to
// BASIC_BLEND(), which is defined outside this file.
// NOTE(review): BASIC_BLEND() takes no arguments, so it presumably refers to
// dst_buffer, src_buffer, alphap, alpha and the local `n` by name -- do not
// rename any of them without checking the macro.
void imageFilterBlend_Basic(Uint32 *dst_buffer, Uint32 *src_buffer,
                            Uint8 *alphap, int alpha, int length)
{
    // NOTE(review): the extra +1 presumably compensates for a pre-decrement
    // test inside BASIC_BLEND() -- confirm against the macro definition.
    int n = length + 1;
    BASIC_BLEND();
}
88 
// Placeholder for the non-accelerated masked alpha blend: reads none of its
// arguments, touches no surface, and always returns false.
// NOTE(review): presumably `false` tells the caller that no blending was
// done here and it must use its own fallback path -- confirm at the call site.
bool alphaMaskBlend_Basic(SDL_Surface* dst, SDL_Surface *s1, SDL_Surface *s2, SDL_Surface *mask_surface, const SDL_Rect& rect, Uint32 mask_value) {
    return false;
}
92 
alphaMaskBlendConst_Basic(SDL_Surface * dst,SDL_Surface * s1,SDL_Surface * s2,const SDL_Rect & rect,Uint32 mask_value)93 void alphaMaskBlendConst_Basic(SDL_Surface* dst, SDL_Surface *s1, SDL_Surface *s2, const SDL_Rect& rect, Uint32 mask_value)
94 {
95     int end_x = rect.x + rect.w;
96     int end_y = rect.y + rect.h;
97     for (int y = rect.y; y < end_y; y++) {
98         Uint32* s1p = getPointerToRow<Uint32>(s1, y);
99         Uint32* s2p = getPointerToRow<Uint32>(s2, y);
100         Uint32* dstp = getPointerToRow<Uint32>(dst, y);
101         for (int x = rect.x; x < end_x; x++) {
102             dstp[x] = blendMaskOnePixel(s1p[x], s2p[x], 0, mask_value);
103         }
104     }
105 }
106 
107 #ifdef USE_X86_GFX
// CPU vendor as derived from the CPUID leaf 0 vendor string.
enum Manufacturer {
    MF_UNKNOWN, // vendor string matched neither of the ones below (or CPUID failed)
    MF_INTEL,   // vendor string "GenuineIntel"
    MF_AMD,     // vendor string "AuthenticAMD"
};
113 
hasFastPSHUFB(Manufacturer mf,int eax,int ecx)114 static bool hasFastPSHUFB(Manufacturer mf, int eax, int ecx) {
115     if (!(ecx & bit_SSSE3)) { return false; }
116     if (mf != MF_INTEL) { return true; }
117     static const uint8_t SLOW_PSHUFB[] = { // From https://en.wikichip.org/wiki/intel/cpuid
118         0x0F, 0x16, // Merom
119         0x1C, 0x2C, // Bonnell
120         0x27, 0x35, 0x36, // Saltwell
121         0x37, 0x4A, 0x4D, 0x5A, 0x5D, // Silvermont
122         0x4C, // Airmont
123     };
124     uint8_t family = (eax >> 8) & 0xF;
125     if (family != 6) { return true; }
126     uint8_t model = (eax >> 4) & 0xF;
127     model |= (eax >> 12) & 0xF0;
128     for (int i = 0; i < sizeof(SLOW_PSHUFB); i++) {
129         if (SLOW_PSHUFB[i] == model) {
130             return false;
131         }
132     }
133     return true;
134 }
135 #endif
136 
accelerated()137 AcceleratedGraphicsFunctions AcceleratedGraphicsFunctions::accelerated() {
138     AcceleratedGraphicsFunctions out;
139 
140 #ifdef USE_X86_GFX
141     Manufacturer mf = MF_UNKNOWN;
142     unsigned int func, eax, ebx, ecx, edx;
143     if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) != 0) {
144         if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) {
145             mf = MF_AMD;
146         }
147         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) {
148             mf = MF_INTEL;
149         }
150     }
151     if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
152         printf("System info: Intel CPU, with functions: ");
153         if (_M_SSE >= 0x001 || edx & bit_MMX) {
154             printf("MMX ");
155             out._imageFilterMean = imageFilterMean_MMX;
156             out._imageFilterAddTo = imageFilterAddTo_MMX;
157             out._imageFilterSubFrom = imageFilterSubFrom_MMX;
158         }
159         if (_M_SSE >= 0x100 || edx & bit_SSE) {
160             printf("SSE ");
161         }
162         if (_M_SSE >= 0x200 || edx & bit_SSE2) {
163             printf("SSE2 ");
164             out._imageFilterMean = imageFilterMean_SSE2;
165             out._imageFilterAddTo = imageFilterAddTo_SSE2;
166             out._imageFilterSubFrom = imageFilterSubFrom_SSE2;
167             out._imageFilterBlend = imageFilterBlend_SSE2;
168             out._alphaMaskBlend = alphaMaskBlend_SSE2;
169             out._alphaMaskBlendConst = alphaMaskBlendConst_SSE2;
170         }
171         if (_M_SSE >= 0x301 || hasFastPSHUFB(mf, eax, ecx)) {
172             printf("SSSE3 ");
173             out._imageFilterBlend = imageFilterBlend_SSSE3;
174             out._alphaMaskBlend = alphaMaskBlend_SSSE3;
175             out._alphaMaskBlendConst = alphaMaskBlendConst_SSSE3;
176         }
177         printf("\n");
178     }
179 #elif defined(USE_PPC_GFX)
180     bool has_altivec = false;
181 # if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
182     // Determine if this PPC CPU supports AltiVec
183     {
184         unsigned long hwcap = 0;
185 #  ifdef __linux__
186         hwcap = getauxval(AT_HWCAP);
187 #  else
188         elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
189 #  endif
190         if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
191             has_altivec = true;
192             printf("System info: PowerPC CPU, supports altivec\n");
193         } else {
194             printf("System info: PowerPC CPU, DOES NOT support altivec\n");
195         }
196     }
197 # elif defined(MACOSX) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
198     // Determine if this PPC CPU supports AltiVec (Roto)
199     {
200         int altivec_present = 0;
201 
202         size_t length = sizeof(altivec_present);
203 #  if defined(MACOSX)
204         int error = sysctlbyname("hw.optional.altivec", &altivec_present, &length, NULL, 0);
205 #  elif defined(__FreeBSD__)
206         int error = sysctlbyname("hw.altivec", &altivec_present, &length, NULL, 0);
207 #  else
208         int mib[] = { CTL_MACHDEP, CPU_ALTIVEC };
209         int error = sysctl(mib, sizeof(mib)/sizeof(mib[0]), &altivec_present, &length, NULL, 0);
210 #  endif
211         if (error) {
212             return;
213         }
214         if (altivec_present) {
215             has_altivec = true;
216             printf("System info: PowerPC CPU, supports altivec\n");
217         } else {
218             printf("System info: PowerPC CPU, DOES NOT support altivec\n");
219         }
220     }
221 # endif
222     if (has_altivec) {
223         out._imageFilterMean = imageFilterMean_Altivec;
224         out._imageFilterAddTo = imageFilterAddTo_Altivec;
225         out._imageFilterSubFrom = imageFilterSubFrom_Altivec;
226     }
227 #endif
228     return out;
229 }
230