1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *   Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com)          *
5 *                                                                         *
6 *   This program is free software; you can redistribute it and/or modify  *
7 *   it under the terms of the GNU General Public License as published by  *
8 *   the Free Software Foundation; either version 2 of the License, or     *
9 *   (at your option) any later version.                                   *
10 *                                                                         *
11 *   This program is distributed in the hope that it will be useful,       *
12 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14 *   GNU General Public License for more details.                          *
15 *                                                                         *
16 *   You should have received a copy of the GNU General Public License     *
17 *   along with this program; if not, write to the                         *
18 *   Free Software Foundation, Inc.,                                       *
19 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
20 ***************************************************************************/
21 
22 #ifndef GPU_UNAI_H
23 #define GPU_UNAI_H
24 
25 #include "gpu.h"
26 
27 // Header shared between both standalone gpu_unai (gpu.cpp) and new
28 // gpulib-compatible gpu_unai (gpulib_if.cpp)
29 // -> Anything here should be for gpu_unai's private use. <-
30 
31 ///////////////////////////////////////////////////////////////////////////////
32 //  Compile Options
33 
34 //#define ENABLE_GPU_NULL_SUPPORT   // Enables NullGPU support
//#define ENABLE_GPU_LOG_SUPPORT    // Enables GPU logger (very slow; only for Windows debugging)
36 //#define ENABLE_GPU_ARMV7			// Enables ARMv7 optimized assembly
37 
38 //Poly routine options (default is integer math and accurate division)
39 //#define GPU_UNAI_USE_FLOATMATH         // Use float math in poly routines
40 //#define GPU_UNAI_USE_FLOAT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is defined,
41                                          //  use multiply-by-inverse for division
42 //#define GPU_UNAI_USE_INT_DIV_MULTINV   // If GPU_UNAI_USE_FLOATMATH is *not*
43                                          //  defined, use old inaccurate division
44 
45 
// Function qualifiers for small helpers: internal linkage plus the GCC/Clang
// 'always_inline' attribute. Both names expand to exactly the same text.
#define GPU_INLINE static inline __attribute__((always_inline))
#define INLINE     static inline __attribute__((always_inline))
48 
// Short names for the <stdint.h> fixed-width integer types.
// These are preprocessor macros, not typedefs, so they are substituted
// textually wherever the identifiers appear after this point.
#define u8  uint8_t
#define s8  int8_t
#define u16 uint16_t
#define s16 int16_t
#define u32 uint32_t
#define s32 int32_t
#define s64 int64_t
56 
// A single pointer value that can be viewed as a pointer to any of the
// 8/16/32-bit signed or unsigned integer types, or as an untyped pointer.
union PtrUnion
{
	u32  *U4;   // as pointer to unsigned 32-bit
	s32  *S4;   // as pointer to signed 32-bit
	u16  *U2;   // as pointer to unsigned 16-bit
	s16  *S2;   // as pointer to signed 16-bit
	u8   *U1;   // as pointer to unsigned 8-bit
	s8   *S1;   // as pointer to signed 8-bit
	void *ptr;  // as untyped pointer
};
67 
// 64 bytes of packet storage, accessible as 16 x 32-bit, 32 x 16-bit or
// 64 x 8-bit elements, each in signed or unsigned form.
union GPUPacket
{
	u32 U4[16];
	s32 S4[16];
	u16 U2[32];
	s16 S2[32];
	u8  U1[64];
	s8  S1[64];
};
77 
// Exchanges the contents of x and y via a temporary copy of x.
template<class T>
static inline void SwapValues(T &x, T &y)
{
	T held(x);
	x = y;
	y = held;
}
82 
// Returns a when a < b, otherwise b (so b is returned for equal values).
template<typename T>
static inline T Min2 (const T a, const T b)
{
	if (a < b)
		return a;
	return b;
}
88 
// Returns the smallest of a, b and c, comparing with operator< only.
template<typename T>
static inline T Min3 (const T a, const T b, const T c)
{
	// Smaller of the first pair, then compare against the third value.
	const T low_ab = (a<b) ? a : b;
	return (low_ab<c) ? low_ab : c;
}
94 
// Returns a when a > b, otherwise b (so b is returned for equal values).
template<typename T>
static inline T Max2 (const T a, const T b)
{
	if (a > b)
		return a;
	return b;
}
100 
// Returns the largest of a, b and c, comparing with operator> only.
template<typename T>
static inline T Max3 (const T a, const T b, const T c)
{
	// Larger of the first pair, then compare against the third value.
	const T high_ab = (a>b) ? a : b;
	return (high_ab>c) ? high_ab : c;
}
106 
107 
108 ///////////////////////////////////////////////////////////////////////////////
109 //  GPU Raster Macros
110 
// Pack the 24bpp color parameter of a GPU command into 15 bits: the top
// 5 bits of each 8-bit channel land in bits 0-4 (from bits 3-7),
// bits 5-9 (from bits 11-15) and bits 10-14 (from bits 19-23).
// Bit 15 (mask bit) of the result is always left clear by this macro.
#define	GPU_RGB16(rgb) ((((rgb)>>9)&0x7C00)|(((rgb)>>6)&0x03E0)|(((rgb)>>3)&0x001F))
113 
114 // Sign-extend 11-bit coordinate command param
115 #define GPU_EXPANDSIGN(x) (((s32)(x)<<(32-11))>>(32-11))
116 
117 // Max difference between any two X or Y primitive coordinates
118 #define CHKMAX_X 1024
119 #define CHKMAX_Y 512
120 
// Framebuffer geometry: 1024 x 512 entries of 2 bytes each.
#define	FRAME_BUFFER_SIZE	(1024*512*2)   // Total size in bytes
#define	FRAME_WIDTH			  1024         // Entries per row
#define	FRAME_HEIGHT		  512          // Number of rows
#define	FRAME_OFFSET(x,y)	(((y)<<10)+(x))   // Linear index of (x,y): y*1024 + x
#define FRAME_BYTE_STRIDE     2048         // Bytes per row (1024 * 2)
#define FRAME_BYTES_PER_PIXEL 2
127 
// Checked signed division.
//  rs: dividend
//  rt: divisor
// Returns rs / rt (truncated toward zero), or 0 when rt is 0.
// A divisor of -1 is handled by negating in unsigned arithmetic, because
//  INT32_MIN / -1 overflows s32 (undefined behavior; traps on some CPUs).
//  In that one case the result wraps around to INT32_MIN.
static inline s32 GPU_DIV(s32 rs, s32 rt)
{
	if (rt == 0) {
		return 0;
	}
	if (rt == -1) {
		return (s32)(0u - (u32)rs);
	}
	return rs / rt;
}
132 
// Unchecked counterpart of GPU_DIV: both operands are converted to int and
// divided directly. The caller must guarantee that rt is non-zero.
#define GPU_FAST_DIV(rs, rt) ((int)(rs) / (int)(rt))
135 
// Complete state of the gpu_unai renderer, gathered into one struct.
// Which members exist depends on USE_GPULIB: 'downscale_vram' is present
// only when it is defined, and the block of standalone-only variables is
// present only when it is not.
struct gpu_unai_t {
	u32 GPU_GP1;            // NOTE(review): presumably GP1 status/control value -- confirm
	GPUPacket PacketBuffer; // Storage for the words of a command packet
	u16 *vram;              // Ptr to VRAM, accessed as 16-bit values

#ifdef USE_GPULIB
	u16 *downscale_vram;    // NOTE(review): presumably a downscaled copy of VRAM -- confirm
#endif
	////////////////////////////////////////////////////////////////////////////
	// Variables used only by older standalone version of gpu_unai (gpu.cpp)
#ifndef USE_GPULIB
	u32  GPU_GP0;
	u32  tex_window;       // Current texture window vals (set by GP0(E2h) cmd)
	s32  PacketCount;
	s32  PacketIndex;
	bool fb_dirty;         // Framebuffer is dirty (according to GPU)

	//  Display status
	//  NOTE: Standalone older gpu_unai didn't care about horiz display range
	u16  DisplayArea[6];   // [0] : Start of display area (in VRAM) X
	                       // [1] : Start of display area (in VRAM) Y
	                       // [2] : Display mode resolution HORIZONTAL
	                       // [3] : Display mode resolution VERTICAL
	                       // [4] : Vertical display range (on TV) START
	                       // [5] : Vertical display range (on TV) END

	////////////////////////////////////////////////////////////////////////////
	//  Dma Transfers info
	struct {
		s32  px,py;
		s32  x_end,y_end;
		u16* pvram;
		u32 *last_dma;     // Last dma pointer
		bool FrameToRead;  // Load image in progress
		bool FrameToWrite; // Store image in progress
	} dma;

	////////////////////////////////////////////////////////////////////////////
	//  Frameskip
	struct {
		int  skipCount;    // Frame skip (0,1,2,3...)
		bool isSkip;       // Skip frame (according to GPU)
		bool skipFrame;    // Skip this frame (according to frame skip)
		bool wasSkip;      // Skip frame old value (according to GPU)
		bool skipGPU;      // Skip GPU primitives
	} frameskip;
#endif
	// END of standalone gpu_unai variables
	////////////////////////////////////////////////////////////////////////////

	u32 TextureWindowCur;  // Current setting from last GP0(0xE2) cmd (raw form)
	u8  TextureWindow[4];  // [0] : Texture window offset X
	                       // [1] : Texture window offset Y
	                       // [2] : Texture window mask X
	                       // [3] : Texture window mask Y

	u16 DrawingArea[4];    // [0] : Drawing area top left X
	                       // [1] : Drawing area top left Y
	                       // [2] : Drawing area bottom right X
	                       // [3] : Drawing area bottom right Y

	s16 DrawingOffset[2];  // [0] : Drawing offset X (signed)
	                       // [1] : Drawing offset Y (signed)

	u16* TBA;              // Ptr to current texture in VRAM
	u16* CBA;              // Ptr to current CLUT in VRAM

	////////////////////////////////////////////////////////////////////////////
	//  Inner Loop parameters

	// 22.10 Fixed-pt texture coords, mask, scanline advance
	// NOTE: U,V are no longer packed together into one u32, this proved to be
	//  too imprecise, leading to pixel dropouts.  Example: NFS3's skybox.
	u32 u, v;
	u32 u_msk, v_msk;
	s32 u_inc, v_inc;

	// Color for Gouraud-shaded prims
	// Packed fixed-pt 8.3:8.3:8.2 rgb triplet
	//  layout:  rrrrrrrrXXXggggggggXXXbbbbbbbbXX
	//           ^ bit 31                       ^ bit 0
	u32 gCol;
	u32 gInc;          // Increment along scanline for gCol

	// Color for flat-shaded, texture-blended prims
	u8  r5, g5, b5;    // 5-bit light for undithered prims
	u8  r8, g8, b8;    // 8-bit light for dithered prims

	// Color for flat-shaded, untextured prims
	u16 PixelData;      // bgr555 color for untextured flat-shaded polys

	// End of inner Loop parameters
	////////////////////////////////////////////////////////////////////////////


	u8 blit_mask;           // Determines what pixels to skip when rendering.
	                        //  Only useful on low-resolution devices using
	                        //  a simple pixel-dropping downscaler for PS1
	                        //  high-res modes. See 'pixel_skip' option.

	u8 ilace_mask;          // Determines what lines to skip when rendering.
	                        //  Normally 0 when PS1 240 vertical res is in
	                        //  use and ilace_force is 0. When running in
	                        //  PS1 480 vertical res on a low-resolution
	                        //  device (320x240), will usually be set to 1
	                        //  so odd lines are not rendered. (Unless future
	                        //  full-screen scaling option is in use ..TODO)

	bool prog_ilace_flag;   // Tracks successive frames for 'prog_ilace' option

	u8 BLEND_MODE;
	u8 TEXT_MODE;
	u8 Masking;

	u16 PixelMSB;

	gpu_unai_config_t config;  // Option values queried by the *Enabled() helpers in this header

	u8  LightLUT[32*32];    // 5-bit lighting LUT (gpu_inner_light.h)
	u32 DitherMatrix[64];   // Matrix of dither coefficients
};
257 
// The renderer state instance. Being 'static', every translation unit that
// includes this header gets its own private copy.
static gpu_unai_t gpu_unai;

// Global config that frontend can alter.. Values are read in GPU_init().
// TODO: if frontend menu modifies a setting, add a function that can notify
// GPU plugin to use new setting.
// NOTE(review): this is a non-static definition inside a header; if more than
// one translation unit of the same binary includes this file, the linker would
// presumably report a duplicate symbol -- confirm only one TU includes it.
gpu_unai_config_t gpu_unai_config_ext;
264 
265 ///////////////////////////////////////////////////////////////////////////////
266 // Internal inline funcs to get option status: (Allows flexibility)
LightingEnabled()267 static inline bool LightingEnabled()
268 {
269 	return gpu_unai.config.lighting;
270 }
271 
FastLightingEnabled()272 static inline bool FastLightingEnabled()
273 {
274 	return gpu_unai.config.fast_lighting;
275 }
276 
BlendingEnabled()277 static inline bool BlendingEnabled()
278 {
279 	return gpu_unai.config.blending;
280 }
281 
DitheringEnabled()282 static inline bool DitheringEnabled()
283 {
284 	return gpu_unai.config.dithering;
285 }
286 
// Development/experimentation hook, currently hard-wired to 'false'.
// Changing it to return true would make the renderer ignore the status
//  register bit 9 setting (dither enable), while dithering would still be
//  limited to Gouraud-shaded or texture-blended polys.
static inline bool ForcedDitheringEnabled()
{
	const bool force_dither = false;
	return force_dither;
}
295 
ProgressiveInterlaceEnabled()296 static inline bool ProgressiveInterlaceEnabled()
297 {
298 #ifdef USE_GPULIB
299 	// Using this old option greatly decreases quality of image. Disabled
300 	//  for now when using new gpulib, since it also adds more work in loops.
301 	return false;
302 #else
303 	return gpu_unai.config.prog_ilace;
304 #endif
305 }
306 
307 // For now, 320x240 output resolution is assumed, using simple line-skipping
308 //  and pixel-skipping downscaler.
309 // TODO: Flesh these out so they return useful values based on whether
310 //       running on higher-res device or a resampling downscaler is enabled.
PixelSkipEnabled()311 static inline bool PixelSkipEnabled()
312 {
313 	return gpu_unai.config.pixel_skip || gpu_unai.config.scale_hires;
314 }
315 
// Line skipping is currently unconditional: always reports true.
static inline bool LineSkipEnabled()
{
	const bool line_skip_on = true;
	return line_skip_on;
}
320 
321 #endif // GPU_UNAI_H
322