1 // license:BSD-3-Clause
2 // copyright-holders:smf
3 /*
4  * PlayStation Geometry Transformation Engine emulator
5  *
6  * Copyright 2003-2013 smf
7  *
8  */
9 
10 #include "emu.h"
11 #include "gte.h"
12 
13 #if 0
14 void ATTR_PRINTF(2,3) GTELOG( uint32_t pc, const char *a, ...)
15 {
16 	va_list va;
17 	char s_text[ 1024 ];
18 	va_start( va, a );
19 	vsprintf( s_text, a, va );
20 	va_end( va );
21 	logerror( "%08x: GTE: %s\n", pc, s_text );
22 }
23 #else
GTELOG(uint32_t pc,const char * a,...)24 static inline void ATTR_PRINTF(2,3) GTELOG( uint32_t pc, const char *a, ...) {}
25 #endif
26 
27 
/*
 * Shorthand for fields of the 32 coprocessor data registers in m_cp2dr[].
 *
 * Suffix of the accessed member:
 *   .d  / .sd   whole register
 *   .w  / .sw   low (l) and high (h) half of the register
 *   .b          individual parts l, h, h2, h3 (presumably the four bytes)
 * The "s" forms are presumably the signed views of the same storage; the
 * register type itself is declared outside this file.
 */

// registers 0-5: three vectors, x/y sharing one register and z in the next
// NOTE(review): VX1..VZ2 use the .w view while VX0..VZ0 use .sw; the code in
// this file never references VX1..VZ2 directly (it goes through VX( n ) etc.)
#define VX0  ( m_cp2dr[ 0 ].sw.l )
#define VY0  ( m_cp2dr[ 0 ].sw.h )
#define VZ0  ( m_cp2dr[ 1 ].sw.l )
#define VX1  ( m_cp2dr[ 2 ].w.l )
#define VY1  ( m_cp2dr[ 2 ].w.h )
#define VZ1  ( m_cp2dr[ 3 ].w.l )
#define VX2  ( m_cp2dr[ 4 ].w.l )
#define VY2  ( m_cp2dr[ 4 ].w.h )
#define VZ2  ( m_cp2dr[ 5 ].w.l )
// register 6: colour components plus CODE, which is copied into CD2 by the colour opcodes
#define R    ( m_cp2dr[ 6 ].b.l )
#define G    ( m_cp2dr[ 6 ].b.h )
#define B    ( m_cp2dr[ 6 ].b.h2 )
#define CODE ( m_cp2dr[ 6 ].b.h3 )
// register 7: written by AVSZ3 / AVSZ4
#define OTZ  ( m_cp2dr[ 7 ].w.l )
// registers 8-11: IR0 and the IR1..IR3 result vector
#define IR0  ( m_cp2dr[ 8 ].sw.l )
#define IR1  ( m_cp2dr[ 9 ].sw.l )
#define IR2  ( m_cp2dr[ 10 ].sw.l )
#define IR3  ( m_cp2dr[ 11 ].sw.l )
// registers 12-15: screen X/Y queue (SXY0 oldest, SXY2 newest); SXYP is handled
// specially by getcp2dr / setcp2dr
#define SXY0 ( m_cp2dr[ 12 ].d )
#define SX0  ( m_cp2dr[ 12 ].sw.l )
#define SY0  ( m_cp2dr[ 12 ].sw.h )
#define SXY1 ( m_cp2dr[ 13 ].d )
#define SX1  ( m_cp2dr[ 13 ].sw.l )
#define SY1  ( m_cp2dr[ 13 ].sw.h )
#define SXY2 ( m_cp2dr[ 14 ].d )
#define SX2  ( m_cp2dr[ 14 ].sw.l )
#define SY2  ( m_cp2dr[ 14 ].sw.h )
#define SXYP ( m_cp2dr[ 15 ].d )
#define SXP  ( m_cp2dr[ 15 ].sw.l )
#define SYP  ( m_cp2dr[ 15 ].sw.h )
// registers 16-19: screen Z queue (SZ0 oldest, SZ3 newest)
#define SZ0  ( m_cp2dr[ 16 ].w.l )
#define SZ1  ( m_cp2dr[ 17 ].w.l )
#define SZ2  ( m_cp2dr[ 18 ].w.l )
#define SZ3  ( m_cp2dr[ 19 ].w.l )
// registers 20-22: colour queue (RGB0 oldest, RGB2 newest) with per-entry code
#define RGB0 ( m_cp2dr[ 20 ].d )
#define R0   ( m_cp2dr[ 20 ].b.l )
#define G0   ( m_cp2dr[ 20 ].b.h )
#define B0   ( m_cp2dr[ 20 ].b.h2 )
#define CD0  ( m_cp2dr[ 20 ].b.h3 )
#define RGB1 ( m_cp2dr[ 21 ].d )
#define R1   ( m_cp2dr[ 21 ].b.l )
#define G1   ( m_cp2dr[ 21 ].b.h )
#define B1   ( m_cp2dr[ 21 ].b.h2 )
#define CD1  ( m_cp2dr[ 21 ].b.h3 )
#define RGB2 ( m_cp2dr[ 22 ].d )
#define R2   ( m_cp2dr[ 22 ].b.l )
#define G2   ( m_cp2dr[ 22 ].b.h )
#define B2   ( m_cp2dr[ 22 ].b.h2 )
#define CD2  ( m_cp2dr[ 22 ].b.h3 )
// register 23: not used by any opcode in this file
#define RES1 ( m_cp2dr[ 23 ].d )
// registers 24-27: MAC0 and the MAC1..MAC3 accumulator results
#define MAC0 ( m_cp2dr[ 24 ].sd )
#define MAC1 ( m_cp2dr[ 25 ].sd )
#define MAC2 ( m_cp2dr[ 26 ].sd )
#define MAC3 ( m_cp2dr[ 27 ].sd )
// registers 28-29: 5:5:5 packed view of IR1..IR3 (see getcp2dr / setcp2dr)
#define IRGB ( m_cp2dr[ 28 ].d )
#define ORGB ( m_cp2dr[ 29 ].d )
// registers 30-31: writing LZCS updates LZCR with a leading zero/one count (see setcp2dr)
#define LZCS ( m_cp2dr[ 30 ].d )
#define LZCR ( m_cp2dr[ 31 ].d )
86 
/*
 * Shorthand for fields of the 32 coprocessor control registers in m_cp2cr[].
 * Matrix elements are packed two per register (.sw.l then .sw.h), so each
 * 3x3 matrix occupies four and a half registers and is followed by a
 * three-register vector.
 */

// registers 0-7: matrix R and vector TR (combined by RTPS / RTPT)
#define R11 ( m_cp2cr[ 0 ].sw.l )
#define R12 ( m_cp2cr[ 0 ].sw.h )
#define R13 ( m_cp2cr[ 1 ].sw.l )
#define R21 ( m_cp2cr[ 1 ].sw.h )
#define R22 ( m_cp2cr[ 2 ].sw.l )
#define R23 ( m_cp2cr[ 2 ].sw.h )
#define R31 ( m_cp2cr[ 3 ].sw.l )
#define R32 ( m_cp2cr[ 3 ].sw.h )
#define R33 ( m_cp2cr[ 4 ].sw.l )
#define TRX ( m_cp2cr[ 5 ].sd )
#define TRY ( m_cp2cr[ 6 ].sd )
#define TRZ ( m_cp2cr[ 7 ].sd )
// registers 8-15: matrix L (applied to the input vector by the NC* opcodes) and vector BK
#define L11 ( m_cp2cr[ 8 ].sw.l )
#define L12 ( m_cp2cr[ 8 ].sw.h )
#define L13 ( m_cp2cr[ 9 ].sw.l )
#define L21 ( m_cp2cr[ 9 ].sw.h )
#define L22 ( m_cp2cr[ 10 ].sw.l )
#define L23 ( m_cp2cr[ 10 ].sw.h )
#define L31 ( m_cp2cr[ 11 ].sw.l )
#define L32 ( m_cp2cr[ 11 ].sw.h )
#define L33 ( m_cp2cr[ 12 ].sw.l )
#define RBK ( m_cp2cr[ 13 ].sd )
#define GBK ( m_cp2cr[ 14 ].sd )
#define BBK ( m_cp2cr[ 15 ].sd )
// registers 16-23: matrix LR/LG/LB (applied to IR1..IR3 together with BK) and vector FC
#define LR1 ( m_cp2cr[ 16 ].sw.l )
#define LR2 ( m_cp2cr[ 16 ].sw.h )
#define LR3 ( m_cp2cr[ 17 ].sw.l )
#define LG1 ( m_cp2cr[ 17 ].sw.h )
#define LG2 ( m_cp2cr[ 18 ].sw.l )
#define LG3 ( m_cp2cr[ 18 ].sw.h )
#define LB1 ( m_cp2cr[ 19 ].sw.l )
#define LB2 ( m_cp2cr[ 19 ].sw.h )
#define LB3 ( m_cp2cr[ 20 ].sw.l )
#define RFC ( m_cp2cr[ 21 ].sd )
#define GFC ( m_cp2cr[ 22 ].sd )
#define BFC ( m_cp2cr[ 23 ].sd )
// registers 24-28: offsets added to the projected X/Y, the numerator H of the
// H / SZ3 division, and the DQA / DQB pair used to derive IR0
#define OFX ( m_cp2cr[ 24 ].sd )
#define OFY ( m_cp2cr[ 25 ].sd )
#define H   ( m_cp2cr[ 26 ].sw.l )
#define DQA ( m_cp2cr[ 27 ].sw.l )
#define DQB ( m_cp2cr[ 28 ].sd )
// registers 29-30: scale factors used by AVSZ3 / AVSZ4
#define ZSF3 ( m_cp2cr[ 29 ].sw.l )
#define ZSF4 ( m_cp2cr[ 30 ].sw.l )
// register 31: overflow/saturation flags, cleared at the start of every docop2 call
#define FLAG ( m_cp2cr[ 31 ].d )
131 
/*
 * Indexed operand selectors used by the opcode handlers.
 *
 * VX/VY/VZ( n )  component of vector n (data registers 2n and 2n+1) for
 *                n < 3, otherwise IR1/IR2/IR3.
 * MXrc( n )      row r, column c of matrix n for n < 3; the matrices are
 *                eight control registers apart. For any other n a fixed set
 *                of substitute values is produced (-R << 4, R << 4, IR0,
 *                R13, R22).
 * CVi( n )       component i of the vector stored after matrix n (control
 *                registers 8n+5 .. 8n+7) for n < 3, otherwise 0.
 *
 * The argument is parenthesised at every use so that compound expressions
 * passed as n are evaluated as a unit.
 */
#define VX( n ) ( ( n ) < 3 ? m_cp2dr[ ( n ) << 1 ].sw.l : IR1 )
#define VY( n ) ( ( n ) < 3 ? m_cp2dr[ ( n ) << 1 ].sw.h : IR2 )
#define VZ( n ) ( ( n ) < 3 ? m_cp2dr[ ( ( n ) << 1 ) + 1 ].sw.l : IR3 )
#define MX11( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) ].sw.l : -R << 4 )
#define MX12( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) ].sw.h : R << 4 )
#define MX13( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 1 ].sw.l : IR0 )
#define MX21( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 1 ].sw.h : R13 )
#define MX22( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 2 ].sw.l : R13 )
#define MX23( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 2 ].sw.h : R13 )
#define MX31( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 3 ].sw.l : R22 )
#define MX32( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 3 ].sw.h : R22 )
#define MX33( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 4 ].sw.l : R22 )
#define CV1( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 5 ].sd : 0 )
#define CV2( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 6 ].sd : 0 )
#define CV3( n ) ( ( n ) < 3 ? m_cp2cr[ ( ( n ) << 3 ) + 7 ].sd : 0 )
147 
LIM(int32_t value,int32_t max,int32_t min,uint32_t flag)148 int32_t gte::LIM( int32_t value, int32_t max, int32_t min, uint32_t flag )
149 {
150 	if( value > max )
151 	{
152 		FLAG |= flag;
153 		return max;
154 	}
155 	else if( value < min )
156 	{
157 		FLAG |= flag;
158 		return min;
159 	}
160 	return value;
161 }
162 
getcp2dr(uint32_t pc,int reg)163 uint32_t gte::getcp2dr( uint32_t pc, int reg )
164 {
165 	switch( reg )
166 	{
167 	case 1:
168 	case 3:
169 	case 5:
170 	case 8:
171 	case 9:
172 	case 10:
173 	case 11:
174 		m_cp2dr[ reg ].d = (int32_t)m_cp2dr[ reg ].sw.l;
175 		break;
176 
177 	case 7:
178 	case 16:
179 	case 17:
180 	case 18:
181 	case 19:
182 		m_cp2dr[ reg ].d = (uint32_t)m_cp2dr[ reg ].w.l;
183 		break;
184 
185 	case 15:
186 		m_cp2dr[ reg ].d = SXY2;
187 		break;
188 
189 	case 28:
190 	case 29:
191 		m_cp2dr[ reg ].d = LIM( IR1 >> 7, 0x1f, 0, 0 ) | ( LIM( IR2 >> 7, 0x1f, 0, 0 ) << 5 ) | ( LIM( IR3 >> 7, 0x1f, 0, 0 ) << 10 );
192 		break;
193 	}
194 
195 	GTELOG( pc, "get CP2DR%u=%08x", reg, m_cp2dr[ reg ].d );
196 	return m_cp2dr[ reg ].d;
197 }
198 
setcp2dr(uint32_t pc,int reg,uint32_t value)199 void gte::setcp2dr( uint32_t pc, int reg, uint32_t value )
200 {
201 	GTELOG( pc, "set CP2DR%u=%08x", reg, value );
202 
203 	switch( reg )
204 	{
205 	case 15:
206 		SXY0 = SXY1;
207 		SXY1 = SXY2;
208 		SXY2 = value;
209 		break;
210 
211 	case 28:
212 		IR1 = ( value & 0x1f ) << 7;
213 		IR2 = ( value & 0x3e0 ) << 2;
214 		IR3 = ( value & 0x7c00 ) >> 3;
215 		break;
216 
217 	case 30:
218 		LZCR = (value & 0x80000000) == 0 ? count_leading_zeros(value) : count_leading_ones(value);
219 		break;
220 
221 	case 31:
222 		return;
223 	}
224 
225 	m_cp2dr[ reg ].d = value;
226 }
227 
getcp2cr(uint32_t pc,int reg)228 uint32_t gte::getcp2cr( uint32_t pc, int reg )
229 {
230 	GTELOG( pc, "get CP2CR%u=%08x", reg, m_cp2cr[ reg ].d );
231 
232 	return m_cp2cr[ reg ].d;
233 }
234 
setcp2cr(uint32_t pc,int reg,uint32_t value)235 void gte::setcp2cr( uint32_t pc, int reg, uint32_t value )
236 {
237 	GTELOG( pc, "set CP2CR%u=%08x", reg, value );
238 
239 	switch( reg )
240 	{
241 	case 4:
242 	case 12:
243 	case 20:
244 	case 26:
245 	case 27:
246 	case 29:
247 	case 30:
248 		value = (int32_t)(int16_t) value;
249 		break;
250 
251 	case 31:
252 		value = value & 0x7ffff000;
253 		if( ( value & 0x7f87e000 ) != 0 )
254 		{
255 			value |= 0x80000000;
256 		}
257 		break;
258 	}
259 
260 	m_cp2cr[ reg ].d = value;
261 }
262 
// Scale a by 12 bits in the direction selected by the sign of sf:
//   sf > 0   a >> 12
//   sf < 0   a << 12
//   sf == 0  a unchanged
static inline int64_t gte_shift( int64_t a, int sf )
{
	if( sf > 0 )
	{
		return a >> 12;
	}
	else if( sf < 0 )
	{
		// shift the unsigned representation: left-shifting a negative signed
		// value is undefined before C++20
		return static_cast<int64_t>( static_cast<uint64_t>( a ) << 12 );
	}

	return a;
}
276 
BOUNDS(int44 value,int max_flag,int min_flag)277 int32_t gte::BOUNDS( int44 value, int max_flag, int min_flag )
278 {
279 	if( value.positive_overflow() )
280 	{
281 		FLAG |= max_flag;
282 	}
283 
284 	if( value.negative_overflow() )
285 	{
286 		FLAG |= min_flag;
287 	}
288 
289 	return gte_shift( value.value(), m_sf );
290 }
291 
gte_divide(uint16_t numerator,uint16_t denominator)292 static inline uint32_t gte_divide( uint16_t numerator, uint16_t denominator )
293 {
294 	if( numerator < ( denominator * 2 ) )
295 	{
296 		static uint8_t table[] =
297 		{
298 			0xff, 0xfd, 0xfb, 0xf9, 0xf7, 0xf5, 0xf3, 0xf1, 0xef, 0xee, 0xec, 0xea, 0xe8, 0xe6, 0xe4, 0xe3,
299 			0xe1, 0xdf, 0xdd, 0xdc, 0xda, 0xd8, 0xd6, 0xd5, 0xd3, 0xd1, 0xd0, 0xce, 0xcd, 0xcb, 0xc9, 0xc8,
300 			0xc6, 0xc5, 0xc3, 0xc1, 0xc0, 0xbe, 0xbd, 0xbb, 0xba, 0xb8, 0xb7, 0xb5, 0xb4, 0xb2, 0xb1, 0xb0,
301 			0xae, 0xad, 0xab, 0xaa, 0xa9, 0xa7, 0xa6, 0xa4, 0xa3, 0xa2, 0xa0, 0x9f, 0x9e, 0x9c, 0x9b, 0x9a,
302 			0x99, 0x97, 0x96, 0x95, 0x94, 0x92, 0x91, 0x90, 0x8f, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x87, 0x86,
303 			0x85, 0x84, 0x83, 0x82, 0x81, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78, 0x77, 0x75, 0x74,
304 			0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
305 			0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, 0x55,
306 			0x54, 0x53, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x48,
307 			0x47, 0x46, 0x45, 0x44, 0x43, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, 0x3c, 0x3c, 0x3b,
308 			0x3a, 0x39, 0x39, 0x38, 0x37, 0x36, 0x36, 0x35, 0x34, 0x33, 0x33, 0x32, 0x31, 0x31, 0x30, 0x2f,
309 			0x2e, 0x2e, 0x2d, 0x2c, 0x2c, 0x2b, 0x2a, 0x2a, 0x29, 0x28, 0x28, 0x27, 0x26, 0x26, 0x25, 0x24,
310 			0x24, 0x23, 0x22, 0x22, 0x21, 0x20, 0x20, 0x1f, 0x1e, 0x1e, 0x1d, 0x1d, 0x1c, 0x1b, 0x1b, 0x1a,
311 			0x19, 0x19, 0x18, 0x18, 0x17, 0x16, 0x16, 0x15, 0x15, 0x14, 0x14, 0x13, 0x12, 0x12, 0x11, 0x11,
312 			0x10, 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
313 			0x07, 0x07, 0x06, 0x06, 0x05, 0x05, 0x04, 0x04, 0x03, 0x03, 0x02, 0x02, 0x01, 0x01, 0x00, 0x00,
314 			0x00
315 		};
316 
317 		int shift = count_leading_zeros( denominator ) - 16;
318 
319 		int r1 = ( denominator << shift ) & 0x7fff;
320 		int r2 = table[ ( ( r1 + 0x40 ) >> 7 ) ] + 0x101;
321 		int r3 = ( ( 0x80 - ( r2 * ( r1 + 0x8000 ) ) ) >> 8 ) & 0x1ffff;
322 		uint32_t reciprocal = ( ( r2 * r3 ) + 0x80 ) >> 8;
323 
324 		return (uint32_t)( ( ( (uint64_t) reciprocal * ( numerator << shift ) ) + 0x8000 ) >> 16 );
325 	}
326 
327 	return 0xffffffff;
328 }
329 
330 /* Setting bits 12 & 19-22 in FLAG does not set bit 31 */
331 
A1(int44 a)332 int32_t gte::A1( int44 a ) { m_mac1 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 30 ), ( 1 << 31 ) | ( 1 << 27 ) ); }
A2(int44 a)333 int32_t gte::A2( int44 a ) { m_mac2 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 29 ), ( 1 << 31 ) | ( 1 << 26 ) ); }
A3(int44 a)334 int32_t gte::A3( int44 a ) { m_mac3 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 28 ), ( 1 << 31 ) | ( 1 << 25 ) ); }
Lm_B1(int32_t a,int lm)335 int32_t gte::Lm_B1( int32_t a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 31 ) | ( 1 << 24 ) ); }
Lm_B2(int32_t a,int lm)336 int32_t gte::Lm_B2( int32_t a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 31 ) | ( 1 << 23 ) ); }
Lm_B3(int32_t a,int lm)337 int32_t gte::Lm_B3( int32_t a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 22 ) ); }
338 
Lm_B3_sf(int64_t value,int sf,int lm)339 int32_t gte::Lm_B3_sf( int64_t value, int sf, int lm )
340 {
341 	int32_t value_sf = gte_shift( value, sf );
342 	int32_t value_12 = gte_shift( value, 1 );
343 	int max = 0x7fff;
344 	int min = 0;
345 	if( lm == 0 )
346 	{
347 		min = -0x8000;
348 	}
349 
350 	if( value_12 < -0x8000 || value_12 > 0x7fff )
351 	{
352 		FLAG |= ( 1 << 22 );
353 	}
354 
355 	if( value_sf > max )
356 	{
357 		return max;
358 	}
359 	else if( value_sf < min )
360 	{
361 		return min;
362 	}
363 
364 	return value_sf;
365 }
366 
Lm_C1(int32_t a)367 int32_t gte::Lm_C1( int32_t a ) { return LIM( a, 0x00ff, 0x0000, ( 1 << 21 ) ); }
Lm_C2(int32_t a)368 int32_t gte::Lm_C2( int32_t a ) { return LIM( a, 0x00ff, 0x0000, ( 1 << 20 ) ); }
Lm_C3(int32_t a)369 int32_t gte::Lm_C3( int32_t a ) { return LIM( a, 0x00ff, 0x0000, ( 1 << 19 ) ); }
Lm_D(int64_t a,int sf)370 int32_t gte::Lm_D( int64_t a, int sf ) { return LIM( gte_shift( a, sf ), 0xffff, 0x0000, ( 1 << 31 ) | ( 1 << 18 ) ); }
371 
Lm_E(uint32_t result)372 uint32_t gte::Lm_E( uint32_t result )
373 {
374 	if( result == 0xffffffff )
375 	{
376 		FLAG |= ( 1 << 31 ) | ( 1 << 17 );
377 		return 0x1ffff;
378 	}
379 
380 	if( result > 0x1ffff )
381 	{
382 		return 0x1ffff;
383 	}
384 
385 	return result;
386 }
387 
F(int64_t a)388 int64_t gte::F( int64_t a )
389 {
390 	m_mac0 = a;
391 
392 	if( a > 0x7fffffff )
393 	{
394 		FLAG |= ( 1 << 31 ) | ( 1 << 16 );
395 	}
396 
397 	if( a < (int32_t) -0x80000000 )
398 	{
399 		FLAG |= ( 1 << 31 ) | ( 1 << 15 );
400 	}
401 
402 	return a;
403 }
404 
Lm_G1(int64_t a)405 int32_t gte::Lm_G1( int64_t a )
406 {
407 	if( a > 0x3ff )
408 	{
409 		FLAG |= ( 1 << 31 ) | ( 1 << 14 );
410 		return 0x3ff;
411 	}
412 
413 	if( a < -0x400 )
414 	{
415 		FLAG |= ( 1 << 31 ) | ( 1 << 14 );
416 		return -0x400;
417 	}
418 
419 	return a;
420 }
421 
Lm_G2(int64_t a)422 int32_t gte::Lm_G2( int64_t a )
423 {
424 	if( a > 0x3ff )
425 	{
426 		FLAG |= ( 1 << 31 ) | ( 1 << 13 );
427 		return 0x3ff;
428 	}
429 
430 	if( a < -0x400 )
431 	{
432 		FLAG |= ( 1 << 31 ) | ( 1 << 13 );
433 		return -0x400;
434 	}
435 
436 	return a;
437 }
438 
Lm_H(int64_t value,int sf)439 int32_t gte::Lm_H( int64_t value, int sf )
440 {
441 	int64_t value_sf = gte_shift( value, sf );
442 	int32_t value_12 = gte_shift( value, 1 );
443 	int max = 0x1000;
444 	int min = 0x0000;
445 
446 	if( value_sf < min || value_sf > max )
447 	{
448 		FLAG |= ( 1 << 12 );
449 	}
450 
451 	if( value_12 > max )
452 	{
453 		return max;
454 	}
455 
456 	if( value_12 < min )
457 	{
458 		return min;
459 	}
460 
461 	return value_12;
462 }
463 
// Execute one GTE operation.
//
// lm and m_sf are decoded from the opcode with GTE_LM / GTE_SF, FLAG is
// cleared, and the handler selected by GTE_FUNCT( gteop ) is run. The order of
// the A1..A3 / Lm_* calls inside each handler matters: they update FLAG and
// the m_mac0..m_mac3 latches as side effects.
//
// pc is only used for trace output.
// Returns 1 when the function code is handled below, 0 otherwise.
int gte::docop2( uint32_t pc, int gteop )
{
	int v;
	int lm;
	int cv;
	int mx;
	int32_t h_over_sz3 = 0;

	lm = GTE_LM( gteop );
	m_sf = GTE_SF( gteop );

	FLAG = 0;

	switch( GTE_FUNCT( gteop ) )
	{
	case 0x00: // drop through to RTPS
	case 0x01:
		GTELOG( pc, "%08x RTPS", gteop );

		// TR + R * V0 -> MAC1..3 / IR1..3
		MAC1 = A1( int44( (int64_t) TRX << 12 ) + ( R11 * VX0 ) + ( R12 * VY0 ) + ( R13 * VZ0 ) );
		MAC2 = A2( int44( (int64_t) TRY << 12 ) + ( R21 * VX0 ) + ( R22 * VY0 ) + ( R23 * VZ0 ) );
		MAC3 = A3( int44( (int64_t) TRZ << 12 ) + ( R31 * VX0 ) + ( R32 * VY0 ) + ( R33 * VZ0 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3_sf( m_mac3, m_sf, lm );
		// push the new depth onto the SZ queue
		SZ0 = SZ1;
		SZ1 = SZ2;
		SZ2 = SZ3;
		SZ3 = Lm_D( m_mac3, 1 );
		h_over_sz3 = Lm_E( gte_divide( H, SZ3 ) );
		// push the projected coordinates onto the SXY queue
		SXY0 = SXY1;
		SXY1 = SXY2;
		SX2 = Lm_G1( F( (int64_t) OFX + ( (int64_t) IR1 * h_over_sz3 ) ) >> 16 );
		SY2 = Lm_G2( F( (int64_t) OFY + ( (int64_t) IR2 * h_over_sz3 ) ) >> 16 );
		MAC0 = F( (int64_t) DQB + ( (int64_t) DQA * h_over_sz3 ) );
		IR0 = Lm_H( m_mac0, 1 );
		return 1;

	case 0x06:
		GTELOG( pc, "%08x NCLIP", gteop );

		// signed combination of the three queued screen coordinates
		MAC0 = F( (int64_t) ( SX0 * SY1 ) + ( SX1 * SY2 ) + ( SX2 * SY0 ) - ( SX0 * SY2 ) - ( SX1 * SY0 ) - ( SX2 * SY1 ) );
		return 1;

	case 0x0c:
		GTELOG( pc, "%08x OP", gteop );

		// cross-product style combination of R11/R22/R33 with IR1..3
		MAC1 = A1( (int64_t) ( R22 * IR3 ) - ( R33 * IR2 ) );
		MAC2 = A2( (int64_t) ( R33 * IR1 ) - ( R11 * IR3 ) );
		MAC3 = A3( (int64_t) ( R11 * IR2 ) - ( R22 * IR1 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		return 1;

	case 0x10:
		GTELOG( pc, "%08x DPCS", gteop );

		// move R/G/B towards RFC/GFC/BFC by IR0, then push onto the colour queue
		MAC1 = A1( ( R << 16 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( R << 16 ) ), 0 ) ) );
		MAC2 = A2( ( G << 16 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( G << 16 ) ), 0 ) ) );
		MAC3 = A3( ( B << 16 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( B << 16 ) ), 0 ) ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x11:
		GTELOG( pc, "%08x INTPL", gteop );

		// same blend as DPCS, starting from IR1..3 instead of R/G/B
		MAC1 = A1( ( IR1 << 12 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( IR1 << 12 ) ), 0 ) ) );
		MAC2 = A2( ( IR2 << 12 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( IR2 << 12 ) ), 0 ) ) );
		MAC3 = A3( ( IR3 << 12 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( IR3 << 12 ) ), 0 ) ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x12:
		GTELOG( pc, "%08x MVMVA", gteop );

		// matrix, vector and constant vector are selected by opcode fields
		mx = GTE_MX( gteop );
		v = GTE_V( gteop );
		cv = GTE_CV( gteop );

		switch( cv )
		{
		case 2:
			// cv == 2: MAC1..3 receive only the second and third products; the
			// constant + first product term is then evaluated purely for its
			// FLAG / m_mac side effects and its result is discarded
			MAC1 = A1( (int64_t) ( MX12( mx ) * VY( v ) ) + ( MX13( mx ) * VZ( v ) ) );
			MAC2 = A2( (int64_t) ( MX22( mx ) * VY( v ) ) + ( MX23( mx ) * VZ( v ) ) );
			MAC3 = A3( (int64_t) ( MX32( mx ) * VY( v ) ) + ( MX33( mx ) * VZ( v ) ) );
			Lm_B1( A1( ( (int64_t) CV1( cv ) << 12 ) + ( MX11( mx ) * VX( v ) ) ), 0 );
			Lm_B2( A2( ( (int64_t) CV2( cv ) << 12 ) + ( MX21( mx ) * VX( v ) ) ), 0 );
			Lm_B3( A3( ( (int64_t) CV3( cv ) << 12 ) + ( MX31( mx ) * VX( v ) ) ), 0 );
			break;

		default:
			// constant vector + matrix * vector
			MAC1 = A1( int44( (int64_t) CV1( cv ) << 12 ) + ( MX11( mx ) * VX( v ) ) + ( MX12( mx ) * VY( v ) ) + ( MX13( mx ) * VZ( v ) ) );
			MAC2 = A2( int44( (int64_t) CV2( cv ) << 12 ) + ( MX21( mx ) * VX( v ) ) + ( MX22( mx ) * VY( v ) ) + ( MX23( mx ) * VZ( v ) ) );
			MAC3 = A3( int44( (int64_t) CV3( cv ) << 12 ) + ( MX31( mx ) * VX( v ) ) + ( MX32( mx ) * VY( v ) ) + ( MX33( mx ) * VZ( v ) ) );
			break;
		}

		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		return 1;

	case 0x13:
		GTELOG( pc, "%08x NCDS", gteop );

		// stage 1: L * V0
		MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
		MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
		MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		// stage 2: BK + LR/LG/LB * IR
		MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
		MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
		MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		// stage 3: scale by R/G/B and move towards RFC/GFC/BFC by IR0
		MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
		MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
		MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x14:
		GTELOG( pc, "%08x CDP", gteop );

		// stages 2 and 3 of NCDS, starting from the current IR1..3
		MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
		MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
		MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
		MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
		MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x16:
		GTELOG( pc, "%08x NCDT", gteop );

		// NCDS repeated for vectors 0..2
		for( v = 0; v < 3; v++ )
		{
			MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
			MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
			MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
			MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
			MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
			MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
			MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			RGB0 = RGB1;
			RGB1 = RGB2;
			CD2 = CODE;
			R2 = Lm_C1( MAC1 >> 4 );
			G2 = Lm_C2( MAC2 >> 4 );
			B2 = Lm_C3( MAC3 >> 4 );
		}
		return 1;

	case 0x1b:
		GTELOG( pc, "%08x NCCS", gteop );

		// as NCDS, but the last stage only scales by R/G/B (no blend towards FC)
		MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
		MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
		MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
		MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
		MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		MAC1 = A1( ( R << 4 ) * IR1 );
		MAC2 = A2( ( G << 4 ) * IR2 );
		MAC3 = A3( ( B << 4 ) * IR3 );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x1c:
		GTELOG( pc, "%08x CC", gteop );

		// the last two stages of NCCS, starting from the current IR1..3
		MAC1 = A1( int44( ( (int64_t) RBK ) << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
		MAC2 = A2( int44( ( (int64_t) GBK ) << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
		MAC3 = A3( int44( ( (int64_t) BBK ) << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		MAC1 = A1( ( R << 4 ) * IR1 );
		MAC2 = A2( ( G << 4 ) * IR2 );
		MAC3 = A3( ( B << 4 ) * IR3 );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x1e:
		GTELOG( pc, "%08x NCS", gteop );

		// L * V0, then BK + LR/LG/LB * IR; the result is pushed without using R/G/B
		MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
		MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
		MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
		MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
		MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x20:
		GTELOG( pc, "%08x NCT", gteop );

		// NCS repeated for vectors 0..2
		for( v = 0; v < 3; v++ )
		{
			MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
			MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
			MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
			MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
			MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			RGB0 = RGB1;
			RGB1 = RGB2;
			CD2 = CODE;
			R2 = Lm_C1( MAC1 >> 4 );
			G2 = Lm_C2( MAC2 >> 4 );
			B2 = Lm_C3( MAC3 >> 4 );
		}
		return 1;

	case 0x28:
		GTELOG( pc, "%08x SQR", gteop );

		// component-wise square of IR1..3
		MAC1 = A1( IR1 * IR1 );
		MAC2 = A2( IR2 * IR2 );
		MAC3 = A3( IR3 * IR3 );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		return 1;

	case 0x1a: // end of NCDT
	case 0x29:
		GTELOG( pc, "%08x DPCL", gteop );

		// scale IR1..3 by R/G/B and move towards RFC/GFC/BFC by IR0
		MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
		MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
		MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x2a:
		GTELOG( pc, "%08x DPCT", gteop );

		// the DPCS blend applied three times; each pass reads R0/G0/B0 (the
		// oldest colour queue entry) and pushes its result onto the queue
		for( v = 0; v < 3; v++ )
		{
			MAC1 = A1( ( R0 << 16 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( R0 << 16 ) ), 0 ) ) );
			MAC2 = A2( ( G0 << 16 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( G0 << 16 ) ), 0 ) ) );
			MAC3 = A3( ( B0 << 16 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( B0 << 16 ) ), 0 ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			RGB0 = RGB1;
			RGB1 = RGB2;
			CD2 = CODE;
			R2 = Lm_C1( MAC1 >> 4 );
			G2 = Lm_C2( MAC2 >> 4 );
			B2 = Lm_C3( MAC3 >> 4 );
		}
		return 1;

	case 0x2d:
		GTELOG( pc, "%08x AVSZ3", gteop );

		// ZSF3 * ( SZ1 + SZ2 + SZ3 ) -> MAC0, limited into OTZ
		MAC0 = F( (int64_t) ( ZSF3 * SZ1 ) + ( ZSF3 * SZ2 ) + ( ZSF3 * SZ3 ) );
		OTZ = Lm_D( m_mac0, 1 );
		return 1;

	case 0x2e:
		GTELOG( pc, "%08x AVSZ4", gteop );

		// ZSF4 * ( SZ0 + SZ1 + SZ2 + SZ3 ) -> MAC0, limited into OTZ
		MAC0 = F( (int64_t) ( ZSF4 * SZ0 ) + ( ZSF4 * SZ1 ) + ( ZSF4 * SZ2 ) + ( ZSF4 * SZ3 ) );
		OTZ = Lm_D( m_mac0, 1 );
		return 1;

	case 0x30:
		GTELOG( pc, "%08x RTPT", gteop );

		// the RTPS transform and projection repeated for vectors 0..2
		for( v = 0; v < 3; v++ )
		{
			MAC1 = A1( int44( (int64_t) TRX << 12 ) + ( R11 * VX( v ) ) + ( R12 * VY( v ) ) + ( R13 * VZ( v ) ) );
			MAC2 = A2( int44( (int64_t) TRY << 12 ) + ( R21 * VX( v ) ) + ( R22 * VY( v ) ) + ( R23 * VZ( v ) ) );
			MAC3 = A3( int44( (int64_t) TRZ << 12 ) + ( R31 * VX( v ) ) + ( R32 * VY( v ) ) + ( R33 * VZ( v ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3_sf( m_mac3, m_sf, lm );
			SZ0 = SZ1;
			SZ1 = SZ2;
			SZ2 = SZ3;
			SZ3 = Lm_D( m_mac3, 1 );
			h_over_sz3 = Lm_E( gte_divide( H, SZ3 ) );
			SXY0 = SXY1;
			SXY1 = SXY2;
			SX2 = Lm_G1( F( (int64_t) OFX + ( (int64_t) IR1 * h_over_sz3 ) ) >> 16 );
			SY2 = Lm_G2( F( (int64_t) OFY + ( (int64_t) IR2 * h_over_sz3 ) ) >> 16 );
		}

		// MAC0 / IR0 are computed once, from the division result of the last vector
		MAC0 = F( (int64_t) DQB + ( (int64_t) DQA * h_over_sz3 ) );
		IR0 = Lm_H( m_mac0, 1 );
		return 1;

	case 0x3d:
		GTELOG( pc, "%08x GPF", gteop );

		// IR0 * IR1..3, pushed onto the colour queue
		MAC1 = A1( IR0 * IR1 );
		MAC2 = A2( IR0 * IR2 );
		MAC3 = A3( IR0 * IR3 );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x3e:
		GTELOG( pc, "%08x GPL", gteop );

		// as GPF, but added to the existing MAC1..3, which are first scaled
		// back up by gte_shift( ..., -m_sf )
		MAC1 = A1( gte_shift( MAC1, -m_sf ) + ( IR0 * IR1 ) );
		MAC2 = A2( gte_shift( MAC2, -m_sf ) + ( IR0 * IR2 ) );
		MAC3 = A3( gte_shift( MAC3, -m_sf ) + ( IR0 * IR3 ) );
		IR1 = Lm_B1( MAC1, lm );
		IR2 = Lm_B2( MAC2, lm );
		IR3 = Lm_B3( MAC3, lm );
		RGB0 = RGB1;
		RGB1 = RGB2;
		CD2 = CODE;
		R2 = Lm_C1( MAC1 >> 4 );
		G2 = Lm_C2( MAC2 >> 4 );
		B2 = Lm_C3( MAC3 >> 4 );
		return 1;

	case 0x3f:
		GTELOG( pc, "%08x NCCT", gteop );

		// NCCS repeated for vectors 0..2
		for( v = 0; v < 3; v++ )
		{
			MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
			MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
			MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
			MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
			MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			MAC1 = A1( ( R << 4 ) * IR1 );
			MAC2 = A2( ( G << 4 ) * IR2 );
			MAC3 = A3( ( B << 4 ) * IR3 );
			IR1 = Lm_B1( MAC1, lm );
			IR2 = Lm_B2( MAC2, lm );
			IR3 = Lm_B3( MAC3, lm );
			RGB0 = RGB1;
			RGB1 = RGB2;
			CD2 = CODE;
			R2 = Lm_C1( MAC1 >> 4 );
			G2 = Lm_C2( MAC2 >> 4 );
			B2 = Lm_C3( MAC3 >> 4 );
		}
		return 1;
	}

	// unhandled function code: reporting is disabled, the caller just receives 0
	//popmessage( "unknown GTE op %08x", gteop );
	//logerror( "%08x: unknown GTE op %08x\n", pc, gteop );

	return 0;
}
929