1 //
2 // Blitter core
3 //
4 // by James Hammons
5 // (C) 2010 Underground Software
6 //
7 // JLH = James Hammons <jlhamm@acm.org>
8 //
9 // Who  When        What
10 // ---  ----------  -------------------------------------------------------------
11 // JLH  01/16/2010  Created this log ;-)
12 //
13 
14 //
15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
16 // for supplying the Oberon ASIC nets and to John for making them available
17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
18 // what's going on inside the TOM chip, we'd all still be guessing as to how
19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
20 // Now how about those JERRY ASIC nets gentlemen...? [We have those now!] ;-)
21 //
22 
23 #include "blitter.h"
24 
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include "jaguar.h"
29 #include "log.h"
30 #include "settings.h"
31 
32 // Various conditional compilation goodies...
33 
// Build-time switches.
// USE_ORIGINAL_BLITTER enables the original blitter code that is wrapped in
// "#ifdef USE_ORIGINAL_BLITTER" further down in this file.
#define USE_ORIGINAL_BLITTER
// NOTE(review): no use of this switch is visible in the part of the file
// reviewed here; presumably it selects the BlitterMidsummer2() path -- confirm.
#define USE_MIDSUMMER_BLITTER_MKII

// External global variables (defined in another translation unit)

extern int jaguar_active_memory_dumps;
40 
41 // Local global variables
42 
43 // Blitter register RAM (most of it is hidden from the user)
44 
// Backing store for the blitter register file: 0x100 bytes, addressed by the
// register offsets defined below and accessed 32 bits at a time through
// REG()/WREG(), most significant byte first.
static uint8_t blitter_ram[0x100];

// Forward declarations of the alternative blitter implementations

void BlitterMidsummer(uint32_t cmd);
void BlitterMidsummer2(void);

// REG(A): read the 32-bit big-endian value stored at byte offset A of
// blitter_ram. A is expanded four times, so it must be free of side effects.
#define REG(A)	(((uint32_t)blitter_ram[(A)] << 24) | ((uint32_t)blitter_ram[(A)+1] << 16) \
				| ((uint32_t)blitter_ram[(A)+2] << 8) | (uint32_t)blitter_ram[(A)+3])

// WREG(A,D): store the low 32 bits of D at byte offset A of blitter_ram,
// most significant byte first. D is converted to uint32_t before shifting so
// that signed (possibly negative) arguments are split with well-defined
// logical shifts. A and D are expanded four times each; no side effects.
#define WREG(A,D)	(blitter_ram[(A)] = ((uint32_t)(D) >> 24) & 0xFF, \
					blitter_ram[(A)+1] = ((uint32_t)(D) >> 16) & 0xFF, \
					blitter_ram[(A)+2] = ((uint32_t)(D) >> 8) & 0xFF, \
					blitter_ram[(A)+3] = (uint32_t)(D) & 0xFF)
56 
// Blitter register map: byte offsets into blitter_ram (registers live at
// F02200 + offset). All offsets are uint32_t so they combine cleanly with
// the REG()/WREG() accessors.

// --- A1 address generator ---
#define A1_BASE         ((uint32_t)0x00)
#define A1_FLAGS        ((uint32_t)0x04)
#define A1_CLIP         ((uint32_t)0x08)   // clipping window: height and width
#define A1_PIXEL        ((uint32_t)0x0C)   // pixel pointer, integer part (Y.i and X.i)
#define A1_STEP         ((uint32_t)0x10)   // step, integer part
#define A1_FSTEP        ((uint32_t)0x14)   // step, fractional part
#define A1_FPIXEL       ((uint32_t)0x18)   // pixel pointer, fractional part (Y.f and X.f)
#define A1_INC          ((uint32_t)0x1C)   // increment, integer part
#define A1_FINC         ((uint32_t)0x20)   // increment, fractional part

// --- A2 address generator (integer only: no fractional pixel or step) ---
#define A2_BASE         ((uint32_t)0x24)
#define A2_FLAGS        ((uint32_t)0x28)
#define A2_MASK         ((uint32_t)0x2C)   // modulo values for y and x (M.y and M.x)
#define A2_PIXEL        ((uint32_t)0x30)   // pixel pointer, integer part
#define A2_STEP         ((uint32_t)0x34)   // step, integer part

// --- Control ---
#define COMMAND         ((uint32_t)0x38)
#define PIXLINECOUNTER  ((uint32_t)0x3C)   // inner & outer loop counts

// --- Data registers (spaced 8 bytes apart up to PATTERNDATA) ---
#define SRCDATA         ((uint32_t)0x40)
#define DSTDATA         ((uint32_t)0x48)
#define DSTZ            ((uint32_t)0x50)
#define SRCZINT         ((uint32_t)0x58)
#define SRCZFRAC        ((uint32_t)0x60)
#define PATTERNDATA     ((uint32_t)0x68)

// --- Increments, collision control and per-phrase intensity / Z values ---
#define INTENSITYINC    ((uint32_t)0x70)
#define ZINC            ((uint32_t)0x74)
#define COLLISIONCTRL   ((uint32_t)0x78)
#define PHRASEINT0      ((uint32_t)0x7C)
#define PHRASEINT1      ((uint32_t)0x80)
#define PHRASEINT2      ((uint32_t)0x84)
#define PHRASEINT3      ((uint32_t)0x88)
#define PHRASEZ0        ((uint32_t)0x8C)
#define PHRASEZ1        ((uint32_t)0x90)
#define PHRASEZ2        ((uint32_t)0x94)
#define PHRASEZ3        ((uint32_t)0x98)
92 
// Blitter command bits, listed in ascending bit order.
// Every macro tests a variable named `cmd` that must be in scope at the point
// of use, and yields the masked bit (non-zero when set), not a 0/1 value.

// Source / destination read and write enables
#define SRCEN        (cmd & 0x00000001)   // read source pixel data from memory
#define SRCENZ       (cmd & 0x00000002)   // read source Z from memory
#define SRCENX       (cmd & 0x00000004)
#define DSTEN        (cmd & 0x00000008)   // read destination pixel data from memory
#define DSTENZ       (cmd & 0x00000010)   // read destination Z
#define DSTWRZ       (cmd & 0x00000020)   // write Z along with the pixel
#define CLIPA1       (cmd & 0x00000040)   // inhibit writes outside the A1 clip window

// Pointer update controls
#define UPDA1F       (cmd & 0x00000100)
#define UPDA1        (cmd & 0x00000200)
#define UPDA2        (cmd & 0x00000400)

// Direction: set = A2 is the destination (A1 -> A2), clear = A1 <- A2
#define DSTA2        (cmd & 0x00000800)

// Gouraud / Z interpolation and saturation controls for ADDDSEL
#define GOURD        (cmd & 0x00001000)
#define GOURZ        (cmd & 0x00002000)
#define TOPBEN       (cmd & 0x00004000)
#define TOPNEN       (cmd & 0x00008000)

// Write data selection
#define PATDSEL      (cmd & 0x00010000)   // write data comes from PATTERNDATA
#define ADDDSEL      (cmd & 0x00020000)   // write data is the source + destination sum

// Z comparator: inhibit the write when source Z is <, == or > destination Z
#define Z_OP_INF     (cmd & 0x00040000)
#define Z_OP_EQU     (cmd & 0x00080000)
#define Z_OP_SUP     (cmd & 0x00100000)

// Logic function unit terms (OR-ed together to form the write data)
#define LFU_NAN      (cmd & 0x00200000)   // ~src & ~dst
#define LFU_NA       (cmd & 0x00400000)   // ~src &  dst
#define LFU_AN       (cmd & 0x00800000)   //  src & ~dst
#define LFU_A        (cmd & 0x01000000)   //  src &  dst

// Data / bit comparators
#define CMPDST       (cmd & 0x02000000)   // compare destination against the pattern
#define BCOMPEN      (cmd & 0x04000000)
#define DCOMPEN      (cmd & 0x08000000)

// Miscellaneous
#define BKGWREN      (cmd & 0x10000000)   // write the destination even when inhibited
#define SRCSHADE     (cmd & 0x40000000)
130 
131 
// X add control values.
// NOTE(review): presumably these are the possible values of the 2-bit field
// taken from FLAGS bits 16-17 (xadd_a1_control / xadd_a2_control) -- confirm.
#define XADDPHR         0
#define XADDPIX         1
#define XADD0           2
#define XADDINC         3

// Direct tests on the current A1_FLAGS / A2_FLAGS register contents.
// Each yields the masked bit (non-zero when set).

// FLAGS bit 18
#define YADD1_A1        (REG(A1_FLAGS) & 0x00040000)
#define YADD1_A2        (REG(A2_FLAGS) & 0x00040000)

// FLAGS bit 19
#define XSIGNSUB_A1     (REG(A1_FLAGS) & 0x00080000)
#define XSIGNSUB_A2     (REG(A2_FLAGS) & 0x00080000)

// FLAGS bit 20
#define YSIGNSUB_A1     (REG(A1_FLAGS) & 0x00100000)
#define YSIGNSUB_A2     (REG(A2_FLAGS) & 0x00100000)
145 
146 /*******************************************************************************
147 ********************** STUFF CUT BELOW THIS LINE! ******************************
148 *******************************************************************************/
149 #ifdef USE_ORIGINAL_BLITTER										// We're ditching this crap for now...
150 
151 //Put 'em back, once we fix the problem!!! [KO]
// Pixel fetch helpers.
//
// Each macro takes a channel prefix `a` (a1 or a2) which is token-pasted onto
// the per-channel working state: a##_x / a##_y (16.16 fixed point pixel
// pointer), a##_width, a##_pitch and a##_addr (base address).
//
//   PIXEL_OFFSET_n(a)  offset of the addressed pixel from a##_addr, in units
//                      of the access size (bytes up to 8 bpp, words at 16 bpp,
//                      longs at 32 bpp)
//   PIXEL_SHIFT_n(a)   bit position of a sub-byte pixel inside its byte; the
//                      leftmost pixel sits in the most significant bits
//   READ_PIXEL_n(a)    fetch the pixel through the Jaguar memory accessors
//
// The arguments are expanded several times, so they must not have side effects.

// 1 bpp pixel read
#define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
#define PIXEL_OFFSET_1(a)     (((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)

// 2 bpp pixel read
#define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
#define PIXEL_OFFSET_2(a)     (((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)

// 4 bpp pixel read
#define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
#define PIXEL_OFFSET_4(a)     (((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)

// 8 bpp pixel read
#define PIXEL_OFFSET_8(a)     (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))

// 16 bpp pixel read
#define PIXEL_OFFSET_16(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))

// 32 bpp pixel read
#define PIXEL_OFFSET_32(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))

// Depth-dispatching pixel read. `f` is the channel's FLAGS value; bits 3-5
// hold the depth code (0..5 = 1, 2, 4, 8, 16, 32 bpp). Codes 6 and 7 yield 0.
// `f` is parenthesised so that any expression may be passed safely.
#define READ_PIXEL(a,f) (\
	 ((((f)>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
	 ((((f)>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
	 ((((f)>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
	 ((((f)>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
	 ((((f)>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
	 ((((f)>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
187 
// Z data access helpers. `a` is the channel prefix (a1 or a2); Z values are
// 16 bits wide and are located relative to the 16 bpp pixel offset, displaced
// by four words per unit of the channel's a##_zoffs.

// Word offset of the Z value that belongs to the current pixel
#define ZDATA_OFFSET_16(a)     ((a##_zoffs * 4) + PIXEL_OFFSET_16(a))

// Fetch the 16-bit Z value of the current pixel
#define READ_ZDATA_16(a)       (JaguarReadWord((ZDATA_OFFSET_16(a)<<1) + a##_addr, BLITTER))

// Generic Z read; the FLAGS argument `f` is accepted but unused because only
// the 16-bit form exists.
#define READ_ZDATA(a,f)        (READ_ZDATA_16(a))

// Store the 16-bit Z value `d` for the current pixel. Expands to a braced
// statement.
#define WRITE_ZDATA_16(a,d)    { JaguarWriteWord((ZDATA_OFFSET_16(a)<<1) + a##_addr, d, BLITTER); }

// Generic Z write; `f` is unused. Note that the expansion already ends with
// a semicolon.
#define WRITE_ZDATA(a,f,d)     WRITE_ZDATA_16(a,d);
200 
// Register data read helpers.
//
// These fetch a pixel-sized value from a 64-bit blitter data register instead
// of from memory:
//   r  byte offset of the register in blitter_ram (e.g. SRCDATA, PATTERNDATA)
//   a  channel prefix (a1 or a2); its a##_x (16.16) selects the pixel
//   p  non-zero for phrase mode
//
// Outside phrase mode the low bits of the first long, REG(r), are returned.
// In phrase mode the pixel index inside the phrase picks one of the two longs
// (REG(r) or REG(r+4)) and the shift inside that long is
// (index within the long) * (bits per pixel), so it always stays below 32.
//
// FIX: the 2 bpp and 4 bpp shift masks used to be 0x3E and 0x28. 0x3E let the
// shift reach 32..62 (undefined on a 32-bit value) and 0x28 did not select a
// nibble index at all; they are now 0x1E and 0x1C, matching the pattern of
// the other depths.

// 1 bpp r data read
#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG((r)+(((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))

// 2 bpp r data read
#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG((r)+(((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x1E)) & 0x0003 : (REG(r) & 0x0003))

// 4 bpp r data read
#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG((r)+(((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x1C)) & 0x000F : (REG(r) & 0x000F))

// 8 bpp r data read
#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG((r)+(((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))

// 16 bpp r data read
#define READ_RDATA_16(r,a,p)  ((p) ? ((REG((r)+(((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))

// 32 bpp r data read
#define READ_RDATA_32(r,a,p)  ((p) ? REG((r)+(((uint32_t)a##_x >> 14) & 0x04)) : REG(r))

// Depth-dispatching register data read. `f` is the channel's FLAGS value;
// bits 3-5 hold the depth code (0..5 = 1, 2, 4, 8, 16, 32 bpp). Codes 6 and 7
// yield 0.
#define READ_RDATA(r,a,f,p) (\
	 ((((f)>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
	 ((((f)>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
	 ((((f)>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
	 ((((f)>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
	 ((((f)>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
	 ((((f)>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
227 
// Pixel store helpers.
//
// `a` is the channel prefix (a1 or a2) and `d` the pixel value. Each macro
// expands to a braced statement. The sub-byte depths (1, 2 and 4 bpp) perform
// a read-modify-write of the containing byte.
//
// FIX: for the sub-byte depths `d` is now masked to the pixel width before it
// is merged in. Write data can carry bits above the pixel width (for example
// the LFU term ~src & ~dst), and without the mask those bits overwrote the
// neighbouring pixels stored in the same byte.

// 1 bpp pixel write
#define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER)&(~(0x01 << PIXEL_SHIFT_1(a))))|(((d)&0x01)<<PIXEL_SHIFT_1(a)), BLITTER); }

// 2 bpp pixel write
#define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER)&(~(0x03 << PIXEL_SHIFT_2(a))))|(((d)&0x03)<<PIXEL_SHIFT_2(a)), BLITTER); }

// 4 bpp pixel write
#define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER)&(~(0x0f << PIXEL_SHIFT_4(a))))|(((d)&0x0f)<<PIXEL_SHIFT_4(a)), BLITTER); }

// 8 bpp pixel write
#define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), (d), BLITTER); }

// 16 bpp pixel write
#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), (d), BLITTER); }

// 32 bpp pixel write
#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), (d), BLITTER); }

// Depth-dispatching pixel write. `f` is the channel's FLAGS value; bits 3-5
// hold the depth code (0..5 = 1, 2, 4, 8, 16, 32 bpp). Codes 6 and 7 write
// nothing.
#define WRITE_PIXEL(a,f,d) {\
	switch (((f)>>3)&0x07) { \
	case 0: WRITE_PIXEL_1(a,d);  break;  \
	case 1: WRITE_PIXEL_2(a,d);  break;  \
	case 2: WRITE_PIXEL_4(a,d);  break;  \
	case 3: WRITE_PIXEL_8(a,d);  break;  \
	case 4: WRITE_PIXEL_16(a,d); break;  \
	case 5: WRITE_PIXEL_32(a,d); break;  \
	}}
256 
// Working state of the original blitter. blitter_blit() decodes the register
// file into these variables and blitter_generic() consumes and updates them.

// Command word bit-fields as unpacked by blitter_blit()
static uint8_t src, dst, misc, a1ctl, mode, ity, zop, op, ctrl;

// Per-channel values decoded from A1/A2_BASE and A1/A2_FLAGS
static uint32_t a1_addr, a2_addr;                   // phrase-aligned base addresses
static int32_t a1_zoffs, a2_zoffs;                  // Z offset field of FLAGS
static uint32_t xadd_a1_control, xadd_a2_control;   // FLAGS bits 16-17
static int32_t a1_pitch, a2_pitch;

// Counts taken from PIXLINECOUNTER
static uint32_t n_pixels, n_lines;

// Pixel pointers (16.16 fixed point) and window widths
static int32_t a1_x, a1_y, a1_width;
static int32_t a2_x, a2_y, a2_width;
static int32_t a2_mask_x, a2_mask_y;                // AND-ed into the A2 pointer after each add

// Amounts added to the pixel pointers after every pixel (inner loop)
static int32_t a1_xadd, a1_yadd;
static int32_t a2_xadd, a2_yadd;

// Non-zero when the channel operates in phrase mode
static uint8_t a1_phrase_mode, a2_phrase_mode;

// Amounts added to the pixel pointers after every line (outer loop)
static int32_t a1_step_x = 0, a1_step_y = 0;
static int32_t a2_step_x = 0, a2_step_y = 0;

// Remaining line / pixel counters of the running blit
static uint32_t outer_loop, inner_loop;

// Pixel sizes; 64 / psize is used as the number of pixels per phrase
static uint32_t a2_psize, a1_psize;

// Gouraud shading state: intensity and colour values plus their per-pixel
// increments; colour_index selects the active entry.
static uint32_t gouraud_add;
static int gd_i[4], gd_c[4];
static int gd_ia, gd_ca;
static int colour_index = 0;

// Z interpolation: per-pixel increment and the Z values themselves
static int32_t zadd;
static uint32_t z_i[4];

// A1 clipping window (exclusive upper bounds for x and y)
static int32_t a1_clip_x, a1_clip_y;
307 
308 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
309 // of removing all the unnecessary code caching. If it turns out to be a good way
310 // to optimize the blitter, then we may revisit it in the future...
311 
312 // Generic blit handler
blitter_generic(uint32_t cmd)313 void blitter_generic(uint32_t cmd)
314 {
315    uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
316    uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
317 
318    while (outer_loop--)
319    {
320       uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
321 
322       //Kludge for Hover Strike...
323       //I wonder if this kludge is in conjunction with the SRCENX down below...
324       // This isn't so much a kludge but the way things work in BCOMPEN mode...!
325       if (BCOMPEN && SRCENX)
326       {
327          if (n_pixels < bppSrc)
328             bitPos = bppSrc - n_pixels;
329       }
330 
331       inner_loop = n_pixels;
332       while (inner_loop--)
333       {
334          srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
335 
336          if (!DSTA2)							// Data movement: A1 <- A2
337          {
338             // load src data and Z
339             //				if (SRCEN)
340             if (SRCEN || SRCENX)	// Not sure if this is correct... (seems to be...!)
341             {
342                srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
343 
344                if (SRCENZ)
345                   srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
346                else if (cmd & 0x0001C020)	// PATDSEL | TOPBEN | TOPNEN | DSTWRZ
347                   srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
348             }
349             else	// Use SRCDATA register...
350             {
351                srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
352 
353                if (cmd & 0x0001C020)		// PATDSEL | TOPBEN | TOPNEN | DSTWRZ
354                   srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
355             }
356 
357             // load dst data and Z
358             if (DSTEN)
359             {
360                dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
361 
362                if (DSTENZ)
363                   dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
364                else
365                   dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
366             }
367             else
368             {
369                dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
370 
371                if (DSTENZ)
372                   dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
373             }
374 
375             if (GOURZ)
376                srczdata = z_i[colour_index] >> 16;
377 
378             // apply z comparator
379             if (Z_OP_INF && srczdata <  dstzdata)	inhibit = 1;
380             if (Z_OP_EQU && srczdata == dstzdata)	inhibit = 1;
381             if (Z_OP_SUP && srczdata >  dstzdata)	inhibit = 1;
382 
383             // apply data comparator
384             // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
385             // Does BCOMPEN only work in 1 bpp mode???
386             //   No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
387             //   This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
388             //   an 8BPP space.
389             if (DCOMPEN | BCOMPEN)
390             {
391                //Temp, for testing Hover Strike
392                //Doesn't seem to do it... Why?
393                //What needs to happen here is twofold. First, the address generator in the outer loop has
394                //to honor the BPP when calculating the start address (which it kinda does already). Second,
395                //it has to step bit by bit when using BCOMPEN. How to do this???
396                if (BCOMPEN)
397                   //small problem with this approach: it's not accurate... We need a proper address to begin with
398                   //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
399                   //[DONE]
400                {
401                   uint32_t pixShift = (~bitPos) & (bppSrc - 1);
402                   srcdata = (srcdata >> pixShift) & 0x01;
403 
404                   bitPos++;
405                }
406 
407                if (!CMPDST)
408                {
409                   if (srcdata == 0)
410                      inhibit = 1;//*/
411                }
412                else
413                {
414                   // compare destination pixel with pattern pixel
415                   if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
416                      //						if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
417                      inhibit = 1;
418                }
419             }
420 
421             if (CLIPA1)
422             {
423                inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
424                         && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
425             }
426 
427             // compute the write data and store
428             if (!inhibit)
429             {
430                // Houston, we have a problem...
431                // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
432                // a conflict! E.g.:
433                //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
434                // CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
435                //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
436                //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
437                //        A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
438 
439                if (PATDSEL)
440                {
441                   // use pattern data for write data
442                   writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
443                }
444                else if (ADDDSEL)
445                {
446                   writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
447 
448                   if (!TOPBEN)
449                   {
450                      //This is correct now, but slow...
451                      int16_t s = (srcdata & 0xFF) | ((srcdata & 0x80) ? 0xFF00 : 0x0000),
452                              d = dstdata & 0xFF;
453                      int16_t sum = s + d;
454 
455                      if (sum < 0)
456                         writedata = 0x00;
457                      else if (sum > 0xFF)
458                         writedata = 0xFF;
459                      else
460                         writedata = (uint32_t)sum;
461                   }
462 
463                   //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
464                   writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
465 
466                   if (!TOPNEN && writedata > 0xFFF)
467                      writedata &= 0xFFF;
468 
469                   writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
470                }
471                else
472                {
473                   if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
474                   if (LFU_NA)  writedata |= ~srcdata & dstdata;
475                   if (LFU_AN)  writedata |= srcdata  & ~dstdata;
476                   if (LFU_A) 	 writedata |= srcdata  & dstdata;
477                }
478 
479                //Although, this looks like it's OK... (even if it is shitty!)
480                //According to JTRM, this is part of the four things the blitter does with the write data (the other
481                //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
482                //(see above blit example)...
483                if (GOURD)
484                   writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
485 
486                if (SRCSHADE)
487                {
488                   int intensity = srcdata & 0xFF;
489                   int ia = gd_ia >> 16;
490                   if (ia & 0x80)
491                      ia = 0xFFFFFF00 | ia;
492                   intensity += ia;
493                   if (intensity < 0)
494                      intensity = 0;
495                   if (intensity > 0xFF)
496                      intensity = 0xFF;
497                   writedata = (srcdata & 0xFF00) | intensity;
498                }
499             }
500             else
501             {
502                writedata = dstdata;
503                srczdata = dstzdata;
504             }
505 
506             //Tried 2nd below for Hover Strike: No dice.
507             if (/*a1_phrase_mode || */BKGWREN || !inhibit)
508                //				if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
509             {
510                // write to the destination
511                WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
512                if (DSTWRZ)
513                   WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
514             }
515          }
516          else	// if (DSTA2) 							// Data movement: A1 -> A2
517          {
518             // load src data and Z
519             if (SRCEN)
520             {
521                srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
522                if (SRCENZ)
523                   srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
524                else if (cmd & 0x0001C020)	// PATDSEL | TOPBEN | TOPNEN | DSTWRZ
525                   srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
526             }
527             else
528             {
529                srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
530                if (cmd & 0x001C020)	// PATDSEL | TOPBEN | TOPNEN | DSTWRZ
531                   srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
532             }
533 
534             // load dst data and Z
535             if (DSTEN)
536             {
537                dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
538                if (DSTENZ)
539                   dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
540                else
541                   dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
542             }
543             else
544             {
545                dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
546                if (DSTENZ)
547                   dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
548             }
549 
550             if (GOURZ)
551                srczdata = z_i[colour_index] >> 16;
552 
553             // apply z comparator
554             if (Z_OP_INF && srczdata < dstzdata)	inhibit = 1;
555             if (Z_OP_EQU && srczdata == dstzdata)	inhibit = 1;
556             if (Z_OP_SUP && srczdata > dstzdata)	inhibit = 1;
557 
558             // apply data comparator
559             //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
560             if (DCOMPEN | BCOMPEN)
561             {
562                if (!CMPDST)
563                {
564                   if (srcdata == 0)
565                      inhibit = 1;//*/
566                }
567                else
568                {
569                   // compare destination pixel with pattern pixel
570                   if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
571                      //						if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
572                      inhibit = 1;
573                }
574             }
575 
576             if (CLIPA1)
577             {
578                inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
579                         && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
580             }
581 
582             // compute the write data and store
583             if (!inhibit)
584             {
585                if (PATDSEL)
586                {
587                   // use pattern data for write data
588                   writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
589                }
590                else if (ADDDSEL)
591                {
592                   // intensity addition
593                   writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
594                   if (!(TOPBEN) && writedata > 0xFF)
595                      writedata = 0xFF;
596                   writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
597                   if (!(TOPNEN) && writedata > 0xFFF)
598                      writedata = 0xFFF;
599                   writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
600                }
601                else
602                {
603                   if (LFU_NAN)
604                      writedata |= ~srcdata & ~dstdata;
605                   if (LFU_NA)
606                      writedata |= ~srcdata & dstdata;
607                   if (LFU_AN)
608                      writedata |= srcdata & ~dstdata;
609                   if (LFU_A)
610                      writedata |= srcdata & dstdata;
611                }
612 
613                if (GOURD)
614                   writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
615 
616                if (SRCSHADE)
617                {
618                   int intensity = srcdata & 0xFF;
619                   int ia = gd_ia >> 16;
620                   if (ia & 0x80)
621                      ia = 0xFFFFFF00 | ia;
622                   intensity += ia;
623                   if (intensity < 0)
624                      intensity = 0;
625                   if (intensity > 0xFF)
626                      intensity = 0xFF;
627                   writedata = (srcdata & 0xFF00) | intensity;
628                }
629             }
630             else
631             {
632                writedata = dstdata;
633                srczdata = dstzdata;
634             }
635 
636             if (/*a2_phrase_mode || */BKGWREN || !inhibit)
637             {
638                // write to the destination
639                WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
640 
641                if (DSTWRZ)
642                   WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
643             }
644          }
645 
646          // Update x and y (inner loop)
647          //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
648          //This is less than ideal, but it works...
649          if (!BCOMPEN)
650          {//*/
651             a1_x += a1_xadd, a1_y += a1_yadd;
652             a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
653          }
654          else
655          {
656             a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
657             if (!DSTA2)
658             {
659                a1_x += a1_xadd;
660                if (bitPos % bppSrc == 0)
661                   a2_x = (a2_x + a2_xadd) & a2_mask_x;
662             }
663             else
664             {
665                a2_x = (a2_x + a2_xadd) & a2_mask_x;
666                if (bitPos % bppSrc == 0)
667                   a1_x += a1_xadd;
668             }
669          }//*/
670 
671          if (GOURZ)
672             z_i[colour_index] += zadd;
673 
674          if (GOURD || SRCSHADE)
675          {
676             gd_i[colour_index] += gd_ia;
677             //Hmm, this doesn't seem to do anything...
678             //But it is correct according to the JTRM...!
679             if ((int32_t)gd_i[colour_index] < 0)
680                gd_i[colour_index] = 0;
681             if (gd_i[colour_index] > 0x00FFFFFF)
682                gd_i[colour_index] = 0x00FFFFFF;//*/
683 
684             gd_c[colour_index] += gd_ca;
685             if ((int32_t)gd_c[colour_index] < 0)
686                gd_c[colour_index] = 0;
687             if (gd_c[colour_index] > 0x000000FF)
688                gd_c[colour_index] = 0x000000FF;//*/
689          }
690 
691          if (GOURD || SRCSHADE || GOURZ)
692          {
693             if (a1_phrase_mode)
694                //This screws things up WORSE (for the BIOS opening screen)
695                //				if (a1_phrase_mode || a2_phrase_mode)
696                colour_index = (colour_index + 1) & 0x03;
697          }
698       }
699 
700       //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
701       //      the phrase mode mucking around. But it fucks up everything else...
702       //#define SCREWY_CD_DEPENDENT
703 #ifdef SCREWY_CD_DEPENDENT
704       a1_x += a1_step_x;
705       a1_y += a1_step_y;
706       a2_x += a2_step_x;
707       a2_y += a2_step_y;//*/
708 #endif
709 
710       //New: Phrase mode taken into account! :-p
711       if (a1_phrase_mode)			// v2
712       {
713          uint32_t pixelSize;
714          // Bump the pointer to the next phrase boundary
715          // Even though it works, this is crappy... Clean it up!
716          uint32_t size = 64 / a1_psize;
717 
718          // Crappy kludge... ('aligning' source to destination)
719          if (a2_phrase_mode && DSTA2)
720          {
721             uint32_t extra = (a2_start >> 16) % size;
722             a1_x += extra << 16;
723          }
724 
725          pixelSize = (size - 1) << 16;
726          a1_x = (a1_x + pixelSize) & ~pixelSize;
727       }
728 
729       if (a2_phrase_mode)			// v1
730       {
731          uint32_t pixelSize;
732          // Bump the pointer to the next phrase boundary
733          // Even though it works, this is crappy... Clean it up!
734          uint32_t size = 64 / a2_psize;
735 
736          // Crappy kludge... ('aligning' source to destination)
737          // Prolly should do this for A1 channel as well... [DONE]
738          if (a1_phrase_mode && !DSTA2)
739          {
740             uint32_t extra = (a1_start >> 16) % size;
741             a2_x += extra << 16;
742          }
743 
744          pixelSize = (size - 1) << 16;
745          a2_x = (a2_x + pixelSize) & ~pixelSize;
746       }
747 
748       //Not entirely: This still mucks things up... !!! FIX !!!
749       //Should this go before or after the phrase mode mucking around?
750 #ifndef SCREWY_CD_DEPENDENT
751       a1_x += a1_step_x;
752       a1_y += a1_step_y;
753       a2_x += a2_step_x;
754       a2_y += a2_step_y;//*/
755 #endif
756    }
757 
758    // write values back to registers
759    WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
760    WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
761    WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
762 }
763 
blitter_blit(uint32_t cmd)764 void blitter_blit(uint32_t cmd)
765 {
766    uint32_t m, e;
767    uint32_t pitchValue[4] = { 0, 1, 3, 2 };
768    colour_index = 0;
769    src = cmd & 0x07;
770    dst = (cmd >> 3) & 0x07;
771    misc = (cmd >> 6) & 0x03;
772    a1ctl = (cmd >> 8) & 0x7;
773    mode = (cmd >> 11) & 0x07;
774    ity = (cmd >> 14) & 0x0F;
775    zop = (cmd >> 18) & 0x07;
776    op = (cmd >> 21) & 0x0F;
777    ctrl = (cmd >> 25) & 0x3F;
778 
779    // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
780    // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
781    a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
782    a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
783 
784    a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
785    a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
786 
787    xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
788    xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
789 
790    a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
791    a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
792 
793    n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
794    n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
795 
796    a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
797    a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
798 
799    // According to JTRM, this must give a *whole number* of phrases in the current
800    // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
801    m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
802    a1_width = ((0x04 | m) << e) >> 2;//*/
803 
804    a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
805    a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
806 
807    // According to JTRM, this must give a *whole number* of phrases in the current
808    // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
809    m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
810    a2_width = ((0x04 | m) << e) >> 2;//*/
811    a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
812    a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
813 
814    // Check for "use mask" flag
815    if (!(REG(A2_FLAGS) & 0x8000))
816    {
817       a2_mask_x = 0xFFFFFFFF; // must be 16.16
818       a2_mask_y = 0xFFFFFFFF; // must be 16.16
819    }
820 
821    a1_phrase_mode = 0;
822 
823    // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
824    a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
825 
826    if (YSIGNSUB_A1)
827       a1_yadd = -a1_yadd;
828 
829    // determine a1_xadd
830    switch (xadd_a1_control)
831    {
832       case XADDPHR:
833          // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
834          // add phrase offset to X and truncate
835          a1_xadd = 1 << 16;
836          a1_phrase_mode = 1;
837          break;
838       case XADDPIX:
839          // add pixelsize (1) to X
840          a1_xadd = 1 << 16;
841          break;
842       case XADD0:
843          // add zero (for those nice vertical lines)
844          a1_xadd = 0;
845          break;
846       case XADDINC:
847          // add the contents of the increment register
848          a1_xadd = (REG(A1_INC) << 16)		 | (REG(A1_FINC) & 0x0000FFFF);
849          a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
850          break;
851    }
852 
853    if (XSIGNSUB_A1)
854       a1_xadd = -a1_xadd;
855 
856    if (YSIGNSUB_A2)
857       a2_yadd = -a2_yadd;
858 
859    a2_phrase_mode = 0;
860 
861    // determine a2_xadd
862    switch (xadd_a2_control)
863    {
864       case XADDPHR:
865          // add phrase offset to X and truncate
866          a2_xadd = 1 << 16;
867          a2_phrase_mode = 1;
868          break;
869       case XADDPIX:
870          // add pixelsize (1) to X
871          a2_xadd = 1 << 16;
872          break;
873       case XADD0:
874          // add zero (for those nice vertical lines)
875          a2_xadd = 0;
876          break;
877          //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
878       case XADDINC:
879          // add the contents of the increment register
880          // since there is no register for a2 we just add 1
881          //Let's do nothing, since it's not listed as a valid bit combo...
882          break;
883    }
884 
885    if (XSIGNSUB_A2)
886       a2_xadd = -a2_xadd;
887 
888    // Modify outer loop steps based on blitter command
889 
890    a1_step_x = 0;
891    a1_step_y = 0;
892    a2_step_x = 0;
893    a2_step_y = 0;
894 
895    if (UPDA1F)
896       a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
897                 a1_step_y = (REG(A1_FSTEP) >> 16);
898 
899    if (UPDA1)
900       a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
901                 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
902 
903    if (UPDA2)
904       a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
905                 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
906 
907    outer_loop = n_lines;
908 
909    // Clipping...
910 
911    if (CLIPA1)
912       a1_clip_x = REG(A1_CLIP) & 0x7FFF,
913                 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
914 
915    // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
916    // Err, this is pixel size... (and it's OK)
917    a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
918    a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
919 
920    // Z-buffering
921    if (GOURZ)
922    {
923       unsigned v;
924       zadd = REG(ZINC);
925 
926       for(v = 0; v < 4; v++)
927          z_i[v] = REG(PHRASEZ0 + v*4);
928    }
929 
930    // Gouraud shading
931    if (GOURD || GOURZ || SRCSHADE)
932    {
933       gd_c[0] = blitter_ram[PATTERNDATA + 6];
934       gd_i[0]	= ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
935          | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
936 
937       gd_c[1] = blitter_ram[PATTERNDATA + 4];
938       gd_i[1]	= ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
939          | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
940 
941       gd_c[2] = blitter_ram[PATTERNDATA + 2];
942       gd_i[2]	= ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
943          | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
944 
945       gd_c[3] = blitter_ram[PATTERNDATA + 0];
946       gd_i[3]	= ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
947          | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
948 
949       gouraud_add = REG(INTENSITYINC);
950 
951       gd_ia = gouraud_add & 0x00FFFFFF;
952       if (gd_ia & 0x00800000)
953          gd_ia = 0xFF000000 | gd_ia;
954 
955       gd_ca = (gouraud_add >> 24) & 0xFF;
956       if (gd_ca & 0x00000080)
957          gd_ca = 0xFFFFFF00 | gd_ca;
958    }
959 
960    blitter_generic(cmd);
961 }
962 #endif
963 /*******************************************************************************
964 ********************** STUFF CUT ABOVE THIS LINE! ******************************
965 *******************************************************************************/
966 
967 
//
// Initialize the blitter. Initialization is simply a reset.
//
void BlitterInit(void)
{
	BlitterReset();
}
972 
973 
BlitterReset(void)974 void BlitterReset(void)
975 {
976 	memset(blitter_ram, 0x00, 0xA0);
977 }
978 
979 
//
// Shut down the blitter. Nothing is released here; the call is only logged.
//
void BlitterDone(void)
{
	WriteLog("BLIT: Done.\n");
}
984 
985 
BlitterReadByte(uint32_t offset,uint32_t who)986 uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
987 {
988 	offset &= 0xFF;
989 
990 	// status register
991 //This isn't cycle accurate--how to fix? !!! FIX !!!
992 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
993 //Real hardware returns $00000805, just like the JTRM says.
994 	if (offset == (0x38 + 0))
995 		return 0x00;
996 	if (offset == (0x38 + 1))
997 		return 0x00;
998 	if (offset == (0x38 + 2))
999 		return 0x08;
1000 	if (offset == (0x38 + 3))
1001 		return 0x05;	// always idle/never stopped (collision detection ignored!)
1002 
1003 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
1004 //Fix for AvP:
1005 	if (offset >= 0x04 && offset <= 0x07)
1006 //This is it. I wonder if it just ignores the lower three bits?
1007 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1008 		return blitter_ram[offset + 0x08];		// A1_PIXEL ($F0220C) read at $F02204
1009 
1010 	if (offset >= 0x2C && offset <= 0x2F)
1011 		return blitter_ram[offset + 0x04];		// A2_PIXEL ($F02230) read at $F0222C
1012 
1013 	return blitter_ram[offset];
1014 }
1015 
1016 
1017 //Crappy!
//
// Read a big-endian 16-bit value as two consecutive byte reads, so every
// special case of BlitterReadByte() applies to each half.
//
uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	const uint16_t hiByte = BlitterReadByte(offset, who);
	const uint16_t loByte = BlitterReadByte(offset + 1, who);

	return (uint16_t)((hiByte << 8) | loByte);
}
1022 
1023 
1024 //Crappy!
//
// Read a big-endian 32-bit value as two consecutive word reads.
//
// offset: register address of the most significant byte
// who:    identifies the caller; passed through to the word reads
//
// Returns the word at `offset` in the upper 16 bits and the word at
// `offset + 2` in the lower 16 bits.
//
uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	// Widen to 32 bits *before* shifting. A uint16_t operand is promoted to
	// (signed) int, and shifting a value with bit 15 set left by 16 would
	// overflow that int, which is undefined behaviour.
	const uint32_t upper = BlitterReadWord(offset, who);
	const uint32_t lower = BlitterReadWord(offset + 2, who);

	return (upper << 16) | lower;
}
1029 
1030 
BlitterWriteByte(uint32_t offset,uint8_t data,uint32_t who)1031 void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
1032 {
1033 	offset &= 0xFF;
1034 
1035 	// This handles writes to INTENSITY0-3 by also writing them to their proper places in
1036 	// PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1037 	if ((offset >= 0x7C) && (offset <= 0x9B))
1038 	{
1039 		switch (offset)
1040 		{
1041 		// INTENSITY registers 0-3
1042 		case 0x7C: break;
1043 		case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1044 		case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1045 		case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1046 
1047 		case 0x80: break;
1048 		case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1049 		case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1050 		case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1051 
1052 		case 0x84: break;
1053 		case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1054 		case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1055 		case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1056 
1057 		case 0x88: break;
1058 		case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1059 		case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1060 		case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
1061 
1062 
1063 		// Z registers 0-3
1064 		case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1065 		case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1066 		case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1067 		case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1068 
1069 		case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1070 		case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1071 		case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1072 		case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1073 
1074 		case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1075 		case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1076 		case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1077 		case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1078 
1079 		case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1080 		case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1081 		case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1082 		case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1083 		}
1084 	}
1085 
1086 	// It looks weird, but this is how the 64 bit registers are actually handled...!
1087 
1088 	else if (((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3))
1089 		|| ((offset >= DSTDATA + 0) && (offset <= DSTDATA + 3))
1090 		|| ((offset >= DSTZ + 0) && (offset <= DSTZ + 3))
1091 		|| ((offset >= SRCZINT + 0) && (offset <= SRCZINT + 3))
1092 		|| ((offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3))
1093 		|| ((offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
1094       )
1095 	{
1096 		blitter_ram[offset + 4] = data;
1097 	}
1098 	else if (((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7))
1099 		|| ((offset >= DSTDATA + 4) && (offset <= DSTDATA + 7))
1100 		|| ((offset >= DSTZ + 4) && (offset <= DSTZ + 7))
1101 		|| ((offset >= SRCZINT + 4) && (offset <= SRCZINT + 7))
1102 		|| ((offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7))
1103 		|| ((offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
1104       )
1105 	{
1106 		blitter_ram[offset - 4] = data;
1107 	}
1108 	else
1109 		blitter_ram[offset] = data;
1110 }
1111 
1112 
BlitterWriteWord(uint32_t offset,uint16_t data,uint32_t who)1113 void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
1114 {
1115 	BlitterWriteByte(offset + 0, data >> 8, who);
1116 	BlitterWriteByte(offset + 1, data & 0xFF, who);
1117 
1118 	if ((offset & 0xFF) == 0x3A)
1119 	// I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1120 	// But then again, according to the Jaguar docs, this is correct...!
1121 	{
1122 		if (vjs.useFastBlitter)
1123 			blitter_blit(GET32(blitter_ram, 0x38));
1124 		else
1125 			BlitterMidsummer2();
1126 	}
1127 }
1128 //F02278,9,A,B
1129 
1130 
//
// Write a big-endian 32-bit value as two word writes, upper word first, so
// the side effects of BlitterWriteWord() apply to each half in that order.
//
void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who)
{
	const uint16_t upperWord = (uint16_t)(data >> 16);
	const uint16_t lowerWord = (uint16_t)(data & 0xFFFF);

	BlitterWriteWord(offset, upperWord, who);
	BlitterWriteWord(offset + 2, lowerWord, who);
}
1136 
1137 // Here's attempt #2--taken from the Oberon chip specs!
1138 
1139 #ifdef USE_MIDSUMMER_BLITTER_MKII
1140 
1141 void ADDRGEN(uint32_t *, uint32_t *, bool, bool,
1142 	uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
1143 	uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
1144 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
1145 	uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
1146 	uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
1147 	uint32_t zinc, uint32_t zstep);
1148 void ADD16SAT(uint16_t *r, uint8_t *co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh);
1149 void ADDAMUX(int16_t *adda_x, int16_t *adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
1150 	int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
1151 	int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
1152 	bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
1153 void ADDBMUX(int16_t *addb_x, int16_t *addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
1154 	int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y);
1155 void DATAMUX(int16_t *data_x, int16_t *data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel);
1156 void ADDRADD(int16_t *addq_x, int16_t *addq_y, bool a1fracldi,
1157 	uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y);
1158 void DATA(uint64_t *wdata, uint8_t *dcomp, uint8_t *zcomp, bool *nowrite,
1159 	bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
1160 	uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t *patd, bool patdadd,
1161 	bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
1162 	bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
1163 	uint64_t *srcz, uint64_t dstz, uint32_t zinc);
1164 void COMP_CTRL(uint8_t *dbinh, bool *nowrite,
1165 	bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
1166 	uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
1167 
BlitterMidsummer2(void)1168 void BlitterMidsummer2(void)
1169 {
1170    // Here's what the specs say the state machine does. Note that this can probably be
1171    // greatly simplified (also, it's different from what John has in his Oberon docs):
1172    //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
1173    //be described here at all)...
1174 
1175    uint32_t cmd = GET32(blitter_ram, COMMAND);
1176 
1177    // Line states passed in via the command register
1178 
1179    bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
1180         dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
1181         upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
1182         gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
1183         patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
1184         dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
1185 
1186    uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
1187    //Missing: BUSHI
1188    //Where to find various lines:
1189    // clip_a1  -> inner
1190    // gourd    -> dcontrol, inner, outer, state
1191    // gourz    -> dcontrol, inner, outer, state
1192    // cmpdst   -> blit, data, datacomp, state
1193    // bcompen  -> acontrol, inner, mcontrol, state
1194    // dcompen  -> inner, state
1195    // bkgwren  -> inner, state
1196    // srcshade -> dcontrol, inner, state
1197    // adddsel  -> dcontrol
1198    //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
1199 
1200    // Lines that don't exist in Jaguar I (and will never be asserted)
1201 
1202    bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
1203    bool istepadd = false, istepfadd = false;
1204    bool zstepfadd = false, zstepadd = false;
1205 
1206    // Various state lines (initial state--basically the reset state of the FDSYNCs)
1207 
1208    bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
1209         zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
1210         init_zf = false, init_zi = false;
1211 
1212    bool outer0 = false, indone = false;
1213 
1214    bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
1215         init_zfi, init_zii;
1216 
1217    bool notgzandp = !(gourz && polygon);
1218 
1219    // Various registers set up by user
1220 
1221    uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
1222    uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
1223    uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
1224    uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
1225    uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
1226    uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
1227    uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
1228    uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
1229    uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
1230    uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
1231    bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
1232    bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
1233    bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
1234    uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8;	// Phrase aligned by ignoring bottom 3 bits
1235    uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
1236 
1237    uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
1238    uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
1239    int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
1240    int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
1241    int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
1242    int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
1243    uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
1244    uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
1245    uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
1246    uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
1247    int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
1248    int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
1249    uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
1250    uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
1251 
1252    int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
1253    int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
1254 #if 0
1255    bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
1256    uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
1257    uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
1258    uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
1259 #endif
1260    int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
1261    int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
1262 
1263    uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
1264    uint64_t srcd2 = 0;
1265    uint64_t dstd = GET64(blitter_ram, DSTDATA);
1266    uint64_t patd = GET64(blitter_ram, PATTERNDATA);
1267    uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
1268    uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
1269    uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
1270    uint64_t dstz = GET64(blitter_ram, DSTZ);
1271    uint32_t zinc = GET32(blitter_ram, ZINC);
1272 
1273    uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize);	// From ACONTROL
1274 
1275    bool phrase_mode;
1276    uint16_t a1FracCInX = 0, a1FracCInY = 0;
1277 
1278    // Bugs in Jaguar I
1279 
1280    a2addy = a1addy;							// A2 channel Y add bit is tied to A1's
1281 
1282    // Various state lines set up by user
1283 
1284    phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false);	// From ACONTROL
1285 
1286    // Stopgap vars to simulate various lines
1287 
1288 
1289    while (true)
1290    {
1291       // IDLE
1292 
1293       if ((idle && !go) || (inner && outer0 && indone))
1294       {
1295          idlei = true;
1296 
1297          //Instead of a return, let's try breaking out of the loop...
1298          break;
1299       }
1300       else
1301          idlei = false;
1302 
1303       // INNER LOOP ACTIVE
1304 
1305       if ((idle && go && !datinit)
1306             || (inner && !indone)
1307             || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
1308             || (a1update && !upda2 && notgzandp && !datinit)
1309             || (zupdate && !upda2 && !datinit)
1310             || (a2update && !datinit)
1311             || (init_ii && !gourz)
1312             || (init_zi))
1313          inneri = true;
1314       else
1315          inneri = false;
1316 
1317       // A1 FRACTION UPDATE
1318 
1319       if (inner && indone && !outer0 && upda1f)
1320          a1fupdatei = true;
1321       else
1322          a1fupdatei = false;
1323 
1324       // A1 POINTER UPDATE
1325 
1326       if ((a1fupdate)
1327             || (inner && indone && !outer0 && !upda1f && upda1))
1328          a1updatei = true;
1329       else
1330          a1updatei = false;
1331 
1332       // Z FRACTION UPDATE
1333 
1334       if ((a1update && gourz && polygon)
1335             || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
1336          zfupdatei = true;
1337       else
1338          zfupdatei = false;
1339 
1340       // Z INTEGER UPDATE
1341 
1342       if (zfupdate)
1343          zupdatei = true;
1344       else
1345          zupdatei = false;
1346 
1347       // A2 POINTER UPDATE
1348 
1349       if ((a1update && upda2 && notgzandp)
1350             || (zupdate && upda2)
1351             || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
1352          a2updatei = true;
1353       else
1354          a2updatei = false;
1355 
1356       // INITIALIZE INTENSITY FRACTION
1357 
1358       if ((zupdate && !upda2 && datinit)
1359             || (a1update && !upda2 && datinit && notgzandp)
1360             || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
1361             || (a2update && datinit)
1362             || (idle && go && datinit))
1363          init_ifi = true;
1364       else
1365          init_ifi = false;
1366 
1367       // INITIALIZE INTENSITY INTEGER
1368 
1369       if (init_if)
1370          init_iii = true;
1371       else
1372          init_iii = false;
1373 
1374       // INITIALIZE Z FRACTION
1375 
1376       if (init_ii && gourz)
1377          init_zfi = true;
1378       else
1379          init_zfi = false;
1380 
1381       // INITIALIZE Z INTEGER
1382 
1383       if (init_zf)
1384          init_zii = true;
1385       else
1386          init_zii = false;
1387 
1388       // Here we move the fooi into their foo counterparts in order to simulate the moving
1389       // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
1390 
1391       idle = idlei;
1392       inner = inneri;
1393       a1fupdate = a1fupdatei;
1394       a1update = a1updatei;
1395       zfupdate = zfupdatei;		// *
1396       zupdate = zupdatei;			// *
1397       a2update = a2updatei;
1398       init_if = init_ifi;			// *
1399       init_ii = init_iii;			// *
1400       init_zf = init_zfi;			// *
1401       init_zi = init_zii;			// *
1402       // * denotes states that will never assert for Jaguar I
1403 
1404       // Now, depending on how we want to handle things, we could either put the implementation
1405       // of the various pieces up above, or handle them down below here.
1406 
1407       // Let's try postprocessing for now...
1408 
1409       if (inner)
1410       {
1411          bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
1412               szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
1413          bool inner0 = false;
1414          bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
1415          // State lines that will never assert in Jaguar I
1416          bool textext = false, txtread = false;
1417          //other stuff
1418          uint8_t srcshift = 0;
1419          uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
1420          bool srca_addi, dsta_addi, gensrc, gendst, gena2i, zaddr, fontread, justify, a1_add, a2_add;
1421          bool adda_yconst, addareg, suba_x, suba_y, a1fracldi, srcdreadd, shadeadd;
1422          uint8_t addasel, a1_xconst, a2_xconst, adda_xconst, addbsel, maska1, maska2, modx, daddasel;
1423          uint8_t daddbsel, daddmode;
1424          bool patfadd, patdadd, srcz1add, srcz2add, srcshadd, daddq_sel;
1425          uint8_t data_sel;
1426          uint32_t address, pixAddr;
1427          uint8_t dstxp, srcxp, shftv, pobb;
1428          bool pobbsel;
1429          uint8_t loshd, shfti;
1430          uint64_t srcz;
1431          bool winhibit;
1432 
1433          indone = false;
1434 
1435          //			while (!idle_inner)
1436          while (true)
1437          {
1438             bool sshftld; // D flipflop (D -> Q): instart -> sshftld
1439             uint16_t dstxwr, pseq;
1440             bool penden;
1441             uint8_t window_mask;
1442             uint8_t inner_mask = 0;
1443             uint8_t emask, pma, dend;
1444             uint64_t srcd;
1445             uint8_t zSrcShift;
1446             uint64_t wdata;
1447             uint8_t dcomp, zcomp;
1448 
1449             //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
1450             // IDLE
1451 
1452             if ((idle_inner && !step)
1453                   || (dzwrite && step && inner0)
1454                   || (dwrite && step && !dstwrz && inner0))
1455             {
1456                idle_inneri = true;
1457                break;
1458             }
1459             else
1460                idle_inneri = false;
1461 
1462             // EXTRA SOURCE DATA READ
1463 
1464             if ((idle_inner && step && srcenx)
1465                   || (sreadx && !step))
1466                sreadxi = true;
1467             else
1468                sreadxi = false;
1469 
1470             // EXTRA SOURCE ZED READ
1471 
1472             if ((sreadx && step && srcenz)
1473                   || (szreadx && !step))
1474                szreadxi = true;
1475             else
1476                szreadxi = false;
1477 
1478             // TEXTURE DATA READ (not implemented because not in Jaguar I)
1479 
1480             // SOURCE DATA READ
1481 
1482             if ((szreadx && step && !textext)
1483                   || (sreadx && step && !srcenz && srcen)
1484                   || (idle_inner && step && !srcenx && !textext && srcen)
1485                   || (dzwrite && step && !inner0 && !textext && srcen)
1486                   || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
1487                   || (txtread && step && srcen)
1488                   || (sread && !step))
1489                sreadi = true;
1490             else
1491                sreadi = false;
1492 
1493             // SOURCE ZED READ
1494 
1495             if ((sread && step && srcenz)
1496                   || (szread && !step))
1497                szreadi = true;
1498             else
1499                szreadi = false;
1500 
1501             // DESTINATION DATA READ
1502 
1503             if ((szread && step && dsten)
1504                   || (sread && step && !srcenz && dsten)
1505                   || (sreadx && step && !srcenz && !textext && !srcen && dsten)
1506                   || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
1507                   || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
1508                   || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
1509                   || (txtread && step && !srcen && dsten)
1510                   || (dread && !step))
1511                dreadi = true;
1512             else
1513                dreadi = false;
1514 
1515             // DESTINATION ZED READ
1516 
1517             if ((dread && step && dstenz)
1518                   || (szread && step && !dsten && dstenz)
1519                   || (sread && step && !srcenz && !dsten && dstenz)
1520                   || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
1521                   || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
1522                   || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
1523                   || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
1524                   || (txtread && step && !srcen && !dsten && dstenz)
1525                   || (dzread && !step))
1526                dzreadi = true;
1527             else
1528                dzreadi = false;
1529 
1530             // DESTINATION DATA WRITE
1531 
1532             if ((dzread && step)
1533                   || (dread && step && !dstenz)
1534                   || (szread && step && !dsten && !dstenz)
1535                   || (sread && step && !srcenz && !dsten && !dstenz)
1536                   || (txtread && step && !srcen && !dsten && !dstenz)
1537                   || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
1538                   || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
1539                   || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
1540                   || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
1541                   || (dwrite && !step))
1542                dwritei = true;
1543             else
1544                dwritei = false;
1545 
1546             // DESTINATION ZED WRITE
1547 
1548             if ((dzwrite && !step)
1549                   || (dwrite && step && dstwrz))
1550                dzwritei = true;
1551             else
1552                dzwritei = false;
1553 
1554             //Kludge: A QnD way to make sure that sshftld is asserted only for the first
1555             //        cycle of the inner loop...
1556             sshftld = idle_inner;
1557 
1558             // Here we move the fooi into their foo counterparts in order to simulate the moving
1559             // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
1560 
1561             idle_inner = idle_inneri;
1562             sreadx = sreadxi;
1563             szreadx = szreadxi;
1564             sread = sreadi;
1565             szread = szreadi;
1566             dread = dreadi;
1567             dzread = dzreadi;
1568             dwrite = dwritei;
1569             dzwrite = dzwritei;
1570 
1571             // Here's a few more decodes--not sure if they're supposed to go here or not...
1572 
1573 
1574             srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
1575 
1576             dsta_addi = (dwritei && !dstwrz) || dzwritei;
1577 
1578             gensrc = sreadxi || szreadxi || sreadi || szreadi;
1579             gendst = dreadi || dzreadi || dwritei || dzwritei;
1580             gena2i = (gensrc && !dsta2) || (gendst && dsta2);
1581 
1582             zaddr = szreadx || szread || dzread || dzwrite;
1583 
1584             // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
1585             /*Fontread\	:= OND1 (fontread\, sread[1], sreadx[1], bcompen);
1586 Fontread	:= INV1 (fontread, fontread\);
1587 Justt		:= NAN3 (justt, fontread\, phrase_mode, tactive\);
1588 Justify		:= TS (justify, justt, busen);*/
1589             fontread = (sread || sreadx) && bcompen;
1590             justify = !(!fontread && phrase_mode /*&& tactive*/);
1591 
1592             /* Generate inner loop update enables */
1593             /*
1594 A1_addi		:= MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
1595 A2_addi		:= MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
1596 A1_add		:= FD1 (a1_add, a1_add\, a1_addi, clk);
1597 A2_add		:= FD1 (a2_add, a2_add\, a2_addi, clk);
1598 A2_addb		:= BUF1 (a2_addb, a2_add);
1599 */
1600             a1_add = (dsta2 ? srca_addi : dsta_addi);
1601             a2_add = (dsta2 ? dsta_addi : srca_addi);
1602 
1603             /* Address adder input A register selection
1604                000	A1 step integer part
1605                001	A1 step fraction part
1606                010	A1 increment integer part
1607                011	A1 increment fraction part
1608                100	A2 step
1609 
1610                bit 2 = a2update
1611                bit 1 = /a2update . (a1_add . a1addx[0..1])
1612                bit 0 = /a2update . ( a1fupdate
1613                + a1_add . atick[0] . a1addx[0..1])
1614                The /a2update term on bits 0 and 1 is redundant.
1615                Now look-ahead based
1616                */
1617 
1618             addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
1619             addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
1620             addasel |= (a2update ? 0x04 : 0x00);
1621             /* Address adder input A X constant selection
1622                adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
1623                zeroes when they are all 1
1624                Remember - these are pixels, so to add one phrase the pixel size
1625                has to be taken into account to get the appropriate value.
1626                for A1
1627                if a1addx[0..1] are 00 set 6 - pixel size
1628                if a1addx[0..1] are 01 set the value 000
1629                if a1addx[0..1] are 10 set the value 111
1630                similarly for A2
1631 JLH: Also, 11 will likewise set the value to 111
1632 */
1633             a1_xconst = 6 - a1_pixsize;
1634             a2_xconst = 6 - a2_pixsize;
1635 
1636             if (a1addx == 1)
1637                a1_xconst = 0;
1638             else if (a1addx & 0x02)
1639                a1_xconst = 7;
1640 
1641             if (a2addx == 1)
1642                a2_xconst = 0;
1643             else if (a2addx & 0x02)
1644                a2_xconst = 7;
1645 
1646             adda_xconst = (a2_add ? a2_xconst : a1_xconst);
1647             /* Address adder input A Y constant selection
1648                22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
1649                Therefore, the selection has to be controlled by a bug fix bit.
1650 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
1651 */
1652             adda_yconst = a1addy;
1653             /* Address adder input A register versus constant selection
1654                given by	  a1_add . a1addx[0..1]
1655                + a1update
1656                + a1fupdate
1657                + a2_add . a2addx[0..1]
1658                + a2update
1659                */
1660             addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
1661                   || (a2_add && a2addx == 3) || a2update ? true : false);
1662             /* The adders can be put into subtract mode in add pixel size
1663                mode when the corresponding flags are set */
1664             suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
1665             suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
1666             /* Address adder input B selection
1667                00	A1 pointer
1668                01	A2 pointer
1669                10	A1 fraction
1670                11	Zero
1671 
1672                Bit 1 =   a1fupdate
1673                + (a1_add . atick[0] . a1addx[0..1])
1674                + a1fupdate . a1_stepld
1675                + a1update . a1_stepld
1676                + a2update . a2_stepld
1677                Bit 0 =   a2update + a2_add
1678                + a1fupdate . a1_stepld
1679                + a1update . a1_stepld
1680                + a2update . a2_stepld
1681                */
1682             addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
1683                   || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
1684             addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
1685                   || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
1686 
1687             /* The modulo bits are used to align X onto a phrase boundary when
1688                it is being updated by one phrase
1689                000	no mask
1690                001	mask bit 0
1691                010	mask bits 1-0
1692                ..
1693                110  	mask bits 5-0
1694 
1695                Masking is enabled for a1 when a1addx[0..1] is 00, and the value
1696                is 6 - the pixel size (again!)
1697                */
1698             maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
1699             maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
1700             modx = (a2_add ? maska2 : maska1);
1701             /* Generate load strobes for the increment updates */
1702 
1703             /*A1pldt		:= NAN2 (a1pldt, atick[1], a1_add);
1704 A1ptrldi	:= NAN2 (a1ptrldi, a1update\, a1pldt);
1705 
1706 A1fldt		:= NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
1707 A1fracldi	:= NAN2 (a1fracldi, a1fupdate\, a1fldt);
1708 
1709 A2pldt		:= NAN2 (a2pldt, atick[1], a2_add);
1710 A2ptrldi	:= NAN2 (a2ptrldi, a2update\, a2pldt);*/
1711 
1712             a1fracldi = a1fupdate || (a1_add && a1addx == 3);
1713 
1714             // Some more from DCONTROL...
1715             // atick[] just MAY be important here! We're assuming it's true and dropping the term...
1716             // That will probably screw up some of the lower terms that seem to rely on the timing of it...
1717 //#warning srcdreadd is not properly initialized!
1718             srcdreadd = false;						// Set in INNER.NET
1719             //Shadeadd\	:= NAN2H (shadeadd\, dwrite, srcshade);
1720             //Shadeadd	:= INV2 (shadeadd, shadeadd\);
1721             shadeadd = dwrite && srcshade;
1722             /* Data adder control, input A selection
1723                000   Destination data
1724                001   Initialiser pixel value
1725                100   Source data      - computed intensity fraction
1726                101   Pattern data     - computed intensity
1727                110   Source zed 1     - computed zed
1728                111   Source zed 2     - computed zed fraction
1729 
1730                Bit 0 =   dwrite  . gourd . atick[1]
1731                + dzwrite . gourz . atick[0]
1732                + istepadd
1733                + zstepfadd
1734                + init_if + init_ii + init_zf + init_zi
1735                Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
1736                + zstepadd
1737                + zstepfadd
1738                Bit 2 =   (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
1739                + dwrite  . srcshade
1740                */
1741             daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
1742                   || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
1743             daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
1744             daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
1745                   || (dwrite && srcshade) ? 0x04 : 0x00);
1746             /* Data adder control, input B selection
1747                0000	Source data
1748                0001	Data initialiser increment
1749                0100	Bottom 16 bits of I increment repeated four times
1750                0101	Top 16 bits of I increment repeated four times
1751                0110	Bottom 16 bits of Z increment repeated four times
1752                0111	Top 16 bits of Z increment repeated four times
1753                1100	Bottom 16 bits of I step repeated four times
1754                1101	Top 16 bits of I step repeated four times
1755                1110	Bottom 16 bits of Z step repeated four times
1756                1111	Top 16 bits of Z step repeated four times
1757 
1758                Bit 0 =   dwrite  . gourd . atick[1]
1759                + dzwrite . gourz . atick[1]
1760                + dwrite  . srcshade
1761                + istepadd
1762                + zstepadd
1763                + init_if + init_ii + init_zf + init_zi
1764                Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
1765                + zstepadd
1766                + zstepfadd
1767                Bit 2 =   dwrite  . gourd . (atick[0] + atick[1])
1768                + dzwrite . gourz . (atick[0] + atick[1])
1769                + dwrite  . srcshade
1770                + istepadd + istepfadd + zstepadd + zstepfadd
1771                Bit 3 =   istepadd + istepfadd + zstepadd + zstepfadd
1772                */
1773             daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
1774                   || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
1775             daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
1776             daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
1777                   || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
1778             daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
1779             /* Data adder mode control
1780                000	16-bit normal add
1781                001	16-bit saturating add with carry
1782                010	8-bit saturating add with carry, carry into top byte is
1783                inhibited (YCrCb)
1784                011	8-bit saturating add with carry, carry into top byte and
1785                between top nybbles is inhibited (CRY)
1786                100	16-bit normal add with carry
1787                101	16-bit saturating add
1788                110	8-bit saturating add, carry into top byte is inhibited
1789                111	8-bit saturating add, carry into top byte and between top
1790                nybbles is inhibited
1791 
1792                The first five are used for Gouraud calculations, the latter three
1793                for adding source and destination data
1794 
1795                Bit 0 =   dzwrite . gourz . atick[1]
1796                + dwrite  . gourd . atick[1] . /topnen . /topben . /ext_int
1797                + dwrite  . gourd . atick[1] .  topnen .  topben . /ext_int
1798                + zstepadd
1799                + istepadd . /topnen . /topben . /ext_int
1800                + istepadd .  topnen .  topben . /ext_int
1801                + /gourd . /gourz . /topnen . /topben
1802                + /gourd . /gourz .  topnen .  topben
1803                + shadeadd . /topnen . /topben
1804                + shadeadd .  topnen .  topben
1805                + init_ii . /topnen . /topben . /ext_int
1806                + init_ii .  topnen .  topben . /ext_int
1807                + init_zi
1808 
1809                Bit 1 =   dwrite . gourd . atick[1] . /topben . /ext_int
1810                + istepadd . /topben . /ext_int
1811                + /gourd . /gourz .  /topben
1812                + shadeadd .  /topben
1813                + init_ii .  /topben . /ext_int
1814 
1815                Bit 2 =   /gourd . /gourz
1816                + shadeadd
1817                + dwrite  . gourd . atick[1] . ext_int
1818                + istepadd . ext_int
1819                + init_ii . ext_int
1820                */
1821             daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
1822                   || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
1823                   || (istepadd && !topnen && !topben && !ext_int)
1824                   || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
1825                   || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
1826                   || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
1827                   || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
1828             daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
1829                   || (!gourd && !gourz && !topben) || (shadeadd && !topben)
1830                   || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
1831             daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
1832                   || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
1833 
1834             patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
1835             patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
1836             srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
1837             srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
1838             srcshadd = srcdreadd && srcshade;
1839             daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
1840             /* Select write data
1841                This has to be controlled from stage 1 of the pipe-line, delayed
1842                by one tick, as the write occurs in the cycle after the ack.
1843 
1844                00	pattern data
1845                01	lfu data
1846                10	adder output
1847                11	source zed
1848 
1849                Bit 0 =  /patdsel . /adddsel
1850                + dzwrite1d
1851                Bit 1 =   adddsel
1852                + dzwrite1d
1853                */
1854 
1855             data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
1856                | (adddsel || dzwrite ? 0x02 : 0x00);
1857 
1858             ADDRGEN(&address, &pixAddr, gena2i, zaddr,
1859                   a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
1860                   a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
1861 
1862             //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
1863             if (!justify)
1864                address &= 0xFFFFF8;
1865 
1866             /* Generate source alignment shift
1867                -------------------------------
1868                The source alignment shift for data move is the difference between
1869                the source and destination X pointers, multiplied by the pixel
1870                size.  Only the low six bits of the pointers are of interest, as
1871                pixel sizes are always a power of 2 and window rows are always
1872                phrase aligned.
1873 
1874                When not in phrase mode, the top 3 bits of the shift value are
1875                set to zero (2/26).
1876 
1877                Source shifting is also used to extract bits for bit-to-byte
1878                expansion in phrase mode.  This involves only the bottom three
1879                bits of the shift value, and is based on the offset within the
1880                phrase of the destination X pointer, in pixels.
1881 
1882                Source shifting is disabled when srcen is not set.
1883                */
1884 
1885             dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
1886             srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
1887             shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
1888             /* The phrase mode alignment count is given by the phrase offset
1889                of the first pixel, for bit to byte expansion */
1890             pobb = 0;
1891 
1892             if (pixsize == 3)
1893                pobb = dstxp & 0x07;
1894             if (pixsize == 4)
1895                pobb = dstxp & 0x03;
1896             if (pixsize == 5)
1897                pobb = dstxp & 0x01;
1898 
1899             pobbsel = phrase_mode && bcompen;
1900             loshd = (pobbsel ? pobb : shftv) & 0x07;
1901             shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
1902             /* Enable for high bits is srcen . phrase_mode */
1903             shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
1904             srcshift = shfti;
1905 
1906             if (sreadx)
1907             {
1908                //uint32_t srcAddr, pixAddr;
1909                //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
1910                //	a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
1911                //	a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
1912                srcd2 = srcd1;
1913                srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
1914                   | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1915                //Kludge to take pixel size into account...
1916                //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
1917                //Actually, it would be--because of BCOMPEN expansion, for example...
1918                if (!phrase_mode)
1919                {
1920                   if (bcompen)
1921                      srcd1 >>= 56;
1922                   else
1923                   {
1924                      if (pixsize == 5)
1925                         srcd1 >>= 32;
1926                      else if (pixsize == 4)
1927                         srcd1 >>= 48;
1928                      else
1929                         srcd1 >>= 56;
1930                   }
1931                }//*/
1932             }
1933 
1934             if (szreadx)
1935             {
1936                srcz2 = srcz1;
1937                srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1938             }
1939 
1940             if (sread)
1941             {
1942                srcd2 = srcd1;
1943                srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1944                //Kludge to take pixel size into account...
1945                if (!phrase_mode)
1946                {
1947                   if (bcompen)
1948                      srcd1 >>= 56;
1949                   else
1950                   {
1951                      if (pixsize == 5)
1952                         srcd1 >>= 32;
1953                      else if (pixsize == 4)
1954                         srcd1 >>= 48;
1955                      else
1956                         srcd1 >>= 56;
1957                   }
1958                }
1959             }
1960 
1961             if (szread)
1962             {
1963                srcz2 = srcz1;
1964                srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1965                //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
1966                if (!phrase_mode && pixsize == 4)
1967                   srcz1 >>= 48;
1968 
1969             }
1970 
1971             if (dread)
1972             {
1973                dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1974                //Kludge to take pixel size into account...
1975                if (!phrase_mode)
1976                {
1977                   if (pixsize == 5)
1978                      dstd >>= 32;
1979                   else if (pixsize == 4)
1980                      dstd >>= 48;
1981                   else
1982                      dstd >>= 56;
1983                }
1984             }
1985 
1986             if (dzread)
1987             {
1988                // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
1989                dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1990                //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
1991                if (!phrase_mode && pixsize == 4)
1992                   dstz >>= 48;
1993 
1994             }
1995 
1996             // These vars should probably go further up in the code... !!! FIX !!!
1997             // We can't preassign these unless they're static...
1998             //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
1999             if (dwrite)
2000             {
2001                //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
2002                //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
2003                int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07);	// From INNER_CNT
2004                uint8_t inc = 0;
2005                uint16_t oldicount;
2006                uint8_t dstart = 0;
2007 
2008                inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
2009                inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || (pixsize == 5 && !(inct & 0x01))) ? 0x02 : 0x00);
2010                inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
2011                inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
2012 
2013                oldicount = icount;	// Save icount to detect underflow...
2014                icount -= inc;
2015 
2016                if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
2017                   inner0 = true;
2018                // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
2019 
2020                //*********************************************************************************
2021                //Start & end write mask computations...
2022                //*********************************************************************************
2023 
2024 
2025                if (pixsize == 3)
2026                   dstart = (dstxp & 0x07) << 3;
2027                if (pixsize == 4)
2028                   dstart = (dstxp & 0x03) << 4;
2029                if (pixsize == 5)
2030                   dstart = (dstxp & 0x01) << 5;
2031 
2032                dstart = (phrase_mode ? dstart : pixAddr & 0x07);
2033 
2034                //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
2035                dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
2036                pseq = dstxwr ^ (a1_win_x & 0x7FFE);
2037                pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
2038                pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
2039                penden = clip_a1 && (pseq == 0);
2040                window_mask = 0;
2041 
2042                if (pixsize == 3)
2043                   window_mask = (a1_win_x & 0x07) << 3;
2044                if (pixsize == 4)
2045                   window_mask = (a1_win_x & 0x03) << 4;
2046                if (pixsize == 5)
2047                   window_mask = (a1_win_x & 0x01) << 5;
2048 
2049                window_mask = (penden ? window_mask : 0);
2050 
2051                /* The mask to be used if within one phrase of the end of the inner
2052                   loop, similarly */
2053 
2054                if (pixsize == 3)
2055                   inner_mask = (icount & 0x07) << 3;
2056                if (pixsize == 4)
2057                   inner_mask = (icount & 0x03) << 4;
2058                if (pixsize == 5)
2059                   inner_mask = (icount & 0x01) << 5;
2060                if (!inner0)
2061                   inner_mask = 0;
2062                /* The actual mask used should be the lesser of the window masks and
2063                   the inner mask, where in all cases 000 means 1000. */
2064                window_mask = (window_mask == 0 ? 0x40 : window_mask);
2065                inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
2066 
2067                emask = (window_mask > inner_mask ? inner_mask : window_mask);
2068                /* The mask to be used for the pixel size, to which must be added
2069                   the bit offset */
2070                pma = pixAddr + (1 << pixsize);
2071                /* Select the mask */
2072                dend = (phrase_mode ? emask : pma);
2073 
2074                /* The cycle width in phrase mode is normally one phrase.  However,
2075                   at the start and end it may be narrower.  The start and end masks
2076                   are used to generate this.  The width is given by:
2077 
2078                   8 - start mask - (8 - end mask)
2079                   =	end mask - start mask
2080 
2081                   This is only used for writes in phrase mode.
2082                   Start and end from the address level of the pipeline are used.
2083                   */
2084 
2085                //More testing... This is almost certainly wrong, but how else does this work???
2086                //Seems to kinda work... But still, this doesn't seem to make any sense!
2087                if (phrase_mode && !dsten)
2088                   dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
2089 
2090                //Testing only... for now...
2091                //This is wrong because the write data is a combination of srcd and dstd--either run
2092                //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
2093                // Precedence is ADDDSEL > PATDSEL > LFU.
2094                //Also, doesn't take into account the start & end masks, or the phrase width...
2095                //Now it does!
2096 
2097                // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
2098                srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
2099                //bleh, ugly ugly ugly
2100                if (srcshift == 0)
2101                   srcd = srcd1;
2102 
2103                //NOTE: This only works with pixel sizes less than 8BPP...
2104                //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
2105                if (!phrase_mode && srcshift != 0)
2106                   srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
2107 
2108                //Z DATA() stuff done here... And it has to be done before any Z shifting...
2109                //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
2110                /*
2111                   Here are a couple of Cybermorph blits with Z:
2112                   $00113078	// DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2113                   $09900F39	// SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2114 
2115                   We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
2116                   Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
2117                   */
2118                if (gourz)
2119                {
2120                   uint16_t addq[4];
2121                   uint8_t initcin[4] = { 0, 0, 0, 0 };
2122                   ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
2123                   srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2124                   ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
2125                   srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2126 
2127                }
2128 
2129                zSrcShift = srcshift & 0x30;
2130                srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
2131                //bleh, ugly ugly ugly
2132                if (zSrcShift == 0)
2133                   srcz = srcz1;
2134 
2135 
2136                //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
2137                //According to following line, it gets LFU mode. But does it feed the source into the LFU
2138                //after the add?
2139                //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
2140                //Let's try this:
2141                if (srcshade)
2142                {
2143                   //NOTE: This is basically doubling the work done by DATA--since this is what
2144                   //      ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
2145                   //      Also note that it doesn't work properly unless GOURZ is set--there's the clue!
2146                   uint16_t addq[4];
2147                   uint8_t initcin[4] = { 0, 0, 0, 0 };
2148                   ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
2149                   srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2150                }
2151                //Seems to work... Not 100% sure tho.
2152                //end try this
2153 
2154                //Temporary kludge, to see if the fractional pattern does anything...
2155                //This works, BTW
2156                //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
2157                //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
2158                if (patfadd)
2159                {
2160                   uint16_t addq[4];
2161                   uint8_t initcin[4] = { 0, 0, 0, 0 };
2162                   ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
2163                   srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2164                }
2165 
2166                //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
2167                //Not yet enumerated: dbinh, srcdread, srczread
2168                //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
2169                //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
2170 
2171                DATA(&wdata, &dcomp, &zcomp, &winhibit,
2172                      true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
2173                      dend, dstart, dstd, iinc, lfufunc, &patd, patdadd,
2174                      phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
2175                      bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
2176                      &srcz, dstz, zinc);
2177 
2178                /*
2179                   DEF ADDRCOMP (
2180                   a1_outside	// A1 pointer is outside window bounds
2181                   :OUT;
2182                   INT16/	a1_x
2183                   INT16/	a1_y
2184                   INT15/	a1_win_x
2185                   INT15/	a1_win_y
2186                   :IN);
2187                   BEGIN
2188 
2189                // The address is outside if negative, or if greater than or equal
2190                // to the window size
2191 
2192 A1_xcomp	:= MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
2193 A1_ycomp	:= MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
2194 A1_outside	:= OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
2195 */
2196                //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
2197                //      Actually, seems to be related to phrase mode writes...
2198                //      Or is it? Could be related to non-15-bit compares as above?
2199                if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
2200                   winhibit = true;
2201 
2202                if (!winhibit)
2203                {
2204                   if (phrase_mode)
2205                   {
2206                      JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
2207                      JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
2208                   }
2209                   else
2210                   {
2211                      if (pixsize == 5)
2212                         JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
2213                      else if (pixsize == 4)
2214                         JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
2215                      else
2216                         JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
2217                   }
2218                }
2219 
2220             }
2221 
2222             if (dzwrite)
2223             {
2224                // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
2225                // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
2226                // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
2227                // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
2228                // high 32 at the high address (little endian!).
2229                // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
2230                //       Phrase mode start/end masks are not properly supported either...
2231                //This is not correct... !!! FIX !!!
2232                //Should be OK now... We'll see...
2233                //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
2234                //This is not causing the problem in Hover Strike... :-/
2235                //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
2236                if (!winhibit)
2237                {
2238                   if (phrase_mode)
2239                   {
2240                      JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
2241                      JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
2242                   }
2243                   else
2244                   {
2245                      if (pixsize == 4)
2246                         JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
2247                   }
2248                }//*/
2249             }
2250 
2251 
2252             if (a1_add)
2253             {
2254                int16_t adda_x, adda_y, addb_x, addb_y, addq_x, addq_y;
2255                ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2256                      a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2257                ADDBMUX(&addb_x, &addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2258                ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2259 
2260                //Now, write to what???
2261                //a2ptrld comes from a2ptrldi...
2262                //I believe it's addbsel that determines the writeback...
2263                // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
2264                // gets written to...
2265                //a1_x = addq_x;
2266                //a1_y = addq_y;
2267                //Kludge, to get A1 channel increment working...
2268                if (a1addx == 3)
2269                {
2270                   a1_frac_x = addq_x, a1_frac_y = addq_y;
2271 
2272                   addasel = 2, addbsel = 0, a1fracldi = false;
2273                   ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2274                         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2275                   ADDBMUX(&addb_x,&addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2276                   ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2277 
2278                   a1_x = addq_x, a1_y = addq_y;
2279                }
2280                else
2281                   a1_x = addq_x, a1_y = addq_y;
2282             }
2283 
2284             if (a2_add)
2285             {
2286                int16_t adda_x, adda_y, addb_x, addb_y, addq_x, addq_y;
2287                ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2288                      a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2289                ADDBMUX(&addb_x, &addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2290                ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2291 
2292                //Now, write to what???
2293                //a2ptrld comes from a2ptrldi...
2294                //I believe it's addbsel that determines the writeback...
2295                a2_x = addq_x;
2296                a2_y = addq_y;
2297             }
2298          }
2299 
2300          indone = true;
2301          // The outer counter is updated here as well on the clock cycle...
2302 
2303          /* the inner loop is started whenever another state is about to
2304             cause the inner state to go active */
2305          //Instart		:= ND7 (instart, innert[0], innert[2..7]);
2306 
2307          //Actually, it's done only when inner gets asserted without the 2nd line of conditions
2308          //(inner AND !indone)
2309          //fixed now...
2310          //Since we don't get here until the inner loop is finished (indone = true) we can get
2311          //away with doing it here...!
2312          ocount--;
2313 
2314          if (ocount == 0)
2315             outer0 = true;
2316       }
2317 
2318       if (a1fupdate)
2319       {
2320          uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
2321          uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
2322          a1FracCInX = a1_frac_xt >> 16;
2323          a1FracCInY = a1_frac_yt >> 16;
2324          a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
2325          a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
2326       }
2327 
2328       if (a1update)
2329       {
2330          a1_x += a1_step_x + a1FracCInX;
2331          a1_y += a1_step_y + a1FracCInY;
2332       }
2333 
2334       if (a2update)
2335       {
2336          a2_x += a2_step_x;
2337          a2_y += a2_step_y;
2338       }
2339    }
2340 
2341    // We never get here! !!! FIX !!!
2342 
2343 
2344    // Write values back to registers (in real blitter, these are continuously updated)
2345    SET16(blitter_ram, A1_PIXEL + 2, a1_x);
2346    SET16(blitter_ram, A1_PIXEL + 0, a1_y);
2347    SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
2348    SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
2349    SET16(blitter_ram, A2_PIXEL + 2, a2_x);
2350    SET16(blitter_ram, A2_PIXEL + 0, a2_y);
2351 
2352 }
2353 
2354 // Various pieces of the blitter puzzle are teased out here...
2355 
// ADDRGEN - blitter address generator.
//
// Selects either the A1 or the A2 parameter set (gena2), converts the (x, y)
// pointer into a linear offset, scales it by the pixel size and combines it
// with the pitch, the optional Z offset and the base address.
//
//   address  out: ((21-bit phrase address) << 3) | bits 3-5 of the scaled offset
//   pixa     out: low three bits of the scaled offset
//   gena2    true selects the a2_* inputs, false selects the a1_* inputs
//   zaddr    true adds the low two bits of the selected zoffset to the
//            phrase address
void ADDRGEN(uint32_t *address, uint32_t *pixa, bool gena2, bool zaddr,
   uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
   uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
{
   uint16_t xpos, ypos;
   uint8_t winWidth, depth, pitchSel, zoff, zterm;
   uint32_t phraseBase;
   uint32_t rowScale, rowOffset, scaled, phrase, stepped, extra, sum;

   // Pick the active channel. X uses all 16 bits; Y is limited to 12 bits.
   // Only the upper bits of the base take part (it is added in phrase units).
   if (gena2)
   {
      xpos       = a2_x;
      ypos       = a2_y & 0x0FFF;
      winWidth   = a2_width;
      depth      = a2_pixsize;
      pitchSel   = a2_pitch;
      zoff       = a2_zoffset;
      phraseBase = a2_base >> 3;
   }
   else
   {
      xpos       = a1_x;
      ypos       = a1_y & 0x0FFF;
      winWidth   = a1_width;
      depth      = a1_pixsize;
      pitchSel   = a1_pitch;
      zoff       = a1_zoffset;
      phraseBase = a1_base >> 3;
   }

   // Row offset: y * (4 + low two width bits), shifted left by the upper
   // width bits and then divided by four.
   rowScale  = (uint32_t)ypos * (4u + (winWidth & 0x03));
   rowOffset = (rowScale << (winWidth >> 2)) >> 2;

   // Offset of the pixel after scaling by the pixel size.
   scaled = (rowOffset + xpos) << depth;
   phrase = scaled >> 6;

   // Pitch 1 doubles and pitch 2 quadruples the phrase number; pitch 3 adds
   // two extra copies of it (i.e. triples it); pitch 0 leaves it alone.
   switch (pitchSel & 0x03)
   {
      case 0x01: stepped = phrase << 1; break;
      case 0x02: stepped = phrase << 2; break;
      default:   stepped = phrase;      break;
   }

   extra = (pitchSel == 0x03 ? phrase : 0);
   zterm = (zaddr ? zoff : 0) & 0x03;
   sum   = zterm + stepped + (extra << 1) + phraseBase;

   *address = ((scaled & 0x38) >> 3) | ((sum & 0x1FFFFF) << 3);
   *pixa    = scaled & 0x07;
}
2388 
2389 ////////////////////////////////////////////////////////////////////////////////////////////
2390 ////////////////////////////////////////////////////////////////////////////////////////////
2391 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
2392 ////////////////////////////////////////////////////////////////////////////////////////////
2393 ////////////////////////////////////////////////////////////////////////////////////////////
2394 
// ADDARRAY - the source data adder: four 16-bit lanes fed through ADD16SAT.
//
//   addq      out: the four 16-bit lane results (lane 0 = bits 0-15)
//   daddasel  input A select (3 bits): 0 = dstd, 1 = initpix replicated into
//             every lane, 2/3 = zero, 4 = srcd, 5 = patd, 6 = srcz1, 7 = srcz2
//   daddbsel  input B select: bit 2 set -> one 16-bit word (chosen by bits
//             3,1,0 from iinc/zinc/istep/zstep halves) copied to every lane;
//             otherwise bit 0 set -> initinc, bit 0 clear -> srcd
//   daddmode  adder mode; modes 1-4 feed the saved lane carries back in
//   initcin   per-lane initial carry in, ORed with the saved carry
//
// The carry out of each lane is kept in a function-local static, so results
// depend on the previous call whenever daddmode is in 1..4.
void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
	uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
	uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
	uint32_t zinc, uint32_t zstep)
{
   static uint8_t co[4];   // lane carry outs, preserved between calls
   unsigned i;
   uint16_t adda[4], addb[4];
   uint16_t wordmux[8];
   uint16_t word;
   uint64_t amux[8];
   uint64_t asrc, bsrc;
   uint64_t initpix4 = ((uint64_t)initpix << 48) | ((uint64_t)initpix << 32)
      | ((uint64_t)initpix << 16) | (uint64_t)initpix;
   uint8_t cin[4];
   uint8_t cinsel;
   bool eightbit, sat, hicinh;

   // Input A. daddasel is a three-bit field (daddasel[0..2]); mask it so a
   // stray upper bit can never index past the eight-entry table.
   amux[0] = dstd;
   amux[1] = initpix4;
   amux[2] = 0;
   amux[3] = 0;
   amux[4] = srcd;
   amux[5] = patd;
   amux[6] = srcz1;
   amux[7] = srcz2;
   asrc = amux[daddasel & 0x07];

   for(i = 0; i < 4; i++)
      adda[i] = (uint16_t)(asrc >> (16 * i));

   // Input B: either a single replicated word or a full 64-bit operand.
   wordmux[0] = iinc & 0xFFFF;
   wordmux[1] = iinc >> 16;
   wordmux[2] = zinc & 0xFFFF;
   wordmux[3] = zinc >> 16;
   wordmux[4] = istep & 0xFFFF;
   wordmux[5] = istep >> 16;
   wordmux[6] = zstep & 0xFFFF;
   wordmux[7] = zstep >> 16;
   word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];

   if (daddbsel & 0x04)
   {
      for(i = 0; i < 4; i++)
         addb[i] = word;
   }
   else
   {
      // Bit 2 clear: bit 0 chooses between initinc and srcd.
      bsrc = ((daddbsel & 0x01) ? initinc : srcd);

      for(i = 0; i < 4; i++)
         addb[i] = (uint16_t)(bsrc >> (16 * i));
   }

   // Carry in: the caller's initial carry, plus the saved carry in modes 1-4.
   cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);

   for(i = 0; i < 4; i++)
      cin[i] = initcin[i] | (co[i] & cinsel);

   eightbit = ((daddmode & 0x02) != 0);
   sat      = ((daddmode & 0x03) != 0);
   hicinh   = ((daddmode & 0x03) == 0x03);

   // Each lane's carry out is written back to co[] for the next call.
   for(i = 0; i < 4; i++)
      ADD16SAT(&addq[i], &co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
}
2476 
2477 
// ADD16SAT - one 16-bit adder lane with optional saturation.
//
// The sum is built from three slices (bits 0-7, 8-11, 12-15) so the carry
// between slices can be cut:
//   eightbit  blocks the carry from bit 7 into bit 8
//   hicinh    blocks the carry from bit 11 into bit 12
//
//   r    out: the 16-bit result
//   co   out: carry out of bit 15 (0 or 1)
//   cin  carry into bit 0
//   sat  enables saturation. It triggers when the top bit of b (bit 7 if
//        eightbit, else bit 15) differs from the matching carry out; the
//        result byte(s) become all ones if that carry is set, else zero.
//        With eightbit only the low byte saturates.
void ADD16SAT(uint16_t *r, uint8_t *co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
{
   uint32_t lowSum, midSum, highSum;
   uint8_t lowCarry, midCarry, carryOut;
   uint8_t signB, topCarry;
   uint16_t sum, result;

   // Bits 0-7
   lowSum   = (a & 0xFF) + (b & 0xFF) + cin;
   lowCarry = (uint8_t)((lowSum >> 8) & 0x01);

   // Bits 8-11; the incoming carry is dropped in eight-bit mode
   midSum   = (a & 0x0F00) + (b & 0x0F00);
   if (lowCarry && !eightbit)
      midSum += 0x0100;
   midCarry = (uint8_t)((midSum >> 12) & 0x01);

   // Bits 12-15; the incoming carry is dropped when hicinh is set
   highSum  = (a & 0xF000) + (b & 0xF000);
   if (midCarry && !hicinh)
      highSum += 0x1000;
   carryOut = (uint8_t)((highSum >> 16) & 0x01);

   *co = carryOut;
   sum = (uint16_t)((lowSum & 0x00FF) | (midSum & 0x0F00) | (highSum & 0xF000));

   if (eightbit)
   {
      signB    = (uint8_t)((b >> 7) & 0x01);
      topCarry = lowCarry;
   }
   else
   {
      signB    = (uint8_t)((b >> 15) & 0x01);
      topCarry = carryOut;
   }

   result = sum;

   if (sat && (signB != topCarry))
   {
      // The low byte always saturates; the high byte only in 16-bit mode
      result = (uint16_t)((result & 0xFF00) | (topCarry ? 0x00FF : 0x0000));

      if (!eightbit)
         result = (uint16_t)((result & 0x00FF) | (topCarry ? 0xFF00 : 0x0000));
   }

   *r = result;
}
2505 
// ADDAMUX - address adder input A selection.
//
// Produces the X/Y "A" operands for the address adder:
//   addareg set    a register value: A2 step when addasel bit 2 is set,
//                  otherwise addasel bits 0-1 pick A1 step, A1 step fraction,
//                  A1 increment or A1 increment fraction
//   addareg clear  a constant: X is 1 << adda_xconst (0 when adda_xconst is
//                  7), Y is 1 or 0 according to adda_yconst
// The chosen value is then bit-complemented when suba_x / suba_y is set.
void ADDAMUX(int16_t *adda_x, int16_t *adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
	int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
	int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
	bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
{
   int16_t pick_x, pick_y;
   uint16_t flip_x, flip_y;

/* Select between constant value and register value */

/*Addas_x		:= MX2 (addas_x, addac_x, addar_x, addareg);
Addas_y		:= MX2 (addas_y, addac_y, addar_y, addareg);*/
   if (addareg)
   {
      if (addasel & 0x04)
      {
         pick_x = a2_step_x;
         pick_y = a2_step_y;
      }
      else
      {
         switch (addasel & 0x03)
         {
            case 0x00:
               pick_x = a1_step_x;
               pick_y = a1_step_y;
               break;
            case 0x01:
               pick_x = a1_stepf_x;
               pick_y = a1_stepf_y;
               break;
            case 0x02:
               pick_x = a1_inc_x;
               pick_y = a1_inc_y;
               break;
            default:
               pick_x = a1_incf_x;
               pick_y = a1_incf_y;
               break;
         }
      }
   }
   else
   {
/* Generate a constant value - this is a power of 2 in the range
0-64, or zero.  The control bits are adda_xconst[0..2], when they
are all 1  the result is 0.
Constants for Y can only be 0 or 1 */

/*Addac_xlo	:= D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
Unused[0]	:= DUMMY (unused[0]);

Addac_x		:= JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addac_y		:= JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
			zero, zero, zero, zero, zero);*/
      pick_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
      pick_y = (adda_yconst ? 0x01 : 0);
   }

/* Complement these values (complement flag gives adder carry in)*/

/*Suba_x16	:= JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
			suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
Suba_y16	:= JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
			suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
Adda_x		:= EO (adda_x, suba_x16, addas_x);
Adda_y		:= EO (adda_y, suba_y16, addas_y);*/
   flip_x = (suba_x ? 0xFFFF : 0x0000);
   flip_y = (suba_y ? 0xFFFF : 0x0000);
   *adda_x = pick_x ^ flip_x;
   *adda_y = pick_y ^ flip_y;
}
2560 
2561 
2562 /**  ADDBMUX - Address adder input B selection  *******************
2563 
2564 This module selects the register to be updated by the address
2565 adder.  This can be one of three registers, the A1 and A2
2566 pointers, or the A1 fractional part. It can also be zero, so that the step
2567 registers load directly into the pointers.
2568 */
2569 
2570 /*DEF ADDBMUX (
2571 INT16/	addb_x
2572 INT16/	addb_y
2573 	:OUT;
2574 	addbsel[0..1]
2575 INT16/	a1_x
2576 INT16/	a1_y
2577 INT16/	a2_x
2578 INT16/	a2_y
2579 INT16/	a1_frac_x
2580 INT16/	a1_frac_y
2581 	:IN);
2582 INT16/	zero16 :LOCAL;
2583 BEGIN*/
// ADDBMUX - address adder input B selection.
//
// The low two bits of addbsel choose the X/Y pair handed to the adder:
//   0 = A1 pointer, 1 = A2 pointer, 2 = A1 fractional part, 3 = zero.
void ADDBMUX(int16_t *addb_x, int16_t *addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
	int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
{
/*Zero		:= TIE0 (zero);
Zero16		:= JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
			zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addbselb[0-1]	:= BUF8 (addbselb[0-1], addbsel[0-1]);
Addb_x		:= MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
Addb_y		:= MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
   switch (addbsel & 0x03)
   {
      case 0x00:
         *addb_x = a1_x;
         *addb_y = a1_y;
         break;
      case 0x01:
         *addb_x = a2_x;
         *addb_y = a2_y;
         break;
      case 0x02:
         *addb_x = a1_frac_x;
         *addb_y = a1_frac_y;
         break;
      default:
         // Selector 3: zero, so the A input passes through the adder unchanged
         *addb_x = 0;
         *addb_y = 0;
         break;
   }
}
2604 
2605 
2606 /**  DATAMUX - Address local data bus selection  ******************
2607 
2608 Select between the adder output and the input data bus
2609 */
2610 
2611 /*DEF DATAMUX (
2612 INT16/	data_x
2613 INT16/	data_y
2614 	:OUT;
2615 INT32/	gpu_din
2616 INT16/	addq_x
2617 INT16/	addq_y
2618 	addqsel
2619 	:IN);
2620 
2621 INT16/	gpu_lo, gpu_hi
2622 :LOCAL;
2623 BEGIN*/
// DATAMUX - address local data bus selection.
//
// With addqsel set the adder outputs (addq_x, addq_y) are passed through;
// otherwise the 32-bit gpu_din word is split, low half to X and high half
// to Y.
void DATAMUX(int16_t *data_x, int16_t *data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
{
/*Gpu_lo		:= JOIN (gpu_lo, gpu_din{0..15});
Gpu_hi		:= JOIN (gpu_hi, gpu_din{16..31});

Addqselb	:= BUF8 (addqselb, addqsel);
Data_x		:= MX2 (data_x, gpu_lo, addq_x, addqselb);
Data_y		:= MX2 (data_y, gpu_hi, addq_y, addqselb);*/
   if (addqsel)
   {
      *data_x = addq_x;
      *data_y = addq_y;
   }
   else
   {
      *data_x = (int16_t)(gpu_din & 0xFFFF);
      *data_y = (int16_t)(gpu_din >> 16);
   }
}
2639 
2640 
2641 /******************************************************************
2642 addradd
2643 29/11/90
2644 
2645 Blitter Address Adder
2646 ---------------------
2647 The blitter address adder is a pair of sixteen bit adders, one
2648 each for X and Y.  The multiplexing of the input terms is
2649 performed elsewhere, but this adder can also perform modulo
2650 arithmetic to align X-addresses onto phrase boundaries.
2651 
2652 modx[0..2] take values
2653 000	no mask
2654 001	mask bit 0
2655 010	mask bits 1-0
2656 ..
2657 110  	mask bits 5-0
2658 
2659 ******************************************************************/
2660 
// ADDRADD - blitter address adder (one 16-bit adder each for X and Y).
//
//   addq_x, addq_y  out: the sums; the low bits of X are cleared per modx
//   a1fracldi       when set, the carry out of this addition is latched and
//                   fed into the NEXT call; when clear the latch is reset
//   adda_*, addb_*  the two operands
//   modx            three-bit X mask select: 0 = no mask, n (1..6) clears
//                   bits n-1..0, 7 clears the whole X result
//   suba_x, suba_y  inverts the carry in. ADDAMUX complements the A operand
//                   on the same flag, so together they make A a subtrahend.
//
// The carry latches are function-local statics, so the result of a call can
// depend on the call before it.
void ADDRADD(int16_t *addq_x, int16_t *addq_y, bool a1fracldi,
	uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
{
   static const uint16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
   static uint16_t co_x = 0, co_y = 0;	// Latched carries, carried over to the next call
   uint16_t ci_x, ci_y;
   uint32_t addqt_x, addqt_y;

/* Perform the addition */

/*Adder_x		:= ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
Adder_y		:= ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/

/* latch carry and propagate if required */

/*Cxt0		:= AN2 (cxt[0], co_x, a1fracldi);
Cxt1		:= FD1Q (cxt[1], cxt[0], clk[0]);
Ci_x		:= EO (ci_x, cxt[1], suba_x);

Cyt0		:= AN2 (cyt[0], co_y, a1fracldi);
Cyt1		:= FD1Q (cyt[1], cyt[0], clk[0]);
Ci_y		:= EO (ci_y, cyt[1], suba_y);*/

   // Carry in is the previously latched carry, inverted when subtracting.
   ci_x = co_x ^ (suba_x ? 1 : 0);
   ci_y = co_y ^ (suba_y ? 1 : 0);
   addqt_x = (uint32_t)adda_x + addb_x + ci_x;
   addqt_y = (uint32_t)adda_y + addb_y + ci_y;

   // Latch this addition's carry out only while a1fracldi is asserted.
   co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
   co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);

/* Mask low bits of X to 0 if required */

/*Masksel		:= D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);

Maskbit[0-4]	:= OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);

Mask[0-5]	:= MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);

Addq_x		:= JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
Addq_y		:= JOIN (addq_y, addq_y[0..15]);*/

   // modx is a three-bit field (modx[0..2]); mask it so an out-of-range
   // value cannot index past the end of the table.
   *addq_x = addqt_x & mask[modx & 0x07];
   *addq_y = addqt_y & 0xFFFF;

//Unused[0-1]	:= DUMMY (unused[0-1]);

//END;
}
2712 
2713 
2714 /*
2715 DEF DATA (
2716 		wdata[0..63]	// co-processor write data bus
2717 		:BUS;
2718 		dcomp[0..7]		// data byte equal flags
2719 		srcd[0..7]		// bits to use for bit to byte expansion
2720 		zcomp[0..3]		// output from Z comparators
2721 		:OUT;
2722 		a1_x[0..1]		// low two bits of A1 X pointer
2723 		big_pix			// pixel organisation is big-endian
2724 		blitter_active	// blitter is active
2725 		clk				// co-processor clock
2726 		cmpdst			// compare dest rather than source
2727 		colorld			// load the pattern color fields
2728 		daddasel[0..2]	// data adder input A selection
2729 		daddbsel[0..3]	// data adder input B selection
2730 		daddmode[0..2]	// data adder mode
2731 		daddq_sel		// select adder output vs. GPU data
2732 		data[0..63]		// co-processor read data bus
2733 		data_ena		// enable write data
2734 		data_sel[0..1]	// select data to write
2735 		dbinh\[0..7]	// byte oriented changed data inhibits
2736 		dend[0..5]		// end of changed write data zone
2737 		dpipe[0..1]		// load computed data pipe-line latch
2738 		dstart[0..5]	// start of changed write data zone
2739 		dstdld[0..1]	// dest data load (two halves)
2740 		dstzld[0..1]	// dest zed load (two halves)
2741 		ext_int			// enable extended precision intensity calculations
2742 INT32/	gpu_din			// GPU data bus
2743 		iincld			// I increment load
2744 		iincldx			// alternate I increment load
2745 		init_if			// initialise I fraction phase
2746 		init_ii			// initialise I integer phase
2747 		init_zf			// initialise Z fraction phase
2748 		intld[0..3]		// computed intensities load
2749 		istepadd		// intensity step integer add
2750 		istepfadd		// intensity step fraction add
2751 		istepld			// I step load
2752 		istepdld		// I step delta load
2753 		lfu_func[0..3]	// LFU function code
2754 		patdadd			// pattern data gouraud add
2755 		patdld[0..1]	// pattern data load (two halves)
2756 		pdsel[0..1]		// select pattern data type
2757 		phrase_mode		// phrase write mode
2758 		reload			// transfer contents of double buffers
2759 		reset\			// system reset
2760 		srcd1ld[0..1]	// source register 1 load (two halves)
2761 		srcdread		// source data read load enable
2762 		srczread		// source zed read load enable
2763 		srcshift[0..5]	// source alignment shift
2764 		srcz1ld[0..1]	// source zed 1 load (two halves)
2765 		srcz2add		// zed fraction gouraud add
2766 		srcz2ld[0..1]	// source zed 2 load (two halves)
2767 		textrgb			// texture mapping in RGB mode
2768 		txtd[0..63]		// data from the texture unit
2769 		zedld[0..3]		// computed zeds load
2770 		zincld			// Z increment load
2771 		zmode[0..2]		// Z comparator mode
2772 		zpipe[0..1]		// load computed zed pipe-line latch
2773 		zstepadd		// zed step integer add
2774 		zstepfadd		// zed step fraction add
2775 		zstepld			// Z step load
2776 		zstepdld		// Z step delta load
2777 		:IN);
2778 */
2779 
DATA(uint64_t * wdata,uint8_t * dcomp,uint8_t * zcomp,bool * nowrite,bool big_pix,bool cmpdst,uint8_t daddasel,uint8_t daddbsel,uint8_t daddmode,bool daddq_sel,uint8_t data_sel,uint8_t dbinh,uint8_t dend,uint8_t dstart,uint64_t dstd,uint32_t iinc,uint8_t lfu_func,uint64_t * patd,bool patdadd,bool phrase_mode,uint64_t srcd,bool srcdread,bool srczread,bool srcz2add,uint8_t zmode,bool bcompen,bool bkgwren,bool dcompen,uint8_t icount,uint8_t pixsize,uint64_t * srcz,uint64_t dstz,uint32_t zinc)2780 void DATA(uint64_t *wdata, uint8_t *dcomp, uint8_t *zcomp, bool *nowrite,
2781 	bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
2782 	uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t *patd, bool patdadd,
2783 	bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
2784 	bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
2785 	uint64_t *srcz, uint64_t dstz, uint32_t zinc)
2786 {
2787 /*
2788   Stuff we absolutely *need* to have passed in/out:
2789 IN:
2790   patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
2791 OUT:
2792   changed patd (wdata I guess...) (Nope. We pass it back directly now...)
2793 */
2794 
2795 // Source data registers
2796 
2797 /*Data_src	:= DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
2798 			clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
2799 			srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
2800 Srcd[0-7]	:= JOIN (srcd[0-7], srcdlo{0-7});
2801 Srcd[8-31]	:= JOIN (srcd[8-31], srcdlo{8-31});
2802 Srcd[32-63]	:= JOIN (srcd[32-63], srcdhi{0-31});*/
2803 
2804 // Destination data registers
2805 
2806 /*Data_dst	:= DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
2807 Dstdlo		:= JOIN (dstdlo, dstd[0..31]);
2808 Dstdhi		:= JOIN (dstdhi, dstd[32..63]);*/
2809 
2810 // Pattern and Color data registers
2811 
2812 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
2813 // going on. Note that patd & patdv will output the same info.
2814 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
2815 // Actually, it can be either patdld OR patdadd...!
2816 /*Data_pat	:= DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
2817 			patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
2818 			patdadd, patdld[0..1], reload, reset\);
2819 Patdlo		:= JOIN (patdlo, patd[0..31]);
2820 Patdhi		:= JOIN (patdhi, patd[32..63]);*/
2821 
2822 // Multiplying data Mixer (NOT IN JAGUAR I)
2823 
2824 /*Datamix		:= DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
2825 			int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
2826 
2827 // Logic function unit
2828 
2829 /*Lfu		:= LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
2830 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2831 	uint64_t funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
2832 	uint64_t func0 = funcmask[lfu_func & 0x01];
2833 	uint64_t func1 = funcmask[(lfu_func >> 1) & 0x01];
2834 	uint64_t func2 = funcmask[(lfu_func >> 2) & 0x01];
2835 	uint64_t func3 = funcmask[(lfu_func >> 3) & 0x01];
2836 	uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
2837    bool mir_bit, mir_byte;
2838    uint16_t masku;
2839    uint8_t e_coarse, e_fine;
2840    uint8_t s_coarse, s_fine;
2841    uint16_t maskt;
2842 	uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
2843 		{ 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
2844 	uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
2845 	uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
2846 		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
2847    int en;
2848    uint64_t cmpd;
2849 	uint8_t dbinht;
2850    uint16_t addq[4];
2851    uint8_t initcin[4] = { 0, 0, 0, 0 };
2852    uint16_t mask;
2853    uint64_t dmux[4];
2854    uint64_t ddat;
2855 	uint64_t zwdata;
2856 //////////////////////////////////////////////////////////////////////////////////////
2857 
2858 // Increment and Step Registers
2859 
2860 // Does it do anything without the step add lines? Check it!
2861 // No. This is pretty much just a register file without the Jaguar II lines...
2862 /*Inc_step	:= INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
2863 			istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
2864 Istep		:= JOIN (istep, istep[0..31]);
2865 Zstep		:= JOIN (zstep, zstep[0..31]);*/
2866 
2867 // Pixel data comparator
2868 
2869 /*Datacomp	:= DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
2870 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2871 	*dcomp = 0;
2872 	cmpd = *patd ^ (cmpdst ? dstd : srcd);
2873 
2874 	if ((cmpd & 0x00000000000000FFLL) == 0)
2875 		*dcomp |= 0x01;
2876 	if ((cmpd & 0x000000000000FF00LL) == 0)
2877 		*dcomp |= 0x02;
2878 	if ((cmpd & 0x0000000000FF0000LL) == 0)
2879 		*dcomp |= 0x04;
2880 	if ((cmpd & 0x00000000FF000000LL) == 0)
2881 		*dcomp |= 0x08;
2882 	if ((cmpd & 0x000000FF00000000LL) == 0)
2883 		*dcomp |= 0x10;
2884 	if ((cmpd & 0x0000FF0000000000LL) == 0)
2885 		*dcomp |= 0x20;
2886 	if ((cmpd & 0x00FF000000000000LL) == 0)
2887 		*dcomp |= 0x40;
2888 	if ((cmpd & 0xFF00000000000000LL) == 0)
2889 		*dcomp |= 0x80;
2890 //////////////////////////////////////////////////////////////////////////////////////
2891 
2892 // Zed comparator for Z-buffer operations
2893 
2894 /*Zedcomp		:= ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
2895 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2896 //srczp is srcz pipelined, also it goes through a source shift as well...
2897 /*The shift is basically like so (each piece is 16 bits long):
2898 
2899 	0         1         2         3         4          5         6
2900 	srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcrz2lolo srcz2lohi srcz2hilo
2901 
2902 with srcshift bits 4 & 5 selecting the start position
2903 */
2904 //So... basically what we have here is:
2905 	*zcomp = 0;
2906 
2907 	if ((((*srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
2908 		|| (((*srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
2909 		|| (((*srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
2910 		*zcomp |= 0x01;
2911 
2912 	if ((((*srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
2913 		|| (((*srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
2914 		|| (((*srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
2915 		*zcomp |= 0x02;
2916 
2917 	if ((((*srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
2918 		|| (((*srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
2919 		|| (((*srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
2920 		*zcomp |= 0x04;
2921 
2922 	if ((((*srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
2923 		|| (((*srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
2924 		|| (((*srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
2925 		*zcomp |= 0x08;
2926 
2927 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
2928 //Nope, this is NOT the problem...
2929 //zcomp=0;
// We'll do the comparison/bit/byte inhibits here, since that's the way it happens
2931 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
2932 	COMP_CTRL(&dbinht, nowrite,
2933 		bcompen, true/*big_pix*/, bkgwren, *dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, *zcomp);
2934 	dbinh = dbinht;
2935 
2936 //////////////////////////////////////////////////////////////////////////////////////
2937 
2938 // 22 Mar 94
2939 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
2940 
2941 /*Datinit		:= DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
2942 			init_zf, istep[0..31], zinc, zstep[0..31]);*/
2943 
2944 // Adder array for Z and intensity increments
2945 
2946 /*Addarray	:= ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
2947 			initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
2948 			srcz2[0..1], reset\, zinc, zstep);*/
2949 /*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
2950 	uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
2951 	uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
2952 	uint32_t zinc, uint32_t zstep)*/
2953 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2954 	ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, *patd, srcd, 0, 0, 0, 0);
2955 
2956 	//This is normally done asynchronously above (thru local_data) when in patdadd mode...
2957 //And now it's passed back to the caller to be persistent between calls...!
2958 //But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
2959 //Weird! It doesn't anymore...!
2960 	if (patdadd)
2961 		*patd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2962 //////////////////////////////////////////////////////////////////////////////////////
2963 
2964 // Local data bus multiplexer
2965 
2966 /*Local_mux	:= LOCAL_MUX (local_data[0..1], load_data[0..1],
2967 	addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
2968 Local_data0	:= JOIN (local_data0, local_data[0]);
2969 Local_data1	:= JOIN (local_data1, local_data[1]);*/
2970 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2971 //////////////////////////////////////////////////////////////////////////////////////
2972 
2973 // Data output multiplexer and tri-state drive
2974 
2975 /*Data_mux	:= DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
2976 			dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
2977 ////////////////////////////////////// C++ CODE //////////////////////////////////////
2978 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
2979 
2980 //////////////////////////////////////////////////////////////////////////////////////
2981 //}
2982 
2983 /*DEF DATA_MUX (
2984 		wdata[0..63]	// co-processor rwrite data bus
2985 		:BUS;
2986 INT16/	addq[0..3]
2987 		big_pix			// Pixel organisation is big-endian
2988 INT32/	dstdlo
2989 INT32/	dstdhi
2990 INT32/	dstzlo
2991 INT32/	dstzhi
2992 		data_sel[0..1]	// source of write data
2993 		data_ena		// enable write data onto read/write bus
2994 		dstart[0..5]	// start of changed write data
2995 		dend[0..5]		// end of changed write data
2996 		dbinh\[0..7]	// byte oriented changed data inhibits
2997 INT32/	lfu[0..1]
2998 INT32/	patd[0..1]
2999 		phrase_mode		// phrase write mode
3000 INT32/	srczlo
3001 INT32/	srczhi
3002 		:IN);*/
3003 
3004 /*INT32/	addql[0..1], ddatlo, ddathi zero32
3005 :LOCAL;
3006 BEGIN
3007 
3008 Phrase_mode\	:= INV1 (phrase_mode\, phrase_mode);
3009 Zero		:= TIE0 (zero);
3010 Zero32		:= JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
3011 
3012 /* Generate a changed data mask */
3013 
3014 /*Edis		:= OR6 (edis\, dend[0..5]);
3015 Ecoarse		:= DECL38E (e_coarse\[0..7], dend[3..5], edis\);
3016 E_coarse[0]	:= INV1 (e_coarse[0], e_coarse\[0]);
3017 Efine		:= DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
3018 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3019 
3020 	en = ((dend & 0x3F) ? 1 : 0);
3021 	e_coarse = decl38e[en][(dend & 0x38) >> 3];		// Actually, this is e_coarse inverted...
3022 	e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
3023 	e_fine &= 0xFE;
3024 //////////////////////////////////////////////////////////////////////////////////////
3025 
3026 /*Scoarse		:= DECH38 (s_coarse[0..7], dstart[3..5]);
3027 Sfen\		:= INV1 (sfen\, s_coarse[0]);
3028 Sfine		:= DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
3029 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3030 	s_coarse = dech38[(dstart & 0x38) >> 3];
3031 	s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
3032 //////////////////////////////////////////////////////////////////////////////////////
3033 
3034 /*Maskt[0]	:= BUF1 (maskt[0], s_fine[0]);
3035 Maskt[1-7]	:= OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
3036 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3037 	maskt = s_fine & 0x0001;
3038 	maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
3039 	maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
3040 	maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
3041 	maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
3042 	maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
3043 	maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
3044 	maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
3045 //////////////////////////////////////////////////////////////////////////////////////
3046 
3047 /* Produce a look-ahead on the ripple carry:
3048 masktla = s_coarse[0] . /e_coarse[0] */
3049 /*Masktla		:= AN2 (masktla, s_coarse[0], e_coarse\[0]);
3050 Maskt[8]	:= OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
3051 Maskt[9-14]	:= OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
3052 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3053 	maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
3054 	maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
3055 	maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
3056 	maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
3057 	maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
3058 	maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
3059 	maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
3060 //////////////////////////////////////////////////////////////////////////////////////
3061 
3062 /* The bit terms are mirrored for big-endian pixels outside phrase
3063 mode.  The byte terms are mirrored for big-endian pixels in phrase
3064 mode.  */
3065 
3066 /*Mirror_bit	:= AN2M (mir_bit, phrase_mode\, big_pix);
3067 Mirror_byte	:= AN2H (mir_byte, phrase_mode, big_pix);
3068 
3069 Masktb[14]	:= BUF1 (masktb[14], maskt[14]);
3070 Masku[0]	:= MX4 (masku[0],  maskt[0],  maskt[7],  maskt[14],  zero, mir_bit, mir_byte);
3071 Masku[1]	:= MX4 (masku[1],  maskt[1],  maskt[6],  maskt[14],  zero, mir_bit, mir_byte);
3072 Masku[2]	:= MX4 (masku[2],  maskt[2],  maskt[5],  maskt[14],  zero, mir_bit, mir_byte);
3073 Masku[3]	:= MX4 (masku[3],  maskt[3],  maskt[4],  masktb[14], zero, mir_bit, mir_byte);
3074 Masku[4]	:= MX4 (masku[4],  maskt[4],  maskt[3],  masktb[14], zero, mir_bit, mir_byte);
3075 Masku[5]	:= MX4 (masku[5],  maskt[5],  maskt[2],  masktb[14], zero, mir_bit, mir_byte);
3076 Masku[6]	:= MX4 (masku[6],  maskt[6],  maskt[1],  masktb[14], zero, mir_bit, mir_byte);
3077 Masku[7]	:= MX4 (masku[7],  maskt[7],  maskt[0],  masktb[14], zero, mir_bit, mir_byte);
3078 Masku[8]	:= MX2 (masku[8],  maskt[8],  maskt[13], mir_byte);
3079 Masku[9]	:= MX2 (masku[9],  maskt[9],  maskt[12], mir_byte);
3080 Masku[10]	:= MX2 (masku[10], maskt[10], maskt[11], mir_byte);
3081 Masku[11]	:= MX2 (masku[11], maskt[11], maskt[10], mir_byte);
3082 Masku[12]	:= MX2 (masku[12], maskt[12], maskt[9],  mir_byte);
3083 Masku[13]	:= MX2 (masku[13], maskt[13], maskt[8],  mir_byte);
3084 Masku[14]	:= MX2 (masku[14], maskt[14], maskt[0],  mir_byte);*/
3085 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3086 
3087 	mir_bit = true/*big_pix*/ && !phrase_mode;
3088 	mir_byte = true/*big_pix*/ && phrase_mode;
3089 	masku = maskt;
3090 
3091 	if (mir_bit)
3092 	{
3093 		masku &= 0xFF00;
3094 		masku |= (maskt >> 7) & 0x0001;
3095 		masku |= (maskt >> 5) & 0x0002;
3096 		masku |= (maskt >> 3) & 0x0004;
3097 		masku |= (maskt >> 1) & 0x0008;
3098 		masku |= (maskt << 1) & 0x0010;
3099 		masku |= (maskt << 3) & 0x0020;
3100 		masku |= (maskt << 5) & 0x0040;
3101 		masku |= (maskt << 7) & 0x0080;
3102 	}
3103 
3104 	if (mir_byte)
3105 	{
3106 		masku = 0;
3107 		masku |= (maskt >> 14) & 0x0001;
3108 		masku |= (maskt >> 13) & 0x0002;
3109 		masku |= (maskt >> 12) & 0x0004;
3110 		masku |= (maskt >> 11) & 0x0008;
3111 		masku |= (maskt >> 10) & 0x0010;
3112 		masku |= (maskt >> 9)  & 0x0020;
3113 		masku |= (maskt >> 8)  & 0x0040;
3114 		masku |= (maskt >> 7)  & 0x0080;
3115 
3116 		masku |= (maskt >> 5) & 0x0100;
3117 		masku |= (maskt >> 3) & 0x0200;
3118 		masku |= (maskt >> 1) & 0x0400;
3119 		masku |= (maskt << 1) & 0x0800;
3120 		masku |= (maskt << 3) & 0x1000;
3121 		masku |= (maskt << 5) & 0x2000;
3122 		masku |= (maskt << 7) & 0x4000;
3123 	}
3124 //////////////////////////////////////////////////////////////////////////////////////
3125 
3126 /* The maskt terms define the area for changed data, but the byte
3127 inhibit terms can override these */
3128 
3129 /*Mask[0-7]	:= AN2 (mask[0-7], masku[0-7], dbinh\[0]);
3130 Mask[8-14]	:= AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
3131 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3132 	mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
3133 	mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
3134 //////////////////////////////////////////////////////////////////////////////////////
3135 
3136 /*Addql[0]	:= JOIN (addql[0], addq[0..1]);
3137 Addql[1]	:= JOIN (addql[1], addq[2..3]);
3138 
3139 Dsel0b[0-1]	:= BUF8 (dsel0b[0-1], data_sel[0]);
3140 Dsel1b[0-1]	:= BUF8 (dsel1b[0-1], data_sel[1]);
3141 Ddatlo		:= MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
3142 Ddathi		:= MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
3143 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3144 	dmux[0] = *patd;
3145 	dmux[1] = lfu;
3146 	dmux[2] = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
3147 	dmux[3] = 0;
3148 	ddat = dmux[data_sel];
3149 //////////////////////////////////////////////////////////////////////////////////////
3150 
3151 /*Zed_sel		:= AN2 (zed_sel, data_sel[0..1]);
3152 Zed_selb[0-1]	:= BUF8 (zed_selb[0-1], zed_sel);
3153 
3154 Dat[0-7]	:= MX4 (dat[0-7],   dstdlo{0-7},   ddatlo{0-7},   dstzlo{0-7},   srczlo{0-7},   mask[0-7], zed_selb[0]);
3155 Dat[8-15]	:= MX4 (dat[8-15],  dstdlo{8-15},  ddatlo{8-15},  dstzlo{8-15},  srczlo{8-15},  mask[8],   zed_selb[0]);
3156 Dat[16-23]	:= MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9],   zed_selb[0]);
3157 Dat[24-31]	:= MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10],  zed_selb[0]);
3158 Dat[32-39]	:= MX4 (dat[32-39], dstdhi{0-7},   ddathi{0-7},   dstzhi{0-7},   srczhi{0-7},   mask[11],  zed_selb[1]);
3159 Dat[40-47]	:= MX4 (dat[40-47], dstdhi{8-15},  ddathi{8-15},  dstzhi{8-15},  srczhi{8-15},  mask[12],  zed_selb[1]);
3160 Dat[48-55]	:= MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13],  zed_selb[1]);
3161 Dat[56-63]	:= MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14],  zed_selb[1]);*/
3162 ////////////////////////////////////// C++ CODE //////////////////////////////////////
3163 	*wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
3164 	*wdata |= ((mask & 0x0100) ? ddat : dstd) & 0x000000000000FF00LL;
3165 	*wdata |= ((mask & 0x0200) ? ddat : dstd) & 0x0000000000FF0000LL;
3166 	*wdata |= ((mask & 0x0400) ? ddat : dstd) & 0x00000000FF000000LL;
3167 	*wdata |= ((mask & 0x0800) ? ddat : dstd) & 0x000000FF00000000LL;
3168 	*wdata |= ((mask & 0x1000) ? ddat : dstd) & 0x0000FF0000000000LL;
3169 	*wdata |= ((mask & 0x2000) ? ddat : dstd) & 0x00FF000000000000LL;
3170 	*wdata |= ((mask & 0x4000) ? ddat : dstd) & 0xFF00000000000000LL;
3171 
3172 //This is a crappy way of handling this, but it should work for now...
3173 	zwdata = ((*srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
3174 	zwdata |= ((mask & 0x0100) ? *srcz : dstz) & 0x000000000000FF00LL;
3175 	zwdata |= ((mask & 0x0200) ? *srcz : dstz) & 0x0000000000FF0000LL;
3176 	zwdata |= ((mask & 0x0400) ? *srcz : dstz) & 0x00000000FF000000LL;
3177 	zwdata |= ((mask & 0x0800) ? *srcz : dstz) & 0x000000FF00000000LL;
3178 	zwdata |= ((mask & 0x1000) ? *srcz : dstz) & 0x0000FF0000000000LL;
3179 	zwdata |= ((mask & 0x2000) ? *srcz : dstz) & 0x00FF000000000000LL;
3180 	zwdata |= ((mask & 0x4000) ? *srcz : dstz) & 0xFF00000000000000LL;
3181 	*srcz = zwdata;
3182 //////////////////////////////////////////////////////////////////////////////////////
3183 
3184 /*Data_enab[0-1]	:= BUF8 (data_enab[0-1], data_ena);
3185 Datadrv[0-31]	:= TS (wdata[0-31],  dat[0-31],  data_enab[0]);
3186 Datadrv[32-63]	:= TS (wdata[32-63], dat[32-63], data_enab[1]);
3187 
3188 Unused[0]	:= DUMMY (unused[0]);
3189 
3190 END;*/
3191 }
3192 
3193 
3194 /**  COMP_CTRL - Comparator output control logic  *****************
3195 
3196 This block is responsible for taking the comparator outputs and
3197 using them as appropriate to inhibit writes.  Two methods are
3198 supported for inhibiting write data:
3199 
3200 -	suppression of the inner loop controlled write operation
3201 -	a set of eight byte inhibit lines to write back dest data
3202 
3203 The first technique is used in pixel oriented modes, the second in
3204 phrase mode, but the phrase mode form is only applicable to eight
3205 and sixteen bit pixel modes.
3206 
3207 Writes can be suppressed by data being equal, by the Z comparator
3208 conditions being met, or by the bit to pixel expansion scheme.
3209 
3210 Pipe-lining issues: the data derived comparator outputs are stable
3211 until the next data read, well after the affected write from this
3212 operation.  However, the inner counter bits can count immediately
3213 before the ack for the last write.  Therefore, it is necessary to
3214 delay bcompbit select terms by one inner loop pipe-line stage,
3215 when generating the select for the data control - the output is
3216 delayed one further tick to give it write data timing (2/34).
3217 
3218 There is also a problem with computed data - the new values are
3219 calculated before the write associated with the old value has been
performed.  This is taken care of within the zed comparator by
3221 pipe-lining the comparator inputs where appropriate.
3222 */
3223 
COMP_CTRL(uint8_t * dbinh,bool * nowrite,bool bcompen,bool big_pix,bool bkgwren,uint8_t dcomp,bool dcompen,uint8_t icount,uint8_t pixsize,bool phrase_mode,uint8_t srcd,uint8_t zcomp)3224 void COMP_CTRL(uint8_t *dbinh, bool *nowrite,
3225 	bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
3226 	uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
3227 {
3228    //BEGIN
3229 
3230    /*Bkgwren\	:= INV1 (bkgwren\, bkgwren);
3231      Phrase_mode\	:= INV1 (phrase_mode\, phrase_mode);
3232      Pixsize\[0-2]	:= INV2 (pixsize\[0-2], pixsize[0-2]);*/
3233 
3234    /* The bit comparator bits are derived from the source data, which
3235       will have been suitably aligned for phrase mode.  The contents of
3236       the inner counter are used to select which bit to use.
3237 
3238       When not in phrase mode the inner count value is used to select
3239       one bit.  It is assumed that the count has already occurred, so,
3240       7 selects bit 0, etc.  In big-endian pixel mode, this turns round,
3241       so that a count of 7 selects bit 7.
3242 
3243       In phrase mode, the eight bits are used directly, and this mode is
3244       only applicable to 8-bit pixel mode (2/34) */
3245 
3246    /*Bcompselt[0-2]	:= EO (bcompselt[0-2], icount[0-2], big_pix);
3247 Bcompbit	:= MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
3248 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
3249 Bcompbit\	:= INV1 (bcompbit\, bcompbit);*/
3250    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3251    uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
3252    uint8_t bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
3253    bool bcompbit = srcd & bitmask[bcompselt];
3254    bool winhibit, di0t0_1, di0t4, di1t2, di2t0_1, di2t4, di3t2;
3255    bool di4t0_1, di4t4, di5t2;
3256    bool di6t0_1, di6t4;
3257    bool di7t2;
3258 
3259    //////////////////////////////////////////////////////////////////////////////////////
3260 
3261    /* pipe-line the count */
3262    /*Bcompsel[0-2]	:= FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
3263 Bcompbt		:= MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
3264 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
3265 Bcompbitp	:= FD1Q (bcompbitp, bcompbitpt, clk);
3266 Bcompbitp\	:= INV1 (bcompbitp\, bcompbitp);*/
3267 
3268    /* For pixel mode, generate the write inhibit signal for all modes
3269       on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
3270       for 16 bit mode on Z inhibit
3271 
3272       Nowrite = bcompen . /bcompbit . /phrase_mode
3273       + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
3274       + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
3275       + zcomp[0] . /phrase_mode . pixsize = 100
3276       */
3277 
3278    /*Nowt0		:= NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
3279 Nowt1		:= ND6  (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
3280 Nowt2		:= ND7  (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
3281 Nowt3		:= NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
3282 Nowt4		:= NAN4 (nowt[4], nowt[0..3]);
3283 Nowrite		:= AN2  (nowrite, nowt[4], bkgwren\);*/
3284    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3285    *nowrite = ((bcompen && !bcompbit && !phrase_mode)
3286          || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
3287          || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
3288          || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
3289       && !bkgwren;
3290    //////////////////////////////////////////////////////////////////////////////////////
3291 
3292    /*Winht		:= NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
3293 Winhibit	:= NAN4 (winhibit, winht, nowt[1..3]);*/
3294    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3295    //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
3296    //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
3297    //	bool winhibit = (bcompen && !
3298    winhibit = (bcompen && !bcompbit && !phrase_mode)
3299       || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
3300       || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
3301       || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
3302    //////////////////////////////////////////////////////////////////////////////////////
3303 
3304    /* For phrase mode, generate the byte inhibit signals for eight bit
3305       mode 011, or sixteen bit mode 100
3306       dbinh\[0] =  pixsize[2] . zcomp[0]
3307       +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
3308       + /pixsize[2] . dcomp[0] . dcompen
3309       + /srcd[0] . bcompen
3310 
3311       Inhibits 0-3 are also used when not in phrase mode to write back
3312       destination data.
3313       */
3314 
3315    /*Srcd\[0-7]	:= INV1 (srcd\[0-7], srcd[0-7]);
3316 
3317 Di0t0		:= NAN2H (di0t[0], pixsize[2], zcomp[0]);
3318 Di0t1		:= NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
3319 Di0t2		:= NAN2 (di0t[2], srcd\[0], bcompen);
3320 Di0t3		:= NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
3321 Di0t4		:= NAN4 (di0t[4], di0t[0..3]);
3322 Dbinh[0]	:= ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
3323    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3324    *dbinh = 0;
3325    di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
3326       || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
3327    di0t4 = di0t0_1
3328       || (!(srcd & 0x01) && bcompen)
3329       || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
3330    *dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
3331    //////////////////////////////////////////////////////////////////////////////////////
3332 
3333    /*Di1t0		:= NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
3334 Di1t1		:= NAN2 (di1t[1], srcd\[1], bcompen);
3335 Di1t2		:= NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
3336 Dbinh[1]	:= ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
3337    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3338    di1t2 = di0t0_1
3339       || (!(srcd & 0x02) && bcompen)
3340       || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
3341    *dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
3342    //////////////////////////////////////////////////////////////////////////////////////
3343 
3344    /*Di2t0		:= NAN2H (di2t[0], pixsize[2], zcomp[1]);
3345 Di2t1		:= NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
3346 Di2t2		:= NAN2 (di2t[2], srcd\[2], bcompen);
3347 Di2t3		:= NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
3348 Di2t4		:= NAN4 (di2t[4], di2t[0..3]);
3349 Dbinh[2]	:= ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
3350    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3351    //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
3352    //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
3353    //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
3354    di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
3355       || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
3356    di2t4 = di2t0_1
3357       || (!(srcd & 0x04) && bcompen)
3358       || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
3359    *dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
3360    //////////////////////////////////////////////////////////////////////////////////////
3361 
3362    /*Di3t0		:= NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
3363 Di3t1		:= NAN2 (di3t[1], srcd\[3], bcompen);
3364 Di3t2		:= NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
3365 Dbinh[3]	:= ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
3366    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3367    di3t2 = di2t0_1
3368       || (!(srcd & 0x08) && bcompen)
3369       || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
3370    *dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
3371    //////////////////////////////////////////////////////////////////////////////////////
3372 
3373    /*Di4t0		:= NAN2H (di4t[0], pixsize[2], zcomp[2]);
3374 Di4t1		:= NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
3375 Di4t2		:= NAN2 (di4t[2], srcd\[4], bcompen);
3376 Di4t3		:= NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
3377 Di4t4		:= NAN4 (di4t[4], di4t[0..3]);
3378 Dbinh[4]	:= NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
3379    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3380    di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
3381       || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
3382    di4t4 = di4t0_1
3383       || (!(srcd & 0x10) && bcompen)
3384       || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
3385    *dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
3386    //////////////////////////////////////////////////////////////////////////////////////
3387 
3388    /*Di5t0		:= NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
3389 Di5t1		:= NAN2 (di5t[1], srcd\[5], bcompen);
3390 Di5t2		:= NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
3391 Dbinh[5]	:= NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
3392    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3393    di5t2 = di4t0_1
3394       || (!(srcd & 0x20) && bcompen)
3395       || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
3396    *dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
3397    //////////////////////////////////////////////////////////////////////////////////////
3398 
3399    /*Di6t0		:= NAN2H (di6t[0], pixsize[2], zcomp[3]);
3400 Di6t1		:= NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
3401 Di6t2		:= NAN2 (di6t[2], srcd\[6], bcompen);
3402 Di6t3		:= NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
3403 Di6t4		:= NAN4 (di6t[4], di6t[0..3]);
3404 Dbinh[6]	:= NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
3405    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3406    di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
3407       || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
3408    di6t4 = di6t0_1
3409       || (!(srcd & 0x40) && bcompen)
3410       || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
3411    *dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
3412    //////////////////////////////////////////////////////////////////////////////////////
3413 
3414    /*Di7t0		:= NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
3415 Di7t1		:= NAN2 (di7t[1], srcd\[7], bcompen);
3416 Di7t2		:= NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
3417 Dbinh[7]	:= NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
3418    ////////////////////////////////////// C++ CODE //////////////////////////////////////
3419    di7t2 = di6t0_1
3420       || (!(srcd & 0x80) && bcompen)
3421       || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
3422    *dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
3423    //////////////////////////////////////////////////////////////////////////////////////
3424 
3425    //END;
3426    //kludge
3427    *dbinh = ~*dbinh;
3428 }
3429 
3430 #endif
3431