1 //
2 // Blitter core
3 //
4 // by James Hammons
5 // (C) 2010 Underground Software
6 //
7 // JLH = James Hammons <jlhamm@acm.org>
8 //
9 // Who When What
10 // --- ---------- -------------------------------------------------------------
11 // JLH 01/16/2010 Created this log ;-)
12 //
13
14 //
15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
16 // for supplying the Oberon ASIC nets and to John for making them available
17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
18 // what's going on inside the TOM chip, we'd all still be guessing as to how
19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
20 // Now how about those JERRY ASIC nets gentlemen...? [We have those now!] ;-)
21 //
22
23 #include "blitter.h"
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include "jaguar.h"
29 #include "log.h"
30 #include "settings.h"
31
32 // Various conditional compilation goodies...
33
34 #define USE_ORIGINAL_BLITTER
35 #define USE_MIDSUMMER_BLITTER_MKII
36
37 // External global variables
38
39 extern int jaguar_active_memory_dumps;
40
41 // Local global variables
42
43 // Blitter register RAM (most of it is hidden from the user)
44
45 static uint8_t blitter_ram[0x100];
46
47 // Other crapola
48
49 void BlitterMidsummer(uint32_t cmd);
50 void BlitterMidsummer2(void);
51
// Big-endian 32-bit access to the blitter register file.
// REG(A) assembles the four bytes starting at byte offset A (most significant
// byte first); WREG(A,D) stores the 32-bit value D back in the same order.
// Both evaluate A several times, so A must be free of side effects.
#define REG(A)	(((uint32_t)blitter_ram[(A)] << 24) | ((uint32_t)blitter_ram[(A)+1] << 16) \
				| ((uint32_t)blitter_ram[(A)+2] << 8) | (uint32_t)blitter_ram[(A)+3])
#define WREG(A,D)	(blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
					blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)
56
57 // Blitter registers (offsets from F02200)
58
// All values below are byte offsets into blitter_ram[], for use with REG()/WREG().
#define A1_BASE			((uint32_t)0x00)	// A1 base address (blitter_blit masks off the low 3 bits)
#define A1_FLAGS		((uint32_t)0x04)	// A1 pitch, pixel size, Z offset, width and add-control fields
#define A1_CLIP			((uint32_t)0x08)	// Height and width values for clipping
#define A1_PIXEL		((uint32_t)0x0C)	// Integer part of the pixel (Y.i and X.i)
#define A1_STEP			((uint32_t)0x10)	// Integer part of the step
#define A1_FSTEP		((uint32_t)0x14)	// Fractional part of the step
#define A1_FPIXEL		((uint32_t)0x18)	// Fractional part of the pixel (Y.f and X.f)
#define A1_INC			((uint32_t)0x1C)	// Integer part of the increment
#define A1_FINC			((uint32_t)0x20)	// Fractional part of the increment
#define A2_BASE			((uint32_t)0x24)	// A2 base address (blitter_blit masks off the low 3 bits)
#define A2_FLAGS		((uint32_t)0x28)	// A2 pitch, pixel size, Z offset, width, mask-enable and add-control fields
#define A2_MASK			((uint32_t)0x2C)	// Modulo values for x and y (M.y and M.x)
#define A2_PIXEL		((uint32_t)0x30)	// Integer part of the pixel (no fractional part for A2)
#define A2_STEP			((uint32_t)0x34)	// Integer part of the step (no fractional part for A2)
#define COMMAND			((uint32_t)0x38)	// Byte reads of 0x38-0x3B return a fixed status value (see BlitterReadByte)
#define PIXLINECOUNTER	((uint32_t)0x3C)	// Inner & outer loop values
#define SRCDATA			((uint32_t)0x40)	// Source data used when the source is not read from memory
#define DSTDATA			((uint32_t)0x48)	// Destination data used when DSTEN is clear
#define DSTZ			((uint32_t)0x50)	// Destination Z used when it is not read from memory
#define SRCZINT			((uint32_t)0x58)	// Source Z used when it is not read from memory
#define SRCZFRAC		((uint32_t)0x60)
#define PATTERNDATA		((uint32_t)0x68)	// Pattern data (PATDSEL write data, CMPDST compare value)
#define INTENSITYINC	((uint32_t)0x70)	// Gouraud increment: top byte -> gd_ca, low 24 bits -> gd_ia
#define ZINC			((uint32_t)0x74)	// Z increment loaded into zadd when GOURZ is set
#define COLLISIONCTRL	((uint32_t)0x78)
#define PHRASEINT0		((uint32_t)0x7C)
#define PHRASEINT1		((uint32_t)0x80)
#define PHRASEINT2		((uint32_t)0x84)
#define PHRASEINT3		((uint32_t)0x88)
#define PHRASEZ0		((uint32_t)0x8C)	// PHRASEZ0..3: initial z_i[0..3] values when GOURZ is set
#define PHRASEZ1		((uint32_t)0x90)
#define PHRASEZ2		((uint32_t)0x94)
#define PHRASEZ3		((uint32_t)0x98)
92
93 // Blitter command bits
94
// NOTE: every macro in this first group tests a bit of a variable literally
// named `cmd`, so they can only be used where such a variable is in scope.
#define SRCEN			(cmd & 0x00000001)
#define SRCENZ			(cmd & 0x00000002)
#define SRCENX			(cmd & 0x00000004)
#define DSTEN			(cmd & 0x00000008)
#define DSTENZ			(cmd & 0x00000010)
#define DSTWRZ			(cmd & 0x00000020)
#define CLIPA1			(cmd & 0x00000040)

// Outer-loop update enables (see the step setup in blitter_blit)
#define UPDA1F			(cmd & 0x00000100)
#define UPDA1			(cmd & 0x00000200)
#define UPDA2			(cmd & 0x00000400)

// Set: A2 is the destination (A1 -> A2); clear: A1 is the destination
#define DSTA2			(cmd & 0x00000800)

// Z comparator: inhibit the write when source Z is <, == or > destination Z
#define Z_OP_INF		(cmd & 0x00040000)
#define Z_OP_EQU		(cmd & 0x00080000)
#define Z_OP_SUP		(cmd & 0x00100000)

// Logic function unit minterms (~S&~D, ~S&D, S&~D, S&D), OR'ed together
#define LFU_NAN			(cmd & 0x00200000)
#define LFU_NA			(cmd & 0x00400000)
#define LFU_AN			(cmd & 0x00800000)
#define LFU_A			(cmd & 0x01000000)

#define CMPDST			(cmd & 0x02000000)
#define BCOMPEN			(cmd & 0x04000000)
#define DCOMPEN			(cmd & 0x08000000)

#define PATDSEL			(cmd & 0x00010000)
#define ADDDSEL			(cmd & 0x00020000)
#define TOPBEN			(cmd & 0x00004000)
#define TOPNEN			(cmd & 0x00008000)
#define BKGWREN			(cmd & 0x10000000)
#define GOURD			(cmd & 0x00001000)
#define GOURZ			(cmd & 0x00002000)
#define SRCSHADE		(cmd & 0x40000000)


// Values of the 2-bit X add control field (bits 16-17 of A1_FLAGS/A2_FLAGS)
#define XADDPHR	 0
#define XADDPIX	 1
#define XADD0	 2
#define XADDINC	 3

// The remaining macros read the flags registers directly through REG()
#define XSIGNSUB_A1		(REG(A1_FLAGS)&0x080000)
#define XSIGNSUB_A2		(REG(A2_FLAGS)&0x080000)

#define YSIGNSUB_A1		(REG(A1_FLAGS)&0x100000)
#define YSIGNSUB_A2		(REG(A2_FLAGS)&0x100000)

#define YADD1_A1		(REG(A1_FLAGS)&0x040000)
#define YADD1_A2		(REG(A2_FLAGS)&0x040000)
145
146 /*******************************************************************************
147 ********************** STUFF CUT BELOW THIS LINE! ******************************
148 *******************************************************************************/
149 #ifdef USE_ORIGINAL_BLITTER // We're ditching this crap for now...
150
151 //Put 'em back, once we fix the problem!!! [KO]
// Pixel read macros.
//
// The argument `a` is a channel prefix (a1 or a2) that is token-pasted onto
// the file-level variables a##_x, a##_y (16.16 fixed point), a##_width,
// a##_pitch and a##_addr.  PIXEL_OFFSET_n yields the offset of the pixel from
// a##_addr in units of the access size (bytes for 1-8 bpp; the 16/32 bpp
// readers shift it left by 1/2).  The X part is split into a group index
// (scaled by 1 + pitch) and a position inside the group.  PIXEL_SHIFT_n yields
// the bit position of a sub-byte pixel inside its byte (derived from ~x, so
// lower X values sit in the higher bits).

// 1 bpp pixel read
#define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
#define PIXEL_OFFSET_1(a)     (((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)

// 2 bpp pixel read
#define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
#define PIXEL_OFFSET_2(a)     (((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)

// 4 bpp pixel read
#define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
#define PIXEL_OFFSET_4(a)     (((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)

// 8 bpp pixel read
#define PIXEL_OFFSET_8(a)     (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))

// 16 bpp pixel read
#define PIXEL_OFFSET_16(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))

// 32 bpp pixel read
#define PIXEL_OFFSET_32(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))

// pixel read: dispatch on the 3-bit pixel size field (bits 3-5 of the flags
// value f); sizes 6 and 7 read as 0
#define READ_PIXEL(a,f) (\
     (((f>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
     (((f>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
     (((f>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
     (((f>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
     (((f>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
     (((f>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
187
// 16 bpp z data read
// The Z word lives a##_zoffs * 4 words past the pixel's own 16 bpp offset.
#define ZDATA_OFFSET_16(a)     (PIXEL_OFFSET_16(a) + a##_zoffs * 4)
#define READ_ZDATA_16(a)       (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), BLITTER))

// z data read (the flags argument f is ignored: Z is always handled as 16 bpp)
#define READ_ZDATA(a,f) (READ_ZDATA_16(a))

// 16 bpp z data write
#define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d, BLITTER); }

// z data write (f is ignored here as well).  NOTE: the expansion already ends
// in ';', so WRITE_ZDATA(...); must not be followed directly by an `else`.
#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
200
// Register data read macros.
//
// READ_RDATA_n(r,a,p) extracts one n-bit pixel from the 64-bit register that
// starts at byte offset r in the register file (read as two 32-bit longs with
// REG()).  `a` is the channel prefix (a1/a2) whose 16.16 fixed point X
// position a##_x selects the pixel, and `p` is non-zero in phrase mode.  When
// p is zero the low n bits of the first long are returned.  In phrase mode the
// first sub-expression selects the long (adds 0 or 4 to r) and the second is
// the bit shift inside that long, which therefore must stay in the range 0-31.
//
// FIX: the 2 bpp and 4 bpp shift masks used to be 0x3E and 0x28.  Both allow
// shift counts >= 32 on a 32-bit value (undefined behaviour), and 0x28 did not
// even select the pixel index bits.  They are now 0x1E (16 pixels * 2 bits)
// and 0x1C (8 pixels * 4 bits), matching the pattern of the other sizes.

// 1 bpp r data read
#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))

// 2 bpp r data read
#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x1E)) & 0x0003 : (REG(r) & 0x0003))

// 4 bpp r data read
#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x1C)) & 0x000F : (REG(r) & 0x000F))

// 8 bpp r data read
#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))

// 16 bpp r data read
#define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))

// 32 bpp r data read
#define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((uint32_t)a##_x >> 14) & 0x04)) : REG(r))

// register data read: dispatch on the 3-bit pixel size field (bits 3-5 of the
// flags value f); sizes 6 and 7 read as 0
#define READ_RDATA(r,a,f,p) (\
     (((f>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
     (((f>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
     (((f>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
     (((f>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
     (((f>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
     (((f>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
227
// Pixel write macros.
//
// `a` is the channel prefix (a1/a2) and `d` the pixel value.  The sub-byte
// sizes do a read-modify-write of the containing byte: the pixel's bits are
// cleared and the new value OR'ed in at PIXEL_SHIFT_n.
//
// FIX: the value is now masked to the pixel width before it is shifted in.
// The blitter's write data is a full 32-bit value (e.g. the result of a
// negating logic function), and without the mask its upper bits overwrote the
// neighbouring pixels stored in the same byte.

// 1 bpp pixel write
#define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER)&(~(0x01 << PIXEL_SHIFT_1(a))))|(((d)&0x01)<<PIXEL_SHIFT_1(a)), BLITTER); }

// 2 bpp pixel write
#define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER)&(~(0x03 << PIXEL_SHIFT_2(a))))|(((d)&0x03)<<PIXEL_SHIFT_2(a)), BLITTER); }

// 4 bpp pixel write
#define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER)&(~(0x0f << PIXEL_SHIFT_4(a))))|(((d)&0x0f)<<PIXEL_SHIFT_4(a)), BLITTER); }

// 8 bpp pixel write
#define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d, BLITTER); }

// 16 bpp pixel write
#define WRITE_PIXEL_16(a,d)     { JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d, BLITTER); }

// 32 bpp pixel write
#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }

// pixel write: dispatch on the 3-bit pixel size field (bits 3-5 of the flags
// value f); sizes 6 and 7 write nothing
#define WRITE_PIXEL(a,f,d) {\
	switch ((f>>3)&0x07) { \
	case 0: WRITE_PIXEL_1(a,d);  break;  \
	case 1: WRITE_PIXEL_2(a,d);  break;  \
	case 2: WRITE_PIXEL_4(a,d);  break;  \
	case 3: WRITE_PIXEL_8(a,d);  break;  \
	case 4: WRITE_PIXEL_16(a,d); break;  \
	case 5: WRITE_PIXEL_32(a,d); break;  \
	}}
256
// Decoded state of the blit in progress.  blitter_blit() fills these in from
// the register file and the command word; blitter_generic() consumes them.

// Bit fields sliced out of the command word by blitter_blit()
static uint8_t src;
static uint8_t dst;
static uint8_t misc;
static uint8_t a1ctl;
static uint8_t mode;
static uint8_t ity;
static uint8_t zop;
static uint8_t op;
static uint8_t ctrl;
// Channel base addresses (low three bits masked off)
static uint32_t a1_addr;
static uint32_t a2_addr;
// Z offset field (bits 6-8 of the flags registers)
static int32_t a1_zoffs;
static int32_t a2_zoffs;
// X add control field (XADDPHR/XADDPIX/XADD0/XADDINC)
static uint32_t xadd_a1_control;
static uint32_t xadd_a2_control;
// Pitch, translated through the { 0, 1, 3, 2 } table in blitter_blit()
static int32_t a1_pitch;
static int32_t a2_pitch;
// Inner (pixels per line) and outer (lines) counts from PIXLINECOUNTER
static uint32_t n_pixels;
static uint32_t n_lines;
// Current pixel positions (16.16 fixed point) and window widths
static int32_t a1_x;
static int32_t a1_y;
static int32_t a1_width;
static int32_t a2_x;
static int32_t a2_y;
static int32_t a2_width;
// AND masks applied to the A2 position after every inner-loop step
static int32_t a2_mask_x;
static int32_t a2_mask_y;
// Per-pixel (inner loop) position increments
static int32_t a1_xadd;
static int32_t a1_yadd;
static int32_t a2_xadd;
static int32_t a2_yadd;
// Non-zero when the channel's X add control is XADDPHR
static uint8_t a1_phrase_mode;
static uint8_t a2_phrase_mode;
// Per-line (outer loop) position steps
static int32_t a1_step_x = 0;
static int32_t a1_step_y = 0;
static int32_t a2_step_x = 0;
static int32_t a2_step_y = 0;
// Loop counters used by blitter_generic()
static uint32_t outer_loop;
static uint32_t inner_loop;
// Pixel size in bits: 1 << (pixel size field of the flags register)
static uint32_t a2_psize;
static uint32_t a1_psize;
// Gouraud shading state: raw INTENSITYINC value, four intensity/colour
// accumulators, their sign-extended increments and the accumulator in use
static uint32_t gouraud_add;
static int gd_i[4];
static int gd_c[4];
static int gd_ia, gd_ca;
static int colour_index = 0;
// Z interpolation: increment and the four Z accumulators
static int32_t zadd;
static uint32_t z_i[4];

// A1 clipping window size (only loaded when CLIPA1 is set)
static int32_t a1_clip_x, a1_clip_y;
307
308 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
309 // of removing all the unnecessary code caching. If it turns out to be a good way
310 // to optimize the blitter, then we may revisit it in the future...
311
312 // Generic blit handler
blitter_generic(uint32_t cmd)313 void blitter_generic(uint32_t cmd)
314 {
315 uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
316 uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
317
318 while (outer_loop--)
319 {
320 uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
321
322 //Kludge for Hover Strike...
323 //I wonder if this kludge is in conjunction with the SRCENX down below...
324 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
325 if (BCOMPEN && SRCENX)
326 {
327 if (n_pixels < bppSrc)
328 bitPos = bppSrc - n_pixels;
329 }
330
331 inner_loop = n_pixels;
332 while (inner_loop--)
333 {
334 srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
335
336 if (!DSTA2) // Data movement: A1 <- A2
337 {
338 // load src data and Z
339 // if (SRCEN)
340 if (SRCEN || SRCENX) // Not sure if this is correct... (seems to be...!)
341 {
342 srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
343
344 if (SRCENZ)
345 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
346 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
347 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
348 }
349 else // Use SRCDATA register...
350 {
351 srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
352
353 if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
354 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
355 }
356
357 // load dst data and Z
358 if (DSTEN)
359 {
360 dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
361
362 if (DSTENZ)
363 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
364 else
365 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
366 }
367 else
368 {
369 dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
370
371 if (DSTENZ)
372 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
373 }
374
375 if (GOURZ)
376 srczdata = z_i[colour_index] >> 16;
377
378 // apply z comparator
379 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
380 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
381 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
382
383 // apply data comparator
384 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
385 // Does BCOMPEN only work in 1 bpp mode???
386 // No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
387 // This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
388 // an 8BPP space.
389 if (DCOMPEN | BCOMPEN)
390 {
391 //Temp, for testing Hover Strike
392 //Doesn't seem to do it... Why?
393 //What needs to happen here is twofold. First, the address generator in the outer loop has
394 //to honor the BPP when calculating the start address (which it kinda does already). Second,
395 //it has to step bit by bit when using BCOMPEN. How to do this???
396 if (BCOMPEN)
397 //small problem with this approach: it's not accurate... We need a proper address to begin with
398 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
399 //[DONE]
400 {
401 uint32_t pixShift = (~bitPos) & (bppSrc - 1);
402 srcdata = (srcdata >> pixShift) & 0x01;
403
404 bitPos++;
405 }
406
407 if (!CMPDST)
408 {
409 if (srcdata == 0)
410 inhibit = 1;//*/
411 }
412 else
413 {
414 // compare destination pixel with pattern pixel
415 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
416 // if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
417 inhibit = 1;
418 }
419 }
420
421 if (CLIPA1)
422 {
423 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
424 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
425 }
426
427 // compute the write data and store
428 if (!inhibit)
429 {
430 // Houston, we have a problem...
431 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
432 // a conflict! E.g.:
433 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
434 // CMD -> src: dst: DSTEN misc: a1ctl: mode: GOURD ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
435 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
436 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
437 // A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
438
439 if (PATDSEL)
440 {
441 // use pattern data for write data
442 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
443 }
444 else if (ADDDSEL)
445 {
446 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
447
448 if (!TOPBEN)
449 {
450 //This is correct now, but slow...
451 int16_t s = (srcdata & 0xFF) | ((srcdata & 0x80) ? 0xFF00 : 0x0000),
452 d = dstdata & 0xFF;
453 int16_t sum = s + d;
454
455 if (sum < 0)
456 writedata = 0x00;
457 else if (sum > 0xFF)
458 writedata = 0xFF;
459 else
460 writedata = (uint32_t)sum;
461 }
462
463 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
464 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
465
466 if (!TOPNEN && writedata > 0xFFF)
467 writedata &= 0xFFF;
468
469 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
470 }
471 else
472 {
473 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
474 if (LFU_NA) writedata |= ~srcdata & dstdata;
475 if (LFU_AN) writedata |= srcdata & ~dstdata;
476 if (LFU_A) writedata |= srcdata & dstdata;
477 }
478
479 //Although, this looks like it's OK... (even if it is shitty!)
480 //According to JTRM, this is part of the four things the blitter does with the write data (the other
481 //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
482 //(see above blit example)...
483 if (GOURD)
484 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
485
486 if (SRCSHADE)
487 {
488 int intensity = srcdata & 0xFF;
489 int ia = gd_ia >> 16;
490 if (ia & 0x80)
491 ia = 0xFFFFFF00 | ia;
492 intensity += ia;
493 if (intensity < 0)
494 intensity = 0;
495 if (intensity > 0xFF)
496 intensity = 0xFF;
497 writedata = (srcdata & 0xFF00) | intensity;
498 }
499 }
500 else
501 {
502 writedata = dstdata;
503 srczdata = dstzdata;
504 }
505
506 //Tried 2nd below for Hover Strike: No dice.
507 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
508 // if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
509 {
510 // write to the destination
511 WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
512 if (DSTWRZ)
513 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
514 }
515 }
516 else // if (DSTA2) // Data movement: A1 -> A2
517 {
518 // load src data and Z
519 if (SRCEN)
520 {
521 srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
522 if (SRCENZ)
523 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
524 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
525 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
526 }
527 else
528 {
529 srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
530 if (cmd & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
531 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
532 }
533
534 // load dst data and Z
535 if (DSTEN)
536 {
537 dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
538 if (DSTENZ)
539 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
540 else
541 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
542 }
543 else
544 {
545 dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
546 if (DSTENZ)
547 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
548 }
549
550 if (GOURZ)
551 srczdata = z_i[colour_index] >> 16;
552
553 // apply z comparator
554 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
555 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
556 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
557
558 // apply data comparator
559 //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
560 if (DCOMPEN | BCOMPEN)
561 {
562 if (!CMPDST)
563 {
564 if (srcdata == 0)
565 inhibit = 1;//*/
566 }
567 else
568 {
569 // compare destination pixel with pattern pixel
570 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
571 // if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
572 inhibit = 1;
573 }
574 }
575
576 if (CLIPA1)
577 {
578 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
579 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
580 }
581
582 // compute the write data and store
583 if (!inhibit)
584 {
585 if (PATDSEL)
586 {
587 // use pattern data for write data
588 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
589 }
590 else if (ADDDSEL)
591 {
592 // intensity addition
593 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
594 if (!(TOPBEN) && writedata > 0xFF)
595 writedata = 0xFF;
596 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
597 if (!(TOPNEN) && writedata > 0xFFF)
598 writedata = 0xFFF;
599 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
600 }
601 else
602 {
603 if (LFU_NAN)
604 writedata |= ~srcdata & ~dstdata;
605 if (LFU_NA)
606 writedata |= ~srcdata & dstdata;
607 if (LFU_AN)
608 writedata |= srcdata & ~dstdata;
609 if (LFU_A)
610 writedata |= srcdata & dstdata;
611 }
612
613 if (GOURD)
614 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
615
616 if (SRCSHADE)
617 {
618 int intensity = srcdata & 0xFF;
619 int ia = gd_ia >> 16;
620 if (ia & 0x80)
621 ia = 0xFFFFFF00 | ia;
622 intensity += ia;
623 if (intensity < 0)
624 intensity = 0;
625 if (intensity > 0xFF)
626 intensity = 0xFF;
627 writedata = (srcdata & 0xFF00) | intensity;
628 }
629 }
630 else
631 {
632 writedata = dstdata;
633 srczdata = dstzdata;
634 }
635
636 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
637 {
638 // write to the destination
639 WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
640
641 if (DSTWRZ)
642 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
643 }
644 }
645
646 // Update x and y (inner loop)
647 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
648 //This is less than ideal, but it works...
649 if (!BCOMPEN)
650 {//*/
651 a1_x += a1_xadd, a1_y += a1_yadd;
652 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
653 }
654 else
655 {
656 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
657 if (!DSTA2)
658 {
659 a1_x += a1_xadd;
660 if (bitPos % bppSrc == 0)
661 a2_x = (a2_x + a2_xadd) & a2_mask_x;
662 }
663 else
664 {
665 a2_x = (a2_x + a2_xadd) & a2_mask_x;
666 if (bitPos % bppSrc == 0)
667 a1_x += a1_xadd;
668 }
669 }//*/
670
671 if (GOURZ)
672 z_i[colour_index] += zadd;
673
674 if (GOURD || SRCSHADE)
675 {
676 gd_i[colour_index] += gd_ia;
677 //Hmm, this doesn't seem to do anything...
678 //But it is correct according to the JTRM...!
679 if ((int32_t)gd_i[colour_index] < 0)
680 gd_i[colour_index] = 0;
681 if (gd_i[colour_index] > 0x00FFFFFF)
682 gd_i[colour_index] = 0x00FFFFFF;//*/
683
684 gd_c[colour_index] += gd_ca;
685 if ((int32_t)gd_c[colour_index] < 0)
686 gd_c[colour_index] = 0;
687 if (gd_c[colour_index] > 0x000000FF)
688 gd_c[colour_index] = 0x000000FF;//*/
689 }
690
691 if (GOURD || SRCSHADE || GOURZ)
692 {
693 if (a1_phrase_mode)
694 //This screws things up WORSE (for the BIOS opening screen)
695 // if (a1_phrase_mode || a2_phrase_mode)
696 colour_index = (colour_index + 1) & 0x03;
697 }
698 }
699
700 //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
701 // the phrase mode mucking around. But it fucks up everything else...
702 //#define SCREWY_CD_DEPENDENT
703 #ifdef SCREWY_CD_DEPENDENT
704 a1_x += a1_step_x;
705 a1_y += a1_step_y;
706 a2_x += a2_step_x;
707 a2_y += a2_step_y;//*/
708 #endif
709
710 //New: Phrase mode taken into account! :-p
711 if (a1_phrase_mode) // v2
712 {
713 uint32_t pixelSize;
714 // Bump the pointer to the next phrase boundary
715 // Even though it works, this is crappy... Clean it up!
716 uint32_t size = 64 / a1_psize;
717
718 // Crappy kludge... ('aligning' source to destination)
719 if (a2_phrase_mode && DSTA2)
720 {
721 uint32_t extra = (a2_start >> 16) % size;
722 a1_x += extra << 16;
723 }
724
725 pixelSize = (size - 1) << 16;
726 a1_x = (a1_x + pixelSize) & ~pixelSize;
727 }
728
729 if (a2_phrase_mode) // v1
730 {
731 uint32_t pixelSize;
732 // Bump the pointer to the next phrase boundary
733 // Even though it works, this is crappy... Clean it up!
734 uint32_t size = 64 / a2_psize;
735
736 // Crappy kludge... ('aligning' source to destination)
737 // Prolly should do this for A1 channel as well... [DONE]
738 if (a1_phrase_mode && !DSTA2)
739 {
740 uint32_t extra = (a1_start >> 16) % size;
741 a2_x += extra << 16;
742 }
743
744 pixelSize = (size - 1) << 16;
745 a2_x = (a2_x + pixelSize) & ~pixelSize;
746 }
747
748 //Not entirely: This still mucks things up... !!! FIX !!!
749 //Should this go before or after the phrase mode mucking around?
750 #ifndef SCREWY_CD_DEPENDENT
751 a1_x += a1_step_x;
752 a1_y += a1_step_y;
753 a2_x += a2_step_x;
754 a2_y += a2_step_y;//*/
755 #endif
756 }
757
758 // write values back to registers
759 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
760 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
761 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
762 }
763
blitter_blit(uint32_t cmd)764 void blitter_blit(uint32_t cmd)
765 {
766 uint32_t m, e;
767 uint32_t pitchValue[4] = { 0, 1, 3, 2 };
768 colour_index = 0;
769 src = cmd & 0x07;
770 dst = (cmd >> 3) & 0x07;
771 misc = (cmd >> 6) & 0x03;
772 a1ctl = (cmd >> 8) & 0x7;
773 mode = (cmd >> 11) & 0x07;
774 ity = (cmd >> 14) & 0x0F;
775 zop = (cmd >> 18) & 0x07;
776 op = (cmd >> 21) & 0x0F;
777 ctrl = (cmd >> 25) & 0x3F;
778
779 // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
780 // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
781 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
782 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
783
784 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
785 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
786
787 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
788 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
789
790 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
791 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
792
793 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
794 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
795
796 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
797 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
798
799 // According to JTRM, this must give a *whole number* of phrases in the current
800 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
801 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
802 a1_width = ((0x04 | m) << e) >> 2;//*/
803
804 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
805 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
806
807 // According to JTRM, this must give a *whole number* of phrases in the current
808 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
809 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
810 a2_width = ((0x04 | m) << e) >> 2;//*/
811 a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
812 a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
813
814 // Check for "use mask" flag
815 if (!(REG(A2_FLAGS) & 0x8000))
816 {
817 a2_mask_x = 0xFFFFFFFF; // must be 16.16
818 a2_mask_y = 0xFFFFFFFF; // must be 16.16
819 }
820
821 a1_phrase_mode = 0;
822
823 // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
824 a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
825
826 if (YSIGNSUB_A1)
827 a1_yadd = -a1_yadd;
828
829 // determine a1_xadd
830 switch (xadd_a1_control)
831 {
832 case XADDPHR:
833 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
834 // add phrase offset to X and truncate
835 a1_xadd = 1 << 16;
836 a1_phrase_mode = 1;
837 break;
838 case XADDPIX:
839 // add pixelsize (1) to X
840 a1_xadd = 1 << 16;
841 break;
842 case XADD0:
843 // add zero (for those nice vertical lines)
844 a1_xadd = 0;
845 break;
846 case XADDINC:
847 // add the contents of the increment register
848 a1_xadd = (REG(A1_INC) << 16) | (REG(A1_FINC) & 0x0000FFFF);
849 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
850 break;
851 }
852
853 if (XSIGNSUB_A1)
854 a1_xadd = -a1_xadd;
855
856 if (YSIGNSUB_A2)
857 a2_yadd = -a2_yadd;
858
859 a2_phrase_mode = 0;
860
861 // determine a2_xadd
862 switch (xadd_a2_control)
863 {
864 case XADDPHR:
865 // add phrase offset to X and truncate
866 a2_xadd = 1 << 16;
867 a2_phrase_mode = 1;
868 break;
869 case XADDPIX:
870 // add pixelsize (1) to X
871 a2_xadd = 1 << 16;
872 break;
873 case XADD0:
874 // add zero (for those nice vertical lines)
875 a2_xadd = 0;
876 break;
877 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
878 case XADDINC:
879 // add the contents of the increment register
880 // since there is no register for a2 we just add 1
881 //Let's do nothing, since it's not listed as a valid bit combo...
882 break;
883 }
884
885 if (XSIGNSUB_A2)
886 a2_xadd = -a2_xadd;
887
888 // Modify outer loop steps based on blitter command
889
890 a1_step_x = 0;
891 a1_step_y = 0;
892 a2_step_x = 0;
893 a2_step_y = 0;
894
895 if (UPDA1F)
896 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
897 a1_step_y = (REG(A1_FSTEP) >> 16);
898
899 if (UPDA1)
900 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
901 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
902
903 if (UPDA2)
904 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
905 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
906
907 outer_loop = n_lines;
908
909 // Clipping...
910
911 if (CLIPA1)
912 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
913 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
914
915 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
916 // Err, this is pixel size... (and it's OK)
917 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
918 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
919
920 // Z-buffering
921 if (GOURZ)
922 {
923 unsigned v;
924 zadd = REG(ZINC);
925
926 for(v = 0; v < 4; v++)
927 z_i[v] = REG(PHRASEZ0 + v*4);
928 }
929
930 // Gouraud shading
931 if (GOURD || GOURZ || SRCSHADE)
932 {
933 gd_c[0] = blitter_ram[PATTERNDATA + 6];
934 gd_i[0] = ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
935 | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
936
937 gd_c[1] = blitter_ram[PATTERNDATA + 4];
938 gd_i[1] = ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
939 | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
940
941 gd_c[2] = blitter_ram[PATTERNDATA + 2];
942 gd_i[2] = ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
943 | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
944
945 gd_c[3] = blitter_ram[PATTERNDATA + 0];
946 gd_i[3] = ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
947 | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
948
949 gouraud_add = REG(INTENSITYINC);
950
951 gd_ia = gouraud_add & 0x00FFFFFF;
952 if (gd_ia & 0x00800000)
953 gd_ia = 0xFF000000 | gd_ia;
954
955 gd_ca = (gouraud_add >> 24) & 0xFF;
956 if (gd_ca & 0x00000080)
957 gd_ca = 0xFFFFFF00 | gd_ca;
958 }
959
960 blitter_generic(cmd);
961 }
962 #endif
963 /*******************************************************************************
964 ********************** STUFF CUT ABOVE THIS LINE! ******************************
965 *******************************************************************************/
966
967
//
// Bring the blitter up. All of the work is delegated to BlitterReset().
//
void BlitterInit(void)
{
	BlitterReset();
}
972
973
BlitterReset(void)974 void BlitterReset(void)
975 {
976 memset(blitter_ram, 0x00, 0xA0);
977 }
978
979
//
// Shutdown hook for the blitter: nothing to release, just note it in the log.
//
void BlitterDone(void)
{
	WriteLog("BLIT: Done.\n");
}
984
985
BlitterReadByte(uint32_t offset,uint32_t who)986 uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
987 {
988 offset &= 0xFF;
989
990 // status register
991 //This isn't cycle accurate--how to fix? !!! FIX !!!
992 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
993 //Real hardware returns $00000805, just like the JTRM says.
994 if (offset == (0x38 + 0))
995 return 0x00;
996 if (offset == (0x38 + 1))
997 return 0x00;
998 if (offset == (0x38 + 2))
999 return 0x08;
1000 if (offset == (0x38 + 3))
1001 return 0x05; // always idle/never stopped (collision detection ignored!)
1002
1003 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
1004 //Fix for AvP:
1005 if (offset >= 0x04 && offset <= 0x07)
1006 //This is it. I wonder if it just ignores the lower three bits?
1007 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1008 return blitter_ram[offset + 0x08]; // A1_PIXEL ($F0220C) read at $F02204
1009
1010 if (offset >= 0x2C && offset <= 0x2F)
1011 return blitter_ram[offset + 0x04]; // A2_PIXEL ($F02230) read at $F0222C
1012
1013 return blitter_ram[offset];
1014 }
1015
1016
1017 //Crappy!
//
// Read a 16-bit big-endian value, assembled from two byte reads so that every
// special case in BlitterReadByte() applies to each half.
//
uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	uint16_t upper = (uint16_t)BlitterReadByte(offset, who);
	uint16_t lower = (uint16_t)BlitterReadByte(offset + 1, who);

	return (upper << 8) | lower;
}
1022
1023
1024 //Crappy!
//
// Read a 32-bit big-endian value, assembled from two word reads (and hence
// four byte reads through BlitterReadByte()).
//
// offset: register address of the most significant byte.
// who:    identifies the requester; passed through unchanged.
//
// Returns the high word in bits 31-16 and the low word in bits 15-0.
//
uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	// Widen BEFORE shifting: a uint16_t operand is promoted to (signed) int,
	// and shifting a value with bit 15 set left by 16 would move a one into
	// the sign bit, which is undefined behaviour.
	uint32_t high = (uint32_t)BlitterReadWord(offset, who);
	uint32_t low  = (uint32_t)BlitterReadWord(offset + 2, who);

	return (high << 16) | low;
}
1029
1030
BlitterWriteByte(uint32_t offset,uint8_t data,uint32_t who)1031 void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
1032 {
1033 offset &= 0xFF;
1034
1035 // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1036 // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1037 if ((offset >= 0x7C) && (offset <= 0x9B))
1038 {
1039 switch (offset)
1040 {
1041 // INTENSITY registers 0-3
1042 case 0x7C: break;
1043 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1044 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1045 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1046
1047 case 0x80: break;
1048 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1049 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1050 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1051
1052 case 0x84: break;
1053 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1054 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1055 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1056
1057 case 0x88: break;
1058 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1059 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1060 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
1061
1062
1063 // Z registers 0-3
1064 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1065 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1066 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1067 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1068
1069 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1070 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1071 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1072 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1073
1074 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1075 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1076 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1077 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1078
1079 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1080 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1081 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1082 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1083 }
1084 }
1085
1086 // It looks weird, but this is how the 64 bit registers are actually handled...!
1087
1088 else if (((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3))
1089 || ((offset >= DSTDATA + 0) && (offset <= DSTDATA + 3))
1090 || ((offset >= DSTZ + 0) && (offset <= DSTZ + 3))
1091 || ((offset >= SRCZINT + 0) && (offset <= SRCZINT + 3))
1092 || ((offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3))
1093 || ((offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
1094 )
1095 {
1096 blitter_ram[offset + 4] = data;
1097 }
1098 else if (((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7))
1099 || ((offset >= DSTDATA + 4) && (offset <= DSTDATA + 7))
1100 || ((offset >= DSTZ + 4) && (offset <= DSTZ + 7))
1101 || ((offset >= SRCZINT + 4) && (offset <= SRCZINT + 7))
1102 || ((offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7))
1103 || ((offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
1104 )
1105 {
1106 blitter_ram[offset - 4] = data;
1107 }
1108 else
1109 blitter_ram[offset] = data;
1110 }
1111
1112
BlitterWriteWord(uint32_t offset,uint16_t data,uint32_t who)1113 void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
1114 {
1115 BlitterWriteByte(offset + 0, data >> 8, who);
1116 BlitterWriteByte(offset + 1, data & 0xFF, who);
1117
1118 if ((offset & 0xFF) == 0x3A)
1119 // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1120 // But then again, according to the Jaguar docs, this is correct...!
1121 {
1122 if (vjs.useFastBlitter)
1123 blitter_blit(GET32(blitter_ram, 0x38));
1124 else
1125 BlitterMidsummer2();
1126 }
1127 }
1128 //F02278,9,A,B
1129
1130
//
// Write a 32-bit big-endian value as two word writes, high word first. Any
// side effect of BlitterWriteWord() therefore applies to each half in turn.
//
void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who)
{
	uint16_t high_word = (uint16_t)(data >> 16);
	uint16_t low_word = (uint16_t)(data & 0xFFFF);

	BlitterWriteWord(offset + 0, high_word, who);
	BlitterWriteWord(offset + 2, low_word, who);
}
1136
1137 // Here's attempt #2--taken from the Oberon chip specs!
1138
1139 #ifdef USE_MIDSUMMER_BLITTER_MKII
1140
// Forward declarations for the helper routines of this blitter implementation;
// none of them are defined in this stretch of the file. In each prototype the
// pointer (output) parameters come first.

void ADDRGEN(uint32_t *, uint32_t *,
	bool, bool,
	uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
	uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);

void ADDARRAY(uint16_t *addq,
	uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
	uint64_t dstd, uint32_t iinc, uint8_t *initcin, uint64_t initinc,
	uint16_t initpix, uint32_t istep, uint64_t patd, uint64_t srcd,
	uint64_t srcz1, uint64_t srcz2, uint32_t zinc, uint32_t zstep);

void ADD16SAT(uint16_t *r, uint8_t *co,
	uint16_t a, uint16_t b, uint8_t cin,
	bool sat, bool eightbit, bool hicinh);

void ADDAMUX(int16_t *adda_x, int16_t *adda_y,
	uint8_t addasel,
	int16_t a1_step_x, int16_t a1_step_y,
	int16_t a1_stepf_x, int16_t a1_stepf_y,
	int16_t a2_step_x, int16_t a2_step_y,
	int16_t a1_inc_x, int16_t a1_inc_y,
	int16_t a1_incf_x, int16_t a1_incf_y,
	uint8_t adda_xconst, bool adda_yconst, bool addareg,
	bool suba_x, bool suba_y);

void ADDBMUX(int16_t *addb_x, int16_t *addb_y,
	uint8_t addbsel,
	int16_t a1_x, int16_t a1_y,
	int16_t a2_x, int16_t a2_y,
	int16_t a1_frac_x, int16_t a1_frac_y);

void DATAMUX(int16_t *data_x, int16_t *data_y,
	uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel);

void ADDRADD(int16_t *addq_x, int16_t *addq_y,
	bool a1fracldi,
	uint16_t adda_x, uint16_t adda_y,
	uint16_t addb_x, uint16_t addb_y,
	uint8_t modx, bool suba_x, bool suba_y);

void DATA(uint64_t *wdata, uint8_t *dcomp, uint8_t *zcomp, bool *nowrite,
	bool big_pix, bool cmpdst,
	uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
	bool daddq_sel, uint8_t data_sel,
	uint8_t dbinh, uint8_t dend, uint8_t dstart,
	uint64_t dstd, uint32_t iinc, uint8_t lfu_func,
	uint64_t *patd, bool patdadd,
	bool phrase_mode, uint64_t srcd,
	bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
	bool bcompen, bool bkgwren, bool dcompen,
	uint8_t icount, uint8_t pixsize,
	uint64_t *srcz, uint64_t dstz, uint32_t zinc);

void COMP_CTRL(uint8_t *dbinh, bool *nowrite,
	bool bcompen, bool big_pix, bool bkgwren,
	uint8_t dcomp, bool dcompen, uint8_t icount,
	uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
1167
BlitterMidsummer2(void)1168 void BlitterMidsummer2(void)
1169 {
1170 // Here's what the specs say the state machine does. Note that this can probably be
1171 // greatly simplified (also, it's different from what John has in his Oberon docs):
1172 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
1173 //be described here at all)...
1174
1175 uint32_t cmd = GET32(blitter_ram, COMMAND);
1176
1177 // Line states passed in via the command register
1178
1179 bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
1180 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
1181 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
1182 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
1183 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
1184 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
1185
1186 uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
1187 //Missing: BUSHI
1188 //Where to find various lines:
1189 // clip_a1 -> inner
1190 // gourd -> dcontrol, inner, outer, state
1191 // gourz -> dcontrol, inner, outer, state
1192 // cmpdst -> blit, data, datacomp, state
1193 // bcompen -> acontrol, inner, mcontrol, state
1194 // dcompen -> inner, state
1195 // bkgwren -> inner, state
1196 // srcshade -> dcontrol, inner, state
1197 // adddsel -> dcontrol
1198 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
1199
1200 // Lines that don't exist in Jaguar I (and will never be asserted)
1201
1202 bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
1203 bool istepadd = false, istepfadd = false;
1204 bool zstepfadd = false, zstepadd = false;
1205
1206 // Various state lines (initial state--basically the reset state of the FDSYNCs)
1207
1208 bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
1209 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
1210 init_zf = false, init_zi = false;
1211
1212 bool outer0 = false, indone = false;
1213
1214 bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
1215 init_zfi, init_zii;
1216
1217 bool notgzandp = !(gourz && polygon);
1218
1219 // Various registers set up by user
1220
1221 uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
1222 uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
1223 uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
1224 uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
1225 uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
1226 uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
1227 uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
1228 uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
1229 uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
1230 uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
1231 bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
1232 bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
1233 bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
1234 uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8; // Phrase aligned by ignoring bottom 3 bits
1235 uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
1236
1237 uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
1238 uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
1239 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
1240 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
1241 int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
1242 int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
1243 uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
1244 uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
1245 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
1246 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
1247 int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
1248 int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
1249 uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
1250 uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
1251
1252 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
1253 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
1254 #if 0
1255 bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
1256 uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
1257 uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
1258 uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
1259 #endif
1260 int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
1261 int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
1262
1263 uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
1264 uint64_t srcd2 = 0;
1265 uint64_t dstd = GET64(blitter_ram, DSTDATA);
1266 uint64_t patd = GET64(blitter_ram, PATTERNDATA);
1267 uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
1268 uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
1269 uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
1270 uint64_t dstz = GET64(blitter_ram, DSTZ);
1271 uint32_t zinc = GET32(blitter_ram, ZINC);
1272
1273 uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize); // From ACONTROL
1274
1275 bool phrase_mode;
1276 uint16_t a1FracCInX = 0, a1FracCInY = 0;
1277
1278 // Bugs in Jaguar I
1279
1280 a2addy = a1addy; // A2 channel Y add bit is tied to A1's
1281
1282 // Various state lines set up by user
1283
1284 phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false); // From ACONTROL
1285
1286 // Stopgap vars to simulate various lines
1287
1288
1289 while (true)
1290 {
1291 // IDLE
1292
1293 if ((idle && !go) || (inner && outer0 && indone))
1294 {
1295 idlei = true;
1296
1297 //Instead of a return, let's try breaking out of the loop...
1298 break;
1299 }
1300 else
1301 idlei = false;
1302
1303 // INNER LOOP ACTIVE
1304
1305 if ((idle && go && !datinit)
1306 || (inner && !indone)
1307 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
1308 || (a1update && !upda2 && notgzandp && !datinit)
1309 || (zupdate && !upda2 && !datinit)
1310 || (a2update && !datinit)
1311 || (init_ii && !gourz)
1312 || (init_zi))
1313 inneri = true;
1314 else
1315 inneri = false;
1316
1317 // A1 FRACTION UPDATE
1318
1319 if (inner && indone && !outer0 && upda1f)
1320 a1fupdatei = true;
1321 else
1322 a1fupdatei = false;
1323
1324 // A1 POINTER UPDATE
1325
1326 if ((a1fupdate)
1327 || (inner && indone && !outer0 && !upda1f && upda1))
1328 a1updatei = true;
1329 else
1330 a1updatei = false;
1331
1332 // Z FRACTION UPDATE
1333
1334 if ((a1update && gourz && polygon)
1335 || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
1336 zfupdatei = true;
1337 else
1338 zfupdatei = false;
1339
1340 // Z INTEGER UPDATE
1341
1342 if (zfupdate)
1343 zupdatei = true;
1344 else
1345 zupdatei = false;
1346
1347 // A2 POINTER UPDATE
1348
1349 if ((a1update && upda2 && notgzandp)
1350 || (zupdate && upda2)
1351 || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
1352 a2updatei = true;
1353 else
1354 a2updatei = false;
1355
1356 // INITIALIZE INTENSITY FRACTION
1357
1358 if ((zupdate && !upda2 && datinit)
1359 || (a1update && !upda2 && datinit && notgzandp)
1360 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
1361 || (a2update && datinit)
1362 || (idle && go && datinit))
1363 init_ifi = true;
1364 else
1365 init_ifi = false;
1366
1367 // INITIALIZE INTENSITY INTEGER
1368
1369 if (init_if)
1370 init_iii = true;
1371 else
1372 init_iii = false;
1373
1374 // INITIALIZE Z FRACTION
1375
1376 if (init_ii && gourz)
1377 init_zfi = true;
1378 else
1379 init_zfi = false;
1380
1381 // INITIALIZE Z INTEGER
1382
1383 if (init_zf)
1384 init_zii = true;
1385 else
1386 init_zii = false;
1387
1388 // Here we move the fooi into their foo counterparts in order to simulate the moving
1389 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
1390
1391 idle = idlei;
1392 inner = inneri;
1393 a1fupdate = a1fupdatei;
1394 a1update = a1updatei;
1395 zfupdate = zfupdatei; // *
1396 zupdate = zupdatei; // *
1397 a2update = a2updatei;
1398 init_if = init_ifi; // *
1399 init_ii = init_iii; // *
1400 init_zf = init_zfi; // *
1401 init_zi = init_zii; // *
1402 // * denotes states that will never assert for Jaguar I
1403
1404 // Now, depending on how we want to handle things, we could either put the implementation
1405 // of the various pieces up above, or handle them down below here.
1406
1407 // Let's try postprocessing for now...
1408
1409 if (inner)
1410 {
1411 bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
1412 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
1413 bool inner0 = false;
1414 bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
1415 // State lines that will never assert in Jaguar I
1416 bool textext = false, txtread = false;
1417 //other stuff
1418 uint8_t srcshift = 0;
1419 uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
1420 bool srca_addi, dsta_addi, gensrc, gendst, gena2i, zaddr, fontread, justify, a1_add, a2_add;
1421 bool adda_yconst, addareg, suba_x, suba_y, a1fracldi, srcdreadd, shadeadd;
1422 uint8_t addasel, a1_xconst, a2_xconst, adda_xconst, addbsel, maska1, maska2, modx, daddasel;
1423 uint8_t daddbsel, daddmode;
1424 bool patfadd, patdadd, srcz1add, srcz2add, srcshadd, daddq_sel;
1425 uint8_t data_sel;
1426 uint32_t address, pixAddr;
1427 uint8_t dstxp, srcxp, shftv, pobb;
1428 bool pobbsel;
1429 uint8_t loshd, shfti;
1430 uint64_t srcz;
1431 bool winhibit;
1432
1433 indone = false;
1434
1435 // while (!idle_inner)
1436 while (true)
1437 {
1438 bool sshftld; // D flipflop (D -> Q): instart -> sshftld
1439 uint16_t dstxwr, pseq;
1440 bool penden;
1441 uint8_t window_mask;
1442 uint8_t inner_mask = 0;
1443 uint8_t emask, pma, dend;
1444 uint64_t srcd;
1445 uint8_t zSrcShift;
1446 uint64_t wdata;
1447 uint8_t dcomp, zcomp;
1448
1449 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
1450 // IDLE
1451
1452 if ((idle_inner && !step)
1453 || (dzwrite && step && inner0)
1454 || (dwrite && step && !dstwrz && inner0))
1455 {
1456 idle_inneri = true;
1457 break;
1458 }
1459 else
1460 idle_inneri = false;
1461
1462 // EXTRA SOURCE DATA READ
1463
1464 if ((idle_inner && step && srcenx)
1465 || (sreadx && !step))
1466 sreadxi = true;
1467 else
1468 sreadxi = false;
1469
1470 // EXTRA SOURCE ZED READ
1471
1472 if ((sreadx && step && srcenz)
1473 || (szreadx && !step))
1474 szreadxi = true;
1475 else
1476 szreadxi = false;
1477
1478 // TEXTURE DATA READ (not implemented because not in Jaguar I)
1479
1480 // SOURCE DATA READ
1481
1482 if ((szreadx && step && !textext)
1483 || (sreadx && step && !srcenz && srcen)
1484 || (idle_inner && step && !srcenx && !textext && srcen)
1485 || (dzwrite && step && !inner0 && !textext && srcen)
1486 || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
1487 || (txtread && step && srcen)
1488 || (sread && !step))
1489 sreadi = true;
1490 else
1491 sreadi = false;
1492
1493 // SOURCE ZED READ
1494
1495 if ((sread && step && srcenz)
1496 || (szread && !step))
1497 szreadi = true;
1498 else
1499 szreadi = false;
1500
1501 // DESTINATION DATA READ
1502
1503 if ((szread && step && dsten)
1504 || (sread && step && !srcenz && dsten)
1505 || (sreadx && step && !srcenz && !textext && !srcen && dsten)
1506 || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
1507 || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
1508 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
1509 || (txtread && step && !srcen && dsten)
1510 || (dread && !step))
1511 dreadi = true;
1512 else
1513 dreadi = false;
1514
1515 // DESTINATION ZED READ
1516
1517 if ((dread && step && dstenz)
1518 || (szread && step && !dsten && dstenz)
1519 || (sread && step && !srcenz && !dsten && dstenz)
1520 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
1521 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
1522 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
1523 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
1524 || (txtread && step && !srcen && !dsten && dstenz)
1525 || (dzread && !step))
1526 dzreadi = true;
1527 else
1528 dzreadi = false;
1529
1530 // DESTINATION DATA WRITE
1531
1532 if ((dzread && step)
1533 || (dread && step && !dstenz)
1534 || (szread && step && !dsten && !dstenz)
1535 || (sread && step && !srcenz && !dsten && !dstenz)
1536 || (txtread && step && !srcen && !dsten && !dstenz)
1537 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
1538 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
1539 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
1540 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
1541 || (dwrite && !step))
1542 dwritei = true;
1543 else
1544 dwritei = false;
1545
1546 // DESTINATION ZED WRITE
1547
1548 if ((dzwrite && !step)
1549 || (dwrite && step && dstwrz))
1550 dzwritei = true;
1551 else
1552 dzwritei = false;
1553
1554 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
1555 // cycle of the inner loop...
1556 sshftld = idle_inner;
1557
1558 // Here we move the fooi into their foo counterparts in order to simulate the moving
1559 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
1560
1561 idle_inner = idle_inneri;
1562 sreadx = sreadxi;
1563 szreadx = szreadxi;
1564 sread = sreadi;
1565 szread = szreadi;
1566 dread = dreadi;
1567 dzread = dzreadi;
1568 dwrite = dwritei;
1569 dzwrite = dzwritei;
1570
1571 // Here's a few more decodes--not sure if they're supposed to go here or not...
1572
1573
1574 srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
1575
1576 dsta_addi = (dwritei && !dstwrz) || dzwritei;
1577
1578 gensrc = sreadxi || szreadxi || sreadi || szreadi;
1579 gendst = dreadi || dzreadi || dwritei || dzwritei;
1580 gena2i = (gensrc && !dsta2) || (gendst && dsta2);
1581
1582 zaddr = szreadx || szread || dzread || dzwrite;
1583
1584 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
1585 /*Fontread\ := OND1 (fontread\, sread[1], sreadx[1], bcompen);
1586 Fontread := INV1 (fontread, fontread\);
1587 Justt := NAN3 (justt, fontread\, phrase_mode, tactive\);
1588 Justify := TS (justify, justt, busen);*/
1589 fontread = (sread || sreadx) && bcompen;
1590 justify = !(!fontread && phrase_mode /*&& tactive*/);
1591
1592 /* Generate inner loop update enables */
1593 /*
1594 A1_addi := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
1595 A2_addi := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
1596 A1_add := FD1 (a1_add, a1_add\, a1_addi, clk);
1597 A2_add := FD1 (a2_add, a2_add\, a2_addi, clk);
1598 A2_addb := BUF1 (a2_addb, a2_add);
1599 */
1600 a1_add = (dsta2 ? srca_addi : dsta_addi);
1601 a2_add = (dsta2 ? dsta_addi : srca_addi);
1602
1603 /* Address adder input A register selection
1604 000 A1 step integer part
1605 001 A1 step fraction part
1606 010 A1 increment integer part
1607 011 A1 increment fraction part
1608 100 A2 step
1609
1610 bit 2 = a2update
1611 bit 1 = /a2update . (a1_add . a1addx[0..1])
1612 bit 0 = /a2update . ( a1fupdate
1613 + a1_add . atick[0] . a1addx[0..1])
1614 The /a2update term on bits 0 and 1 is redundant.
1615 Now look-ahead based
1616 */
1617
1618 addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
1619 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
1620 addasel |= (a2update ? 0x04 : 0x00);
1621 /* Address adder input A X constant selection
1622 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
1623 zeroes when they are all 1
1624 Remember - these are pixels, so to add one phrase the pixel size
1625 has to be taken into account to get the appropriate value.
1626 for A1
1627 if a1addx[0..1] are 00 set 6 - pixel size
1628 if a1addx[0..1] are 01 set the value 000
1629 if a1addx[0..1] are 10 set the value 111
1630 similarly for A2
1631 JLH: Also, 11 will likewise set the value to 111
1632 */
1633 a1_xconst = 6 - a1_pixsize;
1634 a2_xconst = 6 - a2_pixsize;
1635
1636 if (a1addx == 1)
1637 a1_xconst = 0;
1638 else if (a1addx & 0x02)
1639 a1_xconst = 7;
1640
1641 if (a2addx == 1)
1642 a2_xconst = 0;
1643 else if (a2addx & 0x02)
1644 a2_xconst = 7;
1645
1646 adda_xconst = (a2_add ? a2_xconst : a1_xconst);
1647 /* Address adder input A Y constant selection
1648 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
1649 Therefore, the selection has to be controlled by a bug fix bit.
1650 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
1651 */
1652 adda_yconst = a1addy;
1653 /* Address adder input A register versus constant selection
1654 given by a1_add . a1addx[0..1]
1655 + a1update
1656 + a1fupdate
1657 + a2_add . a2addx[0..1]
1658 + a2update
1659 */
1660 addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
1661 || (a2_add && a2addx == 3) || a2update ? true : false);
1662 /* The adders can be put into subtract mode in add pixel size
1663 mode when the corresponding flags are set */
1664 suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
1665 suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
1666 /* Address adder input B selection
1667 00 A1 pointer
1668 01 A2 pointer
1669 10 A1 fraction
1670 11 Zero
1671
1672 Bit 1 = a1fupdate
1673 + (a1_add . atick[0] . a1addx[0..1])
1674 + a1fupdate . a1_stepld
1675 + a1update . a1_stepld
1676 + a2update . a2_stepld
1677 Bit 0 = a2update + a2_add
1678 + a1fupdate . a1_stepld
1679 + a1update . a1_stepld
1680 + a2update . a2_stepld
1681 */
1682 addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
1683 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
1684 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
1685 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
1686
1687 /* The modulo bits are used to align X onto a phrase boundary when
1688 it is being updated by one phrase
1689 000 no mask
1690 001 mask bit 0
1691 010 mask bits 1-0
1692 ..
1693 110 mask bits 5-0
1694
1695 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
1696 is 6 - the pixel size (again!)
1697 */
1698 maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
1699 maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
1700 modx = (a2_add ? maska2 : maska1);
1701 /* Generate load strobes for the increment updates */
1702
1703 /*A1pldt := NAN2 (a1pldt, atick[1], a1_add);
1704 A1ptrldi := NAN2 (a1ptrldi, a1update\, a1pldt);
1705
1706 A1fldt := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
1707 A1fracldi := NAN2 (a1fracldi, a1fupdate\, a1fldt);
1708
1709 A2pldt := NAN2 (a2pldt, atick[1], a2_add);
1710 A2ptrldi := NAN2 (a2ptrldi, a2update\, a2pldt);*/
1711
1712 a1fracldi = a1fupdate || (a1_add && a1addx == 3);
1713
1714 // Some more from DCONTROL...
1715 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
1716 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
1717 //#warning srcdreadd is not properly initialized!
1718 srcdreadd = false; // Set in INNER.NET
1719 //Shadeadd\ := NAN2H (shadeadd\, dwrite, srcshade);
1720 //Shadeadd := INV2 (shadeadd, shadeadd\);
1721 shadeadd = dwrite && srcshade;
1722 /* Data adder control, input A selection
1723 000 Destination data
1724 001 Initialiser pixel value
1725 100 Source data - computed intensity fraction
1726 101 Pattern data - computed intensity
1727 110 Source zed 1 - computed zed
1728 111 Source zed 2 - computed zed fraction
1729
1730 Bit 0 = dwrite . gourd . atick[1]
1731 + dzwrite . gourz . atick[0]
1732 + istepadd
1733 + zstepfadd
1734 + init_if + init_ii + init_zf + init_zi
1735 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
1736 + zstepadd
1737 + zstepfadd
1738 Bit 2 = (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
1739 + dwrite . srcshade
1740 */
1741 daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
1742 || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
1743 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
1744 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
1745 || (dwrite && srcshade) ? 0x04 : 0x00);
1746 /* Data adder control, input B selection
1747 0000 Source data
1748 0001 Data initialiser increment
1749 0100 Bottom 16 bits of I increment repeated four times
1750 0101 Top 16 bits of I increment repeated four times
1751 0110 Bottom 16 bits of Z increment repeated four times
1752 0111 Top 16 bits of Z increment repeated four times
1753 1100 Bottom 16 bits of I step repeated four times
1754 1101 Top 16 bits of I step repeated four times
1755 1110 Bottom 16 bits of Z step repeated four times
1756 1111 Top 16 bits of Z step repeated four times
1757
1758 Bit 0 = dwrite . gourd . atick[1]
1759 + dzwrite . gourz . atick[1]
1760 + dwrite . srcshade
1761 + istepadd
1762 + zstepadd
1763 + init_if + init_ii + init_zf + init_zi
1764 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
1765 + zstepadd
1766 + zstepfadd
1767 Bit 2 = dwrite . gourd . (atick[0] + atick[1])
1768 + dzwrite . gourz . (atick[0] + atick[1])
1769 + dwrite . srcshade
1770 + istepadd + istepfadd + zstepadd + zstepfadd
1771 Bit 3 = istepadd + istepfadd + zstepadd + zstepfadd
1772 */
1773 daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
1774 || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
1775 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
1776 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
1777 || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
1778 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
1779 /* Data adder mode control
1780 000 16-bit normal add
1781 001 16-bit saturating add with carry
1782 010 8-bit saturating add with carry, carry into top byte is
1783 inhibited (YCrCb)
1784 011 8-bit saturating add with carry, carry into top byte and
1785 between top nybbles is inhibited (CRY)
1786 100 16-bit normal add with carry
1787 101 16-bit saturating add
1788 110 8-bit saturating add, carry into top byte is inhibited
1789 111 8-bit saturating add, carry into top byte and between top
1790 nybbles is inhibited
1791
1792 The first five are used for Gouraud calculations, the latter three
1793 for adding source and destination data
1794
1795 Bit 0 = dzwrite . gourz . atick[1]
1796 + dwrite . gourd . atick[1] . /topnen . /topben . /ext_int
1797 + dwrite . gourd . atick[1] . topnen . topben . /ext_int
1798 + zstepadd
1799 + istepadd . /topnen . /topben . /ext_int
1800 + istepadd . topnen . topben . /ext_int
1801 + /gourd . /gourz . /topnen . /topben
1802 + /gourd . /gourz . topnen . topben
1803 + shadeadd . /topnen . /topben
1804 + shadeadd . topnen . topben
1805 + init_ii . /topnen . /topben . /ext_int
1806 + init_ii . topnen . topben . /ext_int
1807 + init_zi
1808
1809 Bit 1 = dwrite . gourd . atick[1] . /topben . /ext_int
1810 + istepadd . /topben . /ext_int
1811 + /gourd . /gourz . /topben
1812 + shadeadd . /topben
1813 + init_ii . /topben . /ext_int
1814
1815 Bit 2 = /gourd . /gourz
1816 + shadeadd
1817 + dwrite . gourd . atick[1] . ext_int
1818 + istepadd . ext_int
1819 + init_ii . ext_int
1820 */
1821 daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
1822 || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
1823 || (istepadd && !topnen && !topben && !ext_int)
1824 || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
1825 || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
1826 || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
1827 || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
1828 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
1829 || (!gourd && !gourz && !topben) || (shadeadd && !topben)
1830 || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
1831 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
1832 || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
1833
1834 patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
1835 patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
1836 srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
1837 srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
1838 srcshadd = srcdreadd && srcshade;
1839 daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
1840 /* Select write data
1841 This has to be controlled from stage 1 of the pipe-line, delayed
1842 by one tick, as the write occurs in the cycle after the ack.
1843
1844 00 pattern data
1845 01 lfu data
1846 10 adder output
1847 11 source zed
1848
1849 Bit 0 = /patdsel . /adddsel
1850 + dzwrite1d
1851 Bit 1 = adddsel
1852 + dzwrite1d
1853 */
1854
1855 data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
1856 | (adddsel || dzwrite ? 0x02 : 0x00);
1857
1858 ADDRGEN(&address, &pixAddr, gena2i, zaddr,
1859 a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
1860 a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
1861
1862 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
1863 if (!justify)
1864 address &= 0xFFFFF8;
1865
1866 /* Generate source alignment shift
1867 -------------------------------
1868 The source alignment shift for data move is the difference between
1869 the source and destination X pointers, multiplied by the pixel
1870 size. Only the low six bits of the pointers are of interest, as
1871 pixel sizes are always a power of 2 and window rows are always
1872 phrase aligned.
1873
1874 When not in phrase mode, the top 3 bits of the shift value are
1875 set to zero (2/26).
1876
1877 Source shifting is also used to extract bits for bit-to-byte
1878 expansion in phrase mode. This involves only the bottom three
1879 bits of the shift value, and is based on the offset within the
1880 phrase of the destination X pointer, in pixels.
1881
1882 Source shifting is disabled when srcen is not set.
1883 */
1884
1885 dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
1886 srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
1887 shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
1888 /* The phrase mode alignment count is given by the phrase offset
1889 of the first pixel, for bit to byte expansion */
1890 pobb = 0;
1891
1892 if (pixsize == 3)
1893 pobb = dstxp & 0x07;
1894 if (pixsize == 4)
1895 pobb = dstxp & 0x03;
1896 if (pixsize == 5)
1897 pobb = dstxp & 0x01;
1898
1899 pobbsel = phrase_mode && bcompen;
1900 loshd = (pobbsel ? pobb : shftv) & 0x07;
1901 shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
1902 /* Enable for high bits is srcen . phrase_mode */
1903 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
1904 srcshift = shfti;
1905
1906 if (sreadx)
1907 {
1908 //uint32_t srcAddr, pixAddr;
1909 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
1910 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
1911 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
1912 srcd2 = srcd1;
1913 srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
1914 | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1915 //Kludge to take pixel size into account...
1916 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
1917 //Actually, it would be--because of BCOMPEN expansion, for example...
1918 if (!phrase_mode)
1919 {
1920 if (bcompen)
1921 srcd1 >>= 56;
1922 else
1923 {
1924 if (pixsize == 5)
1925 srcd1 >>= 32;
1926 else if (pixsize == 4)
1927 srcd1 >>= 48;
1928 else
1929 srcd1 >>= 56;
1930 }
1931 }//*/
1932 }
1933
1934 if (szreadx)
1935 {
1936 srcz2 = srcz1;
1937 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1938 }
1939
1940 if (sread)
1941 {
1942 srcd2 = srcd1;
1943 srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1944 //Kludge to take pixel size into account...
1945 if (!phrase_mode)
1946 {
1947 if (bcompen)
1948 srcd1 >>= 56;
1949 else
1950 {
1951 if (pixsize == 5)
1952 srcd1 >>= 32;
1953 else if (pixsize == 4)
1954 srcd1 >>= 48;
1955 else
1956 srcd1 >>= 56;
1957 }
1958 }
1959 }
1960
1961 if (szread)
1962 {
1963 srcz2 = srcz1;
1964 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1965 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
1966 if (!phrase_mode && pixsize == 4)
1967 srcz1 >>= 48;
1968
1969 }
1970
1971 if (dread)
1972 {
1973 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1974 //Kludge to take pixel size into account...
1975 if (!phrase_mode)
1976 {
1977 if (pixsize == 5)
1978 dstd >>= 32;
1979 else if (pixsize == 4)
1980 dstd >>= 48;
1981 else
1982 dstd >>= 56;
1983 }
1984 }
1985
1986 if (dzread)
1987 {
1988 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
1989 dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
1990 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
1991 if (!phrase_mode && pixsize == 4)
1992 dstz >>= 48;
1993
1994 }
1995
1996 // These vars should probably go further up in the code... !!! FIX !!!
1997 // We can't preassign these unless they're static...
1998 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
1999 if (dwrite)
2000 {
2001 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
2002 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
2003 int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07); // From INNER_CNT
2004 uint8_t inc = 0;
2005 uint16_t oldicount;
2006 uint8_t dstart = 0;
2007
2008 inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
2009 inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || (pixsize == 5 && !(inct & 0x01))) ? 0x02 : 0x00);
2010 inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
2011 inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
2012
2013 oldicount = icount; // Save icount to detect underflow...
2014 icount -= inc;
2015
2016 if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
2017 inner0 = true;
2018 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
2019
2020 //*********************************************************************************
2021 //Start & end write mask computations...
2022 //*********************************************************************************
2023
2024
2025 if (pixsize == 3)
2026 dstart = (dstxp & 0x07) << 3;
2027 if (pixsize == 4)
2028 dstart = (dstxp & 0x03) << 4;
2029 if (pixsize == 5)
2030 dstart = (dstxp & 0x01) << 5;
2031
2032 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
2033
2034 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
2035 dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
2036 pseq = dstxwr ^ (a1_win_x & 0x7FFE);
2037 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
2038 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
2039 penden = clip_a1 && (pseq == 0);
2040 window_mask = 0;
2041
2042 if (pixsize == 3)
2043 window_mask = (a1_win_x & 0x07) << 3;
2044 if (pixsize == 4)
2045 window_mask = (a1_win_x & 0x03) << 4;
2046 if (pixsize == 5)
2047 window_mask = (a1_win_x & 0x01) << 5;
2048
2049 window_mask = (penden ? window_mask : 0);
2050
2051 /* The mask to be used if within one phrase of the end of the inner
2052 loop, similarly */
2053
2054 if (pixsize == 3)
2055 inner_mask = (icount & 0x07) << 3;
2056 if (pixsize == 4)
2057 inner_mask = (icount & 0x03) << 4;
2058 if (pixsize == 5)
2059 inner_mask = (icount & 0x01) << 5;
2060 if (!inner0)
2061 inner_mask = 0;
2062 /* The actual mask used should be the lesser of the window masks and
2063 the inner mask, where is all cases 000 means 1000. */
2064 window_mask = (window_mask == 0 ? 0x40 : window_mask);
2065 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
2066
2067 emask = (window_mask > inner_mask ? inner_mask : window_mask);
2068 /* The mask to be used for the pixel size, to which must be added
2069 the bit offset */
2070 pma = pixAddr + (1 << pixsize);
2071 /* Select the mask */
2072 dend = (phrase_mode ? emask : pma);
2073
2074 /* The cycle width in phrase mode is normally one phrase. However,
2075 at the start and end it may be narrower. The start and end masks
2076 are used to generate this. The width is given by:
2077
2078 8 - start mask - (8 - end mask)
2079 = end mask - start mask
2080
2081 This is only used for writes in phrase mode.
2082 Start and end from the address level of the pipeline are used.
2083 */
2084
2085 //More testing... This is almost certainly wrong, but how else does this work???
2086 //Seems to kinda work... But still, this doesn't seem to make any sense!
2087 if (phrase_mode && !dsten)
2088 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
2089
2090 //Testing only... for now...
2091 //This is wrong because the write data is a combination of srcd and dstd--either run
2092 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
2093 // Precedence is ADDDSEL > PATDSEL > LFU.
2094 //Also, doesn't take into account the start & end masks, or the phrase width...
2095 //Now it does!
2096
2097 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
2098 srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
2099 //bleh, ugly ugly ugly
2100 if (srcshift == 0)
2101 srcd = srcd1;
2102
2103 //NOTE: This only works with pixel sizes less than 8BPP...
2104 //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
2105 if (!phrase_mode && srcshift != 0)
2106 srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
2107
2108 //Z DATA() stuff done here... And it has to be done before any Z shifting...
2109 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
2110 /*
2111 Here are a couple of Cybermorph blits with Z:
2112 $00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2113 $09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2114
2115 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
2116 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
2117 */
2118 if (gourz)
2119 {
2120 uint16_t addq[4];
2121 uint8_t initcin[4] = { 0, 0, 0, 0 };
2122 ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
2123 srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2124 ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
2125 srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2126
2127 }
2128
2129 zSrcShift = srcshift & 0x30;
2130 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
2131 //bleh, ugly ugly ugly
2132 if (zSrcShift == 0)
2133 srcz = srcz1;
2134
2135
2136 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
2137 //According to following line, it gets LFU mode. But does it feed the source into the LFU
2138 //after the add?
2139 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
2140 //Let's try this:
2141 if (srcshade)
2142 {
2143 //NOTE: This is basically doubling the work done by DATA--since this is what
2144 // ADDARRAY is loaded with when srschshade is enabled... !!! FIX !!!
2145 // Also note that it doesn't work properly unless GOURZ is set--there's the clue!
2146 uint16_t addq[4];
2147 uint8_t initcin[4] = { 0, 0, 0, 0 };
2148 ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
2149 srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2150 }
2151 //Seems to work... Not 100% sure tho.
2152 //end try this
2153
2154 //Temporary kludge, to see if the fractional pattern does anything...
2155 //This works, BTW
2156 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
2157 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
2158 if (patfadd)
2159 {
2160 uint16_t addq[4];
2161 uint8_t initcin[4] = { 0, 0, 0, 0 };
2162 ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
2163 srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
2164 }
2165
2166 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
2167 //Not yet enumerated: dbinh, srcdread, srczread
2168 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
2169 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
2170
2171 DATA(&wdata, &dcomp, &zcomp, &winhibit,
2172 true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
2173 dend, dstart, dstd, iinc, lfufunc, &patd, patdadd,
2174 phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
2175 bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
2176 &srcz, dstz, zinc);
2177
2178 /*
2179 DEF ADDRCOMP (
2180 a1_outside // A1 pointer is outside window bounds
2181 :OUT;
2182 INT16/ a1_x
2183 INT16/ a1_y
2184 INT15/ a1_win_x
2185 INT15/ a1_win_y
2186 :IN);
2187 BEGIN
2188
2189 // The address is outside if negative, or if greater than or equal
2190 // to the window size
2191
2192 A1_xcomp := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
2193 A1_ycomp := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
2194 A1_outside := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
2195 */
2196 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
2197 // Actually, seems to be related to phrase mode writes...
2198 // Or is it? Could be related to non-15-bit compares as above?
2199 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
2200 winhibit = true;
2201
2202 if (!winhibit)
2203 {
2204 if (phrase_mode)
2205 {
2206 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
2207 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
2208 }
2209 else
2210 {
2211 if (pixsize == 5)
2212 JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
2213 else if (pixsize == 4)
2214 JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
2215 else
2216 JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
2217 }
2218 }
2219
2220 }
2221
2222 if (dzwrite)
2223 {
2224 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
2225 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
2226 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
2227 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
2228 // high 32 at the high address (little endian!).
2229 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
2230 // Phrase mode start/end masks are not properly supported either...
2231 //This is not correct... !!! FIX !!!
2232 //Should be OK now... We'll see...
2233 //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
2234 //This is not causing the problem in Hover Strike... :-/
2235 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
2236 if (!winhibit)
2237 {
2238 if (phrase_mode)
2239 {
2240 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
2241 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
2242 }
2243 else
2244 {
2245 if (pixsize == 4)
2246 JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
2247 }
2248 }//*/
2249 }
2250
2251
2252 if (a1_add)
2253 {
2254 int16_t adda_x, adda_y, addb_x, addb_y, addq_x, addq_y;
2255 ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2256 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2257 ADDBMUX(&addb_x, &addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2258 ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2259
2260 //Now, write to what???
2261 //a2ptrld comes from a2ptrldi...
2262 //I believe it's addbsel that determines the writeback...
2263 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
2264 // gets written to...
2265 //a1_x = addq_x;
2266 //a1_y = addq_y;
2267 //Kludge, to get A1 channel increment working...
2268 if (a1addx == 3)
2269 {
2270 a1_frac_x = addq_x, a1_frac_y = addq_y;
2271
2272 addasel = 2, addbsel = 0, a1fracldi = false;
2273 ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2274 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2275 ADDBMUX(&addb_x,&addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2276 ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2277
2278 a1_x = addq_x, a1_y = addq_y;
2279 }
2280 else
2281 a1_x = addq_x, a1_y = addq_y;
2282 }
2283
2284 if (a2_add)
2285 {
2286 int16_t adda_x, adda_y, addb_x, addb_y, addq_x, addq_y;
2287 ADDAMUX(&adda_x, &adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
2288 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
2289 ADDBMUX(&addb_x, &addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
2290 ADDRADD(&addq_x, &addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
2291
2292 //Now, write to what???
2293 //a2ptrld comes from a2ptrldi...
2294 //I believe it's addbsel that determines the writeback...
2295 a2_x = addq_x;
2296 a2_y = addq_y;
2297 }
2298 }
2299
2300 indone = true;
2301 // The outer counter is updated here as well on the clock cycle...
2302
2303 /* the inner loop is started whenever another state is about to
2304 cause the inner state to go active */
2305 //Instart := ND7 (instart, innert[0], innert[2..7]);
2306
2307 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
2308 //(inner AND !indone)
2309 //fixed now...
2310 //Since we don't get here until the inner loop is finished (indone = true) we can get
2311 //away with doing it here...!
2312 ocount--;
2313
2314 if (ocount == 0)
2315 outer0 = true;
2316 }
2317
2318 if (a1fupdate)
2319 {
2320 uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
2321 uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
2322 a1FracCInX = a1_frac_xt >> 16;
2323 a1FracCInY = a1_frac_yt >> 16;
2324 a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
2325 a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
2326 }
2327
2328 if (a1update)
2329 {
2330 a1_x += a1_step_x + a1FracCInX;
2331 a1_y += a1_step_y + a1FracCInY;
2332 }
2333
2334 if (a2update)
2335 {
2336 a2_x += a2_step_x;
2337 a2_y += a2_step_y;
2338 }
2339 }
2340
2341 // We never get here! !!! FIX !!!
2342
2343
2344 // Write values back to registers (in real blitter, these are continuously updated)
2345 SET16(blitter_ram, A1_PIXEL + 2, a1_x);
2346 SET16(blitter_ram, A1_PIXEL + 0, a1_y);
2347 SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
2348 SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
2349 SET16(blitter_ram, A2_PIXEL + 2, a2_x);
2350 SET16(blitter_ram, A2_PIXEL + 0, a2_y);
2351
2352 }
2353
2354 // Various pieces of the blitter puzzle are teased out here...
2355
/*
 * ADDRGEN - address generation for one blitter memory cycle.
 *
 * gena2 chooses which window description feeds the computation (A2 when
 * set, A1 otherwise). The low 12 bits of the Y pointer are scaled by the
 * width code (low two bits act as a mantissa added to 4, the remaining
 * bits as a shift count), all 16 bits of the X pointer are added, and the
 * sum is shifted left by the pixel-size code to give a bit offset.
 *
 * Outputs:
 *   *address - ((phrase address & 0x1FFFFF) << 3) | bits 3..5 of the bit offset
 *   *pixa    - bits 0..2 of the bit offset
 *
 * When zaddr is set, the low two bits of the selected zoffset are added
 * to the phrase address.
 */
void ADDRGEN(uint32_t *address, uint32_t *pixa, bool gena2, bool zaddr,
   uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
   uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
{
   uint16_t ptr_x, ptr_y;
   uint8_t  win_width, win_pixsize, win_pitch, win_zoffset;
   uint32_t win_base;
   uint32_t row_scaled, row_offset, pixel_index, bit_offset;
   uint32_t phrase_index, phrase_scaled, phrase_extra, phrase_addr;
   uint8_t  pitch_shift, z_add;

   /* Pick the register set of the selected window. */
   if (gena2)
   {
      ptr_x       = a2_x;
      ptr_y       = a2_y;
      win_width   = a2_width;
      win_pixsize = a2_pixsize;
      win_pitch   = a2_pitch;
      win_base    = a2_base;
      win_zoffset = a2_zoffset;
   }
   else
   {
      ptr_x       = a1_x;
      ptr_y       = a1_y;
      win_width   = a1_width;
      win_pixsize = a1_pixsize;
      win_pitch   = a1_pitch;
      win_base    = a1_base;
      win_zoffset = a1_zoffset;
   }

   ptr_y    &= 0x0FFF;   /* Only 12 bits of Y take part; X uses all 16 */
   win_base >>= 3;       /* Base is handled as a phrase address from here on */

   /* y * (4 + mantissa), then apply the exponent and drop the two
      fractional bits introduced by the implicit "4". */
   row_scaled = (uint32_t)ptr_y * (4u + (uint32_t)(win_width & 0x03));
   row_offset = (row_scaled << (win_width >> 2)) >> 2;

   pixel_index = row_offset + ptr_x;
   bit_offset  = pixel_index << win_pixsize;
   phrase_index = bit_offset >> 6;

   /* Pitch codes 1 and 2 shift the phrase index left by that amount;
      code 3 is handled separately below as index + 2 * index. */
   switch (win_pitch & 0x03)
   {
      case 0x01: pitch_shift = 1; break;
      case 0x02: pitch_shift = 2; break;
      default:   pitch_shift = 0; break;
   }

   phrase_scaled = phrase_index << pitch_shift;
   phrase_extra  = 0;

   if (win_pitch == 0x03)
      phrase_extra = phrase_index;

   z_add = 0;

   if (zaddr)
      z_add = win_zoffset & 0x03;

   phrase_addr = z_add + phrase_scaled + (phrase_extra << 1) + win_base;

   *address = ((bit_offset & 0x38) >> 3) | ((phrase_addr & 0x1FFFFF) << 3);
   *pixa    = bit_offset & 0x07;
}
2388
2389 ////////////////////////////////////////////////////////////////////////////////////////////
2390 ////////////////////////////////////////////////////////////////////////////////////////////
2391 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
2392 ////////////////////////////////////////////////////////////////////////////////////////////
2393 ////////////////////////////////////////////////////////////////////////////////////////////
2394
/*
 * ADDARRAY - the data adder: four 16-bit ADD16SAT lanes working on one
 * 64-bit operand pair.
 *
 * Input A is chosen by daddasel:
 *   0 dstd, 1 initpix repeated in every lane, 2/3 zero,
 *   4 srcd, 5 patd, 6 srcz1, 7 srcz2
 *
 * Input B is chosen by daddbsel:
 *   bit 2 set          - one 16-bit word repeated in every lane; the word
 *                        is the low (bit 0 clear) or high (bit 0 set) half
 *                        of iinc / zinc / istep / zstep, the register being
 *                        picked by bits 3 and 1
 *   bit 2 clear, bit 0 - initinc
 *   bits 2 and 0 clear - srcd
 *
 * daddmode controls carry and saturation:
 *   modes 1..4         - the previous call's carry outs feed the carry ins
 *   bit 1              - eight-bit mode
 *   any of bits 0..1   - saturating add
 *   bits 0..1 both set - carry into the top nybble is inhibited
 *
 * The four results are stored to addq[0..3], lane 0 being the least
 * significant word. The lane carry outs are kept in static storage and so
 * persist between calls.
 */
void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
   uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
   uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
   uint32_t zinc, uint32_t zstep)
{
   static uint8_t co[4];   /* Carry outs, preserved between calls */

   const uint32_t pix_pair = ((uint32_t)initpix << 16) | initpix;
   uint64_t a_sources[8];
   uint64_t a_value, b_value;
   uint32_t b_reg;
   uint16_t b_word;
   uint8_t  carry_enable;
   bool     eightbit, sat, hicinh;
   unsigned lane;

   /* ---- Input A ---- */
   a_sources[0] = dstd;
   a_sources[1] = ((uint64_t)pix_pair << 32) | pix_pair;
   a_sources[2] = 0;
   a_sources[3] = 0;
   a_sources[4] = srcd;
   a_sources[5] = patd;
   a_sources[6] = srcz1;
   a_sources[7] = srcz2;
   a_value = a_sources[daddasel];

   /* ---- Input B ---- */
   /* Bits 3 and 1 of daddbsel name the register, bit 0 the half-word. */
   switch (((daddbsel & 0x08) >> 2) | ((daddbsel & 0x02) >> 1))
   {
      case 0:  b_reg = iinc;  break;
      case 1:  b_reg = zinc;  break;
      case 2:  b_reg = istep; break;
      default: b_reg = zstep; break;
   }

   if (daddbsel & 0x01)
      b_word = (uint16_t)(b_reg >> 16);
   else
      b_word = (uint16_t)(b_reg & 0xFFFF);

   if (daddbsel & 0x04)
      b_value = (uint64_t)b_word * UINT64_C(0x0001000100010001);   /* Same word in all lanes */
   else if (daddbsel & 0x01)
      b_value = initinc;
   else
      b_value = srcd;

   /* ---- Mode decode ---- */
   carry_enable = 0;

   if (daddmode >= 1 && daddmode <= 4)
      carry_enable = 1;

   eightbit = (daddmode & 0x02) != 0;
   sat      = (daddmode & 0x03) != 0;
   hicinh   = (daddmode & 0x03) == 0x03;

   /* ---- Four 16-bit adds; each lane's carry in comes from the initial
           carry and, when enabled, that lane's carry out of the last call ---- */
   for(lane = 0; lane < 4; lane++)
   {
      const unsigned shift = 16 * lane;
      const uint16_t lane_a = (uint16_t)((a_value >> shift) & 0xFFFF);
      const uint16_t lane_b = (uint16_t)((b_value >> shift) & 0xFFFF);
      const uint8_t  lane_cin = initcin[lane] | (co[lane] & carry_enable);

      ADD16SAT(&addq[lane], &co[lane], lane_a, lane_b, lane_cin, sat, eightbit, hicinh);
   }
}
2476
2477
/*
 * ADD16SAT - one 16-bit lane of the data adder.
 *
 * The sum a + b + cin is built in three stages (low byte, bits 8..11,
 * bits 12..15) so that the carries between stages can be gated:
 *   eightbit - blocks the carry from the low byte into bit 8
 *   hicinh   - blocks the carry from bit 11 into bit 12
 *
 * With sat set the result saturates when the top bit of b differs from the
 * carry out of the unit being saturated: the low byte when eightbit is set
 * (the upper byte then keeps the plain sum), the whole word otherwise.
 * Saturation yields all ones if that carry is set and all zeros if not.
 *
 *   *r  - receives the 16-bit result
 *   *co - receives the carry out of bit 15
 */
void ADD16SAT(uint16_t *r, uint8_t *co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
{
   uint32_t partial;
   uint16_t sum;
   uint8_t  carry_lo, carry_mid;
   uint8_t  carry_into_mid, carry_into_hi;
   uint8_t  sign_b, sign_carry;
   bool     clamp_lo, clamp_hi;
   uint16_t result;

   /* Stage 1: bits 0..7 */
   partial  = (a & 0xFF) + (b & 0xFF) + cin;
   carry_lo = (partial & 0x0100) ? 1 : 0;
   sum      = partial & 0x00FF;

   /* Stage 2: bits 8..11 */
   carry_into_mid = (carry_lo && !eightbit) ? carry_lo : 0;
   partial   = (a & 0x0F00) + (b & 0x0F00) + (carry_into_mid << 8);
   carry_mid = (partial & 0x1000) ? 1 : 0;
   sum      |= partial & 0x0F00;

   /* Stage 3: bits 12..15 */
   carry_into_hi = (carry_mid && !hicinh) ? carry_mid : 0;
   partial = (a & 0xF000) + (b & 0xF000) + (carry_into_hi << 12);
   *co     = (partial & 0x10000) ? 1 : 0;
   sum    |= partial & 0xF000;

   /* Saturation decision uses the byte or the word, depending on mode. */
   if (eightbit)
   {
      sign_b     = (b & 0x0080) >> 7;
      sign_carry = carry_lo;
   }
   else
   {
      sign_b     = (b & 0x8000) >> 15;
      sign_carry = *co;
   }

   clamp_lo = sat && (sign_b ^ sign_carry);
   clamp_hi = clamp_lo && !eightbit;

   if (clamp_lo)
      result = sign_carry ? 0x00FF : 0x0000;
   else
      result = sum & 0x00FF;

   if (clamp_hi)
      result |= sign_carry ? 0xFF00 : 0x0000;
   else
      result |= sum & 0xFF00;

   *r = result;
}
2505
/*
 * ADDAMUX - address adder input A selection.
 *
 * Produces the X/Y pair fed to input A of the address adder.
 *
 *   addareg set   - a register pair chosen by addasel:
 *                     0 a1_step, 1 a1_stepf, 2 a1_inc, 3 a1_incf,
 *                     4..7 (bit 2 set) a2_step
 *   addareg clear - a constant pair: X is 1 << adda_xconst, or 0 when
 *                   adda_xconst is 7; Y is 1 when adda_yconst is set,
 *                   otherwise 0
 *
 * suba_x / suba_y XOR the selected X / Y value with 0xFFFF before it is
 * stored to *adda_x / *adda_y (the netlist notes that the complement flag
 * also supplies the adder's carry in; that carry is not produced here).
 */
void ADDAMUX(int16_t *adda_x, int16_t *adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
   int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
   int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
   bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
{
   int16_t reg_x, reg_y;       /* Register candidate */
   int16_t const_x, const_y;   /* Constant candidate */
   int16_t picked_x, picked_y;
   int     flip_x, flip_y;

   /* Register source */
   switch (addasel & 0x07)
   {
      case 0:
         reg_x = a1_step_x;
         reg_y = a1_step_y;
         break;
      case 1:
         reg_x = a1_stepf_x;
         reg_y = a1_stepf_y;
         break;
      case 2:
         reg_x = a1_inc_x;
         reg_y = a1_inc_y;
         break;
      case 3:
         reg_x = a1_incf_x;
         reg_y = a1_incf_y;
         break;
      default:   /* Bit 2 set: A2 step, whatever the low bits say */
         reg_x = a2_step_x;
         reg_y = a2_step_y;
         break;
   }

   /* Constant source: a power of two for X (all-ones control means zero),
      0 or 1 for Y */
   if (adda_xconst == 0x07)
      const_x = 0;
   else
      const_x = 1 << adda_xconst;

   const_y = 0;

   if (adda_yconst)
      const_y = 0x01;

   /* Register versus constant */
   if (addareg)
   {
      picked_x = reg_x;
      picked_y = reg_y;
   }
   else
   {
      picked_x = const_x;
      picked_y = const_y;
   }

   /* Optional complement of each component */
   flip_x = (suba_x ? 0xFFFF : 0x0000);
   flip_y = (suba_y ? 0xFFFF : 0x0000);

   *adda_x = picked_x ^ flip_x;
   *adda_y = picked_y ^ flip_y;
}
2560
2561
2562 /** ADDBMUX - Address adder input B selection *******************
2563
2564 This module selects the register to be updated by the address
2565 adder. This can be one of three registers, the A1 and A2
2566 pointers, or the A1 fractional part. It can also be zero, so that the step
2567 registers load directly into the pointers.
2568 */
2569
2570 /*DEF ADDBMUX (
2571 INT16/ addb_x
2572 INT16/ addb_y
2573 :OUT;
2574 addbsel[0..1]
2575 INT16/ a1_x
2576 INT16/ a1_y
2577 INT16/ a2_x
2578 INT16/ a2_y
2579 INT16/ a1_frac_x
2580 INT16/ a1_frac_y
2581 :IN);
2582 INT16/ zero16 :LOCAL;
2583 BEGIN*/
/*
 * ADDBMUX - address adder input B selection.
 *
 * Writes to *addb_x / *addb_y the X/Y pair chosen by the low two bits of
 * addbsel:
 *   0 -> A1 pointer       (a1_x, a1_y)
 *   1 -> A2 pointer       (a2_x, a2_y)
 *   2 -> A1 fraction      (a1_frac_x, a1_frac_y)
 *   3 -> zero             (0, 0)
 * Bits of addbsel above bit 1 are ignored.
 */
void ADDBMUX(int16_t *addb_x, int16_t *addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
    int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
{

/*Zero      := TIE0 (zero);
Zero16      := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
            zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addbselb[0-1]   := BUF8 (addbselb[0-1], addbsel[0-1]);
Addb_x      := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
Addb_y      := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* Four-way multiplexer: X and Y always come from the same source. */
    switch (addbsel & 0x03)
    {
    case 0:
        *addb_x = a1_x;
        *addb_y = a1_y;
        break;
    case 1:
        *addb_x = a2_x;
        *addb_y = a2_y;
        break;
    case 2:
        *addb_x = a1_frac_x;
        *addb_y = a1_frac_y;
        break;
    default:
        *addb_x = 0;
        *addb_y = 0;
        break;
    }
//////////////////////////////////////////////////////////////////////////////////////

//END;
}
2604
2605
2606 /** DATAMUX - Address local data bus selection ******************
2607
2608 Select between the adder output and the input data bus
2609 */
2610
2611 /*DEF DATAMUX (
2612 INT16/ data_x
2613 INT16/ data_y
2614 :OUT;
2615 INT32/ gpu_din
2616 INT16/ addq_x
2617 INT16/ addq_y
2618 addqsel
2619 :IN);
2620
2621 INT16/ gpu_lo, gpu_hi
2622 :LOCAL;
2623 BEGIN*/
/*
 * DATAMUX - address local data bus selection.
 *
 * When addqsel is set, the adder outputs (addq_x, addq_y) are passed through
 * to *data_x / *data_y.  Otherwise the 32-bit GPU data word is split: its low
 * 16 bits go to *data_x and its high 16 bits go to *data_y.
 */
void DATAMUX(int16_t *data_x, int16_t *data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
{
/*Gpu_lo        := JOIN (gpu_lo, gpu_din{0..15});
Gpu_hi      := JOIN (gpu_hi, gpu_din{16..31});

Addqselb    := BUF8 (addqselb, addqsel);
Data_x      := MX2 (data_x, gpu_lo, addq_x, addqselb);
Data_y      := MX2 (data_y, gpu_hi, addq_y, addqselb);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    if (addqsel)
    {
        /* Adder result selected */
        *data_x = addq_x;
        *data_y = addq_y;
        return;
    }

    /* GPU data selected: low half is X, high half is Y */
    *data_x = (int16_t)(gpu_din & 0xFFFF);
    *data_y = (int16_t)(gpu_din >> 16);
//////////////////////////////////////////////////////////////////////////////////////

//END;
}
2639
2640
2641 /******************************************************************
2642 addradd
2643 29/11/90
2644
2645 Blitter Address Adder
2646 ---------------------
2647 The blitter address adder is a pair of sixteen bit adders, one
2648 each for X and Y. The multiplexing of the input terms is
2649 performed elsewhere, but this adder can also perform modulo
2650 arithmetic to align X-addresses onto phrase boundaries.
2651
2652 modx[0..2] take values
2653 000 no mask
2654 001 mask bit 0
2655 010 mask bits 1-0
2656 ..
2657 110 mask bits 5-0
2658
2659 ******************************************************************/
2660
/*
 * ADDRADD - blitter address adder (one 16-bit adder each for X and Y).
 *
 * Outputs:
 *   *addq_x  - low 16 bits of adda_x + addb_x + carry-in, with the low bits
 *              cleared according to modx (see table below)
 *   *addq_y  - low 16 bits of adda_y + addb_y + carry-in
 *
 * Inputs:
 *   a1fracldi        - when set, the carry out of this addition is latched
 *                      and fed into the NEXT call; when clear, the latched
 *                      carry is reset to 0
 *   adda_*, addb_*   - the two adder operands
 *   modx             - X mask selector; only the low three bits are used
 *   suba_x, suba_y   - invert the carry-in for the respective axis
 *
 * The latched carries are kept in function-local statics, so the result of a
 * call depends on the previous call and the function is not reentrant.
 */
void ADDRADD(int16_t *addq_x, int16_t *addq_y, bool a1fracldi,
    uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
{

/* Perform the addition */

/*Adder_x       := ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
Adder_y     := ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/

/* latch carry and propagate if required */

/*Cxt0      := AN2 (cxt[0], co_x, a1fracldi);
Cxt1        := FD1Q (cxt[1], cxt[0], clk[0]);
Ci_x        := EO (ci_x, cxt[1], suba_x);

yt0         := AN2 (cyt[0], co_y, a1fracldi);
Cyt1        := FD1Q (cyt[1], cyt[0], clk[0]);
Ci_y        := EO (ci_y, cyt[1], suba_y);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* X mask per modx value: entry n clears the low n bits (n = 0..6).
       NOTE(review): the netlist routes decoder output 7 to an unused net,
       which suggests modx == 7 should apply no mask; the table clears every
       bit instead. Left unchanged to preserve existing behaviour - confirm. */
    static const uint16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
    /* Carry out has to propagate between function calls... */
    static uint16_t co_x = 0, co_y = 0;
    /* Carry in is the previously latched carry, inverted when subtracting */
    uint16_t ci_x = co_x ^ (suba_x ? 1 : 0);
    uint16_t ci_y = co_y ^ (suba_y ? 1 : 0);
    /* Done in 32 bits so that bit 16 holds the carry out */
    uint32_t addqt_x = (uint32_t)adda_x + addb_x + ci_x;
    uint32_t addqt_y = (uint32_t)adda_y + addb_y + ci_y;

    /* The carry is only latched while the A1 fraction is being loaded */
    co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
    co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
//////////////////////////////////////////////////////////////////////////////////////

/* Mask low bits of X to 0 if required */

/*Masksel       := D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);

Maskbit[0-4]    := OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);

Mask[0-5]   := MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);

Addq_x      := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
Addq_y      := JOIN (addq_y, addq_y[0..15]);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* modx is a three-bit field; mask it so a stray high bit can never index
       past the end of the table. */
    *addq_x = (int16_t)(addqt_x & mask[modx & 0x07]);
    *addq_y = (int16_t)(addqt_y & 0xFFFF);
//////////////////////////////////////////////////////////////////////////////////////

//Unused[0-1]   := DUMMY (unused[0-1]);

//END;
}
2712
2713
2714 /*
2715 DEF DATA (
2716 wdata[0..63] // co-processor write data bus
2717 :BUS;
2718 dcomp[0..7] // data byte equal flags
2719 srcd[0..7] // bits to use for bit to byte expansion
2720 zcomp[0..3] // output from Z comparators
2721 :OUT;
2722 a1_x[0..1] // low two bits of A1 X pointer
2723 big_pix // pixel organisation is big-endian
2724 blitter_active // blitter is active
2725 clk // co-processor clock
2726 cmpdst // compare dest rather than source
2727 colorld // load the pattern color fields
2728 daddasel[0..2] // data adder input A selection
2729 daddbsel[0..3] // data adder input B selection
2730 daddmode[0..2] // data adder mode
2731 daddq_sel // select adder output vs. GPU data
2732 data[0..63] // co-processor read data bus
2733 data_ena // enable write data
2734 data_sel[0..1] // select data to write
2735 dbinh\[0..7] // byte oriented changed data inhibits
2736 dend[0..5] // end of changed write data zone
2737 dpipe[0..1] // load computed data pipe-line latch
2738 dstart[0..5] // start of changed write data zone
2739 dstdld[0..1] // dest data load (two halves)
2740 dstzld[0..1] // dest zed load (two halves)
2741 ext_int // enable extended precision intensity calculations
2742 INT32/ gpu_din // GPU data bus
2743 iincld // I increment load
2744 iincldx // alternate I increment load
2745 init_if // initialise I fraction phase
2746 init_ii // initialise I integer phase
2747 init_zf // initialise Z fraction phase
2748 intld[0..3] // computed intensities load
2749 istepadd // intensity step integer add
2750 istepfadd // intensity step fraction add
2751 istepld // I step load
2752 istepdld // I step delta load
2753 lfu_func[0..3] // LFU function code
2754 patdadd // pattern data gouraud add
2755 patdld[0..1] // pattern data load (two halves)
2756 pdsel[0..1] // select pattern data type
2757 phrase_mode // phrase write mode
2758 reload // transfer contents of double buffers
2759 reset\ // system reset
2760 srcd1ld[0..1] // source register 1 load (two halves)
2761 srcdread // source data read load enable
2762 srczread // source zed read load enable
2763 srcshift[0..5] // source alignment shift
2764 srcz1ld[0..1] // source zed 1 load (two halves)
2765 srcz2add // zed fraction gouraud add
2766 srcz2ld[0..1] // source zed 2 load (two halves)
2767 textrgb // texture mapping in RGB mode
2768 txtd[0..63] // data from the texture unit
2769 zedld[0..3] // computed zeds load
2770 zincld // Z increment load
2771 zmode[0..2] // Z comparator mode
2772 zpipe[0..1] // load computed zed pipe-line latch
2773 zstepadd // zed step integer add
2774 zstepfadd // zed step fraction add
2775 zstepld // Z step load
2776 zstepdld // Z step delta load
2777 :IN);
2778 */
2779
/*
 * Merge 'changed' into 'unchanged' under the 15-bit DATA_MUX write mask:
 * mask bits 0-7 select individual bits of byte 0, mask bits 8-14 select the
 * whole of bytes 1-7 respectively.
 */
static uint64_t data_mux_merge(uint64_t changed, uint64_t unchanged, uint16_t mask)
{
    uint64_t bits = (uint64_t)(mask & 0x00FF);
    uint64_t merged = (changed & bits) | (unchanged & ~bits & 0x00000000000000FFULL);
    int byte;

    for (byte = 1; byte < 8; byte++)
    {
        uint64_t lane = (uint64_t)0xFF << (byte * 8);

        merged |= ((mask & (0x0080 << byte)) ? changed : unchanged) & lane;
    }

    return merged;
}

/*
 * DATA - blitter data path for one write cycle.
 *
 * Outputs:
 *   *wdata   - phrase to write: the data selected by data_sel merged into
 *              dstd under the changed-data mask
 *   *dcomp   - one bit per byte, set where *patd equals the compared data
 *              (dstd if cmpdst, otherwise srcd)
 *   *zcomp   - one bit per 16-bit lane, set where the *srcz/dstz relation
 *              matches one enabled in zmode (bit 0 '<', bit 1 '==', bit 2 '>')
 *   *nowrite - produced by COMP_CTRL
 *   *patd    - replaced by the adder array output when patdadd is set
 *   *srcz    - replaced by *srcz merged into dstz under the same mask
 *
 * Inputs actually read by this body: cmpdst, daddasel, daddbsel, daddmode,
 * data_sel (low two bits), dend, dstart, dstd, iinc, lfu_func, patdadd,
 * phrase_mode, srcd, zmode, bcompen, bkgwren, dcompen, icount, pixsize, dstz.
 *
 * big_pix, daddq_sel, srcdread, srczread, srcz2add and zinc are accepted but
 * not read here (big_pix is hard-wired to true below).  The incoming value of
 * dbinh is ignored: it is overwritten by the byte inhibits that COMP_CTRL
 * computes.
 */
void DATA(uint64_t *wdata, uint8_t *dcomp, uint8_t *zcomp, bool *nowrite,
    bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
    uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t *patd, bool patdadd,
    bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
    bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
    uint64_t *srcz, uint64_t dstz, uint32_t zinc)
{
/*
  Stuff we absolutely *need* to have passed in/out:
IN:
  patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
OUT:
  changed patd (wdata I guess...) (Nope. We pass it back directly now...)
*/

// Source data registers

/*Data_src  := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
            clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
            srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
Srcd[0-7]   := JOIN (srcd[0-7], srcdlo{0-7});
Srcd[8-31]  := JOIN (srcd[8-31], srcdlo{8-31});
Srcd[32-63] := JOIN (srcd[32-63], srcdhi{0-31});*/

// Destination data registers

/*Data_dst  := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
Dstdlo      := JOIN (dstdlo, dstd[0..31]);
Dstdhi      := JOIN (dstdhi, dstd[32..63]);*/

// Pattern and Color data registers

// Looks like this is simply another register file for the pattern data registers. No adding or anything funky
// going on. Note that patd & patdv will output the same info.
// Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
// Actually, it can be either patdld OR patdadd...!
/*Data_pat  := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
            patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
            patdadd, patdld[0..1], reload, reset\);
Patdlo      := JOIN (patdlo, patd[0..31]);
Patdhi      := JOIN (patdhi, patd[32..63]);*/

// Multiplying data Mixer (NOT IN JAGUAR I)

/*Datamix       := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
            int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/

// Logic function unit

/*Lfu       := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* 3-to-8 decoders from the netlist, as lookup tables.  First index is
       the enable input, second is the 3-bit value being decoded. */
    static const uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
        { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
    static const uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
    static const uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
    const uint64_t all_ones = 0xFFFFFFFFFFFFFFFFULL;
    /* Each lfu_func bit enables one minterm of (srcd, dstd) across all 64 bits */
    uint64_t func0 = (lfu_func & 0x01) ? all_ones : 0;
    uint64_t func1 = (lfu_func & 0x02) ? all_ones : 0;
    uint64_t func2 = (lfu_func & 0x04) ? all_ones : 0;
    uint64_t func3 = (lfu_func & 0x08) ? all_ones : 0;
    uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
    bool mir_bit, mir_byte;
    uint16_t masku;
    uint8_t e_coarse, e_fine;
    uint8_t s_coarse, s_fine;
    uint16_t maskt;
    int en;
    int i;
    uint64_t cmpd;
    uint8_t dbinht;
    uint16_t addq[4];
    uint8_t initcin[4] = { 0, 0, 0, 0 };
    uint16_t mask;
    uint64_t addq_phrase;
    uint64_t ddat;
//////////////////////////////////////////////////////////////////////////////////////

// Increment and Step Registers

// Does it do anything without the step add lines? Check it!
// No. This is pretty much just a register file without the Jaguar II lines...
/*Inc_step  := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
            istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
Istep       := JOIN (istep, istep[0..31]);
Zstep       := JOIN (zstep, zstep[0..31]);*/

// Pixel data comparator

/*Datacomp  := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    *dcomp = 0;
    cmpd = *patd ^ (cmpdst ? dstd : srcd);

    /* A zero byte in the XOR means that byte matched the pattern */
    for (i = 0; i < 8; i++)
    {
        if (((cmpd >> (i * 8)) & 0xFF) == 0)
            *dcomp |= (uint8_t)(1 << i);
    }
//////////////////////////////////////////////////////////////////////////////////////

// Zed comparator for Z-buffer operations

/*Zedcomp       := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
//srczp is srcz pipelined, also it goes through a source shift as well...
/*The shift is basically like so (each piece is 16 bits long):

    0         1         2         3         4          5         6
    srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcz2lolo  srcz2lohi srcz2hilo

with srcshift bits 4 & 5 selecting the start position
*/
//So... basically what we have here is:
    *zcomp = 0;

    /* Compare each 16-bit Z lane as an unsigned value */
    for (i = 0; i < 4; i++)
    {
        uint32_t src_lane = (uint32_t)((*srcz >> (i * 16)) & 0xFFFF);
        uint32_t dst_lane = (uint32_t)((dstz >> (i * 16)) & 0xFFFF);

        if (((src_lane < dst_lane) && (zmode & 0x01))
            || ((src_lane == dst_lane) && (zmode & 0x02))
            || ((src_lane > dst_lane) && (zmode & 0x04)))
            *zcomp |= (uint8_t)(1 << i);
    }

//TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
//Nope, this is NOT the problem...
//zcomp=0;
// We'll do the comparison/bit/byte inhibits here, since that's the way it happens
// in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
    COMP_CTRL(&dbinht, nowrite,
        bcompen, true/*big_pix*/, bkgwren, *dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, *zcomp);
    dbinh = dbinht;

//////////////////////////////////////////////////////////////////////////////////////

// 22 Mar 94
// The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)

/*Datinit       := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
            init_zf, istep[0..31], zinc, zstep[0..31]);*/

// Adder array for Z and intensity increments

/*Addarray  := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
            initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
            srcz2[0..1], reset\, zinc, zstep);*/
/*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
    uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
    uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
    uint32_t zinc, uint32_t zstep)*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, *patd, srcd, 0, 0, 0, 0);

    /* The four 16-bit adder outputs packed into one phrase, addq[0] lowest */
    addq_phrase = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];

//This is normally done asynchronously above (thru local_data) when in patdadd mode...
//And now it's passed back to the caller to be persistent between calls...!
//But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
//Weird! It doesn't anymore...!
    if (patdadd)
        *patd = addq_phrase;
//////////////////////////////////////////////////////////////////////////////////////

// Local data bus multiplexer

/*Local_mux := LOCAL_MUX (local_data[0..1], load_data[0..1],
    addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
Local_data0 := JOIN (local_data0, local_data[0]);
Local_data1 := JOIN (local_data1, local_data[1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////

// Data output multiplexer and tri-state drive

/*Data_mux  := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
            dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
// NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I

//////////////////////////////////////////////////////////////////////////////////////

/*DEF DATA_MUX (
        wdata[0..63]    // co-processor write data bus
        :BUS;
INT16/  addq[0..3]
        big_pix         // Pixel organisation is big-endian
INT32/  dstdlo
INT32/  dstdhi
INT32/  dstzlo
INT32/  dstzhi
        data_sel[0..1]  // source of write data
        data_ena        // enable write data onto read/write bus
        dstart[0..5]    // start of changed write data
        dend[0..5]      // end of changed write data
        dbinh\[0..7]    // byte oriented changed data inhibits
INT32/  lfu[0..1]
INT32/  patd[0..1]
        phrase_mode     // phrase write mode
INT32/  srczlo
INT32/  srczhi
        :IN);*/

/*INT32/    addql[0..1], ddatlo, ddathi zero32
:LOCAL;
BEGIN

Phrase_mode\    := INV1 (phrase_mode\, phrase_mode);
Zero        := TIE0 (zero);
Zero32      := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/

/* Generate a changed data mask */

/*Edis      := OR6 (edis\, dend[0..5]);
Ecoarse     := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
E_coarse[0] := INV1 (e_coarse[0], e_coarse\[0]);
Efine       := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////

    /* dend == 0 disables the end decoder (every "not end" bit stays set) */
    en = ((dend & 0x3F) ? 1 : 0);
    e_coarse = decl38e[en][(dend & 0x38) >> 3];     // Actually, this is e_coarse inverted...
    e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
    e_fine &= 0xFE;     /* bit 0 is the netlist's unused[0] output */
//////////////////////////////////////////////////////////////////////////////////////

/*Scoarse       := DECH38 (s_coarse[0..7], dstart[3..5]);
Sfen\       := INV1 (sfen\, s_coarse[0]);
Sfine       := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    s_coarse = dech38[(dstart & 0x38) >> 3];
    s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
//////////////////////////////////////////////////////////////////////////////////////

/*Maskt[0]  := BUF1 (maskt[0], s_fine[0]);
Maskt[1-7]  := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* Ripple through the bits of byte 0: a bit is set if the previous bit is
       set or the start lands here, and the end has not been reached. */
    maskt = s_fine & 0x0001;

    for (i = 1; i < 8; i++)
    {
        if ((((maskt >> (i - 1)) & 0x01) || ((s_fine >> i) & 0x01)) && ((e_fine >> i) & 0x01))
            maskt |= (uint16_t)(1 << i);
    }
//////////////////////////////////////////////////////////////////////////////////////

/* Produce a look-ahead on the ripple carry:
masktla = s_coarse[0] . /e_coarse[0] */
/*Masktla       := AN2 (masktla, s_coarse[0], e_coarse\[0]);
Maskt[8]    := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
Maskt[9-14] := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* Same ripple at byte granularity: maskt bits 8-14 cover bytes 1-7 */
    if (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02))
        maskt |= 0x0100;

    for (i = 2; i < 8; i++)
    {
        if ((((maskt >> (i + 6)) & 0x01) || ((s_coarse >> i) & 0x01)) && ((e_coarse >> i) & 0x01))
            maskt |= (uint16_t)(1 << (i + 7));
    }
//////////////////////////////////////////////////////////////////////////////////////

/* The bit terms are mirrored for big-endian pixels outside phrase
   mode.  The byte terms are mirrored for big-endian pixels in phrase
   mode.  */

/*Mirror_bit    := AN2M (mir_bit, phrase_mode\, big_pix);
Mirror_byte := AN2H (mir_byte, phrase_mode, big_pix);

Masktb[14]  := BUF1 (masktb[14], maskt[14]);
Masku[0]    := MX4 (masku[0],  maskt[0],  maskt[7],  maskt[14],  zero, mir_bit, mir_byte);
Masku[1]    := MX4 (masku[1],  maskt[1],  maskt[6],  maskt[14],  zero, mir_bit, mir_byte);
Masku[2]    := MX4 (masku[2],  maskt[2],  maskt[5],  maskt[14],  zero, mir_bit, mir_byte);
Masku[3]    := MX4 (masku[3],  maskt[3],  maskt[4],  masktb[14], zero, mir_bit, mir_byte);
Masku[4]    := MX4 (masku[4],  maskt[4],  maskt[3],  masktb[14], zero, mir_bit, mir_byte);
Masku[5]    := MX4 (masku[5],  maskt[5],  maskt[2],  masktb[14], zero, mir_bit, mir_byte);
Masku[6]    := MX4 (masku[6],  maskt[6],  maskt[1],  masktb[14], zero, mir_bit, mir_byte);
Masku[7]    := MX4 (masku[7],  maskt[7],  maskt[0],  masktb[14], zero, mir_bit, mir_byte);
Masku[8]    := MX2 (masku[8],  maskt[8],  maskt[13], mir_byte);
Masku[9]    := MX2 (masku[9],  maskt[9],  maskt[12], mir_byte);
Masku[10]   := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
Masku[11]   := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
Masku[12]   := MX2 (masku[12], maskt[12], maskt[9],  mir_byte);
Masku[13]   := MX2 (masku[13], maskt[13], maskt[8],  mir_byte);
Masku[14]   := MX2 (masku[14], maskt[14], maskt[0],  mir_byte);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////

    /* big_pix is hard-wired to true here, so exactly one mirror applies */
    mir_bit = true/*big_pix*/ && !phrase_mode;
    mir_byte = true/*big_pix*/ && phrase_mode;
    masku = maskt;

    if (mir_bit)
    {
        /* Reverse the eight bit terms, keep the byte terms as they are */
        masku = maskt & 0xFF00;

        for (i = 0; i < 8; i++)
        {
            if ((maskt >> (7 - i)) & 0x01)
                masku |= (uint16_t)(1 << i);
        }
    }

    if (mir_byte)
    {
        /* All eight bit terms follow maskt[14]; byte terms 8-13 are reversed */
        masku = (maskt & 0x4000) ? 0x00FF : 0x0000;

        for (i = 0; i < 6; i++)
        {
            if ((maskt >> (13 - i)) & 0x01)
                masku |= (uint16_t)(0x0100 << i);
        }

        /* NOTE(review): the netlist above feeds masku[14] from maskt[0],
           whereas this takes maskt[7]. Kept as-is to preserve existing
           behaviour - confirm which is intended. */
        if (maskt & 0x0080)
            masku |= 0x4000;
    }
//////////////////////////////////////////////////////////////////////////////////////

/* The maskt terms define the area for changed data, but the byte
   inhibit terms can override these */

/*Mask[0-7] := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
Mask[8-14]  := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* dbinh bit 0 kills all eight bit terms; bits 1-7 kill byte terms 8-14 */
    mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
    mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
//////////////////////////////////////////////////////////////////////////////////////

/*Addql[0]  := JOIN (addql[0], addq[0..1]);
Addql[1]    := JOIN (addql[1], addq[2..3]);

Dsel0b[0-1] := BUF8 (dsel0b[0-1], data_sel[0]);
Dsel1b[0-1] := BUF8 (dsel1b[0-1], data_sel[1]);
Ddatlo      := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
Ddathi      := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* data_sel is a two-bit field; mask it so stray high bits cannot select
       a non-existent source. */
    switch (data_sel & 0x03)
    {
    case 0:
        ddat = *patd;       /* pattern data (after any patdadd update) */
        break;
    case 1:
        ddat = lfu;         /* logic function unit output */
        break;
    case 2:
        ddat = addq_phrase; /* adder array output */
        break;
    default:
        ddat = 0;
        break;
    }
//////////////////////////////////////////////////////////////////////////////////////

/*Zed_sel       := AN2 (zed_sel, data_sel[0..1]);
Zed_selb[0-1]   := BUF8 (zed_selb[0-1], zed_sel);

Dat[0-7]    := MX4 (dat[0-7],   dstdlo{0-7},   ddatlo{0-7},   dstzlo{0-7},   srczlo{0-7},   mask[0-7], zed_selb[0]);
Dat[8-15]   := MX4 (dat[8-15],  dstdlo{8-15},  ddatlo{8-15},  dstzlo{8-15},  srczlo{8-15},  mask[8],   zed_selb[0]);
Dat[16-23]  := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9],   zed_selb[0]);
Dat[24-31]  := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10],  zed_selb[0]);
Dat[32-39]  := MX4 (dat[32-39], dstdhi{0-7},   ddathi{0-7},   dstzhi{0-7},   srczhi{0-7},   mask[11],  zed_selb[1]);
Dat[40-47]  := MX4 (dat[40-47], dstdhi{8-15},  ddathi{8-15},  dstzhi{8-15},  srczhi{8-15},  mask[12],  zed_selb[1]);
Dat[48-55]  := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13],  zed_selb[1]);
Dat[56-63]  := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14],  zed_selb[1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
    /* Changed data goes where the mask is set, destination data elsewhere */
    *wdata = data_mux_merge(ddat, dstd, mask);

//This is a crappy way of handling this, but it should work for now...
    /* Same merge for Z: source Z where the mask is set, dest Z elsewhere */
    *srcz = data_mux_merge(*srcz, dstz, mask);
//////////////////////////////////////////////////////////////////////////////////////

/*Data_enab[0-1]    := BUF8 (data_enab[0-1], data_ena);
Datadrv[0-31]   := TS (wdata[0-31],  dat[0-31],  data_enab[0]);
Datadrv[32-63]  := TS (wdata[32-63], dat[32-63], data_enab[1]);

Unused[0]   := DUMMY (unused[0]);

END;*/
}
3192
3193
3194 /** COMP_CTRL - Comparator output control logic *****************
3195
3196 This block is responsible for taking the comparator outputs and
3197 using them as appropriate to inhibit writes. Two methods are
3198 supported for inhibiting write data:
3199
3200 - suppression of the inner loop controlled write operation
3201 - a set of eight byte inhibit lines to write back dest data
3202
3203 The first technique is used in pixel oriented modes, the second in
3204 phrase mode, but the phrase mode form is only applicable to eight
3205 and sixteen bit pixel modes.
3206
3207 Writes can be suppressed by data being equal, by the Z comparator
3208 conditions being met, or by the bit to pixel expansion scheme.
3209
3210 Pipe-lining issues: the data derived comparator outputs are stable
3211 until the next data read, well after the affected write from this
3212 operation. However, the inner counter bits can count immediately
3213 before the ack for the last write. Therefore, it is necessary to
3214 delay bcompbit select terms by one inner loop pipe-line stage,
3215 when generating the select for the data control - the output is
3216 delayed one further tick to give it write data timing (2/34).
3217
3218 There is also a problem with computed data - the new values are
3219 calculated before the write associated with the old value has been
3220 performed. The is taken care of within the zed comparator by
3221 pipe-lining the comparator inputs where appropriate.
3222 */
3223
//
// COMP_CTRL - software model of the comparator output control netlist.
//
// Folds the bit-compare, data-compare and Z-compare results into the two
// write-inhibit outputs.
//
// Outputs (both are always written, *nowrite first):
//   *nowrite - true when one of the pixel-mode inhibit terms holds and bkgwren
//              is false. Every term requires phrase_mode to be false.
//   *dbinh   - byte-lane inhibit mask; bit N is set when lane N's inhibit
//              condition holds. The netlist produces the active-low dbinh\
//              lines; this routine returns their complement.
//              Bits 0-3 are additionally set whenever the pixel-mode inhibit
//              term holds (regardless of bkgwren); bits 4-7 can only be set
//              in phrase mode.
//
// Inputs:
//   bcompen     - enables the source-bit test (a clear source bit inhibits)
//   big_pix     - inverts the low three bits of icount before the bit select
//   bkgwren     - forces *nowrite false; has no effect on *dbinh
//   dcomp       - data comparator result bits, one per byte lane
//   dcompen     - enables the dcomp terms
//   icount      - inner counter; only bits 0-2 are used
//   pixsize     - pixel size code; 3 and 4 are tested in pixel mode, bit 2
//                 alone selects the paired (two lanes at a time) terms
//   phrase_mode - selects per-lane inhibits instead of the single write inhibit
//   srcd        - source data byte used for the bit compare
//   zcomp       - Z comparator result bits; bit N covers lanes 2N and 2N+1
//
void COMP_CTRL(uint8_t *dbinh, bool *nowrite,
	bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
	uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
{
//BEGIN

/*Bkgwren\	:= INV1 (bkgwren\, bkgwren);
Phrase_mode\	:= INV1 (phrase_mode\, phrase_mode);
Pixsize\[0-2]	:= INV2 (pixsize\[0-2], pixsize[0-2]);*/

/* The bit comparator bits are derived from the source data, which
will have been suitably aligned for phrase mode.  The contents of
the inner counter are used to select which bit to use.

When not in phrase mode the inner count value is used to select
one bit.  It is assumed that the count has already occurred, so,
7 selects bit 0, etc.  In big-endian pixel mode, this turns round,
so that a count of 7 selects bit 7.

In phrase mode, the eight bits are used directly, and this mode is
only applicable to 8-bit pixel mode (2/34) */

/*Bcompselt[0-2]	:= EO (bcompselt[0-2], icount[0-2], big_pix);
Bcompbit	:= MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
			srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
Bcompbit\	:= INV1 (bcompbit\, bcompbit);*/

	// MX8 input 0 is srcd[7], so a select value of N picks bit (7 - N).
	uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
	bool bcompbit = (srcd & (0x80 >> bcompselt)) != 0;
	// pixsize[2]: chooses between the per-lane and the paired-lane terms.
	bool wide_pix = (pixsize & 0x04) != 0;
	bool winhibit;
	uint8_t inhibit = 0;
	unsigned lane;

/* pipe-line the count */
/*Bcompsel[0-2]	:= FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
Bcompbt		:= MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
			srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
Bcompbitp	:= FD1Q (bcompbitp, bcompbitpt, clk);
Bcompbitp\	:= INV1 (bcompbitp\, bcompbitp);*/

/* For pixel mode, generate the write inhibit signal for all modes
on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
for 16 bit mode on Z inhibit

Nowrite = bcompen . /bcompbit . /phrase_mode
	+ dcompen . dcomp[0] . /phrase_mode . pixsize = 011
	+ dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
	+ zcomp[0] . /phrase_mode . pixsize = 100
*/

/*Nowt0		:= NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
Nowt1		:= ND6  (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
Nowt2		:= ND7  (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
Nowt3		:= NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
Nowt4		:= NAN4 (nowt[4], nowt[0..3]);
Nowrite		:= AN2 (nowrite, nowt[4], bkgwren\);*/

/*Winht		:= NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
Winhibit	:= NAN4 (winhibit, winht, nowt[1..3]);*/

	// The netlist derives winhibit from the pipelined bit (bcompbitp); the
	// pipeline delay is not modelled here, so winhibit is exactly nowt[4] and
	// nowrite is that same term gated by !bkgwren.
	winhibit = !phrase_mode
		&& ((bcompen && !bcompbit)
			|| (dcompen && (dcomp & 0x01) && (pixsize == 3))
			|| (dcompen && ((dcomp & 0x03) == 0x03) && (pixsize == 4))
			|| ((zcomp & 0x01) && (pixsize == 4)));
	*nowrite = winhibit && !bkgwren;

/* For phrase mode, generate the byte inhibit signals for eight bit
mode 011, or sixteen bit mode 100
dbinh\[0] =  pixsize[2] . zcomp[0]
	 +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
	 + /pixsize[2] . dcomp[0] . dcompen
	 + /srcd[0] . bcompen

Inhibits 0-3 are also used when not in phrase mode to write back
destination data.
*/

/*Srcd\[0-7]	:= INV1 (srcd\[0-7], srcd[0-7]);

Di0t0		:= NAN2H (di0t[0], pixsize[2], zcomp[0]);
Di0t1		:= NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
Di0t2		:= NAN2 (di0t[2], srcd\[0], bcompen);
Di0t3		:= NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
Di0t4		:= NAN4 (di0t[4], di0t[0..3]);
Dbinh[0]	:= ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);

Di1t0		:= NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
Di1t1		:= NAN2 (di1t[1], srcd\[1], bcompen);
Di1t2		:= NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
Dbinh[1]	:= ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);

Di2t0		:= NAN2H (di2t[0], pixsize[2], zcomp[1]);
Di2t1		:= NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
Di2t2		:= NAN2 (di2t[2], srcd\[2], bcompen);
Di2t3		:= NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
Di2t4		:= NAN4 (di2t[4], di2t[0..3]);
Dbinh[2]	:= ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);

Di3t0		:= NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
Di3t1		:= NAN2 (di3t[1], srcd\[3], bcompen);
Di3t2		:= NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
Dbinh[3]	:= ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);

Di4t0		:= NAN2H (di4t[0], pixsize[2], zcomp[2]);
Di4t1		:= NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
Di4t2		:= NAN2 (di4t[2], srcd\[4], bcompen);
Di4t3		:= NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
Di4t4		:= NAN4 (di4t[4], di4t[0..3]);
Dbinh[4]	:= NAN2 (dbinh\[4], di4t[4], phrase_mode);

Di5t0		:= NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
Di5t1		:= NAN2 (di5t[1], srcd\[5], bcompen);
Di5t2		:= NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
Dbinh[5]	:= NAN2 (dbinh\[5], di5t[2], phrase_mode);

Di6t0		:= NAN2H (di6t[0], pixsize[2], zcomp[3]);
Di6t1		:= NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
Di6t2		:= NAN2 (di6t[2], srcd\[6], bcompen);
Di6t3		:= NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
Di6t4		:= NAN4 (di6t[4], di6t[0..3]);
Dbinh[6]	:= NAN2 (dbinh\[6], di6t[4], phrase_mode);

Di7t0		:= NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
Di7t1		:= NAN2 (di7t[1], srcd\[7], bcompen);
Di7t2		:= NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
Dbinh[7]	:= NAN2 (dbinh\[7], di7t[2], phrase_mode);*/

	// Debug trace left by the original author, kept verbatim for reference:
	//[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
	//[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
	//[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)

	// The eight lanes share one shape: lanes 2N and 2N+1 have the diNt[0..1]
	// pair terms in common (Z bit N, or both dcomp bits of the pair), and each
	// lane adds its own source-bit and single-dcomp-bit terms.
	for (lane = 0; lane < 8; lane++)
	{
		unsigned pair = lane >> 1;
		uint8_t lane_bit = (uint8_t)(1u << lane);
		uint8_t pair_bits = (uint8_t)(0x03u << (pair * 2));

		bool pair_inhibit = wide_pix
			&& (((zcomp >> pair) & 0x01)
				|| (dcompen && ((dcomp & pair_bits) == pair_bits)));
		bool lane_inhibit = pair_inhibit
			|| (bcompen && !(srcd & lane_bit))
			|| (!wide_pix && dcompen && (dcomp & lane_bit));

		// Only lanes 0-3 see winhibit (ANR1 gates); lanes 4-7 are plain NAN2.
		if ((phrase_mode && lane_inhibit) || (lane < 4 && winhibit))
			inhibit |= lane_bit;
	}

	// Built active-high directly, so no final inversion of dbinh\ is needed.
	*dbinh = inhibit;

//END;
}
3429
3430 #endif
3431