1 /* Mednafen - Multi-system Emulator
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 /* MJPEG-ish decoder based on the algorithm and huffman data tables provided by David Michel of MagicEngine and MagicEngine-FX */
19
20 #include "pcfx.h"
21 #include "rainbow.h"
22 #include "king.h"
23 #include "interrupt.h"
24 #include "jrevdct.h"
25 #include "../clamp.h"
26
27 static bool ChromaIP; // Bilinearly interpolate chroma channel
28
29 /* Y = luminance/luma, UV = chrominance/chroma */
30
31 typedef struct
32 {
33 const uint8 *base;
34 const uint8 *codes;
35 const uint32 *minimum;
36 const uint32 *maximum;
37 } HuffmanTable;
38
39 typedef struct
40 {
41 uint8 *lut; // LUT for getting the code.
42 uint8 *lut_bits; // Bit count for the code
43 } HuffmanQuickLUT;
44
45 /* Luma DC Huffman tables */
46 static const uint8 dc_y_base[17] =
47 {
48 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x08, 0x09,
49 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
50 0x00
51 };
52
53 static const uint8 dc_y_codes[27] =
54 {
55 0x00, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01,
56 0x08, 0x09, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
57 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C,
58 0x1D, 0x1E, 0x1F
59 };
60
61 static const uint32 dc_y_minimum[17] =
62 {
63 0x0000, 0x0000, 0x0000, 0x0000, 0x000E, 0x0000, 0x003C, 0x007A,
64 0x0000, 0x01F0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
65 0x0000
66 };
67
68 static const uint32 dc_y_maximum[17] =
69 {
70 0xFFFF, 0xFFFF, 0xFFFF, 0x0006, 0x000E, 0xFFFF, 0x003C, 0x007B,
71 0xFFFF, 0x01FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
72 0xFFFF
73 };
74
75 static const HuffmanTable dc_y_table =
76 {
77 dc_y_base,
78 dc_y_codes,
79 dc_y_minimum,
80 dc_y_maximum
81 };
82
83 /* Chroma DC Huffman tables */
84
85 static const uint8 dc_uv_base[17] =
86 {
87 0x00, 0x00, 0x00, 0x03, 0x04, 0x05, 0x06, 0x07,
88 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
89 0x00
90 };
91
92 static const uint8 dc_uv_codes[10] =
93 {
94 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
95 0x08, 0x09
96 };
97
98 static const uint32 dc_uv_minimum[17] =
99 {
100 0x0000, 0x0000, 0x0000, 0x0006, 0x000E, 0x001E, 0x003E, 0x007E,
101 0x00FE, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
102 0x0000
103 };
104
105 static const uint32 dc_uv_maximum[17] =
106 {
107 0xFFFF, 0xFFFF, 0x0002, 0x0006, 0x000E, 0x001E, 0x003E, 0x007E,
108 0x00FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
109 0xFFFF
110 };
111
112 static const HuffmanTable dc_uv_table =
113 {
114 dc_uv_base,
115 dc_uv_codes,
116 dc_uv_minimum,
117 dc_uv_maximum
118 };
119
120 /* Luma AC Huffman tables */
121 static const uint8 ac_y_base[17] =
122 {
123 0x00, 0x00, 0x00, 0x02, 0x03, 0x05, 0x09, 0x0D,
124 0x00, 0x10, 0x00, 0x12, 0x22, 0x00, 0x00, 0x00,
125 0x00
126 };
127
128 static const uint8 ac_y_codes[146] =
129 {
130 0x01, 0x02, 0x03, 0x04, 0x11, 0x05, 0x12, 0x21,
131 0x00, 0x06, 0x31, 0x41, 0x51, 0x13, 0x22, 0x61,
132 0x07, 0x71, 0x09, 0x19, 0x29, 0x39, 0x49, 0x59,
133 0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9,
134 0xE9, 0xF9, 0x08, 0x14, 0x15, 0x16, 0x17, 0x18,
135 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x32, 0x33,
136 0x34, 0x35, 0x36, 0x37, 0x38, 0x42, 0x43, 0x44,
137 0x45, 0x46, 0x47, 0x48, 0x52, 0x53, 0x54, 0x55,
138 0x56, 0x57, 0x58, 0x62, 0x63, 0x64, 0x65, 0x66,
139 0x67, 0x68, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
140 0x78, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
141 0x88, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
142 0x98, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
143 0xA8, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
144 0xB8, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
145 0xC8, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
146 0xD8, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
147 0xE8, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
148 0xF8, 0x10
149 };
150
151 static const uint32 ac_y_minimum[17] =
152 {
153 0x0000, 0x0000, 0x0000, 0x0004, 0x000A, 0x0018, 0x0036, 0x0074,
154 0x0000, 0x01DC, 0x0000, 0x0778, 0x0F10, 0x0000, 0x0000, 0x0000,
155 0x0000
156 };
157
158 static const uint32 ac_y_maximum[17] =
159 {
160 0xFFFF, 0xFFFF, 0x0001, 0x0004, 0x000B, 0x001A, 0x0039, 0x0076,
161 0xFFFF, 0x01DD, 0xFFFF, 0x0787, 0x0F7F, 0xFFFF, 0xFFFF, 0xFFFF,
162 0xFFFF
163 };
164
165 static const HuffmanTable ac_y_table =
166 {
167 ac_y_base,
168 ac_y_codes,
169 ac_y_minimum,
170 ac_y_maximum
171 };
172
173 /* Chroma AC Huffman tables */
174 static const uint8 ac_uv_base[17] =
175 {
176 0x00, 0x00, 0x00, 0x02, 0x03, 0x05, 0x0A, 0x0B,
177 0x00, 0x10, 0x00, 0x12, 0x22, 0x00, 0x00, 0x00,
178 0x00
179 };
180
181 static const uint8 ac_uv_codes[146] =
182 {
183 0x01, 0x02, 0x11, 0x03, 0x21, 0x04, 0x12, 0x31,
184 0x41, 0x00, 0x51, 0x05, 0x13, 0x22, 0x61, 0x71,
185 0x32, 0x81, 0x09, 0x19, 0x29, 0x39, 0x49, 0x59,
186 0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9,
187 0xE9, 0xF9, 0x06, 0x07, 0x08, 0x14, 0x15, 0x16,
188 0x17, 0x18, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
189 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x42, 0x43,
190 0x44, 0x45, 0x46, 0x47, 0x48, 0x52, 0x53, 0x54,
191 0x55, 0x56, 0x57, 0x58, 0x62, 0x63, 0x64, 0x65,
192 0x66, 0x67, 0x68, 0x72, 0x73, 0x74, 0x75, 0x76,
193 0x77, 0x78, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
194 0x88, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
195 0x98, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
196 0xA8, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
197 0xB8, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
198 0xC8, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
199 0xD8, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
200 0xE8, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
201 0xF8, 0x10
202 };
203
204 static const uint32 ac_uv_minimum[17] =
205 {
206 0x0000, 0x0000, 0x0000, 0x0004, 0x000A, 0x0018, 0x0038, 0x0072,
207 0x0000, 0x01DC, 0x0000, 0x0778, 0x0F10, 0x0000, 0x0000, 0x0000,
208 0x0000
209 };
210
211 static const uint32 ac_uv_maximum[17] =
212 {
213 0xFFFF, 0xFFFF, 0x0001, 0x0004, 0x000B, 0x001B, 0x0038, 0x0076,
214 0xFFFF, 0x01DD, 0xFFFF, 0x0787, 0x0F7F, 0xFFFF, 0xFFFF, 0xFFFF,
215 0xFFFF
216 };
217
218 static const HuffmanTable ac_uv_table =
219 {
220 ac_uv_base,
221 ac_uv_codes,
222 ac_uv_minimum,
223 ac_uv_maximum
224 };
225
226 static const uint8 zigzag[63] =
227 {
228 0x01, 0x08, 0x10, 0x09, 0x02, 0x03, 0x0A, 0x11,
229 0x18, 0x20, 0x19, 0x12, 0x0B, 0x04, 0x05, 0x0C,
230 0x13, 0x1A, 0x21, 0x28, 0x30, 0x29, 0x22, 0x1B,
231 0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1C, 0x23,
232 0x2A, 0x31, 0x38, 0x39, 0x32, 0x2B, 0x24, 0x1D,
233 0x16, 0x0F, 0x17, 0x1E, 0x25, 0x2C, 0x33, 0x3A,
234 0x3B, 0x34, 0x2D, 0x26, 0x1F, 0x27, 0x2E, 0x35,
235 0x3C, 0x3D, 0x36, 0x2F, 0x37, 0x3E, 0x3F
236 };
237
238 static HuffmanQuickLUT dc_y_qlut = { NULL }, dc_uv_qlut = { NULL}, ac_y_qlut = { NULL }, ac_uv_qlut = { NULL };
239
KillHuffmanLUT(HuffmanQuickLUT * qlut)240 static void KillHuffmanLUT(HuffmanQuickLUT *qlut)
241 {
242 if(qlut->lut)
243 free(qlut->lut);
244
245 if(qlut->lut_bits)
246 free(qlut->lut_bits);
247
248 qlut->lut = NULL;
249 qlut->lut_bits = NULL;
250 }
251
BuildHuffmanLUT(const HuffmanTable * table,HuffmanQuickLUT * qlut,const int bitmax)252 static bool BuildHuffmanLUT(const HuffmanTable *table, HuffmanQuickLUT *qlut, const int bitmax)
253 {
254 // TODO: Allocate only (1 << bitmax) entries.
255 // TODO: What should we set invalid bitsequences/entries to? 0? ~0? Something else?
256
257 if(!(qlut->lut = (uint8 *)calloc(1 << 12, 1)))
258 return(FALSE);
259
260 if(!(qlut->lut_bits = (uint8 *)calloc(1 << 12, 1)))
261 return(FALSE);
262
263 for(int numbits = 2; numbits <= 12; numbits++)
264 {
265 if(table->maximum[numbits] != 0xFFFF)
266 {
267 for(unsigned int i = table->minimum[numbits]; i <= table->maximum[numbits]; i++)
268 {
269 for(int b = 0; b < (1 << (bitmax - numbits)); b++)
270 {
271 int lut_index = (i << (bitmax - numbits)) + b;
272
273 assert(lut_index < (1 << bitmax));
274
275 qlut->lut[lut_index] = table->codes[table->base[numbits] + (i - table->minimum[numbits])];
276 qlut->lut_bits[lut_index] = numbits;
277 }
278 }
279 }
280 }
281
282 //printf("\n\n%d\n", bitmax);
283 for(int i = 0; i < (1 << bitmax); i++)
284 {
285 //if(!qlut->lut_bits[i])
286 // printf("%d %d\n", i, qlut->lut_bits[i]);
287 }
288
289 return(TRUE);
290 }
291
292
293 static uint8 *DecodeBuffer[2] = { NULL, NULL };
294 static int32 DecodeFormat[2]; // The format each buffer is in(-1 = invalid, 0 = palettized 8-bit, 1 = YUV)
295 static uint32 QuantTables[2][64], QuantTablesBase[2][64]; // 0 = Y, 1 = UV
296
297 static uint32 DecodeBufferWhichRead;
298
299 static int32 RasterReadPos;
300 static uint16 Control;
301 static uint16 NullRunY, NullRunU, NullRunV, HSync;
302 static uint16 HScroll;
303
304 static uint32 bits_buffer;
305 static uint32 bits_buffered_bits;
306 static int32 bits_bytes_left;
307
InitBits(int32 bcount)308 static void InitBits(int32 bcount)
309 {
310 bits_bytes_left = bcount;
311 bits_buffer = 0;
312 bits_buffered_bits = 0;
313 }
314
FetchWidgywabbit(void)315 static INLINE uint8 FetchWidgywabbit(void)
316 {
317 if(bits_bytes_left <= 0)
318 return(0);
319
320 uint8 ret = KING_RB_Fetch();
321 if(ret == 0xFF)
322 KING_RB_Fetch();
323
324 bits_bytes_left--;
325
326 return(ret);
327 }
328
329 enum
330 {
331 MDFNBITS_PEEK = 1,
332 MDFNBITS_FUNNYSIGN = 2,
333 };
334
GetBits(const unsigned int count,const unsigned int how=0)335 static INLINE uint32 GetBits(const unsigned int count, const unsigned int how = 0)
336 {
337 uint32 ret;
338
339 while(bits_buffered_bits < count)
340 {
341 bits_buffer <<= 8;
342 bits_buffer |= FetchWidgywabbit();
343 bits_buffered_bits += 8;
344 }
345
346 ret = (bits_buffer >> (bits_buffered_bits - count)) & ((1 << count) - 1);
347
348 if(!(how & MDFNBITS_PEEK))
349 bits_buffered_bits -= count;
350
351 if((how & MDFNBITS_FUNNYSIGN) && count)
352 {
353 if(ret < (1U << (count - 1)))
354 ret += 1 - (1 << count);
355 }
356
357 return(ret);
358 }
359
360 // Note: SkipBits is intended to be called right after GetBits() with "how" MDFNBITS_PEEK,
361 // and the count pass to SkipBits must be less than or equal to the count passed to GetBits().
SkipBits(const unsigned int count)362 static INLINE void SkipBits(const unsigned int count)
363 {
364 bits_buffered_bits -= count;
365 }
366
367
368 static uint32 HappyColor; // Cached, calculated from null run yuv registers;
CalcHappyColor(void)369 static void CalcHappyColor(void)
370 {
371 uint8 y_c = (int16)NullRunY + 0x80;
372 uint8 u_c = (int16)NullRunU + 0x80;
373 uint8 v_c = (int16)NullRunV + 0x80;
374
375 HappyColor = (y_c << 16) | (u_c << 8) | (v_c << 0);
376 }
377
get_ac_coeff(const HuffmanQuickLUT * table,int32 * zeroes)378 static uint32 get_ac_coeff(const HuffmanQuickLUT *table, int32 *zeroes)
379 {
380 unsigned int numbits;
381 uint32 rawbits;
382 uint32 code;
383
384 rawbits = GetBits(12, MDFNBITS_PEEK);
385 if((rawbits & 0xF80) == 0xF80)
386 //if(rawbits >= 0xF80)
387 {
388 SkipBits(5);
389 *zeroes = 0;
390 return(0);
391 }
392
393 if(!table->lut_bits[rawbits])
394 {
395 FXDBG("Invalid AC bit sequence: %03x\n", rawbits);
396 }
397
398 code = table->lut[rawbits];
399 SkipBits(table->lut_bits[rawbits]);
400
401 numbits = code & 0xF;
402 *zeroes = code >> 4;
403
404 return(GetBits(numbits, MDFNBITS_FUNNYSIGN));
405 }
406
get_dc_coeff(const HuffmanQuickLUT * table,int32 * zeroes,int maxbits)407 static uint32 get_dc_coeff(const HuffmanQuickLUT *table, int32 *zeroes, int maxbits)
408 {
409 uint32 code;
410
411 for(;;)
412 {
413 uint32 rawbits = GetBits(maxbits, MDFNBITS_PEEK);
414
415 if(!table->lut_bits[rawbits])
416 {
417 FXDBG("Invalid DC bit sequence: %03x\n", rawbits);
418 }
419
420 code = table->lut[rawbits];
421 SkipBits(table->lut_bits[rawbits]);
422
423 if(code < 0xF)
424 {
425 *zeroes = 0;
426 return(GetBits(code, MDFNBITS_FUNNYSIGN));
427 }
428 else if(code == 0xF)
429 {
430 get_ac_coeff(&ac_y_qlut, zeroes);
431 (*zeroes)++;
432 return(0);
433 }
434 else if(code >= 0x10)
435 {
436 code -= 0x10;
437
438 for(int i = 0; i < 64; i++)
439 {
440 // Y
441 uint32 coeff = (QuantTablesBase[0][i] * code) >> 2;
442
443 if(coeff < 1)
444 coeff = 1;
445 else if(coeff > 0xFE)
446 coeff = 0xFE;
447
448 QuantTables[0][i] = coeff;
449
450 // UV
451 if(i)
452 coeff = (QuantTablesBase[1][i] * code) >> 2;
453 else
454 coeff = (QuantTablesBase[1][i]) >> 2;
455
456 if(coeff < 1)
457 coeff = 1;
458 else if(coeff > 0xFE)
459 coeff = 0xFE;
460
461 QuantTables[1][i] = coeff;
462 }
463
464 }
465 }
466
467 }
468
get_dc_y_coeff(int32 * zeroes)469 static INLINE uint32 get_dc_y_coeff(int32 *zeroes)
470 {
471 return(get_dc_coeff(&dc_y_qlut, zeroes, 9));
472 }
473
get_dc_uv_coeff(void)474 static uint32 get_dc_uv_coeff(void)
475 {
476 const HuffmanQuickLUT *table = &dc_uv_qlut;
477 uint32 code;
478 uint32 rawbits = GetBits(8, MDFNBITS_PEEK);
479
480 code = table->lut[rawbits];
481 SkipBits(table->lut_bits[rawbits]);
482
483 return(GetBits(code, MDFNBITS_FUNNYSIGN));
484 }
485
486
decode(int32 * dct,const uint32 * QuantTable,const int32 dc,const HuffmanQuickLUT * table)487 static void decode(int32 *dct, const uint32 *QuantTable, const int32 dc, const HuffmanQuickLUT *table)
488 {
489 int32 coeff;
490 int32 zeroes;
491 int count;
492 int index;
493
494 dct[0] = (int16)(QuantTable[0] * dc);
495 count = 0;
496
497 do
498 {
499 coeff = get_ac_coeff(table, &zeroes);
500 if(!coeff)
501 {
502 if(!zeroes)
503 {
504 while(count < 63)
505 {
506 dct[zigzag[count++]] = 0;
507 }
508 break;
509 }
510 else if(zeroes == 1)
511 zeroes = 0xF;
512 }
513
514 while(zeroes-- && count < 63)
515 {
516 dct[zigzag[count++]] = 0;
517 }
518 zeroes = 0;
519 if(count < 63)
520 {
521 index = zigzag[count++];
522 dct[index] = (int16)(QuantTable[index] * coeff);
523 }
524 } while(count < 63);
525
526 }
527
528 static uint32 LastLine[256];
529 static bool FirstDecode;
530 static bool GarbageData;
531
RAINBOW_Init(bool arg_ChromaIP)532 bool RAINBOW_Init(bool arg_ChromaIP)
533 {
534 ChromaIP = arg_ChromaIP;
535
536 for(int i = 0; i < 2; i++)
537 {
538 if(!(DecodeBuffer[i] = (uint8*)malloc(0x2000 * 4)))
539 return(0);
540 memset(DecodeBuffer[i], 0, 0x2000 * 4);
541 }
542
543 if(!BuildHuffmanLUT(&dc_y_table, &dc_y_qlut, 9))
544 return(FALSE);
545
546 if(!BuildHuffmanLUT(&dc_uv_table, &dc_uv_qlut, 8))
547 return(FALSE);
548
549 if(!BuildHuffmanLUT(&ac_y_table, &ac_y_qlut, 12))
550 return(FALSE);
551
552 if(!BuildHuffmanLUT(&ac_uv_table, &ac_uv_qlut, 12))
553 return(FALSE);
554
555 DecodeFormat[0] = DecodeFormat[1] = -1;
556 DecodeBufferWhichRead = 0;
557 GarbageData = FALSE;
558 FirstDecode = TRUE;
559 RasterReadPos = 0;
560
561 return(1);
562 }
563
RAINBOW_Close(void)564 void RAINBOW_Close(void)
565 {
566 for(int i = 0; i < 2; i++)
567 if(DecodeBuffer[i])
568 {
569 free(DecodeBuffer[i]);
570 DecodeBuffer[i] = NULL;
571 }
572
573 KillHuffmanLUT(&dc_y_qlut);
574 KillHuffmanLUT(&dc_uv_qlut);
575 KillHuffmanLUT(&ac_y_qlut);
576 KillHuffmanLUT(&ac_uv_qlut);
577 }
578
579 // RAINBOW base I/O port address: 0x200
580
581 #define REGSETBOFW(_reg, _data, _wb) { (_reg) &= ~(0xFF << ((_wb) * 8)); (_reg) |= (_data) << ((_wb) * 8); }
582 #define REGSETBOFH REGSETBOFW
583
584 // The horizontal scroll register is set up kind of weird...D0-D7 of writes to $200 set the lower byte, D0-D7 of writes to $202 set the upper byte
585
RAINBOW_Write8(uint32 A,uint8 V)586 void RAINBOW_Write8(uint32 A, uint8 V)
587 {
588 //printf("RAINBOW Wr8: %08x %02x\n", A, V);
589 switch(A & 0x1C)
590 {
591 case 0x00: REGSETBOFH(HScroll, V, (A & 0x2) >> 1); HScroll &= 0x1FF; break;
592 case 0x04: REGSETBOFH(Control, V, A & 0x3); break;
593 case 0x08: REGSETBOFH(NullRunY, V, A & 0x3); CalcHappyColor(); break;
594 case 0x0C: REGSETBOFH(NullRunU, V, A & 0x3); CalcHappyColor(); break;
595 case 0x10: REGSETBOFH(NullRunV, V, A & 0x3); CalcHappyColor(); break;
596 case 0x14: REGSETBOFH(HSync, V, A & 0x3); break;
597 }
598 }
599
RAINBOW_Write16(uint32 A,uint16 V)600 void RAINBOW_Write16(uint32 A, uint16 V)
601 {
602 int msh = A & 0x2;
603
604 //printf("RAINBOW Wr16: %08x %04x\n", A, V);
605 switch(A & 0x1C)
606 {
607 case 0x00: REGSETBOFH(HScroll, V & 0xFF, (A & 0x2) >> 1); HScroll &= 0x1FF; break;
608 case 0x04: REGSETHW(Control, V, msh); break;
609 case 0x08: REGSETHW(NullRunY, V, msh); CalcHappyColor(); break;
610 case 0x0C: REGSETHW(NullRunU, V, msh); CalcHappyColor(); break;
611 case 0x10: REGSETHW(NullRunV, V, msh); CalcHappyColor(); break;
612 case 0x14: REGSETHW(HSync, V, msh); break;
613 }
614 }
615
RAINBOW_ForceTransferReset(void)616 void RAINBOW_ForceTransferReset(void)
617 {
618 RasterReadPos = 0;
619 DecodeFormat[0] = DecodeFormat[1] = -1;
620 }
621
RAINBOW_SwapBuffers(void)622 void RAINBOW_SwapBuffers(void)
623 {
624 DecodeBufferWhichRead ^= 1;
625 RasterReadPos = 0;
626 }
627
RAINBOW_DecodeBlock(bool arg_FirstDecode,bool Skip)628 void RAINBOW_DecodeBlock(bool arg_FirstDecode, bool Skip)
629 {
630 uint8 block_type;
631 int32 block_size;
632 int icount;
633 int which_buffer = DecodeBufferWhichRead ^ 1;
634
635 if(!(Control & 0x01))
636 {
637 puts("Rainbow decode when disabled!!");
638 return;
639 }
640
641 if(arg_FirstDecode)
642 {
643 FirstDecode = TRUE;
644 GarbageData = FALSE;
645 }
646
647 if(GarbageData)
648 icount = 0;
649 else
650 icount = 0x200;
651
652 do
653 {
654 do
655 {
656 while(KING_RB_Fetch() != 0xFF && icount > 0)
657 icount--;
658
659 block_type = KING_RB_Fetch();
660 //if(icount > 0 && block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF)
661 //if(icount > 0 && block_type == 0x11)
662 // printf("%02x\n", block_type);
663 icount--;
664 } while(block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF && icount > 0);
665
666 {
667 uint16 tmp;
668
669 tmp = KING_RB_Fetch() << 8;
670 tmp |= KING_RB_Fetch() << 0;
671
672 block_size = (int16)tmp;
673 }
674
675 block_size -= 2;
676 if(block_type == 0xFF && block_size <= 0)
677 for(int i = 0; i < 128; i++,icount--) KING_RB_Fetch();
678
679 //fprintf(stderr, "Block: %d\n", block_size);
680 } while(block_size <= 0 && icount > 0);
681
682 //if(!GarbageData && icount < 500)
683 //{
684 // FXDBG("Partial garbage data. %d", icount);
685 //}
686
687 //printf("%d\n", icount);
688 if(icount <= 0)
689 {
690 FXDBG("Garbage data.");
691 GarbageData = TRUE;
692 //printf("Dooom: %d\n");
693 DecodeFormat[which_buffer] = 0;
694 memset(DecodeBuffer[which_buffer], 0, 0x2000);
695 goto BufferNoDecode;
696 }
697
698 if(block_type == 0xf8 || block_type == 0xff)
699 DecodeFormat[which_buffer] = 1;
700 else
701 DecodeFormat[which_buffer] = 0;
702
703 if(block_type == 0xF8 || block_type == 0xFF)
704 {
705 if(block_type == 0xFF)
706 {
707 for(int q = 0; q < 2; q++)
708 for(int i = 0; i < 64; i++)
709 {
710 uint8 meow = KING_RB_Fetch();
711
712 QuantTables[q][i] = meow;
713 QuantTablesBase[q][i] = meow;
714 }
715 block_size -= 128;
716 }
717
718 InitBits(block_size);
719
720 int32 dc_y = 0, dc_u = 0, dc_v = 0;
721 uint32 *dest_base = (uint32 *)DecodeBuffer[which_buffer];
722 for(int column = 0; column < 16; column++)
723 {
724 uint32 *dest_base_column = &dest_base[column * 16];
725 int32 zeroes = 0;
726
727 dc_y += get_dc_y_coeff(&zeroes);
728
729 if(zeroes) // If set, clear the number of columns
730 {
731 do
732 {
733 if(column < 16)
734 {
735 dest_base_column = &dest_base[column * 16];
736
737 for(int y = 0; y < 16; y++)
738 for(int x = 0; x < 16; x++)
739 dest_base_column[y * 256 + x] = HappyColor;
740 }
741 column++;
742 zeroes--;
743 } while(zeroes);
744 column--; // Fix for the column autoincrement in the while(zeroes) loop
745 dc_y = dc_u = dc_v = 0;
746 }
747 else
748 {
749 int32 dct_y[256];
750 int32 dct_u[64];
751 int32 dct_v[64];
752
753 // Y/Luma, 16x16 components
754 // ---------
755 // | A | C |
756 // |-------|
757 // | B | D |
758 // ---------
759 // A (0, 0)
760 decode(&dct_y[0x00], QuantTables[0], dc_y, &ac_y_qlut);
761
762 // B (0, 1)
763 dc_y += get_dc_y_coeff(&zeroes);
764 decode(&dct_y[0x40], QuantTables[0], dc_y, &ac_y_qlut);
765
766 // C (1, 0)
767 dc_y += get_dc_y_coeff(&zeroes);
768 decode(&dct_y[0x80], QuantTables[0], dc_y, &ac_y_qlut);
769
770 // D (1, 1)
771 dc_y += get_dc_y_coeff(&zeroes);
772 decode(&dct_y[0xC0], QuantTables[0], dc_y, &ac_y_qlut);
773
774 // U, 8x8 components
775 dc_u += get_dc_uv_coeff();
776 decode(&dct_u[0x00], QuantTables[1], dc_u, &ac_uv_qlut);
777
778 // V, 8x8 components
779 dc_v += get_dc_uv_coeff();
780 decode(&dct_v[0x00], QuantTables[1], dc_v, &ac_uv_qlut);
781
782 if(Skip)
783 continue;
784
785 j_rev_dct(&dct_y[0x00]);
786 j_rev_dct(&dct_y[0x40]);
787 j_rev_dct(&dct_y[0x80]);
788 j_rev_dct(&dct_y[0xC0]);
789 j_rev_dct(&dct_u[0x00]);
790 j_rev_dct(&dct_v[0x00]);
791
792 for(int y = 0; y < 16; y++)
793 for(int x = 0; x < 16; x++)
794 dest_base_column[y * 256 + x] = clamp_to_u8(dct_y[y * 8 + (x & 0x7) + ((x & 0x8) << 4)] + 0x80) << 16;
795
796 if(!ChromaIP)
797 {
798 for(int y = 0; y < 8; y++)
799 {
800 for(int x = 0; x < 8; x++)
801 {
802 uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80);
803 dest_base_column[y * 512 + (256 * 0) + x * 2 + 0] |= component_uv;
804 dest_base_column[y * 512 + (256 * 0) + x * 2 + 1] |= component_uv;
805 dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv;
806 dest_base_column[y * 512 + (256 * 1) + x * 2 + 1] |= component_uv;
807 }
808 }
809 }
810 else
811 {
812 for(int y = 0; y < 8; y++)
813 {
814 for(int x = 0; x < 8; x++)
815 {
816 uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80);
817 dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv;
818 }
819 }
820 }
821 }
822 }
823
824 // Do bilinear interpolation on the chroma channels:
825 if(!Skip && ChromaIP)
826 {
827 for(int y = 0; y < 16; y+= 2)
828 {
829 uint32 *linebase = &dest_base[y * 256];
830 uint32 *linebase1 = &dest_base[(y + 1) * 256];
831
832 for(int x = 0; x < 254; x += 2)
833 {
834 unsigned int u, v;
835
836 u = (((linebase1[x] >> 8) & 0xFF) + ((linebase1[x + 2] >> 8) & 0xFF)) >> 1;
837 v = (((linebase1[x] >> 0) & 0xFF) + ((linebase1[x + 2] >> 0) & 0xFF)) >> 1;
838
839 linebase1[x + 1] = (linebase1[x + 1] & ~ 0xFFFF) | (u << 8) | v;
840 }
841
842 linebase1[0xFF] = (linebase1[0xFF] & ~ 0xFFFF) | (linebase1[0xFE] & 0xFFFF);
843
844 if(FirstDecode)
845 {
846 for(int x = 0; x < 256; x++) linebase[x] = (linebase[x] & ~ 0xFFFF) | (linebase1[x] & 0xFFFF);
847 FirstDecode = 0;
848 }
849 else
850 for(int x = 0; x < 256; x++)
851 {
852 unsigned int u, v;
853
854 u = (((LastLine[x] >> 8) & 0xFF) + ((linebase1[x] >> 8) & 0xFF)) >> 1;
855 v = (((LastLine[x] >> 0) & 0xFF) + ((linebase1[x] >> 0) & 0xFF)) >> 1;
856
857 linebase[x] = (linebase[x] & ~ 0xFFFF) | (u << 8) | v;
858 }
859
860 memcpy(LastLine, linebase1, 256 * 4);
861 }
862 } // End chroma interpolation
863 } // end jpeg-like decoding
864 else
865 {
866 // Earlier code confines the set to F0,F1,F2, and F3.
867 // F0 = (4, 0xF), F1 = (3, 0x7), F2 = (0x2, 0x3), F3 = 0x1, 0x1)
868 const unsigned int plt_shift = 4 - (block_type & 0x3);
869 const unsigned int crl_mask = (1 << plt_shift) - 1;
870 int x = 0;
871
872 while(block_size > 0)
873 {
874 uint8 boot;
875 unsigned int rle_count;
876
877 boot = KING_RB_Fetch();
878 block_size--;
879
880 if(boot == 0xFF)
881 {
882 KING_RB_Fetch();
883 block_size--;
884 }
885
886 if(!(boot & crl_mask)) // Expand mode?
887 {
888 rle_count = KING_RB_Fetch();
889 block_size--;
890 if(rle_count == 0xFF)
891 {
892 KING_RB_Fetch();
893 block_size--;
894 }
895 rle_count++;
896 }
897 else
898 rle_count = boot & crl_mask;
899
900 for(unsigned int i = 0; i < rle_count; i++)
901 {
902 if(x >= 0x2000)
903 {
904 //puts("Oops");
905 break; // Don't overflow our decode buffer!
906 }
907 DecodeBuffer[which_buffer][x] = (boot >> plt_shift);
908 x++;
909 }
910 }
911 } // end RLE decoding
912
913 //for(int i = 0; i < 8 + block_size; i++)
914 // KING_RB_Fetch();
915
916 BufferNoDecode: ;
917 }
918
919 void KING_Moo(void);
920
921 // NOTE: layer_or and palette_ptr are optimizations, the real RAINBOW chip knows not of such things.
RAINBOW_FetchRaster(uint32 * linebuffer,uint32 layer_or,uint32 * palette_ptr)922 int RAINBOW_FetchRaster(uint32 *linebuffer, uint32 layer_or, uint32 *palette_ptr)
923 {
924 int ret;
925
926 ret = DecodeFormat[DecodeBufferWhichRead];
927
928 if(linebuffer)
929 {
930 if(DecodeFormat[DecodeBufferWhichRead] == -1) // None
931 {
932 if(linebuffer)
933 MDFN_FastU32MemsetM8(linebuffer, 0, 256);
934 }
935 else if(DecodeFormat[DecodeBufferWhichRead] == 1) // YUV
936 {
937 uint32 *in_ptr = (uint32*)&DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256 * 4];
938
939 if(Control & 0x2) // Endless scroll mode:
940 {
941 uint8 tmpss = HScroll;
942
943 for(int x = 0; x < 256; x++)
944 {
945 linebuffer[x] = in_ptr[tmpss] | layer_or;
946 tmpss++;
947 }
948 }
949 else // Non-endless
950 {
951 uint16 tmpss = HScroll & 0x1FF;
952
953 for(int x = 0; x < 256; x++)
954 {
955 linebuffer[x] = (tmpss < 256) ? (in_ptr[tmpss] | layer_or) : 0;
956 tmpss = (tmpss + 1) & 0x1FF;
957 }
958 }
959 MDFN_FastU32MemsetM8(in_ptr, 0, 256);
960 }
961 else if(DecodeFormat[DecodeBufferWhichRead] == 0) // Palette
962 {
963 uint8 *in_ptr = &DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256];
964
965 if(Control & 0x2) // Endless scroll mode:
966 {
967 uint8 tmpss = HScroll;
968
969 for(int x = 0; x < 256; x++)
970 {
971 linebuffer[x] = in_ptr[tmpss] ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0;
972 tmpss++;
973 }
974 }
975 else // Non-endless
976 {
977 uint16 tmpss = HScroll & 0x1FF;
978
979 for(int x = 0; x < 256; x++)
980 {
981 linebuffer[x] = (tmpss < 256 && in_ptr[tmpss]) ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0;
982 tmpss = (tmpss + 1) & 0x1FF;
983 }
984 }
985 //MDFN_FastU32MemsetM8((uint32 *)in_ptr, 0, 256 / 4);
986 }
987 }
988
989 RasterReadPos = (RasterReadPos + 1) & 0xF;
990
991 if(!RasterReadPos)
992 DecodeFormat[DecodeBufferWhichRead] = -1; // Invalidate this buffer.
993
994 //printf("Fetch: %d, buffer: %d\n", RasterReadPos, DecodeBufferWhichRead);
995 return(ret);
996 }
997
RAINBOW_Reset(void)998 void RAINBOW_Reset(void)
999 {
1000 Control = 0;
1001 NullRunY = NullRunU = NullRunV = 0;
1002 HScroll = 0;
1003 RasterReadPos = 0;
1004 DecodeBufferWhichRead = 0;
1005
1006 memset(QuantTables, 0, sizeof(QuantTables));
1007 memset(QuantTablesBase, 0, sizeof(QuantTablesBase));
1008 DecodeFormat[0] = DecodeFormat[1] = -1;
1009
1010 CalcHappyColor();
1011 }
1012
1013
RAINBOW_StateAction(StateMem * sm,int load,int data_only)1014 int RAINBOW_StateAction(StateMem *sm, int load, int data_only)
1015 {
1016 SFORMAT StateRegs[] =
1017 {
1018 SFVAR(HScroll),
1019 SFVAR(Control),
1020 SFVAR(RasterReadPos),
1021 SFVAR(DecodeBufferWhichRead),
1022 SFVAR(NullRunY),
1023 SFVAR(NullRunU),
1024 SFVAR(NullRunV),
1025 SFVAR(HSync),
1026 SFARRAY32(DecodeFormat, 2),
1027 // if(!(DecodeBuffer[i] = (uint8*)MDFN_malloc(0x2000 * 4, _("RAINBOW buffer RAM"))))
1028 SFARRAY(DecodeBuffer[0], 0x2000 * 4),
1029 SFARRAY(DecodeBuffer[1], 0x2000 * 4),
1030 SFEND
1031 };
1032
1033 int ret = MDFNSS_StateAction(sm, load, data_only, StateRegs, "RBOW", false);
1034
1035 if(load)
1036 {
1037 RasterReadPos &= 0xF;
1038 CalcHappyColor();
1039 }
1040 return(ret);
1041 }
1042
1043