1 /* Mednafen - Multi-system Emulator
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation; either version 2 of the License, or
6  * (at your option) any later version.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 /* MJPEG-ish decoder based on the algorithm and huffman data tables provided by David Michel of MagicEngine and MagicEngine-FX */
19 
20 #include "pcfx.h"
21 #include "rainbow.h"
22 #include "king.h"
23 #include "interrupt.h"
24 #include "jrevdct.h"
25 #include "../clamp.h"
26 
27 static bool ChromaIP;	// Bilinearly interpolate chroma channel
28 
29 /* Y = luminance/luma, UV = chrominance/chroma */
30 
31 typedef struct
32 {
33 	const uint8 *base;
34         const uint8 *codes;
35 	const uint32 *minimum;
36 	const uint32 *maximum;
37 } HuffmanTable;
38 
39 typedef struct
40 {
41         uint8 *lut;             // LUT for getting the code.
42         uint8 *lut_bits;        // Bit count for the code
43 } HuffmanQuickLUT;
44 
45 /* Luma DC Huffman tables */
46 static const uint8 dc_y_base[17] =
47 {
48         0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x08, 0x09,
49         0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
50         0x00
51 };
52 
53 static const uint8 dc_y_codes[27] =
54 {
55         0x00, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01,
56         0x08, 0x09, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
57         0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C,
58         0x1D, 0x1E, 0x1F
59 };
60 
61 static const uint32 dc_y_minimum[17] =
62 {
63         0x0000, 0x0000, 0x0000, 0x0000, 0x000E, 0x0000, 0x003C, 0x007A,
64         0x0000, 0x01F0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
65         0x0000
66 };
67 
68 static const uint32 dc_y_maximum[17] =
69 {
70         0xFFFF, 0xFFFF, 0xFFFF, 0x0006, 0x000E, 0xFFFF, 0x003C, 0x007B,
71         0xFFFF, 0x01FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
72         0xFFFF
73 };
74 
75 static const HuffmanTable dc_y_table =
76 {
77 	dc_y_base,
78 	dc_y_codes,
79 	dc_y_minimum,
80 	dc_y_maximum
81 };
82 
83 /* Chroma DC Huffman tables */
84 
85 static const uint8 dc_uv_base[17] =
86 {
87         0x00, 0x00, 0x00, 0x03, 0x04, 0x05, 0x06, 0x07,
88         0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
89         0x00
90 };
91 
92 static const uint8 dc_uv_codes[10] =
93 {
94         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
95         0x08, 0x09
96 };
97 
98 static const uint32 dc_uv_minimum[17] =
99 {
100         0x0000, 0x0000, 0x0000, 0x0006, 0x000E, 0x001E, 0x003E, 0x007E,
101         0x00FE, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
102         0x0000
103 };
104 
105 static const uint32 dc_uv_maximum[17] =
106 {
107         0xFFFF, 0xFFFF, 0x0002, 0x0006, 0x000E, 0x001E, 0x003E, 0x007E,
108         0x00FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
109         0xFFFF
110 };
111 
112 static const HuffmanTable dc_uv_table =
113 {
114 	dc_uv_base,
115 	dc_uv_codes,
116 	dc_uv_minimum,
117 	dc_uv_maximum
118 };
119 
120 /* Luma AC Huffman tables */
121 static const uint8 ac_y_base[17] =
122 {
123         0x00, 0x00, 0x00, 0x02, 0x03, 0x05, 0x09, 0x0D,
124         0x00, 0x10, 0x00, 0x12, 0x22, 0x00, 0x00, 0x00,
125         0x00
126 };
127 
128 static const uint8 ac_y_codes[146] =
129 {
130         0x01, 0x02, 0x03, 0x04, 0x11, 0x05, 0x12, 0x21,
131         0x00, 0x06, 0x31, 0x41, 0x51, 0x13, 0x22, 0x61,
132         0x07, 0x71, 0x09, 0x19, 0x29, 0x39, 0x49, 0x59,
133         0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9,
134         0xE9, 0xF9, 0x08, 0x14, 0x15, 0x16, 0x17, 0x18,
135         0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x32, 0x33,
136         0x34, 0x35, 0x36, 0x37, 0x38, 0x42, 0x43, 0x44,
137         0x45, 0x46, 0x47, 0x48, 0x52, 0x53, 0x54, 0x55,
138         0x56, 0x57, 0x58, 0x62, 0x63, 0x64, 0x65, 0x66,
139         0x67, 0x68, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
140         0x78, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
141         0x88, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
142         0x98, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
143         0xA8, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
144         0xB8, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
145         0xC8, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
146         0xD8, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
147         0xE8, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
148         0xF8, 0x10
149 };
150 
151 static const uint32 ac_y_minimum[17] =
152 {
153         0x0000, 0x0000, 0x0000, 0x0004, 0x000A, 0x0018, 0x0036, 0x0074,
154         0x0000, 0x01DC, 0x0000, 0x0778, 0x0F10, 0x0000, 0x0000, 0x0000,
155         0x0000
156 };
157 
158 static const uint32 ac_y_maximum[17] =
159 {
160         0xFFFF, 0xFFFF, 0x0001, 0x0004, 0x000B, 0x001A, 0x0039, 0x0076,
161         0xFFFF, 0x01DD, 0xFFFF, 0x0787, 0x0F7F, 0xFFFF, 0xFFFF, 0xFFFF,
162         0xFFFF
163 };
164 
165 static const HuffmanTable ac_y_table =
166 {
167 	ac_y_base,
168 	ac_y_codes,
169 	ac_y_minimum,
170 	ac_y_maximum
171 };
172 
173 /* Chroma AC Huffman tables */
174 static const uint8 ac_uv_base[17] =
175 {
176         0x00, 0x00, 0x00, 0x02, 0x03, 0x05, 0x0A, 0x0B,
177         0x00, 0x10, 0x00, 0x12, 0x22, 0x00, 0x00, 0x00,
178         0x00
179 };
180 
181 static const uint8 ac_uv_codes[146] =
182 {
183         0x01, 0x02, 0x11, 0x03, 0x21, 0x04, 0x12, 0x31,
184         0x41, 0x00, 0x51, 0x05, 0x13, 0x22, 0x61, 0x71,
185         0x32, 0x81, 0x09, 0x19, 0x29, 0x39, 0x49, 0x59,
186         0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9,
187         0xE9, 0xF9, 0x06, 0x07, 0x08, 0x14, 0x15, 0x16,
188         0x17, 0x18, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
189         0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x42, 0x43,
190         0x44, 0x45, 0x46, 0x47, 0x48, 0x52, 0x53, 0x54,
191         0x55, 0x56, 0x57, 0x58, 0x62, 0x63, 0x64, 0x65,
192         0x66, 0x67, 0x68, 0x72, 0x73, 0x74, 0x75, 0x76,
193         0x77, 0x78, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
194         0x88, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
195         0x98, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
196         0xA8, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
197         0xB8, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
198         0xC8, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
199         0xD8, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
200         0xE8, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
201         0xF8, 0x10
202 };
203 
204 static const uint32 ac_uv_minimum[17] =
205 {
206         0x0000, 0x0000, 0x0000, 0x0004, 0x000A, 0x0018, 0x0038, 0x0072,
207         0x0000, 0x01DC, 0x0000, 0x0778, 0x0F10, 0x0000, 0x0000, 0x0000,
208         0x0000
209 };
210 
211 static const uint32 ac_uv_maximum[17] =
212 {
213         0xFFFF, 0xFFFF, 0x0001, 0x0004, 0x000B, 0x001B, 0x0038, 0x0076,
214         0xFFFF, 0x01DD, 0xFFFF, 0x0787, 0x0F7F, 0xFFFF, 0xFFFF, 0xFFFF,
215         0xFFFF
216 };
217 
218 static const HuffmanTable ac_uv_table =
219 {
220 	ac_uv_base,
221 	ac_uv_codes,
222 	ac_uv_minimum,
223 	ac_uv_maximum
224 };
225 
226 static const uint8 zigzag[63] =
227 {
228  0x01, 0x08, 0x10, 0x09, 0x02, 0x03, 0x0A, 0x11,
229  0x18, 0x20, 0x19, 0x12, 0x0B, 0x04, 0x05, 0x0C,
230  0x13, 0x1A, 0x21, 0x28, 0x30, 0x29, 0x22, 0x1B,
231  0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1C, 0x23,
232  0x2A, 0x31, 0x38, 0x39, 0x32, 0x2B, 0x24, 0x1D,
233  0x16, 0x0F, 0x17, 0x1E, 0x25, 0x2C, 0x33, 0x3A,
234  0x3B, 0x34, 0x2D, 0x26, 0x1F, 0x27, 0x2E, 0x35,
235  0x3C, 0x3D, 0x36, 0x2F, 0x37, 0x3E, 0x3F
236 };
237 
238 static HuffmanQuickLUT dc_y_qlut = { NULL }, dc_uv_qlut = { NULL}, ac_y_qlut = { NULL }, ac_uv_qlut = { NULL };
239 
KillHuffmanLUT(HuffmanQuickLUT * qlut)240 static void KillHuffmanLUT(HuffmanQuickLUT *qlut)
241 {
242  if(qlut->lut)
243   free(qlut->lut);
244 
245  if(qlut->lut_bits)
246   free(qlut->lut_bits);
247 
248  qlut->lut = NULL;
249  qlut->lut_bits = NULL;
250 }
251 
BuildHuffmanLUT(const HuffmanTable * table,HuffmanQuickLUT * qlut,const int bitmax)252 static bool BuildHuffmanLUT(const HuffmanTable *table, HuffmanQuickLUT *qlut, const int bitmax)
253 {
254  // TODO: Allocate only (1 << bitmax) entries.
255  // TODO: What should we set invalid bitsequences/entries to? 0? ~0?  Something else?
256 
257  if(!(qlut->lut = (uint8 *)calloc(1 << 12, 1)))
258   return(FALSE);
259 
260  if(!(qlut->lut_bits = (uint8 *)calloc(1 << 12, 1)))
261   return(FALSE);
262 
263  for(int numbits = 2; numbits <= 12; numbits++)
264  {
265   if(table->maximum[numbits] != 0xFFFF)
266   {
267    for(unsigned int i = table->minimum[numbits]; i <= table->maximum[numbits]; i++)
268    {
269     for(int b = 0; b < (1 << (bitmax - numbits)); b++)
270     {
271      int lut_index = (i << (bitmax - numbits)) + b;
272 
273      assert(lut_index < (1 << bitmax));
274 
275      qlut->lut[lut_index] = table->codes[table->base[numbits] + (i - table->minimum[numbits])];
276      qlut->lut_bits[lut_index] = numbits;
277     }
278    }
279   }
280  }
281 
282  //printf("\n\n%d\n", bitmax);
283  for(int i = 0; i < (1 << bitmax); i++)
284  {
285   //if(!qlut->lut_bits[i])
286   // printf("%d %d\n", i, qlut->lut_bits[i]);
287  }
288 
289  return(TRUE);
290 }
291 
292 
293 static uint8 *DecodeBuffer[2] = { NULL, NULL };
294 static int32 DecodeFormat[2]; // The format each buffer is in(-1 = invalid, 0 = palettized 8-bit, 1 = YUV)
295 static uint32 QuantTables[2][64], QuantTablesBase[2][64];	// 0 = Y, 1 = UV
296 
297 static uint32 DecodeBufferWhichRead;
298 
299 static int32 RasterReadPos;
300 static uint16 Control;
301 static uint16 NullRunY, NullRunU, NullRunV, HSync;
302 static uint16 HScroll;
303 
304 static uint32 bits_buffer;
305 static uint32 bits_buffered_bits;
306 static int32 bits_bytes_left;
307 
InitBits(int32 bcount)308 static void InitBits(int32 bcount)
309 {
310  bits_bytes_left = bcount;
311  bits_buffer = 0;
312  bits_buffered_bits = 0;
313 }
314 
FetchWidgywabbit(void)315 static INLINE uint8 FetchWidgywabbit(void)
316 {
317  if(bits_bytes_left <= 0)
318   return(0);
319 
320  uint8 ret = KING_RB_Fetch();
321  if(ret == 0xFF)
322   KING_RB_Fetch();
323 
324  bits_bytes_left--;
325 
326  return(ret);
327 }
328 
329 enum
330 {
331  MDFNBITS_PEEK = 1,
332  MDFNBITS_FUNNYSIGN = 2,
333 };
334 
GetBits(const unsigned int count,const unsigned int how=0)335 static INLINE uint32 GetBits(const unsigned int count, const unsigned int how = 0)
336 {
337  uint32 ret;
338 
339  while(bits_buffered_bits < count)
340  {
341   bits_buffer <<= 8;
342   bits_buffer |= FetchWidgywabbit();
343   bits_buffered_bits += 8;
344  }
345 
346  ret = (bits_buffer >> (bits_buffered_bits - count)) & ((1 << count) - 1);
347 
348  if(!(how & MDFNBITS_PEEK))
349   bits_buffered_bits -= count;
350 
351  if((how & MDFNBITS_FUNNYSIGN) && count)
352  {
353   if(ret < (1U << (count - 1)))
354    ret += 1 - (1 << count);
355  }
356 
357  return(ret);
358 }
359 
360 // Note: SkipBits is intended to be called right after GetBits() with "how" MDFNBITS_PEEK,
361 // and the count pass to SkipBits must be less than or equal to the count passed to GetBits().
SkipBits(const unsigned int count)362 static INLINE void SkipBits(const unsigned int count)
363 {
364  bits_buffered_bits -= count;
365 }
366 
367 
368 static uint32 HappyColor; // Cached, calculated from null run yuv registers;
CalcHappyColor(void)369 static void CalcHappyColor(void)
370 {
371  uint8 y_c = (int16)NullRunY + 0x80;
372  uint8 u_c = (int16)NullRunU + 0x80;
373  uint8 v_c = (int16)NullRunV + 0x80;
374 
375  HappyColor = (y_c << 16) | (u_c << 8) | (v_c << 0);
376 }
377 
get_ac_coeff(const HuffmanQuickLUT * table,int32 * zeroes)378 static uint32 get_ac_coeff(const HuffmanQuickLUT *table, int32 *zeroes)
379 {
380  unsigned int numbits;
381  uint32 rawbits;
382  uint32 code;
383 
384  rawbits = GetBits(12, MDFNBITS_PEEK);
385  if((rawbits & 0xF80) == 0xF80)
386  //if(rawbits >= 0xF80)
387  {
388   SkipBits(5);
389   *zeroes = 0;
390   return(0);
391  }
392 
393  if(!table->lut_bits[rawbits])
394  {
395   FXDBG("Invalid AC bit sequence: %03x\n", rawbits);
396  }
397 
398  code = table->lut[rawbits];
399  SkipBits(table->lut_bits[rawbits]);
400 
401  numbits = code & 0xF;
402  *zeroes = code >> 4;
403 
404  return(GetBits(numbits, MDFNBITS_FUNNYSIGN));
405 }
406 
get_dc_coeff(const HuffmanQuickLUT * table,int32 * zeroes,int maxbits)407 static uint32 get_dc_coeff(const HuffmanQuickLUT *table, int32 *zeroes, int maxbits)
408 {
409  uint32 code;
410 
411  for(;;)
412  {
413   uint32 rawbits = GetBits(maxbits, MDFNBITS_PEEK);
414 
415   if(!table->lut_bits[rawbits])
416   {
417    FXDBG("Invalid DC bit sequence: %03x\n", rawbits);
418   }
419 
420   code = table->lut[rawbits];
421   SkipBits(table->lut_bits[rawbits]);
422 
423   if(code < 0xF)
424   {
425    *zeroes = 0;
426    return(GetBits(code, MDFNBITS_FUNNYSIGN));
427   }
428   else if(code == 0xF)
429   {
430    get_ac_coeff(&ac_y_qlut, zeroes);
431    (*zeroes)++;
432    return(0);
433   }
434   else if(code >= 0x10)
435   {
436    code -= 0x10;
437 
438    for(int i = 0; i < 64; i++)
439    {
440     // Y
441     uint32 coeff = (QuantTablesBase[0][i] * code) >> 2;
442 
443     if(coeff < 1)
444      coeff = 1;
445     else if(coeff > 0xFE)
446      coeff = 0xFE;
447 
448     QuantTables[0][i] = coeff;
449 
450     // UV
451     if(i)
452      coeff = (QuantTablesBase[1][i] * code) >> 2;
453     else
454      coeff = (QuantTablesBase[1][i]) >> 2;
455 
456     if(coeff < 1)
457      coeff = 1;
458     else if(coeff > 0xFE)
459      coeff = 0xFE;
460 
461     QuantTables[1][i] = coeff;
462    }
463 
464   }
465  }
466 
467 }
468 
get_dc_y_coeff(int32 * zeroes)469 static INLINE uint32 get_dc_y_coeff(int32 *zeroes)
470 {
471  return(get_dc_coeff(&dc_y_qlut, zeroes, 9));
472 }
473 
get_dc_uv_coeff(void)474 static uint32 get_dc_uv_coeff(void)
475 {
476  const HuffmanQuickLUT *table = &dc_uv_qlut;
477  uint32 code;
478  uint32 rawbits = GetBits(8, MDFNBITS_PEEK);
479 
480  code = table->lut[rawbits];
481  SkipBits(table->lut_bits[rawbits]);
482 
483  return(GetBits(code, MDFNBITS_FUNNYSIGN));
484 }
485 
486 
decode(int32 * dct,const uint32 * QuantTable,const int32 dc,const HuffmanQuickLUT * table)487 static void decode(int32 *dct, const uint32 *QuantTable, const int32 dc, const HuffmanQuickLUT *table)
488 {
489  int32 coeff;
490  int32 zeroes;
491  int count;
492  int index;
493 
494  dct[0] = (int16)(QuantTable[0] * dc);
495  count = 0;
496 
497  do
498  {
499   coeff = get_ac_coeff(table, &zeroes);
500   if(!coeff)
501   {
502    if(!zeroes)
503    {
504     while(count < 63)
505     {
506      dct[zigzag[count++]] = 0;
507     }
508     break;
509    }
510    else if(zeroes == 1)
511     zeroes = 0xF;
512   }
513 
514   while(zeroes-- && count < 63)
515   {
516    dct[zigzag[count++]] = 0;
517   }
518   zeroes = 0;
519   if(count < 63)
520   {
521    index = zigzag[count++];
522    dct[index] = (int16)(QuantTable[index] * coeff);
523   }
524  } while(count < 63);
525 
526 }
527 
528 static uint32 LastLine[256];
529 static bool FirstDecode;
530 static bool GarbageData;
531 
RAINBOW_Init(bool arg_ChromaIP)532 bool RAINBOW_Init(bool arg_ChromaIP)
533 {
534  ChromaIP = arg_ChromaIP;
535 
536  for(int i = 0; i < 2; i++)
537  {
538   if(!(DecodeBuffer[i] = (uint8*)malloc(0x2000 * 4)))
539    return(0);
540   memset(DecodeBuffer[i], 0, 0x2000 * 4);
541  }
542 
543  if(!BuildHuffmanLUT(&dc_y_table, &dc_y_qlut, 9))
544   return(FALSE);
545 
546  if(!BuildHuffmanLUT(&dc_uv_table, &dc_uv_qlut, 8))
547   return(FALSE);
548 
549  if(!BuildHuffmanLUT(&ac_y_table, &ac_y_qlut, 12))
550   return(FALSE);
551 
552  if(!BuildHuffmanLUT(&ac_uv_table, &ac_uv_qlut, 12))
553   return(FALSE);
554 
555  DecodeFormat[0] = DecodeFormat[1] = -1;
556  DecodeBufferWhichRead = 0;
557  GarbageData = FALSE;
558  FirstDecode = TRUE;
559  RasterReadPos = 0;
560 
561  return(1);
562 }
563 
RAINBOW_Close(void)564 void RAINBOW_Close(void)
565 {
566  for(int i = 0; i < 2; i++)
567   if(DecodeBuffer[i])
568   {
569    free(DecodeBuffer[i]);
570    DecodeBuffer[i] = NULL;
571   }
572 
573  KillHuffmanLUT(&dc_y_qlut);
574  KillHuffmanLUT(&dc_uv_qlut);
575  KillHuffmanLUT(&ac_y_qlut);
576  KillHuffmanLUT(&ac_uv_qlut);
577 }
578 
579 // RAINBOW base I/O port address: 0x200
580 
581 #define REGSETBOFW(_reg, _data, _wb) { (_reg) &= ~(0xFF << ((_wb) * 8)); (_reg) |= (_data) << ((_wb) * 8); }
582 #define REGSETBOFH REGSETBOFW
583 
584 // The horizontal scroll register is set up kind of weird...D0-D7 of writes to $200 set the lower byte, D0-D7 of writes to $202 set the upper byte
585 
RAINBOW_Write8(uint32 A,uint8 V)586 void RAINBOW_Write8(uint32 A, uint8 V)
587 {
588  //printf("RAINBOW Wr8: %08x %02x\n", A, V);
589  switch(A & 0x1C)
590  {
591   case 0x00: REGSETBOFH(HScroll, V, (A & 0x2) >> 1); HScroll &= 0x1FF; break;
592   case 0x04: REGSETBOFH(Control, V, A & 0x3); break;
593   case 0x08: REGSETBOFH(NullRunY, V, A & 0x3); CalcHappyColor(); break;
594   case 0x0C: REGSETBOFH(NullRunU, V, A & 0x3); CalcHappyColor(); break;
595   case 0x10: REGSETBOFH(NullRunV, V, A & 0x3); CalcHappyColor(); break;
596   case 0x14: REGSETBOFH(HSync, V, A & 0x3); break;
597  }
598 }
599 
RAINBOW_Write16(uint32 A,uint16 V)600 void RAINBOW_Write16(uint32 A, uint16 V)
601 {
602  int msh = A & 0x2;
603 
604  //printf("RAINBOW Wr16: %08x %04x\n", A, V);
605  switch(A & 0x1C)
606  {
607   case 0x00: REGSETBOFH(HScroll, V & 0xFF, (A & 0x2) >> 1); HScroll &= 0x1FF; break;
608   case 0x04: REGSETHW(Control, V, msh); break;
609   case 0x08: REGSETHW(NullRunY, V, msh); CalcHappyColor(); break;
610   case 0x0C: REGSETHW(NullRunU, V, msh); CalcHappyColor(); break;
611   case 0x10: REGSETHW(NullRunV, V, msh); CalcHappyColor(); break;
612   case 0x14: REGSETHW(HSync, V, msh); break;
613  }
614 }
615 
RAINBOW_ForceTransferReset(void)616 void RAINBOW_ForceTransferReset(void)
617 {
618  RasterReadPos = 0;
619  DecodeFormat[0] = DecodeFormat[1] = -1;
620 }
621 
RAINBOW_SwapBuffers(void)622 void RAINBOW_SwapBuffers(void)
623 {
624  DecodeBufferWhichRead ^= 1;
625  RasterReadPos = 0;
626 }
627 
RAINBOW_DecodeBlock(bool arg_FirstDecode,bool Skip)628 void RAINBOW_DecodeBlock(bool arg_FirstDecode, bool Skip)
629 {
630    uint8 block_type;
631    int32 block_size;
632    int icount;
633    int which_buffer = DecodeBufferWhichRead ^ 1;
634 
635    if(!(Control & 0x01))
636    {
637     puts("Rainbow decode when disabled!!");
638     return;
639    }
640 
641    if(arg_FirstDecode)
642    {
643     FirstDecode = TRUE;
644     GarbageData = FALSE;
645    }
646 
647    if(GarbageData)
648     icount = 0;
649    else
650     icount = 0x200;
651 
652    do
653    {
654     do
655     {
656      while(KING_RB_Fetch() != 0xFF && icount > 0)
657       icount--;
658 
659      block_type = KING_RB_Fetch();
660      //if(icount > 0 && block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF)
661      //if(icount > 0 && block_type == 0x11)
662      // printf("%02x\n", block_type);
663      icount--;
664     } while(block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF && icount > 0);
665 
666     {
667      uint16 tmp;
668 
669      tmp = KING_RB_Fetch() << 8;
670      tmp |= KING_RB_Fetch() << 0;
671 
672      block_size = (int16)tmp;
673     }
674 
675     block_size -= 2;
676     if(block_type == 0xFF && block_size <= 0)
677      for(int i = 0; i < 128; i++,icount--) KING_RB_Fetch();
678 
679     //fprintf(stderr, "Block: %d\n", block_size);
680    } while(block_size <= 0 && icount > 0);
681 
682    //if(!GarbageData && icount < 500)
683    //{
684    // FXDBG("Partial garbage data. %d", icount);
685    //}
686 
687    //printf("%d\n", icount);
688    if(icount <= 0)
689    {
690     FXDBG("Garbage data.");
691     GarbageData = TRUE;
692     //printf("Dooom: %d\n");
693     DecodeFormat[which_buffer] = 0;
694     memset(DecodeBuffer[which_buffer], 0, 0x2000);
695     goto BufferNoDecode;
696    }
697 
698    if(block_type == 0xf8 || block_type == 0xff)
699     DecodeFormat[which_buffer] = 1;
700    else
701     DecodeFormat[which_buffer] = 0;
702 
703    if(block_type == 0xF8 || block_type == 0xFF)
704    {
705     if(block_type == 0xFF)
706     {
707      for(int q = 0; q < 2; q++)
708       for(int i = 0; i < 64; i++)
709       {
710        uint8 meow = KING_RB_Fetch();
711 
712        QuantTables[q][i] = meow;
713        QuantTablesBase[q][i] = meow;
714       }
715      block_size -= 128;
716     }
717 
718     InitBits(block_size);
719 
720     int32 dc_y = 0, dc_u = 0, dc_v = 0;
721     uint32 *dest_base = (uint32 *)DecodeBuffer[which_buffer];
722     for(int column = 0; column < 16; column++)
723     {
724      uint32 *dest_base_column = &dest_base[column * 16];
725      int32 zeroes = 0;
726 
727      dc_y += get_dc_y_coeff(&zeroes);
728 
729      if(zeroes) // If set, clear the number of columns
730      {
731       do
732       {
733        if(column < 16)
734        {
735 	dest_base_column = &dest_base[column * 16];
736 
737         for(int y = 0; y < 16; y++)
738          for(int x = 0; x < 16; x++)
739           dest_base_column[y * 256 + x] = HappyColor;
740        }
741        column++;
742        zeroes--;
743       } while(zeroes);
744       column--; // Fix for the column autoincrement in the while(zeroes) loop
745       dc_y = dc_u = dc_v = 0;
746      }
747      else
748      {
749       int32 dct_y[256];
750       int32 dct_u[64];
751       int32 dct_v[64];
752 
753       // Y/Luma, 16x16 components
754       // ---------
755       // | A | C |
756       // |-------|
757       // | B | D |
758       // ---------
759       // A (0, 0)
760       decode(&dct_y[0x00], QuantTables[0], dc_y, &ac_y_qlut);
761 
762       // B (0, 1)
763       dc_y += get_dc_y_coeff(&zeroes);
764       decode(&dct_y[0x40], QuantTables[0], dc_y, &ac_y_qlut);
765 
766       // C (1, 0)
767       dc_y += get_dc_y_coeff(&zeroes);
768       decode(&dct_y[0x80], QuantTables[0], dc_y, &ac_y_qlut);
769 
770       // D (1, 1)
771       dc_y += get_dc_y_coeff(&zeroes);
772       decode(&dct_y[0xC0], QuantTables[0], dc_y, &ac_y_qlut);
773 
774       // U, 8x8 components
775       dc_u += get_dc_uv_coeff();
776       decode(&dct_u[0x00], QuantTables[1], dc_u, &ac_uv_qlut);
777 
778       // V, 8x8 components
779       dc_v += get_dc_uv_coeff();
780       decode(&dct_v[0x00], QuantTables[1], dc_v, &ac_uv_qlut);
781 
782       if(Skip)
783        continue;
784 
785       j_rev_dct(&dct_y[0x00]);
786       j_rev_dct(&dct_y[0x40]);
787       j_rev_dct(&dct_y[0x80]);
788       j_rev_dct(&dct_y[0xC0]);
789       j_rev_dct(&dct_u[0x00]);
790       j_rev_dct(&dct_v[0x00]);
791 
792       for(int y = 0; y < 16; y++)
793        for(int x = 0; x < 16; x++)
794         dest_base_column[y * 256 + x] = clamp_to_u8(dct_y[y * 8 + (x & 0x7) + ((x & 0x8) << 4)] + 0x80) << 16;
795 
796       if(!ChromaIP)
797       {
798        for(int y = 0; y < 8; y++)
799        {
800         for(int x = 0; x < 8; x++)
801         {
802          uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80);
803          dest_base_column[y * 512 + (256 * 0) + x * 2 + 0] |= component_uv;
804          dest_base_column[y * 512 + (256 * 0) + x * 2 + 1] |= component_uv;
805          dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv;
806          dest_base_column[y * 512 + (256 * 1) + x * 2 + 1] |= component_uv;
807         }
808        }
809       }
810       else
811       {
812        for(int y = 0; y < 8; y++)
813        {
814         for(int x = 0; x < 8; x++)
815         {
816          uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80);
817  	 dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv;
818         }
819        }
820       }
821      }
822     }
823 
824     // Do bilinear interpolation on the chroma channels:
825     if(!Skip && ChromaIP)
826     {
827      for(int y = 0; y < 16; y+= 2)
828      {
829       uint32 *linebase = &dest_base[y * 256];
830       uint32 *linebase1 = &dest_base[(y + 1) * 256];
831 
832       for(int x = 0; x < 254; x += 2)
833       {
834        unsigned int u, v;
835 
836        u = (((linebase1[x] >> 8) & 0xFF) + ((linebase1[x + 2] >> 8) & 0xFF)) >> 1;
837        v = (((linebase1[x] >> 0) & 0xFF) + ((linebase1[x + 2] >> 0) & 0xFF)) >> 1;
838 
839        linebase1[x + 1] = (linebase1[x + 1] & ~ 0xFFFF) | (u << 8) | v;
840       }
841 
842       linebase1[0xFF] = (linebase1[0xFF] & ~ 0xFFFF) | (linebase1[0xFE] & 0xFFFF);
843 
844       if(FirstDecode)
845       {
846        for(int x = 0; x < 256; x++) linebase[x] = (linebase[x] & ~ 0xFFFF) | (linebase1[x] & 0xFFFF);
847        FirstDecode = 0;
848       }
849       else
850        for(int x = 0; x < 256; x++)
851        {
852         unsigned int u, v;
853 
854         u = (((LastLine[x] >> 8) & 0xFF) + ((linebase1[x] >> 8) & 0xFF)) >> 1;
855         v = (((LastLine[x] >> 0) & 0xFF) + ((linebase1[x] >> 0) & 0xFF)) >> 1;
856 
857         linebase[x] = (linebase[x] & ~ 0xFFFF) | (u << 8) | v;
858        }
859 
860       memcpy(LastLine, linebase1, 256 * 4);
861      }
862     } // End chroma interpolation
863    } // end jpeg-like decoding
864    else
865    {
866     // Earlier code confines the set to F0,F1,F2, and F3.
867     // F0 = (4, 0xF), F1 = (3, 0x7), F2 = (0x2, 0x3), F3 = 0x1, 0x1)
868     const unsigned int plt_shift = 4 - (block_type & 0x3);
869     const unsigned int crl_mask = (1 << plt_shift) - 1;
870     int x = 0;
871 
872     while(block_size > 0)
873     {
874      uint8 boot;
875      unsigned int rle_count;
876 
877      boot = KING_RB_Fetch();
878      block_size--;
879 
880      if(boot == 0xFF)
881      {
882       KING_RB_Fetch();
883       block_size--;
884      }
885 
886      if(!(boot & crl_mask)) // Expand mode?
887      {
888       rle_count = KING_RB_Fetch();
889       block_size--;
890       if(rle_count == 0xFF)
891       {
892        KING_RB_Fetch();
893        block_size--;
894       }
895       rle_count++;
896      }
897      else
898       rle_count = boot & crl_mask;
899 
900      for(unsigned int i = 0; i < rle_count; i++)
901      {
902       if(x >= 0x2000)
903       {
904        //puts("Oops");
905        break; // Don't overflow our decode buffer!
906       }
907       DecodeBuffer[which_buffer][x] = (boot >> plt_shift);
908       x++;
909      }
910     }
911    } // end RLE decoding
912 
913    //for(int i = 0; i < 8 + block_size; i++)
914    // KING_RB_Fetch();
915 
916   BufferNoDecode: ;
917 }
918 
919 void KING_Moo(void);
920 
921 // NOTE:  layer_or and palette_ptr are optimizations, the real RAINBOW chip knows not of such things.
RAINBOW_FetchRaster(uint32 * linebuffer,uint32 layer_or,uint32 * palette_ptr)922 int RAINBOW_FetchRaster(uint32 *linebuffer, uint32 layer_or, uint32 *palette_ptr)
923 {
924  int ret;
925 
926  ret = DecodeFormat[DecodeBufferWhichRead];
927 
928  if(linebuffer)
929  {
930   if(DecodeFormat[DecodeBufferWhichRead] == -1) // None
931   {
932    if(linebuffer)
933     MDFN_FastU32MemsetM8(linebuffer, 0, 256);
934   }
935   else if(DecodeFormat[DecodeBufferWhichRead] == 1)	// YUV
936   {
937    uint32 *in_ptr = (uint32*)&DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256 * 4];
938 
939    if(Control & 0x2)	// Endless scroll mode:
940    {
941     uint8 tmpss = HScroll;
942 
943     for(int x = 0; x < 256; x++)
944     {
945      linebuffer[x] = in_ptr[tmpss] | layer_or;
946      tmpss++;
947     }
948    }
949    else // Non-endless
950    {
951     uint16 tmpss = HScroll & 0x1FF;
952 
953     for(int x = 0; x < 256; x++)
954     {
955      linebuffer[x] = (tmpss < 256) ? (in_ptr[tmpss] | layer_or) : 0;
956      tmpss = (tmpss + 1) & 0x1FF;
957     }
958    }
959     MDFN_FastU32MemsetM8(in_ptr, 0, 256);
960   }
961   else if(DecodeFormat[DecodeBufferWhichRead] == 0)	// Palette
962   {
963    uint8 *in_ptr = &DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256];
964 
965    if(Control & 0x2)    // Endless scroll mode:
966    {
967     uint8 tmpss = HScroll;
968 
969     for(int x = 0; x < 256; x++)
970     {
971      linebuffer[x] = in_ptr[tmpss] ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0;
972      tmpss++;
973     }
974    }
975    else // Non-endless
976    {
977     uint16 tmpss = HScroll & 0x1FF;
978 
979     for(int x = 0; x < 256; x++)
980     {
981      linebuffer[x] = (tmpss < 256 && in_ptr[tmpss]) ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0;
982      tmpss = (tmpss + 1) & 0x1FF;
983     }
984    }
985    //MDFN_FastU32MemsetM8((uint32 *)in_ptr, 0, 256 / 4);
986   }
987  }
988 
989  RasterReadPos = (RasterReadPos + 1) & 0xF;
990 
991  if(!RasterReadPos)
992   DecodeFormat[DecodeBufferWhichRead] = -1;     // Invalidate this buffer.
993 
994  //printf("Fetch: %d, buffer: %d\n", RasterReadPos, DecodeBufferWhichRead);
995  return(ret);
996 }
997 
RAINBOW_Reset(void)998 void RAINBOW_Reset(void)
999 {
1000  Control = 0;
1001  NullRunY = NullRunU = NullRunV = 0;
1002  HScroll = 0;
1003  RasterReadPos = 0;
1004  DecodeBufferWhichRead = 0;
1005 
1006  memset(QuantTables, 0, sizeof(QuantTables));
1007  memset(QuantTablesBase, 0, sizeof(QuantTablesBase));
1008  DecodeFormat[0] = DecodeFormat[1] = -1;
1009 
1010  CalcHappyColor();
1011 }
1012 
1013 
RAINBOW_StateAction(StateMem * sm,int load,int data_only)1014 int RAINBOW_StateAction(StateMem *sm, int load, int data_only)
1015 {
1016  SFORMAT StateRegs[] =
1017  {
1018    SFVAR(HScroll),
1019    SFVAR(Control),
1020    SFVAR(RasterReadPos),
1021    SFVAR(DecodeBufferWhichRead),
1022    SFVAR(NullRunY),
1023    SFVAR(NullRunU),
1024    SFVAR(NullRunV),
1025    SFVAR(HSync),
1026    SFARRAY32(DecodeFormat, 2),
1027    //  if(!(DecodeBuffer[i] = (uint8*)MDFN_malloc(0x2000 * 4, _("RAINBOW buffer RAM"))))
1028    SFARRAY(DecodeBuffer[0], 0x2000 * 4),
1029    SFARRAY(DecodeBuffer[1], 0x2000 * 4),
1030    SFEND
1031  };
1032 
1033  int ret = MDFNSS_StateAction(sm, load, data_only, StateRegs, "RBOW", false);
1034 
1035  if(load)
1036  {
1037   RasterReadPos &= 0xF;
1038   CalcHappyColor();
1039  }
1040  return(ret);
1041 }
1042 
1043