1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "graphics/scaler/intern.h"
24 #include "graphics/scaler/scalebit.h"
25 #include "common/util.h"
26 #include "common/system.h"
27 #include "common/textconsole.h"
28 
/**
 * Bit format of the 16-bit pixels the scalers currently operate on.
 * Overwritten by InitScalers(); the format-dependent scalers in this file
 * (Normal1o5x, TV2x) use the 565 color masks when this equals 565 and fall
 * back to the 555 masks for every other value.
 */
int gBitFormat = 565;
30 
31 #ifdef USE_HQ_SCALERS
// RGB-to-YUV lookup table and the hqx helper masks. Everything in this
// block has C linkage.
extern "C" {

#ifdef USE_NASM
// NOTE: if your compiler uses different mangled names, add another
//       condition here

// On every platform except Win32, Mac OS X and OS/2 the symbols below are
// renamed to carry a leading underscore (presumably so that they match the
// names referenced by the NASM sources -- confirm against the .asm files).
#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
#define RGBtoYUV _RGBtoYUV
#define hqx_highbits _hqx_highbits
#define hqx_lowbits _hqx_lowbits
#define hqx_low2bits _hqx_low2bits
#define hqx_low3bits _hqx_low3bits
#define hqx_greenMask _hqx_greenMask
#define hqx_redBlueMask _hqx_redBlueMask
#define hqx_green_redBlue_Mask _hqx_green_redBlue_Mask
#endif

// Bit masks for the hq scalers. The values below are only the static
// defaults (they look like RGB565 masks -- TODO confirm); InitLUT()
// recomputes all of them from the active pixel format.
uint32 hqx_highbits = 0xF7DEF7DE;
uint32 hqx_lowbits = 0x0821;
uint32 hqx_low2bits = 0x0C63;
uint32 hqx_low3bits = 0x1CE7;
uint32 hqx_greenMask = 0;
uint32 hqx_redBlueMask = 0;
uint32 hqx_green_redBlue_Mask = 0;

#endif

/**
 * 16bit RGB to YUV conversion table. This table is set up by InitLUT(),
 * which allocates it on first use with one uint32 entry per possible 16-bit
 * color (65536 entries), each packed as (Y << 16) | (u << 8) | v.
 * DestroyScalers() releases it again. Used by the hq scaler family.
 *
 * FIXME/TODO: The RGBtoYUV table sucks up 256 KB. This is bad.
 *
 * Note: a memory lookup table is *not* necessarily faster than computing
 * these things on the fly, because of its size. The table together with
 * the code, plus the input/output GFX data, may not fit in the cache on some
 * systems, so main memory has to be accessed, which is about the worst thing
 * that can happen to code which tries to be fast...
 *
 * So we should think about ways to get this smaller / removed. Maybe we can
 * use the same technique employed by our MPEG code to reduce the size of the
 * lookup table at the cost of some additional computations?
 *
 * Of course, the above is largely a conjecture, and the actual speed
 * differences are likely to vary a lot between different architectures and
 * CPUs.
 */
uint32 *RGBtoYUV = 0;
}
83 
InitLUT(Graphics::PixelFormat format)84 void InitLUT(Graphics::PixelFormat format) {
85 	uint8 r, g, b;
86 	int Y, u, v;
87 
88 	assert(format.bytesPerPixel == 2);
89 
90 	// Allocate the YUV/LUT buffers on the fly if needed.
91 	if (RGBtoYUV == 0)
92 		RGBtoYUV = (uint32 *)malloc(65536 * sizeof(uint32));
93 
94 	if (!RGBtoYUV)
95 		error("[InitLUT] Cannot allocate memory for YUV/LUT buffers");
96 
97 	for (int color = 0; color < 65536; ++color) {
98 		format.colorToRGB(color, r, g, b);
99 		Y = (r + g + b) >> 2;
100 		u = 128 + ((r - b) >> 2);
101 		v = 128 + ((-r + 2 * g - b) >> 3);
102 		RGBtoYUV[color] = (Y << 16) | (u << 8) | v;
103 	}
104 
105 #ifdef USE_NASM
106 	hqx_lowbits  = (1 << format.rShift) | (1 << format.gShift) | (1 << format.bShift),
107 	hqx_low2bits = (3 << format.rShift) | (3 << format.gShift) | (3 << format.bShift),
108 	hqx_low3bits = (7 << format.rShift) | (7 << format.gShift) | (7 << format.bShift),
109 
110 	hqx_highbits = format.RGBToColor(255, 255, 255) ^ hqx_lowbits;
111 
112 	// FIXME: The following code only does the right thing
113 	// if the color order is RGB or BGR, i.e., green is in the middle.
114 	hqx_greenMask   = format.RGBToColor(  0, 255,   0);
115 	hqx_redBlueMask = format.RGBToColor(255,   0, 255);
116 
117 	hqx_green_redBlue_Mask = (hqx_greenMask << 16) | hqx_redBlueMask;
118 #endif
119 }
120 #endif
121 
122 
/**
 * Lookup table for the DotMatrix scaler.
 *
 * Treated as a 4x4 matrix: DOT_16() indexes it with
 * ((row & 3) << 2) + (column & 3). All entries start out as zero;
 * InitScalers() fills in the non-zero ones for the active pixel format.
 */
uint16 g_dotmatrix[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
125 
126 /** Init the scaler subsystem. */
InitScalers(uint32 BitFormat)127 void InitScalers(uint32 BitFormat) {
128 	gBitFormat = BitFormat;
129 
130 	// FIXME: The pixelformat should be param to this function, not the bitformat.
131 	// Until then, determine the pixelformat in other ways. Unfortunately,
132 	// calling OSystem::getOverlayFormat() here might not be safe on all ports.
133 	Graphics::PixelFormat format;
134 	if (gBitFormat == 555) {
135 		format = Graphics::createPixelFormat<555>();
136 	} else if (gBitFormat == 565) {
137 		format = Graphics::createPixelFormat<565>();
138 	} else {
139 		assert(g_system);
140 		format = g_system->getOverlayFormat();
141 	}
142 
143 #ifdef USE_HQ_SCALERS
144 	InitLUT(format);
145 #endif
146 
147 	// Build dotmatrix lookup table for the DotMatrix scaler.
148 	g_dotmatrix[0] = g_dotmatrix[10] = format.RGBToColor( 0, 63,  0);
149 	g_dotmatrix[1] = g_dotmatrix[11] = format.RGBToColor( 0,  0, 63);
150 	g_dotmatrix[2] = g_dotmatrix[ 8] = format.RGBToColor(63,  0,  0);
151 	g_dotmatrix[4] = g_dotmatrix[ 6] =
152 		g_dotmatrix[12] = g_dotmatrix[14] = format.RGBToColor(63, 63, 63);
153 }
154 
/**
 * Shut down the scaler subsystem.
 *
 * When USE_HQ_SCALERS is defined this releases the RGBtoYUV table and
 * resets the pointer to 0, so that a later InitLUT() call allocates it
 * again. Without USE_HQ_SCALERS this function does nothing.
 */
void DestroyScalers() {
#ifdef USE_HQ_SCALERS
	// Detach the table from the global first, then release it.
	uint32 *const table = RGBtoYUV;
	RGBtoYUV = 0;
	free(table);
#endif
}
161 
162 
/**
 * Identity 'scaler': performs no scaling at all and simply copies a
 * width x height rectangle of 16-bit pixels from the source to the
 * destination.
 *
 * @param srcPtr	first source pixel
 * @param srcPitch	distance in bytes between two source rows
 * @param dstPtr	first destination pixel
 * @param dstPitch	distance in bytes between two destination rows
 * @param width		rectangle width in pixels
 * @param height	rectangle height in pixels
 */
void Normal1x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
							int width, int height) {
	// If neither buffer has padding between rows, the whole rectangle is
	// one contiguous run and a single copy does the job.
	const bool srcIsPacked = (srcPitch == sizeof(uint16) * (uint)width);
	const bool dstIsPacked = (dstPitch == sizeof(uint16) * (uint)width);
	if (srcIsPacked && dstIsPacked) {
		memcpy(dstPtr, srcPtr, sizeof(uint16) * width * height);
		return;
	}

	// Otherwise copy row by row, honoring both pitches.
	for (; height != 0; --height) {
		memcpy(dstPtr, srcPtr, sizeof(uint16) * width);
		dstPtr += dstPitch;
		srcPtr += srcPitch;
	}
}
180 
181 #ifdef USE_SCALERS
182 
183 
184 #ifdef USE_ARM_SCALER_ASM
// 2x scaler routine with C linkage; it is only declared here and its
// definition lives outside this file (presumably in an ARM assembly
// source -- confirm against the build files).
extern "C" void Normal2xARM(const uint8  *srcPtr,
                                  uint32  srcPitch,
                                  uint8  *dstPtr,
                                  uint32  dstPitch,
                                  int     width,
                                  int     height);

/**
 * 2x scaler used when USE_ARM_SCALER_ASM is defined: forwards all arguments
 * unchanged to Normal2xARM().
 */
void Normal2x(const uint8  *srcPtr,
                    uint32  srcPitch,
                    uint8  *dstPtr,
                    uint32  dstPitch,
                    int     width,
                    int     height) {
	Normal2xARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
}
200 
201 #else
/**
 * Plain nearest-neighbor 2x scaler: every 16-bit source pixel becomes a
 * 2x2 block of identical pixels in the destination.
 *
 * The destination is written in 32-bit units (two pixels at once), so
 * dstPtr has to be 4-byte aligned (asserted).
 *
 * @param srcPtr	first source pixel
 * @param srcPitch	distance in bytes between two source rows
 * @param dstPtr	first destination pixel
 * @param dstPitch	distance in bytes between two destination rows
 * @param width		source width in pixels
 * @param height	source height in pixels
 */
void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
							int width, int height) {
	assert(IS_ALIGNED(dstPtr, 4));

	for (; height != 0; --height) {
		const uint16 *srcRow = (const uint16 *)srcPtr;
		uint8 *out = dstPtr;

		for (int x = 0; x < width; ++x) {
			// Replicate the pixel into both halves of a 32-bit word.
			const uint32 pixel = srcRow[x];
			const uint32 doubled = pixel | (pixel << 16);

			// Same word goes to this output row and the one below it.
			*(uint32 *)(out) = doubled;
			*(uint32 *)(out + dstPitch) = doubled;
			out += 4;
		}

		srcPtr += srcPitch;
		dstPtr += dstPitch << 1;
	}
}
224 #endif
225 
/**
 * Plain nearest-neighbor 3x scaler: every 16-bit source pixel becomes a
 * 3x3 block of identical pixels in the destination.
 *
 * dstPtr has to be 2-byte aligned (asserted).
 *
 * @param srcPtr	first source pixel
 * @param srcPitch	distance in bytes between two source rows
 * @param dstPtr	first destination pixel
 * @param dstPitch	distance in bytes between two destination rows
 * @param width		source width in pixels
 * @param height	source height in pixels
 */
void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
							int width, int height) {
	const uint32 twoRows = dstPitch * 2;
	const uint32 threeRows = dstPitch * 3;

	assert(IS_ALIGNED(dstPtr, 2));

	for (; height != 0; --height) {
		const uint16 *srcRow = (const uint16 *)srcPtr;
		uint8 *out = dstPtr;

		for (int x = 0; x < width; ++x) {
			const uint16 pixel = srcRow[x];

			// Three output rows, three copies of the pixel in each.
			uint16 *top = (uint16 *)(out);
			uint16 *middle = (uint16 *)(out + dstPitch);
			uint16 *bottom = (uint16 *)(out + twoRows);

			top[0] = pixel;
			top[1] = pixel;
			top[2] = pixel;
			middle[0] = pixel;
			middle[1] = pixel;
			middle[2] = pixel;
			bottom[0] = pixel;
			bottom[1] = pixel;
			bottom[2] = pixel;

			out += 6;
		}

		srcPtr += srcPitch;
		dstPtr += threeRows;
	}
}
255 
256 #define interpolate_1_1		interpolate16_1_1<ColorMask>
257 #define interpolate_1_1_1_1	interpolate16_1_1_1_1<ColorMask>
258 
259 /**
260  * Trivial nearest-neighbor 1.5x scaler.
261  */
262 template<typename ColorMask>
Normal1o5xTemplate(const uint8 * srcPtr,uint32 srcPitch,uint8 * dstPtr,uint32 dstPitch,int width,int height)263 void Normal1o5xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
264 							int width, int height) {
265 	uint8 *r;
266 	const uint32 dstPitch2 = dstPitch * 2;
267 	const uint32 dstPitch3 = dstPitch * 3;
268 	const uint32 srcPitch2 = srcPitch * 2;
269 
270 	assert(IS_ALIGNED(dstPtr, 2));
271 	while (height > 0) {
272 		r = dstPtr;
273 		for (int i = 0; i < width; i += 2, r += 6) {
274 			uint16 color0 = *(((const uint16 *)srcPtr) + i);
275 			uint16 color1 = *(((const uint16 *)srcPtr) + i + 1);
276 			uint16 color2 = *(((const uint16 *)(srcPtr + srcPitch)) + i);
277 			uint16 color3 = *(((const uint16 *)(srcPtr + srcPitch)) + i + 1);
278 
279 			*(uint16 *)(r + 0) = color0;
280 			*(uint16 *)(r + 2) = interpolate_1_1(color0, color1);
281 			*(uint16 *)(r + 4) = color1;
282 			*(uint16 *)(r + 0 + dstPitch) = interpolate_1_1(color0, color2);
283 			*(uint16 *)(r + 2 + dstPitch) = interpolate_1_1_1_1(color0, color1, color2, color3);
284 			*(uint16 *)(r + 4 + dstPitch) = interpolate_1_1(color1, color3);
285 			*(uint16 *)(r + 0 + dstPitch2) = color2;
286 			*(uint16 *)(r + 2 + dstPitch2) = interpolate_1_1(color2, color3);
287 			*(uint16 *)(r + 4 + dstPitch2) = color3;
288 		}
289 		srcPtr += srcPitch2;
290 		dstPtr += dstPitch3;
291 		height -= 2;
292 	}
293 }
294 
Normal1o5x(const uint8 * srcPtr,uint32 srcPitch,uint8 * dstPtr,uint32 dstPitch,int width,int height)295 void Normal1o5x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
296 	if (gBitFormat == 565)
297 		Normal1o5xTemplate<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
298 	else
299 		Normal1o5xTemplate<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
300 }
301 
/**
 * The Scale2x filter, also known as AdvMame2x.
 * See also http://scale2x.sourceforge.net
 *
 * Thin wrapper: all the work is done by scale() with a factor of 2.
 */
void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
							 int width, int height) {
	// The source pointer handed to scale() is moved back by one row
	// (srcPtr - srcPitch); presumably scale() expects to start one row above
	// the first row to be scaled -- TODO confirm against scalebit.h.
	// The literal 2 before width is presumably the pixel size in bytes --
	// TODO confirm.
	scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
}
310 
/**
 * The Scale3x filter, also known as AdvMame3x.
 * See also http://scale2x.sourceforge.net
 *
 * Thin wrapper: all the work is done by scale() with a factor of 3.
 */
void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
							 int width, int height) {
	// The source pointer handed to scale() is moved back by one row
	// (srcPtr - srcPitch); presumably scale() expects to start one row above
	// the first row to be scaled -- TODO confirm against scalebit.h.
	// The literal 2 before width is presumably the pixel size in bytes --
	// TODO confirm.
	scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
}
319 
/**
 * 2x scaler with a scanline look: every source pixel is written twice
 * side by side at full intensity, and the row below receives the same pixel
 * with each color channel scaled to 7/8 of its value.
 *
 * @tparam ColorMask	provides kRedBlueMask and kGreenMask for the pixel format
 * @param srcPtr	first source pixel
 * @param srcPitch	distance in bytes between two source rows
 * @param dstPtr	first destination pixel
 * @param dstPitch	distance in bytes between two destination rows
 * @param width		source width in pixels
 * @param height	source height in pixels
 */
template<typename ColorMask>
void TV2xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
					int width, int height) {
	// Row strides expressed in pixels rather than bytes.
	const uint32 srcStride = srcPitch / sizeof(uint16);
	const uint32 dstStride = dstPitch / sizeof(uint16);

	const uint16 *srcRow = (const uint16 *)srcPtr;
	uint16 *dstRow = (uint16 *)dstPtr;

	for (; height != 0; --height) {
		for (int x = 0; x < width; ++x) {
			const uint16 pixel = srcRow[x];
			const int outX = x * 2;

			// Scale red/blue and green separately so that no channel can
			// spill into its neighbor, then mask off the leftovers.
			uint32 dimmed = (((pixel & ColorMask::kRedBlueMask) * 7) >> 3) & ColorMask::kRedBlueMask;
			dimmed |= (((pixel & ColorMask::kGreenMask) * 7) >> 3) & ColorMask::kGreenMask;

			uint16 *bright = dstRow + outX;
			uint16 *dark = dstRow + outX + dstStride;
			bright[0] = pixel;
			bright[1] = pixel;
			dark[0] = (uint16)dimmed;
			dark[1] = (uint16)dimmed;
		}

		srcRow += srcStride;
		dstRow += dstStride << 1;
	}
}
346 
TV2x(const uint8 * srcPtr,uint32 srcPitch,uint8 * dstPtr,uint32 dstPitch,int width,int height)347 void TV2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
348 	if (gBitFormat == 565)
349 		TV2xTemplate<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
350 	else
351 		TV2xTemplate<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
352 }
353 
/**
 * Darken one pixel according to a 4x4 dot pattern.
 *
 * @param dotmatrix	16-entry table, read as a 4x4 matrix
 * @param c			pixel value to darken
 * @param j			row; only its two lowest bits are used
 * @param i			column; only its two lowest bits are used
 * @return c minus those bits of (c >> 2) that are set in the selected entry
 */
static inline uint16 DOT_16(const uint16 *dotmatrix, uint16 c, int j, int i) {
	const int cell = ((j & 3) << 2) + (i & 3);
	const int quarter = c >> 2;
	return c - (quarter & dotmatrix[cell]);
}
357 
358 
359 // FIXME: This scaler doesn't quite work. Either it needs to know where on the
360 // screen it's drawing, or the dirty rects will have to be adjusted so that
361 // access to the dotmatrix array are made in a consistent way. (Doing that in
362 // a way that also works together with aspect-ratio correction is left as an
363 // exercise for the reader.)
364 
DotMatrix(const uint8 * srcPtr,uint32 srcPitch,uint8 * dstPtr,uint32 dstPitch,int width,int height)365 void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
366 					int width, int height) {
367 
368 	const uint16 *dotmatrix = g_dotmatrix;
369 
370 	const uint32 nextlineSrc = srcPitch / sizeof(uint16);
371 	const uint16 *p = (const uint16 *)srcPtr;
372 
373 	const uint32 nextlineDst = dstPitch / sizeof(uint16);
374 	uint16 *q = (uint16 *)dstPtr;
375 
376 	for (int j = 0, jj = 0; j < height; ++j, jj += 2) {
377 		for (int i = 0, ii = 0; i < width; ++i, ii += 2) {
378 			uint16 c = *(p + i);
379 			*(q + ii) = DOT_16(dotmatrix, c, jj, ii);
380 			*(q + ii + 1) = DOT_16(dotmatrix, c, jj, ii + 1);
381 			*(q + ii + nextlineDst) = DOT_16(dotmatrix, c, jj + 1, ii);
382 			*(q + ii + nextlineDst + 1) = DOT_16(dotmatrix, c, jj + 1, ii + 1);
383 		}
384 		p += nextlineSrc;
385 		q += nextlineDst << 1;
386 	}
387 }
388 
389 #endif // #ifdef USE_SCALERS
390