1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *                                                                         *
5 *   This program is free software; you can redistribute it and/or modify  *
6 *   it under the terms of the GNU General Public License as published by  *
7 *   the Free Software Foundation; either version 2 of the License, or     *
8 *   (at your option) any later version.                                   *
9 *                                                                         *
10 *   This program is distributed in the hope that it will be useful,       *
11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13 *   GNU General Public License for more details.                          *
14 *                                                                         *
15 *   You should have received a copy of the GNU General Public License     *
16 *   along with this program; if not, write to the                         *
17 *   Free Software Foundation, Inc.,                                       *
18 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
19 ***************************************************************************/
20 
21 #ifndef _OP_BLEND_H_
22 #define _OP_BLEND_H_
23 
24 //  GPU Blending operations functions
25 
////////////////////////////////////////////////////////////////////////////////
// Blend the bgr555 color in 'uSrc' (foreground) with the bgr555 color in
//  'uDst' (background) and return the resulting bgr555 color.
//
// TEMPLATE PARAMETERS:
//  BLENDMODE selects the blend equation:
//    0:  0.5 x Back + 0.5  x Forward
//    1:  1.0 x Back + 1.0  x Forward  (each channel saturates at 31)
//    2:  1.0 x Back - 1.0  x Forward  (each channel floors at 0)
//    3:  1.0 x Back + 0.25 x Forward  (each channel saturates at 31)
//   Any other value leaves the returned value unassigned.
//  SKIP_USRC_MSB_MASK: when true, the 'uSrc & 0x7fff' step of modes 1 and 2
//   (and of mode 0 when GPU_UNAI_USE_ACCURATE_BLENDING is defined) is
//   omitted. Presumably the caller then guarantees that bit 15 of 'uSrc'
//   is already clear -- confirm against the call sites.
//
// INPUT:
//  'uSrc','uDst' input: -bbbbbgggggrrrrr
//                       ^ bit 15
// RETURNS:
//           u16 output: 0bbbbbgggggrrrrr
//                       ^ bit 15
// Where '0' is zero-padding, and '-' is don't care (for 'uSrc' only as long
//  as SKIP_USRC_MSB_MASK does not disable the masking, see above)
////////////////////////////////////////////////////////////////////////////////
template <int BLENDMODE, bool SKIP_USRC_MSB_MASK>
GPU_INLINE uint_fast16_t gpuBlendingGeneric(uint_fast16_t uSrc, uint_fast16_t uDst)
{
	// All three channels are processed at once inside the packed pixel,
	//  using Blargg's bitwise modulo-clamping techniques:
	//  http://blargg.8bitalley.com/info/rgb_mixing.html
	//  http://blargg.8bitalley.com/info/rgb_clamped_add.html
	//  http://blargg.8bitalley.com/info/rgb_clamped_sub.html

	uint_fast16_t mix;

	switch (BLENDMODE) {
	case 0: {
		// 0.5 x Back + 0.5 x Forward
#ifdef GPU_UNAI_USE_ACCURATE_BLENDING
		// Slower path: the LSB of every channel still contributes to the sum
		const uint_fast16_t back = uDst & 0x7fff;
		const uint_fast16_t fore = SKIP_USRC_MSB_MASK ? uSrc : (uSrc & 0x7fff);
		mix = ((fore + back) - ((fore ^ back) & 0x0421)) >> 1;
#else
		// Fast path: 0x7bde drops bit 15 and the LSB of every channel, so
		//  the halved sum cannot leak from one channel into its neighbour
		const uint_fast16_t back = uDst & 0x7bde;
		const uint_fast16_t fore = uSrc & 0x7bde;
		mix = (back + fore) >> 1;
#endif
		break;
	}

	case 1: {
		// 1.0 x Back + 1.0 x Forward
		const u32 back     = uDst & 0x7fff;
		const u32 fore     = SKIP_USRC_MSB_MASK ? uSrc : (uSrc & 0x7fff);
		const u32 total    = fore + back;
		const u32 lsbDiff  = (fore ^ back) & 0x0421;
		// Bits 5/10/15 flag the channels whose sum went past 31
		const u32 overflow = (total - lsbDiff) & 0x8420;
		// Each flag expands into an all-ones field for its channel
		const u32 saturate = overflow - (overflow >> 5);
		mix = (total - overflow) | saturate;
		break;
	}

	case 2: {
		// 1.0 x Back - 1.0 x Forward
		const u32 back    = uDst & 0x7fff;
		const u32 fore    = SKIP_USRC_MSB_MASK ? uSrc : (uSrc & 0x7fff);
		const u32 delta   = back - fore + 0x8420;
		const u32 topDiff = (back ^ fore) & 0x8420;
		const u32 borrows = (delta - topDiff) & 0x8420;
		// The resulting mask is zero for every channel that went negative
		const u32 keep    = borrows - (borrows >> 5);
		mix = (delta - borrows) & keep;
		break;
	}

	case 3: {
		// 1.0 x Back + 0.25 x Forward
		const u32 back     = uDst & 0x7fff;
		// Quarter every foreground channel; the mask discards the bits
		//  shifted in from the neighbouring channel (and bit 15)
		const u32 quarter  = (uSrc >> 2) & 0x1ce7;
		const u32 total    = quarter + back;
		const u32 lsbDiff  = (quarter ^ back) & 0x0421;
		const u32 overflow = (total - lsbDiff) & 0x8420;
		const u32 saturate = overflow - (overflow >> 5);
		mix = (total - overflow) | saturate;
		break;
	}
	}

	return mix;
}
102 
103 
////////////////////////////////////////////////////////////////////////////////
// Expand the bgr555 color in 'uSrc' into a padded u32 5.4:5.4:5.4 bgr
//  fixed-pt color triplet (format intended for HQ 24-bit quantization).
//  The four fractional bits of every channel come out as zero and each
//  channel gets one zero padding bit directly above it.
//
// INPUT:
//       'uSrc' input: -bbbbbgggggrrrrr
//                     ^ bit 15
// RETURNS:
//         u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
//                     ^ bit 31
// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
////////////////////////////////////////////////////////////////////////////////
GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc)
{
	const u32 blue  = (uSrc & 0x7C00) << 14;  // bits 10..14 -> bits 24..28
	const u32 green = (uSrc & 0x03E0) <<  9;  // bits  5..9  -> bits 14..18
	const u32 red   = (uSrc & 0x001F) <<  4;  // bits  0..4  -> bits  4..8
	return blue | green | red;
}
122 
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 // Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24'
126 //  (foreground color) with bgr555 color in 'uDst' (background color),
127 //  returning the resulting u32 5.4:5.4:5.4 color.
128 //
129 // INPUT:
130 //     'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
131 //                     ^ bit 31
132 //       'uDst' input: -bbbbbgggggrrrrr
133 //                     ^ bit 16
134 // RETURNS:
135 //         u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
136 //                     ^ bit 31
137 // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
138 ////////////////////////////////////////////////////////////////////////////////
139 template <int BLENDMODE>
gpuBlending24(u32 uSrc24,uint_fast16_t uDst)140 GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst)
141 {
142 	// These use techniques adapted from Blargg's techniques mentioned in
143 	//  in gpuBlending() comments above. Not as much bitwise trickery is
144 	//  necessary because of presence of 0 padding in uSrc24 format.
145 
146 	u32 uDst24 = gpuGetRGB24(uDst);
147 	u32 mix;
148 
149 	// 0.5 x Back + 0.5 x Forward
150 	if (BLENDMODE==0) {
151 		const u32 uMsk = 0x1FE7F9FE;
152 		// Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already
153 		mix = (uDst24 + (uSrc24 & uMsk)) >> 1;
154 	}
155 
156 	// 1.0 x Back + 1.0 x Forward
157 	if (BLENDMODE==1) {
158 		u32 sum     = uSrc24 + uDst24;
159 		u32 carries = sum & 0x20080200;
160 		u32 modulo  = sum - carries;
161 		u32 clamp   = carries - (carries >> 9);
162 		mix = modulo | clamp;
163 	}
164 
165 	// 1.0 x Back - 1.0 x Forward
166 	if (BLENDMODE==2) {
167 		// Insert ones in 0-padded borrow slot of color to be subtracted from
168 		uDst24 |= 0x20080200;
169 		u32 diff    = uDst24 - uSrc24;
170 		u32 borrows = diff & 0x20080200;
171 		u32 clamp   = borrows - (borrows >> 9);
172 		mix = diff & clamp;
173 	}
174 
175 	// 1.0 x Back + 0.25 x Forward
176 	if (BLENDMODE==3) {
177 		uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2;
178 		u32 sum     = uSrc24 + uDst24;
179 		u32 carries = sum & 0x20080200;
180 		u32 modulo  = sum - carries;
181 		u32 clamp   = carries - (carries >> 9);
182 		mix = modulo | clamp;
183 	}
184 
185 	return mix;
186 }
187 
188 #endif  //_OP_BLEND_H_
189