1 /*
2   Copyright (C) 2001 artofcode LLC.
3 
4   This program is free software; you can redistribute it and/or modify it
5   under the terms of the GNU General Public License as published by the
6   Free Software Foundation; either version 2 of the License, or (at your
7   option) any later version.
8 
9   This program is distributed in the hope that it will be useful, but
10   WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   General Public License for more details.
13 
14   You should have received a copy of the GNU General Public License along
15   with this program; if not, write to the Free Software Foundation, Inc.,
16   59 Temple Place, Suite 330, Boston, MA, 02111-1307.
17 
18 
19   Author: Raph Levien <raph@artofcode.com>
20 */
21 /*$Id: gxblend.c,v 1.2.2.1.2.1 2003/01/17 00:49:03 giles Exp $ */
22 /* PDF 1.4 blending functions */
23 
24 #include "memory_.h"
25 #include "gx.h"
26 #include "gstparam.h"
27 #include "gxblend.h"
28 
/* Signed scratch integer.  In this file it holds backdrop - src in the
   Difference blend mode before the absolute value is taken. */
29 typedef int art_s32;
30 
31 static void
art_blend_luminosity_rgb_8(byte * dst,const byte * backdrop,const byte * src)32 art_blend_luminosity_rgb_8(byte *dst, const byte *backdrop,
33 			   const byte *src)
34 {
35     int rb = backdrop[0], gb = backdrop[1], bb = backdrop[2];
36     int rs = src[0], gs = src[1], bs = src[2];
37     int delta_y;
38     int r, g, b;
39 
40     delta_y = ((rs - rb) * 77 + (gs - gb) * 151 + (bs - bb) * 28 + 0x80) >> 8;
41     r = rb + delta_y;
42     g = gb + delta_y;
43     b = bb + delta_y;
44     if ((r | g | b) & 0x100) {
45 	int y;
46 	int scale;
47 
48 	y = (rs * 77 + gs * 151 + bs * 28 + 0x80) >> 8;
49 	if (delta_y > 0) {
50 	    int max;
51 
52 	    max = r > g ? r : g;
53 	    max = b > max ? b : max;
54 	    scale = ((255 - y) << 16) / (max - y);
55 	} else {
56 	    int min;
57 
58 	    min = r < g ? r : g;
59 	    min = b < min ? b : min;
60 	    scale = (y << 16) / (y - min);
61 	}
62 	r = y + (((r - y) * scale + 0x8000) >> 16);
63 	g = y + (((g - y) * scale + 0x8000) >> 16);
64 	b = y + (((b - y) * scale + 0x8000) >> 16);
65     }
66     dst[0] = r;
67     dst[1] = g;
68     dst[2] = b;
69 }
70 
71 static void
art_blend_saturation_rgb_8(byte * dst,const byte * backdrop,const byte * src)72 art_blend_saturation_rgb_8(byte *dst, const byte *backdrop,
73 			   const byte *src)
74 {
75     int rb = backdrop[0], gb = backdrop[1], bb = backdrop[2];
76     int rs = src[0], gs = src[1], bs = src[2];
77     int minb, maxb;
78     int mins, maxs;
79     int y;
80     int scale;
81     int r, g, b;
82 
83     minb = rb < gb ? rb : gb;
84     minb = minb < bb ? minb : bb;
85     maxb = rb > gb ? rb : gb;
86     maxb = maxb > bb ? maxb : bb;
87     if (minb == maxb) {
88 	/* backdrop has zero saturation, avoid divide by 0 */
89 	dst[0] = gb;
90 	dst[1] = gb;
91 	dst[2] = gb;
92 	return;
93     }
94 
95     mins = rs < gs ? rs : gs;
96     mins = mins < bs ? mins : bs;
97     maxs = rs > gs ? rs : gs;
98     maxs = maxs > bs ? maxs : bs;
99 
100     scale = ((maxs - mins) << 16) / (maxb - minb);
101     y = (rb * 77 + gb * 151 + bb * 28 + 0x80) >> 8;
102     r = y + ((((rb - y) * scale) + 0x8000) >> 16);
103     g = y + ((((gb - y) * scale) + 0x8000) >> 16);
104     b = y + ((((bb - y) * scale) + 0x8000) >> 16);
105 
106     if ((r | g | b) & 0x100) {
107 	int scalemin, scalemax;
108 	int min, max;
109 
110 	min = r < g ? r : g;
111 	min = min < b ? min : b;
112 	max = r > g ? r : g;
113 	max = max > b ? max : b;
114 
115 	if (min < 0)
116 	    scalemin = (y << 16) / (y - min);
117 	else
118 	    scalemin = 0x10000;
119 
120 	if (max > 255)
121 	    scalemax = ((255 - y) << 16) / (max - y);
122 	else
123 	    scalemax = 0x10000;
124 
125 	scale = scalemin < scalemax ? scalemin : scalemax;
126 	r = y + (((r - y) * scale + 0x8000) >> 16);
127 	g = y + (((g - y) * scale + 0x8000) >> 16);
128 	b = y + (((b - y) * scale + 0x8000) >> 16);
129     }
130 
131     dst[0] = r;
132     dst[1] = g;
133     dst[2] = b;
134 }
135 
136 /* This array consists of floor ((x - x * x / 255.0) * 65536 / 255 +
137    0.5) for x in [0..255].  With x normalized to [0..1] that is
       x * (1 - x) expressed with 16 fractional bits, so the largest
       entry is 16384 (0.25).  art_blend_pixel_8 indexes it with the
       backdrop value in the SoftLight case, for source values below
       0x80. */
138 const unsigned int art_blend_sq_diff_8[256] = {
139     0, 256, 510, 762, 1012, 1260, 1506, 1750, 1992, 2231, 2469, 2705,
140     2939, 3171, 3401, 3628, 3854, 4078, 4300, 4519, 4737, 4953, 5166,
141     5378, 5588, 5795, 6001, 6204, 6406, 6606, 6803, 6999, 7192, 7384,
142     7573, 7761, 7946, 8129, 8311, 8490, 8668, 8843, 9016, 9188, 9357,
143     9524, 9690, 9853, 10014, 10173, 10331, 10486, 10639, 10790, 10939,
144     11086, 11232, 11375, 11516, 11655, 11792, 11927, 12060, 12191, 12320,
145     12447, 12572, 12695, 12816, 12935, 13052, 13167, 13280, 13390, 13499,
146     13606, 13711, 13814, 13914, 14013, 14110, 14205, 14297, 14388, 14477,
147     14564, 14648, 14731, 14811, 14890, 14967, 15041, 15114, 15184, 15253,
148     15319, 15384, 15446, 15507, 15565, 15622, 15676, 15729, 15779, 15827,
149     15874, 15918, 15960, 16001, 16039, 16075, 16110, 16142, 16172, 16200,
150     16227, 16251, 16273, 16293, 16311, 16327, 16341, 16354, 16364, 16372,
151     16378, 16382, 16384, 16384, 16382, 16378, 16372, 16364, 16354, 16341,
152     16327, 16311, 16293, 16273, 16251, 16227, 16200, 16172, 16142, 16110,
153     16075, 16039, 16001, 15960, 15918, 15874, 15827, 15779, 15729, 15676,
154     15622, 15565, 15507, 15446, 15384, 15319, 15253, 15184, 15114, 15041,
155     14967, 14890, 14811, 14731, 14648, 14564, 14477, 14388, 14297, 14205,
156     14110, 14013, 13914, 13814, 13711, 13606, 13499, 13390, 13280, 13167,
157     13052, 12935, 12816, 12695, 12572, 12447, 12320, 12191, 12060, 11927,
158     11792, 11655, 11516, 11375, 11232, 11086, 10939, 10790, 10639, 10486,
159     10331, 10173, 10014, 9853, 9690, 9524, 9357, 9188, 9016, 8843, 8668,
160     8490, 8311, 8129, 7946, 7761, 7573, 7384, 7192, 6999, 6803, 6606,
161     6406, 6204, 6001, 5795, 5588, 5378, 5166, 4953, 4737, 4519, 4300,
162     4078, 3854, 3628, 3401, 3171, 2939, 2705, 2469, 2231, 1992, 1750,
163     1506, 1260, 1012, 762, 510, 256, 0
164 };
165 
166 /* This array consists of SoftLight (x, 255) - x, for values of x in
167    the range [0..255] (normalized to the [0..255] range). The original
168    values were directly sampled from Adobe Illustrator 9. I've fit a
169    quadratic spline to the SoftLight (x, 1) function as follows
170    (normalized to [0..1] range):
171 
172    Anchor point (0, 0)
173    Control point (0.0755, 0.302)
174    Anchor point (0.18, 0.4245)
175    Control point (0.4263, 0.7131)
176    Anchor point (1, 1)
177 
178    I don't believe this is _exactly_ the function that Adobe uses,
179    but it really should be close enough for all practical purposes.

       art_blend_pixel_8 indexes this table with the backdrop value in
       the SoftLight case, for source values of 0x80 and above.  */
180 const byte art_blend_soft_light_8[256] = {
181     0, 3, 6, 9, 11, 14, 16, 19, 21, 23, 26, 28, 30, 32, 33, 35, 37, 39,
182     40, 42, 43, 45, 46, 47, 48, 49, 51, 52, 53, 53, 54, 55, 56, 57, 57,
183     58, 58, 59, 60, 60, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 63,
184     63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
185     64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
186     62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59,
187     59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55,
188     55, 55, 54, 54, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50,
189     50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 45, 45, 45,
190     44, 44, 43, 43, 43, 42, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39, 38,
191     38, 37, 37, 37, 36, 36, 35, 35, 35, 34, 34, 33, 33, 33, 32, 32, 31,
192     31, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24,
193     24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16,
194     16, 15, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7,
195     7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0
196 };
197 
198 void
art_blend_pixel_8(byte * dst,const byte * backdrop,const byte * src,int n_chan,gs_blend_mode_t blend_mode)199 art_blend_pixel_8(byte *dst, const byte *backdrop,
200 		  const byte *src, int n_chan, gs_blend_mode_t blend_mode)
201 {
202     int i;
203     byte b, s;
204     bits32 t;
205 
206     switch (blend_mode) {
207 	case BLEND_MODE_Normal:
208 	case BLEND_MODE_Compatible:	/* todo */
209 	    memcpy(dst, src, n_chan);
210 	    break;
211 	case BLEND_MODE_Multiply:
212 	    for (i = 0; i < n_chan; i++) {
213 		t = ((bits32) backdrop[i]) * ((bits32) src[i]);
214 		t += 0x80;
215 		t += (t >> 8);
216 		dst[i] = t >> 8;
217 	    }
218 	    break;
219 	case BLEND_MODE_Screen:
220 	    for (i = 0; i < n_chan; i++) {
221 		t =
222 		    ((bits32) (0xff - backdrop[i])) *
223 		    ((bits32) (0xff - src[i]));
224 		t += 0x80;
225 		t += (t >> 8);
226 		dst[i] = 0xff - (t >> 8);
227 	    }
228 	    break;
229 	case BLEND_MODE_Overlay:
230 	    for (i = 0; i < n_chan; i++) {
231 		b = backdrop[i];
232 		s = src[i];
233 		if (b < 0x80)
234 		    t = 2 * ((bits32) b) * ((bits32) s);
235 		else
236 		    t = 0xfe01 -
237 			2 * ((bits32) (0xff - b)) * ((bits32) (0xff - s));
238 		t += 0x80;
239 		t += (t >> 8);
240 		dst[i] = t >> 8;
241 	    }
242 	    break;
243 	case BLEND_MODE_SoftLight:
244 	    for (i = 0; i < n_chan; i++) {
245 		b = backdrop[i];
246 		s = src[i];
247 		if (s < 0x80) {
248 		    t = (0xff - (s << 1)) * art_blend_sq_diff_8[b];
249 		    t += 0x8000;
250 		    dst[i] = b - (t >> 16);
251 		} else {
252 		    t =
253 			((s << 1) -
254 			 0xff) * ((bits32) (art_blend_soft_light_8[b]));
255 		    t += 0x80;
256 		    t += (t >> 8);
257 		    dst[i] = b + (t >> 8);
258 		}
259 	    }
260 	    break;
261 	case BLEND_MODE_HardLight:
262 	    for (i = 0; i < n_chan; i++) {
263 		b = backdrop[i];
264 		s = src[i];
265 		if (s < 0x80)
266 		    t = 2 * ((bits32) b) * ((bits32) s);
267 		else
268 		    t = 0xfe01 -
269 			2 * ((bits32) (0xff - b)) * ((bits32) (0xff - s));
270 		t += 0x80;
271 		t += (t >> 8);
272 		dst[i] = t >> 8;
273 	    }
274 	    break;
275 	case BLEND_MODE_ColorDodge:
276 	    for (i = 0; i < n_chan; i++) {
277 		b = backdrop[i];
278 		s = 0xff - src[i];
279 		if (b == 0)
280 		    dst[i] = 0;
281 		else if (b >= s)
282 		    dst[i] = 0xff;
283 		else
284 		    dst[i] = (0x1fe * b + s) / (s << 1);
285 	    }
286 	    break;
287 	case BLEND_MODE_ColorBurn:
288 	    for (i = 0; i < n_chan; i++) {
289 		b = 0xff - backdrop[i];
290 		s = src[i];
291 		if (b == 0)
292 		    dst[i] = 0xff;
293 		else if (b >= s)
294 		    dst[i] = 0;
295 		else
296 		    dst[i] = 0xff - (0x1fe * b + s) / (s << 1);
297 	    }
298 	    break;
299 	case BLEND_MODE_Darken:
300 	    for (i = 0; i < n_chan; i++) {
301 		b = backdrop[i];
302 		s = src[i];
303 		dst[i] = b < s ? b : s;
304 	    }
305 	    break;
306 	case BLEND_MODE_Lighten:
307 	    for (i = 0; i < n_chan; i++) {
308 		b = backdrop[i];
309 		s = src[i];
310 		dst[i] = b > s ? b : s;
311 	    }
312 	    break;
313 	case BLEND_MODE_Difference:
314 	    for (i = 0; i < n_chan; i++) {
315 		art_s32 tmp;
316 
317 		tmp = ((art_s32) backdrop[i]) - ((art_s32) src[i]);
318 		dst[i] = tmp < 0 ? -tmp : tmp;
319 	    }
320 	    break;
321 	case BLEND_MODE_Exclusion:
322 	    for (i = 0; i < n_chan; i++) {
323 		b = backdrop[i];
324 		s = src[i];
325 		t = ((bits32) (0xff - b)) * ((bits32) s) +
326 		    ((bits32) b) * ((bits32) (0xff - s));
327 		t += 0x80;
328 		t += (t >> 8);
329 		dst[i] = t >> 8;
330 	    }
331 	    break;
332 	case BLEND_MODE_Luminosity:
333 	    art_blend_luminosity_rgb_8(dst, backdrop, src);
334 	    break;
335 	case BLEND_MODE_Color:
336 	    art_blend_luminosity_rgb_8(dst, src, backdrop);
337 	    break;
338 	case BLEND_MODE_Saturation:
339 	    art_blend_saturation_rgb_8(dst, backdrop, src);
340 	    break;
341 	case BLEND_MODE_Hue:
342 	    {
343 		byte tmp[3];
344 
345 		art_blend_luminosity_rgb_8(tmp, src, backdrop);
346 		art_blend_saturation_rgb_8(dst, tmp, backdrop);
347 	    }
348 	    break;
349 	default:
350 	    dlprintf1("art_blend_pixel_8: blend mode %d not implemented\n",
351 		      blend_mode);
352 	    memcpy(dst, src, n_chan);
353 	    break;
354     }
355 }
356 
357 void
art_blend_pixel(ArtPixMaxDepth * dst,const ArtPixMaxDepth * backdrop,const ArtPixMaxDepth * src,int n_chan,gs_blend_mode_t blend_mode)358 art_blend_pixel(ArtPixMaxDepth* dst, const ArtPixMaxDepth *backdrop,
359 		const ArtPixMaxDepth* src, int n_chan,
360 		gs_blend_mode_t blend_mode)
361 {
362     int i;
363     ArtPixMaxDepth b, s;
364     bits32 t;
365 
366     switch (blend_mode) {
367 	case BLEND_MODE_Normal:
368 	case BLEND_MODE_Compatible:	/* todo */
369 	    memcpy(dst, src, n_chan * sizeof(ArtPixMaxDepth));
370 	    break;
371 	case BLEND_MODE_Multiply:
372 	    for (i = 0; i < n_chan; i++) {
373 		t = ((bits32) backdrop[i]) * ((bits32) src[i]);
374 		t += 0x8000;
375 		t += (t >> 16);
376 		dst[i] = t >> 16;
377 	    }
378 	    break;
379 	case BLEND_MODE_Screen:
380 	    for (i = 0; i < n_chan; i++) {
381 		t =
382 		    ((bits32) (0xffff - backdrop[i])) *
383 		    ((bits32) (0xffff - src[i]));
384 		t += 0x8000;
385 		t += (t >> 16);
386 		dst[i] = 0xffff - (t >> 16);
387 	    }
388 	    break;
389 	case BLEND_MODE_Overlay:
390 	    for (i = 0; i < n_chan; i++) {
391 		b = backdrop[i];
392 		s = src[i];
393 		if (b < 0x8000)
394 		    t = 2 * ((bits32) b) * ((bits32) s);
395 		else
396 		    t = 0xfffe0001u -
397 			2 * ((bits32) (0xffff - b)) * ((bits32) (0xffff - s));
398 		t += 0x8000;
399 		t += (t >> 16);
400 		dst[i] = t >> 16;
401 	    }
402 	    break;
403 	case BLEND_MODE_HardLight:
404 	    for (i = 0; i < n_chan; i++) {
405 		b = backdrop[i];
406 		s = src[i];
407 		if (s < 0x8000)
408 		    t = 2 * ((bits32) b) * ((bits32) s);
409 		else
410 		    t = 0xfffe0001u -
411 			2 * ((bits32) (0xffff - b)) * ((bits32) (0xffff - s));
412 		t += 0x8000;
413 		t += (t >> 16);
414 		dst[i] = t >> 16;
415 	    }
416 	    break;
417 	case BLEND_MODE_ColorDodge:
418 	    for (i = 0; i < n_chan; i++) {
419 		b = backdrop[i];
420 		s = src[i];
421 		if (b == 0)
422 		    dst[i] = 0;
423 		else if (s >= b)
424 		    dst[i] = 0xffff;
425 		else
426 		    dst[i] = (0x1fffe * s + b) / (b << 1);
427 	    }
428 	    break;
429 	case BLEND_MODE_ColorBurn:
430 	    for (i = 0; i < n_chan; i++) {
431 		b = 0xffff - backdrop[i];
432 		s = src[i];
433 		if (b == 0)
434 		    dst[i] = 0xffff;
435 		else if (b >= s)
436 		    dst[i] = 0;
437 		else
438 		    dst[i] = 0xffff - (0x1fffe * b + s) / (s << 1);
439 	    }
440 	case BLEND_MODE_Darken:
441 	    for (i = 0; i < n_chan; i++) {
442 		b = backdrop[i];
443 		s = src[i];
444 		dst[i] = b < s ? b : s;
445 	    }
446 	    break;
447 	case BLEND_MODE_Lighten:
448 	    for (i = 0; i < n_chan; i++) {
449 		b = backdrop[i];
450 		s = src[i];
451 		dst[i] = b > s ? b : s;
452 	    }
453 	    break;
454 	case BLEND_MODE_Difference:
455 	    for (i = 0; i < n_chan; i++) {
456 		art_s32 tmp;
457 
458 		tmp = ((art_s32) backdrop[i]) - ((art_s32) src[i]);
459 		dst[i] = tmp < 0 ? -tmp : tmp;
460 	    }
461 	    break;
462 	case BLEND_MODE_Exclusion:
463 	    for (i = 0; i < n_chan; i++) {
464 		b = backdrop[i];
465 		s = src[i];
466 		t = ((bits32) (0xffff - b)) * ((bits32) s) +
467 		    ((bits32) b) * ((bits32) (0xffff - s));
468 		t += 0x8000;
469 		t += (t >> 16);
470 		dst[i] = t >> 16;
471 	    }
472 	    break;
473 	default:
474 	    dlprintf1("art_blend_pixel: blend mode %d not implemented\n",
475 		      blend_mode);
476 	    memcpy(dst, src, n_chan);
477 	    break;
478     }
479 }
480 
481 byte
art_pdf_union_8(byte alpha1,byte alpha2)482 art_pdf_union_8(byte alpha1, byte alpha2)
483 {
484     int tmp;
485 
486     tmp = (0xff - alpha1) * (0xff - alpha2) + 0x80;
487     return 0xff - ((tmp + (tmp >> 8)) >> 8);
488 }
489 
490 byte
art_pdf_union_mul_8(byte alpha1,byte alpha2,byte alpha_mask)491 art_pdf_union_mul_8(byte alpha1, byte alpha2, byte alpha_mask)
492 {
493     int tmp;
494 
495     if (alpha_mask == 0xff) {
496 	tmp = (0xff - alpha1) * (0xff - alpha2) + 0x80;
497 	return 0xff - ((tmp + (tmp >> 8)) >> 8);
498     } else {
499 	tmp = alpha2 * alpha_mask + 0x80;
500 	tmp = (tmp + (tmp >> 8)) >> 8;
501 	tmp = (0xff - alpha1) * (0xff - tmp) + 0x80;
502 	return 0xff - ((tmp + (tmp >> 8)) >> 8);
503     }
504 }
505 
506 void
art_pdf_composite_pixel_alpha_8(byte * dst,const byte * src,int n_chan,gs_blend_mode_t blend_mode)507 art_pdf_composite_pixel_alpha_8(byte *dst, const byte *src, int n_chan,
508 				gs_blend_mode_t blend_mode)
509 {
510     byte a_b, a_s;
511     unsigned int a_r;
512     int tmp;
513     int src_scale;
514     int c_b, c_s;
515     int i;
516 
517     a_s = src[n_chan];
518     if (a_s == 0) {
519 	/* source alpha is zero, avoid all computations and possible
520 	   divide by zero errors. */
521 	return;
522     }
523 
524     a_b = dst[n_chan];
525     if (a_b == 0) {
526 	/* backdrop alpha is zero, just copy source pixels and avoid
527 	   computation. */
528 
529 	/* this idiom is faster than memcpy (dst, src, n_chan + 1); for
530 	   expected small values of n_chan. */
531 	for (i = 0; i <= n_chan >> 2; i++) {
532 	    ((bits32 *) dst)[i] = ((const bits32 *)src)[i];
533 	}
534 
535 	return;
536     }
537 
538     /* Result alpha is Union of backdrop and source alpha */
539     tmp = (0xff - a_b) * (0xff - a_s) + 0x80;
540     a_r = 0xff - (((tmp >> 8) + tmp) >> 8);
541     /* todo: verify that a_r is nonzero in all cases */
542 
543     /* Compute a_s / a_r in 16.16 format */
544     src_scale = ((a_s << 16) + (a_r >> 1)) / a_r;
545 
546     if (blend_mode == BLEND_MODE_Normal) {
547 	/* Do simple compositing of source over backdrop */
548 	for (i = 0; i < n_chan; i++) {
549 	    c_s = src[i];
550 	    c_b = dst[i];
551 	    tmp = (c_b << 16) + src_scale * (c_s - c_b) + 0x8000;
552 	    dst[i] = tmp >> 16;
553 	}
554     } else {
555 	/* Do compositing with blending */
556 	byte blend[ART_MAX_CHAN];
557 
558 	art_blend_pixel_8(blend, dst, src, n_chan, blend_mode);
559 	for (i = 0; i < n_chan; i++) {
560 	    int c_bl;		/* Result of blend function */
561 	    int c_mix;		/* Blend result mixed with source color */
562 
563 	    c_s = src[i];
564 	    c_b = dst[i];
565 	    c_bl = blend[i];
566 	    tmp = a_b * (c_bl - ((int)c_s)) + 0x80;
567 	    c_mix = c_s + (((tmp >> 8) + tmp) >> 8);
568 	    tmp = (c_b << 16) + src_scale * (c_mix - c_b) + 0x8000;
569 	    dst[i] = tmp >> 16;
570 	}
571     }
572     dst[n_chan] = a_r;
573 }
574 
575 #if 0
/* NOTE(review): everything down to the matching #endif is compiled out.
   The function below is an unfinished draft: it uses identifiers it never
   declares (backdrop_scale, ct_scale, c_s, c_b, a_b, blend_alpha, and
   src_tmp in the nested #if 0), contains the statement
   "backdrop_scale = if (...)", which is not valid C, and ends in an empty
   loop.  It cannot simply be re-enabled. */
576 /**
577  * art_pdf_composite_pixel_knockout_8: Composite two pixels with knockout.
578  * @dst: Where to store resulting pixel, also immediate backdrop.
579  * @backdrop: Initial backdrop color.
580  * @src: Source pixel color.
581  * @n_chan: Number of channels.
582  * @blend_mode: Blend mode.
583  *
584  * Composites two pixels using the compositing operation specialized
585  * for knockout groups (Section 5.5). A few things to keep in mind:
586  *
587  * 1. This is a reference implementation, not a high-performance one.
588  *
589  * 2. All pixels are assumed to have a single alpha channel.
590  *
591  * 3. Zero is black, one is white.
592  *
593  * Also note that src and dst are expected to be allocated aligned to
594  * 32 bit boundaries, ie bytes from [0] to [(n_chan + 3) & -4] may
595  * be accessed.
596  *
597  * All pixel values have both alpha and shape channels, ie with those
598  * included the total number of channels is @n_chan + 2.
599  *
600  * An invariant: shape >= alpha.
601  **/
602 void
603 art_pdf_composite_pixel_knockout_8(byte *dst,
604 				   const byte *backdrop, const byte *src,
605 				   int n_chan, gs_blend_mode_t blend_mode)
606 {
607     int i;
608     byte ct[ART_MAX_CHAN + 1];
609     byte src_shape;
610     byte backdrop_alpha;
611     byte dst_alpha;
612     bits32 src_opacity;
613     bits32 backdrop_weight, t_weight;
614     int tmp;
615 
616     if (src[n_chan] == 0)
617 	return;
618     if (src[n_chan + 1] == 255 && blend_mode == BLEND_MODE_Normal ||
619 	dst[n_chan] == 0) {
620 	/* this idiom is faster than memcpy (dst, src, n_chan + 2); for
621 	   expected small values of n_chan. */
622 	for (i = 0; i <= (n_chan + 1) >> 2; i++) {
623 	    ((bits32 *) dst)[i] = ((const bits32 *)src[i]);
624 	}
625 
626 	return;
627     }
628 
629 
630     src_shape = src[n_chan + 1];	/* $fs_i$ */
631     src_opacity = (255 * src[n_chan] + 0x80) / src_shape;	/* $qs_i$ */
632 #if 0
633     for (i = 0; i < (n_chan + 3) >> 2; i++) {
634 	((bits32 *) src_tmp)[i] = ((const bits32 *)src[i]);
635     }
636     src_tmp[n_chan] = src_opacity;
637 
638     for (i = 0; i <= n_chan >> 2; i++) {
639 	((bits32 *) tmp)[i] = ((bits32 *) backdrop[i]);
640     }
641 #endif
642 
643     backdrop_scale = if (blend_mode == BLEND_MODE_Normal) {
644 	/* Do simple compositing of source over backdrop */
645 	for (i = 0; i < n_chan; i++) {
646 	    c_s = src[i];
647 	    c_b = dst[i];
648 	    tmp = (c_b << 16) + ct_scale * (c_s - c_b) + 0x8000;
649 	    ct[i] = tmp >> 16;
650 	}
651     } else {
652 	/* Do compositing with blending */
653 	byte blend[ART_MAX_CHAN];
654 
655 	art_blend_pixel_8(blend, backdrop, src, n_chan, blend_mode);
656 	for (i = 0; i < n_chan; i++) {
657 	    int c_bl;		/* Result of blend function */
658 	    int c_mix;		/* Blend result mixed with source color */
659 
660 	    c_s = src[i];
661 	    c_b = dst[i];
662 	    c_bl = blend[i];
663 	    tmp = a_b * (((int)c_bl) - ((int)c_s)) + 0x80;
664 	    c_mix = c_s + (((tmp >> 8) + tmp) >> 8);
665 	    tmp = (c_b << 16) + ct_scale * (c_mix - c_b) + 0x8000;
666 	    ct[i] = tmp >> 16;
667 	}
668     }
669 
670     /* do weighted average of $Ct$ using relative alpha contribution as weight */
671     backdrop_alpha = backdrop[n_chan];
672     tmp = (0xff - blend_alpha) * (0xff - backdrop_alpha) + 0x80;
673     dst_alpha = 0xff - (((tmp >> 8) + tmp) >> 8);
674     dst[n_chan] = dst_alpha;
675     t_weight = ((blend_alpha << 16) + 0x8000) / dst_alpha;
676     for (i = 0; i < n_chan; i++) {
677 
678     }
679 }
680 #endif
681 
682 void
art_pdf_uncomposite_group_8(byte * dst,const byte * backdrop,const byte * src,byte src_alpha_g,int n_chan)683 art_pdf_uncomposite_group_8(byte *dst,
684 			    const byte *backdrop,
685 			    const byte *src, byte src_alpha_g, int n_chan)
686 {
687     byte backdrop_alpha = backdrop[n_chan];
688     int i;
689     int tmp;
690     int scale;
691 
692     dst[n_chan] = src_alpha_g;
693 
694     if (src_alpha_g == 0)
695 	return;
696 
697     scale = (backdrop_alpha * 255 * 2 + src_alpha_g) / (src_alpha_g << 1) -
698 	backdrop_alpha;
699     for (i = 0; i < n_chan; i++) {
700 	int si, di;
701 
702 	si = src[i];
703 	di = backdrop[i];
704 	tmp = (si - di) * scale + 0x80;
705 	tmp = si + ((tmp + (tmp >> 8)) >> 8);
706 
707 	/* todo: it should be possible to optimize these cond branches */
708 	if (tmp < 0)
709 	    tmp = 0;
710 	if (tmp > 255)
711 	    tmp = 255;
712 	dst[i] = tmp;
713     }
714 
715 }
716 
717 void
art_pdf_recomposite_group_8(byte * dst,byte * dst_alpha_g,const byte * src,byte src_alpha_g,int n_chan,byte alpha,gs_blend_mode_t blend_mode)718 art_pdf_recomposite_group_8(byte *dst, byte *dst_alpha_g,
719 			    const byte *src, byte src_alpha_g,
720 			    int n_chan,
721 			    byte alpha, gs_blend_mode_t blend_mode)
722 {
723     byte dst_alpha;
724     int i;
725     int tmp;
726     int scale;
727 
728     if (src_alpha_g == 0)
729 	return;
730 
731     if (blend_mode == BLEND_MODE_Normal && alpha == 255) {
732 	/* In this case, uncompositing and recompositing cancel each
733 	   other out. Note: if the reason that alpha == 255 is that
734 	   there is no constant mask and no soft mask, then this
735 	   operation should be optimized away at a higher level. */
736 	for (i = 0; i <= n_chan >> 2; i++)
737 	    ((bits32 *) dst)[i] = ((const bits32 *)src)[i];
738 	if (dst_alpha_g != NULL) {
739 	    tmp = (255 - *dst_alpha_g) * (255 - src_alpha_g) + 0x80;
740 	    *dst_alpha_g = 255 - ((tmp + (tmp >> 8)) >> 8);
741 	}
742 	*dst_alpha_g = src[n_chan];
743 	return;
744     } else {
745 	/* "interesting" blend mode */
746 	byte ca[ART_MAX_CHAN + 1];	/* $C, \alpha$ */
747 
748 	dst_alpha = dst[n_chan];
749 	if (src_alpha_g == 255 || dst_alpha == 0) {
750 	    for (i = 0; i < (n_chan + 3) >> 2; i++)
751 		((bits32 *) ca)[i] = ((const bits32 *)src)[i];
752 	} else {
753 	    /* Uncomposite the color. In other words, solve
754 	       "src = (ca, src_alpha_g) over dst" for ca */
755 
756 	    /* todo (maybe?): replace this code with call to
757 	       art_pdf_uncomposite_group_8() to reduce code
758 	       duplication. */
759 
760 	    scale = (dst_alpha * 255 * 2 + src_alpha_g) / (src_alpha_g << 1) -
761 		dst_alpha;
762 	    for (i = 0; i < n_chan; i++) {
763 		int si, di;
764 
765 		si = src[i];
766 		di = dst[i];
767 		tmp = (si - di) * scale + 0x80;
768 		tmp = si + ((tmp + (tmp >> 8)) >> 8);
769 
770 		/* todo: it should be possible to optimize these cond branches */
771 		if (tmp < 0)
772 		    tmp = 0;
773 		if (tmp > 255)
774 		    tmp = 255;
775 		ca[i] = tmp;
776 	    }
777 	}
778 
779 	tmp = src_alpha_g * alpha + 0x80;
780 	tmp = (tmp + (tmp >> 8)) >> 8;
781 	ca[n_chan] = tmp;
782 	if (dst_alpha_g != NULL) {
783 	    tmp = (255 - *dst_alpha_g) * (255 - tmp) + 0x80;
784 	    *dst_alpha_g = 255 - ((tmp + (tmp >> 8)) >> 8);
785 	}
786 	art_pdf_composite_pixel_alpha_8(dst, ca, n_chan, blend_mode);
787     }
788     /* todo: optimize BLEND_MODE_Normal buf alpha != 255 case */
789 }
790 
791 void
art_pdf_composite_group_8(byte * dst,byte * dst_alpha_g,const byte * src,int n_chan,byte alpha,gs_blend_mode_t blend_mode)792 art_pdf_composite_group_8(byte *dst, byte *dst_alpha_g,
793 			  const byte *src,
794 			  int n_chan, byte alpha, gs_blend_mode_t blend_mode)
795 {
796     byte src_alpha;		/* $\alpha g_n$ */
797     byte src_tmp[ART_MAX_CHAN + 1];
798     int i;
799     int tmp;
800 
801     if (alpha == 255) {
802 	art_pdf_composite_pixel_alpha_8(dst, src, n_chan, blend_mode);
803 	if (dst_alpha_g != NULL) {
804 	    tmp = (255 - *dst_alpha_g) * (255 - src[n_chan]) + 0x80;
805 	    *dst_alpha_g = 255 - ((tmp + (tmp >> 8)) >> 8);
806 	}
807     } else {
808 	src_alpha = src[n_chan];
809 	if (src_alpha == 0)
810 	    return;
811 	for (i = 0; i < (n_chan + 3) >> 2; i++)
812 	    ((bits32 *) src_tmp)[i] = ((const bits32 *)src)[i];
813 	tmp = src_alpha * alpha + 0x80;
814 	src_tmp[n_chan] = (tmp + (tmp >> 8)) >> 8;
815 	art_pdf_composite_pixel_alpha_8(dst, src_tmp, n_chan, blend_mode);
816 	if (dst_alpha_g != NULL) {
817 	    tmp = (255 - *dst_alpha_g) * (255 - src_tmp[n_chan]) + 0x80;
818 	    *dst_alpha_g = 255 - ((tmp + (tmp >> 8)) >> 8);
819 	}
820     }
821 }
822 
823 void
art_pdf_composite_knockout_simple_8(byte * dst,byte * dst_shape,const byte * src,int n_chan,byte opacity)824 art_pdf_composite_knockout_simple_8(byte *dst,
825 				    byte *dst_shape,
826 				    const byte *src,
827 				    int n_chan, byte opacity)
828 {
829     byte src_shape = src[n_chan];
830     int i;
831 
832     if (src_shape == 0)
833 	return;
834     else if (src_shape == 255) {
835 	for (i = 0; i < (n_chan + 3) >> 2; i++)
836 	    ((bits32 *) dst)[i] = ((const bits32 *)src)[i];
837 	dst[n_chan] = opacity;
838 	if (dst_shape != NULL)
839 	    *dst_shape = 255;
840     } else {
841 	/* Use src_shape to interpolate (in premultiplied alpha space)
842 	   between dst and (src, opacity). */
843 	int dst_alpha = dst[n_chan];
844 	byte result_alpha;
845 	int tmp;
846 
847 	tmp = (opacity - dst_alpha) * src_shape + 0x80;
848 	result_alpha = dst_alpha + ((tmp + (tmp >> 8)) >> 8);
849 
850 	if (result_alpha != 0)
851 	    for (i = 0; i < n_chan; i++) {
852 		/* todo: optimize this - can strength-reduce so that
853 		   inner loop is a single interpolation */
854 		tmp = dst[i] * dst_alpha * (255 - src_shape) +
855 		    ((int)src[i]) * opacity * src_shape + (result_alpha << 7);
856 		dst[i] = tmp / (result_alpha * 255);
857 	    }
858 	dst[n_chan] = result_alpha;
859 
860 	/* union in dst_shape if non-null */
861 	if (dst_shape != NULL) {
862 	    tmp = (255 - *dst_shape) * (255 - src_shape) + 0x80;
863 	    *dst_shape = 255 - ((tmp + (tmp >> 8)) >> 8);
864 	}
865     }
866 }
867 
868 void
art_pdf_composite_knockout_isolated_8(byte * dst,byte * dst_shape,const byte * src,int n_chan,byte shape,byte alpha_mask,byte shape_mask)869 art_pdf_composite_knockout_isolated_8(byte *dst,
870 				      byte *dst_shape,
871 				      const byte *src,
872 				      int n_chan,
873 				      byte shape,
874 				      byte alpha_mask, byte shape_mask)
875 {
876     int tmp;
877     int i;
878 
879     if (shape == 0)
880 	return;
881     else if ((shape & shape_mask) == 255) {
882 	for (i = 0; i < (n_chan + 3) >> 2; i++)
883 	    ((bits32 *) dst)[i] = ((const bits32 *)src)[i];
884 	tmp = src[n_chan] * alpha_mask + 0x80;
885 	dst[n_chan] = (tmp + (tmp >> 8)) >> 8;
886 	if (dst_shape != NULL)
887 	    *dst_shape = 255;
888     } else {
889 	/* Use src_shape to interpolate (in premultiplied alpha space)
890 	   between dst and (src, opacity). */
891 	byte src_shape, src_alpha;
892 	int dst_alpha = dst[n_chan];
893 	byte result_alpha;
894 	int tmp;
895 
896 	tmp = shape * shape_mask + 0x80;
897 	src_shape = (tmp + (tmp >> 8)) >> 8;
898 
899 	tmp = src[n_chan] * alpha_mask + 0x80;
900 	src_alpha = (tmp + (tmp >> 8)) >> 8;
901 
902 	tmp = (src_alpha - dst_alpha) * src_shape + 0x80;
903 	result_alpha = dst_alpha + ((tmp + (tmp >> 8)) >> 8);
904 
905 	if (result_alpha != 0)
906 	    for (i = 0; i < n_chan; i++) {
907 		/* todo: optimize this - can strength-reduce so that
908 		   inner loop is a single interpolation */
909 		tmp = dst[i] * dst_alpha * (255 - src_shape) +
910 		    ((int)src[i]) * src_alpha * src_shape +
911 		    (result_alpha << 7);
912 		dst[i] = tmp / (result_alpha * 255);
913 	    }
914 	dst[n_chan] = result_alpha;
915 
916 	/* union in dst_shape if non-null */
917 	if (dst_shape != NULL) {
918 	    tmp = (255 - *dst_shape) * (255 - src_shape) + 0x80;
919 	    *dst_shape = 255 - ((tmp + (tmp >> 8)) >> 8);
920 	}
921     }
922 }
923 
924 void
art_pdf_composite_knockout_8(byte * dst,byte * dst_alpha_g,const byte * backdrop,const byte * src,int n_chan,byte shape,byte alpha_mask,byte shape_mask,gs_blend_mode_t blend_mode)925 art_pdf_composite_knockout_8(byte *dst,
926 			     byte *dst_alpha_g,
927 			     const byte *backdrop,
928 			     const byte *src,
929 			     int n_chan,
930 			     byte shape,
931 			     byte alpha_mask,
932 			     byte shape_mask, gs_blend_mode_t blend_mode)
933 {
934     /* This implementation follows the Adobe spec pretty closely, rather
935        than trying to do anything clever. For example, in the case of a
936        Normal blend_mode when the top group is non-isolated, uncompositing
937        and recompositing is more work than needed. So be it. Right now,
938        I'm more worried about manageability than raw performance. */
939     byte alpha_t;
940     byte src_alpha, src_shape;
941     byte src_opacity;
942     byte ct[ART_MAX_CHAN];
943     byte backdrop_alpha;
944     byte alpha_g_i_1, alpha_g_i, alpha_i;
945     int tmp;
946     int i;
947     int scale_b;
948     int scale_src;
949 
950     if (shape == 0 || shape_mask == 0)
951 	return;
952 
953     tmp = shape * shape_mask + 0x80;
954     /* $f s_i$ */
955     src_shape = (tmp + (tmp >> 8)) >> 8;
956 
957     tmp = src[n_chan] * alpha_mask + 0x80;
958     src_alpha = (tmp + (tmp >> 8)) >> 8;
959 
960     /* $q s_i$ */
961     src_opacity = (src_alpha * 510 + src_shape) / (2 * src_shape);
962 
963     /* $\alpha t$, \alpha g_b is always zero for knockout groups */
964     alpha_t = src_opacity;
965 
966     /* $\alpha b$ */
967     backdrop_alpha = backdrop[n_chan];
968 
969     tmp = (0xff - src_opacity) * backdrop_alpha;
970     /* $(1 - q s_i) \cdot alpha_b$ scaled by 2^16 */
971     scale_b = tmp + (tmp >> 7) + (tmp >> 14);
972 
973     /* $q s_i$ scaled by 2^16 */
974     scale_src = (src_opacity << 8) + (src_opacity) + (src_opacity >> 7);
975 
976     /* Do simple compositing of source over backdrop */
977     if (blend_mode == BLEND_MODE_Normal) {
978 	for (i = 0; i < n_chan; i++) {
979 	    int c_s;
980 	    int c_b;
981 
982 	    c_s = src[i];
983 	    c_b = backdrop[i];
984 	    tmp = (c_b << 16) * scale_b + (c_s - c_b) + scale_src + 0x8000;
985 	    ct[i] = tmp >> 16;
986 	}
987     } else {
988 	byte blend[ART_MAX_CHAN];
989 
990 	art_blend_pixel_8(blend, backdrop, src, n_chan, blend_mode);
991 	for (i = 0; i < n_chan; i++) {
992 	    int c_s;
993 	    int c_b;
994 	    int c_bl;		/* Result of blend function */
995 	    int c_mix;		/* Blend result mixed with source color */
996 
997 	    c_s = src[i];
998 	    c_b = backdrop[i];
999 	    c_bl = blend[i];
1000 	    tmp = backdrop_alpha * (c_bl - ((int)c_s)) + 0x80;
1001 	    c_mix = c_s + (((tmp >> 8) + tmp) >> 8);
1002 	    tmp = (c_b << 16) * scale_b + (c_mix - c_b) + scale_src + 0x8000;
1003 	    ct[i] = tmp >> 16;
1004 	}
1005     }
1006 
1007     /* $\alpha g_{i - 1}$ */
1008     alpha_g_i_1 = *dst_alpha_g;
1009 
1010     tmp = src_shape * (((int)alpha_t) - alpha_g_i_1) + 0x80;
1011     /* $\alpha g_i$ */
1012     alpha_g_i = alpha_g_i_1 + ((tmp + (tmp >> 8)) >> 8);
1013 
1014     tmp = (0xff - backdrop_alpha) * (0xff - alpha_g_i) + 0x80;
1015     /* $\alpha_i$ */
1016     alpha_i = 0xff - ((tmp + (tmp >> 8)) >> 8);
1017 
1018     if (alpha_i > 0) {
1019 	int scale_dst;
1020 	int scale_t;
1021 	byte dst_alpha;
1022 
1023 	/* $f s_i / \alpha_i$ scaled by 2^16 */
1024 	scale_t = ((src_shape << 17) + alpha_i) / (2 * alpha_i);
1025 
1026 	/* $\alpha_{i - 1}$ */
1027 	dst_alpha = dst[n_chan];
1028 
1029 	tmp = (1 - src_shape) * dst_alpha;
1030 	tmp = (tmp << 9) + (tmp << 1) + (tmp >> 7) + alpha_i;
1031 	scale_dst = tmp / (2 * alpha_i);
1032 
1033 	for (i = 0; i < n_chan; i++) {
1034 	    tmp = dst[i] * scale_dst + ct[i] * scale_t + 0x8000;
1035 	    /* todo: clamp? */
1036 	    dst[i] = tmp >> 16;
1037 	}
1038     }
1039     dst[n_chan] = alpha_i;
1040     *dst_alpha_g = alpha_g_i;
1041 }
1042