1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <memdraw.h>
5 
6 int drawdebug;
7 static int	tablesbuilt;
8 
/*
 * Grey value of an rgb triple: integer approximation to
 * NTSC = .299r+.587g+.114b for 0 ≤ r,g,b < 256, with the
 * three weights scaled by 2^19.
 */
#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
 * We add another 127 (128 in total) so that the result is
 * rounded to the nearest value rather than truncated.
 *
 * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
 * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
 *
 * Every macro takes one or two scratch lvalues (tmp, tmp1, tmp2) that it
 * assigns to; callers pass unsigned 32-bit temporaries.  Apart from those
 * scratch lvalues, every argument is parenthesized wherever it is used,
 * so expressions such as `x | y' may be passed safely.
 */
#define CALC11(a, v, tmp) \
	(tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)

#define CALC12(a1, v1, a2, v2, tmp) \
	(tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)

/* selects bytes 0 and 2 of a 32-bit word, leaving room for 16-bit products */
#define MASK 0xFF00FF

#define CALC21(a, vvuu, tmp) \
	(tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/* scale all four bytes of rgba by a/255: bytes 0,2 and bytes 1,3 are done as two pairs */
#define CALC41(a, rgba, tmp1, tmp2) \
	(CALC21(a, (rgba) & MASK, tmp1) | \
	 (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))

#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
	(tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/* bytewise (a1*rgba1 + a2*rgba2)/255 on all four bytes */
#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
	(CALC22(a1, (rgba1) & MASK, a2, (rgba2) & MASK, tmp1) | \
	 (CALC22(a1, ((rgba1)>>8) & MASK, a2, ((rgba2)>>8) & MASK, tmp2)<<8))
41 
42 static void mktables(void);
43 typedef int Subdraw(Memdrawparam*);
44 static Subdraw chardraw, alphadraw, memoptdraw;
45 
46 static Memimage*	memones;
47 static Memimage*	memzeros;
48 Memimage *memwhite;
49 Memimage *memblack;
50 Memimage *memtransparent;
51 Memimage *memopaque;
52 
53 int	_ifmt(Fmt*);
54 
55 void
_memimageinit(void)56 _memimageinit(void)
57 {
58 	static int didinit = 0;
59 
60 	if(didinit)
61 		return;
62 
63 	didinit = 1;
64 
65 	mktables();
66 	_memmkcmap();
67 
68 	fmtinstall('R', Rfmt);
69 	fmtinstall('P', Pfmt);
70 
71 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
72 	memones->flags |= Frepl;
73 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
74 	*byteaddr(memones, ZP) = ~0;
75 
76 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
77 	memzeros->flags |= Frepl;
78 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
79 	*byteaddr(memzeros, ZP) = 0;
80 
81 	if(memones == nil || memzeros == nil)
82 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
83 
84 	memwhite = memones;
85 	memblack = memzeros;
86 	memopaque = memones;
87 	memtransparent = memzeros;
88 }
89 
90 ulong _imgtorgba(Memimage*, ulong);
91 ulong _rgbatoimg(Memimage*, ulong);
92 ulong _pixelbits(Memimage*, Point);
93 
94 #define DBG if(0)
95 static Memdrawparam par;
96 
97 Memdrawparam*
_memimagedrawsetup(Memimage * dst,Rectangle r,Memimage * src,Point p0,Memimage * mask,Point p1,int op)98 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
99 {
100 
101 	if(mask == nil)
102 		mask = memopaque;
103 
104 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
105 
106 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
107 //		if(drawdebug)
108 //			iprint("empty clipped rectangle\n");
109 		return nil;
110 	}
111 
112 	if(op < Clear || op > SoverD){
113 //		if(drawdebug)
114 //			iprint("op out of range: %d\n", op);
115 		return nil;
116 	}
117 
118 	par.op = op;
119 	par.dst = dst;
120 	par.r = r;
121 	par.src = src;
122 	/* par.sr set by drawclip */
123 	par.mask = mask;
124 	/* par.mr set by drawclip */
125 
126 	par.state = 0;
127 	if(src->flags&Frepl){
128 		par.state |= Replsrc;
129 		if(Dx(src->r)==1 && Dy(src->r)==1){
130 			par.sval = _pixelbits(src, src->r.min);
131 			par.state |= Simplesrc;
132 			par.srgba = _imgtorgba(src, par.sval);
133 			par.sdval = _rgbatoimg(dst, par.srgba);
134 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
135 //				if (drawdebug) iprint("fill with transparent source\n");
136 				return nil;	/* no-op successfully handled */
137 			}
138 		}
139 	}
140 
141 	if(mask->flags & Frepl){
142 		par.state |= Replmask;
143 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
144 			par.mval = _pixelbits(mask, mask->r.min);
145 			if(par.mval == 0 && (op&DoutS)){
146 //				if(drawdebug) iprint("fill with zero mask\n");
147 				return nil;	/* no-op successfully handled */
148 			}
149 			par.state |= Simplemask;
150 			if(par.mval == ~0)
151 				par.state |= Fullmask;
152 			par.mrgba = _imgtorgba(mask, par.mval);
153 		}
154 	}
155 
156 //	if(drawdebug)
157 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
158 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
159 
160 	return &par;
161 }
162 
163 void
_memimagedraw(Memdrawparam * par)164 _memimagedraw(Memdrawparam *par)
165 {
166 	if (par == nil)
167 		return;
168 
169 	/*
170 	 * Now that we've clipped the parameters down to be consistent, we
171 	 * simply try sub-drawing routines in order until we find one that was able
172 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
173 	 * unable to satisfy the request, so we do not return.
174 	 */
175 
176 	/*
177 	 * Hardware support.  Each video driver provides this function,
178 	 * which checks to see if there is anything it can help with.
179 	 * There could be an if around this checking to see if dst is in video memory.
180 	 */
181 DBG print("test hwdraw\n");
182 	if(hwdraw(par)){
183 //if(drawdebug) iprint("hw handled\n");
184 DBG print("hwdraw handled\n");
185 		return;
186 	}
187 	/*
188 	 * Optimizations using memmove and memset.
189 	 */
190 DBG print("test memoptdraw\n");
191 	if(memoptdraw(par)){
192 //if(drawdebug) iprint("memopt handled\n");
193 DBG print("memopt handled\n");
194 		return;
195 	}
196 
197 	/*
198 	 * Character drawing.
199 	 * Solid source color being painted through a boolean mask onto a high res image.
200 	 */
201 DBG print("test chardraw\n");
202 	if(chardraw(par)){
203 //if(drawdebug) iprint("chardraw handled\n");
204 DBG print("chardraw handled\n");
205 		return;
206 	}
207 
208 	/*
209 	 * General calculation-laden case that does alpha for each pixel.
210 	 */
211 DBG print("do alphadraw\n");
212 	alphadraw(par);
213 //if(drawdebug) iprint("alphadraw handled\n");
214 DBG print("alphadraw handled\n");
215 }
216 #undef DBG
217 
218 /*
219  * Clip the destination rectangle further based on the properties of the
220  * source and mask rectangles.  Once the destination rectangle is properly
221  * clipped, adjust the source and mask rectangles to be the same size.
222  * Then if source or mask is replicated, move its clipped rectangle
223  * so that its minimum point falls within the repl rectangle.
224  *
225  * Return zero if the final rectangle is null.
226  */
227 int
drawclip(Memimage * dst,Rectangle * r,Memimage * src,Point * p0,Memimage * mask,Point * p1,Rectangle * sr,Rectangle * mr)228 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
229 {
230 	Point rmin, delta;
231 	int splitcoords;
232 	Rectangle omr;
233 
234 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
235 		return 0;
236 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
237 	/* clip to destination */
238 	rmin = r->min;
239 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
240 		return 0;
241 	/* move mask point */
242 	p1->x += r->min.x-rmin.x;
243 	p1->y += r->min.y-rmin.y;
244 	/* move source point */
245 	p0->x += r->min.x-rmin.x;
246 	p0->y += r->min.y-rmin.y;
247 	/* map destination rectangle into source */
248 	sr->min = *p0;
249 	sr->max.x = p0->x+Dx(*r);
250 	sr->max.y = p0->y+Dy(*r);
251 	/* sr is r in source coordinates; clip to source */
252 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
253 		return 0;
254 	if(!rectclip(sr, src->clipr))
255 		return 0;
256 	/* compute and clip rectangle in mask */
257 	if(splitcoords){
258 		/* move mask point with source */
259 		p1->x += sr->min.x-p0->x;
260 		p1->y += sr->min.y-p0->y;
261 		mr->min = *p1;
262 		mr->max.x = p1->x+Dx(*sr);
263 		mr->max.y = p1->y+Dy(*sr);
264 		omr = *mr;
265 		/* mr is now rectangle in mask; clip it */
266 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
267 			return 0;
268 		if(!rectclip(mr, mask->clipr))
269 			return 0;
270 		/* reflect any clips back to source */
271 		sr->min.x += mr->min.x-omr.min.x;
272 		sr->min.y += mr->min.y-omr.min.y;
273 		sr->max.x += mr->max.x-omr.max.x;
274 		sr->max.y += mr->max.y-omr.max.y;
275 		*p1 = mr->min;
276 	}else{
277 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
278 			return 0;
279 		if(!rectclip(sr, mask->clipr))
280 			return 0;
281 		*p1 = sr->min;
282 	}
283 
284 	/* move source clipping back to destination */
285 	delta.x = r->min.x - p0->x;
286 	delta.y = r->min.y - p0->y;
287 	r->min.x = sr->min.x + delta.x;
288 	r->min.y = sr->min.y + delta.y;
289 	r->max.x = sr->max.x + delta.x;
290 	r->max.y = sr->max.y + delta.y;
291 
292 	/* move source rectangle so sr->min is in src->r */
293 	if(src->flags&Frepl) {
294 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
295 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
296 		sr->min.x += delta.x;
297 		sr->min.y += delta.y;
298 		sr->max.x += delta.x;
299 		sr->max.y += delta.y;
300 	}
301 	*p0 = sr->min;
302 
303 	/* move mask point so it is in mask->r */
304 	*p1 = drawrepl(mask->r, *p1);
305 	mr->min = *p1;
306 	mr->max.x = p1->x+Dx(*sr);
307 	mr->max.y = p1->y+Dy(*sr);
308 
309 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
310 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
311 	assert(ptinrect(*p0, src->r));
312 	assert(ptinrect(*p1, mask->r));
313 	assert(ptinrect(r->min, dst->r));
314 
315 	return 1;
316 }
317 
318 /*
319  * Conversion tables.
320  */
321 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
322 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
323 static uchar conv28[256][4];		/* ... */
324 static uchar conv48[256][2];
325 
/*
 * Multipliers that replicate an n-bit value to fill 8 bits, for 1 ≤ n ≤ 8.
 * Each entry is a 16-bit pattern, pictured beside it with the most
 * significant bit first; an X marks a place where the bottom (ones) bit
 * of the n-bit value is deposited.  Only the top 8 bits of the product
 * are actually used.  (The lower 8 bits are needed to get bits in the
 * right place when n is not a divisor of 8.)
 *
 * Should check to see if it is easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
static int replmul[1+8] = {
	0x0000,		/* n=0 */
	0xFFFF,		/* n=1  XXXXXXXXXXXXXXXX */
	0x5555,		/* n=2  .X.X.X.X.X.X.X.X */
	0x2492,		/* n=3  ..X..X..X..X..X. */
	0x1111,		/* n=4  ...X...X...X...X */
	0x0842,		/* n=5  ....X....X....X. */
	0x0410,		/* n=6  .....X.....X.... */
	0x0204,		/* n=7  ......X......X.. */
	0x0101,		/* n=8  .......X.......X */
};
354 
355 static void
mktables(void)356 mktables(void)
357 {
358 	int i, j, mask, sh, small;
359 
360 	if(tablesbuilt)
361 		return;
362 
363 	fmtinstall('R', Rfmt);
364 	fmtinstall('P', Pfmt);
365 	tablesbuilt = 1;
366 
367 	/* bit replication up to 8 bits */
368 	for(i=0; i<256; i++){
369 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
370 			small = i & ((1<<j)-1);
371 			replbit[j][i] = (small*replmul[j])>>8;
372 		}
373 	}
374 
375 	/* bit unpacking up to 8 bits, only powers of 2 */
376 	for(i=0; i<256; i++){
377 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
378 			conv18[i][j] = replbit[1][(i>>sh)&mask];
379 
380 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
381 			conv28[i][j] = replbit[2][(i>>sh)&mask];
382 
383 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
384 			conv48[i][j] = replbit[4][(i>>sh)&mask];
385 	}
386 }
387 
388 static uchar ones = 0xff;
389 
390 /*
391  * General alpha drawing case.  Can handle anything.
392  */
393 typedef struct	Buffer	Buffer;
394 struct Buffer {
395 	/* used by most routines */
396 	uchar	*red;
397 	uchar	*grn;
398 	uchar	*blu;
399 	uchar	*alpha;
400 	uchar	*grey;
401 	ulong	*rgba;
402 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
403 
404 	/* used by boolcalc* for mask data */
405 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
406 	int		mskip;	/* no. of left bits to skip in *m */
407 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
408 	int		bmskip;	/* no. of left bits to skip in *bm */
409 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
410 	int		emskip;	/* no. of right bits to skip in *em */
411 };
412 
413 typedef struct	Param	Param;
414 typedef Buffer	Readfn(Param*, uchar*, int);
415 typedef void	Writefn(Param*, uchar*, Buffer);
416 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
417 
418 enum {
419 	MAXBCACHE = 16
420 };
421 
422 /* giant rathole to customize functions with */
423 struct Param {
424 	Readfn	*replcall;
425 	Readfn	*greymaskcall;
426 	Readfn	*convreadcall;
427 	Writefn	*convwritecall;
428 
429 	Memimage *img;
430 	Rectangle	r;
431 	int	dx;	/* of r */
432 	int	needbuf;
433 	int	convgrey;
434 	int	alphaonly;
435 
436 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
437 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
438 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
439 	int		bwidth;
440 
441 	int	replcache;	/* if set, cache buffers */
442 	Buffer	bcache[MAXBCACHE];
443 	ulong	bfilled;
444 	uchar	*bufbase;
445 	int	bufoff;
446 	int	bufdelta;
447 
448 	int	dir;
449 
450 	int	convbufoff;
451 	uchar	*convbuf;
452 	Param	*convdpar;
453 	int	convdx;
454 };
455 
456 static uchar *drawbuf;
457 static int	ndrawbuf;
458 static int	mdrawbuf;
459 static Param spar, mpar, dpar;	/* easier on the stacks */
460 static Readfn	greymaskread, replread, readptr;
461 static Writefn	nullwrite;
462 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
463 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
464 
465 static Readfn*	readfn(Memimage*);
466 static Readfn*	readalphafn(Memimage*);
467 static Writefn*	writefn(Memimage*);
468 
469 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
470 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*);
471 
472 static Calcfn *alphacalc[Ncomp] =
473 {
474 	alphacalc0,		/* Clear */
475 	alphacalc14,		/* DoutS */
476 	alphacalc2810,		/* SoutD */
477 	alphacalc3679,		/* DxorS */
478 	alphacalc14,		/* DinS */
479 	alphacalc5,		/* D */
480 	alphacalc3679,		/* DatopS */
481 	alphacalc3679,		/* DoverS */
482 	alphacalc2810,		/* SinD */
483 	alphacalc3679,		/* SatopD */
484 	alphacalc2810,		/* S */
485 	alphacalc11,		/* SoverD */
486 };
487 
488 static Calcfn *boolcalc[Ncomp] =
489 {
490 	alphacalc0,		/* Clear */
491 	boolcalc14,		/* DoutS */
492 	boolcalc236789,		/* SoutD */
493 	boolcalc236789,		/* DxorS */
494 	boolcalc14,		/* DinS */
495 	alphacalc5,		/* D */
496 	boolcalc236789,		/* DatopS */
497 	boolcalc236789,		/* DoverS */
498 	boolcalc236789,		/* SinD */
499 	boolcalc236789,		/* SatopD */
500 	boolcalc1011,		/* S */
501 	boolcalc1011,		/* SoverD */
502 };
503 
504 static int
allocdrawbuf(void)505 allocdrawbuf(void)
506 {
507 	uchar *p;
508 
509 	if(ndrawbuf > mdrawbuf){
510 		p = realloc(drawbuf, ndrawbuf);
511 		if(p == nil){
512 			werrstr("memimagedraw out of memory");
513 			return -1;
514 		}
515 		drawbuf = p;
516 		mdrawbuf = ndrawbuf;
517 	}
518 	return 0;
519 }
520 
521 static Param
getparam(Memimage * img,Rectangle r,int convgrey,int needbuf)522 getparam(Memimage *img, Rectangle r, int convgrey, int needbuf)
523 {
524 	Param p;
525 	int nbuf;
526 
527 	memset(&p, 0, sizeof p);
528 
529 	p.img = img;
530 	p.r = r;
531 	p.dx = Dx(r);
532 	p.needbuf = needbuf;
533 	p.convgrey = convgrey;
534 
535 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
536 
537 	p.bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
538 	p.bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
539 	p.bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
540 	p.bwidth = sizeof(ulong)*img->width;
541 
542 	assert(p.bytey0s <= p.bytermin && p.bytermin <= p.bytey0e);
543 
544 	if(p.r.min.x == p.img->r.min.x)
545 		assert(p.bytermin == p.bytey0s);
546 
547 	nbuf = 1;
548 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
549 		p.replcache = 1;
550 		nbuf = Dy(img->r);
551 	}
552 	p.bufdelta = 4*p.dx;
553 	p.bufoff = ndrawbuf;
554 	ndrawbuf += p.bufdelta*nbuf;
555 
556 	return p;
557 }
558 
559 static void
clipy(Memimage * img,int * y)560 clipy(Memimage *img, int *y)
561 {
562 	int dy;
563 
564 	dy = Dy(img->r);
565 	if(*y == dy)
566 		*y = 0;
567 	else if(*y == -1)
568 		*y = dy-1;
569 	assert(0 <= *y && *y < dy);
570 }
571 
572 static void
dumpbuf(char * s,Buffer b,int n)573 dumpbuf(char *s, Buffer b, int n)
574 {
575 	int i;
576 	uchar *p;
577 
578 	print("%s", s);
579 	for(i=0; i<n; i++){
580 		print(" ");
581 		if((p=b.grey)){
582 			print(" k%.2uX", *p);
583 			b.grey += b.delta;
584 		}else{
585 			if((p=b.red)){
586 				print(" r%.2uX", *p);
587 				b.red += b.delta;
588 			}
589 			if((p=b.grn)){
590 				print(" g%.2uX", *p);
591 				b.grn += b.delta;
592 			}
593 			if((p=b.blu)){
594 				print(" b%.2uX", *p);
595 				b.blu += b.delta;
596 			}
597 		}
598 		if((p=b.alpha) != &ones){
599 			print(" α%.2uX", *p);
600 			b.alpha += b.delta;
601 		}
602 	}
603 	print("\n");
604 }
605 
606 /*
607  * For each scan line, we expand the pixels from source, mask, and destination
608  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
609  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
610  * the readers need not copy the data: they can simply return pointers to the data.
611  * If the destination image is grey and the source is not, it is converted using the NTSC
612  * formula.
613  *
614  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
615  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
616  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
617  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
618  * the calculator, and that buffer is passed to a function to write it to the destination.
619  * If the buffer is already pointing at the destination, the writing function is a no-op.
620  */
621 #define DBG if(0)
622 static int
alphadraw(Memdrawparam * par)623 alphadraw(Memdrawparam *par)
624 {
625 	int isgrey, starty, endy, op;
626 	int needbuf, dsty, srcy, masky;
627 	int y, dir, dx, dy;
628 	Buffer bsrc, bdst, bmask;
629 	Readfn *rdsrc, *rdmask, *rddst;
630 	Calcfn *calc;
631 	Writefn *wrdst;
632 	Memimage *src, *mask, *dst;
633 	Rectangle r, sr, mr;
634 
635 	r = par->r;
636 	dx = Dx(r);
637 	dy = Dy(r);
638 
639 	ndrawbuf = 0;
640 
641 	src = par->src;
642 	mask = par->mask;
643 	dst = par->dst;
644 	sr = par->sr;
645 	mr = par->mr;
646 	op = par->op;
647 
648 	isgrey = dst->flags&Fgrey;
649 
650 	/*
651 	 * Buffering when src and dst are the same bitmap is sufficient but not
652 	 * necessary.  There are stronger conditions we could use.  We could
653 	 * check to see if the rectangles intersect, and if simply moving in the
654 	 * correct y direction can avoid the need to buffer.
655 	 */
656 	needbuf = (src->data == dst->data);
657 
658 	spar = getparam(src, sr, isgrey, needbuf);
659 	dpar = getparam(dst, r, isgrey, needbuf);
660 	mpar = getparam(mask, mr, 0, needbuf);
661 
662 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
663 	spar.dir = mpar.dir = dpar.dir = dir;
664 
665 	/*
666 	 * If the mask is purely boolean, we can convert from src to dst format
667 	 * when we read src, and then just copy it to dst where the mask tells us to.
668 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
669 	 *
670 	 * The computation is accomplished by assigning the function pointers as follows:
671 	 *	rdsrc - read and convert source into dst format in a buffer
672 	 * 	rdmask - convert mask to bytes, set pointer to it
673 	 * 	rddst - fill with pointer to real dst data, but do no reads
674 	 *	calc - copy src onto dst when mask says to.
675 	 *	wrdst - do nothing
676 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
677 	 * but it avoids a fair amount of code duplication to make this a case here
678 	 * rather than have a separate booldraw.
679 	 */
680 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
681 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
682 //if(drawdebug) iprint("boolcopy...");
683 		rdsrc = convfn(dst, &dpar, src, &spar);
684 		rddst = readptr;
685 		rdmask = readfn(mask);
686 		calc = boolcopyfn(dst, mask);
687 		wrdst = nullwrite;
688 	}else{
689 		/* usual alphadraw parameter fetching */
690 		rdsrc = readfn(src);
691 		rddst = readfn(dst);
692 		wrdst = writefn(dst);
693 		calc = alphacalc[op];
694 
695 		/*
696 		 * If there is no alpha channel, we'll ask for a grey channel
697 		 * and pretend it is the alpha.
698 		 */
699 		if(mask->flags&Falpha){
700 			rdmask = readalphafn(mask);
701 			mpar.alphaonly = 1;
702 		}else{
703 			mpar.greymaskcall = readfn(mask);
704 			mpar.convgrey = 1;
705 			rdmask = greymaskread;
706 
707 			/*
708 			 * Should really be above, but then boolcopyfns would have
709 			 * to deal with bit alignment, and I haven't written that.
710 			 *
711 			 * This is a common case for things like ellipse drawing.
712 			 * When there's no alpha involved and the mask is boolean,
713 			 * we can avoid all the division and multiplication.
714 			 */
715 			if(mask->chan == GREY1 && !(src->flags&Falpha))
716 				calc = boolcalc[op];
717 			else if(op == SoverD && !(src->flags&Falpha))
718 				calc = alphacalcS;
719 		}
720 	}
721 
722 	/*
723 	 * If the image has a small enough repl rectangle,
724 	 * we can just read each line once and cache them.
725 	 */
726 	if(spar.replcache){
727 		spar.replcall = rdsrc;
728 		rdsrc = replread;
729 	}
730 	if(mpar.replcache){
731 		mpar.replcall = rdmask;
732 		rdmask = replread;
733 	}
734 
735 	if(allocdrawbuf() < 0)
736 		return 0;
737 
738 	/*
739 	 * Before we were saving only offsets from drawbuf in the parameter
740 	 * structures; now that drawbuf has been grown to accomodate us,
741 	 * we can fill in the pointers.
742 	 */
743 	spar.bufbase = drawbuf+spar.bufoff;
744 	mpar.bufbase = drawbuf+mpar.bufoff;
745 	dpar.bufbase = drawbuf+dpar.bufoff;
746 	spar.convbuf = drawbuf+spar.convbufoff;
747 
748 	if(dir == 1){
749 		starty = 0;
750 		endy = dy;
751 	}else{
752 		starty = dy-1;
753 		endy = -1;
754 	}
755 
756 	/*
757 	 * srcy, masky, and dsty are offsets from the top of their
758 	 * respective Rectangles.  they need to be contained within
759 	 * the rectangles, so clipy can keep them there without division.
760  	 */
761 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
762 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
763 	dsty = starty + r.min.y - dst->r.min.y;
764 
765 	assert(0 <= srcy && srcy < Dy(src->r));
766 	assert(0 <= masky && masky < Dy(mask->r));
767 	assert(0 <= dsty && dsty < Dy(dst->r));
768 
769 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
770 		clipy(src, &srcy);
771 		clipy(dst, &dsty);
772 		clipy(mask, &masky);
773 
774 		bsrc = rdsrc(&spar, spar.bufbase, srcy);
775 DBG print("[");
776 		bmask = rdmask(&mpar, mpar.bufbase, masky);
777 DBG print("]\n");
778 		bdst = rddst(&dpar, dpar.bufbase, dsty);
779 DBG		dumpbuf("src", bsrc, dx);
780 DBG		dumpbuf("mask", bmask, dx);
781 DBG		dumpbuf("dst", bdst, dx);
782 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
783 		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
784 	}
785 
786 	return 1;
787 }
788 #undef DBG
789 
790 static Buffer
alphacalc0(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)791 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
792 {
793 	USED(grey);
794 	USED(op);
795 	memset(bdst.rgba, 0, dx*bdst.delta);
796 	return bdst;
797 }
798 
799 /*
800  * Do the channels in the buffers match enough
801  * that we can do word-at-a-time operations
802  * on the pixels?
803  */
804 static int
chanmatch(Buffer * bdst,Buffer * bsrc)805 chanmatch(Buffer *bdst, Buffer *bsrc)
806 {
807 	uchar *drgb, *srgb;
808 
809 	/*
810 	 * first, r, g, b must be in the same place
811 	 * in the rgba word.
812 	 */
813 	drgb = (uchar*)bdst->rgba;
814 	srgb = (uchar*)bsrc->rgba;
815 	if(bdst->red - drgb != bsrc->red - srgb
816 	|| bdst->blu - drgb != bsrc->blu - srgb
817 	|| bdst->grn - drgb != bsrc->grn - srgb)
818 		return 0;
819 
820 	/*
821 	 * that implies alpha is in the same place,
822 	 * if it is there at all (it might be == &ones).
823 	 * if the destination is &ones, we can scribble
824 	 * over the rgba slot just fine.
825 	 */
826 	if(bdst->alpha == &ones)
827 		return 1;
828 
829 	/*
830 	 * if the destination is not ones but the src is,
831 	 * then the simultaneous calculation will use
832 	 * bogus bytes from the src's rgba.  no good.
833 	 */
834 	if(bsrc->alpha == &ones)
835 		return 0;
836 
837 	/*
838 	 * otherwise, alphas are in the same place.
839 	 */
840 	return 1;
841 }
842 
843 static Buffer
alphacalc14(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)844 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
845 {
846 	Buffer obdst;
847 	int fd, sadelta;
848 	int i, sa, ma, q;
849 	ulong t, t1;
850 
851 	obdst = bdst;
852 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
853 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
854 
855 	for(i=0; i<dx; i++){
856 		sa = *bsrc.alpha;
857 		ma = *bmask.alpha;
858 		fd = CALC11(sa, ma, t);
859 		if(op == DoutS)
860 			fd = 255-fd;
861 
862 		if(grey){
863 			*bdst.grey = CALC11(fd, *bdst.grey, t);
864 			bsrc.grey += bsrc.delta;
865 			bdst.grey += bdst.delta;
866 		}else{
867 			if(q){
868 				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
869 				bsrc.rgba++;
870 				bdst.rgba++;
871 				bsrc.alpha += sadelta;
872 				bmask.alpha += bmask.delta;
873 				continue;
874 			}
875 			*bdst.red = CALC11(fd, *bdst.red, t);
876 			*bdst.grn = CALC11(fd, *bdst.grn, t);
877 			*bdst.blu = CALC11(fd, *bdst.blu, t);
878 			bsrc.red += bsrc.delta;
879 			bsrc.blu += bsrc.delta;
880 			bsrc.grn += bsrc.delta;
881 			bdst.red += bdst.delta;
882 			bdst.blu += bdst.delta;
883 			bdst.grn += bdst.delta;
884 		}
885 		if(bdst.alpha != &ones){
886 			*bdst.alpha = CALC11(fd, *bdst.alpha, t);
887 			bdst.alpha += bdst.delta;
888 		}
889 		bmask.alpha += bmask.delta;
890 		bsrc.alpha += sadelta;
891 	}
892 	return obdst;
893 }
894 
895 static Buffer
alphacalc2810(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)896 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
897 {
898 	Buffer obdst;
899 	int fs, sadelta;
900 	int i, ma, da, q;
901 	ulong t, t1;
902 
903 	obdst = bdst;
904 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
905 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
906 
907 	for(i=0; i<dx; i++){
908 		ma = *bmask.alpha;
909 		da = *bdst.alpha;
910 		if(op == SoutD)
911 			da = 255-da;
912 		fs = ma;
913 		if(op != S)
914 			fs = CALC11(fs, da, t);
915 
916 		if(grey){
917 			*bdst.grey = CALC11(fs, *bsrc.grey, t);
918 			bsrc.grey += bsrc.delta;
919 			bdst.grey += bdst.delta;
920 		}else{
921 			if(q){
922 				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
923 				bsrc.rgba++;
924 				bdst.rgba++;
925 				bmask.alpha += bmask.delta;
926 				bdst.alpha += bdst.delta;
927 				continue;
928 			}
929 			*bdst.red = CALC11(fs, *bsrc.red, t);
930 			*bdst.grn = CALC11(fs, *bsrc.grn, t);
931 			*bdst.blu = CALC11(fs, *bsrc.blu, t);
932 			bsrc.red += bsrc.delta;
933 			bsrc.blu += bsrc.delta;
934 			bsrc.grn += bsrc.delta;
935 			bdst.red += bdst.delta;
936 			bdst.blu += bdst.delta;
937 			bdst.grn += bdst.delta;
938 		}
939 		if(bdst.alpha != &ones){
940 			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);
941 			bdst.alpha += bdst.delta;
942 		}
943 		bmask.alpha += bmask.delta;
944 		bsrc.alpha += sadelta;
945 	}
946 	return obdst;
947 }
948 
949 static Buffer
alphacalc3679(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)950 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
951 {
952 	Buffer obdst;
953 	int fs, fd, sadelta;
954 	int i, sa, ma, da, q;
955 	ulong t, t1;
956 
957 	obdst = bdst;
958 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
959 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
960 
961 	for(i=0; i<dx; i++){
962 		sa = *bsrc.alpha;
963 		ma = *bmask.alpha;
964 		da = *bdst.alpha;
965 		if(op == SatopD)
966 			fs = CALC11(ma, da, t);
967 		else
968 			fs = CALC11(ma, 255-da, t);
969 		if(op == DoverS)
970 			fd = 255;
971 		else{
972 			fd = CALC11(sa, ma, t);
973 			if(op != DatopS)
974 				fd = 255-fd;
975 		}
976 
977 		if(grey){
978 			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
979 			bsrc.grey += bsrc.delta;
980 			bdst.grey += bdst.delta;
981 		}else{
982 			if(q){
983 				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
984 				bsrc.rgba++;
985 				bdst.rgba++;
986 				bsrc.alpha += sadelta;
987 				bmask.alpha += bmask.delta;
988 				bdst.alpha += bdst.delta;
989 				continue;
990 			}
991 			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
992 			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
993 			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
994 			bsrc.red += bsrc.delta;
995 			bsrc.blu += bsrc.delta;
996 			bsrc.grn += bsrc.delta;
997 			bdst.red += bdst.delta;
998 			bdst.blu += bdst.delta;
999 			bdst.grn += bdst.delta;
1000 		}
1001 		if(bdst.alpha != &ones){
1002 			*bdst.alpha = CALC12(fs, sa, fd, da, t);
1003 			bdst.alpha += bdst.delta;
1004 		}
1005 		bmask.alpha += bmask.delta;
1006 		bsrc.alpha += sadelta;
1007 	}
1008 	return obdst;
1009 }
1010 
1011 static Buffer
alphacalc5(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)1012 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
1013 {
1014 	USED(dx);
1015 	USED(grey);
1016 	USED(op);
1017 	return bdst;
1018 }
1019 
1020 static Buffer
alphacalc11(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1021 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1022 {
1023 	Buffer obdst;
1024 	int fd, sadelta;
1025 	int i, sa, ma, q;
1026 	ulong t, t1;
1027 
1028 	USED(op);
1029 	obdst = bdst;
1030 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
1031 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
1032 
1033 	for(i=0; i<dx; i++){
1034 		sa = *bsrc.alpha;
1035 		ma = *bmask.alpha;
1036 		fd = 255-CALC11(sa, ma, t);
1037 
1038 		if(grey){
1039 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1040 			bsrc.grey += bsrc.delta;
1041 			bdst.grey += bdst.delta;
1042 		}else{
1043 			if(q){
1044 				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
1045 				bsrc.rgba++;
1046 				bdst.rgba++;
1047 				bsrc.alpha += sadelta;
1048 				bmask.alpha += bmask.delta;
1049 				continue;
1050 			}
1051 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1052 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1053 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1054 			bsrc.red += bsrc.delta;
1055 			bsrc.blu += bsrc.delta;
1056 			bsrc.grn += bsrc.delta;
1057 			bdst.red += bdst.delta;
1058 			bdst.blu += bdst.delta;
1059 			bdst.grn += bdst.delta;
1060 		}
1061 		if(bdst.alpha != &ones){
1062 			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
1063 			bdst.alpha += bdst.delta;
1064 		}
1065 		bmask.alpha += bmask.delta;
1066 		bsrc.alpha += sadelta;
1067 	}
1068 	return obdst;
1069 }
1070 
1071 /*
1072 not used yet
1073 source and mask alpha 1
1074 static Buffer
1075 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1076 {
1077 	Buffer obdst;
1078 	int i;
1079 
1080 	USED(op);
1081 	obdst = bdst;
1082 	if(bsrc.delta == bdst.delta){
1083 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1084 		return obdst;
1085 	}
1086 	for(i=0; i<dx; i++){
1087 		if(grey){
1088 			*bdst.grey = *bsrc.grey;
1089 			bsrc.grey += bsrc.delta;
1090 			bdst.grey += bdst.delta;
1091 		}else{
1092 			*bdst.red = *bsrc.red;
1093 			*bdst.grn = *bsrc.grn;
1094 			*bdst.blu = *bsrc.blu;
1095 			bsrc.red += bsrc.delta;
1096 			bsrc.blu += bsrc.delta;
1097 			bsrc.grn += bsrc.delta;
1098 			bdst.red += bdst.delta;
1099 			bdst.blu += bdst.delta;
1100 			bdst.grn += bdst.delta;
1101 		}
1102 		if(bdst.alpha != &ones){
1103 			*bdst.alpha = 255;
1104 			bdst.alpha += bdst.delta;
1105 		}
1106 	}
1107 	return obdst;
1108 }
1109 */
1110 
1111 /* source alpha 1 */
1112 static Buffer
alphacalcS(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1113 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1114 {
1115 	Buffer obdst;
1116 	int fd;
1117 	int i, ma;
1118 	ulong t;
1119 
1120 	USED(op);
1121 	obdst = bdst;
1122 
1123 	for(i=0; i<dx; i++){
1124 		ma = *bmask.alpha;
1125 		fd = 255-ma;
1126 
1127 		if(grey){
1128 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1129 			bsrc.grey += bsrc.delta;
1130 			bdst.grey += bdst.delta;
1131 		}else{
1132 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1133 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1134 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1135 			bsrc.red += bsrc.delta;
1136 			bsrc.blu += bsrc.delta;
1137 			bsrc.grn += bsrc.delta;
1138 			bdst.red += bdst.delta;
1139 			bdst.blu += bdst.delta;
1140 			bdst.grn += bdst.delta;
1141 		}
1142 		if(bdst.alpha != &ones){
1143 			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
1144 			bdst.alpha += bdst.delta;
1145 		}
1146 		bmask.alpha += bmask.delta;
1147 	}
1148 	return obdst;
1149 }
1150 
1151 static Buffer
boolcalc14(Buffer bdst,Buffer b1,Buffer bmask,int dx,int grey,int op)1152 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1153 {
1154 	Buffer obdst;
1155 	int i, ma, zero;
1156 
1157 	obdst = bdst;
1158 
1159 	for(i=0; i<dx; i++){
1160 		ma = *bmask.alpha;
1161 		zero = ma ? op == DoutS : op == DinS;
1162 
1163 		if(grey){
1164 			if(zero)
1165 				*bdst.grey = 0;
1166 			bdst.grey += bdst.delta;
1167 		}else{
1168 			if(zero)
1169 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1170 			bdst.red += bdst.delta;
1171 			bdst.blu += bdst.delta;
1172 			bdst.grn += bdst.delta;
1173 		}
1174 		bmask.alpha += bmask.delta;
1175 		if(bdst.alpha != &ones){
1176 			if(zero)
1177 				*bdst.alpha = 0;
1178 			bdst.alpha += bdst.delta;
1179 		}
1180 	}
1181 	return obdst;
1182 }
1183 
1184 static Buffer
boolcalc236789(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1185 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1186 {
1187 	Buffer obdst;
1188 	int fs, fd;
1189 	int i, ma, da, zero;
1190 	ulong t;
1191 
1192 	obdst = bdst;
1193 	zero = !(op&1);
1194 
1195 	for(i=0; i<dx; i++){
1196 		ma = *bmask.alpha;
1197 		da = *bdst.alpha;
1198 		fs = da;
1199 		if(op&2)
1200 			fs = 255-da;
1201 		fd = 0;
1202 		if(op&4)
1203 			fd = 255;
1204 
1205 		if(grey){
1206 			if(ma)
1207 				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
1208 			else if(zero)
1209 				*bdst.grey = 0;
1210 			bsrc.grey += bsrc.delta;
1211 			bdst.grey += bdst.delta;
1212 		}else{
1213 			if(ma){
1214 				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
1215 				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
1216 				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
1217 			}
1218 			else if(zero)
1219 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1220 			bsrc.red += bsrc.delta;
1221 			bsrc.blu += bsrc.delta;
1222 			bsrc.grn += bsrc.delta;
1223 			bdst.red += bdst.delta;
1224 			bdst.blu += bdst.delta;
1225 			bdst.grn += bdst.delta;
1226 		}
1227 		bmask.alpha += bmask.delta;
1228 		if(bdst.alpha != &ones){
1229 			if(ma)
1230 				*bdst.alpha = fs+CALC11(fd, da, t);
1231 			else if(zero)
1232 				*bdst.alpha = 0;
1233 			bdst.alpha += bdst.delta;
1234 		}
1235 	}
1236 	return obdst;
1237 }
1238 
1239 static Buffer
boolcalc1011(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1240 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1241 {
1242 	Buffer obdst;
1243 	int i, ma, zero;
1244 
1245 	obdst = bdst;
1246 	zero = !(op&1);
1247 
1248 	for(i=0; i<dx; i++){
1249 		ma = *bmask.alpha;
1250 
1251 		if(grey){
1252 			if(ma)
1253 				*bdst.grey = *bsrc.grey;
1254 			else if(zero)
1255 				*bdst.grey = 0;
1256 			bsrc.grey += bsrc.delta;
1257 			bdst.grey += bdst.delta;
1258 		}else{
1259 			if(ma){
1260 				*bdst.red = *bsrc.red;
1261 				*bdst.grn = *bsrc.grn;
1262 				*bdst.blu = *bsrc.blu;
1263 			}
1264 			else if(zero)
1265 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1266 			bsrc.red += bsrc.delta;
1267 			bsrc.blu += bsrc.delta;
1268 			bsrc.grn += bsrc.delta;
1269 			bdst.red += bdst.delta;
1270 			bdst.blu += bdst.delta;
1271 			bdst.grn += bdst.delta;
1272 		}
1273 		bmask.alpha += bmask.delta;
1274 		if(bdst.alpha != &ones){
1275 			if(ma)
1276 				*bdst.alpha = 255;
1277 			else if(zero)
1278 				*bdst.alpha = 0;
1279 			bdst.alpha += bdst.delta;
1280 		}
1281 	}
1282 	return obdst;
1283 }
1284 /*
1285  * Replicated cached scan line read.  Call the function listed in the Param,
1286  * but cache the result so that for replicated images we only do the work once.
1287  */
1288 static Buffer
replread(Param * p,uchar * s,int y)1289 replread(Param *p, uchar *s, int y)
1290 {
1291 	Buffer *b;
1292 
1293 	USED(s);
1294 	b = &p->bcache[y];
1295 	if((p->bfilled & (1<<y)) == 0){
1296 		p->bfilled |= 1<<y;
1297 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1298 	}
1299 	return *b;
1300 }
1301 
1302 /*
1303  * Alpha reading function that simply relabels the grey pointer.
1304  */
1305 static Buffer
greymaskread(Param * p,uchar * buf,int y)1306 greymaskread(Param *p, uchar *buf, int y)
1307 {
1308 	Buffer b;
1309 
1310 	b = p->greymaskcall(p, buf, y);
1311 	b.alpha = b.grey;
1312 	return b;
1313 }
1314 
1315 #define DBG if(0)
1316 static Buffer
readnbit(Param * p,uchar * buf,int y)1317 readnbit(Param *p, uchar *buf, int y)
1318 {
1319 	Buffer b;
1320 	Memimage *img;
1321 	uchar *repl, *r, *w, *ow, bits;
1322 	int i, n, sh, depth, x, dx, npack, nbits;
1323 
1324 	b.rgba = (ulong*)buf;
1325 	b.grey = w = buf;
1326 	b.red = b.blu = b.grn = w;
1327 	b.alpha = &ones;
1328 	b.delta = 1;
1329 
1330 	dx = p->dx;
1331 	img = p->img;
1332 	depth = img->depth;
1333 	repl = &replbit[depth][0];
1334 	npack = 8/depth;
1335 	sh = 8-depth;
1336 
1337 	/* copy from p->r.min.x until end of repl rectangle */
1338 	x = p->r.min.x;
1339 	n = dx;
1340 	if(n > p->img->r.max.x - x)
1341 		n = p->img->r.max.x - x;
1342 
1343 	r = p->bytermin + y*p->bwidth;
1344 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1345 	bits = *r++;
1346 	nbits = 8;
1347 	if((i=x&(npack-1))){
1348 DBG print("throwaway %d...", i);
1349 		bits <<= depth*i;
1350 		nbits -= depth*i;
1351 	}
1352 	for(i=0; i<n; i++){
1353 		if(nbits == 0){
1354 DBG print("(%.2ux)...", *r);
1355 			bits = *r++;
1356 			nbits = 8;
1357 		}
1358 		*w++ = repl[bits>>sh];
1359 DBG print("bit %x...", repl[bits>>sh]);
1360 		bits <<= depth;
1361 		nbits -= depth;
1362 	}
1363 	dx -= n;
1364 	if(dx == 0)
1365 		return b;
1366 
1367 	assert(x+i == p->img->r.max.x);
1368 
1369 	/* copy from beginning of repl rectangle until where we were before. */
1370 	x = p->img->r.min.x;
1371 	n = dx;
1372 	if(n > p->r.min.x - x)
1373 		n = p->r.min.x - x;
1374 
1375 	r = p->bytey0s + y*p->bwidth;
1376 DBG print("x=%d r=%p...", x, r);
1377 	bits = *r++;
1378 	nbits = 8;
1379 	if((i=x&(npack-1))){
1380 		bits <<= depth*i;
1381 		nbits -= depth*i;
1382 	}
1383 DBG print("nbits=%d...", nbits);
1384 	for(i=0; i<n; i++){
1385 		if(nbits == 0){
1386 			bits = *r++;
1387 			nbits = 8;
1388 		}
1389 		*w++ = repl[bits>>sh];
1390 DBG print("bit %x...", repl[bits>>sh]);
1391 		bits <<= depth;
1392 		nbits -= depth;
1393 DBG print("bits %x nbits %d...", bits, nbits);
1394 	}
1395 	dx -= n;
1396 	if(dx == 0)
1397 		return b;
1398 
1399 	assert(dx > 0);
1400 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1401 	ow = buf;
1402 	while(dx--)
1403 		*w++ = *ow++;
1404 
1405 	return b;
1406 }
1407 #undef DBG
1408 
1409 #define DBG if(0)
1410 static void
writenbit(Param * p,uchar * w,Buffer src)1411 writenbit(Param *p, uchar *w, Buffer src)
1412 {
1413 	uchar *r;
1414 	ulong bits;
1415 	int i, sh, depth, npack, nbits, x, ex;
1416 
1417 	assert(src.grey != nil && src.delta == 1);
1418 
1419 	x = p->r.min.x;
1420 	ex = x+p->dx;
1421 	depth = p->img->depth;
1422 	npack = 8/depth;
1423 
1424 	i=x&(npack-1);
1425 	bits = i ? (*w >> (8-depth*i)) : 0;
1426 	nbits = depth*i;
1427 	sh = 8-depth;
1428 	r = src.grey;
1429 
1430 	for(; x<ex; x++){
1431 		bits <<= depth;
1432 DBG print(" %x", *r);
1433 		bits |= (*r++ >> sh);
1434 		nbits += depth;
1435 		if(nbits == 8){
1436 			*w++ = bits;
1437 			nbits = 0;
1438 		}
1439 	}
1440 
1441 	if(nbits){
1442 		sh = 8-nbits;
1443 		bits <<= sh;
1444 		bits |= *w & ((1<<sh)-1);
1445 		*w = bits;
1446 	}
1447 DBG print("\n");
1448 	return;
1449 }
1450 #undef DBG
1451 
1452 static Buffer
readcmap(Param * p,uchar * buf,int y)1453 readcmap(Param *p, uchar *buf, int y)
1454 {
1455 	Buffer b;
1456 	int a, convgrey, copyalpha, dx, i, m;
1457 	uchar *q, *cmap, *begin, *end, *r, *w;
1458 
1459 	begin = p->bytey0s + y*p->bwidth;
1460 	r = p->bytermin + y*p->bwidth;
1461 	end = p->bytey0e + y*p->bwidth;
1462 	cmap = p->img->cmap->cmap2rgb;
1463 	convgrey = p->convgrey;
1464 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1465 
1466 	w = buf;
1467 	dx = p->dx;
1468 	if(copyalpha){
1469 		b.alpha = buf++;
1470 		a = p->img->shift[CAlpha]/8;
1471 		m = p->img->shift[CMap]/8;
1472 		for(i=0; i<dx; i++){
1473 			*w++ = r[a];
1474 			q = cmap+r[m]*3;
1475 			r += 2;
1476 			if(r == end)
1477 				r = begin;
1478 			if(convgrey){
1479 				*w++ = RGB2K(q[0], q[1], q[2]);
1480 			}else{
1481 				*w++ = q[2];	/* blue */
1482 				*w++ = q[1];	/* green */
1483 				*w++ = q[0];	/* red */
1484 			}
1485 		}
1486 	}else{
1487 		b.alpha = &ones;
1488 		for(i=0; i<dx; i++){
1489 			q = cmap+*r++*3;
1490 			if(r == end)
1491 				r = begin;
1492 			if(convgrey){
1493 				*w++ = RGB2K(q[0], q[1], q[2]);
1494 			}else{
1495 				*w++ = q[2];	/* blue */
1496 				*w++ = q[1];	/* green */
1497 				*w++ = q[0];	/* red */
1498 			}
1499 		}
1500 	}
1501 
1502 	b.rgba = (ulong*)(buf-copyalpha);
1503 
1504 	if(convgrey){
1505 		b.grey = buf;
1506 		b.red = b.blu = b.grn = buf;
1507 		b.delta = 1+copyalpha;
1508 	}else{
1509 		b.blu = buf;
1510 		b.grn = buf+1;
1511 		b.red = buf+2;
1512 		b.grey = nil;
1513 		b.delta = 3+copyalpha;
1514 	}
1515 	return b;
1516 }
1517 
1518 static void
writecmap(Param * p,uchar * w,Buffer src)1519 writecmap(Param *p, uchar *w, Buffer src)
1520 {
1521 	uchar *cmap, *red, *grn, *blu;
1522 	int i, dx, delta;
1523 
1524 	cmap = p->img->cmap->rgb2cmap;
1525 
1526 	delta = src.delta;
1527 	red= src.red;
1528 	grn = src.grn;
1529 	blu = src.blu;
1530 
1531 	dx = p->dx;
1532 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1533 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1534 }
1535 
1536 #define DBG if(0)
1537 static Buffer
readbyte(Param * p,uchar * buf,int y)1538 readbyte(Param *p, uchar *buf, int y)
1539 {
1540 	Buffer b;
1541 	Memimage *img;
1542 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1543 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1544 	uchar ured, ugrn, ublu;
1545 	ulong u;
1546 
1547 	img = p->img;
1548 	begin = p->bytey0s + y*p->bwidth;
1549 	r = p->bytermin + y*p->bwidth;
1550 	end = p->bytey0e + y*p->bwidth;
1551 
1552 	w = buf;
1553 	dx = p->dx;
1554 	nb = img->depth/8;
1555 
1556 	convgrey = p->convgrey;	/* convert rgb to grey */
1557 	isgrey = img->flags&Fgrey;
1558 	alphaonly = p->alphaonly;
1559 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1560 
1561 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1562 	/* if we can, avoid processing everything */
1563 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1564 		memset(&b, 0, sizeof b);
1565 		if(p->needbuf){
1566 			memmove(buf, r, dx*nb);
1567 			r = buf;
1568 		}
1569 		b.rgba = (ulong*)r;
1570 		if(copyalpha)
1571 			b.alpha = r+img->shift[CAlpha]/8;
1572 		else
1573 			b.alpha = &ones;
1574 		if(isgrey){
1575 			b.grey = r+img->shift[CGrey]/8;
1576 			b.red = b.grn = b.blu = b.grey;
1577 		}else{
1578 			b.red = r+img->shift[CRed]/8;
1579 			b.grn = r+img->shift[CGreen]/8;
1580 			b.blu = r+img->shift[CBlue]/8;
1581 		}
1582 		b.delta = nb;
1583 		return b;
1584 	}
1585 
1586 DBG print("2\n");
1587 	rrepl = replbit[img->nbits[CRed]];
1588 	grepl = replbit[img->nbits[CGreen]];
1589 	brepl = replbit[img->nbits[CBlue]];
1590 	arepl = replbit[img->nbits[CAlpha]];
1591 	krepl = replbit[img->nbits[CGrey]];
1592 
1593 	for(i=0; i<dx; i++){
1594 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1595 		if(copyalpha) {
1596 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1597 DBG print("a %x\n", w[-1]);
1598 		}
1599 
1600 		if(isgrey)
1601 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1602 		else if(!alphaonly){
1603 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1604 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1605 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1606 			if(convgrey){
1607 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1608 				*w++ = RGB2K(ured, ugrn, ublu);
1609 DBG print("%x\n", w[-1]);
1610 			}else{
1611 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1612 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1613 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1614 			}
1615 		}
1616 		r += nb;
1617 		if(r == end)
1618 			r = begin;
1619 	}
1620 
1621 	b.alpha = copyalpha ? buf : &ones;
1622 	b.rgba = (ulong*)buf;
1623 	if(alphaonly){
1624 		b.red = b.grn = b.blu = b.grey = nil;
1625 		if(!copyalpha)
1626 			b.rgba = nil;
1627 		b.delta = 1;
1628 	}else if(isgrey || convgrey){
1629 		b.grey = buf+copyalpha;
1630 		b.red = b.grn = b.blu = buf+copyalpha;
1631 		b.delta = copyalpha+1;
1632 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1633 	}else{
1634 		b.blu = buf+copyalpha;
1635 		b.grn = buf+copyalpha+1;
1636 		b.grey = nil;
1637 		b.red = buf+copyalpha+2;
1638 		b.delta = copyalpha+3;
1639 	}
1640 	return b;
1641 }
1642 #undef DBG
1643 
1644 #define DBG if(0)
1645 static void
writebyte(Param * p,uchar * w,Buffer src)1646 writebyte(Param *p, uchar *w, Buffer src)
1647 {
1648 	Memimage *img;
1649 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1650 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1651 	ulong u, mask;
1652 
1653 	img = p->img;
1654 
1655 	red = src.red;
1656 	grn = src.grn;
1657 	blu = src.blu;
1658 	alpha = src.alpha;
1659 	delta = src.delta;
1660 	grey = src.grey;
1661 	dx = p->dx;
1662 
1663 	nb = img->depth/8;
1664 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1665 
1666 	isalpha = img->flags&Falpha;
1667 	isgrey = img->flags&Fgrey;
1668 	adelta = src.delta;
1669 
1670 	if(isalpha && (alpha == nil || alpha == &ones)){
1671 		ff = 0xFF;
1672 		alpha = &ff;
1673 		adelta = 0;
1674 	}
1675 
1676 	for(i=0; i<dx; i++){
1677 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1678 DBG print("u %.8lux...", u);
1679 		u &= mask;
1680 DBG print("&mask %.8lux...", u);
1681 		if(isgrey){
1682 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1683 DBG print("|grey %.8lux...", u);
1684 			grey += delta;
1685 		}else{
1686 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1687 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1688 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1689 			red += delta;
1690 			grn += delta;
1691 			blu += delta;
1692 DBG print("|rgb %.8lux...", u);
1693 		}
1694 
1695 		if(isalpha){
1696 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1697 			alpha += adelta;
1698 DBG print("|alpha %.8lux...", u);
1699 		}
1700 
1701 		w[0] = u;
1702 		w[1] = u>>8;
1703 		w[2] = u>>16;
1704 		w[3] = u>>24;
1705 		w += nb;
1706 	}
1707 }
1708 #undef DBG
1709 
1710 static Readfn*
readfn(Memimage * img)1711 readfn(Memimage *img)
1712 {
1713 	if(img->depth < 8)
1714 		return readnbit;
1715 	if(img->nbits[CMap] == 8)
1716 		return readcmap;
1717 	return readbyte;
1718 }
1719 
1720 static Readfn*
readalphafn(Memimage * m)1721 readalphafn(Memimage *m)
1722 {
1723 	USED(m);
1724 	return readbyte;
1725 }
1726 
1727 static Writefn*
writefn(Memimage * img)1728 writefn(Memimage *img)
1729 {
1730 	if(img->depth < 8)
1731 		return writenbit;
1732 	if(img->chan == CMAP8)
1733 		return writecmap;
1734 	return writebyte;
1735 }
1736 
1737 static void
nullwrite(Param * p,uchar * s,Buffer b)1738 nullwrite(Param *p, uchar *s, Buffer b)
1739 {
1740 	USED(p);
1741 	USED(s);
1742 }
1743 
1744 static Buffer
readptr(Param * p,uchar * s,int y)1745 readptr(Param *p, uchar *s, int y)
1746 {
1747 	Buffer b;
1748 	uchar *q;
1749 
1750 	USED(s);
1751 	q = p->bytermin + y*p->bwidth;
1752 	b.red = q;	/* ptr to data */
1753 	b.grn = b.blu = b.grey = b.alpha = nil;
1754 	b.rgba = (ulong*)q;
1755 	b.delta = p->img->depth/8;
1756 	return b;
1757 }
1758 
1759 static Buffer
boolmemmove(Buffer bdst,Buffer bsrc,Buffer b1,int dx,int i,int o)1760 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1761 {
1762 	USED(i);
1763 	USED(o);
1764 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1765 	return bdst;
1766 }
1767 
1768 static Buffer
boolcopy8(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1769 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1770 {
1771 	uchar *m, *r, *w, *ew;
1772 
1773 	USED(i);
1774 	USED(o);
1775 	m = bmask.grey;
1776 	w = bdst.red;
1777 	r = bsrc.red;
1778 	ew = w+dx;
1779 	for(; w < ew; w++,r++)
1780 		if(*m++)
1781 			*w = *r;
1782 	return bdst;	/* not used */
1783 }
1784 
1785 static Buffer
boolcopy16(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1786 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1787 {
1788 	uchar *m;
1789 	ushort *r, *w, *ew;
1790 
1791 	USED(i);
1792 	USED(o);
1793 	m = bmask.grey;
1794 	w = (ushort*)bdst.red;
1795 	r = (ushort*)bsrc.red;
1796 	ew = w+dx;
1797 	for(; w < ew; w++,r++)
1798 		if(*m++)
1799 			*w = *r;
1800 	return bdst;	/* not used */
1801 }
1802 
1803 static Buffer
boolcopy24(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1804 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1805 {
1806 	uchar *m;
1807 	uchar *r, *w, *ew;
1808 
1809 	USED(i);
1810 	USED(o);
1811 	m = bmask.grey;
1812 	w = bdst.red;
1813 	r = bsrc.red;
1814 	ew = w+dx*3;
1815 	while(w < ew){
1816 		if(*m++){
1817 			*w++ = *r++;
1818 			*w++ = *r++;
1819 			*w++ = *r++;
1820 		}else{
1821 			w += 3;
1822 			r += 3;
1823 		}
1824 	}
1825 	return bdst;	/* not used */
1826 }
1827 
1828 static Buffer
boolcopy32(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1829 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1830 {
1831 	uchar *m;
1832 	ulong *r, *w, *ew;
1833 
1834 	USED(i);
1835 	USED(o);
1836 	m = bmask.grey;
1837 	w = (ulong*)bdst.red;
1838 	r = (ulong*)bsrc.red;
1839 	ew = w+dx;
1840 	for(; w < ew; w++,r++)
1841 		if(*m++)
1842 			*w = *r;
1843 	return bdst;	/* not used */
1844 }
1845 
1846 static Buffer
genconv(Param * p,uchar * buf,int y)1847 genconv(Param *p, uchar *buf, int y)
1848 {
1849 	Buffer b;
1850 	int nb;
1851 	uchar *r, *w, *ew;
1852 
1853 	/* read from source into RGB format in convbuf */
1854 	b = p->convreadcall(p, p->convbuf, y);
1855 
1856 	/* write RGB format into dst format in buf */
1857 	p->convwritecall(p->convdpar, buf, b);
1858 
1859 	if(p->convdx){
1860 		nb = p->convdpar->img->depth/8;
1861 		r = buf;
1862 		w = buf+nb*p->dx;
1863 		ew = buf+nb*p->convdx;
1864 		while(w<ew)
1865 			*w++ = *r++;
1866 	}
1867 
1868 	b.red = buf;
1869 	b.blu = b.grn = b.grey = b.alpha = nil;
1870 	b.rgba = (ulong*)buf;
1871 	b.delta = 0;
1872 
1873 	return b;
1874 }
1875 
1876 static Readfn*
convfn(Memimage * dst,Param * dpar,Memimage * src,Param * spar)1877 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
1878 {
1879 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1880 //if(drawdebug) iprint("readptr...");
1881 		return readptr;
1882 	}
1883 
1884 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1885 		/* cheat because we know the replicated value is exactly the color map entry. */
1886 //if(drawdebug) iprint("Readnbit...");
1887 		return readnbit;
1888 	}
1889 
1890 	spar->convreadcall = readfn(src);
1891 	spar->convwritecall = writefn(dst);
1892 	spar->convdpar = dpar;
1893 
1894 	/* allocate a conversion buffer */
1895 	spar->convbufoff = ndrawbuf;
1896 	ndrawbuf += spar->dx*4;
1897 
1898 	if(spar->dx > Dx(spar->img->r)){
1899 		spar->convdx = spar->dx;
1900 		spar->dx = Dx(spar->img->r);
1901 	}
1902 
1903 //if(drawdebug) iprint("genconv...");
1904 	return genconv;
1905 }
1906 
1907 ulong
_pixelbits(Memimage * i,Point pt)1908 _pixelbits(Memimage *i, Point pt)
1909 {
1910 	uchar *p;
1911 	ulong val;
1912 	int off, bpp, npack;
1913 
1914 	val = 0;
1915 	p = byteaddr(i, pt);
1916 	switch(bpp=i->depth){
1917 	case 1:
1918 	case 2:
1919 	case 4:
1920 		npack = 8/bpp;
1921 		off = pt.x%npack;
1922 		val = p[0] >> bpp*(npack-1-off);
1923 		val &= (1<<bpp)-1;
1924 		break;
1925 	case 8:
1926 		val = p[0];
1927 		break;
1928 	case 16:
1929 		val = p[0]|(p[1]<<8);
1930 		break;
1931 	case 24:
1932 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1933 		break;
1934 	case 32:
1935 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1936 		break;
1937 	}
1938 	while(bpp<32){
1939 		val |= val<<bpp;
1940 		bpp *= 2;
1941 	}
1942 	return val;
1943 }
1944 
1945 static Calcfn*
boolcopyfn(Memimage * img,Memimage * mask)1946 boolcopyfn(Memimage *img, Memimage *mask)
1947 {
1948 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1949 		return boolmemmove;
1950 
1951 	switch(img->depth){
1952 	case 8:
1953 		return boolcopy8;
1954 	case 16:
1955 		return boolcopy16;
1956 	case 24:
1957 		return boolcopy24;
1958 	case 32:
1959 		return boolcopy32;
1960 	default:
1961 		assert(0 /* boolcopyfn */);
1962 	}
1963 	return nil;
1964 }
1965 
1966 /*
1967  * Optimized draw for filling and scrolling; uses memset and memmove.
1968  *
1969 static void
1970 memsetb(void *vp, uchar val, int n)
1971 {
1972 	uchar *p, *ep;
1973 
1974 	p = vp;
1975 	ep = p+n;
1976 	while(p<ep)
1977 		*p++ = val;
1978 }
1979 */
1980 
1981 static void
memsets(void * vp,ushort val,int n)1982 memsets(void *vp, ushort val, int n)
1983 {
1984 	ushort *p, *ep;
1985 
1986 	p = vp;
1987 	ep = p+n;
1988 	while(p<ep)
1989 		*p++ = val;
1990 }
1991 
1992 static void
memsetl(void * vp,ulong val,int n)1993 memsetl(void *vp, ulong val, int n)
1994 {
1995 	ulong *p, *ep;
1996 
1997 	p = vp;
1998 	ep = p+n;
1999 	while(p<ep)
2000 		*p++ = val;
2001 }
2002 
2003 static void
memset24(void * vp,ulong val,int n)2004 memset24(void *vp, ulong val, int n)
2005 {
2006 	uchar *p, *ep;
2007 	uchar a,b,c;
2008 
2009 	p = vp;
2010 	ep = p+3*n;
2011 	a = val;
2012 	b = val>>8;
2013 	c = val>>16;
2014 	while(p<ep){
2015 		*p++ = a;
2016 		*p++ = b;
2017 		*p++ = c;
2018 	}
2019 }
2020 
2021 ulong
_imgtorgba(Memimage * img,ulong val)2022 _imgtorgba(Memimage *img, ulong val)
2023 {
2024 	uchar r, g, b, a;
2025 	int nb, ov, v;
2026 	ulong chan;
2027 	uchar *p;
2028 
2029 	a = 0xFF;
2030 	r = g = b = 0xAA;	/* garbage */
2031 	for(chan=img->chan; chan; chan>>=8){
2032 		nb = NBITS(chan);
2033 		ov = v = val&((1<<nb)-1);
2034 		val >>= nb;
2035 
2036 		while(nb < 8){
2037 			v |= v<<nb;
2038 			nb *= 2;
2039 		}
2040 		v >>= (nb-8);
2041 
2042 		switch(TYPE(chan)){
2043 		case CRed:
2044 			r = v;
2045 			break;
2046 		case CGreen:
2047 			g = v;
2048 			break;
2049 		case CBlue:
2050 			b = v;
2051 			break;
2052 		case CAlpha:
2053 			a = v;
2054 			break;
2055 		case CGrey:
2056 			r = g = b = v;
2057 			break;
2058 		case CMap:
2059 			p = img->cmap->cmap2rgb+3*ov;
2060 			r = *p++;
2061 			g = *p++;
2062 			b = *p;
2063 			break;
2064 		}
2065 	}
2066 	return (r<<24)|(g<<16)|(b<<8)|a;
2067 }
2068 
2069 ulong
_rgbatoimg(Memimage * img,ulong rgba)2070 _rgbatoimg(Memimage *img, ulong rgba)
2071 {
2072 	ulong chan;
2073 	int d, nb;
2074 	ulong v;
2075 	uchar *p, r, g, b, a, m;
2076 
2077 	v = 0;
2078 	r = rgba>>24;
2079 	g = rgba>>16;
2080 	b = rgba>>8;
2081 	a = rgba;
2082 	d = 0;
2083 	for(chan=img->chan; chan; chan>>=8){
2084 		nb = NBITS(chan);
2085 		switch(TYPE(chan)){
2086 		case CRed:
2087 			v |= (r>>(8-nb))<<d;
2088 			break;
2089 		case CGreen:
2090 			v |= (g>>(8-nb))<<d;
2091 			break;
2092 		case CBlue:
2093 			v |= (b>>(8-nb))<<d;
2094 			break;
2095 		case CAlpha:
2096 			v |= (a>>(8-nb))<<d;
2097 			break;
2098 		case CMap:
2099 			p = img->cmap->rgb2cmap;
2100 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2101 			v |= (m>>(8-nb))<<d;
2102 			break;
2103 		case CGrey:
2104 			m = RGB2K(r,g,b);
2105 			v |= (m>>(8-nb))<<d;
2106 			break;
2107 		}
2108 		d += nb;
2109 	}
2110 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2111 	return v;
2112 }
2113 
2114 #define DBG if(0)
2115 static int
memoptdraw(Memdrawparam * par)2116 memoptdraw(Memdrawparam *par)
2117 {
2118 	int m, y, dy, dx, op;
2119 	ulong v;
2120 	Memimage *src;
2121 	Memimage *dst;
2122 
2123 	dx = Dx(par->r);
2124 	dy = Dy(par->r);
2125 	src = par->src;
2126 	dst = par->dst;
2127 	op = par->op;
2128 
2129 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2130 	/*
2131 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2132 	 * destination format and just replicate with memset.
2133 	 */
2134 	m = Simplesrc|Simplemask|Fullmask;
2135 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2136 		uchar *dp, p[4];
2137 		int d, dwid, ppb, np, nb;
2138 		uchar lm, rm;
2139 
2140 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2141 		dwid = dst->width*sizeof(ulong);
2142 		dp = byteaddr(dst, par->r.min);
2143 		v = par->sdval;
2144 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2145 		switch(dst->depth){
2146 		case 1:
2147 		case 2:
2148 		case 4:
2149 			for(d=dst->depth; d<8; d*=2)
2150 				v |= (v<<d);
2151 			ppb = 8/dst->depth;	/* pixels per byte */
2152 			m = ppb-1;
2153 			/* left edge */
2154 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2155 			dx -= (ppb-np);
2156 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2157 			lm = (1<<nb)-1;
2158 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
2159 
2160 			/* right edge */
2161 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2162 			dx -= np;
2163 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2164 			rm = ~((1<<nb)-1);
2165 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
2166 
2167 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2168 			/* lm, rm are masks that are 1 where we should touch the bits */
2169 			if(dx < 0){	/* just one byte */
2170 				lm &= rm;
2171 				for(y=0; y<dy; y++, dp+=dwid)
2172 					*dp ^= (v ^ *dp) & lm;
2173 			}else if(dx == 0){	/* no full bytes */
2174 				if(lm)
2175 					dwid--;
2176 
2177 				for(y=0; y<dy; y++, dp+=dwid){
2178 					if(lm){
2179 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2180 						*dp ^= (v ^ *dp) & lm;
2181 						dp++;
2182 					}
2183 					*dp ^= (v ^ *dp) & rm;
2184 				}
2185 			}else{		/* full bytes in middle */
2186 				dx /= ppb;
2187 				if(lm)
2188 					dwid--;
2189 				dwid -= dx;
2190 
2191 				for(y=0; y<dy; y++, dp+=dwid){
2192 					if(lm){
2193 						*dp ^= (v ^ *dp) & lm;
2194 						dp++;
2195 					}
2196 					memset(dp, v, dx);
2197 					dp += dx;
2198 					*dp ^= (v ^ *dp) & rm;
2199 				}
2200 			}
2201 			return 1;
2202 		case 8:
2203 			for(y=0; y<dy; y++, dp+=dwid)
2204 				memset(dp, v, dx);
2205 			return 1;
2206 		case 16:
2207 			p[0] = v;		/* make little endian */
2208 			p[1] = v>>8;
2209 			v = *(ushort*)p;
2210 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2211 	dp, dx, dy, dwid);
2212 			for(y=0; y<dy; y++, dp+=dwid)
2213 				memsets(dp, v, dx);
2214 			return 1;
2215 		case 24:
2216 			for(y=0; y<dy; y++, dp+=dwid)
2217 				memset24(dp, v, dx);
2218 			return 1;
2219 		case 32:
2220 			p[0] = v;		/* make little endian */
2221 			p[1] = v>>8;
2222 			p[2] = v>>16;
2223 			p[3] = v>>24;
2224 			v = *(ulong*)p;
2225 			for(y=0; y<dy; y++, dp+=dwid)
2226 				memsetl(dp, v, dx);
2227 			return 1;
2228 		default:
2229 			assert(0 /* bad dest depth in memoptdraw */);
2230 		}
2231 	}
2232 
2233 	/*
2234 	 * If no source alpha, an opaque mask, we can just copy the
2235 	 * source onto the destination.  If the channels are the same and
2236 	 * the source is not replicated, memmove suffices.
2237 	 */
2238 	m = Simplemask|Fullmask;
2239 	if((par->state&(m|Replsrc))==m && src->depth >= 8
2240 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2241 		uchar *sp, *dp;
2242 		long swid, dwid, nb;
2243 		int dir;
2244 
2245 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2246 			dir = -1;
2247 		else
2248 			dir = 1;
2249 
2250 		swid = src->width*sizeof(ulong);
2251 		dwid = dst->width*sizeof(ulong);
2252 		sp = byteaddr(src, par->sr.min);
2253 		dp = byteaddr(dst, par->r.min);
2254 		if(dir == -1){
2255 			sp += (dy-1)*swid;
2256 			dp += (dy-1)*dwid;
2257 			swid = -swid;
2258 			dwid = -dwid;
2259 		}
2260 		nb = (dx*src->depth)/8;
2261 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2262 			memmove(dp, sp, nb);
2263 		return 1;
2264 	}
2265 
2266 	/*
2267 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2268 	 * they're all bit aligned, we can just use bit operators.  This happens
2269 	 * when we're manipulating boolean masks, e.g. in the arc code.
2270 	 */
2271 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
2272 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
2273 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2274 		uchar *sp, *dp, *mp;
2275 		uchar lm, rm;
2276 		long swid, dwid, mwid;
2277 		int i, x, dir;
2278 
2279 		sp = byteaddr(src, par->sr.min);
2280 		dp = byteaddr(dst, par->r.min);
2281 		mp = byteaddr(par->mask, par->mr.min);
2282 		swid = src->width*sizeof(ulong);
2283 		dwid = dst->width*sizeof(ulong);
2284 		mwid = par->mask->width*sizeof(ulong);
2285 
2286 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2287 			dir = -1;
2288 		}else
2289 			dir = 1;
2290 
2291 		lm = 0xFF>>(par->r.min.x&7);
2292 		rm = 0xFF<<(8-(par->r.max.x&7));
2293 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2294 
2295 		if(dx < 0){	/* one byte wide */
2296 			lm &= rm;
2297 			if(dir == -1){
2298 				dp += dwid*(dy-1);
2299 				sp += swid*(dy-1);
2300 				mp += mwid*(dy-1);
2301 				dwid = -dwid;
2302 				swid = -swid;
2303 				mwid = -mwid;
2304 			}
2305 			for(y=0; y<dy; y++){
2306 				*dp ^= (*dp ^ *sp) & *mp & lm;
2307 				dp += dwid;
2308 				sp += swid;
2309 				mp += mwid;
2310 			}
2311 			return 1;
2312 		}
2313 
2314 		dx /= 8;
2315 		if(dir == 1){
2316 			i = (lm!=0)+dx+(rm!=0);
2317 			mwid -= i;
2318 			swid -= i;
2319 			dwid -= i;
2320 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2321 				if(lm){
2322 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2323 					dp++;
2324 				}
2325 				for(x=0; x<dx; x++){
2326 					*dp ^= (*dp ^ *sp++) & *mp++;
2327 					dp++;
2328 				}
2329 				if(rm){
2330 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2331 					dp++;
2332 				}
2333 			}
2334 			return 1;
2335 		}else{
2336 		/* dir == -1 */
2337 			i = (lm!=0)+dx+(rm!=0);
2338 			dp += dwid*(dy-1)+i-1;
2339 			sp += swid*(dy-1)+i-1;
2340 			mp += mwid*(dy-1)+i-1;
2341 			dwid = -dwid+i;
2342 			swid = -swid+i;
2343 			mwid = -mwid+i;
2344 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2345 				if(rm){
2346 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2347 					dp--;
2348 				}
2349 				for(x=0; x<dx; x++){
2350 					*dp ^= (*dp ^ *sp--) & *mp--;
2351 					dp--;
2352 				}
2353 				if(lm){
2354 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2355 					dp--;
2356 				}
2357 			}
2358 		}
2359 		return 1;
2360 	}
2361 	return 0;
2362 }
2363 #undef DBG
2364 
2365 /*
2366  * Boolean character drawing.
2367  * Solid opaque color through a 1-bit greyscale mask.
2368  */
2369 #define DBG if(0)
2370 static int
chardraw(Memdrawparam * par)2371 chardraw(Memdrawparam *par)
2372 {
2373 	ulong bits;
2374 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2375 	ulong v, maskwid, dstwid;
2376 	uchar *wp, *rp, *q, *wc;
2377 	ushort *ws;
2378 	ulong *wl;
2379 	uchar sp[4];
2380 	Rectangle r, mr;
2381 	Memimage *mask, *src, *dst;
2382 
2383 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2384 		par->mask->flags, par->mask->depth, par->src->flags,
2385 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2386 
2387 	mask = par->mask;
2388 	src = par->src;
2389 	dst = par->dst;
2390 	r = par->r;
2391 	mr = par->mr;
2392 	op = par->op;
2393 
2394 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2395 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2396 	|| op != SoverD)
2397 		return 0;
2398 
2399 //if(drawdebug) iprint("chardraw...");
2400 
2401 	depth = mask->depth;
2402 	maskwid = mask->width*sizeof(ulong);
2403 	rp = byteaddr(mask, mr.min);
2404 	npack = 8/depth;
2405 	bsh = (mr.min.x % npack) * depth;
2406 
2407 	wp = byteaddr(dst, r.min);
2408 	dstwid = dst->width*sizeof(ulong);
2409 DBG print("bsh %d\n", bsh);
2410 	dy = Dy(r);
2411 	dx = Dx(r);
2412 
2413 	ddepth = dst->depth;
2414 
2415 	/*
2416 	 * for loop counts from bsh to bsh+dx
2417 	 *
2418 	 * we want the bottom bits to be the amount
2419 	 * to shift the pixels down, so for n≡0 (mod 8) we want
2420 	 * bottom bits 7.  for n≡1, 6, etc.
2421 	 * the bits come from -n-1.
2422 	 */
2423 
2424 	bx = -bsh-1;
2425 	ex = -bsh-1-dx;
2426 	SET(bits);
2427 	v = par->sdval;
2428 
2429 	/* make little endian */
2430 	sp[0] = v;
2431 	sp[1] = v>>8;
2432 	sp[2] = v>>16;
2433 	sp[3] = v>>24;
2434 
2435 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2436 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2437 		q = rp;
2438 		if(bsh)
2439 			bits = *q++;
2440 		switch(ddepth){
2441 		case 8:
2442 //if(drawdebug) iprint("8loop...");
2443 			wc = wp;
2444 			for(x=bx; x>ex; x--, wc++){
2445 				i = x&7;
2446 				if(i == 8-1)
2447 					bits = *q++;
2448 DBG print("bits %lux sh %d...", bits, i);
2449 				if((bits>>i)&1)
2450 					*wc = v;
2451 			}
2452 			break;
2453 		case 16:
2454 			ws = (ushort*)wp;
2455 			v = *(ushort*)sp;
2456 			for(x=bx; x>ex; x--, ws++){
2457 				i = x&7;
2458 				if(i == 8-1)
2459 					bits = *q++;
2460 DBG print("bits %lux sh %d...", bits, i);
2461 				if((bits>>i)&1)
2462 					*ws = v;
2463 			}
2464 			break;
2465 		case 24:
2466 			wc = wp;
2467 			for(x=bx; x>ex; x--, wc+=3){
2468 				i = x&7;
2469 				if(i == 8-1)
2470 					bits = *q++;
2471 DBG print("bits %lux sh %d...", bits, i);
2472 				if((bits>>i)&1){
2473 					wc[0] = sp[0];
2474 					wc[1] = sp[1];
2475 					wc[2] = sp[2];
2476 				}
2477 			}
2478 			break;
2479 		case 32:
2480 			wl = (ulong*)wp;
2481 			v = *(ulong*)sp;
2482 			for(x=bx; x>ex; x--, wl++){
2483 				i = x&7;
2484 				if(i == 8-1)
2485 					bits = *q++;
2486 DBG iprint("bits %lux sh %d...", bits, i);
2487 				if((bits>>i)&1)
2488 					*wl = v;
2489 			}
2490 			break;
2491 		}
2492 	}
2493 
2494 DBG print("\n");
2495 	return 1;
2496 }
2497 #undef DBG
2498 
2499 
2500 /*
2501  * Fill entire byte with replicated (if necessary) copy of source pixel,
2502  * assuming destination ldepth is >= source ldepth.
2503  *
2504  * This code is just plain wrong for >8bpp.
2505  *
2506 ulong
2507 membyteval(Memimage *src)
2508 {
2509 	int i, val, bpp;
2510 	uchar uc;
2511 
2512 	unloadmemimage(src, src->r, &uc, 1);
2513 	bpp = src->depth;
2514 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2515 	uc &= ~(0xFF>>bpp);
2516 	// pixel value is now in high part of byte. repeat throughout byte
2517 	val = uc;
2518 	for(i=bpp; i<8; i<<=1)
2519 		val |= val>>i;
2520 	return val;
2521 }
2522  *
2523  */
2524 
2525 void
_memfillcolor(Memimage * i,ulong val)2526 _memfillcolor(Memimage *i, ulong val)
2527 {
2528 	ulong bits;
2529 	int d, y;
2530 
2531 	if(val == DNofill)
2532 		return;
2533 
2534 	bits = _rgbatoimg(i, val);
2535 	switch(i->depth){
2536 	case 24:	/* 24-bit images suck */
2537 		for(y=i->r.min.y; y<i->r.max.y; y++)
2538 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2539 		break;
2540 	default:	/* 1, 2, 4, 8, 16, 32 */
2541 		for(d=i->depth; d<32; d*=2)
2542 			bits = (bits << d) | bits;
2543 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2544 		break;
2545 	}
2546 }
2547 
2548