1 /*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22
23 #if SDL_HAVE_RLE
24
25 /*
26 * RLE encoding for software colorkey and alpha-channel acceleration
27 *
28 * Original version by Sam Lantinga
29 *
30 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
31 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
32 * format, encoder and blitter.
33 *
34 * Many thanks to Xark and johns for hints, benchmarks and useful comments
35 * leading to this code.
36 *
37 * Welcome to Macro Mayhem.
38 */
39
40 /*
41 * The encoding translates the image data to a stream of segments of the form
42 *
43 * <skip> <run> <data>
44 *
45 * where <skip> is the number of transparent pixels to skip,
46 * <run> is the number of opaque pixels to blit,
47 * and <data> are the pixels themselves.
48 *
49 * This basic structure is used both for colorkeyed surfaces, used for simple
50 * binary transparency and for per-surface alpha blending, and for surfaces
51 * with per-pixel alpha. The details differ, however:
52 *
53 * Encoding of colorkeyed surfaces:
54 *
55 * Encoded pixels always have the same format as the target surface.
56 * <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
57 * where they are 16 bit. This makes the pixel data aligned at all times.
58 * Segments never wrap around from one scan line to the next.
59 *
60 * The end of the sequence is marked by a zero <skip>,<run> pair at the
61 * beginning of a line.
62 *
63 * Encoding of surfaces with per-pixel alpha:
64 *
65 * The sequence begins with a struct RLEDestFormat describing the target
66 * pixel format, to provide reliable un-encoding.
67 *
68 * Each scan line is encoded twice: First all completely opaque pixels,
69 * encoded in the target format as described above, and then all
70 * partially transparent (translucent) pixels (where 1 <= alpha <= 254),
71 * in the following 32-bit format:
72 *
73 * For 32-bit targets, each pixel has the target RGB format but with
74 * the alpha value occupying the highest 8 bits. The <skip> and <run>
75 * counts are 16 bit.
76 *
77 * For 16-bit targets, each pixel has the target RGB format, but with
78 * the middle component (usually green) shifted 16 steps to the left,
79 * and the hole filled with the 5 most significant bits of the alpha value.
80 * i.e. if the target has the format rrrrrggggggbbbbb,
81 * the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
82 * The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
83 * for the translucent lines. Two padding bytes may be inserted
84 * before each translucent line to keep them 32-bit aligned.
85 *
86 * The end of the sequence is marked by a zero <skip>,<run> pair at the
87 * beginning of an opaque line.
88 */
89
90 #include "SDL_video.h"
91 #include "SDL_sysvideo.h"
92 #include "SDL_blit.h"
93 #include "SDL_RLEaccel_c.h"
94
/* Smaller of two values; either argument may be evaluated twice. */
#if !defined(MIN)
#define MIN(a, b) ((b) > (a) ? (a) : (b))
#endif
98
/* Copy `len` pixels of `bpp` bytes each from `from` to `to`. */
#define PIXEL_COPY(to, from, len, bpp)                  \
    SDL_memcpy((to), (from), (bpp) * (size_t)(len))
101
102 /*
103 * Various colorkey blit methods, for opaque and per-surface alpha
104 */
105
/*
 * Opaque run: a straight copy of `length` pixels of `bpp` bytes each.
 * `alpha` is accepted only so this macro has the same parameter list as
 * the blending blitters.
 */
#define OPAQUE_BLIT(to, from, length, bpp, alpha)       \
    SDL_memcpy(to, from, (size_t)(length) * (bpp))
108
/*
 * For 32bpp pixels on the form 0x00rrggbb:
 * The middle component (mask 0xff00) is interpolated on its own, which
 * lets the two outer components (mask 0xff00ff) be interpolated together
 * in one multiply: the 8-bit gap to the left of each of them absorbs the
 * product, so the lanes never collide.  Works for any RGB permutation.
 *
 * Blends `length` pixels from `from` over `to` using one constant `alpha`
 * (alpha == 0 leaves the destination unchanged).  Bits 24-31 of every
 * written pixel are zero.  `bpp` is unused; it is present so that all
 * blitters share one parameter list.  Each argument is evaluated once.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha)                      \
    do {                                                                    \
        const Uint32 *blit_src_ = (const Uint32 *)(from);                   \
        Uint32 *blit_dst_ = (Uint32 *)(to);                                 \
        const Uint32 blit_a_ = (alpha);                                     \
        const int blit_n_ = (int)(length);                                  \
        int blit_i_;                                                        \
        for (blit_i_ = 0; blit_i_ < blit_n_; blit_i_++) {                   \
            const Uint32 blit_s_ = blit_src_[blit_i_];                      \
            const Uint32 blit_d_ = blit_dst_[blit_i_];                      \
            /* outer two components, blended in parallel */                 \
            const Uint32 blit_s_rb_ = blit_s_ & 0xff00ff;                   \
            Uint32 blit_rb_ = blit_d_ & 0xff00ff;                           \
            /* middle component */                                          \
            const Uint32 blit_s_g_ = blit_s_ & 0xff00;                      \
            Uint32 blit_g_ = blit_d_ & 0xff00;                              \
            blit_rb_ = (blit_rb_                                            \
                        + ((blit_s_rb_ - blit_rb_) * blit_a_ >> 8))         \
                       & 0xff00ff;                                          \
            blit_g_ = (blit_g_                                              \
                       + ((blit_s_g_ - blit_g_) * blit_a_ >> 8))            \
                      & 0xff00;                                             \
            blit_dst_[blit_i_] = blit_rb_ | blit_g_;                        \
        }                                                                   \
    } while (0)
133
/*
 * For 16bpp pixels we can go a step further: put the middle component
 * in the high 16 bits of a 32 bit word, and process all three RGB
 * components at the same time. Since the smallest gap is here just
 * 5 bits, we have to scale alpha down to 5 bits as well.
 *
 * RGB565 variant (spread mask 0x07e0f81f).  Blends `length` pixels from
 * `from` over `to` with the constant `alpha`; `bpp` is unused.  Each
 * argument is evaluated once.
 */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha)                      \
    do {                                                                    \
        const Uint16 *blit_src_ = (const Uint16 *)(from);                   \
        Uint16 *blit_dst_ = (Uint16 *)(to);                                 \
        const Uint32 blit_a5_ = (alpha) >> 3;   /* 8-bit -> 5-bit alpha */  \
        const int blit_n_ = (int)(length);                                  \
        int blit_i_;                                                        \
        for (blit_i_ = 0; blit_i_ < blit_n_; blit_i_++) {                   \
            Uint32 blit_s_ = blit_src_[blit_i_];                            \
            Uint32 blit_d_ = blit_dst_[blit_i_];                            \
            /* spread: middle component to the high half-word */            \
            blit_s_ = (blit_s_ | blit_s_ << 16) & 0x07e0f81f;               \
            blit_d_ = (blit_d_ | blit_d_ << 16) & 0x07e0f81f;               \
            blit_d_ += (blit_s_ - blit_d_) * blit_a5_ >> 5;                 \
            blit_d_ &= 0x07e0f81f;                                          \
            /* fold the high half-word back into place */                   \
            blit_dst_[blit_i_] = (Uint16)(blit_d_ | blit_d_ >> 16);         \
        }                                                                   \
    } while (0)
156
/*
 * RGB555 counterpart of the 16bpp constant-alpha blender: the middle
 * component is moved to the high half-word (spread mask 0x03e07c1f) so the
 * three components are interpolated with one multiply, using alpha scaled
 * down to 5 bits.  Blends `length` pixels from `from` over `to`; `bpp` is
 * unused.  Each argument is evaluated once.
 */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha)                      \
    do {                                                                    \
        const Uint16 *blit_src_ = (const Uint16 *)(from);                   \
        Uint16 *blit_dst_ = (Uint16 *)(to);                                 \
        const Uint32 blit_a5_ = (alpha) >> 3;   /* 8-bit -> 5-bit alpha */  \
        const int blit_n_ = (int)(length);                                  \
        int blit_i_;                                                        \
        for (blit_i_ = 0; blit_i_ < blit_n_; blit_i_++) {                   \
            Uint32 blit_s_ = blit_src_[blit_i_];                            \
            Uint32 blit_d_ = blit_dst_[blit_i_];                            \
            blit_s_ = (blit_s_ | blit_s_ << 16) & 0x03e07c1f;               \
            blit_d_ = (blit_d_ | blit_d_ << 16) & 0x03e07c1f;               \
            blit_d_ += (blit_s_ - blit_d_) * blit_a5_ >> 5;                 \
            blit_d_ &= 0x03e07c1f;                                          \
            blit_dst_[blit_i_] = (Uint16)(blit_d_ | blit_d_ >> 16);         \
        }                                                                   \
    } while (0)
173
/*
 * The general slow catch-all function, for remaining depths and formats.
 *
 * Blends `length` pixels of `bpp` (2, 3 or 4) bytes from `from` over `to`
 * with the constant `alpha`.  Each pixel is unpacked to r/g/b, interpolated
 * per component, and repacked.  The pixel format is taken from a variable
 * named `fmt` that must be in scope at the point of expansion.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)                        \
    do {                                                                    \
        Uint8 *blit_src_ = (Uint8 *)(from);                                 \
        Uint8 *blit_dst_ = (Uint8 *)(to);                                   \
        const unsigned blit_a_ = (alpha);                                   \
        const int blit_n_ = (int)(length);                                  \
        int blit_i_;                                                        \
        for (blit_i_ = 0; blit_i_ < blit_n_; blit_i_++) {                   \
            /* zero-initialised so an unexpected bpp never reads garbage */ \
            Uint32 blit_s_ = 0, blit_d_ = 0;                                \
            unsigned blit_rs_, blit_gs_, blit_bs_;                          \
            unsigned blit_rd_, blit_gd_, blit_bd_;                          \
            switch (bpp) {                                                  \
            case 2:                                                         \
                blit_s_ = *(Uint16 *)blit_src_;                             \
                blit_d_ = *(Uint16 *)blit_dst_;                             \
                break;                                                      \
            case 3:                                                         \
                /* 24bpp pixels are assembled byte by byte */               \
                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {                      \
                    blit_s_ = (blit_src_[0] << 16) | (blit_src_[1] << 8)    \
                              | blit_src_[2];                               \
                    blit_d_ = (blit_dst_[0] << 16) | (blit_dst_[1] << 8)    \
                              | blit_dst_[2];                               \
                } else {                                                    \
                    blit_s_ = (blit_src_[2] << 16) | (blit_src_[1] << 8)    \
                              | blit_src_[0];                               \
                    blit_d_ = (blit_dst_[2] << 16) | (blit_dst_[1] << 8)    \
                              | blit_dst_[0];                               \
                }                                                           \
                break;                                                      \
            case 4:                                                         \
                blit_s_ = *(Uint32 *)blit_src_;                             \
                blit_d_ = *(Uint32 *)blit_dst_;                             \
                break;                                                      \
            }                                                               \
            RGB_FROM_PIXEL(blit_s_, fmt, blit_rs_, blit_gs_, blit_bs_);     \
            RGB_FROM_PIXEL(blit_d_, fmt, blit_rd_, blit_gd_, blit_bd_);     \
            /* When source < destination the unsigned difference wraps;   */ \
            /* after the shift that leaves a stray bit 24 on top of the   */ \
            /* correct 8-bit result, so mask it off before repacking.     */ \
            blit_rd_ = (blit_rd_                                            \
                        + ((blit_rs_ - blit_rd_) * blit_a_ >> 8)) & 0xff;   \
            blit_gd_ = (blit_gd_                                            \
                        + ((blit_gs_ - blit_gd_) * blit_a_ >> 8)) & 0xff;   \
            blit_bd_ = (blit_bd_                                            \
                        + ((blit_bs_ - blit_bd_) * blit_a_ >> 8)) & 0xff;   \
            PIXEL_FROM_RGB(blit_d_, fmt, blit_rd_, blit_gd_, blit_bd_);     \
            switch (bpp) {                                                  \
            case 2:                                                         \
                *(Uint16 *)blit_dst_ = (Uint16)blit_d_;                     \
                break;                                                      \
            case 3:                                                         \
                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {                      \
                    blit_dst_[0] = (Uint8)(blit_d_ >> 16);                  \
                    blit_dst_[1] = (Uint8)(blit_d_ >> 8);                   \
                    blit_dst_[2] = (Uint8)(blit_d_);                        \
                } else {                                                    \
                    blit_dst_[0] = (Uint8)blit_d_;                          \
                    blit_dst_[1] = (Uint8)(blit_d_ >> 8);                   \
                    blit_dst_[2] = (Uint8)(blit_d_ >> 16);                  \
                }                                                           \
                break;                                                      \
            case 4:                                                         \
                *(Uint32 *)blit_dst_ = blit_d_;                             \
                break;                                                      \
            }                                                               \
            blit_src_ += (bpp);                                             \
            blit_dst_ += (bpp);                                             \
        }                                                                   \
    } while (0)
233
/*
 * Special case: 50% alpha (alpha=128)
 * Worth a dedicated path because it needs no multiply and half
 * translucency is a common request.  All three components are averaged
 * at once: dropping the lowest bit of each component leaves room for the
 * per-component sum, which is then halved; the carry that was dropped is
 * restored by adding back the bits that were set in both inputs.
 * `bpp` and `alpha` are unused.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)                   \
    do {                                                                    \
        Uint32 *avg_src_ = (Uint32 *)(from);                                \
        Uint32 *avg_dst_ = (Uint32 *)(to);                                  \
        int avg_k_ = 0;                                                     \
        while (avg_k_ < (int)(length)) {                                    \
            const Uint32 avg_s_ = avg_src_[avg_k_];                         \
            const Uint32 avg_d_ = avg_dst_[avg_k_];                         \
            const Uint32 avg_half_ =                                        \
                ((avg_s_ & 0x00fefefe) + (avg_d_ & 0x00fefefe)) >> 1;       \
            const Uint32 avg_carry_ = avg_s_ & avg_d_ & 0x00010101;         \
            avg_dst_[avg_k_] = avg_half_ + avg_carry_;                      \
            avg_k_++;                                                       \
        }                                                                   \
    } while (0)
254
255 /*
256 * For 16bpp, we can actually blend two pixels in parallel, if we take
257 * care to shift before we add, not after.
258 */
259
/*
 * helper: blend a single 16 bit pixel at 50%.
 * `src` and `dst` must be modifiable Uint16 pointers; both are advanced by
 * one pixel.  `mask` selects the bits that take part in the halved sum
 * (every bit except the lowest bit of each component); bits outside the
 * mask are added back where they are set in both inputs.
 */
#define BLEND16_50(dst, src, mask)                                          \
    do {                                                                    \
        const Uint32 b16_s_ = *(src)++;                                     \
        const Uint32 b16_d_ = *(dst);                                       \
        *(dst)++ = (Uint16)((((b16_s_ & (mask)) + (b16_d_ & (mask))) >> 1)  \
                            + (b16_s_ & b16_d_ & (~(mask) & 0xffff)));      \
    } while (0)
268
/*
 * basic 16bpp 50% blender. `mask` is the set of bits to keep when adding
 * (all bits except the lowest bit of each component).
 *
 * When source and destination share the same alignment modulo 4, pixels
 * are blended two at a time through 32-bit accesses, with a single-pixel
 * step before and/or after as needed; otherwise every pixel is blended
 * individually.  `bpp` and `alpha` are unused.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)                 \
    do {                                                                    \
        unsigned h50_n_ = (length);                                         \
        Uint16 *h50_src_ = (Uint16 *)(from);                                \
        Uint16 *h50_dst_ = (Uint16 *)(to);                                  \
        /* the 16-bit mask replicated into both halves of a 32-bit word */  \
        const Uint32 h50_pair_ = (Uint32)(mask) | ((Uint32)(mask) << 16);   \
        if (((uintptr_t)h50_src_ ^ (uintptr_t)h50_dst_) & 3) {              \
            /* source and destination not in phase, blit one by one */      \
            while (h50_n_--) {                                              \
                BLEND16_50(h50_dst_, h50_src_, (mask));                     \
            }                                                               \
        } else {                                                            \
            /* first odd pixel; skipped for an empty run so the unsigned */ \
            /* counter cannot wrap around                                */ \
            if (h50_n_ && ((uintptr_t)h50_src_ & 3)) {                      \
                BLEND16_50(h50_dst_, h50_src_, (mask));                     \
                h50_n_--;                                                   \
            }                                                               \
            for (; h50_n_ > 1; h50_n_ -= 2) {                               \
                const Uint32 h50_s_ = *(Uint32 *)h50_src_;                  \
                const Uint32 h50_d_ = *(Uint32 *)h50_dst_;                  \
                /* shift before adding so the two pixels never overlap */   \
                *(Uint32 *)h50_dst_ = ((h50_s_ & h50_pair_) >> 1)           \
                    + ((h50_d_ & h50_pair_) >> 1)                           \
                    + (h50_s_ & h50_d_ & ~h50_pair_);                       \
                h50_src_ += 2;                                              \
                h50_dst_ += 2;                                              \
            }                                                               \
            if (h50_n_) {                                                   \
                /* last odd pixel */                                        \
                BLEND16_50(h50_dst_, h50_src_, (mask));                     \
            }                                                               \
        }                                                                   \
    } while (0)
298
/* 50% blend for 565 pixels: keep every bit except the lowest bit of each
   component (bits 11, 5 and 0), i.e. mask 0xf7de. */
#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)               \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, (0xffffU ^ 0x0821U))

/* 50% blend for 555 pixels: keep every bit except the lowest bit of each
   component (bits 10, 5 and 0), i.e. mask 0xfbde. */
#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)               \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, (0xffffU ^ 0x0421U))
304
/*
 * Expand `blitter(bpp, CountType, pixel_op)` with the pixel operation that
 * suits the per-surface `alpha` and the pixel format `fmt`:
 *   - alpha == 255: plain copy for every depth;
 *   - 16bpp 565 / 555 and 32bpp 888 layouts: the packed blenders, with the
 *     50% variants when alpha == 128;
 *   - everything else: ALPHA_BLIT_ANY (8bpp has no alpha blitter at all).
 * The run/skip count type is Uint16 for 4 bytes per pixel, Uint8 otherwise.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt)                                    \
    do {                                                                    \
        if (alpha == 255) {                                                 \
            switch (fmt->BytesPerPixel) {                                   \
            case 1: blitter(1, Uint8, OPAQUE_BLIT); break;                  \
            case 2: blitter(2, Uint8, OPAQUE_BLIT); break;                  \
            case 3: blitter(3, Uint8, OPAQUE_BLIT); break;                  \
            case 4: blitter(4, Uint16, OPAQUE_BLIT); break;                 \
            }                                                               \
        } else {                                                            \
            switch (fmt->BytesPerPixel) {                                   \
            case 1:                                                         \
                /* No 8bpp alpha blitting */                                \
                break;                                                      \
                                                                            \
            case 2: {                                                       \
                const Uint32 choose_rgb_ =                                  \
                    fmt->Rmask | fmt->Gmask | fmt->Bmask;                   \
                if (choose_rgb_ == 0xffff                                   \
                    && (fmt->Gmask == 0x07e0                                \
                        || fmt->Rmask == 0x07e0                             \
                        || fmt->Bmask == 0x07e0)) {                         \
                    /* 565 in any component order */                        \
                    if (alpha == 128) {                                     \
                        blitter(2, Uint8, ALPHA_BLIT16_565_50);             \
                    } else {                                                \
                        blitter(2, Uint8, ALPHA_BLIT16_565);                \
                    }                                                       \
                } else if (choose_rgb_ == 0x7fff                            \
                           && (fmt->Gmask == 0x03e0                         \
                               || fmt->Rmask == 0x03e0                      \
                               || fmt->Bmask == 0x03e0)) {                  \
                    /* 555 in any component order */                        \
                    if (alpha == 128) {                                     \
                        blitter(2, Uint8, ALPHA_BLIT16_555_50);             \
                    } else {                                                \
                        blitter(2, Uint8, ALPHA_BLIT16_555);                \
                    }                                                       \
                } else {                                                    \
                    blitter(2, Uint8, ALPHA_BLIT_ANY);                      \
                }                                                           \
                break;                                                      \
            }                                                               \
                                                                            \
            case 3:                                                         \
                blitter(3, Uint8, ALPHA_BLIT_ANY);                          \
                break;                                                      \
                                                                            \
            case 4:                                                         \
                if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff    \
                    && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00        \
                        || fmt->Bmask == 0xff00)) {                         \
                    if (alpha == 128) {                                     \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50);            \
                    } else {                                                \
                        blitter(4, Uint16, ALPHA_BLIT32_888);               \
                    }                                                       \
                } else {                                                    \
                    blitter(4, Uint16, ALPHA_BLIT_ANY);                     \
                }                                                           \
                break;                                                      \
            }                                                               \
        }                                                                   \
    } while (0)
374
/*
 * Set a pixel value using the given format, except that the alpha value is
 * placed in the top byte. This is the format used for RLE with alpha.
 *
 * `Pixel` must be an lvalue; `fmt` is a pointer to a structure providing
 * R/G/B loss and shift fields.  The arguments are parenthesized so that
 * expressions can be passed, and alpha is widened to Uint32 before the
 * shift so that a signed value >= 128 does not overflow an int.
 */
#define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
{                                                                       \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) |                  \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) |                  \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift) |                  \
            ((Uint32)(a) << 24);                                        \
}
386
387 /*
388 * This takes care of the case when the surface is clipped on the left and/or
389 * right. Top clipping has already been taken care of.
390 */
391 static void
RLEClipBlit(int w,Uint8 * srcbuf,SDL_Surface * surf_dst,Uint8 * dstbuf,SDL_Rect * srcrect,unsigned alpha)392 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
393 Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
394 {
395 SDL_PixelFormat *fmt = surf_dst->format;
396
397 #define RLECLIPBLIT(bpp, Type, do_blit) \
398 do { \
399 int linecount = srcrect->h; \
400 int ofs = 0; \
401 int left = srcrect->x; \
402 int right = left + srcrect->w; \
403 dstbuf -= left * bpp; \
404 for (;;) { \
405 int run; \
406 ofs += *(Type *)srcbuf; \
407 run = ((Type *)srcbuf)[1]; \
408 srcbuf += 2 * sizeof(Type); \
409 if (run) { \
410 /* clip to left and right borders */ \
411 if (ofs < right) { \
412 int start = 0; \
413 int len = run; \
414 int startcol; \
415 if (left - ofs > 0) { \
416 start = left - ofs; \
417 len -= start; \
418 if (len <= 0) \
419 goto nocopy ## bpp ## do_blit; \
420 } \
421 startcol = ofs + start; \
422 if (len > right - startcol) \
423 len = right - startcol; \
424 do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
425 len, bpp, alpha); \
426 } \
427 nocopy ## bpp ## do_blit: \
428 srcbuf += run * bpp; \
429 ofs += run; \
430 } else if (!ofs) \
431 break; \
432 \
433 if (ofs == w) { \
434 ofs = 0; \
435 dstbuf += surf_dst->pitch; \
436 if (!--linecount) \
437 break; \
438 } \
439 } \
440 } while(0)
441
442 CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
443
444 #undef RLECLIPBLIT
445
446 }
447
448
449 /* blit a colorkeyed RLE surface */
450 static int SDLCALL
SDL_RLEBlit(SDL_Surface * surf_src,SDL_Rect * srcrect,SDL_Surface * surf_dst,SDL_Rect * dstrect)451 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
452 SDL_Surface * surf_dst, SDL_Rect * dstrect)
453 {
454 Uint8 *dstbuf;
455 Uint8 *srcbuf;
456 int x, y;
457 int w = surf_src->w;
458 unsigned alpha;
459
460 /* Lock the destination if necessary */
461 if (SDL_MUSTLOCK(surf_dst)) {
462 if (SDL_LockSurface(surf_dst) < 0) {
463 return (-1);
464 }
465 }
466
467 /* Set up the source and destination pointers */
468 x = dstrect->x;
469 y = dstrect->y;
470 dstbuf = (Uint8 *) surf_dst->pixels
471 + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
472 srcbuf = (Uint8 *) surf_src->map->data;
473
474 {
475 /* skip lines at the top if necessary */
476 int vskip = srcrect->y;
477 int ofs = 0;
478 if (vskip) {
479
480 #define RLESKIP(bpp, Type) \
481 for(;;) { \
482 int run; \
483 ofs += *(Type *)srcbuf; \
484 run = ((Type *)srcbuf)[1]; \
485 srcbuf += sizeof(Type) * 2; \
486 if(run) { \
487 srcbuf += run * bpp; \
488 ofs += run; \
489 } else if(!ofs) \
490 goto done; \
491 if(ofs == w) { \
492 ofs = 0; \
493 if(!--vskip) \
494 break; \
495 } \
496 }
497
498 switch (surf_src->format->BytesPerPixel) {
499 case 1:
500 RLESKIP(1, Uint8);
501 break;
502 case 2:
503 RLESKIP(2, Uint8);
504 break;
505 case 3:
506 RLESKIP(3, Uint8);
507 break;
508 case 4:
509 RLESKIP(4, Uint16);
510 break;
511 }
512
513 #undef RLESKIP
514
515 }
516 }
517
518 alpha = surf_src->map->info.a;
519 /* if left or right edge clipping needed, call clip blit */
520 if (srcrect->x || srcrect->w != surf_src->w) {
521 RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
522 } else {
523 SDL_PixelFormat *fmt = surf_src->format;
524
525 #define RLEBLIT(bpp, Type, do_blit) \
526 do { \
527 int linecount = srcrect->h; \
528 int ofs = 0; \
529 for(;;) { \
530 unsigned run; \
531 ofs += *(Type *)srcbuf; \
532 run = ((Type *)srcbuf)[1]; \
533 srcbuf += 2 * sizeof(Type); \
534 if(run) { \
535 do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
536 srcbuf += run * bpp; \
537 ofs += run; \
538 } else if(!ofs) \
539 break; \
540 if(ofs == w) { \
541 ofs = 0; \
542 dstbuf += surf_dst->pitch; \
543 if(!--linecount) \
544 break; \
545 } \
546 } \
547 } while(0)
548
549 CHOOSE_BLIT(RLEBLIT, alpha, fmt);
550
551 #undef RLEBLIT
552 }
553
554 done:
555 /* Unlock the destination if necessary */
556 if (SDL_MUSTLOCK(surf_dst)) {
557 SDL_UnlockSurface(surf_dst);
558 }
559 return (0);
560 }
561
562 #undef OPAQUE_BLIT
563
564 /*
565 * Per-pixel blitting macros for translucent pixels:
566 * These use the same techniques as the per-surface blitting macros
567 */
568
/*
 * For 32bpp pixels the encoder has put the alpha value in the top 8 bits
 * of the source pixel, so it is extracted from there and the usual
 * two-lane interpolation follows.  The top byte of the written
 * destination pixel is always set to 0xff.
 */
#define BLIT_TRANSL_888(src, dst)                                       \
    do {                                                                \
        const Uint32 tr_s_ = src;                                       \
        const Uint32 tr_d_ = dst;                                       \
        const unsigned tr_a_ = tr_s_ >> 24;                             \
        Uint32 tr_rb_ = tr_d_ & 0xff00ff;                               \
        Uint32 tr_g_ = tr_d_ & 0xff00;                                  \
        tr_rb_ += ((tr_s_ & 0xff00ff) - tr_rb_) * tr_a_ >> 8;           \
        tr_g_ += ((tr_s_ & 0xff00) - tr_g_) * tr_a_ >> 8;               \
        dst = (tr_rb_ & 0xff00ff) | (tr_g_ & 0xff00) | 0xff000000;      \
    } while (0)
586
/*
 * For 16bpp pixels, the encoder has stored the 5 most significant alpha
 * bits in bits 5-9 of the 32-bit source word (mask 0x3e0), and the source
 * colour is already in the spread layout.  As before, all 3 RGB components
 * are processed at the same time.  This is the 565 variant.
 */
#define BLIT_TRANSL_565(src, dst)                                       \
    do {                                                                \
        const Uint32 tr_s_ = src;                                       \
        Uint32 tr_d_ = dst;                                             \
        const unsigned tr_a_ = (tr_s_ >> 5) & 0x1f;                     \
        tr_d_ = (tr_d_ | tr_d_ << 16) & 0x07e0f81f;                     \
        tr_d_ += ((tr_s_ & 0x07e0f81f) - tr_d_) * tr_a_ >> 5;           \
        tr_d_ &= 0x07e0f81f;                                            \
        dst = (Uint16)(tr_d_ | tr_d_ >> 16);                            \
    } while (0)
602
/* 555 variant of the per-pixel 16bpp blend: 5-bit alpha is taken from
   bits 5-9 of the source word, colour from the spread mask 0x03e07c1f. */
#define BLIT_TRANSL_555(src, dst)                                       \
    do {                                                                \
        const Uint32 tr_s_ = src;                                       \
        Uint32 tr_d_ = dst;                                             \
        const unsigned tr_a_ = (tr_s_ >> 5) & 0x1f;                     \
        tr_d_ = (tr_d_ | tr_d_ << 16) & 0x03e07c1f;                     \
        tr_d_ += ((tr_s_ & 0x03e07c1f) - tr_d_) * tr_a_ >> 5;           \
        tr_d_ &= 0x03e07c1f;                                            \
        dst = (Uint16)(tr_d_ | tr_d_ >> 16);                            \
    } while (0)
614
615 /* used to save the destination format in the encoding. Designed to be
616 macro-compatible with SDL_PixelFormat but without the unneeded fields */
617 typedef struct
618 {
619 Uint8 BytesPerPixel;
620 Uint8 padding[3];
621 Uint32 Rmask;
622 Uint32 Gmask;
623 Uint32 Bmask;
624 Uint32 Amask;
625 Uint8 Rloss;
626 Uint8 Gloss;
627 Uint8 Bloss;
628 Uint8 Aloss;
629 Uint8 Rshift;
630 Uint8 Gshift;
631 Uint8 Bshift;
632 Uint8 Ashift;
633 } RLEDestFormat;
634
635 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
636 static void
RLEAlphaClipBlit(int w,Uint8 * srcbuf,SDL_Surface * surf_dst,Uint8 * dstbuf,SDL_Rect * srcrect)637 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
638 Uint8 * dstbuf, SDL_Rect * srcrect)
639 {
640 SDL_PixelFormat *df = surf_dst->format;
641 /*
642 * clipped blitter: Ptype is the destination pixel type,
643 * Ctype the translucent count type, and do_blend the macro
644 * to blend one pixel.
645 */
646 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \
647 do { \
648 int linecount = srcrect->h; \
649 int left = srcrect->x; \
650 int right = left + srcrect->w; \
651 dstbuf -= left * sizeof(Ptype); \
652 do { \
653 int ofs = 0; \
654 /* blit opaque pixels on one line */ \
655 do { \
656 unsigned run; \
657 ofs += ((Ctype *)srcbuf)[0]; \
658 run = ((Ctype *)srcbuf)[1]; \
659 srcbuf += 2 * sizeof(Ctype); \
660 if(run) { \
661 /* clip to left and right borders */ \
662 int cofs = ofs; \
663 int crun = run; \
664 if(left - cofs > 0) { \
665 crun -= left - cofs; \
666 cofs = left; \
667 } \
668 if(crun > right - cofs) \
669 crun = right - cofs; \
670 if(crun > 0) \
671 PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \
672 srcbuf + (cofs - ofs) * sizeof(Ptype), \
673 (unsigned)crun, sizeof(Ptype)); \
674 srcbuf += run * sizeof(Ptype); \
675 ofs += run; \
676 } else if(!ofs) \
677 return; \
678 } while(ofs < w); \
679 /* skip padding if necessary */ \
680 if(sizeof(Ptype) == 2) \
681 srcbuf += (uintptr_t)srcbuf & 2; \
682 /* blit translucent pixels on the same line */ \
683 ofs = 0; \
684 do { \
685 unsigned run; \
686 ofs += ((Uint16 *)srcbuf)[0]; \
687 run = ((Uint16 *)srcbuf)[1]; \
688 srcbuf += 4; \
689 if(run) { \
690 /* clip to left and right borders */ \
691 int cofs = ofs; \
692 int crun = run; \
693 if(left - cofs > 0) { \
694 crun -= left - cofs; \
695 cofs = left; \
696 } \
697 if(crun > right - cofs) \
698 crun = right - cofs; \
699 if(crun > 0) { \
700 Ptype *dst = (Ptype *)dstbuf + cofs; \
701 Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs); \
702 int i; \
703 for(i = 0; i < crun; i++) \
704 do_blend(src[i], dst[i]); \
705 } \
706 srcbuf += run * 4; \
707 ofs += run; \
708 } \
709 } while(ofs < w); \
710 dstbuf += surf_dst->pitch; \
711 } while(--linecount); \
712 } while(0)
713
714 switch (df->BytesPerPixel) {
715 case 2:
716 if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
717 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
718 else
719 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
720 break;
721 case 4:
722 RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
723 break;
724 }
725 }
726
727 /* blit a pixel-alpha RLE surface */
728 static int SDLCALL
SDL_RLEAlphaBlit(SDL_Surface * surf_src,SDL_Rect * srcrect,SDL_Surface * surf_dst,SDL_Rect * dstrect)729 SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
730 SDL_Surface * surf_dst, SDL_Rect * dstrect)
731 {
732 int x, y;
733 int w = surf_src->w;
734 Uint8 *srcbuf, *dstbuf;
735 SDL_PixelFormat *df = surf_dst->format;
736
737 /* Lock the destination if necessary */
738 if (SDL_MUSTLOCK(surf_dst)) {
739 if (SDL_LockSurface(surf_dst) < 0) {
740 return -1;
741 }
742 }
743
744 x = dstrect->x;
745 y = dstrect->y;
746 dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
747 srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);
748
749 {
750 /* skip lines at the top if necessary */
751 int vskip = srcrect->y;
752 if (vskip) {
753 int ofs;
754 if (df->BytesPerPixel == 2) {
755 /* the 16/32 interleaved format */
756 do {
757 /* skip opaque line */
758 ofs = 0;
759 do {
760 int run;
761 ofs += srcbuf[0];
762 run = srcbuf[1];
763 srcbuf += 2;
764 if (run) {
765 srcbuf += 2 * run;
766 ofs += run;
767 } else if (!ofs)
768 goto done;
769 } while (ofs < w);
770
771 /* skip padding */
772 srcbuf += (uintptr_t) srcbuf & 2;
773
774 /* skip translucent line */
775 ofs = 0;
776 do {
777 int run;
778 ofs += ((Uint16 *) srcbuf)[0];
779 run = ((Uint16 *) srcbuf)[1];
780 srcbuf += 4 * (run + 1);
781 ofs += run;
782 } while (ofs < w);
783 } while (--vskip);
784 } else {
785 /* the 32/32 interleaved format */
786 vskip <<= 1; /* opaque and translucent have same format */
787 do {
788 ofs = 0;
789 do {
790 int run;
791 ofs += ((Uint16 *) srcbuf)[0];
792 run = ((Uint16 *) srcbuf)[1];
793 srcbuf += 4;
794 if (run) {
795 srcbuf += 4 * run;
796 ofs += run;
797 } else if (!ofs)
798 goto done;
799 } while (ofs < w);
800 } while (--vskip);
801 }
802 }
803 }
804
805 /* if left or right edge clipping needed, call clip blit */
806 if (srcrect->x || srcrect->w != surf_src->w) {
807 RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
808 } else {
809
810 /*
811 * non-clipped blitter. Ptype is the destination pixel type,
812 * Ctype the translucent count type, and do_blend the
813 * macro to blend one pixel.
814 */
815 #define RLEALPHABLIT(Ptype, Ctype, do_blend) \
816 do { \
817 int linecount = srcrect->h; \
818 do { \
819 int ofs = 0; \
820 /* blit opaque pixels on one line */ \
821 do { \
822 unsigned run; \
823 ofs += ((Ctype *)srcbuf)[0]; \
824 run = ((Ctype *)srcbuf)[1]; \
825 srcbuf += 2 * sizeof(Ctype); \
826 if(run) { \
827 PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
828 run, sizeof(Ptype)); \
829 srcbuf += run * sizeof(Ptype); \
830 ofs += run; \
831 } else if(!ofs) \
832 goto done; \
833 } while(ofs < w); \
834 /* skip padding if necessary */ \
835 if(sizeof(Ptype) == 2) \
836 srcbuf += (uintptr_t)srcbuf & 2; \
837 /* blit translucent pixels on the same line */ \
838 ofs = 0; \
839 do { \
840 unsigned run; \
841 ofs += ((Uint16 *)srcbuf)[0]; \
842 run = ((Uint16 *)srcbuf)[1]; \
843 srcbuf += 4; \
844 if(run) { \
845 Ptype *dst = (Ptype *)dstbuf + ofs; \
846 unsigned i; \
847 for(i = 0; i < run; i++) { \
848 Uint32 src = *(Uint32 *)srcbuf; \
849 do_blend(src, *dst); \
850 srcbuf += 4; \
851 dst++; \
852 } \
853 ofs += run; \
854 } \
855 } while(ofs < w); \
856 dstbuf += surf_dst->pitch; \
857 } while(--linecount); \
858 } while(0)
859
860 switch (df->BytesPerPixel) {
861 case 2:
862 if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
863 || df->Bmask == 0x07e0)
864 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
865 else
866 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
867 break;
868 case 4:
869 RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
870 break;
871 }
872 }
873
874 done:
875 /* Unlock the destination if necessary */
876 if (SDL_MUSTLOCK(surf_dst)) {
877 SDL_UnlockSurface(surf_dst);
878 }
879 return 0;
880 }
881
882 /*
883 * Auxiliary functions:
884 * The encoding functions take 32bpp rgb + a, and
885 * return the number of bytes copied to the destination.
886 * The decoding functions copy to 32bpp rgb + a, and
887 * return the number of bytes copied from the source.
888 * These are only used in the encoder and un-RLE code and are therefore not
889 * highly optimised.
890 */
891
892 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
893 static int
copy_opaque_16(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)894 copy_opaque_16(void *dst, Uint32 * src, int n,
895 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
896 {
897 int i;
898 Uint16 *d = dst;
899 for (i = 0; i < n; i++) {
900 unsigned r, g, b;
901 RGB_FROM_PIXEL(*src, sfmt, r, g, b);
902 PIXEL_FROM_RGB(*d, dfmt, r, g, b);
903 src++;
904 d++;
905 }
906 return n * 2;
907 }
908
909 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
910 static int
uncopy_opaque_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)911 uncopy_opaque_16(Uint32 * dst, void *src, int n,
912 RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
913 {
914 int i;
915 Uint16 *s = src;
916 unsigned alpha = dfmt->Amask ? 255 : 0;
917 for (i = 0; i < n; i++) {
918 unsigned r, g, b;
919 RGB_FROM_PIXEL(*s, sfmt, r, g, b);
920 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
921 s++;
922 dst++;
923 }
924 return n * 2;
925 }
926
927
928
929 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
930 static int
copy_transl_565(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)931 copy_transl_565(void *dst, Uint32 * src, int n,
932 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
933 {
934 int i;
935 Uint32 *d = dst;
936 for (i = 0; i < n; i++) {
937 unsigned r, g, b, a;
938 Uint16 pix;
939 RGBA_FROM_8888(*src, sfmt, r, g, b, a);
940 PIXEL_FROM_RGB(pix, dfmt, r, g, b);
941 *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
942 src++;
943 d++;
944 }
945 return n * 4;
946 }
947
948 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
949 static int
copy_transl_555(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)950 copy_transl_555(void *dst, Uint32 * src, int n,
951 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
952 {
953 int i;
954 Uint32 *d = dst;
955 for (i = 0; i < n; i++) {
956 unsigned r, g, b, a;
957 Uint16 pix;
958 RGBA_FROM_8888(*src, sfmt, r, g, b, a);
959 PIXEL_FROM_RGB(pix, dfmt, r, g, b);
960 *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
961 src++;
962 d++;
963 }
964 return n * 4;
965 }
966
967 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
968 static int
uncopy_transl_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)969 uncopy_transl_16(Uint32 * dst, void *src, int n,
970 RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
971 {
972 int i;
973 Uint32 *s = src;
974 for (i = 0; i < n; i++) {
975 unsigned r, g, b, a;
976 Uint32 pix = *s++;
977 a = (pix & 0x3e0) >> 2;
978 pix = (pix & ~0x3e0) | pix >> 16;
979 RGB_FROM_PIXEL(pix, sfmt, r, g, b);
980 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
981 dst++;
982 }
983 return n * 4;
984 }
985
986 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
987 static int
copy_32(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)988 copy_32(void *dst, Uint32 * src, int n,
989 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
990 {
991 int i;
992 Uint32 *d = dst;
993 for (i = 0; i < n; i++) {
994 unsigned r, g, b, a;
995 RGBA_FROM_8888(*src, sfmt, r, g, b, a);
996 RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
997 d++;
998 src++;
999 }
1000 return n * 4;
1001 }
1002
1003 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
1004 static int
uncopy_32(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)1005 uncopy_32(Uint32 * dst, void *src, int n,
1006 RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
1007 {
1008 int i;
1009 Uint32 *s = src;
1010 for (i = 0; i < n; i++) {
1011 unsigned r, g, b, a;
1012 Uint32 pixel = *s++;
1013 RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
1014 a = pixel >> 24;
1015 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
1016 dst++;
1017 }
1018 return n * 4;
1019 }
1020
/* True when the alpha field of `pixel`, extracted with fmt's Amask and
   Ashift, equals 255. */
#define ISOPAQUE(pixel, fmt) \
    ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

/* True when the alpha field of `pixel` lies in 1..254: subtracting one in
   unsigned arithmetic wraps 0 around to a large value, so a single
   comparison covers both ends of the range. */
#define ISTRANSL(pixel, fmt) \
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
1025
1026 /* convert surface to be quickly alpha-blittable onto dest, if possible */
1027 static int
RLEAlphaSurface(SDL_Surface * surface)1028 RLEAlphaSurface(SDL_Surface * surface)
1029 {
1030 SDL_Surface *dest;
1031 SDL_PixelFormat *df;
1032 int maxsize = 0;
1033 int max_opaque_run;
1034 int max_transl_run = 65535;
1035 unsigned masksum;
1036 Uint8 *rlebuf, *dst;
1037 int (*copy_opaque) (void *, Uint32 *, int,
1038 SDL_PixelFormat *, SDL_PixelFormat *);
1039 int (*copy_transl) (void *, Uint32 *, int,
1040 SDL_PixelFormat *, SDL_PixelFormat *);
1041
1042 dest = surface->map->dst;
1043 if (!dest)
1044 return -1;
1045 df = dest->format;
1046 if (surface->format->BitsPerPixel != 32)
1047 return -1; /* only 32bpp source supported */
1048
1049 /* find out whether the destination is one we support,
1050 and determine the max size of the encoded result */
1051 masksum = df->Rmask | df->Gmask | df->Bmask;
1052 switch (df->BytesPerPixel) {
1053 case 2:
1054 /* 16bpp: only support 565 and 555 formats */
1055 switch (masksum) {
1056 case 0xffff:
1057 if (df->Gmask == 0x07e0
1058 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
1059 copy_opaque = copy_opaque_16;
1060 copy_transl = copy_transl_565;
1061 } else
1062 return -1;
1063 break;
1064 case 0x7fff:
1065 if (df->Gmask == 0x03e0
1066 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
1067 copy_opaque = copy_opaque_16;
1068 copy_transl = copy_transl_555;
1069 } else
1070 return -1;
1071 break;
1072 default:
1073 return -1;
1074 }
1075 max_opaque_run = 255; /* runs stored as bytes */
1076
1077 /* worst case is alternating opaque and translucent pixels,
1078 with room for alignment padding between lines */
1079 maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
1080 break;
1081 case 4:
1082 if (masksum != 0x00ffffff)
1083 return -1; /* requires unused high byte */
1084 copy_opaque = copy_32;
1085 copy_transl = copy_32;
1086 max_opaque_run = 255; /* runs stored as short ints */
1087
1088 /* worst case is alternating opaque and translucent pixels */
1089 maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
1090 break;
1091 default:
1092 return -1; /* anything else unsupported right now */
1093 }
1094
1095 maxsize += sizeof(RLEDestFormat);
1096 rlebuf = (Uint8 *) SDL_malloc(maxsize);
1097 if (!rlebuf) {
1098 return SDL_OutOfMemory();
1099 }
1100 {
1101 /* save the destination format so we can undo the encoding later */
1102 RLEDestFormat *r = (RLEDestFormat *) rlebuf;
1103 r->BytesPerPixel = df->BytesPerPixel;
1104 r->Rmask = df->Rmask;
1105 r->Gmask = df->Gmask;
1106 r->Bmask = df->Bmask;
1107 r->Amask = df->Amask;
1108 r->Rloss = df->Rloss;
1109 r->Gloss = df->Gloss;
1110 r->Bloss = df->Bloss;
1111 r->Aloss = df->Aloss;
1112 r->Rshift = df->Rshift;
1113 r->Gshift = df->Gshift;
1114 r->Bshift = df->Bshift;
1115 r->Ashift = df->Ashift;
1116 }
1117 dst = rlebuf + sizeof(RLEDestFormat);
1118
1119 /* Do the actual encoding */
1120 {
1121 int x, y;
1122 int h = surface->h, w = surface->w;
1123 SDL_PixelFormat *sf = surface->format;
1124 Uint32 *src = (Uint32 *) surface->pixels;
1125 Uint8 *lastline = dst; /* end of last non-blank line */
1126
1127 /* opaque counts are 8 or 16 bits, depending on target depth */
1128 #define ADD_OPAQUE_COUNTS(n, m) \
1129 if(df->BytesPerPixel == 4) { \
1130 ((Uint16 *)dst)[0] = n; \
1131 ((Uint16 *)dst)[1] = m; \
1132 dst += 4; \
1133 } else { \
1134 dst[0] = n; \
1135 dst[1] = m; \
1136 dst += 2; \
1137 }
1138
1139 /* translucent counts are always 16 bit */
1140 #define ADD_TRANSL_COUNTS(n, m) \
1141 (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
1142
1143 for (y = 0; y < h; y++) {
1144 int runstart, skipstart;
1145 int blankline = 0;
1146 /* First encode all opaque pixels of a scan line */
1147 x = 0;
1148 do {
1149 int run, skip, len;
1150 skipstart = x;
1151 while (x < w && !ISOPAQUE(src[x], sf))
1152 x++;
1153 runstart = x;
1154 while (x < w && ISOPAQUE(src[x], sf))
1155 x++;
1156 skip = runstart - skipstart;
1157 if (skip == w)
1158 blankline = 1;
1159 run = x - runstart;
1160 while (skip > max_opaque_run) {
1161 ADD_OPAQUE_COUNTS(max_opaque_run, 0);
1162 skip -= max_opaque_run;
1163 }
1164 len = MIN(run, max_opaque_run);
1165 ADD_OPAQUE_COUNTS(skip, len);
1166 dst += copy_opaque(dst, src + runstart, len, sf, df);
1167 runstart += len;
1168 run -= len;
1169 while (run) {
1170 len = MIN(run, max_opaque_run);
1171 ADD_OPAQUE_COUNTS(0, len);
1172 dst += copy_opaque(dst, src + runstart, len, sf, df);
1173 runstart += len;
1174 run -= len;
1175 }
1176 } while (x < w);
1177
1178 /* Make sure the next output address is 32-bit aligned */
1179 dst += (uintptr_t) dst & 2;
1180
1181 /* Next, encode all translucent pixels of the same scan line */
1182 x = 0;
1183 do {
1184 int run, skip, len;
1185 skipstart = x;
1186 while (x < w && !ISTRANSL(src[x], sf))
1187 x++;
1188 runstart = x;
1189 while (x < w && ISTRANSL(src[x], sf))
1190 x++;
1191 skip = runstart - skipstart;
1192 blankline &= (skip == w);
1193 run = x - runstart;
1194 while (skip > max_transl_run) {
1195 ADD_TRANSL_COUNTS(max_transl_run, 0);
1196 skip -= max_transl_run;
1197 }
1198 len = MIN(run, max_transl_run);
1199 ADD_TRANSL_COUNTS(skip, len);
1200 dst += copy_transl(dst, src + runstart, len, sf, df);
1201 runstart += len;
1202 run -= len;
1203 while (run) {
1204 len = MIN(run, max_transl_run);
1205 ADD_TRANSL_COUNTS(0, len);
1206 dst += copy_transl(dst, src + runstart, len, sf, df);
1207 runstart += len;
1208 run -= len;
1209 }
1210 if (!blankline)
1211 lastline = dst;
1212 } while (x < w);
1213
1214 src += surface->pitch >> 2;
1215 }
1216 dst = lastline; /* back up past trailing blank lines */
1217 ADD_OPAQUE_COUNTS(0, 0);
1218 }
1219
1220 #undef ADD_OPAQUE_COUNTS
1221 #undef ADD_TRANSL_COUNTS
1222
1223 /* Now that we have it encoded, release the original pixels */
1224 if (!(surface->flags & SDL_PREALLOC)) {
1225 SDL_SIMDFree(surface->pixels);
1226 surface->pixels = NULL;
1227 surface->flags &= ~SDL_SIMD_ALIGNED;
1228 }
1229
1230 /* reallocate the buffer to release unused memory */
1231 {
1232 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
1233 if (!p)
1234 p = rlebuf;
1235 surface->map->data = p;
1236 }
1237
1238 return 0;
1239 }
1240
1241 static Uint32
getpix_8(const Uint8 * srcbuf)1242 getpix_8(const Uint8 * srcbuf)
1243 {
1244 return *srcbuf;
1245 }
1246
1247 static Uint32
getpix_16(const Uint8 * srcbuf)1248 getpix_16(const Uint8 * srcbuf)
1249 {
1250 return *(const Uint16 *) srcbuf;
1251 }
1252
1253 static Uint32
getpix_24(const Uint8 * srcbuf)1254 getpix_24(const Uint8 * srcbuf)
1255 {
1256 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
1257 return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
1258 #else
1259 return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
1260 #endif
1261 }
1262
1263 static Uint32
getpix_32(const Uint8 * srcbuf)1264 getpix_32(const Uint8 * srcbuf)
1265 {
1266 return *(const Uint32 *) srcbuf;
1267 }
1268
1269 typedef Uint32(*getpix_func) (const Uint8 *);
1270
1271 static const getpix_func getpixes[4] = {
1272 getpix_8, getpix_16, getpix_24, getpix_32
1273 };
1274
1275 static int
RLEColorkeySurface(SDL_Surface * surface)1276 RLEColorkeySurface(SDL_Surface * surface)
1277 {
1278 Uint8 *rlebuf, *dst;
1279 int maxn;
1280 int y;
1281 Uint8 *srcbuf, *lastline;
1282 int maxsize = 0;
1283 const int bpp = surface->format->BytesPerPixel;
1284 getpix_func getpix;
1285 Uint32 ckey, rgbmask;
1286 int w, h;
1287
1288 /* calculate the worst case size for the compressed surface */
1289 switch (bpp) {
1290 case 1:
1291 /* worst case is alternating opaque and transparent pixels,
1292 starting with an opaque pixel */
1293 maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
1294 break;
1295 case 2:
1296 case 3:
1297 /* worst case is solid runs, at most 255 pixels wide */
1298 maxsize = surface->h * (2 * (surface->w / 255 + 1)
1299 + surface->w * bpp) + 2;
1300 break;
1301 case 4:
1302 /* worst case is solid runs, at most 65535 pixels wide */
1303 maxsize = surface->h * (4 * (surface->w / 65535 + 1)
1304 + surface->w * 4) + 4;
1305 break;
1306
1307 default:
1308 return -1;
1309 }
1310
1311 rlebuf = (Uint8 *) SDL_malloc(maxsize);
1312 if (rlebuf == NULL) {
1313 return SDL_OutOfMemory();
1314 }
1315
1316 /* Set up the conversion */
1317 srcbuf = (Uint8 *) surface->pixels;
1318 maxn = bpp == 4 ? 65535 : 255;
1319 dst = rlebuf;
1320 rgbmask = ~surface->format->Amask;
1321 ckey = surface->map->info.colorkey & rgbmask;
1322 lastline = dst;
1323 getpix = getpixes[bpp - 1];
1324 w = surface->w;
1325 h = surface->h;
1326
1327 #define ADD_COUNTS(n, m) \
1328 if(bpp == 4) { \
1329 ((Uint16 *)dst)[0] = n; \
1330 ((Uint16 *)dst)[1] = m; \
1331 dst += 4; \
1332 } else { \
1333 dst[0] = n; \
1334 dst[1] = m; \
1335 dst += 2; \
1336 }
1337
1338 for (y = 0; y < h; y++) {
1339 int x = 0;
1340 int blankline = 0;
1341 do {
1342 int run, skip, len;
1343 int runstart;
1344 int skipstart = x;
1345
1346 /* find run of transparent, then opaque pixels */
1347 while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
1348 x++;
1349 runstart = x;
1350 while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
1351 x++;
1352 skip = runstart - skipstart;
1353 if (skip == w)
1354 blankline = 1;
1355 run = x - runstart;
1356
1357 /* encode segment */
1358 while (skip > maxn) {
1359 ADD_COUNTS(maxn, 0);
1360 skip -= maxn;
1361 }
1362 len = MIN(run, maxn);
1363 ADD_COUNTS(skip, len);
1364 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1365 dst += len * bpp;
1366 run -= len;
1367 runstart += len;
1368 while (run) {
1369 len = MIN(run, maxn);
1370 ADD_COUNTS(0, len);
1371 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1372 dst += len * bpp;
1373 runstart += len;
1374 run -= len;
1375 }
1376 if (!blankline)
1377 lastline = dst;
1378 } while (x < w);
1379
1380 srcbuf += surface->pitch;
1381 }
1382 dst = lastline; /* back up bast trailing blank lines */
1383 ADD_COUNTS(0, 0);
1384
1385 #undef ADD_COUNTS
1386
1387 /* Now that we have it encoded, release the original pixels */
1388 if (!(surface->flags & SDL_PREALLOC)) {
1389 SDL_SIMDFree(surface->pixels);
1390 surface->pixels = NULL;
1391 surface->flags &= ~SDL_SIMD_ALIGNED;
1392 }
1393
1394 /* reallocate the buffer to release unused memory */
1395 {
1396 /* If SDL_realloc returns NULL, the original block is left intact */
1397 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
1398 if (!p)
1399 p = rlebuf;
1400 surface->map->data = p;
1401 }
1402
1403 return 0;
1404 }
1405
1406 int
SDL_RLESurface(SDL_Surface * surface)1407 SDL_RLESurface(SDL_Surface * surface)
1408 {
1409 int flags;
1410
1411 /* Clear any previous RLE conversion */
1412 if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
1413 SDL_UnRLESurface(surface, 1);
1414 }
1415
1416 /* We don't support RLE encoding of bitmaps */
1417 if (surface->format->BitsPerPixel < 8) {
1418 return -1;
1419 }
1420
1421 /* Make sure the pixels are available */
1422 if (!surface->pixels) {
1423 return -1;
1424 }
1425
1426 flags = surface->map->info.flags;
1427 if (flags & SDL_COPY_COLORKEY) {
1428 /* ok */
1429 } else if ((flags & SDL_COPY_BLEND) && surface->format->Amask) {
1430 /* ok */
1431 } else {
1432 /* If we don't have colorkey or blending, nothing to do... */
1433 return -1;
1434 }
1435
1436 /* Pass on combinations not supported */
1437 if ((flags & SDL_COPY_MODULATE_COLOR) ||
1438 ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
1439 (flags & (SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) ||
1440 (flags & SDL_COPY_NEAREST)) {
1441 return -1;
1442 }
1443
1444 /* Encode and set up the blit */
1445 if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
1446 if (!surface->map->identity) {
1447 return -1;
1448 }
1449 if (RLEColorkeySurface(surface) < 0) {
1450 return -1;
1451 }
1452 surface->map->blit = SDL_RLEBlit;
1453 surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
1454 } else {
1455 if (RLEAlphaSurface(surface) < 0) {
1456 return -1;
1457 }
1458 surface->map->blit = SDL_RLEAlphaBlit;
1459 surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
1460 }
1461
1462 /* The surface is now accelerated */
1463 surface->flags |= SDL_RLEACCEL;
1464
1465 return (0);
1466 }
1467
1468 /*
1469 * Un-RLE a surface with pixel alpha
1470 * This may not give back exactly the image before RLE-encoding; all
1471 * completely transparent pixels will be lost, and color and alpha depth
1472 * may have been reduced (when encoding for 16bpp targets).
1473 */
1474 static SDL_bool
UnRLEAlpha(SDL_Surface * surface)1475 UnRLEAlpha(SDL_Surface * surface)
1476 {
1477 Uint8 *srcbuf;
1478 Uint32 *dst;
1479 SDL_PixelFormat *sf = surface->format;
1480 RLEDestFormat *df = surface->map->data;
1481 int (*uncopy_opaque) (Uint32 *, void *, int,
1482 RLEDestFormat *, SDL_PixelFormat *);
1483 int (*uncopy_transl) (Uint32 *, void *, int,
1484 RLEDestFormat *, SDL_PixelFormat *);
1485 int w = surface->w;
1486 int bpp = df->BytesPerPixel;
1487
1488 if (bpp == 2) {
1489 uncopy_opaque = uncopy_opaque_16;
1490 uncopy_transl = uncopy_transl_16;
1491 } else {
1492 uncopy_opaque = uncopy_transl = uncopy_32;
1493 }
1494
1495 surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
1496 if (!surface->pixels) {
1497 return (SDL_FALSE);
1498 }
1499 surface->flags |= SDL_SIMD_ALIGNED;
1500 /* fill background with transparent pixels */
1501 SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
1502
1503 dst = surface->pixels;
1504 srcbuf = (Uint8 *) (df + 1);
1505 for (;;) {
1506 /* copy opaque pixels */
1507 int ofs = 0;
1508 do {
1509 unsigned run;
1510 if (bpp == 2) {
1511 ofs += srcbuf[0];
1512 run = srcbuf[1];
1513 srcbuf += 2;
1514 } else {
1515 ofs += ((Uint16 *) srcbuf)[0];
1516 run = ((Uint16 *) srcbuf)[1];
1517 srcbuf += 4;
1518 }
1519 if (run) {
1520 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
1521 ofs += run;
1522 } else if (!ofs) {
1523 goto end_function;
1524 }
1525 } while (ofs < w);
1526
1527 /* skip padding if needed */
1528 if (bpp == 2)
1529 srcbuf += (uintptr_t) srcbuf & 2;
1530
1531 /* copy translucent pixels */
1532 ofs = 0;
1533 do {
1534 unsigned run;
1535 ofs += ((Uint16 *) srcbuf)[0];
1536 run = ((Uint16 *) srcbuf)[1];
1537 srcbuf += 4;
1538 if (run) {
1539 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
1540 ofs += run;
1541 }
1542 } while (ofs < w);
1543 dst += surface->pitch >> 2;
1544 }
1545
1546 end_function:
1547 return (SDL_TRUE);
1548 }
1549
1550 void
SDL_UnRLESurface(SDL_Surface * surface,int recode)1551 SDL_UnRLESurface(SDL_Surface * surface, int recode)
1552 {
1553 if (surface->flags & SDL_RLEACCEL) {
1554 surface->flags &= ~SDL_RLEACCEL;
1555
1556 if (recode && !(surface->flags & SDL_PREALLOC)) {
1557 if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
1558 SDL_Rect full;
1559
1560 /* re-create the original surface */
1561 surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
1562 if (!surface->pixels) {
1563 /* Oh crap... */
1564 surface->flags |= SDL_RLEACCEL;
1565 return;
1566 }
1567 surface->flags |= SDL_SIMD_ALIGNED;
1568
1569 /* fill it with the background color */
1570 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
1571
1572 /* now render the encoded surface */
1573 full.x = full.y = 0;
1574 full.w = surface->w;
1575 full.h = surface->h;
1576 SDL_RLEBlit(surface, &full, surface, &full);
1577 } else {
1578 if (!UnRLEAlpha(surface)) {
1579 /* Oh crap... */
1580 surface->flags |= SDL_RLEACCEL;
1581 return;
1582 }
1583 }
1584 }
1585 surface->map->info.flags &=
1586 ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
1587
1588 SDL_free(surface->map->data);
1589 surface->map->data = NULL;
1590 }
1591 }
1592
1593 #endif /* SDL_HAVE_RLE */
1594
1595 /* vi: set ts=4 sw=4 expandtab: */
1596