1 /*
2   Simple DirectMedia Layer
3   Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4 
5   This software is provided 'as-is', without any express or implied
6   warranty.  In no event will the authors be held liable for any damages
7   arising from the use of this software.
8 
9   Permission is granted to anyone to use this software for any purpose,
10   including commercial applications, and to alter it and redistribute it
11   freely, subject to the following restrictions:
12 
13   1. The origin of this software must not be misrepresented; you must not
14      claim that you wrote the original software. If you use this software
15      in a product, an acknowledgment in the product documentation would be
16      appreciated but is not required.
17   2. Altered source versions must be plainly marked as such, and must not be
18      misrepresented as being the original software.
19   3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22 
23 #if SDL_HAVE_RLE
24 
25 /*
26  * RLE encoding for software colorkey and alpha-channel acceleration
27  *
28  * Original version by Sam Lantinga
29  *
30  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
31  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
32  * format, encoder and blitter.
33  *
34  * Many thanks to Xark and johns for hints, benchmarks and useful comments
35  * leading to this code.
36  *
37  * Welcome to Macro Mayhem.
38  */
39 
40 /*
41  * The encoding translates the image data to a stream of segments of the form
42  *
43  * <skip> <run> <data>
44  *
45  * where <skip> is the number of transparent pixels to skip,
46  *       <run>  is the number of opaque pixels to blit,
47  * and   <data> are the pixels themselves.
48  *
49  * This basic structure is used both for colorkeyed surfaces, used for simple
50  * binary transparency and for per-surface alpha blending, and for surfaces
51  * with per-pixel alpha. The details differ, however:
52  *
53  * Encoding of colorkeyed surfaces:
54  *
55  *   Encoded pixels always have the same format as the target surface.
56  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
57  *   where they are 16 bit. This makes the pixel data aligned at all times.
58  *   Segments never wrap around from one scan line to the next.
59  *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
61  *   beginning of a line.
62  *
63  * Encoding of surfaces with per-pixel alpha:
64  *
65  *   The sequence begins with a struct RLEDestFormat describing the target
66  *   pixel format, to provide reliable un-encoding.
67  *
68  *   Each scan line is encoded twice: First all completely opaque pixels,
69  *   encoded in the target format as described above, and then all
70  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
71  *   in the following 32-bit format:
72  *
73  *   For 32-bit targets, each pixel has the target RGB format but with
74  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
75  *   counts are 16 bit.
76  *
77  *   For 16-bit targets, each pixel has the target RGB format, but with
78  *   the middle component (usually green) shifted 16 steps to the left,
79  *   and the hole filled with the 5 most significant bits of the alpha value.
80  *   i.e. if the target has the format         rrrrrggggggbbbbb,
81  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
82  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
83  *   for the translucent lines. Two padding bytes may be inserted
84  *   before each translucent line to keep them 32-bit aligned.
85  *
86  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
87  *   beginning of an opaque line.
88  */
89 
90 #include "SDL_video.h"
91 #include "SDL_sysvideo.h"
92 #include "SDL_blit.h"
93 #include "SDL_RLEaccel_c.h"
94 
#ifndef MIN
/* Smaller of the two arguments; each argument may be evaluated twice. */
#define MIN(a, b) ((b) > (a) ? (a) : (b))
#endif
98 
/* Copy `len` pixels of `bpp` bytes each from `from` to `to`. */
#define PIXEL_COPY(to, from, len, bpp)          \
    SDL_memcpy((to), (from), (bpp) * (size_t)(len))
101 
102 /*
103  * Various colorkey blit methods, for opaque and per-surface alpha
104  */
105 
/*
 * Opaque "blit": a straight pixel copy. The last argument is ignored; it is
 * there so the macro takes the same arguments as the ALPHA_BLIT* macros.
 */
#define OPAQUE_BLIT(dest, source, npixels, pixsize, unused_alpha)   \
    PIXEL_COPY(dest, source, npixels, pixsize)
108 
/*
 * Per-surface alpha blend for 32bpp pixels of the form 0x00rrggbb (or any
 * other permutation of the three 8-bit components).
 * The middle component is blended on its own, which lets the two outer
 * components be blended together in one multiplication: the 8-bit gap to
 * the left of each of them keeps the products from colliding.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha)                  \
    do {                                                                \
        const Uint32 *px_in = (const Uint32 *)(from);                   \
        Uint32 *px_out = (Uint32 *)(to);                                \
        int remaining;                                                  \
        for (remaining = (int)(length); remaining > 0; --remaining) {   \
            const Uint32 spix = *px_in++;                               \
            const Uint32 dpix = *px_out;                                \
            const Uint32 src_outer = spix & 0xff00ff;                   \
            const Uint32 src_mid = spix & 0xff00;                       \
            Uint32 dst_outer = dpix & 0xff00ff;                         \
            Uint32 dst_mid = dpix & 0xff00;                             \
            dst_outer = (dst_outer                                      \
                + ((src_outer - dst_outer) * alpha >> 8)) & 0xff00ff;   \
            dst_mid = (dst_mid                                          \
                + ((src_mid - dst_mid) * alpha >> 8)) & 0xff00;         \
            *px_out++ = dst_outer | dst_mid;                            \
        }                                                               \
    } while (0)
133 
/*
 * For 16bpp pixels we can go one step further: move the middle component
 * into the high 16 bits of a 32-bit word and blend all three components
 * with a single multiplication. The smallest gap between components is
 * then only 5 bits, so alpha is reduced to 5 bits as well.
 */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha)                  \
    do {                                                                \
        const Uint16 *px_in = (const Uint16 *)(from);                   \
        Uint16 *px_out = (Uint16 *)(to);                                \
        const Uint32 alpha5 = alpha >> 3;                               \
        int remaining;                                                  \
        for (remaining = (int)(length); remaining > 0; --remaining) {   \
            Uint32 spread_s = *px_in++;                                 \
            Uint32 spread_d = *px_out;                                  \
            /* spread rrrrrggggggbbbbb out to 00000gggggg00000rrrrr000000bbbbb */ \
            spread_s = (spread_s | spread_s << 16) & 0x07e0f81f;        \
            spread_d = (spread_d | spread_d << 16) & 0x07e0f81f;        \
            spread_d += (spread_s - spread_d) * alpha5 >> 5;            \
            spread_d &= 0x07e0f81f;                                     \
            /* fold the middle component back into place */             \
            *px_out++ = (Uint16)(spread_d | spread_d >> 16);            \
        }                                                               \
    } while(0)
156 
/*
 * Same technique as ALPHA_BLIT16_565, with the masks adjusted for three
 * 5-bit components (middle component 0x03e0).
 */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha)                  \
    do {                                                                \
        const Uint16 *px_in = (const Uint16 *)(from);                   \
        Uint16 *px_out = (Uint16 *)(to);                                \
        const Uint32 alpha5 = alpha >> 3;                               \
        int remaining;                                                  \
        for (remaining = (int)(length); remaining > 0; --remaining) {   \
            Uint32 spread_s = *px_in++;                                 \
            Uint32 spread_d = *px_out;                                  \
            spread_s = (spread_s | spread_s << 16) & 0x03e07c1f;        \
            spread_d = (spread_d | spread_d << 16) & 0x03e07c1f;        \
            spread_d += (spread_s - spread_d) * alpha5 >> 5;            \
            spread_d &= 0x03e07c1f;                                     \
            *px_out++ = (Uint16)(spread_d | spread_d >> 16);            \
        }                                                               \
    } while(0)
173 
/*
 * The general, slow catch-all blender for the remaining depths and formats
 * (2, 3 or 4 bytes per pixel). Every pixel is unpacked with RGB_FROM_PIXEL,
 * blended one component at a time and re-packed with PIXEL_FROM_RGB.
 * A variable named `fmt` must be in scope where this macro is expanded.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)                    \
    do {                                                                \
        Uint8 *px_in = from;                                            \
        Uint8 *px_out = to;                                             \
        int remaining;                                                  \
        for (remaining = (int)(length); remaining > 0; --remaining) {   \
            Uint32 spix, dpix;                                          \
            unsigned sr, sg, sb, dr, dg, db;                            \
            /* 3-byte pixels: where the most/least significant byte is */ \
            const int msb = (SDL_BYTEORDER == SDL_BIG_ENDIAN) ? 0 : 2;  \
            const int lsb = 2 - msb;                                    \
            /* fetch one source and one destination pixel */            \
            switch (bpp) {                                              \
            case 4:                                                     \
                spix = *(Uint32 *)px_in;                                \
                dpix = *(Uint32 *)px_out;                               \
                break;                                                  \
            case 3:                                                     \
                spix = (px_in[msb] << 16) | (px_in[1] << 8) | px_in[lsb]; \
                dpix = (px_out[msb] << 16) | (px_out[1] << 8) | px_out[lsb]; \
                break;                                                  \
            case 2:                                                     \
                spix = *(Uint16 *)px_in;                                \
                dpix = *(Uint16 *)px_out;                               \
                break;                                                  \
            }                                                           \
            RGB_FROM_PIXEL(spix, fmt, sr, sg, sb);                      \
            RGB_FROM_PIXEL(dpix, fmt, dr, dg, db);                      \
            dr += (sr - dr) * alpha >> 8;                               \
            dg += (sg - dg) * alpha >> 8;                               \
            db += (sb - db) * alpha >> 8;                               \
            PIXEL_FROM_RGB(dpix, fmt, dr, dg, db);                      \
            /* store the blended pixel */                               \
            switch (bpp) {                                              \
            case 4:                                                     \
                *(Uint32 *)px_out = dpix;                               \
                break;                                                  \
            case 3:                                                     \
                px_out[msb] = (Uint8)(dpix >> 16);                      \
                px_out[1] = (Uint8)(dpix >> 8);                         \
                px_out[lsb] = (Uint8)(dpix);                            \
                break;                                                  \
            case 2:                                                     \
                *(Uint16 *)px_out = (Uint16)dpix;                       \
                break;                                                  \
            }                                                           \
            px_in += bpp;                                               \
            px_out += bpp;                                              \
        }                                                               \
    } while(0)
233 
/*
 * Special case: 50% alpha (alpha=128).
 * It gets its own code path because it optimizes very well and is a
 * common choice for semi-translucency.
 * All three components are handled at once: clear the lowest bit of every
 * component so the two pixels can be added without carrying into the next
 * component, halve the sum, then add back the carry that arises when both
 * lowest bits were set.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)               \
    do {                                                                \
        const Uint32 *px_in = (const Uint32 *)(from);                   \
        Uint32 *px_out = (Uint32 *)(to);                                \
        int remaining = (int)(length);                                  \
        while (remaining-- > 0) {                                       \
            const Uint32 spix = *px_in++;                               \
            const Uint32 dpix = *px_out;                                \
            const Uint32 halved =                                       \
                ((spix & 0x00fefefe) + (dpix & 0x00fefefe)) >> 1;       \
            *px_out++ = halved + (spix & dpix & 0x00010101);            \
        }                                                               \
    } while(0)
254 
255 /*
256  * For 16bpp, we can actually blend two pixels in parallel, if we take
257  * care to shift before we add, not after.
258  */
259 
/*
 * Helper: blend a single 16-bit pixel at 50% and advance both pointers.
 * `dst` and `src` must be modifiable Uint16 pointer variables (they are
 * post-incremented). `mask` has every bit set except the lowest bit of
 * each colour component.
 */
#define BLEND16_50(dst, src, mask)                              \
    do {                                                        \
        Uint32 s = *src++;                                      \
        Uint32 d = *dst;                                        \
        *dst++ = (Uint16)((((s & (mask)) + (d & (mask))) >> 1) + \
                          (s & d & (~(mask) & 0xffff)));        \
    } while(0)

/*
 * Basic 16bpp 50% blender. `mask` holds the bits to keep when adding (see
 * BLEND16_50). When source and destination have the same alignment relative
 * to a 32-bit boundary, pixels are blended two at a time through 32-bit
 * accesses, with an optional single leading and trailing pixel; otherwise
 * they are blended one by one.
 * A `length` of zero is a no-op.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
    do {                                                        \
        unsigned n = (length);                                  \
        Uint16 *src = (Uint16 *)(from);                         \
        Uint16 *dst = (Uint16 *)(to);                           \
        if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
            /* source and destination not in phase, blit one by one */ \
            while (n--)                                         \
                BLEND16_50(dst, src, mask);                     \
        } else {                                                \
            /* n must be checked here: decrementing an unsigned */ \
            /* zero would wrap and overrun both buffers */      \
            if (n && ((uintptr_t)src & 3)) {                    \
                /* first odd pixel */                           \
                BLEND16_50(dst, src, mask);                     \
                n--;                                            \
            }                                                   \
            for (; n > 1; n -= 2) {                             \
                Uint32 s = *(Uint32 *)src;                      \
                Uint32 d = *(Uint32 *)dst;                      \
                *(Uint32 *)dst = ((s & ((mask) | (mask) << 16)) >> 1) \
                    + ((d & ((mask) | (mask) << 16)) >> 1)      \
                    + (s & d & (~((mask) | (mask) << 16)));     \
                src += 2;                                       \
                dst += 2;                                       \
            }                                                   \
            if (n)                                              \
                BLEND16_50(dst, src, mask); /* last odd pixel */ \
        }                                                       \
    } while(0)
298 
/* 50% blend for 5-6-5 pixels: the mask clears bits 0, 5 and 11. */
#define ALPHA_BLIT16_565_50(dest, source, npixels, pixsize, a)  \
    ALPHA_BLIT16_50(dest, source, npixels, pixsize, a, 0xf7deU)
301 
/* 50% blend for 5-5-5 pixels: the mask clears bits 0, 5 and 10. */
#define ALPHA_BLIT16_555_50(dest, source, npixels, pixsize, a)  \
    ALPHA_BLIT16_50(dest, source, npixels, pixsize, a, 0xfbdeU)
304 
/*
 * Select the copy/blend macro that suits the per-surface `alpha` and the
 * pixel format `fmt`, and run `blitter(bpp, CountType, do_blit)` with it.
 * CountType is the type of the <skip>/<run> counts: Uint8, or Uint16 for
 * 4-byte pixels. Nothing runs for 1-byte pixels unless alpha is 255.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt)                                \
    do {                                                                \
        if (alpha == 255) {                                             \
            /* fully opaque: a plain copy serves every depth */         \
            switch (fmt->BytesPerPixel) {                               \
            case 1: blitter(1, Uint8, OPAQUE_BLIT); break;              \
            case 2: blitter(2, Uint8, OPAQUE_BLIT); break;              \
            case 3: blitter(3, Uint8, OPAQUE_BLIT); break;              \
            case 4: blitter(4, Uint16, OPAQUE_BLIT); break;             \
            }                                                           \
        } else if (fmt->BytesPerPixel == 2) {                           \
            if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0xffff        \
                && (fmt->Gmask == 0x07e0 || fmt->Rmask == 0x07e0        \
                    || fmt->Bmask == 0x07e0)) {                         \
                /* 16 bits used, 6-bit middle component */              \
                if (alpha == 128) {                                     \
                    blitter(2, Uint8, ALPHA_BLIT16_565_50);             \
                } else {                                                \
                    blitter(2, Uint8, ALPHA_BLIT16_565);                \
                }                                                       \
            } else if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x7fff \
                && (fmt->Gmask == 0x03e0 || fmt->Rmask == 0x03e0        \
                    || fmt->Bmask == 0x03e0)) {                         \
                /* 15 bits used, 5-bit middle component */              \
                if (alpha == 128) {                                     \
                    blitter(2, Uint8, ALPHA_BLIT16_555_50);             \
                } else {                                                \
                    blitter(2, Uint8, ALPHA_BLIT16_555);                \
                }                                                       \
            } else {                                                    \
                /* any other 16-bit layout */                           \
                blitter(2, Uint8, ALPHA_BLIT_ANY);                      \
            }                                                           \
        } else if (fmt->BytesPerPixel == 3) {                           \
            blitter(3, Uint8, ALPHA_BLIT_ANY);                          \
        } else if (fmt->BytesPerPixel == 4) {                           \
            if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff    \
                && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00        \
                    || fmt->Bmask == 0xff00)) {                         \
                if (alpha == 128) {                                     \
                    blitter(4, Uint16, ALPHA_BLIT32_888_50);            \
                } else {                                                \
                    blitter(4, Uint16, ALPHA_BLIT32_888);               \
                }                                                       \
            } else {                                                    \
                blitter(4, Uint16, ALPHA_BLIT_ANY);                     \
            }                                                           \
        }                                                               \
        /* 1-byte pixels: there is no 8bpp alpha blitting */            \
    } while(0)
374 
/*
 * Set a pixel value using the given format, except that the alpha value is
 * placed in the top byte. This is the format used for RLE with alpha.
 *
 * Pixel: lvalue that receives the packed pixel.
 * fmt:   pointer to a structure with R/G/B loss and shift members.
 * r, g, b, a: channel values.
 *
 * Every argument is parenthesized so that compound expressions expand as
 * intended, and the channels are converted to Uint32 before shifting so
 * that `a << 24` cannot overflow a signed int.
 */
#define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
{                                                                       \
    (Pixel) = (((Uint32)(r) >> (fmt)->Rloss) << (fmt)->Rshift) |        \
              (((Uint32)(g) >> (fmt)->Gloss) << (fmt)->Gshift) |        \
              (((Uint32)(b) >> (fmt)->Bloss) << (fmt)->Bshift) |        \
              ((Uint32)(a) << 24);                                      \
}
386 
387 /*
388  * This takes care of the case when the surface is clipped on the left and/or
389  * right. Top clipping has already been taken care of.
390  */
391 static void
RLEClipBlit(int w,Uint8 * srcbuf,SDL_Surface * surf_dst,Uint8 * dstbuf,SDL_Rect * srcrect,unsigned alpha)392 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
393             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
394 {
395     SDL_PixelFormat *fmt = surf_dst->format;
396 
397 #define RLECLIPBLIT(bpp, Type, do_blit)                         \
398     do {                                                        \
399         int linecount = srcrect->h;                             \
400         int ofs = 0;                                            \
401         int left = srcrect->x;                                  \
402         int right = left + srcrect->w;                          \
403         dstbuf -= left * bpp;                                   \
404         for (;;) {                                              \
405             int run;                                            \
406             ofs += *(Type *)srcbuf;                             \
407             run = ((Type *)srcbuf)[1];                          \
408             srcbuf += 2 * sizeof(Type);                         \
409             if (run) {                                          \
410                 /* clip to left and right borders */            \
411                 if (ofs < right) {                              \
412                     int start = 0;                              \
413                     int len = run;                              \
414                     int startcol;                               \
415                     if (left - ofs > 0) {                       \
416                         start = left - ofs;                     \
417                         len -= start;                           \
418                         if (len <= 0)                           \
419                             goto nocopy ## bpp ## do_blit;      \
420                     }                                           \
421                     startcol = ofs + start;                     \
422                     if (len > right - startcol)                 \
423                         len = right - startcol;                 \
424                     do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
425                         len, bpp, alpha);                       \
426                 }                                               \
427     nocopy ## bpp ## do_blit:                                   \
428                 srcbuf += run * bpp;                            \
429                 ofs += run;                                     \
430             } else if (!ofs)                                    \
431                 break;                                          \
432                                                                 \
433             if (ofs == w) {                                     \
434                 ofs = 0;                                        \
435                 dstbuf += surf_dst->pitch;                      \
436                 if (!--linecount)                               \
437                     break;                                      \
438             }                                                   \
439         }                                                       \
440     } while(0)
441 
442     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
443 
444 #undef RLECLIPBLIT
445 
446 }
447 
448 
449 /* blit a colorkeyed RLE surface */
450 static int SDLCALL
SDL_RLEBlit(SDL_Surface * surf_src,SDL_Rect * srcrect,SDL_Surface * surf_dst,SDL_Rect * dstrect)451 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
452             SDL_Surface * surf_dst, SDL_Rect * dstrect)
453 {
454     Uint8 *dstbuf;
455     Uint8 *srcbuf;
456     int x, y;
457     int w = surf_src->w;
458     unsigned alpha;
459 
460     /* Lock the destination if necessary */
461     if (SDL_MUSTLOCK(surf_dst)) {
462         if (SDL_LockSurface(surf_dst) < 0) {
463             return (-1);
464         }
465     }
466 
467     /* Set up the source and destination pointers */
468     x = dstrect->x;
469     y = dstrect->y;
470     dstbuf = (Uint8 *) surf_dst->pixels
471         + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
472     srcbuf = (Uint8 *) surf_src->map->data;
473 
474     {
475         /* skip lines at the top if necessary */
476         int vskip = srcrect->y;
477         int ofs = 0;
478         if (vskip) {
479 
480 #define RLESKIP(bpp, Type)          \
481         for(;;) {           \
482             int run;            \
483             ofs += *(Type *)srcbuf; \
484             run = ((Type *)srcbuf)[1];  \
485             srcbuf += sizeof(Type) * 2; \
486             if(run) {           \
487             srcbuf += run * bpp;    \
488             ofs += run;     \
489             } else if(!ofs)     \
490             goto done;      \
491             if(ofs == w) {      \
492             ofs = 0;        \
493             if(!--vskip)        \
494                 break;      \
495             }               \
496         }
497 
498             switch (surf_src->format->BytesPerPixel) {
499             case 1:
500                 RLESKIP(1, Uint8);
501                 break;
502             case 2:
503                 RLESKIP(2, Uint8);
504                 break;
505             case 3:
506                 RLESKIP(3, Uint8);
507                 break;
508             case 4:
509                 RLESKIP(4, Uint16);
510                 break;
511             }
512 
513 #undef RLESKIP
514 
515         }
516     }
517 
518     alpha = surf_src->map->info.a;
519     /* if left or right edge clipping needed, call clip blit */
520     if (srcrect->x || srcrect->w != surf_src->w) {
521         RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
522     } else {
523         SDL_PixelFormat *fmt = surf_src->format;
524 
525 #define RLEBLIT(bpp, Type, do_blit)                       \
526         do {                                  \
527         int linecount = srcrect->h;                   \
528         int ofs = 0;                              \
529         for(;;) {                             \
530             unsigned run;                         \
531             ofs += *(Type *)srcbuf;                   \
532             run = ((Type *)srcbuf)[1];                    \
533             srcbuf += 2 * sizeof(Type);                   \
534             if(run) {                             \
535             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
536             srcbuf += run * bpp;                      \
537             ofs += run;                       \
538             } else if(!ofs)                       \
539             break;                            \
540             if(ofs == w) {                        \
541             ofs = 0;                          \
542             dstbuf += surf_dst->pitch;                     \
543             if(!--linecount)                      \
544                 break;                        \
545             }                                 \
546         }                                 \
547         } while(0)
548 
549         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
550 
551 #undef RLEBLIT
552     }
553 
554   done:
555     /* Unlock the destination if necessary */
556     if (SDL_MUSTLOCK(surf_dst)) {
557         SDL_UnlockSurface(surf_dst);
558     }
559     return (0);
560 }
561 
562 #undef OPAQUE_BLIT
563 
564 /*
565  * Per-pixel blitting macros for translucent pixels:
566  * These use the same techniques as the per-surface blitting macros
567  */
568 
/*
 * Blend one translucent 32bpp source pixel into `dst`.
 * The alpha value is taken from the top 8 bits of `src`; the rest works
 * like the per-surface 888 blend. The top byte of the result is set to 0xff.
 */
#define BLIT_TRANSL_888(src, dst)                                       \
    do {                                                                \
        const Uint32 bt_s = src;                                        \
        const Uint32 bt_d = dst;                                        \
        const unsigned bt_a = bt_s >> 24;                               \
        const Uint32 bt_s_outer = bt_s & 0xff00ff;                      \
        const Uint32 bt_d_outer = bt_d & 0xff00ff;                      \
        const Uint32 bt_s_mid = bt_s & 0xff00;                          \
        const Uint32 bt_d_mid = bt_d & 0xff00;                          \
        const Uint32 bt_outer = (bt_d_outer                             \
            + ((bt_s_outer - bt_d_outer) * bt_a >> 8)) & 0xff00ff;      \
        const Uint32 bt_mid = (bt_d_mid                                 \
            + ((bt_s_mid - bt_d_mid) * bt_a >> 8)) & 0xff00;            \
        dst = bt_outer | bt_mid | 0xff000000;                           \
    } while(0)
586 
/*
 * Blend one translucent pixel into a 16bpp (5-6-5) destination.
 * `src` is a 32-bit encoded pixel whose 5-bit alpha sits in bits 5-9 and
 * whose middle component is already in the high 16 bits. As with the
 * per-surface blend, all 3 RGB components are processed at the same time.
 */
#define BLIT_TRANSL_565(src, dst)                               \
    do {                                                        \
        const Uint32 bt_enc = src;                              \
        const unsigned bt_a = (bt_enc & 0x3e0) >> 5;            \
        const Uint32 bt_s = bt_enc & 0x07e0f81f;                \
        Uint32 bt_d = dst;                                      \
        bt_d = (bt_d | bt_d << 16) & 0x07e0f81f;                \
        bt_d += (bt_s - bt_d) * bt_a >> 5;                      \
        bt_d &= 0x07e0f81f;                                     \
        dst = (Uint16)(bt_d | bt_d >> 16);                      \
    } while(0)
602 
/*
 * Same as BLIT_TRANSL_565, with the masks adjusted for a 5-5-5 destination.
 */
#define BLIT_TRANSL_555(src, dst)                               \
    do {                                                        \
        const Uint32 bt_enc = src;                              \
        const unsigned bt_a = (bt_enc & 0x3e0) >> 5;            \
        const Uint32 bt_s = bt_enc & 0x03e07c1f;                \
        Uint32 bt_d = dst;                                      \
        bt_d = (bt_d | bt_d << 16) & 0x03e07c1f;                \
        bt_d += (bt_s - bt_d) * bt_a >> 5;                      \
        bt_d &= 0x03e07c1f;                                     \
        dst = (Uint16)(bt_d | bt_d >> 16);                      \
    } while(0)
614 
615 /* used to save the destination format in the encoding. Designed to be
616    macro-compatible with SDL_PixelFormat but without the unneeded fields */
617 typedef struct
618 {
619     Uint8 BytesPerPixel;
620     Uint8 padding[3];
621     Uint32 Rmask;
622     Uint32 Gmask;
623     Uint32 Bmask;
624     Uint32 Amask;
625     Uint8 Rloss;
626     Uint8 Gloss;
627     Uint8 Bloss;
628     Uint8 Aloss;
629     Uint8 Rshift;
630     Uint8 Gshift;
631     Uint8 Bshift;
632     Uint8 Ashift;
633 } RLEDestFormat;
634 
/*
 * blit a pixel-alpha RLE surface clipped at the right and/or left edges
 *
 *   w        - width in pixels of one encoded source line; every line is
 *              walked until the accumulated offset reaches this value
 *   srcbuf   - encoded stream, already advanced past any skipped top lines
 *   surf_dst - destination surface (supplies the pixel format and pitch)
 *   dstbuf   - destination address of the first visible pixel
 *   srcrect  - x and w give the visible column range, h the line count
 *
 * Returns early when the end-of-stream marker is reached.
 * NOTE(review): both the line loop and the segment loops are do/while, so
 * srcrect->h >= 1 appears to be assumed - confirm against the caller.
 */
static void
RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
                 Uint8 * dstbuf, SDL_Rect * srcrect)
{
    SDL_PixelFormat *df = surf_dst->format;
    /*
     * clipped blitter: Ptype is the destination pixel type,
     * Ctype the integer type of the opaque skip/run counts (the
     * translucent counts are always read as Uint16), and do_blend
     * the macro to blend one pixel.
     */
#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
    do {                                  \
    int linecount = srcrect->h;                   \
    int left = srcrect->x;                        \
    int right = left + srcrect->w;                    \
    /* rebase so offsets below can be full-line source columns */ \
    dstbuf -= left * sizeof(Ptype);                   \
    do {                                  \
        int ofs = 0;                          \
        /* blit opaque pixels on one line */              \
        do {                              \
        unsigned run;                         \
        ofs += ((Ctype *)srcbuf)[0];                  \
        run = ((Ctype *)srcbuf)[1];               \
        srcbuf += 2 * sizeof(Ctype);                  \
        if(run) {                         \
            /* clip to left and right borders */          \
            int cofs = ofs;                   \
            int crun = run;                   \
            if(left - cofs > 0) {                 \
            crun -= left - cofs;                  \
            cofs = left;                      \
            }                             \
            if(crun > right - cofs)               \
            crun = right - cofs;                  \
            if(crun > 0)                      \
            PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
                   srcbuf + (cofs - ofs) * sizeof(Ptype), \
                   (unsigned)crun, sizeof(Ptype));    \
            /* always consume the whole run, clipped or not */    \
            srcbuf += run * sizeof(Ptype);            \
            ofs += run;                       \
        } else if(!ofs)                       \
            /* zero skip + zero run at line start: end of data */ \
            return;                       \
        } while(ofs < w);                         \
        /* skip padding if necessary */               \
        if(sizeof(Ptype) == 2)                    \
        srcbuf += (uintptr_t)srcbuf & 2;              \
        /* blit translucent pixels on the same line */        \
        ofs = 0;                              \
        do {                              \
        unsigned run;                         \
        ofs += ((Uint16 *)srcbuf)[0];                 \
        run = ((Uint16 *)srcbuf)[1];                  \
        srcbuf += 4;                          \
        if(run) {                         \
            /* clip to left and right borders */          \
            int cofs = ofs;                   \
            int crun = run;                   \
            if(left - cofs > 0) {                 \
            crun -= left - cofs;                  \
            cofs = left;                      \
            }                             \
            if(crun > right - cofs)               \
            crun = right - cofs;                  \
            if(crun > 0) {                    \
            Ptype *dst = (Ptype *)dstbuf + cofs;          \
            Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
            int i;                        \
            for(i = 0; i < crun; i++)             \
                do_blend(src[i], dst[i]);             \
            }                             \
            /* translucent pixels are stored as 4 bytes each */   \
            srcbuf += run * 4;                    \
            ofs += run;                       \
        }                             \
        } while(ofs < w);                         \
        dstbuf += surf_dst->pitch;                     \
    } while(--linecount);                         \
    } while(0)

    /* pick the instantiation matching the destination pixel size; for
       16bpp a 0x07e0 mask on any channel selects the 565 blender */
    switch (df->BytesPerPixel) {
    case 2:
        if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
        else
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
        break;
    case 4:
        RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
        break;
    }
}
726 
/*
 * blit a pixel-alpha RLE surface
 *
 *   surf_src - RLE-encoded source; its map->data holds an RLEDestFormat
 *              header followed by the encoded segment stream
 *   srcrect  - region of the source to draw: y lines are skipped at the
 *              top, h lines are drawn, x/w select the visible columns
 *   surf_dst - destination surface, locked here if it requires locking
 *   dstrect  - only x and y are read: the destination position
 *
 * Returns -1 if the destination cannot be locked, 0 otherwise (including
 * when the encoded data ends before srcrect->h lines were drawn).
 */
static int SDLCALL
SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
                 SDL_Surface * surf_dst, SDL_Rect * dstrect)
{
    int x, y;
    int w = surf_src->w;
    Uint8 *srcbuf, *dstbuf;
    SDL_PixelFormat *df = surf_dst->format;

    /* Lock the destination if necessary */
    if (SDL_MUSTLOCK(surf_dst)) {
        if (SDL_LockSurface(surf_dst) < 0) {
            return -1;
        }
    }

    x = dstrect->x;
    y = dstrect->y;
    dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
    /* the encoded stream starts right after the saved format header */
    srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);

    {
        /* skip lines at the top if necessary */
        int vskip = srcrect->y;
        if (vskip) {
            int ofs;
            if (df->BytesPerPixel == 2) {
                /* the 16/32 interleaved format */
                do {
                    /* skip opaque line */
                    ofs = 0;
                    do {
                        int run;
                        ofs += srcbuf[0];
                        run = srcbuf[1];
                        srcbuf += 2;
                        if (run) {
                            srcbuf += 2 * run;
                            ofs += run;
                        } else if (!ofs)
                            goto done;  /* end-of-stream marker */
                    } while (ofs < w);

                    /* skip padding */
                    srcbuf += (uintptr_t) srcbuf & 2;

                    /* skip translucent line */
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *) srcbuf)[0];
                        run = ((Uint16 *) srcbuf)[1];
                        /* 4-byte header plus 4 bytes per pixel */
                        srcbuf += 4 * (run + 1);
                        ofs += run;
                    } while (ofs < w);
                } while (--vskip);
            } else {
                /* the 32/32 interleaved format */
                vskip <<= 1;    /* opaque and translucent have same format */
                do {
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *) srcbuf)[0];
                        run = ((Uint16 *) srcbuf)[1];
                        srcbuf += 4;
                        if (run) {
                            srcbuf += 4 * run;
                            ofs += run;
                        } else if (!ofs)
                            goto done;  /* end-of-stream marker */
                    } while (ofs < w);
                } while (--vskip);
            }
        }
    }

    /* if left or right edge clipping needed, call clip blit */
    if (srcrect->x || srcrect->w != surf_src->w) {
        RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
    } else {

        /*
         * non-clipped blitter. Ptype is the destination pixel type,
         * Ctype the integer type of the opaque skip/run counts (the
         * translucent counts are always read as Uint16), and do_blend
         * the macro to blend one pixel.
         */
#define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
    do {                                 \
        int linecount = srcrect->h;                  \
        do {                             \
        int ofs = 0;                         \
        /* blit opaque pixels on one line */             \
        do {                             \
            unsigned run;                    \
            ofs += ((Ctype *)srcbuf)[0];             \
            run = ((Ctype *)srcbuf)[1];              \
            srcbuf += 2 * sizeof(Ctype);             \
            if(run) {                        \
            PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
                   run, sizeof(Ptype));          \
            srcbuf += run * sizeof(Ptype);           \
            ofs += run;                  \
            } else if(!ofs)                  \
            /* zero skip + zero run at line start: end of data */ \
            goto done;                   \
        } while(ofs < w);                    \
        /* skip padding if necessary */              \
        if(sizeof(Ptype) == 2)                   \
            srcbuf += (uintptr_t)srcbuf & 2;             \
        /* blit translucent pixels on the same line */       \
        ofs = 0;                         \
        do {                             \
            unsigned run;                    \
            ofs += ((Uint16 *)srcbuf)[0];            \
            run = ((Uint16 *)srcbuf)[1];             \
            srcbuf += 4;                     \
            if(run) {                        \
            Ptype *dst = (Ptype *)dstbuf + ofs;      \
            unsigned i;                  \
            for(i = 0; i < run; i++) {           \
                Uint32 src = *(Uint32 *)srcbuf;      \
                do_blend(src, *dst);             \
                srcbuf += 4;                 \
                dst++;                   \
            }                        \
            ofs += run;                  \
            }                            \
        } while(ofs < w);                    \
        dstbuf += surf_dst->pitch;                    \
        } while(--linecount);                    \
    } while(0)

        /* pick the instantiation matching the destination pixel size; for
           16bpp a 0x07e0 mask on any channel selects the 565 blender */
        switch (df->BytesPerPixel) {
        case 2:
            if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
                || df->Bmask == 0x07e0)
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
            else
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
            break;
        case 4:
            RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
            break;
        }
    }

  done:
    /* Unlock the destination if necessary */
    if (SDL_MUSTLOCK(surf_dst)) {
        SDL_UnlockSurface(surf_dst);
    }
    return 0;
}
881 
882 /*
883  * Auxiliary functions:
884  * The encoding functions take 32bpp rgb + a, and
885  * return the number of bytes copied to the destination.
886  * The decoding functions copy to 32bpp rgb + a, and
887  * return the number of bytes copied from the source.
888  * These are only used in the encoder and un-RLE code and are therefore not
889  * highly optimised.
890  */
891 
892 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
893 static int
copy_opaque_16(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)894 copy_opaque_16(void *dst, Uint32 * src, int n,
895                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
896 {
897     int i;
898     Uint16 *d = dst;
899     for (i = 0; i < n; i++) {
900         unsigned r, g, b;
901         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
902         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
903         src++;
904         d++;
905     }
906     return n * 2;
907 }
908 
909 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
910 static int
uncopy_opaque_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)911 uncopy_opaque_16(Uint32 * dst, void *src, int n,
912                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
913 {
914     int i;
915     Uint16 *s = src;
916     unsigned alpha = dfmt->Amask ? 255 : 0;
917     for (i = 0; i < n; i++) {
918         unsigned r, g, b;
919         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
920         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
921         s++;
922         dst++;
923     }
924     return n * 2;
925 }
926 
927 
928 
929 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
930 static int
copy_transl_565(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)931 copy_transl_565(void *dst, Uint32 * src, int n,
932                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
933 {
934     int i;
935     Uint32 *d = dst;
936     for (i = 0; i < n; i++) {
937         unsigned r, g, b, a;
938         Uint16 pix;
939         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
940         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
941         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
942         src++;
943         d++;
944     }
945     return n * 4;
946 }
947 
948 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
949 static int
copy_transl_555(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)950 copy_transl_555(void *dst, Uint32 * src, int n,
951                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
952 {
953     int i;
954     Uint32 *d = dst;
955     for (i = 0; i < n; i++) {
956         unsigned r, g, b, a;
957         Uint16 pix;
958         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
959         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
960         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
961         src++;
962         d++;
963     }
964     return n * 4;
965 }
966 
967 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
968 static int
uncopy_transl_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)969 uncopy_transl_16(Uint32 * dst, void *src, int n,
970                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
971 {
972     int i;
973     Uint32 *s = src;
974     for (i = 0; i < n; i++) {
975         unsigned r, g, b, a;
976         Uint32 pix = *s++;
977         a = (pix & 0x3e0) >> 2;
978         pix = (pix & ~0x3e0) | pix >> 16;
979         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
980         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
981         dst++;
982     }
983     return n * 4;
984 }
985 
986 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
987 static int
copy_32(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)988 copy_32(void *dst, Uint32 * src, int n,
989         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
990 {
991     int i;
992     Uint32 *d = dst;
993     for (i = 0; i < n; i++) {
994         unsigned r, g, b, a;
995         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
996         RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
997         d++;
998         src++;
999     }
1000     return n * 4;
1001 }
1002 
1003 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
1004 static int
uncopy_32(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)1005 uncopy_32(Uint32 * dst, void *src, int n,
1006           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
1007 {
1008     int i;
1009     Uint32 *s = src;
1010     for (i = 0; i < n; i++) {
1011         unsigned r, g, b, a;
1012         Uint32 pixel = *s++;
1013         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
1014         a = pixel >> 24;
1015         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
1016         dst++;
1017     }
1018     return n * 4;
1019 }
1020 
/* true when the alpha field of `pixel` (per fmt's Amask/Ashift) is 255 */
#define ISOPAQUE(pixel, fmt) \
    ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

/* true when the alpha field of `pixel` is in 1..254: subtracting one wraps
   an alpha of 0 to a huge unsigned value, so a single compare covers both
   ends of the range */
#define ISTRANSL(pixel, fmt) \
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
1025 
1026 /* convert surface to be quickly alpha-blittable onto dest, if possible */
1027 static int
RLEAlphaSurface(SDL_Surface * surface)1028 RLEAlphaSurface(SDL_Surface * surface)
1029 {
1030     SDL_Surface *dest;
1031     SDL_PixelFormat *df;
1032     int maxsize = 0;
1033     int max_opaque_run;
1034     int max_transl_run = 65535;
1035     unsigned masksum;
1036     Uint8 *rlebuf, *dst;
1037     int (*copy_opaque) (void *, Uint32 *, int,
1038                         SDL_PixelFormat *, SDL_PixelFormat *);
1039     int (*copy_transl) (void *, Uint32 *, int,
1040                         SDL_PixelFormat *, SDL_PixelFormat *);
1041 
1042     dest = surface->map->dst;
1043     if (!dest)
1044         return -1;
1045     df = dest->format;
1046     if (surface->format->BitsPerPixel != 32)
1047         return -1;              /* only 32bpp source supported */
1048 
1049     /* find out whether the destination is one we support,
1050        and determine the max size of the encoded result */
1051     masksum = df->Rmask | df->Gmask | df->Bmask;
1052     switch (df->BytesPerPixel) {
1053     case 2:
1054         /* 16bpp: only support 565 and 555 formats */
1055         switch (masksum) {
1056         case 0xffff:
1057             if (df->Gmask == 0x07e0
1058                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
1059                 copy_opaque = copy_opaque_16;
1060                 copy_transl = copy_transl_565;
1061             } else
1062                 return -1;
1063             break;
1064         case 0x7fff:
1065             if (df->Gmask == 0x03e0
1066                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
1067                 copy_opaque = copy_opaque_16;
1068                 copy_transl = copy_transl_555;
1069             } else
1070                 return -1;
1071             break;
1072         default:
1073             return -1;
1074         }
1075         max_opaque_run = 255;   /* runs stored as bytes */
1076 
1077         /* worst case is alternating opaque and translucent pixels,
1078            with room for alignment padding between lines */
1079         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
1080         break;
1081     case 4:
1082         if (masksum != 0x00ffffff)
1083             return -1;          /* requires unused high byte */
1084         copy_opaque = copy_32;
1085         copy_transl = copy_32;
1086         max_opaque_run = 255;   /* runs stored as short ints */
1087 
1088         /* worst case is alternating opaque and translucent pixels */
1089         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
1090         break;
1091     default:
1092         return -1;              /* anything else unsupported right now */
1093     }
1094 
1095     maxsize += sizeof(RLEDestFormat);
1096     rlebuf = (Uint8 *) SDL_malloc(maxsize);
1097     if (!rlebuf) {
1098         return SDL_OutOfMemory();
1099     }
1100     {
1101         /* save the destination format so we can undo the encoding later */
1102         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
1103         r->BytesPerPixel = df->BytesPerPixel;
1104         r->Rmask = df->Rmask;
1105         r->Gmask = df->Gmask;
1106         r->Bmask = df->Bmask;
1107         r->Amask = df->Amask;
1108         r->Rloss = df->Rloss;
1109         r->Gloss = df->Gloss;
1110         r->Bloss = df->Bloss;
1111         r->Aloss = df->Aloss;
1112         r->Rshift = df->Rshift;
1113         r->Gshift = df->Gshift;
1114         r->Bshift = df->Bshift;
1115         r->Ashift = df->Ashift;
1116     }
1117     dst = rlebuf + sizeof(RLEDestFormat);
1118 
1119     /* Do the actual encoding */
1120     {
1121         int x, y;
1122         int h = surface->h, w = surface->w;
1123         SDL_PixelFormat *sf = surface->format;
1124         Uint32 *src = (Uint32 *) surface->pixels;
1125         Uint8 *lastline = dst;  /* end of last non-blank line */
1126 
1127         /* opaque counts are 8 or 16 bits, depending on target depth */
1128 #define ADD_OPAQUE_COUNTS(n, m)         \
1129     if(df->BytesPerPixel == 4) {        \
1130         ((Uint16 *)dst)[0] = n;     \
1131         ((Uint16 *)dst)[1] = m;     \
1132         dst += 4;               \
1133     } else {                \
1134         dst[0] = n;             \
1135         dst[1] = m;             \
1136         dst += 2;               \
1137     }
1138 
1139         /* translucent counts are always 16 bit */
1140 #define ADD_TRANSL_COUNTS(n, m)     \
1141     (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
1142 
1143         for (y = 0; y < h; y++) {
1144             int runstart, skipstart;
1145             int blankline = 0;
1146             /* First encode all opaque pixels of a scan line */
1147             x = 0;
1148             do {
1149                 int run, skip, len;
1150                 skipstart = x;
1151                 while (x < w && !ISOPAQUE(src[x], sf))
1152                     x++;
1153                 runstart = x;
1154                 while (x < w && ISOPAQUE(src[x], sf))
1155                     x++;
1156                 skip = runstart - skipstart;
1157                 if (skip == w)
1158                     blankline = 1;
1159                 run = x - runstart;
1160                 while (skip > max_opaque_run) {
1161                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
1162                     skip -= max_opaque_run;
1163                 }
1164                 len = MIN(run, max_opaque_run);
1165                 ADD_OPAQUE_COUNTS(skip, len);
1166                 dst += copy_opaque(dst, src + runstart, len, sf, df);
1167                 runstart += len;
1168                 run -= len;
1169                 while (run) {
1170                     len = MIN(run, max_opaque_run);
1171                     ADD_OPAQUE_COUNTS(0, len);
1172                     dst += copy_opaque(dst, src + runstart, len, sf, df);
1173                     runstart += len;
1174                     run -= len;
1175                 }
1176             } while (x < w);
1177 
1178             /* Make sure the next output address is 32-bit aligned */
1179             dst += (uintptr_t) dst & 2;
1180 
1181             /* Next, encode all translucent pixels of the same scan line */
1182             x = 0;
1183             do {
1184                 int run, skip, len;
1185                 skipstart = x;
1186                 while (x < w && !ISTRANSL(src[x], sf))
1187                     x++;
1188                 runstart = x;
1189                 while (x < w && ISTRANSL(src[x], sf))
1190                     x++;
1191                 skip = runstart - skipstart;
1192                 blankline &= (skip == w);
1193                 run = x - runstart;
1194                 while (skip > max_transl_run) {
1195                     ADD_TRANSL_COUNTS(max_transl_run, 0);
1196                     skip -= max_transl_run;
1197                 }
1198                 len = MIN(run, max_transl_run);
1199                 ADD_TRANSL_COUNTS(skip, len);
1200                 dst += copy_transl(dst, src + runstart, len, sf, df);
1201                 runstart += len;
1202                 run -= len;
1203                 while (run) {
1204                     len = MIN(run, max_transl_run);
1205                     ADD_TRANSL_COUNTS(0, len);
1206                     dst += copy_transl(dst, src + runstart, len, sf, df);
1207                     runstart += len;
1208                     run -= len;
1209                 }
1210                 if (!blankline)
1211                     lastline = dst;
1212             } while (x < w);
1213 
1214             src += surface->pitch >> 2;
1215         }
1216         dst = lastline;         /* back up past trailing blank lines */
1217         ADD_OPAQUE_COUNTS(0, 0);
1218     }
1219 
1220 #undef ADD_OPAQUE_COUNTS
1221 #undef ADD_TRANSL_COUNTS
1222 
1223     /* Now that we have it encoded, release the original pixels */
1224     if (!(surface->flags & SDL_PREALLOC)) {
1225         SDL_SIMDFree(surface->pixels);
1226         surface->pixels = NULL;
1227         surface->flags &= ~SDL_SIMD_ALIGNED;
1228     }
1229 
1230     /* reallocate the buffer to release unused memory */
1231     {
1232         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
1233         if (!p)
1234             p = rlebuf;
1235         surface->map->data = p;
1236     }
1237 
1238     return 0;
1239 }
1240 
/* Fetch one 8-bit pixel from srcbuf, widened to 32 bits. */
static Uint32
getpix_8(const Uint8 * srcbuf)
{
    return srcbuf[0];
}
1246 
/* Fetch one 16-bit pixel from srcbuf in native byte order, widened to
   32 bits. The load goes through a Uint16 pointer. */
static Uint32
getpix_16(const Uint8 * srcbuf)
{
    const Uint16 *pixel = (const Uint16 *) srcbuf;

    return pixel[0];
}
1252 
/* Assemble one 24-bit pixel from three consecutive bytes, honouring the
   build's byte order: the first byte is least significant on little-endian
   builds and most significant otherwise. */
static Uint32
getpix_24(const Uint8 * srcbuf)
{
    const Uint8 first = srcbuf[0];
    const Uint8 second = srcbuf[1];
    const Uint8 third = srcbuf[2];

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    return first | (second << 8) | (third << 16);
#else
    return (first << 16) | (second << 8) | third;
#endif
}
1262 
/* Fetch one 32-bit pixel from srcbuf in native byte order. The load goes
   through a Uint32 pointer. */
static Uint32
getpix_32(const Uint8 * srcbuf)
{
    const Uint32 *pixel = (const Uint32 *) srcbuf;

    return pixel[0];
}
1268 
/* reader that fetches one pixel from a byte pointer and widens it to 32 bits */
typedef Uint32(*getpix_func) (const Uint8 *);

/* pixel readers indexed by (bytes per pixel - 1) */
static const getpix_func getpixes[4] = {
    getpix_8, getpix_16, getpix_24, getpix_32
};
1274 
1275 static int
RLEColorkeySurface(SDL_Surface * surface)1276 RLEColorkeySurface(SDL_Surface * surface)
1277 {
1278     Uint8 *rlebuf, *dst;
1279     int maxn;
1280     int y;
1281     Uint8 *srcbuf, *lastline;
1282     int maxsize = 0;
1283     const int bpp = surface->format->BytesPerPixel;
1284     getpix_func getpix;
1285     Uint32 ckey, rgbmask;
1286     int w, h;
1287 
1288     /* calculate the worst case size for the compressed surface */
1289     switch (bpp) {
1290     case 1:
1291         /* worst case is alternating opaque and transparent pixels,
1292            starting with an opaque pixel */
1293         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
1294         break;
1295     case 2:
1296     case 3:
1297         /* worst case is solid runs, at most 255 pixels wide */
1298         maxsize = surface->h * (2 * (surface->w / 255 + 1)
1299                                 + surface->w * bpp) + 2;
1300         break;
1301     case 4:
1302         /* worst case is solid runs, at most 65535 pixels wide */
1303         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
1304                                 + surface->w * 4) + 4;
1305         break;
1306 
1307     default:
1308         return -1;
1309     }
1310 
1311     rlebuf = (Uint8 *) SDL_malloc(maxsize);
1312     if (rlebuf == NULL) {
1313         return SDL_OutOfMemory();
1314     }
1315 
1316     /* Set up the conversion */
1317     srcbuf = (Uint8 *) surface->pixels;
1318     maxn = bpp == 4 ? 65535 : 255;
1319     dst = rlebuf;
1320     rgbmask = ~surface->format->Amask;
1321     ckey = surface->map->info.colorkey & rgbmask;
1322     lastline = dst;
1323     getpix = getpixes[bpp - 1];
1324     w = surface->w;
1325     h = surface->h;
1326 
1327 #define ADD_COUNTS(n, m)            \
1328     if(bpp == 4) {              \
1329         ((Uint16 *)dst)[0] = n;     \
1330         ((Uint16 *)dst)[1] = m;     \
1331         dst += 4;               \
1332     } else {                \
1333         dst[0] = n;             \
1334         dst[1] = m;             \
1335         dst += 2;               \
1336     }
1337 
1338     for (y = 0; y < h; y++) {
1339         int x = 0;
1340         int blankline = 0;
1341         do {
1342             int run, skip, len;
1343             int runstart;
1344             int skipstart = x;
1345 
1346             /* find run of transparent, then opaque pixels */
1347             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
1348                 x++;
1349             runstart = x;
1350             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
1351                 x++;
1352             skip = runstart - skipstart;
1353             if (skip == w)
1354                 blankline = 1;
1355             run = x - runstart;
1356 
1357             /* encode segment */
1358             while (skip > maxn) {
1359                 ADD_COUNTS(maxn, 0);
1360                 skip -= maxn;
1361             }
1362             len = MIN(run, maxn);
1363             ADD_COUNTS(skip, len);
1364             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1365             dst += len * bpp;
1366             run -= len;
1367             runstart += len;
1368             while (run) {
1369                 len = MIN(run, maxn);
1370                 ADD_COUNTS(0, len);
1371                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1372                 dst += len * bpp;
1373                 runstart += len;
1374                 run -= len;
1375             }
1376             if (!blankline)
1377                 lastline = dst;
1378         } while (x < w);
1379 
1380         srcbuf += surface->pitch;
1381     }
1382     dst = lastline;             /* back up bast trailing blank lines */
1383     ADD_COUNTS(0, 0);
1384 
1385 #undef ADD_COUNTS
1386 
1387     /* Now that we have it encoded, release the original pixels */
1388     if (!(surface->flags & SDL_PREALLOC)) {
1389         SDL_SIMDFree(surface->pixels);
1390         surface->pixels = NULL;
1391         surface->flags &= ~SDL_SIMD_ALIGNED;
1392     }
1393 
1394     /* reallocate the buffer to release unused memory */
1395     {
1396         /* If SDL_realloc returns NULL, the original block is left intact */
1397         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
1398         if (!p)
1399             p = rlebuf;
1400         surface->map->data = p;
1401     }
1402 
1403     return 0;
1404 }
1405 
1406 int
SDL_RLESurface(SDL_Surface * surface)1407 SDL_RLESurface(SDL_Surface * surface)
1408 {
1409     int flags;
1410 
1411     /* Clear any previous RLE conversion */
1412     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
1413         SDL_UnRLESurface(surface, 1);
1414     }
1415 
1416     /* We don't support RLE encoding of bitmaps */
1417     if (surface->format->BitsPerPixel < 8) {
1418         return -1;
1419     }
1420 
1421     /* Make sure the pixels are available */
1422     if (!surface->pixels) {
1423         return -1;
1424     }
1425 
1426     flags = surface->map->info.flags;
1427     if (flags & SDL_COPY_COLORKEY) {
1428         /* ok */
1429     } else if ((flags & SDL_COPY_BLEND) && surface->format->Amask) {
1430         /* ok */
1431     } else {
1432         /* If we don't have colorkey or blending, nothing to do... */
1433         return -1;
1434     }
1435 
1436     /* Pass on combinations not supported */
1437     if ((flags & SDL_COPY_MODULATE_COLOR) ||
1438         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
1439         (flags & (SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) ||
1440         (flags & SDL_COPY_NEAREST)) {
1441         return -1;
1442     }
1443 
1444     /* Encode and set up the blit */
1445     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
1446         if (!surface->map->identity) {
1447             return -1;
1448         }
1449         if (RLEColorkeySurface(surface) < 0) {
1450             return -1;
1451         }
1452         surface->map->blit = SDL_RLEBlit;
1453         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
1454     } else {
1455         if (RLEAlphaSurface(surface) < 0) {
1456             return -1;
1457         }
1458         surface->map->blit = SDL_RLEAlphaBlit;
1459         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
1460     }
1461 
1462     /* The surface is now accelerated */
1463     surface->flags |= SDL_RLEACCEL;
1464 
1465     return (0);
1466 }
1467 
1468 /*
1469  * Un-RLE a surface with pixel alpha
1470  * This may not give back exactly the image before RLE-encoding; all
1471  * completely transparent pixels will be lost, and color and alpha depth
1472  * may have been reduced (when encoding for 16bpp targets).
1473  */
1474 static SDL_bool
UnRLEAlpha(SDL_Surface * surface)1475 UnRLEAlpha(SDL_Surface * surface)
1476 {
1477     Uint8 *srcbuf;
1478     Uint32 *dst;
1479     SDL_PixelFormat *sf = surface->format;
1480     RLEDestFormat *df = surface->map->data;
1481     int (*uncopy_opaque) (Uint32 *, void *, int,
1482                           RLEDestFormat *, SDL_PixelFormat *);
1483     int (*uncopy_transl) (Uint32 *, void *, int,
1484                           RLEDestFormat *, SDL_PixelFormat *);
1485     int w = surface->w;
1486     int bpp = df->BytesPerPixel;
1487 
1488     if (bpp == 2) {
1489         uncopy_opaque = uncopy_opaque_16;
1490         uncopy_transl = uncopy_transl_16;
1491     } else {
1492         uncopy_opaque = uncopy_transl = uncopy_32;
1493     }
1494 
1495     surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
1496     if (!surface->pixels) {
1497         return (SDL_FALSE);
1498     }
1499     surface->flags |= SDL_SIMD_ALIGNED;
1500     /* fill background with transparent pixels */
1501     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
1502 
1503     dst = surface->pixels;
1504     srcbuf = (Uint8 *) (df + 1);
1505     for (;;) {
1506         /* copy opaque pixels */
1507         int ofs = 0;
1508         do {
1509             unsigned run;
1510             if (bpp == 2) {
1511                 ofs += srcbuf[0];
1512                 run = srcbuf[1];
1513                 srcbuf += 2;
1514             } else {
1515                 ofs += ((Uint16 *) srcbuf)[0];
1516                 run = ((Uint16 *) srcbuf)[1];
1517                 srcbuf += 4;
1518             }
1519             if (run) {
1520                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
1521                 ofs += run;
1522             } else if (!ofs) {
1523                 goto end_function;
1524             }
1525         } while (ofs < w);
1526 
1527         /* skip padding if needed */
1528         if (bpp == 2)
1529             srcbuf += (uintptr_t) srcbuf & 2;
1530 
1531         /* copy translucent pixels */
1532         ofs = 0;
1533         do {
1534             unsigned run;
1535             ofs += ((Uint16 *) srcbuf)[0];
1536             run = ((Uint16 *) srcbuf)[1];
1537             srcbuf += 4;
1538             if (run) {
1539                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
1540                 ofs += run;
1541             }
1542         } while (ofs < w);
1543         dst += surface->pitch >> 2;
1544     }
1545 
1546 end_function:
1547     return (SDL_TRUE);
1548 }
1549 
1550 void
SDL_UnRLESurface(SDL_Surface * surface,int recode)1551 SDL_UnRLESurface(SDL_Surface * surface, int recode)
1552 {
1553     if (surface->flags & SDL_RLEACCEL) {
1554         surface->flags &= ~SDL_RLEACCEL;
1555 
1556         if (recode && !(surface->flags & SDL_PREALLOC)) {
1557             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
1558                 SDL_Rect full;
1559 
1560                 /* re-create the original surface */
1561                 surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
1562                 if (!surface->pixels) {
1563                     /* Oh crap... */
1564                     surface->flags |= SDL_RLEACCEL;
1565                     return;
1566                 }
1567                 surface->flags |= SDL_SIMD_ALIGNED;
1568 
1569                 /* fill it with the background color */
1570                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
1571 
1572                 /* now render the encoded surface */
1573                 full.x = full.y = 0;
1574                 full.w = surface->w;
1575                 full.h = surface->h;
1576                 SDL_RLEBlit(surface, &full, surface, &full);
1577             } else {
1578                 if (!UnRLEAlpha(surface)) {
1579                     /* Oh crap... */
1580                     surface->flags |= SDL_RLEACCEL;
1581                     return;
1582                 }
1583             }
1584         }
1585         surface->map->info.flags &=
1586             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
1587 
1588         SDL_free(surface->map->data);
1589         surface->map->data = NULL;
1590     }
1591 }
1592 
1593 #endif /* SDL_HAVE_RLE */
1594 
1595 /* vi: set ts=4 sw=4 expandtab: */
1596