1 /*
2   Simple DirectMedia Layer
3   Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4 
5   This software is provided 'as-is', without any express or implied
6   warranty.  In no event will the authors be held liable for any damages
7   arising from the use of this software.
8 
9   Permission is granted to anyone to use this software for any purpose,
10   including commercial applications, and to alter it and redistribute it
11   freely, subject to the following restrictions:
12 
13   1. The origin of this software must not be misrepresented; you must not
14      claim that you wrote the original software. If you use this software
15      in a product, an acknowledgment in the product documentation would be
16      appreciated but is not required.
17   2. Altered source versions must be plainly marked as such, and must not be
18      misrepresented as being the original software.
19   3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22 
23 #include "SDL_video.h"
24 #include "SDL_blit.h"
25 #include "SDL_cpuinfo.h"
26 
27 
28 #ifdef __SSE__
29 /* *INDENT-OFF* */
30 
/*
 * SSE_BEGIN declares c128, a 128-bit value whose four 32-bit lanes all hold
 * 'color'.  It expands to declarations followed by assignments and expects a
 * variable named 'color' to be in scope where it is used.
 */
#if !defined(_MSC_VER) || defined(__clang__)
/* Stage the four lanes in an array declared via DECLARE_ALIGNED(..., 16),
   then read the array back as a single __m128. */
#define SSE_BEGIN \
    __m128 c128; \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    cccc[0] = cccc[1] = cccc[2] = cccc[3] = color; \
    c128 = *(__m128 *)cccc;
#else
/* MSVC (without clang): write the lanes through the m128_u32 member. */
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = c128.m128_u32[1] = \
    c128.m128_u32[2] = c128.m128_u32[3] = color;
#endif
48 
/*
 * SSE_WORK writes c128 to memory in 64-byte chunks: it runs n / 64 times,
 * each time issuing four 16-byte _mm_stream_ps stores at p and then moving p
 * forward by 64.  It uses i, n, p and c128 from the surrounding scope and
 * does not modify n.
 */
#define SSE_WORK \
    for (i = n / 64; i != 0; --i) { \
        _mm_stream_ps((float *)&p[0], c128); \
        _mm_stream_ps((float *)&p[16], c128); \
        _mm_stream_ps((float *)&p[32], c128); \
        _mm_stream_ps((float *)&p[48], c128); \
        p += 64; \
    }

/* SSE_END closes an SSE_BEGIN section; it currently expands to nothing. */
#define SSE_END
59 
/*
 * DEFINE_SSE_FILLRECT(bpp, type) generates SDL_FillRect<bpp>SSE(), which
 * fills h rows of w pixels starting at 'pixels', stepping 'pitch' bytes
 * between rows.  'bpp' is the pixel size in bytes and 'type' the integer type
 * used for single-pixel stores.
 *
 * For every row holding at least 64 bytes, individual pixels are stored until
 * p reaches a 16-byte boundary, SSE_WORK then writes the 64-byte chunks, and
 * whatever is left (n & 63 bytes) is stored pixel by pixel.  Shorter rows are
 * written entirely by the per-pixel loop.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    SSE_BEGIN; \
 \
    for (; h--; pixels += pitch) { \
        int tail; \
 \
        p = pixels; \
        n = w * bpp; \
 \
        if (n > 63) { \
            /* 16 here means p already sits on a 16-byte boundary */ \
            int lead = 16 - ((uintptr_t)p & 15); \
            if (lead < 16) { \
                n -= lead; \
                for (lead /= bpp; lead > 0; --lead) { \
                    *((type *)p) = (type)color; \
                    p += bpp; \
                } \
            } \
            SSE_WORK; \
        } \
 \
        /* SSE_WORK leaves n untouched, so n & 63 is the unwritten rest */ \
        for (tail = (n & 63) / bpp; tail > 0; --tail) { \
            *((type *)p) = (type)color; \
            p += bpp; \
        } \
    } \
 \
    SSE_END; \
}
98 
99 static void
SDL_FillRect1SSE(Uint8 * pixels,int pitch,Uint32 color,int w,int h)100 SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
101 {
102     int i, n;
103 
104     SSE_BEGIN;
105     while (h--) {
106         Uint8 *p = pixels;
107         n = w;
108 
109         if (n > 63) {
110             int adjust = 16 - ((uintptr_t)p & 15);
111             if (adjust) {
112                 n -= adjust;
113                 SDL_memset(p, color, adjust);
114                 p += adjust;
115             }
116             SSE_WORK;
117         }
118         if (n & 63) {
119             int remainder = (n & 63);
120             SDL_memset(p, color, remainder);
121         }
122         pixels += pitch;
123     }
124 
125     SSE_END;
126 }
127 /* DEFINE_SSE_FILLRECT(1, Uint8) */
128 DEFINE_SSE_FILLRECT(2, Uint16)
129 DEFINE_SSE_FILLRECT(4, Uint32)
130 
131 /* *INDENT-ON* */
132 #endif /* __SSE__ */
133 
134 static void
SDL_FillRect1(Uint8 * pixels,int pitch,Uint32 color,int w,int h)135 SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
136 {
137     int n;
138     Uint8 *p = NULL;
139 
140     while (h--) {
141         n = w;
142         p = pixels;
143 
144         if (n > 3) {
145             switch ((uintptr_t) p & 3) {
146             case 1:
147                 *p++ = (Uint8) color;
148                 --n;                    SDL_FALLTHROUGH;
149             case 2:
150                 *p++ = (Uint8) color;
151                 --n;                    SDL_FALLTHROUGH;
152             case 3:
153                 *p++ = (Uint8) color;
154                 --n;
155             }
156             SDL_memset4(p, color, (n >> 2));
157         }
158         if (n & 3) {
159             p += (n & ~3);
160             switch (n & 3) {
161             case 3:
162                 *p++ = (Uint8) color;   SDL_FALLTHROUGH;
163             case 2:
164                 *p++ = (Uint8) color;   SDL_FALLTHROUGH;
165             case 1:
166                 *p++ = (Uint8) color;
167             }
168         }
169         pixels += pitch;
170     }
171 }
172 
173 static void
SDL_FillRect2(Uint8 * pixels,int pitch,Uint32 color,int w,int h)174 SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
175 {
176     int n;
177     Uint16 *p = NULL;
178 
179     while (h--) {
180         n = w;
181         p = (Uint16 *) pixels;
182 
183         if (n > 1) {
184             if ((uintptr_t) p & 2) {
185                 *p++ = (Uint16) color;
186                 --n;
187             }
188             SDL_memset4(p, color, (n >> 1));
189         }
190         if (n & 1) {
191             p[n - 1] = (Uint16) color;
192         }
193         pixels += pitch;
194     }
195 }
196 
197 static void
SDL_FillRect3(Uint8 * pixels,int pitch,Uint32 color,int w,int h)198 SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
199 {
200 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
201     Uint8 b1 = (Uint8) (color & 0xFF);
202     Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
203     Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
204 #elif SDL_BYTEORDER == SDL_BIG_ENDIAN
205     Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
206     Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
207     Uint8 b3 = (Uint8) (color & 0xFF);
208 #endif
209     int n;
210     Uint8 *p = NULL;
211 
212     while (h--) {
213         n = w;
214         p = pixels;
215 
216         while (n--) {
217             *p++ = b1;
218             *p++ = b2;
219             *p++ = b3;
220         }
221         pixels += pitch;
222     }
223 }
224 
225 static void
SDL_FillRect4(Uint8 * pixels,int pitch,Uint32 color,int w,int h)226 SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
227 {
228     while (h--) {
229         SDL_memset4(pixels, color, w);
230         pixels += pitch;
231     }
232 }
233 
234 /*
235  * This function performs a fast fill of the given rectangle with 'color'
236  */
237 int
SDL_FillRect(SDL_Surface * dst,const SDL_Rect * rect,Uint32 color)238 SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
239 {
240     if (!dst) {
241         return SDL_SetError("Passed NULL destination surface");
242     }
243 
244     /* If 'rect' == NULL, then fill the whole surface */
245     if (!rect) {
246         rect = &dst->clip_rect;
247         /* Don't attempt to fill if the surface's clip_rect is empty */
248         if (SDL_RectEmpty(rect)) {
249             return 0;
250         }
251     }
252 
253     return SDL_FillRects(dst, rect, 1, color);
254 }
255 
#if SDL_ARM_NEON_BLITTERS
/* NEON fill routines; only declared here, their definitions live elsewhere. */
void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

/*
 * Adapters giving the NEON routines the fill_function signature used by
 * SDL_FillRects().  The byte pitch is shifted right by log2 of the pixel size
 * before being passed as dst_stride (presumably a stride in pixels - confirm
 * against the assembly), and 'color' is narrowed to the pixel type.
 */
static void
fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
}

static void
fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, (uint16_t) color);
}

static void
fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, (uint32_t) color);
}
#endif
276 
#if SDL_ARM_SIMD_BLITTERS
/* ARM SIMD fill routines; only declared here, their definitions live elsewhere. */
void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

/*
 * Adapters giving the ARM SIMD routines the fill_function signature used by
 * SDL_FillRects().  The byte pitch is shifted right by log2 of the pixel size
 * before being passed as dst_stride (presumably a stride in pixels - confirm
 * against the assembly), and 'color' is narrowed to the pixel type.
 */
static void
fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
}

static void
fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, (uint16_t) color);
}

static void
fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, (uint32_t) color);
}
#endif
297 
298 int
SDL_FillRects(SDL_Surface * dst,const SDL_Rect * rects,int count,Uint32 color)299 SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
300               Uint32 color)
301 {
302     SDL_Rect clipped;
303     Uint8 *pixels;
304     const SDL_Rect* rect;
305     void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
306     int i;
307 
308     if (!dst) {
309         return SDL_SetError("Passed NULL destination surface");
310     }
311 
312     /* This function doesn't work on surfaces < 8 bpp */
313     if (dst->format->BitsPerPixel < 8) {
314         return SDL_SetError("SDL_FillRect(): Unsupported surface format");
315     }
316 
317     /* Nothing to do */
318     if (dst->w == 0 || dst->h == 0) {
319         return 0;
320     }
321 
322     /* Perform software fill */
323     if (!dst->pixels) {
324         return SDL_SetError("SDL_FillRect(): You must lock the surface");
325     }
326 
327     if (!rects) {
328         return SDL_SetError("SDL_FillRects() passed NULL rects");
329     }
330 
331 #if SDL_ARM_NEON_BLITTERS
332     if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
333         switch (dst->format->BytesPerPixel) {
334         case 1:
335             fill_function = fill_8_neon;
336             break;
337         case 2:
338             fill_function = fill_16_neon;
339             break;
340         case 4:
341             fill_function = fill_32_neon;
342             break;
343         }
344     }
345 #endif
346 #if SDL_ARM_SIMD_BLITTERS
347     if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
348         switch (dst->format->BytesPerPixel) {
349         case 1:
350             fill_function = fill_8_simd;
351             break;
352         case 2:
353             fill_function = fill_16_simd;
354             break;
355         case 4:
356             fill_function = fill_32_simd;
357             break;
358         }
359     }
360 #endif
361 
362     if (fill_function == NULL) {
363         switch (dst->format->BytesPerPixel) {
364         case 1:
365             {
366                 color |= (color << 8);
367                 color |= (color << 16);
368 #ifdef __SSE__
369                 if (SDL_HasSSE()) {
370                     fill_function = SDL_FillRect1SSE;
371                     break;
372                 }
373 #endif
374                 fill_function = SDL_FillRect1;
375                 break;
376             }
377 
378         case 2:
379             {
380                 color |= (color << 16);
381 #ifdef __SSE__
382                 if (SDL_HasSSE()) {
383                     fill_function = SDL_FillRect2SSE;
384                     break;
385                 }
386 #endif
387                 fill_function = SDL_FillRect2;
388                 break;
389             }
390 
391         case 3:
392             /* 24-bit RGB is a slow path, at least for now. */
393             {
394                 fill_function = SDL_FillRect3;
395                 break;
396             }
397 
398         case 4:
399             {
400 #ifdef __SSE__
401                 if (SDL_HasSSE()) {
402                     fill_function = SDL_FillRect4SSE;
403                     break;
404                 }
405 #endif
406                 fill_function = SDL_FillRect4;
407                 break;
408             }
409 
410         default:
411             return SDL_SetError("Unsupported pixel format");
412         }
413     }
414 
415     for (i = 0; i < count; ++i) {
416         rect = &rects[i];
417         /* Perform clipping */
418         if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
419             continue;
420         }
421         rect = &clipped;
422 
423         pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
424                                          rect->x * dst->format->BytesPerPixel;
425 
426         fill_function(pixels, dst->pitch, color, rect->w, rect->h);
427     }
428 
429     /* We're done! */
430     return 0;
431 }
432 
433 /* vi: set ts=4 sw=4 expandtab: */
434